Split the CPU time metric into user and system components, as prof.log does

The total can still be calculated from the two parts via Prometheus/Grafana
if desired, so it's more informative to export them as separate metrics.
This commit is contained in:
Tim Wojtulewicz 2024-08-05 13:06:49 -07:00
parent 206f5cd522
commit a6843067e9
4 changed files with 29 additions and 27 deletions

View file

@ -137,11 +137,19 @@ void Manager::InitPostScript() {
return metric; return metric;
}); });
cpu_gauge = GaugeInstance("process", "cpu", {}, "Total user and system CPU time spent", "seconds", cpu_user_counter = CounterInstance("process", "cpu_user", {}, "Total user CPU time spent", "seconds",
[]() -> prometheus::ClientMetric { []() -> prometheus::ClientMetric {
auto* s = get_stats(); auto* s = get_stats();
prometheus::ClientMetric metric; prometheus::ClientMetric metric;
metric.gauge.value = s->cpu; metric.gauge.value = s->cpu_user;
return metric;
});
cpu_system_counter = CounterInstance("process", "cpu_system", {}, "Total system CPU time spent", "seconds",
[]() -> prometheus::ClientMetric {
auto* s = get_stats();
prometheus::ClientMetric metric;
metric.gauge.value = s->cpu_system;
return metric; return metric;
}); });
@ -623,18 +631,6 @@ void Manager::WaitForPrometheusCallbacks() {
using namespace std::literals; using namespace std::literals;
using namespace zeek::telemetry; using namespace zeek::telemetry;
namespace {
template<class T>
auto toVector(zeek::Span<T> xs) {
std::vector<std::remove_const_t<T>> result;
for ( auto&& x : xs )
result.emplace_back(x);
return result;
}
} // namespace
SCENARIO("telemetry managers provide access to counter families") { SCENARIO("telemetry managers provide access to counter families") {
GIVEN("a telemetry manager") { GIVEN("a telemetry manager") {
Manager mgr; Manager mgr;

View file

@ -263,7 +263,8 @@ private:
GaugePtr rss_gauge; GaugePtr rss_gauge;
GaugePtr vms_gauge; GaugePtr vms_gauge;
GaugePtr cpu_gauge; CounterPtr cpu_user_counter;
CounterPtr cpu_system_counter;
GaugePtr fds_gauge; GaugePtr fds_gauge;
std::shared_ptr<prometheus::Registry> prometheus_registry; std::shared_ptr<prometheus::Registry> prometheus_registry;

View file

@ -34,10 +34,10 @@ process_stats get_process_stats() {
if ( task_info(mach_task_self(), TASK_THREAD_TIMES_INFO, reinterpret_cast<task_info_t>(&info), &count) == if ( task_info(mach_task_self(), TASK_THREAD_TIMES_INFO, reinterpret_cast<task_info_t>(&info), &count) ==
KERN_SUCCESS ) { KERN_SUCCESS ) {
// Round to milliseconds. // Round to milliseconds.
result.cpu += info.user_time.seconds; result.cpu_user += info.user_time.seconds;
result.cpu += ceil(info.user_time.microseconds / 1000.0) / 1000.0; result.cpu_user += ceil(info.user_time.microseconds / 1000.0) / 1000.0;
result.cpu += info.system_time.seconds; result.cpu_system += info.system_time.seconds;
result.cpu += ceil(info.system_time.microseconds / 1000.0) / 1000.0; result.cpu_system += ceil(info.system_time.microseconds / 1000.0) / 1000.0;
} }
} }
// Fetch open file handles. // Fetch open file handles.
@ -154,7 +154,8 @@ process_stats get_process_stats() {
result.rss = rss_pages * page_size; result.rss = rss_pages * page_size;
result.vms = vmsize_bytes; result.vms = vmsize_bytes;
result.cpu = static_cast<double>(utime_ticks + stime_ticks) / ticks_per_second; result.cpu_user = static_cast<double>(utime_ticks) / ticks_per_second;
result.cpu_system = static_cast<double>(stime_ticks) / ticks_per_second;
result.fds = count_entries_in_directory("/proc/self/fd"); result.fds = count_entries_in_directory("/proc/self/fd");
} }
@ -187,7 +188,10 @@ process_stats get_process_stats() {
if ( kp ) { if ( kp ) {
result.vms = kp->ki_size; result.vms = kp->ki_size;
result.rss = kp->ki_rssize * getpagesize(); result.rss = kp->ki_rssize * getpagesize();
result.cpu = static_cast<double>(kp->ki_runtime) / 1000000.0; result.cpu_user = static_cast<double>(kp->ki_rusage.ru_utime.tv_sec) +
(static_cast<double>(kp->ki_rusage.ru_utime.tv_usec) / 1e6);
result.cpu_system = static_cast<double>(kp->ki_rusage.ru_stime.tv_sec) +
(static_cast<double>(kp->ki_rusage.ru_stime.tv_usec) / 1e6);
struct procstat* procstat = procstat_open_sysctl(); struct procstat* procstat = procstat_open_sysctl();
struct filestat_list* files = procstat_getfiles(procstat, kp, 0); struct filestat_list* files = procstat_getfiles(procstat, kp, 0);

View file

@ -9,7 +9,8 @@ namespace zeek::telemetry::detail {
struct process_stats { struct process_stats {
int64_t rss = 0; int64_t rss = 0;
int64_t vms = 0; int64_t vms = 0;
double cpu = 0.0; double cpu_user = 0.0;
double cpu_system = 0.0;
int64_t fds = 0; int64_t fds = 0;
}; };