Split cpu time metric into user/system components like prof.log

The total can be calculated from the two parts via Prometheus/Grafana
if desired, so it's more informative to pass them as separate parts.
This commit is contained in:
Tim Wojtulewicz 2024-08-05 13:06:49 -07:00
parent 206f5cd522
commit a6843067e9
4 changed files with 29 additions and 27 deletions

View file

@ -137,13 +137,21 @@ void Manager::InitPostScript() {
return metric;
});
// Gauge "process"/"cpu": combined user and system CPU time, in seconds.
// The lambda handed to GaugeInstance() reads the current figure from
// get_stats() and returns it in the gauge slot of a ClientMetric.
cpu_gauge = GaugeInstance("process", "cpu", {}, "Total user and system CPU time spent", "seconds",
                          []() -> prometheus::ClientMetric {
                              const auto cpu_seconds = get_stats()->cpu;
                              prometheus::ClientMetric sample;
                              sample.gauge.value = cpu_seconds;
                              return sample;
                          });
// Counter "process"/"cpu_user": user-mode CPU time, in seconds. The lambda
// handed to CounterInstance() reads the current figure from get_stats().
cpu_user_counter = CounterInstance("process", "cpu_user", {}, "Total user CPU time spent", "seconds",
                                   []() -> prometheus::ClientMetric {
                                       auto* s = get_stats();
                                       prometheus::ClientMetric metric;
                                       // This instrument is a counter, so the sample belongs in the
                                       // counter slot of the ClientMetric; writing it to the gauge
                                       // slot would leave the counter value at its default.
                                       metric.counter.value = s->cpu_user;
                                       return metric;
                                   });
// Counter "process"/"cpu_system": system-mode CPU time, in seconds. The lambda
// handed to CounterInstance() reads the current figure from get_stats().
cpu_system_counter = CounterInstance("process", "cpu_system", {}, "Total system CPU time spent", "seconds",
                                     []() -> prometheus::ClientMetric {
                                         auto* s = get_stats();
                                         prometheus::ClientMetric metric;
                                         // This instrument is a counter, so the sample belongs in the
                                         // counter slot of the ClientMetric; writing it to the gauge
                                         // slot would leave the counter value at its default.
                                         metric.counter.value = s->cpu_system;
                                         return metric;
                                     });
fds_gauge = GaugeInstance("process", "open_fds", {}, "Number of open file descriptors", "",
[]() -> prometheus::ClientMetric {
@ -623,18 +631,6 @@ void Manager::WaitForPrometheusCallbacks() {
using namespace std::literals;
using namespace zeek::telemetry;
namespace {

// Copies every element of the span `xs`, in order, into a freshly built
// std::vector whose element type is T with any const qualifier removed.
template<class T>
auto toVector(zeek::Span<T> xs) {
    using Element = std::remove_const_t<T>;
    return std::vector<Element>(xs.begin(), xs.end());
}

} // namespace
SCENARIO("telemetry managers provide access to counter families") {
GIVEN("a telemetry manager") {
Manager mgr;

View file

@ -263,7 +263,8 @@ private:
// Handles for the process-level metrics.
// NOTE(review): rss/vms presumably track resident and virtual memory size;
// confirm against where these two are registered.
GaugePtr rss_gauge;
GaugePtr vms_gauge;
// Registered as "process"/"cpu": total user and system CPU time, in seconds.
GaugePtr cpu_gauge;
// Registered as "process"/"cpu_user": user CPU time, in seconds.
CounterPtr cpu_user_counter;
// Registered as "process"/"cpu_system": system CPU time, in seconds.
CounterPtr cpu_system_counter;
// Registered as "process"/"open_fds": number of open file descriptors.
GaugePtr fds_gauge;
std::shared_ptr<prometheus::Registry> prometheus_registry;

View file

@ -34,10 +34,10 @@ process_stats get_process_stats() {
// Ask task_info() for the TASK_THREAD_TIMES_INFO data of the current task
// (mach_task_self()). The CPU fields below are only touched when the call
// reports KERN_SUCCESS; otherwise they keep their previous values.
if ( task_info(mach_task_self(), TASK_THREAD_TIMES_INFO, reinterpret_cast<task_info_t>(&info), &count) ==
KERN_SUCCESS ) {
// Round to milliseconds.
// For each time value the whole seconds are added unchanged, while the
// microseconds part is rounded up (ceil) to a full millisecond and then
// expressed in seconds before being added.
// Combined total: user and system time both accumulate into result.cpu.
result.cpu += info.user_time.seconds;
result.cpu += ceil(info.user_time.microseconds / 1000.0) / 1000.0;
result.cpu += info.system_time.seconds;
result.cpu += ceil(info.system_time.microseconds / 1000.0) / 1000.0;
// The same two components kept apart: user time only ...
result.cpu_user += info.user_time.seconds;
result.cpu_user += ceil(info.user_time.microseconds / 1000.0) / 1000.0;
// ... and system time only.
result.cpu_system += info.system_time.seconds;
result.cpu_system += ceil(info.system_time.microseconds / 1000.0) / 1000.0;
}
}
// Fetch open file handles.
@ -154,7 +154,8 @@ process_stats get_process_stats() {
// rss is the page count scaled by the page size; vms is stored as given.
result.rss = rss_pages * page_size;
result.vms = vmsize_bytes;
// Tick counts divided by ticks_per_second yield seconds: first the combined
// user + system total, then each component on its own.
result.cpu = static_cast<double>(utime_ticks + stime_ticks) / ticks_per_second;
result.cpu_user = static_cast<double>(utime_ticks) / ticks_per_second;
result.cpu_system = static_cast<double>(stime_ticks) / ticks_per_second;
// Presumably the number of entries found under /proc/self/fd -- confirm
// against count_entries_in_directory().
result.fds = count_entries_in_directory("/proc/self/fd");
}
@ -187,7 +188,10 @@ process_stats get_process_stats() {
if ( kp ) {
result.vms = kp->ki_size;
result.rss = kp->ki_rssize * getpagesize();
result.cpu = static_cast<double>(kp->ki_runtime) / 1000000.0;
result.cpu_user = static_cast<double>(kp->ki_rusage.ru_utime.tv_sec) +
(static_cast<double>(kp->ki_rusage.ru_utime.tv_usec) / 1e6);
result.cpu_system = static_cast<double>(kp->ki_rusage.ru_stime.tv_sec) +
(static_cast<double>(kp->ki_rusage.ru_stime.tv_usec) / 1e6);
struct procstat* procstat = procstat_open_sysctl();
struct filestat_list* files = procstat_getfiles(procstat, kp, 0);

View file

@ -9,7 +9,8 @@ namespace zeek::telemetry::detail {
// Snapshot of per-process resource figures. Every field starts out at zero.
struct process_stats {
    // Resident memory, in bytes (a page count multiplied by the page size).
    int64_t rss{0};
    // Virtual memory size, in bytes.
    int64_t vms{0};
    // Combined user and system CPU time, in seconds.
    double cpu{0.0};
    // User CPU time, in seconds.
    double cpu_user{0.0};
    // System CPU time, in seconds.
    double cpu_system{0.0};
    // Number of open file descriptors.
    int64_t fds{0};
};