From c3c6ee5a2b4dce248814700b76a64304e4cdf588 Mon Sep 17 00:00:00 2001 From: Arne Welzel Date: Wed, 26 Mar 2025 11:42:52 +0100 Subject: [PATCH] telemetry: Run callbacks at collect time Calling collect_metrics() from a script would not invoke metric callbacks, resulting in most of the process metrics to be zero when a Zeek process isn't scraped via Prometheus. Fixes #4309 --- src/telemetry/Manager.cc | 17 +++++++---- src/telemetry/Manager.h | 5 ++++ .../out | 13 +++++++++ .../frameworks/telemetry/process-collect.zeek | 29 +++++++++++++++++++ 4 files changed, 58 insertions(+), 6 deletions(-) create mode 100644 testing/btest/Baseline/scripts.base.frameworks.telemetry.process-collect/out create mode 100644 testing/btest/scripts/base/frameworks/telemetry/process-collect.zeek diff --git a/src/telemetry/Manager.cc b/src/telemetry/Manager.cc index d6e8bb7aa3..9b4b3212e3 100644 --- a/src/telemetry/Manager.cc +++ b/src/telemetry/Manager.cc @@ -278,6 +278,14 @@ void Manager::InvokeTelemetrySyncHook() { in_sync_hook = false; } + +void Manager::UpdateMetrics() { + InvokeTelemetrySyncHook(); + + for ( const auto& [name, f] : families ) + f->RunCallbacks(); +} + ValPtr Manager::CollectMetrics(std::string_view prefix_pattern, std::string_view name_pattern) { static auto metrics_vector_type = zeek::id::find_type("Telemetry::MetricVector"); static auto string_vec_type = zeek::id::find_type("string_vec"); @@ -290,7 +298,7 @@ ValPtr Manager::CollectMetrics(std::string_view prefix_pattern, std::string_view static auto metric_opts_type = zeek::id::find_type("Telemetry::MetricOpts"); static auto metric_type_idx = metric_opts_type->FieldOffset("metric_type"); - InvokeTelemetrySyncHook(); + UpdateMetrics(); VectorValPtr ret_val = make_intrusive(metrics_vector_type); @@ -369,7 +377,7 @@ ValPtr Manager::CollectHistogramMetrics(std::string_view prefix_pattern, std::st static auto metric_opts_type = zeek::id::find_type("Telemetry::MetricOpts"); static auto metric_type_idx = metric_opts_type->FieldOffset("metric_type"); - InvokeTelemetrySyncHook(); + UpdateMetrics(); VectorValPtr ret_val = make_intrusive(metrics_vector_type); @@ -597,10 +605,7 @@ void Manager::ProcessFd(int fd, int flags) { collector_flare.Extinguish(); - for ( const auto& [name, f] : families ) - f->RunCallbacks(); - - InvokeTelemetrySyncHook(); + UpdateMetrics(); collector_response_idx = collector_request_idx; diff --git a/src/telemetry/Manager.h b/src/telemetry/Manager.h index 9523385b77..0d90d9e4ad 100644 --- a/src/telemetry/Manager.h +++ b/src/telemetry/Manager.h @@ -264,6 +264,11 @@ private: */ void InvokeTelemetrySyncHook(); + /** + * Runs the telemetry sync hooks and metric callbacks. + */ + void UpdateMetrics(); + bool in_sync_hook = false; std::map> families; diff --git a/testing/btest/Baseline/scripts.base.frameworks.telemetry.process-collect/out b/testing/btest/Baseline/scripts.base.frameworks.telemetry.process-collect/out new file mode 100644 index 0000000000..a080efc2a5 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.telemetry.process-collect/out @@ -0,0 +1,13 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +zeek_init +process_cpu_user_seconds_total, [], >0.0, good +process_cpu_system_seconds_total, [], >0.0, good +process_resident_memory_bytes, [], >0.0, good +process_virtual_memory_bytes, [], >0.0, good +process_open_fds, [], >0.0, good +zeek_done +process_cpu_user_seconds_total, [], >0.0, good +process_cpu_system_seconds_total, [], >0.0, good +process_resident_memory_bytes, [], >0.0, good +process_virtual_memory_bytes, [], >0.0, good +process_open_fds, [], >0.0, good diff --git a/testing/btest/scripts/base/frameworks/telemetry/process-collect.zeek b/testing/btest/scripts/base/frameworks/telemetry/process-collect.zeek new file mode 100644 index 0000000000..c12d72e252 --- /dev/null +++ b/testing/btest/scripts/base/frameworks/telemetry/process-collect.zeek @@ -0,0 +1,29 @@ +# @TEST-DOC: Calling collect_metrics() invokes callbacks for process (and other) metrics. +# Not compilable to C++ due to globals being initialized to a record that +# has an opaque type as a field. +# @TEST-REQUIRES: test "${ZEEK_USE_CPP}" != "1" +# +# @TEST-EXEC: zeek -r $TRACES/http/get.trace -b %INPUT >out +# @TEST-EXEC: btest-diff out + + +@load base/frameworks/telemetry + +function print_metrics(ms: vector of Telemetry::Metric) { + for (_, m in ms) + print m$opts$name, m$label_values, m$value > 0.0 ? ">0.0, good" : "0.0, bad"; +} + +event zeek_init() + { + print "zeek_init"; + local ms = Telemetry::collect_metrics("process"); + print_metrics(ms); + } + +event zeek_done() + { + print "zeek_done"; + local ms = Telemetry::collect_metrics("process"); + print_metrics(ms); + }