From 1cad305e58aeaf328dc5b4d56b441b44491d7179 Mon Sep 17 00:00:00 2001 From: Tim Wojtulewicz Date: Thu, 14 Mar 2024 15:08:08 -0700 Subject: [PATCH] Add support for callbacks for gauges/counters, restore process stat metrics --- CMakeLists.txt | 5 ++-- auxil/prometheus-cpp | 2 +- src/telemetry/Counter.h | 33 +++++++++++++++------ src/telemetry/Gauge.h | 35 +++++++++++++++------- src/telemetry/Histogram.h | 1 - src/telemetry/Manager.cc | 55 ++++++++++++++++------------------- src/telemetry/Manager.h | 20 ++++++++----- src/telemetry/ProcessStats.cc | 33 +++++++++------------ 8 files changed, 103 insertions(+), 81 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0053285c72..3669d8358b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -234,13 +234,13 @@ if (ZEEK_STANDALONE) endif () # Tell zeek_target_link_libraries to add library dependencies as PRIVATE. set(zeek_exe_access PRIVATE) -else () - add_library(zeek_lib STATIC) if (${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD") target_link_libraries(zeek_exe PRIVATE util) target_link_libraries(zeek_exe PRIVATE procstat) endif () +else () + add_library(zeek_lib STATIC) endif () if (TARGET zeek_lib) @@ -258,6 +258,7 @@ if (TARGET zeek_lib) target_link_libraries(zeek_exe PRIVATE util) target_link_libraries(zeek_exe PRIVATE procstat) endif () + endif () # When building our fuzzers, we also need one extra top-level target that diff --git a/auxil/prometheus-cpp b/auxil/prometheus-cpp index 4bd38da318..cdb357ad55 160000 --- a/auxil/prometheus-cpp +++ b/auxil/prometheus-cpp @@ -1 +1 @@ -Subproject commit 4bd38da318ec54af8e2d8d5d0bdbd5eb9bc0784f +Subproject commit cdb357ad556c9ba96cbfa90fed2940fedf101673 diff --git a/src/telemetry/Counter.h b/src/telemetry/Counter.h index b8614cd844..d011ac60c3 100644 --- a/src/telemetry/Counter.h +++ b/src/telemetry/Counter.h @@ -41,7 +41,12 @@ public: return Value(); } - BaseType Value() const noexcept { return static_cast(handle.Value()); } + BaseType Value() const noexcept { + // Use Collect() here instead of Value() to correctly handle metrics with + // callbacks. + auto metric = handle.Collect(); + return static_cast(metric.counter.value); + } bool operator==(const BaseCounter& rhs) const noexcept { return &handle == &rhs.handle; } bool operator!=(const BaseCounter& rhs) const noexcept { return &handle != &rhs.handle; } @@ -49,8 +54,12 @@ public: bool CompareLabels(const prometheus::Labels& lbls) const { return labels == lbls; } protected: - explicit BaseCounter(FamilyType& family, const prometheus::Labels& labels) noexcept - : handle(family.Add(labels)), labels(labels) {} + explicit BaseCounter(FamilyType& family, const prometheus::Labels& labels, + prometheus::CollectCallbackPtr callback = nullptr) noexcept + : handle(family.Add(labels)), labels(labels) { + if ( callback ) + handle.AddCollectCallback(callback); + } Handle& handle; prometheus::Labels labels; @@ -63,7 +72,9 @@ protected: class IntCounter : public BaseCounter { public: static inline const char* OpaqueName = "IntCounterMetricVal"; - explicit IntCounter(FamilyType& family, const prometheus::Labels& labels) noexcept : BaseCounter(family, labels) {} + explicit IntCounter(FamilyType& family, const prometheus::Labels& labels, + prometheus::CollectCallbackPtr callback = nullptr) noexcept + : BaseCounter(family, labels, callback) {} }; /** @@ -72,7 +83,9 @@ public: class DblCounter : public BaseCounter { public: static inline const char* OpaqueName = "DblCounterMetricVal"; - explicit DblCounter(FamilyType& family, const prometheus::Labels& labels) noexcept : BaseCounter(family, labels) {} + explicit DblCounter(FamilyType& family, const prometheus::Labels& labels, + prometheus::CollectCallbackPtr callback = nullptr) noexcept + : BaseCounter(family, labels, callback) {} }; template @@ -89,7 +102,8 @@ public: * Returns the metrics handle for given labels, creating a new instance * lazily if necessary. */ - std::shared_ptr GetOrAdd(Span labels) { + std::shared_ptr GetOrAdd(Span labels, + prometheus::CollectCallbackPtr callback = nullptr) { prometheus::Labels p_labels = BuildPrometheusLabels(labels); auto check = [&](const std::shared_ptr& counter) { return counter->CompareLabels(p_labels); }; @@ -97,7 +111,7 @@ public: if ( auto it = std::find_if(counters.begin(), counters.end(), check); it != counters.end() ) return *it; - auto counter = std::make_shared(family, p_labels); + auto counter = std::make_shared(family, p_labels, callback); counters.push_back(counter); return counter; } @@ -105,8 +119,9 @@ public: /** * @copydoc GetOrAdd */ - std::shared_ptr GetOrAdd(std::initializer_list labels) { - return GetOrAdd(Span{labels.begin(), labels.size()}); + std::shared_ptr GetOrAdd(std::initializer_list labels, + prometheus::CollectCallbackPtr callback = nullptr) { + return GetOrAdd(Span{labels.begin(), labels.size()}, callback); } std::vector>& GetAllCounters() { return counters; } diff --git a/src/telemetry/Gauge.h b/src/telemetry/Gauge.h index 24b30ff009..7fecb3a3be 100644 --- a/src/telemetry/Gauge.h +++ b/src/telemetry/Gauge.h @@ -59,7 +59,12 @@ public: return Value(); } - BaseType Value() const noexcept { return static_cast(handle.Value()); } + BaseType Value() const noexcept { + // Use Collect() here instead of Value() to correctly handle metrics + // with callbacks. + auto metric = handle.Collect(); + return static_cast(metric.gauge.value); + } /** * Directly sets the value of the gauge. @@ -72,8 +77,12 @@ public: bool CompareLabels(const prometheus::Labels& lbls) const { return labels == lbls; } protected: - explicit BaseGauge(FamilyType& family, const prometheus::Labels& labels) noexcept - : handle(family.Add(labels)), labels(labels) {} + explicit BaseGauge(FamilyType& family, const prometheus::Labels& labels, + prometheus::CollectCallbackPtr callback = nullptr) noexcept + : handle(family.Add(labels)), labels(labels) { + if ( callback ) + handle.AddCollectCallback(callback); + } Handle& handle; prometheus::Labels labels; @@ -88,7 +97,9 @@ class IntGauge : public BaseGauge { public: static inline const char* OpaqueName = "IntGaugeMetricVal"; - explicit IntGauge(FamilyType& family, const prometheus::Labels& labels) noexcept : BaseGauge(family, labels) {} + explicit IntGauge(FamilyType& family, const prometheus::Labels& labels, + prometheus::CollectCallbackPtr callback = nullptr) noexcept + : BaseGauge(family, labels, callback) {} IntGauge(const IntGauge&) = delete; IntGauge& operator=(const IntGauge&) = delete; @@ -102,7 +113,9 @@ class DblGauge : public BaseGauge { public: static inline const char* OpaqueName = "DblGaugeMetricVal"; - explicit DblGauge(FamilyType& family, const prometheus::Labels& labels) noexcept : BaseGauge(family, labels) {} + explicit DblGauge(FamilyType& family, const prometheus::Labels& labels, + prometheus::CollectCallbackPtr callback = nullptr) noexcept + : BaseGauge(family, labels, callback) {} DblGauge(const DblGauge&) = delete; DblGauge& operator=(const DblGauge&) = delete; @@ -121,7 +134,8 @@ public: * Returns the metrics handle for given labels, creating a new instance * lazily if necessary. */ - std::shared_ptr GetOrAdd(Span labels) { + std::shared_ptr GetOrAdd(Span labels, + prometheus::CollectCallbackPtr callback = nullptr) { prometheus::Labels p_labels = BuildPrometheusLabels(labels); auto check = [&](const std::shared_ptr& gauge) { return gauge->CompareLabels(p_labels); }; @@ -129,7 +143,7 @@ public: if ( auto it = std::find_if(gauges.begin(), gauges.end(), check); it != gauges.end() ) return *it; - auto gauge = std::make_shared(family, p_labels); + auto gauge = std::make_shared(family, p_labels, callback); gauges.push_back(gauge); return gauge; } @@ -137,8 +151,9 @@ public: /** * @copydoc GetOrAdd */ - std::shared_ptr GetOrAdd(std::initializer_list labels) { - return GetOrAdd(Span{labels.begin(), labels.size()}); + std::shared_ptr GetOrAdd(std::initializer_list labels, + prometheus::CollectCallbackPtr callback = nullptr) { + return GetOrAdd(Span{labels.begin(), labels.size()}, callback); } std::vector>& GetAllGauges() { return gauges; } @@ -196,7 +211,6 @@ public: std::string_view unit = "", bool is_sum = false) : BaseGaugeFamily(prefix, name, labels, helptext, std::move(registry), unit, is_sum) {} - IntGaugeFamily(const IntGaugeFamily&) noexcept = default; IntGaugeFamily& operator=(const IntGaugeFamily&) noexcept = delete; @@ -215,7 +229,6 @@ public: std::string_view unit = "", bool is_sum = false) : BaseGaugeFamily(prefix, name, labels, helptext, std::move(registry), unit, is_sum) {} - DblGaugeFamily(const DblGaugeFamily&) noexcept = default; DblGaugeFamily& operator=(const DblGaugeFamily&) noexcept = delete; diff --git a/src/telemetry/Histogram.h b/src/telemetry/Histogram.h index fdcf26e30c..efe9c0f154 100644 --- a/src/telemetry/Histogram.h +++ b/src/telemetry/Histogram.h @@ -28,7 +28,6 @@ public: void Observe(BaseType value) noexcept { handle.Observe(value); } /// @return The sum of all observed values. - // TODO BaseType Sum() const noexcept { auto metric = handle.Collect(); return static_cast(metric.histogram.sample_sum); diff --git a/src/telemetry/Manager.cc b/src/telemetry/Manager.cc index bf4364ead1..78a34f190c 100644 --- a/src/telemetry/Manager.cc +++ b/src/telemetry/Manager.cc @@ -89,42 +89,37 @@ void Manager::InitPostScript() { return &this->current_process_stats; }; -/* - rss_gauge = - GaugeInstance("process", "resident_memory", {}, "Resident memory size", "bytes", false, - [](metrics_api::ObserverResult r, void* state) { - auto* s = get_stats(); - opentelemetry::nostd::get< - opentelemetry::nostd::shared_ptr>>(r) - ->Observe(s->rss); - }); + rss_gauge = GaugeInstance("process", "resident_memory", {}, "Resident memory size", "bytes", false, + []() -> prometheus::ClientMetric { + auto* s = get_stats(); + prometheus::ClientMetric metric; + metric.gauge.value = static_cast(s->rss); + return metric; + }); - vms_gauge = - GaugeInstance("process", "virtual_memory", {}, "Virtual memory size", "bytes", false, - [](metrics_api::ObserverResult r, void* state) { - auto* s = get_stats(); - opentelemetry::nostd::get< - opentelemetry::nostd::shared_ptr>>(r) - ->Observe(s->vms); - }); + vms_gauge = GaugeInstance("process", "virtual_memory", {}, "Virtual memory size", "bytes", false, + []() -> prometheus::ClientMetric { + auto* s = get_stats(); + prometheus::ClientMetric metric; + metric.gauge.value = static_cast(s->vms); + return metric; + }); cpu_gauge = GaugeInstance("process", "cpu", {}, "Total user and system CPU time spent", "seconds", false, - [](metrics_api::ObserverResult r, void* state) { + []() -> prometheus::ClientMetric { auto* s = get_stats(); - opentelemetry::nostd::get< - opentelemetry::nostd::shared_ptr>>(r) - ->Observe(s->cpu); + prometheus::ClientMetric metric; + metric.gauge.value = s->cpu; + return metric; }); - fds_gauge = - GaugeInstance("process", "open_fds", {}, "Number of open file descriptors", "", false, - [](metrics_api::ObserverResult r, void* state) { - auto* s = get_stats(); - opentelemetry::nostd::get< - opentelemetry::nostd::shared_ptr>>(r) - ->Observe(s->fds); - }); -*/ + fds_gauge = GaugeInstance("process", "open_fds", {}, "Number of open file descriptors", "", false, + []() -> prometheus::ClientMetric { + auto* s = get_stats(); + prometheus::ClientMetric metric; + metric.gauge.value = static_cast(s->fds); + return metric; + }); #endif } diff --git a/src/telemetry/Manager.h b/src/telemetry/Manager.h index 0567def827..c235e74a43 100644 --- a/src/telemetry/Manager.h +++ b/src/telemetry/Manager.h @@ -122,10 +122,11 @@ public: template std::shared_ptr> CounterInstance(std::string_view prefix, std::string_view name, Span labels, std::string_view helptext, - std::string_view unit = "", bool is_sum = false) { + std::string_view unit = "", bool is_sum = false, + prometheus::CollectCallbackPtr callback = nullptr) { return WithLabelNames(labels, [&, this](auto labelNames) { auto family = CounterFamily(prefix, name, labelNames, helptext, unit, is_sum); - return family->GetOrAdd(labels); + return family->GetOrAdd(labels, callback); }); } @@ -134,9 +135,10 @@ public: std::shared_ptr> CounterInstance(std::string_view prefix, std::string_view name, std::initializer_list labels, std::string_view helptext, std::string_view unit = "", - bool is_sum = false) { + bool is_sum = false, + prometheus::CollectCallbackPtr callback = nullptr) { auto lbl_span = Span{labels.begin(), labels.size()}; - return CounterInstance(prefix, name, lbl_span, helptext, unit, is_sum); + return CounterInstance(prefix, name, lbl_span, helptext, unit, is_sum, callback); } /** @@ -199,10 +201,11 @@ public: template std::shared_ptr> GaugeInstance(std::string_view prefix, std::string_view name, Span labels, std::string_view helptext, - std::string_view unit = "", bool is_sum = false) { + std::string_view unit = "", bool is_sum = false, + prometheus::CollectCallbackPtr callback = nullptr) { return WithLabelNames(labels, [&, this](auto labelNames) { auto family = GaugeFamily(prefix, name, labelNames, helptext, unit, is_sum); - return family->GetOrAdd(labels); + return family->GetOrAdd(labels, callback); }); } @@ -210,9 +213,10 @@ public: template std::shared_ptr> GaugeInstance(std::string_view prefix, std::string_view name, std::initializer_list labels, std::string_view helptext, - std::string_view unit = "", bool is_sum = false) { + std::string_view unit = "", bool is_sum = false, + prometheus::CollectCallbackPtr callback = nullptr) { auto lbl_span = Span{labels.begin(), labels.size()}; - return GaugeInstance(prefix, name, lbl_span, helptext, unit, is_sum); + return GaugeInstance(prefix, name, lbl_span, helptext, unit, is_sum, callback); } // Forces the compiler to use the type `Span` instead of trying to diff --git a/src/telemetry/ProcessStats.cc b/src/telemetry/ProcessStats.cc index b5c3c48022..c0e9d322b7 100644 --- a/src/telemetry/ProcessStats.cc +++ b/src/telemetry/ProcessStats.cc @@ -173,32 +173,27 @@ process_stats get_process_stats() { process_stats result; struct kinfo_proc* kp = kinfo_getproc(getpid()); - result.vms = kp->ki_size; - result.rss = kp->ki_rssize * getpagesize(); - result.cpu = static_cast(kp->ki_runtime) / 1000000.0; + if ( kp ) { + result.vms = kp->ki_size; + result.rss = kp->ki_rssize * getpagesize(); + result.cpu = static_cast(kp->ki_runtime) / 1000000.0; - struct procstat* procstat = procstat_open_sysctl(); - struct filestat_list* files = procstat_getfiles(procstat, kp, 0); - struct filestat* file = nullptr; + struct procstat* procstat = procstat_open_sysctl(); + struct filestat_list* files = procstat_getfiles(procstat, kp, 0); + struct filestat* file = nullptr; - // Use one of the looping methods from sys/queue.h instead of - // implementing this by hand. - STAILQ_FOREACH(file, files, next) - result.fds++; + // Use one of the looping methods from sys/queue.h instead of + // implementing this by hand. + STAILQ_FOREACH(file, files, next) + result.fds++; - procstat_freeprocs(procstat, kp); - procstat_close(procstat); + procstat_freeprocs(procstat, kp); + procstat_close(procstat); + } return result; } -#else - -process_stats get_process_stats() { - process_stats result = {0}; - return result; -} - } // namespace zeek::telemetry::detail #endif