diff --git a/CMakeLists.txt b/CMakeLists.txt index 3669d8358b..60de8b05fc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -236,7 +236,7 @@ if (ZEEK_STANDALONE) set(zeek_exe_access PRIVATE) if (${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD") - target_link_libraries(zeek_exe PRIVATE util) + target_link_libraries(zeek_exe PRIVATE /usr/lib/libutil.so) target_link_libraries(zeek_exe PRIVATE procstat) endif () else () @@ -255,8 +255,8 @@ if (TARGET zeek_lib) set(zeek_lib_access PRIVATE) if (${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD") - target_link_libraries(zeek_exe PRIVATE util) - target_link_libraries(zeek_exe PRIVATE procstat) + target_link_libraries(zeek_lib PRIVATE /usr/lib/libutil.so) + target_link_libraries(zeek_lib PRIVATE procstat) endif () endif () diff --git a/scripts/base/init-bare.zeek b/scripts/base/init-bare.zeek index 32626832bb..d77493032c 100644 --- a/scripts/base/init-bare.zeek +++ b/scripts/base/init-bare.zeek @@ -5790,11 +5790,17 @@ export { ## for any metrics involving process state (CPU, memory, etc). prefix: string; - ## The human-readable name of the metric. + ## The human-readable name of the metric. This is set to the + ## full prefixed name including the unit when returned from + ## :zeek:see:`Telemetry::collect_metrics` or + ## :zeek:see:`Telemetry::collect_histogram_metrics`. name: string; - ## The unit of the metric. Set to a blank string if this is a unit-less metric. - unit: string; + ## The unit of the metric. Leave this unset for a unit-less + ## metric. Will be unset when returned from + ## :zeek:see:`Telemetry::collect_metrics` or + ## :zeek:see:`Telemetry::collect_histogram_metrics`. + unit: string &optional; ## Documentation for this metric. 
help_text: string &optional; diff --git a/scripts/policy/misc/stats.zeek b/scripts/policy/misc/stats.zeek index 4562f91a93..cf5bb1cbd0 100644 --- a/scripts/policy/misc/stats.zeek +++ b/scripts/policy/misc/stats.zeek @@ -91,35 +91,35 @@ export { global bytes_received_cf = Telemetry::register_counter_family([ $prefix="zeek", $name="net-received-bytes", - $unit="1", + $unit="", $help_text="Total number of bytes received", ]); global packets_received_cf = Telemetry::register_counter_family([ $prefix="zeek", $name="net-received-packets", - $unit="1", + $unit="", $help_text="Total number of packets received", ]); global packets_dropped_cf = Telemetry::register_counter_family([ $prefix="zeek", $name="net-dropped-packets", - $unit="1", + $unit="", $help_text="Total number of packets dropped", ]); global link_packets_cf = Telemetry::register_counter_family([ $prefix="zeek", $name="net-link-packets", - $unit="1", + $unit="", $help_text="Total number of packets on the packet source link before filtering", ]); global packets_filtered_cf = Telemetry::register_counter_family([ $prefix="zeek", $name="net-filtered-packets", - $unit="1", + $unit="", $help_text="Total number of packets filtered", ]); diff --git a/src/EventHandler.cc b/src/EventHandler.cc index 46be3fb7c3..9e5066189a 100644 --- a/src/EventHandler.cc +++ b/src/EventHandler.cc @@ -47,7 +47,7 @@ void EventHandler::Call(Args* vl, bool no_remote, double ts) { if ( ! 
call_count ) { static auto eh_invocations_family = telemetry_mgr->CounterFamily("zeek", "event-handler-invocations", {"name"}, - "Number of times the given event handler was called", "1", true); + "Number of times the given event handler was called", "", true); call_count = eh_invocations_family->GetOrAdd({{"name", name}}); } diff --git a/src/logging/Manager.cc b/src/logging/Manager.cc index a3758336bc..515ba829ce 100644 --- a/src/logging/Manager.cc +++ b/src/logging/Manager.cc @@ -419,13 +419,13 @@ Manager::Manager() : plugin::ComponentManager("Log", "Writer"), total_log_stream_writes_family(telemetry_mgr->CounterFamily("zeek", "log-stream-writes", {"module", "stream"}, "Total number of log writes for the given stream.", - "1", true)), + "", true)), total_log_writer_writes_family( telemetry_mgr ->CounterFamily("zeek", "log-writer-writes", {"writer", "module", "stream", "filter-name", "path"}, "Total number of log writes passed to a concrete log writer not vetoed by stream or " "filter policies.", - "1", true)) { + "", true)) { rotations_pending = 0; } diff --git a/src/session/Manager.cc b/src/session/Manager.cc index 9aead4db4f..f6ec7c6275 100644 --- a/src/session/Manager.cc +++ b/src/session/Manager.cc @@ -48,7 +48,7 @@ public: auto active_family = telemetry_mgr->GaugeFamily("zeek", "active-sessions", {"protocol"}, "Active Zeek Sessions"); auto total_family = - telemetry_mgr->CounterFamily("zeek", "total-sessions", {"protocol"}, "Total number of sessions", "1", true); + telemetry_mgr->CounterFamily("zeek", "total-sessions", {"protocol"}, "Total number of sessions", "", true); auto [it, inserted] = entries.insert({protocol, Protocol{active_family, total_family, protocol}}); diff --git a/src/telemetry/CMakeLists.txt b/src/telemetry/CMakeLists.txt index cb6e1b73f9..92983f2930 100644 --- a/src/telemetry/CMakeLists.txt +++ b/src/telemetry/CMakeLists.txt @@ -2,9 +2,9 @@ zeek_add_subdir_library( telemetry SOURCES Manager.cc - MetricFamily.cc Opaques.cc ProcessStats.cc + 
Utils.cc BIFS telemetry.bif) diff --git a/src/telemetry/Counter.h b/src/telemetry/Counter.h index d011ac60c3..8c0f8aae4d 100644 --- a/src/telemetry/Counter.h +++ b/src/telemetry/Counter.h @@ -4,10 +4,11 @@ #include #include -#include +#include #include "zeek/Span.h" #include "zeek/telemetry/MetricFamily.h" +#include "zeek/telemetry/Utils.h" #include "zeek/telemetry/telemetry.bif.h" #include "prometheus/counter.h" @@ -30,7 +31,7 @@ public: * Increments the value by @p amount. * @pre `amount >= 0` */ - void Inc(BaseType amount) noexcept { handle.Increment(amount); } + void Inc(BaseType amount) noexcept { handle->Increment(amount); } /** * Increments the value by 1. @@ -44,7 +45,7 @@ public: BaseType Value() const noexcept { // Use Collect() here instead of Value() to correctly handle metrics with // callbacks. - auto metric = handle.Collect(); + auto metric = handle->Collect(); return static_cast(metric.counter.value); } @@ -54,25 +55,24 @@ public: bool CompareLabels(const prometheus::Labels& lbls) const { return labels == lbls; } protected: - explicit BaseCounter(FamilyType& family, const prometheus::Labels& labels, + explicit BaseCounter(FamilyType* family, const prometheus::Labels& labels, prometheus::CollectCallbackPtr callback = nullptr) noexcept - : handle(family.Add(labels)), labels(labels) { + : handle(&(family->Add(labels))), labels(labels) { if ( callback ) - handle.AddCollectCallback(callback); + handle->AddCollectCallback(callback); } - Handle& handle; + Handle* handle; prometheus::Labels labels; - BaseType last_value = 0; }; /** * A handle to a metric that represents an integer value that can only go up. 
*/ -class IntCounter : public BaseCounter { +class IntCounter final : public BaseCounter { public: static inline const char* OpaqueName = "IntCounterMetricVal"; - explicit IntCounter(FamilyType& family, const prometheus::Labels& labels, + explicit IntCounter(FamilyType* family, const prometheus::Labels& labels, prometheus::CollectCallbackPtr callback = nullptr) noexcept : BaseCounter(family, labels, callback) {} }; @@ -80,10 +80,10 @@ public: /** * A handle to a metric that represents a double value that can only go up. */ -class DblCounter : public BaseCounter { +class DblCounter final : public BaseCounter { public: static inline const char* OpaqueName = "DblCounterMetricVal"; - explicit DblCounter(FamilyType& family, const prometheus::Labels& labels, + explicit DblCounter(FamilyType* family, const prometheus::Labels& labels, prometheus::CollectCallbackPtr callback = nullptr) noexcept : BaseCounter(family, labels, callback) {} }; @@ -92,19 +92,13 @@ template class BaseCounterFamily : public MetricFamily, public std::enable_shared_from_this> { public: - BaseCounterFamily(std::string_view prefix, std::string_view name, Span labels, - std::string_view helptext, std::shared_ptr registry, - std::string_view unit = "", bool is_sum = false) - : MetricFamily(prefix, name, labels, helptext, unit, is_sum), - family(prometheus::BuildCounter().Name(full_name).Help(std::string{helptext}).Register(*registry)) {} - /** * Returns the metrics handle for given labels, creating a new instance * lazily if necessary. 
*/ std::shared_ptr GetOrAdd(Span labels, prometheus::CollectCallbackPtr callback = nullptr) { - prometheus::Labels p_labels = BuildPrometheusLabels(labels); + prometheus::Labels p_labels = detail::BuildPrometheusLabels(labels); auto check = [&](const std::shared_ptr& counter) { return counter->CompareLabels(p_labels); }; @@ -124,63 +118,23 @@ public: return GetOrAdd(Span{labels.begin(), labels.size()}, callback); } - std::vector>& GetAllCounters() { return counters; } - - std::vector Collect() const override { - static auto string_vec_type = zeek::id::find_type("string_vec"); - static auto metric_record_type = zeek::id::find_type("Telemetry::Metric"); - static auto opts_idx = metric_record_type->FieldOffset("opts"); - static auto labels_idx = metric_record_type->FieldOffset("labels"); - static auto value_idx = metric_record_type->FieldOffset("value"); - static auto count_value_idx = metric_record_type->FieldOffset("count_value"); - - RecordValPtr opts_record = GetMetricOptsRecord(); - - std::vector records; - for ( const auto& ctr : counters ) { - auto label_values_vec = make_intrusive(string_vec_type); - for ( const auto& [label_key, label] : ctr->Labels() ) { - // We don't include the endpoint key/value unless it's a prometheus request - if ( label_key != "endpoint" ) - label_values_vec->Append(make_intrusive(label)); - } - - auto r = make_intrusive(metric_record_type); - r->Assign(labels_idx, label_values_vec); - r->Assign(opts_idx, opts_record); - - if constexpr ( std::is_same_v ) - r->Assign(value_idx, zeek::make_intrusive(ctr->Value())); - else { - r->Assign(value_idx, zeek::make_intrusive(static_cast(ctr->Value()))); - r->Assign(count_value_idx, val_mgr->Count(ctr->Value())); - } - - records.push_back(std::move(r)); - } - - return records; - } - protected: - prometheus::Family& family; + BaseCounterFamily(prometheus::Family* family, Span labels) + : MetricFamily(labels), family(family) {} + + prometheus::Family* family; std::vector> counters; }; /** * 
Manages a collection of IntCounter metrics. */ -class IntCounterFamily : public BaseCounterFamily { +class IntCounterFamily final : public BaseCounterFamily { public: static inline const char* OpaqueName = "IntCounterMetricFamilyVal"; - explicit IntCounterFamily(std::string_view prefix, std::string_view name, Span labels, - std::string_view helptext, std::shared_ptr registry, - std::string_view unit = "", bool is_sum = false) - : BaseCounterFamily(prefix, name, labels, helptext, std::move(registry), unit, is_sum) {} - - IntCounterFamily(const IntCounterFamily&) noexcept = default; - IntCounterFamily& operator=(const IntCounterFamily&) noexcept = delete; + explicit IntCounterFamily(prometheus::Family* family, Span labels) + : BaseCounterFamily(family, labels) {} zeek_int_t MetricType() const noexcept override { return BifEnum::Telemetry::MetricType::INT_COUNTER; } }; @@ -188,17 +142,12 @@ public: /** * Manages a collection of DblCounter metrics. */ -class DblCounterFamily : public BaseCounterFamily { +class DblCounterFamily final : public BaseCounterFamily { public: static inline const char* OpaqueName = "DblCounterMetricFamilyVal"; - explicit DblCounterFamily(std::string_view prefix, std::string_view name, Span labels, - std::string_view helptext, std::shared_ptr registry, - std::string_view unit = "", bool is_sum = false) - : BaseCounterFamily(prefix, name, labels, helptext, std::move(registry), unit, is_sum) {} - - DblCounterFamily(const DblCounterFamily&) noexcept = default; - DblCounterFamily& operator=(const DblCounterFamily&) noexcept = delete; + explicit DblCounterFamily(prometheus::Family* family, Span labels) + : BaseCounterFamily(family, labels) {} zeek_int_t MetricType() const noexcept override { return BifEnum::Telemetry::MetricType::DOUBLE_COUNTER; } }; diff --git a/src/telemetry/Gauge.h b/src/telemetry/Gauge.h index 7fecb3a3be..dfe2904161 100644 --- a/src/telemetry/Gauge.h +++ b/src/telemetry/Gauge.h @@ -4,10 +4,11 @@ #include #include -#include 
+#include #include "zeek/Span.h" #include "zeek/telemetry/MetricFamily.h" +#include "zeek/telemetry/Utils.h" #include "zeek/telemetry/telemetry.bif.h" #include "prometheus/family.h" @@ -77,27 +78,26 @@ public: bool CompareLabels(const prometheus::Labels& lbls) const { return labels == lbls; } protected: - explicit BaseGauge(FamilyType& family, const prometheus::Labels& labels, + explicit BaseGauge(FamilyType* family, const prometheus::Labels& labels, prometheus::CollectCallbackPtr callback = nullptr) noexcept - : handle(family.Add(labels)), labels(labels) { + : handle(family->Add(labels)), labels(labels) { if ( callback ) handle.AddCollectCallback(callback); } Handle& handle; prometheus::Labels labels; - BaseType last_value = 0; }; /** * A handle to a metric that represents an integer value. Gauges are more * permissive than counters and also allow decrementing the value. */ -class IntGauge : public BaseGauge { +class IntGauge final : public BaseGauge { public: static inline const char* OpaqueName = "IntGaugeMetricVal"; - explicit IntGauge(FamilyType& family, const prometheus::Labels& labels, + explicit IntGauge(FamilyType* family, const prometheus::Labels& labels, prometheus::CollectCallbackPtr callback = nullptr) noexcept : BaseGauge(family, labels, callback) {} @@ -109,11 +109,11 @@ public: * A handle to a metric that represents a double value. Gauges are more * permissive than counters and also allow decrementing the value. 
*/ -class DblGauge : public BaseGauge { +class DblGauge final : public BaseGauge { public: static inline const char* OpaqueName = "DblGaugeMetricVal"; - explicit DblGauge(FamilyType& family, const prometheus::Labels& labels, + explicit DblGauge(FamilyType* family, const prometheus::Labels& labels, prometheus::CollectCallbackPtr callback = nullptr) noexcept : BaseGauge(family, labels, callback) {} @@ -124,19 +124,13 @@ public: template class BaseGaugeFamily : public MetricFamily, public std::enable_shared_from_this> { public: - BaseGaugeFamily(std::string_view prefix, std::string_view name, Span labels, - std::string_view helptext, std::shared_ptr registry, - std::string_view unit = "", bool is_sum = false) - : MetricFamily(prefix, name, labels, helptext, unit, is_sum), - family(prometheus::BuildGauge().Name(full_name).Help(std::string{helptext}).Register(*registry)) {} - /** * Returns the metrics handle for given labels, creating a new instance * lazily if necessary. */ std::shared_ptr GetOrAdd(Span labels, prometheus::CollectCallbackPtr callback = nullptr) { - prometheus::Labels p_labels = BuildPrometheusLabels(labels); + prometheus::Labels p_labels = detail::BuildPrometheusLabels(labels); auto check = [&](const std::shared_ptr& gauge) { return gauge->CompareLabels(p_labels); }; @@ -156,63 +150,23 @@ public: return GetOrAdd(Span{labels.begin(), labels.size()}, callback); } - std::vector>& GetAllGauges() { return gauges; } - - std::vector Collect() const override { - static auto string_vec_type = zeek::id::find_type("string_vec"); - static auto metric_record_type = zeek::id::find_type("Telemetry::Metric"); - static auto opts_idx = metric_record_type->FieldOffset("opts"); - static auto labels_idx = metric_record_type->FieldOffset("labels"); - static auto value_idx = metric_record_type->FieldOffset("value"); - static auto count_value_idx = metric_record_type->FieldOffset("count_value"); - - RecordValPtr opts_record = GetMetricOptsRecord(); - - std::vector records; - 
for ( const auto& g : gauges ) { - auto label_values_vec = make_intrusive(string_vec_type); - for ( const auto& [label_key, label] : g->Labels() ) { - // We don't include the endpoint key/value unless it's a prometheus request - if ( label_key != "endpoint" ) - label_values_vec->Append(make_intrusive(label)); - } - - auto r = make_intrusive(metric_record_type); - r->Assign(labels_idx, label_values_vec); - r->Assign(opts_idx, opts_record); - - if constexpr ( std::is_same_v ) - r->Assign(value_idx, zeek::make_intrusive(g->Value())); - else { - r->Assign(value_idx, zeek::make_intrusive(static_cast(g->Value()))); - r->Assign(count_value_idx, val_mgr->Count(g->Value())); - } - - records.push_back(std::move(r)); - } - - return records; - } - protected: - prometheus::Family& family; + BaseGaugeFamily(prometheus::Family* family, Span labels) + : MetricFamily(labels), family(family) {} + + prometheus::Family* family; std::vector> gauges; }; /** * Manages a collection of IntGauge metrics. */ -class IntGaugeFamily : public BaseGaugeFamily { +class IntGaugeFamily final : public BaseGaugeFamily { public: static inline const char* OpaqueName = "IntGaugeMetricFamilyVal"; - IntGaugeFamily(std::string_view prefix, std::string_view name, Span labels, - std::string_view helptext, std::shared_ptr registry, - std::string_view unit = "", bool is_sum = false) - : BaseGaugeFamily(prefix, name, labels, helptext, std::move(registry), unit, is_sum) {} - - IntGaugeFamily(const IntGaugeFamily&) noexcept = default; - IntGaugeFamily& operator=(const IntGaugeFamily&) noexcept = delete; + explicit IntGaugeFamily(prometheus::Family* family, Span labels) + : BaseGaugeFamily(family, labels) {} zeek_int_t MetricType() const noexcept override { return BifEnum::Telemetry::MetricType::INT_GAUGE; } }; @@ -220,17 +174,12 @@ public: /** * Manages a collection of DblGauge metrics. 
*/ -class DblGaugeFamily : public BaseGaugeFamily { +class DblGaugeFamily final : public BaseGaugeFamily { public: static inline const char* OpaqueName = "DblGaugeMetricFamilyVal"; - DblGaugeFamily(std::string_view prefix, std::string_view name, Span labels, - std::string_view helptext, std::shared_ptr registry, - std::string_view unit = "", bool is_sum = false) - : BaseGaugeFamily(prefix, name, labels, helptext, std::move(registry), unit, is_sum) {} - - DblGaugeFamily(const DblGaugeFamily&) noexcept = default; - DblGaugeFamily& operator=(const DblGaugeFamily&) noexcept = delete; + explicit DblGaugeFamily(prometheus::Family* family, Span labels) + : BaseGaugeFamily(family, labels) {} zeek_int_t MetricType() const noexcept override { return BifEnum::Telemetry::MetricType::DOUBLE_GAUGE; } }; @@ -240,7 +189,6 @@ namespace detail { template struct GaugeOracle { static_assert(std::is_same::value, "Gauge only supports int64_t and double"); - using type = IntGauge; }; diff --git a/src/telemetry/Histogram.h b/src/telemetry/Histogram.h index efe9c0f154..eddd216a09 100644 --- a/src/telemetry/Histogram.h +++ b/src/telemetry/Histogram.h @@ -4,10 +4,11 @@ #include #include -#include +#include #include "zeek/Span.h" #include "zeek/telemetry/MetricFamily.h" +#include "zeek/telemetry/Utils.h" #include "zeek/telemetry/telemetry.bif.h" #include "prometheus/family.h" @@ -39,9 +40,9 @@ public: bool CompareLabels(const prometheus::Labels& lbls) const { return labels == lbls; } protected: - explicit BaseHistogram(FamilyType& family, const prometheus::Labels& labels, + explicit BaseHistogram(FamilyType* family, const prometheus::Labels& labels, prometheus::Histogram::BucketBoundaries bounds) noexcept - : handle(family.Add(labels, std::move(bounds))), labels(labels) {} + : handle(family->Add(labels, std::move(bounds))), labels(labels) {} Handle& handle; prometheus::Labels labels; @@ -52,11 +53,11 @@ protected: * measurements with integer precision. 
Sorts individual measurements into * configurable buckets. */ -class IntHistogram : public BaseHistogram { +class IntHistogram final : public BaseHistogram { public: static inline const char* OpaqueName = "IntHistogramMetricVal"; - explicit IntHistogram(FamilyType& family, const prometheus::Labels& labels, + explicit IntHistogram(FamilyType* family, const prometheus::Labels& labels, prometheus::Histogram::BucketBoundaries bounds) noexcept : BaseHistogram(family, labels, std::move(bounds)) {} @@ -70,11 +71,11 @@ public: * measurements with integer precision. Sorts individual measurements into * configurable buckets. */ -class DblHistogram : public BaseHistogram { +class DblHistogram final : public BaseHistogram { public: static inline const char* OpaqueName = "DblHistogramMetricVal"; - explicit DblHistogram(FamilyType& family, const prometheus::Labels& labels, + explicit DblHistogram(FamilyType* family, const prometheus::Labels& labels, prometheus::Histogram::BucketBoundaries bounds) noexcept : BaseHistogram(family, labels, std::move(bounds)) {} @@ -92,7 +93,7 @@ public: * lazily if necessary. 
*/ std::shared_ptr GetOrAdd(Span labels) { - prometheus::Labels p_labels = BuildPrometheusLabels(labels); + prometheus::Labels p_labels = detail::BuildPrometheusLabels(labels); auto check = [&](const std::shared_ptr& histo) { return histo->CompareLabels(p_labels); }; @@ -111,94 +112,14 @@ public: return GetOrAdd(Span{labels.begin(), labels.size()}); } - std::vector Collect() const override { - static auto string_vec_type = zeek::id::find_type("string_vec"); - static auto double_vec_type = zeek::id::find_type("double_vec"); - static auto count_vec_type = zeek::id::find_type("index_vec"); - static auto histogram_metric_type = zeek::id::find_type("Telemetry::HistogramMetric"); - static auto labels_idx = histogram_metric_type->FieldOffset("labels"); - static auto values_idx = histogram_metric_type->FieldOffset("values"); - static auto count_values_idx = histogram_metric_type->FieldOffset("count_values"); - - static auto observations_idx = histogram_metric_type->FieldOffset("observations"); - static auto count_observations_idx = histogram_metric_type->FieldOffset("count_observations"); - - static auto sum_idx = histogram_metric_type->FieldOffset("sum"); - static auto count_sum_idx = histogram_metric_type->FieldOffset("count_sum"); - - static auto opts_idx = histogram_metric_type->FieldOffset("opts"); - static auto opts_rt = zeek::id::find_type("Telemetry::MetricOpts"); - static auto bounds_idx = opts_rt->FieldOffset("bounds"); - static auto count_bounds_idx = opts_rt->FieldOffset("count_bounds"); - - RecordValPtr opts_record = GetMetricOptsRecord(); - - std::vector records; - for ( const auto& h : histograms ) { - auto label_values_vec = make_intrusive(string_vec_type); - for ( const auto& [label_key, label] : h->Labels() ) { - // We don't include the endpoint key/value unless it's a prometheus request - if ( label_key != "endpoint" ) - label_values_vec->Append(make_intrusive(label)); - } - - auto r = make_intrusive(histogram_metric_type); - r->Assign(labels_idx, 
label_values_vec); - r->Assign(opts_idx, opts_record); - - auto histo_data = h->Collect(); - - auto counts_double_vec = make_intrusive(double_vec_type); - auto counts_count_vec = make_intrusive(count_vec_type); - uint64_t last = 0.0; - for ( const auto& b : histo_data.bucket ) { - counts_double_vec->Append( - zeek::make_intrusive(static_cast(b.cumulative_count - last))); - counts_count_vec->Append(val_mgr->Count(b.cumulative_count - last)); - last = b.cumulative_count; - } - - // TODO: these could be generated at creation time instead of repeatedly here - auto bounds_vec = make_intrusive(double_vec_type); - auto count_bounds_vec = make_intrusive(count_vec_type); - for ( auto b : boundaries ) { - bounds_vec->Append(zeek::make_intrusive(b)); - count_bounds_vec->Append(val_mgr->Count(static_cast(b))); - } - - bounds_vec->Append(zeek::make_intrusive(std::numeric_limits::infinity())); - count_bounds_vec->Append(val_mgr->Count(std::numeric_limits::infinity())); - - r->Assign(values_idx, counts_double_vec); - r->Assign(observations_idx, zeek::make_intrusive(static_cast(histo_data.sample_count))); - r->Assign(sum_idx, zeek::make_intrusive(histo_data.sample_sum)); - - RecordValPtr local_opts_record = r->GetField(opts_idx); - local_opts_record->Assign(bounds_idx, bounds_vec); - - if constexpr ( ! 
std::is_same_v ) { - r->Assign(count_values_idx, counts_count_vec); - r->Assign(count_observations_idx, val_mgr->Count(histo_data.sample_count)); - r->Assign(count_sum_idx, val_mgr->Count(static_cast(histo_data.sample_sum))); - r->Assign(count_bounds_idx, count_bounds_vec); - } - - records.push_back(std::move(r)); - } - - return records; - } - protected: - BaseHistogramFamily(std::string_view prefix, std::string_view name, Span labels, - Span default_upper_bounds, std::string_view helptext, - std::shared_ptr registry, std::string_view unit = "") - : MetricFamily(prefix, name, labels, helptext, unit, false), - family(prometheus::BuildHistogram().Name(full_name).Help(std::string{helptext}).Register(*registry)) { - std::copy(default_upper_bounds.begin(), default_upper_bounds.end(), std::back_inserter(boundaries)); + BaseHistogramFamily(prometheus::Family* family, Span bounds, + Span labels) + : MetricFamily(labels), family(family) { + std::copy(bounds.begin(), bounds.end(), std::back_inserter(boundaries)); } - prometheus::Family& family; + prometheus::Family* family; prometheus::Histogram::BucketBoundaries boundaries; std::vector> histograms; }; @@ -206,17 +127,13 @@ protected: /** * Manages a collection of IntHistogram metrics. 
*/ -class IntHistogramFamily : public BaseHistogramFamily { +class IntHistogramFamily final : public BaseHistogramFamily { public: static inline const char* OpaqueName = "IntHistogramMetricFamilyVal"; - IntHistogramFamily(std::string_view prefix, std::string_view name, Span labels, - Span default_upper_bounds, std::string_view helptext, - std::shared_ptr registry, std::string_view unit = "") - : BaseHistogramFamily(prefix, name, labels, default_upper_bounds, helptext, std::move(registry), unit) {} - - IntHistogramFamily(const IntHistogramFamily&) noexcept = delete; - IntHistogramFamily& operator=(const IntHistogramFamily&) noexcept = delete; + explicit IntHistogramFamily(prometheus::Family* family, Span bounds, + Span labels) + : BaseHistogramFamily(family, bounds, labels) {} zeek_int_t MetricType() const noexcept override { return BifEnum::Telemetry::MetricType::INT_HISTOGRAM; } }; @@ -224,17 +141,13 @@ public: /** * Manages a collection of DblHistogram metrics. */ -class DblHistogramFamily : public BaseHistogramFamily { +class DblHistogramFamily final : public BaseHistogramFamily { public: static inline const char* OpaqueName = "DblHistogramMetricFamilyVal"; - DblHistogramFamily(std::string_view prefix, std::string_view name, Span labels, - Span default_upper_bounds, std::string_view helptext, - std::shared_ptr registry, std::string_view unit = "") - : BaseHistogramFamily(prefix, name, labels, default_upper_bounds, helptext, std::move(registry), unit) {} - - DblHistogramFamily(const DblHistogramFamily&) noexcept = delete; - DblHistogramFamily& operator=(const DblHistogramFamily&) noexcept = delete; + explicit DblHistogramFamily(prometheus::Family* family, Span bounds, + Span labels) + : BaseHistogramFamily(family, bounds, labels) {} zeek_int_t MetricType() const noexcept override { return BifEnum::Telemetry::MetricType::DOUBLE_HISTOGRAM; } }; diff --git a/src/telemetry/Manager.cc b/src/telemetry/Manager.cc index 9c242e2f96..cfe5ee56ff 100644 --- 
a/src/telemetry/Manager.cc +++ b/src/telemetry/Manager.cc @@ -119,33 +119,141 @@ void Manager::InitPostScript() { #endif } -std::shared_ptr Manager::LookupFamily(std::string_view prefix, std::string_view name) const { - auto check = [&](const auto& fam) { return fam.second->Prefix() == prefix && fam.second->Name() == name; }; +// -- collect metric stuff ----------------------------------------------------- - if ( auto it = std::find_if(families.begin(), families.end(), check); it != families.end() ) +RecordValPtr Manager::GetMetricOptsRecord(const prometheus::MetricFamily& metric_family) { + // Avoid recreating this repeatedly + if ( auto it = opts_records.find(metric_family.name); it != opts_records.end() ) return it->second; - return nullptr; -} + // Get the opt record + static auto string_vec_type = zeek::id::find_type("string_vec"); + static auto metric_opts_type = zeek::id::find_type("Telemetry::MetricOpts"); -// -- collect metric stuff ----------------------------------------------------- + static auto prefix_idx = metric_opts_type->FieldOffset("prefix"); + static auto name_idx = metric_opts_type->FieldOffset("name"); + static auto help_text_idx = metric_opts_type->FieldOffset("help_text"); + static auto unit_idx = metric_opts_type->FieldOffset("unit"); + static auto labels_idx = metric_opts_type->FieldOffset("labels"); + static auto is_total_idx = metric_opts_type->FieldOffset("is_total"); + static auto metric_type_idx = metric_opts_type->FieldOffset("metric_type"); + + auto record_val = make_intrusive(metric_opts_type); + record_val->Assign(name_idx, make_intrusive(metric_family.name)); + record_val->Assign(help_text_idx, make_intrusive(metric_family.help)); + + // prometheus-cpp doesn't store the prefix information separately. we pull the word + // before the first underscore as the prefix instead. 
The Prometheus docs state + // that the prefix "should exist" not "must exist" so it's possible this could result + // in incorrect data, but it should be correct for all of our uses. + std::string prefix; + auto first_underscore = metric_family.name.find('_'); + if ( first_underscore != std::string::npos ) + prefix = metric_family.name.substr(0, first_underscore); + + record_val->Assign(prefix_idx, make_intrusive(prefix)); + + // Assume that a metric ending with _total is always a summed metric so we can set that. + record_val->Assign(is_total_idx, val_mgr->Bool(util::ends_with(metric_family.name, "_total"))); + + auto label_names_vec = make_intrusive(string_vec_type); + + // Check if this is a Zeek-internal metric. We keep a little more information about a metric + // for these than we do for ones that were inserted into prom-cpp directly. + if ( auto it = families.find(metric_family.name); it != families.end() ) { + record_val->Assign(metric_type_idx, + zeek::BifType::Enum::Telemetry::MetricType->GetEnumVal(it->second->MetricType())); + + for ( const auto& lbl : it->second->LabelNames() ) + label_names_vec->Append(make_intrusive(lbl)); + } + else { + // prom-cpp stores everything internally as doubles + if ( metric_family.type == prometheus::MetricType::Counter ) + record_val->Assign(metric_type_idx, zeek::BifType::Enum::Telemetry::MetricType->GetEnumVal( + BifEnum::Telemetry::MetricType::DOUBLE_COUNTER)); + if ( metric_family.type == prometheus::MetricType::Gauge ) + record_val->Assign(metric_type_idx, zeek::BifType::Enum::Telemetry::MetricType->GetEnumVal( + BifEnum::Telemetry::MetricType::DOUBLE_GAUGE)); + if ( metric_family.type == prometheus::MetricType::Histogram ) + record_val->Assign(metric_type_idx, zeek::BifType::Enum::Telemetry::MetricType->GetEnumVal( + BifEnum::Telemetry::MetricType::DOUBLE_HISTOGRAM)); + + // prom-cpp doesn't store label names anywhere other than in each instrument. 
just assume + // they're always going to be the same across all of the instruments and use the names from + // the first one. + // TODO: is this check here ever false? + if ( ! metric_family.metric.empty() ) + for ( const auto& lbl : metric_family.metric[0].label ) + label_names_vec->Append(make_intrusive(lbl.name)); + } + + record_val->Assign(labels_idx, label_names_vec); + + opts_records.insert({metric_family.name, record_val}); + + return record_val; +} ValPtr Manager::CollectMetrics(std::string_view prefix_pattern, std::string_view name_pattern) { static auto metrics_vector_type = zeek::id::find_type("any_vec"); + static auto string_vec_type = zeek::id::find_type("string_vec"); + static auto metric_record_type = zeek::id::find_type("Telemetry::Metric"); + static auto opts_idx = metric_record_type->FieldOffset("opts"); + static auto labels_idx = metric_record_type->FieldOffset("labels"); + static auto value_idx = metric_record_type->FieldOffset("value"); + static auto count_value_idx = metric_record_type->FieldOffset("count_value"); + + static auto metric_opts_type = zeek::id::find_type("Telemetry::MetricOpts"); + static auto metric_type_idx = metric_opts_type->FieldOffset("metric_type"); + VectorValPtr ret_val = make_intrusive(metrics_vector_type); - // Build a map of all of the families that match the patterns based on their full prefixed - // name. This will let us match those families against the items returned from the otel reader. - for ( const auto& [name, family] : families ) { - // Histograms are handled by CollectHistogramMetrics and should be ignored here. - if ( family->MetricType() == BifEnum::Telemetry::MetricType::INT_HISTOGRAM || - family->MetricType() == BifEnum::Telemetry::MetricType::DOUBLE_HISTOGRAM ) + // Due to the name containing the full information about a metric including a potential unit add an + // asterisk to the end of the full pattern so matches work correctly. 
+ std::string full_pattern = util::fmt("%s_%s", prefix_pattern.data(), name_pattern.data()); + if ( full_pattern[full_pattern.size() - 1] != '*' ) + full_pattern.append("*"); + + auto collected = prometheus_registry->Collect(); + for ( const auto& fam : collected ) { + if ( fam.type == prometheus::MetricType::Histogram ) continue; - if ( family->Matches(prefix_pattern, name_pattern) ) { - auto records = family->Collect(); - for ( const auto& r : records ) - ret_val->Append(r); + if ( fnmatch(full_pattern.c_str(), fam.name.c_str(), 0) == FNM_NOMATCH ) + continue; + + RecordValPtr opts_record = GetMetricOptsRecord(fam); + + for ( const auto& inst : fam.metric ) { + auto label_values_vec = make_intrusive(string_vec_type); + for ( const auto& label : inst.label ) { + // We don't include the endpoint key/value unless it's a prometheus request + if ( label.name != "endpoint" ) + label_values_vec->Append(make_intrusive(label.value)); + } + + auto r = make_intrusive(metric_record_type); + r->Assign(labels_idx, label_values_vec); + r->Assign(opts_idx, opts_record); + + if ( fam.type == prometheus::MetricType::Counter ) + r->Assign(value_idx, zeek::make_intrusive(inst.counter.value)); + else if ( fam.type == prometheus::MetricType::Gauge ) + r->Assign(value_idx, zeek::make_intrusive(inst.gauge.value)); + + // Use the information from GetMetricOptsRecord to check whether we need to add the integer + // fields, or if this is a double.
+ if ( opts_record->GetField(metric_type_idx)->Get() == + BifEnum::Telemetry::MetricType::INT_COUNTER ) { + r->Assign(count_value_idx, val_mgr->Count(static_cast(inst.counter.value))); + } + else if ( opts_record->GetField(metric_type_idx)->Get() == + BifEnum::Telemetry::MetricType::INT_GAUGE ) { + r->Assign(count_value_idx, val_mgr->Count(static_cast(inst.gauge.value))); + } + + ret_val->Append(r); } } @@ -154,19 +262,104 @@ ValPtr Manager::CollectMetrics(std::string_view prefix_pattern, std::string_view ValPtr Manager::CollectHistogramMetrics(std::string_view prefix_pattern, std::string_view name_pattern) { static auto metrics_vector_type = zeek::id::find_type("any_vec"); + static auto string_vec_type = zeek::id::find_type("string_vec"); + static auto double_vec_type = zeek::id::find_type("double_vec"); + static auto count_vec_type = zeek::id::find_type("index_vec"); + static auto histogram_metric_type = zeek::id::find_type("Telemetry::HistogramMetric"); + static auto labels_idx = histogram_metric_type->FieldOffset("labels"); + static auto values_idx = histogram_metric_type->FieldOffset("values"); + static auto count_values_idx = histogram_metric_type->FieldOffset("count_values"); + + static auto observations_idx = histogram_metric_type->FieldOffset("observations"); + static auto count_observations_idx = histogram_metric_type->FieldOffset("count_observations"); + + static auto sum_idx = histogram_metric_type->FieldOffset("sum"); + static auto count_sum_idx = histogram_metric_type->FieldOffset("count_sum"); + + static auto opts_idx = histogram_metric_type->FieldOffset("opts"); + static auto opts_rt = zeek::id::find_type("Telemetry::MetricOpts"); + static auto bounds_idx = opts_rt->FieldOffset("bounds"); + static auto count_bounds_idx = opts_rt->FieldOffset("count_bounds"); + + static auto metric_opts_type = zeek::id::find_type("Telemetry::MetricOpts"); + static auto metric_type_idx = metric_opts_type->FieldOffset("metric_type"); + VectorValPtr ret_val = 
make_intrusive(metrics_vector_type); - // Build a map of all of the families that match the patterns based on their full prefixed - // name. This will let us match those families against the items returned from the otel reader. - for ( const auto& [name, family] : families ) { - if ( family->MetricType() != BifEnum::Telemetry::MetricType::INT_HISTOGRAM && - family->MetricType() != BifEnum::Telemetry::MetricType::DOUBLE_HISTOGRAM ) + // Due to the name containing the full information about a metric including a potential unit add an + // asterisk to the end of the full pattern so matches work correctly. + std::string full_pattern = util::fmt("%s_%s", prefix_pattern.data(), name_pattern.data()); + if ( full_pattern[full_pattern.size() - 1] != '*' ) + full_pattern.append("*"); + + auto collected = prometheus_registry->Collect(); + for ( const auto& fam : collected ) { + if ( fam.type != prometheus::MetricType::Histogram ) continue; - if ( family->Matches(prefix_pattern, name_pattern) ) { - auto records = family->Collect(); - for ( const auto& r : records ) - ret_val->Append(r); + if ( fnmatch(full_pattern.c_str(), fam.name.c_str(), 0) == FNM_NOMATCH ) + continue; + + RecordValPtr opts_record = GetMetricOptsRecord(fam); + + for ( const auto& inst : fam.metric ) { + auto label_values_vec = make_intrusive(string_vec_type); + for ( const auto& label : inst.label ) { + // We don't include the endpoint key/value unless it's a prometheus request + if ( label.name != "endpoint" ) + label_values_vec->Append(make_intrusive(label.value)); + } + + auto r = make_intrusive(histogram_metric_type); + r->Assign(labels_idx, label_values_vec); + r->Assign(opts_idx, opts_record); + + auto double_values_vec = make_intrusive(double_vec_type); + auto count_values_vec = make_intrusive(count_vec_type); + std::vector boundaries; + uint64_t last = 0.0; + for ( const auto& b : inst.histogram.bucket ) { + double_values_vec->Append( + zeek::make_intrusive(static_cast(b.cumulative_count - last))); + 
count_values_vec->Append(val_mgr->Count(b.cumulative_count - last)); + last = b.cumulative_count; + boundaries.push_back(b.upper_bound); + } + + // TODO: these could be stored somehow to avoid recreating them repeatedly + auto bounds_vec = make_intrusive(double_vec_type); + auto count_bounds_vec = make_intrusive(count_vec_type); + for ( auto b : boundaries ) { + bounds_vec->Append(zeek::make_intrusive(b)); + + // The boundaries from prom-cpp include the infinite boundary in double. + // This can't be converted safely to int64_t, so check for that case and + // set the int64_t version. + if ( b != std::numeric_limits::infinity() ) + count_bounds_vec->Append(val_mgr->Count(static_cast(b))); + else + count_bounds_vec->Append(val_mgr->Count(std::numeric_limits::infinity())); + } + + r->Assign(values_idx, double_values_vec); + r->Assign(observations_idx, + zeek::make_intrusive(static_cast(inst.histogram.sample_count))); + r->Assign(sum_idx, zeek::make_intrusive(inst.histogram.sample_sum)); + + RecordValPtr local_opts_record = r->GetField(opts_idx); + local_opts_record->Assign(bounds_idx, bounds_vec); + + // Use the information from GetMetricOptsRecord to check whether we need to add the integer + // fields, or if this is a double.
+ if ( opts_record->GetField(metric_type_idx)->Get() == + BifEnum::Telemetry::MetricType::INT_HISTOGRAM ) { + r->Assign(count_values_idx, count_values_vec); + r->Assign(count_observations_idx, val_mgr->Count(inst.histogram.sample_count)); + r->Assign(count_sum_idx, val_mgr->Count(static_cast(inst.histogram.sample_sum))); + local_opts_record->Assign(count_bounds_idx, count_bounds_vec); + } + + ret_val->Append(r); } } @@ -225,16 +418,8 @@ SCENARIO("telemetry managers provide access to counter families") { GIVEN("a telemetry manager") { Manager mgr; WHEN("retrieving an IntCounter family") { - auto family = mgr.CounterFamily("zeek", "requests", {"method"}, "test", "1", true); - THEN("the family object stores the parameters") { - CHECK_EQ(family->Prefix(), "zeek"sv); - CHECK_EQ(family->Name(), "requests"sv); - CHECK_EQ(toVector(family->LabelNames()), std::vector{"method"s}); - CHECK_EQ(family->Helptext(), "test"sv); - CHECK_EQ(family->Unit(), "1"sv); - CHECK_EQ(family->IsSum(), true); - } - AND_THEN("GetOrAdd returns the same metric for the same labels") { + auto family = mgr.CounterFamily("zeek", "requests", {"method"}, "test", "", true); + THEN("GetOrAdd returns the same metric for the same labels") { auto first = family->GetOrAdd({{"method", "get"}}); auto second = family->GetOrAdd({{"method", "get"}}); CHECK_EQ(first, second); @@ -247,15 +432,7 @@ SCENARIO("telemetry managers provide access to counter families") { } WHEN("retrieving a DblCounter family") { auto family = mgr.CounterFamily("zeek", "runtime", {"query"}, "test", "seconds", true); - THEN("the family object stores the parameters") { - CHECK_EQ(family->Prefix(), "zeek"sv); - CHECK_EQ(family->Name(), "runtime"sv); - CHECK_EQ(toVector(family->LabelNames()), std::vector{"query"s}); - CHECK_EQ(family->Helptext(), "test"sv); - CHECK_EQ(family->Unit(), "seconds"sv); - CHECK_EQ(family->IsSum(), true); - } - AND_THEN("GetOrAdd returns the same metric for the same labels") { + THEN("GetOrAdd returns the same metric 
for the same labels") { auto first = family->GetOrAdd({{"query", "foo"}}); auto second = family->GetOrAdd({{"query", "foo"}}); CHECK_EQ(first, second); @@ -273,16 +450,8 @@ SCENARIO("telemetry managers provide access to gauge families") { GIVEN("a telemetry manager") { Manager mgr; WHEN("retrieving an IntGauge family") { - auto family = mgr.GaugeFamily("zeek", "open-connections", {"protocol"}, "test", "1"); - THEN("the family object stores the parameters") { - CHECK_EQ(family->Prefix(), "zeek"sv); - CHECK_EQ(family->Name(), "open_connections"sv); - CHECK_EQ(toVector(family->LabelNames()), std::vector{"protocol"s}); - CHECK_EQ(family->Helptext(), "test"sv); - CHECK_EQ(family->Unit(), "1"sv); - CHECK_EQ(family->IsSum(), false); - } - AND_THEN("GetOrAdd returns the same metric for the same labels") { + auto family = mgr.GaugeFamily("zeek", "open-connections", {"protocol"}, "test", ""); + THEN("GetOrAdd returns the same metric for the same labels") { auto first = family->GetOrAdd({{"protocol", "tcp"}}); auto second = family->GetOrAdd({{"protocol", "tcp"}}); CHECK_EQ(first, second); @@ -295,15 +464,7 @@ SCENARIO("telemetry managers provide access to gauge families") { } WHEN("retrieving a DblGauge family") { auto family = mgr.GaugeFamily("zeek", "water-level", {"river"}, "test", "meters"); - THEN("the family object stores the parameters") { - CHECK_EQ(family->Prefix(), "zeek"sv); - CHECK_EQ(family->Name(), "water_level"sv); - CHECK_EQ(toVector(family->LabelNames()), std::vector{"river"s}); - CHECK_EQ(family->Helptext(), "test"sv); - CHECK_EQ(family->Unit(), "meters"sv); - CHECK_EQ(family->IsSum(), false); - } - AND_THEN("GetOrAdd returns the same metric for the same labels") { + THEN("GetOrAdd returns the same metric for the same labels") { auto first = family->GetOrAdd({{"river", "Sacramento"}}); auto second = family->GetOrAdd({{"river", "Sacramento"}}); CHECK_EQ(first, second); @@ -323,15 +484,7 @@ SCENARIO("telemetry managers provide access to histogram families") { 
WHEN("retrieving an IntHistogram family") { int64_t buckets[] = {10, 20}; auto family = mgr.HistogramFamily("zeek", "payload-size", {"protocol"}, buckets, "test", "bytes"); - THEN("the family object stores the parameters") { - CHECK_EQ(family->Prefix(), "zeek"sv); - CHECK_EQ(family->Name(), "payload_size"sv); - CHECK_EQ(toVector(family->LabelNames()), std::vector{"protocol"s}); - CHECK_EQ(family->Helptext(), "test"sv); - CHECK_EQ(family->Unit(), "bytes"sv); - CHECK_EQ(family->IsSum(), false); - } - AND_THEN("GetOrAdd returns the same metric for the same labels") { + THEN("GetOrAdd returns the same metric for the same labels") { auto first = family->GetOrAdd({{"protocol", "tcp"}}); auto second = family->GetOrAdd({{"protocol", "tcp"}}); CHECK_EQ(first, second); @@ -345,15 +498,7 @@ SCENARIO("telemetry managers provide access to histogram families") { WHEN("retrieving a DblHistogram family") { double buckets[] = {10.0, 20.0}; auto family = mgr.HistogramFamily("zeek", "parse-time", {"protocol"}, buckets, "test", "seconds"); - THEN("the family object stores the parameters") { - CHECK_EQ(family->Prefix(), "zeek"sv); - CHECK_EQ(family->Name(), "parse_time"sv); - CHECK_EQ(toVector(family->LabelNames()), std::vector{"protocol"s}); - CHECK_EQ(family->Helptext(), "test"sv); - CHECK_EQ(family->Unit(), "seconds"sv); - CHECK_EQ(family->IsSum(), false); - } - AND_THEN("GetOrAdd returns the same metric for the same labels") { + THEN("GetOrAdd returns the same metric for the same labels") { auto first = family->GetOrAdd({{"protocol", "tcp"}}); auto second = family->GetOrAdd({{"protocol", "tcp"}}); CHECK_EQ(first, second); diff --git a/src/telemetry/Manager.h b/src/telemetry/Manager.h index a060d23271..717dfd55a0 100644 --- a/src/telemetry/Manager.h +++ b/src/telemetry/Manager.h @@ -15,6 +15,7 @@ #include "zeek/telemetry/Gauge.h" #include "zeek/telemetry/Histogram.h" #include "zeek/telemetry/ProcessStats.h" +#include "zeek/telemetry/Utils.h" #include "prometheus/exposer.h" #include 
"prometheus/registry.h" @@ -26,8 +27,6 @@ using RecordValPtr = IntrusivePtr; namespace zeek::telemetry { -class OtelReader; - /** * Manages a collection of metric families. */ @@ -69,33 +68,31 @@ public: * @param helptext Short explanation of the metric. * @param unit Unit of measurement. * @param is_sum Indicates whether this metric accumulates something, where only the total value is of interest. - * @param callback Passing a callback method will enable asynchronous mode. The callback method will be called by - * the metrics subsystem whenever data is requested. */ template auto CounterFamily(std::string_view prefix, std::string_view name, Span labels, std::string_view helptext, std::string_view unit = "", bool is_sum = false) { - auto fam = LookupFamily(prefix, name); + auto full_name = detail::BuildFullPrometheusName(prefix, name, unit, is_sum); + + auto& prom_fam = + prometheus::BuildCounter().Name(full_name).Help(std::string{helptext}).Register(*prometheus_registry); if constexpr ( std::is_same::value ) { - if ( fam ) - return std::static_pointer_cast(fam); + if ( auto it = families.find(prom_fam.GetName()); it != families.end() ) + return std::static_pointer_cast(it->second); - auto int_fam = - std::make_shared(prefix, name, labels, helptext, prometheus_registry, unit, is_sum); - families.insert_or_assign(int_fam->FullName(), int_fam); - return int_fam; + auto fam = std::make_shared(&prom_fam, labels); + families.insert({prom_fam.GetName(), fam}); + return fam; } else { static_assert(std::is_same::value, "metrics only support int64_t and double values"); + if ( auto it = families.find(prom_fam.GetName()); it != families.end() ) + return std::static_pointer_cast(it->second); - if ( fam ) - return std::static_pointer_cast(fam); - - auto dbl_fam = - std::make_shared(prefix, name, labels, helptext, prometheus_registry, unit, is_sum); - families.insert_or_assign(dbl_fam->FullName(), dbl_fam); - return dbl_fam; + auto fam = std::make_shared(&prom_fam, labels); + 
families.insert({prom_fam.GetName(), fam}); + return fam; } } @@ -149,32 +146,31 @@ public: * @param helptext Short explanation of the metric. * @param unit Unit of measurement. * @param is_sum Indicates whether this metric accumulates something, where only the total value is of interest. - * @param callback Passing a callback method will enable asynchronous mode. The callback method will be called by - * the metrics subsystem whenever data is requested. */ template auto GaugeFamily(std::string_view prefix, std::string_view name, Span labels, std::string_view helptext, std::string_view unit = "", bool is_sum = false) { - auto fam = LookupFamily(prefix, name); + auto full_name = detail::BuildFullPrometheusName(prefix, name, unit, is_sum); + + auto& prom_fam = + prometheus::BuildGauge().Name(full_name).Help(std::string{helptext}).Register(*prometheus_registry); if constexpr ( std::is_same::value ) { - if ( fam ) - return std::static_pointer_cast(fam); + if ( auto it = families.find(prom_fam.GetName()); it != families.end() ) + return std::static_pointer_cast(it->second); - auto int_fam = - std::make_shared(prefix, name, labels, helptext, prometheus_registry, unit, is_sum); - families.insert_or_assign(int_fam->FullName(), int_fam); - return int_fam; + auto fam = std::make_shared(&prom_fam, labels); + families.insert({prom_fam.GetName(), fam}); + return fam; } else { static_assert(std::is_same::value, "metrics only support int64_t and double values"); - if ( fam ) - return std::static_pointer_cast(fam); + if ( auto it = families.find(prom_fam.GetName()); it != families.end() ) + return std::static_pointer_cast(it->second); - auto dbl_fam = - std::make_shared(prefix, name, labels, helptext, prometheus_registry, unit, is_sum); - families.insert_or_assign(dbl_fam->FullName(), dbl_fam); - return dbl_fam; + auto fam = std::make_shared(&prom_fam, labels); + families.insert({prom_fam.GetName(), fam}); + return fam; } } @@ -239,52 +235,48 @@ public: * reserved. 
* @param name The human-readable name of the metric, e.g., `requests`. * @param labels Names for all label dimensions of the metric. - * @param default_upper_bounds Upper bounds for the metric buckets. + * @param bounds Upper bounds for the metric buckets. * @param helptext Short explanation of the metric. * @param unit Unit of measurement. Please use base units such as `bytes` or * `seconds` (prefer lowercase). The pseudo-unit `1` identifies * dimensionless counts. - * @param is_sum Setting this to `true` indicates that this metric adds - * something up to a total, where only the total value is of - * interest. For example, the total number of HTTP requests. * @note The first call wins when calling this function multiple times with * different bucket settings. Users may also override - * @p default_upper_bounds via run-time configuration. + * @p bounds via run-time configuration. */ template auto HistogramFamily(std::string_view prefix, std::string_view name, Span labels, - ConstSpan default_upper_bounds, std::string_view helptext, - std::string_view unit = "") { - auto fam = LookupFamily(prefix, name); + ConstSpan bounds, std::string_view helptext, std::string_view unit = "") { + auto full_name = detail::BuildFullPrometheusName(prefix, name, unit); + + auto& prom_fam = + prometheus::BuildHistogram().Name(full_name).Help(std::string{helptext}).Register(*prometheus_registry); if constexpr ( std::is_same::value ) { - if ( fam ) - return std::static_pointer_cast(fam); + if ( auto it = families.find(prom_fam.GetName()); it != families.end() ) + return std::static_pointer_cast(it->second); - auto int_fam = std::make_shared(prefix, name, labels, default_upper_bounds, helptext, - prometheus_registry, unit); - families.insert_or_assign(int_fam->FullName(), int_fam); - return int_fam; + auto fam = std::make_shared(&prom_fam, bounds, labels); + families.insert({prom_fam.GetName(), fam}); + return fam; } else { static_assert(std::is_same::value, "metrics only support int64_t 
and double values"); - if ( fam ) - return std::static_pointer_cast(fam); + if ( auto it = families.find(prom_fam.GetName()); it != families.end() ) + return std::static_pointer_cast(it->second); - auto dbl_fam = std::make_shared(prefix, name, labels, default_upper_bounds, helptext, - prometheus_registry, unit); - families.insert_or_assign(dbl_fam->FullName(), dbl_fam); - return dbl_fam; + auto fam = std::make_shared(&prom_fam, bounds, labels); + families.insert({prom_fam.GetName(), fam}); + return fam; } } /// @copydoc HistogramFamily template auto HistogramFamily(std::string_view prefix, std::string_view name, std::initializer_list labels, - ConstSpan default_upper_bounds, std::string_view helptext, - std::string_view unit = "") { + ConstSpan bounds, std::string_view helptext, std::string_view unit = "") { auto lbl_span = Span{labels.begin(), labels.size()}; - return HistogramFamily(prefix, name, lbl_span, default_upper_bounds, helptext, unit); + return HistogramFamily(prefix, name, lbl_span, bounds, helptext, unit); } /** @@ -295,25 +287,21 @@ public: * reserved. * @param name The human-readable name of the metric, e.g., `requests`. * @param labels Names for all label dimensions of the metric. - * @param default_upper_bounds Upper bounds for the metric buckets. + * @param bounds Upper bounds for the metric buckets. * @param helptext Short explanation of the metric. * @param unit Unit of measurement. Please use base units such as `bytes` or * `seconds` (prefer lowercase). The pseudo-unit `1` identifies * dimensionless counts. - * @param is_sum Setting this to `true` indicates that this metric adds - * something up to a total, where only the total value is of - * interest. For example, the total number of HTTP requests. * @note The first call wins when calling this function multiple times with * different bucket settings. Users may also override - * @p default_upper_bounds via run-time configuration. + * @p bounds via run-time configuration. 
*/ template std::shared_ptr> HistogramInstance(std::string_view prefix, std::string_view name, - Span labels, - ConstSpan default_upper_bounds, + Span labels, ConstSpan bounds, std::string_view helptext, std::string_view unit = "") { return WithLabelNames(labels, [&, this](auto labelNames) { - auto family = HistogramFamily(prefix, name, labelNames, default_upper_bounds, helptext, unit); + auto family = HistogramFamily(prefix, name, labelNames, bounds, helptext, unit); return family->GetOrAdd(labels); }); } @@ -322,17 +310,11 @@ public: template std::shared_ptr> HistogramInstance(std::string_view prefix, std::string_view name, std::initializer_list labels, - std::initializer_list default_upper_bounds, + std::initializer_list bounds, std::string_view helptext, std::string_view unit = "") { auto lbls = Span{labels.begin(), labels.size()}; - auto bounds = Span{default_upper_bounds.begin(), default_upper_bounds.size()}; - return HistogramInstance(prefix, name, lbls, bounds, helptext, unit); - } - - std::shared_ptr GetFamilyByFullName(const std::string& full_name) const { - if ( auto it = families.find(full_name); it != families.end() ) - return it->second; - return nullptr; + auto bounds_span = Span{bounds.begin(), bounds.size()}; + return HistogramInstance(prefix, name, lbls, bounds_span, helptext, unit); } /** @@ -368,10 +350,10 @@ protected: } private: - std::shared_ptr LookupFamily(std::string_view prefix, std::string_view name) const; + RecordValPtr GetMetricOptsRecord(const prometheus::MetricFamily& metric_family); - std::shared_ptr otel_reader; std::map> families; + std::map opts_records; detail::process_stats current_process_stats; double process_stats_last_updated = 0.0; diff --git a/src/telemetry/MetricFamily.cc b/src/telemetry/MetricFamily.cc deleted file mode 100644 index a4c8c51e1b..0000000000 --- a/src/telemetry/MetricFamily.cc +++ /dev/null @@ -1,79 +0,0 @@ -#include "zeek/telemetry/MetricFamily.h" - -#include - -#include "zeek/Val.h" -#include 
"zeek/telemetry/telemetry.bif.h" - -namespace zeek::telemetry { - -MetricFamily::MetricFamily(std::string_view prefix, std::string_view name, Span lbls, - std::string_view helptext, std::string_view unit, bool is_sum) - : prefix(prefix), helptext(helptext), unit(unit), is_sum(is_sum) { - this->name = util::strreplace(std::string{name}, "-", "_"); - for ( const auto& lbl : lbls ) { - labels.emplace_back(lbl); - } - - full_name = util::fmt("%s_%s", this->prefix.c_str(), this->name.c_str()); -} - -RecordValPtr MetricFamily::GetMetricOptsRecord() const { - if ( record_val ) - return record_val; - - static auto string_vec_type = zeek::id::find_type("string_vec"); - static auto metric_opts_type = zeek::id::find_type("Telemetry::MetricOpts"); - - static auto prefix_idx = metric_opts_type->FieldOffset("prefix"); - static auto name_idx = metric_opts_type->FieldOffset("name"); - static auto help_text_idx = metric_opts_type->FieldOffset("help_text"); - static auto unit_idx = metric_opts_type->FieldOffset("unit"); - static auto is_total_idx = metric_opts_type->FieldOffset("is_total"); - static auto labels_idx = metric_opts_type->FieldOffset("labels"); - static auto bounds_idx = metric_opts_type->FieldOffset("bounds"); - static auto metric_type_idx = metric_opts_type->FieldOffset("metric_type"); - - record_val = make_intrusive(metric_opts_type); - record_val->Assign(prefix_idx, make_intrusive(prefix)); - record_val->Assign(name_idx, make_intrusive(name)); - record_val->Assign(help_text_idx, make_intrusive(helptext)); - record_val->Assign(unit_idx, make_intrusive(unit)); - record_val->Assign(is_total_idx, val_mgr->Bool(is_sum)); - - auto label_names_vec = make_intrusive(string_vec_type); - for ( const auto& lbl : labels ) - label_names_vec->Append(make_intrusive(lbl)); - - record_val->Assign(labels_idx, label_names_vec); - - record_val->Assign(metric_type_idx, zeek::BifType::Enum::Telemetry::MetricType->GetEnumVal(MetricType())); - - return record_val; -} - -bool 
MetricFamily::Matches(std::string_view prefix_pattern, std::string_view name_pattern) const noexcept { - return fnmatch(prefix_pattern.data(), prefix.c_str(), 0) != FNM_NOMATCH && - fnmatch(name_pattern.data(), name.c_str(), 0) != FNM_NOMATCH; -} - -prometheus::Labels MetricFamily::BuildPrometheusLabels(Span labels) { - prometheus::Labels p_labels; - - bool found_endpoint = false; - for ( const auto& lbl : labels ) { - p_labels.emplace(util::strreplace(std::string{lbl.first}, "-", "_"), lbl.second); - if ( lbl.first == "endpoint" ) - found_endpoint = true; - } - - if ( ! found_endpoint ) { - auto endpoint = id::find_val("Telemetry::metrics_endpoint_name")->AsStringVal(); - if ( endpoint && endpoint->Len() > 0 ) - p_labels.emplace("endpoint", endpoint->ToStdString()); - } - - return p_labels; -} - -} // namespace zeek::telemetry diff --git a/src/telemetry/MetricFamily.h b/src/telemetry/MetricFamily.h index d7797512b0..942932566e 100644 --- a/src/telemetry/MetricFamily.h +++ b/src/telemetry/MetricFamily.h @@ -3,117 +3,32 @@ #pragma once #include -#include -#include +#include #include "zeek/Span.h" -#include "zeek/Val.h" - -#include "prometheus/labels.h" +#include "zeek/util.h" namespace zeek::telemetry { -/** - * A key-value pair for a single label dimension. - */ -using LabelView = std::pair; - /** * Manages a collection (family) of metrics. All members of the family share * the same prefix (namespace), name, and label dimensions. */ class MetricFamily { public: - MetricFamily() = delete; - MetricFamily(const MetricFamily&) noexcept = default; - MetricFamily& operator=(const MetricFamily&) noexcept = default; - virtual ~MetricFamily() = default; - /** - * @return The prefix (namespace) this family belongs to. Builtin metrics - * of Zeek return @c zeek. Custom metrics, e.g., created in a - * script, may use a prefix that represents the application/script - * or protocol (e.g. @c http) name. 
- */ - std::string_view Prefix() const noexcept { return prefix; } + virtual zeek_int_t MetricType() const = 0; - /** - * @return The human-readable name of the metric, e.g., - * @p open-connections. - */ - std::string_view Name() const noexcept { return name; } - - /** - * @return The complete name for the family including prefix. - */ - std::string FullName() const noexcept { return full_name; } - - /** - * @return The names for all label dimensions. - */ - Span LabelNames() const noexcept { return labels; } - - /** - * @return A short explanation of the metric. - */ - std::string_view Helptext() const noexcept { return helptext; } - - /** - * @return The unit of measurement, preferably a base unit such as @c bytes - * or @c seconds. - */ - std::string_view Unit() const noexcept { return unit; } - - /** - * @return Whether metrics of this family accumulate values, where only the - * total value is of interest. For example, the total number of - * HTTP requests. - */ - bool IsSum() const noexcept { return is_sum; } - - /** - * Converts the family data into script layer record. This record - * lazily-allocated and reused for each instrument associated with this - * family. - * - * @return A script layer Telemetry::Metric record for this family. - */ - RecordValPtr GetMetricOptsRecord() const; - - /** - * @return The type of this metric, defined as one of the values in the - * script-layer Telemetry::MetricType enum. - */ - virtual zeek_int_t MetricType() const noexcept = 0; - - /** - * @return Whether the prefix and name of this family matches the patterns - * provided. 
- */ - bool Matches(std::string_view prefix_pattern, std::string_view name_pattern) const noexcept; - - virtual std::vector Collect() const = 0; + std::vector LabelNames() const { return label_names; } protected: - MetricFamily(std::string_view prefix, std::string_view name, Span lbls, - std::string_view helptext, std::string_view unit, bool is_sum = false); + MetricFamily(Span labels) { + for ( const auto& lbl : labels ) + label_names.emplace_back(lbl); + } - /** - * Builds a set of labels for prometheus based on a set of labels from - * Zeek. This adds an 'endpoint' label if it's missing from the set. - */ - static prometheus::Labels BuildPrometheusLabels(Span labels); - - std::string prefix; - std::string name; - std::string full_name; - std::vector labels; - std::string helptext; - std::string unit; - bool is_sum = false; - - mutable RecordValPtr record_val; + std::vector label_names; }; } // namespace zeek::telemetry diff --git a/src/telemetry/Utils.cc b/src/telemetry/Utils.cc new file mode 100644 index 0000000000..5cae319853 --- /dev/null +++ b/src/telemetry/Utils.cc @@ -0,0 +1,54 @@ +#include "Utils.h" + +#include "zeek/ID.h" +#include "zeek/Reporter.h" +#include "zeek/Val.h" +#include "zeek/telemetry/telemetry.bif.h" +#include "zeek/util.h" + +using namespace zeek; + +namespace zeek::telemetry::detail { + +std::string BuildFullPrometheusName(std::string_view prefix, std::string_view name, std::string_view unit, + bool is_sum) { + if ( prefix.empty() || name.empty() ) + reporter->FatalError("Telemetry metric families must have a non-zero-length prefix and name"); + + std::string fn = util::fmt("%s_%s", prefix.data(), name.data()); + std::for_each(fn.begin(), fn.end(), [](char& c) { + if ( ! std::isalnum(c) ) + c = '_'; + }); + + // We were previously using "1" to mean "no unit value" for whatever reason, so we have to handle that now + // to mean the same thing. + if ( ! 
unit.empty() && unit != "1" ) + fn.append("_").append(unit); + + if ( is_sum ) + fn.append("_total"); + + return fn; +} + +prometheus::Labels BuildPrometheusLabels(Span labels) { + prometheus::Labels p_labels; + + bool found_endpoint = false; + for ( const auto& lbl : labels ) { + p_labels.emplace(util::strreplace(std::string{lbl.first}, "-", "_"), lbl.second); + if ( lbl.first == "endpoint" ) + found_endpoint = true; + } + + if ( ! found_endpoint ) { + auto endpoint = id::find_val("Telemetry::metrics_endpoint_name")->AsStringVal(); + if ( endpoint && endpoint->Len() > 0 ) + p_labels.emplace("endpoint", endpoint->ToStdString()); + } + + return p_labels; +} + +} // namespace zeek::telemetry::detail diff --git a/src/telemetry/Utils.h b/src/telemetry/Utils.h new file mode 100644 index 0000000000..dbccd54565 --- /dev/null +++ b/src/telemetry/Utils.h @@ -0,0 +1,30 @@ +#pragma once + +#include + +#include "zeek/Span.h" +#include "zeek/Val.h" + +#include "prometheus/family.h" +#include "prometheus/labels.h" + +namespace zeek::telemetry { + +using LabelView = std::pair; + +namespace detail { + +/** + * Builds a set of labels for prometheus based on a set of labels from + * Zeek. This adds an 'endpoint' label if it's missing from the set. + */ +prometheus::Labels BuildPrometheusLabels(Span labels); + +/** + * Builds a full metric name for Prometheus from prefix, name, and unit values. 
+ */ +std::string BuildFullPrometheusName(std::string_view prefix, std::string_view name, std::string_view unit, + bool is_sum = false); + +} // namespace detail +} // namespace zeek::telemetry diff --git a/src/telemetry/telemetry.bif b/src/telemetry/telemetry.bif index d294cae270..ac46a2e6d5 100644 --- a/src/telemetry/telemetry.bif +++ b/src/telemetry/telemetry.bif @@ -73,7 +73,7 @@ std::vector sv_tbl(zeek::TableVal* xs) return result; } -bool is_valid(zeek::Span labels, +bool labels_valid(zeek::Span labels, zeek::Span label_names) { auto key_in_label_names = [keys{label_names}](auto x) @@ -131,7 +131,7 @@ function Telemetry::__int_counter_metric_get_or_add%(family: opaque of int_count { auto hdl = ptr->GetHandle(); auto lbl_map = sv_tbl(labels->AsTableVal()); - if ( is_valid(lbl_map, hdl->LabelNames()) ) + if ( labels_valid(lbl_map, hdl->LabelNames()) ) { auto res = hdl->GetOrAdd(lbl_map); return zeek::make_intrusive(res); @@ -191,7 +191,7 @@ function Telemetry::__dbl_counter_metric_get_or_add%(family: opaque of dbl_count { auto hdl = ptr->GetHandle(); auto lbl_map = sv_tbl(labels->AsTableVal()); - if ( is_valid(lbl_map, hdl->LabelNames()) ) + if ( labels_valid(lbl_map, hdl->LabelNames()) ) { auto res = hdl->GetOrAdd(lbl_map); return zeek::make_intrusive(res); @@ -251,7 +251,7 @@ function Telemetry::__int_gauge_metric_get_or_add%(family: opaque of int_gauge_m { auto hdl = ptr->GetHandle(); auto lbl_map = sv_tbl(labels->AsTableVal()); - if ( is_valid(lbl_map, hdl->LabelNames()) ) + if ( labels_valid(lbl_map, hdl->LabelNames()) ) { auto res = hdl->GetOrAdd(lbl_map); return zeek::make_intrusive(res); @@ -317,7 +317,7 @@ function Telemetry::__dbl_gauge_metric_get_or_add%(family: opaque of dbl_gauge_m { auto hdl = ptr->GetHandle(); auto lbl_map = sv_tbl(labels->AsTableVal()); - if ( is_valid(lbl_map, hdl->LabelNames()) ) + if ( labels_valid(lbl_map, hdl->LabelNames()) ) { auto res = hdl->GetOrAdd(lbl_map); return zeek::make_intrusive(res); @@ -386,7 +386,7 @@ function 
Telemetry::__int_histogram_metric_get_or_add%(family: opaque of int_his { auto hdl = ptr->GetHandle(); auto lbl_map = sv_tbl(labels->AsTableVal()); - if ( is_valid(lbl_map, hdl->LabelNames()) ) + if ( labels_valid(lbl_map, hdl->LabelNames()) ) { auto res = hdl->GetOrAdd(lbl_map); return zeek::make_intrusive(res); @@ -449,7 +449,7 @@ function Telemetry::__dbl_histogram_metric_get_or_add%(family: opaque of dbl_his { auto hdl = ptr->GetHandle(); auto lbl_map = sv_tbl(labels->AsTableVal()); - if ( is_valid(lbl_map, hdl->LabelNames()) ) + if ( labels_valid(lbl_map, hdl->LabelNames()) ) { auto res = hdl->GetOrAdd(lbl_map); return zeek::make_intrusive(res);