diff --git a/.gitmodules b/.gitmodules index ddb2651a18..38e0606337 100644 --- a/.gitmodules +++ b/.gitmodules @@ -75,4 +75,4 @@ url = https://github.com/microsoft/vcpkg [submodule "auxil/prometheus-cpp"] path = auxil/prometheus-cpp - url = https://github.com/jupp0r/prometheus-cpp + url = https://github.com/zeek/prometheus-cpp diff --git a/CHANGES b/CHANGES index 3e4e6cab70..bd37e63a9d 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,33 @@ +7.0.0-dev.362 | 2024-06-04 14:16:54 -0700 + + * Switch to zeek fork of prometheus-cpp (Tim Wojtulewicz, Corelight) + + * Remove unnecessary shared_from_this on instrument classes (Tim Wojtulewicz, Corelight) + + * Restore label_names field in MetricOpts record (Tim Wojtulewicz) + + * Change how we count FDs on Linux to fix zeekctl stop issues (Tim Wojtulewicz) + + * Update zeekctl tests for telemetry rework (Tim Wojtulewicz, Corelight) + + * Use forward declarations of prometheus-cpp types in telemetry::Manager (Tim Wojtulewicz, Corelight) + + * Add prometheus-cpp files to install set for plugins to use (Tim Wojtulewicz, Corelight) + + * Fix a memory leak with the CivetWeb callbacks in telemetry (Tim Wojtulewicz, Corelight) + + * Fix a bunch of copy-instead-of-move findings from Coverity (Tim Wojtulewicz, Corelight) + + * Move telmetry label names out of opts records, into main metric records (Tim Wojtulewicz, Corelight) + + * Ensure the order of label values matches the label names (Tim Wojtulewicz, Corelight) + + * Remove prefix column from telemetry.log (Tim Wojtulewicz, Corelight) + + * Fix race condition by pre-building the cluster json data for services.json (Tim Wojtulewicz) + + * Set running_under_test for scripts.base.frameworks.logging.telemetry test (Tim Wojtulewicz, Corelight) + 7.0.0-dev.347 | 2024-06-04 11:36:13 -0700 * Update reporter.bif to describe special case of errors in init (Smoot) diff --git a/CMakeLists.txt b/CMakeLists.txt index 60de8b05fc..13ea5a5fe2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -338,15 +338,20 @@ add_zeek_dynamic_plugin_build_interface_include_directories( ${PROJECT_SOURCE_DIR}/auxil/broker/libbroker ${PROJECT_SOURCE_DIR}/auxil/paraglob/include ${PROJECT_SOURCE_DIR}/auxil/rapidjson/include + ${PROJECT_SOURCE_DIR}/auxil/prometheus-cpp/core/include ${CMAKE_BINARY_DIR}/src ${CMAKE_BINARY_DIR}/src/include ${CMAKE_BINARY_DIR}/auxil/binpac/lib - ${CMAKE_BINARY_DIR}/auxil/broker/libbroker) + ${CMAKE_BINARY_DIR}/auxil/broker/libbroker + ${CMAKE_BINARY_DIR}/auxil/prometheus-cpp/core/include) # threading/formatters/JSON.h includes rapidjson headers and may be used # by external plugins, extend the include path. target_include_directories(zeek_dynamic_plugin_base SYSTEM INTERFACE $) +target_include_directories( + zeek_dynamic_plugin_base SYSTEM + INTERFACE $) # Convenience function for adding an OBJECT library that feeds directly into the # main target(s). @@ -1013,6 +1018,12 @@ install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/auxil/rapidjson/include/rapidjson install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/auxil/filesystem/include/ghc DESTINATION include/zeek/3rdparty/) +install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/auxil/prometheus-cpp/core/include/prometheus + DESTINATION include/zeek/3rdparty/prometheus-cpp/include) + +install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/auxil/prometheus-cpp/core/include/prometheus + DESTINATION include/zeek/3rdparty/prometheus-cpp/include) + # Create 3rdparty/ghc within the build directory so that the include for # "zeek/3rdparty/ghc/filesystem.hpp" works within the build tree. execute_process(COMMAND "${CMAKE_COMMAND}" -E make_directory diff --git a/VERSION b/VERSION index 46bc65f12e..09b0d31715 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -7.0.0-dev.347 +7.0.0-dev.362 diff --git a/auxil/prometheus-cpp b/auxil/prometheus-cpp index cdb357ad55..2fec7205d1 160000 --- a/auxil/prometheus-cpp +++ b/auxil/prometheus-cpp @@ -1 +1 @@ -Subproject commit cdb357ad556c9ba96cbfa90fed2940fedf101673 +Subproject commit 2fec7205d1a9cb4829b86c943d599696d53de85c diff --git a/auxil/zeekctl b/auxil/zeekctl index 4dad935e9c..614228f93b 160000 --- a/auxil/zeekctl +++ b/auxil/zeekctl @@ -1 +1 @@ -Subproject commit 4dad935e9c995b7ae2f0a4e7677892fcfb988cf0 +Subproject commit 614228f93bec4a991e3aa50055b70a0644781607 diff --git a/scripts/base/frameworks/telemetry/main.zeek b/scripts/base/frameworks/telemetry/main.zeek index 59e480a125..d71a7d9783 100644 --- a/scripts/base/frameworks/telemetry/main.zeek +++ b/scripts/base/frameworks/telemetry/main.zeek @@ -295,11 +295,11 @@ function register_counter_family(opts: MetricOpts): CounterFamily local f = Telemetry::__counter_family( opts$prefix, opts$name, - opts$labels, + opts$label_names, opts$help_text, opts$unit ); - return CounterFamily($__family=f, $__labels=opts$labels); + return CounterFamily($__family=f, $__labels=opts$label_names); } # Fallback Counter returned when there are issues with the labels. @@ -354,11 +354,11 @@ function register_gauge_family(opts: MetricOpts): GaugeFamily local f = Telemetry::__gauge_family( opts$prefix, opts$name, - opts$labels, + opts$label_names, opts$help_text, opts$unit ); - return GaugeFamily($__family=f, $__labels=opts$labels); + return GaugeFamily($__family=f, $__labels=opts$label_names); } # Fallback Gauge returned when there are issues with the label usage. @@ -422,12 +422,12 @@ function register_histogram_family(opts: MetricOpts): HistogramFamily local f = Telemetry::__histogram_family( opts$prefix, opts$name, - opts$labels, + opts$label_names, opts$bounds, opts$help_text, opts$unit ); - return HistogramFamily($__family=f, $__labels=opts$labels); + return HistogramFamily($__family=f, $__labels=opts$label_names); } # Fallback Histogram when there are issues with the labels. @@ -484,8 +484,8 @@ global version_gauge_family = Telemetry::register_gauge_family([ $name="version_info", $unit="", $help_text="The Zeek version", - $labels=vector("version_number", "major", "minor", "patch", "commit", - "beta", "debug","version_string") + $label_names=vector("version_number", "major", "minor", "patch", "commit", + "beta", "debug","version_string") ]); event zeek_init() diff --git a/scripts/base/init-bare.zeek b/scripts/base/init-bare.zeek index dd47c55aad..30b49def26 100644 --- a/scripts/base/init-bare.zeek +++ b/scripts/base/init-bare.zeek @@ -5807,8 +5807,12 @@ export { ## label values have to be provided. Examples of a label might ## be the protocol a general observation applies to, the ## directionality in a traffic flow, or protocol-specific - ## context like a particular message type. - labels: vector of string &default=vector(); + ## context like a particular message type. This field is only + ## used in the construction of new metrics and will not be + ## filled in when returned from + ## :zeek:see:`Telemetry::collect_metrics` or + ## :zeek:see:`Telemetry::collect_histogram_metrics`, + label_names: vector of string &default=vector(); ## Whether the metric represents something that is accumulating. ## Defaults to ``T`` for counters and ``F`` for gauges and @@ -5832,8 +5836,16 @@ export { ## A :zeek:see:`Telemetry::MetricOpts` record describing this metric. opts: MetricOpts; + ## The label names (also called dimensions) of the metric. When + ## instantiating or working with concrete metrics, corresponding + ## label values have to be provided. Examples of a label might + ## be the protocol a general observation applies to, the + ## directionality in a traffic flow, or protocol-specific + ## context like a particular message type. + label_names: vector of string &default=vector(); + ## The label values associated with this metric, if any. - labels: vector of string; + label_values: vector of string &optional; ## The value of gauge or counter cast to a double ## independent of the underlying data type. @@ -5847,8 +5859,16 @@ export { ## A :zeek:see:`Telemetry::MetricOpts` record describing this histogram. opts: MetricOpts; - ## The label values associated with this histogram, if any. - labels: vector of string; + ## The label names (also called dimensions) of the metric. When + ## instantiating or working with concrete metrics, corresponding + ## label values have to be provided. Examples of a label might + ## be the protocol a general observation applies to, the + ## directionality in a traffic flow, or protocol-specific + ## context like a particular message type. + label_names: vector of string &default=vector(); + + ## The label values associated with this metric, if any. + label_values: vector of string &optional; ## Individual counters for each of the buckets as ## described by the *bounds* field in *opts*; diff --git a/scripts/policy/frameworks/telemetry/log.zeek b/scripts/policy/frameworks/telemetry/log.zeek index 935b92cefa..8ee376eee4 100644 --- a/scripts/policy/frameworks/telemetry/log.zeek +++ b/scripts/policy/frameworks/telemetry/log.zeek @@ -33,9 +33,6 @@ export { ## the underlying metric type. metric_type: string &log; - ## The prefix (namespace) of the metric. - prefix: string &log; - ## The name of the metric. name: string &log; @@ -57,9 +54,6 @@ export { ## Peer that generated this log. peer: string &log; - ## The prefix (namespace) of the metric. - prefix: string &log; - ## The name of the metric. name: string &log; @@ -137,10 +131,9 @@ function do_log() local rec = Info($ts=ts, $peer=peer_description, $metric_type=metric_type, - $prefix=m$opts$prefix, $name=m$opts$name, - $labels=m$opts$labels, - $label_values=m$labels, + $labels=m$label_names, + $label_values=m$label_values, $value=m$value); Log::write(LOG, rec); @@ -168,10 +161,9 @@ function do_log() local hrec = HistogramInfo($ts=ts, $peer=peer_description, - $prefix=hm$opts$prefix, $name=hm$opts$name, - $labels=hm$opts$labels, - $label_values=hm$labels, + $labels=hm$label_names, + $label_values=hm$label_values, $bounds=hm$opts$bounds, $values=hm$values, $sum=hm$sum, diff --git a/src/telemetry/Counter.cc b/src/telemetry/Counter.cc index 5abb624f20..8b34624254 100644 --- a/src/telemetry/Counter.cc +++ b/src/telemetry/Counter.cc @@ -5,7 +5,7 @@ using namespace zeek::telemetry; Counter::Counter(FamilyType* family, const prometheus::Labels& labels, prometheus::CollectCallbackPtr callback) noexcept : handle(family->Add(labels)), labels(labels) { if ( callback ) { - handle.AddCollectCallback(callback); + handle.AddCollectCallback(std::move(callback)); has_callback = true; } } @@ -37,5 +37,5 @@ std::shared_ptr CounterFamily::GetOrAdd(Span labels, std::shared_ptr CounterFamily::GetOrAdd(std::initializer_list labels, prometheus::CollectCallbackPtr callback) { - return GetOrAdd(Span{labels.begin(), labels.size()}, callback); + return GetOrAdd(Span{labels.begin(), labels.size()}, std::move(callback)); } diff --git a/src/telemetry/Counter.h b/src/telemetry/Counter.h index 5f8ec2bf32..f6c49315b7 100644 --- a/src/telemetry/Counter.h +++ b/src/telemetry/Counter.h @@ -63,7 +63,7 @@ private: using CounterPtr = std::shared_ptr; -class CounterFamily : public MetricFamily, public std::enable_shared_from_this { +class CounterFamily : public MetricFamily { public: static inline const char* OpaqueName = "CounterMetricFamilyVal"; diff --git a/src/telemetry/Gauge.cc b/src/telemetry/Gauge.cc index f3f510b436..273c9a57bf 100644 --- a/src/telemetry/Gauge.cc +++ b/src/telemetry/Gauge.cc @@ -17,7 +17,7 @@ double Gauge::Value() const noexcept { Gauge::Gauge(FamilyType* family, const prometheus::Labels& labels, prometheus::CollectCallbackPtr callback) noexcept : handle(family->Add(labels)), labels(labels) { if ( callback ) { - handle.AddCollectCallback(callback); + handle.AddCollectCallback(std::move(callback)); has_callback = true; } } @@ -37,5 +37,5 @@ std::shared_ptr GaugeFamily::GetOrAdd(Span labels, prome std::shared_ptr GaugeFamily::GetOrAdd(std::initializer_list labels, prometheus::CollectCallbackPtr callback) { - return GetOrAdd(Span{labels.begin(), labels.size()}, callback); + return GetOrAdd(Span{labels.begin(), labels.size()}, std::move(callback)); } diff --git a/src/telemetry/Gauge.h b/src/telemetry/Gauge.h index cf04e7a9a0..900cb7b784 100644 --- a/src/telemetry/Gauge.h +++ b/src/telemetry/Gauge.h @@ -81,7 +81,7 @@ private: using GaugePtr = std::shared_ptr; -class GaugeFamily : public MetricFamily, public std::enable_shared_from_this { +class GaugeFamily : public MetricFamily { public: static inline const char* OpaqueName = "GaugeMetricFamilyVal"; diff --git a/src/telemetry/Histogram.h b/src/telemetry/Histogram.h index 65d371cd6d..ec8858e463 100644 --- a/src/telemetry/Histogram.h +++ b/src/telemetry/Histogram.h @@ -46,7 +46,7 @@ private: using HistogramPtr = std::shared_ptr; -class HistogramFamily : public MetricFamily, public std::enable_shared_from_this { +class HistogramFamily : public MetricFamily { public: static inline const char* OpaqueName = "HistogramMetricFamilyVal"; diff --git a/src/telemetry/Manager.cc b/src/telemetry/Manager.cc index 8b0a037e74..3ad3968e7d 100644 --- a/src/telemetry/Manager.cc +++ b/src/telemetry/Manager.cc @@ -6,6 +6,8 @@ // CivetServer is from the civetweb submodule in prometheus-cpp #include +#include +#include #include #include #include @@ -25,6 +27,10 @@ namespace zeek::telemetry { Manager::Manager() { prometheus_registry = std::make_shared(); } +// This can't be defined as =default because of the use of unique_ptr with a forward-declared type +// in Manager.h +Manager::~Manager() {} + void Manager::InitPostScript() { // Metrics port setting is used to calculate a URL for prometheus scraping std::string prometheus_url; @@ -44,6 +50,8 @@ void Manager::InitPostScript() { static auto manager_type = node_type_type->Lookup("Cluster", "MANAGER"); if ( local_node_type == manager_type ) { + BuildClusterJson(); + callbacks = new CivetCallbacks(); callbacks->begin_request = [](struct mg_connection* conn) -> int { // Handle the services.json request ourselves by building up a response based on @@ -64,6 +72,9 @@ void Manager::InitPostScript() { try { prometheus_exposer = std::make_unique(prometheus_url, 2, callbacks); + + // CivetWeb stores a copy of the callbacks, so we're safe to delete the pointer here + delete callbacks; } catch ( const CivetException& exc ) { reporter->FatalError("Failed to setup Prometheus endpoint: %s\n", exc.what()); } @@ -132,7 +143,6 @@ RecordValPtr Manager::GetMetricOptsRecord(const prometheus::MetricFamily& metric static auto name_idx = metric_opts_type->FieldOffset("name"); static auto help_text_idx = metric_opts_type->FieldOffset("help_text"); static auto unit_idx = metric_opts_type->FieldOffset("unit"); - static auto labels_idx = metric_opts_type->FieldOffset("labels"); static auto is_total_idx = metric_opts_type->FieldOffset("is_total"); static auto metric_type_idx = metric_opts_type->FieldOffset("metric_type"); @@ -154,55 +164,15 @@ RecordValPtr Manager::GetMetricOptsRecord(const prometheus::MetricFamily& metric // Assume that a metric ending with _total is always a summed metric so we can set that. record_val->Assign(is_total_idx, val_mgr->Bool(util::ends_with(metric_family.name, "_total"))); - auto label_names_vec = make_intrusive(string_vec_type); - - // Check if this is a Zeek-internal metric. We keep a little more information about a metric - // for these than we do for ones that were inserted into prom-cpp directly. - if ( auto it = families.find(metric_family.name); it != families.end() ) { - record_val->Assign(metric_type_idx, - zeek::BifType::Enum::Telemetry::MetricType->GetEnumVal(it->second->MetricType())); - - for ( const auto& lbl : it->second->LabelNames() ) - label_names_vec->Append(make_intrusive(lbl)); - } - else { - // prom-cpp stores everything internally as doubles - if ( metric_family.type == prometheus::MetricType::Counter ) - record_val->Assign(metric_type_idx, zeek::BifType::Enum::Telemetry::MetricType->GetEnumVal( - BifEnum::Telemetry::MetricType::COUNTER)); - if ( metric_family.type == prometheus::MetricType::Gauge ) - record_val->Assign(metric_type_idx, zeek::BifType::Enum::Telemetry::MetricType->GetEnumVal( - BifEnum::Telemetry::MetricType::GAUGE)); - if ( metric_family.type == prometheus::MetricType::Histogram ) - record_val->Assign(metric_type_idx, zeek::BifType::Enum::Telemetry::MetricType->GetEnumVal( - BifEnum::Telemetry::MetricType::HISTOGRAM)); - - // prometheus-cpp doesn't store label names anywhere other than in each - // instrument. this is valid because label names can be different - // between instruments within a single family for prometheus. we don't - // follow that model in Zeek, so use the names from the first instrument - // but validate that they're the same in the rest and warn if not. - if ( ! metric_family.metric.empty() ) { - std::unordered_set names; - for ( const auto& lbl : metric_family.metric[0].label ) { - label_names_vec->Append(make_intrusive(lbl.name)); - names.insert(lbl.name); - } - - if ( metric_family.metric.size() > 1 ) { - for ( size_t i = 1; i < metric_family.metric.size(); ++i ) { - for ( const auto& lbl : metric_family.metric[i].label ) { - if ( names.count(lbl.name) == 0 ) - reporter->Warning( - "Telemetry labels must be the same across all instruments for metric family %s\n", - metric_family.name.c_str()); - } - } - } - } - } - - record_val->Assign(labels_idx, label_names_vec); + if ( metric_family.type == prometheus::MetricType::Counter ) + record_val->Assign(metric_type_idx, zeek::BifType::Enum::Telemetry::MetricType->GetEnumVal( + BifEnum::Telemetry::MetricType::COUNTER)); + if ( metric_family.type == prometheus::MetricType::Gauge ) + record_val->Assign(metric_type_idx, zeek::BifType::Enum::Telemetry::MetricType->GetEnumVal( + BifEnum::Telemetry::MetricType::GAUGE)); + if ( metric_family.type == prometheus::MetricType::Histogram ) + record_val->Assign(metric_type_idx, zeek::BifType::Enum::Telemetry::MetricType->GetEnumVal( + BifEnum::Telemetry::MetricType::HISTOGRAM)); opts_records.insert({metric_family.name, record_val}); @@ -242,8 +212,8 @@ static bool comparer(const std::optional& a, const std::optional& b, auto a_r = a->ToVal(type)->AsRecordVal(); auto b_r = b->ToVal(type)->AsRecordVal(); - auto a_labels = a_r->GetField("labels"); - auto b_labels = b_r->GetField("labels"); + auto a_labels = a_r->GetField("label_values"); + auto b_labels = b_r->GetField("label_values"); return compare_string_vectors(a_labels, b_labels); } @@ -262,8 +232,9 @@ ValPtr Manager::CollectMetrics(std::string_view prefix_pattern, std::string_view static auto string_vec_type = zeek::id::find_type("string_vec"); static auto metric_record_type = zeek::id::find_type("Telemetry::Metric"); static auto opts_idx = metric_record_type->FieldOffset("opts"); - static auto labels_idx = metric_record_type->FieldOffset("labels"); static auto value_idx = metric_record_type->FieldOffset("value"); + static auto label_names_idx = metric_record_type->FieldOffset("label_names"); + static auto label_values_idx = metric_record_type->FieldOffset("label_values"); static auto metric_opts_type = zeek::id::find_type("Telemetry::MetricOpts"); static auto metric_type_idx = metric_opts_type->FieldOffset("metric_type"); @@ -287,15 +258,7 @@ ValPtr Manager::CollectMetrics(std::string_view prefix_pattern, std::string_view RecordValPtr opts_record = GetMetricOptsRecord(fam); for ( const auto& inst : fam.metric ) { - auto label_values_vec = make_intrusive(string_vec_type); - for ( const auto& label : inst.label ) { - // We don't include the endpoint key/value unless it's a prometheus request - if ( label.name != "endpoint" ) - label_values_vec->Append(make_intrusive(label.value)); - } - auto r = make_intrusive(metric_record_type); - r->Assign(labels_idx, label_values_vec); r->Assign(opts_idx, opts_record); if ( fam.type == prometheus::MetricType::Counter ) @@ -303,7 +266,18 @@ ValPtr Manager::CollectMetrics(std::string_view prefix_pattern, std::string_view else if ( fam.type == prometheus::MetricType::Gauge ) r->Assign(value_idx, zeek::make_intrusive(inst.gauge.value)); - ret_val->Append(r); + auto label_names_vec = make_intrusive(string_vec_type); + auto label_values_vec = make_intrusive(string_vec_type); + + for ( const auto& lbl : inst.label ) { + label_names_vec->Append(make_intrusive(lbl.name)); + label_values_vec->Append(make_intrusive(lbl.value)); + } + + r->Assign(label_names_idx, std::move(label_names_vec)); + r->Assign(label_values_idx, std::move(label_values_vec)); + + ret_val->Append(std::move(r)); } } @@ -320,7 +294,7 @@ ValPtr Manager::CollectMetrics(std::string_view prefix_pattern, std::string_view } } - return ret_val; + return std::move(ret_val); } ValPtr Manager::CollectHistogramMetrics(std::string_view prefix_pattern, std::string_view name_pattern) { @@ -328,8 +302,9 @@ ValPtr Manager::CollectHistogramMetrics(std::string_view prefix_pattern, std::st static auto string_vec_type = zeek::id::find_type("string_vec"); static auto double_vec_type = zeek::id::find_type("double_vec"); static auto histogram_metric_type = zeek::id::find_type("Telemetry::HistogramMetric"); - static auto labels_idx = histogram_metric_type->FieldOffset("labels"); static auto values_idx = histogram_metric_type->FieldOffset("values"); + static auto label_names_idx = histogram_metric_type->FieldOffset("label_names"); + static auto label_values_idx = histogram_metric_type->FieldOffset("label_values"); static auto observations_idx = histogram_metric_type->FieldOffset("observations"); static auto sum_idx = histogram_metric_type->FieldOffset("sum"); @@ -360,16 +335,19 @@ ValPtr Manager::CollectHistogramMetrics(std::string_view prefix_pattern, std::st RecordValPtr opts_record = GetMetricOptsRecord(fam); for ( const auto& inst : fam.metric ) { - auto label_values_vec = make_intrusive(string_vec_type); - for ( const auto& label : inst.label ) { - // We don't include the endpoint key/value unless it's a prometheus request - if ( label.name != "endpoint" ) - label_values_vec->Append(make_intrusive(label.value)); + auto r = make_intrusive(histogram_metric_type); + r->Assign(opts_idx, opts_record); + + auto label_names_vec = make_intrusive(string_vec_type); + auto label_values_vec = make_intrusive(string_vec_type); + + for ( const auto& lbl : inst.label ) { + label_names_vec->Append(make_intrusive(lbl.name)); + label_values_vec->Append(make_intrusive(lbl.value)); } - auto r = make_intrusive(histogram_metric_type); - r->Assign(labels_idx, label_values_vec); - r->Assign(opts_idx, opts_record); + r->Assign(label_names_idx, std::move(label_names_vec)); + r->Assign(label_values_idx, std::move(label_values_vec)); auto double_values_vec = make_intrusive(double_vec_type); std::vector boundaries; @@ -392,9 +370,9 @@ ValPtr Manager::CollectHistogramMetrics(std::string_view prefix_pattern, std::st r->Assign(sum_idx, zeek::make_intrusive(inst.histogram.sample_sum)); RecordValPtr local_opts_record = r->GetField(opts_idx); - local_opts_record->Assign(bounds_idx, bounds_vec); + local_opts_record->Assign(bounds_idx, std::move(bounds_vec)); - ret_val->Append(r); + ret_val->Append(std::move(r)); } } @@ -411,10 +389,10 @@ ValPtr Manager::CollectHistogramMetrics(std::string_view prefix_pattern, std::st } } - return ret_val; + return std::move(ret_val); } -std::string Manager::GetClusterJson() const { +void Manager::BuildClusterJson() { rapidjson::StringBuffer buffer; json::detail::NullDoubleWriter writer(buffer); @@ -423,8 +401,9 @@ std::string Manager::GetClusterJson() const { writer.Key("targets"); writer.StartArray(); - auto cluster_nodes = id::find_val("Cluster::nodes")->AsTableVal()->ToMap(); - for ( const auto& [idx, value] : cluster_nodes ) { + auto& node_val = id::find_val("Cluster::nodes"); + auto node_map = node_val->AsTableVal()->ToMap(); + for ( const auto& [idx, value] : node_map ) { auto node = value->AsRecordVal(); auto ip = node->GetField("ip"); auto port = node->GetField("metrics_port"); @@ -440,7 +419,7 @@ std::string Manager::GetClusterJson() const { writer.EndObject(); writer.EndArray(); - return buffer.GetString(); + cluster_json = buffer.GetString(); } CounterFamilyPtr Manager::CounterFamily(std::string_view prefix, std::string_view name, @@ -518,7 +497,7 @@ GaugePtr Manager::GaugeInstance(std::string_view prefix, std::string_view name, std::string_view helptext, std::string_view unit, prometheus::CollectCallbackPtr callback) { auto lbl_span = Span{labels.begin(), labels.size()}; - return GaugeInstance(prefix, name, lbl_span, helptext, unit, callback); + return GaugeInstance(prefix, name, lbl_span, helptext, unit, std::move(callback)); } HistogramFamilyPtr Manager::HistogramFamily(std::string_view prefix, std::string_view name, diff --git a/src/telemetry/Manager.h b/src/telemetry/Manager.h index 8a1deb5fc4..c4c2537f1a 100644 --- a/src/telemetry/Manager.h +++ b/src/telemetry/Manager.h @@ -2,8 +2,6 @@ #pragma once -#include -#include #include #include #include @@ -24,6 +22,11 @@ class RecordVal; using RecordValPtr = IntrusivePtr; } // namespace zeek +namespace prometheus { +class Exposer; +class Registry; +} // namespace prometheus + namespace zeek::telemetry { /** @@ -37,7 +40,7 @@ public: Manager& operator=(const Manager&) = delete; - ~Manager() = default; + ~Manager(); /** * Initialization of the manager. This is called late during Zeek's @@ -200,7 +203,7 @@ public: * @return A JSON description of the cluster configuration for reporting * to Prometheus for service discovery requests. */ - std::string GetClusterJson() const; + std::string GetClusterJson() const { return cluster_json; } /** * @return The pointer to the prometheus-cpp registry used by the telemetry @@ -230,6 +233,7 @@ protected: private: RecordValPtr GetMetricOptsRecord(const prometheus::MetricFamily& metric_family); + void BuildClusterJson(); std::map> families; std::map opts_records; @@ -242,11 +246,10 @@ private: GaugePtr cpu_gauge; GaugePtr fds_gauge; - std::string endpoint_name; - std::vector export_prefixes; - std::shared_ptr prometheus_registry; std::unique_ptr prometheus_exposer; + + std::string cluster_json; }; } // namespace zeek::telemetry diff --git a/src/telemetry/ProcessStats.cc b/src/telemetry/ProcessStats.cc index c0e9d322b7..f2a0447b63 100644 --- a/src/telemetry/ProcessStats.cc +++ b/src/telemetry/ProcessStats.cc @@ -67,6 +67,19 @@ std::atomic global_page_size; namespace zeek::telemetry::detail { +int64_t count_entries_in_directory(const char* path) { + int64_t result = 0; + if ( auto dptr = opendir(path); dptr != nullptr ) { + for ( auto entry = readdir(dptr); entry != nullptr; entry = readdir(dptr) ) { + auto fname = entry->d_name; + if ( strcmp(".", fname) != 0 && strcmp("..", fname) != 0 ) + ++result; + } + closedir(dptr); + } + return result; +} + /// Caches the result from a `sysconf` call in a cache variable to avoid /// frequent syscalls. Sets `cache_var` to -1 in case of an error. Initially, /// `cache_var` must be 0 and we assume a successful syscall would always return @@ -143,9 +156,7 @@ process_stats get_process_stats() { result.vms = vmsize_bytes; result.cpu = static_cast(utime_ticks + stime_ticks) / ticks_per_second; - zeek::filesystem::path fd_path{"/proc/self/fd"}; - result.fds = - std::distance(zeek::filesystem::directory_iterator{fd_path}, zeek::filesystem::directory_iterator{}); + result.fds = count_entries_in_directory("/proc/self/fd"); } return result; diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.telemetry/telemetry.log b/testing/btest/Baseline/scripts.base.frameworks.logging.telemetry/telemetry.log index 6e0e60a2f8..f371070a8e 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.logging.telemetry/telemetry.log +++ b/testing/btest/Baseline/scripts.base.frameworks.logging.telemetry/telemetry.log @@ -5,12 +5,12 @@ #unset_field - #path telemetry #open XXXX-XX-XX-XX-XX-XX -#fields ts peer metric_type prefix name labels label_values value -#types time string string string string vector[string] vector[string] double -XXXXXXXXXX.XXXXXX zeek counter zeek zeek_log_stream_writes_total module,stream Conn,Conn::LOG 34.0 -XXXXXXXXXX.XXXXXX zeek counter zeek zeek_log_stream_writes_total module,stream DNS,DNS::LOG 34.0 -XXXXXXXXXX.XXXXXX zeek counter zeek zeek_log_stream_writes_total module,stream HTTP,HTTP::LOG 14.0 -XXXXXXXXXX.XXXXXX zeek counter zeek zeek_log_writer_writes_total writer,module,stream,filter-name,path default,Conn,conn,Conn::LOG,Log::WRITER_ASCII 30.0 -XXXXXXXXXX.XXXXXX zeek counter zeek zeek_log_writer_writes_total writer,module,stream,filter-name,path default,DNS,dns,DNS::LOG,Log::WRITER_ASCII 23.0 -XXXXXXXXXX.XXXXXX zeek counter zeek zeek_log_writer_writes_total writer,module,stream,filter-name,path default,HTTP,http,HTTP::LOG,Log::WRITER_ASCII 10.0 +#fields ts peer metric_type name labels label_values value +#types time string string string vector[string] vector[string] double +XXXXXXXXXX.XXXXXX zeek counter zeek_log_stream_writes_total module,stream Conn,Conn::LOG 34.0 +XXXXXXXXXX.XXXXXX zeek counter zeek_log_stream_writes_total module,stream DNS,DNS::LOG 34.0 +XXXXXXXXXX.XXXXXX zeek counter zeek_log_stream_writes_total module,stream HTTP,HTTP::LOG 14.0 +XXXXXXXXXX.XXXXXX zeek counter zeek_log_writer_writes_total filter_name,module,path,stream,writer default,Conn,conn,Conn::LOG,Log::WRITER_ASCII 30.0 +XXXXXXXXXX.XXXXXX zeek counter zeek_log_writer_writes_total filter_name,module,path,stream,writer default,DNS,dns,DNS::LOG,Log::WRITER_ASCII 23.0 +XXXXXXXXXX.XXXXXX zeek counter zeek_log_writer_writes_total filter_name,module,path,stream,writer default,HTTP,http,HTTP::LOG,Log::WRITER_ASCII 10.0 #close XXXX-XX-XX-XX-XX-XX diff --git a/testing/btest/Baseline/scripts.policy.frameworks.telemetry.log-prefixes/telemetry.log b/testing/btest/Baseline/scripts.policy.frameworks.telemetry.log-prefixes/telemetry.log index d79fef633b..af06992a04 100644 --- a/testing/btest/Baseline/scripts.policy.frameworks.telemetry.log-prefixes/telemetry.log +++ b/testing/btest/Baseline/scripts.policy.frameworks.telemetry.log-prefixes/telemetry.log @@ -5,7 +5,7 @@ #unset_field - #path telemetry #open XXXX-XX-XX-XX-XX-XX -#fields ts peer metric_type prefix name labels label_values value -#types time string string string string vector[string] vector[string] double -XXXXXXXXXX.XXXXXX zeek counter btest btest_connections_total proto tcp 500.0 +#fields ts peer metric_type name labels label_values value +#types time string string string vector[string] vector[string] double +XXXXXXXXXX.XXXXXX zeek counter btest_connections_total proto tcp 500.0 #close XXXX-XX-XX-XX-XX-XX diff --git a/testing/btest/Baseline/scripts.policy.frameworks.telemetry.log-prefixes/telemetry_histogram.log b/testing/btest/Baseline/scripts.policy.frameworks.telemetry.log-prefixes/telemetry_histogram.log index 79adb57972..a30298db72 100644 --- a/testing/btest/Baseline/scripts.policy.frameworks.telemetry.log-prefixes/telemetry_histogram.log +++ b/testing/btest/Baseline/scripts.policy.frameworks.telemetry.log-prefixes/telemetry_histogram.log @@ -5,8 +5,8 @@ #unset_field - #path telemetry_histogram #open XXXX-XX-XX-XX-XX-XX -#fields ts peer prefix name labels label_values bounds values sum observations -#types time string string string vector[string] vector[string] vector[double] vector[double] double double -XXXXXXXXXX.XXXXXX zeek btest btest_connection_duration_seconds (empty) (empty) 2.0,3.0,4.0,5.0,6.0,10.0,inf 0.0,0.0,0.0,0.0,0.0,0.0,0.0 0.0 0.0 -XXXXXXXXXX.XXXXXX zeek btest btest_connection_duration_seconds (empty) (empty) 2.0,3.0,4.0,5.0,6.0,10.0,inf 0.0,322.0,90.0,5.0,76.0,7.0,0.0 1650.264644 500.0 +#fields ts peer name labels label_values bounds values sum observations +#types time string string vector[string] vector[string] vector[double] vector[double] double double +XXXXXXXXXX.XXXXXX zeek btest_connection_duration_seconds (empty) (empty) 2.0,3.0,4.0,5.0,6.0,10.0,inf 0.0,0.0,0.0,0.0,0.0,0.0,0.0 0.0 0.0 +XXXXXXXXXX.XXXXXX zeek btest_connection_duration_seconds (empty) (empty) 2.0,3.0,4.0,5.0,6.0,10.0,inf 0.0,322.0,90.0,5.0,76.0,7.0,0.0 1650.264644 500.0 #close XXXX-XX-XX-XX-XX-XX diff --git a/testing/btest/Baseline/scripts.policy.frameworks.telemetry.log/telemetry.log.filtered b/testing/btest/Baseline/scripts.policy.frameworks.telemetry.log/telemetry.log.filtered index 19b4b90ddb..c7b26a1f28 100644 --- a/testing/btest/Baseline/scripts.policy.frameworks.telemetry.log/telemetry.log.filtered +++ b/testing/btest/Baseline/scripts.policy.frameworks.telemetry.log/telemetry.log.filtered @@ -1,5 +1,5 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. -XXXXXXXXXX.XXXXXX zeek counter zeek zeek_total_sessions_total protocol tcp 1.0 -XXXXXXXXXX.XXXXXX zeek gauge zeek zeek_active_sessions protocol tcp 1.0 -XXXXXXXXXX.XXXXXX zeek counter zeek zeek_total_sessions_total protocol tcp 500.0 -XXXXXXXXXX.XXXXXX zeek gauge zeek zeek_active_sessions protocol tcp 500.0 +XXXXXXXXXX.XXXXXX zeek counter zeek_total_sessions_total protocol tcp 1.0 +XXXXXXXXXX.XXXXXX zeek gauge zeek_active_sessions protocol tcp 1.0 +XXXXXXXXXX.XXXXXX zeek counter zeek_total_sessions_total protocol tcp 500.0 +XXXXXXXXXX.XXXXXX zeek gauge zeek_active_sessions protocol tcp 500.0 diff --git a/testing/btest/Baseline/scripts.policy.frameworks.telemetry.log/telemetry_histogram.log.filtered b/testing/btest/Baseline/scripts.policy.frameworks.telemetry.log/telemetry_histogram.log.filtered index b382cd5ca6..d47ba69d07 100644 --- a/testing/btest/Baseline/scripts.policy.frameworks.telemetry.log/telemetry_histogram.log.filtered +++ b/testing/btest/Baseline/scripts.policy.frameworks.telemetry.log/telemetry_histogram.log.filtered @@ -1,3 +1,3 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. -XXXXXXXXXX.XXXXXX zeek zeek zeek_connection_duration_seconds (empty) (empty) 2.0,3.0,4.0,5.0,6.0,10.0,inf 0.0,0.0,0.0,0.0,0.0,0.0,0.0 0.0 0.0 -XXXXXXXXXX.XXXXXX zeek zeek zeek_connection_duration_seconds (empty) (empty) 2.0,3.0,4.0,5.0,6.0,10.0,inf 0.0,322.0,90.0,5.0,76.0,7.0,0.0 1650.264644 500.0 +XXXXXXXXXX.XXXXXX zeek zeek_connection_duration_seconds (empty) (empty) 2.0,3.0,4.0,5.0,6.0,10.0,inf 0.0,0.0,0.0,0.0,0.0,0.0,0.0 0.0 0.0 +XXXXXXXXXX.XXXXXX zeek zeek_connection_duration_seconds (empty) (empty) 2.0,3.0,4.0,5.0,6.0,10.0,inf 0.0,322.0,90.0,5.0,76.0,7.0,0.0 1650.264644 500.0 diff --git a/testing/btest/scripts/base/frameworks/logging/telemetry.zeek b/testing/btest/scripts/base/frameworks/logging/telemetry.zeek index a71ffd7d00..43c1ab641e 100644 --- a/testing/btest/scripts/base/frameworks/logging/telemetry.zeek +++ b/testing/btest/scripts/base/frameworks/logging/telemetry.zeek @@ -8,6 +8,9 @@ @load policy/frameworks/telemetry/log +# Force telemetry output to be sorted for test determinism +redef running_under_test = T; + global http_logs = 0; hook HTTP::log_policy(rec: HTTP::Info, id: Log::ID, filter: Log::Filter) { @@ -28,7 +31,7 @@ hook Log::log_stream_policy(rec: any, id: Log::ID) hook Telemetry::log_policy(rec: Telemetry::Info, id: Log::ID, filter: Log::Filter) { - if ( rec$prefix != "zeek" || /^zeek_log_/ !in rec$name ) + if ( /^zeek_log_/ !in rec$name ) break; if ( /HTTP|DNS|Conn/ !in cat(rec$label_values) ) diff --git a/testing/btest/scripts/base/frameworks/telemetry/basic.zeek b/testing/btest/scripts/base/frameworks/telemetry/basic.zeek index 72b675dc0a..0592bff684 100644 --- a/testing/btest/scripts/base/frameworks/telemetry/basic.zeek +++ b/testing/btest/scripts/base/frameworks/telemetry/basic.zeek @@ -15,7 +15,7 @@ global btest_a_cf = Telemetry::register_counter_family([ $name="a_test", $unit="", $help_text="A btest metric", - $labels=vector("x", "y") + $label_names=vector("x", "y") ]); global btest_b_cf = Telemetry::register_counter_family([ @@ -23,7 +23,7 @@ global btest_b_cf = Telemetry::register_counter_family([ $name="b_test", $unit="", $help_text="Another btest metric", - $labels=vector("x", "y") + $label_names=vector("x", "y") ]); global btest_c_cf = Telemetry::register_counter_family([ @@ -31,7 +31,7 @@ global btest_c_cf = Telemetry::register_counter_family([ $name="c_test", $unit="", $help_text="The last btest metric", - $labels=vector("x", "y") + $label_names=vector("x", "y") ]); global system_sensor_temp_gf = Telemetry::register_gauge_family([ @@ -39,7 +39,7 @@ global system_sensor_temp_gf = Telemetry::register_gauge_family([ $name="sensor_temperature", $unit="celsius", $help_text="Temperatures reported by sensors in the system", - $labels=vector("name") + $label_names=vector("name") ]); global btest_sample_histogram_hf = Telemetry::register_histogram_family([ @@ -48,7 +48,7 @@ global btest_sample_histogram_hf = Telemetry::register_histogram_family([ $unit="", $help_text="A sample histogram that is not returned by Telemetry::collect_metrics", $bounds=vector(1.0, 2.0, 3.0, 4.0, 5.0), - $labels=vector("dim") + $label_names=vector("dim") ]); function print_metrics(what: string, metrics: vector of Telemetry::Metric) @@ -57,7 +57,7 @@ function print_metrics(what: string, metrics: vector of Telemetry::Metric) for (i in metrics) { local m = metrics[i]; - print m$opts$metric_type, m$opts$prefix, m$opts$name, m$opts$labels, m$labels, m$value; + print m$opts$metric_type, m$opts$prefix, m$opts$name, m$label_names, m$label_values, m$value; } } @@ -67,7 +67,7 @@ function print_histogram_metrics(what: string, metrics: vector of Telemetry::His for (i in metrics) { local m = metrics[i]; - print m$opts$metric_type, m$opts$prefix, m$opts$name, m$opts$bounds, m$opts$labels, m$labels, m$values, m$sum, m$observations; + print m$opts$metric_type, m$opts$prefix, m$opts$name, m$opts$bounds, m$label_names, m$label_values, m$values, m$sum, m$observations; } } diff --git a/testing/btest/scripts/base/frameworks/telemetry/conn-duration-histogram.zeek b/testing/btest/scripts/base/frameworks/telemetry/conn-duration-histogram.zeek index b892f5740c..3f01d9ddf3 100644 --- a/testing/btest/scripts/base/frameworks/telemetry/conn-duration-histogram.zeek +++ b/testing/btest/scripts/base/frameworks/telemetry/conn-duration-histogram.zeek @@ -18,10 +18,10 @@ global connection_duration_hf = Telemetry::register_histogram_family([ global realistic_connection_duration_hf = Telemetry::register_histogram_family([ $prefix="zeek", $name="realistic_connection_duration", - $labels=vector("proto"), + $label_names=vector("proto"), $unit="seconds", $help_text="Monitored connection durations by protocol", - $bounds=vector(0.1, 1.0, 10.0, 30.0, 60.0, 120.0, 300, 900.0, 1800.0) + $bounds=vector(0.1, 1.0, 10.0, 30.0, 60.0, 120.0, 300, 900.0, 1800.0), ]); global connection_duration_h = Telemetry::histogram_with(connection_duration_hf); @@ -42,8 +42,8 @@ event zeek_done() &priority=-100 { local hm = histogram_metrics[i]; print hm$opts$metric_type, hm$opts$prefix, hm$opts$name; - print hm$opts$labels; - print hm$labels; + print hm$label_names; + print hm$label_values; print hm$opts$bounds; print hm$values; print hm$observations, hm$sum; diff --git a/testing/btest/scripts/base/frameworks/telemetry/event-handler-invocations.zeek b/testing/btest/scripts/base/frameworks/telemetry/event-handler-invocations.zeek index 5060c357a8..c0a9c73b2d 100644 --- a/testing/btest/scripts/base/frameworks/telemetry/event-handler-invocations.zeek +++ b/testing/btest/scripts/base/frameworks/telemetry/event-handler-invocations.zeek @@ -16,7 +16,7 @@ event zeek_done() &priority=-100 local ms = Telemetry::collect_metrics("zeek", "event_handler_invocations"); for ( _, m in ms ) { - if ( /zeek_.*|connection_.*/ in cat(m$labels)) - print m$opts$prefix, m$opts$name, m$labels, m$value; + if ( /zeek_.*|connection_.*/ in cat(m$label_values)) + print m$opts$prefix, m$opts$name, m$label_values, m$value; } } diff --git a/testing/btest/scripts/base/utils/json.test b/testing/btest/scripts/base/utils/json.test index 30e8e201ba..da741102fe 100644 --- a/testing/btest/scripts/base/utils/json.test +++ b/testing/btest/scripts/base/utils/json.test @@ -141,7 +141,7 @@ event zeek_init() $name="btest_testing_gauge", $unit="", $help_text="Btest testing", - $labels=vector("dim_1"), + $label_names=vector("dim_1"), ]); local gauge = Telemetry::gauge_with(gauge_family, vector("dim_1_value")); print to_json(gauge); @@ -152,7 +152,7 @@ event zeek_init() $name="btest_testing_counter", $unit="", $help_text="Btest testing", - $labels=vector("dim_1"), + $label_names=vector("dim_1"), ]); local counter = Telemetry::counter_with(counter_family, vector("dim_1_value")); print to_json(counter); diff --git a/testing/btest/scripts/policy/frameworks/telemetry/log-prefixes.zeek b/testing/btest/scripts/policy/frameworks/telemetry/log-prefixes.zeek index 8c208fd9b5..0752c605e2 100644 --- a/testing/btest/scripts/policy/frameworks/telemetry/log-prefixes.zeek +++ b/testing/btest/scripts/policy/frameworks/telemetry/log-prefixes.zeek @@ -13,7 +13,7 @@ global connections_by_proto_cf = Telemetry::register_counter_family([ $name="connections", $unit="", $help_text="Total number of monitored connections", - $labels=vector("proto") + $label_names=vector("proto") ]); global connection_duration_hf = Telemetry::register_histogram_family([