diff --git a/NEWS b/NEWS index 89630a0c9a..29b4656731 100644 --- a/NEWS +++ b/NEWS @@ -48,6 +48,18 @@ Breaking Changes redef EventMetadata::add_missing_remote_network_timestamp = T; +- The "endpoint" label of metrics exposed via Prometheus or the ``telemetry.log`` + was renamed to "node". This is done for consistency with cluster terminology: + The label values have always been the value of ``Cluster::node``, so it's more intuitive + to call it that. The "endpoint" name originated from a time when the telemetry framework + was implemented in Broker. + + To revert to the "endpoint" label, you can do the following, but we strongly + suggest migrating to the new default "node" instead: + + redef Telemetry::metrics_endpoint_label = "endpoint"; + + New Functionality ----------------- diff --git a/scripts/base/frameworks/telemetry/options.zeek b/scripts/base/frameworks/telemetry/options.zeek index 29ad4631d3..00453446ad 100644 --- a/scripts/base/frameworks/telemetry/options.zeek +++ b/scripts/base/frameworks/telemetry/options.zeek @@ -15,6 +15,13 @@ export { ## HTTP. The default value means Zeek won't expose the port. const metrics_port = 0/unknown &redef; + ## Every metric automatically receives a label with the following name + ## and the metrics_endpoint_name as value to identify the originating + ## cluster node. + ## The label was previously hard-coded as "endpoint", and that's why + ## the variable is called the way it is, but "node" is the better label. + const metrics_endpoint_label = "node" &redef; + ## ID for the metrics exporter. This is used as the 'endpoint' label ## value when exporting data to Prometheus. In a cluster setup, this ## defaults to the name of the node in the cluster configuration. 
diff --git a/src/telemetry/Utils.cc b/src/telemetry/Utils.cc index c7cca4ec84..8aa8e33872 100644 --- a/src/telemetry/Utils.cc +++ b/src/telemetry/Utils.cc @@ -43,18 +43,21 @@ std::string BuildFullPrometheusName(std::string_view prefix, std::string_view na prometheus::Labels BuildPrometheusLabels(Span labels) { prometheus::Labels p_labels; - bool found_endpoint = false; + static std::string metrics_endpoint_label = + id::find_val("Telemetry::metrics_endpoint_label")->ToStdString(); + + static std::string metrics_endpoint_name = + id::find_val("Telemetry::metrics_endpoint_name")->ToStdString(); + + bool found_endpoint_label = false; for ( const auto& lbl : labels ) { p_labels.emplace(util::strreplace(std::string{lbl.first}, "-", "_"), lbl.second); - if ( lbl.first == "endpoint" ) - found_endpoint = true; + if ( lbl.first == metrics_endpoint_label ) + found_endpoint_label = true; } - if ( ! found_endpoint ) { - auto endpoint = id::find_val("Telemetry::metrics_endpoint_name")->AsStringVal(); - if ( endpoint && endpoint->Len() > 0 ) - p_labels.emplace("endpoint", endpoint->ToStdString()); - } + if ( ! found_endpoint_label && ! metrics_endpoint_label.empty() && ! metrics_endpoint_name.empty() ) + p_labels.emplace(metrics_endpoint_label, metrics_endpoint_name); return p_labels; } diff --git a/testing/btest/Baseline/broker.telemetry/manager.out b/testing/btest/Baseline/broker.telemetry/manager.out index a48dbf580b..e2ddc10fa7 100644 --- a/testing/btest/Baseline/broker.telemetry/manager.out +++ b/testing/btest/Baseline/broker.telemetry/manager.out @@ -1,4 +1,4 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. 
-Telemetry::COUNTER, zeek, zeek_broker_peer_buffer_overflows_total, [endpoint, peer], [manager, worker-1] -Telemetry::GAUGE, zeek, zeek_broker_peer_buffer_messages, [endpoint, peer], [manager, worker-1] -Telemetry::GAUGE, zeek, zeek_broker_peer_buffer_recent_max_messages, [endpoint, peer], [manager, worker-1] +Telemetry::COUNTER, zeek, zeek_broker_peer_buffer_overflows_total, [node, peer], [manager, worker-1] +Telemetry::GAUGE, zeek, zeek_broker_peer_buffer_messages, [node, peer], [manager, worker-1] +Telemetry::GAUGE, zeek, zeek_broker_peer_buffer_recent_max_messages, [node, peer], [manager, worker-1] diff --git a/testing/btest/Baseline/broker.telemetry/worker-1.out b/testing/btest/Baseline/broker.telemetry/worker-1.out index a2e34b9a04..11d72c8e35 100644 --- a/testing/btest/Baseline/broker.telemetry/worker-1.out +++ b/testing/btest/Baseline/broker.telemetry/worker-1.out @@ -1,4 +1,4 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. 
-Telemetry::COUNTER, zeek, zeek_broker_peer_buffer_overflows_total, [endpoint, peer], [worker-1, manager] -Telemetry::GAUGE, zeek, zeek_broker_peer_buffer_messages, [endpoint, peer], [worker-1, manager] -Telemetry::GAUGE, zeek, zeek_broker_peer_buffer_recent_max_messages, [endpoint, peer], [worker-1, manager] +Telemetry::COUNTER, zeek, zeek_broker_peer_buffer_overflows_total, [node, peer], [worker-1, manager] +Telemetry::GAUGE, zeek, zeek_broker_peer_buffer_messages, [node, peer], [worker-1, manager] +Telemetry::GAUGE, zeek, zeek_broker_peer_buffer_recent_max_messages, [node, peer], [worker-1, manager] diff --git a/testing/btest/Baseline/scripts.policy.frameworks.telemetry.prometheus/manager.services.out b/testing/btest/Baseline/scripts.policy.frameworks.telemetry.prometheus/manager.services.out index eab828e89f..980ee38d03 100644 --- a/testing/btest/Baseline/scripts.policy.frameworks.telemetry.prometheus/manager.services.out +++ b/testing/btest/Baseline/scripts.policy.frameworks.telemetry.prometheus/manager.services.out @@ -1,5 +1,5 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. -endpoint="manager" -endpoint="logger-1" -endpoint="proxy-1" -endpoint="worker-1" +node="manager" +node="logger-1" +node="proxy-1" +node="worker-1" diff --git a/testing/btest/scripts/policy/frameworks/telemetry/prometheus.zeek b/testing/btest/scripts/policy/frameworks/telemetry/prometheus.zeek index 8f15087403..9b09784e1f 100644 --- a/testing/btest/scripts/policy/frameworks/telemetry/prometheus.zeek +++ b/testing/btest/scripts/policy/frameworks/telemetry/prometheus.zeek @@ -47,7 +47,7 @@ services_data=$(curl -s -m 5 ${services_url}) for host in $(echo ${services_data} | jq -r '.[0].targets[]' | sort); do metrics=$(curl -m 5 --trace trace-${host}.out http://${host}/metrics) if [ $? 
-eq 0 ] ; then - version_info=$(echo ${metrics} | grep -Eo "zeek_version_info\{[^}]+\}" | grep -o 'endpoint="[^"]*"') + version_info=$(echo ${metrics} | grep -Eo "zeek_version_info\{[^}]+\}" | grep -o 'node="[^"]*"') echo ${version_info} >> ${output_file}; else echo "Failed to request data from ${host}" >> ${output_file}