telemetry: Rename endpoint label to node label

Using a label named "endpoint" is not intuitive and requires explaining to
users that it's really just the Cluster::node value. Change the label to
"node", so that we don't need to do the explaining.

This probably breaks some existing users of the Prometheus metrics, but after
looking more at metrics recently, "endpoint" really is a thorn in my eye.
This commit is contained in:
Arne Welzel 2025-06-06 11:18:07 +02:00
parent 8189716adc
commit eea194ddd8
7 changed files with 41 additions and 19 deletions

12
NEWS
View file

@ -48,6 +48,18 @@ Breaking Changes
redef EventMetadata::add_missing_remote_network_timestamp = T; redef EventMetadata::add_missing_remote_network_timestamp = T;
- The "endpoint" label of metrics exposed via Prometheus or the ``telemetry.log``
was renamed to "node". This is done for consistency with cluster terminology:
The label values have always been the value of ``Cluster::node``, so it's more intuitive
to call the label "node". The "endpoint" name originated from a time when the telemetry framework
was implemented in Broker.
To revert to the "endpoint" label, you can do the following, but we strongly
suggest migrating to the new default "node" instead:
redef Telemetry::metrics_endpoint_label = "endpoint";
New Functionality New Functionality
----------------- -----------------

View file

@ -15,6 +15,13 @@ export {
## HTTP. The default value means Zeek won't expose the port. ## HTTP. The default value means Zeek won't expose the port.
const metrics_port = 0/unknown &redef; const metrics_port = 0/unknown &redef;
## Every metric automatically receives a label with the following name
## and the metrics_endpoint_name as value to identify the originating
## cluster node.
## The label was previously hard-coded as "endpoint", which is why
## the variable is named the way it is, but "node" is the better label.
const metrics_endpoint_label = "node" &redef;
## ID for the metrics exporter. This is used as the 'endpoint' label ## ID for the metrics exporter. This is used as the 'endpoint' label
## value when exporting data to Prometheus. In a cluster setup, this ## value when exporting data to Prometheus. In a cluster setup, this
## defaults to the name of the node in the cluster configuration. ## defaults to the name of the node in the cluster configuration.

View file

@ -43,18 +43,21 @@ std::string BuildFullPrometheusName(std::string_view prefix, std::string_view na
prometheus::Labels BuildPrometheusLabels(Span<const LabelView> labels) { prometheus::Labels BuildPrometheusLabels(Span<const LabelView> labels) {
prometheus::Labels p_labels; prometheus::Labels p_labels;
bool found_endpoint = false; static std::string metrics_endpoint_label =
id::find_val<zeek::StringVal>("Telemetry::metrics_endpoint_label")->ToStdString();
static std::string metrics_endpoint_name =
id::find_val<zeek::StringVal>("Telemetry::metrics_endpoint_name")->ToStdString();
bool found_endpoint_label = false;
for ( const auto& lbl : labels ) { for ( const auto& lbl : labels ) {
p_labels.emplace(util::strreplace(std::string{lbl.first}, "-", "_"), lbl.second); p_labels.emplace(util::strreplace(std::string{lbl.first}, "-", "_"), lbl.second);
if ( lbl.first == "endpoint" ) if ( lbl.first == metrics_endpoint_label )
found_endpoint = true; found_endpoint_label = true;
} }
if ( ! found_endpoint ) { if ( ! found_endpoint_label && ! metrics_endpoint_label.empty() && ! metrics_endpoint_name.empty() )
auto endpoint = id::find_val("Telemetry::metrics_endpoint_name")->AsStringVal(); p_labels.emplace(metrics_endpoint_label, metrics_endpoint_name);
if ( endpoint && endpoint->Len() > 0 )
p_labels.emplace("endpoint", endpoint->ToStdString());
}
return p_labels; return p_labels;
} }

View file

@ -1,4 +1,4 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
Telemetry::COUNTER, zeek, zeek_broker_peer_buffer_overflows_total, [endpoint, peer], [manager, worker-1] Telemetry::COUNTER, zeek, zeek_broker_peer_buffer_overflows_total, [node, peer], [manager, worker-1]
Telemetry::GAUGE, zeek, zeek_broker_peer_buffer_messages, [endpoint, peer], [manager, worker-1] Telemetry::GAUGE, zeek, zeek_broker_peer_buffer_messages, [node, peer], [manager, worker-1]
Telemetry::GAUGE, zeek, zeek_broker_peer_buffer_recent_max_messages, [endpoint, peer], [manager, worker-1] Telemetry::GAUGE, zeek, zeek_broker_peer_buffer_recent_max_messages, [node, peer], [manager, worker-1]

View file

@ -1,4 +1,4 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
Telemetry::COUNTER, zeek, zeek_broker_peer_buffer_overflows_total, [endpoint, peer], [worker-1, manager] Telemetry::COUNTER, zeek, zeek_broker_peer_buffer_overflows_total, [node, peer], [worker-1, manager]
Telemetry::GAUGE, zeek, zeek_broker_peer_buffer_messages, [endpoint, peer], [worker-1, manager] Telemetry::GAUGE, zeek, zeek_broker_peer_buffer_messages, [node, peer], [worker-1, manager]
Telemetry::GAUGE, zeek, zeek_broker_peer_buffer_recent_max_messages, [endpoint, peer], [worker-1, manager] Telemetry::GAUGE, zeek, zeek_broker_peer_buffer_recent_max_messages, [node, peer], [worker-1, manager]

View file

@ -1,5 +1,5 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
endpoint="manager" node="manager"
endpoint="logger-1" node="logger-1"
endpoint="proxy-1" node="proxy-1"
endpoint="worker-1" node="worker-1"

View file

@ -47,7 +47,7 @@ services_data=$(curl -s -m 5 ${services_url})
for host in $(echo ${services_data} | jq -r '.[0].targets[]' | sort); do for host in $(echo ${services_data} | jq -r '.[0].targets[]' | sort); do
metrics=$(curl -m 5 --trace trace-${host}.out http://${host}/metrics) metrics=$(curl -m 5 --trace trace-${host}.out http://${host}/metrics)
if [ $? -eq 0 ] ; then if [ $? -eq 0 ] ; then
version_info=$(echo ${metrics} | grep -Eo "zeek_version_info\{[^}]+\}" | grep -o 'endpoint="[^"]*"') version_info=$(echo ${metrics} | grep -Eo "zeek_version_info\{[^}]+\}" | grep -o 'node="[^"]*"')
echo ${version_info} >> ${output_file}; echo ${version_info} >> ${output_file};
else else
echo "Failed to request data from ${host}" >> ${output_file} echo "Failed to request data from ${host}" >> ${output_file}