Merge remote-tracking branch 'origin/topic/awelzel/telemetry-endpoint-to-node-rename'

* origin/topic/awelzel/telemetry-endpoint-to-node-rename:
  telemetry: Rename endpoint label to node label
This commit is contained in:
Arne Welzel 2025-06-25 09:33:46 +02:00
commit 4b472f2771
9 changed files with 52 additions and 20 deletions

11
CHANGES
View file

@ -1,3 +1,14 @@
8.0.0-dev.517 | 2025-06-25 09:33:46 +0200
* telemetry: Rename endpoint label to node label (Arne Welzel, Corelight)
Using a label named "endpoint" is not intuitive and requires explaining to
users that it's really just the Cluster::node value. Change the label to
"node", so that we don't need to do the explaining.
This probably breaks some existing users of the Prometheus metrics, but after
looking more at metrics recently, "endpoint" really is a thorn in my eye.
8.0.0-dev.514 | 2025-06-24 15:38:18 -0700
* Add get_tags_by_category BIF method (bhaskarbhar)

11
NEWS
View file

@ -36,6 +36,17 @@ Breaking Changes
redef LogAscii::json_timestamps = JSON::TS_MILLIS_UNSIGNED;
- The "endpoint" label of metrics exposed via Prometheus or the ``telemetry.log``
was renamed to "node". This is done for consistency with cluster terminology:
The label values have always been the value of ``Cluster::node``, so it's more intuitive
to call it that. The "endpoint" name originated from a time when the telemetry framework
was implemented in Broker.
To revert to the "endpoint" label, you can do the following, but we strongly
suggest migrating to the new default "node" instead:
redef Telemetry::metrics_endpoint_label = "endpoint";
- The ``current_event_time()`` builtin function as well as ``Event::Time()``
and ``EventMgr::CurrentEventTime()`` now return ``-1.0`` if no timestamp
metadata is available for the current event, or if no event is being

View file

@ -1 +1 @@
8.0.0-dev.514
8.0.0-dev.517

View file

@ -15,6 +15,13 @@ export {
## HTTP. The default value means Zeek won't expose the port.
const metrics_port = 0/unknown &redef;
## Every metric automatically receives a label with the following name
## and the metrics_endpoint_name as value to identify the originating
## cluster node.
## The label was previously hard-coded as "endpoint", and that's why
## the variable is called the way it is, but "node" is the better label.
const metrics_endpoint_label = "node" &redef;
## ID for the metrics exporter. This is used as the value of the label
## named by metrics_endpoint_label ("node" by default) when exporting
## data to Prometheus. In a cluster setup, this
## defaults to the name of the node in the cluster configuration.

View file

@ -43,18 +43,21 @@ std::string BuildFullPrometheusName(std::string_view prefix, std::string_view na
// Converts a span of label views into a prometheus::Labels map. Dashes in
// label names are replaced with underscores, and a label carrying the
// configured metrics endpoint name is appended unless the caller already
// supplied a label with that name.
//
// NOTE(review): this listing appears to be a rendered diff hunk that shows
// both the removed and the added lines, so the hard-coded "endpoint" handling
// and the configurable-label handling are both visible below. Confirm against
// the actual file before relying on the combined behavior.
prometheus::Labels BuildPrometheusLabels(Span<const LabelView> labels) {
prometheus::Labels p_labels;
bool found_endpoint = false;
// Function-local statics: the script-level values are looked up once, when
// this function is first called, and reused on every later call.
static std::string metrics_endpoint_label =
id::find_val<zeek::StringVal>("Telemetry::metrics_endpoint_label")->ToStdString();
static std::string metrics_endpoint_name =
id::find_val<zeek::StringVal>("Telemetry::metrics_endpoint_name")->ToStdString();
bool found_endpoint_label = false;
for ( const auto& lbl : labels ) {
// Label names are normalized by replacing "-" with "_" -- presumably because
// Prometheus label names may not contain dashes; verify against its data model.
p_labels.emplace(util::strreplace(std::string{lbl.first}, "-", "_"), lbl.second);
// The checks below compare the caller's original (un-normalized) label name.
if ( lbl.first == "endpoint" )
found_endpoint = true;
if ( lbl.first == metrics_endpoint_label )
found_endpoint_label = true;
}
// Hard-coded "endpoint" variant: looks the name up on each call and only adds
// the label when the value is non-empty.
if ( ! found_endpoint ) {
auto endpoint = id::find_val("Telemetry::metrics_endpoint_name")->AsStringVal();
if ( endpoint && endpoint->Len() > 0 )
p_labels.emplace("endpoint", endpoint->ToStdString());
}
// Configurable variant: the label is added only when the caller did not supply
// it and both the label name and its value are non-empty.
if ( ! found_endpoint_label && ! metrics_endpoint_label.empty() && ! metrics_endpoint_name.empty() )
p_labels.emplace(metrics_endpoint_label, metrics_endpoint_name);
return p_labels;
}

View file

@ -1,4 +1,4 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
Telemetry::COUNTER, zeek, zeek_broker_peer_buffer_overflows_total, [endpoint, peer], [manager, worker-1]
Telemetry::GAUGE, zeek, zeek_broker_peer_buffer_messages, [endpoint, peer], [manager, worker-1]
Telemetry::GAUGE, zeek, zeek_broker_peer_buffer_recent_max_messages, [endpoint, peer], [manager, worker-1]
Telemetry::COUNTER, zeek, zeek_broker_peer_buffer_overflows_total, [node, peer], [manager, worker-1]
Telemetry::GAUGE, zeek, zeek_broker_peer_buffer_messages, [node, peer], [manager, worker-1]
Telemetry::GAUGE, zeek, zeek_broker_peer_buffer_recent_max_messages, [node, peer], [manager, worker-1]

View file

@ -1,4 +1,4 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
Telemetry::COUNTER, zeek, zeek_broker_peer_buffer_overflows_total, [endpoint, peer], [worker-1, manager]
Telemetry::GAUGE, zeek, zeek_broker_peer_buffer_messages, [endpoint, peer], [worker-1, manager]
Telemetry::GAUGE, zeek, zeek_broker_peer_buffer_recent_max_messages, [endpoint, peer], [worker-1, manager]
Telemetry::COUNTER, zeek, zeek_broker_peer_buffer_overflows_total, [node, peer], [worker-1, manager]
Telemetry::GAUGE, zeek, zeek_broker_peer_buffer_messages, [node, peer], [worker-1, manager]
Telemetry::GAUGE, zeek, zeek_broker_peer_buffer_recent_max_messages, [node, peer], [worker-1, manager]

View file

@ -1,5 +1,5 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
endpoint="manager"
endpoint="logger-1"
endpoint="proxy-1"
endpoint="worker-1"
node="manager"
node="logger-1"
node="proxy-1"
node="worker-1"

View file

@ -47,7 +47,7 @@ services_data=$(curl -s -m 5 ${services_url})
for host in $(echo ${services_data} | jq -r '.[0].targets[]' | sort); do
metrics=$(curl -m 5 --trace trace-${host}.out http://${host}/metrics)
if [ $? -eq 0 ] ; then
version_info=$(echo ${metrics} | grep -Eo "zeek_version_info\{[^}]+\}" | grep -o 'endpoint="[^"]*"')
version_info=$(echo ${metrics} | grep -Eo "zeek_version_info\{[^}]+\}" | grep -o 'node="[^"]*"')
echo ${version_info} >> ${output_file};
else
echo "Failed to request data from ${host}" >> ${output_file}