Add cluster framework telemetry for Broker's send-buffer use

This hooks into Telemetry::sync() to update Broker-level metrics tracking the
peerings' send buffer state. We do this in the cluster framework so we can label
the resulting metrics with Zeek cluster node names, not Broker's endpoint IDs.
This commit is contained in:
Christian Kreibich 2025-04-16 18:00:28 -07:00
parent 0a05a9aa99
commit 1bea3121db
4 changed files with 66 additions and 1 deletions

View file

@ -391,7 +391,18 @@ export {
## Returns: a unique identifier for the local broker endpoint.
global node_id: function(): string;
## Obtain each peering's send-buffer fill level. The keys are Broker
## endpoint IDs.
##
## Returns: a table mapping each peering's Broker endpoint ID to the
##          number of messages queued for sending on that peering.
##
## .. zeek:see:: Broker::peer_buffer_overflows
global peer_buffer_levels: function(): table[string] of count;
## Obtain each peering's number of send-buffer overflows. The keys are
## Broker endpoint IDs. Note that for buffer policy "disconnect", these
## overflows are short-lived, since Broker will remove those peerings
## upon overflow.
##
## Returns: a table mapping each peering's Broker endpoint ID to the
##          number of send-buffer overflows seen on that peering.
##
## .. zeek:see:: Broker::peer_buffer_levels
global peer_buffer_overflows: function(): table[string] of count;
## Sends all pending log messages to remote peers. This normally

View file

@ -14,8 +14,11 @@ redef Broker::log_topic = Cluster::rr_log_topic;
# Add a cluster prefix.
@prefixes += cluster
# Broker-specific additions, loaded only when Broker is the configured
# cluster backend:
@if ( Cluster::backend == Cluster::CLUSTER_BACKEND_BROKER )
@load ./broker-backpressure
@load ./broker-telemetry
@endif
@if ( Supervisor::is_supervised() )
# When running a supervised cluster, populate Cluster::nodes from the node table

View file

@ -0,0 +1,50 @@
# Additional Broker-specific metrics that use Zeek cluster-level node names.
@load base/frameworks/telemetry
module Cluster;
## Gauge family reporting how many messages are queued in Broker's send
## buffer for each peering. The "peer" label is filled in by the
## Telemetry::sync() handler in this script with a Zeek cluster node name.
global broker_peer_buffer_levels_gf = Telemetry::register_gauge_family(
	Telemetry::MetricOpts(
		$prefix="zeek",
		$name="broker-peer-buffer-levels",
		$unit="",
		$label_names=vector("peer"),
		$help_text="Number of messages queued in Broker's per-peer send buffers"
	)
);
## Counter family reporting how many send-buffer overflows Broker has
## seen for each peering. The "peer" label is filled in by the
## Telemetry::sync() handler in this script with a Zeek cluster node name.
global broker_peer_buffer_overflows_cf = Telemetry::register_counter_family(
	Telemetry::MetricOpts(
		$prefix="zeek",
		$name="broker-peer-buffer-overflows",
		$unit="",
		$label_names=vector("peer"),
		$help_text="Number of overflows in Broker's per-peer send buffers"
	)
);
# Telemetry::sync() handler: refreshes the Broker send-buffer metrics
# (broker_peer_buffer_levels_gf and broker_peer_buffer_overflows_cf) from
# Broker's per-peering figures, labeling each value with the Zeek cluster
# node name of the peer rather than its Broker endpoint ID.
hook Telemetry::sync()
	{
	local node: NamedNode;
	local labels: vector of string;

	# Send-buffer fill levels, keyed by Broker endpoint ID.
	local fill_levels: table[string] of count = Broker::peer_buffer_levels();

	for ( endpoint, queued in fill_levels )
		{
		# Map the Broker endpoint ID to a Zeek-level node name. An
		# empty name means the mapping failed, i.e. the peering is a
		# connection to an external system; no telemetry for those.
		node = nodeid_to_node(endpoint);

		if ( |node$name| == 0 )
			next;

		labels = vector(node$name);
		Telemetry::gauge_family_set(broker_peer_buffer_levels_gf,
		                            labels, queued);
		}

	# Send-buffer overflow counts, keyed by Broker endpoint ID.
	local overflow_totals: table[string] of count = Broker::peer_buffer_overflows();

	for ( endpoint, overflowed in overflow_totals )
		{
		# Same ID-to-name translation and filtering as for the levels.
		node = nodeid_to_node(endpoint);

		if ( |node$name| == 0 )
			next;

		labels = vector(node$name);
		Telemetry::counter_family_set(broker_peer_buffer_overflows_cf,
		                              labels, overflowed);
		}
	}

View file

@ -1,6 +1,7 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
-./frameworks/cluster/broker-backpressure.zeek
-./frameworks/cluster/broker-stores.zeek
-./frameworks/cluster/broker-telemetry.zeek
-./frameworks/cluster/nodes/logger.zeek
-./frameworks/cluster/nodes/manager.zeek
-./frameworks/cluster/nodes/proxy.zeek