mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 06:38:20 +00:00

While we support initializing records via coercion from an expression list, e.g., local x: X = [$x1=1, $x2=2]; this can sometimes obscure the code to readers, e.g., when assigning to a value declared and typed elsewhere. The language runtime has a similar overhead since instead of just constructing a known type it needs to check at runtime that the coercion from the expression list is valid; this can be slower than just writing the readable code in the first place, see #4559. With this patch we use explicit construction, e.g., local x = X($x1=1, $x2=2);
104 lines
3.8 KiB
Text
104 lines
3.8 KiB
Text
# Additional Broker-specific metrics that use Zeek cluster-level node names.
|
|
|
|
@load base/frameworks/telemetry
|
|
|
|
module Cluster;
|
|
|
|
## Gauge family reporting how many messages are currently queued locally in
## the send buffer of each Broker peering. The "peer" label names the remote
## side of the peering and holds a Zeek cluster node name.
global broker_peer_buffer_messages_gf = Telemetry::register_gauge_family(
	Telemetry::MetricOpts(
		$prefix="zeek",
		$name="broker-peer-buffer-messages",
		$help_text="Number of messages queued in Broker's send buffers",
		$label_names=vector("peer"),
		$unit=""
	)
);
|
|
|
|
## Gauge family reporting the recent maximum queue length of each Broker
## peering's send buffer. Send buffers are nearly empty most of the time, so
## this gauge helps to understand recent bursts of messages. "Recent" refers
## to :zeek:see:`Broker::buffer_stats_reset_interval`. The time window does
## not slide with every newly observed message; it advances in increments of
## at least the stats interval. That is, Zeek remembers when the window
## started and, once it notices that the interval has elapsed, moves the
## start of the window to the current time.
global broker_peer_buffer_recent_max_messages_gf = Telemetry::register_gauge_family(
	Telemetry::MetricOpts(
		$prefix="zeek",
		$name="broker-peer-buffer-recent-max-messages",
		$help_text="Maximum number of messages recently queued in Broker's send buffers",
		$label_names=vector("peer"),
		$unit=""
	)
);
|
|
|
|
## Counter family reporting, per Broker peering, how many times the peering's
## send buffer has overflowed. With the "disconnect" policy the value can be
## at most 1, because Broker stops the peering at that point. With the
## "drop_oldest" and "drop_newest" policies (see
## :zeek:see:`Broker::peer_overflow_policy`) the count instead reflects the
## number of messages lost.
global broker_peer_buffer_overflows_cf = Telemetry::register_counter_family(
	Telemetry::MetricOpts(
		$prefix="zeek",
		$name="broker-peer-buffer-overflows",
		$help_text="Number of overflows in Broker's send buffers",
		$label_names=vector("peer"),
		$unit=""
	)
);
|
|
|
|
|
|
# Bookkeeping that lets the overflow counter span past peerings as well as the
# current one. A Broker ID that differs from the stored peer_id signals that
# the counter has reset, i.e. that we are looking at a new peering.
type EpochData: record {
	# Broker ID of the peering the counts below were last recorded for.
	peer_id: string;
	# Overflow count most recently reported for the current peering.
	num_overflows: count &default=0;
	# Sum of the overflow counts of all earlier peerings.
	num_past_overflows: count &default=0;
};
|
|
|
|
# Per-node overflow bookkeeping: cluster node name -> its EpochData.
global peering_epoch_data: table[string] of EpochData = table();
|
|
|
|
# Telemetry sync handler: publishes the send-buffer metrics of every Broker
# peering, labeled with the Zeek cluster node name of the remote side.
hook Telemetry::sync()
	{
	local all_stats = Broker::peering_stats();
	local epoch: EpochData;

	for ( peer_id, buf in all_stats )
		{
		# Map the Broker ID to a Zeek-level node. Peers for which no
		# node name is known, i.e. connections to external systems,
		# get no telemetry.
		local peer_node = nodeid_to_node(peer_id);

		if ( peer_node$name == "" )
			next;

		local peer_labels = vector(peer_node$name);

		Telemetry::gauge_family_set(broker_peer_buffer_messages_gf,
		                            peer_labels, buf$num_queued);
		Telemetry::gauge_family_set(broker_peer_buffer_recent_max_messages_gf,
		                            peer_labels, buf$max_queued_recently);

		# Fetch this node's bookkeeping record, creating it on first
		# sight. Records are reference types, so the updates below are
		# visible through the table entry as well.
		if ( peer_node$name in peering_epoch_data )
			epoch = peering_epoch_data[peer_node$name];
		else
			{
			epoch = EpochData($peer_id=peer_id);
			peering_epoch_data[peer_node$name] = epoch;
			}

		if ( epoch$peer_id != peer_id )
			{
			# The Broker ID changed, so this is a new peering: fold
			# the previous peering's overflows into the running
			# total. If scrapes are infrequent a whole peering could
			# go unnoticed, but re-peering should be a rare event.
			epoch$num_past_overflows += epoch$num_overflows;
			epoch$peer_id = peer_id;
			}

		epoch$num_overflows = buf$num_overflows;

		# Report overflows across all peerings seen for this node.
		local total_overflows = epoch$num_past_overflows + epoch$num_overflows;

		Telemetry::counter_family_set(broker_peer_buffer_overflows_cf,
		                              peer_labels, total_overflows);
		}
	}
|