mirror of
https://github.com/zeek/zeek.git
synced 2025-10-13 03:58:20 +00:00
Merge branch 'topic/christian/fix-broker-peering-overflows-metric'
* topic/christian/fix-broker-peering-overflows-metric:
Bugfix: accurately track Broker buffer overflows w/ multiple peerings
(cherry picked from commit 8d79429555)
This commit is contained in:
parent
589f146549
commit
61f094e928
3 changed files with 51 additions and 10 deletions
6
CHANGES
6
CHANGES
|
@ -1,3 +1,9 @@
|
||||||
|
7.2.0-rc1.19 | 2025-05-08 15:14:29 -0700
|
||||||
|
|
||||||
|
* Bugfix: accurately track Broker buffer overflows w/ multiple peerings (Christian Kreibich, Corelight)
|
||||||
|
|
||||||
|
(cherry picked from commit 8d7942955573673a1eedd98d027a1efcaec485c8)
|
||||||
|
|
||||||
7.2.0-rc1.18 | 2025-05-08 14:47:17 -0700
|
7.2.0-rc1.18 | 2025-05-08 14:47:17 -0700
|
||||||
|
|
||||||
* Bump auxil/spicy to latest release (Benjamin Bannier, Corelight)
|
* Bump auxil/spicy to latest release (Benjamin Bannier, Corelight)
|
||||||
|
|
2
VERSION
2
VERSION
|
@ -1 +1 @@
|
||||||
7.2.0-rc1.18
|
7.2.0-rc1.19
|
||||||
|
|
|
@ -44,26 +44,61 @@ global broker_peer_buffer_overflows_cf = Telemetry::register_counter_family([
|
||||||
$help_text="Number of overflows in Broker's send buffers",
|
$help_text="Number of overflows in Broker's send buffers",
|
||||||
]);
|
]);
|
||||||
|
|
||||||
|
|
||||||
|
# A helper to track overflow counts over past peerings as well as the current
|
||||||
|
# one. The peer_id field allows us to identify when the counter has reset: a
|
||||||
|
# Broker ID different from the one on file means it's a new peering.
|
||||||
|
type EpochData: record {
|
||||||
|
peer_id: string;
|
||||||
|
num_overflows: count &default=0;
|
||||||
|
num_past_overflows: count &default=0;
|
||||||
|
};
|
||||||
|
|
||||||
|
# This maps from a cluster node name to its EpochData.
|
||||||
|
global peering_epoch_data: table[string] of EpochData;
|
||||||
|
|
||||||
hook Telemetry::sync()
|
hook Telemetry::sync()
|
||||||
{
|
{
|
||||||
local peers = Broker::peering_stats();
|
local peers = Broker::peering_stats();
|
||||||
local nn: NamedNode;
|
local nn: NamedNode;
|
||||||
|
local labels: vector of string;
|
||||||
|
local ed: EpochData;
|
||||||
|
|
||||||
for ( peer, stats in peers )
|
for ( peer_id, stats in peers )
|
||||||
{
|
{
|
||||||
# Translate the Broker IDs to Zeek-level node names. We skip
|
# Translate the Broker IDs to Zeek-level node names. We skip
|
||||||
# telemetry for peers where this mapping fails, i.e. ones for
|
# telemetry for peers where this mapping fails, i.e. ones for
|
||||||
# connections to external systems.
|
# connections to external systems.
|
||||||
nn = nodeid_to_node(peer);
|
nn = nodeid_to_node(peer_id);
|
||||||
|
|
||||||
|
if ( |nn$name| == 0 )
|
||||||
|
next;
|
||||||
|
|
||||||
|
labels = vector(nn$name);
|
||||||
|
|
||||||
if ( |nn$name| > 0 )
|
|
||||||
{
|
|
||||||
Telemetry::gauge_family_set(broker_peer_buffer_messages_gf,
|
Telemetry::gauge_family_set(broker_peer_buffer_messages_gf,
|
||||||
vector(nn$name), stats$num_queued);
|
labels, stats$num_queued);
|
||||||
Telemetry::gauge_family_set(broker_peer_buffer_recent_max_messages_gf,
|
Telemetry::gauge_family_set(broker_peer_buffer_recent_max_messages_gf,
|
||||||
vector(nn$name), stats$max_queued_recently);
|
labels, stats$max_queued_recently);
|
||||||
|
|
||||||
|
if ( nn$name !in peering_epoch_data )
|
||||||
|
peering_epoch_data[nn$name] = EpochData($peer_id=peer_id);
|
||||||
|
|
||||||
|
ed = peering_epoch_data[nn$name];
|
||||||
|
|
||||||
|
if ( peer_id != ed$peer_id )
|
||||||
|
{
|
||||||
|
# A new peering. Ensure that we account for overflows in
|
||||||
|
# past ones. There is a risk here that we might have
|
||||||
|
# missed a peering altogether if we scrape infrequently,
|
||||||
|
# but re-peering should be a rare event.
|
||||||
|
ed$peer_id = peer_id;
|
||||||
|
ed$num_past_overflows += ed$num_overflows;
|
||||||
|
}
|
||||||
|
|
||||||
|
ed$num_overflows = stats$num_overflows;
|
||||||
|
|
||||||
Telemetry::counter_family_set(broker_peer_buffer_overflows_cf,
|
Telemetry::counter_family_set(broker_peer_buffer_overflows_cf,
|
||||||
vector(nn$name), stats$num_overflows);
|
labels, ed$num_past_overflows + ed$num_overflows);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue