mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 14:48:21 +00:00
Add backpressure disconnect notification to cluster.log and via telemetry
This adds a Broker-specific script to the cluster framework, loaded only when Zeek is running in cluster mode. It adds logging in cluster.log as well as telemetry via a metrics counter for Broker-observed backpressure disconnects.

The new zeek_broker_backpressure_disconnects counter, labeled by the neighboring peer that the reporting node has determined to be unresponsive, counts the number of unpeerings for this reason. Here the node "worker" has observed node "proxy" falling behind once:

    # HELP zeek_broker_backpressure_disconnects_total Number of Broker peering drops due to a neighbor falling too far behind in message I/O
    # TYPE zeek_broker_backpressure_disconnects_total counter
    zeek_broker_backpressure_disconnects_total{endpoint="worker",peer="proxy"} 1

Includes small btest baseline update to reflect @load of a new script.
This commit is contained in:
parent
d260a5b7a9
commit
ead6134501
3 changed files with 33 additions and 0 deletions
|
@ -14,6 +14,9 @@ redef Broker::log_topic = Cluster::rr_log_topic;
|
|||
# Add a cluster prefix.
|
||||
@prefixes += cluster
|
||||
|
||||
# This load should soon be made conditional on Broker being in use.
|
||||
@load ./broker-backpressure
|
||||
|
||||
@if ( Supervisor::is_supervised() )
|
||||
# When running a supervised cluster, populate Cluster::nodes from the node table
|
||||
# the Supervisor provides to new Zeek nodes. The management framework configures
|
||||
|
|
29
scripts/base/frameworks/cluster/broker-backpressure.zeek
Normal file
29
scripts/base/frameworks/cluster/broker-backpressure.zeek
Normal file
|
@ -0,0 +1,29 @@
|
|||
# Notifications for Broker-reported backpressure overflow.
|
||||
# See base/frameworks/broker/backpressure.zeek for context.
|
||||
|
||||
@load base/frameworks/telemetry
|
||||
|
||||
module Cluster;
|
||||
|
||||
# Counter family for Broker peerings dropped because a neighbor fell behind
# in message I/O. Each counter in the family is keyed by a single "peer" label.
global broker_backpressure_disconnects_cf = Telemetry::register_counter_family([
	$prefix="zeek",
	$name="broker-backpressure-disconnects",
	$unit="",
	$help_text="Number of Broker peerings dropped due to a neighbor falling behind in message I/O",
	$label_names=vector("peer"),
]);
|
||||
|
||||
# Logs and counts Broker peerings that were removed due to backpressure
# overflow. Removals that carry no network information, or whose message
# does not mention the backpressure overflow error, are ignored.
event Broker::peer_removed(endpoint: Broker::EndpointInfo, msg: string)
	{
	if ( ! endpoint?$network )
		return;

	if ( "caf::sec::backpressure_overflow" !in msg )
		return;

	local peer_info = nodeid_to_node(endpoint$id);

	# An empty node name is reported as a "non-cluster peer" below.
	local in_cluster = ( peer_info$name != "" );

	local peer_kind = in_cluster ? "" : "non-cluster peer ";
	local peer_ident = in_cluster ? peer_info$name : endpoint$id;
	local peer_label = in_cluster ? peer_info$name : "unknown";

	Cluster::log(fmt("removed due to backpressure overflow: %s%s:%s (%s)",
	                 peer_kind, endpoint$network$address,
	                 endpoint$network$bound_port, peer_ident));

	Telemetry::counter_family_inc(broker_backpressure_disconnects_cf,
	                              vector(peer_label));
	}
|
|
@ -1,4 +1,5 @@
|
|||
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
|
||||
-./frameworks/cluster/broker-backpressure.zeek
|
||||
-./frameworks/cluster/broker-stores.zeek
|
||||
-./frameworks/cluster/nodes/logger.zeek
|
||||
-./frameworks/cluster/nodes/manager.zeek
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue