Merge branch 'topic/christian/broker-tuning'

* topic/christian/broker-tuning:
  Lower listen/connect retry intervals in Broker and the cluster framework to 1sec
  Bump cluster testsuite
  Switch Broker's default backpressure policy to drop_oldest, bump buffer sizes
  Deprecate Broker::congestion_queue_size and stop using it internally
This commit is contained in:
Christian Kreibich 2025-04-25 10:23:30 -07:00
commit ebd0207352
8 changed files with 23 additions and 21 deletions

10
CHANGES
View file

@ -1,3 +1,13 @@
7.2.0-dev.649 | 2025-04-25 10:23:30 -0700
* Lower listen/connect retry intervals in Broker and the cluster framework to 1sec (Christian Kreibich, Corelight)
* Bump cluster testsuite (Christian Kreibich, Corelight)
* Switch Broker's default backpressure policy to drop_oldest, bump buffer sizes (Christian Kreibich, Corelight)
* Deprecate Broker::congestion_queue_size and stop using it internally (Christian Kreibich, Corelight)
7.2.0-dev.644 | 2025-04-25 10:02:58 -0700
* Add basic btest to verify that Broker peering telemetry is available. (Christian Kreibich, Corelight)

View file

@ -1 +1 @@
7.2.0-dev.644
7.2.0-dev.649

View file

@ -19,7 +19,7 @@ export {
## use already. Use of the ZEEK_DEFAULT_LISTEN_RETRY environment variable
## (set as a number of seconds) will override this option and also
## any values given to :zeek:see:`Broker::listen`.
const default_listen_retry = 30sec &redef;
const default_listen_retry = 1sec &redef;
## Default address on which to listen.
##
@ -36,7 +36,7 @@ export {
## ZEEK_DEFAULT_CONNECT_RETRY environment variable (set as number of
## seconds) will override this option and also any values given to
## :zeek:see:`Broker::peer`.
const default_connect_retry = 30sec &redef;
const default_connect_retry = 1sec &redef;
## If true, do not use SSL for network connections. By default, SSL will
## even be used if no certificates / CAs have been configured. In that case
@ -72,7 +72,7 @@ export {
## The number of buffered messages at the Broker/CAF layer after which
## a subscriber considers themselves congested (i.e. tune the congestion
## control mechanisms).
const congestion_queue_size = 200 &redef;
const congestion_queue_size = 200 &redef &deprecated="Remove in v8.1. Non-functional since v5.0";
## The max number of log entries per log stream to batch together when
## sending log messages to a remote logger.
@ -89,20 +89,20 @@ export {
## Max number of items we buffer per peer. What action to take when
## the buffer reaches its maximum size is determined by
## :zeek:see:`Broker::peer_overflow_policy`.
const peer_buffer_size = 2048 &redef;
const peer_buffer_size = 8192 &redef;
## Configures how Broker responds to peers that cannot keep up with the
## incoming message rate. Available strategies:
## - disconnect: drop the connection to the unresponsive peer
## - drop_newest: replace the newest message in the buffer
## - drop_oldest: remove the oldest message from the buffer, then append
const peer_overflow_policy = "disconnect" &redef;
const peer_overflow_policy = "drop_oldest" &redef;
## Same as :zeek:see:`Broker::peer_buffer_size` but for WebSocket clients.
const web_socket_buffer_size = 512 &redef;
const web_socket_buffer_size = 8192 &redef;
## Same as :zeek:see:`Broker::peer_overflow_policy` but for WebSocket clients.
const web_socket_overflow_policy = "disconnect" &redef;
const web_socket_overflow_policy = "drop_oldest" &redef;
## How frequently Zeek resets some peering/client buffer statistics,
## such as ``max_queued_recently`` in :zeek:see:`BrokerPeeringStats`.

View file

@ -262,7 +262,7 @@ export {
## Interval for retrying failed connections between cluster nodes.
## If set, the ZEEK_DEFAULT_CONNECT_RETRY environment variable (given in
## number of seconds) overrides this option.
const retry_interval = 1min &redef;
const retry_interval = 1sec &redef;
## When using broker-enabled cluster framework, nodes broadcast this event
## to exchange their user-defined name along with a string that uniquely

View file

@ -404,11 +404,9 @@ class BrokerState {
public:
using LogSeverityLevel = Observer::LogSeverityLevel;
BrokerState(broker::configuration config, size_t congestion_queue_size, LoggerQueuePtr queue,
PeerBufferStatePtr pbstate)
BrokerState(broker::configuration config, LoggerQueuePtr queue, PeerBufferStatePtr pbstate)
: endpoint(std::move(config), telemetry_mgr->GetRegistry()),
subscriber(
endpoint.make_subscriber({broker::topic::statuses(), broker::topic::errors()}, congestion_queue_size)),
subscriber(endpoint.make_subscriber({broker::topic::statuses(), broker::topic::errors()})),
loggerQueue(std::move(queue)),
peerBufferState(std::move(pbstate)) {
peerBufferState->SetEndpoint(&endpoint);
@ -594,8 +592,7 @@ void Manager::DoInitPostScript() {
auto observer = std::make_shared<Observer>(adapterVerbosity, queue, pbstate);
broker::logger(observer); // *must* be called before creating the BrokerState
auto cqs = get_option("Broker::congestion_queue_size")->AsCount();
bstate = std::make_shared<BrokerState>(std::move(config), cqs, queue, pbstate);
bstate = std::make_shared<BrokerState>(std::move(config), queue, pbstate);
bstate->logSeverity = static_cast<BrokerSeverityLevel>(logSeverityVal);
bstate->stderrSeverity = static_cast<BrokerSeverityLevel>(stderrSeverityVal);

View file

@ -23,7 +23,6 @@ redef Cluster::nodes = {
redef exit_only_after_terminate = T;
redef Log::enable_local_logging = T;
redef Log::default_rotation_interval = 0secs;
redef Cluster::retry_interval = 1sec;
function print_metrics(metrics: vector of Telemetry::Metric)
{

View file

@ -20,10 +20,6 @@ redef Cluster::nodes = {
@load misc/weird-stats
@load policy/frameworks/cluster/experimental
redef Cluster::retry_interval = 1sec;
redef Broker::default_listen_retry = 1sec;
redef Broker::default_connect_retry = 1sec;
redef Log::enable_local_logging = T;
redef Log::default_rotation_interval = 0secs;
redef WeirdStats::weird_stat_interval = 5secs;

View file

@ -1 +1 @@
2d1f0ae518b26938e24bd26f701dab17e174a626
de6bc382b2320185c168e9f429e47904034510d3