From 17bc61546755aa58839a054624cf5fd5686b87b0 Mon Sep 17 00:00:00 2001
From: Robin Sommer
Date: Tue, 17 Nov 2015 13:28:29 -0800
Subject: [PATCH] Making cluster reconnect timeout configurable.

---
 CHANGES                                      |  6 +++
 VERSION                                      |  2 +-
 scripts/base/frameworks/cluster/main.bro     | 43 ++++++++-------
 .../frameworks/cluster/setup-connections.bro | 54 +++++++++----------
 4 files changed, 57 insertions(+), 48 deletions(-)

diff --git a/CHANGES b/CHANGES
index f6006ffffb..d507490c47 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,4 +1,10 @@
+2.4-211 | 2015-11-17 13:28:29 -0800
+
+  * Making cluster reconnect timeout configurable. (Robin Sommer)
+
+  * Bugfix for child process' communication loop. (Robin Sommer)
+
 2.4-209 | 2015-11-16 07:31:22 -0800
 
   * Updating submodule(s).
 
diff --git a/VERSION b/VERSION
index 9f44806f55..d332944371 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2.4-209
+2.4-211
diff --git a/scripts/base/frameworks/cluster/main.bro b/scripts/base/frameworks/cluster/main.bro
index 218e309bad..3451cb4169 100644
--- a/scripts/base/frameworks/cluster/main.bro
+++ b/scripts/base/frameworks/cluster/main.bro
@@ -43,35 +43,35 @@ export {
 		## software.
 		TIME_MACHINE,
 	};
-	
+
 	## Events raised by a manager and handled by the workers.
 	const manager2worker_events = /Drop::.*/ &redef;
-	
+
 	## Events raised by a manager and handled by proxies.
 	const manager2proxy_events = /EMPTY/ &redef;
-	
+
 	## Events raised by proxies and handled by a manager.
 	const proxy2manager_events = /EMPTY/ &redef;
-	
+
 	## Events raised by proxies and handled by workers.
 	const proxy2worker_events = /EMPTY/ &redef;
-	
+
 	## Events raised by workers and handled by a manager.
 	const worker2manager_events = /(TimeMachine::command|Drop::.*)/ &redef;
-	
+
 	## Events raised by workers and handled by proxies.
 	const worker2proxy_events = /EMPTY/ &redef;
-	
+
 	## Events raised by TimeMachine instances and handled by a manager.
 	const tm2manager_events = /EMPTY/ &redef;
-	
+
 	## Events raised by TimeMachine instances and handled by workers.
 	const tm2worker_events = /EMPTY/ &redef;
-	
-	## Events sent by the control host (i.e. BroControl) when dynamically 
+
+	## Events sent by the control host (i.e. BroControl) when dynamically
 	## connecting to a running instance to update settings or request data.
 	const control_events = Control::controller_events &redef;
-	
+
 	## Record type to indicate a node in a cluster.
 	type Node: record {
 		## Identifies the type of cluster node in this node's configuration.
@@ -96,13 +96,13 @@
 		## Name of a time machine node with which this node connects.
 		time_machine: string &optional;
 	};
-	
+
 	## This function can be called at any time to determine if the cluster
 	## framework is being enabled for this run.
 	##
 	## Returns: True if :bro:id:`Cluster::node` has been set.
 	global is_enabled: function(): bool;
-	
+
 	## This function can be called at any time to determine what type of
 	## cluster node the current Bro instance is going to be acting as.
 	## If :bro:id:`Cluster::is_enabled` returns false, then
@@ -110,22 +110,25 @@
 	##
 	## Returns: The :bro:type:`Cluster::NodeType` the calling node acts as.
 	global local_node_type: function(): NodeType;
-	
+
 	## This gives the value for the number of workers currently connected to,
-	## and it's maintained internally by the cluster framework. It's 
-	## primarily intended for use by managers to find out how many workers 
+	## and it's maintained internally by the cluster framework. It's
+	## primarily intended for use by managers to find out how many workers
 	## should be responding to requests.
 	global worker_count: count = 0;
-	
+
 	## The cluster layout definition. This should be placed into a filter
-	## named cluster-layout.bro somewhere in the BROPATH. It will be 
+	## named cluster-layout.bro somewhere in the BROPATH. It will be
 	## automatically loaded if the CLUSTER_NODE environment variable is set.
 	## Note that BroControl handles all of this automatically.
 	const nodes: table[string] of Node = {} &redef;
-	
+
 	## This is usually supplied on the command line for each instance
 	## of the cluster that is started up.
 	const node = getenv("CLUSTER_NODE") &redef;
+
+	## Interval for retrying failed connections between cluster nodes.
+	const retry_interval = 1min &redef;
 }
 function is_enabled(): bool
 	{
@@ -158,6 +161,6 @@
 		Reporter::error(fmt("'%s' is not a valid node in the Cluster::nodes configuration", node));
 		terminate();
 		}
-	
+
 	Log::create_stream(Cluster::LOG, [$columns=Info, $path="cluster"]);
 	}
diff --git a/scripts/base/frameworks/cluster/setup-connections.bro b/scripts/base/frameworks/cluster/setup-connections.bro
index 4576f5b913..95aff64a6c 100644
--- a/scripts/base/frameworks/cluster/setup-connections.bro
+++ b/scripts/base/frameworks/cluster/setup-connections.bro
@@ -11,7 +11,7 @@ module Cluster;
 event bro_init() &priority=9
 	{
 	local me = nodes[node];
-	
+
 	for ( i in Cluster::nodes )
 		{
 		local n = nodes[i];
@@ -22,35 +22,35 @@ event bro_init() &priority=9
 			Communication::nodes["control"] = [$host=n$ip, $zone_id=n$zone_id,
 			                                   $connect=F, $class="control",
 			                                   $events=control_events];
-	
+
 		if ( me$node_type == MANAGER )
 			{
 			if ( n$node_type == WORKER && n$manager == node )
 				Communication::nodes[i] = [$host=n$ip, $zone_id=n$zone_id,
 				                           $connect=F, $class=i,
 				                           $events=worker2manager_events, $request_logs=T];
-	
+
 			if ( n$node_type == PROXY && n$manager == node )
 				Communication::nodes[i] = [$host=n$ip, $zone_id=n$zone_id,
 				                           $connect=F, $class=i,
 				                           $events=proxy2manager_events, $request_logs=T];
-	
+
 			if ( n$node_type == TIME_MACHINE && me?$time_machine && me$time_machine == i )
 				Communication::nodes["time-machine"] = [$host=nodes[i]$ip,
 				                                        $zone_id=nodes[i]$zone_id,
 				                                        $p=nodes[i]$p,
-				                                        $connect=T, $retry=1min,
+				                                        $connect=T, $retry=retry_interval,
 				                                        $events=tm2manager_events];
 			}
-	
+
 		else if ( me$node_type == PROXY )
 			{
 			if ( n$node_type == WORKER && n$proxy == node )
 				Communication::nodes[i] = [$host=n$ip, $zone_id=n$zone_id,
 				                           $connect=F, $class=i, $sync=T, $auth=T,
 				                           $events=worker2proxy_events];
-	
-			# accepts connections from the previous one. 
+
+			# accepts connections from the previous one.
 			# (This is not ideal for setups with many proxies)
 			# FIXME: Once we're using multiple proxies, we should also figure out some $class scheme ...
 			if ( n$node_type == PROXY )
@@ -58,49 +58,49 @@
 				if ( n?$proxy )
 					Communication::nodes[i] = [$host=n$ip,
 					                           $zone_id=n$zone_id, $p=n$p,
-					                           $connect=T, $auth=F, $sync=T, $retry=1mins];
+					                           $connect=T, $auth=F, $sync=T, $retry=retry_interval];
 				else if ( me?$proxy && me$proxy == i )
 					Communication::nodes[me$proxy] = [$host=nodes[i]$ip,
 					                                  $zone_id=nodes[i]$zone_id,
 					                                  $connect=F, $auth=T, $sync=T];
 				}
-	
+
 			# Finally the manager, to send it status updates.
 			if ( n$node_type == MANAGER && me$manager == i )
-				Communication::nodes["manager"] = [$host=nodes[i]$ip, 
-				                                   $zone_id=nodes[i]$zone_id, 
-				                                   $p=nodes[i]$p, 
-				                                   $connect=T, $retry=1mins, 
+				Communication::nodes["manager"] = [$host=nodes[i]$ip,
+				                                   $zone_id=nodes[i]$zone_id,
+				                                   $p=nodes[i]$p,
+				                                   $connect=T, $retry=retry_interval,
 				                                   $class=node, $events=manager2proxy_events];
 			}
 
 		else if ( me$node_type == WORKER )
 			{
 			if ( n$node_type == MANAGER && me$manager == i )
-				Communication::nodes["manager"] = [$host=nodes[i]$ip, 
+				Communication::nodes["manager"] = [$host=nodes[i]$ip,
 				                                   $zone_id=nodes[i]$zone_id,
 				                                   $p=nodes[i]$p,
-				                                   $connect=T, $retry=1mins, 
-				                                   $class=node, 
+				                                   $connect=T, $retry=retry_interval,
+				                                   $class=node,
 				                                   $events=manager2worker_events];
-	
+
 			if ( n$node_type == PROXY && me$proxy == i )
-				Communication::nodes["proxy"] = [$host=nodes[i]$ip, 
+				Communication::nodes["proxy"] = [$host=nodes[i]$ip,
 				                                 $zone_id=nodes[i]$zone_id,
 				                                 $p=nodes[i]$p,
-				                                 $connect=T, $retry=1mins, 
-				                                 $sync=T, $class=node, 
+				                                 $connect=T, $retry=retry_interval,
+				                                 $sync=T, $class=node,
 				                                 $events=proxy2worker_events];
-	
-			if ( n$node_type == TIME_MACHINE && 
+
+			if ( n$node_type == TIME_MACHINE &&
 			     me?$time_machine && me$time_machine == i )
-				Communication::nodes["time-machine"] = [$host=nodes[i]$ip, 
+				Communication::nodes["time-machine"] = [$host=nodes[i]$ip,
 				                                        $zone_id=nodes[i]$zone_id,
 				                                        $p=nodes[i]$p,
-				                                        $connect=T, 
-				                                        $retry=1min, 
+				                                        $connect=T,
+				                                        $retry=retry_interval,
 				                                        $events=tm2worker_events];
-	
+
 			}
 		}
 	}
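
Note on usage: Cluster::retry_interval is declared with &redef, so a site can tune the
reconnect timeout from its own policy instead of editing the framework scripts. A minimal
sketch, assuming the usual local.bro site policy file and an arbitrary 30-second value
chosen purely for illustration:

    # local.bro -- hypothetical site override; 30secs is an example value,
    # not a recommended default. Every $retry field that
    # setup-connections.bro now fills in from Cluster::retry_interval will
    # use this interval the next time the node starts.
    redef Cluster::retry_interval = 30secs;

Without such a redef, behavior is unchanged: the default remains 1min, matching the
hard-coded values this patch replaces.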