Making cluster reconnect timeout configurable.

This commit is contained in:
Robin Sommer 2015-11-17 13:28:29 -08:00
parent 931837c5a2
commit 17bc615467
4 changed files with 57 additions and 48 deletions

View file

@ -1,4 +1,10 @@
2.4-211 | 2015-11-17 13:28:29 -0800
* Making cluster reconnect timeout configurable. (Robin Sommer)
* Bugfix for child process' communication loop. (Robin Sommer)
2.4-209 | 2015-11-16 07:31:22 -0800 2.4-209 | 2015-11-16 07:31:22 -0800
* Updating submodule(s). * Updating submodule(s).

View file

@ -1 +1 @@
2.4-209 2.4-211

View file

@ -43,35 +43,35 @@ export {
## software. ## software.
TIME_MACHINE, TIME_MACHINE,
}; };
## Events raised by a manager and handled by the workers. ## Events raised by a manager and handled by the workers.
const manager2worker_events = /Drop::.*/ &redef; const manager2worker_events = /Drop::.*/ &redef;
## Events raised by a manager and handled by proxies. ## Events raised by a manager and handled by proxies.
const manager2proxy_events = /EMPTY/ &redef; const manager2proxy_events = /EMPTY/ &redef;
## Events raised by proxies and handled by a manager. ## Events raised by proxies and handled by a manager.
const proxy2manager_events = /EMPTY/ &redef; const proxy2manager_events = /EMPTY/ &redef;
## Events raised by proxies and handled by workers. ## Events raised by proxies and handled by workers.
const proxy2worker_events = /EMPTY/ &redef; const proxy2worker_events = /EMPTY/ &redef;
## Events raised by workers and handled by a manager. ## Events raised by workers and handled by a manager.
const worker2manager_events = /(TimeMachine::command|Drop::.*)/ &redef; const worker2manager_events = /(TimeMachine::command|Drop::.*)/ &redef;
## Events raised by workers and handled by proxies. ## Events raised by workers and handled by proxies.
const worker2proxy_events = /EMPTY/ &redef; const worker2proxy_events = /EMPTY/ &redef;
## Events raised by TimeMachine instances and handled by a manager. ## Events raised by TimeMachine instances and handled by a manager.
const tm2manager_events = /EMPTY/ &redef; const tm2manager_events = /EMPTY/ &redef;
## Events raised by TimeMachine instances and handled by workers. ## Events raised by TimeMachine instances and handled by workers.
const tm2worker_events = /EMPTY/ &redef; const tm2worker_events = /EMPTY/ &redef;
## Events sent by the control host (i.e. BroControl) when dynamically ## Events sent by the control host (i.e. BroControl) when dynamically
## connecting to a running instance to update settings or request data. ## connecting to a running instance to update settings or request data.
const control_events = Control::controller_events &redef; const control_events = Control::controller_events &redef;
## Record type to indicate a node in a cluster. ## Record type to indicate a node in a cluster.
type Node: record { type Node: record {
## Identifies the type of cluster node in this node's configuration. ## Identifies the type of cluster node in this node's configuration.
@ -96,13 +96,13 @@ export {
## Name of a time machine node with which this node connects. ## Name of a time machine node with which this node connects.
time_machine: string &optional; time_machine: string &optional;
}; };
## This function can be called at any time to determine if the cluster ## This function can be called at any time to determine if the cluster
## framework is being enabled for this run. ## framework is being enabled for this run.
## ##
## Returns: True if :bro:id:`Cluster::node` has been set. ## Returns: True if :bro:id:`Cluster::node` has been set.
global is_enabled: function(): bool; global is_enabled: function(): bool;
## This function can be called at any time to determine what type of ## This function can be called at any time to determine what type of
## cluster node the current Bro instance is going to be acting as. ## cluster node the current Bro instance is going to be acting as.
## If :bro:id:`Cluster::is_enabled` returns false, then ## If :bro:id:`Cluster::is_enabled` returns false, then
@ -110,22 +110,25 @@ export {
## ##
## Returns: The :bro:type:`Cluster::NodeType` the calling node acts as. ## Returns: The :bro:type:`Cluster::NodeType` the calling node acts as.
global local_node_type: function(): NodeType; global local_node_type: function(): NodeType;
## This gives the number of workers currently connected, ## This gives the number of workers currently connected,
## and it's maintained internally by the cluster framework. It's ## and it's maintained internally by the cluster framework. It's
## primarily intended for use by managers to find out how many workers ## primarily intended for use by managers to find out how many workers
## should be responding to requests. ## should be responding to requests.
global worker_count: count = 0; global worker_count: count = 0;
## The cluster layout definition. This should be placed into a file ## The cluster layout definition. This should be placed into a file
## named cluster-layout.bro somewhere in the BROPATH. It will be ## named cluster-layout.bro somewhere in the BROPATH. It will be
## automatically loaded if the CLUSTER_NODE environment variable is set. ## automatically loaded if the CLUSTER_NODE environment variable is set.
## Note that BroControl handles all of this automatically. ## Note that BroControl handles all of this automatically.
const nodes: table[string] of Node = {} &redef; const nodes: table[string] of Node = {} &redef;
## This is usually supplied on the command line for each instance ## This is usually supplied on the command line for each instance
## of the cluster that is started up. ## of the cluster that is started up.
const node = getenv("CLUSTER_NODE") &redef; const node = getenv("CLUSTER_NODE") &redef;
## Interval for retrying failed connections between cluster nodes.
const retry_interval = 1min &redef;
} }
function is_enabled(): bool function is_enabled(): bool
@ -158,6 +161,6 @@ event bro_init() &priority=5
Reporter::error(fmt("'%s' is not a valid node in the Cluster::nodes configuration", node)); Reporter::error(fmt("'%s' is not a valid node in the Cluster::nodes configuration", node));
terminate(); terminate();
} }
Log::create_stream(Cluster::LOG, [$columns=Info, $path="cluster"]); Log::create_stream(Cluster::LOG, [$columns=Info, $path="cluster"]);
} }

View file

@ -11,7 +11,7 @@ module Cluster;
event bro_init() &priority=9 event bro_init() &priority=9
{ {
local me = nodes[node]; local me = nodes[node];
for ( i in Cluster::nodes ) for ( i in Cluster::nodes )
{ {
local n = nodes[i]; local n = nodes[i];
@ -22,35 +22,35 @@ event bro_init() &priority=9
Communication::nodes["control"] = [$host=n$ip, $zone_id=n$zone_id, Communication::nodes["control"] = [$host=n$ip, $zone_id=n$zone_id,
$connect=F, $class="control", $connect=F, $class="control",
$events=control_events]; $events=control_events];
if ( me$node_type == MANAGER ) if ( me$node_type == MANAGER )
{ {
if ( n$node_type == WORKER && n$manager == node ) if ( n$node_type == WORKER && n$manager == node )
Communication::nodes[i] = Communication::nodes[i] =
[$host=n$ip, $zone_id=n$zone_id, $connect=F, [$host=n$ip, $zone_id=n$zone_id, $connect=F,
$class=i, $events=worker2manager_events, $request_logs=T]; $class=i, $events=worker2manager_events, $request_logs=T];
if ( n$node_type == PROXY && n$manager == node ) if ( n$node_type == PROXY && n$manager == node )
Communication::nodes[i] = Communication::nodes[i] =
[$host=n$ip, $zone_id=n$zone_id, $connect=F, [$host=n$ip, $zone_id=n$zone_id, $connect=F,
$class=i, $events=proxy2manager_events, $request_logs=T]; $class=i, $events=proxy2manager_events, $request_logs=T];
if ( n$node_type == TIME_MACHINE && me?$time_machine && me$time_machine == i ) if ( n$node_type == TIME_MACHINE && me?$time_machine && me$time_machine == i )
Communication::nodes["time-machine"] = [$host=nodes[i]$ip, Communication::nodes["time-machine"] = [$host=nodes[i]$ip,
$zone_id=nodes[i]$zone_id, $zone_id=nodes[i]$zone_id,
$p=nodes[i]$p, $p=nodes[i]$p,
$connect=T, $retry=1min, $connect=T, $retry=retry_interval,
$events=tm2manager_events]; $events=tm2manager_events];
} }
else if ( me$node_type == PROXY ) else if ( me$node_type == PROXY )
{ {
if ( n$node_type == WORKER && n$proxy == node ) if ( n$node_type == WORKER && n$proxy == node )
Communication::nodes[i] = Communication::nodes[i] =
[$host=n$ip, $zone_id=n$zone_id, $connect=F, $class=i, [$host=n$ip, $zone_id=n$zone_id, $connect=F, $class=i,
$sync=T, $auth=T, $events=worker2proxy_events]; $sync=T, $auth=T, $events=worker2proxy_events];
# accepts connections from the previous one. # accepts connections from the previous one.
# (This is not ideal for setups with many proxies) # (This is not ideal for setups with many proxies)
# FIXME: Once we're using multiple proxies, we should also figure out some $class scheme ... # FIXME: Once we're using multiple proxies, we should also figure out some $class scheme ...
if ( n$node_type == PROXY ) if ( n$node_type == PROXY )
@ -58,49 +58,49 @@ event bro_init() &priority=9
if ( n?$proxy ) if ( n?$proxy )
Communication::nodes[i] Communication::nodes[i]
= [$host=n$ip, $zone_id=n$zone_id, $p=n$p, = [$host=n$ip, $zone_id=n$zone_id, $p=n$p,
$connect=T, $auth=F, $sync=T, $retry=1mins]; $connect=T, $auth=F, $sync=T, $retry=retry_interval];
else if ( me?$proxy && me$proxy == i ) else if ( me?$proxy && me$proxy == i )
Communication::nodes[me$proxy] Communication::nodes[me$proxy]
= [$host=nodes[i]$ip, $zone_id=nodes[i]$zone_id, = [$host=nodes[i]$ip, $zone_id=nodes[i]$zone_id,
$connect=F, $auth=T, $sync=T]; $connect=F, $auth=T, $sync=T];
} }
# Finally the manager, to send it status updates. # Finally the manager, to send it status updates.
if ( n$node_type == MANAGER && me$manager == i ) if ( n$node_type == MANAGER && me$manager == i )
Communication::nodes["manager"] = [$host=nodes[i]$ip, Communication::nodes["manager"] = [$host=nodes[i]$ip,
$zone_id=nodes[i]$zone_id, $zone_id=nodes[i]$zone_id,
$p=nodes[i]$p, $p=nodes[i]$p,
$connect=T, $retry=1mins, $connect=T, $retry=retry_interval,
$class=node, $class=node,
$events=manager2proxy_events]; $events=manager2proxy_events];
} }
else if ( me$node_type == WORKER ) else if ( me$node_type == WORKER )
{ {
if ( n$node_type == MANAGER && me$manager == i ) if ( n$node_type == MANAGER && me$manager == i )
Communication::nodes["manager"] = [$host=nodes[i]$ip, Communication::nodes["manager"] = [$host=nodes[i]$ip,
$zone_id=nodes[i]$zone_id, $zone_id=nodes[i]$zone_id,
$p=nodes[i]$p, $p=nodes[i]$p,
$connect=T, $retry=1mins, $connect=T, $retry=retry_interval,
$class=node, $class=node,
$events=manager2worker_events]; $events=manager2worker_events];
if ( n$node_type == PROXY && me$proxy == i ) if ( n$node_type == PROXY && me$proxy == i )
Communication::nodes["proxy"] = [$host=nodes[i]$ip, Communication::nodes["proxy"] = [$host=nodes[i]$ip,
$zone_id=nodes[i]$zone_id, $zone_id=nodes[i]$zone_id,
$p=nodes[i]$p, $p=nodes[i]$p,
$connect=T, $retry=1mins, $connect=T, $retry=retry_interval,
$sync=T, $class=node, $sync=T, $class=node,
$events=proxy2worker_events]; $events=proxy2worker_events];
if ( n$node_type == TIME_MACHINE && if ( n$node_type == TIME_MACHINE &&
me?$time_machine && me$time_machine == i ) me?$time_machine && me$time_machine == i )
Communication::nodes["time-machine"] = [$host=nodes[i]$ip, Communication::nodes["time-machine"] = [$host=nodes[i]$ip,
$zone_id=nodes[i]$zone_id, $zone_id=nodes[i]$zone_id,
$p=nodes[i]$p, $p=nodes[i]$p,
$connect=T, $connect=T,
$retry=1min, $retry=retry_interval,
$events=tm2worker_events]; $events=tm2worker_events];
} }
} }
} }