Making cluster reconnect timeout configurable.

This commit is contained in:
Robin Sommer 2015-11-17 13:28:29 -08:00
parent 931837c5a2
commit 17bc615467
4 changed files with 57 additions and 48 deletions

View file

@ -43,35 +43,35 @@ export {
## software.
TIME_MACHINE,
};
## Events raised by a manager and handled by the workers.
const manager2worker_events = /Drop::.*/ &redef;
## Events raised by a manager and handled by proxies.
const manager2proxy_events = /EMPTY/ &redef;
## Events raised by proxies and handled by a manager.
const proxy2manager_events = /EMPTY/ &redef;
## Events raised by proxies and handled by workers.
const proxy2worker_events = /EMPTY/ &redef;
## Events raised by workers and handled by a manager.
const worker2manager_events = /(TimeMachine::command|Drop::.*)/ &redef;
## Events raised by workers and handled by proxies.
const worker2proxy_events = /EMPTY/ &redef;
## Events raised by TimeMachine instances and handled by a manager.
const tm2manager_events = /EMPTY/ &redef;
## Events raised by TimeMachine instances and handled by workers.
const tm2worker_events = /EMPTY/ &redef;
## Events sent by the control host (i.e. BroControl) when dynamically
## Events sent by the control host (i.e. BroControl) when dynamically
## connecting to a running instance to update settings or request data.
const control_events = Control::controller_events &redef;
## Record type to indicate a node in a cluster.
type Node: record {
## Identifies the type of cluster node in this node's configuration.
@ -96,13 +96,13 @@ export {
## Name of a time machine node with which this node connects.
time_machine: string &optional;
};
## This function can be called at any time to determine if the cluster
## framework is being enabled for this run.
##
## Returns: True if :bro:id:`Cluster::node` has been set.
global is_enabled: function(): bool;
## This function can be called at any time to determine what type of
## cluster node the current Bro instance is going to be acting as.
## If :bro:id:`Cluster::is_enabled` returns false, then
@ -110,22 +110,25 @@ export {
##
## Returns: The :bro:type:`Cluster::NodeType` the calling node acts as.
global local_node_type: function(): NodeType;
## This gives the number of workers that are currently connected,
## and it's maintained internally by the cluster framework. It's
## primarily intended for use by managers to find out how many workers
## and it's maintained internally by the cluster framework. It's
## primarily intended for use by managers to find out how many workers
## should be responding to requests.
global worker_count: count = 0;
## The cluster layout definition. This should be placed into a file
## named cluster-layout.bro somewhere in the BROPATH. It will be
## named cluster-layout.bro somewhere in the BROPATH. It will be
## automatically loaded if the CLUSTER_NODE environment variable is set.
## Note that BroControl handles all of this automatically.
const nodes: table[string] of Node = {} &redef;
## This is usually supplied on the command line for each instance
## of the cluster that is started up.
const node = getenv("CLUSTER_NODE") &redef;
## Interval for retrying failed connections between cluster nodes.
const retry_interval = 1min &redef;
}
function is_enabled(): bool
@ -158,6 +161,6 @@ event bro_init() &priority=5
Reporter::error(fmt("'%s' is not a valid node in the Cluster::nodes configuration", node));
terminate();
}
Log::create_stream(Cluster::LOG, [$columns=Info, $path="cluster"]);
}

View file

@ -11,7 +11,7 @@ module Cluster;
event bro_init() &priority=9
{
local me = nodes[node];
for ( i in Cluster::nodes )
{
local n = nodes[i];
@ -22,35 +22,35 @@ event bro_init() &priority=9
Communication::nodes["control"] = [$host=n$ip, $zone_id=n$zone_id,
$connect=F, $class="control",
$events=control_events];
if ( me$node_type == MANAGER )
{
if ( n$node_type == WORKER && n$manager == node )
Communication::nodes[i] =
[$host=n$ip, $zone_id=n$zone_id, $connect=F,
$class=i, $events=worker2manager_events, $request_logs=T];
if ( n$node_type == PROXY && n$manager == node )
Communication::nodes[i] =
[$host=n$ip, $zone_id=n$zone_id, $connect=F,
$class=i, $events=proxy2manager_events, $request_logs=T];
if ( n$node_type == TIME_MACHINE && me?$time_machine && me$time_machine == i )
Communication::nodes["time-machine"] = [$host=nodes[i]$ip,
$zone_id=nodes[i]$zone_id,
$p=nodes[i]$p,
$connect=T, $retry=1min,
$connect=T, $retry=retry_interval,
$events=tm2manager_events];
}
else if ( me$node_type == PROXY )
{
if ( n$node_type == WORKER && n$proxy == node )
Communication::nodes[i] =
[$host=n$ip, $zone_id=n$zone_id, $connect=F, $class=i,
$sync=T, $auth=T, $events=worker2proxy_events];
# accepts connections from the previous one.
# accepts connections from the previous one.
# (This is not ideal for setups with many proxies)
# FIXME: Once we're using multiple proxies, we should also figure out some $class scheme ...
if ( n$node_type == PROXY )
@ -58,49 +58,49 @@ event bro_init() &priority=9
if ( n?$proxy )
Communication::nodes[i]
= [$host=n$ip, $zone_id=n$zone_id, $p=n$p,
$connect=T, $auth=F, $sync=T, $retry=1mins];
$connect=T, $auth=F, $sync=T, $retry=retry_interval];
else if ( me?$proxy && me$proxy == i )
Communication::nodes[me$proxy]
= [$host=nodes[i]$ip, $zone_id=nodes[i]$zone_id,
$connect=F, $auth=T, $sync=T];
}
# Finally the manager, to send it status updates.
if ( n$node_type == MANAGER && me$manager == i )
Communication::nodes["manager"] = [$host=nodes[i]$ip,
$zone_id=nodes[i]$zone_id,
$p=nodes[i]$p,
$connect=T, $retry=1mins,
Communication::nodes["manager"] = [$host=nodes[i]$ip,
$zone_id=nodes[i]$zone_id,
$p=nodes[i]$p,
$connect=T, $retry=retry_interval,
$class=node,
$events=manager2proxy_events];
}
else if ( me$node_type == WORKER )
{
if ( n$node_type == MANAGER && me$manager == i )
Communication::nodes["manager"] = [$host=nodes[i]$ip,
Communication::nodes["manager"] = [$host=nodes[i]$ip,
$zone_id=nodes[i]$zone_id,
$p=nodes[i]$p,
$connect=T, $retry=1mins,
$class=node,
$connect=T, $retry=retry_interval,
$class=node,
$events=manager2worker_events];
if ( n$node_type == PROXY && me$proxy == i )
Communication::nodes["proxy"] = [$host=nodes[i]$ip,
Communication::nodes["proxy"] = [$host=nodes[i]$ip,
$zone_id=nodes[i]$zone_id,
$p=nodes[i]$p,
$connect=T, $retry=1mins,
$sync=T, $class=node,
$connect=T, $retry=retry_interval,
$sync=T, $class=node,
$events=proxy2worker_events];
if ( n$node_type == TIME_MACHINE &&
if ( n$node_type == TIME_MACHINE &&
me?$time_machine && me$time_machine == i )
Communication::nodes["time-machine"] = [$host=nodes[i]$ip,
Communication::nodes["time-machine"] = [$host=nodes[i]$ip,
$zone_id=nodes[i]$zone_id,
$p=nodes[i]$p,
$connect=T,
$retry=1min,
$connect=T,
$retry=retry_interval,
$events=tm2worker_events];
}
}
}