From 17bc61546755aa58839a054624cf5fd5686b87b0 Mon Sep 17 00:00:00 2001
From: Robin Sommer
Date: Tue, 17 Nov 2015 13:28:29 -0800
Subject: [PATCH] Making cluster reconnect timeout configurable.

---
 CHANGES                                      |  6 +++
 VERSION                                      |  2 +-
 scripts/base/frameworks/cluster/main.bro     | 43 ++++++++-------
 .../frameworks/cluster/setup-connections.bro | 54 +++++++++----------
 4 files changed, 57 insertions(+), 48 deletions(-)

diff --git a/CHANGES b/CHANGES
index f6006ffffb..d507490c47 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,4 +1,10 @@
+2.4-211 | 2015-11-17 13:28:29 -0800
+
+  * Making cluster reconnect timeout configurable. (Robin Sommer)
+
+  * Bugfix for child process' communication loop. (Robin Sommer)
+
 2.4-209 | 2015-11-16 07:31:22 -0800
 
   * Updating submodule(s).
 
diff --git a/VERSION b/VERSION
index 9f44806f55..d332944371 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2.4-209
+2.4-211
diff --git a/scripts/base/frameworks/cluster/main.bro b/scripts/base/frameworks/cluster/main.bro
index 218e309bad..3451cb4169 100644
--- a/scripts/base/frameworks/cluster/main.bro
+++ b/scripts/base/frameworks/cluster/main.bro
@@ -43,35 +43,35 @@ export {
 		## software.
 		TIME_MACHINE,
 	};
-	
+
 	## Events raised by a manager and handled by the workers.
 	const manager2worker_events = /Drop::.*/ &redef;
-	
+
 	## Events raised by a manager and handled by proxies.
 	const manager2proxy_events = /EMPTY/ &redef;
-	
+
 	## Events raised by proxies and handled by a manager.
 	const proxy2manager_events = /EMPTY/ &redef;
-	
+
 	## Events raised by proxies and handled by workers.
 	const proxy2worker_events = /EMPTY/ &redef;
-	
+
 	## Events raised by workers and handled by a manager.
 	const worker2manager_events = /(TimeMachine::command|Drop::.*)/ &redef;
-	
+
 	## Events raised by workers and handled by proxies.
 	const worker2proxy_events = /EMPTY/ &redef;
-	
+
 	## Events raised by TimeMachine instances and handled by a manager.
 	const tm2manager_events = /EMPTY/ &redef;
-	
+
 	## Events raised by TimeMachine instances and handled by workers.
 	const tm2worker_events = /EMPTY/ &redef;
-	
-	## Events sent by the control host (i.e. BroControl) when dynamically 
+
+	## Events sent by the control host (i.e. BroControl) when dynamically
 	## connecting to a running instance to update settings or request data.
 	const control_events = Control::controller_events &redef;
-	
+
 	## Record type to indicate a node in a cluster.
 	type Node: record {
 		## Identifies the type of cluster node in this node's configuration.
@@ -96,13 +96,13 @@
 		## Name of a time machine node with which this node connects.
 		time_machine: string &optional;
 	};
-	
+
 	## This function can be called at any time to determine if the cluster
 	## framework is being enabled for this run.
 	##
 	## Returns: True if :bro:id:`Cluster::node` has been set.
 	global is_enabled: function(): bool;
-	
+
 	## This function can be called at any time to determine what type of
 	## cluster node the current Bro instance is going to be acting as.
 	## If :bro:id:`Cluster::is_enabled` returns false, then
@@ -110,22 +110,25 @@
 	##
 	## Returns: The :bro:type:`Cluster::NodeType` the calling node acts as.
 	global local_node_type: function(): NodeType;
-	
+
 	## This gives the value for the number of workers currently connected to,
-	## and it's maintained internally by the cluster framework. It's 
-	## primarily intended for use by managers to find out how many workers 
+	## and it's maintained internally by the cluster framework. It's
+	## primarily intended for use by managers to find out how many workers
 	## should be responding to requests.
 	global worker_count: count = 0;
-	
+
 	## The cluster layout definition. This should be placed into a filter
-	## named cluster-layout.bro somewhere in the BROPATH. It will be 
+	## named cluster-layout.bro somewhere in the BROPATH. It will be
 	## automatically loaded if the CLUSTER_NODE environment variable is set.
 	## Note that BroControl handles all of this automatically.
 	const nodes: table[string] of Node = {} &redef;
-	
+
 	## This is usually supplied on the command line for each instance
 	## of the cluster that is started up.
 	const node = getenv("CLUSTER_NODE") &redef;
+
+	## Interval for retrying failed connections between cluster nodes.
+	const retry_interval = 1min &redef;
 }
 function is_enabled(): bool
 	{
@@ -158,6 +161,6 @@
 		Reporter::error(fmt("'%s' is not a valid node in the Cluster::nodes configuration", node));
 		terminate();
 		}
-	
+
 	Log::create_stream(Cluster::LOG, [$columns=Info, $path="cluster"]);
 	}
diff --git a/scripts/base/frameworks/cluster/setup-connections.bro b/scripts/base/frameworks/cluster/setup-connections.bro
index 4576f5b913..95aff64a6c 100644
--- a/scripts/base/frameworks/cluster/setup-connections.bro
+++ b/scripts/base/frameworks/cluster/setup-connections.bro
@@ -11,7 +11,7 @@ module Cluster;
 event bro_init() &priority=9
 	{
 	local me = nodes[node];
-	
+
 	for ( i in Cluster::nodes )
 		{
 		local n = nodes[i];
@@ -22,35 +22,35 @@ event bro_init() &priority=9
 			Communication::nodes["control"] = [$host=n$ip, $zone_id=n$zone_id,
 			                                   $connect=F, $class="control",
 			                                   $events=control_events];
-	
+
 		if ( me$node_type == MANAGER )
 			{
 			if ( n$node_type == WORKER && n$manager == node )
 				Communication::nodes[i] = [$host=n$ip, $zone_id=n$zone_id,
 				                           $connect=F, $class=i,
 				                           $events=worker2manager_events, $request_logs=T];
-	
+
 			if ( n$node_type == PROXY && n$manager == node )
 				Communication::nodes[i] = [$host=n$ip, $zone_id=n$zone_id,
 				                           $connect=F, $class=i,
 				                           $events=proxy2manager_events, $request_logs=T];
-	
+
 			if ( n$node_type == TIME_MACHINE && me?$time_machine && me$time_machine == i )
 				Communication::nodes["time-machine"] = [$host=nodes[i]$ip,
 				                                        $zone_id=nodes[i]$zone_id,
 				                                        $p=nodes[i]$p,
-				                                        $connect=T, $retry=1min,
+				                                        $connect=T, $retry=retry_interval,
 				                                        $events=tm2manager_events];
 			}
-	
+
 		else if ( me$node_type == PROXY )
 			{
 			if ( n$node_type == WORKER && n$proxy == node )
 				Communication::nodes[i] = [$host=n$ip, $zone_id=n$zone_id,
 				                           $connect=F, $class=i, $sync=T, $auth=T,
 				                           $events=worker2proxy_events];
-	
-			# accepts connections from the previous one. 
+
+			# accepts connections from the previous one.
 			# (This is not ideal for setups with many proxies)
 			# FIXME: Once we're using multiple proxies, we should also figure out some $class scheme ...
 			if ( n$node_type == PROXY )
@@ -58,49 +58,49 @@
 				if ( n?$proxy )
 					Communication::nodes[i] = [$host=n$ip,
 					                           $zone_id=n$zone_id, $p=n$p,
-					                           $connect=T, $auth=F, $sync=T, $retry=1mins];
+					                           $connect=T, $auth=F, $sync=T, $retry=retry_interval];
 				else if ( me?$proxy && me$proxy == i )
 					Communication::nodes[me$proxy] = [$host=nodes[i]$ip,
 					                                  $zone_id=nodes[i]$zone_id,
 					                                  $connect=F, $auth=T, $sync=T];
 				}
-	
+
 			# Finally the manager, to send it status updates.
 			if ( n$node_type == MANAGER && me$manager == i )
-				Communication::nodes["manager"] = [$host=nodes[i]$ip, 
-				                                   $zone_id=nodes[i]$zone_id, 
-				                                   $p=nodes[i]$p, 
-				                                   $connect=T, $retry=1mins, 
+				Communication::nodes["manager"] = [$host=nodes[i]$ip,
+				                                   $zone_id=nodes[i]$zone_id,
+				                                   $p=nodes[i]$p,
+				                                   $connect=T, $retry=retry_interval,
 				                                   $class=node, $events=manager2proxy_events];
 			}
 
 		else if ( me$node_type == WORKER )
 			{
 			if ( n$node_type == MANAGER && me$manager == i )
-				Communication::nodes["manager"] = [$host=nodes[i]$ip, 
+				Communication::nodes["manager"] = [$host=nodes[i]$ip,
 				                                   $zone_id=nodes[i]$zone_id,
 				                                   $p=nodes[i]$p,
-				                                   $connect=T, $retry=1mins, 
-				                                   $class=node, 
+				                                   $connect=T, $retry=retry_interval,
+				                                   $class=node,
 				                                   $events=manager2worker_events];
-	
+
 			if ( n$node_type == PROXY && me$proxy == i )
-				Communication::nodes["proxy"] = [$host=nodes[i]$ip, 
+				Communication::nodes["proxy"] = [$host=nodes[i]$ip,
 				                                 $zone_id=nodes[i]$zone_id,
 				                                 $p=nodes[i]$p,
-				                                 $connect=T, $retry=1mins, 
-				                                 $sync=T, $class=node, 
+				                                 $connect=T, $retry=retry_interval,
+				                                 $sync=T, $class=node,
 				                                 $events=proxy2worker_events];
-	
-			if ( n$node_type == TIME_MACHINE && 
+
+			if ( n$node_type == TIME_MACHINE &&
 			     me?$time_machine && me$time_machine == i )
-				Communication::nodes["time-machine"] = [$host=nodes[i]$ip, 
+				Communication::nodes["time-machine"] = [$host=nodes[i]$ip,
 				                                        $zone_id=nodes[i]$zone_id,
 				                                        $p=nodes[i]$p,
-				                                        $connect=T, 
-				                                        $retry=1min, 
+				                                        $connect=T,
+				                                        $retry=retry_interval,
 				                                        $events=tm2worker_events];
-	
+
 			}
 		}
 	}
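
Note on usage: Cluster::retry_interval is declared with &redef, so a site can tune the
reconnect timeout from its own policy instead of editing the framework scripts. A minimal
sketch, assuming the usual local.bro site policy file and an arbitrary 30-second value
chosen purely for illustration:

    # local.bro -- hypothetical site override; 30secs is an example value,
    # not a recommended default. Every $retry field that
    # setup-connections.bro now fills in from Cluster::retry_interval will
    # use this interval the next time the node starts.
    redef Cluster::retry_interval = 30secs;

Without such a redef, behavior is unchanged: the default remains 1min, matching the
hard-coded values this patch replaces.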