Merge remote-tracking branch 'origin/master' into topic/bernhard/hyperloglog
commit 3e74cdc6e0
37 changed files with 475 additions and 344 deletions
@@ -10,49 +10,48 @@
 module SumStats;
 
 export {
-	## Allows a user to decide how large of result groups the
-	## workers should transmit values for cluster stats aggregation.
+	## Allows a user to decide how large of result groups the workers should transmit
+	## values for cluster stats aggregation.
 	const cluster_send_in_groups_of = 50 &redef;
 
-	## The percent of the full threshold value that needs to be met
-	## on a single worker for that worker to send the value to its manager in
-	## order for it to request a global view for that value. There is no
-	## requirement that the manager requests a global view for the key
-	## since it may opt not to if it requested a global view for the key
-	## recently.
+	## The percent of the full threshold value that needs to be met on a single worker
+	## for that worker to send the value to its manager in order for it to request a
+	## global view for that value. There is no requirement that the manager requests
+	## a global view for the key since it may opt not to if it requested a global view
+	## for the key recently.
 	const cluster_request_global_view_percent = 0.2 &redef;
 
 	## This is to deal with intermediate update overload. A manager will only allow
-	## this many intermediate update requests to the workers to be inflight at
-	## any given time. Requested intermediate updates are currently thrown out
-	## and not performed. In practice this should hopefully have a minimal effect.
+	## this many intermediate update requests to the workers to be inflight at any
+	## given time. Requested intermediate updates are currently thrown out and not
+	## performed. In practice this should hopefully have a minimal effect.
 	const max_outstanding_global_views = 10 &redef;
 
-	## Intermediate updates can cause overload situations on very large clusters.
-	## This option may help reduce load and correct intermittent problems.
-	## The goal for this option is also meant to be temporary.
+	## Intermediate updates can cause overload situations on very large clusters. This
+	## option may help reduce load and correct intermittent problems. The goal for this
+	## option is also meant to be temporary.
 	const enable_intermediate_updates = T &redef;
 
-	## Event sent by the manager in a cluster to initiate the
-	## collection of values for a sumstat.
+	## Event sent by the manager in a cluster to initiate the collection of values for
+	## a sumstat.
 	global cluster_ss_request: event(uid: string, ssid: string);
 
-	## Event sent by nodes that are collecting sumstats after receiving
-	## a request for the sumstat from the manager.
+	## Event sent by nodes that are collecting sumstats after receiving a request for
+	## the sumstat from the manager.
 	global cluster_ss_response: event(uid: string, ssid: string, data: ResultTable, done: bool);
 
-	## This event is sent by the manager in a cluster to initiate the
-	## collection of a single key value from a sumstat. It's typically
-	## used to get intermediate updates before the break interval triggers
-	## to speed detection of a value crossing a threshold.
+	## This event is sent by the manager in a cluster to initiate the collection of
+	## a single key value from a sumstat. It's typically used to get intermediate
+	## updates before the break interval triggers to speed detection of a value
+	## crossing a threshold.
 	global cluster_key_request: event(uid: string, ssid: string, key: Key);
 
-	## This event is sent by nodes in response to a
+	## This event is sent by nodes in response to a
 	## :bro:id:`SumStats::cluster_key_request` event.
 	global cluster_key_response: event(uid: string, ssid: string, key: Key, result: Result);
 
-	## This is sent by workers to indicate that they crossed the percent of the
-	## current threshold by the percentage defined globally in
+	## This is sent by workers to indicate that they crossed the percent
+	## of the current threshold by the percentage defined globally in
 	## :bro:id:`SumStats::cluster_request_global_view_percent`
 	global cluster_key_intermediate_response: event(ssid: string, key: SumStats::Key);
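These tunables are all declared &redef, so a cluster operator can adjust them from a site script rather than editing this file. A minimal sketch of what that could look like (the values are purely illustrative, not recommendations):

    @load base/frameworks/sumstats

    # Hypothetical site tuning, e.g. in local.bro; the option names come
    # from the export block above, the values are only illustrative.
    redef SumStats::cluster_send_in_groups_of = 100;
    redef SumStats::cluster_request_global_view_percent = 0.1;
    redef SumStats::max_outstanding_global_views = 5;
    redef SumStats::enable_intermediate_updates = F;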
@@ -69,7 +68,7 @@ redef Cluster::manager2worker_events += /SumStats::thresholds_reset/;
 redef Cluster::worker2manager_events += /SumStats::cluster_(ss_response|key_response|key_intermediate_response)/;
 
 @if ( Cluster::local_node_type() != Cluster::MANAGER )
-# This variable is maintained to know what keys have recently sent as
+# This variable is maintained to know what keys have recently sent as
 # intermediate updates so they don't overwhelm their manager. The count that is
 # yielded is the number of times the percentage threshold has been crossed and
 # an intermediate result has been received.
@@ -82,7 +81,7 @@ event bro_init() &priority=-100
 	reducer_store = table();
 	}
 
-# This is done on all non-manager node types in the event that a sumstat is
+# This is done on all non-manager node types in the event that a sumstat is
 # being collected somewhere other than a worker.
 function data_added(ss: SumStat, key: Key, result: Result)
 	{
@@ -92,9 +91,9 @@ function data_added(ss: SumStat, key: Key, result: Result)
 		return;
 
 	# If val is 5 and global view % is 0.1 (10%), pct_val will be 50. If that
-	# crosses the full threshold then it's a candidate to send as an
+	# crosses the full threshold then it's a candidate to send as an
 	# intermediate update.
-	if ( enable_intermediate_updates &&
+	if ( enable_intermediate_updates &&
 	     check_thresholds(ss, key, result, cluster_request_global_view_percent) )
 		{
 		# kick off intermediate update
@@ -113,19 +112,21 @@ event SumStats::send_data(uid: string, ssid: string, data: ResultTable)
 		{
 		local_data[key] = data[key];
 		delete data[key];
 
 		# Only send cluster_send_in_groups_of at a time. Queue another
 		# event to send the next group.
 		if ( cluster_send_in_groups_of == ++num_added )
 			break;
 		}
 
 	local done = F;
 	# If data is empty, this sumstat is done.
 	if ( |data| == 0 )
 		done = T;
 
-	event SumStats::cluster_ss_response(uid, ssid, local_data, done);
+	# Note: the copy is needed to compensate for a serialization caching issue.
+	# This should be changed to something else later.
+	event SumStats::cluster_ss_response(uid, ssid, copy(local_data), done);
 	if ( ! done )
 		schedule 0.01 sec { SumStats::send_data(uid, ssid, data) };
 	}
@@ -133,7 +134,7 @@ event SumStats::send_data(uid: string, ssid: string, data: ResultTable)
 event SumStats::cluster_ss_request(uid: string, ssid: string)
 	{
 	#print fmt("WORKER %s: received the cluster_ss_request event for %s.", Cluster::node, id);
 
 	# Initiate sending all of the data for the requested stats.
 	if ( ssid in result_store )
 		event SumStats::send_data(uid, ssid, result_store[ssid]);
@@ -145,13 +146,16 @@ event SumStats::cluster_ss_request(uid: string, ssid: string)
 	if ( ssid in stats_store )
 		reset(stats_store[ssid]);
 	}
 
 event SumStats::cluster_key_request(uid: string, ssid: string, key: Key)
 	{
 	if ( ssid in result_store && key in result_store[ssid] )
 		{
 		#print fmt("WORKER %s: received the cluster_key_request event for %s=%s.", Cluster::node, key2str(key), data);
-		event SumStats::cluster_key_response(uid, ssid, key, result_store[ssid][key]);
+
+		# Note: the copy is needed to compensate for a serialization caching issue.
+		# This should be changed to something else later.
+		event SumStats::cluster_key_response(uid, ssid, key, copy(result_store[ssid][key]));
 		}
 	else
 		{
@@ -179,27 +183,27 @@ event SumStats::thresholds_reset(ssid: string)
 
 @if ( Cluster::local_node_type() == Cluster::MANAGER )
 
-# This variable is maintained by manager nodes as they collect and aggregate
-# results.
+# This variable is maintained by manager nodes as they collect and aggregate
+# results.
 # Indexed on a uid.
 global stats_results: table[string] of ResultTable &read_expire=1min;
 
 # This variable is maintained by manager nodes to track how many "dones" they
-# collected per collection unique id. Once the number of results for a uid
-# matches the number of peer nodes that results should be coming from, the
+# collected per collection unique id. Once the number of results for a uid
+# matches the number of peer nodes that results should be coming from, the
 # result is written out and deleted from here.
 # Indexed on a uid.
 # TODO: add an &expire_func in case not all results are received.
 global done_with: table[string] of count &read_expire=1min &default=0;
 
-# This variable is maintained by managers to track intermediate responses as
-# they are getting a global view for a certain key.
+# This variable is maintained by managers to track intermediate responses as
+# they are getting a global view for a certain key.
 # Indexed on a uid.
 global key_requests: table[string] of Result &read_expire=1min;
 
 # This variable is maintained by managers to prevent overwhelming communication due
-# to too many intermediate updates. Each sumstat is tracked separately so that
-# one won't overwhelm and degrade other quieter sumstats.
+# to too many intermediate updates. Each sumstat is tracked separately so that
+# one won't overwhelm and degrade other quieter sumstats.
 # Indexed on a sumstat id.
 global outstanding_global_views: table[string] of count &default=0;
@@ -211,11 +215,11 @@ event SumStats::finish_epoch(ss: SumStat)
 		{
 		#print fmt("%.6f MANAGER: breaking %s sumstat for %s sumstat", network_time(), ss$name, ss$id);
 		local uid = unique_id("");
 
 		if ( uid in stats_results )
 			delete stats_results[uid];
 		stats_results[uid] = table();
 
 		# Request data from peers.
 		event SumStats::cluster_ss_request(uid, ss$id);
 		}
@@ -224,7 +228,7 @@ event SumStats::finish_epoch(ss: SumStat)
 	schedule ss$epoch { SumStats::finish_epoch(ss) };
 	}
 
-# This is unlikely to be called often, but it's here in
+# This is unlikely to be called often, but it's here in
 # case there are sumstats being collected by managers.
 function data_added(ss: SumStat, key: Key, result: Result)
 	{
@@ -234,7 +238,7 @@ function data_added(ss: SumStat, key: Key, result: Result)
 		event SumStats::cluster_threshold_crossed(ss$id, key, threshold_tracker[ss$id][key]);
 		}
 	}
 
 event SumStats::cluster_key_response(uid: string, ssid: string, key: Key, result: Result)
 	{
 	#print fmt("%0.6f MANAGER: receiving key data from %s - %s=%s", network_time(), get_event_peer()$descr, key2str(key), result);
@@ -277,7 +281,7 @@ event SumStats::cluster_key_intermediate_response(ssid: string, key: Key)
 	if ( ssid in outstanding_global_views &&
 	     |outstanding_global_views[ssid]| > max_outstanding_global_views )
 		{
-		# Don't do this intermediate update. Perhaps at some point in the future
+		# Don't do this intermediate update. Perhaps at some point in the future
 		# we will queue and randomly select from these ignored intermediate
 		# update requests.
 		return;
@@ -308,7 +312,7 @@ event SumStats::cluster_ss_response(uid: string, ssid: string, data: ResultTable
 			local_data[key] = data[key];
 
 		# If a stat is done being collected, thresholds for each key
-		# need to be checked so we're doing it here to avoid doubly
+		# need to be checked so we're doing it here to avoid doubly
 		# iterating over each key.
 		if ( Cluster::worker_count == done_with[uid] )
 			{
@@ -319,7 +323,7 @@ event SumStats::cluster_ss_response(uid: string, ssid: string, data: ResultTable
 				}
 			}
 		}
 
 	# If the data has been collected from all peers, we are done and ready to finish.
 	if ( Cluster::worker_count == done_with[uid] )
 		{
@@ -1,5 +1,5 @@
-##! The summary statistics framework provides a way to
-##! summarize large streams of data into simple reduced
+##! The summary statistics framework provides a way to
+##! summarize large streams of data into simple reduced
 ##! measurements.
 
 module SumStats;
@@ -10,24 +10,24 @@ export {
 		PLACEHOLDER
 	};
 
-	## Represents a thing which is having summarization
+	## Represents a thing which is having summarization
 	## results collected for it.
 	type Key: record {
-		## A non-address related summarization or a sub-key for
-		## an address based summarization. An example might be
+		## A non-address related summarization or a sub-key for
+		## an address based summarization. An example might be
 		## successful SSH connections by client IP address
 		## where the client string would be the key value.
-		## Another example might be number of HTTP requests to
-		## a particular value in a Host header. This is an
-		## example of a non-host based metric since multiple
-		## IP addresses could respond for the same Host
+		## Another example might be number of HTTP requests to
+		## a particular value in a Host header. This is an
+		## example of a non-host based metric since multiple
+		## IP addresses could respond for the same Host
 		## header value.
 		str: string &optional;
 
 		## Host is the value to which this metric applies.
 		host: addr &optional;
 	};
 
 	## Represents data being added for a single observation.
 	## Only supply a single field at a time!
 	type Observation: record {
@@ -40,17 +40,17 @@ export {
 	};
 
 	type Reducer: record {
-		## Observation stream identifier for the reducer
+		## Observation stream identifier for the reducer
 		## to attach to.
 		stream: string;
 
 		## The calculations to perform on the data points.
 		apply: set[Calculation];
 
-		## A predicate so that you can decide per key if you
+		## A predicate so that you can decide per key if you
 		## would like to accept the data being inserted.
 		pred: function(key: SumStats::Key, obs: SumStats::Observation): bool &optional;
 
 		## A function to normalize the key. This can be used to aggregate or
 		## normalize the entire key.
 		normalize_key: function(key: SumStats::Key): Key &optional;
@@ -59,11 +59,11 @@ export {
 	## Value calculated for an observation stream fed into a reducer.
 	## Most of the fields are added by plugins.
 	type ResultVal: record {
-		## The time when the first observation was added to
+		## The time when the first observation was added to
 		## this result value.
 		begin: time;
 
-		## The time when the last observation was added to
+		## The time when the last observation was added to
 		## this result value.
 		end: time;
@@ -74,55 +74,56 @@ export {
 	## Type to store results for multiple reducers.
 	type Result: table[string] of ResultVal;
 
-	## Type to store a table of sumstats results indexed
+	## Type to store a table of sumstats results indexed
 	## by keys.
 	type ResultTable: table[Key] of Result;
 
-	## SumStats represent an aggregation of reducers along with
+	## SumStats represent an aggregation of reducers along with
 	## mechanisms to handle various situations like the epoch ending
 	## or thresholds being crossed.
-	## It's best to not access any global state outside
-	## of the variables given to the callbacks because there
-	## is no assurance provided as to where the callbacks
+	##
+	## It's best to not access any global state outside
+	## of the variables given to the callbacks because there
+	## is no assurance provided as to where the callbacks
 	## will be executed on clusters.
 	type SumStat: record {
-		## The interval at which this filter should be "broken"
-		## and the '$epoch_finished' callback called. The
+		## The interval at which this filter should be "broken"
+		## and the '$epoch_finished' callback called. The
 		## results are also reset at this time so any threshold
-		## based detection needs to be set to a
-		## value that should be expected to happen within
+		## based detection needs to be set to a
+		## value that should be expected to happen within
 		## this epoch.
 		epoch: interval;
 
 		## The reducers for the SumStat
 		reducers: set[Reducer];
 
-		## Provide a function to calculate a value from the
-		## :bro:see:`Result` structure which will be used
-		## for thresholding.
+		## Provide a function to calculate a value from the
+		## :bro:see:`Result` structure which will be used
+		## for thresholding.
 		## This is required if a $threshold value is given.
 		threshold_val: function(key: SumStats::Key, result: SumStats::Result): count &optional;
 
-		## The threshold value for calling the
+		## The threshold value for calling the
 		## $threshold_crossed callback.
 		threshold: count &optional;
 
-		## A series of thresholds for calling the
+		## A series of thresholds for calling the
 		## $threshold_crossed callback.
 		threshold_series: vector of count &optional;
 
 		## A callback that is called when a threshold is crossed.
 		threshold_crossed: function(key: SumStats::Key, result: SumStats::Result) &optional;
 
-		## A callback with the full collection of Results for
+		## A callback with the full collection of Results for
 		## this SumStat.
 		epoch_finished: function(rt: SumStats::ResultTable) &optional;
 	};
 
 	## Create a summary statistic.
 	global create: function(ss: SumStats::SumStat);
 
-	## Add data into an observation stream. This should be
+	## Add data into an observation stream. This should be
 	## called when a script has measured some point value.
 	##
 	## id: The observation stream identifier that the data
@@ -143,13 +144,13 @@ export {
 	};
 
 	## This event is generated when thresholds are reset for a SumStat.
-	##
+	##
 	## ssid: SumStats ID that thresholds were reset for.
 	global thresholds_reset: event(ssid: string);
 
-	## Helper function to represent a :bro:type:`SumStats::Key` value as
+	## Helper function to represent a :bro:type:`SumStats::Key` value as
 	## a simple string.
-	##
+	##
 	## key: The metric key that is to be converted into a string.
 	##
 	## Returns: A string representation of the metric key.
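This export block is the whole user-facing API: define reducers, create a SumStat, and feed it observations. A minimal usage sketch against the interface as it stands in this diff; the "conn.attempts" stream name and all numbers are invented for illustration:

    @load base/frameworks/sumstats

    # Hypothetical threshold helper; SUM is stored as a double and this
    # era of the interface expects a count back from $threshold_val.
    function attempts_threshold(key: SumStats::Key, result: SumStats::Result): count
        {
        return double_to_count(result["conn.attempts"]$sum);
        }

    function attempts_crossed(key: SumStats::Key, result: SumStats::Result)
        {
        print fmt("%s crossed the example threshold", SumStats::key2str(key));
        }

    event bro_init()
        {
        local r1: SumStats::Reducer = [$stream="conn.attempts",
                                       $apply=set(SumStats::SUM)];
        SumStats::create([$epoch=5mins,
                          $reducers=set(r1),
                          $threshold_val=attempts_threshold,
                          $threshold=100,
                          $threshold_crossed=attempts_crossed]);
        }

    event connection_attempt(c: connection)
        {
        # Each attempt contributes 1 via the Observation's num field.
        SumStats::observe("conn.attempts", [$host=c$id$orig_h], [$num=1]);
        }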
@@ -181,16 +182,17 @@ global result_store: table[string] of ResultTable = table();
 # Store of threshold information.
 global thresholds_store: table[string, Key] of bool = table();
 
-# This is called whenever
-# key values are updated and the new val is given as the `val` argument.
-# It's only prototyped here because cluster and non-cluster have separate
-# implementations.
+# This is called whenever key values are updated and the new val is given as the
+# `val` argument. It's only prototyped here because cluster and non-cluster have
+# separate implementations.
 global data_added: function(ss: SumStat, key: Key, result: Result);
 
 # Prototype the hook point for plugins to do calculations.
 global observe_hook: hook(r: Reducer, val: double, data: Observation, rv: ResultVal);
 
 # Prototype the hook point for plugins to initialize any result values.
 global init_resultval_hook: hook(r: Reducer, rv: ResultVal);
 
 # Prototype the hook point for plugins to merge Results.
 global compose_resultvals_hook: hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal);
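These three hooks are the entire plugin surface, and every plugin later in this diff (average, max, min, sample, std-dev, sum, unique, variance) follows the same pattern: extend the Calculation enum, extend ResultVal, then handle the hooks. A sketch of a hypothetical plugin in that style; the LAST calculation and its last field are invented for illustration:

    module SumStats;

    export {
        redef enum Calculation += {
            ## Track the most recently observed value (hypothetical plugin).
            LAST
        };

        redef record ResultVal += {
            ## Added by the hypothetical LAST plugin.
            last: double &optional;
        };
    }

    hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
        {
        if ( LAST in r$apply )
            rv$last = val;
        }

    hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
        {
        # When merging, keep the value from the side whose end time is later.
        if ( rv1?$last && rv2?$last )
            result$last = ( rv1$end > rv2$end ) ? rv1$last : rv2$last;
        else if ( rv1?$last )
            result$last = rv1$last;
        else if ( rv2?$last )
            result$last = rv2$last;
        }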
@@ -252,7 +254,7 @@ function compose_results(r1: Result, r2: Result): Result
 			result[data_id] = r2[data_id];
 			}
 		}
 
 	return result;
 	}
@@ -306,25 +308,25 @@ function observe(id: string, key: Key, obs: Observation)
 		if ( r?$normalize_key )
 			key = r$normalize_key(copy(key));
 
-		# If this reducer has a predicate, run the predicate
+		# If this reducer has a predicate, run the predicate
 		# and skip this key if the predicate returns false.
 		if ( r?$pred && ! r$pred(key, obs) )
 			next;
 
 		local ss = stats_store[r$sid];
 
 		# If there is a threshold and no epoch_finished callback
 		# we don't need to continue counting since the data will
 		# never be accessed. This was leading
-		# to some state management issues when measuring
+		# to some state management issues when measuring
 		# uniqueness.
-		# NOTE: this optimization could need removed in the
+		# NOTE: this optimization could need to be removed in the
 		# future if on demand access is provided to the
 		# SumStats results.
 		if ( ! ss?$epoch_finished &&
 		     r$sid in threshold_tracker &&
 		     key in threshold_tracker[r$sid] &&
-		     ( ss?$threshold &&
+		     ( ss?$threshold &&
 		       threshold_tracker[r$sid][key]$is_threshold_crossed ) ||
 		     ( ss?$threshold_series &&
 		       threshold_tracker[r$sid][key]$threshold_series_index+1 == |ss$threshold_series| ) )
@@ -356,7 +358,7 @@ function observe(id: string, key: Key, obs: Observation)
 		}
 	}
 
-# This function checks if a threshold has been crossed. It is also used as a method to implement
+# This function checks if a threshold has been crossed. It is also used as a method to implement
 # mid-break-interval threshold crossing detection for cluster deployments.
 function check_thresholds(ss: SumStat, key: Key, result: Result, modify_pct: double): bool
 	{
@@ -399,7 +401,7 @@ function check_thresholds(ss: SumStat, key: Key, result: Result, modify_pct: double): bool
 	     |ss$threshold_series| >= tt$threshold_series_index &&
 	     watch >= ss$threshold_series[tt$threshold_series_index] )
 		{
-		# A threshold series was given and the value crossed the next
+		# A threshold series was given and the value crossed the next
 		# value in the series.
 		return T;
 		}
@@ -15,8 +15,8 @@ event SumStats::finish_epoch(ss: SumStat)
 
 	schedule ss$epoch { SumStats::finish_epoch(ss) };
 	}
 
 function data_added(ss: SumStat, key: Key, result: Result)
 	{
 	if ( check_thresholds(ss, key, result, 1.0) )
@@ -1,9 +1,9 @@
-@load base/frameworks/sumstats
+@load base/frameworks/sumstats/main
 
 module SumStats;
 
 export {
-	redef enum Calculation += {
+	redef enum Calculation += {
 		## Calculate the average of the values.
 		AVERAGE
 	};
@@ -33,4 +33,4 @@ hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
 		result$average = rv1$average;
 	else if ( rv2?$average )
 		result$average = rv2$average;
-	}
+	}
@@ -1,9 +1,9 @@
-@load base/frameworks/sumstats
+@load base/frameworks/sumstats/main
 
 module SumStats;
 
 export {
-	redef enum Calculation += {
+	redef enum Calculation += {
 		## Find the maximum value.
 		MAX
 	};
@@ -18,7 +18,7 @@ hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
 	{
 	if ( MAX in r$apply )
 		{
-		if ( ! rv?$max )
+		if ( ! rv?$max )
 			rv$max = val;
 		else if ( val > rv$max )
 			rv$max = val;
@@ -1,9 +1,9 @@
-@load base/frameworks/sumstats
+@load base/frameworks/sumstats/main
 
 module SumStats;
 
 export {
-	redef enum Calculation += {
+	redef enum Calculation += {
 		## Find the minimum value.
 		MIN
 	};
@@ -18,7 +18,7 @@ hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
 	{
 	if ( MIN in r$apply )
 		{
-		if ( ! rv?$min )
+		if ( ! rv?$min )
 			rv$min = val;
 		else if ( val < rv$min )
 			rv$min = val;
@@ -33,4 +33,4 @@ hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
 		result$min = rv1$min;
 	else if ( rv2?$min )
 		result$min = rv2$min;
-	}
+	}
@@ -1,4 +1,4 @@
-@load base/frameworks/sumstats
+@load base/frameworks/sumstats/main
 @load base/utils/queue
 
 module SumStats;
@@ -10,10 +10,8 @@ export {
 	};
 
 	redef record ResultVal += {
-		## This is the queue where samples
-		## are maintained. Use the
-		## :bro:see:`SumStats::get_samples` function
-		## to get a vector of the samples.
+		## This is the queue where samples are maintained. Use the
+		## :bro:see:`SumStats::get_samples` function to get a vector of the samples.
 		samples: Queue::Queue &optional;
 	};
@@ -48,4 +46,4 @@ hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
 		result$samples = rv1$samples;
 	else if ( rv2?$samples )
 		result$samples = rv2$samples;
-	}
+	}
@@ -1,10 +1,10 @@
+@load base/frameworks/sumstats/main
 @load ./variance
-@load base/frameworks/sumstats
 
 module SumStats;
 
 export {
-	redef enum Calculation += {
+	redef enum Calculation += {
 		## Find the standard deviation of the values.
 		STD_DEV
 	};
@@ -1,9 +1,9 @@
-@load base/frameworks/sumstats
+@load base/frameworks/sumstats/main
 
 module SumStats;
 
 export {
-	redef enum Calculation += {
+	redef enum Calculation += {
 		## Sums the values given. For string values,
 		## this will be the number of strings given.
 		SUM
@@ -48,4 +48,4 @@ hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
 	if ( rv2?$sum )
 		result$sum += rv2$sum;
 	}
-	}
+	}
@@ -1,9 +1,9 @@
-@load base/frameworks/sumstats
+@load base/frameworks/sumstats/main
 
 module SumStats;
 
 export {
-	redef enum Calculation += {
+	redef enum Calculation += {
 		## Calculate the number of unique values.
 		UNIQUE
 	};
@@ -16,8 +16,8 @@ export {
 	}
 
 redef record ResultVal += {
-	# Internal use only. This is not meant to be publically available
-	# because we don't want to trust that we can inspect the values
-	# since we will like move to a probalistic data structure in the future.
+	# Internal use only. This is not meant to be publicly available
+	# because we don't want to trust that we can inspect the values
+	# TODO: in the future this will optionally be a hyperloglog structure
 	unique_vals: set[Observation] &optional;
@@ -27,7 +27,7 @@ hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
 	{
 	if ( UNIQUE in r$apply )
 		{
-		if ( ! rv?$unique_vals )
+		if ( ! rv?$unique_vals )
 			rv$unique_vals=set();
 		add rv$unique_vals[obs];
 		rv$unique = |rv$unique_vals|;
@@ -40,7 +40,7 @@ hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
 	{
 	if ( rv1?$unique_vals )
 		result$unique_vals = rv1$unique_vals;
 
 	if ( rv2?$unique_vals )
 		if ( ! result?$unique_vals )
 			result$unique_vals = rv2$unique_vals;
@@ -50,4 +50,4 @@ hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
 
 	result$unique = |result$unique_vals|;
 	}
-	}
+	}
@@ -1,10 +1,10 @@
+@load base/frameworks/sumstats/main
 @load ./average
-@load base/frameworks/sumstats
 
 module SumStats;
 
 export {
-	redef enum Calculation += {
+	redef enum Calculation += {
 		## Find the variance of the values.
 		VARIANCE
 	};
@@ -66,4 +66,4 @@ hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
 		result$prev_avg = rv2$prev_avg;
 
 	calc_variance(result);
-	}
+	}
@@ -1,7 +1,7 @@
-##! Base SSH analysis script. The heuristic to blindly determine success or
+##! Base SSH analysis script. The heuristic to blindly determine success or
 ##! failure for SSH connections is implemented here. At this time, it only
 ##! uses the size of the data being returned from the server to make the
-##! heuristic determination about success of the connection.
+##! heuristic determination about success of the connection.
 ##! Requires that :bro:id:`use_conn_size_analyzer` is set to T! The heuristic
 ##! is not attempted if the connection size analyzer isn't enabled.
@@ -17,7 +17,7 @@ module SSH;
 export {
 	## The SSH protocol logging stream identifier.
 	redef enum Log::ID += { LOG };
 
 	type Info: record {
 		## Time when the SSH connection began.
 		ts: time &log;
@@ -26,9 +26,9 @@ export {
 		## The connection's 4-tuple of endpoint addresses/ports.
 		id: conn_id &log;
 		## Indicates if the login was heuristically guessed to be "success",
-		## "failure", or "undetermined".
+		## "failure", or "undetermined".
 		status: string &log &default="undetermined";
-		## Direction of the connection. If the client was a local host
+		## Direction of the connection. If the client was a local host
 		## logging into an external host, this would be OUTBOUND. INBOUND
 		## would be set for the opposite situation.
 		# TODO: handle local-local and remote-remote better.
@@ -38,33 +38,33 @@ export {
 		## Software string from the server.
 		server: string &log &optional;
 		## Amount of data returned from the server. This is currently
-		## the only measure of the success heuristic and it is logged to
+		## the only measure of the success heuristic and it is logged to
 		## assist analysts looking at the logs to make their own determination
 		## about the success on a case-by-case basis.
 		resp_size: count &log &default=0;
 
 		## Indicate if the SSH session is done being watched.
 		done: bool &default=F;
 	};
 
-	## The size in bytes of data sent by the server at which the SSH
+	## The size in bytes of data sent by the server at which the SSH
 	## connection is presumed to be successful.
 	const authentication_data_size = 4000 &redef;
 
 	## If true, we tell the event engine to not look at further data
 	## packets after the initial SSH handshake. Helps with performance
 	## (especially with large file transfers) but precludes some
 	## kinds of analyses.
 	const skip_processing_after_detection = F &redef;
 
 	## Event that is generated when the heuristic thinks that a login
 	## was successful.
 	global heuristic_successful_login: event(c: connection);
 
 	## Event that is generated when the heuristic thinks that a login
 	## failed.
 	global heuristic_failed_login: event(c: connection);
 
 	## Event that can be handled to access the :bro:type:`SSH::Info`
 	## record as it is sent on to the logging framework.
 	global log_ssh: event(rec: Info);
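The two heuristic events exported above are the intended hook points for other scripts. A minimal sketch of a consumer (the message text is invented; the event names and connection fields come from the export block and standard Bro records):

    @load base/protocols/ssh

    event SSH::heuristic_successful_login(c: connection)
        {
        print fmt("SSH login heuristically judged successful: %s -> %s",
                  c$id$orig_h, c$id$resp_h);
        }

    event SSH::heuristic_failed_login(c: connection)
        {
        print fmt("SSH login heuristically judged failed: %s -> %s",
                  c$id$orig_h, c$id$resp_h);
        }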
@@ -102,21 +102,21 @@ function check_ssh_connection(c: connection, done: bool)
 	# If already done watching this connection, just return.
 	if ( c$ssh$done )
 		return;
 
 	if ( done )
 		{
-		# If this connection is done, then we can look to see if
+		# If this connection is done, then we can look to see if
 		# this matches the conditions for a failed login. Failed
 		# logins are only detected at connection state removal.
 
-		if ( # Require originators to have sent at least 50 bytes.
+		if ( # Require originators to have sent at least 50 bytes.
 		     c$orig$size > 50 &&
 		     # Responders must be below 4000 bytes.
-		     c$resp$size < 4000 &&
+		     c$resp$size < 4000 &&
 		     # Responder must have sent fewer than 40 packets.
 		     c$resp$num_pkts < 40 &&
 		     # If there was a content gap we can't reliably do this heuristic.
-		     c?$conn && c$conn$missed_bytes == 0)# &&
+		     c?$conn && c$conn$missed_bytes == 0)# &&
 		     # Only "normal" connections can count.
 		     #c$conn?$conn_state && c$conn$conn_state in valid_states )
 			{
@@ -147,13 +147,13 @@ function check_ssh_connection(c: connection, done: bool)
 
 	# Set the direction for the log.
 	c$ssh$direction = Site::is_local_addr(c$id$orig_h) ? OUTBOUND : INBOUND;
 
 	# Set the "done" flag to prevent the watching event from rescheduling
 	# after detection is done.
 	c$ssh$done=T;
 
 	Log::write(SSH::LOG, c$ssh);
 
 	if ( skip_processing_after_detection )
 		{
 		# Stop watching this connection, we don't care about it anymore.
@@ -186,12 +186,12 @@ event ssh_server_version(c: connection, version: string) &priority=5
 	set_session(c);
 	c$ssh$server = version;
 	}
 
 event ssh_client_version(c: connection, version: string) &priority=5
 	{
 	set_session(c);
 	c$ssh$client = version;
 
 	# The heuristic detection for SSH relies on the ConnSize analyzer.
 	# Don't do the heuristics if it's disabled.
 	if ( use_conn_size_analyzer )
@@ -6,7 +6,7 @@ export {
 	## Settings for initializing the queue.
 	type Settings: record {
 		## If a maximum length is set for the queue
-		## it will maintain itself at that
+		## it will maintain itself at that
 		## maximum length automatically.
 		max_len: count &optional;
 	};
@@ -15,17 +15,17 @@ export {
 	type Queue: record {};
 
 	## Initialize a queue record structure.
-	##
+	##
 	## s: A :bro:record:`Settings` record configuring the queue.
 	##
 	## Returns: An opaque queue record.
 	global init: function(s: Settings): Queue;
 
 	## Put a value onto the beginning of a queue.
-	##
+	##
 	## q: The queue to put the value into.
 	##
-	## val: The value to insert into the queue.
+	##
+	## val: The value to insert into the queue.
 	global put: function(q: Queue, val: any);
 
 	## Get a value from the end of a queue.
@@ -35,29 +35,29 @@ export {
 	## Returns: The value gotten from the queue.
 	global get: function(q: Queue): any;
 
-	## Merge two queue's together. If any settings are applied
+	## Merge two queues together. If any settings are applied
 	## to the queues, the settings from q1 are used for the new
 	## merged queue.
-	##
+	##
 	## q1: The first queue. Settings are taken from here.
 	##
 	## q2: The second queue.
-	##
+	##
 	## Returns: A new queue from merging the other two together.
 	global merge: function(q1: Queue, q2: Queue): Queue;
 
 	## Get the number of items in a queue.
-	##
+	##
 	## q: The queue.
 	##
 	## Returns: The length of the queue.
 	global len: function(q: Queue): count;
 
 	## Get the contents of the queue as a vector.
-	##
+	##
 	## q: The queue.
 	##
-	## ret: A vector containing the
+	## ret: A vector containing the
 	## current contents of q as the type of ret.
 	global get_vector: function(q: Queue, ret: vector of any);
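Putting the interface above together, a short usage sketch (the values are arbitrary; the function names come from the export block):

    @load base/utils/queue

    event bro_init()
        {
        local q = Queue::init([$max_len=3]);

        Queue::put(q, "a");
        Queue::put(q, "b");
        Queue::put(q, "c");
        Queue::put(q, "d");   # with $max_len=3, the oldest value is dropped

        print Queue::len(q);  # prints 3

        local contents: vector of string = vector();
        Queue::get_vector(q, contents);
        print contents;
        }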
@@ -130,7 +130,7 @@ function get_vector(q: Queue, ret: vector of any)
 	local i = q$bottom;
 	local j = 0;
 	# Really dumb hack, this is only to provide
-	# the iteration for the correct number of
+	# the iteration for the correct number of
 	# values in q$vals.
 	for ( ignored_val in q$vals )
 		{
@@ -1,6 +1,6 @@
 
 ## Given an interval, returns a string of the form 3m34s to
-## give a minimalized human readable string for the minutes
+## give a minimalized human readable string for the minutes
 ## and seconds represented by the interval.
 function duration_to_mins_secs(dur: interval): string
 	{
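The function body is cut off by this hunk. For reference, one plausible implementation matching the documented signature and behavior, using the standard interval_to_double and double_to_count built-ins (an assumption, not necessarily the committed code):

    # Sketch only: converts the interval to whole seconds, then formats
    # minutes and seconds as e.g. "3m34s".
    function duration_to_mins_secs(dur: interval): string
        {
        local total_secs = double_to_count(interval_to_double(dur));
        return fmt("%dm%ds", total_secs / 60, total_secs % 60);
        }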