Merge remote-tracking branch 'origin/master' into topic/bernhard/hyperloglog

2025-10-04 07:38:19 +00:00 · 2013-05-03 22:58:02 -07:00 · 2013-05-03 22:58:02 -07:00 · 3e74cdc6e0
commit 3e74cdc6e0
parent 1cf506071d 69c7363147
37 changed files with 475 additions and 344 deletions
--- a/scripts/base/frameworks/sumstats/cluster.bro
+++ b/scripts/base/frameworks/sumstats/cluster.bro
@ -10,49 +10,48 @@
 module SumStats;

 export {
-	## Allows a user to decide how large of result groups the 
-	## workers should transmit values for cluster stats aggregation.
+	## Allows a user to decide how large of result groups the workers should transmit
+	## values for cluster stats aggregation.
 	const cluster_send_in_groups_of = 50 &redef;
-	
-	## The percent of the full threshold value that needs to be met 
-	## on a single worker for that worker to send the value to its manager in
-	## order for it to request a global view for that value.  There is no
-	## requirement that the manager requests a global view for the key
-	## since it may opt not to if it requested a global view for the key
-	## recently.
+
+	## The percent of the full threshold value that needs to be met on a single worker
+	## for that worker to send the value to its manager in order for it to request a
+	## global view for that value.  There is no requirement that the manager requests
+	## a global view for the key since it may opt not to if it requested a global view
+	## for the key recently.
 	const cluster_request_global_view_percent = 0.2 &redef;

 	## This is to deal with intermediate update overload.  A manager will only allow
-	## this many intermediate update requests to the workers to be inflight at 
-	## any given time.  Requested intermediate updates are currently thrown out
-	## and not performed.  In practice this should hopefully have a minimal effect.
+	## this many intermediate update requests to the workers to be inflight at any
+	## given time.  Requests beyond that limit are currently thrown out and not
+	## performed.  In practice this should hopefully have a minimal effect.
 	const max_outstanding_global_views = 10 &redef;

-	## Intermediate updates can cause overload situations on very large clusters.
-	## This option may help reduce load and correct intermittent problems.
-	## The goal for this option is also meant to be temporary.
+	## Intermediate updates can cause overload situations on very large clusters. This
+	## option may help reduce load and correct intermittent problems. This option is
+	## also meant to be temporary.
 	const enable_intermediate_updates = T &redef;

-	## Event sent by the manager in a cluster to initiate the 
-	## collection of values for a sumstat.
+	## Event sent by the manager in a cluster to initiate the collection of values for
+	## a sumstat.
 	global cluster_ss_request: event(uid: string, ssid: string);

-	## Event sent by nodes that are collecting sumstats after receiving
-	## a request for the sumstat from the manager.
+	## Event sent by nodes that are collecting sumstats after receiving a request for
+	## the sumstat from the manager.
 	global cluster_ss_response: event(uid: string, ssid: string, data: ResultTable, done: bool);

-	## This event is sent by the manager in a cluster to initiate the
-	## collection of a single key value from a sumstat.  It's typically
-	## used to get intermediate updates before the break interval triggers
-	## to speed detection of a value crossing a threshold.
+	## This event is sent by the manager in a cluster to initiate the collection of
+	## a single key value from a sumstat.  It's typically used to get intermediate
+	## updates before the break interval triggers to speed detection of a value
+	## crossing a threshold.
 	global cluster_key_request: event(uid: string, ssid: string, key: Key);

-	## This event is sent by nodes in response to a 
+	## This event is sent by nodes in response to a
 	## :bro:id:`SumStats::cluster_key_request` event.
 	global cluster_key_response: event(uid: string, ssid: string, key: Key, result: Result);

-	## This is sent by workers to indicate that they crossed the percent of the 
-	## current threshold by the percentage defined globally in 
+	## This is sent by workers to indicate that they crossed the fraction
+	## of the current threshold given by the percentage defined globally in
 	## :bro:id:`SumStats::cluster_request_global_view_percent`
 	global cluster_key_intermediate_response: event(ssid: string, key: SumStats::Key);

@ -69,7 +68,7 @@ redef Cluster::manager2worker_events += /SumStats::thresholds_reset/;
 redef Cluster::worker2manager_events += /SumStats::cluster_(ss_response|key_response|key_intermediate_response)/;

@if ( Cluster::local_node_type() != Cluster::MANAGER )
-# This variable is maintained to know what keys have recently sent as 
+# This variable is maintained to know what keys have recently been sent as
 # intermediate updates so they don't overwhelm their manager. The count that is
 # yielded is the number of times the percentage threshold has been crossed and
 # an intermediate result has been received.
@ -82,7 +81,7 @@ event bro_init() &priority=-100
 	reducer_store = table();
 	}

-# This is done on all non-manager node types in the event that a sumstat is 
+# This is done on all non-manager node types in the event that a sumstat is
 # being collected somewhere other than a worker.
 function data_added(ss: SumStat, key: Key, result: Result)
 	{
@ -92,9 +91,9 @@ function data_added(ss: SumStat, key: Key, result: Result)
 		return;

 	# If val is 5 and global view % is 0.1 (10%), pct_val will be 50.  If that
-	# crosses the full threshold then it's a candidate to send as an 
+	# crosses the full threshold then it's a candidate to send as an
 	# intermediate update.
-	if ( enable_intermediate_updates && 
+	if ( enable_intermediate_updates &&
 	     check_thresholds(ss, key, result, cluster_request_global_view_percent) )
 		{
 		# kick off intermediate update
@ -113,19 +112,21 @@ event SumStats::send_data(uid: string, ssid: string, data: ResultTable)
 		{
 		local_data[key] = data[key];
 		delete data[key];
-		
+
 		# Only send cluster_send_in_groups_of at a time.  Queue another
 		# event to send the next group.
 		if ( cluster_send_in_groups_of == ++num_added )
 			break;
 		}
-	
+
 	local done = F;
 	# If data is empty, this sumstat is done.
 	if ( |data| == 0 )
 		done = T;
-	
-	event SumStats::cluster_ss_response(uid, ssid, local_data, done);
+
+	# Note: copy is needed to compensate for a serialization caching issue. This
+	# should be changed to something else later.
+	event SumStats::cluster_ss_response(uid, ssid, copy(local_data), done);
 	if ( ! done )
 		schedule 0.01 sec { SumStats::send_data(uid, ssid, data) };
 	}
@ -133,7 +134,7 @@ event SumStats::send_data(uid: string, ssid: string, data: ResultTable)
 event SumStats::cluster_ss_request(uid: string, ssid: string)
 	{
 	#print fmt("WORKER %s: received the cluster_ss_request event for %s.", Cluster::node, id);
-	
+
 	# Initiate sending all of the data for the requested stats.
 	if ( ssid in result_store )
 		event SumStats::send_data(uid, ssid, result_store[ssid]);
@ -145,13 +146,16 @@ event SumStats::cluster_ss_request(uid: string, ssid: string)
 	if ( ssid in stats_store )
 		reset(stats_store[ssid]);
 	}
-	
+
 event SumStats::cluster_key_request(uid: string, ssid: string, key: Key)
 	{
 	if ( ssid in result_store && key in result_store[ssid] )
 		{
 		#print fmt("WORKER %s: received the cluster_key_request event for %s=%s.", Cluster::node, key2str(key), data);
-		event SumStats::cluster_key_response(uid, ssid, key, result_store[ssid][key]);
+
+		# Note: copy is needed to compensate for a serialization caching issue. This
+		# should be changed to something else later.
+		event SumStats::cluster_key_response(uid, ssid, key, copy(result_store[ssid][key]));
 		}
 	else
 		{
@ -179,27 +183,27 @@ event SumStats::thresholds_reset(ssid: string)

@if ( Cluster::local_node_type() == Cluster::MANAGER )

-# This variable is maintained by manager nodes as they collect and aggregate 
-# results.  
+# This variable is maintained by manager nodes as they collect and aggregate
+# results.
 # Index on a uid.
 global stats_results: table[string] of ResultTable &read_expire=1min;

 # This variable is maintained by manager nodes to track how many "dones" they
-# collected per collection unique id.  Once the number of results for a uid 
-# matches the number of peer nodes that results should be coming from, the 
+# collected per collection unique id.  Once the number of results for a uid
+# matches the number of peer nodes that results should be coming from, the
 # result is written out and deleted from here.
 # Indexed on a uid.
 # TODO: add an &expire_func in case not all results are received.
 global done_with: table[string] of count &read_expire=1min &default=0;

-# This variable is maintained by managers to track intermediate responses as 
-# they are getting a global view for a certain key. 
+# This variable is maintained by managers to track intermediate responses as
+# they are getting a global view for a certain key.
 # Indexed on a uid.
 global key_requests: table[string] of Result &read_expire=1min;

 # This variable is maintained by managers to prevent overwhelming communication due
-# to too many intermediate updates.  Each sumstat is tracked separately so that 
-# one won't overwhelm and degrade other quieter sumstats. 
+# to too many intermediate updates.  Each sumstat is tracked separately so that
+# one won't overwhelm and degrade other quieter sumstats.
 # Indexed on a sumstat id.
 global outstanding_global_views: table[string] of count &default=0;

@ -211,11 +215,11 @@ event SumStats::finish_epoch(ss: SumStat)
 		{
 		#print fmt("%.6f MANAGER: breaking %s sumstat for %s sumstat", network_time(), ss$name, ss$id);
 		local uid = unique_id("");
-		
+
 		if ( uid in stats_results )
 			delete stats_results[uid];
 		stats_results[uid] = table();
-		
+
 		# Request data from peers.
 		event SumStats::cluster_ss_request(uid, ss$id);
 		}
@ -224,7 +228,7 @@ event SumStats::finish_epoch(ss: SumStat)
 	schedule ss$epoch { SumStats::finish_epoch(ss) };
 	}

-# This is unlikely to be called often, but it's here in 
+# This is unlikely to be called often, but it's here in
 # case there are sumstats being collected by managers.
 function data_added(ss: SumStat, key: Key, result: Result)
 	{
@ -234,7 +238,7 @@ function data_added(ss: SumStat, key: Key, result: Result)
 		event SumStats::cluster_threshold_crossed(ss$id, key, threshold_tracker[ss$id][key]);
 		}
 	}
-	
+
 event SumStats::cluster_key_response(uid: string, ssid: string, key: Key, result: Result)
 	{
 	#print fmt("%0.6f MANAGER: receiving key data from %s - %s=%s", network_time(), get_event_peer()$descr, key2str(key), result);
@ -277,7 +281,7 @@ event SumStats::cluster_key_intermediate_response(ssid: string, key: Key)
 	if ( ssid in outstanding_global_views &&
 	     |outstanding_global_views[ssid]| > max_outstanding_global_views )
 		{
-		# Don't do this intermediate update.  Perhaps at some point in the future 
+		# Don't do this intermediate update.  Perhaps at some point in the future
 		# we will queue and randomly select from these ignored intermediate
 		# update requests.
 		return;
@ -308,7 +312,7 @@ event SumStats::cluster_ss_response(uid: string, ssid: string, data: ResultTable
 			local_data[key] = data[key];

 		# If a stat is done being collected, thresholds for each key
-		# need to be checked so we're doing it here to avoid doubly 
+		# need to be checked so we're doing it here to avoid doubly
 		# iterating over each key.
 		if ( Cluster::worker_count == done_with[uid] )
 			{
@ -319,7 +323,7 @@ event SumStats::cluster_ss_response(uid: string, ssid: string, data: ResultTable
 				}
 			}
 		}
-	
+
 	# If the data has been collected from all peers, we are done and ready to finish.
 	if ( Cluster::worker_count == done_with[uid] )
 		{
--- a/scripts/base/frameworks/sumstats/main.bro
+++ b/scripts/base/frameworks/sumstats/main.bro
@ -1,5 +1,5 @@
-##! The summary statistics framework provides a way to 
-##! summarize large streams of data into simple reduced 
+##! The summary statistics framework provides a way to
+##! summarize large streams of data into simple reduced
 ##! measurements.

 module SumStats;
@ -10,24 +10,24 @@ export {
 		PLACEHOLDER
 	};

-	## Represents a thing which is having summarization 
+	## Represents a thing which is having summarization
 	## results collected for it.
 	type Key: record {
-		## A non-address related summarization or a sub-key for 
-		## an address based summarization. An example might be 
+		## A non-address related summarization or a sub-key for
+		## an address based summarization. An example might be
 		## successful SSH connections by client IP address
 		## where the client string would be the key value.
-		## Another example might be number of HTTP requests to 
-		## a particular value in a Host header.  This is an 
-		## example of a non-host based metric since multiple 
-		## IP addresses could respond for the same Host 
+		## Another example might be number of HTTP requests to
+		## a particular value in a Host header.  This is an
+		## example of a non-host based metric since multiple
+		## IP addresses could respond for the same Host
 		## header value.
 		str:  string &optional;
-	
+
 		## Host is the value to which this metric applies.
 		host: addr &optional;
 	};
-	
+
 	## Represents data being added for a single observation.
 	## Only supply a single field at a time!
 	type Observation: record {
@ -40,17 +40,17 @@ export {
 	};

 	type Reducer: record {
-		## Observation stream identifier for the reducer 
+		## Observation stream identifier for the reducer
 		## to attach to.
 		stream:         string;

 		## The calculations to perform on the data points.
 		apply:          set[Calculation];
-		
-		## A predicate so that you can decide per key if you 
+
+		## A predicate so that you can decide per key if you
 		## would like to accept the data being inserted.
 		pred:           function(key: SumStats::Key, obs: SumStats::Observation): bool &optional;
-		
+
 		## A function to normalize the key.  This can be used to aggregate or
 		## normalize the entire key.
 		normalize_key:  function(key: SumStats::Key): Key &optional;
@ -59,11 +59,11 @@ export {
 	## Value calculated for an observation stream fed into a reducer.
 	## Most of the fields are added by plugins.
 	type ResultVal: record {
-		## The time when the first observation was added to 
+		## The time when the first observation was added to
 		## this result value.
 		begin:  time;

-		## The time when the last observation was added to 
+		## The time when the last observation was added to
 		## this result value.
 		end:    time;

@ -74,55 +74,56 @@ export {
 	## Type to store results for multiple reducers.
 	type Result: table[string] of ResultVal;

-	## Type to store a table of sumstats results indexed 
+	## Type to store a table of sumstats results indexed
 	## by keys.
 	type ResultTable: table[Key] of Result;

-	## SumStats represent an aggregation of reducers along with 
+	## SumStats represent an aggregation of reducers along with
 	## mechanisms to handle various situations like the epoch ending
 	## or thresholds being crossed.
-	## It's best to not access any global state outside 
-	## of the variables given to the callbacks because there 
-	## is no assurance provided as to where the callbacks 
+	##
+	## It's best to not access any global state outside
+	## of the variables given to the callbacks because there
+	## is no assurance provided as to where the callbacks
 	## will be executed on clusters.
 	type SumStat: record {
-		## The interval at which this filter should be "broken" 
-		## and the '$epoch_finished' callback called.  The 
+		## The interval at which this filter should be "broken"
+		## and the '$epoch_finished' callback called.  The
 		## results are also reset at this time so any threshold
-		## based detection needs to be set to a 
-		## value that should be expected to happen within 
+		## based detection needs to be set to a
+		## value that should be expected to happen within
 		## this epoch.
 		epoch:              interval;

 		## The reducers for the SumStat
 		reducers:           set[Reducer];

-		## Provide a function to calculate a value from the 
-		## :bro:see:`Result` structure which will be used 
-		## for thresholding.  
+		## Provide a function to calculate a value from the
+		## :bro:see:`Result` structure which will be used
+		## for thresholding.
 		## This is required if a $threshold value is given.
 		threshold_val:      function(key: SumStats::Key, result: SumStats::Result): count &optional;

-		## The threshold value for calling the 
+		## The threshold value for calling the
 		## $threshold_crossed callback.
 		threshold:          count             &optional;
-		
-		## A series of thresholds for calling the 
+
+		## A series of thresholds for calling the
 		## $threshold_crossed callback.
 		threshold_series:   vector of count   &optional;

 		## A callback that is called when a threshold is crossed.
 		threshold_crossed:  function(key: SumStats::Key, result: SumStats::Result) &optional;
-		
-		## A callback with the full collection of Results for 
+
+		## A callback with the full collection of Results for
 		## this SumStat.
 		epoch_finished:    function(rt: SumStats::ResultTable) &optional;
 	};
-	
+
 	## Create a summary statistic.
 	global create: function(ss: SumStats::SumStat);

-	## Add data into an observation stream. This should be 
+	## Add data into an observation stream. This should be
 	## called when a script has measured some point value.
 	##
 	## id: The observation stream identifier that the data
@ -143,13 +144,13 @@ export {
 	};

 	## This event is generated when thresholds are reset for a SumStat.
-	## 
+	##
 	## ssid: SumStats ID that thresholds were reset for.
 	global thresholds_reset: event(ssid: string);

-	## Helper function to represent a :bro:type:`SumStats::Key` value as 
+	## Helper function to represent a :bro:type:`SumStats::Key` value as
 	## a simple string.
-	## 
+	##
 	## key: The metric key that is to be converted into a string.
 	##
 	## Returns: A string representation of the metric key.
@ -181,16 +182,17 @@ global result_store: table[string] of ResultTable = table();
 # Store of threshold information.
 global thresholds_store: table[string, Key] of bool = table();

-# This is called whenever
-# key values are updated and the new val is given as the `val` argument.
-# It's only prototyped here because cluster and non-cluster have separate 
-# implementations.
+# This is called whenever key values are updated and the new val is given as the
+# `val` argument. It's only prototyped here because cluster and non-cluster have
+# separate implementations.
 global data_added: function(ss: SumStat, key: Key, result: Result);

 # Prototype the hook point for plugins to do calculations.
 global observe_hook: hook(r: Reducer, val: double, data: Observation, rv: ResultVal);
+
 # Prototype the hook point for plugins to initialize any result values.
 global init_resultval_hook: hook(r: Reducer, rv: ResultVal);
+
 # Prototype the hook point for plugins to merge Results.
 global compose_resultvals_hook: hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal);

@ -252,7 +254,7 @@ function compose_results(r1: Result, r2: Result): Result
 				result[data_id] = r2[data_id];
 			}
 		}
-	
+
 	return result;
 	}

@ -306,25 +308,25 @@ function observe(id: string, key: Key, obs: Observation)
 		if ( r?$normalize_key )
 			key = r$normalize_key(copy(key));

-		# If this reducer has a predicate, run the predicate 
+		# If this reducer has a predicate, run the predicate
 		# and skip this key if the predicate return false.
 		if ( r?$pred && ! r$pred(key, obs) )
 			next;
-		
+
 		local ss = stats_store[r$sid];
-		
+
 		# If there is a threshold and no epoch_finished callback
 		# we don't need to continue counting since the data will
 		# never be accessed.  This was leading
-		# to some state management issues when measuring 
+		# to some state management issues when measuring
 		# uniqueness.
-		# NOTE: this optimization could need removed in the 
+		# NOTE: this optimization could need to be removed in the
 		#       future if on demand access is provided to the
 		#       SumStats results.
 		if ( ! ss?$epoch_finished &&
 		     r$sid in threshold_tracker &&
 		     key in threshold_tracker[r$sid] &&
-		     ( ss?$threshold && 
+		     ( ss?$threshold &&
 		       threshold_tracker[r$sid][key]$is_threshold_crossed ) ||
 		     ( ss?$threshold_series &&
 		       threshold_tracker[r$sid][key]$threshold_series_index+1 == |ss$threshold_series| ) )
@ -356,7 +358,7 @@ function observe(id: string, key: Key, obs: Observation)
 		}
 	}

-# This function checks if a threshold has been crossed.  It is also used as a method to implement 
+# This function checks if a threshold has been crossed.  It is also used as a method to implement
 # mid-break-interval threshold crossing detection for cluster deployments.
 function check_thresholds(ss: SumStat, key: Key, result: Result, modify_pct: double): bool
 	{
@ -399,7 +401,7 @@ function check_thresholds(ss: SumStat, key: Key, result: Result, modify_pct: dou
 	     |ss$threshold_series| >= tt$threshold_series_index &&
 	     watch >= ss$threshold_series[tt$threshold_series_index] )
 		{
-		# A threshold series was given and the value crossed the next 
+		# A threshold series was given and the value crossed the next
 		# value in the series.
 		return T;
 		}
--- a/scripts/base/frameworks/sumstats/non-cluster.bro
+++ b/scripts/base/frameworks/sumstats/non-cluster.bro
@ -15,8 +15,8 @@ event SumStats::finish_epoch(ss: SumStat)

 	schedule ss$epoch { SumStats::finish_epoch(ss) };
 	}
-	
-	
+
+
 function data_added(ss: SumStat, key: Key, result: Result)
 	{
 	if ( check_thresholds(ss, key, result, 1.0) )
--- a/scripts/base/frameworks/sumstats/plugins/average.bro
+++ b/scripts/base/frameworks/sumstats/plugins/average.bro
@ -1,9 +1,9 @@
-@load base/frameworks/sumstats
+@load base/frameworks/sumstats/main

 module SumStats;

 export {
-	redef enum Calculation += { 
+	redef enum Calculation += {
 		## Calculate the average of the values.
 		AVERAGE
 	};
@ -33,4 +33,4 @@ hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
 		result$average = rv1$average;
 	else if ( rv2?$average )
 		result$average = rv2$average;
-	}
+	}
--- a/scripts/base/frameworks/sumstats/plugins/max.bro
+++ b/scripts/base/frameworks/sumstats/plugins/max.bro
@ -1,9 +1,9 @@
-@load base/frameworks/sumstats
+@load base/frameworks/sumstats/main

 module SumStats;

 export {
-	redef enum Calculation += { 
+	redef enum Calculation += {
 		## Find the maximum value.
 		MAX
 	};
@ -18,7 +18,7 @@ hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
 	{
 	if ( MAX in r$apply )
 		{
-		if ( ! rv?$max ) 
+		if ( ! rv?$max )
 			rv$max = val;
 		else if ( val > rv$max )
 			rv$max = val;
--- a/scripts/base/frameworks/sumstats/plugins/min.bro
+++ b/scripts/base/frameworks/sumstats/plugins/min.bro
@ -1,9 +1,9 @@
-@load base/frameworks/sumstats
+@load base/frameworks/sumstats/main

 module SumStats;

 export {
-	redef enum Calculation += { 
+	redef enum Calculation += {
 		## Find the minimum value.
 		MIN
 	};
@ -18,7 +18,7 @@ hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
 	{
 	if ( MIN in r$apply )
 		{
-		if ( ! rv?$min ) 
+		if ( ! rv?$min )
 			rv$min = val;
 		else if ( val < rv$min )
 			rv$min = val;
@ -33,4 +33,4 @@ hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
 		result$min = rv1$min;
 	else if ( rv2?$min )
 		result$min = rv2$min;
-	}
+	}
--- a/scripts/base/frameworks/sumstats/plugins/sample.bro
+++ b/scripts/base/frameworks/sumstats/plugins/sample.bro
@ -1,4 +1,4 @@
-@load base/frameworks/sumstats
+@load base/frameworks/sumstats/main
@load base/utils/queue

 module SumStats;
@ -10,10 +10,8 @@ export {
 	};

 	redef record ResultVal += {
-		## This is the queue where samples
-		## are maintained.  Use the 
-		## :bro:see:`SumStats::get_samples` function
-		## to get a vector of the samples.
+		## This is the queue where samples are maintained.  Use the
+		## :bro:see:`SumStats::get_samples` function to get a vector of the samples.
 		samples: Queue::Queue &optional;
 	};

@ -48,4 +46,4 @@ hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
 		result$samples = rv1$samples;
 	else if ( rv2?$samples )
 		result$samples = rv2$samples;
-	}
+	}
--- a/scripts/base/frameworks/sumstats/plugins/std-dev.bro
+++ b/scripts/base/frameworks/sumstats/plugins/std-dev.bro
@ -1,10 +1,10 @@
+@load base/frameworks/sumstats/main
@load ./variance
-@load base/frameworks/sumstats

 module SumStats;

 export {
-	redef enum Calculation += { 
+	redef enum Calculation += {
 		## Find the standard deviation of the values.
 		STD_DEV
 	};
--- a/scripts/base/frameworks/sumstats/plugins/sum.bro
+++ b/scripts/base/frameworks/sumstats/plugins/sum.bro
@ -1,9 +1,9 @@
-@load base/frameworks/sumstats
+@load base/frameworks/sumstats/main

 module SumStats;

 export {
-	redef enum Calculation += { 
+	redef enum Calculation += {
 		## Sums the values given.  For string values,
 		## this will be the number of strings given.
 		SUM
@ -48,4 +48,4 @@ hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
 		if ( rv2?$sum )
 			result$sum += rv2$sum;
 		}
-	}
+	}
--- a/scripts/base/frameworks/sumstats/plugins/unique.bro
+++ b/scripts/base/frameworks/sumstats/plugins/unique.bro
@ -1,9 +1,9 @@
-@load base/frameworks/sumstats
+@load base/frameworks/sumstats/main

 module SumStats;

 export {
-	redef enum Calculation += { 
+	redef enum Calculation += {
 		## Calculate the number of unique values.
 		UNIQUE
 	};
@ -16,8 +16,8 @@ export {
 }

 redef record ResultVal += {
-	# Internal use only.  This is not meant to be publically available 
-	# because we don't want to trust that we can inspect the values 
+	# Internal use only.  This is not meant to be publicly available
+	# because we don't want to trust that we can inspect the values
 	# since we will like move to a probalistic data structure in the future.
 	# TODO: in the future this will optionally be a hyperloglog structure
 	unique_vals: set[Observation] &optional;
@ -27,7 +27,7 @@ hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
 	{
 	if ( UNIQUE in r$apply )
 		{
-		if ( ! rv?$unique_vals ) 
+		if ( ! rv?$unique_vals )
 			rv$unique_vals=set();
 		add rv$unique_vals[obs];
 		rv$unique = |rv$unique_vals|;
@ -40,7 +40,7 @@ hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
 		{
 		if ( rv1?$unique_vals )
 			result$unique_vals = rv1$unique_vals;
-		
+
 		if ( rv2?$unique_vals )
 			if ( ! result?$unique_vals )
 				result$unique_vals = rv2$unique_vals;
@ -50,4 +50,4 @@ hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)

 		result$unique = |result$unique_vals|;
 		}
-	}
+	}
--- a/scripts/base/frameworks/sumstats/plugins/variance.bro
+++ b/scripts/base/frameworks/sumstats/plugins/variance.bro
@ -1,10 +1,10 @@
+@load base/frameworks/sumstats/main
@load ./average
-@load base/frameworks/sumstats

 module SumStats;

 export {
-	redef enum Calculation += { 
+	redef enum Calculation += {
 		## Find the variance of the values.
 		VARIANCE
 	};
@ -66,4 +66,4 @@ hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
 		result$prev_avg = rv2$prev_avg;

 	calc_variance(result);
-	}
+	}