Merge remote branch 'origin/master' into topic/bernhard/sqlite

2025-10-07 17:18:20 +00:00 · 2013-05-12 20:47:55 -07:00 · 2013-05-12 20:47:55 -07:00 · 747ba68030
commit 747ba68030
parent b968103c92 69c7363147
78 changed files with 2650 additions and 1387 deletions
--- a/scripts/base/frameworks/metrics/cluster.bro
+++ b/scripts/base/frameworks/metrics/cluster.bro
@ -1,264 +0,0 @@
-##! This implements transparent cluster support for the metrics framework.
-##! Do not load this file directly.  It's only meant to be loaded automatically
-##! and will be depending on if the cluster framework has been enabled.
-##! The goal of this script is to make metric calculation completely and
-##! transparently automated when running on a cluster.
-##! 
-##! Events defined here are not exported deliberately because they are meant
-##! to be an internal implementation detail.
-
-@load base/frameworks/cluster
-@load ./main
-
-module Metrics;
-
-export {
-	## Allows a user to decide how large of result groups the 
-	## workers should transmit values for cluster metric aggregation.
-	const cluster_send_in_groups_of = 50 &redef;
-	
-	## The percent of the full threshold value that needs to be met 
-	## on a single worker for that worker to send the value to its manager in
-	## order for it to request a global view for that value.  There is no
-	## requirement that the manager requests a global view for the index
-	## since it may opt not to if it requested a global view for the index
-	## recently.
-	const cluster_request_global_view_percent = 0.1 &redef;
-	
-	## Event sent by the manager in a cluster to initiate the 
-	## collection of metrics values for a filter.
-	global cluster_filter_request: event(uid: string, id: ID, filter_name: string);
-
-	## Event sent by nodes that are collecting metrics after receiving
-	## a request for the metric filter from the manager.
-	global cluster_filter_response: event(uid: string, id: ID, filter_name: string, data: MetricTable, done: bool);
-
-	## This event is sent by the manager in a cluster to initiate the
-	## collection of a single index value from a filter.  It's typically
-	## used to get intermediate updates before the break interval triggers
-	## to speed detection of a value crossing a threshold.
-	global cluster_index_request: event(uid: string, id: ID, filter_name: string, index: Index);
-
-	## This event is sent by nodes in response to a 
-	## :bro:id:`Metrics::cluster_index_request` event.
-	global cluster_index_response: event(uid: string, id: ID, filter_name: string, index: Index, val: count);
-
-	## This is sent by workers to indicate that they crossed the percent of the 
-	## current threshold by the percentage defined globally in 
-	## :bro:id:`Metrics::cluster_request_global_view_percent`
-	global cluster_index_intermediate_response: event(id: Metrics::ID, filter_name: string, index: Metrics::Index, val: count);
-
-	## This event is scheduled internally on workers to send result chunks.
-	global send_data: event(uid: string, id: ID, filter_name: string, data: MetricTable);
-	
-}
-
-
-# This is maintained by managers so they can know what data they requested and
-# when they requested it.
-global requested_results: table[string] of time = table() &create_expire=5mins;
-
-# TODO: The next 4 variables make the assumption that a value never 
-#       takes longer than 5 minutes to transmit from workers to manager.  This needs to 
-#       be tunable or self-tuning.  These should also be restructured to be
-#       maintained within a single variable.
-
-# This variable is maintained by manager nodes as they collect and aggregate 
-# results.
-global filter_results: table[string, ID, string] of MetricTable &create_expire=5mins;
-
-# This variable is maintained by manager nodes to track how many "dones" they
-# collected per collection unique id.  Once the number of results for a uid 
-# matches the number of peer nodes that results should be coming from, the 
-# result is written out and deleted from here.
-# TODO: add an &expire_func in case not all results are received.
-global done_with: table[string] of count &create_expire=5mins &default=0;
-
-# This variable is maintained by managers to track intermediate responses as 
-# they are getting a global view for a certain index.
-global index_requests: table[string, ID, string, Index] of count &create_expire=5mins &default=0;
-
-# This variable is maintained by all hosts for different purposes. Non-managers
-# maintain it to know what indexes they have recently sent as intermediate
-# updates so they don't overwhelm their manager. Managers maintain it so they
-# don't overwhelm workers with intermediate index requests. The count that is
-# yielded is the number of times the percentage threshold has been crossed and
-# an intermediate result has been received. The manager may optionally request
-# the index again before data expires from here if too many workers are crossing
-# the percentage threshold (not implemented yet!).
-global recent_global_view_indexes: table[ID, string, Index] of count &create_expire=5mins &default=0;
-
-# Add events to the cluster framework to make this work.
-redef Cluster::manager2worker_events += /Metrics::cluster_(filter_request|index_request)/;
-redef Cluster::worker2manager_events += /Metrics::cluster_(filter_response|index_response|index_intermediate_response)/;
-
-@if ( Cluster::local_node_type() != Cluster::MANAGER )
-# This is done on all non-manager node types in the event that a metric is 
-# being collected somewhere other than a worker.
-function data_added(filter: Filter, index: Index, val: count)
-	{
-	# If an intermediate update for this value was sent recently, don't send
-	# it again.
-	if ( [filter$id, filter$name, index] in recent_global_view_indexes )
-		return;
-		
-	# If val is 5 and global view % is 0.1 (10%), pct_val will be 50.  If that
-	# crosses the full threshold then it's a candidate to send as an 
-	# intermediate update.
-	local pct_val = double_to_count(val / cluster_request_global_view_percent);
-	
-	if ( check_notice(filter, index, pct_val) ) 
-		{
-		# kick off intermediate update
-		event Metrics::cluster_index_intermediate_response(filter$id, filter$name, index, val);
-		
-		++recent_global_view_indexes[filter$id, filter$name, index];
-		}
-	}
-
-event Metrics::send_data(uid: string, id: ID, filter_name: string, data: MetricTable)
-	{
-	#print fmt("WORKER %s: sending data for uid %s...", Cluster::node, uid);
-	
-	local local_data: MetricTable;
-	local num_added = 0;
-	for ( index in data )
-		{
-		local_data[index] = data[index];
-		delete data[index];
-		
-		# Only send cluster_send_in_groups_of at a time.  Queue another
-		# event to send the next group.
-		if ( cluster_send_in_groups_of == ++num_added )
-			break;
-		}
-	
-	local done = F;
-	# If data is empty, this metric is done.
-	if ( |data| == 0 )
-		done = T;
-	
-	event Metrics::cluster_filter_response(uid, id, filter_name, local_data, done);
-	if ( ! done )
-		event Metrics::send_data(uid, id, filter_name, data);
-	}
-
-event Metrics::cluster_filter_request(uid: string, id: ID, filter_name: string)
-	{
-	#print fmt("WORKER %s: received the cluster_filter_request event.", Cluster::node);
-	
-	# Initiate sending all of the data for the requested filter.
-	event Metrics::send_data(uid, id, filter_name, store[id, filter_name]);
-	
-	# Lookup the actual filter and reset it, the reference to the data
-	# currently stored will be maintained interally by the send_data event.
-	reset(filter_store[id, filter_name]);
-	}
-	
-event Metrics::cluster_index_request(uid: string, id: ID, filter_name: string, index: Index)
-	{
-	local val=0;
-	if ( index in store[id, filter_name] )
-		val = store[id, filter_name][index];
-	
-	# fmt("WORKER %s: received the cluster_index_request event for %s=%d.", Cluster::node, index2str(index), val);
-	event Metrics::cluster_index_response(uid, id, filter_name, index, val);
-	}
-
-@endif
-
-
-@if ( Cluster::local_node_type() == Cluster::MANAGER )
-
-# Manager's handle logging.
-event Metrics::log_it(filter: Filter)
-	{
-	#print fmt("%.6f MANAGER: breaking %s filter for %s metric", network_time(), filter$name, filter$id);
-	
-	local uid = unique_id("");
-	
-	# Set some tracking variables.
-	requested_results[uid] = network_time();
-	filter_results[uid, filter$id, filter$name] = table();
-	
-	# Request data from peers.
-	event Metrics::cluster_filter_request(uid, filter$id, filter$name);
-	# Schedule the log_it event for the next break period.
-	schedule filter$break_interval { Metrics::log_it(filter) };
-	}
-
-# This is unlikely to be called often, but it's here in case there are metrics
-# being collected by managers.
-function data_added(filter: Filter, index: Index, val: count)
-	{
-	if ( check_notice(filter, index, val) )
-		do_notice(filter, index, val);
-	}
-	
-event Metrics::cluster_index_response(uid: string, id: ID, filter_name: string, index: Index, val: count)
-	{
-	#print fmt("%0.6f MANAGER: receiving index data from %s", network_time(), get_event_peer()$descr);
-
-	if ( [uid, id, filter_name, index] !in index_requests )
-		index_requests[uid, id, filter_name, index] = 0;
-	
-	index_requests[uid, id, filter_name, index] += val;
-	local ir = index_requests[uid, id, filter_name, index];
-	
-	++done_with[uid];
-	if ( Cluster::worker_count == done_with[uid] )
-		{
-		if ( check_notice(filter_store[id, filter_name], index, ir) )
-			do_notice(filter_store[id, filter_name], index, ir);
-		delete done_with[uid];
-		delete index_requests[uid, id, filter_name, index];
-		}
-	}
-
-# Managers handle intermediate updates here.
-event Metrics::cluster_index_intermediate_response(id: ID, filter_name: string, index: Index, val: count)
-	{
-	#print fmt("MANAGER: receiving intermediate index data from %s", get_event_peer()$descr);
-	#print fmt("MANAGER: requesting index data for %s", index2str(index));
-	
-	local uid = unique_id("");
-	event Metrics::cluster_index_request(uid, id, filter_name, index);
-	++recent_global_view_indexes[id, filter_name, index];
-	}
-
-event Metrics::cluster_filter_response(uid: string, id: ID, filter_name: string, data: MetricTable, done: bool)
-	{
-	#print fmt("MANAGER: receiving results from %s", get_event_peer()$descr);
-	
-	local local_data = filter_results[uid, id, filter_name];
-	for ( index in data )
-		{
-		if ( index !in local_data )
-			local_data[index] = 0;
-		local_data[index] += data[index];
-		}
-	
-	# Mark another worker as being "done" for this uid.
-	if ( done )
-		++done_with[uid];
-	
-	# If the data has been collected from all peers, we are done and ready to log.
-	if ( Cluster::worker_count == done_with[uid] )
-		{
-		local ts = network_time();
-		# Log the time this was initially requested if it's available.
-		if ( uid in requested_results )
-			{
-			ts = requested_results[uid];
-			delete requested_results[uid];
-			}
-		
-		write_log(ts, filter_store[id, filter_name], local_data);
-		
-		# Clean up
-		delete filter_results[uid, id, filter_name];
-		delete done_with[uid];
-		}
-	}
-
-@endif
--- a/scripts/base/frameworks/metrics/main.bro
+++ b/scripts/base/frameworks/metrics/main.bro
@ -1,320 +0,0 @@
-##! The metrics framework provides a way to count and measure data.  
-
-@load base/frameworks/notice
-
-module Metrics;
-
-export {
-	## The metrics logging stream identifier.
-	redef enum Log::ID += { LOG };
-	
-	## Identifiers for metrics to collect.
-	type ID: enum {
-		## Blank placeholder value.
-		NOTHING,
-	};
-	
-	## The default interval used for "breaking" metrics and writing the 
-	## current value to the logging stream.
-	const default_break_interval = 15mins &redef;
-	
-	## This is the interval for how often threshold based notices will happen 
-	## after they have already fired.
-	const renotice_interval = 1hr &redef;
-	
-	## Represents a thing which is having metrics collected for it.  An instance
-	## of this record type and a :bro:type:`Metrics::ID` together represent a 
-	## single measurement.
-	type Index: record {
-		## Host is the value to which this metric applies.
-		host:         addr &optional;
-		
-		## A non-address related metric or a sub-key for an address based metric.
-		## An example might be successful SSH connections by client IP address
-		## where the client string would be the index value.
-		## Another example might be number of HTTP requests to a particular
-		## value in a Host header.  This is an example of a non-host based
-		## metric since multiple IP addresses could respond for the same Host
-		## header value.
-		str:        string &optional;
-		
-		## The CIDR block that this metric applies to.  This is typically
-		## only used internally for host based aggregation.
-		network:      subnet &optional;
-	} &log;
-	
-	## The record type that is used for logging metrics.
-	type Info: record {
-		## Timestamp at which the metric was "broken".
-		ts:           time   &log;
-		## What measurement the metric represents.
-		metric_id:    ID     &log;
-		## The name of the filter being logged.  :bro:type:`Metrics::ID` values
-		## can have multiple filters which represent different perspectives on
-		## the data so this is necessary to understand the value.
-		filter_name:  string &log;
-		## What the metric value applies to.
-		index:        Index  &log;
-		## The simple numeric value of the metric.
-		value:        count  &log;
-	};
-	
-    # TODO: configure a metrics filter logging stream to log the current
-	#       metrics configuration in case someone is looking through
-	#       old logs and the configuration has changed since then.
-	
-	## Filters define how the data from a metric is aggregated and handled.  
-	## Filters can be used to set how often the measurements are cut or "broken"
-	## and logged or how the data within them is aggregated.  It's also 
-	## possible to disable logging and use filters for thresholding.
-	type Filter: record {
-		## The :bro:type:`Metrics::ID` that this filter applies to.
-		id:                ID                      &optional;
-		## The name for this filter so that multiple filters can be
-		## applied to a single metrics to get a different view of the same
-		## metric data being collected (different aggregation, break, etc).
-		name:              string                  &default="default";
-		## A predicate so that you can decide per index if you would like
-		## to accept the data being inserted.
-		pred:              function(index: Index): bool &optional;
-		## Global mask by which you'd like to aggregate traffic.
-		aggregation_mask:  count                   &optional;
-		## This is essentially a mapping table between addresses and subnets.
-		aggregation_table: table[subnet] of subnet &optional;
-		## The interval at which this filter should be "broken" and written
-		## to the logging stream.  The counters are also reset to zero at 
-		## this time so any threshold based detection needs to be set to a 
-		## number that should be expected to happen within this period.
-		break_interval:    interval                &default=default_break_interval;
-		## This determines if the result of this filter is sent to the metrics
-		## logging stream.  One use for the logging framework is as an internal
-		## thresholding and statistics gathering utility that is meant to
-		## never log but rather to generate notices and derive data.
-		log:               bool                    &default=T;
-		## If this and a $notice_threshold value are set, this notice type
-		## will be generated by the metrics framework.
-		note:              Notice::Type            &optional;
-		## A straight threshold for generating a notice.
-		notice_threshold:  count                   &optional;
-		## A series of thresholds at which to generate notices.
-		notice_thresholds: vector of count         &optional;
-		## How often this notice should be raised for this filter.  It 
-		## will be generated everytime it crosses a threshold, but if the 
-		## $break_interval is set to 5mins and this is set to 1hr the notice
-		## only be generated once per hour even if something crosses the
-		## threshold in every break interval.
-		notice_freq:       interval                &optional;
-	};
-	
-	## Function to associate a metric filter with a metric ID.
-	## 
-	## id: The metric ID that the filter should be associated with.
-	##
-	## filter: The record representing the filter configuration.
-	global add_filter: function(id: ID, filter: Filter);
-	
-	## Add data into a :bro:type:`Metrics::ID`.  This should be called when
-	## a script has measured some point value and is ready to increment the
-	## counters.
-	##
-	## id: The metric ID that the data represents.
-	##
-	## index: The metric index that the value is to be added to.
-	##
-	## increment: How much to increment the counter by.
-	global add_data: function(id: ID, index: Index, increment: count);
-	
-	## Helper function to represent a :bro:type:`Metrics::Index` value as 
-	## a simple string
-	## 
-	## index: The metric index that is to be converted into a string.
-	##
-	## Returns: A string reprentation of the metric index.
-	global index2str: function(index: Index): string;
-	
-	## Event that is used to "finish" metrics and adapt the metrics
-	## framework for clustered or non-clustered usage.
-	##
-	## ..note: This is primarily intended for internal use.
-	global log_it: event(filter: Filter);
-	
-	## Event to access metrics records as they are passed to the logging framework.
-	global log_metrics: event(rec: Info);
-	
-	## Type to store a table of metrics values.  Interal use only!
-	type MetricTable: table[Index] of count &default=0;
-}
-
-redef record Notice::Info += {
-	metric_index: Index &log &optional;
-};
-
-global metric_filters: table[ID] of vector of Filter = table();
-global filter_store: table[ID, string] of Filter = table();
-
-# This is indexed by metric ID and stream filter name.
-global store: table[ID, string] of MetricTable = table() &default=table();
-
-# This function checks if a threshold has been crossed and generates a 
-# notice if it has.  It is also used as a method to implement 
-# mid-break-interval threshold crossing detection for cluster deployments.
-global check_notice: function(filter: Filter, index: Index, val: count): bool;
-
-# This is hook for watching thresholds being crossed.  It is called whenever
-# index values are updated and the new val is given as the `val` argument.
-global data_added: function(filter: Filter, index: Index, val: count);
-
-# This stores the current threshold index for filters using the
-# $notice_threshold and $notice_thresholds elements.
-global thresholds: table[ID, string, Index] of count = {} &create_expire=renotice_interval &default=0;
-
-event bro_init() &priority=5
-	{
-	Log::create_stream(Metrics::LOG, [$columns=Info, $ev=log_metrics]);
-	}
-
-function index2str(index: Index): string
-	{
-	local out = "";
-	if ( index?$host )
-		out = fmt("%shost=%s", out, index$host);
-	if ( index?$network )
-		out = fmt("%s%snetwork=%s", out, |out|==0 ? "" : ", ", index$network);
-	if ( index?$str )
-		out = fmt("%s%sstr=%s", out, |out|==0 ? "" : ", ", index$str);
-	return fmt("metric_index(%s)", out);
-	}
-	
-function write_log(ts: time, filter: Filter, data: MetricTable)
-	{
-	for ( index in data )
-		{
-		local val = data[index];
-		local m: Info = [$ts=ts,
-		                 $metric_id=filter$id,
-		                 $filter_name=filter$name,
-		                 $index=index,
-		                 $value=val];
-		
-		if ( filter$log )
-			Log::write(Metrics::LOG, m);
-		}
-	}
-
-
-function reset(filter: Filter)
-	{
-	store[filter$id, filter$name] = table();
-	}
-
-function add_filter(id: ID, filter: Filter)
-	{
-	if ( filter?$aggregation_table && filter?$aggregation_mask )
-		{
-		print "INVALID Metric filter: Defined $aggregation_table and $aggregation_mask.";
-		return;
-		}
-	if ( [id, filter$name] in store )
-		{
-		print fmt("INVALID Metric filter: Filter with name \"%s\" already exists.", filter$name);
-		return;
-		}
-	if ( filter?$notice_threshold && filter?$notice_thresholds )
-		{
-		print "INVALID Metric filter: Defined both $notice_threshold and $notice_thresholds";
-		return;
-		}
-	
-	if ( ! filter?$id )
-		filter$id = id;
-	
-	if ( id !in metric_filters )
-		metric_filters[id] = vector();
-	metric_filters[id][|metric_filters[id]|] = filter;
-
-	filter_store[id, filter$name] = filter;
-	store[id, filter$name] = table();
-	
-	schedule filter$break_interval { Metrics::log_it(filter) };
-	}
-	
-function add_data(id: ID, index: Index, increment: count)
-	{
-	if ( id !in metric_filters )
-		return;
-	
-	local filters = metric_filters[id];
-	
-	# Try to add the data to all of the defined filters for the metric.
-	for ( filter_id in filters )
-		{
-		local filter = filters[filter_id];
-		
-		# If this filter has a predicate, run the predicate and skip this
-		# index if the predicate return false.
-		if ( filter?$pred && ! filter$pred(index) )
-			next;
-		
-		if ( index?$host )
-			{
-			if ( filter?$aggregation_mask )
-				{
-				index$network = mask_addr(index$host, filter$aggregation_mask);
-				delete index$host;
-				}
-			else if ( filter?$aggregation_table )
-				{
-				# Don't add the data if the aggregation table doesn't include 
-				# the given host address.
-				if ( index$host !in filter$aggregation_table )
-					return;
-				index$network = filter$aggregation_table[index$host];
-				delete index$host;
-				}
-			}
-		
-		local metric_tbl = store[id, filter$name];
-		if ( index !in metric_tbl )
-			metric_tbl[index] = 0;
-		metric_tbl[index] += increment;
-		
-		data_added(filter, index, metric_tbl[index]);
-		}
-	}
-
-function check_notice(filter: Filter, index: Index, val: count): bool
-	{
-	if ( (filter?$notice_threshold &&
-	      [filter$id, filter$name, index] !in thresholds &&
-	      val >= filter$notice_threshold) ||
-	     (filter?$notice_thresholds &&
-	      |filter$notice_thresholds| <= thresholds[filter$id, filter$name, index] &&
-	      val >= filter$notice_thresholds[thresholds[filter$id, filter$name, index]]) )
-		return T;
-	else
-		return F;
-	}
-		
-function do_notice(filter: Filter, index: Index, val: count)
-	{
-	# We include $peer_descr here because the a manager count have actually 
-	# generated the notice even though the current remote peer for the event 
-	# calling this could be a worker if this is running as a cluster.
-	local n: Notice::Info = [$note=filter$note, 
-	                         $n=val, 
-	                         $metric_index=index, 
-	                         $peer_descr=peer_description];
-	n$msg = fmt("Threshold crossed by %s %d/%d", index2str(index), val, filter$notice_threshold);
-	if ( index?$str )
-		n$sub = index$str;
-	if ( index?$host )
-		n$src = index$host;
-	# TODO: not sure where to put the network yet.
-	
-	NOTICE(n);
-	
-	# This just needs set to some value so that it doesn't refire the 
-	# notice until it expires from the table or it crosses the next 
-	# threshold in the case of vectors of thresholds.
-	++thresholds[filter$id, filter$name, index];
-	}
--- a/scripts/base/frameworks/metrics/non-cluster.bro
+++ b/scripts/base/frameworks/metrics/non-cluster.bro
@ -1,21 +0,0 @@
-@load ./main
-
-module Metrics;
-
-event Metrics::log_it(filter: Filter)
-	{
-	local id = filter$id;
-	local name = filter$name;
-	
-	write_log(network_time(), filter, store[id, name]);
-	reset(filter);
-	
-	schedule filter$break_interval { Metrics::log_it(filter) };
-	}
-	
-	
-function data_added(filter: Filter, index: Index, val: count)
-	{
-	if ( check_notice(filter, index, val) )
-		do_notice(filter, index, val);
-	}
--- a/scripts/base/frameworks/sumstats/load.bro
+++ b/scripts/base/frameworks/sumstats/load.bro
@ -1,4 +1,5 @@
@load ./main
+@load ./plugins

 # The cluster framework must be loaded first.
@load base/frameworks/cluster
--- a/scripts/base/frameworks/sumstats/cluster.bro
+++ b/scripts/base/frameworks/sumstats/cluster.bro
@ -0,0 +1,346 @@
+##! This implements transparent cluster support for the SumStats framework.
+##! Do not load this file directly.  It's only meant to be loaded automatically
+##! and will be depending on if the cluster framework has been enabled.
+##! The goal of this script is to make sumstats calculation completely and
+##! transparently automated when running on a cluster.
+
+@load base/frameworks/cluster
+@load ./main
+
+module SumStats;
+
+export {
+	## Allows a user to decide how large of result groups the workers should transmit
+	## values for cluster stats aggregation.
+	const cluster_send_in_groups_of = 50 &redef;
+
+	## The percent of the full threshold value that needs to be met on a single worker
+	## for that worker to send the value to its manager in order for it to request a
+	## global view for that value.  There is no requirement that the manager requests
+	## a global view for the key since it may opt not to if it requested a global view
+	## for the key recently.
+	const cluster_request_global_view_percent = 0.2 &redef;
+
+	## This is to deal with intermediate update overload.  A manager will only allow
+	## this many intermediate update requests to the workers to be inflight at any
+	## given time.  Requested intermediate updates are currently thrown out and not
+	## performed.  In practice this should hopefully have a minimal effect.
+	const max_outstanding_global_views = 10 &redef;
+
+	## Intermediate updates can cause overload situations on very large clusters. This
+	## option may help reduce load and correct intermittent problems. The goal for this
+	## option is also meant to be temporary.
+	const enable_intermediate_updates = T &redef;
+
+	## Event sent by the manager in a cluster to initiate the collection of values for
+	## a sumstat.
+	global cluster_ss_request: event(uid: string, ssid: string);
+
+	## Event sent by nodes that are collecting sumstats after receiving a request for
+	## the sumstat from the manager.
+	global cluster_ss_response: event(uid: string, ssid: string, data: ResultTable, done: bool);
+
+	## This event is sent by the manager in a cluster to initiate the collection of
+	## a single key value from a sumstat.  It's typically used to get intermediate
+	## updates before the break interval triggers to speed detection of a value
+	## crossing a threshold.
+	global cluster_key_request: event(uid: string, ssid: string, key: Key);
+
+	## This event is sent by nodes in response to a
+	## :bro:id:`SumStats::cluster_key_request` event.
+	global cluster_key_response: event(uid: string, ssid: string, key: Key, result: Result);
+
+	## This is sent by workers to indicate that they crossed the percent
+	## of the current threshold by the percentage defined globally in
+	## :bro:id:`SumStats::cluster_request_global_view_percent`
+	global cluster_key_intermediate_response: event(ssid: string, key: SumStats::Key);
+
+	## This event is scheduled internally on workers to send result chunks.
+	global send_data: event(uid: string, ssid: string, data: ResultTable);
+
+	## This event is generated when a threshold is crossed.
+	global cluster_threshold_crossed: event(ssid: string, key: SumStats::Key, thold: Thresholding);
+}
+
+# Add events to the cluster framework to make this work.
+redef Cluster::manager2worker_events += /SumStats::cluster_(ss_request|key_request|threshold_crossed)/;
+redef Cluster::manager2worker_events += /SumStats::thresholds_reset/;
+redef Cluster::worker2manager_events += /SumStats::cluster_(ss_response|key_response|key_intermediate_response)/;
+
+@if ( Cluster::local_node_type() != Cluster::MANAGER )
+# This variable is maintained to know what keys have recently sent as
+# intermediate updates so they don't overwhelm their manager. The count that is
+# yielded is the number of times the percentage threshold has been crossed and
+# an intermediate result has been received.
+global recent_global_view_keys: table[string, Key] of count &create_expire=1min &default=0;
+
+event bro_init() &priority=-100
+	{
+	# The manager is the only host allowed to track these.
+	stats_store = table();
+	reducer_store = table();
+	}
+
+# This is done on all non-manager node types in the event that a sumstat is
+# being collected somewhere other than a worker.
+function data_added(ss: SumStat, key: Key, result: Result)
+	{
+	# If an intermediate update for this value was sent recently, don't send
+	# it again.
+	if ( [ss$id, key] in recent_global_view_keys )
+		return;
+
+	# If val is 5 and global view % is 0.1 (10%), pct_val will be 50.  If that
+	# crosses the full threshold then it's a candidate to send as an
+	# intermediate update.
+	if ( enable_intermediate_updates &&
+	     check_thresholds(ss, key, result, cluster_request_global_view_percent) )
+		{
+		# kick off intermediate update
+		event SumStats::cluster_key_intermediate_response(ss$id, key);
+		++recent_global_view_keys[ss$id, key];
+		}
+	}
+
+event SumStats::send_data(uid: string, ssid: string, data: ResultTable)
+	{
+	#print fmt("WORKER %s: sending data for uid %s...", Cluster::node, uid);
+
+	local local_data: ResultTable = table();
+	local num_added = 0;
+	for ( key in data )
+		{
+		local_data[key] = data[key];
+		delete data[key];
+
+		# Only send cluster_send_in_groups_of at a time.  Queue another
+		# event to send the next group.
+		if ( cluster_send_in_groups_of == ++num_added )
+			break;
+		}
+
+	local done = F;
+	# If data is empty, this sumstat is done.
+	if ( |data| == 0 )
+		done = T;
+
+	# Note: copy is needed to compensate serialization caching issue. This should be
+	# changed to something else later. 
+	event SumStats::cluster_ss_response(uid, ssid, copy(local_data), done);
+	if ( ! done )
+		schedule 0.01 sec { SumStats::send_data(uid, ssid, data) };
+	}
+
+event SumStats::cluster_ss_request(uid: string, ssid: string)
+	{
+	#print fmt("WORKER %s: received the cluster_ss_request event for %s.", Cluster::node, id);
+
+	# Initiate sending all of the data for the requested stats.
+	if ( ssid in result_store )
+		event SumStats::send_data(uid, ssid, result_store[ssid]);
+	else
+		event SumStats::send_data(uid, ssid, table());
+
+	# Lookup the actual sumstats and reset it, the reference to the data
+	# currently stored will be maintained internally by the send_data event.
+	if ( ssid in stats_store )
+		reset(stats_store[ssid]);
+	}
+
+event SumStats::cluster_key_request(uid: string, ssid: string, key: Key)
+	{
+	if ( ssid in result_store && key in result_store[ssid] )
+		{
+		#print fmt("WORKER %s: received the cluster_key_request event for %s=%s.", Cluster::node, key2str(key), data);
+
+		# Note: copy is needed to compensate serialization caching issue. This should be
+		# changed to something else later. 
+		event SumStats::cluster_key_response(uid, ssid, key, copy(result_store[ssid][key]));
+		}
+	else
+		{
+		# We need to send an empty response if we don't have the data so that the manager
+		# can know that it heard back from all of the workers.
+		event SumStats::cluster_key_response(uid, ssid, key, table());
+		}
+	}
+
+event SumStats::cluster_threshold_crossed(ssid: string, key: SumStats::Key, thold: Thresholding)
+	{
+	if ( ssid !in threshold_tracker )
+		threshold_tracker[ssid] = table();
+
+	threshold_tracker[ssid][key] = thold;
+	}
+
+event SumStats::thresholds_reset(ssid: string)
+	{
+	threshold_tracker[ssid] = table();
+	}
+
+@endif
+
+
+@if ( Cluster::local_node_type() == Cluster::MANAGER )
+
+# This variable is maintained by manager nodes as they collect and aggregate
+# results.
+# Index on a uid.
+global stats_results: table[string] of ResultTable &read_expire=1min;
+
+# This variable is maintained by manager nodes to track how many "dones" they
+# collected per collection unique id.  Once the number of results for a uid
+# matches the number of peer nodes that results should be coming from, the
+# result is written out and deleted from here.
+# Indexed on a uid.
+# TODO: add an &expire_func in case not all results are received.
+global done_with: table[string] of count &read_expire=1min &default=0;
+
+# This variable is maintained by managers to track intermediate responses as
+# they are getting a global view for a certain key.
+# Indexed on a uid.
+global key_requests: table[string] of Result &read_expire=1min;
+
+# This variable is maintained by managers to prevent overwhelming communication due
+# to too many intermediate updates.  Each sumstat is tracked separately so that
+# one won't overwhelm and degrade other quieter sumstats.
+# Indexed on a sumstat id.
+global outstanding_global_views: table[string] of count &default=0;
+
+const zero_time = double_to_time(0.0);
+# Managers handle logging.
+event SumStats::finish_epoch(ss: SumStat)
+	{
+	if ( network_time() > zero_time )
+		{
+		#print fmt("%.6f MANAGER: breaking %s sumstat for %s sumstat", network_time(), ss$name, ss$id);
+		local uid = unique_id("");
+
+		if ( uid in stats_results )
+			delete stats_results[uid];
+		stats_results[uid] = table();
+
+		# Request data from peers.
+		event SumStats::cluster_ss_request(uid, ss$id);
+		}
+
+	# Schedule the next finish_epoch event.
+	schedule ss$epoch { SumStats::finish_epoch(ss) };
+	}
+
+# This is unlikely to be called often, but it's here in
+# case there are sumstats being collected by managers.
+function data_added(ss: SumStat, key: Key, result: Result)
+	{
+	if ( check_thresholds(ss, key, result, 1.0) )
+		{
+		threshold_crossed(ss, key, result);
+		event SumStats::cluster_threshold_crossed(ss$id, key, threshold_tracker[ss$id][key]);
+		}
+	}
+
+event SumStats::cluster_key_response(uid: string, ssid: string, key: Key, result: Result)
+	{
+	#print fmt("%0.6f MANAGER: receiving key data from %s - %s=%s", network_time(), get_event_peer()$descr, key2str(key), result);
+
+	# We only want to try and do a value merge if there are actually measured datapoints
+	# in the Result.
+	if ( uid in key_requests )
+		key_requests[uid] = compose_results(key_requests[uid], result);
+	else
+		key_requests[uid] = result;
+
+	# Mark that a worker is done.
+	++done_with[uid];
+
+	#print fmt("worker_count:%d :: done_with:%d", Cluster::worker_count, done_with[uid]);
+	if ( Cluster::worker_count == done_with[uid] )
+		{
+		local ss = stats_store[ssid];
+		local ir = key_requests[uid];
+		if ( check_thresholds(ss, key, ir, 1.0) )
+			{
+			threshold_crossed(ss, key, ir);
+			event SumStats::cluster_threshold_crossed(ss$id, key, threshold_tracker[ss$id][key]);
+			}
+
+		delete done_with[uid];
+		delete key_requests[uid];
+		# Check that there is an outstanding view before subtracting.
+		if ( outstanding_global_views[ssid] > 0 )
+			--outstanding_global_views[ssid];
+		}
+	}
+
+# Managers handle intermediate updates here.
+event SumStats::cluster_key_intermediate_response(ssid: string, key: Key)
+	{
+	#print fmt("MANAGER: receiving intermediate key data from %s", get_event_peer()$descr);
+	#print fmt("MANAGER: requesting key data for %s", key2str(key));
+
+	if ( ssid in outstanding_global_views &&
+	     |outstanding_global_views[ssid]| > max_outstanding_global_views )
+		{
+		# Don't do this intermediate update.  Perhaps at some point in the future
+		# we will queue and randomly select from these ignored intermediate
+		# update requests.
+		return;
+		}
+
+	++outstanding_global_views[ssid];
+
+	local uid = unique_id("");
+	event SumStats::cluster_key_request(uid, ssid, key);
+	}
+
+event SumStats::cluster_ss_response(uid: string, ssid: string, data: ResultTable, done: bool)
+	{
+	#print fmt("MANAGER: receiving results from %s", get_event_peer()$descr);
+
+	# Mark another worker as being "done" for this uid.
+	if ( done )
+		++done_with[uid];
+
+	local local_data = stats_results[uid];
+	local ss = stats_store[ssid];
+
+	for ( key in data )
+		{
+		if ( key in local_data )
+			local_data[key] = compose_results(local_data[key], data[key]);
+		else
+			local_data[key] = data[key];
+
+		# If a stat is done being collected, thresholds for each key
+		# need to be checked so we're doing it here to avoid doubly
+		# iterating over each key.
+		if ( Cluster::worker_count == done_with[uid] )
+			{
+			if ( check_thresholds(ss, key, local_data[key], 1.0) )
+				{
+				threshold_crossed(ss, key, local_data[key]);
+				event SumStats::cluster_threshold_crossed(ss$id, key, threshold_tracker[ss$id][key]);
+				}
+			}
+		}
+
+	# If the data has been collected from all peers, we are done and ready to finish.
+	if ( Cluster::worker_count == done_with[uid] )
+		{
+		if ( ss?$epoch_finished )
+			ss$epoch_finished(local_data);
+
+		# Clean up
+		delete stats_results[uid];
+		delete done_with[uid];
+		# Not sure I need to reset the sumstat on the manager.
+		reset(ss);
+		}
+	}
+
+event remote_connection_handshake_done(p: event_peer) &priority=5
+	{
+	send_id(p, "SumStats::stats_store");
+	send_id(p, "SumStats::reducer_store");
+	}
+@endif
--- a/scripts/base/frameworks/sumstats/main.bro
+++ b/scripts/base/frameworks/sumstats/main.bro
@ -0,0 +1,436 @@
+##! The summary statistics framework provides a way to
+##! summarize large streams of data into simple reduced
+##! measurements.
+
+module SumStats;
+
+export {
+	## The various calculations are all defined as plugins.
+	type Calculation: enum {
+		PLACEHOLDER
+	};
+
+	## Represents a thing which is having summarization
+	## results collected for it.
+	type Key: record {
+		## A non-address related summarization or a sub-key for
+		## an address based summarization. An example might be
+		## successful SSH connections by client IP address
+		## where the client string would be the key value.
+		## Another example might be number of HTTP requests to
+		## a particular value in a Host header.  This is an
+		## example of a non-host based metric since multiple
+		## IP addresses could respond for the same Host
+		## header value.
+		str:  string &optional;
+
+		## Host is the value to which this metric applies.
+		host: addr &optional;
+	};
+
+	## Represents data being added for a single observation.
+	## Only supply a single field at a time!
+	type Observation: record {
+		## Count value.
+		num:  count  &optional;
+		## Double value.
+		dbl:  double &optional;
+		## String value.
+		str:  string &optional;
+	};
+
+	type Reducer: record {
+		## Observation stream identifier for the reducer
+		## to attach to.
+		stream:         string;
+
+		## The calculations to perform on the data points.
+		apply:          set[Calculation];
+
+		## A predicate so that you can decide per key if you
+		## would like to accept the data being inserted.
+		pred:           function(key: SumStats::Key, obs: SumStats::Observation): bool &optional;
+
+		## A function to normalize the key.  This can be used to aggregate or
+		## normalize the entire key.
+		normalize_key:  function(key: SumStats::Key): Key &optional;
+	};
+
+	## Value calculated for an observation stream fed into a reducer.
+	## Most of the fields are added by plugins.
+	type ResultVal: record {
+		## The time when the first observation was added to
+		## this result value.
+		begin:  time;
+
+		## The time when the last observation was added to
+		## this result value.
+		end:    time;
+
+		## The number of observations received.
+		num:    count &default=0;
+	};
+
+	## Type to store results for multiple reducers.
+	type Result: table[string] of ResultVal;
+
+	## Type to store a table of sumstats results indexed
+	## by keys.
+	type ResultTable: table[Key] of Result;
+
+	## SumStats represent an aggregation of reducers along with
+	## mechanisms to handle various situations like the epoch ending
+	## or thresholds being crossed.
+	##
+	## It's best to not access any global state outside
+	## of the variables given to the callbacks because there
+	## is no assurance provided as to where the callbacks
+	## will be executed on clusters.
+	type SumStat: record {
+		## The interval at which this filter should be "broken"
+		## and the '$epoch_finished' callback called.  The
+		## results are also reset at this time so any threshold
+		## based detection needs to be set to a
+		## value that should be expected to happen within
+		## this epoch.
+		epoch:              interval;
+
+		## The reducers for the SumStat
+		reducers:           set[Reducer];
+
+		## Provide a function to calculate a value from the
+		## :bro:see:`Result` structure which will be used
+		## for thresholding.
+		## This is required if a $threshold value is given.
+		threshold_val:      function(key: SumStats::Key, result: SumStats::Result): count &optional;
+
+		## The threshold value for calling the
+		## $threshold_crossed callback.
+		threshold:          count             &optional;
+
+		## A series of thresholds for calling the
+		## $threshold_crossed callback.
+		threshold_series:   vector of count   &optional;
+
+		## A callback that is called when a threshold is crossed.
+		threshold_crossed:  function(key: SumStats::Key, result: SumStats::Result) &optional;
+
+		## A callback with the full collection of Results for
+		## this SumStat.
+		epoch_finished:    function(rt: SumStats::ResultTable) &optional;
+	};
+
+	## Create a summary statistic.
+	global create: function(ss: SumStats::SumStat);
+
+	## Add data into an observation stream. This should be
+	## called when a script has measured some point value.
+	##
+	## id: The observation stream identifier that the data
+	##     point represents.
+	##
+	## key: The key that the value is related to.
+	##
+	## obs: The data point to send into the stream.
+	global observe: function(id: string, key: SumStats::Key, obs: SumStats::Observation);
+
+	## This record is primarily used for internal threshold tracking.
+	type Thresholding: record {
+		# Internal use only.  Indicates if a simple threshold was already crossed.
+		is_threshold_crossed: bool &default=F;
+
+		# Internal use only.  Current key for threshold series.
+		threshold_series_index: count &default=0;
+	};
+
+	## This event is generated when thresholds are reset for a SumStat.
+	##
+	## ssid: SumStats ID that thresholds were reset for.
+	global thresholds_reset: event(ssid: string);
+
+	## Helper function to represent a :bro:type:`SumStats::Key` value as
+	## a simple string.
+	##
+	## key: The metric key that is to be converted into a string.
+	##
+	## Returns: A string representation of the metric key.
+	global key2str: function(key: SumStats::Key): string;
+}
+
+redef record Reducer += {
+	# Internal use only.  Provides a reference back to the related SumStats by it's ID.
+	sid: string &optional;
+};
+
+# Internal use only.  For tracking thresholds per sumstat and key.
+global threshold_tracker: table[string] of table[Key] of Thresholding &optional;
+
+redef record SumStat += {
+	# Internal use only (mostly for cluster coherency).
+	id: string &optional;
+};
+
+# Store of sumstats indexed on the sumstat id.
+global stats_store: table[string] of SumStat = table();
+
+# Store of reducers indexed on the data point stream id.
+global reducer_store: table[string] of set[Reducer] = table();
+
+# Store of results indexed on the measurement id.
+global result_store: table[string] of ResultTable = table();
+
+# Store of threshold information.
+global thresholds_store: table[string, Key] of bool = table();
+
+# This is called whenever key values are updated and the new val is given as the
+# `val` argument. It's only prototyped here because cluster and non-cluster have
+# separate  implementations.
+global data_added: function(ss: SumStat, key: Key, result: Result);
+
+# Prototype the hook point for plugins to do calculations.
+global observe_hook: hook(r: Reducer, val: double, data: Observation, rv: ResultVal);
+
+# Prototype the hook point for plugins to initialize any result values.
+global init_resultval_hook: hook(r: Reducer, rv: ResultVal);
+
+# Prototype the hook point for plugins to merge Results.
+global compose_resultvals_hook: hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal);
+
+# Event that is used to "finish" measurements and adapt the measurement
+# framework for clustered or non-clustered usage.
+global finish_epoch: event(ss: SumStat);
+
+function key2str(key: Key): string
+	{
+	local out = "";
+	if ( key?$host )
+		out = fmt("%shost=%s", out, key$host);
+	if ( key?$str )
+		out = fmt("%s%sstr=%s", out, |out|==0 ? "" : ", ", key$str);
+	return fmt("sumstats_key(%s)", out);
+	}
+
+function init_resultval(r: Reducer): ResultVal
+	{
+	local rv: ResultVal = [$begin=network_time(), $end=network_time()];
+	hook init_resultval_hook(r, rv);
+	return rv;
+	}
+
+function compose_resultvals(rv1: ResultVal, rv2: ResultVal): ResultVal
+	{
+	local result: ResultVal;
+
+	result$begin = (rv1$begin < rv2$begin) ? rv1$begin : rv2$begin;
+	result$end = (rv1$end > rv2$end) ? rv1$end : rv2$end;
+	result$num = rv1$num + rv2$num;
+
+	# Run the plugin composition hooks.
+	hook compose_resultvals_hook(result, rv1, rv2);
+	return result;
+	}
+
+function compose_results(r1: Result, r2: Result): Result
+	{
+	local result: Result = table();
+
+	if ( |r1| > |r2| )
+		{
+		for ( data_id in r1 )
+			{
+			if ( data_id in r2 )
+				result[data_id] = compose_resultvals(r1[data_id], r2[data_id]);
+			else
+				result[data_id] = r1[data_id];
+			}
+		}
+	else
+		{
+		for ( data_id in r2 )
+			{
+			if ( data_id in r1 )
+				result[data_id] = compose_resultvals(r1[data_id], r2[data_id]);
+			else
+				result[data_id] = r2[data_id];
+			}
+		}
+
+	return result;
+	}
+
+
+function reset(ss: SumStat)
+	{
+	if ( ss$id in result_store )
+		delete result_store[ss$id];
+
+	result_store[ss$id] = table();
+
+	if ( ss?$threshold || ss?$threshold_series )
+		{
+		threshold_tracker[ss$id] = table();
+		event SumStats::thresholds_reset(ss$id);
+		}
+	}
+
+function create(ss: SumStat)
+	{
+	if ( (ss?$threshold || ss?$threshold_series) && ! ss?$threshold_val )
+		{
+		Reporter::error("SumStats given a threshold with no $threshold_val function");
+		}
+
+	if ( ! ss?$id )
+		ss$id=unique_id("");
+	threshold_tracker[ss$id] = table();
+	stats_store[ss$id] = ss;
+
+	for ( reducer in ss$reducers )
+		{
+		reducer$sid = ss$id;
+		if ( reducer$stream !in reducer_store )
+			reducer_store[reducer$stream] = set();
+		add reducer_store[reducer$stream][reducer];
+		}
+
+	reset(ss);
+	schedule ss$epoch { SumStats::finish_epoch(ss) };
+	}
+
+function observe(id: string, key: Key, obs: Observation)
+	{
+	if ( id !in reducer_store )
+		return;
+
+	# Try to add the data to all of the defined reducers.
+	for ( r in reducer_store[id] )
+		{
+		if ( r?$normalize_key )
+			key = r$normalize_key(copy(key));
+
+		# If this reducer has a predicate, run the predicate
+		# and skip this key if the predicate return false.
+		if ( r?$pred && ! r$pred(key, obs) )
+			next;
+
+		local ss = stats_store[r$sid];
+
+		# If there is a threshold and no epoch_finished callback
+		# we don't need to continue counting since the data will
+		# never be accessed.  This was leading
+		# to some state management issues when measuring
+		# uniqueness.
+		# NOTE: this optimization could need removed in the
+		#       future if on demand access is provided to the
+		#       SumStats results.
+		if ( ! ss?$epoch_finished &&
+		     r$sid in threshold_tracker &&
+		     key in threshold_tracker[r$sid] &&
+		     ( ss?$threshold &&
+		       threshold_tracker[r$sid][key]$is_threshold_crossed ) ||
+		     ( ss?$threshold_series &&
+		       threshold_tracker[r$sid][key]$threshold_series_index+1 == |ss$threshold_series| ) )
+			next;
+
+		if ( r$sid !in result_store )
+			result_store[ss$id] = table();
+		local results = result_store[r$sid];
+
+		if ( key !in results )
+			results[key] = table();
+		local result = results[key];
+
+		if ( id !in result )
+			result[id] = init_resultval(r);
+		local result_val = result[id];
+
+		++result_val$num;
+		# Continually update the $end field.
+		result_val$end=network_time();
+
+		# If a string was given, fall back to 1.0 as the value.
+		local val = 1.0;
+		if ( obs?$num || obs?$dbl )
+			val = obs?$dbl ? obs$dbl : obs$num;
+
+		hook observe_hook(r, val, obs, result_val);
+		data_added(ss, key, result);
+		}
+	}
+
+# This function checks if a threshold has been crossed.  It is also used as a method to implement
+# mid-break-interval threshold crossing detection for cluster deployments.
+function check_thresholds(ss: SumStat, key: Key, result: Result, modify_pct: double): bool
+	{
+	if ( ! (ss?$threshold || ss?$threshold_series) )
+		return F;
+
+	# Add in the extra ResultVals to make threshold_vals easier to write.
+	if ( |ss$reducers| != |result| )
+		{
+		for ( reducer in ss$reducers )
+			{
+			if ( reducer$stream !in result )
+				result[reducer$stream] = init_resultval(reducer);
+			}
+		}
+
+	local watch = ss$threshold_val(key, result);
+
+	if ( modify_pct < 1.0 && modify_pct > 0.0 )
+		watch = double_to_count(floor(watch/modify_pct));
+
+	if ( ss$id !in threshold_tracker )
+		threshold_tracker[ss$id] = table();
+	local t_tracker = threshold_tracker[ss$id];
+
+	if ( key !in t_tracker )
+		{
+		local ttmp: Thresholding;
+		t_tracker[key] = ttmp;
+		}
+	local tt = t_tracker[key];
+
+	if ( ss?$threshold && ! tt$is_threshold_crossed && watch >= ss$threshold )
+		{
+		# Value crossed the threshold.
+		return T;
+		}
+
+	if ( ss?$threshold_series &&
+	     |ss$threshold_series| >= tt$threshold_series_index &&
+	     watch >= ss$threshold_series[tt$threshold_series_index] )
+		{
+		# A threshold series was given and the value crossed the next
+		# value in the series.
+		return T;
+		}
+
+	return F;
+	}
+
+function threshold_crossed(ss: SumStat, key: Key, result: Result)
+	{
+	# If there is no callback, there is no point in any of this.
+	if ( ! ss?$threshold_crossed )
+		return;
+
+	# Add in the extra ResultVals to make threshold_crossed callbacks easier to write.
+	if ( |ss$reducers| != |result| )
+		{
+		for ( reducer in ss$reducers )
+			{
+			if ( reducer$stream !in result )
+				result[reducer$stream] = init_resultval(reducer);
+			}
+		}
+
+	ss$threshold_crossed(key, result);
+	local tt = threshold_tracker[ss$id][key];
+	tt$is_threshold_crossed = T;
+
+	# Bump up to the next threshold series index if a threshold series is being used.
+	if ( ss?$threshold_series )
+		++tt$threshold_series_index;
+	}
+
--- a/scripts/base/frameworks/sumstats/non-cluster.bro
+++ b/scripts/base/frameworks/sumstats/non-cluster.bro
@ -0,0 +1,24 @@
+@load ./main
+
+module SumStats;
+
+event SumStats::finish_epoch(ss: SumStat)
+	{
+	if ( ss$id in result_store )
+		{
+		local data = result_store[ss$id];
+		if ( ss?$epoch_finished )
+			ss$epoch_finished(data);
+
+		reset(ss);
+		}
+
+	schedule ss$epoch { SumStats::finish_epoch(ss) };
+	}
+
+
+function data_added(ss: SumStat, key: Key, result: Result)
+	{
+	if ( check_thresholds(ss, key, result, 1.0) )
+		threshold_crossed(ss, key, result);
+	}
--- a/scripts/base/frameworks/sumstats/plugins/load.bro
+++ b/scripts/base/frameworks/sumstats/plugins/load.bro
@ -0,0 +1,8 @@
+@load ./average
+@load ./max
+@load ./min
+@load ./sample
+@load ./std-dev
+@load ./sum
+@load ./unique
+@load ./variance
--- a/scripts/base/frameworks/sumstats/plugins/average.bro
+++ b/scripts/base/frameworks/sumstats/plugins/average.bro
@ -0,0 +1,36 @@
+@load base/frameworks/sumstats/main
+
+module SumStats;
+
+export {
+	redef enum Calculation += {
+		## Calculate the average of the values.
+		AVERAGE
+	};
+
+	redef record ResultVal += {
+		## For numeric data, this calculates the average of all values.
+		average: double &optional;
+	};
+}
+
+hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
+	{
+	if ( AVERAGE in r$apply )
+		{
+		if ( ! rv?$average )
+			rv$average = val;
+		else
+			rv$average += (val - rv$average) / rv$num;
+		}
+	}
+
+hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
+	{
+	if ( rv1?$average && rv2?$average )
+		result$average = ((rv1$average*rv1$num) + (rv2$average*rv2$num))/(rv1$num+rv2$num);
+	else if ( rv1?$average )
+		result$average = rv1$average;
+	else if ( rv2?$average )
+		result$average = rv2$average;
+	}
--- a/scripts/base/frameworks/sumstats/plugins/max.bro
+++ b/scripts/base/frameworks/sumstats/plugins/max.bro
@ -0,0 +1,38 @@
+@load base/frameworks/sumstats/main
+
+module SumStats;
+
+export {
+	redef enum Calculation += {
+		## Find the maximum value.
+		MAX
+	};
+
+	redef record ResultVal += {
+		## For numeric data, this tracks the maximum value given.
+		max: double &optional;
+	};
+}
+
+hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
+	{
+	if ( MAX in r$apply )
+		{
+		if ( ! rv?$max )
+			rv$max = val;
+		else if ( val > rv$max )
+			rv$max = val;
+		}
+	}
+
+hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
+	{
+	if ( rv1?$max && rv2?$max )
+		result$max = (rv1$max > rv2$max) ? rv1$max : rv2$max;
+	else if ( rv1?$max )
+		result$max = rv1$max;
+	else if ( rv2?$max )
+		result$max = rv2$max;
+	}
+
+
--- a/scripts/base/frameworks/sumstats/plugins/min.bro
+++ b/scripts/base/frameworks/sumstats/plugins/min.bro
@ -0,0 +1,36 @@
+@load base/frameworks/sumstats/main
+
+module SumStats;
+
+export {
+	redef enum Calculation += {
+		## Find the minimum value.
+		MIN
+	};
+
+	redef record ResultVal += {
+		## For numeric data, this tracks the minimum value given.
+		min: double &optional;
+	};
+}
+
+hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
+	{
+	if ( MIN in r$apply )
+		{
+		if ( ! rv?$min )
+			rv$min = val;
+		else if ( val < rv$min )
+			rv$min = val;
+		}
+	}
+
+hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
+	{
+	if ( rv1?$min && rv2?$min )
+		result$min = (rv1$min < rv2$min) ? rv1$min : rv2$min;
+	else if ( rv1?$min )
+		result$min = rv1$min;
+	else if ( rv2?$min )
+		result$min = rv2$min;
+	}
--- a/scripts/base/frameworks/sumstats/plugins/sample.bro
+++ b/scripts/base/frameworks/sumstats/plugins/sample.bro
@ -0,0 +1,49 @@
+@load base/frameworks/sumstats/main
+@load base/utils/queue
+
+module SumStats;
+
+export {
+	redef record Reducer += {
+		## A number of sample Observations to collect.
+		samples: count &default=0;
+	};
+
+	redef record ResultVal += {
+		## This is the queue where samples are maintained.  Use the
+		## :bro:see:`SumStats::get_samples` function to get a vector of the samples.
+		samples: Queue::Queue &optional;
+	};
+
+	## Get a vector of sample Observation values from a ResultVal.
+	global get_samples: function(rv: ResultVal): vector of Observation;
+}
+
+function get_samples(rv: ResultVal): vector of Observation
+	{
+	local s: vector of Observation = vector();
+	if ( rv?$samples )
+		Queue::get_vector(rv$samples, s);
+	return s;
+	}
+
+hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
+	{
+	if ( r$samples > 0 )
+		{
+		if ( ! rv?$samples )
+			rv$samples = Queue::init([$max_len=r$samples]);
+		Queue::put(rv$samples, obs);
+		}
+	}
+
+hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
+	{
+	# Merge $samples
+	if ( rv1?$samples && rv2?$samples )
+		result$samples = Queue::merge(rv1$samples, rv2$samples);
+	else if ( rv1?$samples )
+		result$samples = rv1$samples;
+	else if ( rv2?$samples )
+		result$samples = rv2$samples;
+	}
--- a/scripts/base/frameworks/sumstats/plugins/std-dev.bro
+++ b/scripts/base/frameworks/sumstats/plugins/std-dev.bro
@ -0,0 +1,34 @@
+@load base/frameworks/sumstats/main
+@load ./variance
+
+module SumStats;
+
+export {
+	redef enum Calculation += {
+		## Find the standard deviation of the values.
+		STD_DEV
+	};
+
+	redef record ResultVal += {
+		## For numeric data, this calculates the standard deviation.
+		std_dev: double &default=0.0;
+	};
+}
+
+function calc_std_dev(rv: ResultVal)
+	{
+	if ( rv?$variance )
+		rv$std_dev = sqrt(rv$variance);
+	}
+
+# This depends on the variance plugin which uses priority -5
+hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal) &priority=-10
+	{
+	if ( STD_DEV in r$apply )
+		calc_std_dev(rv);
+	}
+
+hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal) &priority=-10
+	{
+	calc_std_dev(result);
+	}
--- a/scripts/base/frameworks/sumstats/plugins/sum.bro
+++ b/scripts/base/frameworks/sumstats/plugins/sum.bro
@ -0,0 +1,51 @@
+@load base/frameworks/sumstats/main
+
+module SumStats;
+
+export {
+	redef enum Calculation += {
+		## Sums the values given.  For string values,
+		## this will be the number of strings given.
+		SUM
+	};
+
+	redef record ResultVal += {
+		## For numeric data, this tracks the sum of all values.
+		sum: double &default=0.0;
+	};
+
+	type threshold_function: function(key: SumStats::Key, result: SumStats::Result): count;
+	global sum_threshold: function(data_id: string): threshold_function;
+}
+
+function sum_threshold(data_id: string): threshold_function
+	{
+	return function(key: SumStats::Key, result: SumStats::Result): count
+		{
+		print fmt("data_id: %s", data_id);
+		print result;
+		return double_to_count(result[data_id]$sum);
+		};
+	}
+
+hook init_resultval_hook(r: Reducer, rv: ResultVal)
+	{
+	if ( SUM in r$apply && ! rv?$sum )
+		rv$sum = 0;
+	}
+
+hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
+	{
+	if ( SUM in r$apply )
+		rv$sum += val;
+	}
+
+hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
+	{
+	if ( rv1?$sum || rv2?$sum )
+		{
+		result$sum = rv1?$sum ? rv1$sum : 0;
+		if ( rv2?$sum )
+			result$sum += rv2$sum;
+		}
+	}
--- a/scripts/base/frameworks/sumstats/plugins/unique.bro
+++ b/scripts/base/frameworks/sumstats/plugins/unique.bro
@ -0,0 +1,53 @@
+@load base/frameworks/sumstats/main
+
+module SumStats;
+
+export {
+	redef enum Calculation += {
+		## Calculate the number of unique values.
+		UNIQUE
+	};
+
+	redef record ResultVal += {
+		## If cardinality is being tracked, the number of unique
+		## items is tracked here.
+		unique: count &default=0;
+	};
+}
+
+redef record ResultVal += {
+	# Internal use only.  This is not meant to be publically available
+	# because we don't want to trust that we can inspect the values
+	# since we will like move to a probalistic data structure in the future.
+	# TODO: in the future this will optionally be a hyperloglog structure
+	unique_vals: set[Observation] &optional;
+};
+
+hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
+	{
+	if ( UNIQUE in r$apply )
+		{
+		if ( ! rv?$unique_vals )
+			rv$unique_vals=set();
+		add rv$unique_vals[obs];
+		rv$unique = |rv$unique_vals|;
+		}
+	}
+
+hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
+	{
+	if ( rv1?$unique_vals || rv2?$unique_vals )
+		{
+		if ( rv1?$unique_vals )
+			result$unique_vals = rv1$unique_vals;
+
+		if ( rv2?$unique_vals )
+			if ( ! result?$unique_vals )
+				result$unique_vals = rv2$unique_vals;
+			else
+				for ( val2 in rv2$unique_vals )
+					add result$unique_vals[val2];
+
+		result$unique = |result$unique_vals|;
+		}
+	}
--- a/scripts/base/frameworks/sumstats/plugins/variance.bro
+++ b/scripts/base/frameworks/sumstats/plugins/variance.bro
@ -0,0 +1,69 @@
+@load base/frameworks/sumstats/main
+@load ./average
+
+module SumStats;
+
+export {
+	redef enum Calculation += {
+		## Find the variance of the values.
+		VARIANCE
+	};
+
+	redef record ResultVal += {
+		## For numeric data, this calculates the variance.
+		variance: double &optional;
+	};
+}
+
+redef record ResultVal += {
+	# Internal use only.  Used for incrementally calculating variance.
+	prev_avg: double &optional;
+
+	# Internal use only.  For calculating incremental variance.
+	var_s: double &default=0.0;
+};
+
+function calc_variance(rv: ResultVal)
+	{
+	rv$variance = (rv$num > 1) ? rv$var_s/(rv$num-1) : 0.0;
+	}
+
+# Reduced priority since this depends on the average
+hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal) &priority=-5
+	{
+	if ( VARIANCE in r$apply )
+		{
+		if ( rv$num > 1 )
+			rv$var_s += ((val - rv$prev_avg) * (val - rv$average));
+
+		calc_variance(rv);
+		rv$prev_avg = rv$average;
+		}
+	}
+
+# Reduced priority since this depends on the average
+hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal) &priority=-5
+	{
+	if ( rv1?$var_s && rv1?$average &&
+	     rv2?$var_s && rv2?$average )
+		{
+		local rv1_avg_sq = (rv1$average - result$average);
+		rv1_avg_sq = rv1_avg_sq*rv1_avg_sq;
+		local rv2_avg_sq = (rv2$average - result$average);
+		rv2_avg_sq = rv2_avg_sq*rv2_avg_sq;
+		result$var_s = rv1$num*(rv1$var_s/rv1$num + rv1_avg_sq) + rv2$num*(rv2$var_s/rv2$num + rv2_avg_sq);
+		}
+	else if ( rv1?$var_s )
+		result$var_s = rv1$var_s;
+	else if ( rv2?$var_s )
+		result$var_s = rv2$var_s;
+
+	if ( rv1?$prev_avg && rv2?$prev_avg )
+		result$prev_avg = ((rv1$prev_avg*rv1$num) + (rv2$prev_avg*rv2$num))/(rv1$num+rv2$num);
+	else if ( rv1?$prev_avg )
+		result$prev_avg = rv1$prev_avg;
+	else if ( rv2?$prev_avg )
+		result$prev_avg = rv2$prev_avg;
+
+	calc_variance(result);
+	}
--- a/scripts/base/init-default.bro
+++ b/scripts/base/init-default.bro
@ -12,8 +12,10 @@
@load base/utils/numbers
@load base/utils/paths
@load base/utils/patterns
+@load base/utils/queue
@load base/utils/strings
@load base/utils/thresholds
+@load base/utils/time
@load base/utils/urls

 # This has some deep interplay between types and BiFs so it's 
@ -27,9 +29,9 @@
@load base/frameworks/communication
@load base/frameworks/control
@load base/frameworks/cluster
-@load base/frameworks/metrics
@load base/frameworks/intel
@load base/frameworks/reporter
+@load base/frameworks/sumstats
@load base/frameworks/tunnels

@load base/protocols/conn
--- a/scripts/base/protocols/ftp/main.bro
+++ b/scripts/base/protocols/ftp/main.bro
@ -56,10 +56,10 @@ export {
 		tags:             set[string] &log &default=set();
 		
 		## Current working directory that this session is in.  By making
-		## the default value '/.', we can indicate that unless something
+		## the default value '.', we can indicate that unless something
 		## more concrete is discovered that the existing but unknown
 		## directory is ok to use.
-		cwd:                string  &default="/.";
+		cwd:                string  &default=".";
 		
 		## Command that is currently waiting for a response.
 		cmdarg:             CmdArg  &optional;
@ -172,7 +172,13 @@ function ftp_message(s: Info)
 		
 		local arg = s$cmdarg$arg;
 		if ( s$cmdarg$cmd in file_cmds )
-			arg = fmt("ftp://%s%s", addr_to_uri(s$id$resp_h), build_path_compressed(s$cwd, arg));
+			{
+			local comp_path = build_path_compressed(s$cwd, arg);
+			if ( comp_path[0] != "/" )
+				comp_path = cat("/", comp_path);
+
+			arg = fmt("ftp://%s%s", addr_to_uri(s$id$resp_h), comp_path);
+			}
 		
 		s$ts=s$cmdarg$ts;
 		s$command=s$cmdarg$cmd;
@ -240,16 +246,13 @@ event ftp_request(c: connection, command: string, arg: string) &priority=5

 event ftp_reply(c: connection, code: count, msg: string, cont_resp: bool) &priority=5
 	{
-	# TODO: figure out what to do with continued FTP response (not used much)
-	#if ( cont_resp ) return;
-
-	local id = c$id;
 	set_ftp_session(c);
-	
 	c$ftp$cmdarg = get_pending_cmd(c$ftp$pending_commands, code, msg);
-	
 	c$ftp$reply_code = code;
 	c$ftp$reply_msg = msg;
+
+	# TODO: figure out what to do with continued FTP response (not used much)
+	if ( cont_resp ) return;
 	
 	# TODO: do some sort of generic clear text login processing here.
 	local response_xyz = parse_ftp_reply_code(code);
@ -278,10 +281,10 @@ event ftp_reply(c: connection, code: count, msg: string, cont_resp: bool) &prior
 			c$ftp$passive=T;
 			
 			if ( code == 229 && data$h == [::] )
-				data$h = id$resp_h;
+				data$h = c$id$resp_h;
 			
 			ftp_data_expected[data$h, data$p] = c$ftp;
-			expect_connection(id$orig_h, data$h, data$p, ANALYZER_FILE, 5mins);
+			expect_connection(c$id$orig_h, data$h, data$p, ANALYZER_FILE, 5mins);
 			}
 		else
 			{
--- a/scripts/base/protocols/ssh/main.bro
+++ b/scripts/base/protocols/ssh/main.bro
@ -1,10 +1,11 @@
-##! Base SSH analysis script.  The heuristic to blindly determine success or 
+##! Base SSH analysis script.  The heuristic to blindly determine success or
 ##! failure for SSH connections is implemented here.  At this time, it only
 ##! uses the size of the data being returned from the server to make the
-##! heuristic determination about success of the connection.  
+##! heuristic determination about success of the connection.
 ##! Requires that :bro:id:`use_conn_size_analyzer` is set to T!  The heuristic
 ##! is not attempted if the connection size analyzer isn't enabled.

+@load base/protocols/conn
@load base/frameworks/notice
@load base/utils/site
@load base/utils/thresholds
@ -16,12 +17,6 @@ module SSH;
 export {
 	## The SSH protocol logging stream identifier.
 	redef enum Log::ID += { LOG };
-	
-	redef enum Notice::Type += { 
-		## Indicates that a heuristically detected "successful" SSH 
-		## authentication occurred.
-		Login 
-	};

 	type Info: record {
 		## Time when the SSH connection began.
@ -30,10 +25,10 @@ export {
 		uid:             string       &log;
 		## The connection's 4-tuple of endpoint addresses/ports.
 		id:              conn_id      &log;
-		## Indicates if the login was heuristically guessed to be "success"
-		## or "failure".
-		status:          string       &log &optional;
-		## Direction of the connection.  If the client was a local host 
+		## Indicates if the login was heuristically guessed to be "success",
+		## "failure", or "undetermined".
+		status:          string       &log &default="undetermined";
+		## Direction of the connection.  If the client was a local host
 		## logging into an external host, this would be OUTBOUND. INBOUND
 		## would be set for the opposite situation.
 		# TODO: handle local-local and remote-remote better.
@ -43,33 +38,33 @@ export {
 		## Software string from the server.
 		server:          string       &log &optional;
 		## Amount of data returned from the server. This is currently
-		## the only measure of the success heuristic and it is logged to 
+		## the only measure of the success heuristic and it is logged to
 		## assist analysts looking at the logs to make their own determination
 		## about the success on a case-by-case basis.
 		resp_size:       count        &log &default=0;
-		
+
 		## Indicate if the SSH session is done being watched.
 		done:            bool         &default=F;
 	};
-	
-	## The size in bytes of data sent by the server at which the SSH 
+
+	## The size in bytes of data sent by the server at which the SSH
 	## connection is presumed to be successful.
-	const authentication_data_size = 5500 &redef;
-	
+	const authentication_data_size = 4000 &redef;
+
 	## If true, we tell the event engine to not look at further data
 	## packets after the initial SSH handshake. Helps with performance
 	## (especially with large file transfers) but precludes some
-	## kinds of analyses (e.g., tracking connection size).
+	## kinds of analyses.
 	const skip_processing_after_detection = F &redef;
-	
+
 	## Event that is generated when the heuristic thinks that a login
 	## was successful.
 	global heuristic_successful_login: event(c: connection);
-	
+
 	## Event that is generated when the heuristic thinks that a login
 	## failed.
 	global heuristic_failed_login: event(c: connection);
-	
+
 	## Event that can be handled to access the :bro:type:`SSH::Info`
 	## record as it is sent on to the logging framework.
 	global log_ssh: event(rec: Info);
@ -104,55 +99,61 @@ function set_session(c: connection)

 function check_ssh_connection(c: connection, done: bool)
 	{
-	# If done watching this connection, just return.
+	# If already done watching this connection, just return.
 	if ( c$ssh$done )
 		return;
-	
-	# Make sure conn_size_analyzer is active by checking 
-	# resp$num_bytes_ip.  In general it should always be active though.
-	if ( ! c$resp?$num_bytes_ip )
-		return;
-	
-	# Remove the IP and TCP header length from the total size.
-	# TODO: Fix for IPv6.  This whole approach also seems to break in some 
-	#       cases where there are more header bytes than num_bytes_ip.
-	local header_bytes = c$resp$num_pkts*32 + c$resp$num_pkts*20;
-	local server_bytes = c$resp$num_bytes_ip;
-	if ( server_bytes >= header_bytes )
-		server_bytes = server_bytes - header_bytes;
-	else
-		server_bytes = c$resp$size;
-	
-	# If this is still a live connection and the byte count has not crossed 
-	# the threshold, just return and let the rescheduled check happen later.
-	if ( ! done && server_bytes < authentication_data_size )
-		return;

-	# Make sure the server has sent back more than 50 bytes to filter out
-	# hosts that are just port scanning.  Nothing is ever logged if the server
-	# doesn't send back at least 50 bytes.
-	if ( server_bytes < 50 )
-		return;
-
-	c$ssh$direction = Site::is_local_addr(c$id$orig_h) ? OUTBOUND : INBOUND;
-	c$ssh$resp_size = server_bytes;
-	
-	if ( server_bytes < authentication_data_size )
+	if ( done )
 		{
-		c$ssh$status  = "failure";
-		event SSH::heuristic_failed_login(c);
+		# If this connection is done, then we can look to see if
+		# this matches the conditions for a failed login.  Failed
+		# logins are only detected at connection state removal.
+
+		if ( # Require originators to have sent at least 50 bytes.
+		     c$orig$size > 50 &&
+		     # Responders must be below 4000 bytes.
+		     c$resp$size < 4000 &&
+		     # Responder must have sent fewer than 40 packets.
+		     c$resp$num_pkts < 40 &&
+		     # If there was a content gap we can't reliably do this heuristic.
+		     c?$conn && c$conn$missed_bytes == 0)# &&
+		     # Only "normal" connections can count.
+		     #c$conn?$conn_state && c$conn$conn_state in valid_states )
+			{
+			c$ssh$status = "failure";
+			event SSH::heuristic_failed_login(c);
+			}
+
+		if ( c$resp$size > authentication_data_size )
+			{
+			c$ssh$status = "success";
+			event SSH::heuristic_successful_login(c);
+			}
 		}
 	else
-		{ 
-		# presumed successful login
-		c$ssh$status = "success";
-		event SSH::heuristic_successful_login(c);
+		{
+		# If this connection is still being tracked, then it's possible
+		# to watch for it to be a successful connection.
+		if ( c$resp$size > authentication_data_size )
+			{
+			c$ssh$status = "success";
+			event SSH::heuristic_successful_login(c);
+			}
+		else
+			# This connection must be tracked longer.  Let the scheduled
+			# check happen again.
+			return;
 		}
-	
+
+	# Set the direction for the log.
+	c$ssh$direction = Site::is_local_addr(c$id$orig_h) ? OUTBOUND : INBOUND;
+
 	# Set the "done" flag to prevent the watching event from rescheduling
 	# after detection is done.
 	c$ssh$done=T;
-	
+
+	Log::write(SSH::LOG, c$ssh);
+
 	if ( skip_processing_after_detection )
 		{
 		# Stop watching this connection, we don't care about it anymore.
@ -161,18 +162,6 @@ function check_ssh_connection(c: connection, done: bool)
 		}
 	}

-event SSH::heuristic_successful_login(c: connection) &priority=-5
-	{
-	NOTICE([$note=Login, 
-	        $msg="Heuristically detected successful SSH login.",
-	        $conn=c]);
-	
-	Log::write(SSH::LOG, c$ssh);
-	}
-event SSH::heuristic_failed_login(c: connection) &priority=-5
-	{
-	Log::write(SSH::LOG, c$ssh);
-	}

 event connection_state_remove(c: connection) &priority=-5
 	{
@ -197,12 +186,12 @@ event ssh_server_version(c: connection, version: string) &priority=5
 	set_session(c);
 	c$ssh$server = version;
 	}
-	
+
 event ssh_client_version(c: connection, version: string) &priority=5
 	{
 	set_session(c);
 	c$ssh$client = version;
-	
+
 	# The heuristic detection for SSH relies on the ConnSize analyzer.
 	# Don't do the heuristics if it's disabled.
 	if ( use_conn_size_analyzer )
--- a/scripts/base/utils/paths.bro
+++ b/scripts/base/utils/paths.bro
@ -19,7 +19,7 @@ function extract_path(input: string): string
 	}

 ## Compresses a given path by removing '..'s and the parent directory it
-## references and also removing '/'s.
+## references and also removing dual '/'s and extraneous '/./'s.
 ## dir: a path string, either relative or absolute
 ## Returns: a compressed version of the input path
 function compress_path(dir: string): string
@ -41,7 +41,7 @@ function compress_path(dir: string): string
 		return compress_path(dir);
 		}

-	const multislash_sep = /(\/){2,}/;
+	const multislash_sep = /(\/\.?){2,}/;
 	parts = split_all(dir, multislash_sep);
 	for ( i in parts )
 		if ( i % 2 == 0 )
--- a/scripts/base/utils/queue.bro
+++ b/scripts/base/utils/queue.bro
@ -0,0 +1,143 @@
+##! A FIFO queue.
+
+module Queue;
+
+export {
+	## Settings for initializing the queue.
+	type Settings: record {
+		## If a maximum length is set for the queue
+		## it will maintain itself at that
+		## maximum length automatically.
+		max_len: count &optional;
+	};
+
+	## The internal data structure for the queue.
+	type Queue: record {};
+
+	## Initialize a queue record structure.
+	##
+	## s: A :bro:record:`Settings` record configuring the queue.
+	##
+	## Returns: An opaque queue record.
+	global init:       function(s: Settings): Queue;
+
+	## Put a string onto the beginning of a queue.
+	##
+	## q: The queue to put the value into.
+	##
+	## val: The value to insert into the queue.
+	global put:       function(q: Queue, val: any);
+
+	## Get a string from the end of a queue.
+	##
+	## q: The queue to get the string from.
+	##
+	## Returns: The value gotten from the queue.
+	global get:        function(q: Queue): any;
+
+	## Merge two queue's together.  If any settings are applied
+	## to the queues, the settings from q1 are used for the new
+	## merged queue.
+	##
+	## q1: The first queue.  Settings are taken from here.
+	##
+	## q2: The second queue.
+	##
+	## Returns: A new queue from merging the other two together.
+	global merge:      function(q1: Queue, q2: Queue): Queue;
+
+	## Get the number of items in a queue.
+	##
+	## q: The queue.
+	##
+	## Returns: The length of the queue.
+	global len:     function(q: Queue): count;
+
+	## Get the contents of the queue as a vector.
+	##
+	## q: The queue.
+	##
+	## ret: A vector containing the
+	##      current contents of q as the type of ret.
+	global get_vector: function(q: Queue, ret: vector of any);
+
+}
+
+redef record Queue += {
+	# Indicator for if the queue was appropriately initialized.
+	initialized: bool                   &default=F;
+	# The values are stored here.
+	vals:        table[count] of any &optional;
+	# Settings for the queue.
+	settings:    Settings               &optional;
+	# The top value in the vals table.
+	top:         count                  &default=0;
+	# The bottom value in the vals table.
+	bottom:      count                  &default=0;
+	# The number of bytes in the queue.
+	size:        count                  &default=0;
+};
+
+function init(s: Settings): Queue
+	{
+	local q: Queue;
+	q$vals=table();
+	q$settings = copy(s);
+	q$initialized=T;
+	return q;
+	}
+
+function put(q: Queue, val: any)
+	{
+	if ( q$settings?$max_len && len(q) >= q$settings$max_len )
+		get(q);
+	q$vals[q$top] = val;
+	++q$top;
+	}
+
+function get(q: Queue): any
+	{
+	local ret = q$vals[q$bottom];
+	delete q$vals[q$bottom];
+	++q$bottom;
+	return ret;
+	}
+
+function merge(q1: Queue, q2: Queue): Queue
+	{
+	local ret = init(q1$settings);
+	local i = q1$bottom;
+	local j = q2$bottom;
+	for ( ignored_val in q1$vals )
+		{
+		if ( i in q1$vals )
+			put(ret, q1$vals[i]);
+		if ( j in q2$vals )
+			put(ret, q2$vals[j]);
+		++i;
+		++j;
+		}
+	return ret;
+	}
+
+function len(q: Queue): count
+	{
+	return |q$vals|;
+	}
+
+function get_vector(q: Queue, ret: vector of any)
+	{
+	local i = q$bottom;
+	local j = 0;
+	# Really dumb hack, this is only to provide
+	# the iteration for the correct number of
+	# values in q$vals.
+	for ( ignored_val in q$vals )
+		{
+		if ( i >= q$top )
+			break;
+
+		ret[j] = q$vals[i];
+		++j; ++i;
+		}
+	}
--- a/scripts/base/utils/time.bro
+++ b/scripts/base/utils/time.bro
@ -0,0 +1,9 @@
+
+## Given an interval, returns a string of the form 3m34s to
+## give a minimalized human readable string for the minutes
+## and seconds represented by the interval.
+function duration_to_mins_secs(dur: interval): string
+	{
+	local dur_count = double_to_count(interval_to_double(dur));
+	return fmt("%dm%ds", dur_count/60, dur_count%60);
+	}
--- a/scripts/policy/frameworks/metrics/conn-example.bro
+++ b/scripts/policy/frameworks/metrics/conn-example.bro
@ -1,25 +0,0 @@
-##! An example of using the metrics framework to collect connection metrics 
-##! aggregated into /24 CIDR ranges.
-
-@load base/frameworks/metrics
-@load base/utils/site
-
-redef enum Metrics::ID += { 
-	CONNS_ORIGINATED, 
-	CONNS_RESPONDED 
-};
-
-event bro_init()
-	{
-	Metrics::add_filter(CONNS_ORIGINATED, [$aggregation_mask=24, $break_interval=1mins]);
-	
-	# Site::local_nets must be defined in order for this to actually do anything.
-	Metrics::add_filter(CONNS_RESPONDED,  [$aggregation_table=Site::local_nets_table, $break_interval=1mins]);
-	}
-
-event connection_established(c: connection)
-	{
-	Metrics::add_data(CONNS_ORIGINATED, [$host=c$id$orig_h], 1);
-	Metrics::add_data(CONNS_RESPONDED,  [$host=c$id$resp_h], 1);
-	}
-	
--- a/scripts/policy/frameworks/metrics/http-example.bro
+++ b/scripts/policy/frameworks/metrics/http-example.bro
@ -1,37 +0,0 @@
-##! Provides an example of aggregating and limiting collection down to 
-##! only local networks.  Additionally, the status code for the response from
-##! the request is added into the metric.
-
-@load base/frameworks/metrics
-@load base/protocols/http
-@load base/utils/site
-
-redef enum Metrics::ID += {
-	## Measures HTTP requests indexed on both the request host and the response
-	## code from the server.
-	HTTP_REQUESTS_BY_STATUS_CODE,
-
-	## Currently unfinished and not working.
-	HTTP_REQUESTS_BY_HOST_HEADER,
-};
-
-event bro_init()
-	{
-	# TODO: these are waiting on a fix with table vals + records before they will work.
-	#Metrics::add_filter(HTTP_REQUESTS_BY_HOST_HEADER, 
-	#                    [$pred(index: Metrics::Index) = { return Site::is_local_addr(index$host); },
-	#                     $aggregation_mask=24,
-	#                     $break_interval=1min]);
-	
-	# Site::local_nets must be defined in order for this to actually do anything.
-	Metrics::add_filter(HTTP_REQUESTS_BY_STATUS_CODE, [$aggregation_table=Site::local_nets_table,
-	                                                   $break_interval=1min]);
-	}
-
-event HTTP::log_http(rec: HTTP::Info)
-	{
-	if ( rec?$host )
-		Metrics::add_data(HTTP_REQUESTS_BY_HOST_HEADER, [$str=rec$host], 1);
-	if ( rec?$status_code )
-		Metrics::add_data(HTTP_REQUESTS_BY_STATUS_CODE, [$host=rec$id$orig_h, $str=fmt("%d", rec$status_code)], 1);
-	}
--- a/scripts/policy/frameworks/metrics/ssl-example.bro
+++ b/scripts/policy/frameworks/metrics/ssl-example.bro
@ -1,28 +0,0 @@
-##! Provides an example of using the metrics framework to collect the number
-##! of times a specific server name indicator value is seen in SSL session
-##! establishments.  Names ending in google.com are being filtered out as an
-##! example of the predicate based filtering in metrics filters.
-
-@load base/frameworks/metrics
-@load base/protocols/ssl
-
-redef enum Metrics::ID += {
-	SSL_SERVERNAME,
-};
-
-event bro_init()
-	{
-	Metrics::add_filter(SSL_SERVERNAME, 
-		[$name="no-google-ssl-servers",
-		 $pred(index: Metrics::Index) = { 
-		    return (/google\.com$/ !in index$str); 
-		 },
-		 $break_interval=10secs
-		]);
-	}
-
-event SSL::log_ssl(rec: SSL::Info)
-	{
-	if ( rec?$server_name )
-		Metrics::add_data(SSL_SERVERNAME, [$str=rec$server_name], 1);
-	}
--- a/scripts/policy/misc/app-metrics.bro
+++ b/scripts/policy/misc/app-metrics.bro
@ -0,0 +1,109 @@
+@load base/protocols/http
+@load base/protocols/ssl
+@load base/frameworks/sumstats
+
+module AppStats;
+
+export {
+	redef enum Log::ID += { LOG };
+
+	type Info: record {
+		## Timestamp when the log line was finished and written.
+		ts:         time   &log;
+		## Time interval that the log line covers.
+		ts_delta:   interval &log;
+		## The name of the "app", like "facebook" or "netflix".
+		app:        string &log;
+		## The number of unique local hosts using the app.
+		uniq_hosts: count  &log;
+		## The number of hits to the app in total.
+		hits:       count  &log;
+		## The total number of bytes received by users of the app.
+		bytes:      count  &log;
+	};
+
+	## The frequency of logging the stats collected by this script.
+	const break_interval = 15mins &redef;
+}
+
+redef record connection += {
+	resp_hostname: string &optional;
+};
+
+event bro_init() &priority=3
+	{
+	Log::create_stream(AppStats::LOG, [$columns=Info]);
+
+	local r1: SumStats::Reducer = [$stream="apps.bytes", $apply=set(SumStats::SUM)];
+	local r2: SumStats::Reducer = [$stream="apps.hits",  $apply=set(SumStats::UNIQUE)];
+	SumStats::create([$epoch=break_interval,
+	                  $reducers=set(r1, r2),
+	                  $epoch_finished(data: SumStats::ResultTable) =
+	                  	{
+	                  	local l: Info;
+	                  	l$ts = network_time();
+	                  	l$ts_delta = break_interval;
+	                  	for ( key in data )
+	                  		{
+	                  		local result = data[key];
+	                  		l$app        = key$str;
+	                  		l$bytes      = double_to_count(floor(result["apps.bytes"]$sum));
+	                  		l$hits       = result["apps.hits"]$num;
+	                  		l$uniq_hosts = result["apps.hits"]$unique;
+	                  		Log::write(LOG, l);
+	                  		}
+	                  	}]);
+	}
+
+function add_sumstats(id: conn_id, hostname: string, size: count)
+	{
+	if ( /\.youtube\.com$/ in hostname && size > 512*1024 )
+		{
+		SumStats::observe("apps.bytes", [$str="youtube"], [$num=size]);
+		SumStats::observe("apps.hits",  [$str="youtube"], [$str=cat(id$orig_h)]);
+		}
+	else if ( /(\.facebook\.com|\.fbcdn\.net)$/ in hostname && size > 20 )
+		{
+		SumStats::observe("apps.bytes", [$str="facebook"], [$num=size]);
+		SumStats::observe("apps.hits",  [$str="facebook"], [$str=cat(id$orig_h)]);
+		}
+	else if ( /\.google\.com$/ in hostname && size > 20 )
+		{
+		SumStats::observe("apps.bytes", [$str="google"], [$num=size]);
+		SumStats::observe("apps.hits",  [$str="google"], [$str=cat(id$orig_h)]);
+		}
+	else if ( /\.nflximg\.com$/ in hostname && size > 200*1024 )
+		{
+		SumStats::observe("apps.bytes", [$str="netflix"], [$num=size]);
+		SumStats::observe("apps.hits",  [$str="netflix"], [$str=cat(id$orig_h)]);
+		}
+	else if ( /\.(pandora|p-cdn)\.com$/ in hostname && size > 512*1024 )
+		{
+		SumStats::observe("apps.bytes", [$str="pandora"], [$num=size]);
+		SumStats::observe("apps.hits",  [$str="pandora"], [$str=cat(id$orig_h)]);
+		}
+	else if ( /\.gmail\.com$/ in hostname && size > 20 )
+		{
+		SumStats::observe("apps.bytes", [$str="gmail"], [$num=size]);
+		SumStats::observe("apps.hits",  [$str="gmail"], [$str=cat(id$orig_h)]);
+		}
+}
+
+
+event ssl_established(c: connection)
+	{
+	if ( c?$ssl && c$ssl?$server_name )
+		c$resp_hostname = c$ssl$server_name;
+	}
+
+event connection_finished(c: connection)
+	{
+	if ( c?$resp_hostname )
+		add_sumstats(c$id, c$resp_hostname, c$resp$size);
+	}
+
+event HTTP::log_http(rec: HTTP::Info)
+	{
+	if( rec?$host )
+		add_sumstats(rec$id, rec$host, rec$response_body_len);
+	}
--- a/scripts/policy/misc/capture-loss.bro
+++ b/scripts/policy/misc/capture-loss.bro
@ -8,7 +8,6 @@
 ##! for a sequence number that's above a gap).

@load base/frameworks/notice
-@load base/frameworks/metrics

 module CaptureLoss;

--- a/scripts/policy/misc/detect-traceroute/load.bro
+++ b/scripts/policy/misc/detect-traceroute/load.bro
@ -0,0 +1 @@
+@load ./main
--- a/scripts/policy/misc/detect-traceroute/detect-low-ttls.sig
+++ b/scripts/policy/misc/detect-traceroute/detect-low-ttls.sig
@ -0,0 +1,9 @@
+signature traceroute-detector-ipv4 {
+	header ip[8] < 10
+	event "match"
+}
+
+signature traceroute-detector-ipv6 {
+	header ip6[7] < 10
+	event "match"
+}
--- a/scripts/policy/misc/detect-traceroute/main.bro
+++ b/scripts/policy/misc/detect-traceroute/main.bro
@ -0,0 +1,98 @@
+##! This script detects a large number of ICMP Time Exceeded messages heading toward
+##! hosts that have sent low TTL packets. It generates a notice when the number of
+##! ICMP Time Exceeded messages for a source-destination pair exceeds a
+##! threshold.
+@load base/frameworks/sumstats
+@load base/frameworks/signatures
+@load-sigs ./detect-low-ttls.sig
+
+redef Signatures::ignored_ids += /traceroute-detector.*/;
+
+module Traceroute;
+
+export {
+	redef enum Log::ID += { LOG };
+
+	redef enum Notice::Type += {
+		## Indicates that a host was seen running traceroutes.  For more
+		## detail about specific traceroutes that we run, refer to the
+		## traceroute.log.
+		Detected
+	};
+
+	## By default this script requires that any host detected running traceroutes
+	## first send low TTL packets (TTL < 10) to the traceroute destination host.
+	## Changing this this setting to `F` will relax the detection a bit by
+	## solely relying on ICMP time-exceeded messages to detect traceroute.
+	const require_low_ttl_packets = T &redef;
+
+	## Defines the threshold for ICMP Time Exceeded messages for a src-dst pair.
+	## This threshold only comes into play after a host is found to be
+	## sending low ttl packets.
+	const icmp_time_exceeded_threshold = 3 &redef;
+
+	## Interval at which to watch for the
+	## :bro:id:`ICMPTimeExceeded::icmp_time_exceeded_threshold` variable to be crossed.
+	## At the end of each interval the counter is reset.
+	const icmp_time_exceeded_interval = 3min &redef;
+
+	## The log record for the traceroute log.
+	type Info: record {
+		## Timestamp
+		ts:    time &log;
+		## Address initiaing the traceroute.
+		src:   addr &log;
+		## Destination address of the traceroute.
+		dst:   addr &log;
+		## Protocol used for the traceroute.
+		proto: string &log;
+	};
+
+	global log_traceroute: event(rec: Traceroute::Info);
+}
+
+event bro_init() &priority=5
+	{
+	Log::create_stream(Traceroute::LOG, [$columns=Info, $ev=log_traceroute]);
+
+	local r1: SumStats::Reducer = [$stream="traceroute.time_exceeded", $apply=set(SumStats::UNIQUE)];
+	local r2: SumStats::Reducer = [$stream="traceroute.low_ttl_packet", $apply=set(SumStats::SUM)];
+	SumStats::create([$epoch=icmp_time_exceeded_interval,
+	                  $reducers=set(r1, r2),
+	                  $threshold_val(key: SumStats::Key, result: SumStats::Result) =
+	                  	{
+	                  	# Give a threshold value of zero depending on if the host
+	                  	# sends a low ttl packet.
+	                  	if ( require_low_ttl_packets && result["traceroute.low_ttl_packet"]$sum == 0 )
+	                  		return 0;
+	                  	else
+	                  		return result["traceroute.time_exceeded"]$unique;
+	                  	},
+	                  $threshold=icmp_time_exceeded_threshold,
+	                  $threshold_crossed(key: SumStats::Key, result: SumStats::Result) =
+	                  	{
+	                  	local parts = split_n(key$str, /-/, F, 2);
+	                  	local src = to_addr(parts[1]);
+	                  	local dst = to_addr(parts[2]);
+	                  	local proto = parts[3];
+	                  	Log::write(LOG, [$ts=network_time(), $src=src, $dst=dst, $proto=proto]);
+	                  	NOTICE([$note=Traceroute::Detected,
+	                  	        $msg=fmt("%s seems to be running traceroute using %s", src, proto),
+	                  	        $src=src,
+	                  	        $identifier=cat(src,proto)]);
+	                  	}]);
+	}
+
+# Low TTL packets are detected with a signature.
+event signature_match(state: signature_state, msg: string, data: string)
+	{
+	if ( state$sig_id == /traceroute-detector.*/ )
+		{
+		SumStats::observe("traceroute.low_ttl_packet", [$str=cat(state$conn$id$orig_h,"-",state$conn$id$resp_h,"-",get_port_transport_proto(state$conn$id$resp_p))], [$num=1]);
+		}
+	}
+
+event icmp_time_exceeded(c: connection, icmp: icmp_conn, code: count, context: icmp_context)
+	{
+	SumStats::observe("traceroute.time_exceeded", [$str=cat(context$id$orig_h,"-",context$id$resp_h,"-",get_port_transport_proto(context$id$resp_p))], [$str=cat(c$id$orig_h)]);
+	}
--- a/scripts/policy/misc/loaded-scripts.bro
+++ b/scripts/policy/misc/loaded-scripts.bro
@ -1,4 +1,5 @@
 ##! Log the loaded scripts.
+@load base/utils/paths

 module LoadedScripts;

@ -34,5 +35,5 @@ event bro_init() &priority=5

 event bro_script_loaded(path: string, level: count)
 	{
-	Log::write(LoadedScripts::LOG, [$name=cat(depth[level], path)]);
+	Log::write(LoadedScripts::LOG, [$name=cat(depth[level], compress_path(path))]);
 	}
--- a/scripts/policy/misc/scan.bro
+++ b/scripts/policy/misc/scan.bro
@ -0,0 +1,193 @@
+##! TCP Scan detection
+##!
+##! ..Authors: Sheharbano Khattak
+##!            Seth Hall
+##!            All the authors of the old scan.bro
+
+@load base/frameworks/notice
+@load base/frameworks/sumstats
+
+@load base/utils/time
+
+module Scan;
+
+export {
+	redef enum Notice::Type += {
+		## Address scans detect that a host appears to be scanning some number of
+		## destinations on a single port. This notice is generated when more than
+		## :bro:id:`addr_scan_threshold` unique hosts are seen over the previous
+		## :bro:id:`addr_scan_interval` time range.
+		Address_Scan,
+
+		## Port scans detect that an attacking host appears to be scanning a
+		## single victim host on several ports.  This notice is generated when
+		## an attacking host attempts to connect to :bro:id:`port_scan_threshold`
+		## unique ports on a single host over the previous
+		## :bro:id:`port_scan_interval` time range.
+		Port_Scan,
+	};
+
+	## Failed connection attempts are tracked over this time interval for the address
+	## scan detection.  A higher interval will detect slower scanners, but may also
+	## yield more false positives.
+	const addr_scan_interval = 5min &redef;
+
+	## Failed connection attempts are tracked over this time interval for the port scan
+	## detection.  A higher interval will detect slower scanners, but may also yield
+	## more false positives.
+	const port_scan_interval = 5min &redef;
+
+	## The threshold of a unique number of hosts a scanning host has to have failed
+	## connections with on a single port.
+	const addr_scan_threshold = 25 &redef;
+
+	## The threshold of a number of unique ports a scanning host has to have failed
+	## connections with on a single victim host.
+	const port_scan_threshold = 15 &redef;
+
+	## Custom thresholds based on service for address scan.  This is primarily
+	## useful for setting reduced thresholds for specific ports.
+	const addr_scan_custom_thresholds: table[port] of count &redef;
+
+	global Scan::addr_scan_policy: hook(scanner: addr, victim: addr, scanned_port: port);
+	global Scan::port_scan_policy: hook(scanner: addr, victim: addr, scanned_port: port);
+}
+
+event bro_init() &priority=5
+	{
+	local r1: SumStats::Reducer = [$stream="scan.addr.fail", $apply=set(SumStats::UNIQUE)];
+	SumStats::create([$epoch=addr_scan_interval,
+	                  $reducers=set(r1),
+	                  $threshold_val(key: SumStats::Key, result: SumStats::Result) =
+	                  	{
+	                  	return double_to_count(result["scan.addr.fail"]$unique);
+	                  	},
+	                  #$threshold_func=check_addr_scan_threshold,
+	                  $threshold=addr_scan_threshold,
+	                  $threshold_crossed(key: SumStats::Key, result: SumStats::Result) =
+	                  	{
+	                  	local r = result["scan.addr.fail"];
+	                  	local side = Site::is_local_addr(key$host) ? "local" : "remote";
+	                  	local dur = duration_to_mins_secs(r$end-r$begin);
+	                  	local message=fmt("%s scanned at least %d unique hosts on port %s in %s", key$host, r$unique, key$str, dur);
+	                  	NOTICE([$note=Address_Scan,
+	                  	        $src=key$host,
+	                  	        $p=to_port(key$str),
+	                  	        $sub=side,
+	                  	        $msg=message,
+	                  	        $identifier=cat(key$host)]);
+	                  	}]);
+
+	# Note: port scans are tracked similar to: table[src_ip, dst_ip] of set(port);
+	local r2: SumStats::Reducer = [$stream="scan.port.fail", $apply=set(SumStats::UNIQUE)];
+	SumStats::create([$epoch=port_scan_interval,
+	                  $reducers=set(r2),
+	                  $threshold_val(key: SumStats::Key, result: SumStats::Result) =
+	                  	{
+	                  	return double_to_count(result["scan.port.fail"]$unique);
+	                  	},
+	                  $threshold=port_scan_threshold,
+	                  $threshold_crossed(key: SumStats::Key, result: SumStats::Result) =
+	                  	{
+	                  	local r = result["scan.port.fail"];
+	                  	local side = Site::is_local_addr(key$host) ? "local" : "remote";
+	                  	local dur = duration_to_mins_secs(r$end-r$begin);
+	                  	local message = fmt("%s scanned at least %d unique ports of host %s in %s", key$host, r$unique, key$str, dur);
+	                  	NOTICE([$note=Port_Scan,
+	                  	        $src=key$host,
+	                  	        $dst=to_addr(key$str),
+	                  	        $sub=side,
+	                  	        $msg=message,
+	                  	        $identifier=cat(key$host)]);
+	                  	}]);
+	}
+
+function add_sumstats(id: conn_id, reverse: bool)
+	{
+	local scanner      = id$orig_h;
+	local victim       = id$resp_h;
+	local scanned_port = id$resp_p;
+
+	if ( reverse )
+		{
+		scanner      = id$resp_h;
+		victim       = id$orig_h;
+		scanned_port = id$orig_p;
+		}
+
+	if ( hook Scan::addr_scan_policy(scanner, victim, scanned_port) )
+		SumStats::observe("scan.addr.fail", [$host=scanner, $str=cat(scanned_port)], [$str=cat(victim)]);
+
+	if ( hook Scan::port_scan_policy(scanner, victim, scanned_port) )
+		SumStats::observe("scan.port.fail", [$host=scanner, $str=cat(victim)], [$str=cat(scanned_port)]);
+	}
+
+function is_failed_conn(c: connection): bool
+	{
+	# Sr || ( (hR || ShR) && (data not sent in any direction) )
+	if ( (c$orig$state == TCP_SYN_SENT && c$resp$state == TCP_RESET) ||
+	     (((c$orig$state == TCP_RESET && c$resp$state == TCP_SYN_ACK_SENT) ||
+	       (c$orig$state == TCP_RESET && c$resp$state == TCP_ESTABLISHED && "S" in c$history )
+	      ) && /[Dd]/ !in c$history )
+	   )
+		return T;
+	return F;
+	}
+
+function is_reverse_failed_conn(c: connection): bool
+	{
+	# reverse scan i.e. conn dest is the scanner
+	# sR || ( (Hr || sHr) && (data not sent in any direction) )
+	if ( (c$resp$state == TCP_SYN_SENT && c$orig$state == TCP_RESET) ||
+	     (((c$resp$state == TCP_RESET && c$orig$state == TCP_SYN_ACK_SENT) ||
+	       (c$resp$state == TCP_RESET && c$orig$state == TCP_ESTABLISHED && "s" in c$history )
+	      ) && /[Dd]/ !in c$history )
+	   )
+		return T;
+	return F;
+	}
+
+## Generated for an unsuccessful connection attempt. This
+## event is raised when an originator unsuccessfully attempted
+## to establish a connection. “Unsuccessful” is defined as at least
+## tcp_attempt_delay seconds having elapsed since the originator first sent a
+## connection establishment packet to the destination without seeing a reply.
+event connection_attempt(c: connection)
+	{
+	local is_reverse_scan = F;
+	if ( "H" in c$history )
+		is_reverse_scan = T;
+
+	add_sumstats(c$id, is_reverse_scan);
+	}
+
+## Generated for a rejected TCP connection. This event is raised when an originator
+## attempted to setup a TCP connection but the responder replied with a RST packet
+## denying it.
+event connection_rejected(c: connection)
+	{
+	local is_reverse_scan = F;
+	if ( "s" in c$history )
+		is_reverse_scan = T;
+
+	add_sumstats(c$id, is_reverse_scan);
+	}
+
+## Generated when an endpoint aborted a TCP connection. The event is raised when
+## one endpoint of an *established* TCP connection aborted by sending a RST packet.
+event connection_reset(c: connection)
+	{
+	if ( is_failed_conn(c) )
+		add_sumstats(c$id, F);
+	else if ( is_reverse_failed_conn(c) )
+		add_sumstats(c$id, T);
+	}
+
+## Generated for each still-open connection when Bro terminates.
+event connection_pending(c: connection)
+	{
+	if ( is_failed_conn(c) )
+		add_sumstats(c$id, F);
+	else if ( is_reverse_failed_conn(c) )
+		add_sumstats(c$id, T);
+	}
--- a/scripts/policy/protocols/ftp/detect-bruteforcing.bro
+++ b/scripts/policy/protocols/ftp/detect-bruteforcing.bro
@ -0,0 +1,59 @@
+##! FTP brute-forcing detector, triggering when too  many rejected usernames or
+##! failed passwords have occured from a single address.
+
+@load base/protocols/ftp
+@load base/frameworks/sumstats
+
+@load base/utils/time
+
+module FTP;
+
+export {
+	redef enum Notice::Type += {
+		## Indicates a host bruteforcing FTP logins by watching for too many
+		## rejected usernames or failed passwords.
+		Bruteforcing
+	};
+
+	## How many rejected usernames or passwords are required before being
+	## considered to be bruteforcing.
+	const bruteforce_threshold = 20 &redef;
+
+	## The time period in which the threshold needs to be crossed before
+	## being reset.
+	const bruteforce_measurement_interval = 15mins &redef;
+}
+
+
+event bro_init()
+	{
+	local r1: SumStats::Reducer = [$stream="ftp.failed_auth", $apply=set(SumStats::UNIQUE)];
+	SumStats::create([$epoch=bruteforce_measurement_interval,
+	                  $reducers=set(r1),
+	                  $threshold_val(key: SumStats::Key, result: SumStats::Result) =
+	                  	{
+	                  	return result["ftp.failed_auth"]$num;
+	                  	},
+	                  $threshold=bruteforce_threshold,
+	                  $threshold_crossed(key: SumStats::Key, result: SumStats::Result) =
+	                  	{
+	                  	local r = result["ftp.failed_auth"];
+	                  	local dur = duration_to_mins_secs(r$end-r$begin);
+	                  	local plural = r$unique>1 ? "s" : "";
+	                  	local message = fmt("%s had %d failed logins on %d FTP server%s in %s", key$host, r$num, r$unique, plural, dur);
+	                  	NOTICE([$note=FTP::Bruteforcing,
+	                  	        $src=key$host,
+	                  	        $msg=message,
+	                  	        $identifier=cat(key$host)]);
+	                  	}]);
+	}
+
+event ftp_reply(c: connection, code: count, msg: string, cont_resp: bool)
+	{
+	local cmd = c$ftp$cmdarg$cmd;
+	if ( cmd == "USER" || cmd == "PASS" )
+		{
+		if ( FTP::parse_ftp_reply_code(code)$x == 5 )
+			SumStats::observe("ftp.failed_auth", [$host=c$id$orig_h], [$str=cat(c$id$resp_h)]);
+		}
+	}
--- a/scripts/policy/protocols/http/detect-sqli.bro
+++ b/scripts/policy/protocols/http/detect-sqli.bro
@ -1,7 +1,7 @@
 ##! SQL injection attack detection in HTTP.

@load base/frameworks/notice
-@load base/frameworks/metrics
+@load base/frameworks/sumstats
@load base/protocols/http

 module HTTP;
@ -14,36 +14,34 @@ export {
 		## it.  This is tracked by IP address as opposed to hostname.
 		SQL_Injection_Victim,
 	};
-	
-	redef enum Metrics::ID += {
-		## Metric to track SQL injection attackers.
-		SQLI_ATTACKER,
-		## Metrics to track SQL injection victims.
-		SQLI_VICTIM,
-	};

 	redef enum Tags += {
 		## Indicator of a URI based SQL injection attack.
 		URI_SQLI,
-		## Indicator of client body based SQL injection attack.  This is 
+		## Indicator of client body based SQL injection attack.  This is
 		## typically the body content of a POST request. Not implemented yet.
 		POST_SQLI,
 		## Indicator of a cookie based SQL injection attack. Not implemented yet.
 		COOKIE_SQLI,
 	};
-	
+
 	## Defines the threshold that determines if an SQL injection attack
-	## is ongoing based on the number of requests that appear to be SQL 
+	## is ongoing based on the number of requests that appear to be SQL
 	## injection attacks.
 	const sqli_requests_threshold = 50 &redef;
-	
+
 	## Interval at which to watch for the
 	## :bro:id:`HTTP::sqli_requests_threshold` variable to be crossed.
 	## At the end of each interval the counter is reset.
 	const sqli_requests_interval = 5min &redef;

+	## Collecting samples will add extra data to notice emails
+	## by collecting some sample SQL injection url paths.  Disable
+	## sample collection by setting this value to 0.
+	const collect_SQLi_samples = 5 &redef;
+
 	## Regular expression is used to match URI based SQL injections.
-	const match_sql_injection_uri = 
+	const match_sql_injection_uri =
 		  /[\?&][^[:blank:]\x00-\x37\|]+?=[\-[:alnum:]%]+([[:blank:]\x00-\x37]|\/\*.*?\*\/)*['"]?([[:blank:]\x00-\x37]|\/\*.*?\*\/|\)?;)+.*?([hH][aA][vV][iI][nN][gG]|[uU][nN][iI][oO][nN]|[eE][xX][eE][cC]|[sS][eE][lL][eE][cC][tT]|[dD][eE][lL][eE][tT][eE]|[dD][rR][oO][pP]|[dD][eE][cC][lL][aA][rR][eE]|[cC][rR][eE][aA][tT][eE]|[iI][nN][sS][eE][rR][tT])([[:blank:]\x00-\x37]|\/\*.*?\*\/)+/
 		| /[\?&][^[:blank:]\x00-\x37\|]+?=[\-0-9%]+([[:blank:]\x00-\x37]|\/\*.*?\*\/)*['"]?([[:blank:]\x00-\x37]|\/\*.*?\*\/|\)?;)+([xX]?[oO][rR]|[nN]?[aA][nN][dD])([[:blank:]\x00-\x37]|\/\*.*?\*\/)+['"]?(([^a-zA-Z&]+)?=|[eE][xX][iI][sS][tT][sS])/
 		| /[\?&][^[:blank:]\x00-\x37]+?=[\-0-9%]*([[:blank:]\x00-\x37]|\/\*.*?\*\/)*['"]([[:blank:]\x00-\x37]|\/\*.*?\*\/)*(-|=|\+|\|\|)([[:blank:]\x00-\x37]|\/\*.*?\*\/)*([0-9]|\(?[cC][oO][nN][vV][eE][rR][tT]|[cC][aA][sS][tT])/
@ -52,20 +50,54 @@ export {
 		| /\/\*![[:digit:]]{5}.*?\*\// &redef;
 }

+function format_sqli_samples(samples: vector of SumStats::Observation): string
+	{
+	local ret = "SQL Injection samples\n---------------------";
+	for ( i in samples )
+		ret += "\n" + samples[i]$str;
+	return ret;
+	}
+
 event bro_init() &priority=3
 	{
-	# Add filters to the metrics so that the metrics framework knows how to 
+	# Add filters to the metrics so that the metrics framework knows how to
 	# determine when it looks like an actual attack and how to respond when
 	# thresholds are crossed.
-	
-	Metrics::add_filter(SQLI_ATTACKER, [$log=F,
-	                                   $notice_threshold=sqli_requests_threshold,
-	                                   $break_interval=sqli_requests_interval,
-	                                   $note=SQL_Injection_Attacker]);
-	Metrics::add_filter(SQLI_VICTIM, [$log=F,
-	                                 $notice_threshold=sqli_requests_threshold,
-	                                 $break_interval=sqli_requests_interval,
-	                                 $note=SQL_Injection_Victim]);
+	local r1: SumStats::Reducer = [$stream="http.sqli.attacker", $apply=set(SumStats::SUM), $samples=collect_SQLi_samples];
+	SumStats::create([$epoch=sqli_requests_interval,
+	                  $reducers=set(r1),
+	                  $threshold_val(key: SumStats::Key, result: SumStats::Result) =
+	                  	{
+	                  	return double_to_count(result["http.sqli.attacker"]$sum);
+	                  	},
+	                  $threshold=sqli_requests_threshold,
+	                  $threshold_crossed(key: SumStats::Key, result: SumStats::Result) =
+	                  	{
+	                  	local r = result["http.sqli.attacker"];
+	                  	NOTICE([$note=SQL_Injection_Attacker,
+	                  	        $msg="An SQL injection attacker was discovered!",
+	                  	        $email_body_sections=vector(format_sqli_samples(SumStats::get_samples(r))),
+	                  	        $src=key$host,
+	                  	        $identifier=cat(key$host)]);
+	                  	}]);
+
+	local r2: SumStats::Reducer = [$stream="http.sqli.victim", $apply=set(SumStats::SUM), $samples=collect_SQLi_samples];
+	SumStats::create([$epoch=sqli_requests_interval,
+	                  $reducers=set(r2),
+	                  $threshold_val(key: SumStats::Key, result: SumStats::Result) =
+	                  	{
+	                  	return double_to_count(result["http.sqli.victim"]$sum);
+	                  	},
+	                  $threshold=sqli_requests_threshold,
+	                  $threshold_crossed(key: SumStats::Key, result: SumStats::Result) =
+	                  	{
+	                  	local r = result["http.sqli.victim"];
+	                  	NOTICE([$note=SQL_Injection_Victim,
+	                  	        $msg="An SQL injection victim was discovered!",
+	                  	        $email_body_sections=vector(format_sqli_samples(SumStats::get_samples(r))),
+	                  	        $src=key$host,
+	                  	        $identifier=cat(key$host)]);
+	                  	}]);
 	}

 event http_request(c: connection, method: string, original_URI: string,
@ -74,8 +106,8 @@ event http_request(c: connection, method: string, original_URI: string,
 	if ( match_sql_injection_uri in unescaped_URI )
 		{
 		add c$http$tags[URI_SQLI];
-		
-		Metrics::add_data(SQLI_ATTACKER, [$host=c$id$orig_h], 1);
-		Metrics::add_data(SQLI_VICTIM, [$host=c$id$resp_h], 1);
+
+		SumStats::observe("http.sqli.attacker", [$host=c$id$orig_h], [$str=original_URI]);
+		SumStats::observe("http.sqli.victim",   [$host=c$id$resp_h], [$str=original_URI]);
 		}
 	}
--- a/scripts/policy/protocols/ssh/detect-bruteforcing.bro
+++ b/scripts/policy/protocols/ssh/detect-bruteforcing.bro
@ -2,7 +2,7 @@
 ##! bruteforcing over SSH.

@load base/protocols/ssh
-@load base/frameworks/metrics
+@load base/frameworks/sumstats
@load base/frameworks/notice
@load base/frameworks/intel

@ -10,7 +10,7 @@ module SSH;

 export {
 	redef enum Notice::Type += {
-		## Indicates that a host has been identified as crossing the 
+		## Indicates that a host has been identified as crossing the
 		## :bro:id:`SSH::password_guesses_limit` threshold with heuristically
 		## determined failed logins.
 		Password_Guessing,
@ -19,10 +19,10 @@ export {
 		## currently implemented.
 		Login_By_Password_Guesser,
 	};
-	
-	redef enum Metrics::ID  += {
-		## Metric is to measure failed logins.
-		FAILED_LOGIN,
+
+	redef enum Intel::Where += {
+		## An indicator of the login for the intel framework.
+		SSH::SUCCESSFUL_LOGIN,
 	};

 	## The number of failed SSH connections before a host is designated as
@ -33,47 +33,54 @@ export {
 	## model of a password guesser.
 	const guessing_timeout = 30 mins &redef;

-	## This value can be used to exclude hosts or entire networks from being 
+	## This value can be used to exclude hosts or entire networks from being
 	## tracked as potential "guessers".  There are cases where the success
-	## heuristic fails and this acts as the whitelist.  The index represents 
+	## heuristic fails and this acts as the whitelist.  The index represents
 	## client subnets and the yield value represents server subnets.
 	const ignore_guessers: table[subnet] of subnet &redef;
-
-	## Tracks hosts identified as guessing passwords.
-	global password_guessers: set[addr] 
-		&read_expire=guessing_timeout+1hr &synchronized &redef;
 }

 event bro_init()
 	{
-	Metrics::add_filter(FAILED_LOGIN, [$name="detect-bruteforcing", $log=F,
-	                                   $note=Password_Guessing,
-	                                   $notice_threshold=password_guesses_limit,
-	                                   $notice_freq=1hr,
-	                                   $break_interval=guessing_timeout]);
+	local r1: SumStats::Reducer = [$stream="ssh.login.failure", $apply=set(SumStats::SUM)];
+	SumStats::create([$epoch=guessing_timeout,
+	                  $reducers=set(r1),
+	                  $threshold_val(key: SumStats::Key, result: SumStats::Result) =
+	                  	{
+	                  	return double_to_count(result["ssh.login.failure"]$sum);
+	                  	},
+	                  $threshold=password_guesses_limit,
+	                  $threshold_crossed(key: SumStats::Key, result: SumStats::Result) =
+	                  	{
+	                  	local r = result["ssh.login.failure"];
+	                  	# Generate the notice.
+	                  	NOTICE([$note=Password_Guessing,
+	                  	        $msg=fmt("%s appears to be guessing SSH passwords (seen in %d connections).", key$host, r$num),
+	                  	        $src=key$host,
+	                  	        $identifier=cat(key$host)]);
+	                  	# Insert the guesser into the intel framework.
+	                  	Intel::insert([$host=key$host,
+	                  	               $meta=[$source="local",
+	                  	                      $desc=fmt("Bro observed %d apparently failed SSH connections.", r$num)]]);
+	                  	}]);
 	}

 event SSH::heuristic_successful_login(c: connection)
 	{
 	local id = c$id;
-	
-	# TODO: This is out for the moment pending some more additions to the 
-	#       metrics framework.
-	#if ( id$orig_h in password_guessers )
-	#	{
-	#	NOTICE([$note=Login_By_Password_Guesser,
-	#	        $conn=c,
-	#	        $msg=fmt("Successful SSH login by password guesser %s", id$orig_h)]);
-	#	}
+
+	Intel::seen([$host=id$orig_h,
+	             $conn=c,
+	             $where=SSH::SUCCESSFUL_LOGIN]);
 	}

 event SSH::heuristic_failed_login(c: connection)
 	{
 	local id = c$id;
-	
-	# Add data to the FAILED_LOGIN metric unless this connection should 
+
+	# Add data to the FAILED_LOGIN metric unless this connection should
 	# be ignored.
 	if ( ! (id$orig_h in ignore_guessers &&
 	        id$resp_h in ignore_guessers[id$orig_h]) )
-		Metrics::add_data(FAILED_LOGIN, [$host=id$orig_h], 1);
+		SumStats::observe("ssh.login.failure", [$host=id$orig_h], [$num=1]);
 	}
--- a/scripts/site/local.bro
+++ b/scripts/site/local.bro
@ -8,6 +8,9 @@
 # Apply the default tuning scripts for common tuning settings.
@load tuning/defaults

+# Load the scan detection script.
+@load misc/scan
+
 # Generate notices when vulnerable versions of software are discovered.
 # The default is to only monitor software found in the address space defined
 # as "local".  Refer to the software framework's documentation for more 
--- a/scripts/test-all-policy.bro
+++ b/scripts/test-all-policy.bro
@ -24,9 +24,6 @@
@load frameworks/intel/smtp.bro
@load frameworks/intel/ssl.bro
@load frameworks/intel/where-locations.bro
-@load frameworks/metrics/conn-example.bro
-@load frameworks/metrics/http-example.bro
-@load frameworks/metrics/ssl-example.bro
@load frameworks/software/version-changes.bro
@load frameworks/software/vulnerable.bro
@load integration/barnyard2/__load__.bro
@ -35,9 +32,13 @@
@load integration/collective-intel/__load__.bro
@load integration/collective-intel/main.bro
@load misc/analysis-groups.bro
+@load misc/app-metrics.bro
@load misc/capture-loss.bro
+@load misc/detect-traceroute/__load__.bro
+@load misc/detect-traceroute/main.bro
@load misc/loaded-scripts.bro
@load misc/profiling.bro
+@load misc/scan.bro
@load misc/stats.bro
@load misc/trim-trace-file.bro
@load protocols/conn/known-hosts.bro
@ -45,6 +46,7 @@
@load protocols/conn/weirds.bro
@load protocols/dns/auth-addl.bro
@load protocols/dns/detect-external-names.bro
+@load protocols/ftp/detect-bruteforcing.bro
@load protocols/ftp/detect.bro
@load protocols/ftp/software.bro
@load protocols/http/detect-MHR.bro