Merge remote branch 'origin/master' into topic/bernhard/input-update

2025-10-14 20:48:21 +00:00 · 2013-05-27 20:32:50 -07:00 · 2013-05-27 20:32:50 -07:00 · 3719524a6a
commit 3719524a6a
parent fed914252c d67123d0c3
423 changed files with 240373 additions and 161770 deletions
--- a/scripts/base/frameworks/file-analysis/load.bro
+++ b/scripts/base/frameworks/file-analysis/load.bro
@ -0,0 +1 @@
+@load ./main.bro
--- a/scripts/base/frameworks/file-analysis/main.bro
+++ b/scripts/base/frameworks/file-analysis/main.bro
@ -0,0 +1,351 @@
+##! An interface for driving the analysis of files, possibly independent of
+##! any network protocol over which they're transported.
+
+@load base/file_analysis.bif
+@load base/frameworks/logging
+
+module FileAnalysis;
+
+export {
+	redef enum Log::ID += {
+		## Logging stream for file analysis.
+		LOG
+	};
+
+	## A structure which represents a desired type of file analysis.
+	type AnalyzerArgs: record {
+		## The type of analysis.
+		tag: Analyzer;
+
+		## The local filename to which to write an extracted file.  Must be
+		## set when *tag* is :bro:see:`FileAnalysis::ANALYZER_EXTRACT`.
+		extract_filename: string &optional;
+
+		## An event which will be generated for all new file contents,
+		## chunk-wise.
+		chunk_event: event(f: fa_file, data: string, off: count) &optional;
+
+		## An event which will be generated for all new file contents,
+		## stream-wise.
+		stream_event: event(f: fa_file, data: string) &optional;
+	} &redef;
+
+	## Contains all metadata related to the analysis of a given file.
+	## For the most part, fields here are derived from ones of the same name
+	## in :bro:see:`fa_file`.
+	type Info: record {
+		## An identifier associated with a single file.
+		id: string &log;
+
+		## Identifier associated with a container file from which this one was
+		## extracted as part of the file analysis.
+		parent_id: string &log &optional;
+
+		## An identification of the source of the file data.  E.g. it may be
+		## a network protocol over which it was transferred, or a local file
+		## path which was read, or some other input source.
+		source: string &log &optional;
+
+		## If the source of this file is a network connection, this field
+		## may be set to indicate the directionality.
+		is_orig: bool &log &optional;
+
+		## The time at which the last activity for the file was seen.
+		last_active: time &log;
+
+		## Number of bytes provided to the file analysis engine for the file.
+		seen_bytes: count &log &default=0;
+
+		## Total number of bytes that are supposed to comprise the full file.
+		total_bytes: count &log &optional;
+
+		## The number of bytes in the file stream that were completely missed
+		## during the process of analysis e.g. due to dropped packets.
+		missing_bytes: count &log &default=0;
+
+		## The number of not all-in-sequence bytes in the file stream that
+		## were delivered to file analyzers due to reassembly buffer overflow.
+		overflow_bytes: count &log &default=0;
+
+		## The amount of time between receiving new data for this file that
+		## the analysis engine will wait before giving up on it.
+		timeout_interval: interval &log &optional;
+
+		## The number of bytes at the beginning of a file to save for later
+		## inspection in *bof_buffer* field.
+		bof_buffer_size: count &log &optional;
+
+		## A mime type provided by libmagic against the *bof_buffer*, or
+		## in the cases where no buffering of the beginning of file occurs,
+		## an initial guess of the mime type based on the first data seen.
+		mime_type: string &log &optional;
+
+		## Whether the file analysis timed out at least once for the file.
+		timedout: bool &log &default=F;
+
+		## Connection UIDs over which the file was transferred.
+		conn_uids: set[string] &log;
+
+		## A set of analysis types done during the file analysis.
+		analyzers: set[Analyzer] &log;
+
+		## Local filenames of extracted files.
+		extracted_files: set[string] &log;
+
+		## An MD5 digest of the file contents.
+		md5: string &log &optional;
+
+		## A SHA1 digest of the file contents.
+		sha1: string &log &optional;
+
+		## A SHA256 digest of the file contents.
+		sha256: string &log &optional;
+	} &redef;
+
+	## A table that can be used to disable file analysis completely for
+	## any files transferred over given network protocol analyzers.
+	const disable: table[AnalyzerTag] of bool = table() &redef;
+
+	## Event that can be handled to access the Info record as it is sent on
+	## to the logging framework.
+	global log_file_analysis: event(rec: Info);
+
+	## The salt concatenated to unique file handle strings generated by
+	## :bro:see:`get_file_handle` before hashing them in to a file id
+	## (the *id* field of :bro:see:`fa_file`).
+	## Provided to help mitigate the possibility of manipulating parts of
+	## network connections that factor in to the file handle in order to
+	## generate two handles that would hash to the same file id.
+	const salt = "I recommend changing this." &redef;
+
+	## Sets the *timeout_interval* field of :bro:see:`fa_file`, which is
+	## used to determine the length of inactivity that is allowed for a file
+	## before internal state related to it is cleaned up.
+	##
+	## f: the file.
+	##
+	## t: the amount of time the file can remain inactive before discarding.
+	##
+	## Returns: true if the timeout interval was set, or false if analysis
+	##          for the *id* isn't currently active.
+	global set_timeout_interval: function(f: fa_file, t: interval): bool;
+
+	## Postpones the timeout of file analysis for a given file.
+	## When used within a :bro:see:`file_timeout` handler, the analysis
+	## will delay timing out for the period of time indicated by
+	## the *timeout_interval* field of :bro:see:`fa_file`, which can be set
+	## with :bro:see:`FileAnalysis::set_timeout_interval`.
+	##
+	## f: the file.
+	##
+	## Returns: true if the timeout will be postponed, or false if analysis
+	##          for the *id* isn't currently active.
+	global postpone_timeout: function(f: fa_file): bool;
+
+	## Adds an analyzer to the analysis of a given file.
+	##
+	## f: the file.
+	##
+	## args: the analyzer type to add along with any arguments it takes.
+	##
+	## Returns: true if the analyzer will be added, or false if analysis
+	##          for the *id* isn't currently active or the *args*
+	##          were invalid for the analyzer type.
+	global add_analyzer: function(f: fa_file, args: AnalyzerArgs): bool;
+
+	## Removes an analyzer from the analysis of a given file.
+	##
+	## f: the file.
+	##
+	## args: the analyzer (type and args) to remove.
+	##
+	## Returns: true if the analyzer will be removed, or false if analysis
+	##          for the *id* isn't currently active.
+	global remove_analyzer: function(f: fa_file, args: AnalyzerArgs): bool;
+
+	## Stops/ignores any further analysis of a given file.
+	##
+	## f: the file.
+	##
+	## Returns: true if analysis for the given file will be ignored for the
+	##          rest of its contents, or false if analysis for the *id*
+	##          isn't currently active.
+	global stop: function(f: fa_file): bool;
+
+	## Sends a sequential stream of data in for file analysis.
+	## Meant for use when providing external file analysis input (e.g.
+	## from the input framework).
+	##
+	## source: a string that uniquely identifies the logical file that the
+	##         data is a part of and describes its source.
+	##
+	## data: bytestring contents of the file to analyze.
+	global data_stream: function(source: string, data: string);
+
+	## Sends a non-sequential chunk of data in for file analysis.
+	## Meant for use when providing external file analysis input (e.g.
+	## from the input framework).
+	##
+	## source: a string that uniquely identifies the logical file that the
+	##         data is a part of and describes its source.
+	##
+	## data: bytestring contents of the file to analyze.
+	##
+	## offset: the offset within the file that this chunk starts.
+	global data_chunk: function(source: string, data: string, offset: count);
+
+	## Signals a content gap in the file bytestream.
+	## Meant for use when providing external file analysis input (e.g.
+	## from the input framework).
+	##
+	## source: a string that uniquely identifies the logical file that the
+	##         data is a part of and describes its source.
+	##
+	## offset: the offset within the file that this gap starts.
+	##
+	## len: the number of bytes that are missing.
+	global gap: function(source: string, offset: count, len: count);
+
+	## Signals the total size of a file.
+	## Meant for use when providing external file analysis input (e.g.
+	## from the input framework).
+	##
+	## source: a string that uniquely identifies the logical file that the
+	##         data is a part of and describes its source.
+	##
+	## size: the number of bytes that comprise the full file.
+	global set_size: function(source: string, size: count);
+
+	## Signals the end of a file.
+	## Meant for use when providing external file analysis input (e.g.
+	## from the input framework).
+	##
+	## source: a string that uniquely identifies the logical file that the
+	##         data is a part of and describes its source.
+	global eof: function(source: string);
+}
+
+redef record fa_file += {
+	info: Info &optional;
+};
+
+function set_info(f: fa_file)
+	{
+	if ( ! f?$info )
+		{
+		local tmp: Info;
+		f$info = tmp;
+		}
+
+	f$info$id = f$id;
+	if ( f?$parent_id ) f$info$parent_id = f$parent_id;
+	if ( f?$source ) f$info$source = f$source;
+	if ( f?$is_orig ) f$info$is_orig = f$is_orig;
+	f$info$last_active = f$last_active;
+	f$info$seen_bytes = f$seen_bytes;
+	if ( f?$total_bytes ) f$info$total_bytes = f$total_bytes;
+	f$info$missing_bytes = f$missing_bytes;
+	f$info$overflow_bytes = f$overflow_bytes;
+	f$info$timeout_interval = f$timeout_interval;
+	f$info$bof_buffer_size = f$bof_buffer_size;
+	if ( f?$mime_type ) f$info$mime_type = f$mime_type;
+	if ( f?$conns )
+		for ( cid in f$conns )
+			add f$info$conn_uids[f$conns[cid]$uid];
+	}
+
+function set_timeout_interval(f: fa_file, t: interval): bool
+	{
+	return __set_timeout_interval(f$id, t);
+	}
+
+function postpone_timeout(f: fa_file): bool
+	{
+	return __postpone_timeout(f$id);
+	}
+
+function add_analyzer(f: fa_file, args: AnalyzerArgs): bool
+	{
+	if ( ! __add_analyzer(f$id, args) ) return F;
+
+	set_info(f);
+	add f$info$analyzers[args$tag];
+
+	if ( args$tag == FileAnalysis::ANALYZER_EXTRACT )
+		add f$info$extracted_files[args$extract_filename];
+
+	return T;
+	}
+
+function remove_analyzer(f: fa_file, args: AnalyzerArgs): bool
+	{
+	return __remove_analyzer(f$id, args);
+	}
+
+function stop(f: fa_file): bool
+	{
+	return __stop(f$id);
+	}
+
+function data_stream(source: string, data: string)
+	{
+	__data_stream(source, data);
+	}
+
+function data_chunk(source: string, data: string, offset: count)
+	{
+	__data_chunk(source, data, offset);
+	}
+
+function gap(source: string, offset: count, len: count)
+	{
+	__gap(source, offset, len);
+	}
+
+function set_size(source: string, size: count)
+	{
+	__set_size(source, size);
+	}
+
+function eof(source: string)
+	{
+	__eof(source);
+	}
+
+event bro_init() &priority=5
+	{
+	Log::create_stream(FileAnalysis::LOG,
+	                   [$columns=Info, $ev=log_file_analysis]);
+	}
+
+event file_timeout(f: fa_file) &priority=5
+	{
+	set_info(f);
+	f$info$timedout = T;
+	}
+
+event file_hash(f: fa_file, kind: string, hash: string) &priority=5
+	{
+	set_info(f);
+	switch ( kind ) {
+	case "md5":
+		f$info$md5 = hash;
+		break;
+	case "sha1":
+		f$info$sha1 = hash;
+		break;
+	case "sha256":
+		f$info$sha256 = hash;
+		break;
+	}
+	}
+
+event file_state_remove(f: fa_file) &priority=5
+	{
+	set_info(f);
+	}
+
+event file_state_remove(f: fa_file) &priority=-5
+	{
+	Log::write(FileAnalysis::LOG, f$info);
+	}
--- a/scripts/base/frameworks/input/load.bro
+++ b/scripts/base/frameworks/input/load.bro
@ -2,4 +2,5 @@
@load ./readers/ascii
@load ./readers/raw
@load ./readers/benchmark
-
+@load ./readers/binary
+@load ./readers/sqlite
--- a/scripts/base/frameworks/input/readers/binary.bro
+++ b/scripts/base/frameworks/input/readers/binary.bro
@ -0,0 +1,8 @@
+##! Interface for the binary input reader.
+
+module InputBinary;
+
+export {
+	## Size of data chunks to read from the input file at a time.
+	const chunk_size = 1024 &redef;
+}
--- a/scripts/base/frameworks/input/readers/sqlite.bro
+++ b/scripts/base/frameworks/input/readers/sqlite.bro
@ -0,0 +1,17 @@
+##! Interface for the SQLite input reader.
+##!
+##! The defaults are set to match Bro's ASCII output.
+
+module InputSQLite;
+
+export {
+	## Separator between set elements.
+	## Please note that the separator has to be exactly one character long.
+	const set_separator = Input::set_separator &redef;
+
+	## String to use for an unset &optional field.
+	const unset_field = Input::unset_field &redef;
+
+	## String to use for empty fields.
+	const empty_field = Input::empty_field &redef;
+}
--- a/scripts/base/frameworks/logging/load.bro
+++ b/scripts/base/frameworks/logging/load.bro
@ -2,5 +2,6 @@
@load ./postprocessors
@load ./writers/ascii
@load ./writers/dataseries
+@load ./writers/sqlite
@load ./writers/elasticsearch
@load ./writers/none
--- a/scripts/base/frameworks/logging/main.bro
+++ b/scripts/base/frameworks/logging/main.bro
@ -189,6 +189,15 @@ export {
 	## .. bro:see:: Log::add_default_filter Log::remove_default_filter
 	global create_stream: function(id: ID, stream: Stream) : bool;

+	## Removes a logging stream completely, stopping all the threads.
+	##
+	## id: The ID enum associated with the logging stream to remove.
+	##
+	## Returns: True if the stream was successfully removed.
+	##
+	## .. bro:see:: Log::create_stream
+	global remove_stream: function(id: ID) : bool;
+
 	## Enables a previously disabled logging stream.  Disabled streams
 	## will not be written to until they are enabled again.  New streams
 	## are enabled by default.
@ -442,6 +451,12 @@ function create_stream(id: ID, stream: Stream) : bool
 	return add_default_filter(id);
 	}

+function remove_stream(id: ID) : bool
+	{
+	delete active_streams[id];
+	return __remove_stream(id);
+	}
+
 function disable_stream(id: ID) : bool
 	{
 	delete active_streams[id];
--- a/scripts/base/frameworks/logging/writers/sqlite.bro
+++ b/scripts/base/frameworks/logging/writers/sqlite.bro
@ -0,0 +1,17 @@
+##! Interface for the SQLite log writer.  Redefinable options are available
+##! to tweak the output format of the SQLite writer.
+
+module LogSQLite;
+
+export {
+	## Separator between set elements.
+	const set_separator = Log::set_separator &redef;
+
+	## String to use for an unset &optional field.
+	const unset_field = Log::unset_field &redef;
+
+	## String to use for empty fields. This should be different from
+	## *unset_field* to make the output unambiguous.
+	const empty_field = Log::empty_field &redef;
+}
+
--- a/scripts/base/frameworks/metrics/cluster.bro
+++ b/scripts/base/frameworks/metrics/cluster.bro
@ -1,264 +0,0 @@
-##! This implements transparent cluster support for the metrics framework.
-##! Do not load this file directly.  It's only meant to be loaded automatically
-##! and will be depending on if the cluster framework has been enabled.
-##! The goal of this script is to make metric calculation completely and
-##! transparently automated when running on a cluster.
-##! 
-##! Events defined here are not exported deliberately because they are meant
-##! to be an internal implementation detail.
-
-@load base/frameworks/cluster
-@load ./main
-
-module Metrics;
-
-export {
-	## Allows a user to decide how large of result groups the 
-	## workers should transmit values for cluster metric aggregation.
-	const cluster_send_in_groups_of = 50 &redef;
-	
-	## The percent of the full threshold value that needs to be met 
-	## on a single worker for that worker to send the value to its manager in
-	## order for it to request a global view for that value.  There is no
-	## requirement that the manager requests a global view for the index
-	## since it may opt not to if it requested a global view for the index
-	## recently.
-	const cluster_request_global_view_percent = 0.1 &redef;
-	
-	## Event sent by the manager in a cluster to initiate the 
-	## collection of metrics values for a filter.
-	global cluster_filter_request: event(uid: string, id: ID, filter_name: string);
-
-	## Event sent by nodes that are collecting metrics after receiving
-	## a request for the metric filter from the manager.
-	global cluster_filter_response: event(uid: string, id: ID, filter_name: string, data: MetricTable, done: bool);
-
-	## This event is sent by the manager in a cluster to initiate the
-	## collection of a single index value from a filter.  It's typically
-	## used to get intermediate updates before the break interval triggers
-	## to speed detection of a value crossing a threshold.
-	global cluster_index_request: event(uid: string, id: ID, filter_name: string, index: Index);
-
-	## This event is sent by nodes in response to a 
-	## :bro:id:`Metrics::cluster_index_request` event.
-	global cluster_index_response: event(uid: string, id: ID, filter_name: string, index: Index, val: count);
-
-	## This is sent by workers to indicate that they crossed the percent of the 
-	## current threshold by the percentage defined globally in 
-	## :bro:id:`Metrics::cluster_request_global_view_percent`
-	global cluster_index_intermediate_response: event(id: Metrics::ID, filter_name: string, index: Metrics::Index, val: count);
-
-	## This event is scheduled internally on workers to send result chunks.
-	global send_data: event(uid: string, id: ID, filter_name: string, data: MetricTable);
-	
-}
-
-
-# This is maintained by managers so they can know what data they requested and
-# when they requested it.
-global requested_results: table[string] of time = table() &create_expire=5mins;
-
-# TODO: The next 4 variables make the assumption that a value never 
-#       takes longer than 5 minutes to transmit from workers to manager.  This needs to 
-#       be tunable or self-tuning.  These should also be restructured to be
-#       maintained within a single variable.
-
-# This variable is maintained by manager nodes as they collect and aggregate 
-# results.
-global filter_results: table[string, ID, string] of MetricTable &create_expire=5mins;
-
-# This variable is maintained by manager nodes to track how many "dones" they
-# collected per collection unique id.  Once the number of results for a uid 
-# matches the number of peer nodes that results should be coming from, the 
-# result is written out and deleted from here.
-# TODO: add an &expire_func in case not all results are received.
-global done_with: table[string] of count &create_expire=5mins &default=0;
-
-# This variable is maintained by managers to track intermediate responses as 
-# they are getting a global view for a certain index.
-global index_requests: table[string, ID, string, Index] of count &create_expire=5mins &default=0;
-
-# This variable is maintained by all hosts for different purposes. Non-managers
-# maintain it to know what indexes they have recently sent as intermediate
-# updates so they don't overwhelm their manager. Managers maintain it so they
-# don't overwhelm workers with intermediate index requests. The count that is
-# yielded is the number of times the percentage threshold has been crossed and
-# an intermediate result has been received. The manager may optionally request
-# the index again before data expires from here if too many workers are crossing
-# the percentage threshold (not implemented yet!).
-global recent_global_view_indexes: table[ID, string, Index] of count &create_expire=5mins &default=0;
-
-# Add events to the cluster framework to make this work.
-redef Cluster::manager2worker_events += /Metrics::cluster_(filter_request|index_request)/;
-redef Cluster::worker2manager_events += /Metrics::cluster_(filter_response|index_response|index_intermediate_response)/;
-
-@if ( Cluster::local_node_type() != Cluster::MANAGER )
-# This is done on all non-manager node types in the event that a metric is 
-# being collected somewhere other than a worker.
-function data_added(filter: Filter, index: Index, val: count)
-	{
-	# If an intermediate update for this value was sent recently, don't send
-	# it again.
-	if ( [filter$id, filter$name, index] in recent_global_view_indexes )
-		return;
-		
-	# If val is 5 and global view % is 0.1 (10%), pct_val will be 50.  If that
-	# crosses the full threshold then it's a candidate to send as an 
-	# intermediate update.
-	local pct_val = double_to_count(val / cluster_request_global_view_percent);
-	
-	if ( check_notice(filter, index, pct_val) ) 
-		{
-		# kick off intermediate update
-		event Metrics::cluster_index_intermediate_response(filter$id, filter$name, index, val);
-		
-		++recent_global_view_indexes[filter$id, filter$name, index];
-		}
-	}
-
-event Metrics::send_data(uid: string, id: ID, filter_name: string, data: MetricTable)
-	{
-	#print fmt("WORKER %s: sending data for uid %s...", Cluster::node, uid);
-	
-	local local_data: MetricTable;
-	local num_added = 0;
-	for ( index in data )
-		{
-		local_data[index] = data[index];
-		delete data[index];
-		
-		# Only send cluster_send_in_groups_of at a time.  Queue another
-		# event to send the next group.
-		if ( cluster_send_in_groups_of == ++num_added )
-			break;
-		}
-	
-	local done = F;
-	# If data is empty, this metric is done.
-	if ( |data| == 0 )
-		done = T;
-	
-	event Metrics::cluster_filter_response(uid, id, filter_name, local_data, done);
-	if ( ! done )
-		event Metrics::send_data(uid, id, filter_name, data);
-	}
-
-event Metrics::cluster_filter_request(uid: string, id: ID, filter_name: string)
-	{
-	#print fmt("WORKER %s: received the cluster_filter_request event.", Cluster::node);
-	
-	# Initiate sending all of the data for the requested filter.
-	event Metrics::send_data(uid, id, filter_name, store[id, filter_name]);
-	
-	# Lookup the actual filter and reset it, the reference to the data
-	# currently stored will be maintained interally by the send_data event.
-	reset(filter_store[id, filter_name]);
-	}
-	
-event Metrics::cluster_index_request(uid: string, id: ID, filter_name: string, index: Index)
-	{
-	local val=0;
-	if ( index in store[id, filter_name] )
-		val = store[id, filter_name][index];
-	
-	# fmt("WORKER %s: received the cluster_index_request event for %s=%d.", Cluster::node, index2str(index), val);
-	event Metrics::cluster_index_response(uid, id, filter_name, index, val);
-	}
-
-@endif
-
-
-@if ( Cluster::local_node_type() == Cluster::MANAGER )
-
-# Manager's handle logging.
-event Metrics::log_it(filter: Filter)
-	{
-	#print fmt("%.6f MANAGER: breaking %s filter for %s metric", network_time(), filter$name, filter$id);
-	
-	local uid = unique_id("");
-	
-	# Set some tracking variables.
-	requested_results[uid] = network_time();
-	filter_results[uid, filter$id, filter$name] = table();
-	
-	# Request data from peers.
-	event Metrics::cluster_filter_request(uid, filter$id, filter$name);
-	# Schedule the log_it event for the next break period.
-	schedule filter$break_interval { Metrics::log_it(filter) };
-	}
-
-# This is unlikely to be called often, but it's here in case there are metrics
-# being collected by managers.
-function data_added(filter: Filter, index: Index, val: count)
-	{
-	if ( check_notice(filter, index, val) )
-		do_notice(filter, index, val);
-	}
-	
-event Metrics::cluster_index_response(uid: string, id: ID, filter_name: string, index: Index, val: count)
-	{
-	#print fmt("%0.6f MANAGER: receiving index data from %s", network_time(), get_event_peer()$descr);
-
-	if ( [uid, id, filter_name, index] !in index_requests )
-		index_requests[uid, id, filter_name, index] = 0;
-	
-	index_requests[uid, id, filter_name, index] += val;
-	local ir = index_requests[uid, id, filter_name, index];
-	
-	++done_with[uid];
-	if ( Cluster::worker_count == done_with[uid] )
-		{
-		if ( check_notice(filter_store[id, filter_name], index, ir) )
-			do_notice(filter_store[id, filter_name], index, ir);
-		delete done_with[uid];
-		delete index_requests[uid, id, filter_name, index];
-		}
-	}
-
-# Managers handle intermediate updates here.
-event Metrics::cluster_index_intermediate_response(id: ID, filter_name: string, index: Index, val: count)
-	{
-	#print fmt("MANAGER: receiving intermediate index data from %s", get_event_peer()$descr);
-	#print fmt("MANAGER: requesting index data for %s", index2str(index));
-	
-	local uid = unique_id("");
-	event Metrics::cluster_index_request(uid, id, filter_name, index);
-	++recent_global_view_indexes[id, filter_name, index];
-	}
-
-event Metrics::cluster_filter_response(uid: string, id: ID, filter_name: string, data: MetricTable, done: bool)
-	{
-	#print fmt("MANAGER: receiving results from %s", get_event_peer()$descr);
-	
-	local local_data = filter_results[uid, id, filter_name];
-	for ( index in data )
-		{
-		if ( index !in local_data )
-			local_data[index] = 0;
-		local_data[index] += data[index];
-		}
-	
-	# Mark another worker as being "done" for this uid.
-	if ( done )
-		++done_with[uid];
-	
-	# If the data has been collected from all peers, we are done and ready to log.
-	if ( Cluster::worker_count == done_with[uid] )
-		{
-		local ts = network_time();
-		# Log the time this was initially requested if it's available.
-		if ( uid in requested_results )
-			{
-			ts = requested_results[uid];
-			delete requested_results[uid];
-			}
-		
-		write_log(ts, filter_store[id, filter_name], local_data);
-		
-		# Clean up
-		delete filter_results[uid, id, filter_name];
-		delete done_with[uid];
-		}
-	}
-
-@endif
--- a/scripts/base/frameworks/metrics/main.bro
+++ b/scripts/base/frameworks/metrics/main.bro
@ -1,320 +0,0 @@
-##! The metrics framework provides a way to count and measure data.  
-
-@load base/frameworks/notice
-
-module Metrics;
-
-export {
-	## The metrics logging stream identifier.
-	redef enum Log::ID += { LOG };
-	
-	## Identifiers for metrics to collect.
-	type ID: enum {
-		## Blank placeholder value.
-		NOTHING,
-	};
-	
-	## The default interval used for "breaking" metrics and writing the 
-	## current value to the logging stream.
-	const default_break_interval = 15mins &redef;
-	
-	## This is the interval for how often threshold based notices will happen 
-	## after they have already fired.
-	const renotice_interval = 1hr &redef;
-	
-	## Represents a thing which is having metrics collected for it.  An instance
-	## of this record type and a :bro:type:`Metrics::ID` together represent a 
-	## single measurement.
-	type Index: record {
-		## Host is the value to which this metric applies.
-		host:         addr &optional;
-		
-		## A non-address related metric or a sub-key for an address based metric.
-		## An example might be successful SSH connections by client IP address
-		## where the client string would be the index value.
-		## Another example might be number of HTTP requests to a particular
-		## value in a Host header.  This is an example of a non-host based
-		## metric since multiple IP addresses could respond for the same Host
-		## header value.
-		str:        string &optional;
-		
-		## The CIDR block that this metric applies to.  This is typically
-		## only used internally for host based aggregation.
-		network:      subnet &optional;
-	} &log;
-	
-	## The record type that is used for logging metrics.
-	type Info: record {
-		## Timestamp at which the metric was "broken".
-		ts:           time   &log;
-		## What measurement the metric represents.
-		metric_id:    ID     &log;
-		## The name of the filter being logged.  :bro:type:`Metrics::ID` values
-		## can have multiple filters which represent different perspectives on
-		## the data so this is necessary to understand the value.
-		filter_name:  string &log;
-		## What the metric value applies to.
-		index:        Index  &log;
-		## The simple numeric value of the metric.
-		value:        count  &log;
-	};
-	
-    # TODO: configure a metrics filter logging stream to log the current
-	#       metrics configuration in case someone is looking through
-	#       old logs and the configuration has changed since then.
-	
-	## Filters define how the data from a metric is aggregated and handled.  
-	## Filters can be used to set how often the measurements are cut or "broken"
-	## and logged or how the data within them is aggregated.  It's also 
-	## possible to disable logging and use filters for thresholding.
-	type Filter: record {
-		## The :bro:type:`Metrics::ID` that this filter applies to.
-		id:                ID                      &optional;
-		## The name for this filter so that multiple filters can be
-		## applied to a single metrics to get a different view of the same
-		## metric data being collected (different aggregation, break, etc).
-		name:              string                  &default="default";
-		## A predicate so that you can decide per index if you would like
-		## to accept the data being inserted.
-		pred:              function(index: Index): bool &optional;
-		## Global mask by which you'd like to aggregate traffic.
-		aggregation_mask:  count                   &optional;
-		## This is essentially a mapping table between addresses and subnets.
-		aggregation_table: table[subnet] of subnet &optional;
-		## The interval at which this filter should be "broken" and written
-		## to the logging stream.  The counters are also reset to zero at 
-		## this time so any threshold based detection needs to be set to a 
-		## number that should be expected to happen within this period.
-		break_interval:    interval                &default=default_break_interval;
-		## This determines if the result of this filter is sent to the metrics
-		## logging stream.  One use for the logging framework is as an internal
-		## thresholding and statistics gathering utility that is meant to
-		## never log but rather to generate notices and derive data.
-		log:               bool                    &default=T;
-		## If this and a $notice_threshold value are set, this notice type
-		## will be generated by the metrics framework.
-		note:              Notice::Type            &optional;
-		## A straight threshold for generating a notice.
-		notice_threshold:  count                   &optional;
-		## A series of thresholds at which to generate notices.
-		notice_thresholds: vector of count         &optional;
-		## How often this notice should be raised for this filter.  It 
-		## will be generated everytime it crosses a threshold, but if the 
-		## $break_interval is set to 5mins and this is set to 1hr the notice
-		## only be generated once per hour even if something crosses the
-		## threshold in every break interval.
-		notice_freq:       interval                &optional;
-	};
-	
-	## Function to associate a metric filter with a metric ID.
-	## 
-	## id: The metric ID that the filter should be associated with.
-	##
-	## filter: The record representing the filter configuration.
-	global add_filter: function(id: ID, filter: Filter);
-	
-	## Add data into a :bro:type:`Metrics::ID`.  This should be called when
-	## a script has measured some point value and is ready to increment the
-	## counters.
-	##
-	## id: The metric ID that the data represents.
-	##
-	## index: The metric index that the value is to be added to.
-	##
-	## increment: How much to increment the counter by.
-	global add_data: function(id: ID, index: Index, increment: count);
-	
-	## Helper function to represent a :bro:type:`Metrics::Index` value as 
-	## a simple string
-	## 
-	## index: The metric index that is to be converted into a string.
-	##
-	## Returns: A string reprentation of the metric index.
-	global index2str: function(index: Index): string;
-	
-	## Event that is used to "finish" metrics and adapt the metrics
-	## framework for clustered or non-clustered usage.
-	##
-	## ..note: This is primarily intended for internal use.
-	global log_it: event(filter: Filter);
-	
-	## Event to access metrics records as they are passed to the logging framework.
-	global log_metrics: event(rec: Info);
-	
-	## Type to store a table of metrics values.  Interal use only!
-	type MetricTable: table[Index] of count &default=0;
-}
-
-redef record Notice::Info += {
-	metric_index: Index &log &optional;
-};
-
-global metric_filters: table[ID] of vector of Filter = table();
-global filter_store: table[ID, string] of Filter = table();
-
-# This is indexed by metric ID and stream filter name.
-global store: table[ID, string] of MetricTable = table() &default=table();
-
-# This function checks if a threshold has been crossed and generates a 
-# notice if it has.  It is also used as a method to implement 
-# mid-break-interval threshold crossing detection for cluster deployments.
-global check_notice: function(filter: Filter, index: Index, val: count): bool;
-
-# This is hook for watching thresholds being crossed.  It is called whenever
-# index values are updated and the new val is given as the `val` argument.
-global data_added: function(filter: Filter, index: Index, val: count);
-
-# This stores the current threshold index for filters using the
-# $notice_threshold and $notice_thresholds elements.
-global thresholds: table[ID, string, Index] of count = {} &create_expire=renotice_interval &default=0;
-
-event bro_init() &priority=5
-	{
-	Log::create_stream(Metrics::LOG, [$columns=Info, $ev=log_metrics]);
-	}
-
-function index2str(index: Index): string
-	{
-	local out = "";
-	if ( index?$host )
-		out = fmt("%shost=%s", out, index$host);
-	if ( index?$network )
-		out = fmt("%s%snetwork=%s", out, |out|==0 ? "" : ", ", index$network);
-	if ( index?$str )
-		out = fmt("%s%sstr=%s", out, |out|==0 ? "" : ", ", index$str);
-	return fmt("metric_index(%s)", out);
-	}
-	
-function write_log(ts: time, filter: Filter, data: MetricTable)
-	{
-	for ( index in data )
-		{
-		local val = data[index];
-		local m: Info = [$ts=ts,
-		                 $metric_id=filter$id,
-		                 $filter_name=filter$name,
-		                 $index=index,
-		                 $value=val];
-		
-		if ( filter$log )
-			Log::write(Metrics::LOG, m);
-		}
-	}
-
-
-function reset(filter: Filter)
-	{
-	store[filter$id, filter$name] = table();
-	}
-
-function add_filter(id: ID, filter: Filter)
-	{
-	if ( filter?$aggregation_table && filter?$aggregation_mask )
-		{
-		print "INVALID Metric filter: Defined $aggregation_table and $aggregation_mask.";
-		return;
-		}
-	if ( [id, filter$name] in store )
-		{
-		print fmt("INVALID Metric filter: Filter with name \"%s\" already exists.", filter$name);
-		return;
-		}
-	if ( filter?$notice_threshold && filter?$notice_thresholds )
-		{
-		print "INVALID Metric filter: Defined both $notice_threshold and $notice_thresholds";
-		return;
-		}
-	
-	if ( ! filter?$id )
-		filter$id = id;
-	
-	if ( id !in metric_filters )
-		metric_filters[id] = vector();
-	metric_filters[id][|metric_filters[id]|] = filter;
-
-	filter_store[id, filter$name] = filter;
-	store[id, filter$name] = table();
-	
-	schedule filter$break_interval { Metrics::log_it(filter) };
-	}
-	
-function add_data(id: ID, index: Index, increment: count)
-	{
-	if ( id !in metric_filters )
-		return;
-	
-	local filters = metric_filters[id];
-	
-	# Try to add the data to all of the defined filters for the metric.
-	for ( filter_id in filters )
-		{
-		local filter = filters[filter_id];
-		
-		# If this filter has a predicate, run the predicate and skip this
-		# index if the predicate return false.
-		if ( filter?$pred && ! filter$pred(index) )
-			next;
-		
-		if ( index?$host )
-			{
-			if ( filter?$aggregation_mask )
-				{
-				index$network = mask_addr(index$host, filter$aggregation_mask);
-				delete index$host;
-				}
-			else if ( filter?$aggregation_table )
-				{
-				# Don't add the data if the aggregation table doesn't include 
-				# the given host address.
-				if ( index$host !in filter$aggregation_table )
-					return;
-				index$network = filter$aggregation_table[index$host];
-				delete index$host;
-				}
-			}
-		
-		local metric_tbl = store[id, filter$name];
-		if ( index !in metric_tbl )
-			metric_tbl[index] = 0;
-		metric_tbl[index] += increment;
-		
-		data_added(filter, index, metric_tbl[index]);
-		}
-	}
-
-function check_notice(filter: Filter, index: Index, val: count): bool
-	{
-	if ( (filter?$notice_threshold &&
-	      [filter$id, filter$name, index] !in thresholds &&
-	      val >= filter$notice_threshold) ||
-	     (filter?$notice_thresholds &&
-	      |filter$notice_thresholds| <= thresholds[filter$id, filter$name, index] &&
-	      val >= filter$notice_thresholds[thresholds[filter$id, filter$name, index]]) )
-		return T;
-	else
-		return F;
-	}
-		
-function do_notice(filter: Filter, index: Index, val: count)
-	{
-	# We include $peer_descr here because the a manager count have actually 
-	# generated the notice even though the current remote peer for the event 
-	# calling this could be a worker if this is running as a cluster.
-	local n: Notice::Info = [$note=filter$note, 
-	                         $n=val, 
-	                         $metric_index=index, 
-	                         $peer_descr=peer_description];
-	n$msg = fmt("Threshold crossed by %s %d/%d", index2str(index), val, filter$notice_threshold);
-	if ( index?$str )
-		n$sub = index$str;
-	if ( index?$host )
-		n$src = index$host;
-	# TODO: not sure where to put the network yet.
-	
-	NOTICE(n);
-	
-	# This just needs set to some value so that it doesn't refire the 
-	# notice until it expires from the table or it crosses the next 
-	# threshold in the case of vectors of thresholds.
-	++thresholds[filter$id, filter$name, index];
-	}
--- a/scripts/base/frameworks/metrics/non-cluster.bro
+++ b/scripts/base/frameworks/metrics/non-cluster.bro
@ -1,21 +0,0 @@
-@load ./main
-
-module Metrics;
-
-event Metrics::log_it(filter: Filter)
-	{
-	local id = filter$id;
-	local name = filter$name;
-	
-	write_log(network_time(), filter, store[id, name]);
-	reset(filter);
-	
-	schedule filter$break_interval { Metrics::log_it(filter) };
-	}
-	
-	
-function data_added(filter: Filter, index: Index, val: count)
-	{
-	if ( check_notice(filter, index, val) )
-		do_notice(filter, index, val);
-	}
--- a/scripts/base/frameworks/notice/main.bro
+++ b/scripts/base/frameworks/notice/main.bro
@ -431,9 +431,6 @@ hook Notice::notice(n: Notice::Info) &priority=-5
 		}
 	}

-## This determines if a notice is being suppressed.  It is only used
-## internally as part of the mechanics for the global :bro:id:`NOTICE`
-## function.
 function is_being_suppressed(n: Notice::Info): bool
 	{
 	if ( n?$identifier && [n$note, n$identifier] in suppressing )
--- a/scripts/base/frameworks/sumstats/load.bro
+++ b/scripts/base/frameworks/sumstats/load.bro
@ -1,4 +1,5 @@
@load ./main
+@load ./plugins

 # The cluster framework must be loaded first.
@load base/frameworks/cluster
--- a/scripts/base/frameworks/sumstats/cluster.bro
+++ b/scripts/base/frameworks/sumstats/cluster.bro
@ -0,0 +1,346 @@
+##! This implements transparent cluster support for the SumStats framework.
+##! Do not load this file directly.  It's only meant to be loaded automatically,
+##! which happens only if the cluster framework has been enabled.
+##! The goal of this script is to make sumstats calculation completely and
+##! transparently automated when running on a cluster.
+
+@load base/frameworks/cluster
+@load ./main
+
+module SumStats;
+
+export {
+	## The maximum number of results a worker transmits in a single group when
+	## sending values to the manager for cluster stats aggregation.
+	const cluster_send_in_groups_of = 50 &redef;
+
+	## The percent of the full threshold value that needs to be met on a single worker
+	## for that worker to send the value to its manager in order for it to request a
+	## global view for that value.  There is no requirement that the manager requests
+	## a global view for the key since it may opt not to if it requested a global view
+	## for the key recently.
+	const cluster_request_global_view_percent = 0.2 &redef;
+
+	## This is to deal with intermediate update overload.  A manager will only allow
+	## this many intermediate update requests to the workers to be inflight at any
+	## given time.  Requested intermediate updates are currently thrown out and not
+	## performed.  In practice this should hopefully have a minimal effect.
+	const max_outstanding_global_views = 10 &redef;
+
+	## Intermediate updates can cause overload situations on very large clusters.
+	## Disabling this option may help reduce load and correct intermittent problems.
+	## This option is meant to be temporary.
+	const enable_intermediate_updates = T &redef;
+
+	## Event sent by the manager in a cluster to initiate the collection of values for
+	## a sumstat.
+	global cluster_ss_request: event(uid: string, ssid: string);
+
+	## Event sent by nodes that are collecting sumstats after receiving a request for
+	## the sumstat from the manager.
+	global cluster_ss_response: event(uid: string, ssid: string, data: ResultTable, done: bool);
+
+	## This event is sent by the manager in a cluster to initiate the collection of
+	## a single key value from a sumstat.  It's typically used to get intermediate
+	## updates before the break interval triggers to speed detection of a value
+	## crossing a threshold.
+	global cluster_key_request: event(uid: string, ssid: string, key: Key);
+
+	## This event is sent by nodes in response to a
+	## :bro:id:`SumStats::cluster_key_request` event.
+	global cluster_key_response: event(uid: string, ssid: string, key: Key, result: Result);
+
+	## This is sent by workers to indicate that a key's local value has reached
+	## the fraction of the current threshold defined globally in
+	## :bro:id:`SumStats::cluster_request_global_view_percent`.
+	global cluster_key_intermediate_response: event(ssid: string, key: SumStats::Key);
+
+	## This event is scheduled internally on workers to send result chunks.
+	global send_data: event(uid: string, ssid: string, data: ResultTable);
+
+	## This event is generated when a threshold is crossed.
+	global cluster_threshold_crossed: event(ssid: string, key: SumStats::Key, thold: Thresholding);
+}
+
+# Add events to the cluster framework to make this work.
+redef Cluster::manager2worker_events += /SumStats::cluster_(ss_request|key_request|threshold_crossed)/;
+redef Cluster::manager2worker_events += /SumStats::thresholds_reset/;
+redef Cluster::worker2manager_events += /SumStats::cluster_(ss_response|key_response|key_intermediate_response)/;
+
+@if ( Cluster::local_node_type() != Cluster::MANAGER )
+# This variable is maintained to know which keys have recently been sent as
+# intermediate updates so they don't overwhelm their manager. The count that is
+# yielded is the number of times the percentage threshold has been crossed and
+# an intermediate update has been sent.
+global recent_global_view_keys: table[string, Key] of count &create_expire=1min &default=0;
+
+event bro_init() &priority=-100
+	{
+	# The manager is the only host allowed to track these.
+	stats_store = table();
+	reducer_store = table();
+	}
+
+# This is done on all non-manager node types in the event that a sumstat is
+# being collected somewhere other than a worker.
+function data_added(ss: SumStat, key: Key, result: Result)
+	{
+	# If an intermediate update for this value was sent recently, don't send
+	# it again.
+	if ( [ss$id, key] in recent_global_view_keys )
+		return;
+
+	# If val is 5 and global view % is 0.1 (10%), pct_val will be 50.  If that
+	# crosses the full threshold then it's a candidate to send as an
+	# intermediate update.
+	if ( enable_intermediate_updates &&
+	     check_thresholds(ss, key, result, cluster_request_global_view_percent) )
+		{
+		# kick off intermediate update
+		event SumStats::cluster_key_intermediate_response(ss$id, key);
+		++recent_global_view_keys[ss$id, key];
+		}
+	}
+
+event SumStats::send_data(uid: string, ssid: string, data: ResultTable)
+	{
+	#print fmt("WORKER %s: sending data for uid %s...", Cluster::node, uid);
+
+	local local_data: ResultTable = table();
+	local num_added = 0;
+	for ( key in data )
+		{
+		local_data[key] = data[key];
+		delete data[key];
+
+		# Only send cluster_send_in_groups_of at a time.  Queue another
+		# event to send the next group.
+		if ( cluster_send_in_groups_of == ++num_added )
+			break;
+		}
+
+	local done = F;
+	# If data is empty, this sumstat is done.
+	if ( |data| == 0 )
+		done = T;
+
+	# Note: copy is needed to compensate serialization caching issue. This should be
+	# changed to something else later. 
+	event SumStats::cluster_ss_response(uid, ssid, copy(local_data), done);
+	if ( ! done )
+		schedule 0.01 sec { SumStats::send_data(uid, ssid, data) };
+	}
+
+event SumStats::cluster_ss_request(uid: string, ssid: string)
+	{
+	#print fmt("WORKER %s: received the cluster_ss_request event for %s.", Cluster::node, ssid);
+
+	# Initiate sending all of the data for the requested stats (an empty table if none).
+	if ( ssid in result_store )
+		event SumStats::send_data(uid, ssid, result_store[ssid]);
+	else
+		event SumStats::send_data(uid, ssid, table());
+
+	# Look up the actual sumstat and reset it; the reference to the data
+	# currently stored will be maintained internally by the send_data event.
+	if ( ssid in stats_store )
+		reset(stats_store[ssid]);
+	}
+
+event SumStats::cluster_key_request(uid: string, ssid: string, key: Key)
+	{
+	if ( ssid in result_store && key in result_store[ssid] )
+		{
+		#print fmt("WORKER %s: received the cluster_key_request event for %s=%s.", Cluster::node, key2str(key), result_store[ssid][key]);
+
+		# Note: copy is needed to compensate for a serialization caching issue. This
+		# should be changed to something else later.
+		event SumStats::cluster_key_response(uid, ssid, key, copy(result_store[ssid][key]));
+		}
+	else
+		{
+		# We need to send an empty response if we don't have the data so that the manager
+		# can know that it heard back from all of the workers.
+		event SumStats::cluster_key_response(uid, ssid, key, table());
+		}
+	}
+
+event SumStats::cluster_threshold_crossed(ssid: string, key: SumStats::Key, thold: Thresholding)
+	{
+	if ( ssid !in threshold_tracker )
+		threshold_tracker[ssid] = table();
+
+	threshold_tracker[ssid][key] = thold;
+	}
+
+event SumStats::thresholds_reset(ssid: string)
+	{
+	threshold_tracker[ssid] = table();
+	}
+
+@endif
+
+
+@if ( Cluster::local_node_type() == Cluster::MANAGER )
+
+# This variable is maintained by manager nodes as they collect and aggregate
+# results.
+# Index on a uid.
+global stats_results: table[string] of ResultTable &read_expire=1min;
+
+# This variable is maintained by manager nodes to track how many "dones" they
+# collected per collection unique id.  Once the number of results for a uid
+# matches the number of peer nodes that results should be coming from, the
+# result is written out and deleted from here.
+# Indexed on a uid.
+# TODO: add an &expire_func in case not all results are received.
+global done_with: table[string] of count &read_expire=1min &default=0;
+
+# This variable is maintained by managers to track intermediate responses as
+# they are getting a global view for a certain key.
+# Indexed on a uid.
+global key_requests: table[string] of Result &read_expire=1min;
+
+# This variable is maintained by managers to prevent overwhelming communication due
+# to too many intermediate updates.  Each sumstat is tracked separately so that
+# one won't overwhelm and degrade other quieter sumstats.
+# Indexed on a sumstat id.
+global outstanding_global_views: table[string] of count &default=0;
+
+const zero_time = double_to_time(0.0);
+# Managers handle logging.
+event SumStats::finish_epoch(ss: SumStat)
+	{
+	if ( network_time() > zero_time )
+		{
+		#print fmt("%.6f MANAGER: breaking %s sumstat for %s sumstat", network_time(), ss$name, ss$id);
+		local uid = unique_id("");
+
+		if ( uid in stats_results )
+			delete stats_results[uid];
+		stats_results[uid] = table();
+
+		# Request data from peers.
+		event SumStats::cluster_ss_request(uid, ss$id);
+		}
+
+	# Schedule the next finish_epoch event.
+	schedule ss$epoch { SumStats::finish_epoch(ss) };
+	}
+
+# This is unlikely to be called often, but it's here in
+# case there are sumstats being collected by managers.
+function data_added(ss: SumStat, key: Key, result: Result)
+	{
+	if ( check_thresholds(ss, key, result, 1.0) )
+		{
+		threshold_crossed(ss, key, result);
+		event SumStats::cluster_threshold_crossed(ss$id, key, threshold_tracker[ss$id][key]);
+		}
+	}
+
+event SumStats::cluster_key_response(uid: string, ssid: string, key: Key, result: Result)
+	{
+	#print fmt("%0.6f MANAGER: receiving key data from %s - %s=%s", network_time(), get_event_peer()$descr, key2str(key), result);
+
+	# Fold this response into the running result for the request: merge it with what
+	# earlier responses stored under this uid, or store it as-is if it is the first.
+	if ( uid in key_requests )
+		key_requests[uid] = compose_results(key_requests[uid], result);
+	else
+		key_requests[uid] = result;
+
+	# Mark that a worker is done.
+	++done_with[uid];
+
+	#print fmt("worker_count:%d :: done_with:%d", Cluster::worker_count, done_with[uid]);
+	if ( Cluster::worker_count == done_with[uid] )
+		{
+		local ss = stats_store[ssid];
+		local ir = key_requests[uid];
+		if ( check_thresholds(ss, key, ir, 1.0) )
+			{
+			threshold_crossed(ss, key, ir);
+			event SumStats::cluster_threshold_crossed(ss$id, key, threshold_tracker[ss$id][key]);
+			}
+
+		delete done_with[uid];
+		delete key_requests[uid];
+		# Check that there is an outstanding view before subtracting.
+		if ( outstanding_global_views[ssid] > 0 )
+			--outstanding_global_views[ssid];
+		}
+	}
+
+# Managers handle intermediate updates here by requesting a global view of the key.
+event SumStats::cluster_key_intermediate_response(ssid: string, key: Key)
+	{
+	#print fmt("MANAGER: receiving intermediate key data from %s", get_event_peer()$descr);
+	#print fmt("MANAGER: requesting key data for %s", key2str(key));
+
+	# Refuse once the cap is reached; the table yields 0 for unseen sumstat ids.
+	if ( outstanding_global_views[ssid] >= max_outstanding_global_views )
+		{
+		# Don't do this intermediate update.  Perhaps at some point in the future
+		# we will queue and randomly select from these ignored intermediate
+		# update requests.
+		return;
+		}
+
+	++outstanding_global_views[ssid];
+
+	local uid = unique_id("");
+	event SumStats::cluster_key_request(uid, ssid, key);
+	}
+
+event SumStats::cluster_ss_response(uid: string, ssid: string, data: ResultTable, done: bool)
+	{
+	#print fmt("MANAGER: receiving results from %s", get_event_peer()$descr);
+
+	# Mark another worker as being "done" for this uid.
+	if ( done )
+		++done_with[uid];
+
+	local local_data = stats_results[uid];
+	local ss = stats_store[ssid];
+
+	for ( key in data )
+		{
+		if ( key in local_data )
+			local_data[key] = compose_results(local_data[key], data[key]);
+		else
+			local_data[key] = data[key];
+
+		# If a stat is done being collected, thresholds for each key
+		# need to be checked so we're doing it here to avoid doubly
+		# iterating over each key.
+		if ( Cluster::worker_count == done_with[uid] )
+			{
+			if ( check_thresholds(ss, key, local_data[key], 1.0) )
+				{
+				threshold_crossed(ss, key, local_data[key]);
+				event SumStats::cluster_threshold_crossed(ss$id, key, threshold_tracker[ss$id][key]);
+				}
+			}
+		}
+
+	# If the data has been collected from all peers, we are done and ready to finish.
+	if ( Cluster::worker_count == done_with[uid] )
+		{
+		if ( ss?$epoch_finished )
+			ss$epoch_finished(local_data);
+
+		# Clean up
+		delete stats_results[uid];
+		delete done_with[uid];
+		# Not sure I need to reset the sumstat on the manager.
+		reset(ss);
+		}
+	}
+
+event remote_connection_handshake_done(p: event_peer) &priority=5
+	{
+	send_id(p, "SumStats::stats_store");
+	send_id(p, "SumStats::reducer_store");
+	}
+@endif
--- a/scripts/base/frameworks/sumstats/main.bro
+++ b/scripts/base/frameworks/sumstats/main.bro
@ -0,0 +1,436 @@
+##! The summary statistics framework provides a way to
+##! summarize large streams of data into simple reduced
+##! measurements.
+
+module SumStats;
+
+export {
+	## The various calculations are all defined as plugins.
+	type Calculation: enum {
+		PLACEHOLDER
+	};
+
+	## Represents a thing which is having summarization
+	## results collected for it.
+	type Key: record {
+		## A non-address related summarization or a sub-key for
+		## an address based summarization. An example might be
+		## successful SSH connections by client IP address
+		## where the client string would be the key value.
+		## Another example might be number of HTTP requests to
+		## a particular value in a Host header.  This is an
+		## example of a non-host based metric since multiple
+		## IP addresses could respond for the same Host
+		## header value.
+		str:  string &optional;
+
+		## Host is the value to which this metric applies.
+		host: addr &optional;
+	};
+
+	## Represents data being added for a single observation.
+	## Only supply a single field at a time!
+	type Observation: record {
+		## Count value.
+		num:  count  &optional;
+		## Double value.
+		dbl:  double &optional;
+		## String value.
+		str:  string &optional;
+	};
+
+	## Describes how a single observation stream is reduced: which
+	## stream to attach to, which calculations to apply to it, and
+	## optional per-key filtering and key normalization.
+	type Reducer: record {
+		## Observation stream identifier for the reducer
+		## to attach to.
+		stream:         string;
+
+		## The calculations to perform on the data points.
+		apply:          set[Calculation];
+
+		## A predicate so that you can decide per key if you
+		## would like to accept the data being inserted.  The
+		## observation is skipped for this reducer when the
+		## predicate returns F.
+		pred:           function(key: SumStats::Key, obs: SumStats::Observation): bool &optional;
+
+		## A function to normalize the key.  This can be used to aggregate or
+		## normalize the entire key.  It is called with a copy of the key.
+		normalize_key:  function(key: SumStats::Key): Key &optional;
+	};
+
+	## Value calculated for an observation stream fed into a reducer.
+	## Most of the fields are added by plugins.
+	type ResultVal: record {
+		## The time when the first observation was added to
+		## this result value.
+		begin:  time;
+
+		## The time when the last observation was added to
+		## this result value.
+		end:    time;
+
+		## The number of observations received.
+		num:    count &default=0;
+	};
+
+	## Type to store results for multiple reducers.
+	type Result: table[string] of ResultVal;
+
+	## Type to store a table of sumstats results indexed
+	## by keys.
+	type ResultTable: table[Key] of Result;
+
+	## SumStats represent an aggregation of reducers along with
+	## mechanisms to handle various situations like the epoch ending
+	## or thresholds being crossed.
+	##
+	## It's best to not access any global state outside
+	## of the variables given to the callbacks because there
+	## is no assurance provided as to where the callbacks
+	## will be executed on clusters.
+	type SumStat: record {
+		## The interval at which this filter should be "broken"
+		## and the '$epoch_finished' callback called.  The
+		## results are also reset at this time so any threshold
+		## based detection needs to be set to a
+		## value that should be expected to happen within
+		## this epoch.
+		epoch:              interval;
+
+		## The reducers for the SumStat
+		reducers:           set[Reducer];
+
+		## Provide a function to calculate a value from the
+		## :bro:see:`SumStats::Result` structure which will be used
+		## for thresholding.
+		## This is required if a $threshold value is given.
+		threshold_val:      function(key: SumStats::Key, result: SumStats::Result): count &optional;
+
+		## The threshold value for calling the
+		## $threshold_crossed callback.
+		threshold:          count             &optional;
+
+		## A series of thresholds for calling the
+		## $threshold_crossed callback.
+		threshold_series:   vector of count   &optional;
+
+		## A callback that is called when a threshold is crossed.
+		threshold_crossed:  function(key: SumStats::Key, result: SumStats::Result) &optional;
+
+		## A callback with the full collection of Results for
+		## this SumStat.
+		epoch_finished:    function(rt: SumStats::ResultTable) &optional;
+	};
+
+	## Create a summary statistic.
+	global create: function(ss: SumStats::SumStat);
+
+	## Add data into an observation stream. This should be
+	## called when a script has measured some point value.
+	##
+	## id: The observation stream identifier that the data
+	##     point represents.
+	##
+	## key: The key that the value is related to.
+	##
+	## obs: The data point to send into the stream.
+	global observe: function(id: string, key: SumStats::Key, obs: SumStats::Observation);
+
+	## This record is primarily used for internal threshold tracking.
+	## One instance is kept per SumStat and key.
+	type Thresholding: record {
+		# Internal use only.  Indicates if a simple threshold was already crossed.
+		is_threshold_crossed: bool &default=F;
+
+		# Internal use only.  Index into $threshold_series of the next
+		# value that has yet to be crossed; bumped each time a threshold
+		# in the series is crossed.
+		threshold_series_index: count &default=0;
+	};
+
+	## This event is generated when thresholds are reset for a SumStat.
+	##
+	## ssid: SumStats ID that thresholds were reset for.
+	global thresholds_reset: event(ssid: string);
+
+	## Helper function to represent a :bro:type:`SumStats::Key` value as
+	## a simple string.
+	##
+	## key: The metric key that is to be converted into a string.
+	##
+	## Returns: A string representation of the metric key.
+	global key2str: function(key: SumStats::Key): string;
+}
+
+redef record Reducer += {
+	# Internal use only.  Provides a reference back to the related SumStat
+	# by its ID.  Filled in when the SumStat is created.
+	sid: string &optional;
+};
+
+# Internal use only.  For tracking thresholds per sumstat and key.
+global threshold_tracker: table[string] of table[Key] of Thresholding &optional;
+
+redef record SumStat += {
+	# Internal use only (mostly for cluster coherency).
+	id: string &optional;
+};
+
+# Store of sumstats indexed on the sumstat id.
+global stats_store: table[string] of SumStat = table();
+
+# Store of reducers indexed on the data point stream id.
+global reducer_store: table[string] of set[Reducer] = table();
+
+# Store of results indexed on the measurement id.
+global result_store: table[string] of ResultTable = table();
+
+# Store of threshold information.
+global thresholds_store: table[string, Key] of bool = table();
+
+# This is called whenever key values are updated and the new val is given as the
+# `val` argument. It's only prototyped here because cluster and non-cluster have
+# separate  implementations.
+global data_added: function(ss: SumStat, key: Key, result: Result);
+
+# Prototype the hook point for plugins to do calculations.
+global observe_hook: hook(r: Reducer, val: double, data: Observation, rv: ResultVal);
+
+# Prototype the hook point for plugins to initialize any result values.
+global init_resultval_hook: hook(r: Reducer, rv: ResultVal);
+
+# Prototype the hook point for plugins to merge Results.
+global compose_resultvals_hook: hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal);
+
+# Event that is used to "finish" measurements and adapt the measurement
+# framework for clustered or non-clustered usage.
+global finish_epoch: event(ss: SumStat);
+
+# Renders a key as "sumstats_key(...)", listing the host and/or string
+# components that are set, separated by ", ".
+function key2str(key: Key): string
+	{
+	local body = "";
+
+	if ( key?$host )
+		body = fmt("%shost=%s", body, key$host);
+
+	if ( key?$str )
+		{
+		# Only emit a separator if something precedes the string part.
+		local sep = (|body| == 0) ? "" : ", ";
+		body = fmt("%s%sstr=%s", body, sep, key$str);
+		}
+
+	return fmt("sumstats_key(%s)", body);
+	}
+
+# Creates a fresh ResultVal whose begin and end are the current network time
+# and lets the plugins initialize their own fields for reducer *r*.
+function init_resultval(r: Reducer): ResultVal
+	{
+	local fresh: ResultVal = [$begin=network_time(), $end=network_time()];
+
+	# Give every plugin a chance to set up its fields.
+	hook init_resultval_hook(r, fresh);
+
+	return fresh;
+	}
+
+# Merges two ResultVals into a new one: the earliest begin, the latest end,
+# the summed observation count, plus whatever the plugin composition hooks
+# contribute.
+function compose_resultvals(rv1: ResultVal, rv2: ResultVal): ResultVal
+	{
+	local result: ResultVal;
+
+	# Earliest start of the two.
+	if ( rv1$begin < rv2$begin )
+		result$begin = rv1$begin;
+	else
+		result$begin = rv2$begin;
+
+	# Latest end of the two.
+	if ( rv1$end > rv2$end )
+		result$end = rv1$end;
+	else
+		result$end = rv2$end;
+
+	result$num = rv1$num + rv2$num;
+
+	# Let the plugins merge their own fields.
+	hook compose_resultvals_hook(result, rv1, rv2);
+	return result;
+	}
+
+# Merges two Results (tables of ResultVals indexed by stream id) into a
+# new table containing the union of both.
+#
+# Entries present in only one input are carried over as-is; entries present
+# in both are merged with compose_resultvals.  Every entry of *both* inputs
+# is visited so that nothing present in only one of them is dropped.
+#
+# r1: First result.
+# r2: Second result.
+#
+# Returns: A new table with the merged results.
+function compose_results(r1: Result, r2: Result): Result
+	{
+	local result: Result = table();
+
+	# Start with everything from r1.
+	for ( data_id in r1 )
+		result[data_id] = r1[data_id];
+
+	# Then add r2, merging wherever r1 already provided a value.
+	for ( data_id in r2 )
+		{
+		if ( data_id in r1 )
+			result[data_id] = compose_resultvals(r1[data_id], r2[data_id]);
+		else
+			result[data_id] = r2[data_id];
+		}
+
+	return result;
+	}
+
+
+# Discards the collected results of a SumStat and, if it uses thresholds,
+# clears its threshold tracking and raises SumStats::thresholds_reset.
+function reset(ss: SumStat)
+	{
+	local sid = ss$id;
+
+	# Replace any existing results with an empty table.
+	if ( sid in result_store )
+		delete result_store[sid];
+	result_store[sid] = table();
+
+	# Without thresholds there is no tracking state to clear.
+	if ( ! (ss?$threshold || ss?$threshold_series) )
+		return;
+
+	threshold_tracker[sid] = table();
+	event SumStats::thresholds_reset(sid);
+	}
+
+# Registers a SumStat: assigns it an id if it has none, stores it, attaches
+# each of its reducers to the reducer's observation stream, resets its
+# results and schedules the first finish_epoch event.
+function create(ss: SumStat)
+	{
+	local wants_threshold = ss?$threshold || ss?$threshold_series;
+
+	# A threshold can't be evaluated without a value function.  This is
+	# reported, but creation still proceeds.
+	if ( wants_threshold && ! ss?$threshold_val )
+		Reporter::error("SumStats given a threshold with no $threshold_val function");
+
+	if ( ! ss?$id )
+		ss$id = unique_id("");
+
+	threshold_tracker[ss$id] = table();
+	stats_store[ss$id] = ss;
+
+	for ( red in ss$reducers )
+		{
+		# Link the reducer back to its SumStat.
+		red$sid = ss$id;
+
+		if ( red$stream !in reducer_store )
+			reducer_store[red$stream] = set();
+
+		add reducer_store[red$stream][red];
+		}
+
+	reset(ss);
+	schedule ss$epoch { SumStats::finish_epoch(ss) };
+	}
+
+# Feeds one observation into every reducer attached to the stream *id*.
+#
+# For each reducer the key is optionally normalized, the reducer's predicate
+# (if any) is consulted, the ResultVal for (sumstat, key, stream) is created
+# on demand and updated, the plugin observe hooks are run and data_added is
+# called so thresholds can be evaluated.
+#
+# id: The observation stream identifier.
+# key: The key the observation relates to.
+# obs: The observation itself.
+function observe(id: string, key: Key, obs: Observation)
+	{
+	# No reducer is attached to this stream.
+	if ( id !in reducer_store )
+		return;
+
+	# Try to add the data to all of the defined reducers.
+	for ( r in reducer_store[id] )
+		{
+		if ( r?$normalize_key )
+			key = r$normalize_key(copy(key));
+
+		# If this reducer has a predicate, run the predicate
+		# and skip this key if the predicate return false.
+		if ( r?$pred && ! r$pred(key, obs) )
+			next;
+
+		local ss = stats_store[r$sid];
+
+		# If there is a threshold and no epoch_finished callback
+		# we don't need to continue counting since the data will
+		# never be accessed.  This was leading
+		# to some state management issues when measuring
+		# uniqueness.
+		# NOTE: this optimization could need removed in the
+		#       future if on demand access is provided to the
+		#       SumStats results.
+		#
+		# The two threshold alternatives are parenthesized as a group so
+		# that the tracker membership checks guard both of them, and
+		# counting for a threshold series only stops once *every* value
+		# in the series has been crossed (index == series length).
+		if ( ! ss?$epoch_finished &&
+		     r$sid in threshold_tracker &&
+		     key in threshold_tracker[r$sid] &&
+		     ( ( ss?$threshold &&
+		         threshold_tracker[r$sid][key]$is_threshold_crossed ) ||
+		       ( ss?$threshold_series &&
+		         threshold_tracker[r$sid][key]$threshold_series_index == |ss$threshold_series| ) ) )
+			next;
+
+		# Create the nested result containers on demand.
+		if ( r$sid !in result_store )
+			result_store[r$sid] = table();
+		local results = result_store[r$sid];
+
+		if ( key !in results )
+			results[key] = table();
+		local result = results[key];
+
+		if ( id !in result )
+			result[id] = init_resultval(r);
+		local result_val = result[id];
+
+		++result_val$num;
+		# Continually update the $end field.
+		result_val$end=network_time();
+
+		# If a string was given, fall back to 1.0 as the value.
+		local val = 1.0;
+		if ( obs?$num || obs?$dbl )
+			val = obs?$dbl ? obs$dbl : obs$num;
+
+		hook observe_hook(r, val, obs, result_val);
+		data_added(ss, key, result);
+		}
+	}
+
+# This function checks if a threshold has been crossed.  It is also used as a method to implement
+# mid-break-interval threshold crossing detection for cluster deployments.
+# Checks whether the value computed by the SumStat's $threshold_val function
+# for *key* has reached the next threshold that has not been crossed yet.
+# This does not record the crossing; threshold_crossed does that.
+#
+# ss: The SumStat whose thresholds are checked.
+# key: The key the result belongs to.
+# result: The result for the key.  ResultVals for reducers that have no
+#         entry yet are added to it.
+# modify_pct: If strictly between 0.0 and 1.0, the value is divided by this
+#             factor (and floored) before being compared.
+#
+# Returns: T if the simple threshold or the next value of the threshold
+#          series has been reached, F otherwise.
+function check_thresholds(ss: SumStat, key: Key, result: Result, modify_pct: double): bool
+	{
+	if ( ! (ss?$threshold || ss?$threshold_series) )
+		return F;
+
+	# Without a value function nothing can be compared.  create() already
+	# reported this misconfiguration.
+	if ( ! ss?$threshold_val )
+		return F;
+
+	# Add in the extra ResultVals to make threshold_vals easier to write.
+	if ( |ss$reducers| != |result| )
+		{
+		for ( reducer in ss$reducers )
+			{
+			if ( reducer$stream !in result )
+				result[reducer$stream] = init_resultval(reducer);
+			}
+		}
+
+	local watch = ss$threshold_val(key, result);
+
+	if ( modify_pct < 1.0 && modify_pct > 0.0 )
+		watch = double_to_count(floor(watch/modify_pct));
+
+	if ( ss$id !in threshold_tracker )
+		threshold_tracker[ss$id] = table();
+	local t_tracker = threshold_tracker[ss$id];
+
+	if ( key !in t_tracker )
+		{
+		local ttmp: Thresholding;
+		t_tracker[key] = ttmp;
+		}
+	local tt = t_tracker[key];
+
+	if ( ss?$threshold && ! tt$is_threshold_crossed && watch >= ss$threshold )
+		{
+		# Value crossed the threshold.
+		return T;
+		}
+
+	# The index must be strictly below the series length: once every value
+	# in the series has been crossed the index equals the length and there
+	# is no further element to compare against.
+	if ( ss?$threshold_series &&
+	     |ss$threshold_series| > tt$threshold_series_index &&
+	     watch >= ss$threshold_series[tt$threshold_series_index] )
+		{
+		# A threshold series was given and the value crossed the next
+		# value in the series.
+		return T;
+		}
+
+	return F;
+	}
+
+# Invokes the SumStat's $threshold_crossed callback for *key* and then
+# records the crossing in the threshold tracker.  Does nothing at all if the
+# SumStat has no such callback.
+function threshold_crossed(ss: SumStat, key: Key, result: Result)
+	{
+	# If there is no callback, there is no point in any of this.
+	if ( ! ss?$threshold_crossed )
+		return;
+
+	# Make sure the callback sees a ResultVal for every reducer, so that
+	# callbacks are easier to write.
+	if ( |ss$reducers| != |result| )
+		{
+		for ( red in ss$reducers )
+			{
+			if ( red$stream in result )
+				next;
+
+			result[red$stream] = init_resultval(red);
+			}
+		}
+
+	ss$threshold_crossed(key, result);
+
+	local tracker = threshold_tracker[ss$id][key];
+	tracker$is_threshold_crossed = T;
+
+	# Bump up to the next threshold series index if a threshold series is being used.
+	if ( ss?$threshold_series )
+		++tracker$threshold_series_index;
+	}
+
--- a/scripts/base/frameworks/sumstats/non-cluster.bro
+++ b/scripts/base/frameworks/sumstats/non-cluster.bro
@ -0,0 +1,24 @@
+@load ./main
+
+module SumStats;
+
+# Non-cluster epoch handling: hand the collected results to the SumStat's
+# $epoch_finished callback (if it has one), reset the SumStat and schedule
+# the next epoch.
+event SumStats::finish_epoch(ss: SumStat)
+	{
+	if ( ss$id in result_store )
+		{
+		if ( ss?$epoch_finished )
+			ss$epoch_finished(result_store[ss$id]);
+
+		reset(ss);
+		}
+
+	# Re-arm for the next epoch regardless of whether there were results.
+	schedule ss$epoch { SumStats::finish_epoch(ss) };
+	}
+
+
+# Non-cluster implementation: every time data is added, check the thresholds
+# right away and record a crossing if one occurred.
+function data_added(ss: SumStat, key: Key, result: Result)
+	{
+	if ( ! check_thresholds(ss, key, result, 1.0) )
+		return;
+
+	threshold_crossed(ss, key, result);
+	}
--- a/scripts/base/frameworks/sumstats/plugins/load.bro
+++ b/scripts/base/frameworks/sumstats/plugins/load.bro
@ -0,0 +1,9 @@
+@load ./average
+@load ./last
+@load ./max
+@load ./min
+@load ./sample
+@load ./std-dev
+@load ./sum
+@load ./unique
+@load ./variance
--- a/scripts/base/frameworks/sumstats/plugins/average.bro
+++ b/scripts/base/frameworks/sumstats/plugins/average.bro
@ -0,0 +1,36 @@
+@load base/frameworks/sumstats/main
+
+module SumStats;
+
+export {
+	redef enum Calculation += {
+		## Calculate the average of the values.
+		AVERAGE
+	};
+
+	redef record ResultVal += {
+		## For numeric data, this calculates the average of all values.
+		average: double &optional;
+	};
+}
+
+# Maintains a running average.  rv$num has already been incremented for this
+# observation by the time the hook runs.
+hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
+	{
+	if ( AVERAGE !in r$apply )
+		return;
+
+	if ( rv?$average )
+		rv$average += (val - rv$average) / rv$num;
+	else
+		# First value seen: it is the average.
+		rv$average = val;
+	}
+
+# Merges averages: the count-weighted mean when both sides have one,
+# otherwise whichever side has one.
+hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
+	{
+	if ( rv1?$average && rv2?$average )
+		{
+		local weighted_total = (rv1$average*rv1$num) + (rv2$average*rv2$num);
+		result$average = weighted_total/(rv1$num+rv2$num);
+		}
+	else if ( rv1?$average )
+		result$average = rv1$average;
+	else if ( rv2?$average )
+		result$average = rv2$average;
+	}
--- a/scripts/base/frameworks/sumstats/plugins/last.bro
+++ b/scripts/base/frameworks/sumstats/plugins/last.bro
@ -0,0 +1,55 @@
+@load base/frameworks/sumstats
+@load base/utils/queue
+
+module SumStats;
+
+export {
+	redef enum Calculation += {
+		## Keep last X observations in a queue
+		LAST
+	};
+
+	redef record Reducer += {
+		## number of elements to keep.
+		num_last_elements: count &default=0;
+	};
+
+	redef record ResultVal += {
+		## This is the queue where elements are maintained.  Use the
+		## :bro:see:`SumStats::get_last` function to get a vector of
+		## the current element values.
+		last_elements: Queue::Queue &optional;
+	};
+
+	## Get a vector of element values from a ResultVal.
+	global get_last: function(rv: ResultVal): vector of Observation;
+}
+
+# Returns the elements currently held in the ResultVal's queue as a vector;
+# the vector is empty if no queue has been created.
+function get_last(rv: ResultVal): vector of Observation
+	{
+	local elements: vector of Observation = vector();
+
+	if ( ! rv?$last_elements )
+		return elements;
+
+	Queue::get_vector(rv$last_elements, elements);
+	return elements;
+	}
+
+# Appends the observation to the queue of last elements, creating the queue
+# (bounded to r$num_last_elements) on first use.
+hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
+	{
+	if ( LAST !in r$apply || r$num_last_elements == 0 )
+		return;
+
+	if ( ! rv?$last_elements )
+		rv$last_elements = Queue::init([$max_len=r$num_last_elements]);
+
+	Queue::put(rv$last_elements, obs);
+	}
+
+# Merges $last_elements: the two queues are merged when both sides have one,
+# otherwise the single existing queue is carried over.
+hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
+	{
+	local have1 = rv1?$last_elements;
+	local have2 = rv2?$last_elements;
+
+	if ( have1 && have2 )
+		result$last_elements = Queue::merge(rv1$last_elements, rv2$last_elements);
+	else if ( have1 )
+		result$last_elements = rv1$last_elements;
+	else if ( have2 )
+		result$last_elements = rv2$last_elements;
+	}
--- a/scripts/base/frameworks/sumstats/plugins/max.bro
+++ b/scripts/base/frameworks/sumstats/plugins/max.bro
@ -0,0 +1,38 @@
+@load base/frameworks/sumstats/main
+
+module SumStats;
+
+export {
+	redef enum Calculation += {
+		## Find the maximum value.
+		MAX
+	};
+
+	redef record ResultVal += {
+		## For numeric data, this tracks the maximum value given.
+		max: double &optional;
+	};
+}
+
+# Tracks the largest value observed so far.
+hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
+	{
+	if ( MAX !in r$apply )
+		return;
+
+	# Take the value if it is the first one or a new maximum.
+	if ( ! rv?$max || val > rv$max )
+		rv$max = val;
+	}
+
+# Merges maxima: the larger of the two when both sides have one, otherwise
+# whichever side has one.
+hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
+	{
+	if ( rv1?$max )
+		{
+		if ( rv2?$max && ! (rv1$max > rv2$max) )
+			result$max = rv2$max;
+		else
+			result$max = rv1$max;
+		}
+	else if ( rv2?$max )
+		result$max = rv2$max;
+	}
+
+
--- a/scripts/base/frameworks/sumstats/plugins/min.bro
+++ b/scripts/base/frameworks/sumstats/plugins/min.bro
@ -0,0 +1,36 @@
+@load base/frameworks/sumstats/main
+
+module SumStats;
+
+export {
+	redef enum Calculation += {
+		## Find the minimum value.
+		MIN
+	};
+
+	redef record ResultVal += {
+		## For numeric data, this tracks the minimum value given.
+		min: double &optional;
+	};
+}
+
+# Tracks the smallest value observed so far.
+hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
+	{
+	if ( MIN !in r$apply )
+		return;
+
+	# Take the value if it is the first one or a new minimum.
+	if ( ! rv?$min || val < rv$min )
+		rv$min = val;
+	}
+
+# Merges minima: the smaller of the two when both sides have one, otherwise
+# whichever side has one.
+hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
+	{
+	if ( rv1?$min )
+		{
+		if ( rv2?$min && ! (rv1$min < rv2$min) )
+			result$min = rv2$min;
+		else
+			result$min = rv1$min;
+		}
+	else if ( rv2?$min )
+		result$min = rv2$min;
+	}
--- a/scripts/base/frameworks/sumstats/plugins/sample.bro
+++ b/scripts/base/frameworks/sumstats/plugins/sample.bro
@ -0,0 +1,120 @@
+@load base/frameworks/sumstats/main
+
+module SumStats;
+
+export {
+	redef enum Calculation += {
+		## Get uniformly distributed random samples from the observation stream.
+		SAMPLE
+	};
+
+	redef record Reducer += {
+		## A number of sample Observations to collect.
+		num_samples: count &default=0;
+	};
+
+	redef record ResultVal += {
+		## This is the vector in which the samples are maintained.
+		samples: vector of Observation &default=vector();
+
+		## Number of total observed elements.
+		sample_elements: count &default=0;
+	};
+}
+
+redef record ResultVal += {
+	# Internal use only.  This is not meant to be publicly available
+	# and is just a copy of num_samples from the Reducer.  It is needed
+	# because the compose hook has no access to the Reducer.
+	num_samples: count &default=0;
+};
+
+# Copies the reducer's sample size into the ResultVal so that it is
+# available later when ResultVals are composed.
+hook init_resultval_hook(r: Reducer, rv: ResultVal)
+	{
+	if ( SAMPLE !in r$apply )
+		return;
+
+	rv$num_samples = r$num_samples;
+	}
+
+# Offers one observation to the sample set of *rv*.  While the set holds
+# fewer than rv$num_samples elements the observation is appended; after
+# that it replaces the element at a randomly drawn position, provided the
+# drawn position falls inside the set.
+function sample_add_sample(obs:Observation, rv: ResultVal)
+	{
+	++rv$sample_elements;
+
+	local held = |rv$samples|;
+	if ( held < rv$num_samples )
+		{
+		# Still room: append.
+		rv$samples[held] = obs;
+		return;
+		}
+
+	# Full: draw a position among all elements seen so far.
+	local slot = rand(rv$sample_elements);
+	if ( slot < rv$num_samples )
+		rv$samples[slot] = obs;
+	}
+
+# Offers every observation to the sample set when sampling is requested.
+hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
+	{
+	if ( SAMPLE !in r$apply )
+		return;
+
+	sample_add_sample(obs, rv);
+	}
+
+# Merges the sample sets of two ResultVals into *result*.
+#
+# Both inputs must have been configured with the same number of samples.
+# If neither sample set is full, all samples of both are offered to the
+# result one by one.  Otherwise the result starts as a copy of the samples
+# from the side that observed more elements, and each sample of the other
+# side replaces the sample at the same position with probability
+# othercount/totalcount.
+hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
+	{
+	if ( rv1$num_samples != rv2$num_samples )
+		{
+		Reporter::error("Merging sample sets with differing sizes is not supported");
+		return;
+		}
+
+	local num_samples = rv1$num_samples;
+	result$num_samples = num_samples;
+
+	if ( |rv1$samples| > num_samples || |rv2$samples| > num_samples )
+		{
+		Reporter::error("Sample vector with too many elements. Aborting.");
+		return;
+		}
+
+	if ( |rv1$samples| < num_samples && |rv2$samples| < num_samples )
+		{
+		# Neither side has filled its sample set, so each must still hold
+		# every element it has observed.
+		if ( |rv1$samples| != rv1$sample_elements || |rv2$samples| != rv2$sample_elements )
+			{
+			Reporter::error("Mismatch in sample element size and tracking. Aborting merge");
+			return;
+			}
+
+		for ( i in rv1$samples )
+			sample_add_sample(rv1$samples[i], result);
+
+		for ( i in rv2$samples)
+			sample_add_sample(rv2$samples[i], result);
+		}
+	else
+		{
+		local other_vector: vector of Observation;
+		local othercount: count;
+
+		# Base the result on the side that has seen more elements.
+		if ( rv1$sample_elements > rv2$sample_elements )
+			{
+			result$samples = copy(rv1$samples);
+			other_vector = rv2$samples;
+			othercount = rv2$sample_elements;
+			}
+		else
+			{
+			result$samples = copy(rv2$samples);
+			other_vector = rv1$samples;
+			othercount = rv1$sample_elements;
+			}
+
+		local totalcount = rv1$sample_elements + rv2$sample_elements;
+		result$sample_elements = totalcount;
+
+		for ( i in other_vector )
+			{
+			# rand(totalcount) lies in [0, totalcount), so a strict
+			# comparison selects the other side's sample with
+			# probability othercount/totalcount.
+			if ( rand(totalcount) < othercount )
+				result$samples[i] = other_vector[i];
+			}
+		}
+	}
--- a/scripts/base/frameworks/sumstats/plugins/std-dev.bro
+++ b/scripts/base/frameworks/sumstats/plugins/std-dev.bro
@ -0,0 +1,34 @@
+@load base/frameworks/sumstats/main
+@load ./variance
+
+module SumStats;
+
+export {
+	redef enum Calculation += {
+		## Find the standard deviation of the values.
+		STD_DEV
+	};
+
+	redef record ResultVal += {
+		## For numeric data, this calculates the standard deviation.
+		std_dev: double &default=0.0;
+	};
+}
+
+# Sets rv$std_dev to the square root of rv$variance; leaves it untouched
+# when no variance is available.
+function calc_std_dev(rv: ResultVal)
+	{
+	if ( ! rv?$variance )
+		return;
+
+	rv$std_dev = sqrt(rv$variance);
+	}
+
+# This depends on the variance plugin which uses priority -5
+# Runs at a lower priority than the variance plugin's hook so that the
+# variance for this observation has already been computed.
+hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal) &priority=-10
+	{
+	if ( STD_DEV !in r$apply )
+		return;
+
+	calc_std_dev(rv);
+	}
+
+# Recomputes the standard deviation of a merged ResultVal from its variance.
+# The priority is lower than that of the variance plugin's compose hook
+# (-5), so the merged variance has been calculated by the time this runs.
+# This hook has no Reducer argument, so it cannot check whether STD_DEV was
+# requested; it runs for every merge.
+hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal) &priority=-10
+	{
+	calc_std_dev(result);
+	}
--- a/scripts/base/frameworks/sumstats/plugins/sum.bro
+++ b/scripts/base/frameworks/sumstats/plugins/sum.bro
@ -0,0 +1,51 @@
+@load base/frameworks/sumstats/main
+
+module SumStats;
+
+export {
+	redef enum Calculation += {
+		## Sums the values given.  For string values,
+		## this will be the number of strings given.
+		SUM
+	};
+
+	redef record ResultVal += {
+		## For numeric data, this tracks the sum of all values.
+		sum: double &default=0.0;
+	};
+
+	type threshold_function: function(key: SumStats::Key, result: SumStats::Result): count;
+	global sum_threshold: function(data_id: string): threshold_function;
+}
+
+# Builds a threshold value function that reports the sum collected for the
+# stream *data_id*, converted to a count.
+#
+# data_id: The observation stream identifier whose sum is to be used.
+#
+# Returns: A function suitable for a SumStat's $threshold_val field.
+function sum_threshold(data_id: string): threshold_function
+	{
+	return function(key: SumStats::Key, result: SumStats::Result): count
+		{
+		# Debugging output that used to be printed on every evaluation
+		# has been removed.
+		return double_to_count(result[data_id]$sum);
+		};
+	}
+
+# Makes sure a ResultVal used for summing starts out with a sum of zero.
+hook init_resultval_hook(r: Reducer, rv: ResultVal)
+	{
+	if ( SUM !in r$apply || rv?$sum )
+		return;
+
+	rv$sum = 0;
+	}
+
+# Adds the observed value to the running sum.
+hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
+	{
+	if ( SUM !in r$apply )
+		return;
+
+	rv$sum += val;
+	}
+
+# Merges sums: the result is the sum of whichever inputs carry a sum.
+hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
+	{
+	if ( ! (rv1?$sum || rv2?$sum) )
+		return;
+
+	# Start from rv1's sum (or zero) and add rv2's on top.
+	result$sum = rv1?$sum ? rv1$sum : 0;
+
+	if ( rv2?$sum )
+		result$sum += rv2$sum;
+	}
--- a/scripts/base/frameworks/sumstats/plugins/unique.bro
+++ b/scripts/base/frameworks/sumstats/plugins/unique.bro
@ -0,0 +1,53 @@
+@load base/frameworks/sumstats/main
+
+module SumStats;
+
+export {
+	redef enum Calculation += {
+		## Calculate the number of unique values.
+		UNIQUE
+	};
+
+	redef record ResultVal += {
+		## If cardinality is being tracked, the number of unique
+		## items is tracked here.
+		unique: count &default=0;
+	};
+}
+
+redef record ResultVal += {
+	# Internal use only.  This is not meant to be publicly available
+	# because we don't want to trust that we can inspect the values
+	# since we will likely move to a probabilistic data structure in the future.
+	# TODO: in the future this will optionally be a hyperloglog structure
+	unique_vals: set[Observation] &optional;
+};
+
+# Records the observation in the set of unique values and refreshes the
+# unique count from the size of that set.
+hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
+	{
+	if ( UNIQUE !in r$apply )
+		return;
+
+	if ( ! rv?$unique_vals )
+		rv$unique_vals=set();
+
+	add rv$unique_vals[obs];
+	rv$unique = |rv$unique_vals|;
+	}
+
+# Merges the sets of unique values: the result holds copies of the values
+# of both sides, and the unique count is the size of the merged set.
+hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
+	{
+	if ( ! (rv1?$unique_vals || rv2?$unique_vals) )
+		return;
+
+	if ( rv1?$unique_vals )
+		result$unique_vals = copy(rv1$unique_vals);
+
+	if ( rv2?$unique_vals )
+		{
+		if ( result?$unique_vals )
+			{
+			# A set is already in place: add rv2's values one by one.
+			for ( val2 in rv2$unique_vals )
+				add result$unique_vals[copy(val2)];
+			}
+		else
+			result$unique_vals = copy(rv2$unique_vals);
+		}
+
+	result$unique = |result$unique_vals|;
+	}
--- a/scripts/base/frameworks/sumstats/plugins/variance.bro
+++ b/scripts/base/frameworks/sumstats/plugins/variance.bro
@ -0,0 +1,69 @@
+@load base/frameworks/sumstats/main
+@load ./average
+
+module SumStats;
+
+export {
+	redef enum Calculation += {
+		## Find the variance of the values.
+		VARIANCE
+	};
+
+	redef record ResultVal += {
+		## For numeric data, this calculates the variance.
+		variance: double &optional;
+	};
+}
+
+redef record ResultVal += {
+	# Internal use only.  Used for incrementally calculating variance.
+	prev_avg: double &optional;
+
+	# Internal use only.  For calculating incremental variance.
+	var_s: double &default=0.0;
+};
+
+# Derives the variance from the accumulated var_s: var_s/(num-1) when more
+# than one observation exists, 0.0 otherwise.
+function calc_variance(rv: ResultVal)
+	{
+	if ( rv$num > 1 )
+		rv$variance = rv$var_s/(rv$num-1);
+	else
+		rv$variance = 0.0;
+	}
+
+# Reduced priority since this depends on the average
+# Incrementally updates the variance.  Runs at reduced priority because it
+# reads rv$average, which the average plugin's hook maintains when AVERAGE
+# is in r$apply.
+hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal) &priority=-5
+	{
+	if ( VARIANCE !in r$apply )
+		return;
+
+	# From the second observation on, accumulate using the average from
+	# before and after this observation.
+	if ( rv$num > 1 )
+		rv$var_s += ((val - rv$prev_avg) * (val - rv$average));
+
+	calc_variance(rv);
+
+	# Remember the current average for the next observation.
+	rv$prev_avg = rv$average;
+	}
+
+# Reduced priority since this depends on the average
+# Merges the variance bookkeeping of two ResultVals and recomputes the
+# variance of the result.  Runs at reduced priority because it reads
+# result$average, which the average plugin's compose hook sets.
+hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal) &priority=-5
+	{
+	if ( rv1?$var_s && rv1?$average &&
+	     rv2?$var_s && rv2?$average )
+		{
+		# Squared distance of each side's average from the merged average.
+		local delta1 = (rv1$average - result$average);
+		local delta1_sq = delta1*delta1;
+		local delta2 = (rv2$average - result$average);
+		local delta2_sq = delta2*delta2;
+
+		result$var_s = rv1$num*(rv1$var_s/rv1$num + delta1_sq) + rv2$num*(rv2$var_s/rv2$num + delta2_sq);
+		}
+	else if ( rv1?$var_s )
+		result$var_s = rv1$var_s;
+	else if ( rv2?$var_s )
+		result$var_s = rv2$var_s;
+
+	# The previous average is merged as a count-weighted mean.
+	if ( rv1?$prev_avg && rv2?$prev_avg )
+		{
+		local prev_total = (rv1$prev_avg*rv1$num) + (rv2$prev_avg*rv2$num);
+		result$prev_avg = prev_total/(rv1$num+rv2$num);
+		}
+	else if ( rv1?$prev_avg )
+		result$prev_avg = rv1$prev_avg;
+	else if ( rv2?$prev_avg )
+		result$prev_avg = rv2$prev_avg;
+
+	calc_variance(result);
+	}
--- a/scripts/base/init-bare.bro
+++ b/scripts/base/init-bare.bro
@ -300,7 +300,7 @@ type connection: record {
 	## one protocol analyzer is able to parse the same data. If so, all will
 	## be recorded. Also note that the recorced services are independent of any
 	## transport-level protocols.
-        service: set[string];
+	service: set[string];
 	addl: string;	##< Deprecated.
 	hot: count;	##< Deprecated.
 	history: string;	##< State history of connections. See *history* in :bro:see:`Conn::Info`.
@ -316,6 +316,73 @@ type connection: record {
 	tunnel: EncapsulatingConnVector &optional;
 };

+## Default amount of time a file can be inactive before the file analysis
+## gives up and discards any internal state related to the file.
+const default_file_timeout_interval: interval = 2 mins &redef;
+
+## Default amount of bytes that file analysis will buffer before raising
+## :bro:see:`file_new`.
+const default_file_bof_buffer_size: count = 1024 &redef;
+
+## A file that Bro is analyzing.  This is Bro's type for describing the basic
+## internal metadata collected about a "file", which is essentially just a
+## byte stream that is e.g. pulled from a network connection or possibly
+## some other input source.
+type fa_file: record {
+	## An identifier associated with a single file.
+	id: string;
+
+	## Identifier associated with a container file from which this one was
+	## extracted as part of the file analysis.
+	parent_id: string &optional;
+
+	## An identification of the source of the file data.  E.g. it may be
+	## a network protocol over which it was transferred, or a local file
+	## path which was read, or some other input source.
+	source: string &optional;
+
+	## If the source of this file is a network connection, this field
+	## may be set to indicate the directionality.
+	is_orig: bool &optional;
+
+	## The set of connections over which the file was transferred.
+	conns: table[conn_id] of connection &optional;
+
+	## The time at which the last activity for the file was seen.
+	last_active: time;
+
+	## Number of bytes provided to the file analysis engine for the file.
+	seen_bytes: count &default=0;
+
+	## Total number of bytes that are supposed to comprise the full file.
+	total_bytes: count &optional;
+
+	## The number of bytes in the file stream that were completely missed
+	## during the process of analysis e.g. due to dropped packets.
+	missing_bytes: count &default=0;
+
+	## The number of not all-in-sequence bytes in the file stream that
+	## were delivered to file analyzers due to reassembly buffer overflow.
+	overflow_bytes: count &default=0;
+
+	## The amount of time between receiving new data for this file that
+	## the analysis engine will wait before giving up on it.
+	timeout_interval: interval &default=default_file_timeout_interval;
+
+	## The number of bytes at the beginning of a file to save for later
+	## inspection in *bof_buffer* field.
+	bof_buffer_size: count &default=default_file_bof_buffer_size;
+
+	## The content of the beginning of a file up to *bof_buffer_size* bytes.
+	## This is also the buffer that's used for file/mime type detection.
+	bof_buffer: string &optional;
+
+	## A mime type provided by libmagic against the *bof_buffer*, or
+	## in the cases where no buffering of the beginning of file occurs,
+	## an initial guess of the mime type based on the first data seen.
+	mime_type: string &optional;
+} &redef;
+
 ## Fields of a SYN packet.
 ##
 ## .. bro:see:: connection_SYN_packet
@ -3034,3 +3101,4 @@ const snaplen = 8192 &redef;

@load base/frameworks/input

+@load base/frameworks/file-analysis
--- a/scripts/base/init-default.bro
+++ b/scripts/base/init-default.bro
@ -12,8 +12,10 @@
@load base/utils/numbers
@load base/utils/paths
@load base/utils/patterns
+@load base/utils/queue
@load base/utils/strings
@load base/utils/thresholds
+@load base/utils/time
@load base/utils/urls

 # This has some deep interplay between types and BiFs so it's 
@ -27,9 +29,9 @@
@load base/frameworks/communication
@load base/frameworks/control
@load base/frameworks/cluster
-@load base/frameworks/metrics
@load base/frameworks/intel
@load base/frameworks/reporter
+@load base/frameworks/sumstats
@load base/frameworks/tunnels

@load base/protocols/conn
--- a/scripts/base/protocols/dns/main.bro
+++ b/scripts/base/protocols/dns/main.bro
@ -1,6 +1,7 @@
 ##! Base DNS analysis script which tracks and logs DNS queries along with
 ##! their responses.

+@load base/utils/queue
@load ./consts

 module DNS;
@ -73,19 +74,6 @@ export {
 		total_replies: count           &optional;
 	};

-	## A record type which tracks the status of DNS queries for a given
-	## :bro:type:`connection`.
-	type State: record {
-		## Indexed by query id, returns Info record corresponding to
-		## query/response which haven't completed yet.
-		pending: table[count] of Info &optional;
-
-		## This is the list of DNS responses that have completed based on the
-		## number of responses declared and the number received.  The contents
-		## of the set are transaction IDs.
-		finished_answers: set[count] &optional;
-	};
-
 	## An event that can be handled to access the :bro:type:`DNS::Info`
 	## record as it is sent to the logging framework.
 	global log_dns: event(rec: Info);
@ -102,8 +90,33 @@ export {
 	##
 	## reply: The specific response information according to RR type/class.
 	global do_reply: event(c: connection, msg: dns_msg, ans: dns_answer, reply: string);
+
+	## A hook that is called whenever a session is being set.
+	## This can be used if additional initialization logic needs to happen
+	## when creating a new session value.
+	##
+	## c: The connection involved in the new session
+	## 
+	## msg: The DNS message header information.
+	##
+	## is_query: Indicates whether this is being called for a query or a response.
+	global set_session: hook(c: connection, msg: dns_msg, is_query: bool);
+
+	## A record type which tracks the status of DNS queries for a given
+	## :bro:type:`connection`.
+	type State: record {
+		## Indexed by query id, returns a queue of Info records corresponding
+		## to queries/responses which haven't completed yet.
+		pending: table[count] of Queue::Queue;
+
+		## This is the list of DNS responses that have completed based on the
+		## number of responses declared and the number received.  The contents
+		## of the set are transaction IDs.
+		finished_answers: set[count];
+	};
 }

+
 redef record connection += {
 	dns:       Info  &optional;
 	dns_state: State &optional;
@ -134,14 +147,6 @@ event bro_init() &priority=5

 function new_session(c: connection, trans_id: count): Info
 	{
-	if ( ! c?$dns_state )
-		{
-		local state: State;
-		state$pending=table();
-		state$finished_answers=set();
-		c$dns_state = state;
-		}
-
 	local info: Info;
 	info$ts       = network_time();
 	info$id       = c$id;
@ -151,18 +156,37 @@ function new_session(c: connection, trans_id: count): Info
 	return info;
 	}

-function set_session(c: connection, msg: dns_msg, is_query: bool)
+hook set_session(c: connection, msg: dns_msg, is_query: bool) &priority=5
 	{
-	if ( ! c?$dns_state || msg$id !in c$dns_state$pending )
+	if ( ! c?$dns_state )
 		{
-		c$dns_state$pending[msg$id] = new_session(c, msg$id);
-		# Try deleting this transaction id from the set of finished answers.
-		# Sometimes hosts will reuse ports and transaction ids and this should
-		# be considered to be a legit scenario (although bad practice).
-		delete c$dns_state$finished_answers[msg$id];
+		local state: State;
+		c$dns_state = state;
 		}

-	c$dns = c$dns_state$pending[msg$id];
+	if ( msg$id !in c$dns_state$pending )
+		c$dns_state$pending[msg$id] = Queue::init();
+	
+	local info: Info;
+	# If this is either a query or this is the reply but
+	# no Info records are in the queue (we missed the query?)
+	# we need to create an Info record and put it in the queue.  
+	if ( is_query ||
+	     Queue::len(c$dns_state$pending[msg$id]) == 0 )
+		{
+		info = new_session(c, msg$id);
+		Queue::put(c$dns_state$pending[msg$id], info);
+		}
+
+	if ( is_query )
+		# If this is a query, assign the newly created info variable
+		# so that the world looks correct to anything else handling
+		# this query.
+		c$dns = info;
+	else
+		# Peek at the next item in the queue for this trans_id and 
+		# assign it to c$dns since this is a response.
+		c$dns = Queue::peek(c$dns_state$pending[msg$id]);

 	if ( ! is_query )
 		{
@ -190,7 +214,7 @@ function set_session(c: connection, msg: dns_msg, is_query: bool)

 event dns_message(c: connection, is_orig: bool, msg: dns_msg, len: count) &priority=5
 	{
-	set_session(c, msg, is_orig);
+	hook set_session(c, msg, is_orig);
 	}

 event DNS::do_reply(c: connection, msg: dns_msg, ans: dns_answer, reply: string) &priority=5
@ -200,9 +224,6 @@ event DNS::do_reply(c: connection, msg: dns_msg, ans: dns_answer, reply: string)
 		c$dns$AA    = msg$AA;
 		c$dns$RA    = msg$RA;

-		if ( msg$id in c$dns_state$finished_answers )
-			event conn_weird("dns_reply_seen_after_done", c, "");
-
 		if ( reply != "" )
 			{
 			if ( ! c$dns?$answers )
@ -217,7 +238,6 @@ event DNS::do_reply(c: connection, msg: dns_msg, ans: dns_answer, reply: string)
 		if ( c$dns?$answers && c$dns?$total_answers &&
 		     |c$dns$answers| == c$dns$total_answers )
 			{
-			add c$dns_state$finished_answers[c$dns$trans_id];
 			# Indicate this request/reply pair is ready to be logged.
 			c$dns$ready = T;
 			}
@ -230,7 +250,7 @@ event DNS::do_reply(c: connection, msg: dns_msg, ans: dns_answer, reply: string)
 		{
 		Log::write(DNS::LOG, c$dns);
 		# This record is logged and no longer pending.
-		delete c$dns_state$pending[c$dns$trans_id];
+		Queue::get(c$dns_state$pending[c$dns$trans_id]);
 		delete c$dns;
 		}
 	}
@ -243,15 +263,14 @@ event dns_request(c: connection, msg: dns_msg, query: string, qtype: count, qcla
 	c$dns$qclass_name = classes[qclass];
 	c$dns$qtype       = qtype;
 	c$dns$qtype_name  = query_types[qtype];
+	c$dns$Z           = msg$Z;

 	# Decode netbios name queries
 	# Note: I'm ignoring the name type for now.  Not sure if this should be
 	#       worked into the query/response in some fashion.
 	if ( c$id$resp_p == 137/udp )
 		query = decode_netbios_name(query);
-	c$dns$query    = query;
-
-	c$dns$Z = msg$Z;
+	c$dns$query = query;
 	}

 event dns_A_reply(c: connection, msg: dns_msg, ans: dns_answer, a: addr) &priority=5
@ -339,6 +358,13 @@ event connection_state_remove(c: connection) &priority=-5
 	# If Bro is expiring state, we should go ahead and log all unlogged
 	# request/response pairs now.
 	for ( trans_id in c$dns_state$pending )
-		Log::write(DNS::LOG, c$dns_state$pending[trans_id]);
+		{
+		local infos: vector of Info;
+		Queue::get_vector(c$dns_state$pending[trans_id], infos);
+		for ( i in infos )
+			{
+			Log::write(DNS::LOG, infos[i]);
+			}
+		}
 	}

--- a/scripts/base/protocols/ftp/load.bro
+++ b/scripts/base/protocols/ftp/load.bro
@ -1,4 +1,5 @@
@load ./utils-commands
@load ./main
+@load ./file-analysis
@load ./file-extract
@load ./gridftp
--- a/scripts/base/protocols/ftp/file-analysis.bro
+++ b/scripts/base/protocols/ftp/file-analysis.bro
@ -0,0 +1,47 @@
+@load ./main
+@load base/utils/conn-ids
+@load base/frameworks/file-analysis/main
+
+module FTP;
+
+export {
+	## Default file handle provider for FTP.
+	global get_file_handle: function(c: connection, is_orig: bool): string;
+}
+
+function get_handle_string(c: connection): string
+	{
+	return cat(ANALYZER_FTP_DATA, " ", c$start_time, " ", id_string(c$id));
+	}
+
+function get_file_handle(c: connection, is_orig: bool): string
+	{
+	if ( [c$id$resp_h, c$id$resp_p] !in ftp_data_expected ) return "";
+
+	local info: FTP::Info = ftp_data_expected[c$id$resp_h, c$id$resp_p];
+
+	if ( info$passive )
+		# FTP client initiates data channel.
+		if ( is_orig )
+			# Don't care about FTP client data.
+			return "";
+		else
+			# Do care about FTP server data.
+			return get_handle_string(c);
+	else
+		# FTP server initiates data channel.
+		if ( is_orig )
+			# Do care about FTP server data.
+			return get_handle_string(c);
+		else
+			# Don't care about FTP client data.
+			return "";
+	}
+
+module GLOBAL;
+
+event get_file_handle(tag: AnalyzerTag, c: connection, is_orig: bool)
+	{
+	if ( tag != ANALYZER_FTP_DATA ) return;
+	set_file_handle(FTP::get_file_handle(c, is_orig));
+	}
--- a/scripts/base/protocols/ftp/file-extract.bro
+++ b/scripts/base/protocols/ftp/file-extract.bro
@ -13,53 +13,76 @@ export {
 	const extraction_prefix = "ftp-item" &redef;
 }

+global extract_count: count = 0;
+
 redef record Info += {
 	## On disk file where it was extracted to.
-	extraction_file:       file &log &optional;
+	extraction_file:       string &log &optional;
 	
 	## Indicates if the current command/response pair should attempt to 
 	## extract the file if a file was transferred.
 	extract_file:          bool &default=F;
-	
-	## Internal tracking of the total number of files extracted during this 
-	## session.
-	num_extracted_files:   count &default=0;
 };

-event file_transferred(c: connection, prefix: string, descr: string,
-			mime_type: string) &priority=3
+function get_extraction_name(f: fa_file): string
 	{
-	local id = c$id;
-	if ( [id$resp_h, id$resp_p] !in ftp_data_expected )
-		return;
-		
-	local s = ftp_data_expected[id$resp_h, id$resp_p];
+	local r = fmt("%s-%s-%d.dat", extraction_prefix, f$id, extract_count);
+	++extract_count;
+	return r;
+	}

-	if ( extract_file_types in s$mime_type )
+event file_new(f: fa_file) &priority=5
+	{
+	if ( ! f?$source ) return;
+	if ( f$source != "FTP_DATA" ) return;
+
+	if ( f?$mime_type && extract_file_types in f$mime_type )
 		{
-		s$extract_file = T;
-		++s$num_extracted_files;
+		FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
+		                           $extract_filename=get_extraction_name(f)]);
+		return;
+		}
+
+	if ( ! f?$conns ) return;
+
+	for ( cid in f$conns )
+		{
+		local c: connection = f$conns[cid];
+
+		if ( [cid$resp_h, cid$resp_p] !in ftp_data_expected ) next;
+
+		local s = ftp_data_expected[cid$resp_h, cid$resp_p];
+
+		if ( ! s$extract_file ) next;
+
+		FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
+		                           $extract_filename=get_extraction_name(f)]);
+		return;
 		}
 	}

-event file_transferred(c: connection, prefix: string, descr: string,
-			mime_type: string) &priority=-4
+event file_state_remove(f: fa_file) &priority=4
 	{
-	local id = c$id;
-	if ( [id$resp_h, id$resp_p] !in ftp_data_expected )
-		return;
-		
-	local s = ftp_data_expected[id$resp_h, id$resp_p];
-	
-	if ( s$extract_file )
+	if ( ! f?$source ) return;
+	if ( f$source != "FTP_DATA" ) return;
+	if ( ! f?$info ) return;
+
+	for ( filename in f$info$extracted_files )
 		{
-		local suffix = fmt("%d.dat", s$num_extracted_files);
-		local fname = generate_extraction_filename(extraction_prefix, c, suffix);
-		s$extraction_file = open(fname);
-		if ( s$passive )
-			set_contents_file(id, CONTENTS_RESP, s$extraction_file);
-		else
-			set_contents_file(id, CONTENTS_ORIG, s$extraction_file);
+		local s: FTP::Info;
+		s$ts = network_time();
+		s$tags = set();
+		s$user = "<ftp-data>";
+		s$extraction_file = filename;
+
+		if ( f?$conns )
+			for ( cid in f$conns )
+				{
+				s$uid = f$conns[cid]$uid;
+				s$id = cid;
+				}
+
+		Log::write(FTP::LOG, s);
 		}
 	}

--- a/scripts/base/protocols/ftp/main.bro
+++ b/scripts/base/protocols/ftp/main.bro
@ -16,7 +16,8 @@ export {
 	
 	## List of commands that should have their command/response pairs logged.
 	const logged_commands = {
-		"APPE", "DELE", "RETR", "STOR", "STOU", "ACCT"
+		"APPE", "DELE", "RETR", "STOR", "STOU", "ACCT", "PORT", "PASV", "EPRT",
+		"EPSV"
 	} &redef;
 	
 	## This setting changes if passwords used in FTP sessions are captured or not.
@ -24,6 +25,18 @@ export {
 	
 	## User IDs that can be considered "anonymous".
 	const guest_ids = { "anonymous", "ftp", "ftpuser", "guest" } &redef;
+
+	## The expected endpoints of an FTP data channel.
+	type ExpectedDataChannel: record {
+		## Whether PASV mode is toggled for control channel.
+		passive: bool &log;
+		## The host that will be initiating the data connection.
+		orig_h: addr &log;
+		## The host that will be accepting the data connection.
+		resp_h: addr &log;
+		## The port at which the acceptor is listening for the data connection.
+		resp_p: port &log;
+	};
 	
 	type Info: record {
 		## Time when the command was sent.
@ -43,8 +56,6 @@ export {
 		
 		## Libmagic "sniffed" file type if the command indicates a file transfer.
 		mime_type:        string      &log &optional;
-		## Libmagic "sniffed" file description if the command indicates a file transfer.
-		mime_desc:        string      &log &optional;
 		## Size of the file if the command indicates a file transfer.
 		file_size:        count       &log &optional;
 		
@ -53,13 +64,16 @@ export {
 		## Reply message from the server in response to the command.
 		reply_msg:        string      &log &optional;
 		## Arbitrary tags that may indicate a particular attribute of this command.
-		tags:             set[string] &log &default=set();
-		
+		tags:             set[string] &log;
+
+		## Expected FTP data channel.
+		data_channel:     ExpectedDataChannel &log &optional;
+
 		## Current working directory that this session is in.  By making
-		## the default value '/.', we can indicate that unless something
+		## the default value '.', we can indicate that unless something
 		## more concrete is discovered that the existing but unknown
 		## directory is ok to use.
-		cwd:                string  &default="/.";
+		cwd:                string  &default=".";
 		
 		## Command that is currently waiting for a response.
 		cmdarg:             CmdArg  &optional;
@ -93,6 +107,7 @@ export {
 # Add the state tracking information variable to the connection record
 redef record connection += {
 	ftp: Info &optional;
+	ftp_data_reuse: bool &default=F;
 };

 # Configure DPD
@ -103,7 +118,7 @@ redef dpd_config += { [ANALYZER_FTP] = [$ports = ports] };
 redef likely_server_ports += { 21/tcp, 2811/tcp };

 # Establish the variable for tracking expected connections.
-global ftp_data_expected: table[addr, port] of Info &create_expire=5mins;
+global ftp_data_expected: table[addr, port] of Info &read_expire=5mins;

 event bro_init() &priority=5
 	{
@ -172,7 +187,13 @@ function ftp_message(s: Info)
 		
 		local arg = s$cmdarg$arg;
 		if ( s$cmdarg$cmd in file_cmds )
-			arg = fmt("ftp://%s%s", addr_to_uri(s$id$resp_h), build_path_compressed(s$cwd, arg));
+			{
+			local comp_path = build_path_compressed(s$cwd, arg);
+			if ( comp_path[0] != "/" )
+				comp_path = cat("/", comp_path);
+
+			arg = fmt("ftp://%s%s", addr_to_uri(s$id$resp_h), comp_path);
+			}
 		
 		s$ts=s$cmdarg$ts;
 		s$command=s$cmdarg$cmd;
@ -180,7 +201,7 @@ function ftp_message(s: Info)
 			delete s$arg;
 		else
 			s$arg=arg;
-		
+
 		Log::write(FTP::LOG, s);
 		}
 	
@ -188,10 +209,20 @@ function ftp_message(s: Info)
 	# and may not be used in all commands so they need reset to "blank" 
 	# values after logging.
 	delete s$mime_type;
-	delete s$mime_desc;
 	delete s$file_size;
+	# Same with data channel.
+	delete s$data_channel;
 	# Tags are cleared everytime too.
-	delete s$tags;
+	s$tags = set();
+	}
+
+function add_expected_data_channel(s: Info, chan: ExpectedDataChannel)
+	{
+	s$passive = chan$passive;
+	s$data_channel = chan;
+	ftp_data_expected[chan$resp_h, chan$resp_p] = s;
+	expect_connection(chan$orig_h, chan$resp_h, chan$resp_p, ANALYZER_FTP_DATA,
+	                  5mins);
 	}

 event ftp_request(c: connection, command: string, arg: string) &priority=5
@ -226,9 +257,8 @@ event ftp_request(c: connection, command: string, arg: string) &priority=5

 		if ( data$valid )
 			{
-			c$ftp$passive=F;
-			ftp_data_expected[data$h, data$p] = c$ftp;
-			expect_connection(id$resp_h, data$h, data$p, ANALYZER_FILE, 5mins);
+			add_expected_data_channel(c$ftp, [$passive=F, $orig_h=id$resp_h,
+			                                  $resp_h=data$h, $resp_p=data$p]);
 			}
 		else
 			{
@ -240,16 +270,13 @@ event ftp_request(c: connection, command: string, arg: string) &priority=5

 event ftp_reply(c: connection, code: count, msg: string, cont_resp: bool) &priority=5
 	{
-	# TODO: figure out what to do with continued FTP response (not used much)
-	#if ( cont_resp ) return;
-
-	local id = c$id;
 	set_ftp_session(c);
-	
 	c$ftp$cmdarg = get_pending_cmd(c$ftp$pending_commands, code, msg);
-	
 	c$ftp$reply_code = code;
 	c$ftp$reply_msg = msg;
+
+	# TODO: figure out what to do with continued FTP response (not used much)
+	if ( cont_resp ) return;
 	
 	# TODO: do some sort of generic clear text login processing here.
 	local response_xyz = parse_ftp_reply_code(code);
@ -278,10 +305,10 @@ event ftp_reply(c: connection, code: count, msg: string, cont_resp: bool) &prior
 			c$ftp$passive=T;
 			
 			if ( code == 229 && data$h == [::] )
-				data$h = id$resp_h;
-			
-			ftp_data_expected[data$h, data$p] = c$ftp;
-			expect_connection(id$orig_h, data$h, data$p, ANALYZER_FILE, 5mins);
+				data$h = c$id$resp_h;
+
+			add_expected_data_channel(c$ftp, [$passive=T, $orig_h=c$id$orig_h,
+			                          $resp_h=data$h, $resp_p=data$p]);
 			}
 		else
 			{
@ -311,7 +338,6 @@ event ftp_reply(c: connection, code: count, msg: string, cont_resp: bool) &prior
 		}
 	}

-
 event expected_connection_seen(c: connection, a: count) &priority=10
 	{
 	local id = c$id;
@ -327,18 +353,21 @@ event file_transferred(c: connection, prefix: string, descr: string,
 		{
 		local s = ftp_data_expected[id$resp_h, id$resp_p];
 		s$mime_type = split1(mime_type, /;/)[1];
-		s$mime_desc = descr;
 		}
 	}
-	
-event file_transferred(c: connection, prefix: string, descr: string,
-			mime_type: string) &priority=-5
+
+event connection_reused(c: connection) &priority=5
 	{
-	local id = c$id;
-	if ( [id$resp_h, id$resp_p] in ftp_data_expected )
-		delete ftp_data_expected[id$resp_h, id$resp_p];
+	if ( "ftp-data" in c$service )
+		c$ftp_data_reuse = T;
 	}
 	
+event connection_state_remove(c: connection) &priority=-5
+	{
+	if ( c$ftp_data_reuse ) return;
+	delete ftp_data_expected[c$id$resp_h, c$id$resp_p];
+	}
+
 # Use state remove event to cover connections terminated by RST.
 event connection_state_remove(c: connection) &priority=-5
 	{
--- a/scripts/base/protocols/http/load.bro
+++ b/scripts/base/protocols/http/load.bro
@ -1,5 +1,6 @@
@load ./main
@load ./utils
+@load ./file-analysis
@load ./file-ident
@load ./file-hash
@load ./file-extract
--- a/scripts/base/protocols/http/file-analysis.bro
+++ b/scripts/base/protocols/http/file-analysis.bro
@ -0,0 +1,31 @@
+@load ./main
+@load ./utils
+@load base/utils/conn-ids
+@load base/frameworks/file-analysis/main
+
+module HTTP;
+
+export {
+	## Default file handle provider for HTTP.
+	global get_file_handle: function(c: connection, is_orig: bool): string;
+}
+
+function get_file_handle(c: connection, is_orig: bool): string
+	{
+	if ( ! c?$http ) return "";
+
+	if ( c$http$range_request )
+		return cat(ANALYZER_HTTP, " ", is_orig, " ", c$id$orig_h, " ",
+		           build_url(c$http));
+
+	return cat(ANALYZER_HTTP, " ", c$start_time, " ", is_orig, " ",
+	           c$http$trans_depth, " ", id_string(c$id));
+	}
+
+module GLOBAL;
+
+event get_file_handle(tag: AnalyzerTag, c: connection, is_orig: bool)
+	{
+	if ( tag != ANALYZER_HTTP ) return;
+	set_file_handle(HTTP::get_file_handle(c, is_orig));
+	}
--- a/scripts/base/protocols/http/file-extract.bro
+++ b/scripts/base/protocols/http/file-extract.bro
@ -2,8 +2,7 @@
 ##! the message body from the server can be extracted with this script.

@load ./main
-@load ./file-ident
-@load base/utils/files
+@load ./file-analysis

 module HTTP;

@ -16,45 +15,70 @@ export {

 	redef record Info += {
 		## On-disk file where the response body was extracted to.
-		extraction_file:  file &log &optional;
+		extraction_file:  string &log &optional;
 		
 		## Indicates if the response body is to be extracted or not.  Must be 
-		## set before or by the first :bro:id:`http_entity_data` event for the
-		## content.
+		## set before or by the first :bro:see:`file_new` for the file content.
 		extract_file:     bool &default=F;
 	};
 }

-event http_entity_data(c: connection, is_orig: bool, length: count, data: string) &priority=-5
+global extract_count: count = 0;
+
+function get_extraction_name(f: fa_file): string
 	{
-	# Client body extraction is not currently supported in this script.
-	if ( is_orig )
-		return;
-	
-	if ( c$http$first_chunk )
+	local r = fmt("%s-%s-%d.dat", extraction_prefix, f$id, extract_count);
+	++extract_count;
+	return r;
+	}
+
+event file_new(f: fa_file) &priority=5
+	{
+	if ( ! f?$source ) return;
+	if ( f$source != "HTTP" ) return;
+	if ( ! f?$conns ) return;
+
+	local fname: string;
+	local c: connection;
+
+	if ( f?$mime_type && extract_file_types in f$mime_type )
 		{
-		if ( c$http?$mime_type &&
-		     extract_file_types in c$http$mime_type )
+		fname = get_extraction_name(f);
+		FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
+		                               $extract_filename=fname]);
+
+		for ( cid in f$conns )
 			{
-			c$http$extract_file = T;
-			}
-			
-		if ( c$http$extract_file )
-			{
-			local suffix = fmt("%s_%d.dat", is_orig ? "orig" : "resp", c$http_state$current_response);
-			local fname = generate_extraction_filename(extraction_prefix, c, suffix);
-			
-			c$http$extraction_file = open(fname);
-			enable_raw_output(c$http$extraction_file);
+			c = f$conns[cid];
+			if ( ! c?$http ) next;
+			c$http$extraction_file = fname;
 			}
+
+		return;
 		}

-	if ( c$http?$extraction_file )
-		print c$http$extraction_file, data;
-	}
+	local extracting: bool = F;

-event http_end_entity(c: connection, is_orig: bool)
-	{
-	if ( c$http?$extraction_file )
-		close(c$http$extraction_file);
+	for ( cid in f$conns )
+		{
+		c = f$conns[cid];
+
+		if ( ! c?$http ) next;
+
+		if ( ! c$http$extract_file ) next;
+
+		fname = get_extraction_name(f);
+		FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
+		                               $extract_filename=fname]);
+		extracting = T;
+		break;
+		}
+
+	if ( extracting )
+		for ( cid in f$conns )
+			{
+			c = f$conns[cid];
+			if ( ! c?$http ) next;
+			c$http$extraction_file = fname;
+			}
 	}
--- a/scripts/base/protocols/http/file-hash.bro
+++ b/scripts/base/protocols/http/file-hash.bro
@ -1,15 +1,11 @@
 ##! Calculate hashes for HTTP body transfers.

-@load ./file-ident
+@load ./main
+@load ./file-analysis

 module HTTP;

 export {
-	redef enum Notice::Type += {
-		## Indicates that an MD5 sum was calculated for an HTTP response body.
-		MD5,
-	};
-
 	redef record Info += {
 		## MD5 sum for a file transferred over HTTP calculated from the 
 		## response body.
@ -19,10 +15,6 @@ export {
 		## if a file should have an MD5 sum generated.  It must be
 		## set to T at the time of or before the first chunk of body data.
 		calc_md5:        bool       &default=F;
-		
-		## Indicates if an MD5 sum is being calculated for the current 
-		## request/response pair.
-		md5_handle: opaque of md5   &optional;
 	};
 	
 	## Generate MD5 sums for these filetypes.
@ -31,62 +23,46 @@ export {
 	                   &redef;
 }

-## Initialize and calculate the hash.
-event http_entity_data(c: connection, is_orig: bool, length: count, data: string) &priority=5
+event file_new(f: fa_file) &priority=5
 	{
-	if ( is_orig || ! c?$http ) return;
-	
-	if ( c$http$first_chunk )
-		{
-		if ( c$http$calc_md5 || 
-		     (c$http?$mime_type && generate_md5 in c$http$mime_type) )
-			{
-			c$http$md5_handle = md5_hash_init();
-			}
-		}
-	
-	if ( c$http?$md5_handle )
-		md5_hash_update(c$http$md5_handle, data);
-	}
-	
-## In the event of a content gap during a file transfer, detect the state for
-## the MD5 sum calculation and stop calculating the MD5 since it would be 
-## incorrect anyway.
-event content_gap(c: connection, is_orig: bool, seq: count, length: count) &priority=5
-	{
-	if ( is_orig || ! c?$http || ! c$http?$md5_handle ) return;
-	
-	set_state(c, F, is_orig);
-	md5_hash_finish(c$http$md5_handle); # Ignore return value.
-	delete c$http$md5_handle;
-	}
+	if ( ! f?$source ) return;
+	if ( f$source != "HTTP" ) return;

-## When the file finishes downloading, finish the hash and generate a notice.
-event http_message_done(c: connection, is_orig: bool, stat: http_message_stat) &priority=-3
-	{
-	if ( is_orig || ! c?$http ) return;
-	
-	if ( c$http?$md5_handle )
+	if ( f?$mime_type && generate_md5 in f$mime_type )
 		{
-		local url = build_url_http(c$http);
-		c$http$md5 = md5_hash_finish(c$http$md5_handle);
-		delete c$http$md5_handle;
-		
-		NOTICE([$note=MD5, $msg=fmt("%s %s %s", c$id$orig_h, c$http$md5, url),
-		        $sub=c$http$md5, $conn=c]);
+		FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]);
+		return;
+		}
+
+	if ( ! f?$conns ) return;
+
+	for ( cid in f$conns )
+		{
+		local c: connection = f$conns[cid];
+
+		if ( ! c?$http ) next;
+
+		if ( ! c$http$calc_md5 ) next;
+
+		FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]);
+		return;
 		}
 	}

-event connection_state_remove(c: connection) &priority=-5
+event file_state_remove(f: fa_file) &priority=4
 	{
-	if ( c?$http_state && 
-	     c$http_state$current_response in c$http_state$pending &&
-	     c$http_state$pending[c$http_state$current_response]?$md5_handle )
+	if ( ! f?$source ) return;
+	if ( f$source != "HTTP" ) return;
+	if ( ! f?$conns ) return;
+	if ( ! f?$info ) return;
+	if ( ! f$info?$md5 ) return;
+
+	for ( cid in f$conns )
 		{
-		# The MD5 sum isn't going to be saved anywhere since the entire 
-		# body wouldn't have been seen anyway and we'd just be giving an
-		# incorrect MD5 sum.
-		md5_hash_finish(c$http$md5_handle);
-		delete c$http$md5_handle;
+		local c: connection = f$conns[cid];
+
+		if ( ! c?$http ) next;
+
+		c$http$md5 = f$info$md5;
 		}
 	}
--- a/scripts/base/protocols/http/file-ident.bro
+++ b/scripts/base/protocols/http/file-ident.bro
@ -1,37 +1,28 @@
 ##! Identification of file types in HTTP response bodies with file content sniffing.

-@load base/frameworks/signatures
@load base/frameworks/notice
@load ./main
@load ./utils
-
-# Add the magic number signatures to the core signature set.
-@load-sigs ./file-ident.sig
-
-# Ignore the signatures used to match files
-redef Signatures::ignored_ids += /^matchfile-/;
+@load ./file-analysis

 module HTTP;

 export {
 	redef enum Notice::Type += {
-		## Indicates when the file extension doesn't seem to match the file contents.
+		## Indicates when the file extension doesn't seem to match the file
+		## contents.
 		Incorrect_File_Type,
 	};

 	redef record Info += {
 		## Mime type of response body identified by content sniffing.
 		mime_type:    string   &log &optional;
-		
-		## Indicates that no data of the current file transfer has been
-		## seen yet.  After the first :bro:id:`http_entity_data` event, it 
-		## will be set to F.
-		first_chunk:     bool &default=T;
 	};
 	
-	## Mapping between mime types and regular expressions for URLs
-	## The :bro:enum:`HTTP::Incorrect_File_Type` notice is generated if the pattern 
-	## doesn't match the mime type that was discovered.
+	## Mapping between mime type strings (without character set) and
+	## regular expressions for URLs.
+	## The :bro:enum:`HTTP::Incorrect_File_Type` notice is generated if the
+	## pattern doesn't match the mime type that was discovered.
 	const mime_types_extensions: table[string] of pattern = {
 		["application/x-dosexec"] = /\.([eE][xX][eE]|[dD][lL][lL])/,
 	} &redef;
@ -43,43 +34,72 @@ export {
 	const ignored_incorrect_file_type_urls = /^$/ &redef;
 }

-event signature_match(state: signature_state, msg: string, data: string) &priority=5
+event file_new(f: fa_file) &priority=5
 	{
-	# Only signatures matching file types are dealt with here.
-	if ( /^matchfile-/ !in state$sig_id ) return;
+	if ( ! f?$source ) return;
+	if ( f$source != "HTTP" ) return;
+	if ( ! f?$mime_type ) return;
+	if ( ! f?$conns ) return;

-	local c = state$conn;
-	set_state(c, F, F);
-	
-	# Not much point in any of this if we don't know about the HTTP session.
-	if ( ! c?$http ) return;
-	
-	# Set the mime type that was detected.
-	c$http$mime_type = msg;
-	
-	if ( msg in mime_types_extensions && 
-	     c$http?$uri && mime_types_extensions[msg] !in c$http$uri )
+	for ( cid in f$conns )
 		{
+		local c: connection = f$conns[cid];
+
+		if ( ! c?$http ) next;
+
+		c$http$mime_type = f$mime_type;
+
+		local mime_str: string = c$http$mime_type;
+
+		if ( mime_str !in mime_types_extensions ) next;
+		if ( ! c$http?$uri ) next;
+		if ( mime_types_extensions[mime_str] in c$http$uri ) next;
+
 		local url = build_url_http(c$http);
-		
-		if ( url == ignored_incorrect_file_type_urls )
-			return;
-		
-		local message = fmt("%s %s %s", msg, c$http$method, url);
+
+		if ( url == ignored_incorrect_file_type_urls ) next;
+
+		local message = fmt("%s %s %s", mime_str, c$http$method, url);
 		NOTICE([$note=Incorrect_File_Type,
 		        $msg=message,
 		        $conn=c]);
 		}
 	}

-event http_entity_data(c: connection, is_orig: bool, length: count, data: string) &priority=5
+event file_over_new_connection(f: fa_file, c: connection) &priority=5
 	{
-	if ( c$http$first_chunk && ! c$http?$mime_type )
-			c$http$mime_type = split1(identify_data(data, T), /;/)[1];
+	if ( ! f?$source ) return;
+	if ( f$source != "HTTP" ) return;
+	if ( ! f?$mime_type ) return;
+	if ( ! c?$http ) return;
+
+	# Spread the mime around (e.g. for partial content, file_type event only
+	# happens once for the first connection, but if there are subsequent
+	# connections to transfer the same file, they'll be lacking the mime_type
+	# field if we don't do this).
+	c$http$mime_type = f$mime_type;
 	}
-	
-event http_entity_data(c: connection, is_orig: bool, length: count, data: string) &priority=-10
+
+# Tracks byte-range request / partial content response mime types, indexed
+# by [connection, uri] pairs.  This is needed because a person can pipeline
+# byte-range requests over multiple connections to the same uri.  Without
+# the tracking, only the first request in the pipeline for each connection
+# would get a mime_type field assigned to it (by the FileAnalysis policy hooks).
+global partial_types: table[conn_id, string] of string &read_expire=5mins;
+
+# Priority 4 so that it runs before the handler that will write to http.log.
+event http_message_done(c: connection, is_orig: bool, stat: http_message_stat)
+	&priority=4
 	{
-	if ( c$http$first_chunk )
-		c$http$first_chunk=F;
+	if ( ! c$http$range_request ) return;
+	if ( ! c$http?$uri ) return;
+
+	if ( c$http?$mime_type )
+		{
+		partial_types[c$id, c$http$uri] = c$http$mime_type;
+		return;
+		}
+
+	if ( [c$id, c$http$uri] in partial_types )
+		c$http$mime_type = partial_types[c$id, c$http$uri];
 	}
--- a/scripts/base/protocols/http/file-ident.sig
+++ b/scripts/base/protocols/http/file-ident.sig
@ -1,144 +0,0 @@
-# These signatures are used as a replacement for libmagic.  The signature
-# name needs to start with "matchfile" and the "event" directive takes 
-# the mime type of the file matched by the http-reply-body pattern.
-#
-# Signatures from: http://www.garykessler.net/library/file_sigs.html
-
-signature matchfile-exe {
-	http-reply-body /\x4D\x5A/
-	event "application/x-dosexec"
-}
-
-signature matchfile-elf {
-	http-reply-body /\x7F\x45\x4C\x46/
-	event "application/x-executable"
-}
-
-signature matchfile-script {
-	# This is meant to match the interpreter declaration at the top of many 
-	# interpreted scripts.
-	http-reply-body /\#\![[:blank:]]?\//
-	event "application/x-script"
-}
-
-signature matchfile-wmv {
-	http-reply-body /\x30\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C/
-	event "video/x-ms-wmv"
-}
-
-signature matchfile-flv {
-	http-reply-body /\x46\x4C\x56\x01/
-	event "video/x-flv"
-}
-
-signature matchfile-swf {
-	http-reply-body /[\x46\x43]\x57\x53/
-	event "application/x-shockwave-flash"
-}
-
-signature matchfile-jar {
-	http-reply-body /\x5F\x27\xA8\x89/
-	event "application/java-archive"
-}
-
-signature matchfile-class {
-	http-reply-body /\xCA\xFE\xBA\xBE/
-	event "application/java-byte-code"
-}
-
-signature matchfile-msoffice-2007 {
-	# MS Office 2007 XML documents
-	http-reply-body /\x50\x4B\x03\x04\x14\x00\x06\x00/
-	event "application/msoffice"
-}
-
-signature matchfile-msoffice {
-	# Older MS Office files
-	http-reply-body /\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1/
-	event "application/msoffice"
-}
-
-signature matchfile-rtf {
-	http-reply-body /\x7B\x5C\x72\x74\x66\x31/
-	event "application/rtf"
-}
-
-signature matchfile-lnk {
-	http-reply-body /\x4C\x00\x00\x00\x01\x14\x02\x00\x00\x00\x00\x00\xC0\x00\x00\x00\x00\x00\x00\x46/
-	event "application/x-ms-shortcut"
-}
-
-signature matchfile-torrent {
-	http-reply-body /\x64\x38\x3A\x61\x6E\x6E\x6F\x75\x6E\x63\x65/
-	event "application/x-bittorrent"
-}
-
-signature matchfile-pdf {
-	http-reply-body /\x25\x50\x44\x46/
-	event "application/pdf"
-}
-
-signature matchfile-html {
-	http-reply-body /<[hH][tT][mM][lL]/
-	event "text/html"
-}
-
-signature matchfile-html2 {
-	http-reply-body /<![dD][oO][cC][tT][yY][pP][eE][[:blank:]][hH][tT][mM][lL]/
-	event "text/html"
-}
-
-signature matchfile-xml {
-	http-reply-body /<\??[xX][mM][lL]/
-	event "text/xml"
-}
-
-signature matchfile-gif {
-	http-reply-body /\x47\x49\x46\x38[\x37\x39]\x61/
-	event "image/gif"
-}
-
-signature matchfile-jpg {
-	http-reply-body /\xFF\xD8\xFF[\xDB\xE0\xE1\xE2\xE3\xE8]..[\x4A\x45\x53][\x46\x78\x50][\x49\x69][\x46\x66]/
-	event "image/jpeg"
-}
-
-signature matchfile-tiff {
-	http-reply-body /\x4D\x4D\x00[\x2A\x2B]/
-	event "image/tiff"
-}
-
-signature matchfile-png {
-	http-reply-body /\x89\x50\x4e\x47/
-	event "image/png"
-}
-
-signature matchfile-zip {
-	http-reply-body /\x50\x4B\x03\x04/
-	event "application/zip"
-}
-
-signature matchfile-bzip {
-	http-reply-body /\x42\x5A\x68/
-	event "application/bzip2"
-}
-
-signature matchfile-gzip {
-	http-reply-body /\x1F\x8B\x08/
-	event "application/x-gzip"
-}
-
-signature matchfile-cab {
-	http-reply-body /\x4D\x53\x43\x46/
-	event "application/vnd.ms-cab-compressed"
-}
-
-signature matchfile-rar {
-	http-reply-body /\x52\x61\x72\x21\x1A\x07\x00/
-	event "application/x-rar-compressed"
-}
-
-signature matchfile-7z {
-	http-reply-body /\x37\x7A\xBC\xAF\x27\x1C/
-	event "application/x-7z-compressed"
-}
--- a/scripts/base/protocols/http/main.bro
+++ b/scripts/base/protocols/http/main.bro
@ -71,6 +71,10 @@ export {
 		
 		## All of the headers that may indicate if the request was proxied.
 		proxied:                 set[string] &log &optional;
+
+		## Indicates if this request can assume 206 partial content in
+		## response.
+		range_request:           bool &default=F;
 	};
 	
 	## Structure to maintain state for an HTTP connection with multiple 
@ -235,6 +239,9 @@ event http_header(c: connection, is_orig: bool, name: string, value: string) &pr
 		else if ( name == "HOST" )
 			# The split is done to remove the occasional port value that shows up here.
 			c$http$host = split1(value, /:/)[1];
+
+		else if ( name == "RANGE" )
+			c$http$range_request = T;
 		
 		else if ( name == "USER-AGENT" )
 			c$http$user_agent = value;
--- a/scripts/base/protocols/irc/load.bro
+++ b/scripts/base/protocols/irc/load.bro
@ -1,2 +1,3 @@
@load ./main
-@load ./dcc-send
+@load ./dcc-send
+@load ./file-analysis
--- a/scripts/base/protocols/irc/dcc-send.bro
+++ b/scripts/base/protocols/irc/dcc-send.bro
@ -28,69 +28,140 @@ export {
 		dcc_file_size:         count  &log &optional;
 		## Sniffed mime type of the file.
 		dcc_mime_type:         string &log &optional;
-		
+
 		## The file handle for the file to be extracted
-		extraction_file:       file &log &optional;
-		
+		extraction_file:       string &log &optional;
+
 		## A boolean to indicate if the current file transfer should be extracted.
 		extract_file:          bool &default=F;
-		
-		## The count of the number of file that have been extracted during the session.
-		num_extracted_files:   count &default=0;
 	};
 }

-global dcc_expected_transfers: table[addr, port] of Info = table();
+global dcc_expected_transfers: table[addr, port] of Info &read_expire=5mins;

-event file_transferred(c: connection, prefix: string, descr: string,
-                       mime_type: string) &priority=3
+global extract_count: count = 0;
+
+# Stores the file's MIME type in the dcc_mime_type field of every expected
+# DCC transfer record whose [responder address, responder port] matches one
+# of the file's connections.  Callers must ensure f$mime_type is set.
+function set_dcc_mime(f: fa_file)
 	{
-	local id = c$id;
-	if ( [id$resp_h, id$resp_p] !in dcc_expected_transfers )
-		return;
-		
-	local irc = dcc_expected_transfers[id$resp_h, id$resp_p];
-	
-	irc$dcc_mime_type = split1(mime_type, /;/)[1];
+	if ( ! f?$conns ) return;

-	if ( extract_file_types == irc$dcc_mime_type )
+	for ( cid in f$conns )
 		{
-		irc$extract_file = T;
-		}
-		
-	if ( irc$extract_file )
-		{
-		local suffix = fmt("%d.dat", ++irc$num_extracted_files);
-		local fname = generate_extraction_filename(extraction_prefix, c, suffix);
-		irc$extraction_file = open(fname);
+		# Only the connection id is needed; the connection record itself
+		# is not looked up.
+		if ( [cid$resp_h, cid$resp_p] !in dcc_expected_transfers ) next;
+
+		local s = dcc_expected_transfers[cid$resp_h, cid$resp_p];
+
+		s$dcc_mime_type = f$mime_type;
 		}
 	}

-event file_transferred(c: connection, prefix: string, descr: string,
-			mime_type: string) &priority=-4
+# Stores *filename* in the extraction_file field of every expected DCC
+# transfer record whose [responder address, responder port] matches one of
+# the file's connections.
+function set_dcc_extraction_file(f: fa_file, filename: string)
 	{
-	local id = c$id;
-	if ( [id$resp_h, id$resp_p] !in dcc_expected_transfers )
+	if ( ! f?$conns ) return;
+
+	for ( cid in f$conns )
+		{
+		# Only the connection id is needed; the connection record itself
+		# is not looked up.
+		if ( [cid$resp_h, cid$resp_p] !in dcc_expected_transfers ) next;
+
+		local s = dcc_expected_transfers[cid$resp_h, cid$resp_p];
+
+		s$extraction_file = filename;
+		}
+	}
+
+# Builds the local filename for extracting file *f* from the extraction
+# prefix, the file's id and the current value of extract_count, and advances
+# extract_count so the next call yields a different name.
+function get_extraction_name(f: fa_file): string
+	{
+	local seq = extract_count;
+	++extract_count;
+	return fmt("%s-%s-%d.dat", extraction_prefix, f$id, seq);
+	}
+
+# Sets the IRC::Info MIME type for new files whose source is "IRC_DATA" and
+# whose mime_type field is present.
+event file_new(f: fa_file) &priority=5
+	{
+	if ( f?$source && f$source == "IRC_DATA" && f?$mime_type )
+		set_dcc_mime(f);
+	}
+
+# This handler checks whether file extraction is desired: either the file's
+# MIME type matches extract_file_types, or an expected DCC transfer on one
+# of the file's connections has extract_file set.  At most one extraction
+# analyzer is attached per file.
+event file_new(f: fa_file) &priority=5
+	{
+	if ( ! f?$source ) return;
+	if ( f$source != "IRC_DATA" ) return;
+
+	local fname: string;
+
+	if ( f?$mime_type && extract_file_types in f$mime_type )
+		{
+		fname = get_extraction_name(f);
+		FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
+		                               $extract_filename=fname]);
+		set_dcc_extraction_file(f, fname);
 		return;
+		}

-	local irc = dcc_expected_transfers[id$resp_h, id$resp_p];
+	if ( ! f?$conns ) return;

-	local tmp = irc$command;
-	irc$command = "DCC";
-	Log::write(IRC::LOG, irc);
-	irc$command = tmp;
+	for ( cid in f$conns )
+		{

-	if ( irc?$extraction_file )
-		set_contents_file(id, CONTENTS_RESP, irc$extraction_file);
+		if ( [cid$resp_h, cid$resp_p] !in dcc_expected_transfers ) next;

-	# Delete these values in case another DCC transfer 
-	# happens during the IRC session.
-	delete irc$extract_file;
-	delete irc$extraction_file;
-	delete irc$dcc_file_name;
-	delete irc$dcc_file_size;
-	delete irc$dcc_mime_type;
-	delete dcc_expected_transfers[id$resp_h, id$resp_p];
+		local s = dcc_expected_transfers[cid$resp_h, cid$resp_p];
+
+		if ( ! s$extract_file ) next;
+
+		# First transfer that asked for extraction wins; stop afterwards.
+		fname = get_extraction_name(f);
+		FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
+		                               $extract_filename=fname]);
+		s$extraction_file = fname;
+		return;
+		}
+	}
+
+# Writes an IRC log entry with command "DCC" for the first of the file's
+# connections that matches an expected DCC transfer, then clears the
+# per-transfer fields of that record.
+function log_dcc(f: fa_file)
+	{
+	if ( ! f?$conns ) return;
+
+	for ( cid in f$conns )
+		{
+		if ( [cid$resp_h, cid$resp_p] !in dcc_expected_transfers ) next;
+
+		local irc = dcc_expected_transfers[cid$resp_h, cid$resp_p];
+
+		# Temporarily relabel the record as a DCC command for logging,
+		# then restore the original command.
+		local tmp = irc$command;
+		irc$command = "DCC";
+		Log::write(IRC::LOG, irc);
+		irc$command = tmp;
+
+		# Delete these values in case another DCC transfer
+		# happens during the IRC session.
+		delete irc$extract_file;
+		delete irc$extraction_file;
+		delete irc$dcc_file_name;
+		delete irc$dcc_file_size;
+		delete irc$dcc_mime_type;
+
+		return;
+		}
+	}
+
+# Logs the DCC transfer for new files whose source is "IRC_DATA".
+event file_new(f: fa_file) &priority=-5
+	{
+	if ( f?$source && f$source == "IRC_DATA" )
+		log_dcc(f);
 	}

 event irc_dcc_message(c: connection, is_orig: bool,
@ -100,11 +171,11 @@ event irc_dcc_message(c: connection, is_orig: bool,
 	{
 	set_session(c);
 	if ( dcc_type != "SEND" )
-            return;
+		return;
 	c$irc$dcc_file_name = argument;
 	c$irc$dcc_file_size = size;
 	local p = count_to_port(dest_port, tcp);
-	expect_connection(to_addr("0.0.0.0"), address, p, ANALYZER_FILE, 5 min);
+	expect_connection(to_addr("0.0.0.0"), address, p, ANALYZER_IRC_DATA, 5 min);
 	dcc_expected_transfers[address, p] = c$irc;
 	}

@ -114,3 +185,8 @@ event expected_connection_seen(c: connection, a: count) &priority=10
 	if ( [id$resp_h, id$resp_p] in dcc_expected_transfers )
 		add c$service["irc-dcc-data"];
 	}
+
+# Removes the expected-transfer entry keyed by this connection's responder
+# address and port when the connection's state is removed.
+event connection_state_remove(c: connection) &priority=-5
+	{
+	local id = c$id;
+	delete dcc_expected_transfers[id$resp_h, id$resp_p];
+	}
--- a/scripts/base/protocols/irc/file-analysis.bro
+++ b/scripts/base/protocols/irc/file-analysis.bro
@ -0,0 +1,24 @@
+@load ./dcc-send.bro
+@load base/utils/conn-ids
+@load base/frameworks/file-analysis/main
+
+module IRC;
+
+export {
+	## Default file handle provider for IRC.
+	global get_file_handle: function(c: connection, is_orig: bool): string;
+}
+
+# Returns the file handle string for an IRC data connection: the analyzer
+# tag, the connection start time and the connection id string.  Data in the
+# originator direction gets an empty handle.
+function get_file_handle(c: connection, is_orig: bool): string
+	{
+	if ( ! is_orig )
+		return cat(ANALYZER_IRC_DATA, " ", c$start_time, " ", id_string(c$id));
+
+	return "";
+	}
+
+module GLOBAL;
+
+# Supplies the IRC file handle when the requesting analyzer is IRC_DATA;
+# other analyzer tags are ignored.
+event get_file_handle(tag: AnalyzerTag, c: connection, is_orig: bool)
+	{
+	if ( tag == ANALYZER_IRC_DATA )
+		set_file_handle(IRC::get_file_handle(c, is_orig));
+	}
--- a/scripts/base/protocols/smtp/load.bro
+++ b/scripts/base/protocols/smtp/load.bro
@ -1,3 +1,4 @@
@load ./main
@load ./entities
-@load ./entities-excerpt
+@load ./entities-excerpt
+@load ./file-analysis
--- a/scripts/base/protocols/smtp/entities-excerpt.bro
+++ b/scripts/base/protocols/smtp/entities-excerpt.bro
@ -9,44 +9,29 @@ export {
 	redef record SMTP::EntityInfo += {
 		## The entity body excerpt.
 		excerpt:    string &log &default="";
-		
-		## Internal tracking to know how much of the body should be included
-		## in the excerpt.
-		excerpt_len: count &optional;
 	};
 	
 	## This is the default value for how much of the entity body should be
-	## included for all MIME entities.
+	## included for all MIME entities.  The lesser of this value and
+	## :bro:see:`default_file_bof_buffer_size` will be used.
 	const default_entity_excerpt_len = 0 &redef;
-	
-	## This table defines how much of various entity bodies should be 
-	## included in excerpts.
-	const entity_excerpt_len: table[string] of count = {} 
-		&redef
-		&default = default_entity_excerpt_len;
 }

-event mime_segment_data(c: connection, length: count, data: string) &priority=-1
+# Fills the excerpt of the current SMTP MIME entity from the file's
+# beginning-of-file buffer, for SMTP-sourced files.  Nothing is copied when
+# default_entity_excerpt_len is 0.
+event file_new(f: fa_file) &priority=5
 	{
-	if ( ! c?$smtp ) return;
-	
-	if ( c$smtp$current_entity$content_len == 0 )
-		c$smtp$current_entity$excerpt_len = entity_excerpt_len[c$smtp$current_entity$mime_type];
-	}
+	if ( ! f?$source ) return;
+	if ( f$source != "SMTP" ) return;
+	if ( ! f?$bof_buffer ) return;
+	if ( ! f?$conns ) return;

-event mime_segment_data(c: connection, length: count, data: string) &priority=-2
-	{
-	if ( ! c?$smtp ) return;
-	
-	local ent = c$smtp$current_entity;
-	if ( ent$content_len < ent$excerpt_len )
+	for ( cid in f$conns )
 		{
-		if ( ent$content_len + length < ent$excerpt_len )
-			ent$excerpt = cat(ent$excerpt, data);
-		else
-			{
-			local x_bytes = ent$excerpt_len - ent$content_len;
-			ent$excerpt = cat(ent$excerpt, sub_bytes(data, 1, x_bytes));
-			}
+		# The excerpt length does not depend on the connection, so stop
+		# as soon as excerpts are known to be disabled.
+		if ( default_entity_excerpt_len == 0 ) return;
+
+		local c: connection = f$conns[cid];
+
+		# current_entity is optional; skip connections without one, as
+		# the other SMTP file handlers do.
+		if ( ! c?$smtp || ! c$smtp?$current_entity ) next;
+
+		c$smtp$current_entity$excerpt =
+		        f$bof_buffer[0:default_entity_excerpt_len];
 		}
 	}
--- a/scripts/base/protocols/smtp/entities.bro
+++ b/scripts/base/protocols/smtp/entities.bro
@ -7,11 +7,6 @@
 module SMTP;

 export {
-	redef enum Notice::Type += {
-		## Indicates that an MD5 sum was calculated for a MIME message.
-		MD5,
-	};
-
 	redef enum Log::ID += { ENTITIES_LOG };

 	type EntityInfo: record {
@ -34,15 +29,12 @@ export {
 		## Optionally calculate the file's MD5 sum.  Must be set prior to the 
 		## first data chunk being see in an event.
 		calc_md5:         bool            &default=F;
-		## This boolean value indicates if an MD5 sum is being calculated 
-		## for the current file transfer.
-		md5_handle:       opaque of md5   &optional;
 		
 		## Optionally write the file to disk.  Must be set prior to first 
 		## data chunk being seen in an event.
 		extract_file:     bool            &default=F;
 		## Store the file handle here for the file currently being extracted.
-		extraction_file:  file            &log &optional;
+		extraction_file:  string          &log &optional;
 	};

 	redef record Info += {
@ -51,9 +43,6 @@ export {
 	};

 	redef record State += {
-		## Store a count of the number of files that have been transferred in
-		## a conversation to create unique file names on disk.
-		num_extracted_files:  count   &default=0;
 		## Track the number of MIME encoded files transferred during a session.
 		mime_level:           count   &default=0;
 	};
@ -77,6 +66,8 @@ export {
 	global log_mime: event(rec: EntityInfo);
 }

+global extract_count: count = 0;
+
 event bro_init() &priority=5
 	{
 	Log::create_stream(SMTP::ENTITIES_LOG, [$columns=EntityInfo, $ev=log_mime]);
@ -97,77 +88,128 @@ function set_session(c: connection, new_entity: bool)
 		}
 	}

+# Builds the local filename for extracting file *f* from the extraction
+# prefix, the file's id and the current value of extract_count, and advances
+# extract_count so the next call yields a different name.
+function get_extraction_name(f: fa_file): string
+	{
+	local seq = extract_count;
+	++extract_count;
+	return fmt("%s-%s-%d.dat", extraction_prefix, f$id, seq);
+	}
+
 event mime_begin_entity(c: connection) &priority=10
 	{
 	if ( ! c?$smtp ) return;
-	
+
 	set_session(c, T);
 	}

-# This has priority -10 because other handlers need to know the current
-# content_len before it's updated by this handler.
-event mime_segment_data(c: connection, length: count, data: string) &priority=-10
+# For SMTP-sourced files, attaches the analyzers requested on the current
+# MIME entity of each associated connection: an extraction analyzer (added
+# once per file, with the chosen filename recorded on every requesting
+# entity) and an MD5 analyzer.
+event file_new(f: fa_file) &priority=5
 	{
-	if ( ! c?$smtp ) return;
-	
-	c$smtp$current_entity$content_len = c$smtp$current_entity$content_len + length;
-	}
+	if ( ! f?$source ) return;
+	if ( f$source != "SMTP" ) return;
+	if ( ! f?$conns ) return;

-event mime_segment_data(c: connection, length: count, data: string) &priority=7
-    {
-	if ( ! c?$smtp ) return;
-	if ( c$smtp$current_entity$content_len == 0 )
-		c$smtp$current_entity$mime_type = split1(identify_data(data, T), /;/)[1];
-	}
+	local fname: string;
+	local extracting: bool = F;

-event mime_segment_data(c: connection, length: count, data: string) &priority=-5
-	{
-	if ( ! c?$smtp ) return;
-
-	if ( c$smtp$current_entity$content_len == 0 )
+	for ( cid in f$conns )
 		{
-		local entity = c$smtp$current_entity;
-		if ( generate_md5 in entity$mime_type && ! never_calc_md5 )
-			entity$calc_md5 = T;
+		local c: connection = f$conns[cid];

-		if ( entity$calc_md5 )
-			entity$md5_handle = md5_hash_init();
-		}
+		if ( ! c?$smtp ) next;
+		if ( ! c$smtp?$current_entity ) next;

-	if ( c$smtp$current_entity?$md5_handle )
-		md5_hash_update(entity$md5_handle, data);
-}
+		if ( c$smtp$current_entity$extract_file )
+			{
+			if ( ! extracting )
+				{
+				fname = get_extraction_name(f);
+				FileAnalysis::add_analyzer(f,
+				                           [$tag=FileAnalysis::ANALYZER_EXTRACT,
+				                            $extract_filename=fname]);
+				extracting = T;
+				# get_extraction_name() already advanced extract_count.
+				}

-## In the event of a content gap during the MIME transfer, detect the state for
-## the MD5 sum calculation and stop calculating the MD5 since it would be
-## incorrect anyway.
-event content_gap(c: connection, is_orig: bool, seq: count, length: count) &priority=5
-	{
-	if ( is_orig || ! c?$smtp || ! c$smtp?$current_entity ) return;
+			c$smtp$current_entity$extraction_file = fname;
+			}

-	local entity = c$smtp$current_entity;
-	if ( entity?$md5_handle )
-		{
-		md5_hash_finish(entity$md5_handle);
-		delete entity$md5_handle;
+		if ( c$smtp$current_entity$calc_md5 )
+			FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]);
 		}
 	}

-event mime_end_entity(c: connection) &priority=-3
-    {
-	# TODO: this check is only due to a bug in mime_end_entity that
-	#       causes the event to be generated twice for the same real event.
-	if ( ! c?$smtp || ! c$smtp?$current_entity )
+# Attaches an extraction analyzer to *f* when its MIME type matches
+# extract_file_types and no extraction analyzer is attached yet, then
+# records the chosen filename on the current MIME entity of each associated
+# SMTP connection.  Callers must ensure f$mime_type is set.
+function check_extract_by_type(f: fa_file)
+	{
+	if ( extract_file_types !in f$mime_type ) return;
+
+	if ( f?$info && FileAnalysis::ANALYZER_EXTRACT in f$info$analyzers )
 		return;

-	local entity = c$smtp$current_entity;
-	if ( entity?$md5_handle )
-		{
-		entity$md5 = md5_hash_finish(entity$md5_handle);
-		delete entity$md5_handle;
+	local fname = get_extraction_name(f);
+	FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
+	                               $extract_filename=fname]);

-		NOTICE([$note=MD5, $msg=fmt("Calculated a hash for a MIME entity from %s", c$id$orig_h),
-				$sub=entity$md5, $conn=c]);
+	if ( ! f?$conns ) return;
+
+	for ( cid in f$conns )
+		{
+		local c: connection = f$conns[cid];
+		# current_entity is optional; skip connections without one.
+		if ( ! c?$smtp || ! c$smtp?$current_entity ) next;
+		c$smtp$current_entity$extraction_file = fname;
+		}
+	}
+
+# Attaches an MD5 analyzer to *f* when MD5 calculation is not globally
+# disabled and the file's MIME type matches generate_md5.
+function check_md5_by_type(f: fa_file)
+	{
+	if ( ! never_calc_md5 && generate_md5 in f$mime_type )
+		FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]);
+	}
+
+# For SMTP-sourced files with a known MIME type: copies the type onto the
+# current MIME entity of each associated SMTP connection, then applies the
+# type-based extraction and MD5 policies.
+event file_new(f: fa_file) &priority=5
+	{
+	if ( ! f?$source || f$source != "SMTP" || ! f?$mime_type )
+		return;
+
+	if ( f?$conns )
+		{
+		for ( cid in f$conns )
+			{
+			local c: connection = f$conns[cid];
+
+			if ( c?$smtp && c$smtp?$current_entity )
+				c$smtp$current_entity$mime_type = f$mime_type;
+			}
+		}
+
+	check_extract_by_type(f);
+	check_md5_by_type(f);
+	}
+
+# Handles file_state_remove for SMTP-sourced files: copies the file's MD5
+# (when present) and seen byte count onto the current MIME entity of the
+# first associated SMTP connection that has one, writes that entity to
+# SMTP::ENTITIES_LOG and deletes it from the connection.
+event file_state_remove(f: fa_file) &priority=4
+	{
+	if ( ! f?$source ) return;
+	if ( f$source != "SMTP" ) return;
+	if ( ! f?$conns ) return;
+
+	for ( cid in f$conns )
+		{
+		local c: connection = f$conns[cid];
+
+		if ( ! c?$smtp ) next;
+		if ( ! c$smtp?$current_entity ) next;
+		# Only log if there was some content.
+		# NOTE(review): with zero seen bytes the entity is neither logged
+		# nor deleted here -- confirm that leaving it in place is intended.
+		if ( f$seen_bytes == 0 ) next;
+
+		if ( f?$info && f$info?$md5 )
+			c$smtp$current_entity$md5 = f$info$md5;
+
+		c$smtp$current_entity$content_len = f$seen_bytes;
+		Log::write(SMTP::ENTITIES_LOG, c$smtp$current_entity);
+		delete c$smtp$current_entity;
+		# Stop after the first connection whose entity was logged.
+		return;
 		}
 	}

@ -179,66 +221,7 @@ event mime_one_header(c: connection, h: mime_header_rec)
 	     /[fF][iI][lL][eE][nN][aA][mM][eE]/ in h$value )
 		c$smtp$current_entity$filename = extract_filename_from_content_disposition(h$value);

-	if ( h$name == "CONTENT-TYPE" && 
+	if ( h$name == "CONTENT-TYPE" &&
 	     /[nN][aA][mM][eE][:blank:]*=/ in h$value )
 		c$smtp$current_entity$filename = extract_filename_from_content_disposition(h$value);
 	}
-
-event mime_end_entity(c: connection) &priority=-5
-	{
-	if ( ! c?$smtp ) return;
-
-	# This check and the delete below are just to cope with a bug where
-	# mime_end_entity can be generated multiple times for the same event.
-	if ( ! c$smtp?$current_entity )
-		return;
-
-	# Only log is there was some content.
-	if ( c$smtp$current_entity$content_len > 0 )
-		Log::write(SMTP::ENTITIES_LOG, c$smtp$current_entity);
-
-	delete c$smtp$current_entity;
-	}
-
-event mime_segment_data(c: connection, length: count, data: string) &priority=5
-	{
-	if ( ! c?$smtp ) return;
-	
-	if ( extract_file_types in c$smtp$current_entity$mime_type )
-		c$smtp$current_entity$extract_file = T;
-	}
-
-event mime_segment_data(c: connection, length: count, data: string) &priority=3
-	{
-	if ( ! c?$smtp ) return;
-	
-	if ( c$smtp$current_entity$extract_file && 
-	     c$smtp$current_entity$content_len == 0 )
-		{
-		local suffix = fmt("%d.dat", ++c$smtp_state$num_extracted_files);
-		local fname = generate_extraction_filename(extraction_prefix, c, suffix);
-		c$smtp$current_entity$extraction_file = open(fname);
-		enable_raw_output(c$smtp$current_entity$extraction_file);
-		}
-	}
-
-event mime_segment_data(c: connection, length: count, data: string) &priority=-5
-	{
-	if ( ! c?$smtp ) return;
-	
-	if ( c$smtp$current_entity$extract_file && c$smtp$current_entity?$extraction_file )
-		print c$smtp$current_entity$extraction_file, data;
-	}
-
-event mime_end_entity(c: connection) &priority=-3
-	{
-	if ( ! c?$smtp ) return;
-	
-	# TODO: this check is only due to a bug in mime_end_entity that
-	#       causes the event to be generated twice for the same real event.
-	if ( ! c$smtp?$current_entity )
-		return;
-
-	if ( c$smtp$current_entity?$extraction_file )
-		close(c$smtp$current_entity$extraction_file);
-	}
--- a/scripts/base/protocols/smtp/file-analysis.bro
+++ b/scripts/base/protocols/smtp/file-analysis.bro
@ -0,0 +1,26 @@
+@load ./main
+@load ./entities
+@load base/utils/conn-ids
+@load base/frameworks/file-analysis/main
+
+module SMTP;
+
+export {
+	## Default file handle provider for SMTP.
+	global get_file_handle: function(c: connection, is_orig: bool): string;
+}
+
+# Returns the file handle string for an SMTP connection: the analyzer tag,
+# the connection start time, the transaction depth and the MIME level.
+# Connections without an smtp record get an empty handle.
+function get_file_handle(c: connection, is_orig: bool): string
+	{
+	if ( c?$smtp )
+		return cat(ANALYZER_SMTP, " ", c$start_time, " ",
+		           c$smtp$trans_depth, " ", c$smtp_state$mime_level);
+
+	return "";
+	}
+
+module GLOBAL;
+
+# Supplies the SMTP file handle when the requesting analyzer is SMTP; other
+# analyzer tags are ignored.
+event get_file_handle(tag: AnalyzerTag, c: connection, is_orig: bool)
+	{
+	if ( tag == ANALYZER_SMTP )
+		set_file_handle(SMTP::get_file_handle(c, is_orig));
+	}
--- a/scripts/base/protocols/ssh/main.bro
+++ b/scripts/base/protocols/ssh/main.bro
@ -1,10 +1,11 @@
-##! Base SSH analysis script.  The heuristic to blindly determine success or 
+##! Base SSH analysis script.  The heuristic to blindly determine success or
 ##! failure for SSH connections is implemented here.  At this time, it only
 ##! uses the size of the data being returned from the server to make the
-##! heuristic determination about success of the connection.  
+##! heuristic determination about success of the connection.
 ##! Requires that :bro:id:`use_conn_size_analyzer` is set to T!  The heuristic
 ##! is not attempted if the connection size analyzer isn't enabled.

+@load base/protocols/conn
@load base/frameworks/notice
@load base/utils/site
@load base/utils/thresholds
@ -16,12 +17,6 @@ module SSH;
 export {
 	## The SSH protocol logging stream identifier.
 	redef enum Log::ID += { LOG };
-	
-	redef enum Notice::Type += { 
-		## Indicates that a heuristically detected "successful" SSH 
-		## authentication occurred.
-		Login 
-	};

 	type Info: record {
 		## Time when the SSH connection began.
@ -30,10 +25,10 @@ export {
 		uid:             string       &log;
 		## The connection's 4-tuple of endpoint addresses/ports.
 		id:              conn_id      &log;
-		## Indicates if the login was heuristically guessed to be "success"
-		## or "failure".
-		status:          string       &log &optional;
-		## Direction of the connection.  If the client was a local host 
+		## Indicates if the login was heuristically guessed to be "success",
+		## "failure", or "undetermined".
+		status:          string       &log &default="undetermined";
+		## Direction of the connection.  If the client was a local host
 		## logging into an external host, this would be OUTBOUND. INBOUND
 		## would be set for the opposite situation.
 		# TODO: handle local-local and remote-remote better.
@ -43,33 +38,33 @@ export {
 		## Software string from the server.
 		server:          string       &log &optional;
 		## Amount of data returned from the server. This is currently
-		## the only measure of the success heuristic and it is logged to 
+		## the only measure of the success heuristic and it is logged to
 		## assist analysts looking at the logs to make their own determination
 		## about the success on a case-by-case basis.
 		resp_size:       count        &log &default=0;
-		
+
 		## Indicate if the SSH session is done being watched.
 		done:            bool         &default=F;
 	};
-	
-	## The size in bytes of data sent by the server at which the SSH 
+
+	## The size in bytes of data sent by the server at which the SSH
 	## connection is presumed to be successful.
-	const authentication_data_size = 5500 &redef;
-	
+	const authentication_data_size = 4000 &redef;
+
 	## If true, we tell the event engine to not look at further data
 	## packets after the initial SSH handshake. Helps with performance
 	## (especially with large file transfers) but precludes some
-	## kinds of analyses (e.g., tracking connection size).
+	## kinds of analyses.
 	const skip_processing_after_detection = F &redef;
-	
+
 	## Event that is generated when the heuristic thinks that a login
 	## was successful.
 	global heuristic_successful_login: event(c: connection);
-	
+
 	## Event that is generated when the heuristic thinks that a login
 	## failed.
 	global heuristic_failed_login: event(c: connection);
-	
+
 	## Event that can be handled to access the :bro:type:`SSH::Info`
 	## record as it is sent on to the logging framework.
 	global log_ssh: event(rec: Info);
@ -104,55 +99,61 @@ function set_session(c: connection)

 function check_ssh_connection(c: connection, done: bool)
 	{
-	# If done watching this connection, just return.
+	# If already done watching this connection, just return.
 	if ( c$ssh$done )
 		return;
-	
-	# Make sure conn_size_analyzer is active by checking 
-	# resp$num_bytes_ip.  In general it should always be active though.
-	if ( ! c$resp?$num_bytes_ip )
-		return;
-	
-	# Remove the IP and TCP header length from the total size.
-	# TODO: Fix for IPv6.  This whole approach also seems to break in some 
-	#       cases where there are more header bytes than num_bytes_ip.
-	local header_bytes = c$resp$num_pkts*32 + c$resp$num_pkts*20;
-	local server_bytes = c$resp$num_bytes_ip;
-	if ( server_bytes >= header_bytes )
-		server_bytes = server_bytes - header_bytes;
-	else
-		server_bytes = c$resp$size;
-	
-	# If this is still a live connection and the byte count has not crossed 
-	# the threshold, just return and let the rescheduled check happen later.
-	if ( ! done && server_bytes < authentication_data_size )
-		return;

-	# Make sure the server has sent back more than 50 bytes to filter out
-	# hosts that are just port scanning.  Nothing is ever logged if the server
-	# doesn't send back at least 50 bytes.
-	if ( server_bytes < 50 )
-		return;
-
-	c$ssh$direction = Site::is_local_addr(c$id$orig_h) ? OUTBOUND : INBOUND;
-	c$ssh$resp_size = server_bytes;
-	
-	if ( server_bytes < authentication_data_size )
+	if ( done )
 		{
-		c$ssh$status  = "failure";
-		event SSH::heuristic_failed_login(c);
+		# If this connection is done, then we can look to see if
+		# this matches the conditions for a failed login.  Failed
+		# logins are only detected at connection state removal.
+
+		if ( # Require originators to have sent at least 50 bytes.
+		     c$orig$size > 50 &&
+		     # Responders must be below 4000 bytes.
+		     c$resp$size < 4000 &&
+		     # Responder must have sent fewer than 40 packets.
+		     c$resp$num_pkts < 40 &&
+		     # If there was a content gap we can't reliably do this heuristic.
+		     c?$conn && c$conn$missed_bytes == 0)# &&
+		     # Only "normal" connections can count.
+		     #c$conn?$conn_state && c$conn$conn_state in valid_states )
+			{
+			c$ssh$status = "failure";
+			event SSH::heuristic_failed_login(c);
+			}
+
+		if ( c$resp$size > authentication_data_size )
+			{
+			c$ssh$status = "success";
+			event SSH::heuristic_successful_login(c);
+			}
 		}
 	else
-		{ 
-		# presumed successful login
-		c$ssh$status = "success";
-		event SSH::heuristic_successful_login(c);
+		{
+		# If this connection is still being tracked, then it's possible
+		# to watch for it to be a successful connection.
+		if ( c$resp$size > authentication_data_size )
+			{
+			c$ssh$status = "success";
+			event SSH::heuristic_successful_login(c);
+			}
+		else
+			# This connection must be tracked longer.  Let the scheduled
+			# check happen again.
+			return;
 		}
-	
+
+	# Set the direction for the log.
+	c$ssh$direction = Site::is_local_addr(c$id$orig_h) ? OUTBOUND : INBOUND;
+
 	# Set the "done" flag to prevent the watching event from rescheduling
 	# after detection is done.
 	c$ssh$done=T;
-	
+
+	Log::write(SSH::LOG, c$ssh);
+
 	if ( skip_processing_after_detection )
 		{
 		# Stop watching this connection, we don't care about it anymore.
@ -161,18 +162,6 @@ function check_ssh_connection(c: connection, done: bool)
 		}
 	}

-event SSH::heuristic_successful_login(c: connection) &priority=-5
-	{
-	NOTICE([$note=Login, 
-	        $msg="Heuristically detected successful SSH login.",
-	        $conn=c]);
-	
-	Log::write(SSH::LOG, c$ssh);
-	}
-event SSH::heuristic_failed_login(c: connection) &priority=-5
-	{
-	Log::write(SSH::LOG, c$ssh);
-	}

 event connection_state_remove(c: connection) &priority=-5
 	{
@ -197,12 +186,12 @@ event ssh_server_version(c: connection, version: string) &priority=5
 	set_session(c);
 	c$ssh$server = version;
 	}
-	
+
 event ssh_client_version(c: connection, version: string) &priority=5
 	{
 	set_session(c);
 	c$ssh$client = version;
-	
+
 	# The heuristic detection for SSH relies on the ConnSize analyzer.
 	# Don't do the heuristics if it's disabled.
 	if ( use_conn_size_analyzer )
--- a/scripts/base/utils/paths.bro
+++ b/scripts/base/utils/paths.bro
@ -19,7 +19,7 @@ function extract_path(input: string): string
 	}

 ## Compresses a given path by removing '..'s and the parent directory it
-## references and also removing '/'s.
+## references and also removing dual '/'s and extraneous '/./'s.
 ## dir: a path string, either relative or absolute
 ## Returns: a compressed version of the input path
 function compress_path(dir: string): string
@ -41,7 +41,7 @@ function compress_path(dir: string): string
 		return compress_path(dir);
 		}

-	const multislash_sep = /(\/){2,}/;
+	const multislash_sep = /(\/\.?){2,}/;
 	parts = split_all(dir, multislash_sep);
 	for ( i in parts )
 		if ( i % 2 == 0 )
--- a/scripts/base/utils/queue.bro
+++ b/scripts/base/utils/queue.bro
@ -0,0 +1,155 @@
+##! A FIFO queue.
+
+module Queue;
+
+export {
+	## Settings for initializing the queue.
+	type Settings: record {
+		## If a maximum length is set for the queue,
+		## putting a value into a full queue first
+		## removes the oldest value to make room.
+		max_len: count &optional;
+	};
+
+	## The internal data structure for the queue; treat it as opaque.
+	type Queue: record {};
+
+	## Initialize a queue record structure.
+	##
+	## s: A record which configures the queue.
+	##
+	## Returns: An opaque queue record.
+	global init:       function(s: Settings &default=[]): Queue;
+
+	## Put a value into a queue; it becomes the newest value.
+	##
+	## q: The queue to put the value into.
+	##
+	## val: The value to insert into the queue.
+	global put:       function(q: Queue, val: any);
+
+	## Remove and return the oldest value in a queue.
+	##
+	## q: The queue to get the value from.
+	##
+	## Returns: The value removed from the queue.
+	global get:        function(q: Queue): any;
+
+	## Peek at the oldest value in the queue without removing it.
+	##
+	## q: The queue to get the value from.
+	##
+	## Returns: The oldest value in the queue.
+	global peek:      function(q: Queue): any;
+
+	## Merge two queues together.  If any settings are applied
+	## to the queues, the settings from q1 are used for the new
+	## merged queue.
+	##
+	## q1: The first queue.  Settings are taken from here.
+	##
+	## q2: The second queue.
+	##
+	## Returns: A new queue holding values interleaved from q1 and q2.
+	global merge:      function(q1: Queue, q2: Queue): Queue;
+
+	## Get the number of items in a queue.
+	##
+	## q: The queue.
+	##
+	## Returns: The length of the queue.
+	global len:     function(q: Queue): count;
+
+	## Get the contents of the queue as a vector.
+	##
+	## q: The queue.
+	##
+	## ret: A vector that is filled in place with the
+	##      current contents of q, oldest value first.
+	global get_vector: function(q: Queue, ret: vector of any);
+
+}
+
+redef record Queue += {
+	# Set to T by init(); the default F marks a record that never went through init().
+	initialized: bool                   &default=F;
+	# The queued values, keyed by the index they were stored under.
+	vals:        table[count] of any &optional;
+	# A copy of the settings that were handed to init().
+	settings:    Settings               &optional;
+	# The index in vals under which put() stores the next value.
+	top:         count                  &default=0;
+	# The index in vals of the oldest value, i.e. the one get() and peek() read.
+	bottom:      count                  &default=0;
+	# The number of bytes in the queue.  NOTE(review): no function in this script updates it -- confirm it is still needed.
+	size:        count                  &default=0;
+};
+
+# Build a new, empty queue that carries its own copy of the given
+# settings and is flagged as initialized.
+function init(s: Settings): Queue
+	{
+	local fresh: Queue;
+
+	fresh$initialized = T;
+	# Copy the settings so later changes to the caller's record do not
+	# affect this queue.
+	fresh$settings = copy(s);
+	fresh$vals = table();
+
+	return fresh;
+	}
+
+# Store val as the newest entry of q.  When q has a max_len setting and
+# already holds at least that many values, the oldest one is removed first.
+function put(q: Queue, val: any)
+	{
+	if ( q$settings?$max_len )
+		{
+		# Make room by discarding the oldest value.
+		if ( len(q) >= q$settings$max_len )
+			get(q);
+		}
+
+	q$vals[q$top] = val;
+	q$top = q$top + 1;
+	}
+
+# Remove the value stored at the bottom index of q and hand it back,
+# advancing the bottom index past it.
+function get(q: Queue): any
+	{
+	local oldest = q$vals[q$bottom];
+
+	delete q$vals[q$bottom];
+	q$bottom = q$bottom + 1;
+
+	return oldest;
+	}
+
+# Return the value stored at the bottom index of q, leaving q untouched.
+function peek(q: Queue): any
+	{
+	local front = q$vals[q$bottom];
+	return front;
+	}
+
+# Build a new queue, configured with q1's settings, that holds the values
+# of q1 and q2 interleaved (q1's oldest, q2's oldest, q1's next, ...).
+# Neither input queue is modified.  If q1's settings carry a max_len, put()
+# enforces it on the merged queue as values are added.
+function merge(q1: Queue, q2: Queue): Queue
+	{
+	local ret = init(q1$settings);
+	local i = q1$bottom;
+	local j = q2$bottom;
+
+	# Iterating over q1$vals is only a way to run the body once per value
+	# held in q1; i and j do the actual indexing.
+	for ( ignored_q1 in q1$vals )
+		{
+		if ( i in q1$vals )
+			put(ret, q1$vals[i]);
+		if ( j in q2$vals )
+			put(ret, q2$vals[j]);
+		++i;
+		++j;
+		}
+
+	# The loop above stops after |q1$vals| rounds, so when q2 holds more
+	# values than q1 its surplus has not been copied yet.  Continue from
+	# where j stopped; the membership test ends the copying at q2's top.
+	for ( ignored_q2 in q2$vals )
+		{
+		if ( j in q2$vals )
+			put(ret, q2$vals[j]);
+		++j;
+		}
+
+	return ret;
+	}
+
+# Report how many values q currently holds.
+function len(q: Queue): count
+	{
+	local n = |q$vals|;
+	return n;
+	}
+
+# Copy the values of q into ret, starting at ret[0] with the value stored
+# at q's bottom index and continuing upwards.  q itself is not modified.
+function get_vector(q: Queue, ret: vector of any)
+	{
+	local src = q$bottom;
+	local dst = 0;
+
+	# The loop variable is unused; looping over q$vals merely bounds the
+	# number of rounds by the number of stored values.
+	for ( unused in q$vals )
+		{
+		if ( src >= q$top )
+			break;
+
+		ret[dst] = q$vals[src];
+		++src;
+		++dst;
+		}
+	}
--- a/scripts/base/utils/time.bro
+++ b/scripts/base/utils/time.bro
@ -0,0 +1,9 @@
+
+## Renders an interval as a compact, human readable string of whole
+## minutes and remaining seconds, e.g. 3m34s.
+##
+## dur: The interval to render.
+##
+## Returns: A string of the form "<minutes>m<seconds>s".
+function duration_to_mins_secs(dur: interval): string
+	{
+	local total_secs = double_to_count(interval_to_double(dur));
+	local mins = total_secs / 60;
+	local secs = total_secs % 60;
+	return fmt("%dm%ds", mins, secs);
+	}