Merge branch 'master' into topic/jsiwek/exec-module

This commit is contained in:
Jon Siwek 2013-07-22 10:23:28 -05:00
commit ca5abbf13a
876 changed files with 260597 additions and 174691 deletions


@ -0,0 +1 @@
@load ./main


@ -0,0 +1,217 @@
##! Framework for managing Bro's protocol analyzers.
##!
##! The analyzer framework allows one to dynamically enable or disable analyzers, as
##! well as to manage the well-known ports which automatically activate a
##! particular analyzer for new connections.
##!
##! Protocol analyzers are identified by unique tags of type
##! :bro:type:`Analyzer::Tag`, such as :bro:enum:`Analyzer::ANALYZER_HTTP`.
##! These tags are defined internally by
##! the analyzers themselves, and documented in their analyzer-specific
##! description along with the events that they generate.
@load base/frameworks/packet-filter/utils
module Analyzer;
export {
## If true, all available analyzers are initially disabled at startup. One
## can then selectively enable them with
## :bro:id:`Analyzer::enable_analyzer`.
global disable_all = F &redef;
## Enables an analyzer. Once enabled, the analyzer may be used for analysis
## of future connections as decided by Bro's dynamic protocol detection.
##
## tag: The tag of the analyzer to enable.
##
## Returns: True if the analyzer was successfully enabled.
global enable_analyzer: function(tag: Analyzer::Tag) : bool;
## Disables an analyzer. Once disabled, the analyzer will not be used
## further for analysis of future connections.
##
## tag: The tag of the analyzer to disable.
##
## Returns: True if the analyzer was successfully disabled.
global disable_analyzer: function(tag: Analyzer::Tag) : bool;
## Registers a set of well-known ports for an analyzer. If a future
## connection on one of these ports is seen, the analyzer will be
## automatically assigned to parsing it. The function *adds* to all ports
## already registered; it doesn't replace them.
##
## tag: The tag of the analyzer.
##
## ports: The set of well-known ports to associate with the analyzer.
##
## Returns: True if the ports were successfully registered.
global register_for_ports: function(tag: Analyzer::Tag, ports: set[port]) : bool;
## Registers an individual well-known port for an analyzer. If a future
## connection on this port is seen, the analyzer will be automatically
## assigned to parsing it. The function *adds* to all ports already
## registered; it doesn't replace them.
##
## tag: The tag of the analyzer.
##
## p: The well-known port to associate with the analyzer.
##
## Returns: True if the port was successfully registered.
global register_for_port: function(tag: Analyzer::Tag, p: port) : bool;
## Returns a set of all well-known ports currently registered for a
## specific analyzer.
##
## tag: The tag of the analyzer.
##
## Returns: The set of ports.
global registered_ports: function(tag: Analyzer::Tag) : set[port];
## Returns a table of all ports-to-analyzer mappings currently registered.
##
## Returns: A table mapping each analyzer to the set of ports
## registered for it.
global all_registered_ports: function() : table[Analyzer::Tag] of set[port];
## Translates an analyzer type to a string with the analyzer's name.
##
## tag: The analyzer tag.
##
## Returns: The analyzer name corresponding to the tag.
global name: function(tag: Analyzer::Tag) : string;
## Schedules an analyzer for a future connection originating from a given IP
## address and port.
##
## orig: The IP address originating a connection in the future.
## 0.0.0.0 can be used as a wildcard to match any originator address.
##
## resp: The IP address responding to a connection from *orig*.
##
## resp_p: The destination port at *resp*.
##
## analyzer: The analyzer ID.
##
## tout: A timeout interval after which the scheduling request will be
## discarded if the connection has not yet been seen.
##
## Returns: True if successful.
global schedule_analyzer: function(orig: addr, resp: addr, resp_p: port,
analyzer: Analyzer::Tag, tout: interval) : bool;
## Automatically creates a BPF filter for the specified protocol based
## on the data supplied for the protocol through the
## :bro:see:`Analyzer::register_for_ports` function.
##
## tag: The analyzer tag.
##
## Returns: BPF filter string.
global analyzer_to_bpf: function(tag: Analyzer::Tag): string;
## Create a BPF filter which matches all of the ports defined
## by the various protocol analysis scripts as "registered ports"
## for the protocol.
global get_bpf: function(): string;
## A set of analyzers to disable by default at startup. The default set
## contains legacy analyzers that are no longer supported.
global disabled_analyzers: set[Analyzer::Tag] = {
ANALYZER_INTERCONN,
ANALYZER_STEPPINGSTONE,
ANALYZER_BACKDOOR,
ANALYZER_TCPSTATS,
} &redef;
}
@load base/bif/analyzer.bif
global ports: table[Analyzer::Tag] of set[port];
event bro_init() &priority=5
{
if ( disable_all )
__disable_all_analyzers();
for ( a in disabled_analyzers )
disable_analyzer(a);
}
function enable_analyzer(tag: Analyzer::Tag) : bool
{
return __enable_analyzer(tag);
}
function disable_analyzer(tag: Analyzer::Tag) : bool
{
return __disable_analyzer(tag);
}
function register_for_ports(tag: Analyzer::Tag, ports: set[port]) : bool
{
local rc = T;
for ( p in ports )
{
if ( ! register_for_port(tag, p) )
rc = F;
}
return rc;
}
function register_for_port(tag: Analyzer::Tag, p: port) : bool
{
if ( ! __register_for_port(tag, p) )
return F;
if ( tag !in ports )
ports[tag] = set();
add ports[tag][p];
return T;
}
function registered_ports(tag: Analyzer::Tag) : set[port]
{
return tag in ports ? ports[tag] : set();
}
function all_registered_ports(): table[Analyzer::Tag] of set[port]
{
return ports;
}
function name(atype: Analyzer::Tag) : string
{
return __name(atype);
}
function schedule_analyzer(orig: addr, resp: addr, resp_p: port,
analyzer: Analyzer::Tag, tout: interval) : bool
{
return __schedule_analyzer(orig, resp, resp_p, analyzer, tout);
}
function analyzer_to_bpf(tag: Analyzer::Tag): string
{
# Return an empty string if an undefined analyzer was given.
if ( tag !in ports )
return "";
local output = "";
for ( p in ports[tag] )
output = PacketFilter::combine_filters(output, "or", PacketFilter::port_to_bpf(p));
return output;
}
function get_bpf(): string
{
local output = "";
for ( tag in ports )
{
output = PacketFilter::combine_filters(output, "or", analyzer_to_bpf(tag));
}
return output;
}
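# Usage sketch (not part of this commit): how a script might drive this
# framework. The extra FTP port and the responder address/port below are
# illustrative assumptions; the analyzer tags are the ones shipped with Bro.
event bro_init()
    {
    # Associate an additional well-known port with the FTP analyzer.
    Analyzer::register_for_port(Analyzer::ANALYZER_FTP, 2121/tcp);

    # Expect an HTTP connection to a known responder within the next minute;
    # 0.0.0.0 acts as a wildcard for the originator.
    Analyzer::schedule_analyzer(0.0.0.0, 192.168.1.10, 8080/tcp,
                                Analyzer::ANALYZER_HTTP, 1min);
    }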


@ -216,12 +216,9 @@ function setup_peer(p: event_peer, node: Node)
request_remote_events(p, node$events);
}
if ( node?$capture_filter )
if ( node?$capture_filter && node$capture_filter != "" )
{
local filter = node$capture_filter;
if ( filter == "" )
filter = PacketFilter::default_filter;
do_script_log(p, fmt("sending capture_filter: %s", filter));
send_capture_filter(p, filter);
}


@ -1,212 +0,0 @@
# Signatures to initiate dynamic protocol detection.
signature dpd_ftp_client {
ip-proto == tcp
payload /(|.*[\n\r]) *[uU][sS][eE][rR] /
tcp-state originator
}
# Match for server greeting (220, 120) and for login or passwd
# required (230, 331).
signature dpd_ftp_server {
ip-proto == tcp
payload /[\n\r ]*(120|220)[^0-9].*[\n\r] *(230|331)[^0-9]/
tcp-state responder
requires-reverse-signature dpd_ftp_client
enable "ftp"
}
signature dpd_http_client {
ip-proto == tcp
payload /^[[:space:]]*(GET|HEAD|POST)[[:space:]]*/
tcp-state originator
}
signature dpd_http_server {
ip-proto == tcp
payload /^HTTP\/[0-9]/
tcp-state responder
requires-reverse-signature dpd_http_client
enable "http"
}
signature dpd_bittorrenttracker_client {
ip-proto == tcp
payload /^.*\/announce\?.*info_hash/
tcp-state originator
}
signature dpd_bittorrenttracker_server {
ip-proto == tcp
payload /^HTTP\/[0-9]/
tcp-state responder
requires-reverse-signature dpd_bittorrenttracker_client
enable "bittorrenttracker"
}
signature dpd_bittorrent_peer1 {
ip-proto == tcp
payload /^\x13BitTorrent protocol/
tcp-state originator
}
signature dpd_bittorrent_peer2 {
ip-proto == tcp
payload /^\x13BitTorrent protocol/
tcp-state responder
requires-reverse-signature dpd_bittorrent_peer1
enable "bittorrent"
}
signature irc_client1 {
ip-proto == tcp
payload /(|.*[\r\n]) *[Uu][Ss][Ee][Rr] +.+[\n\r]+ *[Nn][Ii][Cc][Kk] +.*[\r\n]/
requires-reverse-signature irc_server_reply
tcp-state originator
enable "irc"
}
signature irc_client2 {
ip-proto == tcp
payload /(|.*[\r\n]) *[Nn][Ii][Cc][Kk] +.+[\r\n]+ *[Uu][Ss][Ee][Rr] +.+[\r\n]/
requires-reverse-signature irc_server_reply
tcp-state originator
enable "irc"
}
signature irc_server_reply {
ip-proto == tcp
payload /^(|.*[\n\r])(:[^ \n\r]+ )?[0-9][0-9][0-9] /
tcp-state responder
}
signature irc_server_to_server1 {
ip-proto == tcp
payload /(|.*[\r\n]) *[Ss][Ee][Rr][Vv][Ee][Rr] +[^ ]+ +[0-9]+ +:.+[\r\n]/
}
signature irc_server_to_server2 {
ip-proto == tcp
payload /(|.*[\r\n]) *[Ss][Ee][Rr][Vv][Ee][Rr] +[^ ]+ +[0-9]+ +:.+[\r\n]/
requires-reverse-signature irc_server_to_server1
enable "irc"
}
signature dpd_smtp_client {
ip-proto == tcp
payload /(|.*[\n\r])[[:space:]]*([hH][eE][lL][oO]|[eE][hH][lL][oO])/
requires-reverse-signature dpd_smtp_server
enable "smtp"
tcp-state originator
}
signature dpd_smtp_server {
ip-proto == tcp
payload /^[[:space:]]*220[[:space:]-]/
tcp-state responder
}
signature dpd_ssh_client {
ip-proto == tcp
payload /^[sS][sS][hH]-/
requires-reverse-signature dpd_ssh_server
enable "ssh"
tcp-state originator
}
signature dpd_ssh_server {
ip-proto == tcp
payload /^[sS][sS][hH]-/
tcp-state responder
}
signature dpd_pop3_server {
ip-proto == tcp
payload /^\+OK/
requires-reverse-signature dpd_pop3_client
enable "pop3"
tcp-state responder
}
signature dpd_pop3_client {
ip-proto == tcp
payload /(|.*[\r\n])[[:space:]]*([uU][sS][eE][rR][[:space:]]|[aA][pP][oO][pP][[:space:]]|[cC][aA][pP][aA]|[aA][uU][tT][hH])/
tcp-state originator
}
signature dpd_ssl_server {
ip-proto == tcp
# Server hello.
payload /^(\x16\x03[\x00\x01\x02]..\x02...\x03[\x00\x01\x02]|...?\x04..\x00\x02).*/
requires-reverse-signature dpd_ssl_client
enable "ssl"
tcp-state responder
}
signature dpd_ssl_client {
ip-proto == tcp
# Client hello.
payload /^(\x16\x03[\x00\x01\x02]..\x01...\x03[\x00\x01\x02]|...?\x01[\x00\x01\x02][\x02\x03]).*/
tcp-state originator
}
signature dpd_ayiya {
ip-proto = udp
payload /^..\x11\x29/
enable "ayiya"
}
signature dpd_teredo {
ip-proto = udp
payload /^(\x00\x00)|(\x00\x01)|([\x60-\x6f])/
enable "teredo"
}
signature dpd_socks4_client {
ip-proto == tcp
# '32' is a rather arbitrary max length for the user name.
payload /^\x04[\x01\x02].{0,32}\x00/
tcp-state originator
}
signature dpd_socks4_server {
ip-proto == tcp
requires-reverse-signature dpd_socks4_client
payload /^\x00[\x5a\x5b\x5c\x5d]/
tcp-state responder
enable "socks"
}
signature dpd_socks4_reverse_client {
ip-proto == tcp
# '32' is a rather arbitrary max length for the user name.
payload /^\x04[\x01\x02].{0,32}\x00/
tcp-state responder
}
signature dpd_socks4_reverse_server {
ip-proto == tcp
requires-reverse-signature dpd_socks4_reverse_client
payload /^\x00[\x5a\x5b\x5c\x5d]/
tcp-state originator
enable "socks"
}
signature dpd_socks5_client {
ip-proto == tcp
# Watch for a few authentication methods to reduce false positives.
payload /^\x05.[\x00\x01\x02]/
tcp-state originator
}
signature dpd_socks5_server {
ip-proto == tcp
requires-reverse-signature dpd_socks5_client
# Watch for a single authentication method to be chosen by the server or
# the server to indicate the no authentication is required.
payload /^\x05(\x00|\x01[\x00\x01\x02])/
tcp-state responder
enable "socks"
}


@ -3,8 +3,6 @@
module DPD;
@load-sigs ./dpd.sig
export {
## Add the DPD logging stream identifier.
redef enum Log::ID += { LOG };
@ -23,12 +21,12 @@ export {
analyzer: string &log;
## The textual reason for the analysis failure.
failure_reason: string &log;
## Disabled analyzer IDs. This is only for internal tracking
## Disabled analyzer IDs. This is only for internal tracking
## so as to not attempt to disable analyzers multiple times.
disabled_aids: set[count];
};
## Ignore violations which go this many bytes into the connection.
## Set to 0 to never ignore protocol violations.
const ignore_violations_after = 10 * 1024 &redef;
@ -41,41 +39,30 @@ redef record connection += {
event bro_init() &priority=5
{
Log::create_stream(DPD::LOG, [$columns=Info]);
# Populate the internal DPD analysis variable.
for ( a in dpd_config )
{
for ( p in dpd_config[a]$ports )
{
if ( p !in dpd_analyzer_ports )
dpd_analyzer_ports[p] = set();
add dpd_analyzer_ports[p][a];
}
}
}
event protocol_confirmation(c: connection, atype: count, aid: count) &priority=10
event protocol_confirmation(c: connection, atype: Analyzer::Tag, aid: count) &priority=10
{
local analyzer = analyzer_name(atype);
local analyzer = Analyzer::name(atype);
if ( fmt("-%s",analyzer) in c$service )
delete c$service[fmt("-%s", analyzer)];
add c$service[analyzer];
}
event protocol_violation(c: connection, atype: count, aid: count,
event protocol_violation(c: connection, atype: Analyzer::Tag, aid: count,
reason: string) &priority=10
{
local analyzer = analyzer_name(atype);
local analyzer = Analyzer::name(atype);
# If the service hasn't been confirmed yet, don't generate a log message
# for the protocol violation.
if ( analyzer !in c$service )
return;
delete c$service[analyzer];
add c$service[fmt("-%s", analyzer)];
local info: Info;
info$ts=network_time();
info$uid=c$uid;
@ -86,7 +73,7 @@ event protocol_violation(c: connection, atype: count, aid: count,
c$dpd = info;
}
event protocol_violation(c: connection, atype: count, aid: count, reason: string) &priority=5
event protocol_violation(c: connection, atype: Analyzer::Tag, aid: count, reason: string) &priority=5
{
if ( !c?$dpd || aid in c$dpd$disabled_aids )
return;
@ -94,13 +81,13 @@ event protocol_violation(c: connection, atype: count, aid: count, reason: string
local size = c$orig$size + c$resp$size;
if ( ignore_violations_after > 0 && size > ignore_violations_after )
return;
# Disable the analyzer that raised the last core-generated event.
disable_analyzer(c$id, aid);
add c$dpd$disabled_aids[aid];
}
event protocol_violation(c: connection, atype: count, aid: count,
event protocol_violation(c: connection, atype: Analyzer::Tag, aid: count,
reason: string) &priority=-5
{
if ( c?$dpd )


@ -0,0 +1 @@
@load ./main.bro


@ -0,0 +1,261 @@
##! An interface for driving the analysis of files, possibly independent of
##! any network protocol over which they're transported.
@load base/bif/file_analysis.bif
@load base/frameworks/logging
module FileAnalysis;
export {
redef enum Log::ID += {
## Logging stream for file analysis.
LOG
};
## A structure which represents a desired type of file analysis.
type AnalyzerArgs: record {
## The type of analysis.
tag: FileAnalysis::Tag;
## The local filename to which to write an extracted file. Must be
## set when *tag* is :bro:see:`FileAnalysis::ANALYZER_EXTRACT`.
extract_filename: string &optional;
## An event which will be generated for all new file contents,
## chunk-wise. Used when *tag* is
## :bro:see:`FileAnalysis::ANALYZER_DATA_EVENT`.
chunk_event: event(f: fa_file, data: string, off: count) &optional;
## An event which will be generated for all new file contents,
## stream-wise. Used when *tag* is
## :bro:see:`FileAnalysis::ANALYZER_DATA_EVENT`.
stream_event: event(f: fa_file, data: string) &optional;
} &redef;
## Contains all metadata related to the analysis of a given file.
## For the most part, fields here are derived from ones of the same name
## in :bro:see:`fa_file`.
type Info: record {
## An identifier associated with a single file.
id: string &log;
## Identifier associated with a container file from which this one was
## extracted as part of the file analysis.
parent_id: string &log &optional;
## An identification of the source of the file data. E.g. it may be
## a network protocol over which it was transferred, or a local file
## path which was read, or some other input source.
source: string &log &optional;
## If the source of this file is a network connection, this field
## may be set to indicate the directionality.
is_orig: bool &log &optional;
## The time at which the last activity for the file was seen.
last_active: time &log;
## Number of bytes provided to the file analysis engine for the file.
seen_bytes: count &log &default=0;
## Total number of bytes that are supposed to comprise the full file.
total_bytes: count &log &optional;
## The number of bytes in the file stream that were completely missed
## during the process of analysis e.g. due to dropped packets.
missing_bytes: count &log &default=0;
## The number of not all-in-sequence bytes in the file stream that
## were delivered to file analyzers due to reassembly buffer overflow.
overflow_bytes: count &log &default=0;
## The amount of time the analysis engine will wait between receiving
## new data for this file before giving up on it.
timeout_interval: interval &log &optional;
## The number of bytes at the beginning of a file to save for later
## inspection in the *bof_buffer* field.
bof_buffer_size: count &log &optional;
## A mime type provided by libmagic against the *bof_buffer*, or
## in the cases where no buffering of the beginning of the file occurs,
## an initial guess of the mime type based on the first data seen.
mime_type: string &log &optional;
## Whether the file analysis timed out at least once for the file.
timedout: bool &log &default=F;
## Connection UIDs over which the file was transferred.
conn_uids: set[string] &log;
## A set of analysis types done during the file analysis.
analyzers: set[FileAnalysis::Tag];
## Local filenames of extracted files.
extracted_files: set[string] &log;
## An MD5 digest of the file contents.
md5: string &log &optional;
## A SHA1 digest of the file contents.
sha1: string &log &optional;
## A SHA256 digest of the file contents.
sha256: string &log &optional;
} &redef;
## A table that can be used to disable file analysis completely for
## any files transferred over given network protocol analyzers.
const disable: table[Analyzer::Tag] of bool = table() &redef;
## Event that can be handled to access the Info record as it is sent on
## to the logging framework.
global log_file_analysis: event(rec: Info);
## The salt concatenated to unique file handle strings generated by
## :bro:see:`get_file_handle` before hashing them in to a file id
## (the *id* field of :bro:see:`fa_file`).
## Provided to help mitigate the possibility of manipulating parts of
## network connections that factor in to the file handle in order to
## generate two handles that would hash to the same file id.
const salt = "I recommend changing this." &redef;
## Sets the *timeout_interval* field of :bro:see:`fa_file`, which is
## used to determine the length of inactivity that is allowed for a file
## before internal state related to it is cleaned up. When used within a
## :bro:see:`file_timeout` handler, the analysis will delay timing out
## again for the period specified by *t*.
##
## f: the file.
##
## t: the amount of time the file can remain inactive before discarding.
##
## Returns: true if the timeout interval was set, or false if analysis
## for the *id* isn't currently active.
global set_timeout_interval: function(f: fa_file, t: interval): bool;
## Adds an analyzer to the analysis of a given file.
##
## f: the file.
##
## args: the analyzer type to add along with any arguments it takes.
##
## Returns: true if the analyzer will be added, or false if analysis
## for the *id* isn't currently active or the *args*
## were invalid for the analyzer type.
global add_analyzer: function(f: fa_file, args: AnalyzerArgs): bool;
## Removes an analyzer from the analysis of a given file.
##
## f: the file.
##
## args: the analyzer (type and args) to remove.
##
## Returns: true if the analyzer will be removed, or false if analysis
## for the *id* isn't currently active.
global remove_analyzer: function(f: fa_file, args: AnalyzerArgs): bool;
## Stops/ignores any further analysis of a given file.
##
## f: the file.
##
## Returns: true if analysis for the given file will be ignored for the
## rest of its contents, or false if analysis for the *id*
## isn't currently active.
global stop: function(f: fa_file): bool;
}
redef record fa_file += {
info: Info &optional;
};
function set_info(f: fa_file)
{
if ( ! f?$info )
{
local tmp: Info;
f$info = tmp;
}
f$info$id = f$id;
if ( f?$parent_id ) f$info$parent_id = f$parent_id;
if ( f?$source ) f$info$source = f$source;
if ( f?$is_orig ) f$info$is_orig = f$is_orig;
f$info$last_active = f$last_active;
f$info$seen_bytes = f$seen_bytes;
if ( f?$total_bytes ) f$info$total_bytes = f$total_bytes;
f$info$missing_bytes = f$missing_bytes;
f$info$overflow_bytes = f$overflow_bytes;
f$info$timeout_interval = f$timeout_interval;
f$info$bof_buffer_size = f$bof_buffer_size;
if ( f?$mime_type ) f$info$mime_type = f$mime_type;
if ( f?$conns )
for ( cid in f$conns )
add f$info$conn_uids[f$conns[cid]$uid];
}
function set_timeout_interval(f: fa_file, t: interval): bool
{
return __set_timeout_interval(f$id, t);
}
function add_analyzer(f: fa_file, args: AnalyzerArgs): bool
{
if ( ! __add_analyzer(f$id, args) ) return F;
set_info(f);
add f$info$analyzers[args$tag];
if ( args$tag == FileAnalysis::ANALYZER_EXTRACT )
add f$info$extracted_files[args$extract_filename];
return T;
}
function remove_analyzer(f: fa_file, args: AnalyzerArgs): bool
{
return __remove_analyzer(f$id, args);
}
function stop(f: fa_file): bool
{
return __stop(f$id);
}
event bro_init() &priority=5
{
Log::create_stream(FileAnalysis::LOG,
[$columns=Info, $ev=log_file_analysis]);
}
event file_timeout(f: fa_file) &priority=5
{
set_info(f);
f$info$timedout = T;
}
event file_hash(f: fa_file, kind: string, hash: string) &priority=5
{
set_info(f);
switch ( kind ) {
case "md5":
f$info$md5 = hash;
break;
case "sha1":
f$info$sha1 = hash;
break;
case "sha256":
f$info$sha256 = hash;
break;
}
}
event file_state_remove(f: fa_file) &priority=5
{
set_info(f);
}
event file_state_remove(f: fa_file) &priority=-5
{
Log::write(FileAnalysis::LOG, f$info);
}
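# Usage sketch (not part of this commit): attaching analyzers to a new file
# from a handler. The output filename is an illustrative assumption; the
# MD5 and EXTRACT analyzer tags are assumed to be available as named here.
event file_new(f: fa_file)
    {
    FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]);
    FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
                                   $extract_filename="extracted-example.dat"]);
    }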


@ -2,4 +2,5 @@
@load ./readers/ascii
@load ./readers/raw
@load ./readers/benchmark
@load ./readers/binary
@load ./readers/sqlite


@ -122,6 +122,34 @@ export {
config: table[string] of string &default=table();
};
## A file analysis input stream type used to forward input data to the
## file analysis framework.
type AnalysisDescription: record {
## String that allows the reader to find the source.
## For `READER_ASCII`, this is the filename.
source: string;
## Reader to use for this stream. Compatible readers must be
## able to accept a filter of a single string type (i.e.
## they read a byte stream).
reader: Reader &default=Input::READER_BINARY;
## Read mode to use for this stream
mode: Mode &default=default_mode;
## Descriptive name that uniquely identifies the input source.
## Can be used to remove a stream at a later time.
## This will also be used for the unique *source* field of
## :bro:see:`fa_file`. Most of the time, the best choice for this
## field will be the same value as the *source* field.
name: string;
## A key/value table that will be passed on to the reader.
## Interpretation of the values is left to the reader, but
## usually they will be used for configuration purposes.
config: table[string] of string &default=table();
};
## Create a new table input from a given source. Returns true on success.
##
## description: `TableDescription` record describing the source.
@ -132,6 +160,14 @@ export {
## description: `TableDescription` record describing the source.
global add_event: function(description: Input::EventDescription) : bool;
## Create a new file analysis input from a given source. Data read from
## the source is automatically forwarded to the file analysis framework.
##
## description: A record describing the source
##
## Returns: true on success.
global add_analysis: function(description: Input::AnalysisDescription) : bool;
## Remove an input stream. Returns true on success and false if the named stream was
## not found.
##
@ -149,7 +185,7 @@ export {
global end_of_data: event(name: string, source:string);
}
@load base/input.bif
@load base/bif/input.bif
module Input;
@ -164,6 +200,11 @@ function add_event(description: Input::EventDescription) : bool
return __create_event_stream(description);
}
function add_analysis(description: Input::AnalysisDescription) : bool
{
return __create_analysis_stream(description);
}
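# Usage sketch (not part of this commit): pushing a local file through the
# file analysis framework via the new analysis stream type. The path and
# stream name are illustrative assumptions; reader and mode fall back to
# their defaults (READER_BINARY and the default read mode).
event bro_init()
    {
    Input::add_analysis([$source="/tmp/example.exe", $name="example.exe"]);
    }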
function remove(id: string) : bool
{
return __remove_stream(id);


@ -0,0 +1,8 @@
##! Interface for the binary input reader.
module InputBinary;
export {
## Size of data chunks to read from the input file at a time.
const chunk_size = 1024 &redef;
}
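# Example (not part of this commit): a site could tune the chunk size from
# its own scripts, e.g. to read larger blocks per iteration.
redef InputBinary::chunk_size = 4096;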


@ -7,11 +7,11 @@ export {
## Please note that the separator has to be exactly one character long
const record_separator = "\n" &redef;
## Event that is called, when a process created by the raw reader exits.
## Event that is called when a process created by the raw reader exits.
##
## name: name of the input stream
## source: source of the input stream
## exit_code: exit code of the program, or number of the signal that forced the program to exit
## signal_exit: false when the program exited normally, true when the program was forced to exit by a signal
global process_finished: event(name: string, source:string, exit_code:count, signal_exit:bool);
global process_finished: event(name: string, source:string, exit_code:count, signal_exit:bool);
}


@ -0,0 +1,17 @@
##! Interface for the SQLite input reader.
##!
##! The defaults are set to match Bro's ASCII output.
module InputSQLite;
export {
## Separator between set elements.
## Please note that the separator has to be exactly one character long.
const set_separator = Input::set_separator &redef;
## String to use for an unset &optional field.
const unset_field = Input::unset_field &redef;
## String to use for empty fields.
const empty_field = Input::empty_field &redef;
}


@ -2,5 +2,6 @@
@load ./postprocessors
@load ./writers/ascii
@load ./writers/dataseries
@load ./writers/sqlite
@load ./writers/elasticsearch
@load ./writers/none


@ -195,7 +195,7 @@ export {
##
## Returns: True if a new stream was successfully removed.
##
## .. bro:see:: Log:create_stream
## .. bro:see:: Log::create_stream
global remove_stream: function(id: ID) : bool;
## Enables a previously disabled logging stream. Disabled streams
@ -366,7 +366,7 @@ export {
# We keep a script-level copy of all filters so that we can manipulate them.
global filters: table[ID, string] of Filter;
@load base/logging.bif # Needs Filter and Stream defined.
@load base/bif/logging.bif # Needs Filter and Stream defined.
module Log;
@ -454,7 +454,6 @@ function create_stream(id: ID, stream: Stream) : bool
function remove_stream(id: ID) : bool
{
delete active_streams[id];
return __remove_stream(id);
}


@ -0,0 +1,17 @@
##! Interface for the SQLite log writer. Redefinable options are available
##! to tweak the output format of the SQLite writer.
module LogSQLite;
export {
## Separator between set elements.
const set_separator = Log::set_separator &redef;
## String to use for an unset &optional field.
const unset_field = Log::unset_field &redef;
## String to use for empty fields. This should be different from
## *unset_field* to make the output unambiguous.
const empty_field = Log::empty_field &redef;
}


@ -1,264 +0,0 @@
##! This implements transparent cluster support for the metrics framework.
##! Do not load this file directly. It's only meant to be loaded automatically,
##! which happens only when the cluster framework has been enabled.
##! The goal of this script is to make metric calculation completely and
##! transparently automated when running on a cluster.
##!
##! Events defined here are not exported deliberately because they are meant
##! to be an internal implementation detail.
@load base/frameworks/cluster
@load ./main
module Metrics;
export {
## Allows a user to decide how many results workers should send per
## group when transmitting values for cluster metric aggregation.
const cluster_send_in_groups_of = 50 &redef;
## The percent of the full threshold value that needs to be met
## on a single worker for that worker to send the value to its manager so
## that the manager can request a global view for that value. There is no
## requirement that the manager requests a global view for the index
## since it may opt not to if it requested a global view for the index
## recently.
const cluster_request_global_view_percent = 0.1 &redef;
## Event sent by the manager in a cluster to initiate the
## collection of metrics values for a filter.
global cluster_filter_request: event(uid: string, id: ID, filter_name: string);
## Event sent by nodes that are collecting metrics after receiving
## a request for the metric filter from the manager.
global cluster_filter_response: event(uid: string, id: ID, filter_name: string, data: MetricTable, done: bool);
## This event is sent by the manager in a cluster to initiate the
## collection of a single index value from a filter. It's typically
## used to get intermediate updates before the break interval triggers
## to speed detection of a value crossing a threshold.
global cluster_index_request: event(uid: string, id: ID, filter_name: string, index: Index);
## This event is sent by nodes in response to a
## :bro:id:`Metrics::cluster_index_request` event.
global cluster_index_response: event(uid: string, id: ID, filter_name: string, index: Index, val: count);
## This event is sent by workers to indicate that they have crossed the
## percentage of the current threshold defined globally in
## :bro:id:`Metrics::cluster_request_global_view_percent`.
global cluster_index_intermediate_response: event(id: Metrics::ID, filter_name: string, index: Metrics::Index, val: count);
## This event is scheduled internally on workers to send result chunks.
global send_data: event(uid: string, id: ID, filter_name: string, data: MetricTable);
}
# This is maintained by managers so they can know what data they requested and
# when they requested it.
global requested_results: table[string] of time = table() &create_expire=5mins;
# TODO: The next 4 variables make the assumption that a value never
# takes longer than 5 minutes to transmit from workers to manager. This needs to
# be tunable or self-tuning. These should also be restructured to be
# maintained within a single variable.
# This variable is maintained by manager nodes as they collect and aggregate
# results.
global filter_results: table[string, ID, string] of MetricTable &create_expire=5mins;
# This variable is maintained by manager nodes to track how many "dones" they
# collected per collection unique id. Once the number of results for a uid
# matches the number of peer nodes that results should be coming from, the
# result is written out and deleted from here.
# TODO: add an &expire_func in case not all results are received.
global done_with: table[string] of count &create_expire=5mins &default=0;
# This variable is maintained by managers to track intermediate responses as
# they are getting a global view for a certain index.
global index_requests: table[string, ID, string, Index] of count &create_expire=5mins &default=0;
# This variable is maintained by all hosts for different purposes. Non-managers
# maintain it to know what indexes they have recently sent as intermediate
# updates so they don't overwhelm their manager. Managers maintain it so they
# don't overwhelm workers with intermediate index requests. The count that is
# yielded is the number of times the percentage threshold has been crossed and
# an intermediate result has been received. The manager may optionally request
# the index again before data expires from here if too many workers are crossing
# the percentage threshold (not implemented yet!).
global recent_global_view_indexes: table[ID, string, Index] of count &create_expire=5mins &default=0;
# Add events to the cluster framework to make this work.
redef Cluster::manager2worker_events += /Metrics::cluster_(filter_request|index_request)/;
redef Cluster::worker2manager_events += /Metrics::cluster_(filter_response|index_response|index_intermediate_response)/;
@if ( Cluster::local_node_type() != Cluster::MANAGER )
# This is done on all non-manager node types in the event that a metric is
# being collected somewhere other than a worker.
function data_added(filter: Filter, index: Index, val: count)
{
# If an intermediate update for this value was sent recently, don't send
# it again.
if ( [filter$id, filter$name, index] in recent_global_view_indexes )
return;
# If val is 5 and global view % is 0.1 (10%), pct_val will be 50. If that
# crosses the full threshold then it's a candidate to send as an
# intermediate update.
local pct_val = double_to_count(val / cluster_request_global_view_percent);
if ( check_notice(filter, index, pct_val) )
{
# kick off intermediate update
event Metrics::cluster_index_intermediate_response(filter$id, filter$name, index, val);
++recent_global_view_indexes[filter$id, filter$name, index];
}
}
event Metrics::send_data(uid: string, id: ID, filter_name: string, data: MetricTable)
{
#print fmt("WORKER %s: sending data for uid %s...", Cluster::node, uid);
local local_data: MetricTable;
local num_added = 0;
for ( index in data )
{
local_data[index] = data[index];
delete data[index];
# Only send cluster_send_in_groups_of at a time. Queue another
# event to send the next group.
if ( cluster_send_in_groups_of == ++num_added )
break;
}
local done = F;
# If data is empty, this metric is done.
if ( |data| == 0 )
done = T;
event Metrics::cluster_filter_response(uid, id, filter_name, local_data, done);
if ( ! done )
event Metrics::send_data(uid, id, filter_name, data);
}
event Metrics::cluster_filter_request(uid: string, id: ID, filter_name: string)
{
#print fmt("WORKER %s: received the cluster_filter_request event.", Cluster::node);
# Initiate sending all of the data for the requested filter.
event Metrics::send_data(uid, id, filter_name, store[id, filter_name]);
# Lookup the actual filter and reset it, the reference to the data
# currently stored will be maintained internally by the send_data event.
reset(filter_store[id, filter_name]);
}
event Metrics::cluster_index_request(uid: string, id: ID, filter_name: string, index: Index)
{
local val=0;
if ( index in store[id, filter_name] )
val = store[id, filter_name][index];
# fmt("WORKER %s: received the cluster_index_request event for %s=%d.", Cluster::node, index2str(index), val);
event Metrics::cluster_index_response(uid, id, filter_name, index, val);
}
@endif
@if ( Cluster::local_node_type() == Cluster::MANAGER )
# Managers handle logging.
event Metrics::log_it(filter: Filter)
{
#print fmt("%.6f MANAGER: breaking %s filter for %s metric", network_time(), filter$name, filter$id);
local uid = unique_id("");
# Set some tracking variables.
requested_results[uid] = network_time();
filter_results[uid, filter$id, filter$name] = table();
# Request data from peers.
event Metrics::cluster_filter_request(uid, filter$id, filter$name);
# Schedule the log_it event for the next break period.
schedule filter$break_interval { Metrics::log_it(filter) };
}
# This is unlikely to be called often, but it's here in case there are metrics
# being collected by managers.
function data_added(filter: Filter, index: Index, val: count)
{
if ( check_notice(filter, index, val) )
do_notice(filter, index, val);
}
event Metrics::cluster_index_response(uid: string, id: ID, filter_name: string, index: Index, val: count)
{
#print fmt("%0.6f MANAGER: receiving index data from %s", network_time(), get_event_peer()$descr);
if ( [uid, id, filter_name, index] !in index_requests )
index_requests[uid, id, filter_name, index] = 0;
index_requests[uid, id, filter_name, index] += val;
local ir = index_requests[uid, id, filter_name, index];
++done_with[uid];
if ( Cluster::worker_count == done_with[uid] )
{
if ( check_notice(filter_store[id, filter_name], index, ir) )
do_notice(filter_store[id, filter_name], index, ir);
delete done_with[uid];
delete index_requests[uid, id, filter_name, index];
}
}
# Managers handle intermediate updates here.
event Metrics::cluster_index_intermediate_response(id: ID, filter_name: string, index: Index, val: count)
{
#print fmt("MANAGER: receiving intermediate index data from %s", get_event_peer()$descr);
#print fmt("MANAGER: requesting index data for %s", index2str(index));
local uid = unique_id("");
event Metrics::cluster_index_request(uid, id, filter_name, index);
++recent_global_view_indexes[id, filter_name, index];
}
event Metrics::cluster_filter_response(uid: string, id: ID, filter_name: string, data: MetricTable, done: bool)
{
#print fmt("MANAGER: receiving results from %s", get_event_peer()$descr);
local local_data = filter_results[uid, id, filter_name];
for ( index in data )
{
if ( index !in local_data )
local_data[index] = 0;
local_data[index] += data[index];
}
# Mark another worker as being "done" for this uid.
if ( done )
++done_with[uid];
# If the data has been collected from all peers, we are done and ready to log.
if ( Cluster::worker_count == done_with[uid] )
{
local ts = network_time();
# Log the time this was initially requested if it's available.
if ( uid in requested_results )
{
ts = requested_results[uid];
delete requested_results[uid];
}
write_log(ts, filter_store[id, filter_name], local_data);
# Clean up
delete filter_results[uid, id, filter_name];
delete done_with[uid];
}
}
@endif


@ -1,320 +0,0 @@
##! The metrics framework provides a way to count and measure data.
@load base/frameworks/notice
module Metrics;
export {
## The metrics logging stream identifier.
redef enum Log::ID += { LOG };
## Identifiers for metrics to collect.
type ID: enum {
## Blank placeholder value.
NOTHING,
};
## The default interval used for "breaking" metrics and writing the
## current value to the logging stream.
const default_break_interval = 15mins &redef;
## This is the interval for how often threshold based notices will happen
## after they have already fired.
const renotice_interval = 1hr &redef;
## Represents a thing which is having metrics collected for it. An instance
## of this record type and a :bro:type:`Metrics::ID` together represent a
## single measurement.
type Index: record {
## Host is the value to which this metric applies.
host: addr &optional;
## A non-address related metric or a sub-key for an address based metric.
## An example might be successful SSH connections by client IP address
## where the client string would be the index value.
## Another example might be number of HTTP requests to a particular
## value in a Host header. This is an example of a non-host based
## metric since multiple IP addresses could respond for the same Host
## header value.
str: string &optional;
## The CIDR block that this metric applies to. This is typically
## only used internally for host based aggregation.
network: subnet &optional;
} &log;
## The record type that is used for logging metrics.
type Info: record {
## Timestamp at which the metric was "broken".
ts: time &log;
## What measurement the metric represents.
metric_id: ID &log;
## The name of the filter being logged. :bro:type:`Metrics::ID` values
## can have multiple filters which represent different perspectives on
## the data so this is necessary to understand the value.
filter_name: string &log;
## What the metric value applies to.
index: Index &log;
## The simple numeric value of the metric.
value: count &log;
};
# TODO: configure a metrics filter logging stream to log the current
# metrics configuration in case someone is looking through
# old logs and the configuration has changed since then.
## Filters define how the data from a metric is aggregated and handled.
## Filters can be used to set how often the measurements are cut or "broken"
## and logged or how the data within them is aggregated. It's also
## possible to disable logging and use filters for thresholding.
type Filter: record {
## The :bro:type:`Metrics::ID` that this filter applies to.
id: ID &optional;
## The name for this filter so that multiple filters can be
## applied to a single metric to get a different view of the same
## metric data being collected (different aggregation, break, etc).
name: string &default="default";
## A predicate so that you can decide per index if you would like
## to accept the data being inserted.
pred: function(index: Index): bool &optional;
## Global mask by which you'd like to aggregate traffic.
aggregation_mask: count &optional;
## This is essentially a mapping table between addresses and subnets.
aggregation_table: table[subnet] of subnet &optional;
## The interval at which this filter should be "broken" and written
## to the logging stream. The counters are also reset to zero at
## this time so any threshold based detection needs to be set to a
## number that should be expected to happen within this period.
break_interval: interval &default=default_break_interval;
## This determines if the result of this filter is sent to the metrics
## logging stream. One use for the metrics framework is as an internal
## thresholding and statistics gathering utility that is meant to
## never log but rather to generate notices and derive data.
log: bool &default=T;
## If this and a $notice_threshold value are set, this notice type
## will be generated by the metrics framework.
note: Notice::Type &optional;
## A straight threshold for generating a notice.
notice_threshold: count &optional;
## A series of thresholds at which to generate notices.
notice_thresholds: vector of count &optional;
## How often this notice should be raised for this filter. It
## will be generated every time it crosses a threshold, but if the
## $break_interval is set to 5mins and this is set to 1hr the notice
## will only be generated once per hour even if something crosses the
## threshold in every break interval.
notice_freq: interval &optional;
};
## Function to associate a metric filter with a metric ID.
##
## id: The metric ID that the filter should be associated with.
##
## filter: The record representing the filter configuration.
global add_filter: function(id: ID, filter: Filter);
## Add data into a :bro:type:`Metrics::ID`. This should be called when
## a script has measured some point value and is ready to increment the
## counters.
##
## id: The metric ID that the data represents.
##
## index: The metric index that the value is to be added to.
##
## increment: How much to increment the counter by.
global add_data: function(id: ID, index: Index, increment: count);
## Helper function to represent a :bro:type:`Metrics::Index` value as
## a simple string.
##
## index: The metric index that is to be converted into a string.
##
## Returns: A string representation of the metric index.
global index2str: function(index: Index): string;
## Event that is used to "finish" metrics and adapt the metrics
## framework for clustered or non-clustered usage.
##
## .. note:: This is primarily intended for internal use.
global log_it: event(filter: Filter);
## Event to access metrics records as they are passed to the logging framework.
global log_metrics: event(rec: Info);
## Type to store a table of metrics values. Internal use only!
type MetricTable: table[Index] of count &default=0;
}
redef record Notice::Info += {
metric_index: Index &log &optional;
};
global metric_filters: table[ID] of vector of Filter = table();
global filter_store: table[ID, string] of Filter = table();
# This is indexed by metric ID and stream filter name.
global store: table[ID, string] of MetricTable = table() &default=table();
# This function checks if a threshold has been crossed and generates a
# notice if it has. It is also used as a method to implement
# mid-break-interval threshold crossing detection for cluster deployments.
global check_notice: function(filter: Filter, index: Index, val: count): bool;
# This is a hook for watching thresholds being crossed. It is called whenever
# index values are updated and the new val is given as the `val` argument.
global data_added: function(filter: Filter, index: Index, val: count);
# This stores the current threshold index for filters using the
# $notice_threshold and $notice_thresholds elements.
global thresholds: table[ID, string, Index] of count = {} &create_expire=renotice_interval &default=0;
event bro_init() &priority=5
{
Log::create_stream(Metrics::LOG, [$columns=Info, $ev=log_metrics]);
}
function index2str(index: Index): string
{
local out = "";
if ( index?$host )
out = fmt("%shost=%s", out, index$host);
if ( index?$network )
out = fmt("%s%snetwork=%s", out, |out|==0 ? "" : ", ", index$network);
if ( index?$str )
out = fmt("%s%sstr=%s", out, |out|==0 ? "" : ", ", index$str);
return fmt("metric_index(%s)", out);
}
function write_log(ts: time, filter: Filter, data: MetricTable)
{
for ( index in data )
{
local val = data[index];
local m: Info = [$ts=ts,
$metric_id=filter$id,
$filter_name=filter$name,
$index=index,
$value=val];
if ( filter$log )
Log::write(Metrics::LOG, m);
}
}
function reset(filter: Filter)
{
store[filter$id, filter$name] = table();
}
function add_filter(id: ID, filter: Filter)
{
if ( filter?$aggregation_table && filter?$aggregation_mask )
{
print "INVALID Metric filter: Defined $aggregation_table and $aggregation_mask.";
return;
}
if ( [id, filter$name] in store )
{
print fmt("INVALID Metric filter: Filter with name \"%s\" already exists.", filter$name);
return;
}
if ( filter?$notice_threshold && filter?$notice_thresholds )
{
print "INVALID Metric filter: Defined both $notice_threshold and $notice_thresholds";
return;
}
if ( ! filter?$id )
filter$id = id;
if ( id !in metric_filters )
metric_filters[id] = vector();
metric_filters[id][|metric_filters[id]|] = filter;
filter_store[id, filter$name] = filter;
store[id, filter$name] = table();
schedule filter$break_interval { Metrics::log_it(filter) };
}
function add_data(id: ID, index: Index, increment: count)
{
if ( id !in metric_filters )
return;
local filters = metric_filters[id];
# Try to add the data to all of the defined filters for the metric.
for ( filter_id in filters )
{
local filter = filters[filter_id];
# If this filter has a predicate, run the predicate and skip this
# index if the predicate returns false.
if ( filter?$pred && ! filter$pred(index) )
next;
if ( index?$host )
{
if ( filter?$aggregation_mask )
{
index$network = mask_addr(index$host, filter$aggregation_mask);
delete index$host;
}
else if ( filter?$aggregation_table )
{
# Don't add the data if the aggregation table doesn't include
# the given host address.
if ( index$host !in filter$aggregation_table )
return;
index$network = filter$aggregation_table[index$host];
delete index$host;
}
}
local metric_tbl = store[id, filter$name];
if ( index !in metric_tbl )
metric_tbl[index] = 0;
metric_tbl[index] += increment;
data_added(filter, index, metric_tbl[index]);
}
}
function check_notice(filter: Filter, index: Index, val: count): bool
{
if ( (filter?$notice_threshold &&
[filter$id, filter$name, index] !in thresholds &&
val >= filter$notice_threshold) ||
(filter?$notice_thresholds &&
|filter$notice_thresholds| <= thresholds[filter$id, filter$name, index] &&
val >= filter$notice_thresholds[thresholds[filter$id, filter$name, index]]) )
return T;
else
return F;
}
function do_notice(filter: Filter, index: Index, val: count)
{
# We include $peer_descr here because a manager could have actually
# generated the notice even though the current remote peer for the event
# calling this could be a worker if this is running as a cluster.
local n: Notice::Info = [$note=filter$note,
$n=val,
$metric_index=index,
$peer_descr=peer_description];
n$msg = fmt("Threshold crossed by %s %d/%d", index2str(index), val, filter$notice_threshold);
if ( index?$str )
n$sub = index$str;
if ( index?$host )
n$src = index$host;
# TODO: not sure where to put the network yet.
NOTICE(n);
# This just needs to be set to some value so that it doesn't refire the
# notice until it expires from the table or it crosses the next
# threshold in the case of vectors of thresholds.
++thresholds[filter$id, filter$name, index];
}
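# Usage sketch (not part of this commit, illustrating the API defined by this
# now-removed script): count HTTP requests per originating host. The
# HTTP_REQUESTS identifier, filter name, and 5min break interval are
# illustrative assumptions.
redef enum Metrics::ID += { HTTP_REQUESTS };

event bro_init()
    {
    Metrics::add_filter(HTTP_REQUESTS, [$name="all-requests", $break_interval=5mins]);
    }

event http_request(c: connection, method: string, original_URI: string,
                   unescaped_URI: string, version: string)
    {
    Metrics::add_data(HTTP_REQUESTS, [$host=c$id$orig_h], 1);
    }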


@ -1,21 +0,0 @@
@load ./main
module Metrics;
event Metrics::log_it(filter: Filter)
{
local id = filter$id;
local name = filter$name;
write_log(network_time(), filter, store[id, name]);
reset(filter);
schedule filter$break_interval { Metrics::log_it(filter) };
}
function data_added(filter: Filter, index: Index, val: count)
{
if ( check_notice(filter, index, val) )
do_notice(filter, index, val);
}


@ -431,9 +431,6 @@ hook Notice::notice(n: Notice::Info) &priority=-5
}
}
## This determines if a notice is being suppressed. It is only used
## internally as part of the mechanics for the global :bro:id:`NOTICE`
## function.
function is_being_suppressed(n: Notice::Info): bool
{
if ( n?$identifier && [n$note, n$identifier] in suppressing )


@ -1,2 +1,3 @@
@load ./utils
@load ./main
@load ./netstats


@ -1,10 +1,12 @@
##! This script supports how Bro sets its BPF capture filter. By default
##! Bro sets an unrestricted filter that allows all traffic. If a filter
##! Bro sets a capture filter that allows all traffic. If a filter
##! is set on the command line, that filter takes precedence over the default
##! open filter and all filters defined in Bro scripts with the
##! :bro:id:`capture_filters` and :bro:id:`restrict_filters` variables.
@load base/frameworks/notice
@load base/frameworks/analyzer
@load ./utils
module PacketFilter;
@ -14,11 +16,14 @@ export {
## Add notice types related to packet filter errors.
redef enum Notice::Type += {
## This notice is generated if a packet filter is unable to be compiled.
## This notice is generated if a packet filter cannot be compiled.
Compile_Failure,
## This notice is generated if a packet filter is fails to install.
## Generated if a packet filter fails to install.
Install_Failure,
## Generated when a filter takes too long to compile.
Too_Long_To_Compile_Filter
};
## The record type defining columns to be logged in the packet filter
@ -42,83 +47,248 @@ export {
success: bool &log &default=T;
};
## By default, Bro will examine all packets. If this is set to false,
## it will dynamically build a BPF filter that only selects protocols
## for which the user has loaded a corresponding analysis script.
## The latter used to be the default for Bro versions < 2.0. That has now
## changed however to enable port-independent protocol analysis.
const all_packets = T &redef;
## The BPF filter that is used by default to define what traffic should
## be captured. Filters defined in :bro:id:`restrict_filters` will still
## be applied to reduce the captured traffic.
const default_capture_filter = "ip or not ip" &redef;
## Filter string which is unconditionally or'ed to the beginning of every
## dynamically built filter.
const unrestricted_filter = "" &redef;
## Filter string which is unconditionally and'ed to the beginning of every
## dynamically built filter. This is mostly used when a custom filter is being
## used but MPLS or VLAN tags are on the traffic.
const restricted_filter = "" &redef;
## The maximum amount of time that you'd like to allow for BPF filters to compile.
## If this time is exceeded, compensation measures may be taken by the framework
## to reduce the filter size. This threshold being crossed also results in
## the :bro:see:`PacketFilter::Too_Long_To_Compile_Filter` notice.
const max_filter_compile_time = 100msec &redef;
## Install a BPF filter to exclude some traffic. The filter should positively
## match what is to be excluded; it will be wrapped in a "not".
##
## filter_id: An arbitrary string that can be used to identify
## the filter.
##
## filter: A BPF expression of traffic that should be excluded.
##
## Returns: A boolean value to indicate if the filter was successfully
## installed or not.
global exclude: function(filter_id: string, filter: string): bool;
## Install a temporary filter to exclude traffic that should not be passed through
## the BPF filter. The filter should match the traffic you don't want
## to see (it will be wrapped in a "not" condition).
##
## filter_id: An arbitrary string that can be used to identify
## the filter.
##
## filter: A BPF expression of traffic that should be excluded.
##
## span: The duration for which this filter should be put in place.
##
## Returns: A boolean value to indicate if the filter was successfully
## installed or not.
global exclude_for: function(filter_id: string, filter: string, span: interval): bool;
## Call this function to build and install a new dynamically built
## packet filter.
global install: function();
global install: function(): bool;
## A data structure to represent filter generating plugins.
type FilterPlugin: record {
## A function that is directly called when generating the complete filter.
func : function();
};
## API function to register a new plugin for dynamic restriction filters.
global register_filter_plugin: function(fp: FilterPlugin);
## Enables the old filtering approach of "only watch common ports for
## analyzed protocols".
##
## Unless you know what you are doing, leave this set to F.
const enable_auto_protocol_capture_filters = F &redef;
## This is where the default packet filter is stored and it should not
## normally be modified by users.
global default_filter = "<not set yet>";
global current_filter = "<not set yet>";
}
global dynamic_restrict_filters: table[string] of string = {};
# Track if a filter is currently building so functions that would ultimately
# install a filter immediately can still be used but they won't try to build or
# install the filter.
global currently_building = F;
# Internal tracking for whether the filter being built has possibly been changed.
global filter_changed = F;
global filter_plugins: set[FilterPlugin] = {};
redef enum PcapFilterID += {
DefaultPcapFilter,
FilterTester,
};
function combine_filters(lfilter: string, rfilter: string, op: string): string
function test_filter(filter: string): bool
{
if ( lfilter == "" && rfilter == "" )
return "";
else if ( lfilter == "" )
return rfilter;
else if ( rfilter == "" )
return lfilter;
else
return fmt("(%s) %s (%s)", lfilter, op, rfilter);
if ( ! precompile_pcap_filter(FilterTester, filter) )
{
# The given filter was invalid
# TODO: generate a notice.
return F;
}
return T;
}
function build_default_filter(): string
# This tracks any changes for filtering mechanisms that play along nicely
# and set filter_changed to T.
event filter_change_tracking()
{
if ( filter_changed )
install();
schedule 5min { filter_change_tracking() };
}
event bro_init() &priority=5
{
Log::create_stream(PacketFilter::LOG, [$columns=Info]);
# Preverify the capture and restrict filters to give more granular failure messages.
for ( id in capture_filters )
{
if ( ! test_filter(capture_filters[id]) )
Reporter::fatal(fmt("Invalid capture_filter named '%s' - '%s'", id, capture_filters[id]));
}
for ( id in restrict_filters )
{
if ( ! test_filter(restrict_filters[id]) )
Reporter::fatal(fmt("Invalid restrict filter named '%s' - '%s'", id, restrict_filters[id]));
}
}
event bro_init() &priority=-5
{
install();
event filter_change_tracking();
}
function register_filter_plugin(fp: FilterPlugin)
{
add filter_plugins[fp];
}
event remove_dynamic_filter(filter_id: string)
{
if ( filter_id in dynamic_restrict_filters )
{
delete dynamic_restrict_filters[filter_id];
install();
}
}
function exclude(filter_id: string, filter: string): bool
{
if ( ! test_filter(filter) )
return F;
dynamic_restrict_filters[filter_id] = filter;
install();
return T;
}
function exclude_for(filter_id: string, filter: string, span: interval): bool
{
if ( exclude(filter_id, filter) )
{
schedule span { remove_dynamic_filter(filter_id) };
return T;
}
return F;
}
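# Usage sketch (not part of this commit): temporarily shunt a noisy host out
# of the capture filter for an hour. The filter id and the host address are
# illustrative assumptions.
event bro_init()
    {
    PacketFilter::exclude_for("shunt-noisy-host", "host 192.168.1.100", 1hr);
    }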
function build(): string
{
if ( cmd_line_bpf_filter != "" )
# Return what the user specified on the command line;
return cmd_line_bpf_filter;
if ( all_packets )
# Return an "always true" filter.
return "ip or not ip";
currently_building = T;
# Build filter dynamically.
# Generate all of the plugin based filters.
for ( plugin in filter_plugins )
{
plugin$func();
}
# First the capture_filter.
local cfilter = "";
for ( id in capture_filters )
cfilter = combine_filters(cfilter, capture_filters[id], "or");
if ( |capture_filters| == 0 && ! enable_auto_protocol_capture_filters )
cfilter = default_capture_filter;
# Then the restrict_filter.
for ( id in capture_filters )
cfilter = combine_filters(cfilter, "or", capture_filters[id]);
if ( enable_auto_protocol_capture_filters )
cfilter = combine_filters(cfilter, "or", Analyzer::get_bpf());
# Apply the restriction filters.
local rfilter = "";
for ( id in restrict_filters )
rfilter = combine_filters(rfilter, "and", restrict_filters[id]);
# Apply the dynamic restriction filters.
for ( filt in dynamic_restrict_filters )
rfilter = combine_filters(rfilter, "and", string_cat("not (", dynamic_restrict_filters[filt], ")"));
# Finally, join them into one filter.
local filter = combine_filters(cfilter, "and", rfilter);
if ( unrestricted_filter != "" )
filter = combine_filters(unrestricted_filter, "or", filter);
if ( restricted_filter != "" )
filter = combine_filters(restricted_filter, "and", filter);
currently_building = F;
return filter;
}
function install(): bool
{
if ( currently_building )
return F;
local tmp_filter = build();
# No need to proceed if the filter hasn't changed.
if ( tmp_filter == current_filter )
return F;
local ts = current_time();
if ( ! precompile_pcap_filter(DefaultPcapFilter, tmp_filter) )
{
NOTICE([$note=Compile_Failure,
$msg=fmt("Compiling packet filter failed"),
$sub=tmp_filter]);
if ( network_time() == 0.0 )
Reporter::fatal(fmt("Bad pcap filter '%s'", tmp_filter));
else
Reporter::warning(fmt("Bad pcap filter '%s'", tmp_filter));
}
local diff = current_time()-ts;
if ( diff > max_filter_compile_time )
NOTICE([$note=Too_Long_To_Compile_Filter,
$msg=fmt("A BPF filter is taking longer than %0.1f seconds to compile", diff)]);
# Set it to the current filter if it passed precompiling
current_filter = tmp_filter;
# Do an audit log for the packet filter.
local info: Info;
@ -129,7 +299,7 @@ function install()
info$ts = current_time();
info$init = T;
}
info$filter = default_filter;
info$filter = current_filter;
if ( ! install_pcap_filter(DefaultPcapFilter) )
{
@ -137,15 +307,13 @@ function install()
info$success = F;
NOTICE([$note=Install_Failure,
$msg=fmt("Installing packet filter failed"),
$sub=default_filter]);
$sub=current_filter]);
}
if ( reading_live_traffic() || reading_traces() )
Log::write(PacketFilter::LOG, info);
# Update the filter change tracking
filter_changed = F;
return T;
}

View file

@ -13,7 +13,7 @@ export {
};
## This is the interval between individual statistics collection.
const stats_collection_interval = 10secs;
const stats_collection_interval = 5min;
}
event net_stats_update(last_stat: NetStats)

View file

@ -0,0 +1,58 @@
module PacketFilter;
export {
## Takes a :bro:type:`port` and returns a BPF expression which will
## match the port.
##
## p: The port.
##
## Returns: A valid BPF filter string for matching the port.
global port_to_bpf: function(p: port): string;
## Create a BPF filter to sample IPv4 and IPv6 traffic.
##
## num_parts: The number of parts the traffic should be split into.
##
## this_part: The part of the traffic this filter will accept. 0-based.
global sampling_filter: function(num_parts: count, this_part: count): string;
## Combines two valid BPF filter strings with a string based operator
## to form a new filter.
##
## lfilter: Filter which will go on the left side.
##
## op: Operation being applied (typically "or" or "and").
##
## rfilter: Filter which will go on the right side.
##
## Returns: A new string representing the two filters combined with
## the operator. Either filter being an empty string will
## still result in a valid filter.
global combine_filters: function(lfilter: string, op: string, rfilter: string): string;
}
function port_to_bpf(p: port): string
{
local tp = get_port_transport_proto(p);
return cat(tp, " and ", fmt("port %d", p));
}
function combine_filters(lfilter: string, op: string, rfilter: string): string
{
if ( lfilter == "" && rfilter == "" )
return "";
else if ( lfilter == "" )
return rfilter;
else if ( rfilter == "" )
return lfilter;
else
return fmt("(%s) %s (%s)", lfilter, op, rfilter);
}
function sampling_filter(num_parts: count, this_part: count): string
{
local v4_filter = fmt("ip and ((ip[14:2]+ip[18:2]) - (%d*((ip[14:2]+ip[18:2])/%d)) == %d)", num_parts, num_parts, this_part);
# TODO: this is probably a fairly suboptimal filter, but it should work for now.
local v6_filter = fmt("ip6 and ((ip6[22:2]+ip6[38:2]) - (%d*((ip6[22:2]+ip6[38:2])/%d)) == %d)", num_parts, num_parts, this_part);
return combine_filters(v4_filter, "or", v6_filter);
}
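# A brief usage sketch (illustrative only, not part of this script). Inside
# some function or event handler, these helpers compose BPF expressions; the
# port and the 1-of-4 sampling split below are made-up example values.
#
#     local dns_bpf = PacketFilter::port_to_bpf(53/udp);      # "udp and port 53"
#     local sampled = PacketFilter::sampling_filter(4, 0);    # accept part 0 of 4
#     local both = PacketFilter::combine_filters(dns_bpf, "and", sampled);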

View file

@ -9,7 +9,7 @@
##! Note that this framework deals with the handling of internally generated
##! reporter messages; for the interface into actually creating reporter
##! messages from the scripting layer, use
##! the built-in functions in :doc:`/scripts/base/reporter.bif`.
##! the built-in functions in :doc:`/scripts/base/bif/reporter.bif`.
module Reporter;

View file

@ -1,4 +1,5 @@
@load ./main
@load ./plugins
# The cluster framework must be loaded first.
@load base/frameworks/cluster

View file

@ -0,0 +1,346 @@
##! This implements transparent cluster support for the SumStats framework.
##! Do not load this file directly. It's only meant to be loaded automatically,
##! which will happen if the cluster framework has been enabled.
##! The goal of this script is to make sumstats calculation completely and
##! transparently automated when running on a cluster.
@load base/frameworks/cluster
@load ./main
module SumStats;
export {
## Allows a user to decide the size of the result groups in which workers
## transmit values for cluster stats aggregation.
const cluster_send_in_groups_of = 50 &redef;
## The percent of the full threshold value that needs to be met on a single worker
## for that worker to send the value to its manager in order for it to request a
## global view for that value. There is no requirement that the manager requests
## a global view for the key since it may opt not to if it requested a global view
## for the key recently.
const cluster_request_global_view_percent = 0.2 &redef;
## This is to deal with intermediate update overload. A manager will only allow
## this many intermediate update requests to the workers to be in flight at any
## given time. Intermediate updates requested beyond that are currently thrown out
## and not performed. In practice this should hopefully have a minimal effect.
const max_outstanding_global_views = 10 &redef;
## Intermediate updates can cause overload situations on very large clusters. This
## option may help reduce load and correct intermittent problems. It is also only
## intended as a temporary option.
const enable_intermediate_updates = T &redef;
## Event sent by the manager in a cluster to initiate the collection of values for
## a sumstat.
global cluster_ss_request: event(uid: string, ssid: string);
## Event sent by nodes that are collecting sumstats after receiving a request for
## the sumstat from the manager.
global cluster_ss_response: event(uid: string, ssid: string, data: ResultTable, done: bool);
## This event is sent by the manager in a cluster to initiate the collection of
## a single key value from a sumstat. It's typically used to get intermediate
## updates before the break interval triggers to speed detection of a value
## crossing a threshold.
global cluster_key_request: event(uid: string, ssid: string, key: Key);
## This event is sent by nodes in response to a
## :bro:id:`SumStats::cluster_key_request` event.
global cluster_key_response: event(uid: string, ssid: string, key: Key, result: Result);
## This is sent by workers to indicate that they crossed the fraction
## of the current threshold defined globally in
## :bro:id:`SumStats::cluster_request_global_view_percent`.
global cluster_key_intermediate_response: event(ssid: string, key: SumStats::Key);
## This event is scheduled internally on workers to send result chunks.
global send_data: event(uid: string, ssid: string, data: ResultTable);
## This event is generated when a threshold is crossed.
global cluster_threshold_crossed: event(ssid: string, key: SumStats::Key, thold: Thresholding);
}
# Add events to the cluster framework to make this work.
redef Cluster::manager2worker_events += /SumStats::cluster_(ss_request|key_request|threshold_crossed)/;
redef Cluster::manager2worker_events += /SumStats::thresholds_reset/;
redef Cluster::worker2manager_events += /SumStats::cluster_(ss_response|key_response|key_intermediate_response)/;
@if ( Cluster::local_node_type() != Cluster::MANAGER )
# This variable is maintained to know which keys have recently been sent as
# intermediate updates so they don't overwhelm their manager. The count that is
# yielded is the number of times the percentage threshold has been crossed and
# an intermediate result has been received.
global recent_global_view_keys: table[string, Key] of count &create_expire=1min &default=0;
event bro_init() &priority=-100
{
# The manager is the only host allowed to track these.
stats_store = table();
reducer_store = table();
}
# This is done on all non-manager node types in the event that a sumstat is
# being collected somewhere other than a worker.
function data_added(ss: SumStat, key: Key, result: Result)
{
# If an intermediate update for this value was sent recently, don't send
# it again.
if ( [ss$id, key] in recent_global_view_keys )
return;
# If val is 5 and global view % is 0.1 (10%), pct_val will be 50. If that
# crosses the full threshold then it's a candidate to send as an
# intermediate update.
if ( enable_intermediate_updates &&
check_thresholds(ss, key, result, cluster_request_global_view_percent) )
{
# kick off intermediate update
event SumStats::cluster_key_intermediate_response(ss$id, key);
++recent_global_view_keys[ss$id, key];
}
}
event SumStats::send_data(uid: string, ssid: string, data: ResultTable)
{
#print fmt("WORKER %s: sending data for uid %s...", Cluster::node, uid);
local local_data: ResultTable = table();
local num_added = 0;
for ( key in data )
{
local_data[key] = data[key];
delete data[key];
# Only send cluster_send_in_groups_of at a time. Queue another
# event to send the next group.
if ( cluster_send_in_groups_of == ++num_added )
break;
}
local done = F;
# If data is empty, this sumstat is done.
if ( |data| == 0 )
done = T;
# Note: copy is needed to compensate for a serialization caching issue. This should be
# changed to something else later.
event SumStats::cluster_ss_response(uid, ssid, copy(local_data), done);
if ( ! done )
schedule 0.01 sec { SumStats::send_data(uid, ssid, data) };
}
event SumStats::cluster_ss_request(uid: string, ssid: string)
{
#print fmt("WORKER %s: received the cluster_ss_request event for %s.", Cluster::node, id);
# Initiate sending all of the data for the requested stats.
if ( ssid in result_store )
event SumStats::send_data(uid, ssid, result_store[ssid]);
else
event SumStats::send_data(uid, ssid, table());
# Look up the actual sumstat and reset it; the reference to the data
# currently stored will be maintained internally by the send_data event.
if ( ssid in stats_store )
reset(stats_store[ssid]);
}
event SumStats::cluster_key_request(uid: string, ssid: string, key: Key)
{
if ( ssid in result_store && key in result_store[ssid] )
{
#print fmt("WORKER %s: received the cluster_key_request event for %s=%s.", Cluster::node, key2str(key), data);
# Note: copy is needed to compensate for a serialization caching issue. This should be
# changed to something else later.
event SumStats::cluster_key_response(uid, ssid, key, copy(result_store[ssid][key]));
}
else
{
# We need to send an empty response if we don't have the data so that the manager
# can know that it heard back from all of the workers.
event SumStats::cluster_key_response(uid, ssid, key, table());
}
}
event SumStats::cluster_threshold_crossed(ssid: string, key: SumStats::Key, thold: Thresholding)
{
if ( ssid !in threshold_tracker )
threshold_tracker[ssid] = table();
threshold_tracker[ssid][key] = thold;
}
event SumStats::thresholds_reset(ssid: string)
{
threshold_tracker[ssid] = table();
}
@endif
@if ( Cluster::local_node_type() == Cluster::MANAGER )
# This variable is maintained by manager nodes as they collect and aggregate
# results.
# Index on a uid.
global stats_results: table[string] of ResultTable &read_expire=1min;
# This variable is maintained by manager nodes to track how many "dones" they
# collected per collection unique id. Once the number of results for a uid
# matches the number of peer nodes that results should be coming from, the
# result is written out and deleted from here.
# Indexed on a uid.
# TODO: add an &expire_func in case not all results are received.
global done_with: table[string] of count &read_expire=1min &default=0;
# This variable is maintained by managers to track intermediate responses as
# they are getting a global view for a certain key.
# Indexed on a uid.
global key_requests: table[string] of Result &read_expire=1min;
# This variable is maintained by managers to prevent overwhelming communication due
# to too many intermediate updates. Each sumstat is tracked separately so that
# one won't overwhelm and degrade other quieter sumstats.
# Indexed on a sumstat id.
global outstanding_global_views: table[string] of count &default=0;
const zero_time = double_to_time(0.0);
# Managers handle logging.
event SumStats::finish_epoch(ss: SumStat)
{
if ( network_time() > zero_time )
{
#print fmt("%.6f MANAGER: breaking %s sumstat for %s sumstat", network_time(), ss$name, ss$id);
local uid = unique_id("");
if ( uid in stats_results )
delete stats_results[uid];
stats_results[uid] = table();
# Request data from peers.
event SumStats::cluster_ss_request(uid, ss$id);
}
# Schedule the next finish_epoch event.
schedule ss$epoch { SumStats::finish_epoch(ss) };
}
# This is unlikely to be called often, but it's here in
# case there are sumstats being collected by managers.
function data_added(ss: SumStat, key: Key, result: Result)
{
if ( check_thresholds(ss, key, result, 1.0) )
{
threshold_crossed(ss, key, result);
event SumStats::cluster_threshold_crossed(ss$id, key, threshold_tracker[ss$id][key]);
}
}
event SumStats::cluster_key_response(uid: string, ssid: string, key: Key, result: Result)
{
#print fmt("%0.6f MANAGER: receiving key data from %s - %s=%s", network_time(), get_event_peer()$descr, key2str(key), result);
# We only want to try and do a value merge if there are actually measured datapoints
# in the Result.
if ( uid in key_requests )
key_requests[uid] = compose_results(key_requests[uid], result);
else
key_requests[uid] = result;
# Mark that a worker is done.
++done_with[uid];
#print fmt("worker_count:%d :: done_with:%d", Cluster::worker_count, done_with[uid]);
if ( Cluster::worker_count == done_with[uid] )
{
local ss = stats_store[ssid];
local ir = key_requests[uid];
if ( check_thresholds(ss, key, ir, 1.0) )
{
threshold_crossed(ss, key, ir);
event SumStats::cluster_threshold_crossed(ss$id, key, threshold_tracker[ss$id][key]);
}
delete done_with[uid];
delete key_requests[uid];
# Check that there is an outstanding view before subtracting.
if ( outstanding_global_views[ssid] > 0 )
--outstanding_global_views[ssid];
}
}
# Managers handle intermediate updates here.
event SumStats::cluster_key_intermediate_response(ssid: string, key: Key)
{
#print fmt("MANAGER: receiving intermediate key data from %s", get_event_peer()$descr);
#print fmt("MANAGER: requesting key data for %s", key2str(key));
if ( ssid in outstanding_global_views &&
|outstanding_global_views[ssid]| > max_outstanding_global_views )
{
# Don't do this intermediate update. Perhaps at some point in the future
# we will queue and randomly select from these ignored intermediate
# update requests.
return;
}
++outstanding_global_views[ssid];
local uid = unique_id("");
event SumStats::cluster_key_request(uid, ssid, key);
}
event SumStats::cluster_ss_response(uid: string, ssid: string, data: ResultTable, done: bool)
{
#print fmt("MANAGER: receiving results from %s", get_event_peer()$descr);
# Mark another worker as being "done" for this uid.
if ( done )
++done_with[uid];
local local_data = stats_results[uid];
local ss = stats_store[ssid];
for ( key in data )
{
if ( key in local_data )
local_data[key] = compose_results(local_data[key], data[key]);
else
local_data[key] = data[key];
# If a stat is done being collected, thresholds for each key
# need to be checked so we're doing it here to avoid doubly
# iterating over each key.
if ( Cluster::worker_count == done_with[uid] )
{
if ( check_thresholds(ss, key, local_data[key], 1.0) )
{
threshold_crossed(ss, key, local_data[key]);
event SumStats::cluster_threshold_crossed(ss$id, key, threshold_tracker[ss$id][key]);
}
}
}
# If the data has been collected from all peers, we are done and ready to finish.
if ( Cluster::worker_count == done_with[uid] )
{
if ( ss?$epoch_finished )
ss$epoch_finished(local_data);
# Clean up
delete stats_results[uid];
delete done_with[uid];
# Not sure I need to reset the sumstat on the manager.
reset(ss);
}
}
event remote_connection_handshake_done(p: event_peer) &priority=5
{
send_id(p, "SumStats::stats_store");
send_id(p, "SumStats::reducer_store");
}
@endif

View file

@ -0,0 +1,436 @@
##! The summary statistics framework provides a way to
##! summarize large streams of data into simple reduced
##! measurements.
module SumStats;
export {
## The various calculations are all defined as plugins.
type Calculation: enum {
PLACEHOLDER
};
## Represents a thing which is having summarization
## results collected for it.
type Key: record {
## A non-address related summarization or a sub-key for
## an address based summarization. An example might be
## successful SSH connections by client IP address
## where the client string would be the key value.
## Another example might be number of HTTP requests to
## a particular value in a Host header. This is an
## example of a non-host based metric since multiple
## IP addresses could respond for the same Host
## header value.
str: string &optional;
## Host is the value to which this metric applies.
host: addr &optional;
};
## Represents data being added for a single observation.
## Only supply a single field at a time!
type Observation: record {
## Count value.
num: count &optional;
## Double value.
dbl: double &optional;
## String value.
str: string &optional;
};
type Reducer: record {
## Observation stream identifier for the reducer
## to attach to.
stream: string;
## The calculations to perform on the data points.
apply: set[Calculation];
## A predicate so that you can decide per key if you
## would like to accept the data being inserted.
pred: function(key: SumStats::Key, obs: SumStats::Observation): bool &optional;
## A function to normalize the key. This can be used to aggregate or
## normalize the entire key.
normalize_key: function(key: SumStats::Key): Key &optional;
};
## Value calculated for an observation stream fed into a reducer.
## Most of the fields are added by plugins.
type ResultVal: record {
## The time when the first observation was added to
## this result value.
begin: time;
## The time when the last observation was added to
## this result value.
end: time;
## The number of observations received.
num: count &default=0;
};
## Type to store results for multiple reducers.
type Result: table[string] of ResultVal;
## Type to store a table of sumstats results indexed
## by keys.
type ResultTable: table[Key] of Result;
## SumStats represent an aggregation of reducers along with
## mechanisms to handle various situations like the epoch ending
## or thresholds being crossed.
##
## It's best to not access any global state outside
## of the variables given to the callbacks because there
## is no assurance provided as to where the callbacks
## will be executed on clusters.
type SumStat: record {
## The interval at which this filter should be "broken"
## and the '$epoch_finished' callback called. The
## results are also reset at this time so any threshold
## based detection needs to be set to a
## value that should be expected to happen within
## this epoch.
epoch: interval;
## The reducers for the SumStat
reducers: set[Reducer];
## Provide a function to calculate a value from the
## :bro:see:`SumStats::Result` structure which will be used
## for thresholding.
## This is required if a $threshold value is given.
threshold_val: function(key: SumStats::Key, result: SumStats::Result): count &optional;
## The threshold value for calling the
## $threshold_crossed callback.
threshold: count &optional;
## A series of thresholds for calling the
## $threshold_crossed callback.
threshold_series: vector of count &optional;
## A callback that is called when a threshold is crossed.
threshold_crossed: function(key: SumStats::Key, result: SumStats::Result) &optional;
## A callback with the full collection of Results for
## this SumStat.
epoch_finished: function(rt: SumStats::ResultTable) &optional;
};
## Create a summary statistic.
global create: function(ss: SumStats::SumStat);
## Add data into an observation stream. This should be
## called when a script has measured some point value.
##
## id: The observation stream identifier that the data
## point represents.
##
## key: The key that the value is related to.
##
## obs: The data point to send into the stream.
global observe: function(id: string, key: SumStats::Key, obs: SumStats::Observation);
## This record is primarily used for internal threshold tracking.
type Thresholding: record {
# Internal use only. Indicates if a simple threshold was already crossed.
is_threshold_crossed: bool &default=F;
# Internal use only. Current key for threshold series.
threshold_series_index: count &default=0;
};
## This event is generated when thresholds are reset for a SumStat.
##
## ssid: SumStats ID that thresholds were reset for.
global thresholds_reset: event(ssid: string);
## Helper function to represent a :bro:type:`SumStats::Key` value as
## a simple string.
##
## key: The metric key that is to be converted into a string.
##
## Returns: A string representation of the metric key.
global key2str: function(key: SumStats::Key): string;
}
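# A minimal end-to-end sketch (illustrative only, not part of this script).
# The stream name "conn.attempts", the SUM plugin, and the threshold of 100
# are made-up example choices; the SUM plugin must be loaded separately.
#
#     function attempts_threshold(key: SumStats::Key, result: SumStats::Result): count
#         {
#         return double_to_count(result["conn.attempts"]$sum);
#         }
#
#     function attempts_crossed(key: SumStats::Key, result: SumStats::Result)
#         {
#         print fmt("%s exceeded 100 connection attempts", key$host);
#         }
#
#     event bro_init()
#         {
#         local r1: SumStats::Reducer = [$stream="conn.attempts",
#                                        $apply=set(SumStats::SUM)];
#         SumStats::create([$epoch=1min,
#                           $reducers=set(r1),
#                           $threshold=100,
#                           $threshold_val=attempts_threshold,
#                           $threshold_crossed=attempts_crossed]);
#         }
#
#     event connection_attempt(c: connection)
#         {
#         SumStats::observe("conn.attempts", [$host=c$id$orig_h], [$num=1]);
#         }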
redef record Reducer += {
# Internal use only. Provides a reference back to the related SumStats by its ID.
sid: string &optional;
};
# Internal use only. For tracking thresholds per sumstat and key.
global threshold_tracker: table[string] of table[Key] of Thresholding &optional;
redef record SumStat += {
# Internal use only (mostly for cluster coherency).
id: string &optional;
};
# Store of sumstats indexed on the sumstat id.
global stats_store: table[string] of SumStat = table();
# Store of reducers indexed on the data point stream id.
global reducer_store: table[string] of set[Reducer] = table();
# Store of results indexed on the measurement id.
global result_store: table[string] of ResultTable = table();
# Store of threshold information.
global thresholds_store: table[string, Key] of bool = table();
# This is called whenever key values are updated and the new val is given as the
# `val` argument. It's only prototyped here because cluster and non-cluster have
# separate implementations.
global data_added: function(ss: SumStat, key: Key, result: Result);
# Prototype the hook point for plugins to do calculations.
global observe_hook: hook(r: Reducer, val: double, data: Observation, rv: ResultVal);
# Prototype the hook point for plugins to initialize any result values.
global init_resultval_hook: hook(r: Reducer, rv: ResultVal);
# Prototype the hook point for plugins to merge Results.
global compose_resultvals_hook: hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal);
# Event that is used to "finish" measurements and adapt the measurement
# framework for clustered or non-clustered usage.
global finish_epoch: event(ss: SumStat);
function key2str(key: Key): string
{
local out = "";
if ( key?$host )
out = fmt("%shost=%s", out, key$host);
if ( key?$str )
out = fmt("%s%sstr=%s", out, |out|==0 ? "" : ", ", key$str);
return fmt("sumstats_key(%s)", out);
}
function init_resultval(r: Reducer): ResultVal
{
local rv: ResultVal = [$begin=network_time(), $end=network_time()];
hook init_resultval_hook(r, rv);
return rv;
}
function compose_resultvals(rv1: ResultVal, rv2: ResultVal): ResultVal
{
local result: ResultVal;
result$begin = (rv1$begin < rv2$begin) ? rv1$begin : rv2$begin;
result$end = (rv1$end > rv2$end) ? rv1$end : rv2$end;
result$num = rv1$num + rv2$num;
# Run the plugin composition hooks.
hook compose_resultvals_hook(result, rv1, rv2);
return result;
}
function compose_results(r1: Result, r2: Result): Result
{
local result: Result = table();
if ( |r1| > |r2| )
{
for ( data_id in r1 )
{
if ( data_id in r2 )
result[data_id] = compose_resultvals(r1[data_id], r2[data_id]);
else
result[data_id] = r1[data_id];
}
}
else
{
for ( data_id in r2 )
{
if ( data_id in r1 )
result[data_id] = compose_resultvals(r1[data_id], r2[data_id]);
else
result[data_id] = r2[data_id];
}
}
return result;
}
function reset(ss: SumStat)
{
if ( ss$id in result_store )
delete result_store[ss$id];
result_store[ss$id] = table();
if ( ss?$threshold || ss?$threshold_series )
{
threshold_tracker[ss$id] = table();
event SumStats::thresholds_reset(ss$id);
}
}
function create(ss: SumStat)
{
if ( (ss?$threshold || ss?$threshold_series) && ! ss?$threshold_val )
{
Reporter::error("SumStats given a threshold with no $threshold_val function");
}
if ( ! ss?$id )
ss$id=unique_id("");
threshold_tracker[ss$id] = table();
stats_store[ss$id] = ss;
for ( reducer in ss$reducers )
{
reducer$sid = ss$id;
if ( reducer$stream !in reducer_store )
reducer_store[reducer$stream] = set();
add reducer_store[reducer$stream][reducer];
}
reset(ss);
schedule ss$epoch { SumStats::finish_epoch(ss) };
}
function observe(id: string, key: Key, obs: Observation)
{
if ( id !in reducer_store )
return;
# Try to add the data to all of the defined reducers.
for ( r in reducer_store[id] )
{
if ( r?$normalize_key )
key = r$normalize_key(copy(key));
# If this reducer has a predicate, run the predicate
# and skip this key if the predicate returns false.
if ( r?$pred && ! r$pred(key, obs) )
next;
local ss = stats_store[r$sid];
# If there is a threshold and no epoch_finished callback
# we don't need to continue counting since the data will
# never be accessed. This was leading
# to some state management issues when measuring
# uniqueness.
# NOTE: this optimization may need to be removed in the
# future if on demand access is provided to the
# SumStats results.
if ( ! ss?$epoch_finished &&
r$sid in threshold_tracker &&
key in threshold_tracker[r$sid] &&
( ss?$threshold &&
threshold_tracker[r$sid][key]$is_threshold_crossed ) ||
( ss?$threshold_series &&
threshold_tracker[r$sid][key]$threshold_series_index+1 == |ss$threshold_series| ) )
next;
if ( r$sid !in result_store )
result_store[ss$id] = table();
local results = result_store[r$sid];
if ( key !in results )
results[key] = table();
local result = results[key];
if ( id !in result )
result[id] = init_resultval(r);
local result_val = result[id];
++result_val$num;
# Continually update the $end field.
result_val$end=network_time();
# If a string was given, fall back to 1.0 as the value.
local val = 1.0;
if ( obs?$num || obs?$dbl )
val = obs?$dbl ? obs$dbl : obs$num;
hook observe_hook(r, val, obs, result_val);
data_added(ss, key, result);
}
}
# This function checks if a threshold has been crossed. It is also used as a method to implement
# mid-break-interval threshold crossing detection for cluster deployments.
function check_thresholds(ss: SumStat, key: Key, result: Result, modify_pct: double): bool
{
if ( ! (ss?$threshold || ss?$threshold_series) )
return F;
# Add in the extra ResultVals to make threshold_vals easier to write.
if ( |ss$reducers| != |result| )
{
for ( reducer in ss$reducers )
{
if ( reducer$stream !in result )
result[reducer$stream] = init_resultval(reducer);
}
}
local watch = ss$threshold_val(key, result);
if ( modify_pct < 1.0 && modify_pct > 0.0 )
watch = double_to_count(floor(watch/modify_pct));
if ( ss$id !in threshold_tracker )
threshold_tracker[ss$id] = table();
local t_tracker = threshold_tracker[ss$id];
if ( key !in t_tracker )
{
local ttmp: Thresholding;
t_tracker[key] = ttmp;
}
local tt = t_tracker[key];
if ( ss?$threshold && ! tt$is_threshold_crossed && watch >= ss$threshold )
{
# Value crossed the threshold.
return T;
}
if ( ss?$threshold_series &&
|ss$threshold_series| >= tt$threshold_series_index &&
watch >= ss$threshold_series[tt$threshold_series_index] )
{
# A threshold series was given and the value crossed the next
# value in the series.
return T;
}
return F;
}
function threshold_crossed(ss: SumStat, key: Key, result: Result)
{
# If there is no callback, there is no point in any of this.
if ( ! ss?$threshold_crossed )
return;
# Add in the extra ResultVals to make threshold_crossed callbacks easier to write.
if ( |ss$reducers| != |result| )
{
for ( reducer in ss$reducers )
{
if ( reducer$stream !in result )
result[reducer$stream] = init_resultval(reducer);
}
}
ss$threshold_crossed(key, result);
local tt = threshold_tracker[ss$id][key];
tt$is_threshold_crossed = T;
# Bump up to the next threshold series index if a threshold series is being used.
if ( ss?$threshold_series )
++tt$threshold_series_index;
}

View file

@ -0,0 +1,24 @@
@load ./main
module SumStats;
event SumStats::finish_epoch(ss: SumStat)
{
if ( ss$id in result_store )
{
local data = result_store[ss$id];
if ( ss?$epoch_finished )
ss$epoch_finished(data);
reset(ss);
}
schedule ss$epoch { SumStats::finish_epoch(ss) };
}
function data_added(ss: SumStat, key: Key, result: Result)
{
if ( check_thresholds(ss, key, result, 1.0) )
threshold_crossed(ss, key, result);
}

View file

@ -0,0 +1,9 @@
@load ./average
@load ./last
@load ./max
@load ./min
@load ./sample
@load ./std-dev
@load ./sum
@load ./unique
@load ./variance

View file

@ -0,0 +1,36 @@
@load base/frameworks/sumstats/main
module SumStats;
export {
redef enum Calculation += {
## Calculate the average of the values.
AVERAGE
};
redef record ResultVal += {
## For numeric data, this calculates the average of all values.
average: double &optional;
};
}
hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
{
if ( AVERAGE in r$apply )
{
if ( ! rv?$average )
rv$average = val;
else
rv$average += (val - rv$average) / rv$num;
}
}
hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
{
if ( rv1?$average && rv2?$average )
result$average = ((rv1$average*rv1$num) + (rv2$average*rv2$num))/(rv1$num+rv2$num);
else if ( rv1?$average )
result$average = rv1$average;
else if ( rv2?$average )
result$average = rv2$average;
}

View file

@ -0,0 +1,55 @@
@load base/frameworks/sumstats
@load base/utils/queue
module SumStats;
export {
redef enum Calculation += {
## Keep last X observations in a queue
LAST
};
redef record Reducer += {
## number of elements to keep.
num_last_elements: count &default=0;
};
redef record ResultVal += {
## This is the queue where elements are maintained. Use the
## :bro:see:`SumStats::get_last` function to get a vector of
## the current element values.
last_elements: Queue::Queue &optional;
};
## Get a vector of element values from a ResultVal.
global get_last: function(rv: ResultVal): vector of Observation;
}
function get_last(rv: ResultVal): vector of Observation
{
local s: vector of Observation = vector();
if ( rv?$last_elements )
Queue::get_vector(rv$last_elements, s);
return s;
}
hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
{
if ( LAST in r$apply && r$num_last_elements > 0 )
{
if ( ! rv?$last_elements )
rv$last_elements = Queue::init([$max_len=r$num_last_elements]);
Queue::put(rv$last_elements, obs);
}
}
hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
{
# Merge $samples
if ( rv1?$last_elements && rv2?$last_elements )
result$last_elements = Queue::merge(rv1$last_elements, rv2$last_elements);
else if ( rv1?$last_elements )
result$last_elements = rv1$last_elements;
else if ( rv2?$last_elements )
result$last_elements = rv2$last_elements;
}
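# An illustrative usage sketch (not part of this script): a reducer that keeps
# the last five observations per key. The stream name "ssh.logins" and the
# count of five are made-up example values.
#
#     local r: SumStats::Reducer = [$stream="ssh.logins",
#                                   $apply=set(SumStats::LAST),
#                                   $num_last_elements=5];
#
#     # Later, e.g. inside an $epoch_finished callback with a ResultTable rt:
#     local recent = SumStats::get_last(rt[key]["ssh.logins"]);
#     for ( i in recent )
#         print recent[i];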

View file

@ -0,0 +1,38 @@
@load base/frameworks/sumstats/main
module SumStats;
export {
redef enum Calculation += {
## Find the maximum value.
MAX
};
redef record ResultVal += {
## For numeric data, this tracks the maximum value given.
max: double &optional;
};
}
hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
{
if ( MAX in r$apply )
{
if ( ! rv?$max )
rv$max = val;
else if ( val > rv$max )
rv$max = val;
}
}
hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
{
if ( rv1?$max && rv2?$max )
result$max = (rv1$max > rv2$max) ? rv1$max : rv2$max;
else if ( rv1?$max )
result$max = rv1$max;
else if ( rv2?$max )
result$max = rv2$max;
}

View file

@ -0,0 +1,36 @@
@load base/frameworks/sumstats/main
module SumStats;
export {
redef enum Calculation += {
## Find the minimum value.
MIN
};
redef record ResultVal += {
## For numeric data, this tracks the minimum value given.
min: double &optional;
};
}
hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
{
if ( MIN in r$apply )
{
if ( ! rv?$min )
rv$min = val;
else if ( val < rv$min )
rv$min = val;
}
}
hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
{
if ( rv1?$min && rv2?$min )
result$min = (rv1$min < rv2$min) ? rv1$min : rv2$min;
else if ( rv1?$min )
result$min = rv1$min;
else if ( rv2?$min )
result$min = rv2$min;
}

View file

@ -0,0 +1,120 @@
@load base/frameworks/sumstats/main
module SumStats;
export {
redef enum Calculation += {
## Get uniquely distributed random samples from the observation stream.
SAMPLE
};
redef record Reducer += {
## A number of sample Observations to collect.
num_samples: count &default=0;
};
redef record ResultVal += {
## This is the vector in which the samples are maintained.
samples: vector of Observation &default=vector();
## Number of total observed elements.
sample_elements: count &default=0;
};
}
redef record ResultVal += {
# Internal use only. This is not meant to be publicly available
# and is just a copy of num_samples from the Reducer. Needed for availability
# in the compose hook.
num_samples: count &default=0;
};
hook init_resultval_hook(r: Reducer, rv: ResultVal)
{
if ( SAMPLE in r$apply )
rv$num_samples = r$num_samples;
}
function sample_add_sample(obs:Observation, rv: ResultVal)
{
++rv$sample_elements;
if ( |rv$samples| < rv$num_samples )
rv$samples[|rv$samples|] = obs;
else
{
local ra = rand(rv$sample_elements);
if ( ra < rv$num_samples )
rv$samples[ra] = obs;
}
}
hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
{
if ( SAMPLE in r$apply )
{
sample_add_sample(obs, rv);
}
}
hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
{
if ( rv1$num_samples != rv2$num_samples )
{
Reporter::error("Merging sample sets with differing sizes is not supported");
return;
}
local num_samples = rv1$num_samples;
result$num_samples = num_samples;
if ( |rv1$samples| > num_samples || |rv2$samples| > num_samples )
{
Reporter::error("Sample vector with too many elements. Aborting.");
return;
}
if ( |rv1$samples| != num_samples && |rv2$samples| < num_samples )
{
if ( |rv1$samples| != rv1$sample_elements || |rv2$samples| < rv2$sample_elements )
{
Reporter::error("Mismatch in sample element size and tracking. Aborting merge");
return;
}
for ( i in rv1$samples )
sample_add_sample(rv1$samples[i], result);
for ( i in rv2$samples)
sample_add_sample(rv2$samples[i], result);
}
else
{
local other_vector: vector of Observation;
local othercount: count;
if ( rv1$sample_elements > rv2$sample_elements )
{
result$samples = copy(rv1$samples);
other_vector = rv2$samples;
othercount = rv2$sample_elements;
}
else
{
result$samples = copy(rv2$samples);
other_vector = rv1$samples;
othercount = rv1$sample_elements;
}
local totalcount = rv1$sample_elements + rv2$sample_elements;
result$sample_elements = totalcount;
for ( i in other_vector )
{
if ( rand(totalcount) <= othercount )
result$samples[i] = other_vector[i];
}
}
}

View file

@ -0,0 +1,34 @@
@load base/frameworks/sumstats/main
@load ./variance
module SumStats;
export {
redef enum Calculation += {
## Find the standard deviation of the values.
STD_DEV
};
redef record ResultVal += {
## For numeric data, this calculates the standard deviation.
std_dev: double &default=0.0;
};
}
function calc_std_dev(rv: ResultVal)
{
if ( rv?$variance )
rv$std_dev = sqrt(rv$variance);
}
# This depends on the variance plugin which uses priority -5
hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal) &priority=-10
{
if ( STD_DEV in r$apply )
calc_std_dev(rv);
}
hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal) &priority=-10
{
calc_std_dev(result);
}

View file

@ -0,0 +1,51 @@
@load base/frameworks/sumstats/main
module SumStats;
export {
redef enum Calculation += {
## Sums the values given. For string values,
## this will be the number of strings given.
SUM
};
redef record ResultVal += {
## For numeric data, this tracks the sum of all values.
sum: double &default=0.0;
};
type threshold_function: function(key: SumStats::Key, result: SumStats::Result): count;
global sum_threshold: function(data_id: string): threshold_function;
}
function sum_threshold(data_id: string): threshold_function
{
return function(key: SumStats::Key, result: SumStats::Result): count
{
print fmt("data_id: %s", data_id);
print result;
return double_to_count(result[data_id]$sum);
};
}
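# An illustrative usage sketch (not part of this script): sum_threshold()
# builds a ready-made $threshold_val callback for SUM-based sumstats. The
# stream name "smtp.mails" and the threshold of 1000 are made-up example
# values; this would typically run from a bro_init handler.
#
#     local r: SumStats::Reducer = [$stream="smtp.mails", $apply=set(SumStats::SUM)];
#     SumStats::create([$epoch=5min,
#                       $reducers=set(r),
#                       $threshold=1000,
#                       $threshold_val=SumStats::sum_threshold("smtp.mails")]);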
hook init_resultval_hook(r: Reducer, rv: ResultVal)
{
if ( SUM in r$apply && ! rv?$sum )
rv$sum = 0;
}
hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
{
if ( SUM in r$apply )
rv$sum += val;
}
hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
{
if ( rv1?$sum || rv2?$sum )
{
result$sum = rv1?$sum ? rv1$sum : 0;
if ( rv2?$sum )
result$sum += rv2$sum;
}
}

View file

@ -0,0 +1,53 @@
@load base/frameworks/sumstats/main
module SumStats;
export {
redef enum Calculation += {
## Calculate the number of unique values.
UNIQUE
};
redef record ResultVal += {
## If cardinality is being tracked, the number of unique
## items is tracked here.
unique: count &default=0;
};
}
redef record ResultVal += {
# Internal use only. This is not meant to be publicly available
# because we don't want to trust that we can inspect the values,
# since we will likely move to a probabilistic data structure in the future.
# TODO: in the future this will optionally be a hyperloglog structure
unique_vals: set[Observation] &optional;
};
hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
{
if ( UNIQUE in r$apply )
{
if ( ! rv?$unique_vals )
rv$unique_vals=set();
add rv$unique_vals[obs];
rv$unique = |rv$unique_vals|;
}
}
hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
{
if ( rv1?$unique_vals || rv2?$unique_vals )
{
if ( rv1?$unique_vals )
result$unique_vals = copy(rv1$unique_vals);
if ( rv2?$unique_vals )
if ( ! result?$unique_vals )
result$unique_vals = copy(rv2$unique_vals);
else
for ( val2 in rv2$unique_vals )
add result$unique_vals[copy(val2)];
result$unique = |result$unique_vals|;
}
}
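# An illustrative usage sketch (not part of this script): counting distinct
# observation values per key. The stream name "http.user_agents" is a made-up
# example value.
#
#     local r: SumStats::Reducer = [$stream="http.user_agents",
#                                   $apply=set(SumStats::UNIQUE)];
#
#     # Later, in an $epoch_finished callback, rt[key]["http.user_agents"]$unique
#     # holds the number of distinct observations seen for that key.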

View file

@ -0,0 +1,69 @@
@load base/frameworks/sumstats/main
@load ./average
module SumStats;
export {
redef enum Calculation += {
## Find the variance of the values.
VARIANCE
};
redef record ResultVal += {
## For numeric data, this calculates the variance.
variance: double &optional;
};
}
redef record ResultVal += {
# Internal use only. Used for incrementally calculating variance.
prev_avg: double &optional;
# Internal use only. For calculating incremental variance.
var_s: double &default=0.0;
};
function calc_variance(rv: ResultVal)
{
rv$variance = (rv$num > 1) ? rv$var_s/(rv$num-1) : 0.0;
}
# Reduced priority since this depends on the average
hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal) &priority=-5
{
if ( VARIANCE in r$apply )
{
if ( rv$num > 1 )
rv$var_s += ((val - rv$prev_avg) * (val - rv$average));
calc_variance(rv);
rv$prev_avg = rv$average;
}
}
# Reduced priority since this depends on the average
hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal) &priority=-5
{
if ( rv1?$var_s && rv1?$average &&
rv2?$var_s && rv2?$average )
{
local rv1_avg_sq = (rv1$average - result$average);
rv1_avg_sq = rv1_avg_sq*rv1_avg_sq;
local rv2_avg_sq = (rv2$average - result$average);
rv2_avg_sq = rv2_avg_sq*rv2_avg_sq;
result$var_s = rv1$num*(rv1$var_s/rv1$num + rv1_avg_sq) + rv2$num*(rv2$var_s/rv2$num + rv2_avg_sq);
}
else if ( rv1?$var_s )
result$var_s = rv1$var_s;
else if ( rv2?$var_s )
result$var_s = rv2$var_s;
if ( rv1?$prev_avg && rv2?$prev_avg )
result$prev_avg = ((rv1$prev_avg*rv1$num) + (rv2$prev_avg*rv2$num))/(rv1$num+rv2$num);
else if ( rv1?$prev_avg )
result$prev_avg = rv1$prev_avg;
else if ( rv2?$prev_avg )
result$prev_avg = rv2$prev_avg;
calc_variance(result);
}

View file

@ -83,19 +83,17 @@ export {
}
const ayiya_ports = { 5072/udp };
redef dpd_config += { [ANALYZER_AYIYA] = [$ports = ayiya_ports] };
const teredo_ports = { 3544/udp };
redef dpd_config += { [ANALYZER_TEREDO] = [$ports = teredo_ports] };
const gtpv1_ports = { 2152/udp, 2123/udp };
redef dpd_config += { [ANALYZER_GTPV1] = [$ports = gtpv1_ports] };
redef likely_server_ports += { ayiya_ports, teredo_ports, gtpv1_ports };
event bro_init() &priority=5
{
Log::create_stream(Tunnel::LOG, [$columns=Info]);
Analyzer::register_for_ports(Analyzer::ANALYZER_AYIYA, ayiya_ports);
Analyzer::register_for_ports(Analyzer::ANALYZER_TEREDO, teredo_ports);
Analyzer::register_for_ports(Analyzer::ANALYZER_GTPV1, gtpv1_ports);
}
function register_all(ecv: EncapsulatingConnVector)

View file

@ -1,5 +1,5 @@
@load base/const.bif
@load base/types.bif
@load base/bif/const.bif.bro
@load base/bif/types.bif
# Type declarations
@ -222,17 +222,6 @@ type endpoint_stats: record {
endian_type: count;
};
## A unique analyzer instance ID. Each time Bro instantiates a protocol analyzer
## for a connection, it assigns it a unique ID that can be used to reference
## that instance.
##
## .. bro:see:: analyzer_name disable_analyzer protocol_confirmation
## protocol_violation
##
## .. todo::While we declare an alias for the type here, the events/functions still
## use ``count``. That should be changed.
type AnalyzerID: count;
module Tunnel;
export {
## Records the identity of an encapsulating parent of a tunneled connection.
@ -300,7 +289,7 @@ type connection: record {
## one protocol analyzer is able to parse the same data. If so, all will
## be recorded. Also note that the recorded services are independent of any
## transport-level protocols.
service: set[string];
addl: string; ##< Deprecated.
hot: count; ##< Deprecated.
history: string; ##< State history of connections. See *history* in :bro:see:`Conn::Info`.
@ -316,6 +305,73 @@ type connection: record {
tunnel: EncapsulatingConnVector &optional;
};
## Default amount of time a file can be inactive before the file analysis
## gives up and discards any internal state related to the file.
const default_file_timeout_interval: interval = 2 mins &redef;
## Default amount of bytes that file analysis will buffer before raising
## :bro:see:`file_new`.
const default_file_bof_buffer_size: count = 1024 &redef;
## A file that Bro is analyzing. This is Bro's type for describing the basic
## internal metadata collected about a "file", which is essentially just a
## byte stream that is e.g. pulled from a network connection or possibly
## some other input source.
type fa_file: record {
## An identifier associated with a single file.
id: string;
## Identifier associated with a container file from which this one was
## extracted as part of the file analysis.
parent_id: string &optional;
## An identification of the source of the file data. E.g. it may be
## a network protocol over which it was transferred, or a local file
## path which was read, or some other input source.
source: string &optional;
## If the source of this file is a network connection, this field
## may be set to indicate the directionality.
is_orig: bool &optional;
## The set of connections over which the file was transferred.
conns: table[conn_id] of connection &optional;
## The time at which the last activity for the file was seen.
last_active: time;
## Number of bytes provided to the file analysis engine for the file.
seen_bytes: count &default=0;
## Total number of bytes that are supposed to comprise the full file.
total_bytes: count &optional;
## The number of bytes in the file stream that were completely missed
## during the process of analysis e.g. due to dropped packets.
missing_bytes: count &default=0;
## The number of not all-in-sequence bytes in the file stream that
## were delivered to file analyzers due to reassembly buffer overflow.
overflow_bytes: count &default=0;
## The amount of time between receiving new data for this file that
## the analysis engine will wait before giving up on it.
timeout_interval: interval &default=default_file_timeout_interval;
## The number of bytes at the beginning of a file to save for later
## inspection in *bof_buffer* field.
bof_buffer_size: count &default=default_file_bof_buffer_size;
## The content of the beginning of a file up to *bof_buffer_size* bytes.
## This is also the buffer that's used for file/mime type detection.
bof_buffer: string &optional;
## A mime type provided by libmagic against the *bof_buffer*, or
## in the cases where no buffering of the beginning of file occurs,
## an initial guess of the mime type based on the first data seen.
mime_type: string &optional;
} &redef;
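# A brief illustrative sketch (not part of these declarations): a script can
# inspect these fields from file analysis events. The size cutoff below is a
# made-up example value, and this assumes the file_new event raised by the
# file analysis engine.
#
#     event file_new(f: fa_file)
#         {
#         if ( f?$mime_type && f?$total_bytes && f$total_bytes > 1048576 )
#             print fmt("large %s file %s", f$mime_type, f$id);
#         }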
## Fields of a SYN packet.
##
## .. bro:see:: connection_SYN_packet
@ -646,9 +702,9 @@ type entropy_test_result: record {
};
# Prototypes of Bro built-in functions.
@load base/strings.bif
@load base/bro.bif
@load base/reporter.bif
@load base/bif/strings.bif
@load base/bif/bro.bif
@load base/bif/reporter.bif
## Deprecated. This is superseded by the new logging framework.
global log_file_name: function(tag: string): string &redef;
@ -710,19 +766,6 @@ global signature_files = "" &add_func = add_signature_file;
## ``p0f`` fingerprint file to use. Will be searched relative to ``BROPATH``.
const passive_fingerprint_file = "base/misc/p0f.fp" &redef;
# todo::testing to see if I can remove these without causing problems.
#const ftp = 21/tcp;
#const ssh = 22/tcp;
#const telnet = 23/tcp;
#const smtp = 25/tcp;
#const domain = 53/tcp; # note, doesn't include UDP version
#const gopher = 70/tcp;
#const finger = 79/tcp;
#const http = 80/tcp;
#const ident = 113/tcp;
#const bgp = 179/tcp;
#const rlogin = 513/tcp;
# TCP values for :bro:see:`endpoint` *state* field.
# todo::these should go into an enum to make them autodoc'able.
const TCP_INACTIVE = 0; ##< Endpoint is still inactive.
@ -2656,7 +2699,7 @@ export {
}
module GLOBAL;
@load base/event.bif
@load base/bif/event.bif
## BPF filter the user has set via the -f command line options. Empty if none.
const cmd_line_bpf_filter = "" &redef;
@ -2846,34 +2889,11 @@ const remote_trace_sync_peers = 0 &redef;
## consistency check.
const remote_check_sync_consistency = F &redef;
## Analyzer tags. The core automatically defines constants
## ``ANALYZER_<analyzer-name>*``, e.g., ``ANALYZER_HTTP``.
##
## .. bro:see:: dpd_config
##
## .. todo::We should autodoc these automatically generated constants.
type AnalyzerTag: count;
## Set of ports activating a particular protocol analysis.
##
## .. bro:see:: dpd_config
type dpd_protocol_config: record {
ports: set[port] &optional; ##< Set of ports.
};
## Port configuration for Bro's "dynamic protocol detection". Protocol
## analyzers can be activated via either well-known ports or content analysis.
## This table defines the ports.
##
## .. bro:see:: dpd_reassemble_first_packets dpd_buffer_size
## dpd_match_only_beginning dpd_ignore_ports
const dpd_config: table[AnalyzerTag] of dpd_protocol_config = {} &redef;
## Reassemble the beginning of all TCP connections before doing
## signature-matching. Enabling this provides more accurate matching at the
## expense of CPU cycles.
##
## .. bro:see:: dpd_config dpd_buffer_size
## .. bro:see:: dpd_buffer_size
## dpd_match_only_beginning dpd_ignore_ports
##
## .. note:: Despite the name, this option affects *all* signature matching, not
@ -2888,24 +2908,24 @@ const dpd_reassemble_first_packets = T &redef;
## activated afterwards. Then only analyzers that can deal with partial
## connections will be able to analyze the session.
##
## .. bro:see:: dpd_reassemble_first_packets dpd_config dpd_match_only_beginning
## .. bro:see:: dpd_reassemble_first_packets dpd_match_only_beginning
## dpd_ignore_ports
const dpd_buffer_size = 1024 &redef;
## If true, stops signature matching if dpd_buffer_size has been reached.
##
## .. bro:see:: dpd_reassemble_first_packets dpd_buffer_size
## dpd_config dpd_ignore_ports
## dpd_ignore_ports
##
## .. note:: Despite the name, this option affects *all* signature matching, not
## only signatures used for dynamic protocol detection.
const dpd_match_only_beginning = T &redef;
## If true, don't consider any ports for deciding which protocol analyzer to
## use. If so, the value of :bro:see:`dpd_config` is ignored.
## use.
##
## .. bro:see:: dpd_reassemble_first_packets dpd_buffer_size
## dpd_match_only_beginning dpd_config
## dpd_match_only_beginning
const dpd_ignore_ports = F &redef;
## Ports which the core considers being likely used by servers. For ports in
@ -2913,13 +2933,6 @@ const dpd_ignore_ports = F &redef;
## connection if it misses the initial handshake.
const likely_server_ports: set[port] &redef;
## Deprecated. Set of all ports for which we know an analyzer, built by
## :doc:`/scripts/base/frameworks/dpd/main`.
##
## .. todo::This should be defined by :doc:`/scripts/base/frameworks/dpd/main`
## itself if we still need it.
global dpd_analyzer_ports: table[port] of set[AnalyzerTag];
## Per-incident timer managers are drained after this amount of inactivity.
const timer_mgr_inactivity_timeout = 1 min &redef;
@ -3028,9 +3041,12 @@ module GLOBAL;
## Number of bytes per packet to capture from live interfaces.
const snaplen = 8192 &redef;
# Load the logging framework here because it uses fairly deep integration with
# Load BiFs defined by plugins.
@load base/bif/plugins
# Load these frameworks here because they use fairly deep integration with
# BiFs and script-land defined types.
@load base/frameworks/logging
@load base/frameworks/input
@load base/frameworks/analyzer
@load base/frameworks/file-analysis

View file

@ -15,14 +15,17 @@
@load base/utils/numbers
@load base/utils/paths
@load base/utils/patterns
@load base/utils/queue
@load base/utils/strings
@load base/utils/thresholds
@load base/utils/time
@load base/utils/urls
# This has some deep interplay between types and BiFs so it's
# loaded in base/init-bare.bro
#@load base/frameworks/logging
@load base/frameworks/notice
@load base/frameworks/analyzer
@load base/frameworks/dpd
@load base/frameworks/signatures
@load base/frameworks/packet-filter
@ -30,9 +33,9 @@
@load base/frameworks/communication
@load base/frameworks/control
@load base/frameworks/cluster
@load base/frameworks/metrics
@load base/frameworks/intel
@load base/frameworks/reporter
@load base/frameworks/sumstats
@load base/frameworks/tunnels
@load base/protocols/conn
@ -41,10 +44,12 @@
@load base/protocols/http
@load base/protocols/irc
@load base/protocols/modbus
@load base/protocols/pop3
@load base/protocols/smtp
@load base/protocols/socks
@load base/protocols/ssh
@load base/protocols/ssl
@load base/protocols/syslog
@load base/protocols/tunnels
@load base/misc/find-checksum-offloading

View file

@ -6,9 +6,9 @@ module Conn;
export {
## Define inactivity timeouts by the service detected being used over
## the connection.
const analyzer_inactivity_timeouts: table[AnalyzerTag] of interval = {
const analyzer_inactivity_timeouts: table[Analyzer::Tag] of interval = {
# For interactive services, allow longer periods of inactivity.
[[ANALYZER_SSH, ANALYZER_FTP]] = 1 hrs,
[[Analyzer::ANALYZER_SSH, Analyzer::ANALYZER_FTP]] = 1 hrs,
} &redef;
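# An illustrative sketch (not part of this script): a site could extend this
# table for additional analyzers; the SSL tag and the 30 min value are made-up
# example choices.
#
#     redef Conn::analyzer_inactivity_timeouts += {
#         [Analyzer::ANALYZER_SSL] = 30 mins,
#     };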
## Define inactivity timeouts based on common protocol ports.
@ -18,7 +18,7 @@ export {
}
event protocol_confirmation(c: connection, atype: count, aid: count)
event protocol_confirmation(c: connection, atype: Analyzer::Tag, aid: count)
{
if ( atype in analyzer_inactivity_timeouts )
set_inactivity_timeout(c$id, analyzer_inactivity_timeouts[atype]);

View file

@ -1,6 +1,7 @@
##! Base DNS analysis script which tracks and logs DNS queries along with
##! their responses.
@load base/utils/queue
@load ./consts
module DNS;
@ -73,19 +74,6 @@ export {
total_replies: count &optional;
};
## A record type which tracks the status of DNS queries for a given
## :bro:type:`connection`.
type State: record {
## Indexed by query id, returns Info record corresponding to
## query/response which haven't completed yet.
pending: table[count] of Info &optional;
## This is the list of DNS responses that have completed based on the
## number of responses declared and the number received. The contents
## of the set are transaction IDs.
finished_answers: set[count] &optional;
};
## An event that can be handled to access the :bro:type:`DNS::Info`
## record as it is sent to the logging framework.
global log_dns: event(rec: Info);
@ -102,46 +90,49 @@ export {
##
## reply: The specific response information according to RR type/class.
global do_reply: event(c: connection, msg: dns_msg, ans: dns_answer, reply: string);
## A hook that is called whenever a session is being set.
## This can be used if additional initialization logic needs to happen
## when creating a new session value.
##
## c: The connection involved in the new session
##
## msg: The DNS message header information.
##
## is_query: Indicator for if this is being called for a query or a response.
global set_session: hook(c: connection, msg: dns_msg, is_query: bool);
## A record type which tracks the status of DNS queries for a given
## :bro:type:`connection`.
type State: record {
## Indexed by query id, returns Info record corresponding to
## query/response which haven't completed yet.
pending: table[count] of Queue::Queue;
## This is the list of DNS responses that have completed based on the
## number of responses declared and the number received. The contents
## of the set are transaction IDs.
finished_answers: set[count];
};
}
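# An illustrative sketch (not part of this script): another script could hook
# set_session to maintain its own per-query state. The added field name
# "my_tag" is a made-up example, and this relies on the base handler at
# priority=5 having already assigned c$dns.
#
#     redef record DNS::Info += { my_tag: string &optional; };
#
#     hook DNS::set_session(c: connection, msg: dns_msg, is_query: bool)
#         {
#         if ( is_query )
#             c$dns$my_tag = "query-seen";
#         }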
redef record connection += {
dns: Info &optional;
dns_state: State &optional;
};
# DPD configuration.
redef capture_filters += {
["dns"] = "port 53",
["mdns"] = "udp and port 5353",
["llmns"] = "udp and port 5355",
["netbios-ns"] = "udp port 137",
};
const dns_ports = { 53/udp, 53/tcp, 137/udp, 5353/udp, 5355/udp };
redef dpd_config += { [ANALYZER_DNS] = [$ports = dns_ports] };
const dns_udp_ports = { 53/udp, 137/udp, 5353/udp, 5355/udp };
const dns_tcp_ports = { 53/tcp };
redef dpd_config += { [ANALYZER_DNS_UDP_BINPAC] = [$ports = dns_udp_ports] };
redef dpd_config += { [ANALYZER_DNS_TCP_BINPAC] = [$ports = dns_tcp_ports] };
redef likely_server_ports += { 53/udp, 53/tcp, 137/udp, 5353/udp, 5355/udp };
const ports = { 53/udp, 53/tcp, 137/udp, 5353/udp, 5355/udp };
redef likely_server_ports += { ports };
event bro_init() &priority=5
{
Log::create_stream(DNS::LOG, [$columns=Info, $ev=log_dns]);
Analyzer::register_for_ports(Analyzer::ANALYZER_DNS, ports);
}
function new_session(c: connection, trans_id: count): Info
{
local info: Info;
info$ts = network_time();
info$id = c$id;
@ -151,18 +142,37 @@ function new_session(c: connection, trans_id: count): Info
return info;
}
hook set_session(c: connection, msg: dns_msg, is_query: bool) &priority=5
{
if ( ! c?$dns_state )
{
local state: State;
c$dns_state = state;
}
if ( msg$id !in c$dns_state$pending )
c$dns_state$pending[msg$id] = Queue::init();
local info: Info;
# If this is either a query or this is the reply but
# no Info records are in the queue (we missed the query?)
# we need to create an Info record and put it in the queue.
if ( is_query ||
Queue::len(c$dns_state$pending[msg$id]) == 0 )
{
info = new_session(c, msg$id);
Queue::put(c$dns_state$pending[msg$id], info);
}
if ( is_query )
# If this is a query, assign the newly created info variable
# so that the world looks correct to anything else handling
# this query.
c$dns = info;
else
# Peek at the next item in the queue for this trans_id and
# assign it to c$dns since this is a response.
c$dns = Queue::peek(c$dns_state$pending[msg$id]);
if ( ! is_query )
{
@ -190,19 +200,21 @@ function set_session(c: connection, msg: dns_msg, is_query: bool)
event dns_message(c: connection, is_orig: bool, msg: dns_msg, len: count) &priority=5
{
set_session(c, msg, is_orig);
hook set_session(c, msg, is_orig);
}
event DNS::do_reply(c: connection, msg: dns_msg, ans: dns_answer, reply: string) &priority=5
{
if ( ans$answer_type == DNS_ANS )
{
if ( ! c?$dns )
{
event conn_weird("dns_unmatched_reply", c, "");
hook set_session(c, msg, F);
}
c$dns$AA = msg$AA;
c$dns$RA = msg$RA;
if ( msg$id in c$dns_state$finished_answers )
event conn_weird("dns_reply_seen_after_done", c, "");
if ( reply != "" )
{
if ( ! c$dns?$answers )
@ -217,7 +229,6 @@ event DNS::do_reply(c: connection, msg: dns_msg, ans: dns_answer, reply: string)
if ( c$dns?$answers && c$dns?$total_answers &&
|c$dns$answers| == c$dns$total_answers )
{
add c$dns_state$finished_answers[c$dns$trans_id];
# Indicate this request/reply pair is ready to be logged.
c$dns$ready = T;
}
@ -230,7 +241,7 @@ event DNS::do_reply(c: connection, msg: dns_msg, ans: dns_answer, reply: string)
{
Log::write(DNS::LOG, c$dns);
# This record is logged and no longer pending.
delete c$dns_state$pending[c$dns$trans_id];
Queue::get(c$dns_state$pending[c$dns$trans_id]);
delete c$dns;
}
}
@ -243,15 +254,14 @@ event dns_request(c: connection, msg: dns_msg, query: string, qtype: count, qcla
c$dns$qclass_name = classes[qclass];
c$dns$qtype = qtype;
c$dns$qtype_name = query_types[qtype];
c$dns$Z = msg$Z;
# Decode netbios name queries
# Note: I'm ignoring the name type for now. Not sure if this should be
# worked into the query/response in some fashion.
if ( c$id$resp_p == 137/udp )
query = decode_netbios_name(query);
c$dns$query = query;
c$dns$Z = msg$Z;
c$dns$query = query;
}
event dns_A_reply(c: connection, msg: dns_msg, ans: dns_answer, a: addr) &priority=5
@ -339,6 +349,13 @@ event connection_state_remove(c: connection) &priority=-5
# If Bro is expiring state, we should go ahead and log all unlogged
# request/response pairs now.
for ( trans_id in c$dns_state$pending )
Log::write(DNS::LOG, c$dns_state$pending[trans_id]);
{
local infos: vector of Info;
Queue::get_vector(c$dns_state$pending[trans_id], infos);
for ( i in infos )
{
Log::write(DNS::LOG, infos[i]);
}
}
}

View file

@ -1,4 +1,7 @@
@load ./utils-commands
@load ./main
@load ./file-analysis
@load ./file-extract
@load ./gridftp
@load-sigs ./dpd.sig

View file

@ -0,0 +1,15 @@
signature dpd_ftp_client {
ip-proto == tcp
payload /(|.*[\n\r]) *[uU][sS][eE][rR] /
tcp-state originator
}
# Match for server greeting (220, 120) and for login or passwd
# required (230, 331).
signature dpd_ftp_server {
ip-proto == tcp
payload /[\n\r ]*(120|220)[^0-9].*[\n\r] *(230|331)[^0-9]/
tcp-state responder
requires-reverse-signature dpd_ftp_client
enable "ftp"
}

View file

@ -0,0 +1,48 @@
@load ./main
@load base/utils/conn-ids
@load base/frameworks/file-analysis/main
module FTP;
export {
## Default file handle provider for FTP.
global get_file_handle: function(c: connection, is_orig: bool): string;
}
function get_handle_string(c: connection): string
{
return cat(Analyzer::ANALYZER_FTP_DATA, " ", c$start_time, " ", id_string(c$id));
}
function get_file_handle(c: connection, is_orig: bool): string
{
if ( [c$id$resp_h, c$id$resp_p] !in ftp_data_expected ) return "";
local info: FTP::Info = ftp_data_expected[c$id$resp_h, c$id$resp_p];
if ( info$passive )
# FTP client initiates data channel.
if ( is_orig )
# Don't care about FTP client data.
return "";
else
# Do care about FTP server data.
return get_handle_string(c);
else
# FTP server initiates data channel.
if ( is_orig )
# Do care about FTP server data.
return get_handle_string(c);
else
# Don't care about FTP client data.
return "";
}
module GLOBAL;
event get_file_handle(tag: Analyzer::Tag, c: connection, is_orig: bool)
&priority=5
{
if ( tag != Analyzer::ANALYZER_FTP_DATA ) return;
set_file_handle(FTP::get_file_handle(c, is_orig));
}

View file

@ -15,51 +15,71 @@ export {
redef record Info += {
## On disk file where it was extracted to.
extraction_file: file &log &optional;
extraction_file: string &log &optional;
## Indicates if the current command/response pair should attempt to
## extract the file if a file was transferred.
extract_file: bool &default=F;
## Internal tracking of the total number of files extracted during this
## session.
num_extracted_files: count &default=0;
};
event file_transferred(c: connection, prefix: string, descr: string,
mime_type: string) &priority=3
function get_extraction_name(f: fa_file): string
{
local id = c$id;
if ( [id$resp_h, id$resp_p] !in ftp_data_expected )
return;
local s = ftp_data_expected[id$resp_h, id$resp_p];
local r = fmt("%s-%s.dat", extraction_prefix, f$id);
return r;
}
if ( extract_file_types in s$mime_type )
event file_new(f: fa_file) &priority=5
{
if ( ! f?$source ) return;
if ( f$source != "FTP_DATA" ) return;
if ( f?$mime_type && extract_file_types in f$mime_type )
{
s$extract_file = T;
++s$num_extracted_files;
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=get_extraction_name(f)]);
return;
}
if ( ! f?$conns ) return;
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( [cid$resp_h, cid$resp_p] !in ftp_data_expected ) next;
local s = ftp_data_expected[cid$resp_h, cid$resp_p];
if ( ! s$extract_file ) next;
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=get_extraction_name(f)]);
return;
}
}
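# An illustrative site-policy sketch, assuming FTP::extract_file_types is the
# &redef'able pattern matched against f$mime_type above: extraction of Windows
# executables transferred over FTP could be enabled like this.
redef FTP::extract_file_types = /application\/x-dosexec/;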
event file_transferred(c: connection, prefix: string, descr: string,
mime_type: string) &priority=-4
event file_state_remove(f: fa_file) &priority=4
{
local id = c$id;
if ( [id$resp_h, id$resp_p] !in ftp_data_expected )
return;
local s = ftp_data_expected[id$resp_h, id$resp_p];
if ( s$extract_file )
if ( ! f?$source ) return;
if ( f$source != "FTP_DATA" ) return;
if ( ! f?$info ) return;
for ( filename in f$info$extracted_files )
{
local suffix = fmt("%d.dat", s$num_extracted_files);
local fname = generate_extraction_filename(extraction_prefix, c, suffix);
s$extraction_file = open(fname);
if ( s$passive )
set_contents_file(id, CONTENTS_RESP, s$extraction_file);
else
set_contents_file(id, CONTENTS_ORIG, s$extraction_file);
local s: FTP::Info;
s$ts = network_time();
s$tags = set();
s$user = "<ftp-data>";
s$extraction_file = filename;
if ( f?$conns )
for ( cid in f$conns )
{
s$uid = f$conns[cid]$uid;
s$id = cid;
}
Log::write(FTP::LOG, s);
}
}

View file

@ -1,6 +1,6 @@
##! The logging this script does is primarily focused on logging FTP commands
##! along with metadata. For example, if files are transferred, the argument
##! will take on the full path that the client is at along with the requested
##! file name.
@load ./utils-commands
@ -13,18 +13,31 @@ module FTP;
export {
## The FTP protocol logging stream identifier.
redef enum Log::ID += { LOG };
## List of commands that should have their command/response pairs logged.
const logged_commands = {
"APPE", "DELE", "RETR", "STOR", "STOU", "ACCT"
"APPE", "DELE", "RETR", "STOR", "STOU", "ACCT", "PORT", "PASV", "EPRT",
"EPSV"
} &redef;
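# An illustrative site-policy sketch: since logged_commands is &redef, directory
# listing commands could additionally be logged like this.
redef FTP::logged_commands += { "LIST", "NLST" };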
## This setting determines whether passwords used in FTP sessions are captured.
const default_capture_password = F &redef;
## User IDs that can be considered "anonymous".
const guest_ids = { "anonymous", "ftp", "ftpuser", "guest" } &redef;
## The expected endpoints of an FTP data channel.
type ExpectedDataChannel: record {
## Whether PASV mode is toggled for the control channel.
passive: bool &log;
## The host that will be initiating the data connection.
orig_h: addr &log;
## The host that will be accepting the data connection.
resp_h: addr &log;
## The port at which the acceptor is listening for the data connection.
resp_p: port &log;
};
type Info: record {
## Time when the command was sent.
ts: time &log;
@ -40,51 +53,52 @@ export {
command: string &log &optional;
## Argument for the command if one is given.
arg: string &log &optional;
## Libmagic "sniffed" file type if the command indicates a file transfer.
mime_type: string &log &optional;
## Libmagic "sniffed" file description if the command indicates a file transfer.
mime_desc: string &log &optional;
## Size of the file if the command indicates a file transfer.
file_size: count &log &optional;
## Reply code from the server in response to the command.
reply_code: count &log &optional;
## Reply message from the server in response to the command.
reply_msg: string &log &optional;
## Arbitrary tags that may indicate a particular attribute of this command.
tags: set[string] &log &default=set();
tags: set[string] &log;
## Expected FTP data channel.
data_channel: ExpectedDataChannel &log &optional;
## Current working directory that this session is in. By making
## the default value '/.', we can indicate that unless something
## the default value '.', we can indicate that unless something
## more concrete is discovered, the existing but unknown
## directory is ok to use.
cwd: string &default="/.";
cwd: string &default=".";
## Command that is currently waiting for a response.
cmdarg: CmdArg &optional;
## Queue for commands that have been sent but not yet responded to
## are tracked here.
pending_commands: PendingCmds;
## Indicates if the session is in active or passive mode.
passive: bool &default=F;
## Determines if the password will be captured for this request.
capture_password: bool &default=default_capture_password;
};
## This record is to hold a parsed FTP reply code. For example, for the
## 201 status code, the digits would be parsed as: x->2, y->0, z->1.
type ReplyCode: record {
x: count;
y: count;
z: count;
};
## Parse FTP reply codes into the three constituent single digit values.
global parse_ftp_reply_code: function(code: count): ReplyCode;
## Event that can be handled to access the :bro:type:`FTP::Info`
## record as it is sent on to the logging framework.
global log_ftp: event(rec: Info);
@ -93,23 +107,21 @@ export {
# Add the state tracking information variable to the connection record
redef record connection += {
ftp: Info &optional;
ftp_data_reuse: bool &default=F;
};
# Configure DPD
const ports = { 21/tcp, 2811/tcp } &redef; # 2811/tcp is GridFTP.
redef capture_filters += { ["ftp"] = "port 21 and port 2811" };
redef dpd_config += { [ANALYZER_FTP] = [$ports = ports] };
redef likely_server_ports += { 21/tcp, 2811/tcp };
# Establish the variable for tracking expected connections.
global ftp_data_expected: table[addr, port] of Info &create_expire=5mins;
const ports = { 21/tcp, 2811/tcp };
redef likely_server_ports += { ports };
event bro_init() &priority=5
{
Log::create_stream(FTP::LOG, [$columns=Info, $ev=log_ftp]);
Analyzer::register_for_ports(Analyzer::ANALYZER_FTP, ports);
}
# Establish the variable for tracking expected connections.
global ftp_data_expected: table[addr, port] of Info &read_expire=5mins;
## A set of commands where the argument can be expected to refer
## to a file or directory.
const file_cmds = {
@ -151,7 +163,7 @@ function set_ftp_session(c: connection)
s$uid=c$uid;
s$id=c$id;
c$ftp=s;
# Add a shim command so the server can respond with some init response.
add_pending_cmd(c$ftp$pending_commands, "<init>", "");
}
@ -163,35 +175,51 @@ function ftp_message(s: Info)
# or it's a deliberately logged command.
if ( |s$tags| > 0 || (s?$cmdarg && s$cmdarg$cmd in logged_commands) )
{
if ( s?$password &&
! s$capture_password &&
to_lower(s$user) !in guest_ids )
{
s$password = "<hidden>";
}
local arg = s$cmdarg$arg;
if ( s$cmdarg$cmd in file_cmds )
arg = fmt("ftp://%s%s", addr_to_uri(s$id$resp_h), build_path_compressed(s$cwd, arg));
{
local comp_path = build_path_compressed(s$cwd, arg);
if ( comp_path[0] != "/" )
comp_path = cat("/", comp_path);
arg = fmt("ftp://%s%s", addr_to_uri(s$id$resp_h), comp_path);
}
s$ts=s$cmdarg$ts;
s$command=s$cmdarg$cmd;
if ( arg == "" )
delete s$arg;
else
s$arg=arg;
Log::write(FTP::LOG, s);
}
# The MIME and file_size fields are specific to file transfer commands
# and may not be used in all commands so they need to be reset to "blank"
# values after logging.
delete s$mime_type;
delete s$mime_desc;
delete s$file_size;
# Same with data channel.
delete s$data_channel;
# Tags are cleared every time too.
delete s$tags;
s$tags = set();
}
function add_expected_data_channel(s: Info, chan: ExpectedDataChannel)
{
s$passive = chan$passive;
s$data_channel = chan;
ftp_data_expected[chan$resp_h, chan$resp_p] = s;
Analyzer::schedule_analyzer(chan$orig_h, chan$resp_h, chan$resp_p, Analyzer::ANALYZER_FTP_DATA,
5mins);
}
event ftp_request(c: connection, command: string, arg: string) &priority=5
@ -206,19 +234,19 @@ event ftp_request(c: connection, command: string, arg: string) &priority=5
remove_pending_cmd(c$ftp$pending_commands, c$ftp$cmdarg);
ftp_message(c$ftp);
}
local id = c$id;
set_ftp_session(c);
# Queue up the new command and argument
add_pending_cmd(c$ftp$pending_commands, command, arg);
if ( command == "USER" )
c$ftp$user = arg;
else if ( command == "PASS" )
c$ftp$password = arg;
else if ( command == "PORT" || command == "EPRT" )
{
local data = (command == "PORT") ?
@ -226,9 +254,8 @@ event ftp_request(c: connection, command: string, arg: string) &priority=5
if ( data$valid )
{
c$ftp$passive=F;
ftp_data_expected[data$h, data$p] = c$ftp;
expect_connection(id$resp_h, data$h, data$p, ANALYZER_FILE, 5mins);
add_expected_data_channel(c$ftp, [$passive=F, $orig_h=id$resp_h,
$resp_h=data$h, $resp_p=data$p]);
}
else
{
@ -240,17 +267,14 @@ event ftp_request(c: connection, command: string, arg: string) &priority=5
event ftp_reply(c: connection, code: count, msg: string, cont_resp: bool) &priority=5
{
# TODO: figure out what to do with continued FTP response (not used much)
#if ( cont_resp ) return;
local id = c$id;
set_ftp_session(c);
c$ftp$cmdarg = get_pending_cmd(c$ftp$pending_commands, code, msg);
c$ftp$reply_code = code;
c$ftp$reply_msg = msg;
# TODO: figure out what to do with continued FTP response (not used much)
if ( cont_resp ) return;
# TODO: do some sort of generic clear text login processing here.
local response_xyz = parse_ftp_reply_code(code);
#if ( response_xyz$x == 2 && # successful
@ -266,22 +290,22 @@ event ftp_reply(c: connection, code: count, msg: string, cont_resp: bool) &prior
# if that's given as well which would be more correct.
c$ftp$file_size = extract_count(msg);
}
# PASV and EPSV processing
else if ( (code == 227 || code == 229) &&
(c$ftp$cmdarg$cmd == "PASV" || c$ftp$cmdarg$cmd == "EPSV") )
{
local data = (code == 227) ? parse_ftp_pasv(msg) : parse_ftp_epsv(msg);
if ( data$valid )
{
c$ftp$passive=T;
if ( code == 229 && data$h == [::] )
data$h = id$resp_h;
ftp_data_expected[data$h, data$p] = c$ftp;
expect_connection(id$orig_h, data$h, data$p, ANALYZER_FILE, 5mins);
data$h = c$id$resp_h;
add_expected_data_channel(c$ftp, [$passive=T, $orig_h=c$id$orig_h,
$resp_h=data$h, $resp_p=data$p]);
}
else
{
@ -300,9 +324,9 @@ event ftp_reply(c: connection, code: count, msg: string, cont_resp: bool) &prior
else if ( c$ftp$cmdarg$cmd == "PWD" || c$ftp$cmdarg$cmd == "XPWD" )
c$ftp$cwd = extract_path(msg);
}
# In case there are multiple commands queued, go ahead and remove the
# command here and log because we can't do the normal processing pipeline
# to wait for a new command before logging the command/response pair.
if ( |c$ftp$pending_commands| > 1 )
{
@ -311,8 +335,7 @@ event ftp_reply(c: connection, code: count, msg: string, cont_resp: bool) &prior
}
}
event expected_connection_seen(c: connection, a: count) &priority=10
event scheduled_analyzer_applied(c: connection, a: Analyzer::Tag) &priority=10
{
local id = c$id;
if ( [id$resp_h, id$resp_p] in ftp_data_expected )
@ -327,18 +350,21 @@ event file_transferred(c: connection, prefix: string, descr: string,
{
local s = ftp_data_expected[id$resp_h, id$resp_p];
s$mime_type = split1(mime_type, /;/)[1];
s$mime_desc = descr;
}
}
event file_transferred(c: connection, prefix: string, descr: string,
mime_type: string) &priority=-5
event connection_reused(c: connection) &priority=5
{
local id = c$id;
if ( [id$resp_h, id$resp_p] in ftp_data_expected )
delete ftp_data_expected[id$resp_h, id$resp_p];
if ( "ftp-data" in c$service )
c$ftp_data_reuse = T;
}
event connection_state_remove(c: connection) &priority=-5
{
if ( c$ftp_data_reuse ) return;
delete ftp_data_expected[c$id$resp_h, c$id$resp_p];
}
# Use state remove event to cover connections terminated by RST.
event connection_state_remove(c: connection) &priority=-5
{

View file

@ -1,5 +1,8 @@
@load ./main
@load ./utils
@load ./file-analysis
@load ./file-ident
@load ./file-hash
@load ./file-extract
@load-sigs ./dpd.sig

View file

@ -0,0 +1,13 @@
signature dpd_http_client {
ip-proto == tcp
payload /^[[:space:]]*(GET|HEAD|POST)[[:space:]]*/
tcp-state originator
}
signature dpd_http_server {
ip-proto == tcp
payload /^HTTP\/[0-9]/
tcp-state responder
requires-reverse-signature dpd_http_client
enable "http"
}

View file

@ -0,0 +1,54 @@
@load ./main
@load ./utils
@load base/utils/conn-ids
@load base/frameworks/file-analysis/main
module HTTP;
export {
redef record HTTP::Info += {
## Number of MIME entities in the HTTP request message body so far.
request_mime_level: count &default=0;
## Number of MIME entities in the HTTP response message body so far.
response_mime_level: count &default=0;
};
## Default file handle provider for HTTP.
global get_file_handle: function(c: connection, is_orig: bool): string;
}
event http_begin_entity(c: connection, is_orig: bool) &priority=5
{
if ( ! c?$http )
return;
if ( is_orig )
++c$http$request_mime_level;
else
++c$http$response_mime_level;
}
function get_file_handle(c: connection, is_orig: bool): string
{
if ( ! c?$http ) return "";
local mime_level: count =
is_orig ? c$http$request_mime_level : c$http$response_mime_level;
local mime_level_str: string = mime_level > 1 ? cat(mime_level) : "";
if ( c$http$range_request )
return cat(Analyzer::ANALYZER_HTTP, " ", is_orig, " ", c$id$orig_h, " ",
build_url(c$http));
return cat(Analyzer::ANALYZER_HTTP, " ", c$start_time, " ", is_orig, " ",
c$http$trans_depth, mime_level_str, " ", id_string(c$id));
}
module GLOBAL;
event get_file_handle(tag: Analyzer::Tag, c: connection, is_orig: bool)
&priority=5
{
if ( tag != Analyzer::ANALYZER_HTTP ) return;
set_file_handle(HTTP::get_file_handle(c, is_orig));
}

View file

@ -2,8 +2,7 @@
##! the message body from the server can be extracted with this script.
@load ./main
@load ./file-ident
@load base/utils/files
@load ./file-analysis
module HTTP;
@ -15,46 +14,87 @@ export {
const extraction_prefix = "http-item" &redef;
redef record Info += {
## On-disk file where the response body was extracted to.
extraction_file: file &log &optional;
## On-disk location where files in request body were extracted.
extracted_request_files: vector of string &log &optional;
## On-disk location where files in response body were extracted.
extracted_response_files: vector of string &log &optional;
## Indicates if the response body is to be extracted or not. Must be
## set before or by the first :bro:id:`http_entity_data` event for the
## content.
## set before or by the first :bro:see:`file_new` event for the file content.
extract_file: bool &default=F;
};
}
event http_entity_data(c: connection, is_orig: bool, length: count, data: string) &priority=-5
function get_extraction_name(f: fa_file): string
{
local r = fmt("%s-%s.dat", extraction_prefix, f$id);
return r;
}
function add_extraction_file(c: connection, is_orig: bool, fn: string)
{
# Client body extraction is not currently supported in this script.
if ( is_orig )
return;
if ( c$http$first_chunk )
{
if ( c$http?$mime_type &&
extract_file_types in c$http$mime_type )
if ( ! c$http?$extracted_request_files )
c$http$extracted_request_files = vector();
c$http$extracted_request_files[|c$http$extracted_request_files|] = fn;
}
else
{
if ( ! c$http?$extracted_response_files )
c$http$extracted_response_files = vector();
c$http$extracted_response_files[|c$http$extracted_response_files|] = fn;
}
}
event file_new(f: fa_file) &priority=5
{
if ( ! f?$source ) return;
if ( f$source != "HTTP" ) return;
if ( ! f?$conns ) return;
local fname: string;
local c: connection;
if ( f?$mime_type && extract_file_types in f$mime_type )
{
fname = get_extraction_name(f);
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]);
for ( cid in f$conns )
{
c$http$extract_file = T;
}
if ( c$http$extract_file )
{
local suffix = fmt("%s_%d.dat", is_orig ? "orig" : "resp", c$http_state$current_response);
local fname = generate_extraction_filename(extraction_prefix, c, suffix);
c$http$extraction_file = open(fname);
enable_raw_output(c$http$extraction_file);
c = f$conns[cid];
if ( ! c?$http ) next;
add_extraction_file(c, f$is_orig, fname);
}
return;
}
if ( c$http?$extraction_file )
print c$http$extraction_file, data;
}
local extracting: bool = F;
event http_end_entity(c: connection, is_orig: bool)
{
if ( c$http?$extraction_file )
close(c$http$extraction_file);
for ( cid in f$conns )
{
c = f$conns[cid];
if ( ! c?$http ) next;
if ( ! c$http$extract_file ) next;
fname = get_extraction_name(f);
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]);
extracting = T;
break;
}
if ( extracting )
for ( cid in f$conns )
{
c = f$conns[cid];
if ( ! c?$http ) next;
add_extraction_file(c, f$is_orig, fname);
}
}

View file

@ -1,15 +1,11 @@
##! Calculate hashes for HTTP body transfers.
@load ./file-ident
@load ./main
@load ./file-analysis
module HTTP;
export {
redef enum Notice::Type += {
## Indicates that an MD5 sum was calculated for an HTTP response body.
MD5,
};
redef record Info += {
## MD5 sum for a file transferred over HTTP calculated from the
## response body.
@ -19,10 +15,6 @@ export {
## if a file should have an MD5 sum generated. It must be
## set to T at the time of or before the first chunk of body data.
calc_md5: bool &default=F;
## Indicates if an MD5 sum is being calculated for the current
## request/response pair.
md5_handle: opaque of md5 &optional;
};
## Generate MD5 sums for these filetypes.
@ -31,62 +23,46 @@ export {
&redef;
}
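# An illustrative site-policy sketch: generate_md5 is a &redef'able pattern of
# mime types, so PDFs could be hashed alongside executables; the pattern below
# is an example, not the shipped default.
redef HTTP::generate_md5 = /application\/x-dosexec/ | /application\/pdf/;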
## Initialize and calculate the hash.
event http_entity_data(c: connection, is_orig: bool, length: count, data: string) &priority=5
event file_new(f: fa_file) &priority=5
{
if ( is_orig || ! c?$http ) return;
if ( c$http$first_chunk )
{
if ( c$http$calc_md5 ||
(c$http?$mime_type && generate_md5 in c$http$mime_type) )
{
c$http$md5_handle = md5_hash_init();
}
}
if ( c$http?$md5_handle )
md5_hash_update(c$http$md5_handle, data);
}
## In the event of a content gap during a file transfer, detect the state for
## the MD5 sum calculation and stop calculating the MD5 since it would be
## incorrect anyway.
event content_gap(c: connection, is_orig: bool, seq: count, length: count) &priority=5
{
if ( is_orig || ! c?$http || ! c$http?$md5_handle ) return;
set_state(c, F, is_orig);
md5_hash_finish(c$http$md5_handle); # Ignore return value.
delete c$http$md5_handle;
}
if ( ! f?$source ) return;
if ( f$source != "HTTP" ) return;
## When the file finishes downloading, finish the hash and generate a notice.
event http_message_done(c: connection, is_orig: bool, stat: http_message_stat) &priority=-3
{
if ( is_orig || ! c?$http ) return;
if ( c$http?$md5_handle )
if ( f?$mime_type && generate_md5 in f$mime_type )
{
local url = build_url_http(c$http);
c$http$md5 = md5_hash_finish(c$http$md5_handle);
delete c$http$md5_handle;
NOTICE([$note=MD5, $msg=fmt("%s %s %s", c$id$orig_h, c$http$md5, url),
$sub=c$http$md5, $conn=c]);
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]);
return;
}
if ( ! f?$conns ) return;
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( ! c?$http ) next;
if ( ! c$http$calc_md5 ) next;
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]);
return;
}
}
event connection_state_remove(c: connection) &priority=-5
event file_state_remove(f: fa_file) &priority=4
{
if ( c?$http_state &&
c$http_state$current_response in c$http_state$pending &&
c$http_state$pending[c$http_state$current_response]?$md5_handle )
if ( ! f?$source ) return;
if ( f$source != "HTTP" ) return;
if ( ! f?$conns ) return;
if ( ! f?$info ) return;
if ( ! f$info?$md5 ) return;
for ( cid in f$conns )
{
# The MD5 sum isn't going to be saved anywhere since the entire
# body wouldn't have been seen anyway and we'd just be giving an
# incorrect MD5 sum.
md5_hash_finish(c$http$md5_handle);
delete c$http$md5_handle;
local c: connection = f$conns[cid];
if ( ! c?$http ) next;
c$http$md5 = f$info$md5;
}
}

View file

@ -1,37 +1,28 @@
##! Identification of file types in HTTP response bodies with file content sniffing.
@load base/frameworks/signatures
@load base/frameworks/notice
@load ./main
@load ./utils
# Add the magic number signatures to the core signature set.
@load-sigs ./file-ident.sig
# Ignore the signatures used to match files
redef Signatures::ignored_ids += /^matchfile-/;
@load ./file-analysis
module HTTP;
export {
redef enum Notice::Type += {
## Indicates when the file extension doesn't seem to match the file contents.
## Indicates when the file extension doesn't seem to match the file
## contents.
Incorrect_File_Type,
};
redef record Info += {
## Mime type of response body identified by content sniffing.
mime_type: string &log &optional;
## Indicates that no data of the current file transfer has been
## seen yet. After the first :bro:id:`http_entity_data` event, it
## will be set to F.
first_chunk: bool &default=T;
};
## Mapping between mime types and regular expressions for URLs
## The :bro:enum:`HTTP::Incorrect_File_Type` notice is generated if the pattern
## doesn't match the mime type that was discovered.
## Mapping between mime type strings (without character set) and
## regular expressions for URLs.
## The :bro:enum:`HTTP::Incorrect_File_Type` notice is generated if the
## pattern doesn't match the mime type that was discovered.
const mime_types_extensions: table[string] of pattern = {
["application/x-dosexec"] = /\.([eE][xX][eE]|[dD][lL][lL])/,
} &redef;
@ -43,43 +34,72 @@ export {
const ignored_incorrect_file_type_urls = /^$/ &redef;
}
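# An illustrative site-policy sketch: mime_types_extensions above is &redef, so
# additional mime-type/extension pairs can be added, for example:
redef HTTP::mime_types_extensions += {
	["application/zip"] = /\.([zZ][iI][pP])/,
};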
event signature_match(state: signature_state, msg: string, data: string) &priority=5
event file_new(f: fa_file) &priority=5
{
# Only signatures matching file types are dealt with here.
if ( /^matchfile-/ !in state$sig_id ) return;
if ( ! f?$source ) return;
if ( f$source != "HTTP" ) return;
if ( ! f?$mime_type ) return;
if ( ! f?$conns ) return;
local c = state$conn;
set_state(c, F, F);
# Not much point in any of this if we don't know about the HTTP session.
if ( ! c?$http ) return;
# Set the mime type that was detected.
c$http$mime_type = msg;
if ( msg in mime_types_extensions &&
c$http?$uri && mime_types_extensions[msg] !in c$http$uri )
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( ! c?$http ) next;
c$http$mime_type = f$mime_type;
local mime_str: string = c$http$mime_type;
if ( mime_str !in mime_types_extensions ) next;
if ( ! c$http?$uri ) next;
if ( mime_types_extensions[mime_str] in c$http$uri ) next;
local url = build_url_http(c$http);
if ( url == ignored_incorrect_file_type_urls )
return;
local message = fmt("%s %s %s", msg, c$http$method, url);
if ( url == ignored_incorrect_file_type_urls ) next;
local message = fmt("%s %s %s", mime_str, c$http$method, url);
NOTICE([$note=Incorrect_File_Type,
$msg=message,
$conn=c]);
}
}
event http_entity_data(c: connection, is_orig: bool, length: count, data: string) &priority=5
event file_over_new_connection(f: fa_file, c: connection) &priority=5
{
if ( c$http$first_chunk && ! c$http?$mime_type )
c$http$mime_type = split1(identify_data(data, T), /;/)[1];
if ( ! f?$source ) return;
if ( f$source != "HTTP" ) return;
if ( ! f?$mime_type ) return;
if ( ! c?$http ) return;
# Spread the mime around (e.g. for partial content, file_type event only
# happens once for the first connection, but if there are subsequent
# connections to transfer the same file, they'll be lacking the mime_type
# field if we don't do this).
c$http$mime_type = f$mime_type;
}
event http_entity_data(c: connection, is_orig: bool, length: count, data: string) &priority=-10
# Tracks byte-range request / partial content response mime types, indexed
# by [connection, uri] pairs. This is needed because a person can pipeline
# byte-range requests over multiple connections to the same uri. Without
# the tracking, only the first request in the pipeline for each connection
# would get a mime_type field assigned to it (by the FileAnalysis policy hooks).
global partial_types: table[conn_id, string] of string &read_expire=5mins;
# Priority 4 so that it runs before the handler that will write to http.log.
event http_message_done(c: connection, is_orig: bool, stat: http_message_stat)
&priority=4
{
if ( c$http$first_chunk )
c$http$first_chunk=F;
if ( ! c$http$range_request ) return;
if ( ! c$http?$uri ) return;
if ( c$http?$mime_type )
{
partial_types[c$id, c$http$uri] = c$http$mime_type;
return;
}
if ( [c$id, c$http$uri] in partial_types )
c$http$mime_type = partial_types[c$id, c$http$uri];
}

View file

@ -1,144 +0,0 @@
# These signatures are used as a replacement for libmagic. The signature
# name needs to start with "matchfile" and the "event" directive takes
# the mime type of the file matched by the http-reply-body pattern.
#
# Signatures from: http://www.garykessler.net/library/file_sigs.html
signature matchfile-exe {
http-reply-body /\x4D\x5A/
event "application/x-dosexec"
}
signature matchfile-elf {
http-reply-body /\x7F\x45\x4C\x46/
event "application/x-executable"
}
signature matchfile-script {
# This is meant to match the interpreter declaration at the top of many
# interpreted scripts.
http-reply-body /\#\![[:blank:]]?\//
event "application/x-script"
}
signature matchfile-wmv {
http-reply-body /\x30\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C/
event "video/x-ms-wmv"
}
signature matchfile-flv {
http-reply-body /\x46\x4C\x56\x01/
event "video/x-flv"
}
signature matchfile-swf {
http-reply-body /[\x46\x43]\x57\x53/
event "application/x-shockwave-flash"
}
signature matchfile-jar {
http-reply-body /\x5F\x27\xA8\x89/
event "application/java-archive"
}
signature matchfile-class {
http-reply-body /\xCA\xFE\xBA\xBE/
event "application/java-byte-code"
}
signature matchfile-msoffice-2007 {
# MS Office 2007 XML documents
http-reply-body /\x50\x4B\x03\x04\x14\x00\x06\x00/
event "application/msoffice"
}
signature matchfile-msoffice {
# Older MS Office files
http-reply-body /\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1/
event "application/msoffice"
}
signature matchfile-rtf {
http-reply-body /\x7B\x5C\x72\x74\x66\x31/
event "application/rtf"
}
signature matchfile-lnk {
http-reply-body /\x4C\x00\x00\x00\x01\x14\x02\x00\x00\x00\x00\x00\xC0\x00\x00\x00\x00\x00\x00\x46/
event "application/x-ms-shortcut"
}
signature matchfile-torrent {
http-reply-body /\x64\x38\x3A\x61\x6E\x6E\x6F\x75\x6E\x63\x65/
event "application/x-bittorrent"
}
signature matchfile-pdf {
http-reply-body /\x25\x50\x44\x46/
event "application/pdf"
}
signature matchfile-html {
http-reply-body /<[hH][tT][mM][lL]/
event "text/html"
}
signature matchfile-html2 {
http-reply-body /<![dD][oO][cC][tT][yY][pP][eE][[:blank:]][hH][tT][mM][lL]/
event "text/html"
}
signature matchfile-xml {
http-reply-body /<\??[xX][mM][lL]/
event "text/xml"
}
signature matchfile-gif {
http-reply-body /\x47\x49\x46\x38[\x37\x39]\x61/
event "image/gif"
}
signature matchfile-jpg {
http-reply-body /\xFF\xD8\xFF[\xDB\xE0\xE1\xE2\xE3\xE8]..[\x4A\x45\x53][\x46\x78\x50][\x49\x69][\x46\x66]/
event "image/jpeg"
}
signature matchfile-tiff {
http-reply-body /\x4D\x4D\x00[\x2A\x2B]/
event "image/tiff"
}
signature matchfile-png {
http-reply-body /\x89\x50\x4e\x47/
event "image/png"
}
signature matchfile-zip {
http-reply-body /\x50\x4B\x03\x04/
event "application/zip"
}
signature matchfile-bzip {
http-reply-body /\x42\x5A\x68/
event "application/bzip2"
}
signature matchfile-gzip {
http-reply-body /\x1F\x8B\x08/
event "application/x-gzip"
}
signature matchfile-cab {
http-reply-body /\x4D\x53\x43\x46/
event "application/vnd.ms-cab-compressed"
}
signature matchfile-rar {
http-reply-body /\x52\x61\x72\x21\x1A\x07\x00/
event "application/x-rar-compressed"
}
signature matchfile-7z {
http-reply-body /\x37\x7A\xBC\xAF\x27\x1C/
event "application/x-7z-compressed"
}

View file

@ -71,6 +71,10 @@ export {
## All of the headers that may indicate if the request was proxied.
proxied: set[string] &log &optional;
## Indicates if this request can assume 206 partial content in
## response.
range_request: bool &default=F;
};
## Structure to maintain state for an HTTP connection with multiple
@ -119,28 +123,18 @@ redef record connection += {
http_state: State &optional;
};
# Initialize the HTTP logging stream.
event bro_init() &priority=5
{
Log::create_stream(HTTP::LOG, [$columns=Info, $ev=log_http]);
}
# DPD configuration.
const ports = {
80/tcp, 81/tcp, 631/tcp, 1080/tcp, 3128/tcp,
8000/tcp, 8080/tcp, 8888/tcp,
};
redef dpd_config += {
[[ANALYZER_HTTP, ANALYZER_HTTP_BINPAC]] = [$ports = ports],
};
redef capture_filters += {
["http"] = "tcp and port (80 or 81 or 631 or 1080 or 3138 or 8000 or 8080 or 8888)"
};
redef likely_server_ports += { ports };
redef likely_server_ports += {
80/tcp, 81/tcp, 631/tcp, 1080/tcp, 3138/tcp,
8000/tcp, 8080/tcp, 8888/tcp,
};
# Initialize the HTTP logging stream and ports.
event bro_init() &priority=5
{
Log::create_stream(HTTP::LOG, [$columns=Info, $ev=log_http]);
Analyzer::register_for_ports(Analyzer::ANALYZER_HTTP, ports);
}
function code_in_range(c: count, min: count, max: count) : bool
{
@ -235,6 +229,9 @@ event http_header(c: connection, is_orig: bool, name: string, value: string) &pr
else if ( name == "HOST" )
# The split is done to remove the occasional port value that shows up here.
c$http$host = split1(value, /:/)[1];
else if ( name == "RANGE" )
c$http$range_request = T;
else if ( name == "USER-AGENT" )
c$http$user_agent = value;

View file

@ -1,2 +1,5 @@
@load ./main
@load ./dcc-send
@load ./file-analysis
@load-sigs ./dpd.sig

View file

@ -28,69 +28,137 @@ export {
dcc_file_size: count &log &optional;
## Sniffed mime type of the file.
dcc_mime_type: string &log &optional;
## The file handle for the file to be extracted
extraction_file: file &log &optional;
extraction_file: string &log &optional;
## A boolean to indicate if the current file transfer should be extracted.
extract_file: bool &default=F;
## The count of the number of files that have been extracted during the session.
num_extracted_files: count &default=0;
};
}
global dcc_expected_transfers: table[addr, port] of Info = table();
global dcc_expected_transfers: table[addr, port] of Info &read_expire=5mins;
event file_transferred(c: connection, prefix: string, descr: string,
mime_type: string) &priority=3
function set_dcc_mime(f: fa_file)
{
local id = c$id;
if ( [id$resp_h, id$resp_p] !in dcc_expected_transfers )
return;
local irc = dcc_expected_transfers[id$resp_h, id$resp_p];
irc$dcc_mime_type = split1(mime_type, /;/)[1];
if ( ! f?$conns ) return;
if ( extract_file_types == irc$dcc_mime_type )
for ( cid in f$conns )
{
irc$extract_file = T;
}
if ( irc$extract_file )
{
local suffix = fmt("%d.dat", ++irc$num_extracted_files);
local fname = generate_extraction_filename(extraction_prefix, c, suffix);
irc$extraction_file = open(fname);
local c: connection = f$conns[cid];
if ( [cid$resp_h, cid$resp_p] !in dcc_expected_transfers ) next;
local s = dcc_expected_transfers[cid$resp_h, cid$resp_p];
s$dcc_mime_type = f$mime_type;
}
}
event file_transferred(c: connection, prefix: string, descr: string,
mime_type: string) &priority=-4
function set_dcc_extraction_file(f: fa_file, filename: string)
{
local id = c$id;
if ( [id$resp_h, id$resp_p] !in dcc_expected_transfers )
if ( ! f?$conns ) return;
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( [cid$resp_h, cid$resp_p] !in dcc_expected_transfers ) next;
local s = dcc_expected_transfers[cid$resp_h, cid$resp_p];
s$extraction_file = filename;
}
}
function get_extraction_name(f: fa_file): string
{
local r = fmt("%s-%s.dat", extraction_prefix, f$id);
return r;
}
# This handler sets the IRC::Info mime type.
event file_new(f: fa_file) &priority=5
{
if ( ! f?$source ) return;
if ( f$source != "IRC_DATA" ) return;
if ( ! f?$mime_type ) return;
set_dcc_mime(f);
}
# This handler checks if file extraction is desired.
event file_new(f: fa_file) &priority=5
{
if ( ! f?$source ) return;
if ( f$source != "IRC_DATA" ) return;
local fname: string;
if ( f?$mime_type && extract_file_types in f$mime_type )
{
fname = get_extraction_name(f);
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]);
set_dcc_extraction_file(f, fname);
return;
}
local irc = dcc_expected_transfers[id$resp_h, id$resp_p];
if ( ! f?$conns ) return;
local tmp = irc$command;
irc$command = "DCC";
Log::write(IRC::LOG, irc);
irc$command = tmp;
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( irc?$extraction_file )
set_contents_file(id, CONTENTS_RESP, irc$extraction_file);
if ( [cid$resp_h, cid$resp_p] !in dcc_expected_transfers ) next;
# Delete these values in case another DCC transfer
# happens during the IRC session.
delete irc$extract_file;
delete irc$extraction_file;
delete irc$dcc_file_name;
delete irc$dcc_file_size;
delete irc$dcc_mime_type;
delete dcc_expected_transfers[id$resp_h, id$resp_p];
local s = dcc_expected_transfers[cid$resp_h, cid$resp_p];
if ( ! s$extract_file ) next;
fname = get_extraction_name(f);
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]);
s$extraction_file = fname;
return;
}
}
function log_dcc(f: fa_file)
{
if ( ! f?$conns ) return;
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( [cid$resp_h, cid$resp_p] !in dcc_expected_transfers ) next;
local irc = dcc_expected_transfers[cid$resp_h, cid$resp_p];
local tmp = irc$command;
irc$command = "DCC";
Log::write(IRC::LOG, irc);
irc$command = tmp;
# Delete these values in case another DCC transfer
# happens during the IRC session.
delete irc$extract_file;
delete irc$extraction_file;
delete irc$dcc_file_name;
delete irc$dcc_file_size;
delete irc$dcc_mime_type;
return;
}
}
event file_new(f: fa_file) &priority=-5
{
if ( ! f?$source ) return;
if ( f$source != "IRC_DATA" ) return;
log_dcc(f);
}
event irc_dcc_message(c: connection, is_orig: bool,
@ -100,17 +168,22 @@ event irc_dcc_message(c: connection, is_orig: bool,
{
set_session(c);
if ( dcc_type != "SEND" )
return;
c$irc$dcc_file_name = argument;
c$irc$dcc_file_size = size;
local p = count_to_port(dest_port, tcp);
expect_connection(to_addr("0.0.0.0"), address, p, ANALYZER_FILE, 5 min);
Analyzer::schedule_analyzer(0.0.0.0, address, p, Analyzer::ANALYZER_IRC_DATA, 5 min);
dcc_expected_transfers[address, p] = c$irc;
}
event expected_connection_seen(c: connection, a: count) &priority=10
event expected_connection_seen(c: connection, a: Analyzer::Tag) &priority=10
{
local id = c$id;
if ( [id$resp_h, id$resp_p] in dcc_expected_transfers )
add c$service["irc-dcc-data"];
}
event connection_state_remove(c: connection) &priority=-5
{
delete dcc_expected_transfers[c$id$resp_h, c$id$resp_p];
}

View file

@ -0,0 +1,33 @@
signature irc_client1 {
ip-proto == tcp
payload /(|.*[\r\n]) *[Uu][Ss][Ee][Rr] +.+[\n\r]+ *[Nn][Ii][Cc][Kk] +.*[\r\n]/
requires-reverse-signature irc_server_reply
tcp-state originator
enable "irc"
}
signature irc_client2 {
ip-proto == tcp
payload /(|.*[\r\n]) *[Nn][Ii][Cc][Kk] +.+[\r\n]+ *[Uu][Ss][Ee][Rr] +.+[\r\n]/
requires-reverse-signature irc_server_reply
tcp-state originator
enable "irc"
}
signature irc_server_reply {
ip-proto == tcp
payload /^(|.*[\n\r])(:[^ \n\r]+ )?[0-9][0-9][0-9] /
tcp-state responder
}
signature irc_server_to_server1 {
ip-proto == tcp
payload /(|.*[\r\n]) *[Ss][Ee][Rr][Vv][Ee][Rr] +[^ ]+ +[0-9]+ +:.+[\r\n]/
}
signature irc_server_to_server2 {
ip-proto == tcp
payload /(|.*[\r\n]) *[Ss][Ee][Rr][Vv][Ee][Rr] +[^ ]+ +[0-9]+ +:.+[\r\n]/
requires-reverse-signature irc_server_to_server1
enable "irc"
}

View file

@ -0,0 +1,25 @@
@load ./dcc-send.bro
@load base/utils/conn-ids
@load base/frameworks/file-analysis/main
module IRC;
export {
## Default file handle provider for IRC.
global get_file_handle: function(c: connection, is_orig: bool): string;
}
function get_file_handle(c: connection, is_orig: bool): string
{
if ( is_orig ) return "";
return cat(Analyzer::ANALYZER_IRC_DATA, " ", c$start_time, " ", id_string(c$id));
}
module GLOBAL;
event get_file_handle(tag: Analyzer::Tag, c: connection, is_orig: bool)
&priority=5
{
if ( tag != Analyzer::ANALYZER_IRC_DATA ) return;
set_file_handle(IRC::get_file_handle(c, is_orig));
}

View file

@ -38,21 +38,13 @@ redef record connection += {
irc: Info &optional;
};
# Some common IRC ports.
redef capture_filters += { ["irc-6666"] = "port 6666" };
redef capture_filters += { ["irc-6667"] = "port 6667" };
redef capture_filters += { ["irc-6668"] = "port 6668" };
redef capture_filters += { ["irc-6669"] = "port 6669" };
# DPD configuration.
const irc_ports = { 6666/tcp, 6667/tcp, 6668/tcp, 6669/tcp };
redef dpd_config += { [ANALYZER_IRC] = [$ports = irc_ports] };
redef likely_server_ports += { 6666/tcp, 6667/tcp, 6668/tcp, 6669/tcp };
const ports = { 6666/tcp, 6667/tcp, 6668/tcp, 6669/tcp };
redef likely_server_ports += { ports };
event bro_init() &priority=5
{
Log::create_stream(IRC::LOG, [$columns=Info, $ev=irc_log]);
Analyzer::register_for_ports(Analyzer::ANALYZER_IRC, ports);
}
function new_session(c: connection): Info

View file

@ -29,14 +29,13 @@ redef record connection += {
modbus: Info &optional;
};
# Configure DPD and the packet filter.
redef capture_filters += { ["modbus"] = "tcp port 502" };
redef dpd_config += { [ANALYZER_MODBUS] = [$ports = set(502/tcp)] };
redef likely_server_ports += { 502/tcp };
const ports = { 502/tcp };
redef likely_server_ports += { ports };
event bro_init() &priority=5
{
Log::create_stream(Modbus::LOG, [$columns=Info, $ev=log_modbus]);
Analyzer::register_for_ports(Analyzer::ANALYZER_MODBUS, ports);
}
event modbus_message(c: connection, headers: ModbusHeaders, is_orig: bool) &priority=5

View file

@ -0,0 +1,2 @@
@load-sigs ./dpd.sig

View file

@ -0,0 +1,13 @@
signature dpd_pop3_server {
ip-proto == tcp
payload /^\+OK/
requires-reverse-signature dpd_pop3_client
enable "pop3"
tcp-state responder
}
signature dpd_pop3_client {
ip-proto == tcp
payload /(|.*[\r\n])[[:space:]]*([uU][sS][eE][rR][[:space:]]|[aA][pP][oO][pP][[:space:]]|[cC][aA][pP][aA]|[aA][uU][tT][hH])/
tcp-state originator
}

View file

@ -1,3 +1,6 @@
@load ./main
@load ./entities
@load ./entities-excerpt
@load ./entities-excerpt
@load-sigs ./dpd.sig

View file

@ -0,0 +1,13 @@
signature dpd_smtp_client {
ip-proto == tcp
payload /(|.*[\n\r])[[:space:]]*([hH][eE][lL][oO]|[eE][hH][lL][oO])/
requires-reverse-signature dpd_smtp_server
enable "smtp"
tcp-state originator
}
signature dpd_smtp_server {
ip-proto == tcp
payload /^[[:space:]]*220[[:space:]-]/
tcp-state responder
}

View file

@ -9,44 +9,29 @@ export {
redef record SMTP::EntityInfo += {
## The entity body excerpt.
excerpt: string &log &default="";
## Internal tracking to know how much of the body should be included
## in the excerpt.
excerpt_len: count &optional;
};
## This is the default value for how much of the entity body should be
## included for all MIME entities.
## included for all MIME entities. The lesser of this value and
## :bro:see:`default_file_bof_buffer_size` will be used.
const default_entity_excerpt_len = 0 &redef;
## This table defines how much of various entity bodies should be
## included in excerpts.
const entity_excerpt_len: table[string] of count = {}
&redef
&default = default_entity_excerpt_len;
}
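# An illustrative site-policy sketch: excerpts are enabled by bumping the
# &redef'able default length; 512 is an arbitrary example value.
redef SMTP::default_entity_excerpt_len = 512;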
event mime_segment_data(c: connection, length: count, data: string) &priority=-1
event file_new(f: fa_file) &priority=5
{
if ( ! c?$smtp ) return;
if ( c$smtp$current_entity$content_len == 0 )
c$smtp$current_entity$excerpt_len = entity_excerpt_len[c$smtp$current_entity$mime_type];
}
if ( ! f?$source ) return;
if ( f$source != "SMTP" ) return;
if ( ! f?$bof_buffer ) return;
if ( ! f?$conns ) return;
event mime_segment_data(c: connection, length: count, data: string) &priority=-2
{
if ( ! c?$smtp ) return;
local ent = c$smtp$current_entity;
if ( ent$content_len < ent$excerpt_len )
for ( cid in f$conns )
{
if ( ent$content_len + length < ent$excerpt_len )
ent$excerpt = cat(ent$excerpt, data);
else
{
local x_bytes = ent$excerpt_len - ent$content_len;
ent$excerpt = cat(ent$excerpt, sub_bytes(data, 1, x_bytes));
}
local c: connection = f$conns[cid];
if ( ! c?$smtp ) next;
if ( default_entity_excerpt_len > 0 )
c$smtp$current_entity$excerpt =
f$bof_buffer[0:default_entity_excerpt_len];
}
}

View file

@ -7,11 +7,6 @@
module SMTP;
export {
redef enum Notice::Type += {
## Indicates that an MD5 sum was calculated for a MIME message.
MD5,
};
redef enum Log::ID += { ENTITIES_LOG };
type EntityInfo: record {
@ -34,15 +29,12 @@ export {
## Optionally calculate the file's MD5 sum. Must be set prior to the
## first data chunk being seen in an event.
calc_md5: bool &default=F;
## This boolean value indicates if an MD5 sum is being calculated
## for the current file transfer.
md5_handle: opaque of md5 &optional;
## Optionally write the file to disk. Must be set prior to first
## data chunk being seen in an event.
extract_file: bool &default=F;
## Store the file handle here for the file currently being extracted.
extraction_file: file &log &optional;
extraction_file: string &log &optional;
};
redef record Info += {
@ -51,9 +43,6 @@ export {
};
redef record State += {
## Store a count of the number of files that have been transferred in
## a conversation to create unique file names on disk.
num_extracted_files: count &default=0;
## Track the number of MIME encoded files transferred during a session.
mime_level: count &default=0;
};
@ -97,77 +86,126 @@ function set_session(c: connection, new_entity: bool)
}
}
function get_extraction_name(f: fa_file): string
{
local r = fmt("%s-%s.dat", extraction_prefix, f$id);
return r;
}
event mime_begin_entity(c: connection) &priority=10
{
if ( ! c?$smtp ) return;
set_session(c, T);
}
# This has priority -10 because other handlers need to know the current
# content_len before it's updated by this handler.
event mime_segment_data(c: connection, length: count, data: string) &priority=-10
event file_new(f: fa_file) &priority=5
{
if ( ! c?$smtp ) return;
c$smtp$current_entity$content_len = c$smtp$current_entity$content_len + length;
}
if ( ! f?$source ) return;
if ( f$source != "SMTP" ) return;
if ( ! f?$conns ) return;
event mime_segment_data(c: connection, length: count, data: string) &priority=7
{
if ( ! c?$smtp ) return;
if ( c$smtp$current_entity$content_len == 0 )
c$smtp$current_entity$mime_type = split1(identify_data(data, T), /;/)[1];
}
local fname: string;
local extracting: bool = F;
event mime_segment_data(c: connection, length: count, data: string) &priority=-5
{
if ( ! c?$smtp ) return;
if ( c$smtp$current_entity$content_len == 0 )
for ( cid in f$conns )
{
local entity = c$smtp$current_entity;
if ( generate_md5 in entity$mime_type && ! never_calc_md5 )
entity$calc_md5 = T;
local c: connection = f$conns[cid];
if ( entity$calc_md5 )
entity$md5_handle = md5_hash_init();
}
if ( ! c?$smtp ) next;
if ( ! c$smtp?$current_entity ) next;
if ( c$smtp$current_entity?$md5_handle )
md5_hash_update(entity$md5_handle, data);
}
if ( c$smtp$current_entity$extract_file )
{
if ( ! extracting )
{
fname = get_extraction_name(f);
FileAnalysis::add_analyzer(f,
[$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]);
extracting = T;
}
## In the event of a content gap during the MIME transfer, detect the state for
## the MD5 sum calculation and stop calculating the MD5 since it would be
## incorrect anyway.
event content_gap(c: connection, is_orig: bool, seq: count, length: count) &priority=5
{
if ( is_orig || ! c?$smtp || ! c$smtp?$current_entity ) return;
c$smtp$current_entity$extraction_file = fname;
}
local entity = c$smtp$current_entity;
if ( entity?$md5_handle )
{
md5_hash_finish(entity$md5_handle);
delete entity$md5_handle;
if ( c$smtp$current_entity$calc_md5 )
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]);
}
}
event mime_end_entity(c: connection) &priority=-3
{
# TODO: this check is only due to a bug in mime_end_entity that
# causes the event to be generated twice for the same real event.
if ( ! c?$smtp || ! c$smtp?$current_entity )
function check_extract_by_type(f: fa_file)
{
if ( extract_file_types !in f$mime_type ) return;
if ( f?$info && FileAnalysis::ANALYZER_EXTRACT in f$info$analyzers )
return;
local entity = c$smtp$current_entity;
if ( entity?$md5_handle )
{
entity$md5 = md5_hash_finish(entity$md5_handle);
delete entity$md5_handle;
local fname: string = get_extraction_name(f);
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]);
NOTICE([$note=MD5, $msg=fmt("Calculated a hash for a MIME entity from %s", c$id$orig_h),
$sub=entity$md5, $conn=c]);
if ( ! f?$conns ) return;
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( ! c?$smtp ) next;
c$smtp$current_entity$extraction_file = fname;
}
}
function check_md5_by_type(f: fa_file)
{
if ( never_calc_md5 ) return;
if ( generate_md5 !in f$mime_type ) return;
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]);
}
event file_new(f: fa_file) &priority=5
{
if ( ! f?$source ) return;
if ( f$source != "SMTP" ) return;
if ( ! f?$mime_type ) return;
if ( f?$conns )
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( ! c?$smtp ) next;
if ( ! c$smtp?$current_entity ) next;
c$smtp$current_entity$mime_type = f$mime_type;
}
check_extract_by_type(f);
check_md5_by_type(f);
}
event file_state_remove(f: fa_file) &priority=4
{
if ( ! f?$source ) return;
if ( f$source != "SMTP" ) return;
if ( ! f?$conns ) return;
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( ! c?$smtp ) next;
if ( ! c$smtp?$current_entity ) next;
# Only log if there was some content.
if ( f$seen_bytes == 0 ) next;
if ( f?$info && f$info?$md5 )
c$smtp$current_entity$md5 = f$info$md5;
c$smtp$current_entity$content_len = f$seen_bytes;
Log::write(SMTP::ENTITIES_LOG, c$smtp$current_entity);
delete c$smtp$current_entity;
return;
}
}
@ -179,66 +217,7 @@ event mime_one_header(c: connection, h: mime_header_rec)
/[fF][iI][lL][eE][nN][aA][mM][eE]/ in h$value )
c$smtp$current_entity$filename = extract_filename_from_content_disposition(h$value);
if ( h$name == "CONTENT-TYPE" &&
/[nN][aA][mM][eE][:blank:]*=/ in h$value )
c$smtp$current_entity$filename = extract_filename_from_content_disposition(h$value);
}
event mime_end_entity(c: connection) &priority=-5
{
if ( ! c?$smtp ) return;
# This check and the delete below are just to cope with a bug where
# mime_end_entity can be generated multiple times for the same event.
if ( ! c$smtp?$current_entity )
return;
# Only log if there was some content.
if ( c$smtp$current_entity$content_len > 0 )
Log::write(SMTP::ENTITIES_LOG, c$smtp$current_entity);
delete c$smtp$current_entity;
}
event mime_segment_data(c: connection, length: count, data: string) &priority=5
{
if ( ! c?$smtp ) return;
if ( extract_file_types in c$smtp$current_entity$mime_type )
c$smtp$current_entity$extract_file = T;
}
event mime_segment_data(c: connection, length: count, data: string) &priority=3
{
if ( ! c?$smtp ) return;
if ( c$smtp$current_entity$extract_file &&
c$smtp$current_entity$content_len == 0 )
{
local suffix = fmt("%d.dat", ++c$smtp_state$num_extracted_files);
local fname = generate_extraction_filename(extraction_prefix, c, suffix);
c$smtp$current_entity$extraction_file = open(fname);
enable_raw_output(c$smtp$current_entity$extraction_file);
}
}
event mime_segment_data(c: connection, length: count, data: string) &priority=-5
{
if ( ! c?$smtp ) return;
if ( c$smtp$current_entity$extract_file && c$smtp$current_entity?$extraction_file )
print c$smtp$current_entity$extraction_file, data;
}
event mime_end_entity(c: connection) &priority=-3
{
if ( ! c?$smtp ) return;
# TODO: this check is only due to a bug in mime_end_entity that
# causes the event to be generated twice for the same real event.
if ( ! c$smtp?$current_entity )
return;
if ( c$smtp$current_entity?$extraction_file )
close(c$smtp$current_entity$extraction_file);
}

View file

@ -0,0 +1,27 @@
@load ./main
@load ./entities
@load base/utils/conn-ids
@load base/frameworks/file-analysis/main
module SMTP;
export {
## Default file handle provider for SMTP.
global get_file_handle: function(c: connection, is_orig: bool): string;
}
function get_file_handle(c: connection, is_orig: bool): string
{
if ( ! c?$smtp ) return "";
return cat(Analyzer::ANALYZER_SMTP, " ", c$start_time, " ", c$smtp$trans_depth, " ",
c$smtp_state$mime_level);
}
module GLOBAL;
event get_file_handle(tag: Analyzer::Tag, c: connection, is_orig: bool)
&priority=5
{
if ( tag != Analyzer::ANALYZER_SMTP ) return;
set_file_handle(SMTP::get_file_handle(c, is_orig));
}

View file

@ -74,9 +74,6 @@ export {
const mail_path_capture = ALL_HOSTS &redef;
global log_smtp: event(rec: Info);
## Configure the default ports for SMTP analysis.
const ports = { 25/tcp, 587/tcp } &redef;
}
redef record connection += {
@ -84,15 +81,13 @@ redef record connection += {
smtp_state: State &optional;
};
# Configure DPD
redef capture_filters += { ["smtp"] = "tcp port 25 or tcp port 587" };
redef dpd_config += { [ANALYZER_SMTP] = [$ports = ports] };
redef likely_server_ports += { 25/tcp, 587/tcp };
const ports = { 25/tcp, 587/tcp };
redef likely_server_ports += { ports };
event bro_init() &priority=5
{
Log::create_stream(SMTP::LOG, [$columns=SMTP::Info, $ev=log_smtp]);
Analyzer::register_for_ports(Analyzer::ANALYZER_SMTP, ports);
}
function find_address_in_smtp_header(header: string): string

View file

@ -1,2 +1,4 @@
@load ./consts
@load ./main
@load-sigs ./dpd.sig

View file

@ -0,0 +1,48 @@
signature dpd_socks4_client {
ip-proto == tcp
# '32' is a rather arbitrary max length for the user name.
payload /^\x04[\x01\x02].{0,32}\x00/
tcp-state originator
}
signature dpd_socks4_server {
ip-proto == tcp
requires-reverse-signature dpd_socks4_client
payload /^\x00[\x5a\x5b\x5c\x5d]/
tcp-state responder
enable "socks"
}
signature dpd_socks4_reverse_client {
ip-proto == tcp
# '32' is a rather arbitrary max length for the user name.
payload /^\x04[\x01\x02].{0,32}\x00/
tcp-state responder
}
signature dpd_socks4_reverse_server {
ip-proto == tcp
requires-reverse-signature dpd_socks4_reverse_client
payload /^\x00[\x5a\x5b\x5c\x5d]/
tcp-state originator
enable "socks"
}
signature dpd_socks5_client {
ip-proto == tcp
# Watch for a few authentication methods to reduce false positives.
payload /^\x05.[\x00\x01\x02]/
tcp-state originator
}
signature dpd_socks5_server {
ip-proto == tcp
requires-reverse-signature dpd_socks5_client
# Watch for a single authentication method to be chosen by the server or
# the server to indicate that no authentication is required.
payload /^\x05(\x00|\x01[\x00\x01\x02])/
tcp-state responder
enable "socks"
}

View file

@ -34,20 +34,19 @@ export {
global log_socks: event(rec: Info);
}
const ports = { 1080/tcp };
redef likely_server_ports += { ports };
event bro_init() &priority=5
{
Log::create_stream(SOCKS::LOG, [$columns=Info, $ev=log_socks]);
Analyzer::register_for_ports(Analyzer::ANALYZER_SOCKS, ports);
}
redef record connection += {
socks: SOCKS::Info &optional;
};
# Configure DPD
redef capture_filters += { ["socks"] = "tcp port 1080" };
redef dpd_config += { [ANALYZER_SOCKS] = [$ports = set(1080/tcp)] };
redef likely_server_ports += { 1080/tcp };
function set_session(c: connection, version: count)
{
if ( ! c?$socks )

View file

@ -1 +1,3 @@
@load ./main
@load-sigs ./dpd.sig

View file

@ -0,0 +1,13 @@
signature dpd_ssh_client {
ip-proto == tcp
payload /^[sS][sS][hH]-/
requires-reverse-signature dpd_ssh_server
enable "ssh"
tcp-state originator
}
signature dpd_ssh_server {
ip-proto == tcp
payload /^[sS][sS][hH]-/
tcp-state responder
}

View file

@ -1,10 +1,11 @@
##! Base SSH analysis script. The heuristic to blindly determine success or
##! Base SSH analysis script. The heuristic to blindly determine success or
##! failure for SSH connections is implemented here. At this time, it only
##! uses the size of the data being returned from the server to make the
##! heuristic determination about success of the connection.
##! heuristic determination about success of the connection.
##! Requires that :bro:id:`use_conn_size_analyzer` is set to T! The heuristic
##! is not attempted if the connection size analyzer isn't enabled.
@load base/protocols/conn
@load base/frameworks/notice
@load base/utils/site
@load base/utils/thresholds
@ -16,12 +17,6 @@ module SSH;
export {
## The SSH protocol logging stream identifier.
redef enum Log::ID += { LOG };
redef enum Notice::Type += {
## Indicates that a heuristically detected "successful" SSH
## authentication occurred.
Login
};
type Info: record {
## Time when the SSH connection began.
@ -30,10 +25,10 @@ export {
uid: string &log;
## The connection's 4-tuple of endpoint addresses/ports.
id: conn_id &log;
## Indicates if the login was heuristically guessed to be "success"
## or "failure".
status: string &log &optional;
## Direction of the connection. If the client was a local host
## Indicates if the login was heuristically guessed to be "success",
## "failure", or "undetermined".
status: string &log &default="undetermined";
## Direction of the connection. If the client was a local host
## logging into an external host, this would be OUTBOUND. INBOUND
## would be set for the opposite situation.
# TODO: handle local-local and remote-remote better.
@ -43,51 +38,49 @@ export {
## Software string from the server.
server: string &log &optional;
## Amount of data returned from the server. This is currently
## the only measure of the success heuristic and it is logged to
## the only measure of the success heuristic and it is logged to
## assist analysts looking at the logs to make their own determination
## about the success on a case-by-case basis.
resp_size: count &log &default=0;
## Indicate if the SSH session is done being watched.
done: bool &default=F;
};
## The size in bytes of data sent by the server at which the SSH
## The size in bytes of data sent by the server at which the SSH
## connection is presumed to be successful.
const authentication_data_size = 5500 &redef;
const authentication_data_size = 4000 &redef;
## If true, we tell the event engine to not look at further data
## packets after the initial SSH handshake. Helps with performance
## (especially with large file transfers) but precludes some
## kinds of analyses (e.g., tracking connection size).
## kinds of analyses.
const skip_processing_after_detection = F &redef;
## Event that is generated when the heuristic thinks that a login
## was successful.
global heuristic_successful_login: event(c: connection);
## Event that is generated when the heuristic thinks that a login
## failed.
global heuristic_failed_login: event(c: connection);
## Event that can be handled to access the :bro:type:`SSH::Info`
## record as it is sent on to the logging framework.
global log_ssh: event(rec: Info);
}
# Configure DPD and the packet filter
redef capture_filters += { ["ssh"] = "tcp port 22" };
redef dpd_config += { [ANALYZER_SSH] = [$ports = set(22/tcp)] };
redef likely_server_ports += { 22/tcp };
redef record connection += {
ssh: Info &optional;
};
const ports = { 22/tcp };
redef likely_server_ports += { ports };
event bro_init() &priority=5
{
Log::create_stream(SSH::LOG, [$columns=Info, $ev=log_ssh]);
Analyzer::register_for_ports(Analyzer::ANALYZER_SSH, ports);
}
function set_session(c: connection)
@ -104,55 +97,61 @@ function set_session(c: connection)
function check_ssh_connection(c: connection, done: bool)
{
# If done watching this connection, just return.
# If already done watching this connection, just return.
if ( c$ssh$done )
return;
# Make sure conn_size_analyzer is active by checking
# resp$num_bytes_ip. In general it should always be active though.
if ( ! c$resp?$num_bytes_ip )
return;
# Remove the IP and TCP header length from the total size.
# TODO: Fix for IPv6. This whole approach also seems to break in some
# cases where there are more header bytes than num_bytes_ip.
local header_bytes = c$resp$num_pkts*32 + c$resp$num_pkts*20;
local server_bytes = c$resp$num_bytes_ip;
if ( server_bytes >= header_bytes )
server_bytes = server_bytes - header_bytes;
else
server_bytes = c$resp$size;
# If this is still a live connection and the byte count has not crossed
# the threshold, just return and let the rescheduled check happen later.
if ( ! done && server_bytes < authentication_data_size )
return;
# Make sure the server has sent back more than 50 bytes to filter out
# hosts that are just port scanning. Nothing is ever logged if the server
# doesn't send back at least 50 bytes.
if ( server_bytes < 50 )
return;
c$ssh$direction = Site::is_local_addr(c$id$orig_h) ? OUTBOUND : INBOUND;
c$ssh$resp_size = server_bytes;
if ( server_bytes < authentication_data_size )
if ( done )
{
c$ssh$status = "failure";
event SSH::heuristic_failed_login(c);
# If this connection is done, then we can look to see if
# this matches the conditions for a failed login. Failed
# logins are only detected at connection state removal.
if ( # Require originators to have sent at least 50 bytes.
c$orig$size > 50 &&
# Responders must be below 4000 bytes.
c$resp$size < 4000 &&
# Responder must have sent fewer than 40 packets.
c$resp$num_pkts < 40 &&
# If there was a content gap we can't reliably do this heuristic.
c?$conn && c$conn$missed_bytes == 0 )# &&
# Only "normal" connections can count.
#c$conn?$conn_state && c$conn$conn_state in valid_states )
{
c$ssh$status = "failure";
event SSH::heuristic_failed_login(c);
}
if ( c$resp$size > authentication_data_size )
{
c$ssh$status = "success";
event SSH::heuristic_successful_login(c);
}
}
else
{
# presumed successful login
c$ssh$status = "success";
event SSH::heuristic_successful_login(c);
{
# If this connection is still being tracked, then it's possible
# to watch for it to be a successful connection.
if ( c$resp$size > authentication_data_size )
{
c$ssh$status = "success";
event SSH::heuristic_successful_login(c);
}
else
# This connection must be tracked longer. Let the scheduled
# check happen again.
return;
}
# Set the direction for the log.
c$ssh$direction = Site::is_local_addr(c$id$orig_h) ? OUTBOUND : INBOUND;
# Set the "done" flag to prevent the watching event from rescheduling
# after detection is done.
c$ssh$done=T;
Log::write(SSH::LOG, c$ssh);
if ( skip_processing_after_detection )
{
# Stop watching this connection, we don't care about it anymore.
@ -161,18 +160,6 @@ function check_ssh_connection(c: connection, done: bool)
}
}
event SSH::heuristic_successful_login(c: connection) &priority=-5
{
NOTICE([$note=Login,
$msg="Heuristically detected successful SSH login.",
$conn=c]);
Log::write(SSH::LOG, c$ssh);
}
event SSH::heuristic_failed_login(c: connection) &priority=-5
{
Log::write(SSH::LOG, c$ssh);
}
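With the stock Login notice and these handlers removed, a site that still wants a notice can raise its own from the exported event. A sketch in a local script (the module name and notice type below are ours, not part of this change):
@load base/frameworks/notice
@load base/protocols/ssh
module SSH_Site;
export {
	redef enum Notice::Type += {
		## Raised when the SSH success heuristic fires.
		Successful_Login
	};
}
event SSH::heuristic_successful_login(c: connection)
	{
	NOTICE([$note=Successful_Login,
	        $msg="Heuristically detected successful SSH login.",
	        $conn=c]);
	}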
event connection_state_remove(c: connection) &priority=-5
{
@ -187,6 +174,7 @@ event ssh_watcher(c: connection)
if ( ! connection_exists(id) )
return;
lookup_connection(c$id);
check_ssh_connection(c, F);
if ( ! c$ssh$done )
schedule +15secs { ssh_watcher(c) };
@ -197,12 +185,12 @@ event ssh_server_version(c: connection, version: string) &priority=5
set_session(c);
c$ssh$server = version;
}
event ssh_client_version(c: connection, version: string) &priority=5
{
set_session(c);
c$ssh$client = version;
# The heuristic detection for SSH relies on the ConnSize analyzer.
# Don't do the heuristics if it's disabled.
if ( use_conn_size_analyzer )

View file

@ -1,3 +1,5 @@
@load ./consts
@load ./main
@load ./mozilla-ca-list
@load-sigs ./dpd.sig

View file

@ -0,0 +1,15 @@
signature dpd_ssl_server {
ip-proto == tcp
# Server hello.
payload /^(\x16\x03[\x00\x01\x02]..\x02...\x03[\x00\x01\x02]|...?\x04..\x00\x02).*/
requires-reverse-signature dpd_ssl_client
enable "ssl"
tcp-state responder
}
signature dpd_ssl_client {
ip-proto == tcp
# Client hello.
payload /^(\x16\x03[\x00\x01\x02]..\x01...\x03[\x00\x01\x02]|...?\x01[\x00\x01\x02][\x02\x03]).*/
tcp-state originator
}

View file

@ -94,46 +94,17 @@ redef record Info += {
delay_tokens: set[string] &optional;
};
event bro_init() &priority=5
{
Log::create_stream(SSL::LOG, [$columns=Info, $ev=log_ssl]);
}
redef capture_filters += {
["ssl"] = "tcp port 443",
["nntps"] = "tcp port 563",
["imap4-ssl"] = "tcp port 585",
["sshell"] = "tcp port 614",
["ldaps"] = "tcp port 636",
["ftps-data"] = "tcp port 989",
["ftps"] = "tcp port 990",
["telnets"] = "tcp port 992",
["imaps"] = "tcp port 993",
["ircs"] = "tcp port 994",
["pop3s"] = "tcp port 995",
["xmpps"] = "tcp port 5223",
};
const ports = {
443/tcp, 563/tcp, 585/tcp, 614/tcp, 636/tcp,
989/tcp, 990/tcp, 992/tcp, 993/tcp, 995/tcp, 5223/tcp
};
redef likely_server_ports += { ports };
redef dpd_config += {
[[ANALYZER_SSL]] = [$ports = ports]
};
redef likely_server_ports += {
443/tcp, 563/tcp, 585/tcp, 614/tcp, 636/tcp,
989/tcp, 990/tcp, 992/tcp, 993/tcp, 995/tcp, 5223/tcp
};
# A queue that buffers log records.
global log_delay_queue: table[count] of Info;
# The top queue index where records are added.
global log_delay_queue_head = 0;
# The bottom queue index that points to the next record to be flushed.
global log_delay_queue_tail = 0;
event bro_init() &priority=5
{
Log::create_stream(SSL::LOG, [$columns=Info, $ev=log_ssl]);
Analyzer::register_for_ports(Analyzer::ANALYZER_SSL, ports);
}
function set_session(c: connection)
{
@ -144,26 +115,17 @@ function set_session(c: connection)
function delay_log(info: Info, token: string)
{
info$delay_tokens = set();
if ( ! info?$delay_tokens )
info$delay_tokens = set();
add info$delay_tokens[token];
log_delay_queue[log_delay_queue_head] = info;
++log_delay_queue_head;
}
function undelay_log(info: Info, token: string)
{
if ( token in info$delay_tokens )
if ( info?$delay_tokens && token in info$delay_tokens )
delete info$delay_tokens[token];
}
global log_record: function(info: Info);
event delay_logging(info: Info)
{
log_record(info);
}
function log_record(info: Info)
{
if ( ! info?$delay_tokens || |info$delay_tokens| == 0 )
@ -172,26 +134,14 @@ function log_record(info: Info)
}
else
{
for ( unused_index in log_delay_queue )
when ( |info$delay_tokens| == 0 )
{
if ( log_delay_queue_head == log_delay_queue_tail )
return;
if ( |log_delay_queue[log_delay_queue_tail]$delay_tokens| > 0 )
{
if ( info$ts + max_log_delay > network_time() )
{
schedule 1sec { delay_logging(info) };
return;
}
else
{
Reporter::info(fmt("SSL delay tokens not released in time (%s)",
info$delay_tokens));
}
}
Log::write(SSL::LOG, log_delay_queue[log_delay_queue_tail]);
delete log_delay_queue[log_delay_queue_tail];
++log_delay_queue_tail;
log_record(info);
}
timeout SSL::max_log_delay
{
Reporter::info(fmt("SSL delay tokens not released in time (%s tokens remaining)",
|info$delay_tokens|));
}
}
}
@ -288,28 +238,16 @@ event ssl_established(c: connection) &priority=-5
finish(c);
}
event protocol_confirmation(c: connection, atype: count, aid: count) &priority=5
event protocol_confirmation(c: connection, atype: Analyzer::Tag, aid: count) &priority=5
{
# Check for the existence of the c$ssl record.
if ( c?$ssl && analyzer_name(atype) == "SSL" )
if ( c?$ssl && atype == Analyzer::ANALYZER_SSL )
c$ssl$analyzer_id = aid;
}
event protocol_violation(c: connection, atype: count, aid: count,
event protocol_violation(c: connection, atype: Analyzer::Tag, aid: count,
reason: string) &priority=5
{
if ( c?$ssl )
finish(c);
}
event bro_done()
{
if ( |log_delay_queue| == 0 )
return;
for ( unused_index in log_delay_queue )
{
Log::write(SSL::LOG, log_delay_queue[log_delay_queue_tail]);
delete log_delay_queue[log_delay_queue_tail];
++log_delay_queue_tail;
}
}
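For context, the delay-token API that the rewritten log_record() serves is driven from other scripts roughly as follows. A sketch, assuming delay_log and undelay_log are exported by this script as in this release; the token string and the DNS lookup are placeholders:
event ssl_established(c: connection)
	{
	# Hold the record back until some asynchronous work completes.
	SSL::delay_log(c$ssl, "example-lookup");
	when ( local name = lookup_addr(c$id$resp_h) )
		{
		# Work finished: release the token so log_record() can flush the record.
		SSL::undelay_log(c$ssl, "example-lookup");
		}
	timeout 5sec
		{
		# Give up and release the token anyway.
		SSL::undelay_log(c$ssl, "example-lookup");
		}
	}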

View file

@ -26,19 +26,17 @@ export {
};
}
redef capture_filters += { ["syslog"] = "port 514" };
const ports = { 514/udp } &redef;
redef dpd_config += { [ANALYZER_SYSLOG_BINPAC] = [$ports = ports] };
redef likely_server_ports += { 514/udp };
redef record connection += {
syslog: Info &optional;
};
const ports = { 514/udp };
redef likely_server_ports += { ports };
event bro_init() &priority=5
{
Log::create_stream(Syslog::LOG, [$columns=Info]);
Analyzer::register_for_ports(Analyzer::ANALYZER_SYSLOG, ports);
}
event syslog_message(c: connection, facility: count, severity: count, msg: string) &priority=5

View file

@ -0,0 +1 @@
@load-sigs ./dpd.sig

View file

@ -0,0 +1,14 @@
# Provide DPD signatures for tunneling protocols that otherwise
# wouldn't be detected at all.
signature dpd_ayiya {
ip-proto = udp
payload /^..\x11\x29/
enable "ayiya"
}
signature dpd_teredo {
ip-proto = udp
payload /^(\x00\x00)|(\x00\x01)|([\x60-\x6f])/
enable "teredo"
}

View file

@ -19,7 +19,7 @@ function extract_path(input: string): string
}
## Compresses a given path by removing '..'s and the parent directory it
## references and also removing '/'s.
## references and also removing dual '/'s and extraneous '/./'s.
## dir: a path string, either relative or absolute
## Returns: a compressed version of the input path
function compress_path(dir: string): string
@ -41,7 +41,7 @@ function compress_path(dir: string): string
return compress_path(dir);
}
const multislash_sep = /(\/){2,}/;
const multislash_sep = /(\/\.?){2,}/;
parts = split_all(dir, multislash_sep);
for ( i in parts )
if ( i % 2 == 0 )
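The effect of widening the separator pattern can be checked in isolation with embedded pattern matching (a sketch; "p in s" tests whether pattern p occurs anywhere in string s):
event bro_init()
	{
	print /(\/){2,}/ in "/a//b";        # T: the old pattern matches runs of slashes
	print /(\/){2,}/ in "/a/./b";       # F: but it does not catch "/./"
	print /(\/\.?){2,}/ in "/a/./b";    # T: the updated pattern matches "/./" runs too
	}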

View file

@ -0,0 +1,155 @@
##! A FIFO queue.
module Queue;
export {
## Settings for initializing the queue.
type Settings: record {
## If a maximum length is set for the queue
## it will maintain itself at that
## maximum length automatically.
max_len: count &optional;
};
## The internal data structure for the queue.
type Queue: record {};
## Initialize a queue record structure.
##
## s: A record which configures the queue.
##
## Returns: An opaque queue record.
global init: function(s: Settings &default=[]): Queue;
## Put a value onto the beginning of a queue.
##
## q: The queue to put the value into.
##
## val: The value to insert into the queue.
global put: function(q: Queue, val: any);
## Get a value from the end of a queue.
##
## q: The queue to get the value from.
##
## Returns: The value retrieved and removed from the queue.
global get: function(q: Queue): any;
## Peek at the value at the end of the queue without removing it.
##
## q: The queue to get the value from.
##
## Returns: The value at the end of the queue.
global peek: function(q: Queue): any;
## Merge two queues together. If any settings are applied
## to the queues, the settings from q1 are used for the new
## merged queue.
##
## q1: The first queue. Settings are taken from here.
##
## q2: The second queue.
##
## Returns: A new queue from merging the other two together.
global merge: function(q1: Queue, q2: Queue): Queue;
## Get the number of items in a queue.
##
## q: The queue.
##
## Returns: The length of the queue.
global len: function(q: Queue): count;
## Get the contents of the queue as a vector.
##
## q: The queue.
##
## ret: A vector into which the current contents of q are copied,
## in FIFO order, using ret's element type.
global get_vector: function(q: Queue, ret: vector of any);
}
redef record Queue += {
# Indicates whether the queue has been properly initialized.
initialized: bool &default=F;
# The values are stored here.
vals: table[count] of any &optional;
# Settings for the queue.
settings: Settings &optional;
# The top value in the vals table.
top: count &default=0;
# The bottom value in the vals table.
bottom: count &default=0;
# The number of bytes in the queue.
size: count &default=0;
};
function init(s: Settings): Queue
{
local q: Queue;
q$vals=table();
q$settings = copy(s);
q$initialized=T;
return q;
}
function put(q: Queue, val: any)
{
if ( q$settings?$max_len && len(q) >= q$settings$max_len )
get(q);
q$vals[q$top] = val;
++q$top;
}
function get(q: Queue): any
{
local ret = q$vals[q$bottom];
delete q$vals[q$bottom];
++q$bottom;
return ret;
}
function peek(q: Queue): any
{
return q$vals[q$bottom];
}
function merge(q1: Queue, q2: Queue): Queue
{
local ret = init(q1$settings);
local i = q1$bottom;
local j = q2$bottom;
for ( ignored_val in q1$vals )
{
if ( i in q1$vals )
put(ret, q1$vals[i]);
if ( j in q2$vals )
put(ret, q2$vals[j]);
++i;
++j;
}
return ret;
}
function len(q: Queue): count
{
return |q$vals|;
}
function get_vector(q: Queue, ret: vector of any)
{
local i = q$bottom;
local j = 0;
# Iterating over q$vals here only bounds the loop to the correct
# number of values; i and j track the actual read and write positions.
for ( ignored_val in q$vals )
{
if ( i >= q$top )
break;
ret[j] = q$vals[i];
++j; ++i;
}
}
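A short usage sketch of the API defined above (the values are arbitrary):
event bro_init()
	{
	local q = Queue::init([$max_len=3]);
	Queue::put(q, "a");
	Queue::put(q, "b");
	Queue::put(q, "c");
	Queue::put(q, "d");       # exceeds max_len, so "a" is silently dropped
	print Queue::len(q);      # 3
	print Queue::peek(q);     # b (oldest remaining value, not removed)
	print Queue::get(q);      # b (removed this time)
	local v: vector of any = vector();
	Queue::get_vector(q, v);  # v now holds c, d in FIFO order
	print v;
	}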

View file

@ -0,0 +1,9 @@
## Given an interval, returns a minimal human-readable string of the
## form "3m34s" representing the minutes and seconds spanned by the
## interval.
function duration_to_mins_secs(dur: interval): string
{
local dur_count = double_to_count(interval_to_double(dur));
return fmt("%dm%ds", dur_count/60, dur_count%60);
}