Merge branch 'master', remote-tracking branch 'origin' into topic/gregor/tunnel

Gregor Maier 2011-08-15 13:33:14 -07:00
commit ae1eb5379b
130 changed files with 2363 additions and 801 deletions

View file

@@ -9,10 +9,6 @@ redef peer_description = Cluster::node;
# Add a cluster prefix.
@prefixes += cluster
# Make this a controllable node since all cluster nodes are inherently
# controllable.
@load frameworks/control/controllee
## If this script isn't found anywhere, the cluster bombs out.
## Loading the cluster framework requires that a script by this name exists
## somewhere in the BROPATH. The only thing in the file should be the
@@ -23,7 +19,7 @@ redef peer_description = Cluster::node;
@load ./setup-connections
# Don't start the listening process until we're a bit more sure that the
# Don't load the listening script until we're a bit more sure that the
# cluster framework is actually being enabled.
@load frameworks/communication/listen-clear

View file

@@ -47,6 +47,25 @@ export {
time_machine: string &optional;
};
## This function can be called at any time to determine if the cluster
## framework is being enabled for this run.
global is_enabled: function(): bool;
## This function can be called at any time to determine what type of
## cluster node the current Bro instance is going to be acting as.
## :bro:id:`is_enabled` should be called first to find out if this is
## actually going to be a cluster node.
global local_node_type: function(): NodeType;
## This gives the number of workers currently connected, and it's
## maintained internally by the cluster framework. It's
## primarily intended for use by managers to find out how many workers
## should be responding to requests.
global worker_count: count = 0;
## The cluster layout definition. This should be placed into a file
## named cluster-layout.bro somewhere in the BROPATH. It will be
## automatically loaded if the CLUSTER_NODE environment variable is set.
const nodes: table[string] of Node = {} &redef;
## This is usually supplied on the command line for each instance
@@ -54,7 +73,29 @@ export {
const node = getenv("CLUSTER_NODE") &redef;
}
event bro_init()
function is_enabled(): bool
{
return (node != "");
}
function local_node_type(): NodeType
{
return nodes[node]$node_type;
}
event remote_connection_handshake_done(p: event_peer)
{
if ( nodes[p$descr]$node_type == WORKER )
++worker_count;
}
event remote_connection_closed(p: event_peer)
{
if ( nodes[p$descr]$node_type == WORKER )
--worker_count;
}
event bro_init() &priority=5
{
# If a node is given but its name is unknown, we need to fail.
if ( node != "" && node !in nodes )

View file

@@ -10,11 +10,14 @@
@prefixes += cluster-manager
# Load the script for local site configuration for the manager node.
@load site/local-manager
## Turn off remote logging since this is the manager and should only log here.
redef Log::enable_remote_logging = F;
## Use the cluster's archive logging script.
redef Log::default_rotation_postprocessor = "archive-log";
redef Log::default_rotation_postprocessor_cmd = "archive-log";
## We're processing essentially *only* remote events.
redef max_remote_events_processed = 10000;

View file

@@ -1,6 +1,9 @@
@prefixes += cluster-proxy
# Load the script for local site configuration for proxy nodes.
@load site/local-proxy
## The proxy only syncs state; does not forward events.
redef forward_remote_events = F;
redef forward_remote_state_changes = T;
@@ -12,5 +15,5 @@ redef Log::enable_local_logging = F;
redef Log::enable_remote_logging = T;
## Use the cluster's delete-log script.
redef Log::default_rotation_postprocessor = "delete-log";
redef Log::default_rotation_postprocessor_cmd = "delete-log";

View file

@@ -1,6 +1,9 @@
@prefixes += cluster-worker
# Load the script for local site configuration for the worker nodes.
@load site/local-worker
## Don't do any local logging.
redef Log::enable_local_logging = F;
@@ -8,7 +11,7 @@ redef Log::enable_local_logging = F;
redef Log::enable_remote_logging = T;
## Use the cluster's delete-log script.
redef Log::default_rotation_postprocessor = "delete-log";
redef Log::default_rotation_postprocessor_cmd = "delete-log";
## Record all packets into trace file.
# TODO: should we really be setting this to T?

View file

@@ -1,4 +1,5 @@
@load base/frameworks/communication
@load ./main
@load base/frameworks/communication/main
module Cluster;
@@ -59,13 +60,12 @@ event bro_init() &priority=9
$connect=T, $retry=1mins,
$class=node];
}
else if ( me$node_type == WORKER )
{
if ( n$node_type == MANAGER && me$manager == i )
Communication::nodes["manager"] = [$host=nodes[i]$ip, $p=nodes[i]$p,
$connect=T, $retry=1mins,
$class=node];
$class=node, $events=manager_events];
if ( n$node_type == PROXY && me$proxy == i )
Communication::nodes["proxy"] = [$host=nodes[i]$ip, $p=nodes[i]$p,

View file

@@ -1,5 +1 @@
# TODO: get rid of this as soon as the Expr.cc hack is changed.
@if ( getenv("ENABLE_COMMUNICATION") != "" )
@load ./main
@endif

View file

@@ -108,6 +108,9 @@ const src_names = {
event bro_init()
{
Log::create_stream(COMMUNICATION, [$columns=Info]);
if ( |nodes| > 0 )
enable_communication();
}
function do_script_log_common(level: count, src: count, msg: string)

View file

@@ -27,6 +27,19 @@ export {
ev: any &optional;
};
## Default function for building the path values for log filters if not
## specified otherwise by a filter. The default implementation uses ``id``
## to derive a name.
##
## id: The log stream.
## path: A suggested path value, which may be either the filter's ``path``
## if defined or a fall-back generated internally.
## rec: An instance of the stream's ``columns`` type with its
## fields set to the values to be logged.
##
## Returns: The path to be used for the filter.
global default_path_func: function(id: ID, path: string, rec: any) : string &redef;
## Filter customizing logging.
type Filter: record {
## Descriptive name to reference this filter.
@@ -50,7 +63,7 @@ export {
## The specific interpretation of the string is up to
## the used writer, and may for example be the destination
## file name. Generally, filenames are expected to be given
## without any extensions; writers will add appropriate
## extensions automatically.
path: string &optional;
@@ -60,7 +73,15 @@ export {
## different strings for separate calls, but be careful: it's
## easy to flood the disk by returning a new string for each
## connection ...
path_func: function(id: ID, path: string): string &optional;
##
## id: The log stream.
## path: A suggested path value, which may be either the filter's ``path``
## if defined or a fall-back generated internally.
## rec: An instance of the stream's ``columns`` type with its
## fields set to the values to be logged.
##
## Returns: The path to be used for the filter.
path_func: function(id: ID, path: string, rec: any): string &optional;
## Subset of column names to record. If not given, all
## columns are recorded.
@@ -81,36 +102,34 @@ export {
## Information passed into rotation callback functions.
type RotationInfo: record {
writer: Writer; ##< Writer.
path: string; ##< Original path value.
open: time; ##< Time when opened.
close: time; ##< Time when closed.
writer: Writer; ##< Writer.
fname: string; ##< Full name of the rotated file.
path: string; ##< Original path value.
open: time; ##< Time when opened.
close: time; ##< Time when closed.
terminating: bool; ##< True if rotation occurred due to Bro shutting down.
};
## Default rotation interval. Zero disables rotation.
const default_rotation_interval = 0secs &redef;
## Default naming suffix format. Uses a strftime() style.
const default_rotation_date_format = "%y-%m-%d_%H.%M.%S" &redef;
## Default naming format for timestamps embedded into filenames. Uses a strftime() style.
const default_rotation_date_format = "%Y-%m-%d-%H-%M-%S" &redef;
## Default postprocessor for writers outputting into files.
const default_rotation_postprocessor = "" &redef;
## Default shell command to run on rotated files. Empty for none.
const default_rotation_postprocessor_cmd = "" &redef;
## Default function to construct the name of a rotated output file.
## The default implementation appends info$date_fmt to the original
## file name.
##
## info: Meta-data about the file to be rotated.
global default_rotation_path_func: function(info: RotationInfo) : string &redef;
## Specifies the default postprocessor function per writer type. Entries in this
## table are initialized by each writer type.
const default_rotation_postprocessors: table[Writer] of function(info: RotationInfo) : bool &redef;
## Type for controlling file rotation.
type RotationControl: record {
## Rotation interval.
interv: interval &default=default_rotation_interval;
## Format for timestamps embedded into rotated file names.
date_fmt: string &default=default_rotation_date_format;
## Postprocessor process to run on the rotated file.
postprocessor: string &default=default_rotation_postprocessor;
## Callback function to trigger for rotated files. If not set, the default
## comes out of default_rotation_postprocessors.
postprocessor: function(info: RotationInfo) : bool &optional;
};
## Specifies rotation parameters per ``(id, path)`` tuple.
@@ -133,6 +152,8 @@ export {
global flush: function(id: ID): bool;
global add_default_filter: function(id: ID) : bool;
global remove_default_filter: function(id: ID) : bool;
global run_rotation_postprocessor_cmd: function(info: RotationInfo, npath: string) : bool;
}
# We keep a script-level copy of all filters so that we can manipulate them.
@@ -140,10 +161,39 @@ global filters: table[ID, string] of Filter;
@load logging.bif.bro # Needs Filter and Stream defined.
function default_rotation_path_func(info: RotationInfo) : string
module Log;
# Used internally by the log manager.
function __default_rotation_postprocessor(info: RotationInfo) : bool
{
local date_fmt = rotation_control[info$writer, info$path]$date_fmt;
return fmt("%s-%s", info$path, strftime(date_fmt, info$open));
if ( info$writer in default_rotation_postprocessors )
return default_rotation_postprocessors[info$writer](info);
}
function default_path_func(id: ID, path: string, rec: any) : string
{
# TODO for Seth: Do what you want. :)
return path;
}
# Run post-processor on file. If there isn't any postprocessor defined,
# we move the file to a nicer name.
function run_rotation_postprocessor_cmd(info: RotationInfo, npath: string) : bool
{
local pp_cmd = default_rotation_postprocessor_cmd;
if ( pp_cmd == "" )
return T;
# The date format is hard-coded here to provide a standardized
# script interface.
system(fmt("%s %s %s %s %s %d",
pp_cmd, npath, info$path,
strftime("%y-%m-%d_%H.%M.%S", info$open),
strftime("%y-%m-%d_%H.%M.%S", info$close),
info$terminating));
return T;
}
function create_stream(id: ID, stream: Stream) : bool
@@ -159,9 +209,15 @@ function disable_stream(id: ID) : bool
if ( ! __disable_stream(id) )
return F;
}
function add_filter(id: ID, filter: Filter) : bool
{
# This is a work-around for the fact that we can't forward-declare
# the default_path_func and then use it as &default in the record
# definition.
if ( ! filter?$path_func )
filter$path_func = default_path_func;
filters[id, filter$name] = filter;
return __add_filter(id, filter);
}
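
As a usage sketch for the extended path_func interface above (the filter name and return value are illustrative; METRICS is one of the Log::ID values appearing in this commit):

function example_path_func(id: Log::ID, path: string, rec: any) : string
    {
    # Start from the suggested path; a real implementation could inspect
    # rec (the stream's columns record) to pick a per-entry file name.
    return fmt("%s-example", path);
    }

event bro_init()
    {
    Log::add_filter(METRICS, [$name="example", $path_func=example_path_func]);
    }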

View file

@@ -26,4 +26,19 @@ export {
const unset_field = "-" &redef;
}
# Default function to postprocess a rotated ASCII log file. It moves the rotated
# file to a new name that includes a timestamp with the opening time, and then
# runs the writer's default postprocessor command on it.
function default_rotation_postprocessor_func(info: Log::RotationInfo) : bool
{
# Move file to name including both opening and closing time.
local dst = fmt("%s.%s.log", info$path,
strftime(Log::default_rotation_date_format, info$open));
system(fmt("/bin/mv %s %s", info$fname, dst));
# Run default postprocessor.
return Log::run_rotation_postprocessor_cmd(info, dst);
}
redef Log::default_rotation_postprocessors += { [Log::WRITER_ASCII] = default_rotation_postprocessor_func };
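
Hooking a site script into rotation now only needs the command name; per the system() call in run_rotation_postprocessor_cmd above, the command receives the rotated file, the original path, the open and close timestamps, and the terminating flag as positional arguments. A sketch, with a hypothetical script path:

# The named script is hypothetical. It is invoked as:
#   <cmd> <rotated file> <original path> <open time> <close time> <terminating 0/1>
redef Log::default_rotation_postprocessor_cmd = "/usr/local/share/bro/archive-log";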

View file

@@ -1 +1,11 @@
@load ./main
# The cluster framework must be loaded first.
@load base/frameworks/cluster
# Load either the cluster support script or the non-cluster support script.
@if ( Cluster::is_enabled() )
@load ./cluster
@else
@load ./non-cluster
@endif

View file

@@ -0,0 +1,146 @@
##! This implements transparent cluster support for the metrics framework.
##! Do not load this file directly. It's only meant to be loaded automatically
##! and will be, depending on whether the cluster framework has been enabled.
##! The goal of this script is to make metric calculation completely and
##! transparently automated when running on a cluster.
@load base/frameworks/cluster
module Metrics;
export {
## This event is sent by the manager in a cluster to initiate the
## collection of metrics values.
global cluster_collect: event(uid: string, id: ID, filter_name: string);
## This event is sent by nodes that are collecting metrics after receiving
## a request for the metric filter from the manager.
global cluster_results: event(uid: string, id: ID, filter_name: string, data: MetricTable, done: bool);
## This event is used internally by workers to send result chunks.
global send_data: event(uid: string, id: ID, filter_name: string, data: MetricTable);
## This value allows a user to decide how many metric results the
## workers should transmit in each group.
const cluster_send_in_groups_of = 50 &redef;
}
# This is maintained by managers so they can know what data they requested and
# when they requested it.
global requested_results: table[string] of time = table() &create_expire=5mins;
# TODO: Both of the next variables make the assumption that a value never
# takes longer than 5 minutes to transmit from workers to manager. This needs to
# be tunable or self-tuning. These should also be restructured to be
# maintained within a single variable.
# This variable is maintained by manager nodes as they collect and aggregate
# results.
global collecting_results: table[string, ID, string] of MetricTable &create_expire=5mins;
# This variable is maintained by manager nodes to track how many "dones" they
# collected per collection unique id. Once the number of results for a uid
# matches the number of peer nodes that results should be coming from, the
# result is written out and deleted from here.
# TODO: add an &expire_func in case not all results are received.
global done_with: table[string] of count &create_expire=5mins &default=0;
# Add events to the cluster framework to make this work.
redef Cluster::manager_events += /Metrics::cluster_collect/;
redef Cluster::worker_events += /Metrics::cluster_results/;
# The metrics collection process can only be done by a manager.
@if ( Cluster::local_node_type() == Cluster::MANAGER )
event Metrics::log_it(filter: Filter)
{
local uid = unique_id("");
# Set some tracking variables.
requested_results[uid] = network_time();
collecting_results[uid, filter$id, filter$name] = table();
# Request data from peers.
event Metrics::cluster_collect(uid, filter$id, filter$name);
# Schedule the log_it event for the next break period.
schedule filter$break_interval { Metrics::log_it(filter) };
}
@endif
@if ( Cluster::local_node_type() == Cluster::WORKER )
event Metrics::send_data(uid: string, id: ID, filter_name: string, data: MetricTable)
{
#print fmt("WORKER %s: sending data for uid %s...", Cluster::node, uid);
local local_data: MetricTable;
local num_added = 0;
for ( index in data )
{
local_data[index] = data[index];
delete data[index];
# Only send cluster_send_in_groups_of at a time. Queue another
# event to send the next group.
if ( cluster_send_in_groups_of == ++num_added )
break;
}
local done = F;
# If data is empty, this metric is done.
if ( |data| == 0 )
done = T;
event Metrics::cluster_results(uid, id, filter_name, local_data, done);
if ( ! done )
event Metrics::send_data(uid, id, filter_name, data);
}
event Metrics::cluster_collect(uid: string, id: ID, filter_name: string)
{
#print fmt("WORKER %s: received the cluster_collect event.", Cluster::node);
event Metrics::send_data(uid, id, filter_name, store[id, filter_name]);
# Look up the actual filter and reset it; the reference to the data
# currently stored will be maintained internally by the send_data event.
reset(filter_store[id, filter_name]);
}
@endif
@if ( Cluster::local_node_type() == Cluster::MANAGER )
event Metrics::cluster_results(uid: string, id: ID, filter_name: string, data: MetricTable, done: bool)
{
#print fmt("MANAGER: receiving results from %s", get_event_peer()$descr);
local local_data = collecting_results[uid, id, filter_name];
for ( index in data )
{
if ( index !in local_data )
local_data[index] = 0;
local_data[index] += data[index];
}
# Mark another worker as being "done" for this uid.
if ( done )
++done_with[uid];
# If the data has been collected from all peers, we are done and ready to log.
if ( Cluster::worker_count == done_with[uid] )
{
local ts = network_time();
# Log the time this was initially requested if it's available.
if ( uid in requested_results )
ts = requested_results[uid];
write_log(ts, filter_store[id, filter_name], local_data);
if ( [uid, id, filter_name] in collecting_results )
delete collecting_results[uid, id, filter_name];
if ( uid in done_with )
delete done_with[uid];
if ( uid in requested_results )
delete requested_results[uid];
}
}
@endif

View file

@@ -1,19 +0,0 @@
@load base/frameworks/metrics
redef enum Metrics::ID += {
CONNS_ORIGINATED,
CONNS_RESPONDED
};
event bro_init()
{
Metrics::configure(CONNS_ORIGINATED, [$aggregation_mask=24, $break_interval=5mins]);
Metrics::configure(CONNS_RESPONDED, [$aggregation_mask=24, $break_interval=5mins]);
}
event connection_established(c: connection)
{
Metrics::add_data(CONNS_ORIGINATED, [$host=c$id$orig_h], 1);
Metrics::add_data(CONNS_RESPONDED, [$host=c$id$resp_h], 1);
}

View file

@@ -1,20 +0,0 @@
@load base/frameworks/metrics
redef enum Metrics::ID += {
HTTP_REQUESTS_BY_STATUS_CODE,
HTTP_REQUESTS_BY_HOST,
};
event bro_init()
{
Metrics::configure(HTTP_REQUESTS_BY_STATUS_CODE, [$aggregation_mask=24, $break_interval=10secs]);
Metrics::configure(HTTP_REQUESTS_BY_HOST, [$break_interval=10secs]);
}
event HTTP::log_http(rec: HTTP::Info)
{
if ( rec?$host )
Metrics::add_data(HTTP_REQUESTS_BY_HOST, [$index=rec$host], 1);
if ( rec?$status_code )
Metrics::add_data(HTTP_REQUESTS_BY_STATUS_CODE, [$host=rec$id$orig_h, $index=fmt("%d", rec$status_code)], 1);
}

View file

@@ -1,28 +1,19 @@
##! This is the implementation of the metrics framework
##! This is the implementation of the metrics framework.
@load base/frameworks/notice
module Metrics;
export {
redef enum Log::ID += { METRICS };
type ID: enum {
ALL,
NOTHING,
};
const default_aggregation_mask = 24 &redef;
const default_break_interval = 5mins &redef;
# TODO: configure a metrics filter logging stream to log the current
# metrics configuration in case someone is looking through
# old logs and the configuration has changed since then.
type Filter: record {
name: ID &optional;
## Global mask by which you'd like to aggregate traffic.
aggregation_mask: count &optional;
## This is essentially applying names to various subnets.
aggregation_table: table[subnet] of string &optional;
break_interval: interval &default=default_break_interval;
};
## The default interval used for "breaking" metrics and writing the
## current value to the logging stream.
const default_break_interval = 15mins &redef;
type Index: record {
## Host is the value to which this metric applies.
@@ -35,108 +26,190 @@ export {
## value in a Host header. This is an example of a non-host based
## metric since multiple IP addresses could respond for the same Host
## header value.
index: string &default="";
};
str: string &optional;
## The CIDR block that this metric applies to. This is typically
## only used internally for host based aggregation.
network: subnet &optional;
} &log;
type Info: record {
ts: time &log;
name: ID &log;
index: string &log &optional;
agg_subnet: string &log &optional;
metric_id: ID &log;
filter_name: string &log;
index: Index &log;
value: count &log;
};
global add_filter: function(name: ID, filter: Filter);
global add_data: function(name: ID, index: Index, increment: count);
# TODO: configure a metrics filter logging stream to log the current
# metrics configuration in case someone is looking through
# old logs and the configuration has changed since then.
type Filter: record {
## The :bro:type:`Metrics::ID` that this filter applies to.
id: ID &optional;
## The name for this filter so that multiple filters can be
## applied to a single metric to get a different view of the same
## metric data being collected (different aggregation, break, etc).
name: string &default="default";
## A predicate so that you can decide per index if you would like
## to accept the data being inserted.
pred: function(index: Index): bool &optional;
## Global mask by which you'd like to aggregate traffic.
aggregation_mask: count &optional;
## This is essentially applying names to various subnets.
aggregation_table: table[subnet] of subnet &optional;
## The interval at which the metric should be "broken" and written
## to the logging stream.
break_interval: interval &default=default_break_interval;
## This determines if the result of this filter is sent to the metrics
## logging stream. One use for the metrics framework is as an internal
## thresholding and statistics gathering utility that is meant to
## never log but rather to generate notices and derive data.
log: bool &default=T;
## A straight threshold for generating a notice.
notice_threshold: count &optional;
## A series of thresholds at which to generate notices.
## TODO: This is not implemented yet!
notice_thresholds: vector of count &optional;
## If this and a $notice_threshold value are set, this notice type
## will be generated by the metrics framework.
note: Notice::Type &optional;
};
global add_filter: function(id: ID, filter: Filter);
global add_data: function(id: ID, index: Index, increment: count);
# This is the event that is used to "finish" metrics and adapt the metrics
# framework for clustered or non-clustered usage.
global log_it: event(filter: Filter);
global log_metrics: event(rec: Info);
}
global metric_filters: table[ID] of Filter = table();
redef record Notice::Info += {
metric_index: Index &log &optional;
};
type MetricIndex: table[string] of count &default=0;
type MetricTable: table[string] of MetricIndex;
global store: table[ID] of MetricTable = table();
global metric_filters: table[ID] of vector of Filter = table();
global filter_store: table[ID, string] of Filter = table();
event bro_init()
type MetricTable: table[Index] of count &default=0;
# This is indexed by metric ID and stream filter name.
global store: table[ID, string] of MetricTable = table();
# This stores the current threshold index for filters using the
# $notice_thresholds element.
global thresholds: table[string] of count = {} &default=0;
event bro_init() &priority=5
{
Log::create_stream(METRICS, [$columns=Info, $ev=log_metrics]);
}
function reset(name: ID)
{
store[name] = table();
}
event log_it(filter: Filter)
function write_log(ts: time, filter: Filter, data: MetricTable)
{
# If this node is the manager in a cluster, this needs to request values
# for this metric from all of the workers.
local name = filter$name;
for ( agg_subnet in store[name] )
for ( index in data )
{
local metric_values = store[name][agg_subnet];
for ( index in metric_values )
local val = data[index];
local m: Info = [$ts=ts,
$metric_id=filter$id,
$filter_name=filter$name,
$index=index,
$value=val];
if ( m$index?$host &&
filter?$notice_threshold &&
m$value >= filter$notice_threshold )
{
local val = metric_values[index];
local m: Info = [$ts=network_time(),
$name=name,
$agg_subnet=fmt("%s", agg_subnet),
$index=index,
$value=val];
if ( index == "" )
delete m$index;
if ( agg_subnet == "" )
delete m$agg_subnet;
Log::write(METRICS, m);
NOTICE([$note=filter$note,
$msg=fmt("Metrics threshold crossed by %s %d/%d", index$host, m$value, filter$notice_threshold),
$src=m$index$host, $n=m$value,
$metric_index=index]);
}
else if ( filter?$notice_thresholds &&
m$value >= filter$notice_thresholds[thresholds[cat(filter$id,filter$name)]] )
{
# TODO: implement this
}
if ( filter$log )
Log::write(METRICS, m);
}
reset(name);
schedule filter$break_interval { log_it(filter) };
}
function add_filter(name: ID, filter: Filter)
function reset(filter: Filter)
{
store[filter$id, filter$name] = table();
}
function add_filter(id: ID, filter: Filter)
{
if ( filter?$aggregation_table && filter?$aggregation_mask )
{
print "INVALID Metric filter: Defined $aggregation_table and $aggregation_mask.";
return;
}
filter$name = name;
metric_filters[name] = filter;
store[name] = table();
# Only do this on the manager if in a cluster.
schedule filter$break_interval { log_it(filter) };
}
function add_data(name: ID, index: Index, increment: count)
{
local conf = metric_filters[name];
local agg_subnet = "";
if ( index?$host )
if ( [id, filter$name] in store )
{
if ( conf?$aggregation_mask )
{
local agg_mask = conf$aggregation_mask;
agg_subnet = fmt("%s", mask_addr(index$host, agg_mask));
}
else if ( conf?$aggregation_table )
agg_subnet = fmt("%s", conf$aggregation_table[index$host]);
else
agg_subnet = fmt("%s", index$host);
print fmt("INVALID Metric filter: Filter with name \"%s\" already exists.", filter$name);
return;
}
if ( filter?$notice_threshold && filter?$notice_thresholds )
{
print "INVALID Metric filter: Defined both $notice_threshold and $notice_thresholds";
return;
}
if ( agg_subnet !in store[name] )
store[name][agg_subnet] = table();
if ( ! filter?$id )
filter$id = id;
if ( index$index !in store[name][agg_subnet] )
store[name][agg_subnet][index$index] = 0;
store[name][agg_subnet][index$index] = store[name][agg_subnet][index$index] + increment;
if ( id !in metric_filters )
metric_filters[id] = vector();
metric_filters[id][|metric_filters[id]|] = filter;
filter_store[id, filter$name] = filter;
store[id, filter$name] = table();
schedule filter$break_interval { Metrics::log_it(filter) };
}
function add_data(id: ID, index: Index, increment: count)
{
if ( id !in metric_filters )
return;
local filters = metric_filters[id];
# Add the data to any of the defined filters.
for ( filter_id in filters )
{
local filter = filters[filter_id];
# If this filter has a predicate, run the predicate and skip this
# index if the predicate returns false.
if ( filter?$pred &&
! filter$pred(index) )
next;
local filt_store = store[id, filter$name];
if ( index?$host )
{
if ( filter?$aggregation_mask )
{
index$network = mask_addr(index$host, filter$aggregation_mask);
delete index$host;
}
else if ( filter?$aggregation_table )
{
index$network = filter$aggregation_table[index$host];
delete index$host;
}
}
if ( index !in filt_store )
filt_store[index] = 0;
filt_store[index] += increment;
}
}
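
For comparison with the two example scripts deleted above, the same connection metric rewritten against the reworked filter API would look roughly like this (the enum value is re-declared here purely for illustration):

@load base/frameworks/metrics

redef enum Metrics::ID += { CONNS_ORIGINATED };

event bro_init()
    {
    # One named filter per view of the metric; /24 aggregation as before.
    Metrics::add_filter(CONNS_ORIGINATED,
                        [$aggregation_mask=24, $break_interval=5mins]);
    }

event connection_established(c: connection)
    {
    Metrics::add_data(CONNS_ORIGINATED, [$host=c$id$orig_h], 1);
    }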

View file

@@ -0,0 +1,17 @@
module Metrics;
export {
}
event Metrics::log_it(filter: Filter)
{
local id = filter$id;
local name = filter$name;
write_log(network_time(), filter, store[id, name]);
reset(filter);
schedule filter$break_interval { Metrics::log_it(filter) };
}

View file

@@ -6,7 +6,8 @@
@load ./actions/drop
@load ./actions/email_admin
@load ./actions/page
@load ./actions/add-geodata
# Load the script to add hostnames to emails by default.
# NOTE: this exposes a memleak in async DNS lookups.
#@load ./extend-email/hostnames
# There shouldn't be any default overhead from loading these since they
# *should* only do anything when notices have the ACTION_EMAIL action applied.
@load ./extend-email/hostnames

View file

@@ -0,0 +1,47 @@
##! This script adds geographic location data to notices for the "remote"
##! host in a connection. It does make the assumption that one of the
##! addresses in a connection is "local" and one is "remote" which is
##! probably a safe assumption to make in most cases. If both addresses
##! are remote, it will use the $src address.
module Notice;
export {
redef enum Action += {
## Indicates that the notice should have geodata added for the
## "remote" host. :bro:id:`Site::local_nets` must be defined
## in order for this to work.
ACTION_ADD_GEODATA
};
redef record Info += {
## If libGeoIP support is built in, notices can have geographic
## information attached to them.
remote_location: geo_location &log &optional;
};
## Notice types which should have the "remote" location looked up.
## If GeoIP support is not built in, this does nothing.
const lookup_location_types: set[Notice::Type] = {} &redef;
## Add a helper to the notice policy for looking up GeoIP data.
redef Notice::policy += {
[$pred(n: Notice::Info) = { return (n$note in Notice::lookup_location_types); },
$priority = 10],
};
}
# This is handled at a high priority in case other notice handlers
# want to use the data.
event notice(n: Notice::Info) &priority=10
{
if ( ACTION_ADD_GEODATA in n$actions &&
|Site::local_nets| > 0 &&
! n?$remote_location )
{
if ( n?$src && ! Site::is_local_addr(n$src) )
n$remote_location = lookup_location(n$src);
else if ( n?$dst && ! Site::is_local_addr(n$dst) )
n$remote_location = lookup_location(n$dst);
}
}
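
A usage sketch for the export above; SSH::Login is used here as an illustrative notice type and is an assumption:

# Opt a notice type into remote-location lookups.
redef Notice::lookup_location_types += { SSH::Login };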

View file

@@ -519,7 +519,7 @@ const frag_timeout = 0.0 sec &redef;
# packets and IP-level bytes transferred by each endpoint. If
# true, these values are returned in the connection's endpoint
# record val.
const use_conn_size_analyzer = F &redef;
const use_conn_size_analyzer = T &redef;
const UDP_INACTIVE = 0;
const UDP_ACTIVE = 1; # means we've seen something from this endpoint

View file

@@ -23,11 +23,11 @@
@load base/frameworks/signatures
@load base/frameworks/packet-filter
@load base/frameworks/software
@load base/frameworks/intel
@load base/frameworks/metrics
@load base/frameworks/communication
@load base/frameworks/control
@load base/frameworks/cluster
@load base/frameworks/metrics
@load base/frameworks/intel
@load base/frameworks/reporter
@load base/protocols/conn

View file

@@ -214,7 +214,7 @@ event http_header(c: connection, is_orig: bool, name: string, value: string) &pr
c$http$response_content_length = extract_count(value);
else if ( name == "CONTENT-DISPOSITION" &&
/[fF][iI][lL][eE][nN][aA][mM][eE]/ in value )
c$http$filename = sub(value, /^.*[fF][iI][lL][eE][nN][aA][mM][eE]=/, "");
c$http$filename = extract_filename_from_content_disposition(value);
}
}

View file

@@ -16,11 +16,13 @@ export {
extract_file: bool &default=F;
## Store the file handle here for the file currently being extracted.
extraction_file: file &optional;
extraction_file: file &log &optional;
};
redef record State += {
## Store a count of the number of files that have been transferred in
## this conversation to create unique file names on disk.
num_extracted_files: count &optional;
num_extracted_files: count &default=0;
};
}
@@ -34,7 +36,7 @@ event mime_segment_data(c: connection, length: count, data: string) &priority=3
{
if ( c$mime$extract_file && c$mime$content_len == 0 )
{
local suffix = fmt("%d.dat", ++c$mime$num_extracted_files);
local suffix = fmt("%d.dat", ++c$mime_state$num_extracted_files);
local fname = generate_extraction_filename(extraction_prefix, c, suffix);
c$mime$extraction_file = open(fname);
enable_raw_output(c$mime$extraction_file);

View file

@@ -1 +1,3 @@
@load ./main
@load ./entities
@load ./entities-excerpt

View file

@@ -0,0 +1,52 @@
##! This script is for optionally adding a body excerpt to the SMTP
##! entities log.
@load ./entities
module SMTP;
export {
redef record SMTP::EntityInfo += {
## The entity body excerpt.
excerpt: string &log &default="";
## Internal tracking to know how much of the body should be included
## in the excerpt.
excerpt_len: count &optional;
};
## This is the default value for how much of the entity body should be
## included for all MIME entities.
const default_entity_excerpt_len = 0 &redef;
## This table defines how much of various entity bodies should be
## included in excerpts.
const entity_excerpt_len: table[string] of count = {}
&redef
&default = default_entity_excerpt_len;
}
event mime_segment_data(c: connection, length: count, data: string) &priority=-1
{
if ( ! c?$smtp ) return;
if ( c$smtp$current_entity$content_len == 0 )
c$smtp$current_entity$excerpt_len = entity_excerpt_len[c$smtp$current_entity$mime_type];
}
event mime_segment_data(c: connection, length: count, data: string) &priority=-2
{
if ( ! c?$smtp ) return;
local ent = c$smtp$current_entity;
if ( ent$content_len < ent$excerpt_len )
{
if ( ent$content_len + length < ent$excerpt_len )
ent$excerpt = cat(ent$excerpt, data);
else
{
local x_bytes = ent$excerpt_len - ent$content_len;
ent$excerpt = cat(ent$excerpt, sub_bytes(data, 1, x_bytes));
}
}
}
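
A configuration sketch for the table exported above (the mime type and length are illustrative):

# Keep up to 512 bytes of plain-text entity bodies in the excerpt column.
redef SMTP::entity_excerpt_len += { ["text/plain"] = 512 };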

View file

@@ -0,0 +1,234 @@
##! Analysis and logging for MIME entities found in SMTP sessions.
@load base/utils/strings
@load base/utils/files
@load ./main
module SMTP;
export {
redef enum Notice::Type += {
## Indicates that an MD5 sum was calculated for a MIME message.
MD5,
};
redef enum Log::ID += { SMTP_ENTITIES };
type EntityInfo: record {
## This is the timestamp of when the MIME content transfer began.
ts: time &log;
uid: string &log;
id: conn_id &log;
## Internally generated "message id" that ties back to the particular
## message in the SMTP log where this entity was seen.
mid: string &log;
## The filename seen in the Content-Disposition header.
filename: string &log &optional;
## Track how many bytes of the MIME encoded file have been seen.
content_len: count &log &default=0;
## The mime type of the entity discovered through magic bytes identification.
mime_type: string &log &optional;
## The calculated MD5 sum for the MIME entity.
md5: string &log &optional;
## Optionally calculate the file's MD5 sum. Must be set prior to the
## first data chunk being seen in an event.
calc_md5: bool &default=F;
## This boolean value indicates if an MD5 sum is being calculated
## for the current file transfer.
calculating_md5: bool &default=F;
## Optionally write the file to disk. Must be set prior to first
## data chunk being seen in an event.
extract_file: bool &default=F;
## Store the file handle here for the file currently being extracted.
extraction_file: file &log &optional;
};
redef record Info += {
## The in-progress entity information.
current_entity: EntityInfo &optional;
};
redef record State += {
## Store a count of the number of files that have been transferred in
## a conversation to create unique file names on disk.
num_extracted_files: count &default=0;
## Track the number of MIME encoded files transferred during a session.
mime_level: count &default=0;
};
## Generate MD5 sums for these filetypes.
const generate_md5 = /application\/x-dosexec/ # Windows and DOS executables
| /application\/x-executable/ # *NIX executable binary
&redef;
## Pattern of file mime types to extract from MIME bodies.
const extract_file_types = /NO_DEFAULT/ &redef;
## The on-disk prefix for files to be extracted from MIME entity bodies.
const extraction_prefix = "smtp-entity" &redef;
global log_mime: event(rec: EntityInfo);
}
event bro_init() &priority=5
{
Log::create_stream(SMTP_ENTITIES, [$columns=EntityInfo, $ev=log_mime]);
}
function set_session(c: connection, new_entity: bool)
{
if ( ! c$smtp?$current_entity || new_entity )
{
local info: EntityInfo;
info$ts=network_time();
info$uid=c$uid;
info$id=c$id;
info$mid=c$smtp$mid;
c$smtp$current_entity = info;
++c$smtp_state$mime_level;
}
}
event mime_begin_entity(c: connection) &priority=10
{
if ( ! c?$smtp ) return;
set_session(c, T);
}
# This has priority -10 because other handlers need to know the current
# content_len before it's updated by this handler.
event mime_segment_data(c: connection, length: count, data: string) &priority=-10
{
if ( ! c?$smtp ) return;
c$smtp$current_entity$content_len = c$smtp$current_entity$content_len + length;
}
event mime_segment_data(c: connection, length: count, data: string) &priority=7
{
if ( ! c?$smtp ) return;
if ( c$smtp$current_entity$content_len == 0 )
c$smtp$current_entity$mime_type = split1(identify_data(data, T), /;/)[1];
}
event mime_segment_data(c: connection, length: count, data: string) &priority=-5
{
if ( ! c?$smtp ) return;
if ( c$smtp$current_entity$content_len == 0 )
{
if ( generate_md5 in c$smtp$current_entity$mime_type )
c$smtp$current_entity$calc_md5 = T;
if ( c$smtp$current_entity$calc_md5 )
{
c$smtp$current_entity$calculating_md5 = T;
md5_hash_init(c$id);
}
}
if ( c$smtp$current_entity$calculating_md5 )
md5_hash_update(c$id, data);
}
## In the event of a content gap during the MIME transfer, check the state of
## the MD5 sum calculation and stop calculating the MD5 since it would be
## incorrect anyway.
event content_gap(c: connection, is_orig: bool, seq: count, length: count) &priority=5
{
if ( is_orig || ! c?$smtp || ! c$smtp?$current_entity ) return;
if ( c$smtp$current_entity$calculating_md5 )
{
c$smtp$current_entity$calculating_md5 = F;
md5_hash_finish(c$id);
}
}
event mime_end_entity(c: connection) &priority=-3
{
# TODO: this check is only due to a bug in mime_end_entity that
# causes the event to be generated twice for the same real event.
if ( ! c?$smtp || ! c$smtp?$current_entity )
return;
if ( c$smtp$current_entity$calculating_md5 )
{
c$smtp$current_entity$md5 = md5_hash_finish(c$id);
NOTICE([$note=MD5, $msg=fmt("Calculated a hash for a MIME entity from %s", c$id$orig_h),
$sub=c$smtp$current_entity$md5, $conn=c]);
}
}
event mime_one_header(c: connection, h: mime_header_rec)
{
if ( ! c?$smtp ) return;
if ( h$name == "CONTENT-DISPOSITION" &&
/[fF][iI][lL][eE][nN][aA][mM][eE]/ in h$value )
c$smtp$current_entity$filename = extract_filename_from_content_disposition(h$value);
}
event mime_end_entity(c: connection) &priority=-5
{
if ( ! c?$smtp ) return;
# This check and the delete below are just to cope with a bug where
# mime_end_entity can be generated multiple times for the same event.
if ( ! c$smtp?$current_entity )
return;
# Only log if there was some content.
if ( c$smtp$current_entity$content_len > 0 )
Log::write(SMTP_ENTITIES, c$smtp$current_entity);
delete c$smtp$current_entity;
}
event mime_segment_data(c: connection, length: count, data: string) &priority=5
{
if ( ! c?$smtp ) return;
if ( extract_file_types in c$smtp$current_entity$mime_type )
c$smtp$current_entity$extract_file = T;
}
event mime_segment_data(c: connection, length: count, data: string) &priority=3
{
if ( ! c?$smtp ) return;
if ( c$smtp$current_entity$extract_file &&
c$smtp$current_entity$content_len == 0 )
{
local suffix = fmt("%d.dat", ++c$smtp_state$num_extracted_files);
local fname = generate_extraction_filename(extraction_prefix, c, suffix);
c$smtp$current_entity$extraction_file = open(fname);
enable_raw_output(c$smtp$current_entity$extraction_file);
}
}
event mime_segment_data(c: connection, length: count, data: string) &priority=-5
{
if ( ! c?$smtp ) return;
if ( c$smtp$current_entity$extract_file && c$smtp$current_entity?$extraction_file )
print c$smtp$current_entity$extraction_file, data;
}
event mime_end_entity(c: connection) &priority=-3
{
if ( ! c?$smtp ) return;
# TODO: this check is only due to a bug in mime_end_entity that
# causes the event to be generated twice for the same real event.
if ( ! c$smtp?$current_entity )
return;
if ( c$smtp$current_entity?$extraction_file )
close(c$smtp$current_entity$extraction_file);
}
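
A configuration sketch for the export block above (the pattern is illustrative; generate_md5 already matches executables by default):

# Also extract Windows/DOS executables seen in SMTP entity bodies.
redef SMTP::extract_file_types = /application\/x-dosexec/;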

View file

@@ -4,17 +4,14 @@ module SMTP;
export {
redef enum Log::ID += { SMTP };
redef enum Notice::Type += {
## Indicates that the server sent a reply mentioning an SMTP block list.
BL_Error_Message,
## Indicates the client's address is seen in the block list error message.
BL_Blocked_Host,
};
type Info: record {
ts: time &log;
uid: string &log;
id: conn_id &log;
## This is an internally generated "message id" that can be used to
## map between SMTP messages and MIME entities in the SMTP entities
## log.
mid: string &log;
helo: string &log &optional;
mailfrom: string &log &optional;
rcptto: set[string] &log &optional;
@@ -30,19 +27,13 @@ export {
second_received: string &log &optional;
## The last message the server sent to the client.
last_reply: string &log &optional;
files: set[string] &log &optional;
path: vector of addr &log &optional;
user_agent: string &log &optional;
## Indicate if this session is currently transmitting SMTP message
## envelope headers.
in_headers: bool &default=F;
## Indicate if the "Received: from" headers should still be processed.
process_received_from: bool &default=T;
## Maintain the current header for cases where there is header wrapping.
current_header: string &default="";
## Indicate when the message is logged and no longer applicable.
done: bool &default=F;
## Indicates if client activity has been seen, but not yet logged
has_client_activity: bool &default=F;
};
type State: record {
@@ -61,26 +52,7 @@ export {
## ALL_HOSTS - always capture the entire path.
## NO_HOSTS - never capture the path.
const mail_path_capture = ALL_HOSTS &redef;
# This matches content in SMTP error messages that indicate some
# block list doesn't like the connection/mail.
const bl_error_messages =
/spamhaus\.org\//
| /sophos\.com\/security\//
| /spamcop\.net\/bl/
| /cbl\.abuseat\.org\//
| /sorbs\.net\//
| /bsn\.borderware\.com\//
| /mail-abuse\.com\//
| /b\.barracudacentral\.com\//
| /psbl\.surriel\.com\//
| /antispam\.imp\.ch\//
| /dyndns\.com\/.*spam/
| /rbl\.knology\.net\//
| /intercept\.datapacket\.net\//
| /uceprotect\.net\//
| /hostkarma\.junkemailfilter\.com\// &redef;
global log_smtp: event(rec: Info);
## Configure the default ports for SMTP analysis.
@@ -121,6 +93,7 @@ function new_smtp_log(c: connection): Info
l$ts=network_time();
l$uid=c$uid;
l$id=c$id;
l$mid=unique_id("@");
if ( c?$smtp_state && c$smtp_state?$helo )
l$helo = c$smtp_state$helo;
@@ -136,26 +109,23 @@ function set_smtp_session(c: connection)
if ( ! c?$smtp_state )
c$smtp_state = [];
if ( ! c?$smtp || c$smtp$done )
{
if ( ! c?$smtp )
c$smtp = new_smtp_log(c);
}
}
function smtp_message(c: connection)
{
Log::write(SMTP, c$smtp);
c$smtp$done = T;
# Track the number of messages seen in this session.
++c$smtp_state$messages_transferred;
if ( c$smtp$has_client_activity )
Log::write(SMTP, c$smtp);
}
event smtp_request(c: connection, is_orig: bool, command: string, arg: string) &priority=5
{
set_smtp_session(c);
local upper_command = to_upper(command);
if ( upper_command != "QUIT" )
c$smtp$has_client_activity = T;
if ( upper_command == "HELO" || upper_command == "EHLO" )
{
@@ -172,26 +142,11 @@ event smtp_request(c: connection, is_orig: bool, command: string, arg: string) &
else if ( upper_command == "MAIL" && /^[fF][rR][oO][mM]:/ in arg )
{
# In case this is not the first message in a session we want to
# essentially write out a log, clear the session tracking, and begin
# new session tracking.
if ( c$smtp_state$messages_transferred > 0 )
{
smtp_message(c);
set_smtp_session(c);
}
local partially_done = split1(arg, /:[[:blank:]]*/)[2];
c$smtp$mailfrom = split1(partially_done, /[[:blank:]]?/)[1];
}
else if ( upper_command == "DATA" )
{
c$smtp$in_headers = T;
}
}
event smtp_reply(c: connection, is_orig: bool, code: count, cmd: string,
msg: string, cont_resp: bool) &priority=5
{
@@ -199,169 +154,98 @@ event smtp_reply(c: connection, is_orig: bool, code: count, cmd: string,
# This continually overwrites, but we want the last reply,
# so this actually works fine.
if ( code != 421 && code >= 400 )
c$smtp$last_reply = fmt("%d %s", code, msg);
}
event smtp_reply(c: connection, is_orig: bool, code: count, cmd: string,
msg: string, cont_resp: bool) &priority=-5
{
set_smtp_session(c);
if ( cmd == "." )
{
c$smtp$last_reply = fmt("%d %s", code, msg);
# Raise a notice when an SMTP error about a block list is discovered.
if ( bl_error_messages in msg )
{
local note = BL_Error_Message;
local message = fmt("%s received an error message mentioning an SMTP block list", c$id$orig_h);
# Determine if the originator's IP address is in the message.
local ips = find_ip_addresses(msg);
local text_ip = "";
if ( |ips| > 0 && to_addr(ips[0]) == c$id$orig_h )
{
note = BL_Blocked_Host;
message = fmt("%s is on an SMTP block list", c$id$orig_h);
}
NOTICE([$note=note, $conn=c, $msg=message, $sub=msg]);
}
# Track the number of messages seen in this session.
++c$smtp_state$messages_transferred;
smtp_message(c);
c$smtp = new_smtp_log(c);
}
}
event smtp_data(c: connection, is_orig: bool, data: string) &priority=5
event mime_one_header(c: connection, h: mime_header_rec) &priority=5
{
# Is there something we should be handling from the server?
if ( ! is_orig ) return;
set_smtp_session(c);
if ( ! c$smtp$in_headers )
{
if ( /^[cC][oO][nN][tT][eE][nN][tT]-[dD][iI][sS].*[fF][iI][lL][eE][nN][aA][mM][eE]/ in data )
{
if ( ! c$smtp?$files )
c$smtp$files = set();
data = sub(data, /^.*[fF][iI][lL][eE][nN][aA][mM][eE]=/, "");
add c$smtp$files[data];
}
return;
}
if ( ! c?$smtp ) return;
c$smtp$has_client_activity = T;
if ( /^[[:blank:]]*$/ in data )
c$smtp$in_headers = F;
if ( h$name == "MESSAGE-ID" )
c$smtp$msg_id = h$value;
# This is to reconstruct headers that tend to wrap around.
if ( /^[[:blank:]]/ in data )
{
# Remove all but a single space at the beginning (this seems to follow
# the most common behavior).
data = sub(data, /^[[:blank:]]*/, " ");
if ( c$smtp$current_header == "MESSAGE-ID" )
c$smtp$msg_id += data;
else if ( c$smtp$current_header == "RECEIVED" )
c$smtp$first_received += data;
else if ( c$smtp$current_header == "IN-REPLY-TO" )
c$smtp$in_reply_to += data;
else if ( c$smtp$current_header == "SUBJECCT" )
c$smtp$subject += data;
else if ( c$smtp$current_header == "FROM" )
c$smtp$from += data;
else if ( c$smtp$current_header == "REPLY-TO" )
c$smtp$reply_to += data;
else if ( c$smtp$current_header == "USER-AGENT" )
c$smtp$user_agent += data;
return;
}
# Once there isn't a line starting with a blank, we're not continuing a
# header anymore.
c$smtp$current_header = "";
local header_parts = split1(data, /:[[:blank:]]*/);
# TODO: do something in this case? This would definitely be odd.
# Header wrapping needs to be handled more elegantly. This will happen
# if the header value is wrapped immediately after the header key.
if ( |header_parts| != 2 )
return;
local header_key = to_upper(header_parts[1]);
c$smtp$current_header = header_key;
local header_val = header_parts[2];
if ( header_key == "MESSAGE-ID" )
c$smtp$msg_id = header_val;
else if ( header_key == "RECEIVED" )
else if ( h$name == "RECEIVED" )
{
if ( c$smtp?$first_received )
c$smtp$second_received = c$smtp$first_received;
c$smtp$first_received = header_val;
c$smtp$first_received = h$value;
}
else if ( header_key == "IN-REPLY-TO" )
c$smtp$in_reply_to = header_val;
else if ( header_key == "DATE" )
c$smtp$date = header_val;
else if ( header_key == "FROM" )
c$smtp$from = header_val;
else if ( header_key == "TO" )
else if ( h$name == "IN-REPLY-TO" )
c$smtp$in_reply_to = h$value;
else if ( h$name == "SUBJECT" )
c$smtp$subject = h$value;
else if ( h$name == "FROM" )
c$smtp$from = h$value;
else if ( h$name == "REPLY-TO" )
c$smtp$reply_to = h$value;
else if ( h$name == "DATE" )
c$smtp$date = h$value;
else if ( h$name == "TO" )
{
if ( ! c$smtp?$to )
c$smtp$to = set();
add c$smtp$to[header_val];
add c$smtp$to[h$value];
}
else if ( header_key == "REPLY-TO" )
c$smtp$reply_to = header_val;
else if ( header_key == "SUBJECT" )
c$smtp$subject = header_val;
else if ( header_key == "X-ORIGINATING-IP" )
else if ( h$name == "X-ORIGINATING-IP" )
{
local addresses = find_ip_addresses(header_val);
local addresses = find_ip_addresses(h$value);
if ( 1 in addresses )
c$smtp$x_originating_ip = to_addr(addresses[1]);
}
else if ( header_key == "X-MAILER" ||
header_key == "USER-AGENT" ||
header_key == "X-USER-AGENT" )
{
c$smtp$user_agent = header_val;
# Explicitly set the current header here because there are several
# headers bulked under this same key.
c$smtp$current_header = "USER-AGENT";
}
else if ( h$name == "X-MAILER" ||
h$name == "USER-AGENT" ||
h$name == "X-USER-AGENT" )
c$smtp$user_agent = h$value;
}
# This event handler builds the "Received From" path by reading the
# headers in the mail
event smtp_data(c: connection, is_orig: bool, data: string) &priority=3
event mime_one_header(c: connection, h: mime_header_rec) &priority=3
{
# If we've decided that we're done watching the received headers for
# whatever reason, we're done. Could be due to only watching until
# local addresses are seen in the received from headers.
if ( c$smtp$current_header != "RECEIVED" ||
! c$smtp$process_received_from )
if ( ! c?$smtp || h$name != "RECEIVED" || ! c$smtp$process_received_from )
return;
local text_ip = find_address_in_smtp_header(data);
local text_ip = find_address_in_smtp_header(h$value);
if ( text_ip == "" )
return;
local ip = to_addr(text_ip);
if ( ! addr_matches_host(ip, mail_path_capture) &&
! Site::is_private_addr(ip) )
{
c$smtp$process_received_from = F;
}
if ( c$smtp$path[|c$smtp$path|-1] != ip )
c$smtp$path[|c$smtp$path|] = ip;
}
event connection_state_remove(c: connection) &priority=-5
{
if ( c?$smtp && ! c$smtp$done )
if ( c?$smtp )
smtp_message(c);
}

View file

@@ -1,74 +1,58 @@
##! Base SSH analysis script. The heuristic to blindly determine success or
##! failure for SSH connections is implemented here. At this time, it only
##! uses the size of the data being returned from the server to make the
##! heuristic determination about success of the connection.
##! Requires that :bro:id:`use_conn_size_analyzer` is set to T! The heuristic
##! is not attempted if the connection size analyzer isn't enabled.
module SSH;
export {
redef enum Log::ID += { SSH };
redef enum Notice::Type += {
Login,
Password_Guessing,
Login_By_Password_Guesser,
Login_From_Interesting_Hostname,
Bytecount_Inconsistency,
};
type Info: record {
ts: time &log;
uid: string &log;
id: conn_id &log;
## Indicates if the login was heuristically guessed to be "success"
## or "failure".
status: string &log &optional;
direction: string &log &optional;
remote_location: geo_location &log &optional;
## Direction of the connection. If the client was a local host
## logging into an external host, this would be OUTBOUND. INBOUND
## would be set for the opposite situation.
# TODO: handle local-local and remote-remote better.
direction: Direction &log &optional;
## The software string given by the client.
client: string &log &optional;
## The software string given by the server.
server: string &log &optional;
## The amount of data returned from the server. This is currently
## the only measure of the success heuristic and it is logged to
## assist analysts looking at the logs to make their own determination
## about the success on a case-by-case basis.
resp_size: count &log &default=0;
## Indicate if the SSH session is done being watched.
done: bool &default=F;
};
const password_guesses_limit = 30 &redef;
# The size in bytes at which the SSH connection is presumed to be
# successful.
## The size in bytes at which the SSH connection is presumed to be
## successful.
const authentication_data_size = 5500 &redef;
# The amount of time to remember presumed non-successful logins to build
# model of a password guesser.
const guessing_timeout = 30 mins &redef;
# The set of countries for which you'd like to throw notices upon successful login
# requires Bro compiled with libGeoIP support
const watched_countries: set[string] = {"RO"} &redef;
# Strange/bad host names to originate successful SSH logins
const interesting_hostnames =
/^d?ns[0-9]*\./ |
/^smtp[0-9]*\./ |
/^mail[0-9]*\./ |
/^pop[0-9]*\./ |
/^imap[0-9]*\./ |
/^www[0-9]*\./ |
/^ftp[0-9]*\./ &redef;
# This is a table with orig subnet as the key, and subnet as the value.
const ignore_guessers: table[subnet] of subnet &redef;
# If true, we tell the event engine to not look at further data
# packets after the initial SSH handshake. Helps with performance
# (especially with large file transfers) but precludes some
# kinds of analyses (e.g., tracking connection size).
## If true, we tell the event engine to not look at further data
## packets after the initial SSH handshake. Helps with performance
## (especially with large file transfers) but precludes some
## kinds of analyses (e.g., tracking connection size).
const skip_processing_after_detection = F &redef;
# Keeps count of how many rejections a host has had
global password_rejections: table[addr] of TrackCount
&write_expire=guessing_timeout
&synchronized;
# Keeps track of hosts identified as guessing passwords
# TODO: guessing_timeout doesn't work correctly here. If a user redefs
# the variable, it won't take effect.
global password_guessers: set[addr] &read_expire=guessing_timeout+1hr &synchronized;
## This event is generated when the heuristic thinks that a login
## was successful.
global heuristic_successful_login: event(c: connection);
## This event is generated when the heuristic thinks that a login
## failed.
global heuristic_failed_login: event(c: connection);
global log_ssh: event(rec: Info);
}
@@ -106,116 +90,51 @@ function check_ssh_connection(c: connection, done: bool)
# If this is still a live connection and the byte count has not
# crossed the threshold, just return and let the rescheduled check happen later.
if ( !done && c$resp$size < authentication_data_size )
if ( !done && c$resp$num_bytes_ip < authentication_data_size )
return;
# Make sure the server has sent back more than 50 bytes to filter out
# hosts that are just port scanning. Nothing is ever logged if the server
# doesn't send back at least 50 bytes.
if ( c$resp$size < 50 )
if ( c$resp$num_bytes_ip < 50 )
return;
local status = "failure";
local direction = Site::is_local_addr(c$id$orig_h) ? "to" : "from";
local location: geo_location;
location = (direction == "to") ? lookup_location(c$id$resp_h) : lookup_location(c$id$orig_h);
c$ssh$direction = Site::is_local_addr(c$id$orig_h) ? OUTBOUND : INBOUND;
c$ssh$resp_size = c$resp$num_bytes_ip;
if ( done && c$resp$size < authentication_data_size )
if ( c$resp$num_bytes_ip < authentication_data_size )
{
# presumed failure
if ( c$id$orig_h !in password_rejections )
password_rejections[c$id$orig_h] = new_track_count();
# Track the number of rejections
if ( !(c$id$orig_h in ignore_guessers &&
c$id$resp_h in ignore_guessers[c$id$orig_h]) )
++password_rejections[c$id$orig_h]$n;
if ( default_check_threshold(password_rejections[c$id$orig_h]) )
{
add password_guessers[c$id$orig_h];
NOTICE([$note=Password_Guessing,
$conn=c,
$msg=fmt("SSH password guessing by %s", c$id$orig_h),
$sub=fmt("%d failed logins", password_rejections[c$id$orig_h]$n),
$n=password_rejections[c$id$orig_h]$n]);
}
}
# TODO: This is to work around a quasi-bug in Bro which occasionally
# causes the byte count to be oversized.
# Watch for Gregor's work that adds an actual counter of bytes transferred.
else if ( c$resp$size < 20000000 )
c$ssh$status = "failure";
event SSH::heuristic_failed_login(c);
}
else
{
# presumed successful login
status = "success";
c$ssh$done = T;
if ( c$id$orig_h in password_rejections &&
password_rejections[c$id$orig_h]$n > password_guesses_limit &&
c$id$orig_h !in password_guessers )
{
add password_guessers[c$id$orig_h];
NOTICE([$note=Login_By_Password_Guesser,
$conn=c,
$n=password_rejections[c$id$orig_h]$n,
$msg=fmt("Successful SSH login by password guesser %s", c$id$orig_h),
$sub=fmt("%d failed logins", password_rejections[c$id$orig_h]$n)]);
}
local message = fmt("SSH login %s %s \"%s\" \"%s\" %f %f %s (triggered with %d bytes)",
direction, location$country_code, location$region, location$city,
location$latitude, location$longitude,
id_string(c$id), c$resp$size);
NOTICE([$note=Login,
$conn=c,
$msg=message,
$sub=location$country_code]);
# Check to see if this login came from an interesting hostname
when ( local hostname = lookup_addr(c$id$orig_h) )
{
if ( interesting_hostnames in hostname )
{
NOTICE([$note=Login_From_Interesting_Hostname,
$conn=c,
$msg=fmt("Strange login from %s", hostname),
$sub=hostname]);
}
}
if ( location$country_code in watched_countries )
{
}
c$ssh$status = "success";
event SSH::heuristic_successful_login(c);
}
else if ( c$resp$size >= 200000000 )
{
NOTICE([$note=Bytecount_Inconsistency,
$conn=c,
$msg="During byte counting in SSH analysis, an overly large value was seen.",
$sub=fmt("%d",c$resp$size)]);
}
c$ssh$remote_location = location;
c$ssh$status = status;
c$ssh$direction = direction;
c$ssh$resp_size = c$resp$size;
Log::write(SSH, c$ssh);
# Set the "done" flag to prevent the watching event from rescheduling
# after detection is done.
c$ssh$done;
c$ssh$done=T;
# Stop watching this connection, we don't care about it anymore.
if ( skip_processing_after_detection )
{
# Stop watching this connection, we don't care about it anymore.
skip_further_processing(c$id);
set_record_packets(c$id, F);
}
}
event SSH::heuristic_successful_login(c: connection) &priority=-5
{
Log::write(SSH, c$ssh);
}
event SSH::heuristic_failed_login(c: connection) &priority=-5
{
Log::write(SSH, c$ssh);
}
event connection_state_remove(c: connection) &priority=-5
{
if ( c?$ssh )
@@ -226,7 +145,7 @@ event ssh_watcher(c: connection)
{
local id = c$id;
# don't go any further if this connection is gone already!
if ( !connection_exists(id) )
if ( ! connection_exists(id) )
return;
check_ssh_connection(c, F);
@@ -244,5 +163,9 @@ event ssh_client_version(c: connection, version: string) &priority=5
{
set_session(c);
c$ssh$client = version;
schedule +15secs { ssh_watcher(c) };
# The heuristic detection for SSH relies on the ConnSize analyzer.
# Don't do the heuristics if it's disabled.
if ( use_conn_size_analyzer )
schedule +15secs { ssh_watcher(c) };
}
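
The two new heuristic events give other scripts a clean hook at the moment a determination is made; a minimal sketch (the message body is illustrative):

event SSH::heuristic_successful_login(c: connection)
    {
    # c$ssh$resp_size carries the byte count the heuristic keyed on.
    print fmt("SSH login inferred for %s (%d bytes from server)",
              id_string(c$id), c$ssh$resp_size);
    }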

View file

@@ -12,4 +12,15 @@ function generate_extraction_filename(prefix: string, c: connection, suffix: str
conn_info = fmt("%s_%s", conn_info, suffix);
return conn_info;
}
}
## For CONTENT-DISPOSITION headers, this function can be used to extract
## the filename.
function extract_filename_from_content_disposition(data: string): string
{
local filename = sub(data, /^.*[fF][iI][lL][eE][nN][aA][mM][eE]=/, "");
# Remove quotes around the filename if they are there.
if ( /^\"/ in filename )
filename = split_n(filename, /\"/, F, 2)[2];
return filename;
}
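
An illustrative call against the new helper (the header value is made up):

event bro_init()
    {
    local f = extract_filename_from_content_disposition(
        "attachment; filename=\"report.pdf\"");
    print f; # prints: report.pdf
    }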

View file

@@ -16,6 +16,12 @@ export {
## Networks that are considered "local".
const local_nets: set[subnet] &redef;
## This is used for retrieving the subnet when you have multiple
## :bro:id:`local_nets`. A membership query can be done with an
## :bro:type:`addr` and the table will yield the subnet it was found
## within.
global local_nets_table: table[subnet] of subnet = {};
## Networks that are considered "neighbors".
const neighbor_nets: set[subnet] &redef;
@@ -138,4 +144,9 @@ event bro_init() &priority=10
# Double backslashes are needed due to string parsing.
local_dns_suffix_regex = set_to_regex(local_zones, "(^\\.?|\\.)(~~)$");
local_dns_neighbor_suffix_regex = set_to_regex(neighbor_zones, "(^\\.?|\\.)(~~)$");
# Create the local_nets mapping table.
for ( cidr in Site::local_nets )
local_nets_table[cidr] = cidr;
}
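
A lookup sketch against the new table (the address and priority are illustrative; a handler at a lower priority runs after the table is populated at priority 10 above):

event bro_init() &priority=-10
    {
    if ( 10.0.0.5 in Site::local_nets_table )
        # The membership query yields the enclosing subnet.
        print Site::local_nets_table[10.0.0.5];
    }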