Merge remote-tracking branch 'origin/master' into topic/bernhard/hyperloglog

Bernhard Amann 2013-07-30 14:31:09 -07:00
commit 5b9d80e50d
247 changed files with 2729 additions and 5372 deletions

View file

@ -0,0 +1 @@
@load ./main

View file

@ -0,0 +1,38 @@
@load base/frameworks/files
@load base/utils/paths
module FileExtract;
export {
## The prefix where files are extracted to.
const prefix = "./extract_files/" &redef;
redef record Files::Info += {
## The local filename of the extracted file.
extracted: string &optional &log;
};
redef record Files::AnalyzerArgs += {
## The local filename to which to write an extracted file.
## This field is used in the core by the extraction plugin
## to know where to write the file to. It's also optional;
## if left unset, a default name derived from the file's
## source and id is assigned.
extract_filename: string &optional;
};
}
function on_add(f: fa_file, args: Files::AnalyzerArgs)
{
if ( ! args?$extract_filename )
args$extract_filename = cat("extract-", f$source, "-", f$id);
f$info$extracted = args$extract_filename;
args$extract_filename = build_path_compressed(prefix, args$extract_filename);
}
event bro_init() &priority=10
{
Files::register_analyzer_add_callback(Files::ANALYZER_EXTRACT, on_add);
# Create the extraction directory.
mkdir(prefix);
}
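A minimal usage sketch: with this script loaded, attaching the extraction analyzer to every new file lets the on_add() callback above choose the default filename under FileExtract::prefix.

event file_new(f: fa_file)
	{
	# Attach the extraction analyzer; on_add() then assigns a default
	# extract_filename of the form "extract-<source>-<id>".
	Files::add_analyzer(f, Files::ANALYZER_EXTRACT);
	}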

View file

@ -0,0 +1 @@
@load ./main

View file

@ -0,0 +1,32 @@
@load base/frameworks/files
module FileHash;
export {
redef record Files::Info += {
## An MD5 digest of the file contents.
md5: string &log &optional;
## A SHA1 digest of the file contents.
sha1: string &log &optional;
## A SHA256 digest of the file contents.
sha256: string &log &optional;
};
}
event file_hash(f: fa_file, kind: string, hash: string) &priority=5
{
switch ( kind ) {
case "md5":
f$info$md5 = hash;
break;
case "sha1":
f$info$sha1 = hash;
break;
case "sha256":
f$info$sha256 = hash;
break;
}
}
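The digest fields above are only populated once a matching hash analyzer has been attached to the file. A hedged sketch, assuming the hash analyzers are exposed under tags such as Files::ANALYZER_MD5:

event file_new(f: fa_file)
	{
	# Request an MD5 digest; the file_hash handler above copies the
	# resulting hash into f$info$md5 for logging.
	Files::add_analyzer(f, Files::ANALYZER_MD5);
	}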

View file

@ -81,6 +81,13 @@ export {
## Returns: The analyzer name corresponding to the tag.
global name: function(tag: Analyzer::Tag) : string;
## Translates an analyzer's name to a tag enum value.
##
## name: The analyzer name.
##
## Returns: The analyzer tag corresponding to the name.
global get_tag: function(name: string): Analyzer::Tag;
## Schedules an analyzer for a future connection originating from a given IP
## address and port.
##
@ -187,6 +194,11 @@ function name(atype: Analyzer::Tag) : string
return __name(atype);
}
function get_tag(name: string): Analyzer::Tag
{
return __tag(name);
}
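A short sketch of the new reverse lookup, assuming "HTTP" is the name of a registered analyzer:

event bro_init()
	{
	local t = Analyzer::get_tag("HTTP");
	# name() is the inverse mapping, so this prints "HTTP" again.
	print Analyzer::name(t);
	}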
function schedule_analyzer(orig: addr, resp: addr, resp_p: port,
analyzer: Analyzer::Tag, tout: interval) : bool
{

View file

@ -1,261 +0,0 @@
##! An interface for driving the analysis of files, possibly independent of
##! any network protocol over which they're transported.
@load base/bif/file_analysis.bif
@load base/frameworks/logging
module FileAnalysis;
export {
redef enum Log::ID += {
## Logging stream for file analysis.
LOG
};
## A structure which represents a desired type of file analysis.
type AnalyzerArgs: record {
## The type of analysis.
tag: FileAnalysis::Tag;
## The local filename to which to write an extracted file. Must be
## set when *tag* is :bro:see:`FileAnalysis::ANALYZER_EXTRACT`.
extract_filename: string &optional;
## An event which will be generated for all new file contents,
## chunk-wise. Used when *tag* is
## :bro:see:`FileAnalysis::ANALYZER_DATA_EVENT`.
chunk_event: event(f: fa_file, data: string, off: count) &optional;
## An event which will be generated for all new file contents,
## stream-wise. Used when *tag* is
## :bro:see:`FileAnalysis::ANALYZER_DATA_EVENT`.
stream_event: event(f: fa_file, data: string) &optional;
} &redef;
## Contains all metadata related to the analysis of a given file.
## For the most part, fields here are derived from ones of the same name
## in :bro:see:`fa_file`.
type Info: record {
## An identifier associated with a single file.
id: string &log;
## Identifier associated with a container file from which this one was
## extracted as part of the file analysis.
parent_id: string &log &optional;
## An identification of the source of the file data. E.g. it may be
## a network protocol over which it was transferred, or a local file
## path which was read, or some other input source.
source: string &log &optional;
## If the source of this file is a network connection, this field
## may be set to indicate the directionality.
is_orig: bool &log &optional;
## The time at which the last activity for the file was seen.
last_active: time &log;
## Number of bytes provided to the file analysis engine for the file.
seen_bytes: count &log &default=0;
## Total number of bytes that are supposed to comprise the full file.
total_bytes: count &log &optional;
## The number of bytes in the file stream that were completely missed
## during the process of analysis e.g. due to dropped packets.
missing_bytes: count &log &default=0;
## The number of bytes in the file stream that were delivered to
## file analyzers out of sequence due to reassembly buffer overflow.
overflow_bytes: count &log &default=0;
## The amount of time between receiving new data for this file that
## the analysis engine will wait before giving up on it.
timeout_interval: interval &log &optional;
## The number of bytes at the beginning of a file to save for later
## inspection in *bof_buffer* field.
bof_buffer_size: count &log &optional;
## A mime type provided by libmagic against the *bof_buffer*, or
## in the cases where no buffering of the beginning of file occurs,
## an initial guess of the mime type based on the first data seen.
mime_type: string &log &optional;
## Whether the file analysis timed out at least once for the file.
timedout: bool &log &default=F;
## Connection UIDS over which the file was transferred.
conn_uids: set[string] &log;
## A set of analysis types done during the file analysis.
analyzers: set[FileAnalysis::Tag];
## Local filenames of extracted files.
extracted_files: set[string] &log;
## An MD5 digest of the file contents.
md5: string &log &optional;
## A SHA1 digest of the file contents.
sha1: string &log &optional;
## A SHA256 digest of the file contents.
sha256: string &log &optional;
} &redef;
## A table that can be used to disable file analysis completely for
## any files transferred over given network protocol analyzers.
const disable: table[Analyzer::Tag] of bool = table() &redef;
## Event that can be handled to access the Info record as it is sent on
## to the logging framework.
global log_file_analysis: event(rec: Info);
## The salt concatenated to unique file handle strings generated by
## :bro:see:`get_file_handle` before hashing them into a file id
## (the *id* field of :bro:see:`fa_file`).
## Provided to help mitigate the possibility of manipulating parts of
## network connections that factor into the file handle in order to
## generate two handles that would hash to the same file id.
const salt = "I recommend changing this." &redef;
## Sets the *timeout_interval* field of :bro:see:`fa_file`, which is
## used to determine the length of inactivity that is allowed for a file
## before internal state related to it is cleaned up. When used within a
## :bro:see:`file_timeout` handler, the analysis will delay timing out
## again for the period specified by *t*.
##
## f: the file.
##
## t: the amount of time the file can remain inactive before discarding.
##
## Returns: true if the timeout interval was set, or false if analysis
## for the *id* isn't currently active.
global set_timeout_interval: function(f: fa_file, t: interval): bool;
## Adds an analyzer to the analysis of a given file.
##
## f: the file.
##
## args: the analyzer type to add along with any arguments it takes.
##
## Returns: true if the analyzer will be added, or false if analysis
## for the *id* isn't currently active or the *args*
## were invalid for the analyzer type.
global add_analyzer: function(f: fa_file, args: AnalyzerArgs): bool;
## Removes an analyzer from the analysis of a given file.
##
## f: the file.
##
## args: the analyzer (type and args) to remove.
##
## Returns: true if the analyzer will be removed, or false if analysis
## for the *id* isn't currently active.
global remove_analyzer: function(f: fa_file, args: AnalyzerArgs): bool;
## Stops/ignores any further analysis of a given file.
##
## f: the file.
##
## Returns: true if analysis for the given file will be ignored for the
## rest of its contents, or false if analysis for the *id*
## isn't currently active.
global stop: function(f: fa_file): bool;
}
redef record fa_file += {
info: Info &optional;
};
function set_info(f: fa_file)
{
if ( ! f?$info )
{
local tmp: Info;
f$info = tmp;
}
f$info$id = f$id;
if ( f?$parent_id ) f$info$parent_id = f$parent_id;
if ( f?$source ) f$info$source = f$source;
if ( f?$is_orig ) f$info$is_orig = f$is_orig;
f$info$last_active = f$last_active;
f$info$seen_bytes = f$seen_bytes;
if ( f?$total_bytes ) f$info$total_bytes = f$total_bytes;
f$info$missing_bytes = f$missing_bytes;
f$info$overflow_bytes = f$overflow_bytes;
f$info$timeout_interval = f$timeout_interval;
f$info$bof_buffer_size = f$bof_buffer_size;
if ( f?$mime_type ) f$info$mime_type = f$mime_type;
if ( f?$conns )
for ( cid in f$conns )
add f$info$conn_uids[f$conns[cid]$uid];
}
function set_timeout_interval(f: fa_file, t: interval): bool
{
return __set_timeout_interval(f$id, t);
}
function add_analyzer(f: fa_file, args: AnalyzerArgs): bool
{
if ( ! __add_analyzer(f$id, args) ) return F;
set_info(f);
add f$info$analyzers[args$tag];
if ( args$tag == FileAnalysis::ANALYZER_EXTRACT )
add f$info$extracted_files[args$extract_filename];
return T;
}
function remove_analyzer(f: fa_file, args: AnalyzerArgs): bool
{
return __remove_analyzer(f$id, args);
}
function stop(f: fa_file): bool
{
return __stop(f$id);
}
event bro_init() &priority=5
{
Log::create_stream(FileAnalysis::LOG,
[$columns=Info, $ev=log_file_analysis]);
}
event file_timeout(f: fa_file) &priority=5
{
set_info(f);
f$info$timedout = T;
}
event file_hash(f: fa_file, kind: string, hash: string) &priority=5
{
set_info(f);
switch ( kind ) {
case "md5":
f$info$md5 = hash;
break;
case "sha1":
f$info$sha1 = hash;
break;
case "sha256":
f$info$sha256 = hash;
break;
}
}
event file_state_remove(f: fa_file) &priority=5
{
set_info(f);
}
event file_state_remove(f: fa_file) &priority=-5
{
Log::write(FileAnalysis::LOG, f$info);
}

View file

@ -0,0 +1,371 @@
##! An interface for driving the analysis of files, possibly independent of
##! any network protocol over which they're transported.
@load base/bif/file_analysis.bif
@load base/frameworks/analyzer
@load base/frameworks/logging
@load base/utils/site
module Files;
export {
redef enum Log::ID += {
## Logging stream for file analysis.
LOG
};
## A structure which represents a desired type of file analysis.
type AnalyzerArgs: record {
## An event which will be generated for all new file contents,
## chunk-wise. Used when *tag* is
## :bro:see:`Files::ANALYZER_DATA_EVENT`.
chunk_event: event(f: fa_file, data: string, off: count) &optional;
## An event which will be generated for all new file contents,
## stream-wise. Used when *tag* is
## :bro:see:`Files::ANALYZER_DATA_EVENT`.
stream_event: event(f: fa_file, data: string) &optional;
} &redef;
## Contains all metadata related to the analysis of a given file.
## For the most part, fields here are derived from ones of the same name
## in :bro:see:`fa_file`.
type Info: record {
## The time when the file was first seen.
ts: time &log;
## An identifier associated with a single file.
fuid: string &log;
## If this file was transferred over a network
## connection, this should show the host or hosts that
## the data was sourced from.
tx_hosts: set[addr] &log;
## If this file was transferred over a network
## connection, this should show the host or hosts that
## the data traveled to.
rx_hosts: set[addr] &log;
## Connection UIDS over which the file was transferred.
conn_uids: set[string] &log;
## An identification of the source of the file data. E.g. it may be
## a network protocol over which it was transferred, or a local file
## path which was read, or some other input source.
source: string &log &optional;
## A value to represent the depth of this file in relation
## to its source. In SMTP, it is the depth of the MIME
## attachment on the message. In HTTP, it is the depth of the
## request within the TCP connection.
depth: count &default=0 &log;
## A set of analysis types done during the file analysis.
analyzers: set[string] &log;
## A mime type provided by libmagic against the *bof_buffer*, or,
## in cases where no buffering of the beginning of the file occurs,
## an initial guess of the mime type based on the first data seen.
mime_type: string &log &optional;
## A filename for the file if one is available from the source
## of the file. These will frequently come from
## "Content-Disposition" headers in network protocols.
filename: string &log &optional;
## The duration the file was analyzed for.
duration: interval &log &default=0secs;
## If the source of this file is a network connection, this field
## indicates if the data originated from the local network or not, as
## determined by the configured :bro:see:`Site::local_nets`.
local_orig: bool &log &optional;
## If the source of this file is a network connection, this field
## indicates if the file is being sent by the originator of the connection
## or the responder.
is_orig: bool &log &optional;
## Number of bytes provided to the file analysis engine for the file.
seen_bytes: count &log &default=0;
## Total number of bytes that are supposed to comprise the full file.
total_bytes: count &log &optional;
## The number of bytes in the file stream that were completely missed
## during the process of analysis e.g. due to dropped packets.
missing_bytes: count &log &default=0;
## The number of bytes in the file stream that were delivered to
## file analyzers out of sequence due to reassembly buffer overflow.
overflow_bytes: count &log &default=0;
## Whether the file analysis timed out at least once for the file.
timedout: bool &log &default=F;
## Identifier associated with a container file from which this one was
## extracted as part of the file analysis.
parent_fuid: string &log &optional;
} &redef;
## A table that can be used to disable file analysis completely for
## any files transferred over given network protocol analyzers.
const disable: table[Files::Tag] of bool = table() &redef;
## The salt concatenated to unique file handle strings generated by
## :bro:see:`get_file_handle` before hashing them into a file id
## (the *id* field of :bro:see:`fa_file`).
## Provided to help mitigate the possibility of manipulating parts of
## network connections that factor into the file handle in order to
## generate two handles that would hash to the same file id.
const salt = "I recommend changing this." &redef;
## Sets the *timeout_interval* field of :bro:see:`fa_file`, which is
## used to determine the length of inactivity that is allowed for a file
## before internal state related to it is cleaned up. When used within a
## :bro:see:`file_timeout` handler, the analysis will delay timing out
## again for the period specified by *t*.
##
## f: the file.
##
## t: the amount of time the file can remain inactive before discarding.
##
## Returns: true if the timeout interval was set, or false if analysis
## for the *id* isn't currently active.
global set_timeout_interval: function(f: fa_file, t: interval): bool;
## Adds an analyzer to the analysis of a given file.
##
## f: the file.
##
## tag: the analyzer type.
##
## args: any parameters the analyzer takes.
##
## Returns: true if the analyzer will be added, or false if analysis
## for the *id* isn't currently active or the *args*
## were invalid for the analyzer type.
global add_analyzer: function(f: fa_file,
tag: Files::Tag,
args: AnalyzerArgs &default=AnalyzerArgs()): bool;
## Removes an analyzer from the analysis of a given file.
##
## f: the file.
##
## args: the analyzer (type and args) to remove.
##
## Returns: true if the analyzer will be removed, or false if analysis
## for the *id* isn't currently active.
global remove_analyzer: function(f: fa_file,
tag: Files::Tag,
args: AnalyzerArgs &default=AnalyzerArgs()): bool;
## Stops/ignores any further analysis of a given file.
##
## f: the file.
##
## Returns: true if analysis for the given file will be ignored for the
## rest of its contents, or false if analysis for the *id*
## isn't currently active.
global stop: function(f: fa_file): bool;
## Translates a file analyzer enum value to a string with the analyzer's name.
##
## tag: The analyzer tag.
##
## Returns: The analyzer name corresponding to the tag.
global analyzer_name: function(tag: Files::Tag): string;
## Provides a text description regarding metadata of the file.
## For example, with HTTP it would return a URL.
##
## f: The file to be described.
##
## Returns: A text description regarding metadata of the file.
global describe: function(f: fa_file): string;
type ProtoRegistration: record {
## A callback to generate a file handle on demand when
## one is needed by the core.
get_file_handle: function(c: connection, is_orig: bool): string;
## A callback to "describe" a file. In the case of an HTTP
## transfer the most obvious description would be the URL.
## It's like an extremely compressed version of the normal log.
describe: function(f: fa_file): string
&default=function(f: fa_file): string { return ""; };
};
## Register callbacks for protocols that work with the Files framework.
## The callbacks must uniquely identify a file and each protocol can
## only have a single callback registered for it.
##
## tag: Tag for the protocol analyzer having a callback being registered.
##
## reg: A :bro:see:`ProtoRegistration` record.
##
## Returns: true if the protocol being registered was not previously registered.
global register_protocol: function(tag: Analyzer::Tag, reg: ProtoRegistration): bool;
## Register a callback for file analyzers to use if they need to do some
## manipulation when they are being added to a file, before the core code
## takes over. This is unlikely to be interesting for users and should
## only be called by file analyzer authors, but it is *not required*.
##
## tag: Tag for the file analyzer.
##
## callback: Function to execute when the given file analyzer is being added.
global register_analyzer_add_callback: function(tag: Files::Tag, callback: function(f: fa_file, args: AnalyzerArgs));
## Event that can be handled to access the Info record as it is sent on
## to the logging framework.
global log_files: event(rec: Info);
}
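As a hedged sketch of the describe() entry point documented above, a handler can print a one-line description of each finished file, assuming the carrying protocol registered a describe callback via register_protocol() (as FTP and HTTP do elsewhere in this commit):

event file_state_remove(f: fa_file)
	{
	# Falls back to an empty string when no protocol callback matches.
	print fmt("%s: %s", f$id, Files::describe(f));
	}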
redef record fa_file += {
info: Info &optional;
};
redef record AnalyzerArgs += {
# This is used internally by the core file analyzer API.
tag: Files::Tag &optional;
};
# Store the callbacks for protocol analyzers that have files.
global registered_protocols: table[Analyzer::Tag] of ProtoRegistration = table();
global analyzer_add_callbacks: table[Files::Tag] of function(f: fa_file, args: AnalyzerArgs) = table();
event bro_init() &priority=5
{
Log::create_stream(Files::LOG, [$columns=Info, $ev=log_files]);
}
function set_info(f: fa_file)
{
if ( ! f?$info )
{
local tmp: Info = Info($ts=f$last_active,
$fuid=f$id);
f$info = tmp;
}
if ( f?$parent_id )
f$info$parent_fuid = f$parent_id;
if ( f?$source )
f$info$source = f$source;
f$info$duration = f$last_active - f$info$ts;
f$info$seen_bytes = f$seen_bytes;
if ( f?$total_bytes )
f$info$total_bytes = f$total_bytes;
f$info$missing_bytes = f$missing_bytes;
f$info$overflow_bytes = f$overflow_bytes;
if ( f?$is_orig )
f$info$is_orig = f$is_orig;
if ( f?$mime_type )
f$info$mime_type = f$mime_type;
}
function set_timeout_interval(f: fa_file, t: interval): bool
{
return __set_timeout_interval(f$id, t);
}
function add_analyzer(f: fa_file, tag: Files::Tag, args: AnalyzerArgs): bool
{
# This is to construct the correct args for the core API.
args$tag = tag;
add f$info$analyzers[Files::analyzer_name(tag)];
if ( tag in analyzer_add_callbacks )
analyzer_add_callbacks[tag](f, args);
if ( ! __add_analyzer(f$id, args) )
{
Reporter::warning(fmt("Analyzer %s not added successfully to file %s.", tag, f$id));
return F;
}
return T;
}
function register_analyzer_add_callback(tag: Files::Tag, callback: function(f: fa_file, args: AnalyzerArgs))
{
analyzer_add_callbacks[tag] = callback;
}
function remove_analyzer(f: fa_file, tag: Files::Tag, args: AnalyzerArgs): bool
{
args$tag = tag;
return __remove_analyzer(f$id, args);
}
function stop(f: fa_file): bool
{
return __stop(f$id);
}
function analyzer_name(tag: Files::Tag): string
{
return __analyzer_name(tag);
}
event file_new(f: fa_file) &priority=10
{
set_info(f);
}
event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=10
{
set_info(f);
add f$info$conn_uids[c$uid];
local cid = c$id;
add f$info$tx_hosts[f$is_orig ? cid$orig_h : cid$resp_h];
if ( |Site::local_nets| > 0 )
f$info$local_orig = Site::is_local_addr(f$is_orig ? cid$orig_h : cid$resp_h);
add f$info$rx_hosts[f$is_orig ? cid$resp_h : cid$orig_h];
}
event file_timeout(f: fa_file) &priority=10
{
set_info(f);
f$info$timedout = T;
}
event file_state_remove(f: fa_file) &priority=10
{
set_info(f);
}
event file_state_remove(f: fa_file) &priority=-10
{
Log::write(Files::LOG, f$info);
}
function register_protocol(tag: Analyzer::Tag, reg: ProtoRegistration): bool
{
local result = (tag !in registered_protocols);
registered_protocols[tag] = reg;
return result;
}
function describe(f: fa_file): string
{
local tag = Analyzer::get_tag(f$source);
if ( tag !in registered_protocols )
return "";
local handler = registered_protocols[tag];
return handler$describe(f);
}
event get_file_handle(tag: Analyzer::Tag, c: connection, is_orig: bool) &priority=5
{
if ( tag !in registered_protocols )
return;
local handler = registered_protocols[tag];
set_file_handle(handler$get_file_handle(c, is_orig));
}
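Beyond extraction and hashing, the AnalyzerArgs events documented above let file content be delivered straight to script land. A hedged sketch, assuming the data-event analyzer referenced in the documentation is available as Files::ANALYZER_DATA_EVENT; the my_file_stream handler name is purely illustrative:

event my_file_stream(f: fa_file, data: string)
	{
	# Receives the reassembled file stream piece by piece.
	print fmt("%s delivered %d bytes", f$id, |data|);
	}

event file_new(f: fa_file)
	{
	Files::add_analyzer(f, Files::ANALYZER_DATA_EVENT,
	                    [$stream_event=my_file_stream]);
	}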

View file

@ -10,13 +10,14 @@ module Intel;
export {
redef enum Log::ID += { LOG };
## String data needs to be further categorized since it could represent
## any number of types of data.
type StrType: enum {
## Enum type to represent various types of intelligence data.
type Type: enum {
## An IP address.
ADDR,
## A complete URL without the prefix "http://".
URL,
## User-Agent string, typically HTTP or mail message body.
USER_AGENT,
## Software name.
SOFTWARE,
## Email address.
EMAIL,
## DNS domain name.
@ -44,18 +45,15 @@ export {
## Represents a piece of intelligence.
type Item: record {
## The IP address if the intelligence is about an IP address.
host: addr &optional;
## The network if the intelligence is about a CIDR block.
net: subnet &optional;
## The string if the intelligence is about a string.
str: string &optional;
## The type of data that is in the string if the $str field is set.
str_type: StrType &optional;
## The intelligence indicator.
indicator: string;
## The type of data that the indicator field represents.
indicator_type: Type;
## Metadata for the item. Typically represents more deeply \
## Metadata for the item. Typically represents more deeply
## descriptive data for a piece of intelligence.
meta: MetaData;
meta: MetaData;
};
## Enum to represent where data came from when it was discovered.
@ -65,23 +63,23 @@ export {
IN_ANYWHERE,
};
## The $host field and combination of $str and $str_type fields are mutually
## exclusive. These records *must* represent either an IP address being
## seen or a string being seen.
type Seen: record {
## The IP address if the data seen is an IP address.
host: addr &log &optional;
## The string if the data is about a string.
str: string &log &optional;
## The type of data that is in the string if the $str field is set.
str_type: StrType &log &optional;
indicator: string &log &optional;
## The type of data that the indicator represents.
indicator_type: Type &log &optional;
## If the indicator type was :bro:enum:`Intel::ADDR`, then this
## field will be present.
host: addr &optional;
## Where the data was discovered.
where: Where &log;
where: Where &log;
## If the data was discovered within a connection, the
## connection record should be provided to give context to the data.
conn: connection &optional;
conn: connection &optional;
};
## Record used for the logging framework representing a positive
@ -100,7 +98,7 @@ export {
## Where the data was seen.
seen: Seen &log;
## Sources which supplied data that resulted in this match.
sources: set[string] &log;
sources: set[string] &log &default=string_set();
};
## Intelligence data manipulation functions.
@ -135,8 +133,8 @@ const have_full_data = T &redef;
# The in memory data structure for holding intelligence.
type DataStore: record {
net_data: table[subnet] of set[MetaData];
string_data: table[string, StrType] of set[MetaData];
host_data: table[addr] of set[MetaData];
string_data: table[string, Type] of set[MetaData];
};
global data_store: DataStore &redef;
@ -144,8 +142,8 @@ global data_store: DataStore &redef;
# This is primarily for workers to do the initial quick matches and store
# a minimal amount of data for the full match to happen on the manager.
type MinDataStore: record {
net_data: set[subnet];
string_data: set[string, StrType];
host_data: set[addr];
string_data: set[string, Type];
};
global min_data_store: MinDataStore &redef;
@ -157,15 +155,13 @@ event bro_init() &priority=5
function find(s: Seen): bool
{
if ( s?$host &&
((have_full_data && s$host in data_store$net_data) ||
(s$host in min_data_store$net_data)))
if ( s?$host )
{
return T;
return ((s$host in min_data_store$host_data) ||
(have_full_data && s$host in data_store$host_data));
}
else if ( s?$str && s?$str_type &&
((have_full_data && [s$str, s$str_type] in data_store$string_data) ||
([s$str, s$str_type] in min_data_store$string_data)))
else if ( ([to_lower(s$indicator), s$indicator_type] in min_data_store$string_data) ||
(have_full_data && [to_lower(s$indicator), s$indicator_type] in data_store$string_data) )
{
return T;
}
@ -177,8 +173,7 @@ function find(s: Seen): bool
function get_items(s: Seen): set[Item]
{
local item: Item;
local return_data: set[Item] = set();
local return_data: set[Item];
if ( ! have_full_data )
{
@ -191,26 +186,23 @@ function get_items(s: Seen): set[Item]
if ( s?$host )
{
# See if the host is known about and it has meta values
if ( s$host in data_store$net_data )
if ( s$host in data_store$host_data )
{
for ( m in data_store$net_data[s$host] )
for ( m in data_store$host_data[s$host] )
{
# TODO: the lookup should be finding all and not just most specific
# and $host/$net should have the correct value.
item = [$host=s$host, $meta=m];
add return_data[item];
add return_data[Item($indicator=cat(s$host), $indicator_type=ADDR, $meta=m)];
}
}
}
else if ( s?$str && s?$str_type )
else
{
local lower_indicator = to_lower(s$indicator);
# See if the string is known about and it has meta values
if ( [s$str, s$str_type] in data_store$string_data )
if ( [lower_indicator, s$indicator_type] in data_store$string_data )
{
for ( m in data_store$string_data[s$str, s$str_type] )
for ( m in data_store$string_data[lower_indicator, s$indicator_type] )
{
item = [$str=s$str, $str_type=s$str_type, $meta=m];
add return_data[item];
add return_data[Item($indicator=s$indicator, $indicator_type=s$indicator_type, $meta=m)];
}
}
}
@ -222,6 +214,12 @@ function Intel::seen(s: Seen)
{
if ( find(s) )
{
if ( s?$host )
{
s$indicator = cat(s$host);
s$indicator_type = Intel::ADDR;
}
if ( have_full_data )
{
local items = get_items(s);
@ -250,8 +248,7 @@ function has_meta(check: MetaData, metas: set[MetaData]): bool
event Intel::match(s: Seen, items: set[Item]) &priority=5
{
local empty_set: set[string] = set();
local info: Info = [$ts=network_time(), $seen=s, $sources=empty_set];
local info: Info = [$ts=network_time(), $seen=s];
if ( s?$conn )
{
@ -267,52 +264,37 @@ event Intel::match(s: Seen, items: set[Item]) &priority=5
function insert(item: Item)
{
if ( item?$str && !item?$str_type )
{
event reporter_warning(network_time(), fmt("You must provide a str_type for strings or this item doesn't make sense. Item: %s", item), "");
return;
}
# Create and fill out the meta data item.
local meta = item$meta;
local metas: set[MetaData];
if ( item?$host )
# All intelligence is case insensitive at the moment.
local lower_indicator = to_lower(item$indicator);
if ( item$indicator_type == ADDR )
{
local host = mask_addr(item$host, is_v4_addr(item$host) ? 32 : 128);
local host = to_addr(item$indicator);
if ( have_full_data )
{
if ( host !in data_store$net_data )
data_store$net_data[host] = set();
if ( host !in data_store$host_data )
data_store$host_data[host] = set();
metas = data_store$net_data[host];
metas = data_store$host_data[host];
}
add min_data_store$net_data[host];
add min_data_store$host_data[host];
}
else if ( item?$net )
else
{
if ( have_full_data )
{
if ( item$net !in data_store$net_data )
data_store$net_data[item$net] = set();
if ( [lower_indicator, item$indicator_type] !in data_store$string_data )
data_store$string_data[lower_indicator, item$indicator_type] = set();
metas = data_store$net_data[item$net];
metas = data_store$string_data[lower_indicator, item$indicator_type];
}
add min_data_store$net_data[item$net];
}
else if ( item?$str )
{
if ( have_full_data )
{
if ( [item$str, item$str_type] !in data_store$string_data )
data_store$string_data[item$str, item$str_type] = set();
metas = data_store$string_data[item$str, item$str_type];
}
add min_data_store$string_data[item$str, item$str_type];
add min_data_store$string_data[lower_indicator, item$indicator_type];
}
local updated = F;
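To exercise the reworked indicator model, a hedged sketch of loading one item follows; Intel::insert and the MetaData $source field are assumed from the framework's data-manipulation API, which this hunk truncates:

event bro_init()
	{
	# Indicator strings are matched case-insensitively by insert()/find().
	Intel::insert(Intel::Item($indicator="198.51.100.1",
	                          $indicator_type=Intel::ADDR,
	                          $meta=Intel::MetaData($source="example-feed")));
	}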

View file

@ -68,6 +68,25 @@ export {
## the notice policy.
iconn: icmp_conn &optional;
## A file record if the notice is related to a file. The
## reference to the actual fa_file record will be deleted after applying
## the notice policy.
f: fa_file &optional;
## A file unique ID if this notice is related to a file. If the $f
## field is provided, this will be automatically filled out.
fuid: string &log &optional;
## A mime type if the notice is related to a file. If the $f field
## is provided, this will be automatically filled out.
file_mime_type: string &log &optional;
## Frequently files can be "described" to give a bit more context.
## This field will typically be automatically filled out from an
## fa_file record. For example, if a notice was related to a
## file over HTTP, the URL of the request would be shown.
file_desc: string &log &optional;
## The transport protocol. Filled automatically when either conn, iconn
## or p is specified.
proto: transport_proto &log &optional;
@ -460,10 +479,28 @@ function apply_policy(n: Notice::Info)
if ( ! n?$ts )
n$ts = network_time();
if ( n?$f )
{
if ( ! n?$fuid )
n$fuid = n$f$id;
if ( ! n?$file_mime_type && n$f?$mime_type )
n$file_mime_type = n$f$mime_type;
n$file_desc = Files::describe(n$f);
if ( n$f?$conns && |n$f$conns| == 1 )
{
for ( id in n$f$conns )
n$conn = n$f$conns[id];
}
}
if ( n?$conn )
{
if ( ! n?$id )
n$id = n$conn$id;
if ( ! n?$uid )
n$uid = n$conn$uid;
}
@ -513,13 +550,15 @@ function apply_policy(n: Notice::Info)
if ( ! n?$suppress_for )
n$suppress_for = default_suppression_interval;
# Delete the connection record if it's there so we aren't sending that
# to remote machines. It can cause problems due to the size of the
# connection record.
# Delete the connection and file records if they're there so we
# aren't sending that to remote machines. It can cause problems
# due to the size of those records.
if ( n?$conn )
delete n$conn;
if ( n?$iconn )
delete n$iconn;
if ( n?$f )
delete n$f;
}
function internal_NOTICE(n: Notice::Info)
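With the new $f field, a notice can be raised directly from a file event; apply_policy() above derives fuid, file_mime_type, file_desc, and (when exactly one connection is attached) conn from it before the fa_file reference is deleted. A minimal sketch with a hypothetical notice type:

redef enum Notice::Type += {
	## Hypothetical notice type used only for this example.
	Example_File_Seen
};

event file_new(f: fa_file)
	{
	NOTICE([$note=Example_File_Seen, $msg="example file notice", $f=f]);
	}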

View file

@ -328,7 +328,7 @@ type fa_file: record {
## An identification of the source of the file data. E.g. it may be
## a network protocol over which it was transferred, or a local file
## path which was read, or some other input source.
source: string &optional;
source: string;
## If the source of this file is a network connection, this field
## may be set to indicate the directionality.
@ -3049,6 +3049,6 @@ const snaplen = 8192 &redef;
@load base/frameworks/logging
@load base/frameworks/input
@load base/frameworks/analyzer
@load base/frameworks/file-analysis
@load base/frameworks/files
@load base/bif

View file

@ -5,9 +5,12 @@
##! you actually want.
@load base/utils/site
@load base/utils/active-http
@load base/utils/addrs
@load base/utils/conn-ids
@load base/utils/dir
@load base/utils/directions-and-hosts
@load base/utils/exec
@load base/utils/files
@load base/utils/numbers
@load base/utils/paths
@ -49,4 +52,7 @@
@load base/protocols/syslog
@load base/protocols/tunnels
@load base/files/hash
@load base/files/extract
@load base/misc/find-checksum-offloading

View file

@ -1,7 +1,7 @@
@load ./utils-commands
@load ./main
@load ./file-analysis
@load ./file-extract
@load ./utils
@load ./files
@load ./gridftp
@load-sigs ./dpd.sig
@load-sigs ./dpd.sig

View file

@ -1,48 +0,0 @@
@load ./main
@load base/utils/conn-ids
@load base/frameworks/file-analysis/main
module FTP;
export {
## Default file handle provider for FTP.
global get_file_handle: function(c: connection, is_orig: bool): string;
}
function get_handle_string(c: connection): string
{
return cat(Analyzer::ANALYZER_FTP_DATA, " ", c$start_time, " ", id_string(c$id));
}
function get_file_handle(c: connection, is_orig: bool): string
{
if ( [c$id$resp_h, c$id$resp_p] !in ftp_data_expected ) return "";
local info: FTP::Info = ftp_data_expected[c$id$resp_h, c$id$resp_p];
if ( info$passive )
# FTP client initiates data channel.
if ( is_orig )
# Don't care about FTP client data.
return "";
else
# Do care about FTP server data.
return get_handle_string(c);
else
# FTP server initiates data channel.
if ( is_orig )
# Do care about FTP server data.
return get_handle_string(c);
else
# Don't care about FTP client data.
return "";
}
module GLOBAL;
event get_file_handle(tag: Analyzer::Tag, c: connection, is_orig: bool)
&priority=5
{
if ( tag != Analyzer::ANALYZER_FTP_DATA ) return;
set_file_handle(FTP::get_file_handle(c, is_orig));
}

View file

@ -1,90 +0,0 @@
##! File extraction support for FTP.
@load ./main
@load base/utils/files
module FTP;
export {
## Pattern of file mime types to extract from FTP transfers.
const extract_file_types = /NO_DEFAULT/ &redef;
## The on-disk prefix for files to be extracted from FTP-data transfers.
const extraction_prefix = "ftp-item" &redef;
}
redef record Info += {
## On disk file where it was extracted to.
extraction_file: string &log &optional;
## Indicates if the current command/response pair should attempt to
## extract the file if a file was transferred.
extract_file: bool &default=F;
};
function get_extraction_name(f: fa_file): string
{
local r = fmt("%s-%s.dat", extraction_prefix, f$id);
return r;
}
event file_new(f: fa_file) &priority=5
{
if ( ! f?$source ) return;
if ( f$source != "FTP_DATA" ) return;
if ( f?$mime_type && extract_file_types in f$mime_type )
{
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=get_extraction_name(f)]);
return;
}
if ( ! f?$conns ) return;
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( [cid$resp_h, cid$resp_p] !in ftp_data_expected ) next;
local s = ftp_data_expected[cid$resp_h, cid$resp_p];
if ( ! s$extract_file ) next;
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=get_extraction_name(f)]);
return;
}
}
event file_state_remove(f: fa_file) &priority=4
{
if ( ! f?$source ) return;
if ( f$source != "FTP_DATA" ) return;
if ( ! f?$info ) return;
for ( filename in f$info$extracted_files )
{
local s: FTP::Info;
s$ts = network_time();
s$tags = set();
s$user = "<ftp-data>";
s$extraction_file = filename;
if ( f?$conns )
for ( cid in f$conns )
{
s$uid = f$conns[cid]$uid;
s$id = cid;
}
Log::write(FTP::LOG, s);
}
}
event log_ftp(rec: Info) &priority=-10
{
delete rec$extraction_file;
delete rec$extract_file;
}

View file

@ -0,0 +1,60 @@
@load ./main
@load ./utils
@load base/utils/conn-ids
@load base/frameworks/files
module FTP;
export {
redef record Info += {
## File unique ID.
fuid: string &optional &log;
};
## Default file handle provider for FTP.
global get_file_handle: function(c: connection, is_orig: bool): string;
## Describe the file being transferred.
global describe_file: function(f: fa_file): string;
}
function get_file_handle(c: connection, is_orig: bool): string
{
if ( [c$id$resp_h, c$id$resp_p] !in ftp_data_expected )
return "";
return cat(Analyzer::ANALYZER_FTP_DATA, c$start_time, c$id, is_orig);
}
function describe_file(f: fa_file): string
{
# This shouldn't be needed, but just in case...
if ( f$source != "FTP" )
return "";
for ( cid in f$conns )
{
if ( f$conns[cid]?$ftp )
return FTP::describe(f$conns[cid]$ftp);
}
return "";
}
event bro_init() &priority=5
{
Files::register_protocol(Analyzer::ANALYZER_FTP_DATA,
[$get_file_handle = FTP::get_file_handle,
$describe = FTP::describe_file]);
}
event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=5
{
if ( [c$id$resp_h, c$id$resp_p] !in ftp_data_expected )
return;
local ftp = ftp_data_expected[c$id$resp_h, c$id$resp_p];
ftp$fuid = f$id;
if ( f?$mime_type )
ftp$mime_type = f$mime_type;
}

View file

@ -63,8 +63,6 @@ export {
reply_code: count &log &optional;
## Reply message from the server in response to the command.
reply_msg: string &log &optional;
## Arbitrary tags that may indicate a particular attribute of this command.
tags: set[string] &log;
## Expected FTP data channel.
data_channel: ExpectedDataChannel &log &optional;
@ -104,6 +102,8 @@ export {
global log_ftp: event(rec: Info);
}
@load ./utils
# Add the state tracking information variable to the connection record
redef record connection += {
ftp: Info &optional;
@ -171,37 +171,26 @@ function set_ftp_session(c: connection)
function ftp_message(s: Info)
{
# If it either has a tag associated with it (something detected)
# or it's a deliberately logged command.
if ( |s$tags| > 0 || (s?$cmdarg && s$cmdarg$cmd in logged_commands) )
s$ts=s$cmdarg$ts;
s$command=s$cmdarg$cmd;
s$arg = s$cmdarg$arg;
if ( s$cmdarg$cmd in file_cmds )
s$arg = build_url_ftp(s);
if ( s$arg == "" )
delete s$arg;
if ( s?$password &&
! s$capture_password &&
to_lower(s$user) !in guest_ids )
{
if ( s?$password &&
! s$capture_password &&
to_lower(s$user) !in guest_ids )
{
s$password = "<hidden>";
}
local arg = s$cmdarg$arg;
if ( s$cmdarg$cmd in file_cmds )
{
local comp_path = build_path_compressed(s$cwd, arg);
if ( comp_path[0] != "/" )
comp_path = cat("/", comp_path);
arg = fmt("ftp://%s%s", addr_to_uri(s$id$resp_h), comp_path);
}
s$ts=s$cmdarg$ts;
s$command=s$cmdarg$cmd;
if ( arg == "" )
delete s$arg;
else
s$arg=arg;
Log::write(FTP::LOG, s);
s$password = "<hidden>";
}
if ( s?$cmdarg && s$command in logged_commands)
Log::write(FTP::LOG, s);
# The MIME and file_size fields are specific to file transfer commands
# and may not be used in all commands, so they need to be reset to "blank"
# values after logging.
@ -209,8 +198,6 @@ function ftp_message(s: Info)
delete s$file_size;
# Same with data channel.
delete s$data_channel;
# Tags are cleared every time too.
s$tags = set();
}
function add_expected_data_channel(s: Info, chan: ExpectedDataChannel)
@ -218,8 +205,9 @@ function add_expected_data_channel(s: Info, chan: ExpectedDataChannel)
s$passive = chan$passive;
s$data_channel = chan;
ftp_data_expected[chan$resp_h, chan$resp_p] = s;
Analyzer::schedule_analyzer(chan$orig_h, chan$resp_h, chan$resp_p, Analyzer::ANALYZER_FTP_DATA,
5mins);
Analyzer::schedule_analyzer(chan$orig_h, chan$resp_h, chan$resp_p,
Analyzer::ANALYZER_FTP_DATA,
5mins);
}
event ftp_request(c: connection, command: string, arg: string) &priority=5

View file

@ -0,0 +1,47 @@
##! Utilities specific for FTP processing.
@load ./main
@load base/utils/addrs
module FTP;
export {
## Creates a URL from an :bro:type:`FTP::Info` record.
##
## rec: An :bro:type:`FTP::Info` record.
##
## Returns: A URL, not prefixed by "ftp://".
global build_url: function(rec: Info): string;
## Creates a URL from an :bro:type:`FTP::Info` record.
##
## rec: An :bro:type:`FTP::Info` record.
##
## Returns: A URL prefixed with "ftp://".
global build_url_ftp: function(rec: Info): string;
## Create an extremely shortened representation of a log line.
global describe: function(rec: Info): string;
}
function build_url(rec: Info): string
{
if ( !rec?$arg )
return "";
local comp_path = build_path_compressed(rec$cwd, rec$arg);
if ( comp_path[0] != "/" )
comp_path = cat("/", comp_path);
return fmt("%s%s", addr_to_uri(rec$id$resp_h), comp_path);
}
function build_url_ftp(rec: Info): string
{
return fmt("ftp://%s", build_url(rec));
}
function describe(rec: Info): string
{
return build_url_ftp(rec);
}

View file

@ -1,8 +1,6 @@
@load ./main
@load ./entities
@load ./utils
@load ./file-analysis
@load ./file-ident
@load ./file-hash
@load ./file-extract
@load ./files
@load-sigs ./dpd.sig

View file

@ -0,0 +1,109 @@
##! Analysis and logging for MIME entities found in HTTP sessions.
@load base/frameworks/files
@load base/utils/strings
@load base/utils/files
@load ./main
module HTTP;
export {
type Entity: record {
## Filename for the entity if discovered from a header.
filename: string &optional;
};
redef record Info += {
## An ordered vector of file unique IDs.
orig_fuids: vector of string &log &optional;
## An ordered vector of mime types.
orig_mime_types: vector of string &log &optional;
## An ordered vector of file unique IDs.
resp_fuids: vector of string &log &optional;
## An ordered vector of mime types.
resp_mime_types: vector of string &log &optional;
## The current entity.
current_entity: Entity &optional;
## Current number of MIME entities in the HTTP request message body.
orig_mime_depth: count &default=0;
## Current number of MIME entities in the HTTP response message body.
resp_mime_depth: count &default=0;
};
}
event http_begin_entity(c: connection, is_orig: bool) &priority=10
{
set_state(c, F, is_orig);
if ( is_orig )
++c$http$orig_mime_depth;
else
++c$http$resp_mime_depth;
c$http$current_entity = Entity();
}
event http_header(c: connection, is_orig: bool, name: string, value: string) &priority=3
{
if ( name == "CONTENT-DISPOSITION" &&
/[fF][iI][lL][eE][nN][aA][mM][eE]/ in value )
{
c$http$current_entity$filename = extract_filename_from_content_disposition(value);
}
else if ( name == "CONTENT-TYPE" &&
/[nN][aA][mM][eE][:blank:]*=/ in value )
{
c$http$current_entity$filename = extract_filename_from_content_disposition(value);
}
}
event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=5
{
if ( f$source == "HTTP" && c?$http )
{
if ( c$http?$current_entity && c$http$current_entity?$filename )
f$info$filename = c$http$current_entity$filename;
if ( f$is_orig )
{
if ( ! c$http?$orig_fuids )
c$http$orig_fuids = string_vec(f$id);
else
c$http$orig_fuids[|c$http$orig_fuids|] = f$id;
if ( f?$mime_type )
{
if ( ! c$http?$orig_mime_types )
c$http$orig_mime_types = string_vec(f$mime_type);
else
c$http$orig_mime_types[|c$http$orig_mime_types|] = f$mime_type;
}
}
else
{
if ( ! c$http?$resp_fuids )
c$http$resp_fuids = string_vec(f$id);
else
c$http$resp_fuids[|c$http$resp_fuids|] = f$id;
if ( f?$mime_type )
{
if ( ! c$http?$resp_mime_types )
c$http$resp_mime_types = string_vec(f$mime_type);
else
c$http$resp_mime_types[|c$http$resp_mime_types|] = f$mime_type;
}
}
}
}
event http_end_entity(c: connection, is_orig: bool) &priority=5
{
if ( c?$http && c$http?$current_entity )
delete c$http$current_entity;
}

View file

@ -1,54 +0,0 @@
@load ./main
@load ./utils
@load base/utils/conn-ids
@load base/frameworks/file-analysis/main
module HTTP;
export {
redef record HTTP::Info += {
## Number of MIME entities in the HTTP request message body so far.
request_mime_level: count &default=0;
## Number of MIME entities in the HTTP response message body so far.
response_mime_level: count &default=0;
};
## Default file handle provider for HTTP.
global get_file_handle: function(c: connection, is_orig: bool): string;
}
event http_begin_entity(c: connection, is_orig: bool) &priority=5
{
if ( ! c?$http )
return;
if ( is_orig )
++c$http$request_mime_level;
else
++c$http$response_mime_level;
}
function get_file_handle(c: connection, is_orig: bool): string
{
if ( ! c?$http ) return "";
local mime_level: count =
is_orig ? c$http$request_mime_level : c$http$response_mime_level;
local mime_level_str: string = mime_level > 1 ? cat(mime_level) : "";
if ( c$http$range_request )
return cat(Analyzer::ANALYZER_HTTP, " ", is_orig, " ", c$id$orig_h, " ",
build_url(c$http));
return cat(Analyzer::ANALYZER_HTTP, " ", c$start_time, " ", is_orig, " ",
c$http$trans_depth, mime_level_str, " ", id_string(c$id));
}
module GLOBAL;
event get_file_handle(tag: Analyzer::Tag, c: connection, is_orig: bool)
&priority=5
{
if ( tag != Analyzer::ANALYZER_HTTP ) return;
set_file_handle(HTTP::get_file_handle(c, is_orig));
}

View file

@ -1,100 +0,0 @@
##! Extracts the items from HTTP traffic, one per file. At this time only
##! the message body from the server can be extracted with this script.
@load ./main
@load ./file-analysis
module HTTP;
export {
## Pattern of file mime types to extract from HTTP response entity bodies.
const extract_file_types = /NO_DEFAULT/ &redef;
## The on-disk prefix for files to be extracted from HTTP entity bodies.
const extraction_prefix = "http-item" &redef;
redef record Info += {
## On-disk location where files in request body were extracted.
extracted_request_files: vector of string &log &optional;
## On-disk location where files in response body were extracted.
extracted_response_files: vector of string &log &optional;
## Indicates if the response body is to be extracted or not. Must be
## set before or by the first :bro:see:`file_new` for the file content.
extract_file: bool &default=F;
};
}
function get_extraction_name(f: fa_file): string
{
local r = fmt("%s-%s.dat", extraction_prefix, f$id);
return r;
}
function add_extraction_file(c: connection, is_orig: bool, fn: string)
{
if ( is_orig )
{
if ( ! c$http?$extracted_request_files )
c$http$extracted_request_files = vector();
c$http$extracted_request_files[|c$http$extracted_request_files|] = fn;
}
else
{
if ( ! c$http?$extracted_response_files )
c$http$extracted_response_files = vector();
c$http$extracted_response_files[|c$http$extracted_response_files|] = fn;
}
}
event file_new(f: fa_file) &priority=5
{
if ( ! f?$source ) return;
if ( f$source != "HTTP" ) return;
if ( ! f?$conns ) return;
local fname: string;
local c: connection;
if ( f?$mime_type && extract_file_types in f$mime_type )
{
fname = get_extraction_name(f);
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]);
for ( cid in f$conns )
{
c = f$conns[cid];
if ( ! c?$http ) next;
add_extraction_file(c, f$is_orig, fname);
}
return;
}
local extracting: bool = F;
for ( cid in f$conns )
{
c = f$conns[cid];
if ( ! c?$http ) next;
if ( ! c$http$extract_file ) next;
fname = get_extraction_name(f);
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]);
extracting = T;
break;
}
if ( extracting )
for ( cid in f$conns )
{
c = f$conns[cid];
if ( ! c?$http ) next;
add_extraction_file(c, f$is_orig, fname);
}
}

View file

@ -1,68 +0,0 @@
##! Calculate hashes for HTTP body transfers.
@load ./main
@load ./file-analysis
module HTTP;
export {
redef record Info += {
## MD5 sum for a file transferred over HTTP calculated from the
## response body.
md5: string &log &optional;
## This value can be set per-transfer to determine per request
## if a file should have an MD5 sum generated. It must be
## set to T at the time of or before the first chunk of body data.
calc_md5: bool &default=F;
};
## Generate MD5 sums for these filetypes.
const generate_md5 = /application\/x-dosexec/ # Windows and DOS executables
| /application\/x-executable/ # *NIX executable binary
&redef;
}
event file_new(f: fa_file) &priority=5
{
if ( ! f?$source ) return;
if ( f$source != "HTTP" ) return;
if ( f?$mime_type && generate_md5 in f$mime_type )
{
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]);
return;
}
if ( ! f?$conns ) return;
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( ! c?$http ) next;
if ( ! c$http$calc_md5 ) next;
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]);
return;
}
}
event file_state_remove(f: fa_file) &priority=4
{
if ( ! f?$source ) return;
if ( f$source != "HTTP" ) return;
if ( ! f?$conns ) return;
if ( ! f?$info ) return;
if ( ! f$info?$md5 ) return;
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( ! c?$http ) next;
c$http$md5 = f$info$md5;
}
}

View file

@ -1,105 +0,0 @@
##! Identification of file types in HTTP response bodies with file content sniffing.
@load base/frameworks/notice
@load ./main
@load ./utils
@load ./file-analysis
module HTTP;
export {
redef enum Notice::Type += {
## Indicates when the file extension doesn't seem to match the file
## contents.
Incorrect_File_Type,
};
redef record Info += {
## Mime type of response body identified by content sniffing.
mime_type: string &log &optional;
};
## Mapping between mime type strings (without character set) and
## regular expressions for URLs.
## The :bro:enum:`HTTP::Incorrect_File_Type` notice is generated if the
## pattern doesn't match the mime type that was discovered.
const mime_types_extensions: table[string] of pattern = {
["application/x-dosexec"] = /\.([eE][xX][eE]|[dD][lL][lL])/,
} &redef;
## A pattern for filtering out :bro:enum:`HTTP::Incorrect_File_Type` urls
## that are not noteworthy before a notice is created. Each
## pattern added should match the complete URL (the matched URLs include
## "http://" at the beginning).
const ignored_incorrect_file_type_urls = /^$/ &redef;
}
event file_new(f: fa_file) &priority=5
{
if ( ! f?$source ) return;
if ( f$source != "HTTP" ) return;
if ( ! f?$mime_type ) return;
if ( ! f?$conns ) return;
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( ! c?$http ) next;
c$http$mime_type = f$mime_type;
local mime_str: string = c$http$mime_type;
if ( mime_str !in mime_types_extensions ) next;
if ( ! c$http?$uri ) next;
if ( mime_types_extensions[mime_str] in c$http$uri ) next;
local url = build_url_http(c$http);
if ( url == ignored_incorrect_file_type_urls ) next;
local message = fmt("%s %s %s", mime_str, c$http$method, url);
NOTICE([$note=Incorrect_File_Type,
$msg=message,
$conn=c]);
}
}
event file_over_new_connection(f: fa_file, c: connection) &priority=5
{
if ( ! f?$source ) return;
if ( f$source != "HTTP" ) return;
if ( ! f?$mime_type ) return;
if ( ! c?$http ) return;
# Spread the mime around (e.g. for partial content, file_type event only
# happens once for the first connection, but if there are subsequent
# connections to transfer the same file, they'll be lacking the mime_type
# field if we don't do this).
c$http$mime_type = f$mime_type;
}
# Tracks byte-range request / partial content response mime types, indexed
# by [connection, uri] pairs. This is needed because a person can pipeline
# byte-range requests over multiple connections to the same uri. Without
# the tracking, only the first request in the pipeline for each connection
# would get a mime_type field assigned to it (by the FileAnalysis policy hooks).
global partial_types: table[conn_id, string] of string &read_expire=5mins;
# Priority 4 so that it runs before the handler that will write to http.log.
event http_message_done(c: connection, is_orig: bool, stat: http_message_stat)
&priority=4
{
if ( ! c$http$range_request ) return;
if ( ! c$http?$uri ) return;
if ( c$http?$mime_type )
{
partial_types[c$id, c$http$uri] = c$http$mime_type;
return;
}
if ( [c$id, c$http$uri] in partial_types )
c$http$mime_type = partial_types[c$id, c$http$uri];
}

View file

@ -0,0 +1,56 @@
@load ./main
@load ./entities
@load ./utils
@load base/utils/conn-ids
@load base/frameworks/files
module HTTP;
export {
## Default file handle provider for HTTP.
global get_file_handle: function(c: connection, is_orig: bool): string;
## Default file describer for HTTP.
global describe_file: function(f: fa_file): string;
}
function get_file_handle(c: connection, is_orig: bool): string
{
if ( ! c?$http )
return "";
if ( c$http$range_request && ! is_orig )
{
# Any multipart responses from the server are pieces of the same file
# that correspond to range requests, so don't use mime depth to
# identify the file.
return cat(Analyzer::ANALYZER_HTTP, is_orig, c$id$orig_h, build_url(c$http));
}
else
{
local mime_depth = is_orig ? c$http$orig_mime_depth : c$http$resp_mime_depth;
return cat(Analyzer::ANALYZER_HTTP, c$start_time, is_orig,
c$http$trans_depth, mime_depth, id_string(c$id));
}
}
function describe_file(f: fa_file): string
{
# This shouldn't be needed, but just in case...
if ( f$source != "HTTP" )
return "";
for ( cid in f$conns )
{
if ( f$conns[cid]?$http )
return build_url_http(f$conns[cid]$http);
}
return "";
}
event bro_init() &priority=5
{
Files::register_protocol(Analyzer::ANALYZER_HTTP,
[$get_file_handle = HTTP::get_file_handle,
$describe = HTTP::describe_file]);
}

View file

@ -1,5 +1,5 @@
##! Implements base functionality for HTTP analysis. The logging model is
##! to log request/response pairs and all relevant metadata together in
##! Implements base functionality for HTTP analysis. The logging model is
##! to log request/response pairs and all relevant metadata together in
##! a single record.
@load base/utils/numbers
@ -15,10 +15,10 @@ export {
## Placeholder.
EMPTY
};
## This setting changes if passwords used in Basic-Auth are captured or not.
const default_capture_password = F &redef;
type Info: record {
## Timestamp for when the request happened.
ts: time &log;
@ -26,7 +26,7 @@ export {
uid: string &log;
## The connection's 4-tuple of endpoint addresses/ports.
id: conn_id &log;
## Represents the pipelined depth into the connection of this
## Represents the pipelined depth into the connection of this
## request/response transaction.
trans_depth: count &log;
## Verb used in the HTTP request (GET, POST, HEAD, etc.).
@ -60,24 +60,24 @@ export {
## A set of indicators of various attributes discovered and
## related to a particular request/response pair.
tags: set[Tags] &log;
## Username if basic-auth is performed for the request.
username: string &log &optional;
## Password if basic-auth is performed for the request.
password: string &log &optional;
## Determines if the password will be captured for this request.
capture_password: bool &default=default_capture_password;
## All of the headers that may indicate if the request was proxied.
proxied: set[string] &log &optional;
## Indicates if this request can assume 206 partial content in
## response.
range_request: bool &default=F;
range_request: bool &default=F;
};
## Structure to maintain state for an HTTP connection with multiple
## Structure to maintain state for an HTTP connection with multiple
## requests and responses.
type State: record {
## Pending requests.
@ -87,7 +87,7 @@ export {
## Current response in the pending queue.
current_response: count &default=0;
};
## A list of HTTP headers typically used to indicate proxied requests.
const proxy_headers: set[string] = {
"FORWARDED",
@ -100,8 +100,8 @@ export {
} &redef;
## A list of HTTP methods. Other methods will generate a weird. Note
## that the HTTP analyzer will only accept methods consisting solely
## of letters ``[A-Za-z]``.
## that the HTTP analyzer will only accept methods consisting solely
## of letters ``[A-Za-z]``.
const http_methods: set[string] = {
"GET", "POST", "HEAD", "OPTIONS",
"PUT", "DELETE", "TRACE", "CONNECT",
@ -111,8 +111,8 @@ export {
"POLL", "REPORT", "SUBSCRIBE", "BMOVE",
"SEARCH"
} &redef;
## Event that can be handled to access the HTTP record as it is sent on
## Event that can be handled to access the HTTP record as it is sent on
## to the logging framework.
global log_http: event(rec: Info);
}
@ -147,12 +147,12 @@ function new_http_session(c: connection): Info
tmp$ts=network_time();
tmp$uid=c$uid;
tmp$id=c$id;
# $current_request is set prior to the Info record creation so we
# $current_request is set prior to the Info record creation so we
# can use the value directly here.
tmp$trans_depth = c$http_state$current_request;
return tmp;
}
function set_state(c: connection, request: bool, is_orig: bool)
{
if ( ! c?$http_state )
@ -160,19 +160,19 @@ function set_state(c: connection, request: bool, is_orig: bool)
local s: State;
c$http_state = s;
}
# These deal with new requests and responses.
if ( request || c$http_state$current_request !in c$http_state$pending )
c$http_state$pending[c$http_state$current_request] = new_http_session(c);
if ( ! is_orig && c$http_state$current_response !in c$http_state$pending )
c$http_state$pending[c$http_state$current_response] = new_http_session(c);
if ( is_orig )
c$http = c$http_state$pending[c$http_state$current_request];
else
c$http = c$http_state$pending[c$http_state$current_response];
}
event http_request(c: connection, method: string, original_URI: string,
unescaped_URI: string, version: string) &priority=5
{
@ -181,17 +181,17 @@ event http_request(c: connection, method: string, original_URI: string,
local s: State;
c$http_state = s;
}
++c$http_state$current_request;
set_state(c, T, T);
c$http$method = method;
c$http$uri = unescaped_URI;
if ( method !in http_methods )
event conn_weird("unknown_HTTP_method", c, method);
}
event http_reply(c: connection, version: string, code: count, reason: string) &priority=5
{
if ( ! c?$http_state )
@ -199,7 +199,7 @@ event http_reply(c: connection, version: string, code: count, reason: string) &p
local s: State;
c$http_state = s;
}
# If the last response was an informational 1xx, we're still expecting
# the real response to the request, so don't create a new Info record yet.
if ( c$http_state$current_response !in c$http_state$pending ||
@ -207,7 +207,7 @@ event http_reply(c: connection, version: string, code: count, reason: string) &p
! code_in_range(c$http_state$pending[c$http_state$current_response]$status_code, 100, 199)) )
++c$http_state$current_response;
set_state(c, F, F);
c$http$status_code = code;
c$http$status_msg = reason;
if ( code_in_range(code, 100, 199) )
@ -216,33 +216,33 @@ event http_reply(c: connection, version: string, code: count, reason: string) &p
c$http$info_msg = reason;
}
}
event http_header(c: connection, is_orig: bool, name: string, value: string) &priority=5
{
set_state(c, F, is_orig);
if ( is_orig ) # client headers
{
if ( name == "REFERER" )
c$http$referrer = value;
else if ( name == "HOST" )
# The split is done to remove the occasional port value that shows up here.
c$http$host = split1(value, /:/)[1];
else if ( name == "RANGE" )
c$http$range_request = T;
else if ( name == "USER-AGENT" )
c$http$user_agent = value;
else if ( name in proxy_headers )
{
if ( ! c$http?$proxied )
c$http$proxied = set();
add c$http$proxied[fmt("%s -> %s", name, value)];
}
else if ( name == "AUTHORIZATION" )
{
if ( /^[bB][aA][sS][iI][cC] / in value )
@ -264,25 +264,19 @@ event http_header(c: connection, is_orig: bool, name: string, value: string) &pr
}
}
}
else # server headers
{
if ( name == "CONTENT-DISPOSITION" &&
/[fF][iI][lL][eE][nN][aA][mM][eE]/ in value )
c$http$filename = extract_filename_from_content_disposition(value);
}
}
event http_message_done(c: connection, is_orig: bool, stat: http_message_stat) &priority = 5
{
set_state(c, F, is_orig);
if ( is_orig )
c$http$request_body_len = stat$body_length;
else
c$http$response_body_len = stat$body_length;
}
event http_message_done(c: connection, is_orig: bool, stat: http_message_stat) &priority = -5
{
# The reply body is done so we're ready to log.
@ -311,4 +305,4 @@ event connection_state_remove(c: connection) &priority=-5
}
}
}
View file
@ -32,6 +32,9 @@ export {
##
## Returns: A URL prefixed with "http://".
global build_url_http: function(rec: Info): string;
## Create an extremely shortened representation of a log line.
global describe: function(rec: Info): string;
}
@ -62,3 +65,8 @@ function build_url_http(rec: Info): string
{
return fmt("http://%s", build_url(rec));
}
function describe(rec: Info): string
{
return build_url_http(rec);
}
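# A usage sketch (illustrative only, not part of this change): describe() is
# intended for anything that wants a one-line summary of a request, e.g. a
# handler for the log_http event. The handler below is only an example.
event HTTP::log_http(rec: HTTP::Info)
	{
	print HTTP::describe(rec);
	}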
View file
@ -1,5 +1,5 @@
@load ./main
@load ./dcc-send
@load ./file-analysis
@load ./files
@load-sigs ./dpd.sig
View file
@ -2,7 +2,7 @@
##!
##! There is a major problem with this script in the cluster context because
##! we might see A send B a message that a DCC connection is to be expected,
##! but that connection will actually be between B and C which could be
##! analyzed on a different worker.
##!
@ -15,12 +15,6 @@
module IRC;
export {
## Pattern of file mime types to extract from IRC DCC file transfers.
const extract_file_types = /NO_DEFAULT/ &redef;
## On-disk prefix for files to be extracted from IRC DCC file transfers.
const extraction_prefix = "irc-dcc-item" &redef;
redef record Info += {
## DCC filename requested.
dcc_file_name: string &log &optional;
@ -28,101 +22,10 @@ export {
dcc_file_size: count &log &optional;
## Sniffed mime type of the file.
dcc_mime_type: string &log &optional;
## The file handle for the file to be extracted
extraction_file: string &log &optional;
## A boolean to indicate if the current file transfer should be extracted.
extract_file: bool &default=F;
};
}
global dcc_expected_transfers: table[addr, port] of Info &read_expire=5mins;
function set_dcc_mime(f: fa_file)
{
if ( ! f?$conns ) return;
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( [cid$resp_h, cid$resp_p] !in dcc_expected_transfers ) next;
local s = dcc_expected_transfers[cid$resp_h, cid$resp_p];
s$dcc_mime_type = f$mime_type;
}
}
function set_dcc_extraction_file(f: fa_file, filename: string)
{
if ( ! f?$conns ) return;
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( [cid$resp_h, cid$resp_p] !in dcc_expected_transfers ) next;
local s = dcc_expected_transfers[cid$resp_h, cid$resp_p];
s$extraction_file = filename;
}
}
function get_extraction_name(f: fa_file): string
{
local r = fmt("%s-%s.dat", extraction_prefix, f$id);
return r;
}
# This handler sets the IRC::Info mime type.
event file_new(f: fa_file) &priority=5
{
if ( ! f?$source ) return;
if ( f$source != "IRC_DATA" ) return;
if ( ! f?$mime_type ) return;
set_dcc_mime(f);
}
# This handler checks whether file extraction is desired.
event file_new(f: fa_file) &priority=5
{
if ( ! f?$source ) return;
if ( f$source != "IRC_DATA" ) return;
local fname: string;
if ( f?$mime_type && extract_file_types in f$mime_type )
{
fname = get_extraction_name(f);
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]);
set_dcc_extraction_file(f, fname);
return;
}
if ( ! f?$conns ) return;
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( [cid$resp_h, cid$resp_p] !in dcc_expected_transfers ) next;
local s = dcc_expected_transfers[cid$resp_h, cid$resp_p];
if ( ! s$extract_file ) next;
fname = get_extraction_name(f);
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]);
s$extraction_file = fname;
return;
}
}
global dcc_expected_transfers: table[addr, port] of Info &synchronized &read_expire=5mins;
function log_dcc(f: fa_file)
{
@ -141,24 +44,21 @@ function log_dcc(f: fa_file)
Log::write(IRC::LOG, irc);
irc$command = tmp;
# Delete these values in case another DCC transfer
# Delete these values in case another DCC transfer
delete irc$extract_file;
delete irc$extraction_file;
delete irc$dcc_file_name;
delete irc$dcc_file_size;
delete irc$dcc_mime_type;
delete dcc_expected_transfers[cid$resp_h, cid$resp_p];
return;
}
}
event file_new(f: fa_file) &priority=-5
{
if ( ! f?$source ) return;
if ( f$source != "IRC_DATA" ) return;
log_dcc(f);
if ( f$source == "IRC_DATA" )
log_dcc(f);
}
event irc_dcc_message(c: connection, is_orig: bool,
View file
@ -1,25 +0,0 @@
@load ./dcc-send.bro
@load base/utils/conn-ids
@load base/frameworks/file-analysis/main
module IRC;
export {
## Default file handle provider for IRC.
global get_file_handle: function(c: connection, is_orig: bool): string;
}
function get_file_handle(c: connection, is_orig: bool): string
{
if ( is_orig ) return "";
return cat(Analyzer::ANALYZER_IRC_DATA, " ", c$start_time, " ", id_string(c$id));
}
module GLOBAL;
event get_file_handle(tag: Analyzer::Tag, c: connection, is_orig: bool)
&priority=5
{
if ( tag != Analyzer::ANALYZER_IRC_DATA ) return;
set_file_handle(IRC::get_file_handle(c, is_orig));
}
View file
@ -0,0 +1,39 @@
@load ./dcc-send
@load base/utils/conn-ids
@load base/frameworks/files
module IRC;
export {
redef record Info += {
## File unique ID.
fuid: string &log &optional;
};
## Default file handle provider for IRC.
global get_file_handle: function(c: connection, is_orig: bool): string;
}
function get_file_handle(c: connection, is_orig: bool): string
{
return cat(Analyzer::ANALYZER_IRC_DATA, c$start_time, c$id, is_orig);
}
event bro_init() &priority=5
{
Files::register_protocol(Analyzer::ANALYZER_IRC_DATA,
[$get_file_handle = IRC::get_file_handle]);
}
event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=5
{
if ( [c$id$resp_h, c$id$resp_p] !in dcc_expected_transfers )
return;
local irc = dcc_expected_transfers[c$id$resp_h, c$id$resp_p];
irc$fuid = f$id;
if ( irc?$dcc_file_name )
f$info$filename = irc$dcc_file_name;
if ( f?$mime_type )
irc$dcc_mime_type = f$mime_type;
}
View file
@ -1,6 +1,5 @@
@load ./main
@load ./entities
@load ./entities-excerpt
@load ./file-analysis
@load ./files
@load-sigs ./dpd.sig
View file
@ -1,37 +0,0 @@
##! This script is for optionally adding a body excerpt to the SMTP
##! entities log.
@load ./entities
module SMTP;
export {
redef record SMTP::EntityInfo += {
## The entity body excerpt.
excerpt: string &log &default="";
};
## This is the default value for how much of the entity body should be
## included for all MIME entities. The lesser of this value and
## :bro:see:`default_file_bof_buffer_size` will be used.
const default_entity_excerpt_len = 0 &redef;
}
event file_new(f: fa_file) &priority=5
{
if ( ! f?$source ) return;
if ( f$source != "SMTP" ) return;
if ( ! f?$bof_buffer ) return;
if ( ! f?$conns ) return;
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( ! c?$smtp ) next;
if ( default_entity_excerpt_len > 0 )
c$smtp$current_entity$excerpt =
f$bof_buffer[0:default_entity_excerpt_len];
}
}
View file
@ -1,5 +1,6 @@
##! Analysis and logging for MIME entities found in SMTP sessions.
@load base/frameworks/files
@load base/utils/strings
@load base/utils/files
@load ./main
@ -7,217 +8,55 @@
module SMTP;
export {
redef enum Log::ID += { ENTITIES_LOG };
type EntityInfo: record {
## This is the timestamp of when the MIME content transfer began.
ts: time &log;
uid: string &log;
id: conn_id &log;
## A count to represent the depth of this message transaction in a
## single connection where multiple messages were transferred.
trans_depth: count &log;
## The filename seen in the Content-Disposition header.
filename: string &log &optional;
## Track how many bytes of the MIME encoded file have been seen.
content_len: count &log &default=0;
## The mime type of the entity discovered through magic bytes identification.
mime_type: string &log &optional;
## The calculated MD5 sum for the MIME entity.
md5: string &log &optional;
## Optionally calculate the file's MD5 sum. Must be set prior to the
## first data chunk being seen in an event.
calc_md5: bool &default=F;
## Optionally write the file to disk. Must be set prior to first
## data chunk being seen in an event.
extract_file: bool &default=F;
## Store the file handle here for the file currently being extracted.
extraction_file: string &log &optional;
type Entity: record {
## Filename for the entity if discovered from a header.
filename: string &optional;
};
redef record Info += {
## The in-progress entity information.
current_entity: EntityInfo &optional;
## The current entity being seen.
entity: Entity &optional;
};
redef record State += {
## Track the number of MIME encoded files transferred during a session.
mime_level: count &default=0;
## Track the number of MIME encoded files transferred
## during a session.
mime_depth: count &default=0;
};
## Generate MD5 sums for these filetypes.
const generate_md5 = /application\/x-dosexec/ # Windows and DOS executables
| /application\/x-executable/ # *NIX executable binary
&redef;
## Pattern of file mime types to extract from MIME bodies.
const extract_file_types = /NO_DEFAULT/ &redef;
## The on-disk prefix for files to be extracted from MIME entity bodies.
const extraction_prefix = "smtp-entity" &redef;
## If set, never generate MD5s. This is mainly for testing purposes to create
## reproducible output when the decision whether to create
## checksums depends on environment specifics.
const never_calc_md5 = F &redef;
global log_mime: event(rec: EntityInfo);
}
event bro_init() &priority=5
{
Log::create_stream(SMTP::ENTITIES_LOG, [$columns=EntityInfo, $ev=log_mime]);
}
function set_session(c: connection, new_entity: bool)
{
if ( ! c$smtp?$current_entity || new_entity )
{
local info: EntityInfo;
info$ts=network_time();
info$uid=c$uid;
info$id=c$id;
info$trans_depth=c$smtp$trans_depth;
c$smtp$current_entity = info;
++c$smtp_state$mime_level;
}
}
function get_extraction_name(f: fa_file): string
{
local r = fmt("%s-%s.dat", extraction_prefix, f$id);
return r;
}
event mime_begin_entity(c: connection) &priority=10
{
if ( ! c?$smtp ) return;
set_session(c, T);
c$smtp$entity = Entity();
++c$smtp_state$mime_depth;
}
event file_new(f: fa_file) &priority=5
event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=5
{
if ( ! f?$source ) return;
if ( f$source != "SMTP" ) return;
if ( ! f?$conns ) return;
local fname: string;
local extracting: bool = F;
for ( cid in f$conns )
if ( f$source == "SMTP" && c?$smtp )
{
local c: connection = f$conns[cid];
if ( ! c?$smtp ) next;
if ( ! c$smtp?$current_entity ) next;
if ( c$smtp$current_entity$extract_file )
{
if ( ! extracting )
{
fname = get_extraction_name(f);
FileAnalysis::add_analyzer(f,
[$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]);
extracting = T;
}
c$smtp$current_entity$extraction_file = fname;
}
if ( c$smtp$current_entity$calc_md5 )
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]);
if ( c$smtp?$entity && c$smtp$entity?$filename )
f$info$filename = c$smtp$entity$filename;
f$info$depth = c$smtp_state$mime_depth;
}
}
function check_extract_by_type(f: fa_file)
event mime_one_header(c: connection, h: mime_header_rec) &priority=5
{
if ( extract_file_types !in f$mime_type ) return;
if ( f?$info && FileAnalysis::ANALYZER_EXTRACT in f$info$analyzers )
if ( ! c?$smtp )
return;
local fname: string = get_extraction_name(f);
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]);
if ( ! f?$conns ) return;
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( ! c?$smtp ) next;
c$smtp$current_entity$extraction_file = fname;
}
}
function check_md5_by_type(f: fa_file)
{
if ( never_calc_md5 ) return;
if ( generate_md5 !in f$mime_type ) return;
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]);
}
event file_new(f: fa_file) &priority=5
{
if ( ! f?$source ) return;
if ( f$source != "SMTP" ) return;
if ( ! f?$mime_type ) return;
if ( f?$conns )
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( ! c?$smtp ) next;
if ( ! c$smtp?$current_entity ) next;
c$smtp$current_entity$mime_type = f$mime_type;
}
check_extract_by_type(f);
check_md5_by_type(f);
}
event file_state_remove(f: fa_file) &priority=4
{
if ( ! f?$source ) return;
if ( f$source != "SMTP" ) return;
if ( ! f?$conns ) return;
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( ! c?$smtp ) next;
if ( ! c$smtp?$current_entity ) next;
# Only log if there was some content.
if ( f$seen_bytes == 0 ) next;
if ( f?$info && f$info?$md5 )
c$smtp$current_entity$md5 = f$info$md5;
c$smtp$current_entity$content_len = f$seen_bytes;
Log::write(SMTP::ENTITIES_LOG, c$smtp$current_entity);
delete c$smtp$current_entity;
return;
}
}
event mime_one_header(c: connection, h: mime_header_rec)
{
if ( ! c?$smtp ) return;
if ( h$name == "CONTENT-DISPOSITION" &&
/[fF][iI][lL][eE][nN][aA][mM][eE]/ in h$value )
c$smtp$current_entity$filename = extract_filename_from_content_disposition(h$value);
c$smtp$entity$filename = extract_filename_from_content_disposition(h$value);
if ( h$name == "CONTENT-TYPE" &&
/[nN][aA][mM][eE][:blank:]*=/ in h$value )
c$smtp$current_entity$filename = extract_filename_from_content_disposition(h$value);
c$smtp$entity$filename = extract_filename_from_content_disposition(h$value);
}
event mime_end_entity(c: connection) &priority=5
{
if ( c?$smtp && c$smtp?$entity )
delete c$smtp$entity;
}
View file
@ -1,27 +0,0 @@
@load ./main
@load ./entities
@load base/utils/conn-ids
@load base/frameworks/file-analysis/main
module SMTP;
export {
## Default file handle provider for SMTP.
global get_file_handle: function(c: connection, is_orig: bool): string;
}
function get_file_handle(c: connection, is_orig: bool): string
{
if ( ! c?$smtp ) return "";
return cat(Analyzer::ANALYZER_SMTP, " ", c$start_time, " ", c$smtp$trans_depth, " ",
c$smtp_state$mime_level);
}
module GLOBAL;
event get_file_handle(tag: Analyzer::Tag, c: connection, is_orig: bool)
&priority=5
{
if ( tag != Analyzer::ANALYZER_SMTP ) return;
set_file_handle(SMTP::get_file_handle(c, is_orig));
}
View file
@ -0,0 +1,53 @@
@load ./main
@load ./entities
@load base/utils/conn-ids
@load base/frameworks/files
module SMTP;
export {
redef record Info += {
## An ordered vector of file unique IDs seen attached to
## the message.
fuids: vector of string &log &default=string_vec();
};
## Default file handle provider for SMTP.
global get_file_handle: function(c: connection, is_orig: bool): string;
## Default file describer for SMTP.
global describe_file: function(f: fa_file): string;
}
function get_file_handle(c: connection, is_orig: bool): string
{
return cat(Analyzer::ANALYZER_SMTP, c$start_time, c$smtp$trans_depth,
c$smtp_state$mime_depth);
}
function describe_file(f: fa_file): string
{
# This shouldn't be needed, but just in case...
if ( f$source != "SMTP" )
return "";
for ( cid in f$conns )
{
local c = f$conns[cid];
return SMTP::describe(c$smtp);
}
return "";
}
event bro_init() &priority=5
{
Files::register_protocol(Analyzer::ANALYZER_SMTP,
[$get_file_handle = SMTP::get_file_handle,
$describe = SMTP::describe_file]);
}
event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=5
{
if ( c?$smtp )
c$smtp$fuids[|c$smtp$fuids|] = f$id;
}
View file
@ -72,7 +72,10 @@ export {
## ALL_HOSTS - always capture the entire path.
## NO_HOSTS - never capture the path.
const mail_path_capture = ALL_HOSTS &redef;
## Create an extremely shortened representation of a log line.
global describe: function(rec: Info): string;
global log_smtp: event(rec: Info);
}
@ -223,7 +226,10 @@ event mime_one_header(c: connection, h: mime_header_rec) &priority=5
{
if ( ! c$smtp?$to )
c$smtp$to = set();
add c$smtp$to[h$value];
local to_parts = split(h$value, /[[:blank:]]*,[[:blank:]]*/);
for ( i in to_parts )
add c$smtp$to[to_parts[i]];
}
else if ( h$name == "X-ORIGINATING-IP" )
@ -268,3 +274,29 @@ event connection_state_remove(c: connection) &priority=-5
if ( c?$smtp )
smtp_message(c);
}
function describe(rec: Info): string
{
if ( rec?$mailfrom && rec?$rcptto )
{
local one_to = "";
for ( to in rec$rcptto )
{
one_to = to;
break;
}
local abbrev_subject = "";
if ( rec?$subject )
{
if ( |rec$subject| > 20 )
{
abbrev_subject = rec$subject[0:20] + "...";
}
}
return fmt("%s -> %s%s%s", rec$mailfrom, one_to,
(|rec$rcptto|>1 ? fmt(" (plus %d others)", |rec$rcptto|-1) : ""),
(abbrev_subject != "" ? fmt(": %s", abbrev_subject) : ""));
}
return "";
}
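# For illustration (not part of this change): for a message from
# "alice@example.com" to three recipients, the first being "bob@example.com",
# with subject "Quarterly results for review", describe() returns roughly
# "alice@example.com -> bob@example.com (plus 2 others): Quarterly results fo..."
# (addresses made up; subjects longer than 20 characters are truncated).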
View file
@ -0,0 +1,123 @@
##! A module for performing active HTTP requests and
##! getting the reply at runtime.
@load ./exec
module ActiveHTTP;
export {
## The default timeout for HTTP requests.
const default_max_time = 1min &redef;
## The default HTTP method/verb to use for requests.
const default_method = "GET" &redef;
type Response: record {
## Numeric response code from the server.
code: count;
## String response message from the server.
msg: string;
## Full body of the response.
body: string &optional;
## All headers returned by the server.
headers: table[string] of string &optional;
};
type Request: record {
## The URL being requested.
url: string;
## The HTTP method/verb to use for the request.
method: string &default=default_method;
## Data to send to the server in the client body. Keep in
## mind that you will probably need to set the *method* field
## to "POST" or "PUT".
client_data: string &optional;
## Arbitrary headers to pass to the server. Some headers
## will be included by libCurl.
#custom_headers: table[string] of string &optional;
## Timeout for the request.
max_time: interval &default=default_max_time;
## Additional curl command line arguments. Be very careful
## with this option, since shell injection can occur if
## untrusted data is not handled carefully.
addl_curl_args: string &optional;
};
## Perform an HTTP request according to the :bro:type:`Request` record.
## This is an asynchronous function and must be called within a "when"
## statement.
##
## req: A record instance representing all options for an HTTP request.
##
## Returns: A record with the full response message.
global request: function(req: ActiveHTTP::Request): ActiveHTTP::Response;
}
function request2curl(r: Request, bodyfile: string, headersfile: string): string
{
local cmd = fmt("curl -s -g -o \"%s\" -D \"%s\" -X \"%s\"",
str_shell_escape(bodyfile),
str_shell_escape(headersfile),
str_shell_escape(r$method));
cmd = fmt("%s -m %.0f", cmd, r$max_time);
if ( r?$client_data )
cmd = fmt("%s -d -", cmd);
if ( r?$addl_curl_args )
cmd = fmt("%s %s", cmd, r$addl_curl_args);
cmd = fmt("%s \"%s\"", cmd, str_shell_escape(r$url));
return cmd;
}
function request(req: Request): ActiveHTTP::Response
{
local tmpfile = "/tmp/bro-activehttp-" + unique_id("");
local bodyfile = fmt("%s_body", tmpfile);
local headersfile = fmt("%s_headers", tmpfile);
local cmd = request2curl(req, bodyfile, headersfile);
local stdin_data = req?$client_data ? req$client_data : "";
local resp: Response;
resp$code = 0;
resp$msg = "";
resp$body = "";
resp$headers = table();
return when ( local result = Exec::run([$cmd=cmd, $stdin=stdin_data, $read_files=set(bodyfile, headersfile)]) )
{
# If there is no response line then nothing else will work either.
if ( ! (result?$files && headersfile in result$files) )
{
Reporter::error(fmt("There was a failure when requesting \"%s\" with ActiveHTTP.", req$url));
return resp;
}
local headers = result$files[headersfile];
for ( i in headers )
{
# The reply is the first line.
if ( i == 0 )
{
local response_line = split_n(headers[0], /[[:blank:]]+/, F, 2);
if ( |response_line| != 3 )
return resp;
resp$code = to_count(response_line[2]);
resp$msg = response_line[3];
resp$body = join_string_vec(result$files[bodyfile], "");
}
else
{
local line = headers[i];
local h = split1(line, /:/);
if ( |h| != 2 )
next;
resp$headers[h[1]] = sub_bytes(h[2], 0, |h[2]|-1);
}
}
return resp;
}
}
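# A minimal usage sketch (illustrative only, not part of this change):
# ActiveHTTP::request is asynchronous, so it must be called inside a "when"
# statement. The URL below is just a placeholder.
event bro_init()
	{
	when ( local resp = ActiveHTTP::request([$url="http://example.com/"]) )
		{
		print fmt("%d %s", resp$code, resp$msg);
		}
	}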
View file
@ -0,0 +1,66 @@
@load base/utils/exec
@load base/frameworks/reporter
@load base/utils/paths
module Dir;
export {
## The default interval at which this module checks for new files in
## directories monitored via the :bro:see:`Dir::monitor` function.
const polling_interval = 30sec &redef;
## Register a directory to monitor with a callback that is called
## every time a previously unseen file appears. If a file is deleted
## and later reappears, it will be reported again.
##
## dir: The directory to monitor for files.
##
## callback: Callback that gets executed with each file name
## that is found. Filenames are provided with the full path.
##
## poll_interval: An interval at which to check for new files.
global monitor: function(dir: string, callback: function(fname: string),
poll_interval: interval &default=polling_interval);
}
event Dir::monitor_ev(dir: string, last_files: set[string],
callback: function(fname: string),
poll_interval: interval)
{
when ( local result = Exec::run([$cmd=fmt("ls -i \"%s/\"", str_shell_escape(dir))]) )
{
if ( result$exit_code != 0 )
{
Reporter::warning(fmt("Requested monitoring of non-existent directory (%s).", dir));
return;
}
local current_files: set[string] = set();
local files: vector of string = vector();
if ( result?$stdout )
files = result$stdout;
for ( i in files )
{
local parts = split1(files[i], / /);
if ( parts[1] !in last_files )
callback(build_path_compressed(dir, parts[2]));
add current_files[parts[1]];
}
schedule poll_interval
{
Dir::monitor_ev(dir, current_files, callback, poll_interval)
};
}
}
function monitor(dir: string, callback: function(fname: string),
poll_interval: interval &default=polling_interval)
{
event Dir::monitor_ev(dir, set(), callback, poll_interval);
}
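# A minimal usage sketch (illustrative only, not part of this change): the
# directory and the callback body are placeholders.
event bro_init()
	{
	Dir::monitor("/tmp/watched", function(fname: string)
		{
		print fmt("new file: %s", fname);
		}, 10sec);
	}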
scripts/base/utils/exec.bro Normal file
View file
@ -0,0 +1,185 @@
##! A module for executing external command line programs.
@load base/frameworks/input
module Exec;
export {
type Command: record {
## The command line to execute. Use care to avoid injection attacks:
## if the command uses untrusted or variable data, sanitize it
## with str_shell_escape().
cmd: string;
## Provide standard in to the program as a string.
stdin: string &default="";
## If additional files need to be read in as part of the command's
## output, they can be defined here.
read_files: set[string] &optional;
# The unique id for tracking executors.
uid: string &default=unique_id("");
};
type Result: record {
## Exit code from the program.
exit_code: count &default=0;
## True if the command was terminated with a signal.
signal_exit: bool &default=F;
## Each line of standard out.
stdout: vector of string &optional;
## Each line of standard error.
stderr: vector of string &optional;
## If additional files were requested to be read in,
## their contents will be available here.
files: table[string] of string_vec &optional;
};
## Function for running command line programs and getting
## output. This is an asynchronous function which is meant
## to be run with the `when` statement.
##
## cmd: The command to run. Use care to avoid injection attacks!
##
## Returns: A record representing the full results from the
## external program execution.
global run: function(cmd: Command): Result;
## The system directory for temp files.
const tmp_dir = "/tmp" &redef;
}
# Indexed by command uid.
global results: table[string] of Result;
global pending_commands: set[string];
global pending_files: table[string] of set[string];
type OneLine: record {
s: string;
is_stderr: bool;
};
type FileLine: record {
s: string;
};
event Exec::line(description: Input::EventDescription, tpe: Input::Event, s: string, is_stderr: bool)
{
local result = results[description$name];
if ( is_stderr )
{
if ( ! result?$stderr )
result$stderr = vector(s);
else
result$stderr[|result$stderr|] = s;
}
else
{
if ( ! result?$stdout )
result$stdout = vector(s);
else
result$stdout[|result$stdout|] = s;
}
}
event Exec::file_line(description: Input::EventDescription, tpe: Input::Event, s: string)
{
local parts = split1(description$name, /_/);
local name = parts[1];
local track_file = parts[2];
local result = results[name];
if ( ! result?$files )
result$files = table();
if ( track_file !in result$files )
result$files[track_file] = vector(s);
else
result$files[track_file][|result$files[track_file]|] = s;
}
event Input::end_of_data(name: string, source:string)
{
local parts = split1(name, /_/);
name = parts[1];
if ( name !in pending_commands || |parts| < 2 )
return;
local track_file = parts[2];
Input::remove(name);
if ( name !in pending_files )
delete pending_commands[name];
else
{
delete pending_files[name][track_file];
if ( |pending_files[name]| == 0 )
delete pending_commands[name];
system(fmt("rm \"%s\"", str_shell_escape(track_file)));
}
}
event InputRaw::process_finished(name: string, source:string, exit_code:count, signal_exit:bool)
{
if ( name !in pending_commands )
return;
Input::remove(name);
results[name]$exit_code = exit_code;
results[name]$signal_exit = signal_exit;
if ( name !in pending_files || |pending_files[name]| == 0 )
# No extra files to read, command is done.
delete pending_commands[name];
else
for ( read_file in pending_files[name] )
Input::add_event([$source=fmt("%s", read_file),
$name=fmt("%s_%s", name, read_file),
$reader=Input::READER_RAW,
$want_record=F,
$fields=FileLine,
$ev=Exec::file_line]);
}
function run(cmd: Command): Result
{
add pending_commands[cmd$uid];
results[cmd$uid] = [];
if ( cmd?$read_files )
{
for ( read_file in cmd$read_files )
{
if ( cmd$uid !in pending_files )
pending_files[cmd$uid] = set();
add pending_files[cmd$uid][read_file];
}
}
local config_strings: table[string] of string = {
["stdin"] = cmd$stdin,
["read_stderr"] = "1",
};
Input::add_event([$name=cmd$uid,
$source=fmt("%s |", cmd$cmd),
$reader=Input::READER_RAW,
$fields=Exec::OneLine,
$ev=Exec::line,
$want_record=F,
$config=config_strings]);
return when ( cmd$uid !in pending_commands )
{
local result = results[cmd$uid];
delete results[cmd$uid];
return result;
}
}
event bro_done()
{
# We are punting here and just deleting any unprocessed files.
for ( uid in pending_files )
for ( fname in pending_files[uid] )
system(fmt("rm \"%s\"", str_shell_escape(fname)));
}
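# A minimal usage sketch (illustrative only, not part of this change): like
# ActiveHTTP::request, Exec::run only yields its result inside a "when"
# statement. The command below is just an example.
event bro_init()
	{
	when ( local res = Exec::run([$cmd="ls /tmp"]) )
		{
		if ( res?$stdout )
			print res$stdout;
		}
	}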
View file
@ -6,22 +6,28 @@ function generate_extraction_filename(prefix: string, c: connection, suffix: str
{
local conn_info = fmt("%s:%d-%s:%d", addr_to_uri(c$id$orig_h), c$id$orig_p,
addr_to_uri(c$id$resp_h), c$id$resp_p);
if ( prefix != "" )
conn_info = fmt("%s_%s", prefix, conn_info);
if ( suffix != "" )
conn_info = fmt("%s_%s", conn_info, suffix);
return conn_info;
}
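# For illustration (not part of this change): with prefix "http-item", suffix
# "dat", and a connection from 10.0.0.1:49152 to 192.168.1.1:80 (addresses
# made up), this would produce "http-item_10.0.0.1:49152-192.168.1.1:80_dat".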
## For CONTENT-DISPOSITION headers, this function can be used to extract
## the filename.
function extract_filename_from_content_disposition(data: string): string
{
local filename = sub(data, /^.*[nN][aA][mM][eE][[:blank:]]*=[[:blank:]]*/, "");
local filename = sub(data, /^.*[nN][aA][mM][eE][[:blank:]]*\*?=[[:blank:]]*/, "");
# Remove quotes around the filename if they are there.
if ( /^\"/ in filename )
filename = split_n(filename, /\"/, F, 2)[2];
return filename;
filename = split_n(filename, /\"/, F, 2)[2];
# Remove the language and encoding if it's there.
if ( /^[a-zA-Z0-9\!#$%&+-^_`{}~]+'[a-zA-Z0-9\!#$%&+-^_`{}~]*'/ in filename )
filename = sub(filename, /^.+'.*'/, "");
return unescape_URI(filename);
}
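# For illustration (not part of this change): with the changes above, a header
# value such as
#   attachment; filename*=UTF-8''report%20final.pdf
# now yields "report final.pdf" (the encoding/language prefix is stripped and
# URI escapes are decoded), while a plain
#   attachment; filename="report.pdf"
# still yields "report.pdf".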