From 0f99956417425ef20e5592781e3b6335ea4f3f37 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Wed, 13 Mar 2013 14:36:27 -0400 Subject: [PATCH 001/118] Added Exec, Dir, and ActiveHTTP modules. --- scripts/base/init-default.bro | 3 + scripts/base/utils/active-http.bro | 120 +++++++++++++++++ scripts/base/utils/dir.bro | 51 +++++++ scripts/base/utils/exec.bro | 207 +++++++++++++++++++++++++++++ 4 files changed, 381 insertions(+) create mode 100644 scripts/base/utils/active-http.bro create mode 100644 scripts/base/utils/dir.bro create mode 100644 scripts/base/utils/exec.bro diff --git a/scripts/base/init-default.bro b/scripts/base/init-default.bro index 8b36899f10..9b62c80014 100644 --- a/scripts/base/init-default.bro +++ b/scripts/base/init-default.bro @@ -5,9 +5,12 @@ ##! you actually want. @load base/utils/site +@load base/utils/active-http @load base/utils/addrs @load base/utils/conn-ids +@load base/utils/dir @load base/utils/directions-and-hosts +@load base/utils/exec @load base/utils/files @load base/utils/numbers @load base/utils/paths diff --git a/scripts/base/utils/active-http.bro b/scripts/base/utils/active-http.bro new file mode 100644 index 0000000000..5522cc108a --- /dev/null +++ b/scripts/base/utils/active-http.bro @@ -0,0 +1,120 @@ +##! A module for performing active HTTP requests and +##! getting the reply at runtime. + +@load ./exec + +module ActiveHTTP; + +export { + ## The default timeout for HTTP requests. + const default_max_time = 1min &redef; + + ## The default HTTP method/verb to use for requests. + const default_method = "GET" &redef; + + type Response: record { + ## Numeric response code from the server. + code: count; + ## String response messgae from the server. + msg: string; + ## Full body of the response. + body: string &optional; + ## All headers returned by the server. + headers: table[string] of string &optional; + }; + + type Request: record { + ## The URL being requested. + url: string; + ## The HTTP method/verb to use for the request. + method: string &default=default_method; + ## Data to send to the server in the client body. Keep in + ## mind that you will probably need to set the $method field + ## to "POST" or "PUT". + client_data: string &optional; + ## Arbitrary headers to pass to the server. Some headers + ## will be included by libCurl. + #custom_headers: table[string] of string &optional; + ## Timeout for the request. + max_time: interval &default=default_max_time; + ## Additional curl command line arguments. Be very careful + ## with this option since shell injection could take place + ## if careful handling of untrusted data is not applied. + addl_curl_args: string &optional; + }; + + ## Perform an HTTP request according to the :bro:type:`Request` record. + ## This is an asynchronous function and must be called within a "when" + ## statement. + ## + ## req: A record instance representing all options for an HTTP request. + ## + ## Returns: A record with the full response message. 
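+	##
+	## For illustration (sketch added for clarity, not part of the original
+	## patch; the URL below is a placeholder): a typical call looks like
+	##
+	##     when ( local resp = ActiveHTTP::request([$url="http://example.com/"]) )
+	##         print resp$code, resp$msg;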
+ global request: function(req: ActiveHTTP::Request): ActiveHTTP::Response; +} + +function request2curl(r: Request, bodyfile: string, headersfile: string): string + { + local cmd = fmt("curl -s -g -o \"%s\" -D \"%s\" -X \"%s\"", + str_shell_escape(bodyfile), + str_shell_escape(headersfile), + str_shell_escape(r$method)); + + cmd = fmt("%s -m %.0f", cmd, r$max_time); + + if ( r?$client_data ) + cmd = fmt("%s -d -", cmd); + + if ( r?$addl_curl_args ) + cmd = fmt("%s %s", cmd, r$addl_curl_args); + + cmd = fmt("%s \"%s\"", cmd, str_shell_escape(r$url)); + return cmd; + } + +function request(req: Request): ActiveHTTP::Response + { + local tmpfile = "/tmp/bro-activehttp-" + unique_id(""); + local bodyfile = fmt("%s_body", tmpfile); + local headersfile = fmt("%s_headers", tmpfile); + + local cmd = request2curl(req, bodyfile, headersfile); + local stdin_data = req?$client_data ? req$client_data : ""; + + local resp: Response; + resp$code = 0; + resp$msg = ""; + resp$body = ""; + resp$headers = table(); + return when ( local result = Exec::run([$cmd=cmd, $stdin=stdin_data, $read_files=set(bodyfile, headersfile)]) ) + { + # If there is no response line then nothing else will work either. + if ( ! (result?$files && headersfile in result$files) ) + Reporter::error(fmt("There was a failure when requesting \"%s\" with ActiveHTTP.", req$url)); + + local headers = result$files[headersfile]; + for ( i in headers ) + { + # The reply is the first line. + if ( i == 0 ) + { + local response_line = split_n(headers[0], /[[:blank:]]+/, F, 2); + if ( |response_line| != 3 ) + return resp; + + resp$code = to_count(response_line[2]); + resp$msg = response_line[3]; + resp$body = join_string_vec(result$files[bodyfile], ""); + } + else + { + local line = headers[i]; + local h = split1(line, /:/); + if ( |h| != 2 ) + next; + resp$headers[h[1]] = sub_bytes(h[2], 0, |h[2]|-1); + } + } + return resp; + } + } diff --git a/scripts/base/utils/dir.bro b/scripts/base/utils/dir.bro new file mode 100644 index 0000000000..2ed1c8e6e9 --- /dev/null +++ b/scripts/base/utils/dir.bro @@ -0,0 +1,51 @@ +@load base/utils/exec +@load base/frameworks/reporter +@load base/utils/paths + +module Dir; + +export { + ## Register a directory to monitor with a callback that is called + ## every time a previously unseen file is seen. If a file is deleted + ## and seen to be gone, the file is available for being seen again in + ## the future. + ## + ## dir: The directory to monitor for files. + ## + ## callback: Callback that gets executed with each file name + ## that is found. Filenames are provided with the full path. + global monitor: function(dir: string, callback: function(fname: string)); + + ## The interval this module checks for files in directories when using + ## the :bro:see:`Dir::monitor` function. 
+ const polling_interval = 30sec &redef; +} + +event Dir::monitor_ev(dir: string, last_files: set[string], callback: function(fname: string)) + { + when ( local result = Exec::run([$cmd=fmt("ls \"%s\"", str_shell_escape(dir))]) ) + { + if ( result$exit_code != 0 ) + { + Reporter::warning("Requested monitoring of non-existent directory."); + return; + } + + local current_files: set[string] = set(); + local files = result$stdout; + for ( i in files ) + { + if ( files[i] !in last_files ) + callback(build_path_compressed(dir, files[i])); + add current_files[files[i]]; + } + schedule polling_interval { Dir::monitor_ev(dir, current_files, callback) }; + } + } + +function monitor(dir: string, callback: function(fname: string)) + { + event Dir::monitor_ev(dir, set(), callback); + } + + diff --git a/scripts/base/utils/exec.bro b/scripts/base/utils/exec.bro new file mode 100644 index 0000000000..fe353cf590 --- /dev/null +++ b/scripts/base/utils/exec.bro @@ -0,0 +1,207 @@ +##! A module for executing external command line programs. +##! This requires code that is still in topic branches and +##! definitely won't currently work on any released version of Bro. + +@load base/frameworks/input + +module Exec; + +export { + type Command: record { + ## The command line to execute. + ## Use care to avoid injection attacks! + cmd: string; + ## Provide standard in to the program as a + ## string. + stdin: string &default=""; + ## If additional files are required to be read + ## in as part of the output of the command they + ## can be defined here. + read_files: set[string] &optional; + }; + + type Result: record { + ## Exit code from the program. + exit_code: count &default=0; + ## Each line of standard out. + stdout: vector of string &optional; + ## Each line of standard error. + stderr: vector of string &optional; + ## If additional files were requested to be read in + ## the content of the files will be available here. + files: table[string] of string_vec &optional; + }; + + ## Function for running command line programs and getting + ## output. This is an asynchronous function which is meant + ## to be run with the `when` statement. + ## + ## cmd: The command to run. Use care to avoid injection attacks! + ## + ## returns: A record representing the full results from the + ## external program execution. + global run: function(cmd: Command): Result; +} + +redef record Command += { + # The prefix name for tracking temp files. + prefix_name: string &optional; +}; + +global results: table[string] of Result = table(); +global finished_commands: set[string]; +global tmp_files: set[string] = set(); + +type OneLine: record { line: string; }; + +event Exec::stdout_line(description: Input::EventDescription, tpe: Input::Event, s: string) + { + local name = sub(description$name, /_[^_]*$/, ""); + + local result = results[name]; + if ( ! results[name]?$stdout ) + result$stdout = vector(s); + else + result$stdout[|result$stdout|] = s; + } + +event Exec::stderr_line(description: Input::EventDescription, tpe: Input::Event, s: string) + { + local name = sub(description$name, /_[^_]*$/, ""); + + local result = results[name]; + if ( ! results[name]?$stderr ) + result$stderr = vector(s); + else + result$stderr[|result$stderr|] = s; + } + +event Exec::file_line(description: Input::EventDescription, tpe: Input::Event, s: string) + { + local parts = split1(description$name, /_/); + local name = parts[1]; + local track_file = parts[2]; + + local result = results[name]; + if ( ! 
result?$files ) + result$files = table(); + + if ( track_file !in result$files ) + result$files[track_file] = vector(s); + else + result$files[track_file][|result$files[track_file]|] = s; + } + +event Exec::cleanup_and_do_callback(name: string) + { + Input::remove(fmt("%s_stdout", name)); + system(fmt("rm %s_stdout", name)); + delete tmp_files[fmt("%s_stdout", name)]; + + Input::remove(fmt("%s_stderr", name)); + system(fmt("rm %s_stderr", name)); + delete tmp_files[fmt("%s_stderr", name)]; + + Input::remove(fmt("%s_done", name)); + system(fmt("rm %s_done", name)); + delete tmp_files[fmt("%s_done", name)]; + + # Indicate to the "when" async watcher that this command is done. + add finished_commands[name]; + } + +event Exec::run_done(description: Input::EventDescription, tpe: Input::Event, s: string) + { + local name = sub(description$name, /_[^_]*$/, ""); + + if ( /^exit_code:/ in s ) + results[name]$exit_code = to_count(split1(s, /:/)[2]); + else if ( s == "done" ) + # Wait one second to allow all threads to read all of their input + # and forward it. + schedule 1sec { Exec::cleanup_and_do_callback(name) }; + } + +event Exec::start_watching_files(cmd: Command) + { + Input::add_event([$source=fmt("%s_done", cmd$prefix_name), + $name=fmt("%s_done", cmd$prefix_name), + $reader=Input::READER_RAW, + $mode=Input::STREAM, + $want_record=F, + $fields=OneLine, + $ev=Exec::run_done]); + + Input::add_event([$source=fmt("%s_stdout", cmd$prefix_name), + $name=fmt("%s_stdout", cmd$prefix_name), + $reader=Input::READER_RAW, + $mode=Input::STREAM, + $want_record=F, + $fields=OneLine, + $ev=Exec::stdout_line]); + + Input::add_event([$source=fmt("%s_stderr", cmd$prefix_name), + $name=fmt("%s_stderr", cmd$prefix_name), + $reader=Input::READER_RAW, + $mode=Input::STREAM, + $want_record=F, + $fields=OneLine, + $ev=Exec::stderr_line]); + + if ( cmd?$read_files ) + { + for ( read_file in cmd$read_files ) + { + Input::add_event([$source=fmt("%s", read_file), + $name=fmt("%s_%s", cmd$prefix_name, read_file), + $reader=Input::READER_RAW, + $mode=Input::STREAM, + $want_record=F, + $fields=OneLine, + $ev=Exec::file_line]); + } + } + } + +function run(cmd: Command): Result + { + cmd$prefix_name = "/tmp/bro-exec-" + unique_id(""); + system(fmt("touch %s_done %s_stdout %s_stderr 2>/dev/null", cmd$prefix_name, cmd$prefix_name, cmd$prefix_name)); + add tmp_files[fmt("%s_done", cmd$prefix_name)]; + add tmp_files[fmt("%s_stdout", cmd$prefix_name)]; + add tmp_files[fmt("%s_stderr", cmd$prefix_name)]; + + if ( cmd?$read_files ) + { + for ( read_file in cmd$read_files ) + { + system(fmt("touch %s 2>/dev/null", read_file)); + add tmp_files[read_file]; + } + } + + piped_exec(fmt("%s 2>> %s_stderr 1>> %s_stdout; echo \"exit_code:${?}\" >> %s_done; echo \"done\" >> %s_done", + cmd$cmd, cmd$prefix_name, cmd$prefix_name, cmd$prefix_name, cmd$prefix_name), + cmd$stdin); + + results[cmd$prefix_name] = []; + + schedule 1msec { Exec::start_watching_files(cmd) }; + + return when ( cmd$prefix_name in finished_commands ) + { + delete finished_commands[cmd$prefix_name]; + local result = results[cmd$prefix_name]; + delete results[cmd$prefix_name]; + return result; + } + } + +event bro_done() + { + # We are punting here and just deleting any files that haven't been processed yet. 
+ for ( fname in tmp_files ) + { + system(fmt("rm \"%s\"", str_shell_escape(fname))); + } + } \ No newline at end of file From 035b668f7398cd4b803c9ecc455ce58203de666b Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Mon, 22 Apr 2013 21:52:21 -0400 Subject: [PATCH 002/118] Updates to use new input framework mechanism to execute command line programs. --- scripts/base/utils/exec.bro | 160 ++++++++++++++---------------------- 1 file changed, 60 insertions(+), 100 deletions(-) diff --git a/scripts/base/utils/exec.bro b/scripts/base/utils/exec.bro index fe353cf590..45cd8cb287 100644 --- a/scripts/base/utils/exec.bro +++ b/scripts/base/utils/exec.bro @@ -23,6 +23,8 @@ export { type Result: record { ## Exit code from the program. exit_code: count &default=0; + ## True if the command was terminated with a signal. + signal_exit: bool &default=F; ## Each line of standard out. stdout: vector of string &optional; ## Each line of standard error. @@ -41,39 +43,45 @@ export { ## returns: A record representing the full results from the ## external program execution. global run: function(cmd: Command): Result; + + ## The system directory for temp files. + const tmp_dir = "/tmp" &redef; } redef record Command += { - # The prefix name for tracking temp files. - prefix_name: string &optional; + # The unique id for tracking executors. + uid: string &optional; }; global results: table[string] of Result = table(); global finished_commands: set[string]; -global tmp_files: set[string] = set(); +global currently_tracked_files: set[string] = set(); +type OneLine: record { + s: string; + is_stderr: bool; +}; -type OneLine: record { line: string; }; +type FileLine: record { + s: string; +}; -event Exec::stdout_line(description: Input::EventDescription, tpe: Input::Event, s: string) +event Exec::line(description: Input::EventDescription, tpe: Input::Event, s: string, is_stderr: bool) { - local name = sub(description$name, /_[^_]*$/, ""); - - local result = results[name]; - if ( ! results[name]?$stdout ) - result$stdout = vector(s); + local result = results[description$name]; + if ( is_stderr ) + { + if ( ! result?$stderr ) + result$stderr = vector(s); + else + result$stderr[|result$stderr|] = s; + } else - result$stdout[|result$stdout|] = s; - } - -event Exec::stderr_line(description: Input::EventDescription, tpe: Input::Event, s: string) - { - local name = sub(description$name, /_[^_]*$/, ""); - - local result = results[name]; - if ( ! results[name]?$stderr ) - result$stderr = vector(s); - else - result$stderr[|result$stderr|] = s; + { + if ( ! 
result?$stdout ) + result$stdout = vector(s); + else + result$stdout[|result$stdout|] = s; + } } event Exec::file_line(description: Input::EventDescription, tpe: Input::Event, s: string) @@ -92,107 +100,59 @@ event Exec::file_line(description: Input::EventDescription, tpe: Input::Event, s result$files[track_file][|result$files[track_file]|] = s; } -event Exec::cleanup_and_do_callback(name: string) +event InputRaw::process_finished(name: string, source:string, exit_code:count, signal_exit:bool) { - Input::remove(fmt("%s_stdout", name)); - system(fmt("rm %s_stdout", name)); - delete tmp_files[fmt("%s_stdout", name)]; - - Input::remove(fmt("%s_stderr", name)); - system(fmt("rm %s_stderr", name)); - delete tmp_files[fmt("%s_stderr", name)]; - - Input::remove(fmt("%s_done", name)); - system(fmt("rm %s_done", name)); - delete tmp_files[fmt("%s_done", name)]; + results[name]$exit_code = exit_code; + results[name]$signal_exit = signal_exit; + Input::remove(name); # Indicate to the "when" async watcher that this command is done. add finished_commands[name]; } -event Exec::run_done(description: Input::EventDescription, tpe: Input::Event, s: string) +event Exec::start_watching_file(uid: string, read_file: string) { - local name = sub(description$name, /_[^_]*$/, ""); - - if ( /^exit_code:/ in s ) - results[name]$exit_code = to_count(split1(s, /:/)[2]); - else if ( s == "done" ) - # Wait one second to allow all threads to read all of their input - # and forward it. - schedule 1sec { Exec::cleanup_and_do_callback(name) }; - } - -event Exec::start_watching_files(cmd: Command) - { - Input::add_event([$source=fmt("%s_done", cmd$prefix_name), - $name=fmt("%s_done", cmd$prefix_name), + Input::add_event([$source=fmt("%s", read_file), + $name=fmt("%s_%s", uid, read_file), $reader=Input::READER_RAW, $mode=Input::STREAM, $want_record=F, - $fields=OneLine, - $ev=Exec::run_done]); - - Input::add_event([$source=fmt("%s_stdout", cmd$prefix_name), - $name=fmt("%s_stdout", cmd$prefix_name), - $reader=Input::READER_RAW, - $mode=Input::STREAM, - $want_record=F, - $fields=OneLine, - $ev=Exec::stdout_line]); - - Input::add_event([$source=fmt("%s_stderr", cmd$prefix_name), - $name=fmt("%s_stderr", cmd$prefix_name), - $reader=Input::READER_RAW, - $mode=Input::STREAM, - $want_record=F, - $fields=OneLine, - $ev=Exec::stderr_line]); - - if ( cmd?$read_files ) - { - for ( read_file in cmd$read_files ) - { - Input::add_event([$source=fmt("%s", read_file), - $name=fmt("%s_%s", cmd$prefix_name, read_file), - $reader=Input::READER_RAW, - $mode=Input::STREAM, - $want_record=F, - $fields=OneLine, - $ev=Exec::file_line]); - } - } + $fields=FileLine, + $ev=Exec::file_line]); } function run(cmd: Command): Result { - cmd$prefix_name = "/tmp/bro-exec-" + unique_id(""); - system(fmt("touch %s_done %s_stdout %s_stderr 2>/dev/null", cmd$prefix_name, cmd$prefix_name, cmd$prefix_name)); - add tmp_files[fmt("%s_done", cmd$prefix_name)]; - add tmp_files[fmt("%s_stdout", cmd$prefix_name)]; - add tmp_files[fmt("%s_stderr", cmd$prefix_name)]; + cmd$uid = unique_id(""); + results[cmd$uid] = []; if ( cmd?$read_files ) { for ( read_file in cmd$read_files ) { - system(fmt("touch %s 2>/dev/null", read_file)); - add tmp_files[read_file]; + add currently_tracked_files[read_file]; + system(fmt("touch \"%s\" 2>/dev/null", str_shell_escape(read_file))); + schedule 1msec { Exec::start_watching_file(cmd$uid, read_file) }; } } - piped_exec(fmt("%s 2>> %s_stderr 1>> %s_stdout; echo \"exit_code:${?}\" >> %s_done; echo \"done\" >> %s_done", - cmd$cmd, 
cmd$prefix_name, cmd$prefix_name, cmd$prefix_name, cmd$prefix_name), - cmd$stdin); + local config_strings: table[string] of string = { + ["stdin"] = cmd$stdin, + ["read_stderr"] = "1", + }; + Input::add_event([$name=cmd$uid, + $source=fmt("%s |", cmd$cmd), + $reader=Input::READER_RAW, + $fields=Exec::OneLine, + $ev=Exec::line, + $want_record=F, + $config=config_strings]); - results[cmd$prefix_name] = []; - - schedule 1msec { Exec::start_watching_files(cmd) }; - - return when ( cmd$prefix_name in finished_commands ) + return when ( cmd$uid in finished_commands ) { - delete finished_commands[cmd$prefix_name]; - local result = results[cmd$prefix_name]; - delete results[cmd$prefix_name]; + delete finished_commands[cmd$uid]; + local result = results[cmd$uid]; + delete results[cmd$uid]; return result; } } @@ -200,7 +160,7 @@ function run(cmd: Command): Result event bro_done() { # We are punting here and just deleting any files that haven't been processed yet. - for ( fname in tmp_files ) + for ( fname in currently_tracked_files ) { system(fmt("rm \"%s\"", str_shell_escape(fname))); } From 08348b2bc29f0d4661fbe61be355716a3ee51a25 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Mon, 22 Apr 2013 21:53:00 -0400 Subject: [PATCH 003/118] Update to make Dir::monitor watch inodes instead of file names. --- scripts/base/utils/dir.bro | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/scripts/base/utils/dir.bro b/scripts/base/utils/dir.bro index 2ed1c8e6e9..b154fe000e 100644 --- a/scripts/base/utils/dir.bro +++ b/scripts/base/utils/dir.bro @@ -23,11 +23,11 @@ export { event Dir::monitor_ev(dir: string, last_files: set[string], callback: function(fname: string)) { - when ( local result = Exec::run([$cmd=fmt("ls \"%s\"", str_shell_escape(dir))]) ) + when ( local result = Exec::run([$cmd=fmt("ls -i \"%s/\"", str_shell_escape(dir))]) ) { if ( result$exit_code != 0 ) { - Reporter::warning("Requested monitoring of non-existent directory."); + Reporter::warning(fmt("Requested monitoring of non-existent directory (%s).", dir)); return; } @@ -35,9 +35,10 @@ event Dir::monitor_ev(dir: string, last_files: set[string], callback: function(f local files = result$stdout; for ( i in files ) { - if ( files[i] !in last_files ) - callback(build_path_compressed(dir, files[i])); - add current_files[files[i]]; + local parts = split1(files[i], / /); + if ( parts[1] !in last_files ) + callback(build_path_compressed(dir, parts[2])); + add current_files[parts[1]]; } schedule polling_interval { Dir::monitor_ev(dir, current_files, callback) }; } From 4d275522c7a87f8c69b1494126cc995a20b2d66b Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Thu, 23 May 2013 16:03:26 -0700 Subject: [PATCH 004/118] Add abstraction for vector of bits. A bitvector is a vector of bits with underlying block storage. Since C++ has no notion of lvalues in the context of bits, we use a small wrapper class Reference that masks the desired bit in the corresponding block. 
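
For illustration (sketch added as a reading aid, not part of the original
commit message), a minimal use of the new class through the public API
declared in BitVector.h below:

    #include "BitVector.h"

    BitVector v(64);              // 64 bits, all initialized to 0
    v.set(3);                     // set bit 3 via the named interface
    v[10] = true;                 // Reference proxy makes operator[] an lvalue
    v[10] |= false;               // compound assignment through the proxy
    bool b = v[3];                // reading a bit converts to bool
    BitVector::size_type ones = v.count();        // population count, here 2
    BitVector::size_type first = v.find_first();  // first 1-bit, here 3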
--- src/BitVector.cc | 455 +++++++++++++++++++++++++++++++++++++++++++++ src/BitVector.h | 324 ++++++++++++++++++++++++++++++++ src/CMakeLists.txt | 1 + 3 files changed, 780 insertions(+) create mode 100644 src/BitVector.cc create mode 100644 src/BitVector.h diff --git a/src/BitVector.cc b/src/BitVector.cc new file mode 100644 index 0000000000..2f714a6c79 --- /dev/null +++ b/src/BitVector.cc @@ -0,0 +1,455 @@ +#include "BitVector.h" + +#include +#include + +BitVector::size_type BitVector::npos = static_cast(-1); +BitVector::block_type BitVector::bits_per_block = + std::numeric_limits::digits; + +namespace { + +uint8_t count_table[] = { + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, + 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, + 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, + 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, + 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, + 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, + 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, + 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5, + 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3, + 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, + 6, 7, 6, 7, 7, 8 +}; + +} // namespace + +BitVector::Reference::Reference(block_type& block, block_type i) + : block_(block), + mask_(block_type(1) << i) + { + assert(i < bits_per_block); + } + +BitVector::Reference& BitVector::Reference::flip() + { + block_ ^= mask_; + return *this; + } + +BitVector::Reference::operator bool() const + { + return (block_ & mask_) != 0; + } + +bool BitVector::Reference::operator~() const + { + return (block_ & mask_) == 0; + } + +BitVector::Reference& BitVector::Reference::operator=(bool x) + { + x ? block_ |= mask_ : block_ &= ~mask_; + return *this; + } + +BitVector::Reference& BitVector::Reference::operator=(Reference const& other) + { + other ? block_ |= mask_ : block_ &= ~mask_; + return *this; + } + +BitVector::Reference& BitVector::Reference::operator|=(bool x) + { + if (x) + block_ |= mask_; + return *this; + } + +BitVector::Reference& BitVector::Reference::operator&=(bool x) + { + if (! x) + block_ &= ~mask_; + return *this; + } + +BitVector::Reference& BitVector::Reference::operator^=(bool x) + { + if (x) + block_ ^= mask_; + return *this; + } + +BitVector::Reference& BitVector::Reference::operator-=(bool x) + { + if (x) + block_ &= ~mask_; + return *this; + } + + +BitVector::BitVector() : num_bits_(0) { } + +BitVector::BitVector(size_type size, bool value) + : bits_(bits_to_blocks(size), value ? 
~block_type(0) : 0), + num_bits_(size) +{ } + +BitVector::BitVector(BitVector const& other) + : bits_(other.bits_), + num_bits_(other.num_bits_) +{ } + +BitVector BitVector::operator~() const + { + BitVector b(*this); + b.flip(); + return b; + } + +BitVector& BitVector::operator=(BitVector const& other) + { + bits_ = other.bits_; + return *this; + } + +BitVector BitVector::operator<<(size_type n) const + { + BitVector b(*this); + return b <<= n; + } + +BitVector BitVector::operator>>(size_type n) const + { + BitVector b(*this); + return b >>= n; + } + +BitVector& BitVector::operator<<=(size_type n) + { + if (n >= num_bits_) + return reset(); + + if (n > 0) + { + size_type last = blocks() - 1; + size_type div = n / bits_per_block; + block_type r = bit_index(n); + block_type* b = &bits_[0]; + assert(blocks() >= 1); + assert(div <= last); + + if (r != 0) + { + for (size_type i = last - div; i > 0; --i) + b[i + div] = (b[i] << r) | (b[i - 1] >> (bits_per_block - r)); + b[div] = b[0] << r; + } + else + { + for (size_type i = last-div; i > 0; --i) + b[i + div] = b[i]; + b[div] = b[0]; + } + + std::fill_n(b, div, block_type(0)); + zero_unused_bits(); + } + + return *this; + } + +BitVector& BitVector::operator>>=(size_type n) + { + if (n >= num_bits_) + return reset(); + + if (n > 0) + { + size_type last = blocks() - 1; + size_type div = n / bits_per_block; + block_type r = bit_index(n); + block_type* b = &bits_[0]; + assert(blocks() >= 1); + assert(div <= last); + + if (r != 0) + { + for (size_type i = last - div; i > 0; --i) + b[i - div] = (b[i] >> r) | (b[i + 1] << (bits_per_block - r)); + b[last - div] = b[last] >> r; + } + else + { + for (size_type i = div; i <= last; ++i) + b[i-div] = b[i]; + } + + std::fill_n(b + (blocks() - div), div, block_type(0)); + } + return *this; + } + +BitVector& BitVector::operator&=(BitVector const& other) + { + assert(size() >= other.size()); + for (size_type i = 0; i < blocks(); ++i) + bits_[i] &= other.bits_[i]; + return *this; + } + +BitVector& BitVector::operator|=(BitVector const& other) + { + assert(size() >= other.size()); + for (size_type i = 0; i < blocks(); ++i) + bits_[i] |= other.bits_[i]; + return *this; + } + +BitVector& BitVector::operator^=(BitVector const& other) + { + assert(size() >= other.size()); + for (size_type i = 0; i < blocks(); ++i) + bits_[i] ^= other.bits_[i]; + return *this; + } + +BitVector& BitVector::operator-=(BitVector const& other) + { + assert(size() >= other.size()); + for (size_type i = 0; i < blocks(); ++i) + bits_[i] &= ~other.bits_[i]; + return *this; + } + +BitVector operator&(BitVector const& x, BitVector const& y) + { + BitVector b(x); + return b &= y; + } + +BitVector operator|(BitVector const& x, BitVector const& y) + { + BitVector b(x); + return b |= y; + } + +BitVector operator^(BitVector const& x, BitVector const& y) + { + BitVector b(x); + return b ^= y; + } + +BitVector operator-(BitVector const& x, BitVector const& y) + { + BitVector b(x); + return b -= y; + } + +bool operator==(BitVector const& x, BitVector const& y) + { + return x.num_bits_ == y.num_bits_ && x.bits_ == y.bits_; + } + +bool operator!=(BitVector const& x, BitVector const& y) + { + return ! 
(x == y); + } + +bool operator<(BitVector const& x, BitVector const& y) + { + assert(x.size() == y.size()); + for (BitVector::size_type r = x.blocks(); r > 0; --r) + { + BitVector::size_type i = r - 1; + if (x.bits_[i] < y.bits_[i]) + return true; + else if (x.bits_[i] > y.bits_[i]) + return false; + } + return false; + } + +void BitVector::resize(size_type n, bool value) + { + size_type old = blocks(); + size_type required = bits_to_blocks(n); + block_type block_value = value ? ~block_type(0) : block_type(0); + + if (required != old) + bits_.resize(required, block_value); + + if (value && (n > num_bits_) && extra_bits()) + bits_[old - 1] |= (block_value << extra_bits()); + + num_bits_ = n; + zero_unused_bits(); + } + +void BitVector::clear() + { + bits_.clear(); + num_bits_ = 0; + } + +void BitVector::push_back(bool bit) + { + size_type s = size(); + resize(s + 1); + set(s, bit); + } + +void BitVector::append(block_type block) + { + size_type excess = extra_bits(); + if (excess) + { + assert(! bits_.empty()); + bits_.push_back(block >> (bits_per_block - excess)); + bits_[bits_.size() - 2] |= (block << excess); + } + else + { + bits_.push_back(block); + } + num_bits_ += bits_per_block; + } + +BitVector& BitVector::set(size_type i, bool bit) + { + assert(i < num_bits_); + + if (bit) + bits_[block_index(i)] |= bit_mask(i); + else + reset(i); + + return *this; + } + +BitVector& BitVector::set() + { + std::fill(bits_.begin(), bits_.end(), ~block_type(0)); + zero_unused_bits(); + return *this; + } + +BitVector& BitVector::reset(size_type i) + { + assert(i < num_bits_); + bits_[block_index(i)] &= ~bit_mask(i); + return *this; + } + +BitVector& BitVector::reset() + { + std::fill(bits_.begin(), bits_.end(), block_type(0)); + return *this; + } + +BitVector& BitVector::flip(size_type i) + { + assert(i < num_bits_); + bits_[block_index(i)] ^= bit_mask(i); + return *this; + } + +BitVector& BitVector::flip() + { + for (size_type i = 0; i < blocks(); ++i) + bits_[i] = ~bits_[i]; + zero_unused_bits(); + return *this; + } + +bool BitVector::operator[](size_type i) const + { + assert(i < num_bits_); + return (bits_[block_index(i)] & bit_mask(i)) != 0; + } + +BitVector::Reference BitVector::operator[](size_type i) + { + assert(i < num_bits_); + return Reference(bits_[block_index(i)], bit_index(i)); + } + +BitVector::size_type BitVector::count() const + { + std::vector::const_iterator first = bits_.begin(); + size_t n = 0; + size_type length = blocks(); + while (length) + { + block_type block = *first; + while (block) + { + // TODO: use __popcnt if available. + n += count_table[block & ((1u << 8) - 1)]; + block >>= 8; + } + ++first; + --length; + } + return n; + } + +BitVector::size_type BitVector::blocks() const + { + return bits_.size(); + } + +BitVector::size_type BitVector::size() const + { + return num_bits_; + } + +bool BitVector::empty() const + { + return bits_.empty(); + } + +BitVector::size_type BitVector::find_first() const + { + return find_from(0); + } + +BitVector::size_type BitVector::find_next(size_type i) const + { + if (i >= (size() - 1) || size() == 0) + return npos; + ++i; + size_type bi = block_index(i); + block_type block = bits_[bi] & (~block_type(0) << bit_index(i)); + return block ? 
bi * bits_per_block + lowest_bit(block) : find_from(bi + 1); + } + +BitVector::size_type BitVector::lowest_bit(block_type block) + { + block_type x = block - (block & (block - 1)); + size_type log = 0; + while (x >>= 1) + ++log; + return log; + } + +BitVector::block_type BitVector::extra_bits() const + { + return bit_index(size()); + } + +void BitVector::zero_unused_bits() + { + if (extra_bits()) + bits_.back() &= ~(~block_type(0) << extra_bits()); + } + +BitVector::size_type BitVector::find_from(size_type i) const + { + while (i < blocks() && bits_[i] == 0) + ++i; + if (i >= blocks()) + return npos; + return i * bits_per_block + lowest_bit(bits_[i]); + } diff --git a/src/BitVector.h b/src/BitVector.h new file mode 100644 index 0000000000..46d7e2df8f --- /dev/null +++ b/src/BitVector.h @@ -0,0 +1,324 @@ +#ifndef BitVector_h +#define BitVector_h + +#include +#include + +/** + * A vector of bits. + */ +class BitVector { +public: + typedef size_t block_type; + typedef size_t size_type; + static size_type npos; + static block_type bits_per_block; + +public: + /** + * An lvalue proxy for single bits. + */ + class Reference { + friend class BitVector; + Reference(block_type& block, block_type i); + + public: + Reference& flip(); + operator bool() const; + bool operator~() const; + Reference& operator=(bool x); + Reference& operator=(Reference const& other); + Reference& operator|=(bool x); + Reference& operator&=(bool x); + Reference& operator^=(bool x); + Reference& operator-=(bool x); + + private: + void operator&(); + block_type& block_; + block_type const mask_; + }; + + typedef bool const_reference; + + /** + * Constructs an empty bit vector. + */ + BitVector(); + + /** + * Constructs a bit vector of a given size. + * @param size The number of bits. + * @param value The value for each bit. + */ + explicit BitVector(size_type size, bool value = false); + + /** + * Constructs a bit vector from a sequence of blocks. + */ + template + BitVector(InputIterator first, InputIterator last) + { + bits_.insert(bits_.end(), first, last); + num_bits_ = bits_.size() * bits_per_block; + } + + /** + * Copy-constructs a bit vector. + * @param other The bit vector to copy. + */ + BitVector(const BitVector& other); + + /** + * Assigns another bit vector to this instance. + * @param other The RHS of the assignment. + */ + BitVector& operator=(const BitVector& other); + + // + // Bitwise operations + // + BitVector operator~() const; + BitVector operator<<(size_type n) const; + BitVector operator>>(size_type n) const; + BitVector& operator<<=(size_type n); + BitVector& operator>>=(size_type n); + BitVector& operator&=(BitVector const& other); + BitVector& operator|=(BitVector const& other); + BitVector& operator^=(BitVector const& other); + BitVector& operator-=(BitVector const& other); + friend BitVector operator&(BitVector const& x, BitVector const& y); + friend BitVector operator|(BitVector const& x, BitVector const& y); + friend BitVector operator^(BitVector const& x, BitVector const& y); + friend BitVector operator-(BitVector const& x, BitVector const& y); + + // + // Relational operators + // + friend bool operator==(BitVector const& x, BitVector const& y); + friend bool operator!=(BitVector const& x, BitVector const& y); + friend bool operator<(BitVector const& x, BitVector const& y); + + // + // Basic operations + // + /** Appends the bits in a sequence of values. + * @tparam Iterator A forward iterator. + * @param first An iterator pointing to the first element of the sequence. 
+ * @param last An iterator pointing to one past the last element of the + * sequence. + */ + template + void append(ForwardIterator first, ForwardIterator last) + { + if (first == last) + return; + + block_type excess = extra_bits(); + typename std::iterator_traits::difference_type delta = + std::distance(first, last); + + bits_.reserve(blocks() + delta); + if (excess == 0) + { + bits_.back() |= (*first << excess); + do + { + block_type b = *first++ >> (bits_per_block - excess); + bits_.push_back(b | (first == last ? 0 : *first << excess)); + } while (first != last); + } + else + { + bits_.insert(bits_.end(), first, last); + } + num_bits_ += bits_per_block * delta; + } + + /** + * Appends the bits in a given block. + * @param block The block containing bits to append. + */ + void append(block_type block); + + /** Appends a single bit to the end of the bit vector. + * @param bit The value of the bit. + */ + void push_back(bool bit); + + /** + * Clears all bits in the bitvector. + */ + void clear(); + + /** + * Resizes the bit vector to a new number of bits. + * @param n The new number of bits of the bit vector. + * @param value The bit value of new values, if the vector expands. + */ + void resize(size_type n, bool value = false); + + /** + * Sets a bit at a specific position to a given value. + * @param i The bit position. + * @param bit The value assigned to position *i*. + * @return A reference to the bit vector instance. + */ + BitVector& set(size_type i, bool bit = true); + + /** + * Sets all bits to 1. + * @return A reference to the bit vector instance. + */ + BitVector& set(); + + /** + * Resets a bit at a specific position, i.e., sets it to 0. + * @param i The bit position. + * @return A reference to the bit vector instance. + */ + BitVector& reset(size_type i); + + /** + * Sets all bits to 0. + * @return A reference to the bit vector instance. + */ + BitVector& reset(); + + /** + * Toggles/flips a bit at a specific position. + * @param i The bit position. + * @return A reference to the bit vector instance. + */ + BitVector& flip(size_type i); + + /** + * Computes the complement. + * @return A reference to the bit vector instance. + */ + BitVector& flip(); + + /** Retrieves a single bit. + * @param i The bit position. + * @return A mutable reference to the bit at position *i*. + */ + Reference operator[](size_type i); + + /** + * Retrieves a single bit. + * @param i The bit position. + * @return A const-reference to the bit at position *i*. + */ + const_reference operator[](size_type i) const; + + /** + * Counts the number of 1-bits in the bit vector. Also known as *population + * count* or *Hamming weight*. + * @return The number of bits set to 1. + */ + size_type count() const; + + /** + * Retrieves the number of blocks of the underlying storage. + * @param The number of blocks that represent `size()` bits. + */ + size_type blocks() const; + + /** + * Retrieves the number of bits the bitvector consist of. + * @return The length of the bit vector in bits. + */ + size_type size() const; + + /** + * Checks whether the bit vector is empty. + * @return `true` iff the bitvector has zero length. + */ + bool empty() const; + + /** + * Finds the bit position of of the first 1-bit. + * @return The position of the first bit that equals to one or `npos` if no + * such bit exists. + */ + size_type find_first() const; + + /** + * Finds the next 1-bit from a given starting position. + * + * @param i The index where to start looking. 
+ * + * @return The position of the first bit that equals to 1 after position + * *i* or `npos` if no such bit exists. + */ + size_type find_next(size_type i) const; + +private: + /** + * Computes the block index for a given bit position. + */ + static size_type block_index(size_type i) + { + return i / bits_per_block; + } + + /** + * Computes the bit index within a given block for a given bit position. + */ + static block_type bit_index(size_type i) + { + return i % bits_per_block; + } + + /** + * Computes the bitmask block to extract a bit a given bit position. + */ + static block_type bit_mask(size_type i) + { + return block_type(1) << bit_index(i); + } + + /** + * Computes the number of blocks needed to represent a given number of + * bits. + * @param bits the number of bits. + * @return The number of blocks to represent *bits* number of bits. + */ + static size_type bits_to_blocks(size_type bits) + { + return bits / bits_per_block + + static_cast(bits % bits_per_block != 0); + } + + /** + * Computes the bit position first 1-bit in a given block. + * @param block The block to inspect. + * @return The bit position where *block* has its first bit set to 1. + */ + static size_type lowest_bit(block_type block); + + /** + * Computes the number of excess/unused bits in the bit vector. + */ + block_type extra_bits() const; + + /** + * If the number of bits in the vector are not not a multiple of + * bitvector::bits_per_block, then the last block exhibits unused bits which + * this function resets. + */ + void zero_unused_bits(); + + /** + * Looks for the first 1-bit starting at a given position. + * @param i The block index to start looking. + * @return The block index of the first 1-bit starting from *i* or + * `bitvector::npos` if no 1-bit exists. + */ + size_type find_from(size_type i) const; + + std::vector bits_; + size_type num_bits_; +}; + +#endif diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 447b7d9ec7..33aaab29c1 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -303,6 +303,7 @@ set(bro_SRCS Base64.cc BitTorrent.cc BitTorrentTracker.cc + BitVector.cc BPF_Program.cc BroDoc.cc BroDocObj.cc From 9e32eaad6db992e60a3d669c4d8c7b5016cc8cbc Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Tue, 28 May 2013 20:58:01 -0700 Subject: [PATCH 005/118] Make bitvectors serializable. 
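
(Reading aid, not part of the original commit message.) The layout written
by BitVector::DoSerialize in the diff below is simply

    uint64   number of blocks
    uint64   block[0] .. block[n-1], each block widened to 64 bits
    uint64   number of bits

and BitVector::DoUnserialize reads the same sequence back to rebuild the
vector.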
--- src/BitVector.cc | 57 +++++++++++++++++++++++++++++++++++++++++++++-- src/BitVector.h | 13 ++++++++--- src/SerialTypes.h | 2 ++ 3 files changed, 67 insertions(+), 5 deletions(-) diff --git a/src/BitVector.cc b/src/BitVector.cc index 2f714a6c79..f57301d506 100644 --- a/src/BitVector.cc +++ b/src/BitVector.cc @@ -2,6 +2,7 @@ #include #include +#include "Serializer.h" BitVector::size_type BitVector::npos = static_cast(-1); BitVector::block_type BitVector::bits_per_block = @@ -62,7 +63,7 @@ BitVector::Reference& BitVector::Reference::operator=(Reference const& other) BitVector::Reference& BitVector::Reference::operator|=(bool x) { - if (x) + if (x) block_ |= mask_; return *this; } @@ -73,7 +74,7 @@ BitVector::Reference& BitVector::Reference::operator&=(bool x) block_ &= ~mask_; return *this; } - + BitVector::Reference& BitVector::Reference::operator^=(bool x) { if (x) @@ -453,3 +454,55 @@ BitVector::size_type BitVector::find_from(size_type i) const return npos; return i * bits_per_block + lowest_bit(bits_[i]); } + +bool BitVector::Serialize(SerialInfo* info) const + { + return SerialObj::Serialize(info); + } + +BitVector* BitVector::Unserialize(UnserialInfo* info) + { + return reinterpret_cast( + SerialObj::Unserialize(info, SER_BITVECTOR)); + } + +IMPLEMENT_SERIAL(BitVector, SER_BITVECTOR); + +bool BitVector::DoSerialize(SerialInfo* info) const + { + DO_SERIALIZE(SER_BITVECTOR, SerialObj); + + if ( ! SERIALIZE(static_cast(bits_.size())) ) + return false; + + for (size_t i = 0; i < bits_.size(); ++i) + if ( ! SERIALIZE(static_cast(bits_[i])) ) + return false; + + return SERIALIZE(static_cast(num_bits_)); + } + +bool BitVector::DoUnserialize(UnserialInfo* info) + { + DO_UNSERIALIZE(SerialObj); + + uint64 size; + if ( ! UNSERIALIZE(&size) ) + return false; + + bits_.resize(static_cast(size)); + uint64 block; + for ( size_t i = 0; i < bits_.size(); ++i ) + { + if ( ! UNSERIALIZE(&block) ) + return false; + bits_[i] = static_cast(block); + } + + uint64 num_bits; + if ( ! UNSERIALIZE(&num_bits) ) + return false; + num_bits_ = static_cast(num_bits); + + return true; + } diff --git a/src/BitVector.h b/src/BitVector.h index 46d7e2df8f..9900dd103e 100644 --- a/src/BitVector.h +++ b/src/BitVector.h @@ -3,11 +3,12 @@ #include #include +#include "SerialObj.h" /** * A vector of bits. */ -class BitVector { +class BitVector : SerialObj { public: typedef size_t block_type; typedef size_t size_type; @@ -42,7 +43,7 @@ public: typedef bool const_reference; /** - * Constructs an empty bit vector. + * Default-constructs an empty bit vector. */ BitVector(); @@ -253,6 +254,12 @@ public: */ size_type find_next(size_type i) const; + bool Serialize(SerialInfo* info) const; + static BitVector* Unserialize(UnserialInfo* info); + +protected: + DECLARE_SERIAL(BitVector); + private: /** * Computes the block index for a given bit position. @@ -286,7 +293,7 @@ private: */ static size_type bits_to_blocks(size_type bits) { - return bits / bits_per_block + return bits / bits_per_block + static_cast(bits % bits_per_block != 0); } diff --git a/src/SerialTypes.h b/src/SerialTypes.h index 723badab1e..c9c0c34a33 100644 --- a/src/SerialTypes.h +++ b/src/SerialTypes.h @@ -49,6 +49,7 @@ SERIAL_IS(STATE_ACCESS, 0x1100) SERIAL_IS_BO(CASE, 0x1200) SERIAL_IS(LOCATION, 0x1300) SERIAL_IS(RE_MATCHER, 0x1400) +SERIAL_IS(BITVECTOR, 0x1500) // These are the externally visible types. 
const SerialType SER_NONE = 0; @@ -202,5 +203,6 @@ SERIAL_CONST2(STATE_ACCESS) SERIAL_CONST2(CASE) SERIAL_CONST2(LOCATION) SERIAL_CONST2(RE_MATCHER) +SERIAL_CONST2(BITVECTOR) #endif From d873db03cef3bb09d45e789d69607487e36b6093 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Fri, 31 May 2013 18:31:14 -0700 Subject: [PATCH 006/118] Add draft of Bloom filter type hierarchy. --- src/BloomFilter.h | 266 +++++++++++++++++++++++++++++++++++++++++++++ src/CMakeLists.txt | 1 + 2 files changed, 267 insertions(+) create mode 100644 src/BloomFilter.h diff --git a/src/BloomFilter.h b/src/BloomFilter.h new file mode 100644 index 0000000000..a767c6b8b8 --- /dev/null +++ b/src/BloomFilter.h @@ -0,0 +1,266 @@ +#ifndef BloomFilter_h +#define BloomFilter_h + +#include +#include "BitVector.h" +#include "Hash.h" +#include "H3.h" + +/** + * A vector of counters, each of which have a fixed number of bits. + */ +class CounterVector : SerialObj { +public: + /** + * Constructs a counter vector having cells of a given width. + * + * @param width The number of bits that each cell occupies. + */ + explicit CounterVector(unsigned width); + + /** + * Increments a given cell. + * + * @param cell The cell to increment. + * + * @param value The value to add to the current counter in *cell*. + * + * @return `true` if adding *value* to the counter in *cell* succeeded. + */ + bool Increment(size_type cell, count_type value); + + /** + * Decrements a given cell. + * + * @param cell The cell to decrement. + * + * @param value The value to subtract from the current counter in *cell*. + * + * @return `true` if subtracting *value* from the counter in *cell* succeeded. + */ + bool Decrement(size_type cell, count_type value); + + /** + * Retrieves the counter of a given cell. + * + * @param cell The cell index to retrieve the count for. + * + * @return The counter associated with *cell*. + */ + count_type Count(size_type cell) const; + + /** + * Retrieves the number of cells in the storage. + * + * @return The number of cells. + */ + size_type Size() const; + + bool Serialize(SerialInfo* info) const; + static CounterVector* Unserialize(UnserialInfo* info); + +protected: + DECLARE_SERIAL(CounterVector); + + CounterVector(); + +private: + BitVector bits_; + unsigned width_; +}; + +/** + * The abstract base class for hash policies. + * @tparam Codomain An integral type. + */ +class HashPolicy { +public: + typedef hash_t hash_type; + virtual ~HashPolicy() { } + size_t k() const { return k; } + virtual std::vector Hash(const void* x, size_t n) const = 0; +protected: + /** + * A functor that computes a universal hash function. + * @tparam Codomain An integral type. + */ + template + class Hasher { + public: + template + Codomain operator()(const Domain& x) const + { + return h3_(&x, sizeof(x)); + } + Codomain operator()(const void* x, size_t n) const + { + return h3_(x, n); + } + private: + // FIXME: The hardcoded value of 36 comes from UHASH_KEY_SIZE defined in + // Hash.h. I do not know how this value impacts the hash function behavior + // so I'll just copy it verbatim. (Matthias) + H3 h3_; + }; + + HashPolicy(size_t k) : k_(k) { } +private: + size_t k_; +}; + +/** + * The *default* hashing policy. Performs *k* hash function computations. 
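+ *
+ * (Note added for clarity.) This policy keeps *k* independently seeded
+ * H3 hashers and evaluates every one of them on the input, i.e. it
+ * returns (h_0(x), ..., h_{k-1}(x)). The DoubleHashing policy further
+ * below instead derives all *k* values from just two hash functions as
+ * h_i(x) = h1(x) + i * h2(x).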
+ */ +class DefaultHashing : public HashPolicy { +public: + DefaultHashing(size_t k) : HashPolicy(k), hashers_(k) { } + virtual ~DoubleHashing() { } + + virtual std::vector Hash(const void* x, size_t n) const + { + std::vector h(k(), 0); + for (size_t i = 0; i < h.size(); ++i) + h[i] = hashers_[i](x, n); + return h; + } + +private: + std::vector< Hasher > hashers_; +}; + +/** + * The *double-hashing* policy. Uses a linear combination of 2 hash functions. + */ +class DoubleHashing : public HashPolicy { +public: + DoubleHashing(size_t k) : HashPolicy(k), hashers_(k) { } + virtual ~DoubleHashing() { } + + virtual std::vector Hash(const void* x, size_t n) const + { + Codomain h1 = hasher1_(x); + Codomain h2 = hasher2_(x); + std::vector h(k(), 0); + for (size_t i = 0; i < h.size(); ++i) + h[i] = h1 + i * h2; + return h; + } + +private: + Hasher hasher1_; + Hasher hasher2_; +}; + +/** + * The abstract base class for Bloom filters. + */ +class BloomFilter : SerialObj { +public: + virtual ~BloomFilter() { delete hash_; } + + /** + * Adds an element of type T to the Bloom filter. + * @param x The element to add + */ + template + void Add(const T& x) + { + ++elements_; + AddImpl(hash_->Hash(x)); + } + + /** + * Retrieves the associated count of a given value. + * + * @param x The value of type `T` to check. + * + * @return The counter associated with *x*. + */ + template + size_t Count(const T& x) const + { + return CountImpl(hash_->Hash(x)); + } + + /** + * Retrieves the number of elements added to the Bloom filter. + * + * @return The number of elements in this Bloom filter. + */ + size_t Size() const + { + return elements_; + } + +protected: + typedef std::vector HashVector; + + /** + * Default-constructs a Bloom filter. + */ + BloomFilter(); + + /** + * Constructs a BloomFilter. + * @param hash The hashing policy. + */ + BloomFilter(HashPolicy* hash); + + virtual void AddImpl(const HashVector& hashes) = 0; + + virtual size_t CountImpl(const HashVector& hashes) const = 0; + + std::vector Hash(const T& x) const + { + return hash_->Hash(&x, sizeof(x)); + } + +private: + HashPolicy* hash_; // Owned by *this. + + size_t elements_; +}; + +/** + * A basic Bloom filter. + */ +class BasicBloomFilter : public BloomFilter { +public: + BasicBloomFilter(); + BasicBloomFilter(HashPolicy* hash); + +protected: + virtual void AddImpl(const HashVector& h) + { + for ( size_t i = 0; i < h.size(); ++i ) + bits_.set(h[i] % h.size()); + } + + virtual size_t CountImpl(const HashVector& h) const + { + for ( size_t i = 0; i < h.size(); ++i ) + if ( ! bits_[h[i] % h.size()] ) + return 0; + return 1; + } + +private: + BitVector bits_; +}; + +/** + * A counting Bloom filter. + */ +class CountingBloomFilter : public BloomFilter { +public: + CountingBloomFilter(unsigned width); + CountingBloomFilter(HashPolicy* hash); + +protected: + CountingBloomFilter(); + +private: + CounterVector cells_; +}; + +#endif diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 33aaab29c1..11de7772d7 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -304,6 +304,7 @@ set(bro_SRCS BitTorrent.cc BitTorrentTracker.cc BitVector.cc + BloomFilter.cc BPF_Program.cc BroDoc.cc BroDocObj.cc From 190f98f8a901728d507452b09717692c4c227821 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Mon, 3 Jun 2013 10:51:53 -0400 Subject: [PATCH 007/118] Beginning some rework. 
--- scripts/base/files/hash/__load__.bro | 1 + scripts/base/files/hash/main.bro | 22 ++++++++++ .../base/frameworks/file-analysis/main.bro | 40 +++++++++---------- scripts/base/init-default.bro | 2 + 4 files changed, 44 insertions(+), 21 deletions(-) create mode 100644 scripts/base/files/hash/__load__.bro create mode 100644 scripts/base/files/hash/main.bro diff --git a/scripts/base/files/hash/__load__.bro b/scripts/base/files/hash/__load__.bro new file mode 100644 index 0000000000..d551be57d3 --- /dev/null +++ b/scripts/base/files/hash/__load__.bro @@ -0,0 +1 @@ +@load ./main \ No newline at end of file diff --git a/scripts/base/files/hash/main.bro b/scripts/base/files/hash/main.bro new file mode 100644 index 0000000000..cd50d6b291 --- /dev/null +++ b/scripts/base/files/hash/main.bro @@ -0,0 +1,22 @@ + +module FilesHash; + +export { + +} + +event file_hash(f: fa_file, kind: string, hash: string) &priority=5 + { + set_info(f); + switch ( kind ) { + case "md5": + f$info$md5 = hash; + break; + case "sha1": + f$info$sha1 = hash; + break; + case "sha256": + f$info$sha256 = hash; + break; + } + } diff --git a/scripts/base/frameworks/file-analysis/main.bro b/scripts/base/frameworks/file-analysis/main.bro index 142709dcc4..418da53f70 100644 --- a/scripts/base/frameworks/file-analysis/main.bro +++ b/scripts/base/frameworks/file-analysis/main.bro @@ -34,6 +34,9 @@ export { ## For the most part, fields here are derived from ones of the same name ## in :bro:see:`fa_file`. type Info: record { + ## The time when the file was first seen. + ts: time &log; + ## An identifier associated with a single file. id: string &log; @@ -233,25 +236,35 @@ function set_info(f: fa_file) { if ( ! f?$info ) { - local tmp: Info; + local tmp: Info = Info($ts=network_time()); f$info = tmp; } + f$info$ts = network_time(); f$info$id = f$id; - if ( f?$parent_id ) f$info$parent_id = f$parent_id; - if ( f?$source ) f$info$source = f$source; - if ( f?$is_orig ) f$info$is_orig = f$is_orig; + if ( f?$parent_id ) + f$info$parent_id = f$parent_id; + if ( f?$source ) + f$info$source = f$source; + if ( f?$is_orig ) + f$info$is_orig = f$is_orig; f$info$last_active = f$last_active; f$info$seen_bytes = f$seen_bytes; - if ( f?$total_bytes ) f$info$total_bytes = f$total_bytes; + if ( f?$total_bytes ) + f$info$total_bytes = f$total_bytes; f$info$missing_bytes = f$missing_bytes; f$info$overflow_bytes = f$overflow_bytes; f$info$timeout_interval = f$timeout_interval; f$info$bof_buffer_size = f$bof_buffer_size; - if ( f?$mime_type ) f$info$mime_type = f$mime_type; + if ( f?$mime_type ) + f$info$mime_type = f$mime_type; if ( f?$conns ) + { for ( cid in f$conns ) + { add f$info$conn_uids[f$conns[cid]$uid]; + } + } } function set_timeout_interval(f: fa_file, t: interval): bool @@ -324,21 +337,6 @@ event file_timeout(f: fa_file) &priority=5 f$info$timedout = T; } -event file_hash(f: fa_file, kind: string, hash: string) &priority=5 - { - set_info(f); - switch ( kind ) { - case "md5": - f$info$md5 = hash; - break; - case "sha1": - f$info$sha1 = hash; - break; - case "sha256": - f$info$sha256 = hash; - break; - } - } event file_state_remove(f: fa_file) &priority=5 { diff --git a/scripts/base/init-default.bro b/scripts/base/init-default.bro index 829a1b9982..03ba474e0b 100644 --- a/scripts/base/init-default.bro +++ b/scripts/base/init-default.bro @@ -46,4 +46,6 @@ @load base/protocols/ssl @load base/protocols/syslog +@load base/files/hash + @load base/misc/find-checksum-offloading From f529df33e0afa930e4babff66f4a5f590b5eb6d9 Mon Sep 17 00:00:00 2001 
From: Matthias Vallentin Date: Mon, 3 Jun 2013 14:00:28 -0700 Subject: [PATCH 008/118] Stabilize Bloom filter interface. --- src/BloomFilter.cc | 33 ++++++++++++++++++ src/BloomFilter.h | 85 +++++++++++++++++----------------------------- 2 files changed, 65 insertions(+), 53 deletions(-) create mode 100644 src/BloomFilter.cc diff --git a/src/BloomFilter.cc b/src/BloomFilter.cc new file mode 100644 index 0000000000..6873815f69 --- /dev/null +++ b/src/BloomFilter.cc @@ -0,0 +1,33 @@ +#include "BloomFilter.h" + +HashPolicy::HashVector DefaultHashing::Hash(const void* x, size_t n) const + { + HashVector h(k(), 0); + for ( size_t i = 0; i < h.size(); ++i ) + h[i] = hashers_[i](x, n); + return h; + } + +HashPolicy::HashVector DoubleHashing::Hash(const void* x, size_t n) const + { + HashType h1 = hasher1_(x); + HashType h2 = hasher2_(x); + HashVector h(k(), 0); + for ( size_t i = 0; i < h.size(); ++i ) + h[i] = h1 + i * h2; + return h; + } + +void BasicBloomFilter::AddImpl(const HashPolicy::HashVector& h) + { + for ( size_t i = 0; i < h.size(); ++i ) + bits_.set(h[i] % h.size()); + } + +size_t BasicBloomFilter::CountImpl(const HashPolicy::HashVector& h) const + { + for ( size_t i = 0; i < h.size(); ++i ) + if ( ! bits_[h[i] % h.size()] ) + return 0; + return 1; + } diff --git a/src/BloomFilter.h b/src/BloomFilter.h index a767c6b8b8..dca4eff2bd 100644 --- a/src/BloomFilter.h +++ b/src/BloomFilter.h @@ -11,6 +11,9 @@ */ class CounterVector : SerialObj { public: + typedef size_t size_type; + typedef uint64 count_type; + /** * Constructs a counter vector having cells of a given width. * @@ -70,21 +73,24 @@ private: }; /** - * The abstract base class for hash policies. + * The abstract base class for hash policies that hash elements *k* times. * @tparam Codomain An integral type. */ class HashPolicy { public: - typedef hash_t hash_type; + typedef hash_t HashType; + typedef std::vector HashVector; + virtual ~HashPolicy() { } - size_t k() const { return k; } - virtual std::vector Hash(const void* x, size_t n) const = 0; + size_t k() const { return k_; } + virtual HashVector Hash(const void* x, size_t n) const = 0; + protected: /** * A functor that computes a universal hash function. * @tparam Codomain An integral type. 
*/ - template + template class Hasher { public: template @@ -104,8 +110,9 @@ protected: }; HashPolicy(size_t k) : k_(k) { } + private: - size_t k_; + const size_t k_; }; /** @@ -114,18 +121,12 @@ private: class DefaultHashing : public HashPolicy { public: DefaultHashing(size_t k) : HashPolicy(k), hashers_(k) { } - virtual ~DoubleHashing() { } + virtual ~DefaultHashing() { } - virtual std::vector Hash(const void* x, size_t n) const - { - std::vector h(k(), 0); - for (size_t i = 0; i < h.size(); ++i) - h[i] = hashers_[i](x, n); - return h; - } + virtual HashVector Hash(const void* x, size_t n) const; private: - std::vector< Hasher > hashers_; + std::vector< Hasher > hashers_; }; /** @@ -133,22 +134,14 @@ private: */ class DoubleHashing : public HashPolicy { public: - DoubleHashing(size_t k) : HashPolicy(k), hashers_(k) { } + DoubleHashing(size_t k) : HashPolicy(k) { } virtual ~DoubleHashing() { } - virtual std::vector Hash(const void* x, size_t n) const - { - Codomain h1 = hasher1_(x); - Codomain h2 = hasher2_(x); - std::vector h(k(), 0); - for (size_t i = 0; i < h.size(); ++i) - h[i] = h1 + i * h2; - return h; - } + virtual HashVector Hash(const void* x, size_t n) const; private: - Hasher hasher1_; - Hasher hasher2_; + Hasher hasher1_; + Hasher hasher2_; }; /** @@ -166,7 +159,7 @@ public: void Add(const T& x) { ++elements_; - AddImpl(hash_->Hash(x)); + AddImpl(hash_->Hash(&x, sizeof(x))); } /** @@ -179,7 +172,7 @@ public: template size_t Count(const T& x) const { - return CountImpl(hash_->Hash(x)); + return CountImpl(hash_->Hash(&x, sizeof(x))); } /** @@ -193,8 +186,6 @@ public: } protected: - typedef std::vector HashVector; - /** * Default-constructs a Bloom filter. */ @@ -206,17 +197,12 @@ protected: */ BloomFilter(HashPolicy* hash); - virtual void AddImpl(const HashVector& hashes) = 0; + virtual void AddImpl(const HashPolicy::HashVector& hashes) = 0; - virtual size_t CountImpl(const HashVector& hashes) const = 0; - - std::vector Hash(const T& x) const - { - return hash_->Hash(&x, sizeof(x)); - } + virtual size_t CountImpl(const HashPolicy::HashVector& hashes) const = 0; private: - HashPolicy* hash_; // Owned by *this. + HashPolicy* hash_; // Owned by *this. size_t elements_; }; @@ -230,19 +216,9 @@ public: BasicBloomFilter(HashPolicy* hash); protected: - virtual void AddImpl(const HashVector& h) - { - for ( size_t i = 0; i < h.size(); ++i ) - bits_.set(h[i] % h.size()); - } + virtual void AddImpl(const HashPolicy::HashVector& h); - virtual size_t CountImpl(const HashVector& h) const - { - for ( size_t i = 0; i < h.size(); ++i ) - if ( ! bits_[h[i] % h.size()] ) - return 0; - return 1; - } + virtual size_t CountImpl(const HashPolicy::HashVector& h) const; private: BitVector bits_; @@ -253,12 +229,15 @@ private: */ class CountingBloomFilter : public BloomFilter { public: - CountingBloomFilter(unsigned width); - CountingBloomFilter(HashPolicy* hash); + CountingBloomFilter(unsigned width, HashPolicy* hash); protected: CountingBloomFilter(); + virtual void AddImpl(const HashPolicy::HashVector& h); + + virtual size_t CountImpl(const HashPolicy::HashVector& h) const; + private: CounterVector cells_; }; From f708cd4a361ba02083380cfe0db2949e3e06cff7 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Mon, 3 Jun 2013 22:55:21 -0700 Subject: [PATCH 009/118] Work on parameter estimation and serialization. 
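
(Worked note added for illustration, not part of the original commit
message.) The new BasicBloomFilter::Cells() and BasicBloomFilter::K() in
the diff below implement the standard Bloom filter sizing formulas: for a
target false-positive probability p and an expected capacity of n elements,

    cells  m = ceil( -n * ln(p) / (ln 2)^2 )
    hashes k = round( (m / n) * ln 2 )

For example, p = 0.01 and n = 1000 gives m = ceil(9585.06) = 9586 cells and
k = round(6.64) = 7 hash functions.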
--- src/BloomFilter.cc | 131 ++++++++++++++++++++++++++++++++++++++++++++- src/BloomFilter.h | 41 +++++++------- src/NetVar.cc | 2 + src/OpaqueVal.cc | 23 ++++++++ src/OpaqueVal.h | 16 ++++++ src/SerialTypes.h | 7 +++ 6 files changed, 198 insertions(+), 22 deletions(-) diff --git a/src/BloomFilter.cc b/src/BloomFilter.cc index 6873815f69..4787bef0f0 100644 --- a/src/BloomFilter.cc +++ b/src/BloomFilter.cc @@ -1,23 +1,130 @@ #include "BloomFilter.h" +#include +#include "Serializer.h" + +// Backport C++11's std::round(). +namespace { +template +T round(double x) { return (x > 0.0) ? (x + 0.5) : (x - 0.5); } +} // namespace + + +IMPLEMENT_SERIAL(CounterVector, SER_COUNTERVECTOR) + +bool CounterVector::DoSerialize(SerialInfo* info) const + { + DO_SERIALIZE(SER_COUNTERVECTOR, SerialObj); + if ( ! SERIALIZE(&bits_) ) + return false; + return SERIALIZE(static_cast(width_)); + } + +bool CounterVector::DoUnserialize(UnserialInfo* info) + { + DO_UNSERIALIZE(SerialObj); + return false; + // TODO: Ask Robin how to unserialize non-pointer members. + //if ( ! UNSERIALIZE(&bits_) ) + // return false; + uint64 width; + if ( ! UNSERIALIZE(&width) ) + return false; + width_ = static_cast(width); + return true; + } + + HashPolicy::HashVector DefaultHashing::Hash(const void* x, size_t n) const { - HashVector h(k(), 0); + HashVector h(K(), 0); for ( size_t i = 0; i < h.size(); ++i ) h[i] = hashers_[i](x, n); return h; } + HashPolicy::HashVector DoubleHashing::Hash(const void* x, size_t n) const { HashType h1 = hasher1_(x); HashType h2 = hasher2_(x); - HashVector h(k(), 0); + HashVector h(K(), 0); for ( size_t i = 0; i < h.size(); ++i ) h[i] = h1 + i * h2; return h; } +bool BloomFilter::Serialize(SerialInfo* info) const + { + return SerialObj::Serialize(info); + } + +BloomFilter* BloomFilter::Unserialize(UnserialInfo* info) + { + return reinterpret_cast( + SerialObj::Unserialize(info, SER_BLOOMFILTER)); + } + +// FIXME: should abstract base classes also have IMPLEMENT_SERIAL? +//IMPLEMENT_SERIAL(BloomFilter, SER_BLOOMFILTER) + +bool BloomFilter::DoSerialize(SerialInfo* info) const + { + DO_SERIALIZE(SER_BLOOMFILTER, SerialObj); + // TODO: Make the hash policy serializable. + //if ( ! SERIALIZE(hash_) ) + // return false; + return SERIALIZE(static_cast(elements_)); + } + +bool BloomFilter::DoUnserialize(UnserialInfo* info) + { + DO_UNSERIALIZE(SerialObj); + // TODO: Make the hash policy serializable. + //if ( ! hash_ = HashPolicy::Unserialize(info) ) + // return false; + uint64 elements; + if ( UNSERIALIZE(&elements) ) + return false; + elements_ = static_cast(elements); + return true; + } + +size_t BasicBloomFilter::Cells(double fp, size_t capacity) + { + double ln2 = std::log(2); + return std::ceil(-(capacity * std::log(fp) / ln2 / ln2)); + } + +size_t BasicBloomFilter::K(size_t cells, size_t capacity) + { + double frac = static_cast(cells) / static_cast(capacity); + return round(frac * std::log(2)); + } + +BasicBloomFilter::BasicBloomFilter(size_t cells, HashPolicy* hash) + : BloomFilter(hash), bits_(cells) + { + } + +IMPLEMENT_SERIAL(BasicBloomFilter, SER_BASICBLOOMFILTER) + +bool BasicBloomFilter::DoSerialize(SerialInfo* info) const + { + DO_SERIALIZE(SER_BASICBLOOMFILTER, BloomFilter); + // TODO: Make the hash policy serializable. + //if ( ! SERIALIZE(&bits_) ) + // return false; + return true; + } + +bool BasicBloomFilter::DoUnserialize(UnserialInfo* info) + { + DO_UNSERIALIZE(BloomFilter); + // TODO: Non-pointer member deserialization? 
+ return true; + } + void BasicBloomFilter::AddImpl(const HashPolicy::HashVector& h) { for ( size_t i = 0; i < h.size(); ++i ) @@ -31,3 +138,23 @@ size_t BasicBloomFilter::CountImpl(const HashPolicy::HashVector& h) const return 0; return 1; } + + +void CountingBloomFilter::AddImpl(const HashPolicy::HashVector& h) + { + for ( size_t i = 0; i < h.size(); ++i ) + cells_.Increment(h[i] % h.size(), 1); + } + +size_t CountingBloomFilter::CountImpl(const HashPolicy::HashVector& h) const + { + CounterVector::size_type min = + std::numeric_limits::max(); + for ( size_t i = 0; i < h.size(); ++i ) + { + CounterVector::size_type cnt = cells_.Count(h[i] % h.size()); + if ( cnt < min ) + min = cnt; + } + return min; + } diff --git a/src/BloomFilter.h b/src/BloomFilter.h index dca4eff2bd..82948f30ec 100644 --- a/src/BloomFilter.h +++ b/src/BloomFilter.h @@ -65,7 +65,7 @@ public: protected: DECLARE_SERIAL(CounterVector); - CounterVector(); + CounterVector() { } private: BitVector bits_; @@ -82,7 +82,7 @@ public: typedef std::vector HashVector; virtual ~HashPolicy() { } - size_t k() const { return k_; } + size_t K() const { return k_; } virtual HashVector Hash(const void* x, size_t n) const = 0; protected: @@ -130,7 +130,7 @@ private: }; /** - * The *double-hashing* policy. Uses a linear combination of 2 hash functions. + * The *double-hashing* policy. Uses a linear combination of two hash functions. */ class DoubleHashing : public HashPolicy { public: @@ -185,25 +185,20 @@ public: return elements_; } -protected: - /** - * Default-constructs a Bloom filter. - */ - BloomFilter(); + bool Serialize(SerialInfo* info) const; + static BloomFilter* Unserialize(UnserialInfo* info); - /** - * Constructs a BloomFilter. - * @param hash The hashing policy. - */ - BloomFilter(HashPolicy* hash); +protected: + DECLARE_SERIAL(BloomFilter); + + BloomFilter() { }; + BloomFilter(HashPolicy* hash) : hash_(hash) { } virtual void AddImpl(const HashPolicy::HashVector& hashes) = 0; - virtual size_t CountImpl(const HashPolicy::HashVector& hashes) const = 0; private: - HashPolicy* hash_; // Owned by *this. 
- + HashPolicy* hash_; size_t elements_; }; @@ -212,12 +207,17 @@ private: */ class BasicBloomFilter : public BloomFilter { public: - BasicBloomFilter(); - BasicBloomFilter(HashPolicy* hash); + static size_t Cells(double fp, size_t capacity); + static size_t K(size_t cells, size_t capacity); + + BasicBloomFilter(size_t cells, HashPolicy* hash); protected: - virtual void AddImpl(const HashPolicy::HashVector& h); + DECLARE_SERIAL(BasicBloomFilter); + BasicBloomFilter() { } + + virtual void AddImpl(const HashPolicy::HashVector& h); virtual size_t CountImpl(const HashPolicy::HashVector& h) const; private: @@ -232,10 +232,11 @@ public: CountingBloomFilter(unsigned width, HashPolicy* hash); protected: + DECLARE_SERIAL(CountingBloomFilter); + CountingBloomFilter(); virtual void AddImpl(const HashPolicy::HashVector& h); - virtual size_t CountImpl(const HashPolicy::HashVector& h) const; private: diff --git a/src/NetVar.cc b/src/NetVar.cc index 3a23e4c9fa..d8c2192af7 100644 --- a/src/NetVar.cc +++ b/src/NetVar.cc @@ -244,6 +244,7 @@ OpaqueType* md5_type; OpaqueType* sha1_type; OpaqueType* sha256_type; OpaqueType* entropy_type; +OpaqueType* bloomfilter_type; #include "const.bif.netvar_def" #include "types.bif.netvar_def" @@ -310,6 +311,7 @@ void init_general_global_var() sha1_type = new OpaqueType("sha1"); sha256_type = new OpaqueType("sha256"); entropy_type = new OpaqueType("entropy"); + bloomfilter_type = new OpaqueType("bloomfilter"); } void init_net_var() diff --git a/src/OpaqueVal.cc b/src/OpaqueVal.cc index 19346e52f2..a5fb65f53b 100644 --- a/src/OpaqueVal.cc +++ b/src/OpaqueVal.cc @@ -1,4 +1,6 @@ #include "OpaqueVal.h" + +#include "BloomFilter.h" #include "NetVar.h" #include "Reporter.h" #include "Serializer.h" @@ -515,3 +517,24 @@ bool EntropyVal::DoUnserialize(UnserialInfo* info) return true; } + +BloomFilterVal::BloomFilterVal(OpaqueType* t) : OpaqueVal(t) + { + } + +IMPLEMENT_SERIAL(BloomFilterVal, SER_BLOOMFILTER_VAL); + +bool BloomFilterVal::DoSerialize(SerialInfo* info) const + { + DO_SERIALIZE(SER_BLOOMFILTER_VAL, OpaqueVal); + // TODO: implement. + return true; + } + +bool BloomFilterVal::DoUnserialize(UnserialInfo* info) + { + DO_UNSERIALIZE(OpaqueVal); + // TODO: implement. + return true; + } + diff --git a/src/OpaqueVal.h b/src/OpaqueVal.h index 78fa5da5e9..1c9c0361cc 100644 --- a/src/OpaqueVal.h +++ b/src/OpaqueVal.h @@ -7,6 +7,8 @@ #include "Val.h" #include "digest.h" +class BloomFilter; + class HashVal : public OpaqueVal { public: virtual bool IsValid() const; @@ -107,4 +109,18 @@ private: RandTest state; }; +class BloomFilterVal : public OpaqueVal { +public: + BloomFilterVal(); + +protected: + friend class Val; + BloomFilterVal(OpaqueType* t); + + DECLARE_SERIAL(BloomFilterVal); + +private: + BloomFilter* bloom_filter_; +}; + #endif diff --git a/src/SerialTypes.h b/src/SerialTypes.h index c9c0c34a33..171113ab6a 100644 --- a/src/SerialTypes.h +++ b/src/SerialTypes.h @@ -50,6 +50,9 @@ SERIAL_IS_BO(CASE, 0x1200) SERIAL_IS(LOCATION, 0x1300) SERIAL_IS(RE_MATCHER, 0x1400) SERIAL_IS(BITVECTOR, 0x1500) +SERIAL_IS(COUNTERVECTOR, 0xa000) +SERIAL_IS(BLOOMFILTER, 0xa100) +SERIAL_IS(BASICBLOOMFILTER, 0xa200) // These are the externally visible types. 
const SerialType SER_NONE = 0; @@ -105,6 +108,7 @@ SERIAL_VAL(MD5_VAL, 16) SERIAL_VAL(SHA1_VAL, 17) SERIAL_VAL(SHA256_VAL, 18) SERIAL_VAL(ENTROPY_VAL, 19) +SERIAL_VAL(BLOOMFILTER_VAL, 20) #define SERIAL_EXPR(name, val) SERIAL_CONST(name, val, EXPR) SERIAL_EXPR(EXPR, 1) @@ -204,5 +208,8 @@ SERIAL_CONST2(CASE) SERIAL_CONST2(LOCATION) SERIAL_CONST2(RE_MATCHER) SERIAL_CONST2(BITVECTOR) +SERIAL_CONST2(COUNTERVECTOR) +SERIAL_CONST2(BLOOMFILTER) +SERIAL_CONST2(BASICBLOOMFILTER) #endif From d3297dd6f3b6a50c07c90e9ad5f61c0ddf762460 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Tue, 4 Jun 2013 13:32:26 -0700 Subject: [PATCH 010/118] Adhere to Bro coding style. --- src/BitVector.cc | 100 +++++++++++++++++++++++------------------------ src/BitVector.h | 40 +++++++++---------- 2 files changed, 69 insertions(+), 71 deletions(-) diff --git a/src/BitVector.cc b/src/BitVector.cc index f57301d506..f029230609 100644 --- a/src/BitVector.cc +++ b/src/BitVector.cc @@ -33,7 +33,7 @@ BitVector::Reference::Reference(block_type& block, block_type i) assert(i < bits_per_block); } -BitVector::Reference& BitVector::Reference::flip() +BitVector::Reference& BitVector::Reference::Flip() { block_ ^= mask_; return *this; @@ -105,7 +105,7 @@ BitVector::BitVector(BitVector const& other) BitVector BitVector::operator~() const { BitVector b(*this); - b.flip(); + b.Flip(); return b; } @@ -130,15 +130,15 @@ BitVector BitVector::operator>>(size_type n) const BitVector& BitVector::operator<<=(size_type n) { if (n >= num_bits_) - return reset(); + return Reset(); if (n > 0) { - size_type last = blocks() - 1; + size_type last = Blocks() - 1; size_type div = n / bits_per_block; block_type r = bit_index(n); block_type* b = &bits_[0]; - assert(blocks() >= 1); + assert(Blocks() >= 1); assert(div <= last); if (r != 0) @@ -164,15 +164,15 @@ BitVector& BitVector::operator<<=(size_type n) BitVector& BitVector::operator>>=(size_type n) { if (n >= num_bits_) - return reset(); + return Reset(); if (n > 0) { - size_type last = blocks() - 1; + size_type last = Blocks() - 1; size_type div = n / bits_per_block; block_type r = bit_index(n); block_type* b = &bits_[0]; - assert(blocks() >= 1); + assert(Blocks() >= 1); assert(div <= last); if (r != 0) @@ -187,39 +187,39 @@ BitVector& BitVector::operator>>=(size_type n) b[i-div] = b[i]; } - std::fill_n(b + (blocks() - div), div, block_type(0)); + std::fill_n(b + (Blocks() - div), div, block_type(0)); } return *this; } BitVector& BitVector::operator&=(BitVector const& other) { - assert(size() >= other.size()); - for (size_type i = 0; i < blocks(); ++i) + assert(Size() >= other.Size()); + for (size_type i = 0; i < Blocks(); ++i) bits_[i] &= other.bits_[i]; return *this; } BitVector& BitVector::operator|=(BitVector const& other) { - assert(size() >= other.size()); - for (size_type i = 0; i < blocks(); ++i) + assert(Size() >= other.Size()); + for (size_type i = 0; i < Blocks(); ++i) bits_[i] |= other.bits_[i]; return *this; } BitVector& BitVector::operator^=(BitVector const& other) { - assert(size() >= other.size()); - for (size_type i = 0; i < blocks(); ++i) + assert(Size() >= other.Size()); + for (size_type i = 0; i < Blocks(); ++i) bits_[i] ^= other.bits_[i]; return *this; } BitVector& BitVector::operator-=(BitVector const& other) { - assert(size() >= other.size()); - for (size_type i = 0; i < blocks(); ++i) + assert(Size() >= other.Size()); + for (size_type i = 0; i < Blocks(); ++i) bits_[i] &= ~other.bits_[i]; return *this; } @@ -260,8 +260,8 @@ bool operator!=(BitVector const& x, BitVector 
const& y) bool operator<(BitVector const& x, BitVector const& y) { - assert(x.size() == y.size()); - for (BitVector::size_type r = x.blocks(); r > 0; --r) + assert(x.Size() == y.Size()); + for (BitVector::size_type r = x.Blocks(); r > 0; --r) { BitVector::size_type i = r - 1; if (x.bits_[i] < y.bits_[i]) @@ -272,9 +272,9 @@ bool operator<(BitVector const& x, BitVector const& y) return false; } -void BitVector::resize(size_type n, bool value) +void BitVector::Resize(size_type n, bool value) { - size_type old = blocks(); + size_type old = Blocks(); size_type required = bits_to_blocks(n); block_type block_value = value ? ~block_type(0) : block_type(0); @@ -288,27 +288,27 @@ void BitVector::resize(size_type n, bool value) zero_unused_bits(); } -void BitVector::clear() +void BitVector::Clear() { bits_.clear(); num_bits_ = 0; } -void BitVector::push_back(bool bit) +void BitVector::PushBack(bool bit) { - size_type s = size(); - resize(s + 1); - set(s, bit); + size_type s = Size(); + Resize(s + 1); + Set(s, bit); } -void BitVector::append(block_type block) +void BitVector::Append(block_type block) { size_type excess = extra_bits(); if (excess) { - assert(! bits_.empty()); + assert(! Empty()); bits_.push_back(block >> (bits_per_block - excess)); - bits_[bits_.size() - 2] |= (block << excess); + bits_[Blocks() - 2] |= (block << excess); } else { @@ -317,48 +317,46 @@ void BitVector::append(block_type block) num_bits_ += bits_per_block; } -BitVector& BitVector::set(size_type i, bool bit) +BitVector& BitVector::Set(size_type i, bool bit) { assert(i < num_bits_); - if (bit) - bits_[block_index(i)] |= bit_mask(i); + bits_[block_index(i)] |= bit_mask(i); else - reset(i); - + Reset(i); return *this; } -BitVector& BitVector::set() +BitVector& BitVector::Set() { std::fill(bits_.begin(), bits_.end(), ~block_type(0)); zero_unused_bits(); return *this; } -BitVector& BitVector::reset(size_type i) +BitVector& BitVector::Reset(size_type i) { assert(i < num_bits_); bits_[block_index(i)] &= ~bit_mask(i); return *this; } -BitVector& BitVector::reset() +BitVector& BitVector::Reset() { std::fill(bits_.begin(), bits_.end(), block_type(0)); return *this; } -BitVector& BitVector::flip(size_type i) +BitVector& BitVector::Flip(size_type i) { assert(i < num_bits_); bits_[block_index(i)] ^= bit_mask(i); return *this; } -BitVector& BitVector::flip() +BitVector& BitVector::Flip() { - for (size_type i = 0; i < blocks(); ++i) + for (size_type i = 0; i < Blocks(); ++i) bits_[i] = ~bits_[i]; zero_unused_bits(); return *this; @@ -376,11 +374,11 @@ BitVector::Reference BitVector::operator[](size_type i) return Reference(bits_[block_index(i)], bit_index(i)); } -BitVector::size_type BitVector::count() const +BitVector::size_type BitVector::Count() const { std::vector::const_iterator first = bits_.begin(); size_t n = 0; - size_type length = blocks(); + size_type length = Blocks(); while (length) { block_type block = *first; @@ -396,29 +394,29 @@ BitVector::size_type BitVector::count() const return n; } -BitVector::size_type BitVector::blocks() const +BitVector::size_type BitVector::Blocks() const { return bits_.size(); } -BitVector::size_type BitVector::size() const +BitVector::size_type BitVector::Size() const { return num_bits_; } -bool BitVector::empty() const +bool BitVector::Empty() const { return bits_.empty(); } -BitVector::size_type BitVector::find_first() const +BitVector::size_type BitVector::FindFirst() const { return find_from(0); } -BitVector::size_type BitVector::find_next(size_type i) const +BitVector::size_type 
BitVector::FindNext(size_type i) const { - if (i >= (size() - 1) || size() == 0) + if (i >= (Size() - 1) || Size() == 0) return npos; ++i; size_type bi = block_index(i); @@ -437,7 +435,7 @@ BitVector::size_type BitVector::lowest_bit(block_type block) BitVector::block_type BitVector::extra_bits() const { - return bit_index(size()); + return bit_index(Size()); } void BitVector::zero_unused_bits() @@ -448,9 +446,9 @@ void BitVector::zero_unused_bits() BitVector::size_type BitVector::find_from(size_type i) const { - while (i < blocks() && bits_[i] == 0) + while (i < Blocks() && bits_[i] == 0) ++i; - if (i >= blocks()) + if (i >= Blocks()) return npos; return i * bits_per_block + lowest_bit(bits_[i]); } diff --git a/src/BitVector.h b/src/BitVector.h index 9900dd103e..8315a151f0 100644 --- a/src/BitVector.h +++ b/src/BitVector.h @@ -24,7 +24,7 @@ public: Reference(block_type& block, block_type i); public: - Reference& flip(); + Reference& Flip(); operator bool() const; bool operator~() const; Reference& operator=(bool x); @@ -110,7 +110,7 @@ public: * sequence. */ template - void append(ForwardIterator first, ForwardIterator last) + void Append(ForwardIterator first, ForwardIterator last) { if (first == last) return; @@ -119,7 +119,7 @@ public: typename std::iterator_traits::difference_type delta = std::distance(first, last); - bits_.reserve(blocks() + delta); + bits_.reserve(Blocks() + delta); if (excess == 0) { bits_.back() |= (*first << excess); @@ -140,24 +140,24 @@ public: * Appends the bits in a given block. * @param block The block containing bits to append. */ - void append(block_type block); + void Append(block_type block); /** Appends a single bit to the end of the bit vector. * @param bit The value of the bit. */ - void push_back(bool bit); + void PushBack(bool bit); /** * Clears all bits in the bitvector. */ - void clear(); + void Clear(); /** * Resizes the bit vector to a new number of bits. * @param n The new number of bits of the bit vector. * @param value The bit value of new values, if the vector expands. */ - void resize(size_type n, bool value = false); + void Resize(size_type n, bool value = false); /** * Sets a bit at a specific position to a given value. @@ -165,39 +165,39 @@ public: * @param bit The value assigned to position *i*. * @return A reference to the bit vector instance. */ - BitVector& set(size_type i, bool bit = true); + BitVector& Set(size_type i, bool bit = true); /** * Sets all bits to 1. * @return A reference to the bit vector instance. */ - BitVector& set(); + BitVector& Set(); /** * Resets a bit at a specific position, i.e., sets it to 0. * @param i The bit position. * @return A reference to the bit vector instance. */ - BitVector& reset(size_type i); + BitVector& Reset(size_type i); /** * Sets all bits to 0. * @return A reference to the bit vector instance. */ - BitVector& reset(); + BitVector& Reset(); /** * Toggles/flips a bit at a specific position. * @param i The bit position. * @return A reference to the bit vector instance. */ - BitVector& flip(size_type i); + BitVector& Flip(size_type i); /** * Computes the complement. * @return A reference to the bit vector instance. */ - BitVector& flip(); + BitVector& Flip(); /** Retrieves a single bit. * @param i The bit position. @@ -217,32 +217,32 @@ public: * count* or *Hamming weight*. * @return The number of bits set to 1. */ - size_type count() const; + size_type Count() const; /** * Retrieves the number of blocks of the underlying storage. - * @param The number of blocks that represent `size()` bits. 
+ * @param The number of blocks that represent `Size()` bits. */ - size_type blocks() const; + size_type Blocks() const; /** * Retrieves the number of bits the bitvector consist of. * @return The length of the bit vector in bits. */ - size_type size() const; + size_type Size() const; /** * Checks whether the bit vector is empty. * @return `true` iff the bitvector has zero length. */ - bool empty() const; + bool Empty() const; /** * Finds the bit position of of the first 1-bit. * @return The position of the first bit that equals to one or `npos` if no * such bit exists. */ - size_type find_first() const; + size_type FindFirst() const; /** * Finds the next 1-bit from a given starting position. @@ -252,7 +252,7 @@ public: * @return The position of the first bit that equals to 1 after position * *i* or `npos` if no such bit exists. */ - size_type find_next(size_type i) const; + size_type FindNext(size_type i) const; bool Serialize(SerialInfo* info) const; static BitVector* Unserialize(UnserialInfo* info); From a5572dd66f10ca653855483e0941da327b8422e4 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Tue, 4 Jun 2013 14:31:39 -0700 Subject: [PATCH 011/118] Write CounterVector implementation scaffold. --- src/BloomFilter.cc | 36 ++++++++++++++++++++++++++++++++++++ src/BloomFilter.h | 10 +++++++--- 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/src/BloomFilter.cc b/src/BloomFilter.cc index 4787bef0f0..78048ee588 100644 --- a/src/BloomFilter.cc +++ b/src/BloomFilter.cc @@ -10,6 +10,42 @@ T round(double x) { return (x > 0.0) ? (x + 0.5) : (x - 0.5); } } // namespace +CounterVector::CounterVector(size_t width, size_t cells) + : bits_(new BitVector(width * cells)), width_(width) + { + } + +CounterVector::~CounterVector() + { + delete bits_; + } + +bool CounterVector::Increment(size_type cell, count_type value) + { + // TODO + assert(! "not yet implemented"); + return false; + } + +bool CounterVector::Decrement(size_type cell, count_type value) + { + // TODO + assert(! "not yet implemented"); + return false; + } + +CounterVector::count_type CounterVector::Count(size_type cell) const + { + // TODO + assert(! "not yet implemented"); + return 0; + } + +CounterVector::size_type CounterVector::Size() const + { + return bits_->Blocks() / width_; + } + IMPLEMENT_SERIAL(CounterVector, SER_COUNTERVECTOR) bool CounterVector::DoSerialize(SerialInfo* info) const diff --git a/src/BloomFilter.h b/src/BloomFilter.h index 82948f30ec..b4f82efee9 100644 --- a/src/BloomFilter.h +++ b/src/BloomFilter.h @@ -9,7 +9,7 @@ /** * A vector of counters, each of which have a fixed number of bits. */ -class CounterVector : SerialObj { +class CounterVector : public SerialObj { public: typedef size_t size_type; typedef uint64 count_type; @@ -18,8 +18,12 @@ public: * Constructs a counter vector having cells of a given width. * * @param width The number of bits that each cell occupies. + * + * @param cells The number of cells in the bitvector. */ - explicit CounterVector(unsigned width); + CounterVector(size_t width, size_t cells = 1024); + + ~CounterVector(); /** * Increments a given cell. @@ -68,7 +72,7 @@ protected: CounterVector() { } private: - BitVector bits_; + BitVector* bits_; unsigned width_; }; From 751cf612931f021ddf7b5ee51019f20d05e0c309 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Tue, 4 Jun 2013 15:30:27 -0700 Subject: [PATCH 012/118] Add more serialization implementation. 
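This serializes a CounterVector as its underlying bit vector plus the per-cell width; the Increment()/Count() scaffolding from the previous commit is still stubbed out. For intuition, a cell of width w occupies bits [cell*w, cell*w + w) of the flat bit vector, so a read or a saturating increment would look roughly like the sketch below. This is only an assumption about how the scaffold might be filled in, not code from this series:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    typedef std::vector<bool> Bits;   // stand-in for the patch's BitVector

    // Read the w-bit counter stored at `cell` (assumes w < 64).
    static uint64_t cell_count(const Bits& bits, size_t w, size_t cell)
        {
        uint64_t value = 0;
        for ( size_t i = 0; i < w; ++i )
            if ( bits[cell * w + i] )
                value |= uint64_t(1) << i;
        return value;
        }

    // Increment the counter at `cell`, refusing to wrap past its maximum.
    static bool cell_increment(Bits& bits, size_t w, size_t cell)
        {
        uint64_t max = (uint64_t(1) << w) - 1;
        uint64_t value = cell_count(bits, w, cell);
        if ( value == max )
            return false;   // counter saturated
        ++value;
        for ( size_t i = 0; i < w; ++i )
            bits[cell * w + i] = (value >> i) & 1;
        return true;
        }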
--- src/BloomFilter.cc | 93 ++++++++++++++++++++++++++++++++-------------- src/BloomFilter.h | 56 +++++++++++++++++++++++----- src/NetVar.h | 1 + src/OpaqueVal.cc | 18 ++++++--- src/OpaqueVal.h | 1 + src/SerialTypes.h | 2 + 6 files changed, 129 insertions(+), 42 deletions(-) diff --git a/src/BloomFilter.cc b/src/BloomFilter.cc index 78048ee588..64f0e1c67b 100644 --- a/src/BloomFilter.cc +++ b/src/BloomFilter.cc @@ -46,12 +46,23 @@ CounterVector::size_type CounterVector::Size() const return bits_->Blocks() / width_; } +bool CounterVector::Serialize(SerialInfo* info) const + { + return SerialObj::Serialize(info); + } + +CounterVector* CounterVector::Unserialize(UnserialInfo* info) + { + return reinterpret_cast( + SerialObj::Unserialize(info, SER_COUNTERVECTOR)); + } + IMPLEMENT_SERIAL(CounterVector, SER_COUNTERVECTOR) bool CounterVector::DoSerialize(SerialInfo* info) const { DO_SERIALIZE(SER_COUNTERVECTOR, SerialObj); - if ( ! SERIALIZE(&bits_) ) + if ( ! SERIALIZE(bits_) ) return false; return SERIALIZE(static_cast(width_)); } @@ -60,9 +71,9 @@ bool CounterVector::DoUnserialize(UnserialInfo* info) { DO_UNSERIALIZE(SerialObj); return false; - // TODO: Ask Robin how to unserialize non-pointer members. - //if ( ! UNSERIALIZE(&bits_) ) - // return false; + bits_ = BitVector::Unserialize(info); + if ( ! bits_ ) + return false; uint64 width; if ( ! UNSERIALIZE(&width) ) return false; @@ -90,6 +101,18 @@ HashPolicy::HashVector DoubleHashing::Hash(const void* x, size_t n) const return h; } + +BloomFilter::BloomFilter(size_t k) + : hash_(new hash_policy(k)) + { + } + +BloomFilter::~BloomFilter() + { + if ( hash_ ) + delete hash_; + } + bool BloomFilter::Serialize(SerialInfo* info) const { return SerialObj::Serialize(info); @@ -101,24 +124,21 @@ BloomFilter* BloomFilter::Unserialize(UnserialInfo* info) SerialObj::Unserialize(info, SER_BLOOMFILTER)); } -// FIXME: should abstract base classes also have IMPLEMENT_SERIAL? -//IMPLEMENT_SERIAL(BloomFilter, SER_BLOOMFILTER) - bool BloomFilter::DoSerialize(SerialInfo* info) const { DO_SERIALIZE(SER_BLOOMFILTER, SerialObj); - // TODO: Make the hash policy serializable. - //if ( ! SERIALIZE(hash_) ) - // return false; - return SERIALIZE(static_cast(elements_)); + if ( ! SERIALIZE(static_cast(hash_->K())) ) + return false; + return SERIALIZE(static_cast(elements_)); } bool BloomFilter::DoUnserialize(UnserialInfo* info) { DO_UNSERIALIZE(SerialObj); - // TODO: Make the hash policy serializable. - //if ( ! hash_ = HashPolicy::Unserialize(info) ) - // return false; + uint16 k; + if ( ! 
UNSERIALIZE(&k) ) + return false; + hash_ = new hash_policy(static_cast(k)); uint64 elements; if ( UNSERIALIZE(&elements) ) return false; @@ -126,7 +146,7 @@ bool BloomFilter::DoUnserialize(UnserialInfo* info) return true; } -size_t BasicBloomFilter::Cells(double fp, size_t capacity) +size_t BasicBloomFilter::M(double fp, size_t capacity) { double ln2 = std::log(2); return std::ceil(-(capacity * std::log(fp) / ln2 / ln2)); @@ -138,9 +158,16 @@ size_t BasicBloomFilter::K(size_t cells, size_t capacity) return round(frac * std::log(2)); } -BasicBloomFilter::BasicBloomFilter(size_t cells, HashPolicy* hash) - : BloomFilter(hash), bits_(cells) +BasicBloomFilter::BasicBloomFilter(double fp, size_t capacity) + : BloomFilter(K(M(fp, capacity), capacity)) { + bits_ = new BitVector(M(fp, capacity)); + } + +BasicBloomFilter::BasicBloomFilter(size_t cells, size_t capacity) + : BloomFilter(K(cells, capacity)) + { + bits_ = new BitVector(cells); } IMPLEMENT_SERIAL(BasicBloomFilter, SER_BASICBLOOMFILTER) @@ -148,38 +175,50 @@ IMPLEMENT_SERIAL(BasicBloomFilter, SER_BASICBLOOMFILTER) bool BasicBloomFilter::DoSerialize(SerialInfo* info) const { DO_SERIALIZE(SER_BASICBLOOMFILTER, BloomFilter); - // TODO: Make the hash policy serializable. - //if ( ! SERIALIZE(&bits_) ) - // return false; - return true; + return SERIALIZE(bits_); } bool BasicBloomFilter::DoUnserialize(UnserialInfo* info) { DO_UNSERIALIZE(BloomFilter); - // TODO: Non-pointer member deserialization? - return true; + bits_ = BitVector::Unserialize(info); + return bits_ == NULL; } void BasicBloomFilter::AddImpl(const HashPolicy::HashVector& h) { for ( size_t i = 0; i < h.size(); ++i ) - bits_.set(h[i] % h.size()); + bits_->Set(h[i] % h.size()); } size_t BasicBloomFilter::CountImpl(const HashPolicy::HashVector& h) const { for ( size_t i = 0; i < h.size(); ++i ) - if ( ! bits_[h[i] % h.size()] ) + if ( ! (*bits_)[h[i] % h.size()] ) return 0; return 1; } +IMPLEMENT_SERIAL(CountingBloomFilter, SER_COUNTINGBLOOMFILTER) + +bool CountingBloomFilter::DoSerialize(SerialInfo* info) const + { + DO_SERIALIZE(SER_BASICBLOOMFILTER, BloomFilter); + return SERIALIZE(cells_); + } + +bool CountingBloomFilter::DoUnserialize(UnserialInfo* info) + { + DO_UNSERIALIZE(BloomFilter); + cells_ = CounterVector::Unserialize(info); + return cells_ == NULL; + } + void CountingBloomFilter::AddImpl(const HashPolicy::HashVector& h) { for ( size_t i = 0; i < h.size(); ++i ) - cells_.Increment(h[i] % h.size(), 1); + cells_->Increment(h[i] % h.size(), 1); } size_t CountingBloomFilter::CountImpl(const HashPolicy::HashVector& h) const @@ -188,7 +227,7 @@ size_t CountingBloomFilter::CountImpl(const HashPolicy::HashVector& h) const std::numeric_limits::max(); for ( size_t i = 0; i < h.size(); ++i ) { - CounterVector::size_type cnt = cells_.Count(h[i] % h.size()); + CounterVector::size_type cnt = cells_->Count(h[i] % h.size()); if ( cnt < min ) min = cnt; } diff --git a/src/BloomFilter.h b/src/BloomFilter.h index b4f82efee9..77c6bc4f56 100644 --- a/src/BloomFilter.h +++ b/src/BloomFilter.h @@ -151,9 +151,13 @@ private: /** * The abstract base class for Bloom filters. */ -class BloomFilter : SerialObj { +class BloomFilter : public SerialObj { public: - virtual ~BloomFilter() { delete hash_; } + // At this point we won't let the user choose the hash policy, but we might + // open up the interface in the future. + typedef DoubleHashing hash_policy; + + virtual ~BloomFilter(); /** * Adds an element of type T to the Bloom filter. 
@@ -193,10 +197,10 @@ public: static BloomFilter* Unserialize(UnserialInfo* info); protected: - DECLARE_SERIAL(BloomFilter); + DECLARE_ABSTRACT_SERIAL(BloomFilter); BloomFilter() { }; - BloomFilter(HashPolicy* hash) : hash_(hash) { } + BloomFilter(size_t k); virtual void AddImpl(const HashPolicy::HashVector& hashes) = 0; virtual size_t CountImpl(const HashPolicy::HashVector& hashes) const = 0; @@ -211,10 +215,42 @@ private: */ class BasicBloomFilter : public BloomFilter { public: - static size_t Cells(double fp, size_t capacity); + /** + * Computes the number of cells based a given false-positive rate and + * capacity. In the literature, this parameter often has the name *M*. + * + * @param fp The false-positive rate. + * + * @param capacity The number of exepected elements. + * + * Returns: The number cells needed to support a false-positive rate of *fp* + * with at most *capacity* elements. + */ + static size_t M(double fp, size_t capacity); + + /** + * Computes the optimal number of hash functions based on the number cells + * and expected number of elements. + * + * @param cells The number of cells (*m*). + * + * @param capacity The maximum number of elements. + * + * Returns: the optimal number of hash functions for a false-positive rate of + * *fp* for at most *capacity* elements. + */ static size_t K(size_t cells, size_t capacity); - BasicBloomFilter(size_t cells, HashPolicy* hash); + /** + * Constructs a basic Bloom filter with a given false-positive rate and + * capacity. + */ + BasicBloomFilter(double fp, size_t capacity); + + /** + * Constructs a basic Bloom filter with a given number of cells and capacity. + */ + BasicBloomFilter(size_t cells, size_t capacity); protected: DECLARE_SERIAL(BasicBloomFilter); @@ -225,7 +261,7 @@ protected: virtual size_t CountImpl(const HashPolicy::HashVector& h) const; private: - BitVector bits_; + BitVector* bits_; }; /** @@ -233,18 +269,18 @@ private: */ class CountingBloomFilter : public BloomFilter { public: - CountingBloomFilter(unsigned width, HashPolicy* hash); + CountingBloomFilter(unsigned width); protected: DECLARE_SERIAL(CountingBloomFilter); - CountingBloomFilter(); + CountingBloomFilter() { } virtual void AddImpl(const HashPolicy::HashVector& h); virtual size_t CountImpl(const HashPolicy::HashVector& h) const; private: - CounterVector cells_; + CounterVector* cells_; }; #endif diff --git a/src/NetVar.h b/src/NetVar.h index 1a20adcaf2..aa2a14ada5 100644 --- a/src/NetVar.h +++ b/src/NetVar.h @@ -249,6 +249,7 @@ extern OpaqueType* md5_type; extern OpaqueType* sha1_type; extern OpaqueType* sha256_type; extern OpaqueType* entropy_type; +extern OpaqueType* bloomfilter_type; // Initializes globals that don't pertain to network/event analysis. extern void init_general_global_var(); diff --git a/src/OpaqueVal.cc b/src/OpaqueVal.cc index a5fb65f53b..b4f1290436 100644 --- a/src/OpaqueVal.cc +++ b/src/OpaqueVal.cc @@ -518,23 +518,31 @@ bool EntropyVal::DoUnserialize(UnserialInfo* info) return true; } +BloomFilterVal::BloomFilterVal() : OpaqueVal(bloomfilter_type) + { + } + BloomFilterVal::BloomFilterVal(OpaqueType* t) : OpaqueVal(t) { } +BloomFilterVal::~BloomFilterVal() + { + if ( bloom_filter_ ) + delete bloom_filter_; + } + IMPLEMENT_SERIAL(BloomFilterVal, SER_BLOOMFILTER_VAL); bool BloomFilterVal::DoSerialize(SerialInfo* info) const { DO_SERIALIZE(SER_BLOOMFILTER_VAL, OpaqueVal); - // TODO: implement. 
- return true; + return SERIALIZE(bloom_filter_); } bool BloomFilterVal::DoUnserialize(UnserialInfo* info) { DO_UNSERIALIZE(OpaqueVal); - // TODO: implement. - return true; + bloom_filter_ = BloomFilter::Unserialize(info); + return bloom_filter_ == NULL; } - diff --git a/src/OpaqueVal.h b/src/OpaqueVal.h index 1c9c0361cc..68b42a8a49 100644 --- a/src/OpaqueVal.h +++ b/src/OpaqueVal.h @@ -112,6 +112,7 @@ private: class BloomFilterVal : public OpaqueVal { public: BloomFilterVal(); + ~BloomFilterVal(); protected: friend class Val; diff --git a/src/SerialTypes.h b/src/SerialTypes.h index 171113ab6a..859145f19f 100644 --- a/src/SerialTypes.h +++ b/src/SerialTypes.h @@ -53,6 +53,7 @@ SERIAL_IS(BITVECTOR, 0x1500) SERIAL_IS(COUNTERVECTOR, 0xa000) SERIAL_IS(BLOOMFILTER, 0xa100) SERIAL_IS(BASICBLOOMFILTER, 0xa200) +SERIAL_IS(COUNTINGBLOOMFILTER, 0xa300) // These are the externally visible types. const SerialType SER_NONE = 0; @@ -211,5 +212,6 @@ SERIAL_CONST2(BITVECTOR) SERIAL_CONST2(COUNTERVECTOR) SERIAL_CONST2(BLOOMFILTER) SERIAL_CONST2(BASICBLOOMFILTER) +SERIAL_CONST2(COUNTINGBLOOMFILTER) #endif From 880d02f7204d21fc0e69f08ac78e963042df4f16 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Wed, 5 Jun 2013 16:16:55 -0700 Subject: [PATCH 013/118] Associate a Comphash with a BloomFilterVal. We also keep track of the Bloom filter's element type inside each value. The first use of the BiF bloomfilter_add will "typify" the Bloom filter and lock the Bloom filter's type to the element type. --- src/BloomFilter.cc | 15 ++++++++++++ src/BloomFilter.h | 3 ++- src/OpaqueVal.cc | 60 ++++++++++++++++++++++++++++++++++++++++++++-- src/OpaqueVal.h | 18 ++++++++++++-- 4 files changed, 91 insertions(+), 5 deletions(-) diff --git a/src/BloomFilter.cc b/src/BloomFilter.cc index 64f0e1c67b..74fa6fb255 100644 --- a/src/BloomFilter.cc +++ b/src/BloomFilter.cc @@ -199,6 +199,21 @@ size_t BasicBloomFilter::CountImpl(const HashPolicy::HashVector& h) const return 1; } +CountingBloomFilter::CountingBloomFilter(double fp, size_t capacity, + size_t width) + : BloomFilter(BasicBloomFilter::K(BasicBloomFilter::M(fp, capacity), + capacity)) + { + cells_ = new CounterVector(width, BasicBloomFilter::M(fp, capacity)); + } + +CountingBloomFilter::CountingBloomFilter(size_t cells, size_t capacity, + size_t width) + : BloomFilter(BasicBloomFilter::K(cells, capacity)) + { + cells_ = new CounterVector(width, cells); + } + IMPLEMENT_SERIAL(CountingBloomFilter, SER_COUNTINGBLOOMFILTER) diff --git a/src/BloomFilter.h b/src/BloomFilter.h index 77c6bc4f56..14b0ac3281 100644 --- a/src/BloomFilter.h +++ b/src/BloomFilter.h @@ -269,7 +269,8 @@ private: */ class CountingBloomFilter : public BloomFilter { public: - CountingBloomFilter(unsigned width); + CountingBloomFilter(double fp, size_t capacity, size_t width); + CountingBloomFilter(size_t cells, size_t capacity, size_t width); protected: DECLARE_SERIAL(CountingBloomFilter); diff --git a/src/OpaqueVal.cc b/src/OpaqueVal.cc index b4f1290436..abfd8f320f 100644 --- a/src/OpaqueVal.cc +++ b/src/OpaqueVal.cc @@ -518,31 +518,87 @@ bool EntropyVal::DoUnserialize(UnserialInfo* info) return true; } -BloomFilterVal::BloomFilterVal() : OpaqueVal(bloomfilter_type) +BloomFilterVal::BloomFilterVal(BloomFilter* bf) + : OpaqueVal(bloomfilter_type), bloom_filter_(bf) { } -BloomFilterVal::BloomFilterVal(OpaqueType* t) : OpaqueVal(t) +BloomFilterVal::BloomFilterVal(OpaqueType* t) + : OpaqueVal(t) { } +bool BloomFilterVal::Typify(BroType* type) + { + if ( type_ ) + return false; + type_ = type; + TypeList* 
tl = new TypeList(type_); + tl->Append(type_); + hash_ = new CompositeHash(tl); + Unref(tl); + return true; + } + +BroType* BloomFilterVal::Type() const + { + return type_; + } + +void BloomFilterVal::Add(const Val* val) + { + HashKey* key = hash_->ComputeHash(val, 1); + bloom_filter_->Add(key->Hash()); + } + +size_t BloomFilterVal::Count(const Val* val) const + { + HashKey* key = hash_->ComputeHash(val, 1); + return bloom_filter_->Count(key->Hash()); + } + +BloomFilterVal* BloomFilterVal::Merge(const BloomFilterVal* first, + const BloomFilterVal* second) +{ + assert(! "not yet implemented"); + return NULL; + } + BloomFilterVal::~BloomFilterVal() { + if ( type_ ) + Unref(type_); + if ( hash_ ) + delete hash_; if ( bloom_filter_ ) delete bloom_filter_; } +BloomFilterVal::BloomFilterVal() + : OpaqueVal(bloomfilter_type) + { + } + IMPLEMENT_SERIAL(BloomFilterVal, SER_BLOOMFILTER_VAL); bool BloomFilterVal::DoSerialize(SerialInfo* info) const { DO_SERIALIZE(SER_BLOOMFILTER_VAL, OpaqueVal); + if ( ! SERIALIZE(type_) ) + return false; return SERIALIZE(bloom_filter_); } bool BloomFilterVal::DoUnserialize(UnserialInfo* info) { DO_UNSERIALIZE(OpaqueVal); + type_ = BroType::Unserialize(info); + if ( ! type_ ) + return false; + TypeList* tl = new TypeList(type_); + tl->Append(type_); + hash_ = new CompositeHash(tl); + Unref(tl); bloom_filter_ = BloomFilter::Unserialize(info); return bloom_filter_ == NULL; } diff --git a/src/OpaqueVal.h b/src/OpaqueVal.h index 68b42a8a49..e97a530f3a 100644 --- a/src/OpaqueVal.h +++ b/src/OpaqueVal.h @@ -110,18 +110,32 @@ private: }; class BloomFilterVal : public OpaqueVal { + BloomFilterVal(const BloomFilterVal&); + BloomFilterVal& operator=(const BloomFilterVal&); public: - BloomFilterVal(); + static BloomFilterVal* Merge(const BloomFilterVal* first, + const BloomFilterVal* second); + + BloomFilterVal(BloomFilter* bf); ~BloomFilterVal(); + bool Typify(BroType* type); + BroType* Type() const; + + void Add(const Val* val); + size_t Count(const Val* val) const; + protected: friend class Val; + BloomFilterVal(); BloomFilterVal(OpaqueType* t); DECLARE_SERIAL(BloomFilterVal); private: - BloomFilter* bloom_filter_; + BroType* type_; + CompositeHash* hash_; + BloomFilter* bloom_filter_; }; #endif From 3d9764213191070a6b68375c0d0ae8c3193528e3 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Wed, 5 Jun 2013 16:26:16 -0700 Subject: [PATCH 014/118] Add Bloom filter BiFs. --- src/bro.bif | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/src/bro.bif b/src/bro.bif index d9558106a7..60fb985dda 100644 --- a/src/bro.bif +++ b/src/bro.bif @@ -5730,3 +5730,92 @@ function anonymize_addr%(a: addr, cl: IPAddrAnonymizationClass%): addr } %} +# =========================================================================== +# +# Bloom Filter Functions +# +# =========================================================================== + +%%{ +#include "BloomFilter.h" +%%} + +## Initializes a Bloom filter data structure. +## +## fp: The desired false-positive rate. +## +## capacity: the maximum number of elements that guarantees a false-positive +## rate of *fp*. +## +## Returns: A Bloom filter handle. 
+function bloomfilter_init%(fp: double, capacity: count, + max: count &default=1%): opaque of bloomfilter + %{ + BloomFilter* bf; + if ( max == 1 ) + { + bf = new BasicBloomFilter(fp, capacity); + } + else + { + uint16 width = 0; + while ( max >>= 1 ) + ++width; + bf = new CountingBloomFilter(fp, capacity, width); + } + return new BloomFilterVal(bf); + %} + +## Adds an element to a Bloom filter. +## +## bf: The Bloom filter handle. +## +## x: The element to add. +function bloomfilter_add%(bf: opaque of bloomfilter, x: any%): any + %{ + BloomFilterVal* bfv = static_cast(bf); + if ( ! bfv->Type() || ! bfv->Typify(x->Type()) ) + reporter->Error("failed to set Bloom filter type"); + else if ( bfv->Type() != x->Type() ) + reporter->Error("incompatible Bloom filter types"); + bfv->Add(x); + return 0; + %} + +## Retrieves the counter for a given element in a Bloom filter. +## +## bf: The Bloom filter handle. +## +## x: The element to count. +## +## Returns: the counter associated with *x* in *bf*. +function bloomfilter_lookup%(bf: opaque of bloomfilter, x: any%): count + %{ + BloomFilterVal* bfv = static_cast(bf); + if ( ! bfv->Type() ) + reporter->Error("cannot perform lookup on untyped Bloom filter"); + else if ( bfv->Type() != x->Type() ) + reporter->Error("incompatible Bloom filter types"); + return new Val(static_cast(bfv->Count(x)), TYPE_COUNT); + %} + +## Merges two Bloom filters. +## +## bf1: The first Bloom filter handle. +## +## bf2: The second Bloom filter handle. +## +## Returns: The union of *bf1* and *bf2*. +function bloomfilter_merge%(bf1: opaque of bloomfilter, + bf2: opaque of bloomfilter%): opaque of bloomfilter + %{ + const BloomFilterVal* bfv1 = static_cast(bf1); + const BloomFilterVal* bfv2 = static_cast(bf2); + if ( ! bfv1->Type() ) + reporter->Error("The first Bloom filter has not yet been typed"); + if ( ! bfv2->Type() ) + reporter->Error("The second Bloom filter has not yet been typed"); + else if ( bfv1->Type() != bfv2->Type() ) + reporter->Error("incompatible Bloom filter types"); + return BloomFilterVal::Merge(bfv1, bfv2); + %} From d5126a13395f899fab12f081248336e687222ed9 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Wed, 5 Jun 2013 17:45:10 -0700 Subject: [PATCH 015/118] Fix some BiF issues. --- src/bro.bif | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/bro.bif b/src/bro.bif index 60fb985dda..08b532eaea 100644 --- a/src/bro.bif +++ b/src/bro.bif @@ -5774,12 +5774,18 @@ function bloomfilter_init%(fp: double, capacity: count, function bloomfilter_add%(bf: opaque of bloomfilter, x: any%): any %{ BloomFilterVal* bfv = static_cast(bf); - if ( ! bfv->Type() || ! bfv->Typify(x->Type()) ) + if ( ! bfv->Type() && ! bfv->Typify(x->Type()) ) + { reporter->Error("failed to set Bloom filter type"); + return NULL; + } else if ( bfv->Type() != x->Type() ) + { reporter->Error("incompatible Bloom filter types"); + return NULL; + } bfv->Add(x); - return 0; + return NULL; %} ## Retrieves the counter for a given element in a Bloom filter. @@ -5812,9 +5818,9 @@ function bloomfilter_merge%(bf1: opaque of bloomfilter, const BloomFilterVal* bfv1 = static_cast(bf1); const BloomFilterVal* bfv2 = static_cast(bf2); if ( ! bfv1->Type() ) - reporter->Error("The first Bloom filter has not yet been typed"); + reporter->Error("first Bloom filter has not yet been typed"); if ( ! 
bfv2->Type() ) - reporter->Error("The second Bloom filter has not yet been typed"); + reporter->Error("second Bloom filter has not yet been typed"); else if ( bfv1->Type() != bfv2->Type() ) reporter->Error("incompatible Bloom filter types"); return BloomFilterVal::Merge(bfv1, bfv2); From 012e09c5c40bdf0acd29a34bf2271417ed36d770 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Thu, 6 Jun 2013 12:56:46 -0700 Subject: [PATCH 016/118] Small fixes and simplifications. --- src/BloomFilter.cc | 2 +- src/BloomFilter.h | 17 +++++++---------- src/OpaqueVal.cc | 1 + 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/src/BloomFilter.cc b/src/BloomFilter.cc index 74fa6fb255..e549553bf4 100644 --- a/src/BloomFilter.cc +++ b/src/BloomFilter.cc @@ -140,7 +140,7 @@ bool BloomFilter::DoUnserialize(UnserialInfo* info) return false; hash_ = new hash_policy(static_cast(k)); uint64 elements; - if ( UNSERIALIZE(&elements) ) + if ( ! UNSERIALIZE(&elements) ) return false; elements_ = static_cast(elements); return true; diff --git a/src/BloomFilter.h b/src/BloomFilter.h index 14b0ac3281..3e2bd5de90 100644 --- a/src/BloomFilter.h +++ b/src/BloomFilter.h @@ -94,15 +94,14 @@ protected: * A functor that computes a universal hash function. * @tparam Codomain An integral type. */ - template class Hasher { public: - template - Codomain operator()(const Domain& x) const + template + HashType operator()(const T& x) const { return h3_(&x, sizeof(x)); } - Codomain operator()(const void* x, size_t n) const + HashType operator()(const void* x, size_t n) const { return h3_(x, n); } @@ -110,7 +109,7 @@ protected: // FIXME: The hardcoded value of 36 comes from UHASH_KEY_SIZE defined in // Hash.h. I do not know how this value impacts the hash function behavior // so I'll just copy it verbatim. (Matthias) - H3 h3_; + H3 h3_; }; HashPolicy(size_t k) : k_(k) { } @@ -125,12 +124,11 @@ private: class DefaultHashing : public HashPolicy { public: DefaultHashing(size_t k) : HashPolicy(k), hashers_(k) { } - virtual ~DefaultHashing() { } virtual HashVector Hash(const void* x, size_t n) const; private: - std::vector< Hasher > hashers_; + std::vector hashers_; }; /** @@ -139,13 +137,12 @@ private: class DoubleHashing : public HashPolicy { public: DoubleHashing(size_t k) : HashPolicy(k) { } - virtual ~DoubleHashing() { } virtual HashVector Hash(const void* x, size_t n) const; private: - Hasher hasher1_; - Hasher hasher2_; + Hasher hasher1_; + Hasher hasher2_; }; /** diff --git a/src/OpaqueVal.cc b/src/OpaqueVal.cc index abfd8f320f..03a6e51ce8 100644 --- a/src/OpaqueVal.cc +++ b/src/OpaqueVal.cc @@ -533,6 +533,7 @@ bool BloomFilterVal::Typify(BroType* type) if ( type_ ) return false; type_ = type; + type_->Ref(); TypeList* tl = new TypeList(type_); tl->Append(type_); hash_ = new CompositeHash(tl); From f211b856c9ae35e68ea4af194e08157fdefef7e6 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Thu, 6 Jun 2013 13:13:36 -0700 Subject: [PATCH 017/118] Catch invalid values of the false-positive rate. 
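The guard added below rejects rates outside [0, 1] but still lets the endpoints through: fp = 0 sends std::log(fp) to negative infinity when sizing the filter, and fp = 1 collapses it to zero cells. A stricter check is sketched here only as a suggestion; it is not what this patch does:

    #include <cstdio>

    // Accept only false-positive rates strictly between 0 and 1.
    static bool valid_fp_rate(double fp)
        {
        return fp > 0.0 && fp < 1.0;
        }

    int main()
        {
        std::printf("%d %d %d %d\n",
                    valid_fp_rate(0.1),    // 1: usable rate
                    valid_fp_rate(0.0),    // 0: would need infinitely many cells
                    valid_fp_rate(1.0),    // 0: degenerates to a zero-cell filter
                    valid_fp_rate(1.1));   // 0: already rejected by the patch
        return 0;
        }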
--- src/bro.bif | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/bro.bif b/src/bro.bif index 08b532eaea..74219dd2b7 100644 --- a/src/bro.bif +++ b/src/bro.bif @@ -5751,6 +5751,11 @@ function anonymize_addr%(a: addr, cl: IPAddrAnonymizationClass%): addr function bloomfilter_init%(fp: double, capacity: count, max: count &default=1%): opaque of bloomfilter %{ + if ( fp < 0.0 || fp > 1.0 ) + { + reporter->Error("false-positive rate must take value between 0 and 1"); + return NULL; + } BloomFilter* bf; if ( max == 1 ) { From 7ce986e31f59b1f1000ec335a4efc1f0f5e0c011 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Thu, 6 Jun 2013 13:21:27 -0700 Subject: [PATCH 018/118] Fix modding. --- src/BloomFilter.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/BloomFilter.cc b/src/BloomFilter.cc index e549553bf4..7c347927c3 100644 --- a/src/BloomFilter.cc +++ b/src/BloomFilter.cc @@ -188,13 +188,13 @@ bool BasicBloomFilter::DoUnserialize(UnserialInfo* info) void BasicBloomFilter::AddImpl(const HashPolicy::HashVector& h) { for ( size_t i = 0; i < h.size(); ++i ) - bits_->Set(h[i] % h.size()); + bits_->Set(h[i] % bits_->Size()); } size_t BasicBloomFilter::CountImpl(const HashPolicy::HashVector& h) const { for ( size_t i = 0; i < h.size(); ++i ) - if ( ! (*bits_)[h[i] % h.size()] ) + if ( ! (*bits_)[h[i] % bits_->Size()] ) return 0; return 1; } @@ -233,7 +233,7 @@ bool CountingBloomFilter::DoUnserialize(UnserialInfo* info) void CountingBloomFilter::AddImpl(const HashPolicy::HashVector& h) { for ( size_t i = 0; i < h.size(); ++i ) - cells_->Increment(h[i] % h.size(), 1); + cells_->Increment(h[i] % cells_->Size(), 1); } size_t CountingBloomFilter::CountImpl(const HashPolicy::HashVector& h) const @@ -242,7 +242,7 @@ size_t CountingBloomFilter::CountImpl(const HashPolicy::HashVector& h) const std::numeric_limits::max(); for ( size_t i = 0; i < h.size(); ++i ) { - CounterVector::size_type cnt = cells_->Count(h[i] % h.size()); + CounterVector::size_type cnt = cells_->Count(h[i] % cells_->Size()); if ( cnt < min ) min = cnt; } From fcf1807fc8ac320a6c787360e8b78509b58b0a5a Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Thu, 6 Jun 2013 13:39:00 -0700 Subject: [PATCH 019/118] Fix hasher usage and narrow interface. --- src/BloomFilter.cc | 4 ++-- src/BloomFilter.h | 10 +--------- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/src/BloomFilter.cc b/src/BloomFilter.cc index 7c347927c3..c684c82c0e 100644 --- a/src/BloomFilter.cc +++ b/src/BloomFilter.cc @@ -93,8 +93,8 @@ HashPolicy::HashVector DefaultHashing::Hash(const void* x, size_t n) const HashPolicy::HashVector DoubleHashing::Hash(const void* x, size_t n) const { - HashType h1 = hasher1_(x); - HashType h2 = hasher2_(x); + HashType h1 = hasher1_(x, n); + HashType h2 = hasher2_(x, n); HashVector h(K(), 0); for ( size_t i = 0; i < h.size(); ++i ) h[i] = h1 + i * h2; diff --git a/src/BloomFilter.h b/src/BloomFilter.h index 3e2bd5de90..fd1cb31d61 100644 --- a/src/BloomFilter.h +++ b/src/BloomFilter.h @@ -96,15 +96,7 @@ protected: */ class Hasher { public: - template - HashType operator()(const T& x) const - { - return h3_(&x, sizeof(x)); - } - HashType operator()(const void* x, size_t n) const - { - return h3_(x, n); - } + HashType operator()(const void* x, size_t n) const { return h3_(x, n); } private: // FIXME: The hardcoded value of 36 comes from UHASH_KEY_SIZE defined in // Hash.h. 
I do not know how this value impacts the hash function behavior From 0d299eca57ddab9dfb17c1f6c99139c481dccb49 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Thu, 6 Jun 2013 14:54:25 -0700 Subject: [PATCH 020/118] Correct computation of k hash functions. --- src/BloomFilter.cc | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/BloomFilter.cc b/src/BloomFilter.cc index c684c82c0e..f1db71ae1d 100644 --- a/src/BloomFilter.cc +++ b/src/BloomFilter.cc @@ -3,13 +3,6 @@ #include #include "Serializer.h" -// Backport C++11's std::round(). -namespace { -template -T round(double x) { return (x > 0.0) ? (x + 0.5) : (x - 0.5); } -} // namespace - - CounterVector::CounterVector(size_t width, size_t cells) : bits_(new BitVector(width * cells)), width_(width) { @@ -155,7 +148,7 @@ size_t BasicBloomFilter::M(double fp, size_t capacity) size_t BasicBloomFilter::K(size_t cells, size_t capacity) { double frac = static_cast(cells) / static_cast(capacity); - return round(frac * std::log(2)); + return std::ceil(frac * std::log(2)); } BasicBloomFilter::BasicBloomFilter(double fp, size_t capacity) From e15f03d980e8bb63d00969268056b2e9592b2f85 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Thu, 6 Jun 2013 15:02:11 -0700 Subject: [PATCH 021/118] Cleanup BiFs. --- src/bro.bif | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/src/bro.bif b/src/bro.bif index 5c1280645e..8bd9575498 100644 --- a/src/bro.bif +++ b/src/bro.bif @@ -5026,16 +5026,11 @@ function bloomfilter_add%(bf: opaque of bloomfilter, x: any%): any %{ BloomFilterVal* bfv = static_cast(bf); if ( ! bfv->Type() && ! bfv->Typify(x->Type()) ) - { reporter->Error("failed to set Bloom filter type"); - return NULL; - } else if ( bfv->Type() != x->Type() ) - { reporter->Error("incompatible Bloom filter types"); - return NULL; - } - bfv->Add(x); + else + bfv->Add(x); return NULL; %} @@ -5048,12 +5043,14 @@ function bloomfilter_add%(bf: opaque of bloomfilter, x: any%): any ## Returns: the counter associated with *x* in *bf*. function bloomfilter_lookup%(bf: opaque of bloomfilter, x: any%): count %{ - BloomFilterVal* bfv = static_cast(bf); + const BloomFilterVal* bfv = static_cast(bf); if ( ! bfv->Type() ) reporter->Error("cannot perform lookup on untyped Bloom filter"); else if ( bfv->Type() != x->Type() ) reporter->Error("incompatible Bloom filter types"); - return new Val(static_cast(bfv->Count(x)), TYPE_COUNT); + else + return new Val(static_cast(bfv->Count(x)), TYPE_COUNT); + return new Val(0, TYPE_COUNT); %} ## Merges two Bloom filters. @@ -5068,11 +5065,9 @@ function bloomfilter_merge%(bf1: opaque of bloomfilter, %{ const BloomFilterVal* bfv1 = static_cast(bf1); const BloomFilterVal* bfv2 = static_cast(bf2); - if ( ! bfv1->Type() ) - reporter->Error("first Bloom filter has not yet been typed"); - if ( ! bfv2->Type() ) - reporter->Error("second Bloom filter has not yet been typed"); - else if ( bfv1->Type() != bfv2->Type() ) + if ( bfv1->Type() != bfv2->Type() ) reporter->Error("incompatible Bloom filter types"); - return BloomFilterVal::Merge(bfv1, bfv2); + else + return BloomFilterVal::Merge(bfv1, bfv2); + return NULL; %} From 86becdd6e467fabc475eb81baea6d3586b2d74e7 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Thu, 6 Jun 2013 15:08:24 -0700 Subject: [PATCH 022/118] Add tests. 
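The btest below exercises the feature end-to-end through the script-layer BiFs. An equivalent spot check against the C++ interface from the earlier commits would look something like the following; this is purely illustrative, no such unit test exists in this series:

    #include <cassert>
    #include <cstdint>

    #include "BloomFilter.h"

    int main()
        {
        // Sized for 1000 elements at a 10% false-positive rate.
        BasicBloomFilter bf(0.1, 1000);

        uint64_t x = 42, y = 84, z = 4711;
        bf.Add(x);
        bf.Add(y);

        // Bloom filters have no false negatives.
        assert(bf.Count(x) == 1);
        assert(bf.Count(y) == 1);

        // `z` was never added; 0 is the expected answer, 1 would be one of
        // the false positives the filter permits at a rate of roughly 0.1.
        size_t hit = bf.Count(z);
        assert(hit == 0 || hit == 1);

        return 0;
        }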
--- testing/btest/bifs/bloomfilter.bro | 38 ++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 testing/btest/bifs/bloomfilter.bro diff --git a/testing/btest/bifs/bloomfilter.bro b/testing/btest/bifs/bloomfilter.bro new file mode 100644 index 0000000000..6abbdd69f7 --- /dev/null +++ b/testing/btest/bifs/bloomfilter.bro @@ -0,0 +1,38 @@ +# @TEST-EXEC: bro -b %INPUT >output +# @TEST-EXEC: btest-diff output + +event bro_init() + { + # Basic usage with counts. + local bf_cnt = bloomfilter_init(0.1, 1000); + bloomfilter_add(bf_cnt, 42); + bloomfilter_add(bf_cnt, 84); + bloomfilter_add(bf_cnt, 168); + print bloomfilter_lookup(bf_cnt, 0); + print bloomfilter_lookup(bf_cnt, 42); + print bloomfilter_lookup(bf_cnt, 168); + print bloomfilter_lookup(bf_cnt, 336); + bloomfilter_add(bf_cnt, 0.5); # Type mismatch + bloomfilter_add(bf_cnt, "foo"); # Type mismatch + + # Basic usage with strings. + local bf_str = bloomfilter_init(0.9, 10); + bloomfilter_add(bf_str, "foo"); + bloomfilter_add(bf_str, "bar"); + print bloomfilter_lookup(bf_str, "foo"); + print bloomfilter_lookup(bf_str, "bar"); + print bloomfilter_lookup(bf_str, "baz"); + print bloomfilter_lookup(bf_str, "qux"); + bloomfilter_add(bf_str, 0.5); # Type mismatch + bloomfilter_add(bf_str, 100); # Type mismatch + + # Edge cases. + local bf_edge0 = bloomfilter_init(0.000000000001, 1); + local bf_edge1 = bloomfilter_init(0.00000001, 100000000); + local bf_edge2 = bloomfilter_init(0.9999999, 1); + local bf_edge3 = bloomfilter_init(0.9999999, 100000000000); + + # Invalid parameters. + local bf_bug0 = bloomfilter_init(-0.5, 42); + local bf_bug1 = bloomfilter_init(1.1, 42); + } From f2d536d2da1118b1d5feb143f751d47dc344232b Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Thu, 6 Jun 2013 15:22:04 -0700 Subject: [PATCH 023/118] Add missing initializations. 
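The default constructors touched here exist essentially so the serializer can materialize an object before filling it in, and the cleanup code deletes the pointer members behind a NULL check (for example `if ( hash_ ) delete hash_;`). That check only helps if the pointers actually start out as NULL; left uninitialized they hold indeterminate values, and deleting them is undefined behavior. A generic illustration of the hazard, not framework code:

    #include <cstddef>

    struct Holder
        {
        // Holder() { }              // before: `p` left indeterminate
        Holder() : p(NULL) { }       // after: the pattern this patch applies
        ~Holder()
            {
            if ( p )                 // a useless guard when `p` is garbage
                delete p;
            }
        int* p;
        };

    int main()
        {
        Holder h;   // suppose unserialization fails before `p` is assigned
        return 0;   // destructor runs; safe only because `p` started as NULL
        }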
--- src/BloomFilter.cc | 15 +++++++++++++++ src/BloomFilter.h | 6 +++--- src/OpaqueVal.cc | 25 +++++++++++++++++-------- 3 files changed, 35 insertions(+), 11 deletions(-) diff --git a/src/BloomFilter.cc b/src/BloomFilter.cc index f1db71ae1d..40772fecb6 100644 --- a/src/BloomFilter.cc +++ b/src/BloomFilter.cc @@ -95,6 +95,11 @@ HashPolicy::HashVector DoubleHashing::Hash(const void* x, size_t n) const } +BloomFilter::BloomFilter() + : hash_(NULL) + { + } + BloomFilter::BloomFilter(size_t k) : hash_(new hash_policy(k)) { @@ -151,6 +156,11 @@ size_t BasicBloomFilter::K(size_t cells, size_t capacity) return std::ceil(frac * std::log(2)); } +BasicBloomFilter::BasicBloomFilter() + : bits_(NULL) + { + } + BasicBloomFilter::BasicBloomFilter(double fp, size_t capacity) : BloomFilter(K(M(fp, capacity), capacity)) { @@ -192,6 +202,11 @@ size_t BasicBloomFilter::CountImpl(const HashPolicy::HashVector& h) const return 1; } +CountingBloomFilter::CountingBloomFilter() + : cells_(NULL) + { + } + CountingBloomFilter::CountingBloomFilter(double fp, size_t capacity, size_t width) : BloomFilter(BasicBloomFilter::K(BasicBloomFilter::M(fp, capacity), diff --git a/src/BloomFilter.h b/src/BloomFilter.h index fd1cb31d61..c0101cadf8 100644 --- a/src/BloomFilter.h +++ b/src/BloomFilter.h @@ -188,7 +188,7 @@ public: protected: DECLARE_ABSTRACT_SERIAL(BloomFilter); - BloomFilter() { }; + BloomFilter(); BloomFilter(size_t k); virtual void AddImpl(const HashPolicy::HashVector& hashes) = 0; @@ -244,7 +244,7 @@ public: protected: DECLARE_SERIAL(BasicBloomFilter); - BasicBloomFilter() { } + BasicBloomFilter(); virtual void AddImpl(const HashPolicy::HashVector& h); virtual size_t CountImpl(const HashPolicy::HashVector& h) const; @@ -264,7 +264,7 @@ public: protected: DECLARE_SERIAL(CountingBloomFilter); - CountingBloomFilter() { } + CountingBloomFilter(); virtual void AddImpl(const HashPolicy::HashVector& h); virtual size_t CountImpl(const HashPolicy::HashVector& h) const; diff --git a/src/OpaqueVal.cc b/src/OpaqueVal.cc index 03a6e51ce8..38ea93d000 100644 --- a/src/OpaqueVal.cc +++ b/src/OpaqueVal.cc @@ -518,13 +518,27 @@ bool EntropyVal::DoUnserialize(UnserialInfo* info) return true; } -BloomFilterVal::BloomFilterVal(BloomFilter* bf) - : OpaqueVal(bloomfilter_type), bloom_filter_(bf) +BloomFilterVal::BloomFilterVal() + : OpaqueVal(bloomfilter_type), + type_(NULL), + hash_(NULL), + bloom_filter_(NULL) { } BloomFilterVal::BloomFilterVal(OpaqueType* t) - : OpaqueVal(t) + : OpaqueVal(t), + type_(NULL), + hash_(NULL), + bloom_filter_(NULL) + { + } + +BloomFilterVal::BloomFilterVal(BloomFilter* bf) + : OpaqueVal(bloomfilter_type), + type_(NULL), + hash_(NULL), + bloom_filter_(bf) { } @@ -575,11 +589,6 @@ BloomFilterVal::~BloomFilterVal() delete bloom_filter_; } -BloomFilterVal::BloomFilterVal() - : OpaqueVal(bloomfilter_type) - { - } - IMPLEMENT_SERIAL(BloomFilterVal, SER_BLOOMFILTER_VAL); bool BloomFilterVal::DoSerialize(SerialInfo* info) const From c6381055380f889c4891efcf83da512597ae64d6 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Mon, 10 Jun 2013 12:51:41 -0700 Subject: [PATCH 024/118] Document max parameter in bloomfilter_init. --- src/bro.bif | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/bro.bif b/src/bro.bif index 8bd9575498..9b80c90dbf 100644 --- a/src/bro.bif +++ b/src/bro.bif @@ -4993,6 +4993,13 @@ function anonymize_addr%(a: addr, cl: IPAddrAnonymizationClass%): addr ## capacity: the maximum number of elements that guarantees a false-positive ## rate of *fp*. 
## +## max: The maximum counter value associated with each each element in the +## Bloom filter. If greater than 1, each element in the set has a counter of +## *w = ceil(log_2(max))* bits. Each bit in the underlying bit vector then +## becomes a cell of size *w* bits. Since the number number of cells is a +## function ## of *fp* and *capacity*, it is important to consider the effects +## on space when tuning this value. +## ## Returns: A Bloom filter handle. function bloomfilter_init%(fp: double, capacity: count, max: count &default=1%): opaque of bloomfilter From d25984ba45643be524788b73d7cebc1278a78810 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Mon, 10 Jun 2013 12:55:03 -0700 Subject: [PATCH 025/118] Update baseline for unit tests. --- testing/btest/Baseline/bifs.bloomfilter/output | 8 ++++++++ testing/btest/bifs/bloomfilter.bro | 4 ++-- 2 files changed, 10 insertions(+), 2 deletions(-) create mode 100644 testing/btest/Baseline/bifs.bloomfilter/output diff --git a/testing/btest/Baseline/bifs.bloomfilter/output b/testing/btest/Baseline/bifs.bloomfilter/output new file mode 100644 index 0000000000..65aaa8b07c --- /dev/null +++ b/testing/btest/Baseline/bifs.bloomfilter/output @@ -0,0 +1,8 @@ +0 +1 +1 +0 +1 +1 +1 +1 diff --git a/testing/btest/bifs/bloomfilter.bro b/testing/btest/bifs/bloomfilter.bro index 6abbdd69f7..769cec1200 100644 --- a/testing/btest/bifs/bloomfilter.bro +++ b/testing/btest/bifs/bloomfilter.bro @@ -21,8 +21,8 @@ event bro_init() bloomfilter_add(bf_str, "bar"); print bloomfilter_lookup(bf_str, "foo"); print bloomfilter_lookup(bf_str, "bar"); - print bloomfilter_lookup(bf_str, "baz"); - print bloomfilter_lookup(bf_str, "qux"); + print bloomfilter_lookup(bf_str, "baz"); # FP + print bloomfilter_lookup(bf_str, "qux"); # FP bloomfilter_add(bf_str, 0.5); # Type mismatch bloomfilter_add(bf_str, 100); # Type mismatch From 4c21576c120a0dcc9725308549fd57a8bf9072a1 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Mon, 10 Jun 2013 20:14:34 -0700 Subject: [PATCH 026/118] Add Bloomfilter serialization test code. --- testing/btest/istate/opaque.bro | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/testing/btest/istate/opaque.bro b/testing/btest/istate/opaque.bro index 84818a5e70..ac3b2c0874 100644 --- a/testing/btest/istate/opaque.bro +++ b/testing/btest/istate/opaque.bro @@ -12,6 +12,9 @@ global sha1_handle: opaque of sha1 &persistent &synchronized; global sha256_handle: opaque of sha256 &persistent &synchronized; global entropy_handle: opaque of entropy &persistent &synchronized; +global bloomfilter_elements: set[string] &persistent &synchronized; +global bloomfilter_handle: opaque of bloomfilter &persistent &synchronized; + event bro_done() { local out = open("output.log"); @@ -36,6 +39,9 @@ event bro_done() print out, entropy_test_finish(entropy_handle); else print out, "entropy_test_add() failed"; + + for ( e in bloomfilter_elements ) + print bloomfilter_lookup(bloomfilter_handle, e); } @TEST-END-FILE @@ -47,6 +53,9 @@ global sha1_handle: opaque of sha1 &persistent &synchronized; global sha256_handle: opaque of sha256 &persistent &synchronized; global entropy_handle: opaque of entropy &persistent &synchronized; +global bloomfilter_elements = { "foo", "bar", "baz" } &persistent &synchronized; +global bloomfilter_handle: opaque of bloomfilter &persistent &synchronized; + event bro_init() { local out = open("expected.log"); @@ -72,6 +81,10 @@ event bro_init() entropy_handle = entropy_test_init(); if ( ! 
entropy_test_add(entropy_handle, "f") ) print out, "entropy_test_add() failed"; + + bloomfilter_handle = bloomfilter_init(0.1, 100); + for ( e in bloomfilter_elements ) + bloomfilter_add(bloomfilter_handle, e); } @TEST-END-FILE From 22afbe42dd91e668de8c72417b6a8ff8b544dd99 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Mon, 10 Jun 2013 20:15:13 -0700 Subject: [PATCH 027/118] A number of tweaks of the serialization code. --- src/BitVector.h | 2 +- src/BloomFilter.cc | 17 ++++++++--------- src/BloomFilter.h | 2 +- src/OpaqueVal.cc | 10 ++++++---- src/SerialTypes.h | 8 ++++---- 5 files changed, 20 insertions(+), 19 deletions(-) diff --git a/src/BitVector.h b/src/BitVector.h index 8315a151f0..83fec44a0d 100644 --- a/src/BitVector.h +++ b/src/BitVector.h @@ -8,7 +8,7 @@ /** * A vector of bits. */ -class BitVector : SerialObj { +class BitVector : public SerialObj { public: typedef size_t block_type; typedef size_t size_type; diff --git a/src/BloomFilter.cc b/src/BloomFilter.cc index 40772fecb6..1d73734236 100644 --- a/src/BloomFilter.cc +++ b/src/BloomFilter.cc @@ -55,7 +55,7 @@ IMPLEMENT_SERIAL(CounterVector, SER_COUNTERVECTOR) bool CounterVector::DoSerialize(SerialInfo* info) const { DO_SERIALIZE(SER_COUNTERVECTOR, SerialObj); - if ( ! SERIALIZE(bits_) ) + if ( ! bits_->Serialize(info) ) return false; return SERIALIZE(static_cast(width_)); } @@ -63,14 +63,13 @@ bool CounterVector::DoSerialize(SerialInfo* info) const bool CounterVector::DoUnserialize(UnserialInfo* info) { DO_UNSERIALIZE(SerialObj); - return false; bits_ = BitVector::Unserialize(info); if ( ! bits_ ) return false; uint64 width; if ( ! UNSERIALIZE(&width) ) return false; - width_ = static_cast(width); + width_ = static_cast(width); return true; } @@ -127,7 +126,7 @@ bool BloomFilter::DoSerialize(SerialInfo* info) const DO_SERIALIZE(SER_BLOOMFILTER, SerialObj); if ( ! 
SERIALIZE(static_cast(hash_->K())) ) return false; - return SERIALIZE(static_cast(elements_)); + return SERIALIZE(static_cast(elements_)); } bool BloomFilter::DoUnserialize(UnserialInfo* info) @@ -178,14 +177,14 @@ IMPLEMENT_SERIAL(BasicBloomFilter, SER_BASICBLOOMFILTER) bool BasicBloomFilter::DoSerialize(SerialInfo* info) const { DO_SERIALIZE(SER_BASICBLOOMFILTER, BloomFilter); - return SERIALIZE(bits_); + return bits_->Serialize(info); } bool BasicBloomFilter::DoUnserialize(UnserialInfo* info) { DO_UNSERIALIZE(BloomFilter); bits_ = BitVector::Unserialize(info); - return bits_ == NULL; + return bits_ != NULL; } void BasicBloomFilter::AddImpl(const HashPolicy::HashVector& h) @@ -227,15 +226,15 @@ IMPLEMENT_SERIAL(CountingBloomFilter, SER_COUNTINGBLOOMFILTER) bool CountingBloomFilter::DoSerialize(SerialInfo* info) const { - DO_SERIALIZE(SER_BASICBLOOMFILTER, BloomFilter); - return SERIALIZE(cells_); + DO_SERIALIZE(SER_COUNTINGBLOOMFILTER, BloomFilter); + return cells_->Serialize(info); } bool CountingBloomFilter::DoUnserialize(UnserialInfo* info) { DO_UNSERIALIZE(BloomFilter); cells_ = CounterVector::Unserialize(info); - return cells_ == NULL; + return cells_ != NULL; } void CountingBloomFilter::AddImpl(const HashPolicy::HashVector& h) diff --git a/src/BloomFilter.h b/src/BloomFilter.h index c0101cadf8..4a83ba904b 100644 --- a/src/BloomFilter.h +++ b/src/BloomFilter.h @@ -73,7 +73,7 @@ protected: private: BitVector* bits_; - unsigned width_; + size_t width_; }; /** diff --git a/src/OpaqueVal.cc b/src/OpaqueVal.cc index 38ea93d000..76936dfb78 100644 --- a/src/OpaqueVal.cc +++ b/src/OpaqueVal.cc @@ -574,7 +574,7 @@ size_t BloomFilterVal::Count(const Val* val) const BloomFilterVal* BloomFilterVal::Merge(const BloomFilterVal* first, const BloomFilterVal* second) -{ + { assert(! "not yet implemented"); return NULL; } @@ -594,14 +594,15 @@ IMPLEMENT_SERIAL(BloomFilterVal, SER_BLOOMFILTER_VAL); bool BloomFilterVal::DoSerialize(SerialInfo* info) const { DO_SERIALIZE(SER_BLOOMFILTER_VAL, OpaqueVal); - if ( ! SERIALIZE(type_) ) + if ( ! type_->Serialize(info) ) return false; - return SERIALIZE(bloom_filter_); + return bloom_filter_->Serialize(info); } bool BloomFilterVal::DoUnserialize(UnserialInfo* info) { DO_UNSERIALIZE(OpaqueVal); + type_ = BroType::Unserialize(info); if ( ! type_ ) return false; @@ -609,6 +610,7 @@ bool BloomFilterVal::DoUnserialize(UnserialInfo* info) tl->Append(type_); hash_ = new CompositeHash(tl); Unref(tl); + bloom_filter_ = BloomFilter::Unserialize(info); - return bloom_filter_ == NULL; + return bloom_filter_ != NULL; } diff --git a/src/SerialTypes.h b/src/SerialTypes.h index 859145f19f..9e4aef5b3b 100644 --- a/src/SerialTypes.h +++ b/src/SerialTypes.h @@ -50,10 +50,10 @@ SERIAL_IS_BO(CASE, 0x1200) SERIAL_IS(LOCATION, 0x1300) SERIAL_IS(RE_MATCHER, 0x1400) SERIAL_IS(BITVECTOR, 0x1500) -SERIAL_IS(COUNTERVECTOR, 0xa000) -SERIAL_IS(BLOOMFILTER, 0xa100) -SERIAL_IS(BASICBLOOMFILTER, 0xa200) -SERIAL_IS(COUNTINGBLOOMFILTER, 0xa300) +SERIAL_IS(COUNTERVECTOR, 0x1600) +SERIAL_IS(BLOOMFILTER, 0x1700) +SERIAL_IS(BASICBLOOMFILTER, 0x1800) +SERIAL_IS(COUNTINGBLOOMFILTER, 0x1900) // These are the externally visible types. const SerialType SER_NONE = 0; From 14a701a237dfdd745a842a11f363b93d01926505 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Mon, 10 Jun 2013 22:24:23 -0700 Subject: [PATCH 028/118] Implement value merging. The actual BloomFilter merging still lacks, this is just the first step in the right direction from the user interface side. 
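Roughly speaking, the value-level merge tries each concrete filter type in turn with dynamic_cast and only merges when both operands are of the same concrete type, reporting failure otherwise. A standalone sketch of that dispatch pattern (class names are illustrative, not the Bro types):

    struct Filter { virtual ~Filter() { } };

    struct Basic : Filter {
        static Basic* Merge(const Basic*, const Basic*) { return new Basic(); }
    };

    struct Counting : Filter {
        static Counting* Merge(const Counting*, const Counting*) { return new Counting(); }
    };

    // Attempt a merge as type T; succeeds only if both operands are a T.
    template <typename T>
    static Filter* DoMerge(const Filter* x, const Filter* y)
        {
        const T* a = dynamic_cast<const T*>(x);
        const T* b = dynamic_cast<const T*>(y);
        return a && b ? T::Merge(a, b) : 0;
        }

    Filter* Merge(const Filter* x, const Filter* y)
        {
        if ( Filter* r = DoMerge<Basic>(x, y) )
            return r;
        if ( Filter* r = DoMerge<Counting>(x, y) )
            return r;
        return 0;    // mismatched or unknown filter types
        }

    int main()
        {
        Basic b1, b2;
        Counting c;
        Filter* merged = Merge(&b1, &b2);    // dispatches to Basic::Merge
        Filter* none = Merge(&b1, &c);       // 0: operands differ in type
        delete merged;
        return none == 0 ? 0 : 1;
        }
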
--- src/BloomFilter.cc | 27 ++++++++++++++++++++------- src/BloomFilter.h | 18 ++++++------------ src/OpaqueVal.cc | 17 ++++++++++++++--- src/OpaqueVal.h | 17 ++++++++++++++--- 4 files changed, 54 insertions(+), 25 deletions(-) diff --git a/src/BloomFilter.cc b/src/BloomFilter.cc index 1d73734236..e55db71e46 100644 --- a/src/BloomFilter.cc +++ b/src/BloomFilter.cc @@ -124,9 +124,7 @@ BloomFilter* BloomFilter::Unserialize(UnserialInfo* info) bool BloomFilter::DoSerialize(SerialInfo* info) const { DO_SERIALIZE(SER_BLOOMFILTER, SerialObj); - if ( ! SERIALIZE(static_cast(hash_->K())) ) - return false; - return SERIALIZE(static_cast(elements_)); + return SERIALIZE(static_cast(hash_->K())); } bool BloomFilter::DoUnserialize(UnserialInfo* info) @@ -136,10 +134,6 @@ bool BloomFilter::DoUnserialize(UnserialInfo* info) if ( ! UNSERIALIZE(&k) ) return false; hash_ = new hash_policy(static_cast(k)); - uint64 elements; - if ( ! UNSERIALIZE(&elements) ) - return false; - elements_ = static_cast(elements); return true; } @@ -155,6 +149,17 @@ size_t BasicBloomFilter::K(size_t cells, size_t capacity) return std::ceil(frac * std::log(2)); } +BasicBloomFilter* BasicBloomFilter::Merge(const BasicBloomFilter* x, + const BasicBloomFilter* y) + { + BasicBloomFilter* result = new BasicBloomFilter(); + result->bits_ = new BitVector(*x->bits_ | *y->bits_); + // TODO: implement the hasher pool and make sure the new result gets the same + // number of (equal) hash functions. + //assert(x->hash_ == y->hash_); + return result; + } + BasicBloomFilter::BasicBloomFilter() : bits_(NULL) { @@ -201,6 +206,14 @@ size_t BasicBloomFilter::CountImpl(const HashPolicy::HashVector& h) const return 1; } + +CountingBloomFilter* CountingBloomFilter::Merge(const CountingBloomFilter* x, + const CountingBloomFilter* y) +{ + assert(! "not yet implemented"); + return NULL; +} + CountingBloomFilter::CountingBloomFilter() : cells_(NULL) { diff --git a/src/BloomFilter.h b/src/BloomFilter.h index 4a83ba904b..3b5d9efa71 100644 --- a/src/BloomFilter.h +++ b/src/BloomFilter.h @@ -155,7 +155,6 @@ public: template void Add(const T& x) { - ++elements_; AddImpl(hash_->Hash(&x, sizeof(x))); } @@ -172,16 +171,6 @@ public: return CountImpl(hash_->Hash(&x, sizeof(x))); } - /** - * Retrieves the number of elements added to the Bloom filter. - * - * @return The number of elements in this Bloom filter. - */ - size_t Size() const - { - return elements_; - } - bool Serialize(SerialInfo* info) const; static BloomFilter* Unserialize(UnserialInfo* info); @@ -196,7 +185,6 @@ protected: private: HashPolicy* hash_; - size_t elements_; }; /** @@ -230,6 +218,9 @@ public: */ static size_t K(size_t cells, size_t capacity); + static BasicBloomFilter* Merge(const BasicBloomFilter* x, + const BasicBloomFilter* y); + /** * Constructs a basic Bloom filter with a given false-positive rate and * capacity. 
@@ -258,6 +249,9 @@ private: */ class CountingBloomFilter : public BloomFilter { public: + static CountingBloomFilter* Merge(const CountingBloomFilter* x, + const CountingBloomFilter* y); + CountingBloomFilter(double fp, size_t capacity, size_t width); CountingBloomFilter(size_t cells, size_t capacity, size_t width); diff --git a/src/OpaqueVal.cc b/src/OpaqueVal.cc index 76936dfb78..9dd5c7f980 100644 --- a/src/OpaqueVal.cc +++ b/src/OpaqueVal.cc @@ -572,10 +572,21 @@ size_t BloomFilterVal::Count(const Val* val) const return bloom_filter_->Count(key->Hash()); } -BloomFilterVal* BloomFilterVal::Merge(const BloomFilterVal* first, - const BloomFilterVal* second) +BloomFilterVal* BloomFilterVal::Merge(const BloomFilterVal* x, + const BloomFilterVal* y) { - assert(! "not yet implemented"); + if ( x->Type() != y->Type() ) + { + reporter->InternalError("cannot merge Bloom filters with different types"); + return NULL; + } + + BloomFilterVal* result; + if ( (result = DoMerge(x, y)) ) + return result; + else if ( (result = DoMerge(x, y)) ) + return result; + return NULL; } diff --git a/src/OpaqueVal.h b/src/OpaqueVal.h index e97a530f3a..4b45cad519 100644 --- a/src/OpaqueVal.h +++ b/src/OpaqueVal.h @@ -113,10 +113,10 @@ class BloomFilterVal : public OpaqueVal { BloomFilterVal(const BloomFilterVal&); BloomFilterVal& operator=(const BloomFilterVal&); public: - static BloomFilterVal* Merge(const BloomFilterVal* first, - const BloomFilterVal* second); + static BloomFilterVal* Merge(const BloomFilterVal* x, + const BloomFilterVal* y); - BloomFilterVal(BloomFilter* bf); + explicit BloomFilterVal(BloomFilter* bf); ~BloomFilterVal(); bool Typify(BroType* type); @@ -133,6 +133,17 @@ protected: DECLARE_SERIAL(BloomFilterVal); private: + template + static BloomFilterVal* DoMerge(const BloomFilterVal* x, + const BloomFilterVal* y) + { + const T* a = dynamic_cast(x->bloom_filter_); + const T* b = dynamic_cast(y->bloom_filter_); + if ( a && b ) + return new BloomFilterVal(T::Merge(a, b)); + return NULL; + } + BroType* type_; CompositeHash* hash_; BloomFilter* bloom_filter_; From 1f90b539a8574eeadd4b20ae9f379b0fe08999be Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Thu, 13 Jun 2013 23:06:01 -0700 Subject: [PATCH 029/118] Make H3 class adhere to Bro coding style. 
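For readers unfamiliar with H3: it is a tabulation hash, where each input bit conceptually owns a fixed random word and the digest is the XOR of the words belonging to the set bits; precomputing a 256-entry table per byte position turns that into a single lookup per input byte (and also answers the in-code question, since a byte with no set bits always maps to 0). A condensed, self-contained illustration of the table construction for a single byte position (not the actual template):

    #include <climits>
    #include <cstdio>
    #include <cstdlib>

    int main()
        {
        unsigned long bit_table[CHAR_BIT];
        unsigned long byte_table[UCHAR_MAX + 1];

        // One random word per input bit of this byte position.
        for ( unsigned bit = 0; bit < CHAR_BIT; ++bit )
            bit_table[bit] = std::rand();

        // A byte's entry is the XOR of the words of its set bits, so entry 0
        // is always 0.
        for ( unsigned val = 0; val <= UCHAR_MAX; ++val )
            {
            byte_table[val] = 0;
            for ( unsigned bit = 0; bit < CHAR_BIT; ++bit )
                if ( val & (1u << bit) )
                    byte_table[val] ^= bit_table[bit];
            }

        // Hashing one input byte is now a single table lookup.
        unsigned char byte = 'x';
        std::printf("%lx %lx\n", byte_table[byte], byte_table[0]);
        return 0;
        }
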
--- src/H3.h | 89 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 44 insertions(+), 45 deletions(-) diff --git a/src/H3.h b/src/H3.h index 72d81d519f..50afda5688 100644 --- a/src/H3.h +++ b/src/H3.h @@ -65,53 +65,52 @@ template class H3 { T byte_lookup[N][H3_BYTE_RANGE]; public: - H3(); - T operator()(const void* data, size_t size, size_t offset = 0) const - { - const unsigned char *p = static_cast(data); - T result = 0; + H3() + { + T bit_lookup[N * CHAR_BIT]; - // loop optmized with Duff's Device - register unsigned n = (size + 7) / 8; - switch (size % 8) { - case 0: do { result ^= byte_lookup[offset++][*p++]; - case 7: result ^= byte_lookup[offset++][*p++]; - case 6: result ^= byte_lookup[offset++][*p++]; - case 5: result ^= byte_lookup[offset++][*p++]; - case 4: result ^= byte_lookup[offset++][*p++]; - case 3: result ^= byte_lookup[offset++][*p++]; - case 2: result ^= byte_lookup[offset++][*p++]; - case 1: result ^= byte_lookup[offset++][*p++]; - } while (--n > 0); - } + for ( size_t bit = 0; bit < N * CHAR_BIT; bit++ ) + { + bit_lookup[bit] = 0; + for ( size_t i = 0; i < sizeof(T)/2; i++ ) + // assume random() returns at least 16 random bits + bit_lookup[bit] = (bit_lookup[bit] << 16) | (bro_random() & 0xFFFF); + } - return result; - } + for ( size_t byte = 0; byte < N; byte++ ) + { + for ( unsigned val = 0; val < H3_BYTE_RANGE; val++ ) + { + byte_lookup[byte][val] = 0; + for ( size_t bit = 0; bit < CHAR_BIT; bit++ ) + // Does this mean byte_lookup[*][0] == 0? -RP + if (val & (1 << bit)) + byte_lookup[byte][val] ^= bit_lookup[byte*CHAR_BIT+bit]; + } + } + } + + T operator()(const void* data, size_t size, size_t offset = 0) const + { + const unsigned char *p = static_cast(data); + T result = 0; + + // loop optmized with Duff's Device + register unsigned n = (size + 7) / 8; + switch (size % 8) { + case 0: do { result ^= byte_lookup[offset++][*p++]; + case 7: result ^= byte_lookup[offset++][*p++]; + case 6: result ^= byte_lookup[offset++][*p++]; + case 5: result ^= byte_lookup[offset++][*p++]; + case 4: result ^= byte_lookup[offset++][*p++]; + case 3: result ^= byte_lookup[offset++][*p++]; + case 2: result ^= byte_lookup[offset++][*p++]; + case 1: result ^= byte_lookup[offset++][*p++]; + } while (--n > 0); + } + + return result; + } }; -template -H3::H3() -{ - T bit_lookup[N * CHAR_BIT]; - - for (size_t bit = 0; bit < N * CHAR_BIT; bit++) { - bit_lookup[bit] = 0; - for (size_t i = 0; i < sizeof(T)/2; i++) { - // assume random() returns at least 16 random bits - bit_lookup[bit] = (bit_lookup[bit] << 16) | (bro_random() & 0xFFFF); - } - } - - for (size_t byte = 0; byte < N; byte++) { - for (unsigned val = 0; val < H3_BYTE_RANGE; val++) { - byte_lookup[byte][val] = 0; - for (size_t bit = 0; bit < CHAR_BIT; bit++) { - // Does this mean byte_lookup[*][0] == 0? -RP - if (val & (1 << bit)) - byte_lookup[byte][val] ^= bit_lookup[byte*CHAR_BIT+bit]; - } - } - } -} - #endif //H3_H From 529d12037672d34fd4d1ba5f0d291fd6214f41d4 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Thu, 13 Jun 2013 23:07:31 -0700 Subject: [PATCH 030/118] Make H3 seed configurable. 
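With the seed argument, two H3 instances constructed from the same seed build identical lookup tables and therefore hash identically, while omitting the argument keeps the old behavior of seeding from bro_random(). A hypothetical usage sketch, assuming a 64-bit digest type, the 36-byte key size noted in BloomFilter.h, and that H3.h's surrounding includes make bro_random() visible:

    #include "H3.h"

    bool seeds_are_deterministic()
        {
        H3<unsigned long long, 36> a(42);    // explicit seed: reproducible
        H3<unsigned long long, 36> b(42);
        H3<unsigned long long, 36> c;        // default: seeded from bro_random()
        (void) c;

        static const char key[] = "example";
        return a(key, sizeof(key) - 1) == b(key, sizeof(key) - 1);    // true
        }
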
--- src/H3.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/H3.h b/src/H3.h index 50afda5688..11b0cd79a5 100644 --- a/src/H3.h +++ b/src/H3.h @@ -65,7 +65,7 @@ template class H3 { T byte_lookup[N][H3_BYTE_RANGE]; public: - H3() + H3(T seed = bro_random()) { T bit_lookup[N * CHAR_BIT]; @@ -74,7 +74,7 @@ public: bit_lookup[bit] = 0; for ( size_t i = 0; i < sizeof(T)/2; i++ ) // assume random() returns at least 16 random bits - bit_lookup[bit] = (bit_lookup[bit] << 16) | (bro_random() & 0xFFFF); + bit_lookup[bit] = (bit_lookup[bit] << 16) | (seed & 0xFFFF); } for ( size_t byte = 0; byte < N; byte++ ) From a6d7b7856e87c3a15ba7009ccfb7d6550d1dcfcc Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Thu, 13 Jun 2013 23:12:00 -0700 Subject: [PATCH 031/118] Update H3 documentation (and minor style nits.) --- src/H3.h | 60 +++++++++++++++++++++++++++++--------------------------- 1 file changed, 31 insertions(+), 29 deletions(-) diff --git a/src/H3.h b/src/H3.h index 11b0cd79a5..2eda14d276 100644 --- a/src/H3.h +++ b/src/H3.h @@ -49,9 +49,9 @@ // hash a substring of the data. Hashes of substrings can be bitwise-XOR'ed // together to get the same result as hashing the full string. // Any number of hash functions can be created by creating new instances of H3, -// with the same or different template parameters. The hash function is -// randomly generated using bro_random(); you must call init_random_seed() -// before the H3 constructor if you wish to seed it. +// with the same or different template parameters. The hash function +// constructor takes a seed as argument which defaults to a call to +// bro_random(). #ifndef H3_H @@ -62,34 +62,34 @@ // The number of values representable by a byte. #define H3_BYTE_RANGE (UCHAR_MAX+1) -template class H3 { - T byte_lookup[N][H3_BYTE_RANGE]; +template +class H3 { public: - H3(T seed = bro_random()) + H3(T seed = bro_random()) + { + T bit_lookup[N * CHAR_BIT]; + + for ( size_t bit = 0; bit < N * CHAR_BIT; bit++ ) { - T bit_lookup[N * CHAR_BIT]; - - for ( size_t bit = 0; bit < N * CHAR_BIT; bit++ ) - { - bit_lookup[bit] = 0; - for ( size_t i = 0; i < sizeof(T)/2; i++ ) - // assume random() returns at least 16 random bits - bit_lookup[bit] = (bit_lookup[bit] << 16) | (seed & 0xFFFF); - } - - for ( size_t byte = 0; byte < N; byte++ ) - { - for ( unsigned val = 0; val < H3_BYTE_RANGE; val++ ) - { - byte_lookup[byte][val] = 0; - for ( size_t bit = 0; bit < CHAR_BIT; bit++ ) - // Does this mean byte_lookup[*][0] == 0? -RP - if (val & (1 << bit)) - byte_lookup[byte][val] ^= bit_lookup[byte*CHAR_BIT+bit]; - } - } + bit_lookup[bit] = 0; + for ( size_t i = 0; i < sizeof(T)/2; i++ ) + // assume random() returns at least 16 random bits + bit_lookup[bit] = (bit_lookup[bit] << 16) | (seed & 0xFFFF); } + for ( size_t byte = 0; byte < N; byte++ ) + { + for ( unsigned val = 0; val < H3_BYTE_RANGE; val++ ) + { + byte_lookup[byte][val] = 0; + for ( size_t bit = 0; bit < CHAR_BIT; bit++ ) + // Does this mean byte_lookup[*][0] == 0? 
-RP + if (val & (1 << bit)) + byte_lookup[byte][val] ^= bit_lookup[byte*CHAR_BIT+bit]; + } + } + } + T operator()(const void* data, size_t size, size_t offset = 0) const { const unsigned char *p = static_cast(data); @@ -97,7 +97,7 @@ public: // loop optmized with Duff's Device register unsigned n = (size + 7) / 8; - switch (size % 8) { + switch ( size % 8 ) { case 0: do { result ^= byte_lookup[offset++][*p++]; case 7: result ^= byte_lookup[offset++][*p++]; case 6: result ^= byte_lookup[offset++][*p++]; @@ -106,11 +106,13 @@ public: case 3: result ^= byte_lookup[offset++][*p++]; case 2: result ^= byte_lookup[offset++][*p++]; case 1: result ^= byte_lookup[offset++][*p++]; - } while (--n > 0); + } while ( --n > 0 ); } return result; } +private: + T byte_lookup[N][H3_BYTE_RANGE]; }; #endif //H3_H From d2d8aff81456413597b09b71557b0caabdb7af3d Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Fri, 14 Jun 2013 09:22:48 -0700 Subject: [PATCH 032/118] Add utility function to access first random seed. --- src/util.cc | 13 +++++++++++++ src/util.h | 5 +++++ 2 files changed, 18 insertions(+) diff --git a/src/util.cc b/src/util.cc index de9bd5b679..721ee10a7e 100644 --- a/src/util.cc +++ b/src/util.cc @@ -716,6 +716,8 @@ static bool write_random_seeds(const char* write_file, uint32 seed, static bool bro_rand_determistic = false; static unsigned int bro_rand_state = 0; +static bool first_seed_saved = false; +static unsigned int first_seed = 0; static void bro_srandom(unsigned int seed, bool deterministic) { @@ -800,6 +802,12 @@ void init_random_seed(uint32 seed, const char* read_file, const char* write_file bro_srandom(seed, seeds_done); + if ( ! first_seed_saved ) + { + first_seed = seed; + first_seed_saved = true; + } + if ( ! hmac_key_set ) { MD5((const u_char*) buf, sizeof(buf), shared_hmac_md5_key); @@ -811,6 +819,11 @@ void init_random_seed(uint32 seed, const char* read_file, const char* write_file write_file); } +unsigned int initial_seed() + { + return first_seed; +} + bool have_random_seed() { return bro_rand_determistic; diff --git a/src/util.h b/src/util.h index 49bcbf318b..c3eebb04e3 100644 --- a/src/util.h +++ b/src/util.h @@ -165,6 +165,11 @@ extern void hmac_md5(size_t size, const unsigned char* bytes, extern void init_random_seed(uint32 seed, const char* load_file, const char* write_file); +// Retrieves the initial seed computed after the very first call to +// init_random_seed(). Repeated calls to init_random_seed() will not affect the +// return value of this function. +unsigned int initial_seed(); + // Returns true if the user explicitly set a seed via init_random_seed(); extern bool have_random_seed(); From 1576239f67ef2641135f95bdd331f3c1a54ee5ad Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Fri, 14 Jun 2013 10:19:39 -0700 Subject: [PATCH 033/118] Support seeding for hashers. 
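Seeding the Hasher is what lets the hashing policies build families of related but independent hash functions from a handful of seeds; the double-hashing policy in this code, for instance, combines two seeded hashers into k digests via h_i = h1 + i * h2. A small self-contained sketch of that combination step (types and constants are illustrative):

    #include <cstdio>
    #include <vector>

    typedef unsigned long long hash_type;

    // h_i = h1 + i * h2, the scheme DoubleHashing::Hash() uses to derive k
    // hash values from two independently seeded digests.
    std::vector<hash_type> combine(hash_type h1, hash_type h2, unsigned k)
        {
        std::vector<hash_type> h(k, 0);
        for ( unsigned i = 0; i < k; ++i )
            h[i] = h1 + i * h2;
        return h;
        }

    int main()
        {
        std::vector<hash_type> hashes = combine(0x9e3779b9ULL, 0x85ebca6bULL, 4);
        for ( unsigned i = 0; i < hashes.size(); ++i )
            std::printf("h[%u] = %llx\n", i, hashes[i]);
        return 0;
        }
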
--- src/BloomFilter.cc | 11 +++++++++++ src/BloomFilter.h | 4 +++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/BloomFilter.cc b/src/BloomFilter.cc index e55db71e46..eff7eee733 100644 --- a/src/BloomFilter.cc +++ b/src/BloomFilter.cc @@ -74,6 +74,17 @@ bool CounterVector::DoUnserialize(UnserialInfo* info) } +HashPolicy::Hasher::Hasher(size_t seed) + : h3_(seed) +{ +} + +HashPolicy::HashType +HashPolicy::Hasher::operator()(const void* x, size_t n) const + { + return h3_(x, n); + } + HashPolicy::HashVector DefaultHashing::Hash(const void* x, size_t n) const { HashVector h(K(), 0); diff --git a/src/BloomFilter.h b/src/BloomFilter.h index 3b5d9efa71..65133621f9 100644 --- a/src/BloomFilter.h +++ b/src/BloomFilter.h @@ -96,7 +96,9 @@ protected: */ class Hasher { public: - HashType operator()(const void* x, size_t n) const { return h3_(x, n); } + Hasher(size_t seed); + + HashType operator()(const void* x, size_t n) const; private: // FIXME: The hardcoded value of 36 comes from UHASH_KEY_SIZE defined in // Hash.h. I do not know how this value impacts the hash function behavior From 79a6a26f9f70a937551a94a5dc83b2c5dafe1414 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Fri, 14 Jun 2013 10:20:33 -0700 Subject: [PATCH 034/118] H3 does not check for zero length input. --- src/BloomFilter.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/BloomFilter.cc b/src/BloomFilter.cc index eff7eee733..6a44defc6d 100644 --- a/src/BloomFilter.cc +++ b/src/BloomFilter.cc @@ -82,7 +82,7 @@ HashPolicy::Hasher::Hasher(size_t seed) HashPolicy::HashType HashPolicy::Hasher::operator()(const void* x, size_t n) const { - return h3_(x, n); + return n == 0 ? 0 : h3_(x, n); } HashPolicy::HashVector DefaultHashing::Hash(const void* x, size_t n) const From 9f740642891664ee8f482285523969793d0063d0 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Mon, 17 Jun 2013 14:02:14 -0700 Subject: [PATCH 035/118] Expose Bro's linear congruence PRNG as utility function. It was previously not possible to crank the wheel on the PRNG in a deterministic way without affecting the globally unique seed. The new extra utility function bro_prng takes a state in the form of a long int and returns the new PRNG state, now allowing arbitrary code parts to use the random number functionality. This commit also fixes a problem in the H3 constructor, which requires use of multiple seeds. The single seed passed in now serves as seed to crank out as many value needed using bro_prng. --- src/H3.h | 1 + src/util.cc | 29 ++++++++++++++++++----------- src/util.h | 7 +++++-- 3 files changed, 24 insertions(+), 13 deletions(-) diff --git a/src/H3.h b/src/H3.h index 2eda14d276..e2dc865147 100644 --- a/src/H3.h +++ b/src/H3.h @@ -72,6 +72,7 @@ public: for ( size_t bit = 0; bit < N * CHAR_BIT; bit++ ) { bit_lookup[bit] = 0; + seed = bro_prng(seed); for ( size_t i = 0; i < sizeof(T)/2; i++ ) // assume random() returns at least 16 random bits bit_lookup[bit] = (bit_lookup[bit] << 16) | (seed & 0xFFFF); diff --git a/src/util.cc b/src/util.cc index 721ee10a7e..cdd257d94f 100644 --- a/src/util.cc +++ b/src/util.cc @@ -829,22 +829,29 @@ bool have_random_seed() return bro_rand_determistic; } +long int bro_prng(long int state) + { + // Use our own simple linear congruence PRNG to make sure we are + // predictable across platforms. 
+ static const long int m = 2147483647; + static const long int a = 16807; + const long int q = m / a; + const long int r = m % a; + + state = a * ( state % q ) - r * ( state / q ); + + if ( state <= 0 ) + state += m; + + return state; + } + long int bro_random() { if ( ! bro_rand_determistic ) return random(); // Use system PRNG. - // Use our own simple linear congruence PRNG to make sure we are - // predictable across platforms. - const long int m = 2147483647; - const long int a = 16807; - const long int q = m / a; - const long int r = m % a; - - bro_rand_state = a * ( bro_rand_state % q ) - r * ( bro_rand_state / q ); - - if ( bro_rand_state <= 0 ) - bro_rand_state += m; + bro_rand_state = bro_prng(bro_rand_state); return bro_rand_state; } diff --git a/src/util.h b/src/util.h index c3eebb04e3..0af401c668 100644 --- a/src/util.h +++ b/src/util.h @@ -173,9 +173,12 @@ unsigned int initial_seed(); // Returns true if the user explicitly set a seed via init_random_seed(); extern bool have_random_seed(); +// A simple linear congruence PRNG. It takes its state as argument and returns +// a new random value, which can serve as state for subsequent calls. +long int bro_prng(long int state); + // Replacement for the system random(), to which is normally falls back -// except when a seed has been given. In that case, we use our own -// predictable PRNG. +// except when a seed has been given. In that case, the function bro_prng. long int bro_random(); // Calls the system srandom() function with the given seed if not running From 532fbfb4d27ac9ee733dbcfebccbc91e652d4eb0 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Mon, 17 Jun 2013 16:06:02 -0700 Subject: [PATCH 036/118] Factor implementation and change interface. When constructing a Bloom filter, one now has to pass a HashPolicy instance to it. This separates more clearly the concerns of hashing and Bloom filter management. This commit also changes the interface to initialize Bloom filters: there exist now two initialization functions, one for each type: (1) bloomfilter_basic_init(fp: double, capacity: count, name: string &default=""): opaque of bloomfilter (2) bloomfilter_counting_init(k: count, cells: count, max: count, name: string &default=""): opaque of bloomfilter The BiFs for adding elements and performing lookups remain the same. This essentially gives us "BiF polymorphism" at script land, where the initialization BiF constructs the most derived type while subsequent BiFs adhere to the same interface. The reason why we split up the constructor in this case is that we have not yet derived the math that computes the optimal number of hash functions for counting Bloom filters---users have to explicitly parameterize them for now. 
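For reference, the basic initializer sizes the filter with the textbook formulas: for capacity n and target false-positive rate fp it uses roughly m = -n * ln(fp) / ln(2)^2 cells and k = ceil((m/n) * ln 2) hash functions, which mirrors what BasicBloomFilter::M() and K() compute. A standalone sketch of that arithmetic for the fp = 0.1, capacity = 1000 case from the unit test (illustration only, not the BiF itself):

    #include <cmath>
    #include <cstdio>

    int main()
        {
        double fp = 0.1;      // target false-positive rate
        double n = 1000;      // capacity: expected number of distinct elements
        double ln2 = std::log(2.0);

        double m = std::ceil(- n * std::log(fp) / (ln2 * ln2));    // cells
        double k = std::ceil((m / n) * ln2);                       // hash functions

        // Prints roughly: cells=4793 hash_functions=4
        std::printf("cells=%.0f hash_functions=%.0f\n", m, k);
        return 0;
        }
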
--- src/BloomFilter.cc | 159 +++++--------------------- src/BloomFilter.h | 172 ++++------------------------- src/CMakeLists.txt | 2 + src/CounterVector.cc | 75 +++++++++++++ src/CounterVector.h | 78 +++++++++++++ src/HashPolicy.cc | 72 ++++++++++++ src/HashPolicy.h | 90 +++++++++++++++ src/OpaqueVal.cc | 1 + src/bro.bif | 57 ++++++---- testing/btest/bifs/bloomfilter.bro | 20 ++-- testing/btest/istate/opaque.bro | 2 +- 11 files changed, 409 insertions(+), 319 deletions(-) create mode 100644 src/CounterVector.cc create mode 100644 src/CounterVector.h create mode 100644 src/HashPolicy.cc create mode 100644 src/HashPolicy.h diff --git a/src/BloomFilter.cc b/src/BloomFilter.cc index 6a44defc6d..0be64c18de 100644 --- a/src/BloomFilter.cc +++ b/src/BloomFilter.cc @@ -1,117 +1,16 @@ #include "BloomFilter.h" #include +#include "CounterVector.h" #include "Serializer.h" -CounterVector::CounterVector(size_t width, size_t cells) - : bits_(new BitVector(width * cells)), width_(width) - { - } - -CounterVector::~CounterVector() - { - delete bits_; - } - -bool CounterVector::Increment(size_type cell, count_type value) - { - // TODO - assert(! "not yet implemented"); - return false; - } - -bool CounterVector::Decrement(size_type cell, count_type value) - { - // TODO - assert(! "not yet implemented"); - return false; - } - -CounterVector::count_type CounterVector::Count(size_type cell) const - { - // TODO - assert(! "not yet implemented"); - return 0; - } - -CounterVector::size_type CounterVector::Size() const - { - return bits_->Blocks() / width_; - } - -bool CounterVector::Serialize(SerialInfo* info) const - { - return SerialObj::Serialize(info); - } - -CounterVector* CounterVector::Unserialize(UnserialInfo* info) - { - return reinterpret_cast( - SerialObj::Unserialize(info, SER_COUNTERVECTOR)); - } - -IMPLEMENT_SERIAL(CounterVector, SER_COUNTERVECTOR) - -bool CounterVector::DoSerialize(SerialInfo* info) const - { - DO_SERIALIZE(SER_COUNTERVECTOR, SerialObj); - if ( ! bits_->Serialize(info) ) - return false; - return SERIALIZE(static_cast(width_)); - } - -bool CounterVector::DoUnserialize(UnserialInfo* info) - { - DO_UNSERIALIZE(SerialObj); - bits_ = BitVector::Unserialize(info); - if ( ! bits_ ) - return false; - uint64 width; - if ( ! UNSERIALIZE(&width) ) - return false; - width_ = static_cast(width); - return true; - } - - -HashPolicy::Hasher::Hasher(size_t seed) - : h3_(seed) -{ -} - -HashPolicy::HashType -HashPolicy::Hasher::operator()(const void* x, size_t n) const - { - return n == 0 ? 0 : h3_(x, n); - } - -HashPolicy::HashVector DefaultHashing::Hash(const void* x, size_t n) const - { - HashVector h(K(), 0); - for ( size_t i = 0; i < h.size(); ++i ) - h[i] = hashers_[i](x, n); - return h; - } - - -HashPolicy::HashVector DoubleHashing::Hash(const void* x, size_t n) const - { - HashType h1 = hasher1_(x, n); - HashType h2 = hasher2_(x, n); - HashVector h(K(), 0); - for ( size_t i = 0; i < h.size(); ++i ) - h[i] = h1 + i * h2; - return h; - } - - BloomFilter::BloomFilter() : hash_(NULL) { } -BloomFilter::BloomFilter(size_t k) - : hash_(new hash_policy(k)) +BloomFilter::BloomFilter(const HashPolicy* hash_policy) + : hash_(hash_policy) { } @@ -135,7 +34,11 @@ BloomFilter* BloomFilter::Unserialize(UnserialInfo* info) bool BloomFilter::DoSerialize(SerialInfo* info) const { DO_SERIALIZE(SER_BLOOMFILTER, SerialObj); - return SERIALIZE(static_cast(hash_->K())); + // FIXME: Since we have a fixed hashing policy, we just serialize the + // information needed to reconstruct it. + if ( ! 
SERIALIZE(static_cast(hash_->K())) ) + return false; + return SERIALIZE_STR(hash_->Name().c_str(), hash_->Name().size()); } bool BloomFilter::DoUnserialize(UnserialInfo* info) @@ -144,10 +47,15 @@ bool BloomFilter::DoUnserialize(UnserialInfo* info) uint16 k; if ( ! UNSERIALIZE(&k) ) return false; - hash_ = new hash_policy(static_cast(k)); + const char* name; + if ( ! UNSERIALIZE_STR(&name, 0) ) + return false; + // FIXME: for now Bloom filters always use double hashing. + hash_ = new DefaultHashing(k, name); return true; } + size_t BasicBloomFilter::M(double fp, size_t capacity) { double ln2 = std::log(2); @@ -163,11 +71,9 @@ size_t BasicBloomFilter::K(size_t cells, size_t capacity) BasicBloomFilter* BasicBloomFilter::Merge(const BasicBloomFilter* x, const BasicBloomFilter* y) { + // TODO: Ensure that x and y use the same HashPolicy before proceeding. BasicBloomFilter* result = new BasicBloomFilter(); result->bits_ = new BitVector(*x->bits_ | *y->bits_); - // TODO: implement the hasher pool and make sure the new result gets the same - // number of (equal) hash functions. - //assert(x->hash_ == y->hash_); return result; } @@ -176,16 +82,10 @@ BasicBloomFilter::BasicBloomFilter() { } -BasicBloomFilter::BasicBloomFilter(double fp, size_t capacity) - : BloomFilter(K(M(fp, capacity), capacity)) +BasicBloomFilter::BasicBloomFilter(const HashPolicy* hash_policy, size_t cells) + : BloomFilter(hash_policy), + bits_(new BitVector(cells)) { - bits_ = new BitVector(M(fp, capacity)); - } - -BasicBloomFilter::BasicBloomFilter(size_t cells, size_t capacity) - : BloomFilter(K(cells, capacity)) - { - bits_ = new BitVector(cells); } IMPLEMENT_SERIAL(BasicBloomFilter, SER_BASICBLOOMFILTER) @@ -203,13 +103,13 @@ bool BasicBloomFilter::DoUnserialize(UnserialInfo* info) return bits_ != NULL; } -void BasicBloomFilter::AddImpl(const HashPolicy::HashVector& h) +void BasicBloomFilter::AddImpl(const HashPolicy::hash_vector& h) { for ( size_t i = 0; i < h.size(); ++i ) bits_->Set(h[i] % bits_->Size()); } -size_t BasicBloomFilter::CountImpl(const HashPolicy::HashVector& h) const +size_t BasicBloomFilter::CountImpl(const HashPolicy::hash_vector& h) const { for ( size_t i = 0; i < h.size(); ++i ) if ( ! (*bits_)[h[i] % bits_->Size()] ) @@ -230,17 +130,9 @@ CountingBloomFilter::CountingBloomFilter() { } -CountingBloomFilter::CountingBloomFilter(double fp, size_t capacity, - size_t width) - : BloomFilter(BasicBloomFilter::K(BasicBloomFilter::M(fp, capacity), - capacity)) - { - cells_ = new CounterVector(width, BasicBloomFilter::M(fp, capacity)); - } - -CountingBloomFilter::CountingBloomFilter(size_t cells, size_t capacity, - size_t width) - : BloomFilter(BasicBloomFilter::K(cells, capacity)) +CountingBloomFilter::CountingBloomFilter(const HashPolicy* hash_policy, + size_t cells, size_t width) + : BloomFilter(hash_policy) { cells_ = new CounterVector(width, cells); } @@ -261,18 +153,19 @@ bool CountingBloomFilter::DoUnserialize(UnserialInfo* info) return cells_ != NULL; } -void CountingBloomFilter::AddImpl(const HashPolicy::HashVector& h) +void CountingBloomFilter::AddImpl(const HashPolicy::hash_vector& h) { for ( size_t i = 0; i < h.size(); ++i ) cells_->Increment(h[i] % cells_->Size(), 1); } -size_t CountingBloomFilter::CountImpl(const HashPolicy::HashVector& h) const +size_t CountingBloomFilter::CountImpl(const HashPolicy::hash_vector& h) const { CounterVector::size_type min = std::numeric_limits::max(); for ( size_t i = 0; i < h.size(); ++i ) { + // TODO: Use partitioning. 
CounterVector::size_type cnt = cells_->Count(h[i] % cells_->Size()); if ( cnt < min ) min = cnt; diff --git a/src/BloomFilter.h b/src/BloomFilter.h index 65133621f9..189f4920b7 100644 --- a/src/BloomFilter.h +++ b/src/BloomFilter.h @@ -3,141 +3,9 @@ #include #include "BitVector.h" -#include "Hash.h" -#include "H3.h" +#include "HashPolicy.h" -/** - * A vector of counters, each of which have a fixed number of bits. - */ -class CounterVector : public SerialObj { -public: - typedef size_t size_type; - typedef uint64 count_type; - - /** - * Constructs a counter vector having cells of a given width. - * - * @param width The number of bits that each cell occupies. - * - * @param cells The number of cells in the bitvector. - */ - CounterVector(size_t width, size_t cells = 1024); - - ~CounterVector(); - - /** - * Increments a given cell. - * - * @param cell The cell to increment. - * - * @param value The value to add to the current counter in *cell*. - * - * @return `true` if adding *value* to the counter in *cell* succeeded. - */ - bool Increment(size_type cell, count_type value); - - /** - * Decrements a given cell. - * - * @param cell The cell to decrement. - * - * @param value The value to subtract from the current counter in *cell*. - * - * @return `true` if subtracting *value* from the counter in *cell* succeeded. - */ - bool Decrement(size_type cell, count_type value); - - /** - * Retrieves the counter of a given cell. - * - * @param cell The cell index to retrieve the count for. - * - * @return The counter associated with *cell*. - */ - count_type Count(size_type cell) const; - - /** - * Retrieves the number of cells in the storage. - * - * @return The number of cells. - */ - size_type Size() const; - - bool Serialize(SerialInfo* info) const; - static CounterVector* Unserialize(UnserialInfo* info); - -protected: - DECLARE_SERIAL(CounterVector); - - CounterVector() { } - -private: - BitVector* bits_; - size_t width_; -}; - -/** - * The abstract base class for hash policies that hash elements *k* times. - * @tparam Codomain An integral type. - */ -class HashPolicy { -public: - typedef hash_t HashType; - typedef std::vector HashVector; - - virtual ~HashPolicy() { } - size_t K() const { return k_; } - virtual HashVector Hash(const void* x, size_t n) const = 0; - -protected: - /** - * A functor that computes a universal hash function. - * @tparam Codomain An integral type. - */ - class Hasher { - public: - Hasher(size_t seed); - - HashType operator()(const void* x, size_t n) const; - private: - // FIXME: The hardcoded value of 36 comes from UHASH_KEY_SIZE defined in - // Hash.h. I do not know how this value impacts the hash function behavior - // so I'll just copy it verbatim. (Matthias) - H3 h3_; - }; - - HashPolicy(size_t k) : k_(k) { } - -private: - const size_t k_; -}; - -/** - * The *default* hashing policy. Performs *k* hash function computations. - */ -class DefaultHashing : public HashPolicy { -public: - DefaultHashing(size_t k) : HashPolicy(k), hashers_(k) { } - - virtual HashVector Hash(const void* x, size_t n) const; - -private: - std::vector hashers_; -}; - -/** - * The *double-hashing* policy. Uses a linear combination of two hash functions. - */ -class DoubleHashing : public HashPolicy { -public: - DoubleHashing(size_t k) : HashPolicy(k) { } - - virtual HashVector Hash(const void* x, size_t n) const; - -private: - Hasher hasher1_; - Hasher hasher2_; -}; +class CounterVector; /** * The abstract base class for Bloom filters. 
@@ -146,8 +14,6 @@ class BloomFilter : public SerialObj { public: // At this point we won't let the user choose the hash policy, but we might // open up the interface in the future. - typedef DoubleHashing hash_policy; - virtual ~BloomFilter(); /** @@ -180,13 +46,19 @@ protected: DECLARE_ABSTRACT_SERIAL(BloomFilter); BloomFilter(); - BloomFilter(size_t k); - virtual void AddImpl(const HashPolicy::HashVector& hashes) = 0; - virtual size_t CountImpl(const HashPolicy::HashVector& hashes) const = 0; + /** + * Constructs a Bloom filter. + * + * @param hash_policy The hash policy to use for this Bloom filter. + */ + BloomFilter(const HashPolicy* hash_policy); + + virtual void AddImpl(const HashPolicy::hash_vector& hashes) = 0; + virtual size_t CountImpl(const HashPolicy::hash_vector& hashes) const = 0; private: - HashPolicy* hash_; + const HashPolicy* hash_; }; /** @@ -223,24 +95,18 @@ public: static BasicBloomFilter* Merge(const BasicBloomFilter* x, const BasicBloomFilter* y); - /** - * Constructs a basic Bloom filter with a given false-positive rate and - * capacity. - */ - BasicBloomFilter(double fp, size_t capacity); - /** * Constructs a basic Bloom filter with a given number of cells and capacity. */ - BasicBloomFilter(size_t cells, size_t capacity); + BasicBloomFilter(const HashPolicy* hash_policy, size_t cells); protected: DECLARE_SERIAL(BasicBloomFilter); BasicBloomFilter(); - virtual void AddImpl(const HashPolicy::HashVector& h); - virtual size_t CountImpl(const HashPolicy::HashVector& h) const; + virtual void AddImpl(const HashPolicy::hash_vector& h); + virtual size_t CountImpl(const HashPolicy::hash_vector& h) const; private: BitVector* bits_; @@ -254,16 +120,16 @@ public: static CountingBloomFilter* Merge(const CountingBloomFilter* x, const CountingBloomFilter* y); - CountingBloomFilter(double fp, size_t capacity, size_t width); - CountingBloomFilter(size_t cells, size_t capacity, size_t width); + CountingBloomFilter(const HashPolicy* hash_policy, size_t cells, + size_t width); protected: DECLARE_SERIAL(CountingBloomFilter); CountingBloomFilter(); - virtual void AddImpl(const HashPolicy::HashVector& h); - virtual size_t CountImpl(const HashPolicy::HashVector& h) const; + virtual void AddImpl(const HashPolicy::hash_vector& h); + virtual size_t CountImpl(const HashPolicy::hash_vector& h) const; private: CounterVector* cells_; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 1537bb04b0..f2c7ce6bad 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -255,6 +255,7 @@ set(bro_SRCS ChunkedIO.cc CompHash.cc Conn.cc + CounterVector.cc DFA.cc DbgBreakpoint.cc DbgHelp.cc @@ -278,6 +279,7 @@ set(bro_SRCS Frame.cc Func.cc Hash.cc + HashPolicy.cc ID.cc IntSet.cc IOSource.cc diff --git a/src/CounterVector.cc b/src/CounterVector.cc new file mode 100644 index 0000000000..8ed4c30427 --- /dev/null +++ b/src/CounterVector.cc @@ -0,0 +1,75 @@ +#include "CounterVector.h" + +#include "BitVector.h" +#include "Serializer.h" + +CounterVector::CounterVector(size_t width, size_t cells) + : bits_(new BitVector(width * cells)), width_(width) + { + } + +CounterVector::~CounterVector() + { + delete bits_; + } + +bool CounterVector::Increment(size_type cell, count_type value) + { + // TODO + assert(! "not yet implemented"); + return false; + } + +bool CounterVector::Decrement(size_type cell, count_type value) + { + // TODO + assert(! "not yet implemented"); + return false; + } + +CounterVector::count_type CounterVector::Count(size_type cell) const + { + // TODO + assert(! 
"not yet implemented"); + return 0; + } + +CounterVector::size_type CounterVector::Size() const + { + return bits_->Blocks() / width_; + } + +bool CounterVector::Serialize(SerialInfo* info) const + { + return SerialObj::Serialize(info); + } + +CounterVector* CounterVector::Unserialize(UnserialInfo* info) + { + return reinterpret_cast( + SerialObj::Unserialize(info, SER_COUNTERVECTOR)); + } + +IMPLEMENT_SERIAL(CounterVector, SER_COUNTERVECTOR) + +bool CounterVector::DoSerialize(SerialInfo* info) const + { + DO_SERIALIZE(SER_COUNTERVECTOR, SerialObj); + if ( ! bits_->Serialize(info) ) + return false; + return SERIALIZE(static_cast(width_)); + } + +bool CounterVector::DoUnserialize(UnserialInfo* info) + { + DO_UNSERIALIZE(SerialObj); + bits_ = BitVector::Unserialize(info); + if ( ! bits_ ) + return false; + uint64 width; + if ( ! UNSERIALIZE(&width) ) + return false; + width_ = static_cast(width); + return true; + } + diff --git a/src/CounterVector.h b/src/CounterVector.h new file mode 100644 index 0000000000..ecc8fe90e0 --- /dev/null +++ b/src/CounterVector.h @@ -0,0 +1,78 @@ +#ifndef CounterVector_h +#define CounterVector_h + +#include "SerialObj.h" + +class BitVector; + +/** + * A vector of counters, each of which have a fixed number of bits. + */ +class CounterVector : public SerialObj { +public: + typedef size_t size_type; + typedef uint64 count_type; + + /** + * Constructs a counter vector having cells of a given width. + * + * @param width The number of bits that each cell occupies. + * + * @param cells The number of cells in the bitvector. + */ + CounterVector(size_t width, size_t cells = 1024); + + ~CounterVector(); + + /** + * Increments a given cell. + * + * @param cell The cell to increment. + * + * @param value The value to add to the current counter in *cell*. + * + * @return `true` if adding *value* to the counter in *cell* succeeded. + */ + bool Increment(size_type cell, count_type value); + + /** + * Decrements a given cell. + * + * @param cell The cell to decrement. + * + * @param value The value to subtract from the current counter in *cell*. + * + * @return `true` if subtracting *value* from the counter in *cell* succeeded. + */ + bool Decrement(size_type cell, count_type value); + + /** + * Retrieves the counter of a given cell. + * + * @param cell The cell index to retrieve the count for. + * + * @return The counter associated with *cell*. + */ + count_type Count(size_type cell) const; + + /** + * Retrieves the number of cells in the storage. + * + * @return The number of cells. + */ + size_type Size() const; + + bool Serialize(SerialInfo* info) const; + static CounterVector* Unserialize(UnserialInfo* info); + +protected: + DECLARE_SERIAL(CounterVector); + + CounterVector() { } + +private: + BitVector* bits_; + size_t width_; +}; + +#endif diff --git a/src/HashPolicy.cc b/src/HashPolicy.cc new file mode 100644 index 0000000000..d6fb4f3da4 --- /dev/null +++ b/src/HashPolicy.cc @@ -0,0 +1,72 @@ +#include "HashPolicy.h" + +#include "digest.h" + +Hasher::Hasher(size_t seed, const std::string& extra) + : h_(compute_seed(seed, extra)) + { + } + +Hasher::hash_type Hasher::operator()(const void* x, size_t n) const + { + return n == 0 ? 
0 : h_(x, n); + } + +size_t Hasher::compute_seed(size_t seed, const std::string& extra) + { + u_char digest[SHA256_DIGEST_LENGTH]; + SHA256_CTX ctx; + sha256_init(&ctx); + if ( extra.empty() ) + { + unsigned int first_seed = initial_seed(); + sha256_update(&ctx, &first_seed, sizeof(first_seed)); + } + else + { + sha256_update(&ctx, extra.c_str(), extra.size()); + } + sha256_update(&ctx, &seed, sizeof(seed)); + sha256_final(&ctx, digest); + return *reinterpret_cast(digest); + } + + +HashPolicy::HashPolicy(size_t k, const std::string& name) + : k_(k), name_(name) + { + } + +DefaultHashing::DefaultHashing(size_t k, const std::string& name) + : HashPolicy(k, name) + { + for ( size_t i = 0; i < k; ++i ) + hashers_.push_back(Hasher(i, name)); + } + +HashPolicy::hash_vector DefaultHashing::Hash(const void* x, size_t n) const + { + hash_vector h(K(), 0); + for ( size_t i = 0; i < h.size(); ++i ) + h[i] = hashers_[i](x, n); + return h; + } + +DoubleHashing::DoubleHashing(size_t k, const std::string& name) + : HashPolicy(k, name), + hasher1_(1, name), + hasher2_(2, name) + { + } + +HashPolicy::hash_vector DoubleHashing::Hash(const void* x, size_t n) const + { + hash_type h1 = hasher1_(x, n); + hash_type h2 = hasher2_(x, n); + hash_vector h(K(), 0); + for ( size_t i = 0; i < h.size(); ++i ) + h[i] = h1 + i * h2; + return h; + } + + diff --git a/src/HashPolicy.h b/src/HashPolicy.h new file mode 100644 index 0000000000..4660bc0080 --- /dev/null +++ b/src/HashPolicy.h @@ -0,0 +1,90 @@ +#ifndef HashPolicy_h +#define HashPolicy_h + +#include "Hash.h" +#include "H3.h" + +/** + * A functor that computes a universal hash function. + */ +class Hasher { +public: + typedef hash_t hash_type; + + /** + * Constructs a hasher seeded by a given seed and optionally an extra + * descriptor. + * + * @param seed The seed to use. + * + * @param extra If not `NULL`, the hasher will not mix in the initial seed + * but instead use this NUL-terminated string as additional seed. + */ + Hasher(size_t seed, const std::string& extra = ""); + + /** + * Computes the hash digest of contiguous data. + * + * @param x A pointer to the beginning of the byte sequence to hash. + * + * @param n The length of the sequence pointed to by *x*. + */ + hash_type operator()(const void* x, size_t n) const; + +private: + static size_t compute_seed(size_t seed, const std::string& extra); + + H3 h_; +}; + +/** + * The abstract base class for hash policies that hash elements *k* times. + */ +class HashPolicy { +public: + typedef Hasher::hash_type hash_type; + typedef std::vector hash_vector; + + virtual ~HashPolicy() { } + + virtual hash_vector Hash(const void* x, size_t n) const = 0; + + size_t K() const { return k_; } + const std::string& Name() const { return name_; } + +protected: + HashPolicy(size_t k, const std::string& name); + +private: + const size_t k_; + std::string name_; +}; + +/** + * The default hashing policy. Performs *k* hash function computations. + */ +class DefaultHashing : public HashPolicy { +public: + DefaultHashing(size_t k, const std::string& name); + + virtual hash_vector Hash(const void* x, size_t n) const /* override */; + +private: + std::vector hashers_; +}; + +/** + * The *double-hashing* policy. Uses a linear combination of two hash functions. 
+ */ +class DoubleHashing : public HashPolicy { +public: + DoubleHashing(size_t k, const std::string& name); + + virtual hash_vector Hash(const void* x, size_t n) const; + +private: + Hasher hasher1_; + Hasher hasher2_; +}; + +#endif diff --git a/src/OpaqueVal.cc b/src/OpaqueVal.cc index 9dd5c7f980..8b82916689 100644 --- a/src/OpaqueVal.cc +++ b/src/OpaqueVal.cc @@ -605,6 +605,7 @@ IMPLEMENT_SERIAL(BloomFilterVal, SER_BLOOMFILTER_VAL); bool BloomFilterVal::DoSerialize(SerialInfo* info) const { DO_SERIALIZE(SER_BLOOMFILTER_VAL, OpaqueVal); + assert( type_ ); if ( ! type_->Serialize(info) ) return false; return bloom_filter_->Serialize(info); diff --git a/src/bro.bif b/src/bro.bif index 9b80c90dbf..a89b808888 100644 --- a/src/bro.bif +++ b/src/bro.bif @@ -4986,42 +4986,55 @@ function anonymize_addr%(a: addr, cl: IPAddrAnonymizationClass%): addr #include "BloomFilter.h" %%} -## Initializes a Bloom filter data structure. +## Creates a basic Bloom filter. ## ## fp: The desired false-positive rate. ## ## capacity: the maximum number of elements that guarantees a false-positive ## rate of *fp*. ## -## max: The maximum counter value associated with each each element in the -## Bloom filter. If greater than 1, each element in the set has a counter of -## *w = ceil(log_2(max))* bits. Each bit in the underlying bit vector then -## becomes a cell of size *w* bits. Since the number number of cells is a -## function ## of *fp* and *capacity*, it is important to consider the effects -## on space when tuning this value. +## name: A name that uniquely identifies and seeds the Bloom filter. If empty, +## the initialization will become dependent on the initial seed. ## ## Returns: A Bloom filter handle. -function bloomfilter_init%(fp: double, capacity: count, - max: count &default=1%): opaque of bloomfilter +function bloomfilter_basic_init%(fp: double, capacity: count, + name: string &default=""%): opaque of bloomfilter %{ if ( fp < 0.0 || fp > 1.0 ) { reporter->Error("false-positive rate must take value between 0 and 1"); return NULL; } - BloomFilter* bf; - if ( max == 1 ) - { - bf = new BasicBloomFilter(fp, capacity); - } - else - { - uint16 width = 0; - while ( max >>= 1 ) - ++width; - bf = new CountingBloomFilter(fp, capacity, width); - } - return new BloomFilterVal(bf); + + size_t cells = BasicBloomFilter::M(fp, capacity); + size_t optimal_k = BasicBloomFilter::K(cells, capacity); + const HashPolicy* hp = new DefaultHashing(optimal_k, name->CheckString()); + fprintf(stderr, "constructing Bloom filter with %llu hash fns and %llu cells\n", optimal_k, cells); + return new BloomFilterVal(new BasicBloomFilter(hp, cells)); + %} + +## Creates a counting Bloom filter. +## +## k: The number of hash functions to use. +## +## cells: The number of cells of the underlying counter vector. +## +## max: The maximum counter value associated with each each element described +## by *w = ceil(log_2(max))* bits. Each bit in the underlying counter vector +## becomes a cell of size *w* bits. +## +## name: A name that uniquely identifies and seeds the Bloom filter. If empty, +## the initialization will become dependent on the initial seed. +## +## Returns: A Bloom filter handle. 
+function bloomfilter_counting_init%(k: count, cells: count, max: count, + name: string &default=""%): opaque of bloomfilter + %{ + const HashPolicy* hp = new DefaultHashing(k, name->CheckString()); + uint16 width = 0; + while ( max >>= 1 ) + ++width; + return new BloomFilterVal(new CountingBloomFilter(hp, cells, width)); %} ## Adds an element to a Bloom filter. diff --git a/testing/btest/bifs/bloomfilter.bro b/testing/btest/bifs/bloomfilter.bro index 769cec1200..3ff6a6668e 100644 --- a/testing/btest/bifs/bloomfilter.bro +++ b/testing/btest/bifs/bloomfilter.bro @@ -4,7 +4,7 @@ event bro_init() { # Basic usage with counts. - local bf_cnt = bloomfilter_init(0.1, 1000); + local bf_cnt = bloomfilter_basic_init(0.1, 1000); bloomfilter_add(bf_cnt, 42); bloomfilter_add(bf_cnt, 84); bloomfilter_add(bf_cnt, 168); @@ -16,23 +16,23 @@ event bro_init() bloomfilter_add(bf_cnt, "foo"); # Type mismatch # Basic usage with strings. - local bf_str = bloomfilter_init(0.9, 10); + local bf_str = bloomfilter_basic_init(0.9, 10); bloomfilter_add(bf_str, "foo"); bloomfilter_add(bf_str, "bar"); print bloomfilter_lookup(bf_str, "foo"); print bloomfilter_lookup(bf_str, "bar"); - print bloomfilter_lookup(bf_str, "baz"); # FP - print bloomfilter_lookup(bf_str, "qux"); # FP + print bloomfilter_lookup(bf_str, "b4z"); # FP + print bloomfilter_lookup(bf_str, "quux"); # FP bloomfilter_add(bf_str, 0.5); # Type mismatch bloomfilter_add(bf_str, 100); # Type mismatch # Edge cases. - local bf_edge0 = bloomfilter_init(0.000000000001, 1); - local bf_edge1 = bloomfilter_init(0.00000001, 100000000); - local bf_edge2 = bloomfilter_init(0.9999999, 1); - local bf_edge3 = bloomfilter_init(0.9999999, 100000000000); + local bf_edge0 = bloomfilter_basic_init(0.000000000001, 1); + local bf_edge1 = bloomfilter_basic_init(0.00000001, 100000000); + local bf_edge2 = bloomfilter_basic_init(0.9999999, 1); + local bf_edge3 = bloomfilter_basic_init(0.9999999, 100000000000); # Invalid parameters. - local bf_bug0 = bloomfilter_init(-0.5, 42); - local bf_bug1 = bloomfilter_init(1.1, 42); + local bf_bug0 = bloomfilter_basic_init(-0.5, 42); + local bf_bug1 = bloomfilter_basic_init(1.1, 42); } diff --git a/testing/btest/istate/opaque.bro b/testing/btest/istate/opaque.bro index ac3b2c0874..b387f9d6bc 100644 --- a/testing/btest/istate/opaque.bro +++ b/testing/btest/istate/opaque.bro @@ -82,7 +82,7 @@ event bro_init() if ( ! entropy_test_add(entropy_handle, "f") ) print out, "entropy_test_add() failed"; - bloomfilter_handle = bloomfilter_init(0.1, 100); + bloomfilter_handle = bloomfilter_basic_init(0.1, 100); for ( e in bloomfilter_elements ) bloomfilter_add(bloomfilter_handle, e); } From 85668e7054dd22bc783a620eaf88b04f2e4bb952 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Mon, 17 Jun 2013 16:16:44 -0700 Subject: [PATCH 037/118] Remove lingering debug code. 
--- src/bro.bif | 1 - 1 file changed, 1 deletion(-) diff --git a/src/bro.bif b/src/bro.bif index a89b808888..7c81966317 100644 --- a/src/bro.bif +++ b/src/bro.bif @@ -5009,7 +5009,6 @@ function bloomfilter_basic_init%(fp: double, capacity: count, size_t cells = BasicBloomFilter::M(fp, capacity); size_t optimal_k = BasicBloomFilter::K(cells, capacity); const HashPolicy* hp = new DefaultHashing(optimal_k, name->CheckString()); - fprintf(stderr, "constructing Bloom filter with %llu hash fns and %llu cells\n", optimal_k, cells); return new BloomFilterVal(new BasicBloomFilter(hp, cells)); %} From e6e5f4926f5a850c773af05b51d7004fc4899a7c Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Mon, 17 Jun 2013 16:26:35 -0700 Subject: [PATCH 038/118] Create hash policies through factory. --- src/BloomFilter.cc | 5 +---- src/HashPolicy.cc | 5 +++++ src/HashPolicy.h | 7 +++++++ src/bro.bif | 4 ++-- 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/src/BloomFilter.cc b/src/BloomFilter.cc index 0be64c18de..59d411d8e2 100644 --- a/src/BloomFilter.cc +++ b/src/BloomFilter.cc @@ -34,8 +34,6 @@ BloomFilter* BloomFilter::Unserialize(UnserialInfo* info) bool BloomFilter::DoSerialize(SerialInfo* info) const { DO_SERIALIZE(SER_BLOOMFILTER, SerialObj); - // FIXME: Since we have a fixed hashing policy, we just serialize the - // information needed to reconstruct it. if ( ! SERIALIZE(static_cast(hash_->K())) ) return false; return SERIALIZE_STR(hash_->Name().c_str(), hash_->Name().size()); @@ -50,8 +48,7 @@ bool BloomFilter::DoUnserialize(UnserialInfo* info) const char* name; if ( ! UNSERIALIZE_STR(&name, 0) ) return false; - // FIXME: for now Bloom filters always use double hashing. - hash_ = new DefaultHashing(k, name); + hash_ = HashPolicy::Create(k, name); return true; } diff --git a/src/HashPolicy.cc b/src/HashPolicy.cc index d6fb4f3da4..7ce754be3c 100644 --- a/src/HashPolicy.cc +++ b/src/HashPolicy.cc @@ -32,6 +32,11 @@ size_t Hasher::compute_seed(size_t seed, const std::string& extra) } +HashPolicy* HashPolicy::Create(size_t k, const std::string& name) + { + return new DefaultHashing(k, name); + } + HashPolicy::HashPolicy(size_t k, const std::string& name) : k_(k), name_(name) { diff --git a/src/HashPolicy.h b/src/HashPolicy.h index 4660bc0080..7bdb968bfe 100644 --- a/src/HashPolicy.h +++ b/src/HashPolicy.h @@ -42,6 +42,13 @@ private: */ class HashPolicy { public: + /** + * Constructs the hashing policy used by the implementation. This factory + * function exists because the HashingPolicy class hierachy is not yet + * serializable. 
+ */ + static HashPolicy* Create(size_t k, const std::string& name); + typedef Hasher::hash_type hash_type; typedef std::vector hash_vector; diff --git a/src/bro.bif b/src/bro.bif index 7c81966317..d0ce066139 100644 --- a/src/bro.bif +++ b/src/bro.bif @@ -5008,7 +5008,7 @@ function bloomfilter_basic_init%(fp: double, capacity: count, size_t cells = BasicBloomFilter::M(fp, capacity); size_t optimal_k = BasicBloomFilter::K(cells, capacity); - const HashPolicy* hp = new DefaultHashing(optimal_k, name->CheckString()); + const HashPolicy* hp = HashPolicy::Create(optimal_k, name->CheckString()); return new BloomFilterVal(new BasicBloomFilter(hp, cells)); %} @@ -5029,7 +5029,7 @@ function bloomfilter_basic_init%(fp: double, capacity: count, function bloomfilter_counting_init%(k: count, cells: count, max: count, name: string &default=""%): opaque of bloomfilter %{ - const HashPolicy* hp = new DefaultHashing(k, name->CheckString()); + const HashPolicy* hp = HashPolicy::Create(k, name->CheckString()); uint16 width = 0; while ( max >>= 1 ) ++width; From 273629de366290f411f381fe5970fc672adf465f Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Tue, 18 Jun 2013 10:23:07 -0700 Subject: [PATCH 039/118] Only serialize Bloom filter type if available. --- src/OpaqueVal.cc | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/src/OpaqueVal.cc b/src/OpaqueVal.cc index 8b82916689..5a673c4a40 100644 --- a/src/OpaqueVal.cc +++ b/src/OpaqueVal.cc @@ -605,9 +605,13 @@ IMPLEMENT_SERIAL(BloomFilterVal, SER_BLOOMFILTER_VAL); bool BloomFilterVal::DoSerialize(SerialInfo* info) const { DO_SERIALIZE(SER_BLOOMFILTER_VAL, OpaqueVal); - assert( type_ ); - if ( ! type_->Serialize(info) ) + + bool is_typed = type_ != NULL; + if ( ! SERIALIZE(is_typed) ) return false; + if ( is_typed && ! type_->Serialize(info) ) + return false; + return bloom_filter_->Serialize(info); } @@ -615,13 +619,16 @@ bool BloomFilterVal::DoUnserialize(UnserialInfo* info) { DO_UNSERIALIZE(OpaqueVal); - type_ = BroType::Unserialize(info); - if ( ! type_ ) + bool is_typed; + if ( ! UNSERIALIZE(&is_typed) ) return false; - TypeList* tl = new TypeList(type_); - tl->Append(type_); - hash_ = new CompositeHash(tl); - Unref(tl); + if ( is_typed ) + { + BroType* type = BroType::Unserialize(info); + if ( ! Typify(type) ) + return false; + Unref(type); + } bloom_filter_ = BloomFilter::Unserialize(info); return bloom_filter_ != NULL; From 5f70452a9ac816346c4e480d8de52b213630b5b7 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Tue, 18 Jun 2013 10:40:00 -0700 Subject: [PATCH 040/118] Small fixes and style tweaks. --- src/BitVector.cc | 2 +- src/BloomFilter.cc | 1 + src/OpaqueVal.h | 4 +--- src/Type.cc | 6 +++--- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/BitVector.cc b/src/BitVector.cc index f029230609..64db32131f 100644 --- a/src/BitVector.cc +++ b/src/BitVector.cc @@ -473,7 +473,7 @@ bool BitVector::DoSerialize(SerialInfo* info) const if ( ! SERIALIZE(static_cast(bits_.size())) ) return false; - for (size_t i = 0; i < bits_.size(); ++i) + for ( size_t i = 0; i < bits_.size(); ++i ) if ( ! SERIALIZE(static_cast(bits_[i])) ) return false; diff --git a/src/BloomFilter.cc b/src/BloomFilter.cc index 59d411d8e2..a7727630f7 100644 --- a/src/BloomFilter.cc +++ b/src/BloomFilter.cc @@ -49,6 +49,7 @@ bool BloomFilter::DoUnserialize(UnserialInfo* info) if ( ! 
UNSERIALIZE_STR(&name, 0) ) return false; hash_ = HashPolicy::Create(k, name); + delete [] name; return true; } diff --git a/src/OpaqueVal.h b/src/OpaqueVal.h index 4b45cad519..2362fdacfc 100644 --- a/src/OpaqueVal.h +++ b/src/OpaqueVal.h @@ -139,9 +139,7 @@ private: { const T* a = dynamic_cast(x->bloom_filter_); const T* b = dynamic_cast(y->bloom_filter_); - if ( a && b ) - return new BloomFilterVal(T::Merge(a, b)); - return NULL; + return a && b ? new BloomFilterVal(T::Merge(a, b)) : NULL; } BroType* type_; diff --git a/src/Type.cc b/src/Type.cc index 6461bf2560..f19de461cd 100644 --- a/src/Type.cc +++ b/src/Type.cc @@ -1311,19 +1311,19 @@ IMPLEMENT_SERIAL(OpaqueType, SER_OPAQUE_TYPE); bool OpaqueType::DoSerialize(SerialInfo* info) const { DO_SERIALIZE(SER_OPAQUE_TYPE, BroType); - return SERIALIZE(name); + return SERIALIZE_STR(name.c_str(), name.size()); } bool OpaqueType::DoUnserialize(UnserialInfo* info) { DO_UNSERIALIZE(BroType); - char const* n; + const char* n; if ( ! UNSERIALIZE_STR(&n, 0) ) return false; - name = n; delete [] n; + return true; } From fef3180942723b4124007b605da7c1d93f8f8ce3 Mon Sep 17 00:00:00 2001 From: Bernhard Amann Date: Tue, 2 Jul 2013 18:54:46 -0700 Subject: [PATCH 041/118] bump sqlite to 3.7.17. --- src/3rdparty/sqlite3.c | 3176 ++++++++++++++++++++++++++++++++++------ src/3rdparty/sqlite3.h | 109 +- 2 files changed, 2846 insertions(+), 439 deletions(-) diff --git a/src/3rdparty/sqlite3.c b/src/3rdparty/sqlite3.c index ba6a30e132..deef460899 100644 --- a/src/3rdparty/sqlite3.c +++ b/src/3rdparty/sqlite3.c @@ -1,9 +1,6 @@ -# define SQLITE_THREADSAFE 2 -# define SQLITE_DEFAULT_MEMSTATUS 0 - /****************************************************************************** ** This file is an amalgamation of many separate C source files from SQLite -** version 3.7.16.2. By combining all the individual C code files into this +** version 3.7.17. By combining all the individual C code files into this ** single large file, the entire code can be compiled as a single translation ** unit. This allows many compilers to do optimizations that would not be ** possible if the files were compiled separately. Performance improvements @@ -365,11 +362,11 @@ ** We support that for legacy. */ #if !defined(SQLITE_THREADSAFE) -#if defined(THREADSAFE) -# define SQLITE_THREADSAFE THREADSAFE -#else -# define SQLITE_THREADSAFE 1 /* IMP: R-07272-22309 */ -#endif +# if defined(THREADSAFE) +# define SQLITE_THREADSAFE THREADSAFE +# else +# define SQLITE_THREADSAFE 1 /* IMP: R-07272-22309 */ +# endif #endif /* @@ -681,9 +678,9 @@ extern "C" { ** [sqlite3_libversion_number()], [sqlite3_sourceid()], ** [sqlite_version()] and [sqlite_source_id()]. 
*/ -#define SQLITE_VERSION "3.7.16.2" -#define SQLITE_VERSION_NUMBER 3007016 -#define SQLITE_SOURCE_ID "2013-04-12 11:52:43 cbea02d93865ce0e06789db95fd9168ebac970c7" +#define SQLITE_VERSION "3.7.17" +#define SQLITE_VERSION_NUMBER 3007017 +#define SQLITE_SOURCE_ID "2013-05-20 00:56:22 118a3b35693b134d56ebd780123b7fd6f1497668" /* ** CAPI3REF: Run-Time Library Version Numbers @@ -999,6 +996,8 @@ SQLITE_API int sqlite3_exec( #define SQLITE_FORMAT 24 /* Auxiliary database format error */ #define SQLITE_RANGE 25 /* 2nd parameter to sqlite3_bind out of range */ #define SQLITE_NOTADB 26 /* File opened that is not a database file */ +#define SQLITE_NOTICE 27 /* Notifications from sqlite3_log() */ +#define SQLITE_WARNING 28 /* Warnings from sqlite3_log() */ #define SQLITE_ROW 100 /* sqlite3_step() has another row ready */ #define SQLITE_DONE 101 /* sqlite3_step() has finished executing */ /* end-of-error-codes */ @@ -1049,6 +1048,7 @@ SQLITE_API int sqlite3_exec( #define SQLITE_IOERR_SHMMAP (SQLITE_IOERR | (21<<8)) #define SQLITE_IOERR_SEEK (SQLITE_IOERR | (22<<8)) #define SQLITE_IOERR_DELETE_NOENT (SQLITE_IOERR | (23<<8)) +#define SQLITE_IOERR_MMAP (SQLITE_IOERR | (24<<8)) #define SQLITE_LOCKED_SHAREDCACHE (SQLITE_LOCKED | (1<<8)) #define SQLITE_BUSY_RECOVERY (SQLITE_BUSY | (1<<8)) #define SQLITE_CANTOPEN_NOTEMPDIR (SQLITE_CANTOPEN | (1<<8)) @@ -1068,6 +1068,8 @@ SQLITE_API int sqlite3_exec( #define SQLITE_CONSTRAINT_TRIGGER (SQLITE_CONSTRAINT | (7<<8)) #define SQLITE_CONSTRAINT_UNIQUE (SQLITE_CONSTRAINT | (8<<8)) #define SQLITE_CONSTRAINT_VTAB (SQLITE_CONSTRAINT | (9<<8)) +#define SQLITE_NOTICE_RECOVER_WAL (SQLITE_NOTICE | (1<<8)) +#define SQLITE_NOTICE_RECOVER_ROLLBACK (SQLITE_NOTICE | (2<<8)) /* ** CAPI3REF: Flags For File Open Operations @@ -1307,6 +1309,9 @@ struct sqlite3_io_methods { void (*xShmBarrier)(sqlite3_file*); int (*xShmUnmap)(sqlite3_file*, int deleteFlag); /* Methods above are valid for version 2 */ + int (*xFetch)(sqlite3_file*, sqlite3_int64 iOfst, int iAmt, void **pp); + int (*xUnfetch)(sqlite3_file*, sqlite3_int64 iOfst, void *p); + /* Methods above are valid for version 3 */ /* Additional methods may be added in future releases */ }; @@ -1443,7 +1448,8 @@ struct sqlite3_io_methods { ** it is able to override built-in [PRAGMA] statements. ** **
  • [[SQLITE_FCNTL_BUSYHANDLER]] -** ^This file-control may be invoked by SQLite on the database file handle +** ^The [SQLITE_FCNTL_BUSYHANDLER] +** file-control may be invoked by SQLite on the database file handle ** shortly after it is opened in order to provide a custom VFS with access ** to the connections busy-handler callback. The argument is of type (void **) ** - an array of two (void *) values. The first (void *) actually points @@ -1454,13 +1460,24 @@ struct sqlite3_io_methods { ** current operation. ** **
  • [[SQLITE_FCNTL_TEMPFILENAME]] -** ^Application can invoke this file-control to have SQLite generate a +** ^Application can invoke the [SQLITE_FCNTL_TEMPFILENAME] file-control +** to have SQLite generate a ** temporary filename using the same algorithm that is followed to generate ** temporary filenames for TEMP tables and other internal uses. The ** argument should be a char** which will be filled with the filename ** written into memory obtained from [sqlite3_malloc()]. The caller should ** invoke [sqlite3_free()] on the result to avoid a memory leak. ** +**
  • [[SQLITE_FCNTL_MMAP_SIZE]] +** The [SQLITE_FCNTL_MMAP_SIZE] file control is used to query or set the +** maximum number of bytes that will be used for memory-mapped I/O. +** The argument is a pointer to a value of type sqlite3_int64 that +** is an advisory maximum number of bytes in the file to memory map. The +** pointer is overwritten with the old value. The limit is not changed if +** the value originally pointed to is negative, and so the current limit +** can be queried by passing in a pointer to a negative number. This +** file-control is used internally to implement [PRAGMA mmap_size]. +** ** */ #define SQLITE_FCNTL_LOCKSTATE 1 @@ -1479,6 +1496,7 @@ struct sqlite3_io_methods { #define SQLITE_FCNTL_PRAGMA 14 #define SQLITE_FCNTL_BUSYHANDLER 15 #define SQLITE_FCNTL_TEMPFILENAME 16 +#define SQLITE_FCNTL_MMAP_SIZE 18 /* ** CAPI3REF: Mutex Handle @@ -2145,7 +2163,9 @@ struct sqlite3_mem_methods { ** page cache implementation into that object.)^ ** ** [[SQLITE_CONFIG_LOG]]
    SQLITE_CONFIG_LOG
    -**
    ^The SQLITE_CONFIG_LOG option takes two arguments: a pointer to a +**
    The SQLITE_CONFIG_LOG option is used to configure the SQLite +** global [error log]. +** (^The SQLITE_CONFIG_LOG option takes two arguments: a pointer to a ** function with a call signature of void(*)(void*,int,const char*), ** and a pointer to void. ^If the function pointer is not NULL, it is ** invoked by [sqlite3_log()] to process each logging event. ^If the @@ -2191,12 +2211,12 @@ struct sqlite3_mem_methods { **
    SQLITE_CONFIG_PCACHE and SQLITE_CONFIG_GETPCACHE **
    These options are obsolete and should not be used by new code. ** They are retained for backwards compatibility but are now no-ops. -** +**
    ** ** [[SQLITE_CONFIG_SQLLOG]] **
    SQLITE_CONFIG_SQLLOG **
    This option is only available if sqlite is compiled with the -** SQLITE_ENABLE_SQLLOG pre-processor macro defined. The first argument should +** [SQLITE_ENABLE_SQLLOG] pre-processor macro defined. The first argument should ** be a pointer to a function of type void(*)(void*,sqlite3*,const char*, int). ** The second should be of type (void*). The callback is invoked by the library ** in three separate circumstances, identified by the value passed as the @@ -2206,7 +2226,23 @@ struct sqlite3_mem_methods { ** fourth parameter is 1, then the SQL statement that the third parameter ** points to has just been executed. Or, if the fourth parameter is 2, then ** the connection being passed as the second parameter is being closed. The -** third parameter is passed NULL In this case. +** third parameter is passed NULL In this case. An example of using this +** configuration option can be seen in the "test_sqllog.c" source file in +** the canonical SQLite source tree.
    +** +** [[SQLITE_CONFIG_MMAP_SIZE]] +**
    SQLITE_CONFIG_MMAP_SIZE +**
    SQLITE_CONFIG_MMAP_SIZE takes two 64-bit integer (sqlite3_int64) values +** that are the default mmap size limit (the default setting for +** [PRAGMA mmap_size]) and the maximum allowed mmap size limit. +** The default setting can be overridden by each database connection using +** either the [PRAGMA mmap_size] command, or by using the +** [SQLITE_FCNTL_MMAP_SIZE] file control. The maximum allowed mmap size +** cannot be changed at run-time. Nor may the maximum allowed mmap size +** exceed the compile-time maximum mmap size set by the +** [SQLITE_MAX_MMAP_SIZE] compile-time option. +** If either argument to this option is negative, then that argument is +** changed to its compile-time default. ** */ #define SQLITE_CONFIG_SINGLETHREAD 1 /* nil */ @@ -2230,6 +2266,7 @@ struct sqlite3_mem_methods { #define SQLITE_CONFIG_GETPCACHE2 19 /* sqlite3_pcache_methods2* */ #define SQLITE_CONFIG_COVERING_INDEX_SCAN 20 /* int */ #define SQLITE_CONFIG_SQLLOG 21 /* xSqllog, void* */ +#define SQLITE_CONFIG_MMAP_SIZE 22 /* sqlite3_int64, sqlite3_int64 */ /* ** CAPI3REF: Database Connection Configuration Options @@ -3063,6 +3100,9 @@ SQLITE_API int sqlite3_set_authorizer( ** as each triggered subprogram is entered. The callbacks for triggers ** contain a UTF-8 SQL comment that identifies the trigger.)^ ** +** The [SQLITE_TRACE_SIZE_LIMIT] compile-time option can be used to limit +** the length of [bound parameter] expansion in the output of sqlite3_trace(). +** ** ^The callback function registered by sqlite3_profile() is invoked ** as each SQL statement finishes. ^The profile callback contains ** the original statement text and an estimate of wall-clock time @@ -3601,7 +3641,8 @@ SQLITE_API int sqlite3_limit(sqlite3*, int id, int newVal); **
  • ** ^If the database schema changes, instead of returning [SQLITE_SCHEMA] as it ** always used to do, [sqlite3_step()] will automatically recompile the SQL -** statement and try to run it again. +** statement and try to run it again. As many as [SQLITE_MAX_SCHEMA_RETRY] +** retries will occur before sqlite3_step() gives up and returns an error. **
  • ** **
  • @@ -3805,6 +3846,9 @@ typedef struct sqlite3_context sqlite3_context; ** parameter [SQLITE_LIMIT_VARIABLE_NUMBER] (default value: 999). ** ** ^The third argument is the value to bind to the parameter. +** ^If the third parameter to sqlite3_bind_text() or sqlite3_bind_text16() +** or sqlite3_bind_blob() is a NULL pointer then the fourth parameter +** is ignored and the end result is the same as sqlite3_bind_null(). ** ** ^(In those routines that have a fourth argument, its value is the ** number of bytes in the parameter. To be clear: the value is the @@ -4761,7 +4805,7 @@ SQLITE_API void sqlite3_set_auxdata(sqlite3_context*, int N, void*, void (*)(voi ** the content before returning. ** ** The typedef is necessary to work around problems in certain -** C++ compilers. See ticket #2191. +** C++ compilers. */ typedef void (*sqlite3_destructor_type)(void*); #define SQLITE_STATIC ((sqlite3_destructor_type)0) @@ -5560,11 +5604,20 @@ SQLITE_API int sqlite3_table_column_metadata( ** ^This interface loads an SQLite extension library from the named file. ** ** ^The sqlite3_load_extension() interface attempts to load an -** SQLite extension library contained in the file zFile. +** [SQLite extension] library contained in the file zFile. If +** the file cannot be loaded directly, attempts are made to load +** with various operating-system specific extensions added. +** So for example, if "samplelib" cannot be loaded, then names like +** "samplelib.so" or "samplelib.dylib" or "samplelib.dll" might +** be tried also. ** ** ^The entry point is zProc. -** ^zProc may be 0, in which case the name of the entry point -** defaults to "sqlite3_extension_init". +** ^(zProc may be 0, in which case SQLite will try to come up with an +** entry point name on its own. It first tries "sqlite3_extension_init". +** If that does not work, it constructs a name "sqlite3_X_init" where the +** X is consists of the lower-case equivalent of all ASCII alphabetic +** characters in the filename from the last "/" to the first following +** "." and omitting any initial "lib".)^ ** ^The sqlite3_load_extension() interface returns ** [SQLITE_OK] on success and [SQLITE_ERROR] if something goes wrong. ** ^If an error occurs and pzErrMsg is not 0, then the @@ -5590,11 +5643,11 @@ SQLITE_API int sqlite3_load_extension( ** CAPI3REF: Enable Or Disable Extension Loading ** ** ^So as not to open security holes in older applications that are -** unprepared to deal with extension loading, and as a means of disabling -** extension loading while evaluating user-entered SQL, the following API +** unprepared to deal with [extension loading], and as a means of disabling +** [extension loading] while evaluating user-entered SQL, the following API ** is provided to turn the [sqlite3_load_extension()] mechanism on and off. ** -** ^Extension loading is off by default. See ticket #1863. +** ^Extension loading is off by default. ** ^Call the sqlite3_enable_load_extension() routine with onoff==1 ** to turn extension loading on and call it with onoff==0 to turn ** it back off again. @@ -5606,7 +5659,7 @@ SQLITE_API int sqlite3_enable_load_extension(sqlite3 *db, int onoff); ** ** ^This interface causes the xEntryPoint() function to be invoked for ** each new [database connection] that is created. The idea here is that -** xEntryPoint() is the entry point for a statically linked SQLite extension +** xEntryPoint() is the entry point for a statically linked [SQLite extension] ** that is to be automatically loaded into all new database connections. 
** ** ^(Even though the function prototype shows that xEntryPoint() takes @@ -7386,10 +7439,25 @@ SQLITE_API int sqlite3_unlock_notify( SQLITE_API int sqlite3_stricmp(const char *, const char *); SQLITE_API int sqlite3_strnicmp(const char *, const char *, int); +/* +** CAPI3REF: String Globbing +* +** ^The [sqlite3_strglob(P,X)] interface returns zero if string X matches +** the glob pattern P, and it returns non-zero if string X does not match +** the glob pattern P. ^The definition of glob pattern matching used in +** [sqlite3_strglob(P,X)] is the same as for the "X GLOB P" operator in the +** SQL dialect used by SQLite. ^The sqlite3_strglob(P,X) function is case +** sensitive. +** +** Note that this routine returns zero on a match and non-zero if the strings +** do not match, the same as [sqlite3_stricmp()] and [sqlite3_strnicmp()]. +*/ +SQLITE_API int sqlite3_strglob(const char *zGlob, const char *zStr); + /* ** CAPI3REF: Error Logging Interface ** -** ^The [sqlite3_log()] interface writes a message into the error log +** ^The [sqlite3_log()] interface writes a message into the [error log] ** established by the [SQLITE_CONFIG_LOG] option to [sqlite3_config()]. ** ^If logging is enabled, the zFormat string and subsequent arguments are ** used with [sqlite3_snprintf()] to generate the final output string. @@ -8074,6 +8142,7 @@ SQLITE_PRIVATE void sqlite3HashClear(Hash*); */ #ifndef SQLITE_TEMP_STORE # define SQLITE_TEMP_STORE 1 +# define SQLITE_TEMP_STORE_xc 1 /* Exclude from ctime.c */ #endif /* @@ -8221,6 +8290,49 @@ SQLITE_PRIVATE const int sqlite3one; # define EIGHT_BYTE_ALIGNMENT(X) ((((char*)(X) - (char*)0)&7)==0) #endif +/* +** Disable MMAP on platforms where it is known to not work +*/ +#if defined(__OpenBSD__) || defined(__QNXNTO__) +# undef SQLITE_MAX_MMAP_SIZE +# define SQLITE_MAX_MMAP_SIZE 0 +#endif + +/* +** Default maximum size of memory used by memory-mapped I/O in the VFS +*/ +#ifdef __APPLE__ +# include +# if TARGET_OS_IPHONE +# undef SQLITE_MAX_MMAP_SIZE +# define SQLITE_MAX_MMAP_SIZE 0 +# endif +#endif +#ifndef SQLITE_MAX_MMAP_SIZE +# if defined(__linux__) \ + || defined(_WIN32) \ + || (defined(__APPLE__) && defined(__MACH__)) \ + || defined(__sun) +# define SQLITE_MAX_MMAP_SIZE 0x7fff0000 /* 2147418112 */ +# else +# define SQLITE_MAX_MMAP_SIZE 0 +# endif +# define SQLITE_MAX_MMAP_SIZE_xc 1 /* exclude from ctime.c */ +#endif + +/* +** The default MMAP_SIZE is zero on all platforms. Or, even if a larger +** default MMAP_SIZE is specified at compile-time, make sure that it does +** not exceed the maximum mmap size. 
+*/ +#ifndef SQLITE_DEFAULT_MMAP_SIZE +# define SQLITE_DEFAULT_MMAP_SIZE 0 +# define SQLITE_DEFAULT_MMAP_SIZE_xc 1 /* Exclude from ctime.c */ +#endif +#if SQLITE_DEFAULT_MMAP_SIZE>SQLITE_MAX_MMAP_SIZE +# undef SQLITE_DEFAULT_MMAP_SIZE +# define SQLITE_DEFAULT_MMAP_SIZE SQLITE_MAX_MMAP_SIZE +#endif /* ** An instance of the following structure is used to store the busy-handler @@ -8442,6 +8554,7 @@ SQLITE_PRIVATE int sqlite3BtreeOpen( SQLITE_PRIVATE int sqlite3BtreeClose(Btree*); SQLITE_PRIVATE int sqlite3BtreeSetCacheSize(Btree*,int); +SQLITE_PRIVATE int sqlite3BtreeSetMmapLimit(Btree*,sqlite3_int64); SQLITE_PRIVATE int sqlite3BtreeSetSafetyLevel(Btree*,int,int,int); SQLITE_PRIVATE int sqlite3BtreeSyncDisabled(Btree*); SQLITE_PRIVATE int sqlite3BtreeSetPageSize(Btree *p, int nPagesize, int nReserve, int eFix); @@ -8518,6 +8631,7 @@ SQLITE_PRIVATE int sqlite3BtreeNewDb(Btree *p); #define BTREE_TEXT_ENCODING 5 #define BTREE_USER_VERSION 6 #define BTREE_INCR_VACUUM 7 +#define BTREE_APPLICATION_ID 8 /* ** Values that may be OR'd together to form the second argument of an @@ -9142,6 +9256,12 @@ typedef struct PgHdr DbPage; #define PAGER_JOURNALMODE_MEMORY 4 /* In-memory journal file */ #define PAGER_JOURNALMODE_WAL 5 /* Use write-ahead logging */ +/* +** Flags that make up the mask passed to sqlite3PagerAcquire(). +*/ +#define PAGER_ACQUIRE_NOCONTENT 0x01 /* Do not load data from disk */ +#define PAGER_ACQUIRE_READONLY 0x02 /* Read-only page is acceptable */ + /* ** The remainder of this file contains the declarations of the functions ** that make up the Pager sub-system API. See source code comments for @@ -9166,6 +9286,7 @@ SQLITE_PRIVATE void sqlite3PagerSetBusyhandler(Pager*, int(*)(void *), void *); SQLITE_PRIVATE int sqlite3PagerSetPagesize(Pager*, u32*, int); SQLITE_PRIVATE int sqlite3PagerMaxPageCount(Pager*, int); SQLITE_PRIVATE void sqlite3PagerSetCachesize(Pager*, int); +SQLITE_PRIVATE void sqlite3PagerSetMmapLimit(Pager *, sqlite3_int64); SQLITE_PRIVATE void sqlite3PagerShrink(Pager*); SQLITE_PRIVATE void sqlite3PagerSetSafetyLevel(Pager*,int,int,int); SQLITE_PRIVATE int sqlite3PagerLockingMode(Pager *, int); @@ -9312,6 +9433,8 @@ struct PgHdr { #define PGHDR_REUSE_UNLIKELY 0x010 /* A hint that reuse is unlikely */ #define PGHDR_DONT_WRITE 0x020 /* Do not write content to disk */ +#define PGHDR_MMAP 0x040 /* This is an mmap page object */ + /* Initialize and shutdown the page cache subsystem */ SQLITE_PRIVATE int sqlite3PcacheInitialize(void); SQLITE_PRIVATE void sqlite3PcacheShutdown(void); @@ -9523,14 +9646,6 @@ SQLITE_PRIVATE void sqlite3PCacheSetDefault(void); # define SQLITE_OS_WINRT 0 #endif -/* -** When compiled for WinCE or WinRT, there is no concept of the current -** directory. - */ -#if !SQLITE_OS_WINCE && !SQLITE_OS_WINRT -# define SQLITE_CURDIR 1 -#endif - /* If the SET_FULLSYNC macro is not defined above, then make it ** a no-op */ @@ -9683,6 +9798,8 @@ SQLITE_PRIVATE int sqlite3OsShmMap(sqlite3_file *,int,int,int,void volatile **); SQLITE_PRIVATE int sqlite3OsShmLock(sqlite3_file *id, int, int, int); SQLITE_PRIVATE void sqlite3OsShmBarrier(sqlite3_file *id); SQLITE_PRIVATE int sqlite3OsShmUnmap(sqlite3_file *id, int); +SQLITE_PRIVATE int sqlite3OsFetch(sqlite3_file *id, i64, int, void **); +SQLITE_PRIVATE int sqlite3OsUnfetch(sqlite3_file *, i64, void *); /* @@ -9922,6 +10039,7 @@ struct sqlite3 { int nDb; /* Number of backends currently in use */ int flags; /* Miscellaneous flags. 
See below */ i64 lastRowid; /* ROWID of most recent insert (see above) */ + i64 szMmap; /* Default mmap_size setting */ unsigned int openFlags; /* Flags passed to sqlite3_vfs.xOpen() */ int errCode; /* Most recent error code (SQLITE_*) */ int errMask; /* & result codes with this before returning */ @@ -11158,6 +11276,8 @@ struct NameContext { #define NC_HasAgg 0x02 /* One or more aggregate functions seen */ #define NC_IsCheck 0x04 /* True if resolving names in a CHECK constraint */ #define NC_InAggFunc 0x08 /* True if analyzing arguments to an agg func */ +#define NC_AsMaybe 0x10 /* Resolve to AS terms of the result set only + ** if no other resolution is available */ /* ** An instance of the following structure contains all information @@ -11593,6 +11713,8 @@ struct Sqlite3Config { void *pHeap; /* Heap storage space */ int nHeap; /* Size of pHeap[] */ int mnReq, mxReq; /* Min and max heap requests sizes */ + sqlite3_int64 szMmap; /* mmap() space per open file */ + sqlite3_int64 mxMmap; /* Maximum value for szMmap */ void *pScratch; /* Scratch memory */ int szScratch; /* Size of each scratch buffer */ int nScratch; /* Number of scratch buffers */ @@ -11627,6 +11749,7 @@ struct Walker { int (*xSelectCallback)(Walker*,Select*); /* Callback for SELECTs */ Parse *pParse; /* Parser context. */ int walkerDepth; /* Number of subqueries */ + u8 bSelectDepthFirst; /* Do subqueries first */ union { /* Extra data for callback */ NameContext *pNC; /* Naming context */ int i; /* Integer value */ @@ -12130,6 +12253,12 @@ SQLITE_PRIVATE void sqlite3Error(sqlite3*, int, const char*,...); SQLITE_PRIVATE void *sqlite3HexToBlob(sqlite3*, const char *z, int n); SQLITE_PRIVATE u8 sqlite3HexToInt(int h); SQLITE_PRIVATE int sqlite3TwoPartName(Parse *, Token *, Token *, Token **); + +#if defined(SQLITE_DEBUG) || defined(SQLITE_TEST) || \ + defined(SQLITE_DEBUG_OS_TRACE) +SQLITE_PRIVATE const char *sqlite3ErrName(int); +#endif + SQLITE_PRIVATE const char *sqlite3ErrStr(int); SQLITE_PRIVATE int sqlite3ReadSchema(Parse *pParse); SQLITE_PRIVATE CollSeq *sqlite3FindCollSeq(sqlite3*,u8 enc, const char*,int); @@ -12614,6 +12743,8 @@ SQLITE_PRIVATE SQLITE_WSD struct Sqlite3Config sqlite3Config = { (void*)0, /* pHeap */ 0, /* nHeap */ 0, 0, /* mnHeap, mxHeap */ + SQLITE_DEFAULT_MMAP_SIZE, /* szMmap */ + SQLITE_MAX_MMAP_SIZE, /* mxMmap */ (void*)0, /* pScratch */ 0, /* szScratch */ 0, /* nScratch */ @@ -12737,15 +12868,15 @@ static const char * const azCompileOpt[] = { #ifdef SQLITE_COVERAGE_TEST "COVERAGE_TEST", #endif -#ifdef SQLITE_CURDIR - "CURDIR", -#endif #ifdef SQLITE_DEBUG "DEBUG", #endif #ifdef SQLITE_DEFAULT_LOCKING_MODE "DEFAULT_LOCKING_MODE=" CTIMEOPT_VAL(SQLITE_DEFAULT_LOCKING_MODE), #endif +#if defined(SQLITE_DEFAULT_MMAP_SIZE) && !defined(SQLITE_DEFAULT_MMAP_SIZE_xc) + "DEFAULT_MMAP_SIZE=" CTIMEOPT_VAL(SQLITE_DEFAULT_MMAP_SIZE), +#endif #ifdef SQLITE_DISABLE_DIRSYNC "DISABLE_DIRSYNC", #endif @@ -12836,6 +12967,9 @@ static const char * const azCompileOpt[] = { #ifdef SQLITE_LOCK_TRACE "LOCK_TRACE", #endif +#if defined(SQLITE_MAX_MMAP_SIZE) && !defined(SQLITE_MAX_MMAP_SIZE_xc) + "MAX_MMAP_SIZE=" CTIMEOPT_VAL(SQLITE_MAX_MMAP_SIZE), +#endif #ifdef SQLITE_MAX_SCHEMA_RETRY "MAX_SCHEMA_RETRY=" CTIMEOPT_VAL(SQLITE_MAX_SCHEMA_RETRY), #endif @@ -12893,11 +13027,6 @@ static const char * const azCompileOpt[] = { #ifdef SQLITE_OMIT_CHECK "OMIT_CHECK", #endif -/* // redundant -** #ifdef SQLITE_OMIT_COMPILEOPTION_DIAGS -** "OMIT_COMPILEOPTION_DIAGS", -** #endif -*/ #ifdef SQLITE_OMIT_COMPLETE "OMIT_COMPLETE", #endif @@ 
-13039,13 +13168,13 @@ static const char * const azCompileOpt[] = { #ifdef SQLITE_TCL "TCL", #endif -#ifdef SQLITE_TEMP_STORE +#if defined(SQLITE_TEMP_STORE) && !defined(SQLITE_TEMP_STORE_xc) "TEMP_STORE=" CTIMEOPT_VAL(SQLITE_TEMP_STORE), #endif #ifdef SQLITE_TEST "TEST", #endif -#ifdef SQLITE_THREADSAFE +#if defined(SQLITE_THREADSAFE) "THREADSAFE=" CTIMEOPT_VAL(SQLITE_THREADSAFE), #endif #ifdef SQLITE_USE_ALLOCA @@ -13071,8 +13200,11 @@ SQLITE_API int sqlite3_compileoption_used(const char *zOptName){ /* Since ArraySize(azCompileOpt) is normally in single digits, a ** linear search is adequate. No need for a binary search. */ for(i=0; ipMethods->xShmMap(id, iPage, pgsz, bExtend, pp); } +#if SQLITE_MAX_MMAP_SIZE>0 +/* The real implementation of xFetch and xUnfetch */ +SQLITE_PRIVATE int sqlite3OsFetch(sqlite3_file *id, i64 iOff, int iAmt, void **pp){ + DO_OS_MALLOC_TEST(id); + return id->pMethods->xFetch(id, iOff, iAmt, pp); +} +SQLITE_PRIVATE int sqlite3OsUnfetch(sqlite3_file *id, i64 iOff, void *p){ + return id->pMethods->xUnfetch(id, iOff, p); +} +#else +/* No-op stubs to use when memory-mapped I/O is disabled */ +SQLITE_PRIVATE int sqlite3OsFetch(sqlite3_file *id, i64 iOff, int iAmt, void **pp){ + *pp = 0; + return SQLITE_OK; +} +SQLITE_PRIVATE int sqlite3OsUnfetch(sqlite3_file *id, i64 iOff, void *p){ + return SQLITE_OK; +} +#endif + /* ** The next group of routines are convenience wrappers around the ** VFS methods. @@ -22851,7 +23011,7 @@ SQLITE_PRIVATE const char *sqlite3OpcodeName(int i){ /* #include */ #include #include -#ifndef SQLITE_OMIT_WAL +#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 #include #endif @@ -22950,6 +23110,11 @@ struct unixFile { const char *zPath; /* Name of the file */ unixShm *pShm; /* Shared memory segment information */ int szChunk; /* Configured by FCNTL_CHUNK_SIZE */ + int nFetchOut; /* Number of outstanding xFetch refs */ + sqlite3_int64 mmapSize; /* Usable size of mapping at pMapRegion */ + sqlite3_int64 mmapSizeActual; /* Actual size of mapping at pMapRegion */ + sqlite3_int64 mmapSizeMax; /* Configured FCNTL_MMAP_SIZE value */ + void *pMapRegion; /* Memory mapped region */ #ifdef __QNXNTO__ int sectorSize; /* Device sector size */ int deviceCharacteristics; /* Precomputed device characteristics */ @@ -22974,7 +23139,9 @@ struct unixFile { unsigned char transCntrChng; /* True if the transaction counter changed */ unsigned char dbUpdate; /* True if any part of database file changed */ unsigned char inNormalWrite; /* True if in a normal write operation */ + #endif + #ifdef SQLITE_TEST /* In test mode, increase the size of this structure a bit so that ** it is larger than the struct CrashFile defined in test6.c. @@ -22998,6 +23165,7 @@ struct unixFile { #define UNIXFILE_DELETE 0x20 /* Delete on close */ #define UNIXFILE_URI 0x40 /* Filename might have query parameters */ #define UNIXFILE_NOLOCK 0x80 /* Do no file locking */ +#define UNIXFILE_WARNED 0x0100 /* verifyDbFile() warnings have been issued */ /* ** Include code that is common to all os_*.c files @@ -23239,6 +23407,17 @@ SQLITE_API int sqlite3_open_file_count = 0; #define threadid 0 #endif +/* +** HAVE_MREMAP defaults to true on Linux and false everywhere else. +*/ +#if !defined(HAVE_MREMAP) +# if defined(__linux__) && defined(_GNU_SOURCE) +# define HAVE_MREMAP 1 +# else +# define HAVE_MREMAP 0 +# endif +#endif + /* ** Different Unix systems declare open() in different ways. Same use ** open(const char*,int,mode_t). Others use open(const char*,int,...). 
@@ -23263,9 +23442,6 @@ static int posixFchown(int fd, uid_t uid, gid_t gid){ /* Forward reference */ static int openDirectory(const char*, int*); -/* Fix for "error: 'fchmod' undeclared here (not in a function)" on FreeBSD 9 */ -int fchmod(int, mode_t); - /* ** Many system calls are accessed through pointer-to-functions so that ** they may be overridden at runtime to facilitate fault injection during @@ -23373,6 +23549,19 @@ static struct unix_syscall { { "fchown", (sqlite3_syscall_ptr)posixFchown, 0 }, #define osFchown ((int(*)(int,uid_t,gid_t))aSyscall[20].pCurrent) + { "mmap", (sqlite3_syscall_ptr)mmap, 0 }, +#define osMmap ((void*(*)(void*,size_t,int,int,int,off_t))aSyscall[21].pCurrent) + + { "munmap", (sqlite3_syscall_ptr)munmap, 0 }, +#define osMunmap ((void*(*)(void*,size_t))aSyscall[22].pCurrent) + +#if HAVE_MREMAP + { "mremap", (sqlite3_syscall_ptr)mremap, 0 }, +#else + { "mremap", (sqlite3_syscall_ptr)0, 0 }, +#endif +#define osMremap ((void*(*)(void*,size_t,size_t,int,...))aSyscall[23].pCurrent) + }; /* End of the overrideable system calls */ /* @@ -23704,7 +23893,6 @@ static int sqliteErrorFromPosixError(int posixError, int sqliteIOErr) { } - /****************************************************************************** ****************** Begin Unique File ID Utility Used By VxWorks *************** ** @@ -24040,7 +24228,6 @@ static int unixLogErrorAtLine( zErr = strerror(iErrno); #endif - assert( errcode!=SQLITE_OK ); if( zPath==0 ) zPath = ""; sqlite3_log(errcode, "os_unix.c:%d: (%d) %s(%s) - %s", @@ -24206,6 +24393,50 @@ static int findInodeInfo( } +/* +** Check a unixFile that is a database. Verify the following: +** +** (1) There is exactly one hard link on the file +** (2) The file is not a symbolic link +** (3) The file has not been renamed or unlinked +** +** Issue sqlite3_log(SQLITE_WARNING,...) messages if anything is not right. +*/ +static void verifyDbFile(unixFile *pFile){ + struct stat buf; + int rc; + if( pFile->ctrlFlags & UNIXFILE_WARNED ){ + /* One or more of the following warnings have already been issued. Do not + ** repeat them so as not to clutter the error log */ + return; + } + rc = osFstat(pFile->h, &buf); + if( rc!=0 ){ + sqlite3_log(SQLITE_WARNING, "cannot fstat db file %s", pFile->zPath); + pFile->ctrlFlags |= UNIXFILE_WARNED; + return; + } + if( buf.st_nlink==0 && (pFile->ctrlFlags & UNIXFILE_DELETE)==0 ){ + sqlite3_log(SQLITE_WARNING, "file unlinked while open: %s", pFile->zPath); + pFile->ctrlFlags |= UNIXFILE_WARNED; + return; + } + if( buf.st_nlink>1 ){ + sqlite3_log(SQLITE_WARNING, "multiple links to file: %s", pFile->zPath); + pFile->ctrlFlags |= UNIXFILE_WARNED; + return; + } + if( pFile->pInode!=0 + && ((rc = osStat(pFile->zPath, &buf))!=0 + || buf.st_ino!=pFile->pInode->fileId.ino) + ){ + sqlite3_log(SQLITE_WARNING, "file renamed while open: %s", pFile->zPath); + pFile->ctrlFlags |= UNIXFILE_WARNED; + return; + } +} + + /* ** This routine checks if there is a RESERVED lock held on the specified ** file by this or any other process. If such a lock is held, set *pResOut @@ -24736,9 +24967,13 @@ end_unlock: ** the requested locking level, this routine is a no-op. */ static int unixUnlock(sqlite3_file *id, int eFileLock){ + assert( eFileLock==SHARED_LOCK || ((unixFile *)id)->nFetchOut==0 ); return posixUnlock(id, eFileLock, 0); } +static int unixMapfile(unixFile *pFd, i64 nByte); +static void unixUnmapfile(unixFile *pFd); + /* ** This function performs the parts of the "close file" operation ** common to all locking schemes. 
It closes the directory and file @@ -24751,6 +24986,7 @@ static int unixUnlock(sqlite3_file *id, int eFileLock){ */ static int closeUnixFile(sqlite3_file *id){ unixFile *pFile = (unixFile*)id; + unixUnmapfile(pFile); if( pFile->h>=0 ){ robust_close(pFile, pFile->h, __LINE__); pFile->h = -1; @@ -24777,6 +25013,7 @@ static int closeUnixFile(sqlite3_file *id){ static int unixClose(sqlite3_file *id){ int rc = SQLITE_OK; unixFile *pFile = (unixFile *)id; + verifyDbFile(pFile); unixUnlock(id, NO_LOCK); unixEnterMutex(); @@ -26008,6 +26245,8 @@ static int unixRead( unixFile *pFile = (unixFile *)id; int got; assert( id ); + assert( offset>=0 ); + assert( amt>0 ); /* If this is a database file (not a journal, master-journal or temp ** file), the bytes in the locking range should never be read or written. */ @@ -26018,6 +26257,23 @@ static int unixRead( ); #endif +#if SQLITE_MAX_MMAP_SIZE>0 + /* Deal with as much of this read request as possible by transfering + ** data from the memory mapping using memcpy(). */ + if( offsetmmapSize ){ + if( offset+amt <= pFile->mmapSize ){ + memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], amt); + return SQLITE_OK; + }else{ + int nCopy = pFile->mmapSize - offset; + memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], nCopy); + pBuf = &((u8 *)pBuf)[nCopy]; + amt -= nCopy; + offset += nCopy; + } + } +#endif + got = seekAndRead(pFile, offset, pBuf, amt); if( got==amt ){ return SQLITE_OK; @@ -26032,6 +26288,51 @@ static int unixRead( } } +/* +** Attempt to seek the file-descriptor passed as the first argument to +** absolute offset iOff, then attempt to write nBuf bytes of data from +** pBuf to it. If an error occurs, return -1 and set *piErrno. Otherwise, +** return the actual number of bytes written (which may be less than +** nBuf). +*/ +static int seekAndWriteFd( + int fd, /* File descriptor to write to */ + i64 iOff, /* File offset to begin writing at */ + const void *pBuf, /* Copy data from this buffer to the file */ + int nBuf, /* Size of buffer pBuf in bytes */ + int *piErrno /* OUT: Error number if error occurs */ +){ + int rc = 0; /* Value returned by system call */ + + assert( nBuf==(nBuf&0x1ffff) ); + nBuf &= 0x1ffff; + TIMER_START; + +#if defined(USE_PREAD) + do{ rc = osPwrite(fd, pBuf, nBuf, iOff); }while( rc<0 && errno==EINTR ); +#elif defined(USE_PREAD64) + do{ rc = osPwrite64(fd, pBuf, nBuf, iOff);}while( rc<0 && errno==EINTR); +#else + do{ + i64 iSeek = lseek(fd, iOff, SEEK_SET); + SimulateIOError( iSeek-- ); + + if( iSeek!=iOff ){ + if( piErrno ) *piErrno = (iSeek==-1 ? errno : 0); + return -1; + } + rc = osWrite(fd, pBuf, nBuf); + }while( rc<0 && errno==EINTR ); +#endif + + TIMER_END; + OSTRACE(("WRITE %-3d %5d %7lld %llu\n", fd, rc, iOff, TIMER_ELAPSED)); + + if( rc<0 && piErrno ) *piErrno = errno; + return rc; +} + + /* ** Seek to the offset in id->offset then read cnt bytes into pBuf. ** Return the number of bytes actually read. Update the offset. @@ -26040,39 +26341,7 @@ static int unixRead( ** is set before returning. 
*/ static int seekAndWrite(unixFile *id, i64 offset, const void *pBuf, int cnt){ - int got; -#if (!defined(USE_PREAD) && !defined(USE_PREAD64)) - i64 newOffset; -#endif - assert( cnt==(cnt&0x1ffff) ); - cnt &= 0x1ffff; - TIMER_START; -#if defined(USE_PREAD) - do{ got = osPwrite(id->h, pBuf, cnt, offset); }while( got<0 && errno==EINTR ); -#elif defined(USE_PREAD64) - do{ got = osPwrite64(id->h, pBuf, cnt, offset);}while( got<0 && errno==EINTR); -#else - do{ - newOffset = lseek(id->h, offset, SEEK_SET); - SimulateIOError( newOffset-- ); - if( newOffset!=offset ){ - if( newOffset == -1 ){ - ((unixFile*)id)->lastErrno = errno; - }else{ - ((unixFile*)id)->lastErrno = 0; - } - return -1; - } - got = osWrite(id->h, pBuf, cnt); - }while( got<0 && errno==EINTR ); -#endif - TIMER_END; - if( got<0 ){ - ((unixFile*)id)->lastErrno = errno; - } - - OSTRACE(("WRITE %-3d %5d %7lld %llu\n", id->h, got, offset, TIMER_ELAPSED)); - return got; + return seekAndWriteFd(id->h, offset, pBuf, cnt, &id->lastErrno); } @@ -26122,6 +26391,23 @@ static int unixWrite( } #endif +#if SQLITE_MAX_MMAP_SIZE>0 + /* Deal with as much of this write request as possible by transfering + ** data from the memory mapping using memcpy(). */ + if( offsetmmapSize ){ + if( offset+amt <= pFile->mmapSize ){ + memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, amt); + return SQLITE_OK; + }else{ + int nCopy = pFile->mmapSize - offset; + memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, nCopy); + pBuf = &((u8 *)pBuf)[nCopy]; + amt -= nCopy; + offset += nCopy; + } + } +#endif + while( amt>0 && (wrote = seekAndWrite(pFile, offset, pBuf, amt))>0 ){ amt -= wrote; offset += wrote; @@ -26404,6 +26690,14 @@ static int unixTruncate(sqlite3_file *id, i64 nByte){ } #endif + /* If the file was just truncated to a size smaller than the currently + ** mapped region, reduce the effective mapping size as well. SQLite will + ** use read() and write() to access data beyond this point from now on. 
+ */ + if( nBytemmapSize ){ + pFile->mmapSize = nByte; + } + return SQLITE_OK; } } @@ -26492,6 +26786,19 @@ static int fcntlSizeHint(unixFile *pFile, i64 nByte){ } } + if( pFile->mmapSizeMax>0 && nByte>pFile->mmapSize ){ + int rc; + if( pFile->szChunk<=0 ){ + if( robust_ftruncate(pFile->h, nByte) ){ + pFile->lastErrno = errno; + return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath); + } + } + + rc = unixMapfile(pFile, nByte); + return rc; + } + return SQLITE_OK; } @@ -26559,6 +26866,18 @@ static int unixFileControl(sqlite3_file *id, int op, void *pArg){ } return SQLITE_OK; } + case SQLITE_FCNTL_MMAP_SIZE: { + i64 newLimit = *(i64*)pArg; + if( newLimit>sqlite3GlobalConfig.mxMmap ){ + newLimit = sqlite3GlobalConfig.mxMmap; + } + *(i64*)pArg = pFile->mmapSizeMax; + if( newLimit>=0 ){ + pFile->mmapSizeMax = newLimit; + if( newLimitmmapSize ) pFile->mmapSize = newLimit; + } + return SQLITE_OK; + } #ifdef SQLITE_DEBUG /* The pager calls this method to signal that it has done ** a rollback and that the database is therefore unchanged and @@ -26871,7 +27190,7 @@ static void unixShmPurge(unixFile *pFd){ sqlite3_mutex_free(p->mutex); for(i=0; inRegion; i++){ if( p->h>=0 ){ - munmap(p->apRegion[i], p->szRegion); + osMunmap(p->apRegion[i], p->szRegion); }else{ sqlite3_free(p->apRegion[i]); } @@ -27111,24 +27430,32 @@ static int unixShmMap( if( sStat.st_sizeh, sStat.st_size, nByte)!=0 ){ - rc = unixLogError(SQLITE_IOERR_SHMSIZE, "fallocate", - pShmNode->zFilename); + if( !bExtend ){ goto shmpage_out; } -#else - if( robust_ftruncate(pShmNode->h, nByte) ){ - rc = unixLogError(SQLITE_IOERR_SHMSIZE, "ftruncate", - pShmNode->zFilename); - goto shmpage_out; + + /* Alternatively, if bExtend is true, extend the file. Do this by + ** writing a single byte to the end of each (OS) page being + ** allocated or extended. Technically, we need only write to the + ** last page in order to extend the file. But writing to all new + ** pages forces the OS to allocate them immediately, which reduces + ** the chances of SIGBUS while accessing the mapped region later on. + */ + else{ + static const int pgsz = 4096; + int iPg; + + /* Write to the last byte of each newly allocated or extended page */ + assert( (nByte % pgsz)==0 ); + for(iPg=(sStat.st_size/pgsz); iPg<(nByte/pgsz); iPg++){ + if( seekAndWriteFd(pShmNode->h, iPg*pgsz + pgsz-1, "", 1, 0)!=1 ){ + const char *zFile = pShmNode->zFilename; + rc = unixLogError(SQLITE_IOERR_SHMSIZE, "write", zFile); + goto shmpage_out; + } + } } -#endif } } @@ -27144,7 +27471,7 @@ static int unixShmMap( while(pShmNode->nRegion<=iRegion){ void *pMem; if( pShmNode->h>=0 ){ - pMem = mmap(0, szRegion, + pMem = osMmap(0, szRegion, pShmNode->isReadonly ? PROT_READ : PROT_READ|PROT_WRITE, MAP_SHARED, pShmNode->h, szRegion*(i64)pShmNode->nRegion ); @@ -27361,6 +27688,236 @@ static int unixShmUnmap( # define unixShmUnmap 0 #endif /* #ifndef SQLITE_OMIT_WAL */ +/* +** If it is currently memory mapped, unmap file pFd. +*/ +static void unixUnmapfile(unixFile *pFd){ + assert( pFd->nFetchOut==0 ); +#if SQLITE_MAX_MMAP_SIZE>0 + if( pFd->pMapRegion ){ + osMunmap(pFd->pMapRegion, pFd->mmapSizeActual); + pFd->pMapRegion = 0; + pFd->mmapSize = 0; + pFd->mmapSizeActual = 0; + } +#endif +} + +#if SQLITE_MAX_MMAP_SIZE>0 +/* +** Return the system page size. 
+*/ +static int unixGetPagesize(void){ +#if HAVE_MREMAP + return 512; +#elif defined(_BSD_SOURCE) + return getpagesize(); +#else + return (int)sysconf(_SC_PAGESIZE); +#endif +} +#endif /* SQLITE_MAX_MMAP_SIZE>0 */ + +#if SQLITE_MAX_MMAP_SIZE>0 +/* +** Attempt to set the size of the memory mapping maintained by file +** descriptor pFd to nNew bytes. Any existing mapping is discarded. +** +** If successful, this function sets the following variables: +** +** unixFile.pMapRegion +** unixFile.mmapSize +** unixFile.mmapSizeActual +** +** If unsuccessful, an error message is logged via sqlite3_log() and +** the three variables above are zeroed. In this case SQLite should +** continue accessing the database using the xRead() and xWrite() +** methods. +*/ +static void unixRemapfile( + unixFile *pFd, /* File descriptor object */ + i64 nNew /* Required mapping size */ +){ + const char *zErr = "mmap"; + int h = pFd->h; /* File descriptor open on db file */ + u8 *pOrig = (u8 *)pFd->pMapRegion; /* Pointer to current file mapping */ + i64 nOrig = pFd->mmapSizeActual; /* Size of pOrig region in bytes */ + u8 *pNew = 0; /* Location of new mapping */ + int flags = PROT_READ; /* Flags to pass to mmap() */ + + assert( pFd->nFetchOut==0 ); + assert( nNew>pFd->mmapSize ); + assert( nNew<=pFd->mmapSizeMax ); + assert( nNew>0 ); + assert( pFd->mmapSizeActual>=pFd->mmapSize ); + assert( MAP_FAILED!=0 ); + + if( (pFd->ctrlFlags & UNIXFILE_RDONLY)==0 ) flags |= PROT_WRITE; + + if( pOrig ){ + const int szSyspage = unixGetPagesize(); + i64 nReuse = (pFd->mmapSize & ~(szSyspage-1)); + u8 *pReq = &pOrig[nReuse]; + + /* Unmap any pages of the existing mapping that cannot be reused. */ + if( nReuse!=nOrig ){ + osMunmap(pReq, nOrig-nReuse); + } + +#if HAVE_MREMAP + pNew = osMremap(pOrig, nReuse, nNew, MREMAP_MAYMOVE); + zErr = "mremap"; +#else + pNew = osMmap(pReq, nNew-nReuse, flags, MAP_SHARED, h, nReuse); + if( pNew!=MAP_FAILED ){ + if( pNew!=pReq ){ + osMunmap(pNew, nNew - nReuse); + pNew = 0; + }else{ + pNew = pOrig; + } + } +#endif + + /* The attempt to extend the existing mapping failed. Free it. */ + if( pNew==MAP_FAILED || pNew==0 ){ + osMunmap(pOrig, nReuse); + } + } + + /* If pNew is still NULL, try to create an entirely new mapping. */ + if( pNew==0 ){ + pNew = osMmap(0, nNew, flags, MAP_SHARED, h, 0); + } + + if( pNew==MAP_FAILED ){ + pNew = 0; + nNew = 0; + unixLogError(SQLITE_OK, zErr, pFd->zPath); + + /* If the mmap() above failed, assume that all subsequent mmap() calls + ** will probably fail too. Fall back to using xRead/xWrite exclusively + ** in this case. */ + pFd->mmapSizeMax = 0; + } + pFd->pMapRegion = (void *)pNew; + pFd->mmapSize = pFd->mmapSizeActual = nNew; +} +#endif + +/* +** Memory map or remap the file opened by file-descriptor pFd (if the file +** is already mapped, the existing mapping is replaced by the new). Or, if +** there already exists a mapping for this file, and there are still +** outstanding xFetch() references to it, this function is a no-op. +** +** If parameter nByte is non-negative, then it is the requested size of +** the mapping to create. Otherwise, if nByte is less than zero, then the +** requested size is the size of the file on disk. The actual size of the +** created mapping is either the requested size or the value configured +** using SQLITE_FCNTL_MMAP_LIMIT, whichever is smaller. +** +** SQLITE_OK is returned if no error occurs (even if the mapping is not +** recreated as a result of outstanding references) or an SQLite error +** code otherwise. 
+*/ +static int unixMapfile(unixFile *pFd, i64 nByte){ +#if SQLITE_MAX_MMAP_SIZE>0 + i64 nMap = nByte; + int rc; + + assert( nMap>=0 || pFd->nFetchOut==0 ); + if( pFd->nFetchOut>0 ) return SQLITE_OK; + + if( nMap<0 ){ + struct stat statbuf; /* Low-level file information */ + rc = osFstat(pFd->h, &statbuf); + if( rc!=SQLITE_OK ){ + return SQLITE_IOERR_FSTAT; + } + nMap = statbuf.st_size; + } + if( nMap>pFd->mmapSizeMax ){ + nMap = pFd->mmapSizeMax; + } + + if( nMap!=pFd->mmapSize ){ + if( nMap>0 ){ + unixRemapfile(pFd, nMap); + }else{ + unixUnmapfile(pFd); + } + } +#endif + + return SQLITE_OK; +} + +/* +** If possible, return a pointer to a mapping of file fd starting at offset +** iOff. The mapping must be valid for at least nAmt bytes. +** +** If such a pointer can be obtained, store it in *pp and return SQLITE_OK. +** Or, if one cannot but no error occurs, set *pp to 0 and return SQLITE_OK. +** Finally, if an error does occur, return an SQLite error code. The final +** value of *pp is undefined in this case. +** +** If this function does return a pointer, the caller must eventually +** release the reference by calling unixUnfetch(). +*/ +static int unixFetch(sqlite3_file *fd, i64 iOff, int nAmt, void **pp){ +#if SQLITE_MAX_MMAP_SIZE>0 + unixFile *pFd = (unixFile *)fd; /* The underlying database file */ +#endif + *pp = 0; + +#if SQLITE_MAX_MMAP_SIZE>0 + if( pFd->mmapSizeMax>0 ){ + if( pFd->pMapRegion==0 ){ + int rc = unixMapfile(pFd, -1); + if( rc!=SQLITE_OK ) return rc; + } + if( pFd->mmapSize >= iOff+nAmt ){ + *pp = &((u8 *)pFd->pMapRegion)[iOff]; + pFd->nFetchOut++; + } + } +#endif + return SQLITE_OK; +} + +/* +** If the third argument is non-NULL, then this function releases a +** reference obtained by an earlier call to unixFetch(). The second +** argument passed to this function must be the same as the corresponding +** argument that was passed to the unixFetch() invocation. +** +** Or, if the third argument is NULL, then this function is being called +** to inform the VFS layer that, according to POSIX, any existing mapping +** may now be invalid and should be unmapped. +*/ +static int unixUnfetch(sqlite3_file *fd, i64 iOff, void *p){ + unixFile *pFd = (unixFile *)fd; /* The underlying database file */ + UNUSED_PARAMETER(iOff); + + /* If p==0 (unmap the entire file) then there must be no outstanding + ** xFetch references. Or, if p!=0 (meaning it is an xFetch reference), + ** then there must be at least one outstanding. */ + assert( (p==0)==(pFd->nFetchOut==0) ); + + /* If p!=0, it must match the iOff value. */ + assert( p==0 || p==&((u8 *)pFd->pMapRegion)[iOff] ); + + if( p ){ + pFd->nFetchOut--; + }else{ + unixUnmapfile(pFd); + } + + assert( pFd->nFetchOut>=0 ); + return SQLITE_OK; +} + /* ** Here ends the implementation of all sqlite3_file methods. 
** @@ -27419,7 +27976,9 @@ static const sqlite3_io_methods METHOD = { \ unixShmMap, /* xShmMap */ \ unixShmLock, /* xShmLock */ \ unixShmBarrier, /* xShmBarrier */ \ - unixShmUnmap /* xShmUnmap */ \ + unixShmUnmap, /* xShmUnmap */ \ + unixFetch, /* xFetch */ \ + unixUnfetch, /* xUnfetch */ \ }; \ static const sqlite3_io_methods *FINDER##Impl(const char *z, unixFile *p){ \ UNUSED_PARAMETER(z); UNUSED_PARAMETER(p); \ @@ -27436,7 +27995,7 @@ static const sqlite3_io_methods *(*const FINDER)(const char*,unixFile *p) \ IOMETHODS( posixIoFinder, /* Finder function name */ posixIoMethods, /* sqlite3_io_methods object name */ - 2, /* shared memory is enabled */ + 3, /* shared memory and mmap are enabled */ unixClose, /* xClose method */ unixLock, /* xLock method */ unixUnlock, /* xUnlock method */ @@ -27687,6 +28246,7 @@ static int fillInUnixFile( pNew->pVfs = pVfs; pNew->zPath = zFilename; pNew->ctrlFlags = (u8)ctrlFlags; + pNew->mmapSizeMax = sqlite3GlobalConfig.szMmap; if( sqlite3_uri_boolean(((ctrlFlags & UNIXFILE_URI) ? zFilename : 0), "psow", SQLITE_POWERSAFE_OVERWRITE) ){ pNew->ctrlFlags |= UNIXFILE_PSOW; @@ -27822,15 +28382,15 @@ static int fillInUnixFile( if( h>=0 ) robust_close(pNew, h, __LINE__); h = -1; osUnlink(zFilename); - isDelete = 0; + pNew->ctrlFlags |= UNIXFILE_DELETE; } - if( isDelete ) pNew->ctrlFlags |= UNIXFILE_DELETE; #endif if( rc!=SQLITE_OK ){ if( h>=0 ) robust_close(pNew, h, __LINE__); }else{ pNew->pMethod = pLockingStyle; OpenCounter(+1); + verifyDbFile(pNew); } return rc; } @@ -29924,7 +30484,7 @@ SQLITE_API int sqlite3_os_init(void){ /* Double-check that the aSyscall[] array has been constructed ** correctly. See ticket [bb3a86e890c8e96ab] */ - assert( ArraySize(aSyscall)==21 ); + assert( ArraySize(aSyscall)==24 ); /* Register all VFSes defined in the aVfs[] array */ for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){ @@ -30307,11 +30867,20 @@ struct winFile { winceLock local; /* Locks obtained by this instance of winFile */ winceLock *shared; /* Global shared lock memory for the file */ #endif +#if SQLITE_MAX_MMAP_SIZE>0 + int nFetchOut; /* Number of outstanding xFetch references */ + HANDLE hMap; /* Handle for accessing memory mapping */ + void *pMapRegion; /* Area memory mapped */ + sqlite3_int64 mmapSize; /* Usable size of mapped region */ + sqlite3_int64 mmapSizeActual; /* Actual size of mapped region */ + sqlite3_int64 mmapSizeMax; /* Configured FCNTL_MMAP_SIZE value */ +#endif }; /* ** Allowed values for winFile.ctrlFlags */ +#define WINFILE_RDONLY 0x02 /* Connection is read only */ #define WINFILE_PERSIST_WAL 0x04 /* Persistent WAL mode */ #define WINFILE_PSOW 0x10 /* SQLITE_IOCAP_POWERSAFE_OVERWRITE */ @@ -31671,7 +32240,7 @@ static int getLastErrorMsg(DWORD lastErrno, int nBuf, char *zBuf){ } #endif if( 0 == dwLen ){ - sqlite3_snprintf(nBuf, zBuf, "OsError 0x%x (%u)", lastErrno, lastErrno); + sqlite3_snprintf(nBuf, zBuf, "OsError 0x%lx (%lu)", lastErrno, lastErrno); }else{ /* copy a maximum of nBuf chars to output buffer */ sqlite3_snprintf(nBuf, zBuf, "%s", zOut); @@ -31714,7 +32283,7 @@ static int winLogErrorAtLine( for(i=0; zMsg[i] && zMsg[i]!='\r' && zMsg[i]!='\n'; i++){} zMsg[i] = 0; sqlite3_log(errcode, - "os_win.c:%d: (%d) %s(%s) - %s", + "os_win.c:%d: (%lu) %s(%s) - %s", iLine, lastErrno, zFunc, zPath, zMsg ); @@ -32175,6 +32744,8 @@ static int seekWinFile(winFile *pFile, sqlite3_int64 iOffset){ DWORD dwRet; /* Value returned by SetFilePointer() */ DWORD lastErrno; /* Value returned by GetLastError() */ + OSTRACE(("SEEK file=%p, offset=%lld\n", 
pFile->h, iOffset)); + upperBits = (LONG)((iOffset>>32) & 0x7fffffff); lowerBits = (LONG)(iOffset & 0xffffffff); @@ -32192,9 +32763,11 @@ static int seekWinFile(winFile *pFile, sqlite3_int64 iOffset){ pFile->lastErrno = lastErrno; winLogError(SQLITE_IOERR_SEEK, pFile->lastErrno, "seekWinFile", pFile->zPath); + OSTRACE(("SEEK file=%p, rc=SQLITE_IOERR_SEEK\n", pFile->h)); return 1; } + OSTRACE(("SEEK file=%p, rc=SQLITE_OK\n", pFile->h)); return 0; #else /* @@ -32211,13 +32784,20 @@ static int seekWinFile(winFile *pFile, sqlite3_int64 iOffset){ pFile->lastErrno = osGetLastError(); winLogError(SQLITE_IOERR_SEEK, pFile->lastErrno, "seekWinFile", pFile->zPath); + OSTRACE(("SEEK file=%p, rc=SQLITE_IOERR_SEEK\n", pFile->h)); return 1; } + OSTRACE(("SEEK file=%p, rc=SQLITE_OK\n", pFile->h)); return 0; #endif } +#if SQLITE_MAX_MMAP_SIZE>0 +/* Forward references to VFS methods */ +static int winUnmapfile(winFile*); +#endif + /* ** Close a file. ** @@ -32237,8 +32817,14 @@ static int winClose(sqlite3_file *id){ #ifndef SQLITE_OMIT_WAL assert( pFile->pShm==0 ); #endif - OSTRACE(("CLOSE %d\n", pFile->h)); assert( pFile->h!=NULL && pFile->h!=INVALID_HANDLE_VALUE ); + OSTRACE(("CLOSE file=%p\n", pFile->h)); + +#if SQLITE_MAX_MMAP_SIZE>0 + rc = winUnmapfile(pFile); + if( rc!=SQLITE_OK ) return rc; +#endif + do{ rc = osCloseHandle(pFile->h); /* SimulateIOError( rc=0; cnt=MX_CLOSE_ATTEMPT; ); */ @@ -32258,11 +32844,11 @@ static int winClose(sqlite3_file *id){ sqlite3_free(pFile->zDeleteOnClose); } #endif - OSTRACE(("CLOSE %d %s\n", pFile->h, rc ? "ok" : "failed")); if( rc ){ pFile->h = NULL; } OpenCounter(-1); + OSTRACE(("CLOSE file=%p, rc=%s\n", pFile->h, rc ? "ok" : "failed")); return rc ? SQLITE_OK : winLogError(SQLITE_IOERR_CLOSE, osGetLastError(), "winClose", pFile->zPath); @@ -32287,11 +32873,33 @@ static int winRead( int nRetry = 0; /* Number of retrys */ assert( id!=0 ); + assert( amt>0 ); + assert( offset>=0 ); SimulateIOError(return SQLITE_IOERR_READ); - OSTRACE(("READ %d lock=%d\n", pFile->h, pFile->locktype)); + OSTRACE(("READ file=%p, buffer=%p, amount=%d, offset=%lld, lock=%d\n", + pFile->h, pBuf, amt, offset, pFile->locktype)); + +#if SQLITE_MAX_MMAP_SIZE>0 + /* Deal with as much of this read request as possible by transfering + ** data from the memory mapping using memcpy(). 
*/ + if( offsetmmapSize ){ + if( offset+amt <= pFile->mmapSize ){ + memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], amt); + OSTRACE(("READ-MMAP file=%p, rc=SQLITE_OK\n", pFile->h)); + return SQLITE_OK; + }else{ + int nCopy = (int)(pFile->mmapSize - offset); + memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], nCopy); + pBuf = &((u8 *)pBuf)[nCopy]; + amt -= nCopy; + offset += nCopy; + } + } +#endif #if SQLITE_OS_WINCE if( seekWinFile(pFile, offset) ){ + OSTRACE(("READ file=%p, rc=SQLITE_FULL\n", pFile->h)); return SQLITE_FULL; } while( !osReadFile(pFile->h, pBuf, amt, &nRead, 0) ){ @@ -32305,6 +32913,7 @@ static int winRead( DWORD lastErrno; if( retryIoerr(&nRetry, &lastErrno) ) continue; pFile->lastErrno = lastErrno; + OSTRACE(("READ file=%p, rc=SQLITE_IOERR_READ\n", pFile->h)); return winLogError(SQLITE_IOERR_READ, pFile->lastErrno, "winRead", pFile->zPath); } @@ -32312,9 +32921,11 @@ static int winRead( if( nRead<(DWORD)amt ){ /* Unread parts of the buffer must be zero-filled */ memset(&((char*)pBuf)[nRead], 0, amt-nRead); + OSTRACE(("READ file=%p, rc=SQLITE_IOERR_SHORT_READ\n", pFile->h)); return SQLITE_IOERR_SHORT_READ; } + OSTRACE(("READ file=%p, rc=SQLITE_OK\n", pFile->h)); return SQLITE_OK; } @@ -32337,7 +32948,26 @@ static int winWrite( SimulateIOError(return SQLITE_IOERR_WRITE); SimulateDiskfullError(return SQLITE_FULL); - OSTRACE(("WRITE %d lock=%d\n", pFile->h, pFile->locktype)); + OSTRACE(("WRITE file=%p, buffer=%p, amount=%d, offset=%lld, lock=%d\n", + pFile->h, pBuf, amt, offset, pFile->locktype)); + +#if SQLITE_MAX_MMAP_SIZE>0 + /* Deal with as much of this write request as possible by transfering + ** data from the memory mapping using memcpy(). */ + if( offsetmmapSize ){ + if( offset+amt <= pFile->mmapSize ){ + memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, amt); + OSTRACE(("WRITE-MMAP file=%p, rc=SQLITE_OK\n", pFile->h)); + return SQLITE_OK; + }else{ + int nCopy = (int)(pFile->mmapSize - offset); + memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, nCopy); + pBuf = &((u8 *)pBuf)[nCopy]; + amt -= nCopy; + offset += nCopy; + } + } +#endif #if SQLITE_OS_WINCE rc = seekWinFile(pFile, offset); @@ -32390,13 +33020,16 @@ static int winWrite( if( rc ){ if( ( pFile->lastErrno==ERROR_HANDLE_DISK_FULL ) || ( pFile->lastErrno==ERROR_DISK_FULL )){ + OSTRACE(("WRITE file=%p, rc=SQLITE_FULL\n", pFile->h)); return SQLITE_FULL; } + OSTRACE(("WRITE file=%p, rc=SQLITE_IOERR_WRITE\n", pFile->h)); return winLogError(SQLITE_IOERR_WRITE, pFile->lastErrno, "winWrite", pFile->zPath); }else{ logIoerr(nRetry); } + OSTRACE(("WRITE file=%p, rc=SQLITE_OK\n", pFile->h)); return SQLITE_OK; } @@ -32406,11 +33039,12 @@ static int winWrite( static int winTruncate(sqlite3_file *id, sqlite3_int64 nByte){ winFile *pFile = (winFile*)id; /* File handle object */ int rc = SQLITE_OK; /* Return code for this function */ + DWORD lastErrno; assert( pFile ); - - OSTRACE(("TRUNCATE %d %lld\n", pFile->h, nByte)); SimulateIOError(return SQLITE_IOERR_TRUNCATE); + OSTRACE(("TRUNCATE file=%p, size=%lld, lock=%d\n", + pFile->h, nByte, pFile->locktype)); /* If the user has configured a chunk-size for this file, truncate the ** file so that it consists of an integer number of chunks (i.e. the @@ -32424,14 +33058,25 @@ static int winTruncate(sqlite3_file *id, sqlite3_int64 nByte){ /* SetEndOfFile() returns non-zero when successful, or zero when it fails. 
 */
   if( seekWinFile(pFile, nByte) ){
     rc = winLogError(SQLITE_IOERR_TRUNCATE, pFile->lastErrno,
-                    "winTruncate1", pFile->zPath);
-  }else if( 0==osSetEndOfFile(pFile->h) ){
-    pFile->lastErrno = osGetLastError();
+                     "winTruncate1", pFile->zPath);
+  }else if( 0==osSetEndOfFile(pFile->h) &&
+            ((lastErrno = osGetLastError())!=ERROR_USER_MAPPED_FILE) ){
+    pFile->lastErrno = lastErrno;
     rc = winLogError(SQLITE_IOERR_TRUNCATE, pFile->lastErrno,
-                    "winTruncate2", pFile->zPath);
+                     "winTruncate2", pFile->zPath);
   }
-  OSTRACE(("TRUNCATE %d %lld %s\n", pFile->h, nByte, rc ? "failed" : "ok"));
+#if SQLITE_MAX_MMAP_SIZE>0
+  /* If the file was truncated to a size smaller than the currently
+  ** mapped region, reduce the effective mapping size as well. SQLite will
+  ** use read() and write() to access data beyond this point from now on.
+  */
+  if( pFile->pMapRegion && nByte<pFile->mmapSize ){
+    pFile->mmapSize = nByte;
+  }
+#endif
+
+  OSTRACE(("TRUNCATE file=%p, rc=%s\n", pFile->h, sqlite3ErrName(rc)));
   return rc;
 }
@@ -32471,13 +33116,14 @@ static int winSync(sqlite3_file *id, int flags){
       || (flags&0x0F)==SQLITE_SYNC_FULL
   );
-  OSTRACE(("SYNC %d lock=%d\n", pFile->h, pFile->locktype));
-
   /* Unix cannot, but some systems may return SQLITE_FULL from here. This
   ** line is to test that doing so does not cause any problems.
   */
   SimulateDiskfullError( return SQLITE_FULL );
+  OSTRACE(("SYNC file=%p, flags=%x, lock=%d\n",
+           pFile->h, flags, pFile->locktype));
+
 #ifndef SQLITE_TEST
   UNUSED_PARAMETER(flags);
 #else
@@ -32496,9 +33142,11 @@ static int winSync(sqlite3_file *id, int flags){
   rc = osFlushFileBuffers(pFile->h);
   SimulateIOError( rc=FALSE );
   if( rc ){
+    OSTRACE(("SYNC file=%p, rc=SQLITE_OK\n", pFile->h));
     return SQLITE_OK;
   }else{
     pFile->lastErrno = osGetLastError();
+    OSTRACE(("SYNC file=%p, rc=SQLITE_IOERR_FSYNC\n", pFile->h));
     return winLogError(SQLITE_IOERR_FSYNC, pFile->lastErrno,
              "winSync", pFile->zPath);
   }
@@ -32513,7 +33161,10 @@ static int winFileSize(sqlite3_file *id, sqlite3_int64 *pSize){
   int rc = SQLITE_OK;
   assert( id!=0 );
+  assert( pSize!=0 );
   SimulateIOError(return SQLITE_IOERR_FSTAT);
+  OSTRACE(("SIZE file=%p, pSize=%p\n", pFile->h, pSize));
+
 #if SQLITE_OS_WINRT
   {
     FILE_STANDARD_INFO info;
@@ -32542,6 +33193,8 @@ static int winFileSize(sqlite3_file *id, sqlite3_int64 *pSize){
     }
   }
 #endif
+  OSTRACE(("SIZE file=%p, pSize=%p, *pSize=%lld, rc=%s\n",
+           pFile->h, pSize, *pSize, sqlite3ErrName(rc)));
   return rc;
 }
@@ -32583,6 +33236,7 @@ static int winFileSize(sqlite3_file *id, sqlite3_int64 *pSize){
 */
 static int getReadLock(winFile *pFile){
   int res;
+  OSTRACE(("READ-LOCK file=%p, lock=%d\n", pFile->h, pFile->locktype));
   if( isNT() ){
 #if SQLITE_OS_WINCE
     /*
@@ -32608,6 +33262,7 @@ static int getReadLock(winFile *pFile){
     pFile->lastErrno = osGetLastError();
     /* No need to log a failure to lock */
   }
+  OSTRACE(("READ-LOCK file=%p, rc=%s\n", pFile->h, sqlite3ErrName(res)));
   return res;
 }
@@ -32617,6 +33272,7 @@ static int getReadLock(winFile *pFile){
 static int unlockReadLock(winFile *pFile){
   int res;
   DWORD lastErrno;
+  OSTRACE(("READ-UNLOCK file=%p, lock=%d\n", pFile->h, pFile->locktype));
   if( isNT() ){
     res = winUnlockFile(&pFile->h, SHARED_FIRST, 0, SHARED_SIZE, 0);
   }
@@ -32630,6 +33286,7 @@ static int unlockReadLock(winFile *pFile){
     winLogError(SQLITE_IOERR_UNLOCK, pFile->lastErrno,
             "unlockReadLock", pFile->zPath);
   }
+  OSTRACE(("READ-UNLOCK file=%p, rc=%s\n", pFile->h, sqlite3ErrName(res)));
   return res;
 }
@@ -32668,14 +33325,15 @@ static int winLock(sqlite3_file *id, int locktype){
   DWORD lastErrno = NO_ERROR;
   assert( 
id!=0 ); - OSTRACE(("LOCK %d %d was %d(%d)\n", - pFile->h, locktype, pFile->locktype, pFile->sharedLockByte)); + OSTRACE(("LOCK file=%p, oldLock=%d(%d), newLock=%d\n", + pFile->h, pFile->locktype, pFile->sharedLockByte, locktype)); /* If there is already a lock of this type or more restrictive on the ** OsFile, do nothing. Don't use the end_lock: exit path, as ** sqlite3OsEnterMutex() hasn't been called yet. */ if( pFile->locktype>=locktype ){ + OSTRACE(("LOCK-HELD file=%p, rc=SQLITE_OK\n", pFile->h)); return SQLITE_OK; } @@ -32703,7 +33361,8 @@ static int winLock(sqlite3_file *id, int locktype){ ** If you are using this code as a model for alternative VFSes, do not ** copy this retry logic. It is a hack intended for Windows only. */ - OSTRACE(("could not get a PENDING lock. cnt=%d\n", cnt)); + OSTRACE(("LOCK-PENDING-FAIL file=%p, count=%d, rc=%s\n", + pFile->h, cnt, sqlite3ErrName(res))); if( cnt ) sqlite3_win32_sleep(1); } gotPendingLock = res; @@ -32748,14 +33407,12 @@ static int winLock(sqlite3_file *id, int locktype){ if( locktype==EXCLUSIVE_LOCK && res ){ assert( pFile->locktype>=SHARED_LOCK ); res = unlockReadLock(pFile); - OSTRACE(("unreadlock = %d\n", res)); res = winLockFile(&pFile->h, SQLITE_LOCKFILE_FLAGS, SHARED_FIRST, 0, SHARED_SIZE, 0); if( res ){ newLocktype = EXCLUSIVE_LOCK; }else{ lastErrno = osGetLastError(); - OSTRACE(("error-code = %d\n", lastErrno)); getReadLock(pFile); } } @@ -32773,12 +33430,14 @@ static int winLock(sqlite3_file *id, int locktype){ if( res ){ rc = SQLITE_OK; }else{ - OSTRACE(("LOCK FAILED %d trying for %d but got %d\n", pFile->h, - locktype, newLocktype)); + OSTRACE(("LOCK-FAIL file=%p, wanted=%d, got=%d\n", + pFile->h, locktype, newLocktype)); pFile->lastErrno = lastErrno; rc = SQLITE_BUSY; } pFile->locktype = (u8)newLocktype; + OSTRACE(("LOCK file=%p, lock=%d, rc=%s\n", + pFile->h, pFile->locktype, sqlite3ErrName(rc))); return rc; } @@ -32792,20 +33451,23 @@ static int winCheckReservedLock(sqlite3_file *id, int *pResOut){ winFile *pFile = (winFile*)id; SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); + OSTRACE(("TEST-WR-LOCK file=%p, pResOut=%p\n", pFile->h, pResOut)); assert( id!=0 ); if( pFile->locktype>=RESERVED_LOCK ){ rc = 1; - OSTRACE(("TEST WR-LOCK %d %d (local)\n", pFile->h, rc)); + OSTRACE(("TEST-WR-LOCK file=%p, rc=%d (local)\n", pFile->h, rc)); }else{ rc = winLockFile(&pFile->h, SQLITE_LOCKFILEEX_FLAGS,RESERVED_BYTE, 0, 1, 0); if( rc ){ winUnlockFile(&pFile->h, RESERVED_BYTE, 0, 1, 0); } rc = !rc; - OSTRACE(("TEST WR-LOCK %d %d (remote)\n", pFile->h, rc)); + OSTRACE(("TEST-WR-LOCK file=%p, rc=%d (remote)\n", pFile->h, rc)); } *pResOut = rc; + OSTRACE(("TEST-WR-LOCK file=%p, pResOut=%p, *pResOut=%d, rc=SQLITE_OK\n", + pFile->h, pResOut, *pResOut)); return SQLITE_OK; } @@ -32826,8 +33488,8 @@ static int winUnlock(sqlite3_file *id, int locktype){ int rc = SQLITE_OK; assert( pFile!=0 ); assert( locktype<=SHARED_LOCK ); - OSTRACE(("UNLOCK %d to %d was %d(%d)\n", pFile->h, locktype, - pFile->locktype, pFile->sharedLockByte)); + OSTRACE(("UNLOCK file=%p, oldLock=%d(%d), newLock=%d\n", + pFile->h, pFile->locktype, pFile->sharedLockByte, locktype)); type = pFile->locktype; if( type>=EXCLUSIVE_LOCK ){ winUnlockFile(&pFile->h, SHARED_FIRST, 0, SHARED_SIZE, 0); @@ -32848,6 +33510,8 @@ static int winUnlock(sqlite3_file *id, int locktype){ winUnlockFile(&pFile->h, PENDING_BYTE, 0, 1, 0); } pFile->locktype = (u8)locktype; + OSTRACE(("UNLOCK file=%p, lock=%d, rc=%s\n", + pFile->h, pFile->locktype, sqlite3ErrName(rc))); return rc; } @@ 
-32875,17 +33539,21 @@ static int getTempname(int nBuf, char *zBuf); */ static int winFileControl(sqlite3_file *id, int op, void *pArg){ winFile *pFile = (winFile*)id; + OSTRACE(("FCNTL file=%p, op=%d, pArg=%p\n", pFile->h, op, pArg)); switch( op ){ case SQLITE_FCNTL_LOCKSTATE: { *(int*)pArg = pFile->locktype; + OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h)); return SQLITE_OK; } case SQLITE_LAST_ERRNO: { *(int*)pArg = (int)pFile->lastErrno; + OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h)); return SQLITE_OK; } case SQLITE_FCNTL_CHUNK_SIZE: { pFile->szChunk = *(int *)pArg; + OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h)); return SQLITE_OK; } case SQLITE_FCNTL_SIZE_HINT: { @@ -32900,20 +33568,25 @@ static int winFileControl(sqlite3_file *id, int op, void *pArg){ SimulateIOErrorBenign(0); } } + OSTRACE(("FCNTL file=%p, rc=%s\n", pFile->h, sqlite3ErrName(rc))); return rc; } + OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h)); return SQLITE_OK; } case SQLITE_FCNTL_PERSIST_WAL: { winModeBit(pFile, WINFILE_PERSIST_WAL, (int*)pArg); + OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h)); return SQLITE_OK; } case SQLITE_FCNTL_POWERSAFE_OVERWRITE: { winModeBit(pFile, WINFILE_PSOW, (int*)pArg); + OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h)); return SQLITE_OK; } case SQLITE_FCNTL_VFSNAME: { *(char**)pArg = sqlite3_mprintf("win32"); + OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h)); return SQLITE_OK; } case SQLITE_FCNTL_WIN32_AV_RETRY: { @@ -32928,6 +33601,7 @@ static int winFileControl(sqlite3_file *id, int op, void *pArg){ }else{ a[1] = win32IoerrRetryDelay; } + OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h)); return SQLITE_OK; } case SQLITE_FCNTL_TEMPFILENAME: { @@ -32936,9 +33610,23 @@ static int winFileControl(sqlite3_file *id, int op, void *pArg){ getTempname(pFile->pVfs->mxPathname, zTFile); *(char**)pArg = zTFile; } + OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h)); return SQLITE_OK; } +#if SQLITE_MAX_MMAP_SIZE>0 + case SQLITE_FCNTL_MMAP_SIZE: { + i64 newLimit = *(i64*)pArg; + if( newLimit>sqlite3GlobalConfig.mxMmap ){ + newLimit = sqlite3GlobalConfig.mxMmap; + } + *(i64*)pArg = pFile->mmapSizeMax; + if( newLimit>=0 ) pFile->mmapSizeMax = newLimit; + OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h)); + return SQLITE_OK; + } +#endif } + OSTRACE(("FCNTL file=%p, rc=SQLITE_NOTFOUND\n", pFile->h)); return SQLITE_NOTFOUND; } @@ -32966,8 +33654,6 @@ static int winDeviceCharacteristics(sqlite3_file *id){ ((p->ctrlFlags & WINFILE_PSOW)?SQLITE_IOCAP_POWERSAFE_OVERWRITE:0); } -#ifndef SQLITE_OMIT_WAL - /* ** Windows will only let you create file view mappings ** on allocation size granularity boundaries. @@ -32976,6 +33662,8 @@ static int winDeviceCharacteristics(sqlite3_file *id){ */ SYSTEM_INFO winSysInfo; +#ifndef SQLITE_OMIT_WAL + /* ** Helper functions to obtain and relinquish the global mutex. The ** global mutex is used to protect the winLockInfo objects used by @@ -33099,6 +33787,9 @@ static int winShmSystemLock( /* Access to the winShmNode object is serialized by the caller */ assert( sqlite3_mutex_held(pFile->mutex) || pFile->nRef==0 ); + OSTRACE(("SHM-LOCK file=%p, lock=%d, offset=%d, size=%d\n", + pFile->hFile.h, lockType, ofst, nByte)); + /* Release/Acquire the system-level lock */ if( lockType==_SHM_UNLCK ){ rc = winUnlockFile(&pFile->hFile.h, ofst, 0, nByte, 0); @@ -33116,11 +33807,9 @@ static int winShmSystemLock( rc = SQLITE_BUSY; } - OSTRACE(("SHM-LOCK %d %s %s 0x%08lx\n", - pFile->hFile.h, - rc==SQLITE_OK ? 
"ok" : "failed", - lockType==_SHM_UNLCK ? "UnlockFileEx" : "LockFileEx", - pFile->lastErrno)); + OSTRACE(("SHM-LOCK file=%p, func=%s, errno=%lu, rc=%s\n", + pFile->hFile.h, (lockType == _SHM_UNLCK) ? "winUnlockFile" : + "winLockFile", pFile->lastErrno, sqlite3ErrName(rc))); return rc; } @@ -33140,6 +33829,8 @@ static void winShmPurge(sqlite3_vfs *pVfs, int deleteFlag){ winShmNode *p; BOOL bRc; assert( winShmMutexHeld() ); + OSTRACE(("SHM-PURGE pid=%lu, deleteFlag=%d\n", + osGetCurrentProcessId(), deleteFlag)); pp = &winShmNodeList; while( (p = *pp)!=0 ){ if( p->nRef==0 ){ @@ -33147,13 +33838,11 @@ static void winShmPurge(sqlite3_vfs *pVfs, int deleteFlag){ if( p->mutex ) sqlite3_mutex_free(p->mutex); for(i=0; inRegion; i++){ bRc = osUnmapViewOfFile(p->aRegion[i].pMap); - OSTRACE(("SHM-PURGE pid-%d unmap region=%d %s\n", - (int)osGetCurrentProcessId(), i, - bRc ? "ok" : "failed")); + OSTRACE(("SHM-PURGE-UNMAP pid=%lu, region=%d, rc=%s\n", + osGetCurrentProcessId(), i, bRc ? "ok" : "failed")); bRc = osCloseHandle(p->aRegion[i].hMap); - OSTRACE(("SHM-PURGE pid-%d close region=%d %s\n", - (int)osGetCurrentProcessId(), i, - bRc ? "ok" : "failed")); + OSTRACE(("SHM-PURGE-CLOSE pid=%lu, region=%d, rc=%s\n", + osGetCurrentProcessId(), i, bRc ? "ok" : "failed")); } if( p->hFile.h!=NULL && p->hFile.h!=INVALID_HANDLE_VALUE ){ SimulateIOErrorBenign(1); @@ -33432,9 +34121,9 @@ static int winShmLock( } } sqlite3_mutex_leave(pShmNode->mutex); - OSTRACE(("SHM-LOCK shmid-%d, pid-%d got %03x,%03x %s\n", - p->id, (int)osGetCurrentProcessId(), p->sharedMask, p->exclMask, - rc ? "failed" : "ok")); + OSTRACE(("SHM-LOCK pid=%lu, id=%d, sharedMask=%03x, exclMask=%03x, rc=%s\n", + osGetCurrentProcessId(), p->id, p->sharedMask, p->exclMask, + sqlite3ErrName(rc))); return rc; } @@ -33555,8 +34244,8 @@ static int winShmMap( NULL, PAGE_READWRITE, 0, nByte, NULL ); #endif - OSTRACE(("SHM-MAP pid-%d create region=%d nbyte=%d %s\n", - (int)osGetCurrentProcessId(), pShmNode->nRegion, nByte, + OSTRACE(("SHM-MAP-CREATE pid=%lu, region=%d, size=%d, rc=%s\n", + osGetCurrentProcessId(), pShmNode->nRegion, nByte, hMap ? "ok" : "failed")); if( hMap ){ int iOffset = pShmNode->nRegion*szRegion; @@ -33570,8 +34259,8 @@ static int winShmMap( 0, iOffset - iOffsetShift, szRegion + iOffsetShift ); #endif - OSTRACE(("SHM-MAP pid-%d map region=%d offset=%d size=%d %s\n", - (int)osGetCurrentProcessId(), pShmNode->nRegion, iOffset, + OSTRACE(("SHM-MAP-MAP pid=%lu, region=%d, offset=%d, size=%d, rc=%s\n", + osGetCurrentProcessId(), pShmNode->nRegion, iOffset, szRegion, pMap ? "ok" : "failed")); } if( !pMap ){ @@ -33608,6 +34297,230 @@ shmpage_out: # define winShmUnmap 0 #endif /* #ifndef SQLITE_OMIT_WAL */ +/* +** Cleans up the mapped region of the specified file, if any. 
+*/ +#if SQLITE_MAX_MMAP_SIZE>0 +static int winUnmapfile(winFile *pFile){ + assert( pFile!=0 ); + OSTRACE(("UNMAP-FILE pid=%lu, pFile=%p, hMap=%p, pMapRegion=%p, " + "mmapSize=%lld, mmapSizeActual=%lld, mmapSizeMax=%lld\n", + osGetCurrentProcessId(), pFile, pFile->hMap, pFile->pMapRegion, + pFile->mmapSize, pFile->mmapSizeActual, pFile->mmapSizeMax)); + if( pFile->pMapRegion ){ + if( !osUnmapViewOfFile(pFile->pMapRegion) ){ + pFile->lastErrno = osGetLastError(); + OSTRACE(("UNMAP-FILE pid=%lu, pFile=%p, pMapRegion=%p, " + "rc=SQLITE_IOERR_MMAP\n", osGetCurrentProcessId(), pFile, + pFile->pMapRegion)); + return winLogError(SQLITE_IOERR_MMAP, pFile->lastErrno, + "winUnmap1", pFile->zPath); + } + pFile->pMapRegion = 0; + pFile->mmapSize = 0; + pFile->mmapSizeActual = 0; + } + if( pFile->hMap!=NULL ){ + if( !osCloseHandle(pFile->hMap) ){ + pFile->lastErrno = osGetLastError(); + OSTRACE(("UNMAP-FILE pid=%lu, pFile=%p, hMap=%p, rc=SQLITE_IOERR_MMAP\n", + osGetCurrentProcessId(), pFile, pFile->hMap)); + return winLogError(SQLITE_IOERR_MMAP, pFile->lastErrno, + "winUnmap2", pFile->zPath); + } + pFile->hMap = NULL; + } + OSTRACE(("UNMAP-FILE pid=%lu, pFile=%p, rc=SQLITE_OK\n", + osGetCurrentProcessId(), pFile)); + return SQLITE_OK; +} + +/* +** Memory map or remap the file opened by file-descriptor pFd (if the file +** is already mapped, the existing mapping is replaced by the new). Or, if +** there already exists a mapping for this file, and there are still +** outstanding xFetch() references to it, this function is a no-op. +** +** If parameter nByte is non-negative, then it is the requested size of +** the mapping to create. Otherwise, if nByte is less than zero, then the +** requested size is the size of the file on disk. The actual size of the +** created mapping is either the requested size or the value configured +** using SQLITE_FCNTL_MMAP_SIZE, whichever is smaller. +** +** SQLITE_OK is returned if no error occurs (even if the mapping is not +** recreated as a result of outstanding references) or an SQLite error +** code otherwise. 
+*/ +static int winMapfile(winFile *pFd, sqlite3_int64 nByte){ + sqlite3_int64 nMap = nByte; + int rc; + + assert( nMap>=0 || pFd->nFetchOut==0 ); + OSTRACE(("MAP-FILE pid=%lu, pFile=%p, size=%lld\n", + osGetCurrentProcessId(), pFd, nByte)); + + if( pFd->nFetchOut>0 ) return SQLITE_OK; + + if( nMap<0 ){ + rc = winFileSize((sqlite3_file*)pFd, &nMap); + if( rc ){ + OSTRACE(("MAP-FILE pid=%lu, pFile=%p, rc=SQLITE_IOERR_FSTAT\n", + osGetCurrentProcessId(), pFd)); + return SQLITE_IOERR_FSTAT; + } + } + if( nMap>pFd->mmapSizeMax ){ + nMap = pFd->mmapSizeMax; + } + nMap &= ~(sqlite3_int64)(winSysInfo.dwPageSize - 1); + + if( nMap==0 && pFd->mmapSize>0 ){ + winUnmapfile(pFd); + } + if( nMap!=pFd->mmapSize ){ + void *pNew = 0; + DWORD protect = PAGE_READONLY; + DWORD flags = FILE_MAP_READ; + + winUnmapfile(pFd); + if( (pFd->ctrlFlags & WINFILE_RDONLY)==0 ){ + protect = PAGE_READWRITE; + flags |= FILE_MAP_WRITE; + } +#if SQLITE_OS_WINRT + pFd->hMap = osCreateFileMappingFromApp(pFd->h, NULL, protect, nMap, NULL); +#elif defined(SQLITE_WIN32_HAS_WIDE) + pFd->hMap = osCreateFileMappingW(pFd->h, NULL, protect, + (DWORD)((nMap>>32) & 0xffffffff), + (DWORD)(nMap & 0xffffffff), NULL); +#elif defined(SQLITE_WIN32_HAS_ANSI) + pFd->hMap = osCreateFileMappingA(pFd->h, NULL, protect, + (DWORD)((nMap>>32) & 0xffffffff), + (DWORD)(nMap & 0xffffffff), NULL); +#endif + if( pFd->hMap==NULL ){ + pFd->lastErrno = osGetLastError(); + rc = winLogError(SQLITE_IOERR_MMAP, pFd->lastErrno, + "winMapfile", pFd->zPath); + /* Log the error, but continue normal operation using xRead/xWrite */ + OSTRACE(("MAP-FILE-CREATE pid=%lu, pFile=%p, rc=SQLITE_IOERR_MMAP\n", + osGetCurrentProcessId(), pFd)); + return SQLITE_OK; + } + assert( (nMap % winSysInfo.dwPageSize)==0 ); +#if SQLITE_OS_WINRT + pNew = osMapViewOfFileFromApp(pFd->hMap, flags, 0, nMap); +#else + assert( sizeof(SIZE_T)==sizeof(sqlite3_int64) || nMap<=0xffffffff ); + pNew = osMapViewOfFile(pFd->hMap, flags, 0, 0, (SIZE_T)nMap); +#endif + if( pNew==NULL ){ + osCloseHandle(pFd->hMap); + pFd->hMap = NULL; + pFd->lastErrno = osGetLastError(); + winLogError(SQLITE_IOERR_MMAP, pFd->lastErrno, + "winMapfile", pFd->zPath); + OSTRACE(("MAP-FILE-MAP pid=%lu, pFile=%p, rc=SQLITE_IOERR_MMAP\n", + osGetCurrentProcessId(), pFd)); + return SQLITE_OK; + } + pFd->pMapRegion = pNew; + pFd->mmapSize = nMap; + pFd->mmapSizeActual = nMap; + } + + OSTRACE(("MAP-FILE pid=%lu, pFile=%p, rc=SQLITE_OK\n", + osGetCurrentProcessId(), pFd)); + return SQLITE_OK; +} +#endif /* SQLITE_MAX_MMAP_SIZE>0 */ + +/* +** If possible, return a pointer to a mapping of file fd starting at offset +** iOff. The mapping must be valid for at least nAmt bytes. +** +** If such a pointer can be obtained, store it in *pp and return SQLITE_OK. +** Or, if one cannot but no error occurs, set *pp to 0 and return SQLITE_OK. +** Finally, if an error does occur, return an SQLite error code. The final +** value of *pp is undefined in this case. +** +** If this function does return a pointer, the caller must eventually +** release the reference by calling winUnfetch(). 
+*/ +static int winFetch(sqlite3_file *fd, i64 iOff, int nAmt, void **pp){ +#if SQLITE_MAX_MMAP_SIZE>0 + winFile *pFd = (winFile*)fd; /* The underlying database file */ +#endif + *pp = 0; + + OSTRACE(("FETCH pid=%lu, pFile=%p, offset=%lld, amount=%d, pp=%p\n", + osGetCurrentProcessId(), fd, iOff, nAmt, pp)); + +#if SQLITE_MAX_MMAP_SIZE>0 + if( pFd->mmapSizeMax>0 ){ + if( pFd->pMapRegion==0 ){ + int rc = winMapfile(pFd, -1); + if( rc!=SQLITE_OK ){ + OSTRACE(("FETCH pid=%lu, pFile=%p, rc=%s\n", + osGetCurrentProcessId(), pFd, sqlite3ErrName(rc))); + return rc; + } + } + if( pFd->mmapSize >= iOff+nAmt ){ + *pp = &((u8 *)pFd->pMapRegion)[iOff]; + pFd->nFetchOut++; + } + } +#endif + + OSTRACE(("FETCH pid=%lu, pFile=%p, pp=%p, *pp=%p, rc=SQLITE_OK\n", + osGetCurrentProcessId(), fd, pp, *pp)); + return SQLITE_OK; +} + +/* +** If the third argument is non-NULL, then this function releases a +** reference obtained by an earlier call to winFetch(). The second +** argument passed to this function must be the same as the corresponding +** argument that was passed to the winFetch() invocation. +** +** Or, if the third argument is NULL, then this function is being called +** to inform the VFS layer that, according to POSIX, any existing mapping +** may now be invalid and should be unmapped. +*/ +static int winUnfetch(sqlite3_file *fd, i64 iOff, void *p){ +#if SQLITE_MAX_MMAP_SIZE>0 + winFile *pFd = (winFile*)fd; /* The underlying database file */ + + /* If p==0 (unmap the entire file) then there must be no outstanding + ** xFetch references. Or, if p!=0 (meaning it is an xFetch reference), + ** then there must be at least one outstanding. */ + assert( (p==0)==(pFd->nFetchOut==0) ); + + /* If p!=0, it must match the iOff value. */ + assert( p==0 || p==&((u8 *)pFd->pMapRegion)[iOff] ); + + OSTRACE(("UNFETCH pid=%lu, pFile=%p, offset=%lld, p=%p\n", + osGetCurrentProcessId(), pFd, iOff, p)); + + if( p ){ + pFd->nFetchOut--; + }else{ + /* FIXME: If Windows truly always prevents truncating or deleting a + ** file while a mapping is held, then the following winUnmapfile() call + ** is unnecessary can can be omitted - potentially improving + ** performance. */ + winUnmapfile(pFd); + } + + assert( pFd->nFetchOut>=0 ); +#endif + + OSTRACE(("UNFETCH pid=%lu, pFile=%p, rc=SQLITE_OK\n", + osGetCurrentProcessId(), fd)); + return SQLITE_OK; +} + /* ** Here ends the implementation of all sqlite3_file methods. ** @@ -33619,7 +34532,7 @@ shmpage_out: ** sqlite3_file for win32. 
*/ static const sqlite3_io_methods winIoMethod = { - 2, /* iVersion */ + 3, /* iVersion */ winClose, /* xClose */ winRead, /* xRead */ winWrite, /* xWrite */ @@ -33635,7 +34548,9 @@ static const sqlite3_io_methods winIoMethod = { winShmMap, /* xShmMap */ winShmLock, /* xShmLock */ winShmBarrier, /* xShmBarrier */ - winShmUnmap /* xShmUnmap */ + winShmUnmap, /* xShmUnmap */ + winFetch, /* xFetch */ + winUnfetch /* xUnfetch */ }; /**************************************************************************** @@ -33699,6 +34614,7 @@ static int getTempname(int nBuf, char *zBuf){ sqlite3_snprintf(MAX_PATH-30, zTempPath, "%s", zMulti); sqlite3_free(zMulti); }else{ + OSTRACE(("TEMP-FILENAME rc=SQLITE_IOERR_NOMEM\n")); return SQLITE_IOERR_NOMEM; } } @@ -33712,6 +34628,7 @@ static int getTempname(int nBuf, char *zBuf){ sqlite3_snprintf(MAX_PATH-30, zTempPath, "%s", zUtf8); sqlite3_free(zUtf8); }else{ + OSTRACE(("TEMP-FILENAME rc=SQLITE_IOERR_NOMEM\n")); return SQLITE_IOERR_NOMEM; } } @@ -33724,6 +34641,7 @@ static int getTempname(int nBuf, char *zBuf){ nTempPath = sqlite3Strlen30(zTempPath); if( (nTempPath + sqlite3Strlen30(SQLITE_TEMP_FILE_PREFIX) + 18) >= nBuf ){ + OSTRACE(("TEMP-FILENAME rc=SQLITE_ERROR\n")); return SQLITE_ERROR; } @@ -33741,8 +34659,8 @@ static int getTempname(int nBuf, char *zBuf){ zBuf[j] = 0; zBuf[j+1] = 0; - OSTRACE(("TEMP FILENAME: %s\n", zBuf)); - return SQLITE_OK; + OSTRACE(("TEMP-FILENAME name=%s, rc=SQLITE_OK\n", zBuf)); + return SQLITE_OK; } /* @@ -33811,9 +34729,7 @@ static int winOpen( int isExclusive = (flags & SQLITE_OPEN_EXCLUSIVE); int isDelete = (flags & SQLITE_OPEN_DELETEONCLOSE); int isCreate = (flags & SQLITE_OPEN_CREATE); -#ifndef NDEBUG int isReadonly = (flags & SQLITE_OPEN_READONLY); -#endif int isReadWrite = (flags & SQLITE_OPEN_READWRITE); #ifndef NDEBUG @@ -33824,6 +34740,9 @@ static int winOpen( )); #endif + OSTRACE(("OPEN name=%s, pFile=%p, flags=%x, pOutFlags=%p\n", + zUtf8Name, id, flags, pOutFlags)); + /* Check the following statements are true: ** ** (a) Exactly one of the READWRITE and READONLY flags must be set, and @@ -33869,6 +34788,7 @@ static int winOpen( memset(zTmpname, 0, MAX_PATH+2); rc = getTempname(MAX_PATH+2, zTmpname); if( rc!=SQLITE_OK ){ + OSTRACE(("OPEN name=%s, rc=%s", zUtf8Name, sqlite3ErrName(rc))); return rc; } zUtf8Name = zTmpname; @@ -33884,11 +34804,13 @@ static int winOpen( /* Convert the filename to the system encoding. */ zConverted = convertUtf8Filename(zUtf8Name); if( zConverted==0 ){ + OSTRACE(("OPEN name=%s, rc=SQLITE_IOERR_NOMEM", zUtf8Name)); return SQLITE_IOERR_NOMEM; } if( winIsDir(zConverted) ){ sqlite3_free(zConverted); + OSTRACE(("OPEN name=%s, rc=SQLITE_CANTOPEN_ISDIR", zUtf8Name)); return SQLITE_CANTOPEN_ISDIR; } @@ -33979,9 +34901,8 @@ static int winOpen( #endif logIoerr(cnt); - OSTRACE(("OPEN %d %s 0x%lx %s\n", - h, zName, dwDesiredAccess, - h==INVALID_HANDLE_VALUE ? "failed" : "ok")); + OSTRACE(("OPEN file=%p, name=%s, access=%lx, rc=%s\n", h, zUtf8Name, + dwDesiredAccess, (h==INVALID_HANDLE_VALUE) ? "failed" : "ok")); if( h==INVALID_HANDLE_VALUE ){ pFile->lastErrno = lastErrno; @@ -34005,12 +34926,17 @@ static int winOpen( } } + OSTRACE(("OPEN file=%p, name=%s, access=%lx, pOutFlags=%p, *pOutFlags=%d, " + "rc=%s\n", h, zUtf8Name, dwDesiredAccess, pOutFlags, pOutFlags ? + *pOutFlags : 0, (h==INVALID_HANDLE_VALUE) ? 
"failed" : "ok")); + #if SQLITE_OS_WINCE if( isReadWrite && eType==SQLITE_OPEN_MAIN_DB && (rc = winceCreateLock(zName, pFile))!=SQLITE_OK ){ osCloseHandle(h); sqlite3_free(zConverted); + OSTRACE(("OPEN-CE-LOCK name=%s, rc=%s\n", zName, sqlite3ErrName(rc))); return rc; } if( isTemp ){ @@ -34024,11 +34950,21 @@ static int winOpen( pFile->pMethod = &winIoMethod; pFile->pVfs = pVfs; pFile->h = h; + if( isReadonly ){ + pFile->ctrlFlags |= WINFILE_RDONLY; + } if( sqlite3_uri_boolean(zName, "psow", SQLITE_POWERSAFE_OVERWRITE) ){ pFile->ctrlFlags |= WINFILE_PSOW; } pFile->lastErrno = NO_ERROR; pFile->zPath = zName; +#if SQLITE_MAX_MMAP_SIZE>0 + pFile->hMap = NULL; + pFile->pMapRegion = 0; + pFile->mmapSize = 0; + pFile->mmapSizeActual = 0; + pFile->mmapSizeMax = sqlite3GlobalConfig.szMmap; +#endif OpenCounter(+1); return rc; @@ -34060,6 +34996,8 @@ static int winDelete( UNUSED_PARAMETER(syncDir); SimulateIOError(return SQLITE_IOERR_DELETE); + OSTRACE(("DELETE name=%s, syncDir=%d\n", zFilename, syncDir)); + zConverted = convertUtf8Filename(zFilename); if( zConverted==0 ){ return SQLITE_IOERR_NOMEM; @@ -34145,7 +35083,7 @@ static int winDelete( logIoerr(cnt); } sqlite3_free(zConverted); - OSTRACE(("DELETE \"%s\" %s\n", zFilename, (rc ? "failed" : "ok" ))); + OSTRACE(("DELETE name=%s, rc=%s\n", zFilename, sqlite3ErrName(rc))); return rc; } @@ -34165,8 +35103,12 @@ static int winAccess( UNUSED_PARAMETER(pVfs); SimulateIOError( return SQLITE_IOERR_ACCESS; ); + OSTRACE(("ACCESS name=%s, flags=%x, pResOut=%p\n", + zFilename, flags, pResOut)); + zConverted = convertUtf8Filename(zFilename); if( zConverted==0 ){ + OSTRACE(("ACCESS name=%s, rc=SQLITE_IOERR_NOMEM\n", zFilename)); return SQLITE_IOERR_NOMEM; } if( isNT() ){ @@ -34217,6 +35159,8 @@ static int winAccess( assert(!"Invalid flags argument"); } *pResOut = rc; + OSTRACE(("ACCESS name=%s, pResOut=%p, *pResOut=%d, rc=SQLITE_OK\n", + zFilename, pResOut, *pResOut)); return SQLITE_OK; } @@ -34657,7 +35601,6 @@ SQLITE_API int sqlite3_os_init(void){ ** correctly. See ticket [bb3a86e890c8e96ab] */ assert( ArraySize(aSyscall)==74 ); -#ifndef SQLITE_OMIT_WAL /* get memory map allocation granularity */ memset(&winSysInfo, 0, sizeof(SYSTEM_INFO)); #if SQLITE_OS_WINRT @@ -34665,8 +35608,8 @@ SQLITE_API int sqlite3_os_init(void){ #else osGetSystemInfo(&winSysInfo); #endif - assert(winSysInfo.dwAllocationGranularity > 0); -#endif + assert( winSysInfo.dwAllocationGranularity>0 ); + assert( winSysInfo.dwPageSize>0 ); sqlite3_vfs_register(&winVfs, 1); return SQLITE_OK; @@ -37303,7 +38246,6 @@ SQLITE_PRIVATE int sqlite3RowSetTest(RowSet *pRowSet, u8 iBatch, sqlite3_int64 i # define sqlite3WalClose(w,x,y,z) 0 # define sqlite3WalBeginReadTransaction(y,z) 0 # define sqlite3WalEndReadTransaction(z) -# define sqlite3WalRead(v,w,x,y,z) 0 # define sqlite3WalDbsize(y) 0 # define sqlite3WalBeginWriteTransaction(y) 0 # define sqlite3WalEndWriteTransaction(x) 0 @@ -37316,6 +38258,7 @@ SQLITE_PRIVATE int sqlite3RowSetTest(RowSet *pRowSet, u8 iBatch, sqlite3_int64 i # define sqlite3WalExclusiveMode(y,z) 0 # define sqlite3WalHeapMemory(z) 0 # define sqlite3WalFramesize(z) 0 +# define sqlite3WalFindFrame(x,y,z) 0 #else #define WAL_SAVEPOINT_NDATA 4 @@ -37343,7 +38286,8 @@ SQLITE_PRIVATE int sqlite3WalBeginReadTransaction(Wal *pWal, int *); SQLITE_PRIVATE void sqlite3WalEndReadTransaction(Wal *pWal); /* Read a page from the write-ahead log, if it is present. 
*/ -SQLITE_PRIVATE int sqlite3WalRead(Wal *pWal, Pgno pgno, int *pInWal, int nOut, u8 *pOut); +SQLITE_PRIVATE int sqlite3WalFindFrame(Wal *, Pgno, u32 *); +SQLITE_PRIVATE int sqlite3WalReadFrame(Wal *, u32, int, u8 *); /* If the WAL is not empty, return the size of the database. */ SQLITE_PRIVATE Pgno sqlite3WalDbsize(Wal *pWal); @@ -38043,6 +38987,11 @@ struct Pager { PagerSavepoint *aSavepoint; /* Array of active savepoints */ int nSavepoint; /* Number of elements in aSavepoint[] */ char dbFileVers[16]; /* Changes whenever database file changes */ + + u8 bUseFetch; /* True to use xFetch() */ + int nMmapOut; /* Number of mmap pages currently outstanding */ + sqlite3_int64 szMmap; /* Desired maximum mmap size */ + PgHdr *pMmapFreelist; /* List of free mmap page headers (pDirty) */ /* ** End of the routinely-changing class members ***************************************************************************/ @@ -38153,6 +39102,16 @@ static const unsigned char aJournalMagic[] = { # define MEMDB pPager->memDb #endif +/* +** The macro USEFETCH is true if we are allowed to use the xFetch and xUnfetch +** interfaces to access the database using memory-mapped I/O. +*/ +#if SQLITE_MAX_MMAP_SIZE>0 +# define USEFETCH(x) ((x)->bUseFetch) +#else +# define USEFETCH(x) 0 +#endif + /* ** The maximum legal page number is (2^31 - 1). */ @@ -39640,7 +40599,7 @@ static int pager_playback_one_page( i64 ofst = (pgno-1)*(i64)pPager->pageSize; testcase( !isSavepnt && pPg!=0 && (pPg->flags&PGHDR_NEED_SYNC)!=0 ); assert( !pagerUseWal(pPager) ); - rc = sqlite3OsWrite(pPager->fd, (u8*)aData, pPager->pageSize, ofst); + rc = sqlite3OsWrite(pPager->fd, (u8 *)aData, pPager->pageSize, ofst); if( pgno>pPager->dbFileSize ){ pPager->dbFileSize = pgno; } @@ -40031,6 +40990,7 @@ static int pager_playback(Pager *pPager, int isHot){ int res = 1; /* Value returned by sqlite3OsAccess() */ char *zMaster = 0; /* Name of master journal file if any */ int needPagerReset; /* True to reset page prior to first page rollback */ + int nPlayback = 0; /* Total number of pages restored from journal */ /* Figure out how many records are in the journal. Abort early if ** the journal is empty. @@ -40131,7 +41091,9 @@ static int pager_playback(Pager *pPager, int isHot){ needPagerReset = 0; } rc = pager_playback_one_page(pPager,&pPager->journalOff,0,1,0); - if( rc!=SQLITE_OK ){ + if( rc==SQLITE_OK ){ + nPlayback++; + }else{ if( rc==SQLITE_DONE ){ pPager->journalOff = szJ; break; @@ -40201,6 +41163,10 @@ end_playback: rc = pager_delmaster(pPager, zMaster); testcase( rc!=SQLITE_OK ); } + if( isHot && nPlayback ){ + sqlite3_log(SQLITE_NOTICE_RECOVER_ROLLBACK, "recovered %d pages from %s", + nPlayback, pPager->zJournal); + } /* The Pager.sectorSize variable may have been updated while rolling ** back a journal created by a process with a different sector size @@ -40222,11 +41188,10 @@ end_playback: ** If an IO error occurs, then the IO error is returned to the caller. ** Otherwise, SQLITE_OK is returned. 
*/ -static int readDbPage(PgHdr *pPg){ +static int readDbPage(PgHdr *pPg, u32 iFrame){ Pager *pPager = pPg->pPager; /* Pager object associated with page pPg */ Pgno pgno = pPg->pgno; /* Page number to read */ int rc = SQLITE_OK; /* Return code */ - int isInWal = 0; /* True if page is in log file */ int pgsz = pPager->pageSize; /* Number of bytes to read */ assert( pPager->eState>=PAGER_READER && !MEMDB ); @@ -40238,11 +41203,13 @@ static int readDbPage(PgHdr *pPg){ return SQLITE_OK; } - if( pagerUseWal(pPager) ){ +#ifndef SQLITE_OMIT_WAL + if( iFrame ){ /* Try to pull the page from the write-ahead log. */ - rc = sqlite3WalRead(pPager->pWal, pgno, &isInWal, pgsz, pPg->pData); - } - if( rc==SQLITE_OK && !isInWal ){ + rc = sqlite3WalReadFrame(pPager->pWal, iFrame, pgsz, pPg->pData); + }else +#endif + { i64 iOffset = (pgno-1)*(i64)pPager->pageSize; rc = sqlite3OsRead(pPager->fd, pPg->pData, pgsz, iOffset); if( rc==SQLITE_IOERR_SHORT_READ ){ @@ -40321,12 +41288,17 @@ static int pagerUndoCallback(void *pCtx, Pgno iPg){ Pager *pPager = (Pager *)pCtx; PgHdr *pPg; + assert( pagerUseWal(pPager) ); pPg = sqlite3PagerLookup(pPager, iPg); if( pPg ){ if( sqlite3PcachePageRefcount(pPg)==1 ){ sqlite3PcacheDrop(pPg); }else{ - rc = readDbPage(pPg); + u32 iFrame = 0; + rc = sqlite3WalFindFrame(pPager->pWal, pPg->pgno, &iFrame); + if( rc==SQLITE_OK ){ + rc = readDbPage(pPg, iFrame); + } if( rc==SQLITE_OK ){ pPager->xReiniter(pPg); } @@ -40470,6 +41442,7 @@ static int pagerBeginReadTransaction(Pager *pPager){ rc = sqlite3WalBeginReadTransaction(pPager->pWal, &changed); if( rc!=SQLITE_OK || changed ){ pager_reset(pPager); + if( USEFETCH(pPager) ) sqlite3OsUnfetch(pPager->fd, 0, 0); } return rc; @@ -40731,6 +41704,29 @@ SQLITE_PRIVATE void sqlite3PagerSetCachesize(Pager *pPager, int mxPage){ sqlite3PcacheSetCachesize(pPager->pPCache, mxPage); } +/* +** Invoke SQLITE_FCNTL_MMAP_SIZE based on the current value of szMmap. +*/ +static void pagerFixMaplimit(Pager *pPager){ +#if SQLITE_MAX_MMAP_SIZE>0 + sqlite3_file *fd = pPager->fd; + if( isOpen(fd) ){ + sqlite3_int64 sz; + pPager->bUseFetch = (fd->pMethods->iVersion>=3) && pPager->szMmap>0; + sz = pPager->szMmap; + sqlite3OsFileControlHint(pPager->fd, SQLITE_FCNTL_MMAP_SIZE, &sz); + } +#endif +} + +/* +** Change the maximum size of any memory mapping made of the database file. +*/ +SQLITE_PRIVATE void sqlite3PagerSetMmapLimit(Pager *pPager, sqlite3_int64 szMmap){ + pPager->szMmap = szMmap; + pagerFixMaplimit(pPager); +} + /* ** Free as much memory as possible from the pager. */ @@ -40966,6 +41962,7 @@ SQLITE_PRIVATE int sqlite3PagerSetPagesize(Pager *pPager, u32 *pPageSize, int nR assert( nReserve>=0 && nReserve<1000 ); pPager->nReserve = (i16)nReserve; pagerReportSize(pPager); + pagerFixMaplimit(pPager); } return rc; } @@ -41191,6 +42188,81 @@ static int pagerSyncHotJournal(Pager *pPager){ return rc; } +/* +** Obtain a reference to a memory mapped page object for page number pgno. +** The new object will use the pointer pData, obtained from xFetch(). +** If successful, set *ppPage to point to the new page reference +** and return SQLITE_OK. Otherwise, return an SQLite error code and set +** *ppPage to zero. +** +** Page references obtained by calling this function should be released +** by calling pagerReleaseMapPage(). 
+*/ +static int pagerAcquireMapPage( + Pager *pPager, /* Pager object */ + Pgno pgno, /* Page number */ + void *pData, /* xFetch()'d data for this page */ + PgHdr **ppPage /* OUT: Acquired page object */ +){ + PgHdr *p; /* Memory mapped page to return */ + + if( pPager->pMmapFreelist ){ + *ppPage = p = pPager->pMmapFreelist; + pPager->pMmapFreelist = p->pDirty; + p->pDirty = 0; + memset(p->pExtra, 0, pPager->nExtra); + }else{ + *ppPage = p = (PgHdr *)sqlite3MallocZero(sizeof(PgHdr) + pPager->nExtra); + if( p==0 ){ + sqlite3OsUnfetch(pPager->fd, (i64)(pgno-1) * pPager->pageSize, pData); + return SQLITE_NOMEM; + } + p->pExtra = (void *)&p[1]; + p->flags = PGHDR_MMAP; + p->nRef = 1; + p->pPager = pPager; + } + + assert( p->pExtra==(void *)&p[1] ); + assert( p->pPage==0 ); + assert( p->flags==PGHDR_MMAP ); + assert( p->pPager==pPager ); + assert( p->nRef==1 ); + + p->pgno = pgno; + p->pData = pData; + pPager->nMmapOut++; + + return SQLITE_OK; +} + +/* +** Release a reference to page pPg. pPg must have been returned by an +** earlier call to pagerAcquireMapPage(). +*/ +static void pagerReleaseMapPage(PgHdr *pPg){ + Pager *pPager = pPg->pPager; + pPager->nMmapOut--; + pPg->pDirty = pPager->pMmapFreelist; + pPager->pMmapFreelist = pPg; + + assert( pPager->fd->pMethods->iVersion>=3 ); + sqlite3OsUnfetch(pPager->fd, (i64)(pPg->pgno-1)*pPager->pageSize, pPg->pData); +} + +/* +** Free all PgHdr objects stored in the Pager.pMmapFreelist list. +*/ +static void pagerFreeMapHdrs(Pager *pPager){ + PgHdr *p; + PgHdr *pNext; + for(p=pPager->pMmapFreelist; p; p=pNext){ + pNext = p->pDirty; + sqlite3_free(p); + } +} + + /* ** Shutdown the page cache. Free all memory and close all files. ** @@ -41211,6 +42283,7 @@ SQLITE_PRIVATE int sqlite3PagerClose(Pager *pPager){ assert( assert_pager_state(pPager) ); disable_simulated_io_errors(); sqlite3BeginBenignMalloc(); + pagerFreeMapHdrs(pPager); /* pPager->errCode = 0; */ pPager->exclusiveMode = 0; #ifndef SQLITE_OMIT_WAL @@ -41472,7 +42545,9 @@ static int pager_write_pagelist(Pager *pPager, PgHdr *pList){ ** file size will be. */ assert( rc!=SQLITE_OK || isOpen(pPager->fd) ); - if( rc==SQLITE_OK && pPager->dbSize>pPager->dbHintSize ){ + if( rc==SQLITE_OK + && (pList->pDirty ? pPager->dbSize : pList->pgno+1)>pPager->dbHintSize + ){ sqlite3_int64 szFile = pPager->pageSize * (sqlite3_int64)pPager->dbSize; sqlite3OsFileControlHint(pPager->fd, SQLITE_FCNTL_SIZE_HINT, &szFile); pPager->dbHintSize = pPager->dbSize; @@ -42026,6 +43101,7 @@ SQLITE_PRIVATE int sqlite3PagerOpen( /* pPager->pBusyHandlerArg = 0; */ pPager->xReiniter = xReinit; /* memset(pPager->aHash, 0, sizeof(pPager->aHash)); */ + /* pPager->szMmap = SQLITE_DEFAULT_MMAP_SIZE // will be set by btree.c */ *ppPager = pPager; return SQLITE_OK; @@ -42317,9 +43393,11 @@ SQLITE_PRIVATE int sqlite3PagerSharedLock(Pager *pPager){ ); } - if( !pPager->tempFile - && (pPager->pBackup || sqlite3PcachePagecount(pPager->pPCache)>0) - ){ + if( !pPager->tempFile && ( + pPager->pBackup + || sqlite3PcachePagecount(pPager->pPCache)>0 + || USEFETCH(pPager) + )){ /* The shared-lock has just been acquired on the database file ** and there are already pages in the cache (from a previous ** read or write transaction). 
Check to see if the database @@ -42345,7 +43423,7 @@ SQLITE_PRIVATE int sqlite3PagerSharedLock(Pager *pPager){ if( nPage>0 ){ IOTRACE(("CKVERS %p %d\n", pPager, sizeof(dbFileVers))); rc = sqlite3OsRead(pPager->fd, &dbFileVers, sizeof(dbFileVers), 24); - if( rc!=SQLITE_OK ){ + if( rc!=SQLITE_OK && rc!=SQLITE_IOERR_SHORT_READ ){ goto failed; } }else{ @@ -42354,6 +43432,16 @@ SQLITE_PRIVATE int sqlite3PagerSharedLock(Pager *pPager){ if( memcmp(pPager->dbFileVers, dbFileVers, sizeof(dbFileVers))!=0 ){ pager_reset(pPager); + + /* Unmap the database file. It is possible that external processes + ** may have truncated the database file and then extended it back + ** to its original size while this process was not holding a lock. + ** In this case there may exist a Pager.pMap mapping that appears + ** to be the right size but is not actually valid. Avoid this + ** possibility by unmapping the db here. */ + if( USEFETCH(pPager) ){ + sqlite3OsUnfetch(pPager->fd, 0, 0); + } } } @@ -42395,7 +43483,7 @@ SQLITE_PRIVATE int sqlite3PagerSharedLock(Pager *pPager){ ** nothing to rollback, so this routine is a no-op. */ static void pagerUnlockIfUnused(Pager *pPager){ - if( (sqlite3PcacheRefCount(pPager->pPCache)==0) ){ + if( pPager->nMmapOut==0 && (sqlite3PcacheRefCount(pPager->pPCache)==0) ){ pagerUnlockAndRollback(pPager); } } @@ -42454,13 +43542,27 @@ SQLITE_PRIVATE int sqlite3PagerAcquire( Pager *pPager, /* The pager open on the database file */ Pgno pgno, /* Page number to fetch */ DbPage **ppPage, /* Write a pointer to the page here */ - int noContent /* Do not bother reading content from disk if true */ + int flags /* PAGER_ACQUIRE_XXX flags */ ){ - int rc; - PgHdr *pPg; + int rc = SQLITE_OK; + PgHdr *pPg = 0; + u32 iFrame = 0; /* Frame to read from WAL file */ + const int noContent = (flags & PAGER_ACQUIRE_NOCONTENT); + + /* It is acceptable to use a read-only (mmap) page for any page except + ** page 1 if there is no write-transaction open or the ACQUIRE_READONLY + ** flag was specified by the caller. And so long as the db is not a + ** temporary or in-memory database. 
*/ + const int bMmapOk = (pgno!=1 && USEFETCH(pPager) + && (pPager->eState==PAGER_READER || (flags & PAGER_ACQUIRE_READONLY)) +#ifdef SQLITE_HAS_CODEC + && pPager->xCodec==0 +#endif + ); assert( pPager->eState>=PAGER_READER ); assert( assert_pager_state(pPager) ); + assert( noContent==0 || bMmapOk==0 ); if( pgno==0 ){ return SQLITE_CORRUPT_BKPT; @@ -42471,6 +43573,39 @@ SQLITE_PRIVATE int sqlite3PagerAcquire( if( pPager->errCode!=SQLITE_OK ){ rc = pPager->errCode; }else{ + + if( bMmapOk && pagerUseWal(pPager) ){ + rc = sqlite3WalFindFrame(pPager->pWal, pgno, &iFrame); + if( rc!=SQLITE_OK ) goto pager_acquire_err; + } + + if( iFrame==0 && bMmapOk ){ + void *pData = 0; + + rc = sqlite3OsFetch(pPager->fd, + (i64)(pgno-1) * pPager->pageSize, pPager->pageSize, &pData + ); + + if( rc==SQLITE_OK && pData ){ + if( pPager->eState>PAGER_READER ){ + (void)sqlite3PcacheFetch(pPager->pPCache, pgno, 0, &pPg); + } + if( pPg==0 ){ + rc = pagerAcquireMapPage(pPager, pgno, pData, &pPg); + }else{ + sqlite3OsUnfetch(pPager->fd, (i64)(pgno-1)*pPager->pageSize, pData); + } + if( pPg ){ + assert( rc==SQLITE_OK ); + *ppPage = pPg; + return SQLITE_OK; + } + } + if( rc!=SQLITE_OK ){ + goto pager_acquire_err; + } + } + rc = sqlite3PcacheFetch(pPager->pPCache, pgno, 1, ppPage); } @@ -42529,9 +43664,13 @@ SQLITE_PRIVATE int sqlite3PagerAcquire( memset(pPg->pData, 0, pPager->pageSize); IOTRACE(("ZERO %p %d\n", pPager, pgno)); }else{ + if( pagerUseWal(pPager) && bMmapOk==0 ){ + rc = sqlite3WalFindFrame(pPager->pWal, pgno, &iFrame); + if( rc!=SQLITE_OK ) goto pager_acquire_err; + } assert( pPg->pPager==pPager ); pPager->aStat[PAGER_STAT_MISS]++; - rc = readDbPage(pPg); + rc = readDbPage(pPg, iFrame); if( rc!=SQLITE_OK ){ goto pager_acquire_err; } @@ -42584,7 +43723,11 @@ SQLITE_PRIVATE DbPage *sqlite3PagerLookup(Pager *pPager, Pgno pgno){ SQLITE_PRIVATE void sqlite3PagerUnref(DbPage *pPg){ if( pPg ){ Pager *pPager = pPg->pPager; - sqlite3PcacheRelease(pPg); + if( pPg->flags & PGHDR_MMAP ){ + pagerReleaseMapPage(pPg); + }else{ + sqlite3PcacheRelease(pPg); + } pagerUnlockIfUnused(pPager); } } @@ -42919,6 +44062,7 @@ SQLITE_PRIVATE int sqlite3PagerWrite(DbPage *pDbPage){ Pager *pPager = pPg->pPager; Pgno nPagePerSector = (pPager->sectorSize/pPager->pageSize); + assert( (pPg->flags & PGHDR_MMAP)==0 ); assert( pPager->eState>=PAGER_WRITER_LOCKED ); assert( pPager->eState!=PAGER_ERROR ); assert( assert_pager_state(pPager) ); @@ -43118,6 +44262,11 @@ static int pager_incr_changecounter(Pager *pPager, int isDirectMode){ pPager->aStat[PAGER_STAT_WRITE]++; } if( rc==SQLITE_OK ){ + /* Update the pager's copy of the change-counter. Otherwise, the + ** next time a read transaction is opened the cache will be + ** flushed (as the change-counter values will not match). */ + const void *pCopy = (const void *)&((const char *)zBuf)[24]; + memcpy(&pPager->dbFileVers, pCopy, sizeof(pPager->dbFileVers)); pPager->changeCountDone = 1; } }else{ @@ -43475,7 +44624,7 @@ SQLITE_PRIVATE int sqlite3PagerRollback(Pager *pPager){ } assert( pPager->eState==PAGER_READER || rc!=SQLITE_OK ); - assert( rc==SQLITE_OK || rc==SQLITE_FULL + assert( rc==SQLITE_OK || rc==SQLITE_FULL || rc==SQLITE_CORRUPT || rc==SQLITE_NOMEM || (rc&0xFF)==SQLITE_IOERR ); /* If an error occurs during a ROLLBACK, we can no longer trust the pager @@ -44209,11 +45358,12 @@ static int pagerOpenWal(Pager *pPager){ ** (e.g. due to malloc() failure), return an error code. 
*/ if( rc==SQLITE_OK ){ - rc = sqlite3WalOpen(pPager->pVfs, + rc = sqlite3WalOpen(pPager->pVfs, pPager->fd, pPager->zWal, pPager->exclusiveMode, pPager->journalSizeLimit, &pPager->pWal ); } + pagerFixMaplimit(pPager); return rc; } @@ -44304,6 +45454,7 @@ SQLITE_PRIVATE int sqlite3PagerCloseWal(Pager *pPager){ rc = sqlite3WalClose(pPager->pWal, pPager->ckptSyncFlags, pPager->pageSize, (u8*)pPager->pTmpSpace); pPager->pWal = 0; + pagerFixMaplimit(pPager); } } return rc; @@ -45552,8 +46703,9 @@ finished: ** checkpointing the log file. */ if( pWal->hdr.nPage ){ - sqlite3_log(SQLITE_OK, "Recovered %d frames from WAL file %s", - pWal->hdr.nPage, pWal->zWalName + sqlite3_log(SQLITE_NOTICE_RECOVER_WAL, + "recovered %d frames from WAL file %s", + pWal->hdr.mxFrame, pWal->zWalName ); } } @@ -46067,8 +47219,8 @@ static int walCheckpoint( rc = sqlite3OsSync(pWal->pWalFd, sync_flags); } - /* If the database file may grow as a result of this checkpoint, hint - ** about the eventual size of the db file to the VFS layer. + /* If the database may grow as a result of this checkpoint, hint + ** about the eventual size of the db file to the VFS layer. */ if( rc==SQLITE_OK ){ i64 nReq = ((i64)mxPage * szPage); @@ -46078,6 +47230,7 @@ static int walCheckpoint( } } + /* Iterate through the contents of the WAL, copying data to the db file. */ while( rc==SQLITE_OK && 0==walIteratorNext(pIter, &iDbpage, &iFrame) ){ i64 iOffset; @@ -46632,19 +47785,17 @@ SQLITE_PRIVATE void sqlite3WalEndReadTransaction(Wal *pWal){ } /* -** Read a page from the WAL, if it is present in the WAL and if the -** current read transaction is configured to use the WAL. +** Search the wal file for page pgno. If found, set *piRead to the frame that +** contains the page. Otherwise, if pgno is not in the wal file, set *piRead +** to zero. ** -** The *pInWal is set to 1 if the requested page is in the WAL and -** has been loaded. Or *pInWal is set to 0 if the page was not in -** the WAL and needs to be read out of the database. +** Return SQLITE_OK if successful, or an error code if an error occurs. If an +** error does occur, the final value of *piRead is undefined. */ -SQLITE_PRIVATE int sqlite3WalRead( +SQLITE_PRIVATE int sqlite3WalFindFrame( Wal *pWal, /* WAL handle */ Pgno pgno, /* Database page number to read data for */ - int *pInWal, /* OUT: True if data is read from WAL */ - int nOut, /* Size of buffer pOut in bytes */ - u8 *pOut /* Buffer to write page data to */ + u32 *piRead /* OUT: Frame number (or zero) */ ){ u32 iRead = 0; /* If !=0, WAL frame to return data from */ u32 iLast = pWal->hdr.mxFrame; /* Last page in WAL for this reader */ @@ -46660,7 +47811,7 @@ SQLITE_PRIVATE int sqlite3WalRead( ** WAL were empty. */ if( iLast==0 || pWal->readLock==0 ){ - *pInWal = 0; + *piRead = 0; return SQLITE_OK; } @@ -46731,26 +47882,31 @@ SQLITE_PRIVATE int sqlite3WalRead( } #endif - /* If iRead is non-zero, then it is the log frame number that contains the - ** required page. Read and return data from the log file. - */ - if( iRead ){ - int sz; - i64 iOffset; - sz = pWal->hdr.szPage; - sz = (sz&0xfe00) + ((sz&0x0001)<<16); - testcase( sz<=32768 ); - testcase( sz>=65536 ); - iOffset = walFrameOffset(iRead, sz) + WAL_FRAME_HDRSIZE; - *pInWal = 1; - /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL */ - return sqlite3OsRead(pWal->pWalFd, pOut, (nOut>sz ? 
sz : nOut), iOffset); - } - - *pInWal = 0; + *piRead = iRead; return SQLITE_OK; } +/* +** Read the contents of frame iRead from the wal file into buffer pOut +** (which is nOut bytes in size). Return SQLITE_OK if successful, or an +** error code otherwise. +*/ +SQLITE_PRIVATE int sqlite3WalReadFrame( + Wal *pWal, /* WAL handle */ + u32 iRead, /* Frame to read */ + int nOut, /* Size of buffer pOut in bytes */ + u8 *pOut /* Buffer to write page data to */ +){ + int sz; + i64 iOffset; + sz = pWal->hdr.szPage; + sz = (sz&0xfe00) + ((sz&0x0001)<<16); + testcase( sz<=32768 ); + testcase( sz>=65536 ); + iOffset = walFrameOffset(iRead, sz) + WAL_FRAME_HDRSIZE; + /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL */ + return sqlite3OsRead(pWal->pWalFd, pOut, (nOut>sz ? sz : nOut), iOffset); +} /* ** Return the size of the database in pages (or zero, if unknown). @@ -47297,6 +48453,9 @@ SQLITE_PRIVATE int sqlite3WalCheckpoint( /* Read the wal-index header. */ if( rc==SQLITE_OK ){ rc = walIndexReadHdr(pWal, &isChanged); + if( isChanged && pWal->pDbFd->pMethods->iVersion>=3 ){ + sqlite3OsUnfetch(pWal->pDbFd, 0, 0); + } } /* Copy data from the log to the database file. */ @@ -49968,13 +51127,17 @@ static int btreeGetPage( BtShared *pBt, /* The btree */ Pgno pgno, /* Number of the page to fetch */ MemPage **ppPage, /* Return the page in this parameter */ - int noContent /* Do not load page content if true */ + int noContent, /* Do not load page content if true */ + int bReadonly /* True if a read-only (mmap) page is ok */ ){ int rc; DbPage *pDbPage; + int flags = (noContent ? PAGER_ACQUIRE_NOCONTENT : 0) + | (bReadonly ? PAGER_ACQUIRE_READONLY : 0); + assert( noContent==0 || bReadonly==0 ); assert( sqlite3_mutex_held(pBt->mutex) ); - rc = sqlite3PagerAcquire(pBt->pPager, pgno, (DbPage**)&pDbPage, noContent); + rc = sqlite3PagerAcquire(pBt->pPager, pgno, (DbPage**)&pDbPage, flags); if( rc ) return rc; *ppPage = btreePageFromDbPage(pDbPage, pgno, pBt); return SQLITE_OK; @@ -50017,9 +51180,10 @@ SQLITE_PRIVATE u32 sqlite3BtreeLastPage(Btree *p){ ** may remain unchanged, or it may be set to an invalid value. */ static int getAndInitPage( - BtShared *pBt, /* The database file */ - Pgno pgno, /* Number of the page to get */ - MemPage **ppPage /* Write the page pointer here */ + BtShared *pBt, /* The database file */ + Pgno pgno, /* Number of the page to get */ + MemPage **ppPage, /* Write the page pointer here */ + int bReadonly /* True if a read-only (mmap) page is ok */ ){ int rc; assert( sqlite3_mutex_held(pBt->mutex) ); @@ -50027,7 +51191,7 @@ static int getAndInitPage( if( pgno>btreePagecount(pBt) ){ rc = SQLITE_CORRUPT_BKPT; }else{ - rc = btreeGetPage(pBt, pgno, ppPage, 0); + rc = btreeGetPage(pBt, pgno, ppPage, 0, bReadonly); if( rc==SQLITE_OK ){ rc = btreeInitPage(*ppPage); if( rc!=SQLITE_OK ){ @@ -50258,6 +51422,7 @@ SQLITE_PRIVATE int sqlite3BtreeOpen( rc = sqlite3PagerOpen(pVfs, &pBt->pPager, zFilename, EXTRA_SIZE, flags, vfsFlags, pageReinit); if( rc==SQLITE_OK ){ + sqlite3PagerSetMmapLimit(pBt->pPager, db->szMmap); rc = sqlite3PagerReadFileheader(pBt->pPager,sizeof(zDbHeader),zDbHeader); } if( rc!=SQLITE_OK ){ @@ -50524,6 +51689,19 @@ SQLITE_PRIVATE int sqlite3BtreeSetCacheSize(Btree *p, int mxPage){ return SQLITE_OK; } +/* +** Change the limit on the amount of the database file that may be +** memory mapped. 
+*/ +SQLITE_PRIVATE int sqlite3BtreeSetMmapLimit(Btree *p, sqlite3_int64 szMmap){ + BtShared *pBt = p->pBt; + assert( sqlite3_mutex_held(p->db->mutex) ); + sqlite3BtreeEnter(p); + sqlite3PagerSetMmapLimit(pBt->pPager, szMmap); + sqlite3BtreeLeave(p); + return SQLITE_OK; +} + /* ** Change the way data is synced to disk in order to increase or decrease ** how well the database resists damage due to OS crashes and power @@ -50749,7 +51927,7 @@ static int lockBtree(BtShared *pBt){ assert( pBt->pPage1==0 ); rc = sqlite3PagerSharedLock(pBt->pPager); if( rc!=SQLITE_OK ) return rc; - rc = btreeGetPage(pBt, 1, &pPage1, 0); + rc = btreeGetPage(pBt, 1, &pPage1, 0, 0); if( rc!=SQLITE_OK ) return rc; /* Do some checking to help insure the file we opened really is @@ -50885,6 +52063,29 @@ page1_init_failed: return rc; } +#ifndef NDEBUG +/* +** Return the number of cursors open on pBt. This is for use +** in assert() expressions, so it is only compiled if NDEBUG is not +** defined. +** +** Only write cursors are counted if wrOnly is true. If wrOnly is +** false then all cursors are counted. +** +** For the purposes of this routine, a cursor is any cursor that +** is capable of reading or writing to the databse. Cursors that +** have been tripped into the CURSOR_FAULT state are not counted. +*/ +static int countValidCursors(BtShared *pBt, int wrOnly){ + BtCursor *pCur; + int r = 0; + for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){ + if( (wrOnly==0 || pCur->wrFlag) && pCur->eState!=CURSOR_FAULT ) r++; + } + return r; +} +#endif + /* ** If there are no outstanding cursors and we are not in the middle ** of a transaction but there is a read lock on the database, then @@ -50895,7 +52096,7 @@ page1_init_failed: */ static void unlockBtreeIfUnused(BtShared *pBt){ assert( sqlite3_mutex_held(pBt->mutex) ); - assert( pBt->pCursor==0 || pBt->inTransaction>TRANS_NONE ); + assert( countValidCursors(pBt,0)==0 || pBt->inTransaction>TRANS_NONE ); if( pBt->inTransaction==TRANS_NONE && pBt->pPage1!=0 ){ assert( pBt->pPage1->aData ); assert( sqlite3PagerRefcount(pBt->pPager)==1 ); @@ -51308,7 +52509,7 @@ static int relocatePage( ** iPtrPage. 
 */
   if( eType!=PTRMAP_ROOTPAGE ){
-    rc = btreeGetPage(pBt, iPtrPage, &pPtrPage, 0);
+    rc = btreeGetPage(pBt, iPtrPage, &pPtrPage, 0, 0);
     if( rc!=SQLITE_OK ){
       return rc;
     }
@@ -51392,7 +52593,7 @@ static int incrVacuumStep(BtShared *pBt, Pgno nFin, Pgno iLastPg, int bCommit){
     u8 eMode = BTALLOC_ANY;   /* Mode parameter for allocateBtreePage() */
     Pgno iNear = 0;           /* nearby parameter for allocateBtreePage() */
-    rc = btreeGetPage(pBt, iLastPg, &pLastPg, 0);
+    rc = btreeGetPage(pBt, iLastPg, &pLastPg, 0, 0);
     if( rc!=SQLITE_OK ){
       return rc;
     }
@@ -51484,8 +52685,11 @@ SQLITE_PRIVATE int sqlite3BtreeIncrVacuum(Btree *p){
     if( nOrig<nFin ){
       rc = SQLITE_CORRUPT_BKPT;
     }else if( nFree>0 ){
-      invalidateAllOverflowCache(pBt);
-      rc = incrVacuumStep(pBt, nFin, nOrig, 0);
+      rc = saveAllCursors(pBt, 0, 0);
+      if( rc==SQLITE_OK ){
+        invalidateAllOverflowCache(pBt);
+        rc = incrVacuumStep(pBt, nFin, nOrig, 0);
+      }
       if( rc==SQLITE_OK ){
         rc = sqlite3PagerWrite(pBt->pPage1->pDbPage);
         put4byte(&pBt->pPage1->aData[28], pBt->nPage);
@@ -51533,7 +52737,9 @@ static int autoVacuumCommit(BtShared *pBt){
     nFree = get4byte(&pBt->pPage1->aData[36]);
     nFin = finalDbSize(pBt, nOrig, nFree);
     if( nFin>nOrig ) return SQLITE_CORRUPT_BKPT;
-
+    if( nFin<nOrig ){
+      rc = saveAllCursors(pBt, 0, 0);
+    }
     for(iFree=nOrig; iFree>nFin && rc==SQLITE_OK; iFree--){
       rc = incrVacuumStep(pBt, nFin, iFree, 1);
     }
@@ -51550,7 +52756,7 @@ static int autoVacuumCommit(BtShared *pBt){
       }
     }
-  assert( nRef==sqlite3PagerRefcount(pPager) );
+  assert( nRef>=sqlite3PagerRefcount(pPager) );
   return rc;
 }
@@ -51618,7 +52824,6 @@ static void btreeEndTransaction(Btree *p){
 #ifndef SQLITE_OMIT_AUTOVACUUM
   pBt->bDoTruncate = 0;
 #endif
-  btreeClearHasContent(pBt);
   if( p->inTrans>TRANS_NONE && p->db->activeVdbeCnt>1 ){
     /* If there are other active statements that belong to this database
     ** handle, downgrade to a read-only transaction. The other statements
@@ -51693,6 +52898,7 @@ SQLITE_PRIVATE int sqlite3BtreeCommitPhaseTwo(Btree *p, int bCleanup){
       return rc;
     }
     pBt->inTransaction = TRANS_READ;
+    btreeClearHasContent(pBt);
   }
   btreeEndTransaction(p);
@@ -51714,27 +52920,6 @@ SQLITE_PRIVATE int sqlite3BtreeCommit(Btree *p){
   return rc;
 }
-#ifndef NDEBUG
-/*
-** Return the number of write-cursors open on this handle. This is for use
-** in assert() expressions, so it is only compiled if NDEBUG is not
-** defined.
-**
-** For the purposes of this routine, a write-cursor is any cursor that
-** is capable of writing to the databse. That means the cursor was
-** originally opened for writing and the cursor has not be disabled
-** by having its state changed to CURSOR_FAULT.
-*/
-static int countWriteCursors(BtShared *pBt){
-  BtCursor *pCur;
-  int r = 0;
-  for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){
-    if( pCur->wrFlag && pCur->eState!=CURSOR_FAULT ) r++;
-  }
-  return r;
-}
-#endif
-
 /*
 ** This routine sets the state to CURSOR_FAULT and the error
 ** code to errCode for every cursor on BtShared that pBtree
@@ -51806,7 +52991,7 @@ SQLITE_PRIVATE int sqlite3BtreeRollback(Btree *p, int tripCode){
     /* The rollback may have destroyed the pPage1->aData value. So
     ** call btreeGetPage() on page 1 again to make
     ** sure pPage1->aData is set correctly. 
*/ - if( btreeGetPage(pBt, 1, &pPage1, 0)==SQLITE_OK ){ + if( btreeGetPage(pBt, 1, &pPage1, 0, 0)==SQLITE_OK ){ int nPage = get4byte(28+(u8*)pPage1->aData); testcase( nPage==0 ); if( nPage==0 ) sqlite3PagerPagecount(pBt->pPager, &nPage); @@ -51814,8 +52999,9 @@ SQLITE_PRIVATE int sqlite3BtreeRollback(Btree *p, int tripCode){ pBt->nPage = nPage; releasePage(pPage1); } - assert( countWriteCursors(pBt)==0 ); + assert( countValidCursors(pBt, 1)==0 ); pBt->inTransaction = TRANS_READ; + btreeClearHasContent(pBt); } btreeEndTransaction(p); @@ -52240,7 +53426,7 @@ static int getOverflowPage( assert( next==0 || rc==SQLITE_DONE ); if( rc==SQLITE_OK ){ - rc = btreeGetPage(pBt, ovfl, &pPage, 0); + rc = btreeGetPage(pBt, ovfl, &pPage, 0, (ppPage==0)); assert( rc==SQLITE_OK || pPage==0 ); if( rc==SQLITE_OK ){ next = get4byte(pPage->aData); @@ -52461,7 +53647,9 @@ static int accessPayload( { DbPage *pDbPage; - rc = sqlite3PagerGet(pBt->pPager, nextPage, &pDbPage); + rc = sqlite3PagerAcquire(pBt->pPager, nextPage, &pDbPage, + (eOp==0 ? PAGER_ACQUIRE_READONLY : 0) + ); if( rc==SQLITE_OK ){ aPayload = sqlite3PagerGetData(pDbPage); nextPage = get4byte(aPayload); @@ -52640,10 +53828,11 @@ static int moveToChild(BtCursor *pCur, u32 newPgno){ assert( cursorHoldsMutex(pCur) ); assert( pCur->eState==CURSOR_VALID ); assert( pCur->iPageiPage>=0 ); if( pCur->iPage>=(BTCURSOR_MAX_DEPTH-1) ){ return SQLITE_CORRUPT_BKPT; } - rc = getAndInitPage(pBt, newPgno, &pNewPage); + rc = getAndInitPage(pBt, newPgno, &pNewPage, (pCur->wrFlag==0)); if( rc ) return rc; pCur->apPage[i+1] = pNewPage; pCur->aiIdx[i+1] = 0; @@ -52760,7 +53949,7 @@ static int moveToRoot(BtCursor *pCur){ pCur->eState = CURSOR_INVALID; return SQLITE_OK; }else{ - rc = getAndInitPage(pBt, pCur->pgnoRoot, &pCur->apPage[0]); + rc = getAndInitPage(pBt, pCur->pgnoRoot, &pCur->apPage[0], pCur->wrFlag==0); if( rc!=SQLITE_OK ){ pCur->eState = CURSOR_INVALID; return rc; @@ -53374,7 +54563,7 @@ static int allocateBtreePage( if( iTrunk>mxPage ){ rc = SQLITE_CORRUPT_BKPT; }else{ - rc = btreeGetPage(pBt, iTrunk, &pTrunk, 0); + rc = btreeGetPage(pBt, iTrunk, &pTrunk, 0, 0); } if( rc ){ pTrunk = 0; @@ -53438,7 +54627,7 @@ static int allocateBtreePage( goto end_allocate_page; } testcase( iNewTrunk==mxPage ); - rc = btreeGetPage(pBt, iNewTrunk, &pNewTrunk, 0); + rc = btreeGetPage(pBt, iNewTrunk, &pNewTrunk, 0, 0); if( rc!=SQLITE_OK ){ goto end_allocate_page; } @@ -53518,7 +54707,7 @@ static int allocateBtreePage( } put4byte(&aData[4], k-1); noContent = !btreeGetHasContent(pBt, *pPgno); - rc = btreeGetPage(pBt, *pPgno, ppPage, noContent); + rc = btreeGetPage(pBt, *pPgno, ppPage, noContent, 0); if( rc==SQLITE_OK ){ rc = sqlite3PagerWrite((*ppPage)->pDbPage); if( rc!=SQLITE_OK ){ @@ -53566,7 +54755,7 @@ static int allocateBtreePage( MemPage *pPg = 0; TRACE(("ALLOCATE: %d from end of file (pointer-map page)\n", pBt->nPage)); assert( pBt->nPage!=PENDING_BYTE_PAGE(pBt) ); - rc = btreeGetPage(pBt, pBt->nPage, &pPg, bNoContent); + rc = btreeGetPage(pBt, pBt->nPage, &pPg, bNoContent, 0); if( rc==SQLITE_OK ){ rc = sqlite3PagerWrite(pPg->pDbPage); releasePage(pPg); @@ -53580,7 +54769,7 @@ static int allocateBtreePage( *pPgno = pBt->nPage; assert( *pPgno!=PENDING_BYTE_PAGE(pBt) ); - rc = btreeGetPage(pBt, *pPgno, ppPage, bNoContent); + rc = btreeGetPage(pBt, *pPgno, ppPage, bNoContent, 0); if( rc ) return rc; rc = sqlite3PagerWrite((*ppPage)->pDbPage); if( rc!=SQLITE_OK ){ @@ -53648,7 +54837,7 @@ static int freePage2(BtShared *pBt, MemPage *pMemPage, Pgno iPage){ /* If the 
secure_delete option is enabled, then ** always fully overwrite deleted information with zeros. */ - if( (!pPage && ((rc = btreeGetPage(pBt, iPage, &pPage, 0))!=0) ) + if( (!pPage && ((rc = btreeGetPage(pBt, iPage, &pPage, 0, 0))!=0) ) || ((rc = sqlite3PagerWrite(pPage->pDbPage))!=0) ){ goto freepage_out; @@ -53675,7 +54864,7 @@ static int freePage2(BtShared *pBt, MemPage *pMemPage, Pgno iPage){ u32 nLeaf; /* Initial number of leaf cells on trunk page */ iTrunk = get4byte(&pPage1->aData[32]); - rc = btreeGetPage(pBt, iTrunk, &pTrunk, 0); + rc = btreeGetPage(pBt, iTrunk, &pTrunk, 0, 0); if( rc!=SQLITE_OK ){ goto freepage_out; } @@ -53721,7 +54910,7 @@ static int freePage2(BtShared *pBt, MemPage *pMemPage, Pgno iPage){ ** first trunk in the free-list is full. Either way, the page being freed ** will become the new first trunk page in the free-list. */ - if( pPage==0 && SQLITE_OK!=(rc = btreeGetPage(pBt, iPage, &pPage, 0)) ){ + if( pPage==0 && SQLITE_OK!=(rc = btreeGetPage(pBt, iPage, &pPage, 0, 0)) ){ goto freepage_out; } rc = sqlite3PagerWrite(pPage->pDbPage); @@ -54522,7 +55711,7 @@ static int balance_nonroot( } pgno = get4byte(pRight); while( 1 ){ - rc = getAndInitPage(pBt, pgno, &apOld[i]); + rc = getAndInitPage(pBt, pgno, &apOld[i], 0); if( rc ){ memset(apOld, 0, (i+1)*sizeof(MemPage*)); goto balance_cleanup; @@ -55610,10 +56799,17 @@ static int btreeCreateTable(Btree *p, int *piTable, int createTabFlags){ u8 eType = 0; Pgno iPtrPage = 0; + /* Save the positions of any open cursors. This is required in + ** case they are holding a reference to an xFetch reference + ** corresponding to page pgnoRoot. */ + rc = saveAllCursors(pBt, 0, 0); releasePage(pPageMove); + if( rc!=SQLITE_OK ){ + return rc; + } /* Move the page currently at pgnoRoot to pgnoMove. 
*/ - rc = btreeGetPage(pBt, pgnoRoot, &pRoot, 0); + rc = btreeGetPage(pBt, pgnoRoot, &pRoot, 0, 0); if( rc!=SQLITE_OK ){ return rc; } @@ -55634,7 +56830,7 @@ static int btreeCreateTable(Btree *p, int *piTable, int createTabFlags){ if( rc!=SQLITE_OK ){ return rc; } - rc = btreeGetPage(pBt, pgnoRoot, &pRoot, 0); + rc = btreeGetPage(pBt, pgnoRoot, &pRoot, 0, 0); if( rc!=SQLITE_OK ){ return rc; } @@ -55710,7 +56906,7 @@ static int clearDatabasePage( return SQLITE_CORRUPT_BKPT; } - rc = getAndInitPage(pBt, pgno, &pPage); + rc = getAndInitPage(pBt, pgno, &pPage, 0); if( rc ) return rc; for(i=0; inCell; i++){ pCell = findCell(pPage, i); @@ -55812,7 +57008,7 @@ static int btreeDropTable(Btree *p, Pgno iTable, int *piMoved){ return SQLITE_LOCKED_SHAREDCACHE; } - rc = btreeGetPage(pBt, (Pgno)iTable, &pPage, 0); + rc = btreeGetPage(pBt, (Pgno)iTable, &pPage, 0, 0); if( rc ) return rc; rc = sqlite3BtreeClearTable(p, iTable, 0); if( rc ){ @@ -55847,7 +57043,7 @@ static int btreeDropTable(Btree *p, Pgno iTable, int *piMoved){ */ MemPage *pMove; releasePage(pPage); - rc = btreeGetPage(pBt, maxRootPgno, &pMove, 0); + rc = btreeGetPage(pBt, maxRootPgno, &pMove, 0, 0); if( rc!=SQLITE_OK ){ return rc; } @@ -55857,7 +57053,7 @@ static int btreeDropTable(Btree *p, Pgno iTable, int *piMoved){ return rc; } pMove = 0; - rc = btreeGetPage(pBt, maxRootPgno, &pMove, 0); + rc = btreeGetPage(pBt, maxRootPgno, &pMove, 0, 0); freePage(pMove, &rc); releasePage(pMove); if( rc!=SQLITE_OK ){ @@ -56269,7 +57465,7 @@ static int checkTreePage( usableSize = pBt->usableSize; if( iPage==0 ) return 0; if( checkRef(pCheck, iPage, zParentContext) ) return 0; - if( (rc = btreeGetPage(pBt, (Pgno)iPage, &pPage, 0))!=0 ){ + if( (rc = btreeGetPage(pBt, (Pgno)iPage, &pPage, 0, 0))!=0 ){ checkAppendMsg(pCheck, zContext, "unable to get the page. error code=%d", rc); return 0; @@ -56741,6 +57937,17 @@ SQLITE_PRIVATE int sqlite3BtreePutData(BtCursor *pCsr, u32 offset, u32 amt, void return SQLITE_ABORT; } + /* Save the positions of all other cursors open on this table. This is + ** required in case any of them are holding references to an xFetch + ** version of the b-tree page modified by the accessPayload call below. + ** + ** Note that pCsr must be open on a BTREE_INTKEY table and saveCursorPosition() + ** and hence saveAllCursors() cannot fail on a BTREE_INTKEY table, hence + ** saveAllCursors can only return SQLITE_OK. + */ + VVA_ONLY(rc =) saveAllCursors(pCsr->pBt, pCsr->pgnoRoot, pCsr); + assert( rc==SQLITE_OK ); + /* Check some assumptions: ** (a) the cursor is open for writing, ** (b) there is a read/write transaction open, @@ -57222,7 +58429,8 @@ SQLITE_API int sqlite3_backup_step(sqlite3_backup *p, int nPage){ const Pgno iSrcPg = p->iNext; /* Source page number */ if( iSrcPg!=PENDING_BYTE_PAGE(p->pSrc->pBt) ){ DbPage *pSrcPg; /* Source page object */ - rc = sqlite3PagerGet(pSrcPager, iSrcPg, &pSrcPg); + rc = sqlite3PagerAcquire(pSrcPager, iSrcPg, &pSrcPg, + PAGER_ACQUIRE_READONLY); if( rc==SQLITE_OK ){ rc = backupOnePage(p, iSrcPg, sqlite3PagerGetData(pSrcPg), 0); sqlite3PagerUnref(pSrcPg); @@ -62445,14 +63653,6 @@ end_of_step: return (rc&db->errMask); } -/* -** The maximum number of times that a statement will try to reparse -** itself before giving up and returning SQLITE_SCHEMA. -*/ -#ifndef SQLITE_MAX_SCHEMA_RETRY -# define SQLITE_MAX_SCHEMA_RETRY 5 -#endif - /* ** This is the top-level implementation of sqlite3_step(). Call ** sqlite3Step() to do most of the work. 
If a schema error occurs, @@ -63356,6 +64556,11 @@ static int findNextHostParameter(const char *zSql, int *pnToken){ ** then the returned string holds a copy of zRawSql with "-- " prepended ** to each line of text. ** +** If the SQLITE_TRACE_SIZE_LIMIT macro is defined to an integer, then +** then long strings and blobs are truncated to that many bytes. This +** can be used to prevent unreasonably large trace strings when dealing +** with large (multi-megabyte) strings and blobs. +** ** The calling function is responsible for making sure the memory returned ** is eventually freed. ** @@ -63426,30 +64631,49 @@ SQLITE_PRIVATE char *sqlite3VdbeExpandSql( }else if( pVar->flags & MEM_Real ){ sqlite3XPrintf(&out, "%!.15g", pVar->r); }else if( pVar->flags & MEM_Str ){ + int nOut; /* Number of bytes of the string text to include in output */ #ifndef SQLITE_OMIT_UTF16 u8 enc = ENC(db); + Mem utf8; if( enc!=SQLITE_UTF8 ){ - Mem utf8; memset(&utf8, 0, sizeof(utf8)); utf8.db = db; sqlite3VdbeMemSetStr(&utf8, pVar->z, pVar->n, enc, SQLITE_STATIC); sqlite3VdbeChangeEncoding(&utf8, SQLITE_UTF8); - sqlite3XPrintf(&out, "'%.*q'", utf8.n, utf8.z); - sqlite3VdbeMemRelease(&utf8); - }else -#endif - { - sqlite3XPrintf(&out, "'%.*q'", pVar->n, pVar->z); + pVar = &utf8; } +#endif + nOut = pVar->n; +#ifdef SQLITE_TRACE_SIZE_LIMIT + if( nOut>SQLITE_TRACE_SIZE_LIMIT ){ + nOut = SQLITE_TRACE_SIZE_LIMIT; + while( nOutn && (pVar->z[nOut]&0xc0)==0x80 ){ nOut++; } + } +#endif + sqlite3XPrintf(&out, "'%.*q'", nOut, pVar->z); +#ifdef SQLITE_TRACE_SIZE_LIMIT + if( nOutn ) sqlite3XPrintf(&out, "/*+%d bytes*/", pVar->n-nOut); +#endif +#ifndef SQLITE_OMIT_UTF16 + if( enc!=SQLITE_UTF8 ) sqlite3VdbeMemRelease(&utf8); +#endif }else if( pVar->flags & MEM_Zero ){ sqlite3XPrintf(&out, "zeroblob(%d)", pVar->u.nZero); }else{ + int nOut; /* Number of bytes of the blob to include in output */ assert( pVar->flags & MEM_Blob ); sqlite3StrAccumAppend(&out, "x'", 2); - for(i=0; in; i++){ + nOut = pVar->n; +#ifdef SQLITE_TRACE_SIZE_LIMIT + if( nOut>SQLITE_TRACE_SIZE_LIMIT ) nOut = SQLITE_TRACE_SIZE_LIMIT; +#endif + for(i=0; iz[i]&0xff); } sqlite3StrAccumAppend(&out, "'", 1); +#ifdef SQLITE_TRACE_SIZE_LIMIT + if( nOutn ) sqlite3XPrintf(&out, "/*+%d bytes*/", pVar->n-nOut); +#endif } } } @@ -67666,7 +68890,7 @@ case OP_SeekGt: { /* jump, in3 */ ** u.bc.r.flags = 0; ** } */ - u.bc.r.flags = (u16)(UNPACKED_INCRKEY * (1 & (u.bc.oc - OP_SeekLt))); + u.bc.r.flags = (u8)(UNPACKED_INCRKEY * (1 & (u.bc.oc - OP_SeekLt))); assert( u.bc.oc!=OP_SeekGt || u.bc.r.flags==UNPACKED_INCRKEY ); assert( u.bc.oc!=OP_SeekLe || u.bc.r.flags==UNPACKED_INCRKEY ); assert( u.bc.oc!=OP_SeekGe || u.bc.r.flags==0 ); @@ -70791,7 +72015,7 @@ SQLITE_API int sqlite3_blob_open( } sqlite3_bind_int64(pBlob->pStmt, 1, iRow); rc = blobSeekToRow(pBlob, iRow, &zErr); - } while( (++nAttempt)<5 && rc==SQLITE_SCHEMA ); + } while( (++nAttempt)mallocFailed==0 ){ @@ -72476,7 +73700,9 @@ static const struct sqlite3_io_methods MemJournalMethods = { 0, /* xShmMap */ 0, /* xShmLock */ 0, /* xShmBarrier */ - 0 /* xShmUnlock */ + 0, /* xShmUnmap */ + 0, /* xFetch */ + 0 /* xUnfetch */ }; /* @@ -72620,7 +73846,9 @@ SQLITE_PRIVATE int sqlite3WalkSelectFrom(Walker *pWalker, Select *p){ /* ** Call sqlite3WalkExpr() for every expression in Select statement p. ** Invoke sqlite3WalkSelect() for subqueries in the FROM clause and -** on the compound select chain, p->pPrior. +** on the compound select chain, p->pPrior. 
Invoke the xSelectCallback() +** either before or after the walk of expressions and FROM clause, depending +** on whether pWalker->bSelectDepthFirst is false or true, respectively. ** ** Return WRC_Continue under normal conditions. Return WRC_Abort if ** there is an abort request. @@ -72634,14 +73862,23 @@ SQLITE_PRIVATE int sqlite3WalkSelect(Walker *pWalker, Select *p){ rc = WRC_Continue; pWalker->walkerDepth++; while( p ){ - rc = pWalker->xSelectCallback(pWalker, p); - if( rc ) break; + if( !pWalker->bSelectDepthFirst ){ + rc = pWalker->xSelectCallback(pWalker, p); + if( rc ) break; + } if( sqlite3WalkSelectExpr(pWalker, p) || sqlite3WalkSelectFrom(pWalker, p) ){ pWalker->walkerDepth--; return WRC_Abort; } + if( pWalker->bSelectDepthFirst ){ + rc = pWalker->xSelectCallback(pWalker, p); + /* Depth-first search is currently only used for + ** selectAddSubqueryTypeInfo() and that routine always returns + ** WRC_Continue (0). So the following branch is never taken. */ + if( NEVER(rc) ) break; + } p = p->pPrior; } pWalker->walkerDepth--; @@ -73039,7 +74276,10 @@ static int lookupName( ** Note that the expression in the result set should have already been ** resolved by the time the WHERE clause is resolved. */ - if( cnt==0 && (pEList = pNC->pEList)!=0 && zTab==0 ){ + if( (pEList = pNC->pEList)!=0 + && zTab==0 + && ((pNC->ncFlags & NC_AsMaybe)==0 || cnt==0) + ){ for(j=0; jnExpr; j++){ char *zAs = pEList->a[j].zName; if( zAs!=0 && sqlite3StrICmp(zAs, zCol)==0 ){ @@ -73130,7 +74370,9 @@ static int lookupName( lookupname_end: if( cnt==1 ){ assert( pNC!=0 ); - sqlite3AuthRead(pParse, pExpr, pSchema, pNC->pSrcList); + if( pExpr->op!=TK_AS ){ + sqlite3AuthRead(pParse, pExpr, pSchema, pNC->pSrcList); + } /* Increment the nRef value on all name contexts from TopNC up to ** the point where the name matched. */ for(;;){ @@ -73805,11 +75047,10 @@ static int resolveSelectStep(Walker *pWalker, Select *p){ ** re-evaluated for each reference to it. 
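    ** (With the NC_AsMaybe flag set for the pass below, a result-set alias is
    ** only used as a fallback when the name does not otherwise resolve to a
    ** real column of a table in the FROM clause.)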
*/ sNC.pEList = p->pEList; - if( sqlite3ResolveExprNames(&sNC, p->pWhere) || - sqlite3ResolveExprNames(&sNC, p->pHaving) - ){ - return WRC_Abort; - } + sNC.ncFlags |= NC_AsMaybe; + if( sqlite3ResolveExprNames(&sNC, p->pHaving) ) return WRC_Abort; + if( sqlite3ResolveExprNames(&sNC, p->pWhere) ) return WRC_Abort; + sNC.ncFlags &= ~NC_AsMaybe; /* The ORDER BY and GROUP BY clauses may not refer to terms in ** outer queries @@ -73930,6 +75171,7 @@ SQLITE_PRIVATE int sqlite3ResolveExprNames( #endif savedHasAgg = pNC->ncFlags & NC_HasAgg; pNC->ncFlags &= ~NC_HasAgg; + memset(&w, 0, sizeof(w)); w.xExprCallback = resolveExprStep; w.xSelectCallback = resolveSelectStep; w.pParse = pNC->pParse; @@ -73970,6 +75212,7 @@ SQLITE_PRIVATE void sqlite3ResolveSelectNames( Walker w; assert( p!=0 ); + memset(&w, 0, sizeof(w)); w.xExprCallback = resolveExprStep; w.xSelectCallback = resolveSelectStep; w.pParse = pParse; @@ -74096,12 +75339,7 @@ SQLITE_PRIVATE CollSeq *sqlite3ExprCollSeq(Parse *pParse, Expr *pExpr){ } assert( op!=TK_REGISTER || p->op2!=TK_COLLATE ); if( op==TK_COLLATE ){ - if( db->init.busy ){ - /* Do not report errors when parsing while the schema */ - pColl = sqlite3FindCollSeq(db, ENC(db), p->u.zToken, 0); - }else{ - pColl = sqlite3GetCollSeq(pParse, ENC(db), 0, p->u.zToken); - } + pColl = sqlite3GetCollSeq(pParse, ENC(db), 0, p->u.zToken); break; } if( p->pTab!=0 @@ -75194,6 +76432,7 @@ static int selectNodeIsConstant(Walker *pWalker, Select *NotUsed){ } static int exprIsConst(Expr *p, int initFlag){ Walker w; + memset(&w, 0, sizeof(w)); w.u.i = initFlag; w.xExprCallback = exprNodeIsConstant; w.xSelectCallback = selectNodeIsConstant; @@ -77408,8 +78647,8 @@ SQLITE_PRIVATE void sqlite3ExprCodeConstants(Parse *pParse, Expr *pExpr){ Walker w; if( pParse->cookieGoto ) return; if( OptimizationDisabled(pParse->db, SQLITE_FactorOutConst) ) return; + memset(&w, 0, sizeof(w)); w.xExprCallback = evalConstExpr; - w.xSelectCallback = 0; w.pParse = pParse; sqlite3WalkExpr(&w, pExpr); } @@ -83601,10 +84840,8 @@ SQLITE_PRIVATE Index *sqlite3CreateIndex( for(i=0; inExpr; i++){ Expr *pExpr = pList->a[i].pExpr; if( pExpr ){ - CollSeq *pColl = sqlite3ExprCollSeq(pParse, pExpr); - if( pColl ){ - nExtra += (1 + sqlite3Strlen30(pColl->zName)); - } + assert( pExpr->op==TK_COLLATE ); + nExtra += (1 + sqlite3Strlen30(pExpr->u.zToken)); } } @@ -83665,7 +84902,6 @@ SQLITE_PRIVATE Index *sqlite3CreateIndex( const char *zColName = pListItem->zName; Column *pTabCol; int requestedSortOrder; - CollSeq *pColl; /* Collating sequence */ char *zColl; /* Collation sequence name */ for(j=0, pTabCol=pTab->aCol; jnCol; j++, pTabCol++){ @@ -83678,11 +84914,10 @@ SQLITE_PRIVATE Index *sqlite3CreateIndex( goto exit_create_index; } pIndex->aiColumn[i] = j; - if( pListItem->pExpr - && (pColl = sqlite3ExprCollSeq(pParse, pListItem->pExpr))!=0 - ){ + if( pListItem->pExpr ){ int nColl; - zColl = pColl->zName; + assert( pListItem->pExpr->op==TK_COLLATE ); + zColl = pListItem->pExpr->u.zToken; nColl = sqlite3Strlen30(zColl) + 1; assert( nExtra>=nColl ); memcpy(zExtra, zColl, nColl); @@ -83691,9 +84926,7 @@ SQLITE_PRIVATE Index *sqlite3CreateIndex( nExtra -= nColl; }else{ zColl = pTab->aCol[j].zColl; - if( !zColl ){ - zColl = "BINARY"; - } + if( !zColl ) zColl = "BINARY"; } if( !db->init.busy && !sqlite3LocateCollSeq(pParse, zColl) ){ goto exit_create_index; @@ -86612,6 +87845,13 @@ static int patternCompare( return *zString==0; } +/* +** The sqlite3_strglob() interface. 
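+**
+** A brief usage sketch (illustrative only; zFilename is just a placeholder):
+** the return value is zero when the string matches the GLOB pattern and
+** non-zero otherwise, so a caller might write
+**
+**      if( sqlite3_strglob("*.db", zFilename)==0 ){ ... }
+**
+** Note that GLOB matching, unlike the default LIKE operator, is
+** case-sensitive.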
+*/ +SQLITE_API int sqlite3_strglob(const char *zGlobPattern, const char *zString){ + return patternCompare((u8*)zGlobPattern, (u8*)zString, &globInfo, 0)==0; +} + /* ** Count the number of times that the LIKE operator (or GLOB which is ** just a variation of LIKE) gets called. This is used for testing @@ -90812,7 +92052,6 @@ SQLITE_API int sqlite3_exec( const char *zLeftover; /* Tail of unprocessed SQL */ sqlite3_stmt *pStmt = 0; /* The current SQL statement */ char **azCols = 0; /* Names of result columns */ - int nRetry = 0; /* Number of retry attempts */ int callbackIsInit; /* True if callback data is initialized */ if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT; @@ -90820,12 +92059,12 @@ SQLITE_API int sqlite3_exec( sqlite3_mutex_enter(db->mutex); sqlite3Error(db, SQLITE_OK, 0); - while( (rc==SQLITE_OK || (rc==SQLITE_SCHEMA && (++nRetry)<2)) && zSql[0] ){ + while( rc==SQLITE_OK && zSql[0] ){ int nCol; char **azVals = 0; pStmt = 0; - rc = sqlite3_prepare(db, zSql, -1, &pStmt, &zLeftover); + rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, &zLeftover); assert( rc==SQLITE_OK || pStmt==0 ); if( rc!=SQLITE_OK ){ continue; @@ -90882,11 +92121,8 @@ SQLITE_API int sqlite3_exec( if( rc!=SQLITE_ROW ){ rc = sqlite3VdbeFinalize((Vdbe *)pStmt); pStmt = 0; - if( rc!=SQLITE_SCHEMA ){ - nRetry = 0; - zSql = zLeftover; - while( sqlite3Isspace(zSql[0]) ) zSql++; - } + zSql = zLeftover; + while( sqlite3Isspace(zSql[0]) ) zSql++; break; } } @@ -91410,8 +92646,17 @@ struct sqlite3_api_routines { #define sqlite3_wal_checkpoint_v2 sqlite3_api->wal_checkpoint_v2 #endif /* SQLITE_CORE */ -#define SQLITE_EXTENSION_INIT1 const sqlite3_api_routines *sqlite3_api = 0; -#define SQLITE_EXTENSION_INIT2(v) sqlite3_api = v; +#ifndef SQLITE_CORE + /* This case when the file really is being compiled as a loadable + ** extension */ +# define SQLITE_EXTENSION_INIT1 const sqlite3_api_routines *sqlite3_api=0; +# define SQLITE_EXTENSION_INIT2(v) sqlite3_api=v; +#else + /* This case when the file is being statically linked into the + ** application */ +# define SQLITE_EXTENSION_INIT1 /*no-op*/ +# define SQLITE_EXTENSION_INIT2(v) (void)v; /* unused parameter */ +#endif #endif /* _SQLITE3EXT_H_ */ @@ -91814,8 +93059,23 @@ static int sqlite3LoadExtension( void *handle; int (*xInit)(sqlite3*,char**,const sqlite3_api_routines*); char *zErrmsg = 0; + const char *zEntry; + char *zAltEntry = 0; void **aHandle; int nMsg = 300 + sqlite3Strlen30(zFile); + int ii; + + /* Shared library endings to try if zFile cannot be loaded as written */ + static const char *azEndings[] = { +#if SQLITE_OS_WIN + "dll" +#elif defined(__APPLE__) + "dylib" +#else + "so" +#endif + }; + if( pzErrMsg ) *pzErrMsg = 0; @@ -91832,11 +93092,17 @@ static int sqlite3LoadExtension( return SQLITE_ERROR; } - if( zProc==0 ){ - zProc = "sqlite3_extension_init"; - } + zEntry = zProc ? 
zProc : "sqlite3_extension_init"; handle = sqlite3OsDlOpen(pVfs, zFile); +#if SQLITE_OS_UNIX || SQLITE_OS_WIN + for(ii=0; ii sqlite3_example_init + ** C:/lib/mathfuncs.dll ==> sqlite3_mathfuncs_init + */ + if( xInit==0 && zProc==0 ){ + int iFile, iEntry, c; + int ncFile = sqlite3Strlen30(zFile); + zAltEntry = sqlite3_malloc(ncFile+30); + if( zAltEntry==0 ){ + sqlite3OsDlClose(pVfs, handle); + return SQLITE_NOMEM; + } + memcpy(zAltEntry, "sqlite3_", 8); + for(iFile=ncFile-1; iFile>=0 && zFile[iFile]!='/'; iFile--){} + iFile++; + if( sqlite3_strnicmp(zFile+iFile, "lib", 3)==0 ) iFile += 3; + for(iEntry=8; (c = zFile[iFile])!=0 && c!='.'; iFile++){ + if( sqlite3Isalpha(c) ){ + zAltEntry[iEntry++] = (char)sqlite3UpperToLower[(unsigned)c]; + } + } + memcpy(zAltEntry+iEntry, "_init", 6); + zEntry = zAltEntry; + xInit = (int(*)(sqlite3*,char**,const sqlite3_api_routines*)) + sqlite3OsDlSym(pVfs, handle, zEntry); + } if( xInit==0 ){ if( pzErrMsg ){ - nMsg += sqlite3Strlen30(zProc); + nMsg += sqlite3Strlen30(zEntry); *pzErrMsg = zErrmsg = sqlite3_malloc(nMsg); if( zErrmsg ){ sqlite3_snprintf(nMsg, zErrmsg, - "no entry point [%s] in shared library [%s]", zProc,zFile); + "no entry point [%s] in shared library [%s]", zEntry, zFile); sqlite3OsDlError(pVfs, nMsg-1, zErrmsg); } - sqlite3OsDlClose(pVfs, handle); } + sqlite3OsDlClose(pVfs, handle); + sqlite3_free(zAltEntry); return SQLITE_ERROR; - }else if( xInit(db, &zErrmsg, &sqlite3Apis) ){ + } + sqlite3_free(zAltEntry); + if( xInit(db, &zErrmsg, &sqlite3Apis) ){ if( pzErrMsg ){ *pzErrMsg = sqlite3_mprintf("error during initialization: %s", zErrmsg); } @@ -92391,7 +93694,7 @@ SQLITE_PRIVATE void sqlite3Pragma( int rc; /* return value form SQLITE_FCNTL_PRAGMA */ sqlite3 *db = pParse->db; /* The database connection */ Db *pDb; /* The specific database being pragmaed */ - Vdbe *v = pParse->pVdbe = sqlite3VdbeCreate(db); /* Prepared statement */ + Vdbe *v = sqlite3GetVdbe(pParse); /* Prepared statement */ if( v==0 ) return; sqlite3VdbeRunOnlyOnce(v); @@ -92474,11 +93777,12 @@ SQLITE_PRIVATE void sqlite3Pragma( static const VdbeOpList getCacheSize[] = { { OP_Transaction, 0, 0, 0}, /* 0 */ { OP_ReadCookie, 0, 1, BTREE_DEFAULT_CACHE_SIZE}, /* 1 */ - { OP_IfPos, 1, 7, 0}, + { OP_IfPos, 1, 8, 0}, { OP_Integer, 0, 2, 0}, { OP_Subtract, 1, 2, 1}, - { OP_IfPos, 1, 7, 0}, + { OP_IfPos, 1, 8, 0}, { OP_Integer, 0, 1, 0}, /* 6 */ + { OP_Noop, 0, 0, 0}, { OP_ResultRow, 1, 1, 0}, }; int addr; @@ -92816,6 +94120,43 @@ SQLITE_PRIVATE void sqlite3Pragma( } }else + /* + ** PRAGMA [database.]mmap_size(N) + ** + ** Used to set mapping size limit. The mapping size limit is + ** used to limit the aggregate size of all memory mapped regions of the + ** database file. If this parameter is set to zero, then memory mapping + ** is not used at all. If N is negative, then the default memory map + ** limit determined by sqlite3_config(SQLITE_CONFIG_MMAP_SIZE) is set. + ** The parameter N is measured in bytes. + ** + ** This value is advisory. The underlying VFS is free to memory map + ** as little or as much as it wants. Except, if N is set to 0 then the + ** upper layers will never invoke the xFetch interfaces to the VFS. 
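+  **
+  ** A minimal usage sketch (the byte count below is an arbitrary example):
+  **
+  **    PRAGMA main.mmap_size=268435456;   -- request a 256MiB mapping limit
+  **    PRAGMA mmap_size;                  -- report the limit now in effect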
+ */ + if( sqlite3StrICmp(zLeft,"mmap_size")==0 ){ + sqlite3_int64 sz; + assert( sqlite3SchemaMutexHeld(db, iDb, 0) ); + if( zRight ){ + int ii; + sqlite3Atoi64(zRight, &sz, 1000, SQLITE_UTF8); + if( sz<0 ) sz = sqlite3GlobalConfig.szMmap; + if( pId2->n==0 ) db->szMmap = sz; + for(ii=db->nDb-1; ii>=0; ii--){ + if( db->aDb[ii].pBt && (ii==iDb || pId2->n==0) ){ + sqlite3BtreeSetMmapLimit(db->aDb[ii].pBt, sz); + } + } + } + sz = -1; + if( sqlite3_file_control(db,zDb,SQLITE_FCNTL_MMAP_SIZE,&sz)==SQLITE_OK ){ +#if SQLITE_MAX_MMAP_SIZE==0 + sz = 0; +#endif + returnSingleInt(pParse, "mmap_size", sz); + } + }else + /* ** PRAGMA temp_store ** PRAGMA temp_store = "default"|"memory"|"file" @@ -93601,6 +94942,11 @@ SQLITE_PRIVATE void sqlite3Pragma( ** PRAGMA [database.]user_version ** PRAGMA [database.]user_version = ** + ** PRAGMA [database.]freelist_count = + ** + ** PRAGMA [database.]application_id + ** PRAGMA [database.]application_id = + ** ** The pragma's schema_version and user_version are used to set or get ** the value of the schema-version and user-version, respectively. Both ** the schema-version and the user-version are 32-bit signed integers @@ -93622,10 +94968,14 @@ SQLITE_PRIVATE void sqlite3Pragma( if( sqlite3StrICmp(zLeft, "schema_version")==0 || sqlite3StrICmp(zLeft, "user_version")==0 || sqlite3StrICmp(zLeft, "freelist_count")==0 + || sqlite3StrICmp(zLeft, "application_id")==0 ){ int iCookie; /* Cookie index. 1 for schema-cookie, 6 for user-cookie. */ sqlite3VdbeUsesBtree(v, iDb); switch( zLeft[0] ){ + case 'a': case 'A': + iCookie = BTREE_APPLICATION_ID; + break; case 'f': case 'F': iCookie = BTREE_FREE_PAGE_COUNT; break; @@ -94506,7 +95856,6 @@ static int sqlite3Prepare( } #endif - assert( db->init.busy==0 || saveSqlFlag==0 ); if( db->init.busy==0 ){ Vdbe *pVdbe = pParse->pVdbe; sqlite3VdbeSetSql(pVdbe, zSql, (int)(pParse->zTail-zSql), saveSqlFlag); @@ -97982,6 +99331,69 @@ SQLITE_PRIVATE int sqlite3IndexedByLookup(Parse *pParse, struct SrcList_item *pF } return SQLITE_OK; } +/* +** Detect compound SELECT statements that use an ORDER BY clause with +** an alternative collating sequence. +** +** SELECT ... FROM t1 EXCEPT SELECT ... FROM t2 ORDER BY .. COLLATE ... +** +** These are rewritten as a subquery: +** +** SELECT * FROM (SELECT ... FROM t1 EXCEPT SELECT ... FROM t2) +** ORDER BY ... COLLATE ... +** +** This transformation is necessary because the multiSelectOrderBy() routine +** above that generates the code for a compound SELECT with an ORDER BY clause +** uses a merge algorithm that requires the same collating sequence on the +** result columns as on the ORDER BY clause. See ticket +** http://www.sqlite.org/src/info/6709574d2a +** +** This transformation is only needed for EXCEPT, INTERSECT, and UNION. +** The UNION ALL operator works fine with multiSelectOrderBy() even when +** there are COLLATE terms in the ORDER BY. +*/ +static int convertCompoundSelectToSubquery(Walker *pWalker, Select *p){ + int i; + Select *pNew; + Select *pX; + sqlite3 *db; + struct ExprList_item *a; + SrcList *pNewSrc; + Parse *pParse; + Token dummy; + + if( p->pPrior==0 ) return WRC_Continue; + if( p->pOrderBy==0 ) return WRC_Continue; + for(pX=p; pX && (pX->op==TK_ALL || pX->op==TK_SELECT); pX=pX->pPrior){} + if( pX==0 ) return WRC_Continue; + a = p->pOrderBy->a; + for(i=p->pOrderBy->nExpr-1; i>=0; i--){ + if( a[i].pExpr->flags & EP_Collate ) break; + } + if( i<0 ) return WRC_Continue; + + /* If we reach this point, that means the transformation is required. 
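+  ** A new Select node is allocated to take over the original compound body,
+  ** while the existing node p is turned into a plain
+  ** "SELECT * FROM (<compound>) ORDER BY ..." wrapper around it, so the
+  ** collation-sensitive sort runs outside of the merge logic in
+  ** multiSelectOrderBy().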
*/ + + pParse = pWalker->pParse; + db = pParse->db; + pNew = sqlite3DbMallocZero(db, sizeof(*pNew) ); + if( pNew==0 ) return WRC_Abort; + memset(&dummy, 0, sizeof(dummy)); + pNewSrc = sqlite3SrcListAppendFromTerm(pParse,0,0,0,&dummy,pNew,0,0); + if( pNewSrc==0 ) return WRC_Abort; + *pNew = *p; + p->pSrc = pNewSrc; + p->pEList = sqlite3ExprListAppend(pParse, 0, sqlite3Expr(db, TK_ALL, 0)); + p->op = TK_SELECT; + p->pWhere = 0; + pNew->pGroupBy = 0; + pNew->pHaving = 0; + pNew->pOrderBy = 0; + p->pPrior = 0; + pNew->pLimit = 0; + pNew->pOffset = 0; + return WRC_Continue; +} /* ** This routine is a Walker callback for "expanding" a SELECT statement. @@ -98298,10 +99710,13 @@ static int exprWalkNoop(Walker *NotUsed, Expr *NotUsed2){ */ static void sqlite3SelectExpand(Parse *pParse, Select *pSelect){ Walker w; - w.xSelectCallback = selectExpander; + memset(&w, 0, sizeof(w)); + w.xSelectCallback = convertCompoundSelectToSubquery; w.xExprCallback = exprWalkNoop; w.pParse = pParse; sqlite3WalkSelect(&w, pSelect); + w.xSelectCallback = selectExpander; + sqlite3WalkSelect(&w, pSelect); } @@ -98356,9 +99771,11 @@ static int selectAddSubqueryTypeInfo(Walker *pWalker, Select *p){ static void sqlite3SelectAddTypeInfo(Parse *pParse, Select *pSelect){ #ifndef SQLITE_OMIT_SUBQUERY Walker w; + memset(&w, 0, sizeof(w)); w.xSelectCallback = selectAddSubqueryTypeInfo; w.xExprCallback = exprWalkNoop; w.pParse = pParse; + w.bSelectDepthFirst = 1; sqlite3WalkSelect(&w, pSelect); #endif } @@ -98769,7 +100186,7 @@ SQLITE_PRIVATE int sqlite3Select( pItem->addrFillSub = topAddr+1; VdbeNoopComment((v, "materialize %s", pItem->pTab->zName)); if( pItem->isCorrelated==0 ){ - /* If the subquery is no correlated and if we are not inside of + /* If the subquery is not correlated and if we are not inside of ** a trigger, then we only need to compute the value of the subquery ** once. */ onceAddr = sqlite3CodeOnce(pParse); @@ -101035,6 +102452,7 @@ SQLITE_PRIVATE void sqlite3Update( } if( j>=pTab->nCol ){ if( sqlite3IsRowid(pChanges->a[i].zName) ){ + j = -1; chngRowid = 1; pRowidExpr = pChanges->a[i].pExpr; }else{ @@ -101047,7 +102465,8 @@ SQLITE_PRIVATE void sqlite3Update( { int rc; rc = sqlite3AuthCheck(pParse, SQLITE_UPDATE, pTab->zName, - pTab->aCol[j].zName, db->aDb[iDb].zName); + j<0 ? 
"ROWID" : pTab->aCol[j].zName, + db->aDb[iDb].zName); if( rc==SQLITE_DENY ){ goto update_cleanup; }else if( rc==SQLITE_IGNORE ){ @@ -101790,6 +103209,7 @@ SQLITE_PRIVATE int sqlite3RunVacuum(char **pzErrMsg, sqlite3 *db){ BTREE_DEFAULT_CACHE_SIZE, 0, /* Preserve the default page cache size */ BTREE_TEXT_ENCODING, 0, /* Preserve the text encoding */ BTREE_USER_VERSION, 0, /* Preserve the user version */ + BTREE_APPLICATION_ID, 0, /* Preserve the application id */ }; assert( 1==sqlite3BtreeIsInTrans(pTemp) ); @@ -103657,7 +105077,7 @@ static WhereTerm *findTerm( continue; } } - if( pTerm->prereqRight==0 ){ + if( pTerm->prereqRight==0 && (pTerm->eOperator&WO_EQ)!=0 ){ pResult = pTerm; goto findTerm_success; }else if( pResult==0 ){ @@ -105227,9 +106647,8 @@ static void bestVirtualIndex(WhereBestIdx *p){ struct sqlite3_index_constraint *pIdxCons; struct sqlite3_index_constraint_usage *pUsage; WhereTerm *pTerm; - int i, j, k; + int i, j; int nOrderBy; - int sortOrder; /* Sort order for IN clauses */ int bAllowIN; /* Allow IN optimizations */ double rCost; @@ -105328,7 +106747,6 @@ static void bestVirtualIndex(WhereBestIdx *p){ return; } - sortOrder = SQLITE_SO_ASC; pIdxCons = *(struct sqlite3_index_constraint**)&pIdxInfo->aConstraint; for(i=0; inConstraint; i++, pIdxCons++){ if( pUsage[i].argvIndex>0 ){ @@ -105343,17 +106761,28 @@ static void bestVirtualIndex(WhereBestIdx *p){ ** repeated in the output. */ break; } - for(k=0; knOrderBy; k++){ - if( pIdxInfo->aOrderBy[k].iColumn==pIdxCons->iColumn ){ - sortOrder = pIdxInfo->aOrderBy[k].desc; - break; - } - } + /* A virtual table that is constrained by an IN clause may not + ** consume the ORDER BY clause because (1) the order of IN terms + ** is not necessarily related to the order of output terms and + ** (2) Multiple outputs from a single IN value will not merge + ** together. */ + pIdxInfo->orderByConsumed = 0; } } } if( i>=pIdxInfo->nConstraint ) break; } + + /* The orderByConsumed signal is only valid if all outer loops collectively + ** generate just a single row of output. + */ + if( pIdxInfo->orderByConsumed ){ + for(i=0; ii; i++){ + if( (p->aLevel[i].plan.wsFlags & WHERE_UNIQUE)==0 ){ + pIdxInfo->orderByConsumed = 0; + } + } + } /* If there is an ORDER BY clause, and the selected virtual table index ** does not satisfy it, increase the cost of the scan accordingly. This @@ -105378,8 +106807,7 @@ static void bestVirtualIndex(WhereBestIdx *p){ } p->cost.plan.u.pVtabIdx = pIdxInfo; if( pIdxInfo->orderByConsumed ){ - assert( sortOrder==0 || sortOrder==1 ); - p->cost.plan.wsFlags |= WHERE_ORDERED + sortOrder*WHERE_REVERSE; + p->cost.plan.wsFlags |= WHERE_ORDERED; p->cost.plan.nOBSat = nOrderBy; }else{ p->cost.plan.nOBSat = p->i ? p->aLevel[p->i-1].plan.nOBSat : 0; @@ -107116,6 +108544,7 @@ static Bitmask codeOneLoopStart( int addrCont; /* Jump here to continue with next cycle */ int iRowidReg = 0; /* Rowid is stored in this register, if not zero */ int iReleaseReg = 0; /* Temp register to free before returning */ + Bitmask newNotReady; /* Return value */ pParse = pWInfo->pParse; v = pParse->pVdbe; @@ -107126,6 +108555,7 @@ static Bitmask codeOneLoopStart( bRev = (pLevel->plan.wsFlags & WHERE_REVERSE)!=0; omitTable = (pLevel->plan.wsFlags & WHERE_IDX_ONLY)!=0 && (wctrlFlags & WHERE_FORCE_TABLE)==0; + VdbeNoopComment((v, "Begin Join Loop %d", iLevel)); /* Create labels for the "break" and "continue" instructions ** for the current loop. Jump to addrBrk to break out of a loop. 
@@ -107668,6 +109098,10 @@ static Bitmask codeOneLoopStart( ** the "interesting" terms of z - terms that did not originate in the ** ON or USING clause of a LEFT JOIN, and terms that are usable as ** indices. + ** + ** This optimization also only applies if the (x1 OR x2 OR ...) term + ** is not contained in the ON clause of a LEFT JOIN. + ** See ticket http://www.sqlite.org/src/info/f2369304e4 */ if( pWC->nTerm>1 ){ int iTerm; @@ -107689,7 +109123,7 @@ static Bitmask codeOneLoopStart( if( pOrTerm->leftCursor==iCur || (pOrTerm->eOperator & WO_AND)!=0 ){ WhereInfo *pSubWInfo; /* Info for single OR-term scan */ Expr *pOrExpr = pOrTerm->pExpr; - if( pAndExpr ){ + if( pAndExpr && !ExprHasProperty(pOrExpr, EP_FromJoin) ){ pAndExpr->pLeft = pOrExpr; pOrExpr = pAndExpr; } @@ -107776,7 +109210,7 @@ static Bitmask codeOneLoopStart( pLevel->p2 = 1 + sqlite3VdbeAddOp2(v, aStart[bRev], iCur, addrBrk); pLevel->p5 = SQLITE_STMTSTATUS_FULLSCAN_STEP; } - notReady &= ~getMask(pWC->pMaskSet, iCur); + newNotReady = notReady & ~getMask(pWC->pMaskSet, iCur); /* Insert code to test every subexpression that can be completely ** computed using the current set of tables. @@ -107790,7 +109224,7 @@ static Bitmask codeOneLoopStart( testcase( pTerm->wtFlags & TERM_VIRTUAL ); /* IMP: R-30575-11662 */ testcase( pTerm->wtFlags & TERM_CODED ); if( pTerm->wtFlags & (TERM_VIRTUAL|TERM_CODED) ) continue; - if( (pTerm->prereqAll & notReady)!=0 ){ + if( (pTerm->prereqAll & newNotReady)!=0 ){ testcase( pWInfo->untestedTerms==0 && (pWInfo->wctrlFlags & WHERE_ONETABLE_ONLY)!=0 ); pWInfo->untestedTerms = 1; @@ -107805,6 +109239,33 @@ static Bitmask codeOneLoopStart( pTerm->wtFlags |= TERM_CODED; } + /* Insert code to test for implied constraints based on transitivity + ** of the "==" operator. + ** + ** Example: If the WHERE clause contains "t1.a=t2.b" and "t2.b=123" + ** and we are coding the t1 loop and the t2 loop has not yet coded, + ** then we cannot use the "t1.a=t2.b" constraint, but we can code + ** the implied "t1.a=123" constraint. + */ + for(pTerm=pWC->a, j=pWC->nTerm; j>0; j--, pTerm++){ + Expr *pE; + WhereTerm *pAlt; + Expr sEq; + if( pTerm->wtFlags & (TERM_VIRTUAL|TERM_CODED) ) continue; + if( pTerm->eOperator!=(WO_EQUIV|WO_EQ) ) continue; + if( pTerm->leftCursor!=iCur ) continue; + pE = pTerm->pExpr; + assert( !ExprHasProperty(pE, EP_FromJoin) ); + assert( (pTerm->prereqRight & newNotReady)!=0 ); + pAlt = findTerm(pWC, iCur, pTerm->u.leftColumn, notReady, WO_EQ|WO_IN, 0); + if( pAlt==0 ) continue; + if( pAlt->wtFlags & (TERM_CODED) ) continue; + VdbeNoopComment((v, "begin transitive constraint")); + sEq = *pAlt->pExpr; + sEq.pLeft = pE->pLeft; + sqlite3ExprIfFalse(pParse, &sEq, addrCont, SQLITE_JUMPIFNULL); + } + /* For a LEFT OUTER JOIN, generate code that will record the fact that ** at least one row of the right table has matched the left table. 
*/ @@ -107817,7 +109278,7 @@ static Bitmask codeOneLoopStart( testcase( pTerm->wtFlags & TERM_VIRTUAL ); /* IMP: R-30575-11662 */ testcase( pTerm->wtFlags & TERM_CODED ); if( pTerm->wtFlags & (TERM_VIRTUAL|TERM_CODED) ) continue; - if( (pTerm->prereqAll & notReady)!=0 ){ + if( (pTerm->prereqAll & newNotReady)!=0 ){ assert( pWInfo->untestedTerms ); continue; } @@ -107828,7 +109289,7 @@ static Bitmask codeOneLoopStart( } sqlite3ReleaseTempReg(pParse, iReleaseReg); - return notReady; + return newNotReady; } #if defined(SQLITE_TEST) @@ -111146,7 +112607,9 @@ static void yy_reduce( struct SrcList_item *pOld = yymsp[-4].minor.yy347->a; pNew->zName = pOld->zName; pNew->zDatabase = pOld->zDatabase; + pNew->pSelect = pOld->pSelect; pOld->zName = pOld->zDatabase = 0; + pOld->pSelect = 0; } sqlite3SrcListDelete(pParse->db, yymsp[-4].minor.yy347); }else{ @@ -113814,6 +115277,19 @@ SQLITE_API int sqlite3_config(int op, ...){ } #endif + case SQLITE_CONFIG_MMAP_SIZE: { + sqlite3_int64 szMmap = va_arg(ap, sqlite3_int64); + sqlite3_int64 mxMmap = va_arg(ap, sqlite3_int64); + if( mxMmap<0 || mxMmap>SQLITE_MAX_MMAP_SIZE ){ + mxMmap = SQLITE_MAX_MMAP_SIZE; + } + sqlite3GlobalConfig.mxMmap = mxMmap; + if( szMmap<0 ) szMmap = SQLITE_DEFAULT_MMAP_SIZE; + if( szMmap>mxMmap) szMmap = mxMmap; + sqlite3GlobalConfig.szMmap = szMmap; + break; + } + default: { rc = SQLITE_ERROR; break; @@ -114207,6 +115683,12 @@ SQLITE_PRIVATE void sqlite3LeaveMutexAndCloseZombie(sqlite3 *db){ ** go ahead and free all resources. */ + /* If a transaction is open, roll it back. This also ensures that if + ** any database schemas have been modified by an uncommitted transaction + ** they are reset. And that the required b-tree mutex is held to make + ** the pager rollback and schema reset an atomic operation. */ + sqlite3RollbackAll(db, SQLITE_OK); + /* Free any outstanding Savepoint structures. */ sqlite3CloseSavepoints(db); @@ -114307,6 +115789,15 @@ SQLITE_PRIVATE void sqlite3RollbackAll(sqlite3 *db, int tripCode){ int inTrans = 0; assert( sqlite3_mutex_held(db->mutex) ); sqlite3BeginBenignMalloc(); + + /* Obtain all b-tree mutexes before making any calls to BtreeRollback(). + ** This is important in case the transaction being rolled back has + ** modified the database schema. If the b-tree mutexes are not taken + ** here, then another shared-cache connection might sneak in between + ** the database rollback and schema reset, which can cause false + ** corruption reports in some cases. */ + sqlite3BtreeEnterAll(db); + for(i=0; inDb; i++){ Btree *p = db->aDb[i].pBt; if( p ){ @@ -114324,6 +115815,7 @@ SQLITE_PRIVATE void sqlite3RollbackAll(sqlite3 *db, int tripCode){ sqlite3ExpirePreparedStatements(db); sqlite3ResetAllSchemasOfConnection(db); } + sqlite3BtreeLeaveAll(db); /* Any deferred constraint violations have now been resolved. */ db->nDeferredCons = 0; @@ -114334,6 +115826,110 @@ SQLITE_PRIVATE void sqlite3RollbackAll(sqlite3 *db, int tripCode){ } } +/* +** Return a static string containing the name corresponding to the error code +** specified in the argument. 
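+**
+** For example, sqlite3ErrName(SQLITE_BUSY) returns "SQLITE_BUSY" and an
+** extended code such as SQLITE_IOERR_NOMEM returns "SQLITE_IOERR_NOMEM".
+** The lookup is attempted first on the full (possibly extended) code and
+** then on its primary code; if neither is recognized, a static
+** "SQLITE_UNKNOWN(nnn)" string is returned instead.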
+*/ +#if defined(SQLITE_DEBUG) || defined(SQLITE_TEST) || \ + defined(SQLITE_DEBUG_OS_TRACE) +SQLITE_PRIVATE const char *sqlite3ErrName(int rc){ + const char *zName = 0; + int i, origRc = rc; + for(i=0; i<2 && zName==0; i++, rc &= 0xff){ + switch( rc ){ + case SQLITE_OK: zName = "SQLITE_OK"; break; + case SQLITE_ERROR: zName = "SQLITE_ERROR"; break; + case SQLITE_INTERNAL: zName = "SQLITE_INTERNAL"; break; + case SQLITE_PERM: zName = "SQLITE_PERM"; break; + case SQLITE_ABORT: zName = "SQLITE_ABORT"; break; + case SQLITE_ABORT_ROLLBACK: zName = "SQLITE_ABORT_ROLLBACK"; break; + case SQLITE_BUSY: zName = "SQLITE_BUSY"; break; + case SQLITE_BUSY_RECOVERY: zName = "SQLITE_BUSY_RECOVERY"; break; + case SQLITE_LOCKED: zName = "SQLITE_LOCKED"; break; + case SQLITE_LOCKED_SHAREDCACHE: zName = "SQLITE_LOCKED_SHAREDCACHE";break; + case SQLITE_NOMEM: zName = "SQLITE_NOMEM"; break; + case SQLITE_READONLY: zName = "SQLITE_READONLY"; break; + case SQLITE_READONLY_RECOVERY: zName = "SQLITE_READONLY_RECOVERY"; break; + case SQLITE_READONLY_CANTLOCK: zName = "SQLITE_READONLY_CANTLOCK"; break; + case SQLITE_READONLY_ROLLBACK: zName = "SQLITE_READONLY_ROLLBACK"; break; + case SQLITE_INTERRUPT: zName = "SQLITE_INTERRUPT"; break; + case SQLITE_IOERR: zName = "SQLITE_IOERR"; break; + case SQLITE_IOERR_READ: zName = "SQLITE_IOERR_READ"; break; + case SQLITE_IOERR_SHORT_READ: zName = "SQLITE_IOERR_SHORT_READ"; break; + case SQLITE_IOERR_WRITE: zName = "SQLITE_IOERR_WRITE"; break; + case SQLITE_IOERR_FSYNC: zName = "SQLITE_IOERR_FSYNC"; break; + case SQLITE_IOERR_DIR_FSYNC: zName = "SQLITE_IOERR_DIR_FSYNC"; break; + case SQLITE_IOERR_TRUNCATE: zName = "SQLITE_IOERR_TRUNCATE"; break; + case SQLITE_IOERR_FSTAT: zName = "SQLITE_IOERR_FSTAT"; break; + case SQLITE_IOERR_UNLOCK: zName = "SQLITE_IOERR_UNLOCK"; break; + case SQLITE_IOERR_RDLOCK: zName = "SQLITE_IOERR_RDLOCK"; break; + case SQLITE_IOERR_DELETE: zName = "SQLITE_IOERR_DELETE"; break; + case SQLITE_IOERR_BLOCKED: zName = "SQLITE_IOERR_BLOCKED"; break; + case SQLITE_IOERR_NOMEM: zName = "SQLITE_IOERR_NOMEM"; break; + case SQLITE_IOERR_ACCESS: zName = "SQLITE_IOERR_ACCESS"; break; + case SQLITE_IOERR_CHECKRESERVEDLOCK: + zName = "SQLITE_IOERR_CHECKRESERVEDLOCK"; break; + case SQLITE_IOERR_LOCK: zName = "SQLITE_IOERR_LOCK"; break; + case SQLITE_IOERR_CLOSE: zName = "SQLITE_IOERR_CLOSE"; break; + case SQLITE_IOERR_DIR_CLOSE: zName = "SQLITE_IOERR_DIR_CLOSE"; break; + case SQLITE_IOERR_SHMOPEN: zName = "SQLITE_IOERR_SHMOPEN"; break; + case SQLITE_IOERR_SHMSIZE: zName = "SQLITE_IOERR_SHMSIZE"; break; + case SQLITE_IOERR_SHMLOCK: zName = "SQLITE_IOERR_SHMLOCK"; break; + case SQLITE_IOERR_SHMMAP: zName = "SQLITE_IOERR_SHMMAP"; break; + case SQLITE_IOERR_SEEK: zName = "SQLITE_IOERR_SEEK"; break; + case SQLITE_IOERR_DELETE_NOENT: zName = "SQLITE_IOERR_DELETE_NOENT";break; + case SQLITE_IOERR_MMAP: zName = "SQLITE_IOERR_MMAP"; break; + case SQLITE_CORRUPT: zName = "SQLITE_CORRUPT"; break; + case SQLITE_CORRUPT_VTAB: zName = "SQLITE_CORRUPT_VTAB"; break; + case SQLITE_NOTFOUND: zName = "SQLITE_NOTFOUND"; break; + case SQLITE_FULL: zName = "SQLITE_FULL"; break; + case SQLITE_CANTOPEN: zName = "SQLITE_CANTOPEN"; break; + case SQLITE_CANTOPEN_NOTEMPDIR: zName = "SQLITE_CANTOPEN_NOTEMPDIR";break; + case SQLITE_CANTOPEN_ISDIR: zName = "SQLITE_CANTOPEN_ISDIR"; break; + case SQLITE_CANTOPEN_FULLPATH: zName = "SQLITE_CANTOPEN_FULLPATH"; break; + case SQLITE_PROTOCOL: zName = "SQLITE_PROTOCOL"; break; + case SQLITE_EMPTY: zName = "SQLITE_EMPTY"; break; + case SQLITE_SCHEMA: zName 
= "SQLITE_SCHEMA"; break; + case SQLITE_TOOBIG: zName = "SQLITE_TOOBIG"; break; + case SQLITE_CONSTRAINT: zName = "SQLITE_CONSTRAINT"; break; + case SQLITE_CONSTRAINT_UNIQUE: zName = "SQLITE_CONSTRAINT_UNIQUE"; break; + case SQLITE_CONSTRAINT_TRIGGER: zName = "SQLITE_CONSTRAINT_TRIGGER";break; + case SQLITE_CONSTRAINT_FOREIGNKEY: + zName = "SQLITE_CONSTRAINT_FOREIGNKEY"; break; + case SQLITE_CONSTRAINT_CHECK: zName = "SQLITE_CONSTRAINT_CHECK"; break; + case SQLITE_CONSTRAINT_PRIMARYKEY: + zName = "SQLITE_CONSTRAINT_PRIMARYKEY"; break; + case SQLITE_CONSTRAINT_NOTNULL: zName = "SQLITE_CONSTRAINT_NOTNULL";break; + case SQLITE_CONSTRAINT_COMMITHOOK: + zName = "SQLITE_CONSTRAINT_COMMITHOOK"; break; + case SQLITE_CONSTRAINT_VTAB: zName = "SQLITE_CONSTRAINT_VTAB"; break; + case SQLITE_CONSTRAINT_FUNCTION: + zName = "SQLITE_CONSTRAINT_FUNCTION"; break; + case SQLITE_MISMATCH: zName = "SQLITE_MISMATCH"; break; + case SQLITE_MISUSE: zName = "SQLITE_MISUSE"; break; + case SQLITE_NOLFS: zName = "SQLITE_NOLFS"; break; + case SQLITE_AUTH: zName = "SQLITE_AUTH"; break; + case SQLITE_FORMAT: zName = "SQLITE_FORMAT"; break; + case SQLITE_RANGE: zName = "SQLITE_RANGE"; break; + case SQLITE_NOTADB: zName = "SQLITE_NOTADB"; break; + case SQLITE_ROW: zName = "SQLITE_ROW"; break; + case SQLITE_NOTICE: zName = "SQLITE_NOTICE"; break; + case SQLITE_NOTICE_RECOVER_WAL: zName = "SQLITE_NOTICE_RECOVER_WAL";break; + case SQLITE_NOTICE_RECOVER_ROLLBACK: + zName = "SQLITE_NOTICE_RECOVER_ROLLBACK"; break; + case SQLITE_WARNING: zName = "SQLITE_WARNING"; break; + case SQLITE_DONE: zName = "SQLITE_DONE"; break; + } + } + if( zName==0 ){ + static char zBuf[50]; + sqlite3_snprintf(sizeof(zBuf), zBuf, "SQLITE_UNKNOWN(%d)", origRc); + zName = zBuf; + } + return zName; +} +#endif + /* ** Return a static string that describes the kind of error specified in the ** argument. 
@@ -115634,6 +117230,7 @@ static int openDatabase( memcpy(db->aLimit, aHardLimit, sizeof(db->aLimit)); db->autoCommit = 1; db->nextAutovac = -1; + db->szMmap = sqlite3GlobalConfig.szMmap; db->nextPagesize = 0; db->flags |= SQLITE_ShortColNames | SQLITE_AutoIndex | SQLITE_EnableTrigger #if SQLITE_DEFAULT_FILE_FORMAT<4 @@ -117950,7 +119547,7 @@ SQLITE_PRIVATE void sqlite3Fts3Matchinfo(sqlite3_context *, Fts3Cursor *, const /* fts3_expr.c */ SQLITE_PRIVATE int sqlite3Fts3ExprParse(sqlite3_tokenizer *, int, - char **, int, int, int, const char *, int, Fts3Expr ** + char **, int, int, int, const char *, int, Fts3Expr **, char ** ); SQLITE_PRIVATE void sqlite3Fts3ExprFree(Fts3Expr *); #ifdef SQLITE_TEST @@ -117975,6 +119572,9 @@ SQLITE_PRIVATE int sqlite3Fts3EvalPhrasePoslist(Fts3Cursor *, Fts3Expr *, int iC SQLITE_PRIVATE int sqlite3Fts3MsrOvfl(Fts3Cursor *, Fts3MultiSegReader *, int *); SQLITE_PRIVATE int sqlite3Fts3MsrIncrRestart(Fts3MultiSegReader *pCsr); +/* fts3_tokenize_vtab.c */ +SQLITE_PRIVATE int sqlite3Fts3InitTok(sqlite3*, Fts3Hash *); + /* fts3_unicode2.c (functions generated by parsing unicode text files) */ #ifdef SQLITE_ENABLE_FTS4_UNICODE61 SQLITE_PRIVATE int sqlite3FtsUnicodeFold(int, int); @@ -120671,14 +122271,12 @@ static int fts3FilterMethod( pCsr->iLangid = 0; if( nVal==2 ) pCsr->iLangid = sqlite3_value_int(apVal[1]); + assert( p->base.zErrMsg==0 ); rc = sqlite3Fts3ExprParse(p->pTokenizer, pCsr->iLangid, - p->azColumn, p->bFts4, p->nColumn, iCol, zQuery, -1, &pCsr->pExpr + p->azColumn, p->bFts4, p->nColumn, iCol, zQuery, -1, &pCsr->pExpr, + &p->base.zErrMsg ); if( rc!=SQLITE_OK ){ - if( rc==SQLITE_ERROR ){ - static const char *zErr = "malformed MATCH expression: [%s]"; - p->base.zErrMsg = sqlite3_mprintf(zErr, zQuery); - } return rc; } @@ -121342,9 +122940,13 @@ SQLITE_PRIVATE int sqlite3Fts3Init(sqlite3 *db){ db, "fts4", &fts3Module, (void *)pHash, 0 ); } + if( rc==SQLITE_OK ){ + rc = sqlite3Fts3InitTok(db, (void *)pHash); + } return rc; } + /* An error has occurred. Delete the hash table and return the error code. */ assert( rc!=SQLITE_OK ); if( pHash ){ @@ -123118,17 +124720,26 @@ static int fts3auxConnectMethod( UNUSED_PARAMETER(pUnused); - /* The user should specify a single argument - the name of an fts3 table. 
*/ - if( argc!=4 ){ - *pzErr = sqlite3_mprintf( - "wrong number of arguments to fts4aux constructor" - ); - return SQLITE_ERROR; - } + /* The user should invoke this in one of two forms: + ** + ** CREATE VIRTUAL TABLE xxx USING fts4aux(fts4-table); + ** CREATE VIRTUAL TABLE xxx USING fts4aux(fts4-table-db, fts4-table); + */ + if( argc!=4 && argc!=5 ) goto bad_args; zDb = argv[1]; nDb = (int)strlen(zDb); - zFts3 = argv[3]; + if( argc==5 ){ + if( nDb==4 && 0==sqlite3_strnicmp("temp", zDb, 4) ){ + zDb = argv[3]; + nDb = (int)strlen(zDb); + zFts3 = argv[4]; + }else{ + goto bad_args; + } + }else{ + zFts3 = argv[3]; + } nFts3 = (int)strlen(zFts3); rc = sqlite3_declare_vtab(db, FTS3_TERMS_SCHEMA); @@ -123151,6 +124762,10 @@ static int fts3auxConnectMethod( *ppVtab = (sqlite3_vtab *)p; return SQLITE_OK; + + bad_args: + *pzErr = sqlite3_mprintf("invalid arguments to fts4aux constructor"); + return SQLITE_ERROR; } /* @@ -124164,8 +125779,10 @@ static int fts3ExprParse( } pNot->eType = FTSQUERY_NOT; pNot->pRight = p; + p->pParent = pNot; if( pNotBranch ){ pNot->pLeft = pNotBranch; + pNotBranch->pParent = pNot; } pNotBranch = pNot; p = pPrev; @@ -124253,6 +125870,7 @@ static int fts3ExprParse( pIter = pIter->pLeft; } pIter->pLeft = pRet; + pRet->pParent = pIter; pRet = pNotBranch; } } @@ -124269,6 +125887,223 @@ exprparse_out: return rc; } +/* +** Return SQLITE_ERROR if the maximum depth of the expression tree passed +** as the only argument is more than nMaxDepth. +*/ +static int fts3ExprCheckDepth(Fts3Expr *p, int nMaxDepth){ + int rc = SQLITE_OK; + if( p ){ + if( nMaxDepth<0 ){ + rc = SQLITE_TOOBIG; + }else{ + rc = fts3ExprCheckDepth(p->pLeft, nMaxDepth-1); + if( rc==SQLITE_OK ){ + rc = fts3ExprCheckDepth(p->pRight, nMaxDepth-1); + } + } + } + return rc; +} + +/* +** This function attempts to transform the expression tree at (*pp) to +** an equivalent but more balanced form. The tree is modified in place. +** If successful, SQLITE_OK is returned and (*pp) set to point to the +** new root expression node. +** +** nMaxDepth is the maximum allowable depth of the balanced sub-tree. +** +** Otherwise, if an error occurs, an SQLite error code is returned and +** expression (*pp) freed. +*/ +static int fts3ExprBalance(Fts3Expr **pp, int nMaxDepth){ + int rc = SQLITE_OK; /* Return code */ + Fts3Expr *pRoot = *pp; /* Initial root node */ + Fts3Expr *pFree = 0; /* List of free nodes. Linked by pParent. */ + int eType = pRoot->eType; /* Type of node in this tree */ + + if( nMaxDepth==0 ){ + rc = SQLITE_ERROR; + } + + if( rc==SQLITE_OK && (eType==FTSQUERY_AND || eType==FTSQUERY_OR) ){ + Fts3Expr **apLeaf; + apLeaf = (Fts3Expr **)sqlite3_malloc(sizeof(Fts3Expr *) * nMaxDepth); + if( 0==apLeaf ){ + rc = SQLITE_NOMEM; + }else{ + memset(apLeaf, 0, sizeof(Fts3Expr *) * nMaxDepth); + } + + if( rc==SQLITE_OK ){ + int i; + Fts3Expr *p; + + /* Set $p to point to the left-most leaf in the tree of eType nodes. */ + for(p=pRoot; p->eType==eType; p=p->pLeft){ + assert( p->pParent==0 || p->pParent->pLeft==p ); + assert( p->pLeft && p->pRight ); + } + + /* This loop runs once for each leaf in the tree of eType nodes. 
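+      ** Leaves are collected into apLeaf[], which behaves like a binary
+      ** counter: two sub-trees of equal height are merged as soon as the
+      ** second one appears, so a left-deep chain of N AND or OR operands is
+      ** rebuilt with a depth of roughly log2(N) instead of N.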
*/ + while( 1 ){ + int iLvl; + Fts3Expr *pParent = p->pParent; /* Current parent of p */ + + assert( pParent==0 || pParent->pLeft==p ); + p->pParent = 0; + if( pParent ){ + pParent->pLeft = 0; + }else{ + pRoot = 0; + } + rc = fts3ExprBalance(&p, nMaxDepth-1); + if( rc!=SQLITE_OK ) break; + + for(iLvl=0; p && iLvlpLeft = apLeaf[iLvl]; + pFree->pRight = p; + pFree->pLeft->pParent = pFree; + pFree->pRight->pParent = pFree; + + p = pFree; + pFree = pFree->pParent; + p->pParent = 0; + apLeaf[iLvl] = 0; + } + } + if( p ){ + sqlite3Fts3ExprFree(p); + rc = SQLITE_TOOBIG; + break; + } + + /* If that was the last leaf node, break out of the loop */ + if( pParent==0 ) break; + + /* Set $p to point to the next leaf in the tree of eType nodes */ + for(p=pParent->pRight; p->eType==eType; p=p->pLeft); + + /* Remove pParent from the original tree. */ + assert( pParent->pParent==0 || pParent->pParent->pLeft==pParent ); + pParent->pRight->pParent = pParent->pParent; + if( pParent->pParent ){ + pParent->pParent->pLeft = pParent->pRight; + }else{ + assert( pParent==pRoot ); + pRoot = pParent->pRight; + } + + /* Link pParent into the free node list. It will be used as an + ** internal node of the new tree. */ + pParent->pParent = pFree; + pFree = pParent; + } + + if( rc==SQLITE_OK ){ + p = 0; + for(i=0; ipParent = 0; + }else{ + assert( pFree!=0 ); + pFree->pRight = p; + pFree->pLeft = apLeaf[i]; + pFree->pLeft->pParent = pFree; + pFree->pRight->pParent = pFree; + + p = pFree; + pFree = pFree->pParent; + p->pParent = 0; + } + } + } + pRoot = p; + }else{ + /* An error occurred. Delete the contents of the apLeaf[] array + ** and pFree list. Everything else is cleaned up by the call to + ** sqlite3Fts3ExprFree(pRoot) below. */ + Fts3Expr *pDel; + for(i=0; ipParent; + sqlite3_free(pDel); + } + } + + assert( pFree==0 ); + sqlite3_free( apLeaf ); + } + } + + if( rc!=SQLITE_OK ){ + sqlite3Fts3ExprFree(pRoot); + pRoot = 0; + } + *pp = pRoot; + return rc; +} + +/* +** This function is similar to sqlite3Fts3ExprParse(), with the following +** differences: +** +** 1. It does not do expression rebalancing. +** 2. It does not check that the expression does not exceed the +** maximum allowable depth. +** 3. Even if it fails, *ppExpr may still be set to point to an +** expression tree. It should be deleted using sqlite3Fts3ExprFree() +** in this case. +*/ +static int fts3ExprParseUnbalanced( + sqlite3_tokenizer *pTokenizer, /* Tokenizer module */ + int iLangid, /* Language id for tokenizer */ + char **azCol, /* Array of column names for fts3 table */ + int bFts4, /* True to allow FTS4-only syntax */ + int nCol, /* Number of entries in azCol[] */ + int iDefaultCol, /* Default column to query */ + const char *z, int n, /* Text of MATCH query */ + Fts3Expr **ppExpr /* OUT: Parsed query structure */ +){ + int nParsed; + int rc; + ParseContext sParse; + + memset(&sParse, 0, sizeof(ParseContext)); + sParse.pTokenizer = pTokenizer; + sParse.iLangid = iLangid; + sParse.azCol = (const char **)azCol; + sParse.nCol = nCol; + sParse.iDefaultCol = iDefaultCol; + sParse.bFts4 = bFts4; + if( z==0 ){ + *ppExpr = 0; + return SQLITE_OK; + } + if( n<0 ){ + n = (int)strlen(z); + } + rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed); + assert( rc==SQLITE_OK || *ppExpr==0 ); + + /* Check for mismatched parenthesis */ + if( rc==SQLITE_OK && sParse.nNest ){ + rc = SQLITE_ERROR; + } + + return rc; +} + /* ** Parameters z and n contain a pointer to and length of a buffer containing ** an fts3 query expression, respectively. 
This function attempts to parse the @@ -124301,49 +126136,74 @@ SQLITE_PRIVATE int sqlite3Fts3ExprParse( int nCol, /* Number of entries in azCol[] */ int iDefaultCol, /* Default column to query */ const char *z, int n, /* Text of MATCH query */ - Fts3Expr **ppExpr /* OUT: Parsed query structure */ + Fts3Expr **ppExpr, /* OUT: Parsed query structure */ + char **pzErr /* OUT: Error message (sqlite3_malloc) */ ){ - int nParsed; - int rc; - ParseContext sParse; - - memset(&sParse, 0, sizeof(ParseContext)); - sParse.pTokenizer = pTokenizer; - sParse.iLangid = iLangid; - sParse.azCol = (const char **)azCol; - sParse.nCol = nCol; - sParse.iDefaultCol = iDefaultCol; - sParse.bFts4 = bFts4; - if( z==0 ){ - *ppExpr = 0; - return SQLITE_OK; + static const int MAX_EXPR_DEPTH = 12; + int rc = fts3ExprParseUnbalanced( + pTokenizer, iLangid, azCol, bFts4, nCol, iDefaultCol, z, n, ppExpr + ); + + /* Rebalance the expression. And check that its depth does not exceed + ** MAX_EXPR_DEPTH. */ + if( rc==SQLITE_OK && *ppExpr ){ + rc = fts3ExprBalance(ppExpr, MAX_EXPR_DEPTH); + if( rc==SQLITE_OK ){ + rc = fts3ExprCheckDepth(*ppExpr, MAX_EXPR_DEPTH); + } } - if( n<0 ){ - n = (int)strlen(z); - } - rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed); - /* Check for mismatched parenthesis */ - if( rc==SQLITE_OK && sParse.nNest ){ - rc = SQLITE_ERROR; + if( rc!=SQLITE_OK ){ sqlite3Fts3ExprFree(*ppExpr); *ppExpr = 0; + if( rc==SQLITE_TOOBIG ){ + *pzErr = sqlite3_mprintf( + "FTS expression tree is too large (maximum depth %d)", MAX_EXPR_DEPTH + ); + rc = SQLITE_ERROR; + }else if( rc==SQLITE_ERROR ){ + *pzErr = sqlite3_mprintf("malformed MATCH expression: [%s]", z); + } } return rc; } /* -** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse(). +** Free a single node of an expression tree. */ -SQLITE_PRIVATE void sqlite3Fts3ExprFree(Fts3Expr *p){ - if( p ){ - assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 ); - sqlite3Fts3ExprFree(p->pLeft); - sqlite3Fts3ExprFree(p->pRight); - sqlite3Fts3EvalPhraseCleanup(p->pPhrase); - sqlite3_free(p->aMI); - sqlite3_free(p); +static void fts3FreeExprNode(Fts3Expr *p){ + assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 ); + sqlite3Fts3EvalPhraseCleanup(p->pPhrase); + sqlite3_free(p->aMI); + sqlite3_free(p); +} + +/* +** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse(). +** +** This function would be simpler if it recursively called itself. But +** that would mean passing a sufficiently large expression to ExprParse() +** could cause a stack overflow. +*/ +SQLITE_PRIVATE void sqlite3Fts3ExprFree(Fts3Expr *pDel){ + Fts3Expr *p; + assert( pDel==0 || pDel->pParent==0 ); + for(p=pDel; p && (p->pLeft||p->pRight); p=(p->pLeft ? p->pLeft : p->pRight)){ + assert( p->pParent==0 || p==p->pParent->pRight || p==p->pParent->pLeft ); + } + while( p ){ + Fts3Expr *pParent = p->pParent; + fts3FreeExprNode(p); + if( pParent && p==pParent->pLeft && pParent->pRight ){ + p = pParent->pRight; + while( p && (p->pLeft || p->pRight) ){ + assert( p==p->pParent->pRight || p==p->pParent->pLeft ); + p = (p->pLeft ? p->pLeft : p->pRight); + } + }else{ + p = pParent; + } } } @@ -124395,6 +126255,9 @@ static int queryTestTokenizer( ** the returned expression text and then freed using sqlite3_free(). 
*/ static char *exprToString(Fts3Expr *pExpr, char *zBuf){ + if( pExpr==0 ){ + return sqlite3_mprintf(""); + } switch( pExpr->eType ){ case FTSQUERY_PHRASE: { Fts3Phrase *pPhrase = pExpr->pPhrase; @@ -124502,10 +126365,21 @@ static void fts3ExprTest( azCol[ii] = (char *)sqlite3_value_text(argv[ii+2]); } - rc = sqlite3Fts3ExprParse( - pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr - ); + if( sqlite3_user_data(context) ){ + char *zDummy = 0; + rc = sqlite3Fts3ExprParse( + pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr, &zDummy + ); + assert( rc==SQLITE_OK || pExpr==0 ); + sqlite3_free(zDummy); + }else{ + rc = fts3ExprParseUnbalanced( + pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr + ); + } + if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){ + sqlite3Fts3ExprFree(pExpr); sqlite3_result_error(context, "Error parsing expression", -1); }else if( rc==SQLITE_NOMEM || !(zBuf = exprToString(pExpr, 0)) ){ sqlite3_result_error_nomem(context); @@ -124528,9 +126402,15 @@ exprtest_out: ** with database connection db. */ SQLITE_PRIVATE int sqlite3Fts3ExprInitTestInterface(sqlite3* db){ - return sqlite3_create_function( + int rc = sqlite3_create_function( db, "fts3_exprtest", -1, SQLITE_UTF8, 0, fts3ExprTest, 0, 0 ); + if( rc==SQLITE_OK ){ + rc = sqlite3_create_function(db, "fts3_exprtest_rebalance", + -1, SQLITE_UTF8, (void *)1, fts3ExprTest, 0, 0 + ); + } + return rc; } #endif @@ -126293,6 +128173,462 @@ SQLITE_PRIVATE void sqlite3Fts3SimpleTokenizerModule( #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ /************** End of fts3_tokenizer1.c *************************************/ +/************** Begin file fts3_tokenize_vtab.c ******************************/ +/* +** 2013 Apr 22 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This file contains code for the "fts3tokenize" virtual table module. +** An fts3tokenize virtual table is created as follows: +** +** CREATE VIRTUAL TABLE USING fts3tokenize( +** , , ... +** ); +** +** The table created has the following schema: +** +** CREATE TABLE (input, token, start, end, position) +** +** When queried, the query must include a WHERE clause of type: +** +** input = +** +** The virtual table module tokenizes this , using the FTS3 +** tokenizer specified by the arguments to the CREATE VIRTUAL TABLE +** statement and returns one row for each token in the result. With +** fields set as follows: +** +** input: Always set to a copy of +** token: A token from the input. +** start: Byte offset of the token within the input . +** end: Byte offset of the byte immediately following the end of the +** token within the input string. +** pos: Token offset of token within input. +** +*/ +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) + +/* #include */ +/* #include */ + +typedef struct Fts3tokTable Fts3tokTable; +typedef struct Fts3tokCursor Fts3tokCursor; + +/* +** Virtual table structure. +*/ +struct Fts3tokTable { + sqlite3_vtab base; /* Base class used by SQLite core */ + const sqlite3_tokenizer_module *pMod; + sqlite3_tokenizer *pTok; +}; + +/* +** Virtual table cursor structure. 
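+**
+** One cursor is opened for each scan of an fts3tokenize table and steps
+** through the tokenizer output, one token per returned row. For example,
+** given a hypothetical table "t1" created with this module, a query like
+**
+**     SELECT token, start, end, position FROM t1 WHERE input = 'the quick fox';
+**
+** returns one row for each token of the input string.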
+*/ +struct Fts3tokCursor { + sqlite3_vtab_cursor base; /* Base class used by SQLite core */ + char *zInput; /* Input string */ + sqlite3_tokenizer_cursor *pCsr; /* Cursor to iterate through zInput */ + int iRowid; /* Current 'rowid' value */ + const char *zToken; /* Current 'token' value */ + int nToken; /* Size of zToken in bytes */ + int iStart; /* Current 'start' value */ + int iEnd; /* Current 'end' value */ + int iPos; /* Current 'pos' value */ +}; + +/* +** Query FTS for the tokenizer implementation named zName. +*/ +static int fts3tokQueryTokenizer( + Fts3Hash *pHash, + const char *zName, + const sqlite3_tokenizer_module **pp, + char **pzErr +){ + sqlite3_tokenizer_module *p; + int nName = (int)strlen(zName); + + p = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, zName, nName+1); + if( !p ){ + *pzErr = sqlite3_mprintf("unknown tokenizer: %s", zName); + return SQLITE_ERROR; + } + + *pp = p; + return SQLITE_OK; +} + +/* +** The second argument, argv[], is an array of pointers to nul-terminated +** strings. This function makes a copy of the array and strings into a +** single block of memory. It then dequotes any of the strings that appear +** to be quoted. +** +** If successful, output parameter *pazDequote is set to point at the +** array of dequoted strings and SQLITE_OK is returned. The caller is +** responsible for eventually calling sqlite3_free() to free the array +** in this case. Or, if an error occurs, an SQLite error code is returned. +** The final value of *pazDequote is undefined in this case. +*/ +static int fts3tokDequoteArray( + int argc, /* Number of elements in argv[] */ + const char * const *argv, /* Input array */ + char ***pazDequote /* Output array */ +){ + int rc = SQLITE_OK; /* Return code */ + if( argc==0 ){ + *pazDequote = 0; + }else{ + int i; + int nByte = 0; + char **azDequote; + + for(i=0; ixCreate((nDequote>1 ? nDequote-1 : 0), azArg, &pTok); + } + + if( rc==SQLITE_OK ){ + pTab = (Fts3tokTable *)sqlite3_malloc(sizeof(Fts3tokTable)); + if( pTab==0 ){ + rc = SQLITE_NOMEM; + } + } + + if( rc==SQLITE_OK ){ + memset(pTab, 0, sizeof(Fts3tokTable)); + pTab->pMod = pMod; + pTab->pTok = pTok; + *ppVtab = &pTab->base; + }else{ + if( pTok ){ + pMod->xDestroy(pTok); + } + } + + sqlite3_free(azDequote); + return rc; +} + +/* +** This function does the work for both the xDisconnect and xDestroy methods. +** These tables have no persistent representation of their own, so xDisconnect +** and xDestroy are identical operations. +*/ +static int fts3tokDisconnectMethod(sqlite3_vtab *pVtab){ + Fts3tokTable *pTab = (Fts3tokTable *)pVtab; + + pTab->pMod->xDestroy(pTab->pTok); + sqlite3_free(pTab); + return SQLITE_OK; +} + +/* +** xBestIndex - Analyze a WHERE and ORDER BY clause. +*/ +static int fts3tokBestIndexMethod( + sqlite3_vtab *pVTab, + sqlite3_index_info *pInfo +){ + int i; + UNUSED_PARAMETER(pVTab); + + for(i=0; inConstraint; i++){ + if( pInfo->aConstraint[i].usable + && pInfo->aConstraint[i].iColumn==0 + && pInfo->aConstraint[i].op==SQLITE_INDEX_CONSTRAINT_EQ + ){ + pInfo->idxNum = 1; + pInfo->aConstraintUsage[i].argvIndex = 1; + pInfo->aConstraintUsage[i].omit = 1; + pInfo->estimatedCost = 1; + return SQLITE_OK; + } + } + + pInfo->idxNum = 0; + assert( pInfo->estimatedCost>1000000.0 ); + + return SQLITE_OK; +} + +/* +** xOpen - Open a cursor. 
+*/ +static int fts3tokOpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){ + Fts3tokCursor *pCsr; + UNUSED_PARAMETER(pVTab); + + pCsr = (Fts3tokCursor *)sqlite3_malloc(sizeof(Fts3tokCursor)); + if( pCsr==0 ){ + return SQLITE_NOMEM; + } + memset(pCsr, 0, sizeof(Fts3tokCursor)); + + *ppCsr = (sqlite3_vtab_cursor *)pCsr; + return SQLITE_OK; +} + +/* +** Reset the tokenizer cursor passed as the only argument. As if it had +** just been returned by fts3tokOpenMethod(). +*/ +static void fts3tokResetCursor(Fts3tokCursor *pCsr){ + if( pCsr->pCsr ){ + Fts3tokTable *pTab = (Fts3tokTable *)(pCsr->base.pVtab); + pTab->pMod->xClose(pCsr->pCsr); + pCsr->pCsr = 0; + } + sqlite3_free(pCsr->zInput); + pCsr->zInput = 0; + pCsr->zToken = 0; + pCsr->nToken = 0; + pCsr->iStart = 0; + pCsr->iEnd = 0; + pCsr->iPos = 0; + pCsr->iRowid = 0; +} + +/* +** xClose - Close a cursor. +*/ +static int fts3tokCloseMethod(sqlite3_vtab_cursor *pCursor){ + Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; + + fts3tokResetCursor(pCsr); + sqlite3_free(pCsr); + return SQLITE_OK; +} + +/* +** xNext - Advance the cursor to the next row, if any. +*/ +static int fts3tokNextMethod(sqlite3_vtab_cursor *pCursor){ + Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; + Fts3tokTable *pTab = (Fts3tokTable *)(pCursor->pVtab); + int rc; /* Return code */ + + pCsr->iRowid++; + rc = pTab->pMod->xNext(pCsr->pCsr, + &pCsr->zToken, &pCsr->nToken, + &pCsr->iStart, &pCsr->iEnd, &pCsr->iPos + ); + + if( rc!=SQLITE_OK ){ + fts3tokResetCursor(pCsr); + if( rc==SQLITE_DONE ) rc = SQLITE_OK; + } + + return rc; +} + +/* +** xFilter - Initialize a cursor to point at the start of its data. +*/ +static int fts3tokFilterMethod( + sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */ + int idxNum, /* Strategy index */ + const char *idxStr, /* Unused */ + int nVal, /* Number of elements in apVal */ + sqlite3_value **apVal /* Arguments for the indexing scheme */ +){ + int rc = SQLITE_ERROR; + Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; + Fts3tokTable *pTab = (Fts3tokTable *)(pCursor->pVtab); + UNUSED_PARAMETER(idxStr); + UNUSED_PARAMETER(nVal); + + fts3tokResetCursor(pCsr); + if( idxNum==1 ){ + const char *zByte = (const char *)sqlite3_value_text(apVal[0]); + int nByte = sqlite3_value_bytes(apVal[0]); + pCsr->zInput = sqlite3_malloc(nByte+1); + if( pCsr->zInput==0 ){ + rc = SQLITE_NOMEM; + }else{ + memcpy(pCsr->zInput, zByte, nByte); + pCsr->zInput[nByte] = 0; + rc = pTab->pMod->xOpen(pTab->pTok, pCsr->zInput, nByte, &pCsr->pCsr); + if( rc==SQLITE_OK ){ + pCsr->pCsr->pTokenizer = pTab->pTok; + } + } + } + + if( rc!=SQLITE_OK ) return rc; + return fts3tokNextMethod(pCursor); +} + +/* +** xEof - Return true if the cursor is at EOF, or false otherwise. +*/ +static int fts3tokEofMethod(sqlite3_vtab_cursor *pCursor){ + Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; + return (pCsr->zToken==0); +} + +/* +** xColumn - Return a column value. 
+*/ +static int fts3tokColumnMethod( + sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ + sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */ + int iCol /* Index of column to read value from */ +){ + Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; + + /* CREATE TABLE x(input, token, start, end, position) */ + switch( iCol ){ + case 0: + sqlite3_result_text(pCtx, pCsr->zInput, -1, SQLITE_TRANSIENT); + break; + case 1: + sqlite3_result_text(pCtx, pCsr->zToken, pCsr->nToken, SQLITE_TRANSIENT); + break; + case 2: + sqlite3_result_int(pCtx, pCsr->iStart); + break; + case 3: + sqlite3_result_int(pCtx, pCsr->iEnd); + break; + default: + assert( iCol==4 ); + sqlite3_result_int(pCtx, pCsr->iPos); + break; + } + return SQLITE_OK; +} + +/* +** xRowid - Return the current rowid for the cursor. +*/ +static int fts3tokRowidMethod( + sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ + sqlite_int64 *pRowid /* OUT: Rowid value */ +){ + Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; + *pRowid = (sqlite3_int64)pCsr->iRowid; + return SQLITE_OK; +} + +/* +** Register the fts3tok module with database connection db. Return SQLITE_OK +** if successful or an error code if sqlite3_create_module() fails. +*/ +SQLITE_PRIVATE int sqlite3Fts3InitTok(sqlite3 *db, Fts3Hash *pHash){ + static const sqlite3_module fts3tok_module = { + 0, /* iVersion */ + fts3tokConnectMethod, /* xCreate */ + fts3tokConnectMethod, /* xConnect */ + fts3tokBestIndexMethod, /* xBestIndex */ + fts3tokDisconnectMethod, /* xDisconnect */ + fts3tokDisconnectMethod, /* xDestroy */ + fts3tokOpenMethod, /* xOpen */ + fts3tokCloseMethod, /* xClose */ + fts3tokFilterMethod, /* xFilter */ + fts3tokNextMethod, /* xNext */ + fts3tokEofMethod, /* xEof */ + fts3tokColumnMethod, /* xColumn */ + fts3tokRowidMethod, /* xRowid */ + 0, /* xUpdate */ + 0, /* xBegin */ + 0, /* xSync */ + 0, /* xCommit */ + 0, /* xRollback */ + 0, /* xFindFunction */ + 0, /* xRename */ + 0, /* xSavepoint */ + 0, /* xRelease */ + 0 /* xRollbackTo */ + }; + int rc; /* Return code */ + + rc = sqlite3_create_module(db, "fts3tokenize", &fts3tok_module, (void*)pHash); + return rc; +} + +#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ + +/************** End of fts3_tokenize_vtab.c **********************************/ /************** Begin file fts3_write.c **************************************/ /* ** 2009 Oct 23 diff --git a/src/3rdparty/sqlite3.h b/src/3rdparty/sqlite3.h index 69b4586a3f..e398838287 100644 --- a/src/3rdparty/sqlite3.h +++ b/src/3rdparty/sqlite3.h @@ -107,9 +107,9 @@ extern "C" { ** [sqlite3_libversion_number()], [sqlite3_sourceid()], ** [sqlite_version()] and [sqlite_source_id()]. 
*/ -#define SQLITE_VERSION "3.7.16.2" -#define SQLITE_VERSION_NUMBER 3007016 -#define SQLITE_SOURCE_ID "2013-04-12 11:52:43 cbea02d93865ce0e06789db95fd9168ebac970c7" +#define SQLITE_VERSION "3.7.17" +#define SQLITE_VERSION_NUMBER 3007017 +#define SQLITE_SOURCE_ID "2013-05-20 00:56:22 118a3b35693b134d56ebd780123b7fd6f1497668" /* ** CAPI3REF: Run-Time Library Version Numbers @@ -425,6 +425,8 @@ SQLITE_API int sqlite3_exec( #define SQLITE_FORMAT 24 /* Auxiliary database format error */ #define SQLITE_RANGE 25 /* 2nd parameter to sqlite3_bind out of range */ #define SQLITE_NOTADB 26 /* File opened that is not a database file */ +#define SQLITE_NOTICE 27 /* Notifications from sqlite3_log() */ +#define SQLITE_WARNING 28 /* Warnings from sqlite3_log() */ #define SQLITE_ROW 100 /* sqlite3_step() has another row ready */ #define SQLITE_DONE 101 /* sqlite3_step() has finished executing */ /* end-of-error-codes */ @@ -475,6 +477,7 @@ SQLITE_API int sqlite3_exec( #define SQLITE_IOERR_SHMMAP (SQLITE_IOERR | (21<<8)) #define SQLITE_IOERR_SEEK (SQLITE_IOERR | (22<<8)) #define SQLITE_IOERR_DELETE_NOENT (SQLITE_IOERR | (23<<8)) +#define SQLITE_IOERR_MMAP (SQLITE_IOERR | (24<<8)) #define SQLITE_LOCKED_SHAREDCACHE (SQLITE_LOCKED | (1<<8)) #define SQLITE_BUSY_RECOVERY (SQLITE_BUSY | (1<<8)) #define SQLITE_CANTOPEN_NOTEMPDIR (SQLITE_CANTOPEN | (1<<8)) @@ -494,6 +497,8 @@ SQLITE_API int sqlite3_exec( #define SQLITE_CONSTRAINT_TRIGGER (SQLITE_CONSTRAINT | (7<<8)) #define SQLITE_CONSTRAINT_UNIQUE (SQLITE_CONSTRAINT | (8<<8)) #define SQLITE_CONSTRAINT_VTAB (SQLITE_CONSTRAINT | (9<<8)) +#define SQLITE_NOTICE_RECOVER_WAL (SQLITE_NOTICE | (1<<8)) +#define SQLITE_NOTICE_RECOVER_ROLLBACK (SQLITE_NOTICE | (2<<8)) /* ** CAPI3REF: Flags For File Open Operations @@ -733,6 +738,9 @@ struct sqlite3_io_methods { void (*xShmBarrier)(sqlite3_file*); int (*xShmUnmap)(sqlite3_file*, int deleteFlag); /* Methods above are valid for version 2 */ + int (*xFetch)(sqlite3_file*, sqlite3_int64 iOfst, int iAmt, void **pp); + int (*xUnfetch)(sqlite3_file*, sqlite3_int64 iOfst, void *p); + /* Methods above are valid for version 3 */ /* Additional methods may be added in future releases */ }; @@ -869,7 +877,8 @@ struct sqlite3_io_methods { ** it is able to override built-in [PRAGMA] statements. ** **
  • [[SQLITE_FCNTL_BUSYHANDLER]] -** ^This file-control may be invoked by SQLite on the database file handle +** ^The [SQLITE_FCNTL_BUSYHANDLER] +** file-control may be invoked by SQLite on the database file handle ** shortly after it is opened in order to provide a custom VFS with access ** to the connections busy-handler callback. The argument is of type (void **) ** - an array of two (void *) values. The first (void *) actually points @@ -880,13 +889,24 @@ struct sqlite3_io_methods { ** current operation. ** **
  • [[SQLITE_FCNTL_TEMPFILENAME]] -** ^Application can invoke this file-control to have SQLite generate a +** ^Application can invoke the [SQLITE_FCNTL_TEMPFILENAME] file-control +** to have SQLite generate a ** temporary filename using the same algorithm that is followed to generate ** temporary filenames for TEMP tables and other internal uses. The ** argument should be a char** which will be filled with the filename ** written into memory obtained from [sqlite3_malloc()]. The caller should ** invoke [sqlite3_free()] on the result to avoid a memory leak. ** +**
  • [[SQLITE_FCNTL_MMAP_SIZE]] +** The [SQLITE_FCNTL_MMAP_SIZE] file control is used to query or set the +** maximum number of bytes that will be used for memory-mapped I/O. +** The argument is a pointer to a value of type sqlite3_int64 that +** is an advisory maximum number of bytes in the file to memory map. The +** pointer is overwritten with the old value. The limit is not changed if +** the value originally pointed to is negative, and so the current limit +** can be queried by passing in a pointer to a negative number. This +** file-control is used internally to implement [PRAGMA mmap_size]. +** ** */ #define SQLITE_FCNTL_LOCKSTATE 1 @@ -905,6 +925,7 @@ struct sqlite3_io_methods { #define SQLITE_FCNTL_PRAGMA 14 #define SQLITE_FCNTL_BUSYHANDLER 15 #define SQLITE_FCNTL_TEMPFILENAME 16 +#define SQLITE_FCNTL_MMAP_SIZE 18 /* ** CAPI3REF: Mutex Handle @@ -1571,7 +1592,9 @@ struct sqlite3_mem_methods { ** page cache implementation into that object.)^
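**
** A minimal sketch of the [SQLITE_FCNTL_MMAP_SIZE] file control described
** above, assuming an open handle "db"; passing a negative value queries the
** current limit of database "main" without changing it:
**
**   sqlite3_int64 szMmap = -1;
**   sqlite3_file_control(db, "main", SQLITE_FCNTL_MMAP_SIZE, &szMmap);
**
** After the call, szMmap holds the connection's current mmap limit in bytes.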
  • ** ** [[SQLITE_CONFIG_LOG]]
    SQLITE_CONFIG_LOG
    -**
    ^The SQLITE_CONFIG_LOG option takes two arguments: a pointer to a +**
    The SQLITE_CONFIG_LOG option is used to configure the SQLite +** global [error log]. +** (^The SQLITE_CONFIG_LOG option takes two arguments: a pointer to a ** function with a call signature of void(*)(void*,int,const char*), ** and a pointer to void. ^If the function pointer is not NULL, it is ** invoked by [sqlite3_log()] to process each logging event. ^If the @@ -1617,12 +1640,12 @@ struct sqlite3_mem_methods { **
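**
** A sketch of the error-log callback shape that SQLITE_CONFIG_LOG expects,
** as described above; the callback name and fprintf() body are illustrative
** and <stdio.h> is assumed:
**
**   static void xErrorLog(void *pArg, int iErrCode, const char *zMsg){
**     fprintf(stderr, "sqlite3 (%d): %s\n", iErrCode, zMsg);
**   }
**   sqlite3_config(SQLITE_CONFIG_LOG, xErrorLog, (void*)0);
**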
    SQLITE_CONFIG_PCACHE and SQLITE_CONFIG_GETPCACHE **
    These options are obsolete and should not be used by new code. ** They are retained for backwards compatibility but are now no-ops. -** +**
    ** ** [[SQLITE_CONFIG_SQLLOG]] **
    SQLITE_CONFIG_SQLLOG **
    This option is only available if sqlite is compiled with the -** SQLITE_ENABLE_SQLLOG pre-processor macro defined. The first argument should +** [SQLITE_ENABLE_SQLLOG] pre-processor macro defined. The first argument should ** be a pointer to a function of type void(*)(void*,sqlite3*,const char*, int). ** The second should be of type (void*). The callback is invoked by the library ** in three separate circumstances, identified by the value passed as the @@ -1632,7 +1655,23 @@ struct sqlite3_mem_methods { ** fourth parameter is 1, then the SQL statement that the third parameter ** points to has just been executed. Or, if the fourth parameter is 2, then ** the connection being passed as the second parameter is being closed. The -** third parameter is passed NULL In this case. +** third parameter is passed NULL In this case. An example of using this +** configuration option can be seen in the "test_sqllog.c" source file in +** the canonical SQLite source tree.
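**
** A sketch of a callback with the SQLITE_CONFIG_SQLLOG signature described
** above; it is only invoked when the library is built with
** SQLITE_ENABLE_SQLLOG, and the names here are illustrative.  The fourth
** argument selects the circumstance (1: the SQL in zSql was just executed
** on db, 2: db is being closed and zSql is NULL):
**
**   static void xSqllog(void *pArg, sqlite3 *db, const char *zSql, int eType){
**     if( eType==1 ){ ... }else if( eType==2 ){ ... }
**   }
**
** Registration:  sqlite3_config(SQLITE_CONFIG_SQLLOG, xSqllog, (void*)0);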
    +** +** [[SQLITE_CONFIG_MMAP_SIZE]] +**
    SQLITE_CONFIG_MMAP_SIZE +**
    SQLITE_CONFIG_MMAP_SIZE takes two 64-bit integer (sqlite3_int64) values +** that are the default mmap size limit (the default setting for +** [PRAGMA mmap_size]) and the maximum allowed mmap size limit. +** The default setting can be overridden by each database connection using +** either the [PRAGMA mmap_size] command, or by using the +** [SQLITE_FCNTL_MMAP_SIZE] file control. The maximum allowed mmap size +** cannot be changed at run-time. Nor may the maximum allowed mmap size +** exceed the compile-time maximum mmap size set by the +** [SQLITE_MAX_MMAP_SIZE] compile-time option. +** If either argument to this option is negative, then that argument is +** changed to its compile-time default. ** */ #define SQLITE_CONFIG_SINGLETHREAD 1 /* nil */ @@ -1656,6 +1695,7 @@ struct sqlite3_mem_methods { #define SQLITE_CONFIG_GETPCACHE2 19 /* sqlite3_pcache_methods2* */ #define SQLITE_CONFIG_COVERING_INDEX_SCAN 20 /* int */ #define SQLITE_CONFIG_SQLLOG 21 /* xSqllog, void* */ +#define SQLITE_CONFIG_MMAP_SIZE 22 /* sqlite3_int64, sqlite3_int64 */ /* ** CAPI3REF: Database Connection Configuration Options @@ -2489,6 +2529,9 @@ SQLITE_API int sqlite3_set_authorizer( ** as each triggered subprogram is entered. The callbacks for triggers ** contain a UTF-8 SQL comment that identifies the trigger.)^ ** +** The [SQLITE_TRACE_SIZE_LIMIT] compile-time option can be used to limit +** the length of [bound parameter] expansion in the output of sqlite3_trace(). +** ** ^The callback function registered by sqlite3_profile() is invoked ** as each SQL statement finishes. ^The profile callback contains ** the original statement text and an estimate of wall-clock time @@ -3027,7 +3070,8 @@ SQLITE_API int sqlite3_limit(sqlite3*, int id, int newVal); **
  • ** ^If the database schema changes, instead of returning [SQLITE_SCHEMA] as it ** always used to do, [sqlite3_step()] will automatically recompile the SQL -** statement and try to run it again. +** statement and try to run it again. As many as [SQLITE_MAX_SCHEMA_RETRY] +** retries will occur before sqlite3_step() gives up and returns an error. **
  • ** **
  • @@ -3231,6 +3275,9 @@ typedef struct sqlite3_context sqlite3_context; ** parameter [SQLITE_LIMIT_VARIABLE_NUMBER] (default value: 999). ** ** ^The third argument is the value to bind to the parameter. +** ^If the third parameter to sqlite3_bind_text() or sqlite3_bind_text16() +** or sqlite3_bind_blob() is a NULL pointer then the fourth parameter +** is ignored and the end result is the same as sqlite3_bind_null(). ** ** ^(In those routines that have a fourth argument, its value is the ** number of bytes in the parameter. To be clear: the value is the @@ -4187,7 +4234,7 @@ SQLITE_API void sqlite3_set_auxdata(sqlite3_context*, int N, void*, void (*)(voi ** the content before returning. ** ** The typedef is necessary to work around problems in certain -** C++ compilers. See ticket #2191. +** C++ compilers. */ typedef void (*sqlite3_destructor_type)(void*); #define SQLITE_STATIC ((sqlite3_destructor_type)0) @@ -4986,11 +5033,20 @@ SQLITE_API int sqlite3_table_column_metadata( ** ^This interface loads an SQLite extension library from the named file. ** ** ^The sqlite3_load_extension() interface attempts to load an -** SQLite extension library contained in the file zFile. +** [SQLite extension] library contained in the file zFile. If +** the file cannot be loaded directly, attempts are made to load +** with various operating-system specific extensions added. +** So for example, if "samplelib" cannot be loaded, then names like +** "samplelib.so" or "samplelib.dylib" or "samplelib.dll" might +** be tried also. ** ** ^The entry point is zProc. -** ^zProc may be 0, in which case the name of the entry point -** defaults to "sqlite3_extension_init". +** ^(zProc may be 0, in which case SQLite will try to come up with an +** entry point name on its own. It first tries "sqlite3_extension_init". +** If that does not work, it constructs a name "sqlite3_X_init" where the +** X is consists of the lower-case equivalent of all ASCII alphabetic +** characters in the filename from the last "/" to the first following +** "." and omitting any initial "lib".)^ ** ^The sqlite3_load_extension() interface returns ** [SQLITE_OK] on success and [SQLITE_ERROR] if something goes wrong. ** ^If an error occurs and pzErrMsg is not 0, then the @@ -5016,11 +5072,11 @@ SQLITE_API int sqlite3_load_extension( ** CAPI3REF: Enable Or Disable Extension Loading ** ** ^So as not to open security holes in older applications that are -** unprepared to deal with extension loading, and as a means of disabling -** extension loading while evaluating user-entered SQL, the following API +** unprepared to deal with [extension loading], and as a means of disabling +** [extension loading] while evaluating user-entered SQL, the following API ** is provided to turn the [sqlite3_load_extension()] mechanism on and off. ** -** ^Extension loading is off by default. See ticket #1863. +** ^Extension loading is off by default. ** ^Call the sqlite3_enable_load_extension() routine with onoff==1 ** to turn extension loading on and call it with onoff==0 to turn ** it back off again. @@ -5032,7 +5088,7 @@ SQLITE_API int sqlite3_enable_load_extension(sqlite3 *db, int onoff); ** ** ^This interface causes the xEntryPoint() function to be invoked for ** each new [database connection] that is created. The idea here is that -** xEntryPoint() is the entry point for a statically linked SQLite extension +** xEntryPoint() is the entry point for a statically linked [SQLite extension] ** that is to be automatically loaded into all new database connections. 
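**
** A sketch of loading a run-time loadable extension as described above,
** assuming extension loading has been enabled on the open handle "db" and
** an extension file named "samplelib" (SQLite would then try an entry-point
** name such as sqlite3_samplelib_init):
**
**   char *zErrMsg = 0;
**   int rc = sqlite3_load_extension(db, "samplelib", 0, &zErrMsg);
**   if( rc!=SQLITE_OK ) sqlite3_free(zErrMsg);
**
** The error message, if any, is written into memory obtained from
** sqlite3_malloc() and should be released with sqlite3_free() as shown.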
** ** ^(Even though the function prototype shows that xEntryPoint() takes @@ -6812,10 +6868,25 @@ SQLITE_API int sqlite3_unlock_notify( SQLITE_API int sqlite3_stricmp(const char *, const char *); SQLITE_API int sqlite3_strnicmp(const char *, const char *, int); +/* +** CAPI3REF: String Globbing +* +** ^The [sqlite3_strglob(P,X)] interface returns zero if string X matches +** the glob pattern P, and it returns non-zero if string X does not match +** the glob pattern P. ^The definition of glob pattern matching used in +** [sqlite3_strglob(P,X)] is the same as for the "X GLOB P" operator in the +** SQL dialect used by SQLite. ^The sqlite3_strglob(P,X) function is case +** sensitive. +** +** Note that this routine returns zero on a match and non-zero if the strings +** do not match, the same as [sqlite3_stricmp()] and [sqlite3_strnicmp()]. +*/ +SQLITE_API int sqlite3_strglob(const char *zGlob, const char *zStr); + /* ** CAPI3REF: Error Logging Interface ** -** ^The [sqlite3_log()] interface writes a message into the error log +** ^The [sqlite3_log()] interface writes a message into the [error log] ** established by the [SQLITE_CONFIG_LOG] option to [sqlite3_config()]. ** ^If logging is enabled, the zFormat string and subsequent arguments are ** used with [sqlite3_snprintf()] to generate the final output string. From df2841458d43b71d67082cbb765411d06f8ad81a Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Fri, 5 Jul 2013 02:00:14 -0400 Subject: [PATCH 042/118] Large overhaul in name and appearance for file analysis. --- scripts/base/files/extract/__load__.bro | 1 + scripts/base/files/extract/main.bro | 38 +++ scripts/base/files/hash/main.bro | 16 +- .../{file-analysis => files}/__load__.bro | 0 .../{file-analysis => files}/main.bro | 231 +++++++++++------- scripts/base/init-bare.bro | 4 +- scripts/base/init-default.bro | 1 + scripts/base/protocols/ftp/__load__.bro | 1 - scripts/base/protocols/ftp/file-analysis.bro | 37 +-- scripts/base/protocols/ftp/file-extract.bro | 90 ------- scripts/base/protocols/http/__load__.bro | 6 +- scripts/base/protocols/http/file-analysis.bro | 69 +++--- scripts/base/protocols/http/file-extract.bro | 100 -------- scripts/base/protocols/http/file-hash.bro | 68 ------ scripts/base/protocols/http/file-ident.bro | 105 -------- scripts/base/protocols/http/main.bro | 18 +- scripts/base/protocols/irc/__load__.bro | 2 +- scripts/base/protocols/irc/dcc-send.bro | 108 +------- scripts/base/protocols/irc/file-analysis.bro | 18 +- scripts/base/protocols/smtp/__load__.bro | 2 +- scripts/base/protocols/smtp/entities.bro | 212 ++-------------- scripts/base/protocols/smtp/file-analysis.bro | 15 +- .../policy/frameworks/files/detect-MHR.bro | 63 +++++ .../frameworks/files/hash-all-files.bro | 7 + .../frameworks/intel/smtp-url-extraction.bro | 2 +- .../protocols/smtp/entities-excerpt.bro | 0 scripts/site/local.bro | 11 +- src/const.bif | 2 +- src/event.bif | 10 +- src/file_analysis.bif | 22 +- src/file_analysis/Analyzer.h | 4 +- src/file_analysis/AnalyzerSet.cc | 2 +- src/file_analysis/DataEvent.cc | 2 +- src/file_analysis/Extract.cc | 2 +- src/file_analysis/Manager.cc | 4 +- .../file-analysis/bifs/remove_action.bro | 4 +- .../bifs/set_timeout_interval.bro | 2 +- .../frameworks/file-analysis/bifs/stop.bro | 2 +- testing/scripts/file-analysis-test.bro | 14 +- 39 files changed, 420 insertions(+), 875 deletions(-) create mode 100644 scripts/base/files/extract/__load__.bro create mode 100644 scripts/base/files/extract/main.bro rename scripts/base/frameworks/{file-analysis => 
files}/__load__.bro (100%) rename scripts/base/frameworks/{file-analysis => files}/main.bro (52%) delete mode 100644 scripts/base/protocols/ftp/file-extract.bro delete mode 100644 scripts/base/protocols/http/file-extract.bro delete mode 100644 scripts/base/protocols/http/file-hash.bro delete mode 100644 scripts/base/protocols/http/file-ident.bro create mode 100644 scripts/policy/frameworks/files/detect-MHR.bro create mode 100644 scripts/policy/frameworks/files/hash-all-files.bro rename scripts/{base => policy}/protocols/smtp/entities-excerpt.bro (100%) diff --git a/scripts/base/files/extract/__load__.bro b/scripts/base/files/extract/__load__.bro new file mode 100644 index 0000000000..d551be57d3 --- /dev/null +++ b/scripts/base/files/extract/__load__.bro @@ -0,0 +1 @@ +@load ./main \ No newline at end of file diff --git a/scripts/base/files/extract/main.bro b/scripts/base/files/extract/main.bro new file mode 100644 index 0000000000..70e61c8529 --- /dev/null +++ b/scripts/base/files/extract/main.bro @@ -0,0 +1,38 @@ +@load base/frameworks/files +@load base/utils/paths + +module FileExtract; + +export { + ## The prefix where files are extracted to. + const prefix = "./extract_files/" &redef; + + redef record Files::Info += { + ## Local filenames of extracted file. + extracted: string &optional &log; + }; + + redef record Files::AnalyzerArgs += { + ## The local filename to which to write an extracted file. + ## This field is used in the core by the extraction plugin + ## to know where to write the file to. It's also optional + extract_filename: string &optional; + }; +} + +function on_add(f: fa_file, args: Files::AnalyzerArgs) + { + if ( ! args?$extract_filename ) + args$extract_filename = cat("extract-", f$source, "-", f$id); + + f$info$extracted = args$extract_filename; + args$extract_filename = build_path_compressed(prefix, args$extract_filename); + } + +event bro_init() &priority=10 + { + Files::register_analyzer_add_callback(Files::ANALYZER_EXTRACT, on_add); + + # Create the extraction directory. + mkdir(prefix); + } \ No newline at end of file diff --git a/scripts/base/files/hash/main.bro b/scripts/base/files/hash/main.bro index cd50d6b291..926e39865a 100644 --- a/scripts/base/files/hash/main.bro +++ b/scripts/base/files/hash/main.bro @@ -1,13 +1,23 @@ +@load base/frameworks/files -module FilesHash; +module FileHash; export { - + redef record Files::Info += { + ## An MD5 digest of the file contents. + md5: string &log &optional; + + ## A SHA1 digest of the file contents. + sha1: string &log &optional; + + ## A SHA256 digest of the file contents. 
+ sha256: string &log &optional; + }; + } event file_hash(f: fa_file, kind: string, hash: string) &priority=5 { - set_info(f); switch ( kind ) { case "md5": f$info$md5 = hash; diff --git a/scripts/base/frameworks/file-analysis/__load__.bro b/scripts/base/frameworks/files/__load__.bro similarity index 100% rename from scripts/base/frameworks/file-analysis/__load__.bro rename to scripts/base/frameworks/files/__load__.bro diff --git a/scripts/base/frameworks/file-analysis/main.bro b/scripts/base/frameworks/files/main.bro similarity index 52% rename from scripts/base/frameworks/file-analysis/main.bro rename to scripts/base/frameworks/files/main.bro index 7b1bd7d81c..1c0481a87c 100644 --- a/scripts/base/frameworks/file-analysis/main.bro +++ b/scripts/base/frameworks/files/main.bro @@ -3,8 +3,9 @@ @load base/file_analysis.bif @load base/frameworks/logging +@load base/utils/site -module FileAnalysis; +module Files; export { redef enum Log::ID += { @@ -14,21 +15,14 @@ export { ## A structure which represents a desired type of file analysis. type AnalyzerArgs: record { - ## The type of analysis. - tag: Analyzer; - - ## The local filename to which to write an extracted file. Must be - ## set when *tag* is :bro:see:`FileAnalysis::ANALYZER_EXTRACT`. - extract_filename: string &optional; - ## An event which will be generated for all new file contents, ## chunk-wise. Used when *tag* is - ## :bro:see:`FileAnalysis::ANALYZER_DATA_EVENT`. + ## :bro:see:`Files::ANALYZER_DATA_EVENT`. chunk_event: event(f: fa_file, data: string, off: count) &optional; ## An event which will be generated for all new file contents, ## stream-wise. Used when *tag* is - ## :bro:see:`FileAnalysis::ANALYZER_DATA_EVENT`. + ## :bro:see:`Files::ANALYZER_DATA_EVENT`. stream_event: event(f: fa_file, data: string) &optional; } &redef; @@ -40,23 +34,52 @@ export { ts: time &log; ## An identifier associated with a single file. - id: string &log; + fuid: string &log; - ## Identifier associated with a container file from which this one was - ## extracted as part of the file analysis. - parent_id: string &log &optional; + ## If this file was transferred over a network + ## connection this should show the host or hosts that + ## the data sourced from. + tx_hosts: set[addr] &log; + + ## If this file was transferred over a network + ## connection this should show the host or hosts that + ## the data traveled to. + rx_hosts: set[addr] &log; + + ## Connection UIDS over which the file was transferred. + conn_uids: set[string] &log; ## An identification of the source of the file data. E.g. it may be ## a network protocol over which it was transferred, or a local file ## path which was read, or some other input source. source: string &log &optional; - ## If the source of this file is is a network connection, this field - ## may be set to indicate the directionality. - is_orig: bool &log &optional; + ## A value to represent the depth of this file in relation + ## to its source. In SMTP, it is the depth of the MIME + ## attachment on the message. In HTTP, it is the depth of the + ## request within the TCP connection. + depth: count &default=0 &log; - ## The time at which the last activity for the file was seen. - last_active: time &log; + ## A set of analysis types done during the file analysis. + analyzers: set[Analyzer] &log; + + ## A mime type provided by libmagic against the *bof_buffer*, or + ## in the cases where no buffering of the beginning of file occurs, + ## an initial guess of the mime type based on the first data seen. 
+ mime_type: string &log &optional; + + ## A filename for the file if one is available from the source + ## for the file. These will frequently come from + ## "Content-Disposition" headers in network protocols. + filename: string &log &optional; + + ## The duration the file was analyzed for. + duration: interval &log &default=0secs; + + ## If the source of this file is is a network connection, this field + ## indicates if the data originated from the local network or not as + ## determined by the configured bro:see:`Site::local_nets`. + local_orig: bool &log &optional; ## Number of bytes provided to the file analysis engine for the file. seen_bytes: count &log &default=0; @@ -72,49 +95,18 @@ export { ## were delivered to file analyzers due to reassembly buffer overflow. overflow_bytes: count &log &default=0; - ## The amount of time between receiving new data for this file that - ## the analysis engine will wait before giving up on it. - timeout_interval: interval &log &optional; - - ## The number of bytes at the beginning of a file to save for later - ## inspection in *bof_buffer* field. - bof_buffer_size: count &log &optional; - - ## A mime type provided by libmagic against the *bof_buffer*, or - ## in the cases where no buffering of the beginning of file occurs, - ## an initial guess of the mime type based on the first data seen. - mime_type: string &log &optional; - ## Whether the file analysis timed out at least once for the file. timedout: bool &log &default=F; - ## Connection UIDS over which the file was transferred. - conn_uids: set[string] &log; - - ## A set of analysis types done during the file analysis. - analyzers: set[Analyzer]; - - ## Local filenames of extracted files. - extracted_files: set[string] &log; - - ## An MD5 digest of the file contents. - md5: string &log &optional; - - ## A SHA1 digest of the file contents. - sha1: string &log &optional; - - ## A SHA256 digest of the file contents. - sha256: string &log &optional; + ## Identifier associated with a container file from which this one was + ## extracted as part of the file analysis. + parent_fuid: string &log &optional; } &redef; ## A table that can be used to disable file analysis completely for ## any files transferred over given network protocol analyzers. const disable: table[AnalyzerTag] of bool = table() &redef; - ## Event that can be handled to access the Info record as it is sent on - ## to the logging framework. - global log_file_analysis: event(rec: Info); - ## The salt concatenated to unique file handle strings generated by ## :bro:see:`get_file_handle` before hashing them in to a file id ## (the *id* field of :bro:see:`fa_file`). @@ -146,7 +138,9 @@ export { ## Returns: true if the analyzer will be added, or false if analysis ## for the *id* isn't currently active or the *args* ## were invalid for the analyzer type. - global add_analyzer: function(f: fa_file, args: AnalyzerArgs): bool; + global add_analyzer: function(f: fa_file, + tag: Files::Analyzer, + args: AnalyzerArgs &default=AnalyzerArgs()): bool; ## Removes an analyzer from the analysis of a given file. ## @@ -156,7 +150,7 @@ export { ## ## Returns: true if the analyzer will be removed, or false if analysis ## for the *id* isn't currently active. - global remove_analyzer: function(f: fa_file, args: AnalyzerArgs): bool; + global remove_analyzer: function(f: fa_file, tag: Files::Analyzer, args: AnalyzerArgs): bool; ## Stops/ignores any further analysis of a given file. 
## @@ -166,45 +160,75 @@ export { ## rest of it's contents, or false if analysis for the *id* ## isn't currently active. global stop: function(f: fa_file): bool; + + ## Register callbacks for protocols that work with the Files framework. + ## The callbacks must uniquely identify a file and each protocol can + ## only have a single callback registered for it. + ## + ## tag: Tag for the protocol analyzer having a callback being registered. + ## + ## callback: Function that can generate a file handle for the protocol analyzer + ## defined previously. + ## + ## Returns: true if the protocol being registered was not previously registered. + global register_protocol: function(tag: AnalyzerTag, callback: function(c: connection, is_orig: bool): string): bool; + + ## Register a callback for file analyzers to use if they need to do some manipulation + ## when they are being added to a file before the core code takes over. This is + ## unlikely to be interesting for users and should only be called by file analyzer + ## authors but it *not required*. + ## + ## tag: Tag for the file analyzer. + ## + ## callback: Function to execute when the given file analyzer is being added. + global register_analyzer_add_callback: function(tag: Files::Analyzer, callback: function(f: fa_file, args: AnalyzerArgs)); + + ## Event that can be handled to access the Info record as it is sent on + ## to the logging framework. + global log_files: event(rec: Info); } redef record fa_file += { info: Info &optional; }; +redef record AnalyzerArgs += { + # This is used interally for the core file analyzer api. + tag: Files::Analyzer &optional; +}; + +# Store the callbacks for protocol analyzers that have files. +global registered_protocols: table[AnalyzerTag] of function(c: connection, is_orig: bool): string = table() + &default=function(c: connection, is_orig: bool): string { return cat(c$uid, is_orig); }; + +global analyzer_add_callbacks: table[Files::Analyzer] of function(f: fa_file, args: AnalyzerArgs) = table(); + +event bro_init() &priority=5 + { + Log::create_stream(Files::LOG, [$columns=Info, $ev=log_files]); + } + function set_info(f: fa_file) { if ( ! f?$info ) { - local tmp: Info = Info($ts=network_time()); + local tmp: Info = Info($ts=f$last_active, + $fuid=f$id); f$info = tmp; } - f$info$ts = network_time(); - f$info$id = f$id; if ( f?$parent_id ) - f$info$parent_id = f$parent_id; + f$info$parent_fuid = f$parent_id; if ( f?$source ) f$info$source = f$source; - if ( f?$is_orig ) - f$info$is_orig = f$is_orig; - f$info$last_active = f$last_active; + f$info$duration = f$last_active - f$info$ts; f$info$seen_bytes = f$seen_bytes; if ( f?$total_bytes ) f$info$total_bytes = f$total_bytes; f$info$missing_bytes = f$missing_bytes; f$info$overflow_bytes = f$overflow_bytes; - f$info$timeout_interval = f$timeout_interval; - f$info$bof_buffer_size = f$bof_buffer_size; if ( f?$mime_type ) f$info$mime_type = f$mime_type; - if ( f?$conns ) - { - for ( cid in f$conns ) - { - add f$info$conn_uids[f$conns[cid]$uid]; - } - } } function set_timeout_interval(f: fa_file, t: interval): bool @@ -212,21 +236,31 @@ function set_timeout_interval(f: fa_file, t: interval): bool return __set_timeout_interval(f$id, t); } -function add_analyzer(f: fa_file, args: AnalyzerArgs): bool +function add_analyzer(f: fa_file, tag: Analyzer, args: AnalyzerArgs): bool { - if ( ! __add_analyzer(f$id, args) ) return F; + # This is to construct the correct args for the core API. 
+ args$tag = tag; + add f$info$analyzers[tag]; - set_info(f); - add f$info$analyzers[args$tag]; - - if ( args$tag == FileAnalysis::ANALYZER_EXTRACT ) - add f$info$extracted_files[args$extract_filename]; + if ( tag in analyzer_add_callbacks ) + analyzer_add_callbacks[tag](f, args); + if ( ! __add_analyzer(f$id, args) ) + { + Reporter::warning(fmt("Analyzer %s not added successfully to file %s.", tag, f$id)); + return F; + } return T; } -function remove_analyzer(f: fa_file, args: AnalyzerArgs): bool +function register_analyzer_add_callback(tag: Files::Analyzer, callback: function(f: fa_file, args: AnalyzerArgs)) { + analyzer_add_callbacks[tag] = callback; + } + +function remove_analyzer(f: fa_file, tag: Files::Analyzer, args: AnalyzerArgs): bool + { + args$tag = tag; return __remove_analyzer(f$id, args); } @@ -235,25 +269,48 @@ function stop(f: fa_file): bool return __stop(f$id); } -event bro_init() &priority=5 +event file_new(f: fa_file) &priority=10 { - Log::create_stream(FileAnalysis::LOG, - [$columns=Info, $ev=log_file_analysis]); + set_info(f); } -event file_timeout(f: fa_file) &priority=5 +event file_over_new_connection(f: fa_file, c: connection) &priority=10 + { + set_info(f); + add f$info$conn_uids[c$uid]; + local cid = c$id; + add f$info$tx_hosts[f$is_orig ? cid$orig_h : cid$resp_h]; + if( |Site::local_nets| > 0 ) + f$info$local_orig=Site::is_local_addr(f$is_orig ? cid$orig_h : cid$resp_h); + + add f$info$rx_hosts[f$is_orig ? cid$resp_h : cid$orig_h]; + } + +event file_timeout(f: fa_file) &priority=10 { set_info(f); f$info$timedout = T; } - -event file_state_remove(f: fa_file) &priority=5 +event file_state_remove(f: fa_file) &priority=10 { set_info(f); } -event file_state_remove(f: fa_file) &priority=-5 +event file_state_remove(f: fa_file) &priority=-10 { - Log::write(FileAnalysis::LOG, f$info); + Log::write(Files::LOG, f$info); + } + +function register_protocol(tag: AnalyzerTag, callback: function(c: connection, is_orig: bool): string): bool + { + local result = (tag !in registered_protocols); + registered_protocols[tag] = callback; + return result; + } + +event get_file_handle(tag: AnalyzerTag, c: connection, is_orig: bool) &priority=5 + { + local handler = registered_protocols[tag]; + set_file_handle(handler(c, is_orig)); } diff --git a/scripts/base/init-bare.bro b/scripts/base/init-bare.bro index c4245d9052..4e1a5248c8 100644 --- a/scripts/base/init-bare.bro +++ b/scripts/base/init-bare.bro @@ -339,7 +339,7 @@ type fa_file: record { ## An identification of the source of the file data. E.g. it may be ## a network protocol over which it was transferred, or a local file ## path which was read, or some other input source. - source: string &optional; + source: string; ## If the source of this file is is a network connection, this field ## may be set to indicate the directionality. 
@@ -3101,4 +3101,4 @@ const snaplen = 8192 &redef; @load base/frameworks/input -@load base/frameworks/file-analysis +@load base/frameworks/files diff --git a/scripts/base/init-default.bro b/scripts/base/init-default.bro index 03ba474e0b..719842af09 100644 --- a/scripts/base/init-default.bro +++ b/scripts/base/init-default.bro @@ -47,5 +47,6 @@ @load base/protocols/syslog @load base/files/hash +@load base/files/extract @load base/misc/find-checksum-offloading diff --git a/scripts/base/protocols/ftp/__load__.bro b/scripts/base/protocols/ftp/__load__.bro index 464571dc7d..9c839610ac 100644 --- a/scripts/base/protocols/ftp/__load__.bro +++ b/scripts/base/protocols/ftp/__load__.bro @@ -1,5 +1,4 @@ @load ./utils-commands @load ./main @load ./file-analysis -@load ./file-extract @load ./gridftp diff --git a/scripts/base/protocols/ftp/file-analysis.bro b/scripts/base/protocols/ftp/file-analysis.bro index f8fa2d816b..3710a44cee 100644 --- a/scripts/base/protocols/ftp/file-analysis.bro +++ b/scripts/base/protocols/ftp/file-analysis.bro @@ -1,6 +1,6 @@ @load ./main @load base/utils/conn-ids -@load base/frameworks/file-analysis/main +@load base/frameworks/files module FTP; @@ -9,40 +9,15 @@ export { global get_file_handle: function(c: connection, is_orig: bool): string; } -function get_handle_string(c: connection): string - { - return cat(ANALYZER_FTP_DATA, " ", c$start_time, " ", id_string(c$id)); - } - function get_file_handle(c: connection, is_orig: bool): string { - if ( [c$id$resp_h, c$id$resp_p] !in ftp_data_expected ) return ""; + if ( [c$id$resp_h, c$id$resp_p] !in ftp_data_expected ) + return ""; - local info: FTP::Info = ftp_data_expected[c$id$resp_h, c$id$resp_p]; - - if ( info$passive ) - # FTP client initiates data channel. - if ( is_orig ) - # Don't care about FTP client data. - return ""; - else - # Do care about FTP server data. - return get_handle_string(c); - else - # FTP server initiates dta channel. - if ( is_orig ) - # Do care about FTP server data. - return get_handle_string(c); - else - # Don't care about FTP client data. - return ""; + return cat(ANALYZER_FTP_DATA, c$start_time, c$id, is_orig); } -module GLOBAL; - -event get_file_handle(tag: AnalyzerTag, c: connection, is_orig: bool) - &priority=5 +event bro_init() &priority=5 { - if ( tag != ANALYZER_FTP_DATA ) return; - set_file_handle(FTP::get_file_handle(c, is_orig)); + Files::register_protocol(ANALYZER_FTP_DATA, FTP::get_file_handle); } diff --git a/scripts/base/protocols/ftp/file-extract.bro b/scripts/base/protocols/ftp/file-extract.bro deleted file mode 100644 index 2b7bb8cd50..0000000000 --- a/scripts/base/protocols/ftp/file-extract.bro +++ /dev/null @@ -1,90 +0,0 @@ -##! File extraction support for FTP. - -@load ./main -@load base/utils/files - -module FTP; - -export { - ## Pattern of file mime types to extract from FTP transfers. - const extract_file_types = /NO_DEFAULT/ &redef; - - ## The on-disk prefix for files to be extracted from FTP-data transfers. - const extraction_prefix = "ftp-item" &redef; -} - -redef record Info += { - ## On disk file where it was extracted to. - extraction_file: string &log &optional; - - ## Indicates if the current command/response pair should attempt to - ## extract the file if a file was transferred. - extract_file: bool &default=F; -}; - -function get_extraction_name(f: fa_file): string - { - local r = fmt("%s-%s.dat", extraction_prefix, f$id); - return r; - } - -event file_new(f: fa_file) &priority=5 - { - if ( ! 
f?$source ) return; - if ( f$source != "FTP_DATA" ) return; - - if ( f?$mime_type && extract_file_types in f$mime_type ) - { - FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT, - $extract_filename=get_extraction_name(f)]); - return; - } - - if ( ! f?$conns ) return; - - for ( cid in f$conns ) - { - local c: connection = f$conns[cid]; - - if ( [cid$resp_h, cid$resp_p] !in ftp_data_expected ) next; - - local s = ftp_data_expected[cid$resp_h, cid$resp_p]; - - if ( ! s$extract_file ) next; - - FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT, - $extract_filename=get_extraction_name(f)]); - return; - } - } - -event file_state_remove(f: fa_file) &priority=4 - { - if ( ! f?$source ) return; - if ( f$source != "FTP_DATA" ) return; - if ( ! f?$info ) return; - - for ( filename in f$info$extracted_files ) - { - local s: FTP::Info; - s$ts = network_time(); - s$tags = set(); - s$user = ""; - s$extraction_file = filename; - - if ( f?$conns ) - for ( cid in f$conns ) - { - s$uid = f$conns[cid]$uid; - s$id = cid; - } - - Log::write(FTP::LOG, s); - } - } - -event log_ftp(rec: Info) &priority=-10 - { - delete rec$extraction_file; - delete rec$extract_file; - } diff --git a/scripts/base/protocols/http/__load__.bro b/scripts/base/protocols/http/__load__.bro index 58618dedc7..585b815eed 100644 --- a/scripts/base/protocols/http/__load__.bro +++ b/scripts/base/protocols/http/__load__.bro @@ -1,6 +1,6 @@ @load ./main @load ./utils @load ./file-analysis -@load ./file-ident -@load ./file-hash -@load ./file-extract +#@load ./file-ident +#@load ./file-hash +#@load ./file-extract diff --git a/scripts/base/protocols/http/file-analysis.bro b/scripts/base/protocols/http/file-analysis.bro index 769bb509f5..b79ca041b8 100644 --- a/scripts/base/protocols/http/file-analysis.bro +++ b/scripts/base/protocols/http/file-analysis.bro @@ -1,53 +1,58 @@ @load ./main @load ./utils @load base/utils/conn-ids -@load base/frameworks/file-analysis/main +@load base/frameworks/files module HTTP; export { - redef record HTTP::Info += { - ## Number of MIME entities in the HTTP request message body so far. - request_mime_level: count &default=0; - ## Number of MIME entities in the HTTP response message body so far. - response_mime_level: count &default=0; + redef record Info += { + ## The sniffed mime type of the data being sent by the client. + client_mime_type: string &log &optional; + + ## The sniffed mime type of the data being returned by the server. + mime_type: string &log &optional; }; ## Default file handle provider for HTTP. global get_file_handle: function(c: connection, is_orig: bool): string; } -event http_begin_entity(c: connection, is_orig: bool) &priority=5 - { - if ( ! c?$http ) return; - - if ( is_orig ) - ++c$http$request_mime_level; - else - ++c$http$response_mime_level; - } - function get_file_handle(c: connection, is_orig: bool): string { - if ( ! c?$http ) return ""; - - local mime_level: count = - is_orig ? c$http$request_mime_level : c$http$response_mime_level; - local mime_level_str: string = mime_level > 1 ? cat(mime_level) : ""; + if ( ! c?$http ) + return ""; + local mime_depth = is_orig ? 
c$http$orig_mime_depth : c$http$resp_mime_depth; if ( c$http$range_request ) - return cat(ANALYZER_HTTP, " ", is_orig, " ", c$id$orig_h, " ", - build_url(c$http)); - - return cat(ANALYZER_HTTP, " ", c$start_time, " ", is_orig, " ", - c$http$trans_depth, mime_level_str, " ", id_string(c$id)); + { + return cat(ANALYZER_HTTP, is_orig, c$id$orig_h, mime_depth, build_url(c$http)); + } + else + { + return cat(ANALYZER_HTTP, c$start_time, is_orig, + c$http$trans_depth, mime_depth, id_string(c$id)); + } } -module GLOBAL; - -event get_file_handle(tag: AnalyzerTag, c: connection, is_orig: bool) - &priority=5 +event bro_init() &priority=5 { - if ( tag != ANALYZER_HTTP ) return; - set_file_handle(HTTP::get_file_handle(c, is_orig)); + Files::register_protocol(ANALYZER_HTTP, HTTP::get_file_handle); } + +event file_over_new_connection(f: fa_file, c: connection) &priority=5 + { + if ( c?$http ) + { + #if (!f?$mime_type) + # print f; +# + #if ( f$is_orig ) + # c$http$client_mime_type = f$mime_type; + #else + # c$http$mime_type = f$mime_type; + + if ( c$http?$filename ) + f$info$filename = c$http$filename; + } + } \ No newline at end of file diff --git a/scripts/base/protocols/http/file-extract.bro b/scripts/base/protocols/http/file-extract.bro deleted file mode 100644 index a8c6039395..0000000000 --- a/scripts/base/protocols/http/file-extract.bro +++ /dev/null @@ -1,100 +0,0 @@ -##! Extracts the items from HTTP traffic, one per file. At this time only -##! the message body from the server can be extracted with this script. - -@load ./main -@load ./file-analysis - -module HTTP; - -export { - ## Pattern of file mime types to extract from HTTP response entity bodies. - const extract_file_types = /NO_DEFAULT/ &redef; - - ## The on-disk prefix for files to be extracted from HTTP entity bodies. - const extraction_prefix = "http-item" &redef; - - redef record Info += { - ## On-disk location where files in request body were extracted. - extracted_request_files: vector of string &log &optional; - - ## On-disk location where files in response body were extracted. - extracted_response_files: vector of string &log &optional; - - ## Indicates if the response body is to be extracted or not. Must be - ## set before or by the first :bro:see:`file_new` for the file content. - extract_file: bool &default=F; - }; -} - -function get_extraction_name(f: fa_file): string - { - local r = fmt("%s-%s.dat", extraction_prefix, f$id); - return r; - } - -function add_extraction_file(c: connection, is_orig: bool, fn: string) - { - if ( is_orig ) - { - if ( ! c$http?$extracted_request_files ) - c$http$extracted_request_files = vector(); - c$http$extracted_request_files[|c$http$extracted_request_files|] = fn; - } - else - { - if ( ! c$http?$extracted_response_files ) - c$http$extracted_response_files = vector(); - c$http$extracted_response_files[|c$http$extracted_response_files|] = fn; - } - } - -event file_new(f: fa_file) &priority=5 - { - if ( ! f?$source ) return; - if ( f$source != "HTTP" ) return; - if ( ! f?$conns ) return; - - local fname: string; - local c: connection; - - if ( f?$mime_type && extract_file_types in f$mime_type ) - { - fname = get_extraction_name(f); - FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT, - $extract_filename=fname]); - - for ( cid in f$conns ) - { - c = f$conns[cid]; - if ( ! c?$http ) next; - add_extraction_file(c, f$is_orig, fname); - } - - return; - } - - local extracting: bool = F; - - for ( cid in f$conns ) - { - c = f$conns[cid]; - - if ( ! c?$http ) next; - - if ( ! 
c$http$extract_file ) next; - - fname = get_extraction_name(f); - FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT, - $extract_filename=fname]); - extracting = T; - break; - } - - if ( extracting ) - for ( cid in f$conns ) - { - c = f$conns[cid]; - if ( ! c?$http ) next; - add_extraction_file(c, f$is_orig, fname); - } - } diff --git a/scripts/base/protocols/http/file-hash.bro b/scripts/base/protocols/http/file-hash.bro deleted file mode 100644 index 34d91e45bb..0000000000 --- a/scripts/base/protocols/http/file-hash.bro +++ /dev/null @@ -1,68 +0,0 @@ -##! Calculate hashes for HTTP body transfers. - -@load ./main -@load ./file-analysis - -module HTTP; - -export { - redef record Info += { - ## MD5 sum for a file transferred over HTTP calculated from the - ## response body. - md5: string &log &optional; - - ## This value can be set per-transfer to determine per request - ## if a file should have an MD5 sum generated. It must be - ## set to T at the time of or before the first chunk of body data. - calc_md5: bool &default=F; - }; - - ## Generate MD5 sums for these filetypes. - const generate_md5 = /application\/x-dosexec/ # Windows and DOS executables - | /application\/x-executable/ # *NIX executable binary - &redef; -} - -event file_new(f: fa_file) &priority=5 - { - if ( ! f?$source ) return; - if ( f$source != "HTTP" ) return; - - if ( f?$mime_type && generate_md5 in f$mime_type ) - { - FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]); - return; - } - - if ( ! f?$conns ) return; - - for ( cid in f$conns ) - { - local c: connection = f$conns[cid]; - - if ( ! c?$http ) next; - - if ( ! c$http$calc_md5 ) next; - - FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]); - return; - } - } - -event file_state_remove(f: fa_file) &priority=4 - { - if ( ! f?$source ) return; - if ( f$source != "HTTP" ) return; - if ( ! f?$conns ) return; - if ( ! f?$info ) return; - if ( ! f$info?$md5 ) return; - - for ( cid in f$conns ) - { - local c: connection = f$conns[cid]; - - if ( ! c?$http ) next; - - c$http$md5 = f$info$md5; - } - } diff --git a/scripts/base/protocols/http/file-ident.bro b/scripts/base/protocols/http/file-ident.bro deleted file mode 100644 index 7ed4b58a37..0000000000 --- a/scripts/base/protocols/http/file-ident.bro +++ /dev/null @@ -1,105 +0,0 @@ -##! Identification of file types in HTTP response bodies with file content sniffing. - -@load base/frameworks/notice -@load ./main -@load ./utils -@load ./file-analysis - -module HTTP; - -export { - redef enum Notice::Type += { - ## Indicates when the file extension doesn't seem to match the file - ## contents. - Incorrect_File_Type, - }; - - redef record Info += { - ## Mime type of response body identified by content sniffing. - mime_type: string &log &optional; - }; - - ## Mapping between mime type strings (without character set) and - ## regular expressions for URLs. - ## The :bro:enum:`HTTP::Incorrect_File_Type` notice is generated if the - ## pattern doesn't match the mime type that was discovered. - const mime_types_extensions: table[string] of pattern = { - ["application/x-dosexec"] = /\.([eE][xX][eE]|[dD][lL][lL])/, - } &redef; - - ## A pattern for filtering out :bro:enum:`HTTP::Incorrect_File_Type` urls - ## that are not noteworthy before a notice is created. Each - ## pattern added should match the complete URL (the matched URLs include - ## "http://" at the beginning). - const ignored_incorrect_file_type_urls = /^$/ &redef; -} - -event file_new(f: fa_file) &priority=5 - { - if ( ! 
f?$source ) return; - if ( f$source != "HTTP" ) return; - if ( ! f?$mime_type ) return; - if ( ! f?$conns ) return; - - for ( cid in f$conns ) - { - local c: connection = f$conns[cid]; - - if ( ! c?$http ) next; - - c$http$mime_type = f$mime_type; - - local mime_str: string = c$http$mime_type; - - if ( mime_str !in mime_types_extensions ) next; - if ( ! c$http?$uri ) next; - if ( mime_types_extensions[mime_str] in c$http$uri ) next; - - local url = build_url_http(c$http); - - if ( url == ignored_incorrect_file_type_urls ) next; - - local message = fmt("%s %s %s", mime_str, c$http$method, url); - NOTICE([$note=Incorrect_File_Type, - $msg=message, - $conn=c]); - } - } - -event file_over_new_connection(f: fa_file, c: connection) &priority=5 - { - if ( ! f?$source ) return; - if ( f$source != "HTTP" ) return; - if ( ! f?$mime_type ) return; - if ( ! c?$http ) return; - - # Spread the mime around (e.g. for partial content, file_type event only - # happens once for the first connection, but if there's subsequent - # connections to transfer the same file, they'll be lacking the mime_type - # field if we don't do this). - c$http$mime_type = f$mime_type; - } - -# Tracks byte-range request / partial content response mime types, indexed -# by [connection, uri] pairs. This is needed because a person can pipeline -# byte-range requests over multiple connections to the same uri. Without -# the tracking, only the first request in the pipeline for each connection -# would get a mime_type field assigned to it (by the FileAnalysis policy hooks). -global partial_types: table[conn_id, string] of string &read_expire=5mins; - -# Priority 4 so that it runs before the handler that will write to http.log. -event http_message_done(c: connection, is_orig: bool, stat: http_message_stat) - &priority=4 - { - if ( ! c$http$range_request ) return; - if ( ! c$http?$uri ) return; - - if ( c$http?$mime_type ) - { - partial_types[c$id, c$http$uri] = c$http$mime_type; - return; - } - - if ( [c$id, c$http$uri] in partial_types ) - c$http$mime_type = partial_types[c$id, c$http$uri]; - } diff --git a/scripts/base/protocols/http/main.bro b/scripts/base/protocols/http/main.bro index a1771c8e77..ebf412d36e 100644 --- a/scripts/base/protocols/http/main.bro +++ b/scripts/base/protocols/http/main.bro @@ -71,10 +71,14 @@ export { ## All of the headers that may indicate if the request was proxied. proxied: set[string] &log &optional; - + ## Indicates if this request can assume 206 partial content in ## response. - range_request: bool &default=F; + range_request: bool &default=F; + ## Number of MIME entities in the HTTP request message body so far. + orig_mime_depth: count &default=0; + ## Number of MIME entities in the HTTP response message body so far. 
+ resp_mime_depth: count &default=0; }; ## Structure to maintain state for an HTTP connection with multiple @@ -283,6 +287,16 @@ event http_header(c: connection, is_orig: bool, name: string, value: string) &pr } } +event http_begin_entity(c: connection, is_orig: bool) &priority=5 + { + set_state(c, F, is_orig); + + if ( is_orig ) + ++c$http$orig_mime_depth; + else + ++c$http$resp_mime_depth; + } + event http_message_done(c: connection, is_orig: bool, stat: http_message_stat) &priority = 5 { set_state(c, F, is_orig); diff --git a/scripts/base/protocols/irc/__load__.bro b/scripts/base/protocols/irc/__load__.bro index 5123385b0c..d20550c54f 100644 --- a/scripts/base/protocols/irc/__load__.bro +++ b/scripts/base/protocols/irc/__load__.bro @@ -1,3 +1,3 @@ @load ./main -@load ./dcc-send +#@load ./dcc-send @load ./file-analysis diff --git a/scripts/base/protocols/irc/dcc-send.bro b/scripts/base/protocols/irc/dcc-send.bro index 53381d0302..afe01485a2 100644 --- a/scripts/base/protocols/irc/dcc-send.bro +++ b/scripts/base/protocols/irc/dcc-send.bro @@ -15,12 +15,6 @@ module IRC; export { - ## Pattern of file mime types to extract from IRC DCC file transfers. - const extract_file_types = /NO_DEFAULT/ &redef; - - ## On-disk prefix for files to be extracted from IRC DCC file transfers. - const extraction_prefix = "irc-dcc-item" &redef; - redef record Info += { ## DCC filename requested. dcc_file_name: string &log &optional; @@ -28,101 +22,10 @@ export { dcc_file_size: count &log &optional; ## Sniffed mime type of the file. dcc_mime_type: string &log &optional; - - ## The file handle for the file to be extracted - extraction_file: string &log &optional; - - ## A boolean to indicate if the current file transfer should be extracted. - extract_file: bool &default=F; }; } -global dcc_expected_transfers: table[addr, port] of Info &read_expire=5mins; - -function set_dcc_mime(f: fa_file) - { - if ( ! f?$conns ) return; - - for ( cid in f$conns ) - { - local c: connection = f$conns[cid]; - - if ( [cid$resp_h, cid$resp_p] !in dcc_expected_transfers ) next; - - local s = dcc_expected_transfers[cid$resp_h, cid$resp_p]; - - s$dcc_mime_type = f$mime_type; - } - } - -function set_dcc_extraction_file(f: fa_file, filename: string) - { - if ( ! f?$conns ) return; - - for ( cid in f$conns ) - { - local c: connection = f$conns[cid]; - - if ( [cid$resp_h, cid$resp_p] !in dcc_expected_transfers ) next; - - local s = dcc_expected_transfers[cid$resp_h, cid$resp_p]; - - s$extraction_file = filename; - } - } - -function get_extraction_name(f: fa_file): string - { - local r = fmt("%s-%s.dat", extraction_prefix, f$id); - return r; - } - -# this handler sets the IRC::Info mime type -event file_new(f: fa_file) &priority=5 - { - if ( ! f?$source ) return; - if ( f$source != "IRC_DATA" ) return; - if ( ! f?$mime_type ) return; - - set_dcc_mime(f); - } - -# this handler check if file extraction is desired -event file_new(f: fa_file) &priority=5 - { - if ( ! f?$source ) return; - if ( f$source != "IRC_DATA" ) return; - - local fname: string; - - if ( f?$mime_type && extract_file_types in f$mime_type ) - { - fname = get_extraction_name(f); - FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT, - $extract_filename=fname]); - set_dcc_extraction_file(f, fname); - return; - } - - if ( ! f?$conns ) return; - - for ( cid in f$conns ) - { - local c: connection = f$conns[cid]; - - if ( [cid$resp_h, cid$resp_p] !in dcc_expected_transfers ) next; - - local s = dcc_expected_transfers[cid$resp_h, cid$resp_p]; - - if ( ! 
s$extract_file ) next; - - fname = get_extraction_name(f); - FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT, - $extract_filename=fname]); - s$extraction_file = fname; - return; - } - } +global dcc_expected_transfers: table[addr, port] of Info &synchronized &read_expire=5mins; function log_dcc(f: fa_file) { @@ -143,22 +46,17 @@ function log_dcc(f: fa_file) # Delete these values in case another DCC transfer # happens during the IRC session. - delete irc$extract_file; - delete irc$extraction_file; delete irc$dcc_file_name; delete irc$dcc_file_size; delete irc$dcc_mime_type; - return; } } event file_new(f: fa_file) &priority=-5 { - if ( ! f?$source ) return; - if ( f$source != "IRC_DATA" ) return; - - log_dcc(f); + if ( f?$source && f$source == "IRC_DATA" ) + log_dcc(f); } event irc_dcc_message(c: connection, is_orig: bool, diff --git a/scripts/base/protocols/irc/file-analysis.bro b/scripts/base/protocols/irc/file-analysis.bro index 5159064b27..f2e84fbc22 100644 --- a/scripts/base/protocols/irc/file-analysis.bro +++ b/scripts/base/protocols/irc/file-analysis.bro @@ -1,6 +1,6 @@ -@load ./dcc-send.bro +@load ./dcc-send @load base/utils/conn-ids -@load base/frameworks/file-analysis/main +@load base/frameworks/files module IRC; @@ -11,15 +11,13 @@ export { function get_file_handle(c: connection, is_orig: bool): string { - if ( is_orig ) return ""; - return cat(ANALYZER_IRC_DATA, " ", c$start_time, " ", id_string(c$id)); + if ( [c$id$resp_h, c$id$resp_p] !in dcc_expected_transfers ) + return ""; + + return cat(ANALYZER_IRC_DATA, c$start_time, c$id, is_orig); } -module GLOBAL; - -event get_file_handle(tag: AnalyzerTag, c: connection, is_orig: bool) - &priority=5 +event bro_init() &priority=5 { - if ( tag != ANALYZER_IRC_DATA ) return; - set_file_handle(IRC::get_file_handle(c, is_orig)); + Files::register_protocol(ANALYZER_IRC_DATA, IRC::get_file_handle); } diff --git a/scripts/base/protocols/smtp/__load__.bro b/scripts/base/protocols/smtp/__load__.bro index bac9cc118f..1e913d8dff 100644 --- a/scripts/base/protocols/smtp/__load__.bro +++ b/scripts/base/protocols/smtp/__load__.bro @@ -1,4 +1,4 @@ @load ./main @load ./entities -@load ./entities-excerpt +#@load ./entities-excerpt @load ./file-analysis diff --git a/scripts/base/protocols/smtp/entities.bro b/scripts/base/protocols/smtp/entities.bro index b58766e51d..dcb53dc0aa 100644 --- a/scripts/base/protocols/smtp/entities.bro +++ b/scripts/base/protocols/smtp/entities.bro @@ -1,5 +1,6 @@ ##! Analysis and logging for MIME entities found in SMTP sessions. +@load base/frameworks/files @load base/utils/strings @load base/utils/files @load ./main @@ -7,217 +8,56 @@ module SMTP; export { - redef enum Log::ID += { ENTITIES_LOG }; - - type EntityInfo: record { - ## This is the timestamp of when the MIME content transfer began. - ts: time &log; - uid: string &log; - id: conn_id &log; - ## A count to represent the depth of this message transaction in a - ## single connection where multiple messages were transferred. - trans_depth: count &log; - ## The filename seen in the Content-Disposition header. - filename: string &log &optional; - ## Track how many bytes of the MIME encoded file have been seen. - content_len: count &log &default=0; - ## The mime type of the entity discovered through magic bytes identification. - mime_type: string &log &optional; - - ## The calculated MD5 sum for the MIME entity. - md5: string &log &optional; - ## Optionally calculate the file's MD5 sum. 
Must be set prior to the - ## first data chunk being see in an event. - calc_md5: bool &default=F; - - ## Optionally write the file to disk. Must be set prior to first - ## data chunk being seen in an event. - extract_file: bool &default=F; - ## Store the file handle here for the file currently being extracted. - extraction_file: string &log &optional; + type Entity: record { + filename: string &optional; }; redef record Info += { - ## The in-progress entity information. - current_entity: EntityInfo &optional; + ## The current entity being seen. + entity: Entity &optional; }; redef record State += { - ## Track the number of MIME encoded files transferred during a session. - mime_level: count &default=0; + ## Track the number of MIME encoded files transferred + ## during a session. + mime_depth: count &default=0; }; - - ## Generate MD5 sums for these filetypes. - const generate_md5 = /application\/x-dosexec/ # Windows and DOS executables - | /application\/x-executable/ # *NIX executable binary - &redef; - - ## Pattern of file mime types to extract from MIME bodies. - const extract_file_types = /NO_DEFAULT/ &redef; - - ## The on-disk prefix for files to be extracted from MIME entity bodies. - const extraction_prefix = "smtp-entity" &redef; - - ## If set, never generate MD5s. This is mainly for testing purposes to create - ## reproducable output in the case that the decision whether to create - ## checksums depends on environment specifics. - const never_calc_md5 = F &redef; - - global log_mime: event(rec: EntityInfo); } -event bro_init() &priority=5 - { - Log::create_stream(SMTP::ENTITIES_LOG, [$columns=EntityInfo, $ev=log_mime]); - } - -function set_session(c: connection, new_entity: bool) - { - if ( ! c$smtp?$current_entity || new_entity ) - { - local info: EntityInfo; - info$ts=network_time(); - info$uid=c$uid; - info$id=c$id; - info$trans_depth=c$smtp$trans_depth; - - c$smtp$current_entity = info; - ++c$smtp_state$mime_level; - } - } - -function get_extraction_name(f: fa_file): string - { - local r = fmt("%s-%s.dat", extraction_prefix, f$id); - return r; - } - event mime_begin_entity(c: connection) &priority=10 { - if ( ! c?$smtp ) return; + #print fmt("%s : begin entity", c$uid); - set_session(c, T); + c$smtp$entity = Entity(); + ++c$smtp_state$mime_depth; } -event file_new(f: fa_file) &priority=5 +event file_over_new_connection(f: fa_file, c: connection) &priority=5 { - if ( ! f?$source ) return; - if ( f$source != "SMTP" ) return; - if ( ! f?$conns ) return; - - local fname: string; - local extracting: bool = F; - - for ( cid in f$conns ) - { - local c: connection = f$conns[cid]; - - if ( ! c?$smtp ) next; - if ( ! c$smtp?$current_entity ) next; - - if ( c$smtp$current_entity$extract_file ) - { - if ( ! extracting ) - { - fname = get_extraction_name(f); - FileAnalysis::add_analyzer(f, - [$tag=FileAnalysis::ANALYZER_EXTRACT, - $extract_filename=fname]); - extracting = T; - } - - c$smtp$current_entity$extraction_file = fname; - } - - if ( c$smtp$current_entity$calc_md5 ) - FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]); - } - } - -function check_extract_by_type(f: fa_file) - { - if ( extract_file_types !in f$mime_type ) return; - - if ( f?$info && FileAnalysis::ANALYZER_EXTRACT in f$info$analyzers ) + if ( f$source != "SMTP" ) return; - local fname: string = get_extraction_name(f); - FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT, - $extract_filename=fname]); - - if ( ! 
f?$conns ) return; - - for ( cid in f$conns ) - { - local c: connection = f$conns[cid]; - if ( ! c?$smtp ) next; - c$smtp$current_entity$extraction_file = fname; - } + if ( c$smtp$entity?$filename ) + f$info$filename = c$smtp$entity$filename; + f$info$depth = c$smtp_state$mime_depth; } -function check_md5_by_type(f: fa_file) +event mime_one_header(c: connection, h: mime_header_rec) &priority=5 { - if ( never_calc_md5 ) return; - if ( generate_md5 !in f$mime_type ) return; - - FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]); - } - -event file_new(f: fa_file) &priority=5 - { - if ( ! f?$source ) return; - if ( f$source != "SMTP" ) return; - if ( ! f?$mime_type ) return; - - if ( f?$conns ) - for ( cid in f$conns ) - { - local c: connection = f$conns[cid]; - - if ( ! c?$smtp ) next; - if ( ! c$smtp?$current_entity ) next; - - c$smtp$current_entity$mime_type = f$mime_type; - } - - check_extract_by_type(f); - check_md5_by_type(f); - } - -event file_state_remove(f: fa_file) &priority=4 - { - if ( ! f?$source ) return; - if ( f$source != "SMTP" ) return; - if ( ! f?$conns ) return; - - for ( cid in f$conns ) - { - local c: connection = f$conns[cid]; - - if ( ! c?$smtp ) next; - if ( ! c$smtp?$current_entity ) next; - # Only log if there was some content. - if ( f$seen_bytes == 0 ) next; - - if ( f?$info && f$info?$md5 ) - c$smtp$current_entity$md5 = f$info$md5; - - c$smtp$current_entity$content_len = f$seen_bytes; - Log::write(SMTP::ENTITIES_LOG, c$smtp$current_entity); - delete c$smtp$current_entity; + if ( ! c?$smtp ) return; - } - } -event mime_one_header(c: connection, h: mime_header_rec) - { - if ( ! c?$smtp ) return; - if ( h$name == "CONTENT-DISPOSITION" && /[fF][iI][lL][eE][nN][aA][mM][eE]/ in h$value ) - c$smtp$current_entity$filename = extract_filename_from_content_disposition(h$value); + c$smtp$entity$filename = extract_filename_from_content_disposition(h$value); if ( h$name == "CONTENT-TYPE" && /[nN][aA][mM][eE][:blank:]*=/ in h$value ) - c$smtp$current_entity$filename = extract_filename_from_content_disposition(h$value); + c$smtp$entity$filename = extract_filename_from_content_disposition(h$value); + } + +event mime_end_entity(c: connection) &priority=5 + { + if ( c?$smtp && c$smtp?$entity ) + delete c$smtp$entity; } diff --git a/scripts/base/protocols/smtp/file-analysis.bro b/scripts/base/protocols/smtp/file-analysis.bro index b893cbef7d..44938c8698 100644 --- a/scripts/base/protocols/smtp/file-analysis.bro +++ b/scripts/base/protocols/smtp/file-analysis.bro @@ -1,7 +1,7 @@ @load ./main @load ./entities @load base/utils/conn-ids -@load base/frameworks/file-analysis/main +@load base/frameworks/files module SMTP; @@ -12,16 +12,11 @@ export { function get_file_handle(c: connection, is_orig: bool): string { - if ( ! c?$smtp ) return ""; - return cat(ANALYZER_SMTP, " ", c$start_time, " ", c$smtp$trans_depth, " ", - c$smtp_state$mime_level); + return cat(ANALYZER_SMTP, c$start_time, c$smtp$trans_depth, + c$smtp_state$mime_depth); } -module GLOBAL; - -event get_file_handle(tag: AnalyzerTag, c: connection, is_orig: bool) - &priority=5 +event bro_init() &priority=5 { - if ( tag != ANALYZER_SMTP ) return; - set_file_handle(SMTP::get_file_handle(c, is_orig)); + Files::register_protocol(ANALYZER_SMTP, SMTP::get_file_handle); } diff --git a/scripts/policy/frameworks/files/detect-MHR.bro b/scripts/policy/frameworks/files/detect-MHR.bro new file mode 100644 index 0000000000..c896bd56fd --- /dev/null +++ b/scripts/policy/frameworks/files/detect-MHR.bro @@ -0,0 +1,63 @@ +##! 
Detect file downloads that have hash values matching files in Team +##! Cymru's Malware Hash Registry (http://www.team-cymru.org/Services/MHR/). + +@load base/frameworks/files +@load base/frameworks/notice +@load frameworks/files/hash-all-files + +module MalwareHashRegistery; + +export { + redef enum Notice::Type += { + ## The hash value of a file transferred over HTTP matched in the + ## malware hash registry. + Match + }; + + redef record Files::Info += { + ## Team Cymru Malware Hash Registry date of first detection. + mhr_first_detected: time &log &optional; + ## Team Cymru Malware Hash Registry percent of detection + ## among malware scanners. + mhr_detect_rate: count &log &optional; + }; + + ## File types to attempt matching against the Malware Hash Registry. + const match_file_types = /^application\/x-dosexec/ &redef; + + ## The malware hash registry runs each malware sample through several A/V engines. + ## Team Cymru returns a percentage to indicate how many A/V engines flagged the + ## sample as malicious. This threshold allows you to require a minimum detection + ## rate. + const notice_threshold = 10 &redef; +} + +event file_hash(f: fa_file, kind: string, hash: string) + { + if ( kind=="sha1" && match_file_types in f$mime_type ) + { + local hash_domain = fmt("%s.malware.hash.cymru.com", hash); + when ( local MHR_result = lookup_hostname_txt(hash_domain) ) + { + # Data is returned as " " + local MHR_answer = split1(MHR_result, / /); + if ( |MHR_answer| == 2 ) + { + f$info$mhr_first_detected = double_to_time(to_double(MHR_answer[1])); + f$info$mhr_detect_rate = to_count(MHR_answer[2]); + + #print strftime("%Y-%m-%d %H:%M:%S", f$info$mhr_first_detected); + if ( f$info$mhr_detect_rate >= notice_threshold ) + { + local url = ""; + # TODO: Create a generic mechanism for creating file "urls". + #if ( f$source == "HTTP" ) + # url = HTTP::build_url_http(f); + local message = fmt("%s %s", hash, url); + #local message = fmt("Host(s) %s sent a file with SHA1 hash %s to host %s", f$src_host, hash, f$dst_host); + NOTICE([$note=Match, $msg=message]); + } + } + } + } + } diff --git a/scripts/policy/frameworks/files/hash-all-files.bro b/scripts/policy/frameworks/files/hash-all-files.bro new file mode 100644 index 0000000000..931857c2bc --- /dev/null +++ b/scripts/policy/frameworks/files/hash-all-files.bro @@ -0,0 +1,7 @@ +# Perform MD5 and SHA1 hashing on all files. + +event file_new(f: fa_file) + { + Files::add_analyzer(f, Files::ANALYZER_MD5); + Files::add_analyzer(f, Files::ANALYZER_SHA1); + } diff --git a/scripts/policy/frameworks/intel/smtp-url-extraction.bro b/scripts/policy/frameworks/intel/smtp-url-extraction.bro index 2b87f809a6..b4ab32a915 100644 --- a/scripts/policy/frameworks/intel/smtp-url-extraction.bro +++ b/scripts/policy/frameworks/intel/smtp-url-extraction.bro @@ -26,6 +26,6 @@ event file_new(f: fa_file) &priority=5 if ( ! 
f?$source ) return; if ( f$source != "SMTP" ) return; - FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_DATA_EVENT, + Files::add_analyzer(f, [$tag=Files::ANALYZER_DATA_EVENT, $stream_event=intel_mime_data]); } diff --git a/scripts/base/protocols/smtp/entities-excerpt.bro b/scripts/policy/protocols/smtp/entities-excerpt.bro similarity index 100% rename from scripts/base/protocols/smtp/entities-excerpt.bro rename to scripts/policy/protocols/smtp/entities-excerpt.bro diff --git a/scripts/site/local.bro b/scripts/site/local.bro index dfebd9923a..e4b3a44e7a 100644 --- a/scripts/site/local.bro +++ b/scripts/site/local.bro @@ -64,7 +64,14 @@ # Detect logins using "interesting" hostnames. @load protocols/ssh/interesting-hostnames -# Detect MD5 sums in Team Cymru's Malware Hash Registry. -@load protocols/http/detect-MHR # Detect SQL injection attacks. @load protocols/http/detect-sqli + +#### Network File Handling #### + +# Enable MD5 and SHA1 hashing for all files. +@load frameworks/files/hash-all-files + +# Detect SHA1 sums in Team Cymru's Malware Hash Registry. +@load frameworks/files/detect-MHR + diff --git a/src/const.bif b/src/const.bif index 31e6ccee1a..10dceda6ff 100644 --- a/src/const.bif +++ b/src/const.bif @@ -24,4 +24,4 @@ const Tunnel::ip_tunnel_timeout: interval; const Threading::heartbeat_interval: interval; -const FileAnalysis::salt: string; +const Files::salt: string; diff --git a/src/event.bif b/src/event.bif index 5b14c05933..23ebc0591b 100644 --- a/src/event.bif +++ b/src/event.bif @@ -7001,7 +7001,7 @@ event event_queue_flush_point%(%); event get_file_handle%(tag: count, c: connection, is_orig: bool%); ## Indicates that an analysis of a new file has begun. The analysis can be -## augmented at this time via :bro:see:`FileAnalysis::add_analyzer`. +## augmented at this time via :bro:see:`Files::add_analyzer`. ## ## f: The file. ## @@ -7024,8 +7024,8 @@ event file_over_new_connection%(f: fa_file, c: connection%); ## f: The file. ## ## .. bro:see:: file_new file_over_new_connection file_gap file_state_remove -## default_file_timeout_interval FileAnalysis::set_timeout_interval -## FileAnalysis::set_timeout_interval +## default_file_timeout_interval Files::set_timeout_interval +## Files::set_timeout_interval event file_timeout%(f: fa_file%); ## Indicates that a chunk of the file is missing. @@ -7055,8 +7055,8 @@ event file_state_remove%(f: fa_file%); ## ## hash: The result of the hashing. ## -## .. bro:see:: FileAnalysis::add_analyzer FileAnalysis::ANALYZER_MD5 -## FileAnalysis::ANALYZER_SHA1 FileAnalysis::ANALYZER_SHA256 +## .. bro:see:: Files::add_analyzer Files::ANALYZER_MD5 +## Files::ANALYZER_SHA1 Files::ANALYZER_SHA256 event file_hash%(f: fa_file, kind: string, hash: string%); ## Deprecated. Will be removed. diff --git a/src/file_analysis.bif b/src/file_analysis.bif index ef46ccf9c1..648c031221 100644 --- a/src/file_analysis.bif +++ b/src/file_analysis.bif @@ -1,6 +1,6 @@ ##! Internal functions and types used by the logging framework. -module FileAnalysis; +module Files; %%{ #include "file_analysis/Manager.h" @@ -27,35 +27,35 @@ enum Analyzer %{ ANALYZER_DATA_EVENT, %} -## :bro:see:`FileAnalysis::set_timeout_interval`. -function FileAnalysis::__set_timeout_interval%(file_id: string, t: interval%): bool +## :bro:see:`Files::set_timeout_interval`. 
+function Files::__set_timeout_interval%(file_id: string, t: interval%): bool %{ bool result = file_mgr->SetTimeoutInterval(file_id->CheckString(), t); return new Val(result, TYPE_BOOL); %} -## :bro:see:`FileAnalysis::add_analyzer`. -function FileAnalysis::__add_analyzer%(file_id: string, args: any%): bool +## :bro:see:`Files::add_analyzer`. +function Files::__add_analyzer%(file_id: string, args: any%): bool %{ - using BifType::Record::FileAnalysis::AnalyzerArgs; + using BifType::Record::Files::AnalyzerArgs; RecordVal* rv = args->AsRecordVal()->CoerceTo(AnalyzerArgs); bool result = file_mgr->AddAnalyzer(file_id->CheckString(), rv); Unref(rv); return new Val(result, TYPE_BOOL); %} -## :bro:see:`FileAnalysis::remove_analyzer`. -function FileAnalysis::__remove_analyzer%(file_id: string, args: any%): bool +## :bro:see:`Files::remove_analyzer`. +function Files::__remove_analyzer%(file_id: string, args: any%): bool %{ - using BifType::Record::FileAnalysis::AnalyzerArgs; + using BifType::Record::Files::AnalyzerArgs; RecordVal* rv = args->AsRecordVal()->CoerceTo(AnalyzerArgs); bool result = file_mgr->RemoveAnalyzer(file_id->CheckString(), rv); Unref(rv); return new Val(result, TYPE_BOOL); %} -## :bro:see:`FileAnalysis::stop`. -function FileAnalysis::__stop%(file_id: string%): bool +## :bro:see:`Files::stop`. +function Files::__stop%(file_id: string%): bool %{ bool result = file_mgr->IgnoreFile(file_id->CheckString()); return new Val(result, TYPE_BOOL); diff --git a/src/file_analysis/Analyzer.h b/src/file_analysis/Analyzer.h index d32532b264..c348ab358b 100644 --- a/src/file_analysis/Analyzer.h +++ b/src/file_analysis/Analyzer.h @@ -8,7 +8,7 @@ namespace file_analysis { -typedef BifEnum::FileAnalysis::Analyzer FA_Tag; +typedef BifEnum::Files::Analyzer FA_Tag; class File; @@ -93,7 +93,7 @@ public: */ static FA_Tag ArgsTag(const RecordVal* args) { - using BifType::Record::FileAnalysis::AnalyzerArgs; + using BifType::Record::Files::AnalyzerArgs; return static_cast( args->Lookup(AnalyzerArgs->FieldOffset("tag"))->AsEnum()); } diff --git a/src/file_analysis/AnalyzerSet.cc b/src/file_analysis/AnalyzerSet.cc index 83c60d9abe..d10e78d338 100644 --- a/src/file_analysis/AnalyzerSet.cc +++ b/src/file_analysis/AnalyzerSet.cc @@ -26,7 +26,7 @@ static void analyzer_del_func(void* v) AnalyzerSet::AnalyzerSet(File* arg_file) : file(arg_file) { TypeList* t = new TypeList(); - t->Append(BifType::Record::FileAnalysis::AnalyzerArgs->Ref()); + t->Append(BifType::Record::Files::AnalyzerArgs->Ref()); analyzer_hash = new CompositeHash(t); Unref(t); analyzer_map.SetDeleteFunc(analyzer_del_func); diff --git a/src/file_analysis/DataEvent.cc b/src/file_analysis/DataEvent.cc index 159c8c19cd..1b04111c44 100644 --- a/src/file_analysis/DataEvent.cc +++ b/src/file_analysis/DataEvent.cc @@ -17,7 +17,7 @@ DataEvent::DataEvent(RecordVal* args, File* file, file_analysis::Analyzer* DataEvent::Instantiate(RecordVal* args, File* file) { - using BifType::Record::FileAnalysis::AnalyzerArgs; + using BifType::Record::Files::AnalyzerArgs; int chunk_off = AnalyzerArgs->FieldOffset("chunk_event"); int stream_off = AnalyzerArgs->FieldOffset("stream_event"); diff --git a/src/file_analysis/Extract.cc b/src/file_analysis/Extract.cc index cbe176d4ca..ef37425003 100644 --- a/src/file_analysis/Extract.cc +++ b/src/file_analysis/Extract.cc @@ -29,7 +29,7 @@ Extract::~Extract() file_analysis::Analyzer* Extract::Instantiate(RecordVal* args, File* file) { - using BifType::Record::FileAnalysis::AnalyzerArgs; + using BifType::Record::Files::AnalyzerArgs; 
Val* v = args->Lookup(AnalyzerArgs->FieldOffset("extract_filename")); if ( ! v ) diff --git a/src/file_analysis/Manager.cc b/src/file_analysis/Manager.cc index b247f23efc..61f9f7a10d 100644 --- a/src/file_analysis/Manager.cc +++ b/src/file_analysis/Manager.cc @@ -38,7 +38,7 @@ string Manager::HashHandle(const string& handle) const static string salt; if ( salt.empty() ) - salt = BifConst::FileAnalysis::salt->CheckString(); + salt = BifConst::Files::salt->CheckString(); char tmp[20]; uint64 hash[2]; @@ -310,7 +310,7 @@ void Manager::GetFileHandle(AnalyzerTag::Tag tag, Connection* c, bool is_orig) bool Manager::IsDisabled(AnalyzerTag::Tag tag) { if ( ! disabled ) - disabled = internal_const_val("FileAnalysis::disable")->AsTableVal(); + disabled = internal_const_val("Files::disable")->AsTableVal(); Val* index = new Val(tag, TYPE_COUNT); Val* yield = disabled->Lookup(index); diff --git a/testing/btest/scripts/base/frameworks/file-analysis/bifs/remove_action.bro b/testing/btest/scripts/base/frameworks/file-analysis/bifs/remove_action.bro index 1f15a4221f..e31abe5ea3 100644 --- a/testing/btest/scripts/base/frameworks/file-analysis/bifs/remove_action.bro +++ b/testing/btest/scripts/base/frameworks/file-analysis/bifs/remove_action.bro @@ -11,8 +11,8 @@ redef test_get_file_name = function(f: fa_file): string event file_new(f: fa_file) &priority=-10 { for ( tag in test_file_analyzers ) - FileAnalysis::remove_analyzer(f, tag); + Files::remove_analyzer(f, tag); local filename = test_get_file_name(f); - FileAnalysis::remove_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT, + Files::remove_analyzer(f, [$tag=Files::ANALYZER_EXTRACT, $extract_filename=filename]); } diff --git a/testing/btest/scripts/base/frameworks/file-analysis/bifs/set_timeout_interval.bro b/testing/btest/scripts/base/frameworks/file-analysis/bifs/set_timeout_interval.bro index 8ec4704cdb..c9eac4c31d 100644 --- a/testing/btest/scripts/base/frameworks/file-analysis/bifs/set_timeout_interval.bro +++ b/testing/btest/scripts/base/frameworks/file-analysis/bifs/set_timeout_interval.bro @@ -20,7 +20,7 @@ redef default_file_timeout_interval = 2sec; event file_timeout(f: fa_file) { if ( timeout_cnt < 1 ) - FileAnalysis::set_timeout_interval(f, f$timeout_interval); + Files::set_timeout_interval(f, f$timeout_interval); else terminate(); ++timeout_cnt; diff --git a/testing/btest/scripts/base/frameworks/file-analysis/bifs/stop.bro b/testing/btest/scripts/base/frameworks/file-analysis/bifs/stop.bro index e994706010..dd40c69684 100644 --- a/testing/btest/scripts/base/frameworks/file-analysis/bifs/stop.bro +++ b/testing/btest/scripts/base/frameworks/file-analysis/bifs/stop.bro @@ -4,5 +4,5 @@ event file_new(f: fa_file) { - FileAnalysis::stop(f); + Files::stop(f); } diff --git a/testing/scripts/file-analysis-test.bro b/testing/scripts/file-analysis-test.bro index 15929dd4f6..cb1027d8f1 100644 --- a/testing/scripts/file-analysis-test.bro +++ b/testing/scripts/file-analysis-test.bro @@ -1,7 +1,7 @@ global test_file_analysis_source: string = "" &redef; -global test_file_analyzers: set[FileAnalysis::AnalyzerArgs]; +global test_file_analyzers: set[Files::AnalyzerArgs]; global test_get_file_name: function(f: fa_file): string = function(f: fa_file): string { return ""; } &redef; @@ -30,13 +30,13 @@ event file_new(f: fa_file) f$source == test_file_analysis_source ) { for ( tag in test_file_analyzers ) - FileAnalysis::add_analyzer(f, tag); + Files::add_analyzer(f, tag); local filename: string = test_get_file_name(f); if ( filename != "" ) - 
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT, + Files::add_analyzer(f, [$tag=Files::ANALYZER_EXTRACT, $extract_filename=filename]); - FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_DATA_EVENT, + Files::add_analyzer(f, [$tag=Files::ANALYZER_DATA_EVENT, $chunk_event=file_chunk, $stream_event=file_stream]); } @@ -94,7 +94,7 @@ event file_state_remove(f: fa_file) event bro_init() { - add test_file_analyzers[[$tag=FileAnalysis::ANALYZER_MD5]]; - add test_file_analyzers[[$tag=FileAnalysis::ANALYZER_SHA1]]; - add test_file_analyzers[[$tag=FileAnalysis::ANALYZER_SHA256]]; + add test_file_analyzers[[$tag=Files::ANALYZER_MD5]]; + add test_file_analyzers[[$tag=Files::ANALYZER_SHA1]]; + add test_file_analyzers[[$tag=Files::ANALYZER_SHA256]]; } From 2b48396d23f2dddb9dcef005fb478d9d12b99dad Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Fri, 5 Jul 2013 02:00:35 -0400 Subject: [PATCH 043/118] Check file_over_new_connetion to fire for each connection (including the first). --- src/file_analysis/File.cc | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/file_analysis/File.cc b/src/file_analysis/File.cc index e68ee5523c..c1ad92c0ed 100644 --- a/src/file_analysis/File.cc +++ b/src/file_analysis/File.cc @@ -116,11 +116,8 @@ void File::UpdateConnectionFields(Connection* conn) Val* conns = val->Lookup(conns_idx); - bool is_first = false; - if ( ! conns ) { - is_first = true; conns = empty_connection_table(); val->Assign(conns_idx, conns); } @@ -131,7 +128,7 @@ void File::UpdateConnectionFields(Connection* conn) Val* conn_val = conn->BuildConnVal(); conns->AsTableVal()->Assign(idx, conn_val); - if ( ! is_first && FileEventAvailable(file_over_new_connection) ) + if ( FileEventAvailable(file_over_new_connection) ) { val_list* vl = new val_list(); vl->append(val->Ref()); From cdf6b7864ecab07bf6a6150cbaa3eb58a12251c0 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Tue, 9 Jul 2013 11:50:54 -0400 Subject: [PATCH 044/118] More file analysis updates. - Recorrected the module name to Files. - Added Files::analyzer_name to get a more readable name for a file analyzer. - Improved and just overall better handled multipart mime transfers in HTTP and SMTP. HTTP now has orig_fuids and resp_fuids log fields since multiple "files" can be transferred with multipart mime in a single request/response pair. SMTP has an fuids field which has file unique IDs for all parts transferred. FTP and IRC have a log field named fuid added because only a single file can be transferred per irc and ftp log line. 
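As an illustration only (not part of this patch), a local script exercising the renamed API could look roughly like the sketch below; the handler body is hypothetical, while Files::add_analyzer, Files::ANALYZER_MD5, and Files::analyzer_name are the names this patch provides:

    event file_new(f: fa_file)
        {
        # The AnalyzerArgs record can be omitted since it now has a &default.
        Files::add_analyzer(f, Files::ANALYZER_MD5);

        # Files::analyzer_name() maps the enum tag to the readable string that
        # also ends up in the files.log "analyzers" field.
        print Files::analyzer_name(Files::ANALYZER_MD5);
        }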
--- scripts/base/frameworks/files/main.bro | 51 +++++++++----- scripts/base/protocols/ftp/__load__.bro | 2 +- scripts/base/protocols/ftp/file-analysis.bro | 23 ------ scripts/base/protocols/ftp/files.bro | 40 +++++++++++ scripts/base/protocols/http/__load__.bro | 6 +- scripts/base/protocols/http/entities.bro | 70 +++++++++++++++++++ .../http/{file-analysis.bro => files.bro} | 30 ++++---- scripts/base/protocols/http/main.bro | 26 +------ scripts/base/protocols/irc/__load__.bro | 4 +- scripts/base/protocols/irc/dcc-send.bro | 4 +- scripts/base/protocols/irc/files.bro | 41 +++++++++++ scripts/base/protocols/smtp/__load__.bro | 3 +- scripts/base/protocols/smtp/entities.bro | 3 +- scripts/base/protocols/smtp/file-analysis.bro | 22 ------ scripts/base/protocols/smtp/files.bro | 34 +++++++++ src/file_analysis/Manager.cc | 8 +-- src/file_analysis/analyzer/hash/events.bif | 4 +- src/file_analysis/file_analysis.bif | 6 ++ 18 files changed, 257 insertions(+), 120 deletions(-) delete mode 100644 scripts/base/protocols/ftp/file-analysis.bro create mode 100644 scripts/base/protocols/ftp/files.bro create mode 100644 scripts/base/protocols/http/entities.bro rename scripts/base/protocols/http/{file-analysis.bro => files.bro} (50%) create mode 100644 scripts/base/protocols/irc/files.bro delete mode 100644 scripts/base/protocols/smtp/file-analysis.bro create mode 100644 scripts/base/protocols/smtp/files.bro diff --git a/scripts/base/frameworks/files/main.bro b/scripts/base/frameworks/files/main.bro index d6e26e1833..d5a3ddee67 100644 --- a/scripts/base/frameworks/files/main.bro +++ b/scripts/base/frameworks/files/main.bro @@ -61,7 +61,7 @@ export { depth: count &default=0 &log; ## A set of analysis types done during the file analysis. - analyzers: set[Analyzer] &log; + analyzers: set[string] &log; ## A mime type provided by libmagic against the *bof_buffer*, or ## in the cases where no buffering of the beginning of file occurs, @@ -76,11 +76,16 @@ export { ## The duration the file was analyzed for. duration: interval &log &default=0secs; - ## If the source of this file is is a network connection, this field + ## If the source of this file is a network connection, this field ## indicates if the data originated from the local network or not as ## determined by the configured bro:see:`Site::local_nets`. local_orig: bool &log &optional; + ## If the source of this file is a network connection, this field + ## indicates if the file is being sent by the originator of the connection + ## or the responder. + is_orig: bool &log &optional; + ## Number of bytes provided to the file analysis engine for the file. seen_bytes: count &log &default=0; @@ -105,7 +110,7 @@ export { ## A table that can be used to disable file analysis completely for ## any files transferred over given network protocol analyzers. - const disable: table[Analyzer::Tag] of bool = table() &redef; + const disable: table[Files::Tag] of bool = table() &redef; ## The salt concatenated to unique file handle strings generated by ## :bro:see:`get_file_handle` before hashing them in to a file id @@ -139,7 +144,7 @@ export { ## for the *id* isn't currently active or the *args* ## were invalid for the analyzer type. global add_analyzer: function(f: fa_file, - tag: Files::Analyzer, + tag: Files::Tag, args: AnalyzerArgs &default=AnalyzerArgs()): bool; ## Removes an analyzer from the analysis of a given file. @@ -150,7 +155,7 @@ export { ## ## Returns: true if the analyzer will be removed, or false if analysis ## for the *id* isn't currently active. 
- global remove_analyzer: function(f: fa_file, tag: Files::Analyzer, args: AnalyzerArgs): bool; + global remove_analyzer: function(f: fa_file, tag: Files::Tag, args: AnalyzerArgs): bool; ## Stops/ignores any further analysis of a given file. ## @@ -161,6 +166,13 @@ export { ## isn't currently active. global stop: function(f: fa_file): bool; + ## Translates an file analyzer enum value to a string with the analyzer's name. + ## + ## tag: The analyzer tag. + ## + ## Returns: The analyzer name corresponding to the tag. + global analyzer_name: function(tag: Files::Tag): string; + ## Register callbacks for protocols that work with the Files framework. ## The callbacks must uniquely identify a file and each protocol can ## only have a single callback registered for it. @@ -171,7 +183,7 @@ export { ## defined previously. ## ## Returns: true if the protocol being registered was not previously registered. - global register_protocol: function(tag: AnalyzerTag, callback: function(c: connection, is_orig: bool): string): bool; + global register_protocol: function(tag: Files::Tag, callback: function(c: connection, is_orig: bool): string): bool; ## Register a callback for file analyzers to use if they need to do some manipulation ## when they are being added to a file before the core code takes over. This is @@ -181,7 +193,7 @@ export { ## tag: Tag for the file analyzer. ## ## callback: Function to execute when the given file analyzer is being added. - global register_analyzer_add_callback: function(tag: Files::Analyzer, callback: function(f: fa_file, args: AnalyzerArgs)); + global register_analyzer_add_callback: function(tag: Files::Tag, callback: function(f: fa_file, args: AnalyzerArgs)); ## Event that can be handled to access the Info record as it is sent on ## to the logging framework. @@ -194,14 +206,14 @@ redef record fa_file += { redef record AnalyzerArgs += { # This is used interally for the core file analyzer api. - tag: Files::Analyzer &optional; + tag: Files::Tag &optional; }; # Store the callbacks for protocol analyzers that have files. -global registered_protocols: table[AnalyzerTag] of function(c: connection, is_orig: bool): string = table() +global registered_protocols: table[Files::Tag] of function(c: connection, is_orig: bool): string = table() &default=function(c: connection, is_orig: bool): string { return cat(c$uid, is_orig); }; -global analyzer_add_callbacks: table[Files::Analyzer] of function(f: fa_file, args: AnalyzerArgs) = table(); +global analyzer_add_callbacks: table[Files::Tag] of function(f: fa_file, args: AnalyzerArgs) = table(); event bro_init() &priority=5 { @@ -227,6 +239,8 @@ function set_info(f: fa_file) f$info$total_bytes = f$total_bytes; f$info$missing_bytes = f$missing_bytes; f$info$overflow_bytes = f$overflow_bytes; + if ( f?$is_orig ) + f$info$is_orig = f$is_orig; if ( f?$mime_type ) f$info$mime_type = f$mime_type; } @@ -236,11 +250,11 @@ function set_timeout_interval(f: fa_file, t: interval): bool return __set_timeout_interval(f$id, t); } -function add_analyzer(f: fa_file, tag: Analyzer, args: AnalyzerArgs): bool +function add_analyzer(f: fa_file, tag: Files::Tag, args: AnalyzerArgs): bool { # This is to construct the correct args for the core API. 
args$tag = tag; - add f$info$analyzers[tag]; + add f$info$analyzers[Files::analyzer_name(tag)]; if ( tag in analyzer_add_callbacks ) analyzer_add_callbacks[tag](f, args); @@ -253,12 +267,12 @@ function add_analyzer(f: fa_file, tag: Analyzer, args: AnalyzerArgs): bool return T; } -function register_analyzer_add_callback(tag: Files::Analyzer, callback: function(f: fa_file, args: AnalyzerArgs)) +function register_analyzer_add_callback(tag: Files::Tag, callback: function(f: fa_file, args: AnalyzerArgs)) { analyzer_add_callbacks[tag] = callback; } -function remove_analyzer(f: fa_file, tag: Files::Analyzer, args: AnalyzerArgs): bool +function remove_analyzer(f: fa_file, tag: Files::Tag, args: AnalyzerArgs): bool { args$tag = tag; return __remove_analyzer(f$id, args); @@ -269,6 +283,11 @@ function stop(f: fa_file): bool return __stop(f$id); } +function analyzer_name(tag: Files::Tag): string + { + return __analyzer_name(tag); + } + event file_new(f: fa_file) &priority=10 { set_info(f); @@ -302,14 +321,14 @@ event file_state_remove(f: fa_file) &priority=-10 Log::write(Files::LOG, f$info); } -function register_protocol(tag: AnalyzerTag, callback: function(c: connection, is_orig: bool): string): bool +function register_protocol(tag: Files::Tag, callback: function(c: connection, is_orig: bool): string): bool { local result = (tag !in registered_protocols); registered_protocols[tag] = callback; return result; } -event get_file_handle(tag: AnalyzerTag, c: connection, is_orig: bool) &priority=5 +event get_file_handle(tag: Files::Tag, c: connection, is_orig: bool) &priority=5 { local handler = registered_protocols[tag]; set_file_handle(handler(c, is_orig)); diff --git a/scripts/base/protocols/ftp/__load__.bro b/scripts/base/protocols/ftp/__load__.bro index 9c839610ac..6fffd5ec43 100644 --- a/scripts/base/protocols/ftp/__load__.bro +++ b/scripts/base/protocols/ftp/__load__.bro @@ -1,4 +1,4 @@ @load ./utils-commands @load ./main -@load ./file-analysis +@load ./files @load ./gridftp diff --git a/scripts/base/protocols/ftp/file-analysis.bro b/scripts/base/protocols/ftp/file-analysis.bro deleted file mode 100644 index 3710a44cee..0000000000 --- a/scripts/base/protocols/ftp/file-analysis.bro +++ /dev/null @@ -1,23 +0,0 @@ -@load ./main -@load base/utils/conn-ids -@load base/frameworks/files - -module FTP; - -export { - ## Default file handle provider for FTP. - global get_file_handle: function(c: connection, is_orig: bool): string; -} - -function get_file_handle(c: connection, is_orig: bool): string - { - if ( [c$id$resp_h, c$id$resp_p] !in ftp_data_expected ) - return ""; - - return cat(ANALYZER_FTP_DATA, c$start_time, c$id, is_orig); - } - -event bro_init() &priority=5 - { - Files::register_protocol(ANALYZER_FTP_DATA, FTP::get_file_handle); - } diff --git a/scripts/base/protocols/ftp/files.bro b/scripts/base/protocols/ftp/files.bro new file mode 100644 index 0000000000..a943adff9d --- /dev/null +++ b/scripts/base/protocols/ftp/files.bro @@ -0,0 +1,40 @@ +@load ./main +@load base/utils/conn-ids +@load base/frameworks/files + +module FTP; + +export { + redef record Info += { + ## File unique ID. + fuid: string &optional &log; + }; + + ## Default file handle provider for FTP. 
+ global get_file_handle: function(c: connection, is_orig: bool): string; +} + +function get_file_handle(c: connection, is_orig: bool): string + { + if ( [c$id$resp_h, c$id$resp_p] !in ftp_data_expected ) + return ""; + + return cat(Analyzer::ANALYZER_FTP_DATA, c$start_time, c$id, is_orig); + } + +event bro_init() &priority=5 + { + Files::register_protocol(Analyzer::ANALYZER_FTP_DATA, FTP::get_file_handle); + } + + +event file_over_new_connection(f: fa_file, c: connection) &priority=5 + { + if ( [c$id$resp_h, c$id$resp_p] !in ftp_data_expected ) + return; + + local ftp = ftp_data_expected[c$id$resp_h, c$id$resp_p]; + ftp$fuid = f$id; + if ( f?$mime_type ) + ftp$mime_type = f$mime_type; + } \ No newline at end of file diff --git a/scripts/base/protocols/http/__load__.bro b/scripts/base/protocols/http/__load__.bro index 585b815eed..f0cec220d3 100644 --- a/scripts/base/protocols/http/__load__.bro +++ b/scripts/base/protocols/http/__load__.bro @@ -1,6 +1,4 @@ @load ./main +@load ./entities @load ./utils -@load ./file-analysis -#@load ./file-ident -#@load ./file-hash -#@load ./file-extract +@load ./files \ No newline at end of file diff --git a/scripts/base/protocols/http/entities.bro b/scripts/base/protocols/http/entities.bro new file mode 100644 index 0000000000..cc852a7e11 --- /dev/null +++ b/scripts/base/protocols/http/entities.bro @@ -0,0 +1,70 @@ +##! Analysis and logging for MIME entities found in HTTP sessions. + +@load base/frameworks/files +@load base/utils/strings +@load base/utils/files +@load ./main + +module HTTP; + +export { + type Entity: record { + ## Depth of the entity if multiple entities are sent in a single transaction. + depth: count &default=0; + + ## Filename for the entity if discovered from a header. + filename: string &optional; + }; + + redef record Info += { + ## The current entity being seen. + entity: Entity &optional; + + ## Current number of MIME entities in the HTTP request message body. + orig_mime_depth: count &default=0; + ## Current number of MIME entities in the HTTP response message body. + resp_mime_depth: count &default=0; + }; +} + +event http_begin_entity(c: connection, is_orig: bool) &priority=10 + { + set_state(c, F, is_orig); + + if ( is_orig ) + ++c$http$orig_mime_depth; + else + ++c$http$resp_mime_depth; + + c$http$entity = Entity($depth = is_orig ? 
c$http$orig_mime_depth : c$http$resp_mime_depth); + } + +event http_header(c: connection, is_orig: bool, name: string, value: string) &priority=3 + { + if ( name == "CONTENT-DISPOSITION" && + /[fF][iI][lL][eE][nN][aA][mM][eE]/ in value ) + { + c$http$entity$filename = extract_filename_from_content_disposition(value); + } + else if ( name == "CONTENT-TYPE" && + /[nN][aA][mM][eE][:blank:]*=/ in value ) + { + c$http$entity$filename = extract_filename_from_content_disposition(value); + } + } + +event file_over_new_connection(f: fa_file, c: connection) &priority=5 + { + if ( f$source == "HTTP" && c$http?$entity ) + { + f$info$depth = c$http$entity$depth; + if ( c$http$entity?$filename ) + f$info$filename = c$http$entity$filename; + } + } + +event http_end_entity(c: connection, is_orig: bool) &priority=5 + { + if ( c?$http && c$http?$entity ) + delete c$http$entity; + } diff --git a/scripts/base/protocols/http/file-analysis.bro b/scripts/base/protocols/http/files.bro similarity index 50% rename from scripts/base/protocols/http/file-analysis.bro rename to scripts/base/protocols/http/files.bro index b79ca041b8..44fdc4c1f4 100644 --- a/scripts/base/protocols/http/file-analysis.bro +++ b/scripts/base/protocols/http/files.bro @@ -1,17 +1,17 @@ @load ./main +@load ./entities @load ./utils -@load base/utils/conn-ids @load base/frameworks/files module HTTP; export { redef record Info += { - ## The sniffed mime type of the data being sent by the client. - client_mime_type: string &log &optional; + ## An ordered vector of file unique IDs seen sent by the originator (client). + orig_fuids: vector of string &log &default=string_vec(); - ## The sniffed mime type of the data being returned by the server. - mime_type: string &log &optional; + ## An ordered vector of file unique IDs seen sent by the responder (server). + resp_fuids: vector of string &log &default=string_vec(); }; ## Default file handle provider for HTTP. @@ -26,33 +26,27 @@ function get_file_handle(c: connection, is_orig: bool): string local mime_depth = is_orig ? c$http$orig_mime_depth : c$http$resp_mime_depth; if ( c$http$range_request ) { - return cat(ANALYZER_HTTP, is_orig, c$id$orig_h, mime_depth, build_url(c$http)); + return cat(Analyzer::ANALYZER_HTTP, is_orig, c$id$orig_h, mime_depth, build_url(c$http)); } else { - return cat(ANALYZER_HTTP, c$start_time, is_orig, + return cat(Analyzer::ANALYZER_HTTP, c$start_time, is_orig, c$http$trans_depth, mime_depth, id_string(c$id)); } } event bro_init() &priority=5 { - Files::register_protocol(ANALYZER_HTTP, HTTP::get_file_handle); + Files::register_protocol(Analyzer::ANALYZER_HTTP, HTTP::get_file_handle); } event file_over_new_connection(f: fa_file, c: connection) &priority=5 { if ( c?$http ) { - #if (!f?$mime_type) - # print f; -# - #if ( f$is_orig ) - # c$http$client_mime_type = f$mime_type; - #else - # c$http$mime_type = f$mime_type; - - if ( c$http?$filename ) - f$info$filename = c$http$filename; + if ( f$is_orig ) + c$http$orig_fuids[|c$http$orig_fuids|] = f$id; + else + c$http$resp_fuids[|c$http$resp_fuids|] = f$id; } } \ No newline at end of file diff --git a/scripts/base/protocols/http/main.bro b/scripts/base/protocols/http/main.bro index a982fdc9c6..d96384ee5f 100644 --- a/scripts/base/protocols/http/main.bro +++ b/scripts/base/protocols/http/main.bro @@ -75,10 +75,6 @@ export { ## Indicates if this request can assume 206 partial content in ## response. range_request: bool &default=F; - ## Number of MIME entities in the HTTP request message body so far. 
- orig_mime_depth: count &default=0; - ## Number of MIME entities in the HTTP response message body so far. - resp_mime_depth: count &default=0; }; ## Structure to maintain state for an HTTP connection with multiple @@ -104,8 +100,8 @@ export { } &redef; ## A list of HTTP methods. Other methods will generate a weird. Note - ## that the HTTP analyzer will only accept methods consisting solely - ## of letters ``[A-Za-z]``. + ## that the HTTP analyzer will only accept methods consisting solely + ## of letters ``[A-Za-z]``. const http_methods: set[string] = { "GET", "POST", "HEAD", "OPTIONS", "PUT", "DELETE", "TRACE", "CONNECT", @@ -275,25 +271,9 @@ event http_header(c: connection, is_orig: bool, name: string, value: string) &pr } } } - - else # server headers - { - if ( name == "CONTENT-DISPOSITION" && - /[fF][iI][lL][eE][nN][aA][mM][eE]/ in value ) - c$http$filename = extract_filename_from_content_disposition(value); - } + } -event http_begin_entity(c: connection, is_orig: bool) &priority=5 - { - set_state(c, F, is_orig); - - if ( is_orig ) - ++c$http$orig_mime_depth; - else - ++c$http$resp_mime_depth; - } - event http_message_done(c: connection, is_orig: bool, stat: http_message_stat) &priority = 5 { set_state(c, F, is_orig); diff --git a/scripts/base/protocols/irc/__load__.bro b/scripts/base/protocols/irc/__load__.bro index d20550c54f..afb7fecc62 100644 --- a/scripts/base/protocols/irc/__load__.bro +++ b/scripts/base/protocols/irc/__load__.bro @@ -1,3 +1,3 @@ @load ./main -#@load ./dcc-send -@load ./file-analysis +@load ./dcc-send +@load ./files \ No newline at end of file diff --git a/scripts/base/protocols/irc/dcc-send.bro b/scripts/base/protocols/irc/dcc-send.bro index b79eb370e6..83b32faf2b 100644 --- a/scripts/base/protocols/irc/dcc-send.bro +++ b/scripts/base/protocols/irc/dcc-send.bro @@ -49,13 +49,15 @@ function log_dcc(f: fa_file) delete irc$dcc_file_name; delete irc$dcc_file_size; delete irc$dcc_mime_type; + + delete dcc_expected_transfers[cid$resp_h, cid$resp_p]; return; } } event file_new(f: fa_file) &priority=-5 { - if ( f?$source && f$source == "IRC_DATA" ) + if ( f$source == "IRC_DATA" ) log_dcc(f); } diff --git a/scripts/base/protocols/irc/files.bro b/scripts/base/protocols/irc/files.bro new file mode 100644 index 0000000000..f4553b534a --- /dev/null +++ b/scripts/base/protocols/irc/files.bro @@ -0,0 +1,41 @@ +@load ./dcc-send +@load base/utils/conn-ids +@load base/frameworks/files + +module IRC; + +export { + redef record Info += { + ## File unique ID. + fuid: string &log &optional; + }; + + ## Default file handle provider for IRC. 
+ global get_file_handle: function(c: connection, is_orig: bool): string; +} + +function get_file_handle(c: connection, is_orig: bool): string + { + if ( [c$id$resp_h, c$id$resp_p] !in dcc_expected_transfers ) + return ""; + + return cat(Analyzer::ANALYZER_IRC_DATA, c$start_time, c$id, is_orig); + } + +event bro_init() &priority=5 + { + Files::register_protocol(Analyzer::ANALYZER_IRC_DATA, IRC::get_file_handle); + } + +event file_over_new_connection(f: fa_file, c: connection) &priority=5 + { + if ( [c$id$resp_h, c$id$resp_p] !in dcc_expected_transfers ) + return; + + local irc = dcc_expected_transfers[c$id$resp_h, c$id$resp_p]; + irc$fuid = f$id; + if ( irc?$dcc_file_name ) + f$info$filename = irc$dcc_file_name; + if ( f?$mime_type ) + irc$dcc_mime_type = f$mime_type; + } \ No newline at end of file diff --git a/scripts/base/protocols/smtp/__load__.bro b/scripts/base/protocols/smtp/__load__.bro index 1e913d8dff..a37c2ed3de 100644 --- a/scripts/base/protocols/smtp/__load__.bro +++ b/scripts/base/protocols/smtp/__load__.bro @@ -1,4 +1,3 @@ @load ./main @load ./entities -#@load ./entities-excerpt -@load ./file-analysis +@load ./files \ No newline at end of file diff --git a/scripts/base/protocols/smtp/entities.bro b/scripts/base/protocols/smtp/entities.bro index dcb53dc0aa..067b8acf8e 100644 --- a/scripts/base/protocols/smtp/entities.bro +++ b/scripts/base/protocols/smtp/entities.bro @@ -9,6 +9,7 @@ module SMTP; export { type Entity: record { + ## Filename for the entity if discovered from a header. filename: string &optional; }; @@ -26,8 +27,6 @@ export { event mime_begin_entity(c: connection) &priority=10 { - #print fmt("%s : begin entity", c$uid); - c$smtp$entity = Entity(); ++c$smtp_state$mime_depth; } diff --git a/scripts/base/protocols/smtp/file-analysis.bro b/scripts/base/protocols/smtp/file-analysis.bro deleted file mode 100644 index 44938c8698..0000000000 --- a/scripts/base/protocols/smtp/file-analysis.bro +++ /dev/null @@ -1,22 +0,0 @@ -@load ./main -@load ./entities -@load base/utils/conn-ids -@load base/frameworks/files - -module SMTP; - -export { - ## Default file handle provider for SMTP. - global get_file_handle: function(c: connection, is_orig: bool): string; -} - -function get_file_handle(c: connection, is_orig: bool): string - { - return cat(ANALYZER_SMTP, c$start_time, c$smtp$trans_depth, - c$smtp_state$mime_depth); - } - -event bro_init() &priority=5 - { - Files::register_protocol(ANALYZER_SMTP, SMTP::get_file_handle); - } diff --git a/scripts/base/protocols/smtp/files.bro b/scripts/base/protocols/smtp/files.bro new file mode 100644 index 0000000000..e67181d6bc --- /dev/null +++ b/scripts/base/protocols/smtp/files.bro @@ -0,0 +1,34 @@ +@load ./main +@load ./entities +@load base/utils/conn-ids +@load base/frameworks/files + +module SMTP; + +export { + redef record Info += { + ## An ordered vector of file unique IDs seen attached to + ## the message. + fuids: vector of string &log &default=string_vec(); + }; + + ## Default file handle provider for SMTP. 
+ global get_file_handle: function(c: connection, is_orig: bool): string; +} + +function get_file_handle(c: connection, is_orig: bool): string + { + return cat(Analyzer::ANALYZER_SMTP, c$start_time, c$smtp$trans_depth, + c$smtp_state$mime_depth); + } + +event bro_init() &priority=5 + { + Files::register_protocol(Analyzer::ANALYZER_SMTP, SMTP::get_file_handle); + } + +event file_over_new_connection(f: fa_file, c: connection) &priority=5 + { + if ( c?$smtp ) + c$smtp$fuids[|c$smtp$fuids|] = f$id; + } \ No newline at end of file diff --git a/src/file_analysis/Manager.cc b/src/file_analysis/Manager.cc index 02af4aa9f1..453c6f7902 100644 --- a/src/file_analysis/Manager.cc +++ b/src/file_analysis/Manager.cc @@ -19,8 +19,8 @@ string Manager::salt; Manager::Manager() { - tag_enum_type = new EnumType("FileAnalysis::Tag"); - ::ID* id = install_ID("Tag", "FileAnalysis", true, true); + tag_enum_type = new EnumType("Files::Tag"); + ::ID* id = install_ID("Tag", "Files", true, true); add_type(id, tag_enum_type, 0, 0); } @@ -42,7 +42,7 @@ void Manager::RegisterAnalyzerComponent(Component* component) { const char* cname = component->CanonicalName(); - if ( tag_enum_type->Lookup("FileAnalysis", cname) != -1 ) + if ( tag_enum_type->Lookup("Files", cname) != -1 ) reporter->FatalError("File Analyzer %s defined more than once", cname); DBG_LOG(DBG_FILE_ANALYSIS, "Registering analyzer %s (tag %s)", @@ -54,7 +54,7 @@ void Manager::RegisterAnalyzerComponent(Component* component) component->Tag().AsEnumVal()->InternalInt(), component)); string id = fmt("ANALYZER_%s", cname); - tag_enum_type->AddName("FileAnalysis", id.c_str(), + tag_enum_type->AddName("Files", id.c_str(), component->Tag().AsEnumVal()->InternalInt(), true); } diff --git a/src/file_analysis/analyzer/hash/events.bif b/src/file_analysis/analyzer/hash/events.bif index b4a8de1c74..e03cbf359a 100644 --- a/src/file_analysis/analyzer/hash/events.bif +++ b/src/file_analysis/analyzer/hash/events.bif @@ -7,6 +7,6 @@ ## ## hash: The result of the hashing. ## -## .. bro:see:: FileAnalysis::add_analyzer FileAnalysis::ANALYZER_MD5 -## FileAnalysis::ANALYZER_SHA1 FileAnalysis::ANALYZER_SHA256 +## .. bro:see:: Files::add_analyzer Files::ANALYZER_MD5 +## Files::ANALYZER_SHA1 Files::ANALYZER_SHA256 event file_hash%(f: fa_file, kind: string, hash: string%); diff --git a/src/file_analysis/file_analysis.bif b/src/file_analysis/file_analysis.bif index 148e6360da..b6c80ac800 100644 --- a/src/file_analysis/file_analysis.bif +++ b/src/file_analysis/file_analysis.bif @@ -42,6 +42,12 @@ function Files::__stop%(file_id: string%): bool return new Val(result, TYPE_BOOL); %} +## :bro:see:`Files::analyzer_name`. +function Files::__analyzer_name%(tag: Files::Tag%) : string + %{ + return new StringVal(file_mgr->GetAnalyzerName(tag->InternalInt())); + %} + module GLOBAL; ## For use within a :bro:see:`get_file_handle` handler to set a unique From ecfac31de0b5d69254b590939c3a56be4038e0d6 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Tue, 9 Jul 2013 11:51:23 -0400 Subject: [PATCH 045/118] Fixed SMTP URL extraction for the Intel framework with Files updates. 
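For context, a minimal sketch (not part of the patch) of the add_analyzer calling convention this fix adopts: the analyzer tag is passed separately from the AnalyzerArgs record, and the registered stream event receives the reassembled file data. The my_stream_data handler is a made-up name for illustration:

    event my_stream_data(f: fa_file, data: string)
        {
        print fmt("%s delivered %d bytes", f$id, |data|);
        }

    event file_new(f: fa_file)
        {
        if ( f$source == "SMTP" )
            Files::add_analyzer(f, Files::ANALYZER_DATA_EVENT,
                                [$stream_event=my_stream_data]);
        }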
--- .../frameworks/intel/smtp-url-extraction.bro | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/scripts/policy/frameworks/intel/smtp-url-extraction.bro b/scripts/policy/frameworks/intel/smtp-url-extraction.bro index b4ab32a915..2478eba9f8 100644 --- a/scripts/policy/frameworks/intel/smtp-url-extraction.bro +++ b/scripts/policy/frameworks/intel/smtp-url-extraction.bro @@ -1,11 +1,12 @@ @load base/frameworks/intel -@load base/protocols/smtp/file-analysis +@load base/protocols/smtp @load base/utils/urls @load ./where-locations event intel_mime_data(f: fa_file, data: string) { - if ( ! f?$conns ) return; + if ( ! f?$conns ) + return; for ( cid in f$conns ) { @@ -21,11 +22,8 @@ event intel_mime_data(f: fa_file, data: string) } } -event file_new(f: fa_file) &priority=5 +event file_new(f: fa_file) { - if ( ! f?$source ) return; - if ( f$source != "SMTP" ) return; - - Files::add_analyzer(f, [$tag=Files::ANALYZER_DATA_EVENT, - $stream_event=intel_mime_data]); + if ( f$source == "SMTP" ) + Files::add_analyzer(f, Files::ANALYZER_DATA_EVENT, [$stream_event=intel_mime_data]); } From 5dbc354898454bb3e8b0970119925b42bec213f7 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Tue, 9 Jul 2013 14:05:36 -0400 Subject: [PATCH 046/118] extract_filename_from_content_disposition is still hacky but more closely aligns with RFC5987 --- scripts/base/utils/files.bro | 14 ++++++++++---- .../btest/Baseline/scripts.base.utils.files/output | 3 +++ testing/btest/scripts/base/utils/files.test | 7 +++++++ 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/scripts/base/utils/files.bro b/scripts/base/utils/files.bro index 76d2ecea4f..fedd93ab47 100644 --- a/scripts/base/utils/files.bro +++ b/scripts/base/utils/files.bro @@ -19,9 +19,15 @@ function generate_extraction_filename(prefix: string, c: connection, suffix: str ## the filename. function extract_filename_from_content_disposition(data: string): string { - local filename = sub(data, /^.*[nN][aA][mM][eE][[:blank:]]*=[[:blank:]]*/, ""); + local filename = sub(data, /^.*[nN][aA][mM][eE][[:blank:]]*\*?=[[:blank:]]*/, ""); + # Remove quotes around the filename if they are there. if ( /^\"/ in filename ) - filename = split_n(filename, /\"/, F, 2)[2]; - return filename; - } + filename = split_n(filename, /\"/, F, 2)[2]; + + # Remove the language and encoding if it's there. 
+ if ( /^[a-zA-Z0-9\!#$%&+-^_`{}~]+'[a-zA-Z0-9\!#$%&+-^_`{}~]*'/ in filename ) + filename = sub(filename, /^.+'.*'/, ""); + + return unescape_URI(filename); + } \ No newline at end of file diff --git a/testing/btest/Baseline/scripts.base.utils.files/output b/testing/btest/Baseline/scripts.base.utils.files/output index ab92c3a624..4d53bcedd3 100644 --- a/testing/btest/Baseline/scripts.base.utils.files/output +++ b/testing/btest/Baseline/scripts.base.utils.files/output @@ -1,3 +1,6 @@ +Economy +US-$ rates +\xa3 rates test-prefix_141.142.220.118:48649-208.80.152.118:80_test-suffix test-prefix_141.142.220.118:48649-208.80.152.118:80 141.142.220.118:48649-208.80.152.118:80_test-suffix diff --git a/testing/btest/scripts/base/utils/files.test b/testing/btest/scripts/base/utils/files.test index 84eff13187..3324522030 100644 --- a/testing/btest/scripts/base/utils/files.test +++ b/testing/btest/scripts/base/utils/files.test @@ -11,3 +11,10 @@ event connection_established(c: connection) print generate_extraction_filename("", c, "test-suffix"); print generate_extraction_filename("", c, ""); } + +event bro_init() + { + print extract_filename_from_content_disposition("attachment; filename=Economy"); + print extract_filename_from_content_disposition("attachment; name=\"US-$ rates\""); + print extract_filename_from_content_disposition("attachment; filename*=iso-8859-1'en'%A3%20rates"); + } \ No newline at end of file From 6a5b8250589e7e9d9b2036fa4fe2230561e5428f Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Tue, 9 Jul 2013 14:25:41 -0500 Subject: [PATCH 047/118] Delay file_over_new_connection events until after file_new occurs. --- src/file_analysis/File.cc | 23 +++++++++++++++++++++-- src/file_analysis/File.h | 4 ++++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/src/file_analysis/File.cc b/src/file_analysis/File.cc index b5edfaedc9..ed3d2ae9a8 100644 --- a/src/file_analysis/File.cc +++ b/src/file_analysis/File.cc @@ -75,7 +75,8 @@ void File::StaticInit() File::File(const string& file_id, Connection* conn, analyzer::Tag tag, bool is_orig) : id(file_id), val(0), postpone_timeout(false), first_chunk(true), - missed_bof(false), need_reassembly(false), done(false), analyzers(this) + missed_bof(false), need_reassembly(false), done(false), + did_file_new_event(false), analyzers(this) { StaticInit(); @@ -99,6 +100,7 @@ File::~File() { DBG_LOG(DBG_FILE_ANALYSIS, "Destroying File object %s", id.c_str()); Unref(val); + assert(fonc_queue.empty()); } void File::UpdateLastActivityTime() @@ -135,7 +137,12 @@ void File::UpdateConnectionFields(Connection* conn) val_list* vl = new val_list(); vl->append(val->Ref()); vl->append(conn_val->Ref()); - FileEvent(file_over_new_connection, vl); + + if ( did_file_new_event ) + FileEvent(file_over_new_connection, vl); + else + fonc_queue.push(pair( + file_over_new_connection, vl)); } } @@ -432,6 +439,18 @@ void File::FileEvent(EventHandlerPtr h, val_list* vl) { mgr.QueueEvent(h, vl); + if ( h == file_new ) + { + did_file_new_event = true; + + while ( ! fonc_queue.empty() ) + { + pair p = fonc_queue.front(); + mgr.QueueEvent(p.first, p.second); + fonc_queue.pop(); + } + } + if ( h == file_new || h == file_timeout ) { // immediate feedback is required for these events. 
diff --git a/src/file_analysis/File.h b/src/file_analysis/File.h index ac54c75bc5..5d967e7356 100644 --- a/src/file_analysis/File.h +++ b/src/file_analysis/File.h @@ -3,7 +3,9 @@ #ifndef FILE_ANALYSIS_FILE_H #define FILE_ANALYSIS_FILE_H +#include #include +#include #include #include "Conn.h" @@ -239,7 +241,9 @@ private: bool missed_bof; /**< Flags that we missed start of file. */ bool need_reassembly; /**< Whether file stream reassembly is needed. */ bool done; /**< If this object is about to be deleted. */ + bool did_file_new_event; /**< Whether the file_new event has been done. */ AnalyzerSet analyzers; /**< A set of attached file analyzer. */ + queue > fonc_queue; struct BOF_Buffer { BOF_Buffer() : full(false), replayed(false), size(0) {} From da4a0bed03dd9b4904716844a271c7074fcc17ee Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Tue, 9 Jul 2013 15:55:33 -0500 Subject: [PATCH 048/118] Disable more libmagic builtin checks that override the magic database. --- src/util.cc | 2 +- src/util.h | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/src/util.cc b/src/util.cc index cff36f0f23..5a63be22cb 100644 --- a/src/util.cc +++ b/src/util.cc @@ -1578,7 +1578,7 @@ void bro_init_magic(magic_t* cookie_ptr, int flags) if ( ! cookie_ptr || *cookie_ptr ) return; - *cookie_ptr = magic_open(flags|MAGIC_NO_CHECK_TOKENS); + *cookie_ptr = magic_open(flags|DISABLE_LIBMAGIC_BUILTIN_CHECKS); // Use our custom database for mime types, but the default database // from libmagic for the verbose file type. diff --git a/src/util.h b/src/util.h index cafa63b7e8..91ed8f2888 100644 --- a/src/util.h +++ b/src/util.h @@ -377,6 +377,23 @@ struct CompareString } }; +// Older versions of libmagic may not define the MAGIC_NO_CHECK_BUILTIN +// convenience macro and other newer versions seem to have a typo that makes +// it unusable, so just make a different one now with all known flags for +// builtin libmagic components that should be disabled so that Bro only +// uses the custom magic database shipped with it. +#define DISABLE_LIBMAGIC_BUILTIN_CHECKS ( \ + MAGIC_NO_CHECK_COMPRESS | \ + MAGIC_NO_CHECK_TAR | \ +/* MAGIC_NO_CHECK_SOFT | */ \ + MAGIC_NO_CHECK_APPTYPE | \ + MAGIC_NO_CHECK_ELF | \ + MAGIC_NO_CHECK_TEXT | \ + MAGIC_NO_CHECK_CDF | \ + MAGIC_NO_CHECK_TOKENS | \ + MAGIC_NO_CHECK_ENCODING \ +) + extern magic_t magic_desc_cookie; extern magic_t magic_mime_cookie; From efe878f3de6999c7b3f28fde79af7e4b43fd1180 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Tue, 9 Jul 2013 15:56:47 -0500 Subject: [PATCH 049/118] Make magic for emitting application/msword mime type less strict. 
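Illustrative only, not part of the patch: with the relaxed entry below, any OLE2 compound document matching the generic Office magic is reported as application/msword, so a script keyed on that MIME type would see those files too, e.g.:

    event file_new(f: fa_file)
        {
        if ( f?$mime_type && f$mime_type == "application/msword" )
            # Hypothetical reaction; a site might instead extract such files.
            Files::add_analyzer(f, Files::ANALYZER_MD5);
        }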
--- magic/msdos | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/magic/msdos b/magic/msdos index 59a9d2caac..cc411aeeb7 100644 --- a/magic/msdos +++ b/magic/msdos @@ -349,12 +349,13 @@ # False positive with PPT (also currently this string is too long) #0 string/b \xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x3E\x00\x03\x00\xFE\xFF\x09\x00\x06 Microsoft Installer 0 string/b \320\317\021\340\241\261\032\341 Microsoft Office Document +!:mime application/msword #>48 byte 0x1B Excel Document #!:mime application/vnd.ms-excel ->546 string bjbj Microsoft Word Document -!:mime application/msword ->546 string jbjb Microsoft Word Document -!:mime application/msword +#>546 string bjbj Microsoft Word Document +#!:mime application/msword +#>546 string jbjb Microsoft Word Document +#!:mime application/msword 0 string/b \224\246\056 Microsoft Word Document !:mime application/msword From 73155c321bdd82a762b9642b1bcf55f45e784e94 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Tue, 9 Jul 2013 15:58:28 -0500 Subject: [PATCH 050/118] Add an is_orig parameter to file_over_new_connection event. --- scripts/base/frameworks/files/main.bro | 2 +- scripts/base/protocols/ftp/files.bro | 4 ++-- scripts/base/protocols/http/entities.bro | 2 +- scripts/base/protocols/http/files.bro | 4 ++-- scripts/base/protocols/irc/files.bro | 4 ++-- scripts/base/protocols/smtp/entities.bro | 2 +- scripts/base/protocols/smtp/files.bro | 4 ++-- src/event.bif | 4 +++- src/file_analysis/File.cc | 5 +++-- src/file_analysis/File.h | 3 ++- src/file_analysis/Manager.cc | 2 +- testing/scripts/file-analysis-test.bro | 2 +- 12 files changed, 21 insertions(+), 17 deletions(-) diff --git a/scripts/base/frameworks/files/main.bro b/scripts/base/frameworks/files/main.bro index d5a3ddee67..8dd07fcb53 100644 --- a/scripts/base/frameworks/files/main.bro +++ b/scripts/base/frameworks/files/main.bro @@ -293,7 +293,7 @@ event file_new(f: fa_file) &priority=10 set_info(f); } -event file_over_new_connection(f: fa_file, c: connection) &priority=10 +event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=10 { set_info(f); add f$info$conn_uids[c$uid]; diff --git a/scripts/base/protocols/ftp/files.bro b/scripts/base/protocols/ftp/files.bro index a943adff9d..c68717c8a2 100644 --- a/scripts/base/protocols/ftp/files.bro +++ b/scripts/base/protocols/ftp/files.bro @@ -28,7 +28,7 @@ event bro_init() &priority=5 } -event file_over_new_connection(f: fa_file, c: connection) &priority=5 +event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=5 { if ( [c$id$resp_h, c$id$resp_p] !in ftp_data_expected ) return; @@ -37,4 +37,4 @@ event file_over_new_connection(f: fa_file, c: connection) &priority=5 ftp$fuid = f$id; if ( f?$mime_type ) ftp$mime_type = f$mime_type; - } \ No newline at end of file + } diff --git a/scripts/base/protocols/http/entities.bro b/scripts/base/protocols/http/entities.bro index cc852a7e11..fc8ab753ae 100644 --- a/scripts/base/protocols/http/entities.bro +++ b/scripts/base/protocols/http/entities.bro @@ -53,7 +53,7 @@ event http_header(c: connection, is_orig: bool, name: string, value: string) &pr } } -event file_over_new_connection(f: fa_file, c: connection) &priority=5 +event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=5 { if ( f$source == "HTTP" && c$http?$entity ) { diff --git a/scripts/base/protocols/http/files.bro b/scripts/base/protocols/http/files.bro index 44fdc4c1f4..e45ff8cadb 
100644 --- a/scripts/base/protocols/http/files.bro +++ b/scripts/base/protocols/http/files.bro @@ -40,7 +40,7 @@ event bro_init() &priority=5 Files::register_protocol(Analyzer::ANALYZER_HTTP, HTTP::get_file_handle); } -event file_over_new_connection(f: fa_file, c: connection) &priority=5 +event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=5 { if ( c?$http ) { @@ -49,4 +49,4 @@ event file_over_new_connection(f: fa_file, c: connection) &priority=5 else c$http$resp_fuids[|c$http$resp_fuids|] = f$id; } - } \ No newline at end of file + } diff --git a/scripts/base/protocols/irc/files.bro b/scripts/base/protocols/irc/files.bro index f4553b534a..8708270bfd 100644 --- a/scripts/base/protocols/irc/files.bro +++ b/scripts/base/protocols/irc/files.bro @@ -27,7 +27,7 @@ event bro_init() &priority=5 Files::register_protocol(Analyzer::ANALYZER_IRC_DATA, IRC::get_file_handle); } -event file_over_new_connection(f: fa_file, c: connection) &priority=5 +event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=5 { if ( [c$id$resp_h, c$id$resp_p] !in dcc_expected_transfers ) return; @@ -38,4 +38,4 @@ event file_over_new_connection(f: fa_file, c: connection) &priority=5 f$info$filename = irc$dcc_file_name; if ( f?$mime_type ) irc$dcc_mime_type = f$mime_type; - } \ No newline at end of file + } diff --git a/scripts/base/protocols/smtp/entities.bro b/scripts/base/protocols/smtp/entities.bro index 067b8acf8e..ec43b39ce1 100644 --- a/scripts/base/protocols/smtp/entities.bro +++ b/scripts/base/protocols/smtp/entities.bro @@ -31,7 +31,7 @@ event mime_begin_entity(c: connection) &priority=10 ++c$smtp_state$mime_depth; } -event file_over_new_connection(f: fa_file, c: connection) &priority=5 +event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=5 { if ( f$source != "SMTP" ) return; diff --git a/scripts/base/protocols/smtp/files.bro b/scripts/base/protocols/smtp/files.bro index e67181d6bc..1cf9ec01e1 100644 --- a/scripts/base/protocols/smtp/files.bro +++ b/scripts/base/protocols/smtp/files.bro @@ -27,8 +27,8 @@ event bro_init() &priority=5 Files::register_protocol(Analyzer::ANALYZER_SMTP, SMTP::get_file_handle); } -event file_over_new_connection(f: fa_file, c: connection) &priority=5 +event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=5 { if ( c?$smtp ) c$smtp$fuids[|c$smtp$fuids|] = f$id; - } \ No newline at end of file + } diff --git a/src/event.bif b/src/event.bif index df22902094..e4d6f8c844 100644 --- a/src/event.bif +++ b/src/event.bif @@ -911,8 +911,10 @@ event file_new%(f: fa_file%); ## ## c: The new connection over which the file is seen being transferred. ## +## is_orig: true if the originator of *c* is the one sending the file. +## ## .. bro:see:: file_new file_timeout file_gap file_state_remove -event file_over_new_connection%(f: fa_file, c: connection%); +event file_over_new_connection%(f: fa_file, c: connection, is_orig: bool%); ## Indicates that file analysis has timed out because no activity was seen ## for the file in a while. 
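A minimal handler for the extended file_over_new_connection signature might look like the following sketch (the print output is illustrative only):

    event file_over_new_connection(f: fa_file, c: connection, is_orig: bool)
    	{
    	if ( is_orig )
    		print fmt("%s: file %s pushed by the connection originator", c$uid, f$id);
    	else
    		print fmt("%s: file %s sent by the responder", c$uid, f$id);
    	}
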
diff --git a/src/file_analysis/File.cc b/src/file_analysis/File.cc index ed3d2ae9a8..9a06fa3db9 100644 --- a/src/file_analysis/File.cc +++ b/src/file_analysis/File.cc @@ -90,7 +90,7 @@ File::File(const string& file_id, Connection* conn, analyzer::Tag tag, // add source, connection, is_orig fields SetSource(analyzer_mgr->GetAnalyzerName(tag)); val->Assign(is_orig_idx, new Val(is_orig, TYPE_BOOL)); - UpdateConnectionFields(conn); + UpdateConnectionFields(conn, is_orig); } UpdateLastActivityTime(); @@ -113,7 +113,7 @@ double File::GetLastActivityTime() const return val->Lookup(last_active_idx)->AsTime(); } -void File::UpdateConnectionFields(Connection* conn) +void File::UpdateConnectionFields(Connection* conn, bool is_orig) { if ( ! conn ) return; @@ -137,6 +137,7 @@ void File::UpdateConnectionFields(Connection* conn) val_list* vl = new val_list(); vl->append(val->Ref()); vl->append(conn_val->Ref()); + vl->append(new Val(is_orig, TYPE_BOOL)); if ( did_file_new_event ) FileEvent(file_over_new_connection, vl); diff --git a/src/file_analysis/File.h b/src/file_analysis/File.h index 5d967e7356..794734d24b 100644 --- a/src/file_analysis/File.h +++ b/src/file_analysis/File.h @@ -173,8 +173,9 @@ protected: * Updates the "conn_ids" and "conn_uids" fields in #val record with the * \c conn_id and UID taken from \a conn. * @param conn the connection over which a part of the file has been seen. + * @param is_orig true if the connection originator is sending the file. */ - void UpdateConnectionFields(Connection* conn); + void UpdateConnectionFields(Connection* conn, bool is_orig); /** * Increment a byte count field of #val record by \a size. diff --git a/src/file_analysis/Manager.cc b/src/file_analysis/Manager.cc index 453c6f7902..4e25bb0b0e 100644 --- a/src/file_analysis/Manager.cc +++ b/src/file_analysis/Manager.cc @@ -250,7 +250,7 @@ File* Manager::GetFile(const string& file_id, Connection* conn, rval->UpdateLastActivityTime(); if ( update_conn ) - rval->UpdateConnectionFields(conn); + rval->UpdateConnectionFields(conn, is_orig); } return rval; diff --git a/testing/scripts/file-analysis-test.bro b/testing/scripts/file-analysis-test.bro index 9df640c893..cf2bbf2d59 100644 --- a/testing/scripts/file-analysis-test.bro +++ b/testing/scripts/file-analysis-test.bro @@ -66,7 +66,7 @@ event file_new(f: fa_file) } } -event file_over_new_connection(f: fa_file, c: connection) +event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) { print "FILE_OVER_NEW_CONNECTION"; } From 39444b5af79de557b5ead73a9c2156bec1e2ea46 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Tue, 9 Jul 2013 22:44:55 -0400 Subject: [PATCH 051/118] Moved DPD signatures into script specific directories. - This caused us to lose signatures for POP3 and Bittorrent. These will need discovered in the repository again when we add scripts for those analyzers. 
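The layout introduced below pairs each protocol directory with its own dpd.sig, pulled in from that protocol's __load__.bro via @load-sigs. When the lost POP3 signatures are restored they would presumably follow the same pattern; a sketch reusing the signature bodies from the deleted dpd.sig (the pop3 directory and file names are hypothetical, since no POP3 scripts exist yet):

    # scripts/base/protocols/pop3/__load__.bro  (hypothetical)
    @load ./main
    @load-sigs ./dpd.sig

    # scripts/base/protocols/pop3/dpd.sig  (hypothetical; bodies copied from the
    # deleted scripts/base/frameworks/dpd/dpd.sig)
    signature dpd_pop3_client {
        ip-proto == tcp
        payload /(|.*[\r\n])[[:space:]]*([uU][sS][eE][rR][[:space:]]|[aA][pP][oO][pP][[:space:]]|[cC][aA][pP][aA]|[aA][uU][tT][hH])/
        tcp-state originator
    }

    signature dpd_pop3_server {
        ip-proto == tcp
        payload /^\+OK/
        requires-reverse-signature dpd_pop3_client
        enable "pop3"
        tcp-state responder
    }
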
--- scripts/base/frameworks/dpd/dpd.sig | 212 -------------------- scripts/base/frameworks/dpd/main.bro | 2 - scripts/base/init-default.bro | 1 + scripts/base/protocols/ftp/__load__.bro | 2 + scripts/base/protocols/ftp/dpd.sig | 15 ++ scripts/base/protocols/http/__load__.bro | 2 + scripts/base/protocols/http/dpd.sig | 13 ++ scripts/base/protocols/irc/__load__.bro | 2 + scripts/base/protocols/irc/dpd.sig | 33 +++ scripts/base/protocols/smtp/__load__.bro | 2 + scripts/base/protocols/smtp/dpd.sig | 12 ++ scripts/base/protocols/socks/__load__.bro | 4 +- scripts/base/protocols/socks/dpd.sig | 48 +++++ scripts/base/protocols/ssh/__load__.bro | 4 +- scripts/base/protocols/ssh/dpd.sig | 13 ++ scripts/base/protocols/ssl/__load__.bro | 2 + scripts/base/protocols/ssl/dpd.sig | 15 ++ scripts/base/protocols/tunnels/__load__.bro | 1 + scripts/base/protocols/tunnels/dpd.sig | 14 ++ 19 files changed, 181 insertions(+), 216 deletions(-) delete mode 100644 scripts/base/frameworks/dpd/dpd.sig create mode 100644 scripts/base/protocols/ftp/dpd.sig create mode 100644 scripts/base/protocols/http/dpd.sig create mode 100644 scripts/base/protocols/irc/dpd.sig create mode 100644 scripts/base/protocols/smtp/dpd.sig create mode 100644 scripts/base/protocols/socks/dpd.sig create mode 100644 scripts/base/protocols/ssh/dpd.sig create mode 100644 scripts/base/protocols/ssl/dpd.sig create mode 100644 scripts/base/protocols/tunnels/__load__.bro create mode 100644 scripts/base/protocols/tunnels/dpd.sig diff --git a/scripts/base/frameworks/dpd/dpd.sig b/scripts/base/frameworks/dpd/dpd.sig deleted file mode 100644 index 49e24cefc6..0000000000 --- a/scripts/base/frameworks/dpd/dpd.sig +++ /dev/null @@ -1,212 +0,0 @@ -# Signatures to initiate dynamic protocol detection. - -signature dpd_ftp_client { - ip-proto == tcp - payload /(|.*[\n\r]) *[uU][sS][eE][rR] / - tcp-state originator -} - -# Match for server greeting (220, 120) and for login or passwd -# required (230, 331). 
-signature dpd_ftp_server { - ip-proto == tcp - payload /[\n\r ]*(120|220)[^0-9].*[\n\r] *(230|331)[^0-9]/ - tcp-state responder - requires-reverse-signature dpd_ftp_client - enable "ftp" -} - -signature dpd_http_client { - ip-proto == tcp - payload /^[[:space:]]*(GET|HEAD|POST)[[:space:]]*/ - tcp-state originator -} - -signature dpd_http_server { - ip-proto == tcp - payload /^HTTP\/[0-9]/ - tcp-state responder - requires-reverse-signature dpd_http_client - enable "http" -} - -signature dpd_bittorrenttracker_client { - ip-proto == tcp - payload /^.*\/announce\?.*info_hash/ - tcp-state originator -} - -signature dpd_bittorrenttracker_server { - ip-proto == tcp - payload /^HTTP\/[0-9]/ - tcp-state responder - requires-reverse-signature dpd_bittorrenttracker_client - enable "bittorrenttracker" -} - -signature dpd_bittorrent_peer1 { - ip-proto == tcp - payload /^\x13BitTorrent protocol/ - tcp-state originator -} - -signature dpd_bittorrent_peer2 { - ip-proto == tcp - payload /^\x13BitTorrent protocol/ - tcp-state responder - requires-reverse-signature dpd_bittorrent_peer1 - enable "bittorrent" -} - -signature irc_client1 { - ip-proto == tcp - payload /(|.*[\r\n]) *[Uu][Ss][Ee][Rr] +.+[\n\r]+ *[Nn][Ii][Cc][Kk] +.*[\r\n]/ - requires-reverse-signature irc_server_reply - tcp-state originator - enable "irc" -} - -signature irc_client2 { - ip-proto == tcp - payload /(|.*[\r\n]) *[Nn][Ii][Cc][Kk] +.+[\r\n]+ *[Uu][Ss][Ee][Rr] +.+[\r\n]/ - requires-reverse-signature irc_server_reply - tcp-state originator - enable "irc" -} - -signature irc_server_reply { - ip-proto == tcp - payload /^(|.*[\n\r])(:[^ \n\r]+ )?[0-9][0-9][0-9] / - tcp-state responder -} - -signature irc_server_to_server1 { - ip-proto == tcp - payload /(|.*[\r\n]) *[Ss][Ee][Rr][Vv][Ee][Rr] +[^ ]+ +[0-9]+ +:.+[\r\n]/ -} - -signature irc_server_to_server2 { - ip-proto == tcp - payload /(|.*[\r\n]) *[Ss][Ee][Rr][Vv][Ee][Rr] +[^ ]+ +[0-9]+ +:.+[\r\n]/ - requires-reverse-signature irc_server_to_server1 - enable "irc" -} - -signature dpd_smtp_client { - ip-proto == tcp - payload /(|.*[\n\r])[[:space:]]*([hH][eE][lL][oO]|[eE][hH][lL][oO])/ - requires-reverse-signature dpd_smtp_server - enable "smtp" - tcp-state originator -} - -signature dpd_smtp_server { - ip-proto == tcp - payload /^[[:space:]]*220[[:space:]-]/ - tcp-state responder -} - -signature dpd_ssh_client { - ip-proto == tcp - payload /^[sS][sS][hH]-/ - requires-reverse-signature dpd_ssh_server - enable "ssh" - tcp-state originator -} - -signature dpd_ssh_server { - ip-proto == tcp - payload /^[sS][sS][hH]-/ - tcp-state responder -} - -signature dpd_pop3_server { - ip-proto == tcp - payload /^\+OK/ - requires-reverse-signature dpd_pop3_client - enable "pop3" - tcp-state responder -} - -signature dpd_pop3_client { - ip-proto == tcp - payload /(|.*[\r\n])[[:space:]]*([uU][sS][eE][rR][[:space:]]|[aA][pP][oO][pP][[:space:]]|[cC][aA][pP][aA]|[aA][uU][tT][hH])/ - tcp-state originator -} - -signature dpd_ssl_server { - ip-proto == tcp - # Server hello. - payload /^(\x16\x03[\x00\x01\x02]..\x02...\x03[\x00\x01\x02]|...?\x04..\x00\x02).*/ - requires-reverse-signature dpd_ssl_client - enable "ssl" - tcp-state responder -} - -signature dpd_ssl_client { - ip-proto == tcp - # Client hello. 
- payload /^(\x16\x03[\x00\x01\x02]..\x01...\x03[\x00\x01\x02]|...?\x01[\x00\x01\x02][\x02\x03]).*/ - tcp-state originator -} - -signature dpd_ayiya { - ip-proto = udp - payload /^..\x11\x29/ - enable "ayiya" -} - -signature dpd_teredo { - ip-proto = udp - payload /^(\x00\x00)|(\x00\x01)|([\x60-\x6f])/ - enable "teredo" -} - -signature dpd_socks4_client { - ip-proto == tcp - # '32' is a rather arbitrary max length for the user name. - payload /^\x04[\x01\x02].{0,32}\x00/ - tcp-state originator -} - -signature dpd_socks4_server { - ip-proto == tcp - requires-reverse-signature dpd_socks4_client - payload /^\x00[\x5a\x5b\x5c\x5d]/ - tcp-state responder - enable "socks" -} - -signature dpd_socks4_reverse_client { - ip-proto == tcp - # '32' is a rather arbitrary max length for the user name. - payload /^\x04[\x01\x02].{0,32}\x00/ - tcp-state responder -} - -signature dpd_socks4_reverse_server { - ip-proto == tcp - requires-reverse-signature dpd_socks4_reverse_client - payload /^\x00[\x5a\x5b\x5c\x5d]/ - tcp-state originator - enable "socks" -} - -signature dpd_socks5_client { - ip-proto == tcp - # Watch for a few authentication methods to reduce false positives. - payload /^\x05.[\x00\x01\x02]/ - tcp-state originator -} - -signature dpd_socks5_server { - ip-proto == tcp - requires-reverse-signature dpd_socks5_client - # Watch for a single authentication method to be chosen by the server or - # the server to indicate the no authentication is required. - payload /^\x05(\x00|\x01[\x00\x01\x02])/ - tcp-state responder - enable "socks" -} - - diff --git a/scripts/base/frameworks/dpd/main.bro b/scripts/base/frameworks/dpd/main.bro index c3282a1da4..9df8a45e5e 100644 --- a/scripts/base/frameworks/dpd/main.bro +++ b/scripts/base/frameworks/dpd/main.bro @@ -3,8 +3,6 @@ module DPD; -@load-sigs ./dpd.sig - export { ## Add the DPD logging stream identifier. redef enum Log::ID += { LOG }; diff --git a/scripts/base/init-default.bro b/scripts/base/init-default.bro index 9c3995673c..6c40a7547f 100644 --- a/scripts/base/init-default.bro +++ b/scripts/base/init-default.bro @@ -46,5 +46,6 @@ @load base/protocols/ssh @load base/protocols/ssl @load base/protocols/syslog +@load base/protocols/tunnels @load base/misc/find-checksum-offloading diff --git a/scripts/base/protocols/ftp/__load__.bro b/scripts/base/protocols/ftp/__load__.bro index 464571dc7d..f3226de69d 100644 --- a/scripts/base/protocols/ftp/__load__.bro +++ b/scripts/base/protocols/ftp/__load__.bro @@ -3,3 +3,5 @@ @load ./file-analysis @load ./file-extract @load ./gridftp + +@load-sigs ./dpd.sig \ No newline at end of file diff --git a/scripts/base/protocols/ftp/dpd.sig b/scripts/base/protocols/ftp/dpd.sig new file mode 100644 index 0000000000..3a6ceadd18 --- /dev/null +++ b/scripts/base/protocols/ftp/dpd.sig @@ -0,0 +1,15 @@ +signature dpd_ftp_client { + ip-proto == tcp + payload /(|.*[\n\r]) *[uU][sS][eE][rR] / + tcp-state originator +} + +# Match for server greeting (220, 120) and for login or passwd +# required (230, 331). 
+signature dpd_ftp_server { + ip-proto == tcp + payload /[\n\r ]*(120|220)[^0-9].*[\n\r] *(230|331)[^0-9]/ + tcp-state responder + requires-reverse-signature dpd_ftp_client + enable "ftp" +} diff --git a/scripts/base/protocols/http/__load__.bro b/scripts/base/protocols/http/__load__.bro index 58618dedc7..8f426c1521 100644 --- a/scripts/base/protocols/http/__load__.bro +++ b/scripts/base/protocols/http/__load__.bro @@ -4,3 +4,5 @@ @load ./file-ident @load ./file-hash @load ./file-extract + +@load-sigs ./dpd.sig \ No newline at end of file diff --git a/scripts/base/protocols/http/dpd.sig b/scripts/base/protocols/http/dpd.sig new file mode 100644 index 0000000000..13470f4e95 --- /dev/null +++ b/scripts/base/protocols/http/dpd.sig @@ -0,0 +1,13 @@ +signature dpd_http_client { + ip-proto == tcp + payload /^[[:space:]]*(GET|HEAD|POST)[[:space:]]*/ + tcp-state originator +} + +signature dpd_http_server { + ip-proto == tcp + payload /^HTTP\/[0-9]/ + tcp-state responder + requires-reverse-signature dpd_http_client + enable "http" +} diff --git a/scripts/base/protocols/irc/__load__.bro b/scripts/base/protocols/irc/__load__.bro index 5123385b0c..2e60cda0a6 100644 --- a/scripts/base/protocols/irc/__load__.bro +++ b/scripts/base/protocols/irc/__load__.bro @@ -1,3 +1,5 @@ @load ./main @load ./dcc-send @load ./file-analysis + +@load-sigs ./dpd.sig \ No newline at end of file diff --git a/scripts/base/protocols/irc/dpd.sig b/scripts/base/protocols/irc/dpd.sig new file mode 100644 index 0000000000..308358d619 --- /dev/null +++ b/scripts/base/protocols/irc/dpd.sig @@ -0,0 +1,33 @@ +signature irc_client1 { + ip-proto == tcp + payload /(|.*[\r\n]) *[Uu][Ss][Ee][Rr] +.+[\n\r]+ *[Nn][Ii][Cc][Kk] +.*[\r\n]/ + requires-reverse-signature irc_server_reply + tcp-state originator + enable "irc" +} + +signature irc_client2 { + ip-proto == tcp + payload /(|.*[\r\n]) *[Nn][Ii][Cc][Kk] +.+[\r\n]+ *[Uu][Ss][Ee][Rr] +.+[\r\n]/ + requires-reverse-signature irc_server_reply + tcp-state originator + enable "irc" +} + +signature irc_server_reply { + ip-proto == tcp + payload /^(|.*[\n\r])(:[^ \n\r]+ )?[0-9][0-9][0-9] / + tcp-state responder +} + +signature irc_server_to_server1 { + ip-proto == tcp + payload /(|.*[\r\n]) *[Ss][Ee][Rr][Vv][Ee][Rr] +[^ ]+ +[0-9]+ +:.+[\r\n]/ +} + +signature irc_server_to_server2 { + ip-proto == tcp + payload /(|.*[\r\n]) *[Ss][Ee][Rr][Vv][Ee][Rr] +[^ ]+ +[0-9]+ +:.+[\r\n]/ + requires-reverse-signature irc_server_to_server1 + enable "irc" +} diff --git a/scripts/base/protocols/smtp/__load__.bro b/scripts/base/protocols/smtp/__load__.bro index bac9cc118f..3e3fde6947 100644 --- a/scripts/base/protocols/smtp/__load__.bro +++ b/scripts/base/protocols/smtp/__load__.bro @@ -2,3 +2,5 @@ @load ./entities @load ./entities-excerpt @load ./file-analysis + +@load-sigs ./dpd.sig \ No newline at end of file diff --git a/scripts/base/protocols/smtp/dpd.sig b/scripts/base/protocols/smtp/dpd.sig new file mode 100644 index 0000000000..49ed7ea3be --- /dev/null +++ b/scripts/base/protocols/smtp/dpd.sig @@ -0,0 +1,12 @@ +signature dpd_smtp_client { + ip-proto == tcp + payload /(|.*[\n\r])[[:space:]]*([hH][eE][lL][oO]|[eE][hH][lL][oO])/ + requires-reverse-signature dpd_smtp_server + enable "smtp" + tcp-state originator +} + +signature dpd_smtp_server { + ip-proto == tcp + payload /^[[:space:]]*220[[:space:]-]/ + tcp-state responder diff --git a/scripts/base/protocols/socks/__load__.bro b/scripts/base/protocols/socks/__load__.bro index 0098b81a7a..80193afb6f 100644 --- a/scripts/base/protocols/socks/__load__.bro +++ 
b/scripts/base/protocols/socks/__load__.bro @@ -1,2 +1,4 @@ @load ./consts -@load ./main \ No newline at end of file +@load ./main + +@load-sigs ./dpd.sig \ No newline at end of file diff --git a/scripts/base/protocols/socks/dpd.sig b/scripts/base/protocols/socks/dpd.sig new file mode 100644 index 0000000000..3dcd7a945a --- /dev/null +++ b/scripts/base/protocols/socks/dpd.sig @@ -0,0 +1,48 @@ +signature dpd_socks4_client { + ip-proto == tcp + # '32' is a rather arbitrary max length for the user name. + payload /^\x04[\x01\x02].{0,32}\x00/ + tcp-state originator +} + +signature dpd_socks4_server { + ip-proto == tcp + requires-reverse-signature dpd_socks4_client + payload /^\x00[\x5a\x5b\x5c\x5d]/ + tcp-state responder + enable "socks" +} + +signature dpd_socks4_reverse_client { + ip-proto == tcp + # '32' is a rather arbitrary max length for the user name. + payload /^\x04[\x01\x02].{0,32}\x00/ + tcp-state responder +} + +signature dpd_socks4_reverse_server { + ip-proto == tcp + requires-reverse-signature dpd_socks4_reverse_client + payload /^\x00[\x5a\x5b\x5c\x5d]/ + tcp-state originator + enable "socks" +} + +signature dpd_socks5_client { + ip-proto == tcp + # Watch for a few authentication methods to reduce false positives. + payload /^\x05.[\x00\x01\x02]/ + tcp-state originator +} + +signature dpd_socks5_server { + ip-proto == tcp + requires-reverse-signature dpd_socks5_client + # Watch for a single authentication method to be chosen by the server or + # the server to indicate the no authentication is required. + payload /^\x05(\x00|\x01[\x00\x01\x02])/ + tcp-state responder + enable "socks" +} + + diff --git a/scripts/base/protocols/ssh/__load__.bro b/scripts/base/protocols/ssh/__load__.bro index d551be57d3..0f3cb011f8 100644 --- a/scripts/base/protocols/ssh/__load__.bro +++ b/scripts/base/protocols/ssh/__load__.bro @@ -1 +1,3 @@ -@load ./main \ No newline at end of file +@load ./main + +@load-sigs ./dpd.sig \ No newline at end of file diff --git a/scripts/base/protocols/ssh/dpd.sig b/scripts/base/protocols/ssh/dpd.sig new file mode 100644 index 0000000000..95e22908ab --- /dev/null +++ b/scripts/base/protocols/ssh/dpd.sig @@ -0,0 +1,13 @@ +signature dpd_ssh_client { + ip-proto == tcp + payload /^[sS][sS][hH]-/ + requires-reverse-signature dpd_ssh_server + enable "ssh" + tcp-state originator +} + +signature dpd_ssh_server { + ip-proto == tcp + payload /^[sS][sS][hH]-/ + tcp-state responder +} diff --git a/scripts/base/protocols/ssl/__load__.bro b/scripts/base/protocols/ssl/__load__.bro index 239438047c..80cb4e216a 100644 --- a/scripts/base/protocols/ssl/__load__.bro +++ b/scripts/base/protocols/ssl/__load__.bro @@ -1,3 +1,5 @@ @load ./consts @load ./main @load ./mozilla-ca-list + +@load-sigs ./dpd.sig \ No newline at end of file diff --git a/scripts/base/protocols/ssl/dpd.sig b/scripts/base/protocols/ssl/dpd.sig new file mode 100644 index 0000000000..b36b9a5aa5 --- /dev/null +++ b/scripts/base/protocols/ssl/dpd.sig @@ -0,0 +1,15 @@ +signature dpd_ssl_server { + ip-proto == tcp + # Server hello. + payload /^(\x16\x03[\x00\x01\x02]..\x02...\x03[\x00\x01\x02]|...?\x04..\x00\x02).*/ + requires-reverse-signature dpd_ssl_client + enable "ssl" + tcp-state responder +} + +signature dpd_ssl_client { + ip-proto == tcp + # Client hello. 
+ payload /^(\x16\x03[\x00\x01\x02]..\x01...\x03[\x00\x01\x02]|...?\x01[\x00\x01\x02][\x02\x03]).*/ + tcp-state originator +} diff --git a/scripts/base/protocols/tunnels/__load__.bro b/scripts/base/protocols/tunnels/__load__.bro new file mode 100644 index 0000000000..9de7b6ff19 --- /dev/null +++ b/scripts/base/protocols/tunnels/__load__.bro @@ -0,0 +1 @@ +@load-sigs ./dpd.sig \ No newline at end of file diff --git a/scripts/base/protocols/tunnels/dpd.sig b/scripts/base/protocols/tunnels/dpd.sig new file mode 100644 index 0000000000..0c66775f5d --- /dev/null +++ b/scripts/base/protocols/tunnels/dpd.sig @@ -0,0 +1,14 @@ +# Provide DPD signatures for tunneling protocols that otherwise +# wouldn't be detected at all. + +signature dpd_ayiya { + ip-proto = udp + payload /^..\x11\x29/ + enable "ayiya" +} + +signature dpd_teredo { + ip-proto = udp + payload /^(\x00\x00)|(\x00\x01)|([\x60-\x6f])/ + enable "teredo" +} From 4dda9cd3bab0ca2eb2123a57ea4685eef7c560e1 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Tue, 9 Jul 2013 22:45:21 -0400 Subject: [PATCH 052/118] Fix a bug where the same analyzer tag was reused for two different analyzers. --- src/analyzer/protocol/bittorrent/BitTorrentTracker.cc | 2 +- src/analyzer/protocol/bittorrent/Plugin.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/analyzer/protocol/bittorrent/BitTorrentTracker.cc b/src/analyzer/protocol/bittorrent/BitTorrentTracker.cc index b32db9a8bd..98adcaa610 100644 --- a/src/analyzer/protocol/bittorrent/BitTorrentTracker.cc +++ b/src/analyzer/protocol/bittorrent/BitTorrentTracker.cc @@ -22,7 +22,7 @@ static RecordType* bittorrent_benc_value; static TableType* bittorrent_benc_dir; BitTorrentTracker_Analyzer::BitTorrentTracker_Analyzer(Connection* c) -: tcp::TCP_ApplicationAnalyzer("BITTORRENT", c) +: tcp::TCP_ApplicationAnalyzer("BITTORRENTTRACKER", c) { if ( ! bt_tracker_headers ) { diff --git a/src/analyzer/protocol/bittorrent/Plugin.cc b/src/analyzer/protocol/bittorrent/Plugin.cc index 2da9972d0d..7fea68bf07 100644 --- a/src/analyzer/protocol/bittorrent/Plugin.cc +++ b/src/analyzer/protocol/bittorrent/Plugin.cc @@ -7,6 +7,6 @@ BRO_PLUGIN_BEGIN(Bro, BitTorrent) BRO_PLUGIN_DESCRIPTION("BitTorrent Analyzer"); BRO_PLUGIN_ANALYZER("BitTorrent", bittorrent::BitTorrent_Analyzer); - BRO_PLUGIN_ANALYZER("BitTorrentTracker", bittorrent::BitTorrent_Analyzer); + BRO_PLUGIN_ANALYZER("BitTorrentTracker", bittorrent::BitTorrentTracker_Analyzer); BRO_PLUGIN_BIF_FILE(events); BRO_PLUGIN_END From 60da0f476416e4a7a831a20df9f06b8f0db1a782 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Tue, 9 Jul 2013 22:57:36 -0400 Subject: [PATCH 053/118] Added a missing curly brace in smtp/dpd.sig --- scripts/base/protocols/smtp/dpd.sig | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/base/protocols/smtp/dpd.sig b/scripts/base/protocols/smtp/dpd.sig index 49ed7ea3be..6fbde59059 100644 --- a/scripts/base/protocols/smtp/dpd.sig +++ b/scripts/base/protocols/smtp/dpd.sig @@ -10,3 +10,4 @@ signature dpd_smtp_server { ip-proto == tcp payload /^[[:space:]]*220[[:space:]-]/ tcp-state responder +} \ No newline at end of file From 8322bbfd620038171f93a0aca09119c406dab221 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Tue, 9 Jul 2013 23:28:09 -0400 Subject: [PATCH 054/118] Small test fixes. 
--- .../canonified_loaded_scripts.log | 5 +++-- testing/btest/core/tunnels/teredo-known-services.test | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log index 28430aacd8..6d6906d924 100644 --- a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log +++ b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log @@ -3,7 +3,7 @@ #empty_field (empty) #unset_field - #path loaded_scripts -#open 2013-07-05-05-21-48 +#open 2013-07-10-03-19-58 #fields name #types string scripts/base/init-bare.bro @@ -191,6 +191,7 @@ scripts/base/init-default.bro scripts/base/protocols/syslog/__load__.bro scripts/base/protocols/syslog/consts.bro scripts/base/protocols/syslog/main.bro + scripts/base/protocols/tunnels/__load__.bro scripts/base/misc/find-checksum-offloading.bro scripts/policy/misc/loaded-scripts.bro -#close 2013-07-05-05-21-48 +#close 2013-07-10-03-19-58 diff --git a/testing/btest/core/tunnels/teredo-known-services.test b/testing/btest/core/tunnels/teredo-known-services.test index d03ef2ab71..da3a538515 100644 --- a/testing/btest/core/tunnels/teredo-known-services.test +++ b/testing/btest/core/tunnels/teredo-known-services.test @@ -1,6 +1,6 @@ -# @TEST-EXEC: bro -r $TRACES/tunnels/false-teredo.pcap base/frameworks/dpd protocols/conn/known-services Tunnel::delay_teredo_confirmation=T "Site::local_nets+={192.168.1.0/24}" +# @TEST-EXEC: bro -r $TRACES/tunnels/false-teredo.pcap base/frameworks/dpd base/protocols/tunnels protocols/conn/known-services Tunnel::delay_teredo_confirmation=T "Site::local_nets+={192.168.1.0/24}" # @TEST-EXEC: test ! -e known_services.log -# @TEST-EXEC: bro -b -r $TRACES/tunnels/false-teredo.pcap base/frameworks/dpd protocols/conn/known-services Tunnel::delay_teredo_confirmation=F "Site::local_nets+={192.168.1.0/24}" +# @TEST-EXEC: bro -b -r $TRACES/tunnels/false-teredo.pcap base/frameworks/dpd base/protocols/tunnels protocols/conn/known-services Tunnel::delay_teredo_confirmation=F "Site::local_nets+={192.168.1.0/24}" # @TEST-EXEC: btest-diff known_services.log # The first case using Tunnel::delay_teredo_confirmation=T doesn't produce From 40201a180e54a560711003f2e65e14be87a7b8e9 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Tue, 9 Jul 2013 21:00:53 -0700 Subject: [PATCH 055/118] Fixing for unserializion error. Because BloomFilter is a base class, with other classes derived from it, it needs special treatment. --- src/SerialTypes.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/SerialTypes.h b/src/SerialTypes.h index 9e4aef5b3b..85aed10bda 100644 --- a/src/SerialTypes.h +++ b/src/SerialTypes.h @@ -52,8 +52,6 @@ SERIAL_IS(RE_MATCHER, 0x1400) SERIAL_IS(BITVECTOR, 0x1500) SERIAL_IS(COUNTERVECTOR, 0x1600) SERIAL_IS(BLOOMFILTER, 0x1700) -SERIAL_IS(BASICBLOOMFILTER, 0x1800) -SERIAL_IS(COUNTINGBLOOMFILTER, 0x1900) // These are the externally visible types. 
const SerialType SER_NONE = 0; @@ -203,6 +201,11 @@ SERIAL_FUNC(BRO_FUNC, 2) SERIAL_FUNC(DEBUG_FUNC, 3) SERIAL_FUNC(BUILTIN_FUNC, 4) +#define SERIAL_BLOOMFILTER(name, val) SERIAL_CONST(name, val, BLOOMFILTER) +SERIAL_BLOOMFILTER(BLOOMFILTER, 1) +SERIAL_BLOOMFILTER(BASICBLOOMFILTER, 2) +SERIAL_BLOOMFILTER(COUNTINGBLOOMFILTER, 3) + SERIAL_CONST2(ID) SERIAL_CONST2(STATE_ACCESS) SERIAL_CONST2(CASE) @@ -210,8 +213,5 @@ SERIAL_CONST2(LOCATION) SERIAL_CONST2(RE_MATCHER) SERIAL_CONST2(BITVECTOR) SERIAL_CONST2(COUNTERVECTOR) -SERIAL_CONST2(BLOOMFILTER) -SERIAL_CONST2(BASICBLOOMFILTER) -SERIAL_CONST2(COUNTINGBLOOMFILTER) #endif From 446344ae998e8eef30a0f45a05dcea29efe4f032 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Wed, 10 Jul 2013 01:32:59 -0700 Subject: [PATCH 056/118] Add missing include for GCC. --- src/BloomFilter.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/BloomFilter.cc b/src/BloomFilter.cc index a7727630f7..c59092b1e4 100644 --- a/src/BloomFilter.cc +++ b/src/BloomFilter.cc @@ -1,6 +1,7 @@ #include "BloomFilter.h" #include +#include #include "CounterVector.h" #include "Serializer.h" From 0394493faccf3975094208b4142d3c19b3482b4b Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Wed, 10 Jul 2013 11:53:44 -0500 Subject: [PATCH 057/118] const adjustment And fixes compiler warning about overloaded virtual function hiding. --- src/file_analysis/Component.cc | 2 +- src/file_analysis/Component.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/file_analysis/Component.cc b/src/file_analysis/Component.cc index d686918130..99531e40f5 100644 --- a/src/file_analysis/Component.cc +++ b/src/file_analysis/Component.cc @@ -41,7 +41,7 @@ analyzer::Tag Component::Tag() const return tag; } -void Component::Describe(ODesc* d) +void Component::Describe(ODesc* d) const { plugin::Component::Describe(d); d->Add(name); diff --git a/src/file_analysis/Component.h b/src/file_analysis/Component.h index 5ec97f2e0c..8b79436991 100644 --- a/src/file_analysis/Component.h +++ b/src/file_analysis/Component.h @@ -90,7 +90,7 @@ public: * Generates a human-readable description of the component's main * parameters. This goes into the output of \c "bro -NN". */ - virtual void Describe(ODesc* d); + virtual void Describe(ODesc* d) const; Component& operator=(const Component& other); From 99d604c9b565d18a73c12b91512aebebade7d57d Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Wed, 10 Jul 2013 14:06:51 -0500 Subject: [PATCH 058/118] Make the custom libmagic database a git submodule. The magic files couldn't be in the root of that repo or else libmagic would abort when it ran in to the .git* files and tried to treat them like magic files, too. 
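One practical consequence for existing checkouts: after this change the magic database is only present once the new submodule has been initialized. A typical sequence would be (standard git commands, not specific to this patch; the clone URL is a placeholder):

    # In an existing working copy, after fetching this commit:
    git submodule update --init magic

    # Fresh clones should pull all submodules up front:
    git clone --recursive <bro-repository-url>
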
--- .gitmodules | 3 + CMakeLists.txt | 7 +- magic | 1 + magic/COPYING | 29 ---- magic/animation | 208 ------------------------ magic/archive | 242 ---------------------------- magic/assembler | 19 --- magic/audio | 149 ----------------- magic/c-lang | 47 ------ magic/cafebabe | 31 ---- magic/commands | 82 ---------- magic/compress | 77 --------- magic/database | 47 ------ magic/diff | 25 --- magic/elf | 43 ----- magic/epoc | 34 ---- magic/filesystems | 12 -- magic/flash | 18 --- magic/fonts | 32 ---- magic/fortran | 7 - magic/frame | 31 ---- magic/gimp | 13 -- magic/gnu | 23 --- magic/gnumeric | 8 - magic/icc | 51 ------ magic/iff | 21 --- magic/images | 255 ------------------------------ magic/java | 16 -- magic/javascript | 17 -- magic/jpeg | 31 ---- magic/kde | 11 -- magic/kml | 30 ---- magic/linux | 22 --- magic/lisp | 42 ----- magic/lua | 17 -- magic/m4 | 7 - magic/macintosh | 21 --- magic/mail.news | 35 ---- magic/make | 16 -- magic/marc21 | 29 ---- magic/matroska | 17 -- magic/misctools | 9 -- magic/msdos | 369 ------------------------------------------- magic/neko | 12 -- magic/pascal | 11 -- magic/pdf | 8 - magic/perl | 26 --- magic/pgp | 27 ---- magic/pkgadd | 7 - magic/printer | 14 -- magic/python | 46 ------ magic/riff | 36 ----- magic/rpm | 12 -- magic/rtf | 9 -- magic/ruby | 28 ---- magic/sc | 7 - magic/sgml | 82 ---------- magic/sniffer | 17 -- magic/tcl | 23 --- magic/tex | 56 ------- magic/troff | 22 --- magic/vorbis | 26 --- magic/warc | 14 -- magic/windows | 19 --- magic/wordprocessors | 43 ----- magic/xwindows | 11 -- 66 files changed, 7 insertions(+), 2753 deletions(-) create mode 160000 magic delete mode 100644 magic/COPYING delete mode 100644 magic/animation delete mode 100644 magic/archive delete mode 100644 magic/assembler delete mode 100644 magic/audio delete mode 100644 magic/c-lang delete mode 100644 magic/cafebabe delete mode 100644 magic/commands delete mode 100644 magic/compress delete mode 100644 magic/database delete mode 100644 magic/diff delete mode 100644 magic/elf delete mode 100644 magic/epoc delete mode 100644 magic/filesystems delete mode 100644 magic/flash delete mode 100644 magic/fonts delete mode 100644 magic/fortran delete mode 100644 magic/frame delete mode 100644 magic/gimp delete mode 100644 magic/gnu delete mode 100644 magic/gnumeric delete mode 100644 magic/icc delete mode 100644 magic/iff delete mode 100644 magic/images delete mode 100644 magic/java delete mode 100644 magic/javascript delete mode 100644 magic/jpeg delete mode 100644 magic/kde delete mode 100644 magic/kml delete mode 100644 magic/linux delete mode 100644 magic/lisp delete mode 100644 magic/lua delete mode 100644 magic/m4 delete mode 100644 magic/macintosh delete mode 100644 magic/mail.news delete mode 100644 magic/make delete mode 100644 magic/marc21 delete mode 100644 magic/matroska delete mode 100644 magic/misctools delete mode 100644 magic/msdos delete mode 100644 magic/neko delete mode 100644 magic/pascal delete mode 100644 magic/pdf delete mode 100644 magic/perl delete mode 100644 magic/pgp delete mode 100644 magic/pkgadd delete mode 100644 magic/printer delete mode 100644 magic/python delete mode 100644 magic/riff delete mode 100644 magic/rpm delete mode 100644 magic/rtf delete mode 100644 magic/ruby delete mode 100644 magic/sc delete mode 100644 magic/sgml delete mode 100644 magic/sniffer delete mode 100644 magic/tcl delete mode 100644 magic/tex delete mode 100644 magic/troff delete mode 100644 magic/vorbis delete mode 100644 magic/warc delete mode 100644 
magic/windows delete mode 100644 magic/wordprocessors delete mode 100644 magic/xwindows diff --git a/.gitmodules b/.gitmodules index 95053091cf..2ede715f49 100644 --- a/.gitmodules +++ b/.gitmodules @@ -16,3 +16,6 @@ [submodule "cmake"] path = cmake url = git://git.bro-ids.org/cmake +[submodule "magic"] + path = magic + url = git://git.bro.org/bromagic diff --git a/CMakeLists.txt b/CMakeLists.txt index b95b637770..0f64f304b8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,7 +18,7 @@ get_filename_component(BRO_SCRIPT_INSTALL_PATH ${BRO_SCRIPT_INSTALL_PATH} ABSOLUTE) set(BRO_MAGIC_INSTALL_PATH ${BRO_ROOT_DIR}/share/bro/magic) -set(BRO_MAGIC_SOURCE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/magic) +set(BRO_MAGIC_SOURCE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/magic/database) configure_file(bro-path-dev.in ${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev) file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev.sh @@ -201,9 +201,8 @@ CheckOptionalBuildSources(aux/broctl Broctl INSTALL_BROCTL) CheckOptionalBuildSources(aux/bro-aux Bro-Aux INSTALL_AUX_TOOLS) CheckOptionalBuildSources(aux/broccoli Broccoli INSTALL_BROCCOLI) -install(DIRECTORY ./magic/ DESTINATION ${BRO_MAGIC_INSTALL_PATH} FILES_MATCHING - PATTERN "COPYING" EXCLUDE - PATTERN "*" +install(DIRECTORY ./magic/database/ + DESTINATION ${BRO_MAGIC_INSTALL_PATH} ) ######################################################################## diff --git a/magic b/magic new file mode 160000 index 0000000000..e87fe13a7b --- /dev/null +++ b/magic @@ -0,0 +1 @@ +Subproject commit e87fe13a7b776182ffc8c75076d42702f5c28fed diff --git a/magic/COPYING b/magic/COPYING deleted file mode 100644 index 7d2bf1e711..0000000000 --- a/magic/COPYING +++ /dev/null @@ -1,29 +0,0 @@ -# $File: LEGAL.NOTICE,v 1.15 2006/05/03 18:48:33 christos Exp $ -# Copyright (c) Ian F. Darwin 1986, 1987, 1989, 1990, 1991, 1992, 1994, 1995. -# Software written by Ian F. Darwin and others; -# maintained 1994- Christos Zoulas. -# -# This software is not subject to any export provision of the United States -# Department of Commerce, and may be exported to any country or planet. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# 1. Redistributions of source code must retain the above copyright -# notice immediately at the beginning of the file, without modification, -# this list of conditions, and the following disclaimer. -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR -# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -# SUCH DAMAGE. 
diff --git a/magic/animation b/magic/animation deleted file mode 100644 index 0cec03d511..0000000000 --- a/magic/animation +++ /dev/null @@ -1,208 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: animation,v 1.47 2013/02/06 14:18:52 christos Exp $ -# animation: file(1) magic for animation/movie formats -# -# animation formats -# MPEG, FLI, DL originally from vax@ccwf.cc.utexas.edu (VaX#n8) -# FLC, SGI, Apple originally from Daniel Quinlan (quinlan@yggdrasil.com) - -# SGI and Apple formats -0 string MOVI Silicon Graphics movie file -!:mime video/x-sgi-movie -4 string moov Apple QuickTime -!:mime video/quicktime -4 string mdat Apple QuickTime movie (unoptimized) -!:mime video/quicktime -#4 string wide Apple QuickTime movie (unoptimized) -#!:mime video/quicktime -#4 string skip Apple QuickTime movie (modified) -#!:mime video/quicktime -#4 string free Apple QuickTime movie (modified) -#!:mime video/quicktime -4 string idsc Apple QuickTime image (fast start) -!:mime image/x-quicktime -#4 string idat Apple QuickTime image (unoptimized) -#!:mime image/x-quicktime -4 string pckg Apple QuickTime compressed archive -!:mime application/x-quicktime-player -4 string/W jP JPEG 2000 image -!:mime image/jp2 -4 string ftyp ISO Media ->8 string isom \b, MPEG v4 system, version 1 -!:mime video/mp4 ->8 string mp41 \b, MPEG v4 system, version 1 -!:mime video/mp4 ->8 string mp42 \b, MPEG v4 system, version 2 -!:mime video/mp4 ->8 string/W jp2 \b, JPEG 2000 -!:mime image/jp2 ->8 string 3ge \b, MPEG v4 system, 3GPP -!:mime video/3gpp ->8 string 3gg \b, MPEG v4 system, 3GPP -!:mime video/3gpp ->8 string 3gp \b, MPEG v4 system, 3GPP -!:mime video/3gpp ->8 string 3gs \b, MPEG v4 system, 3GPP -!:mime video/3gpp ->8 string 3g2 \b, MPEG v4 system, 3GPP2 -!:mime video/3gpp2 ->8 string mmp4 \b, MPEG v4 system, 3GPP Mobile -!:mime video/mp4 ->8 string avc1 \b, MPEG v4 system, 3GPP JVT AVC -!:mime video/3gpp ->8 string/W M4A \b, MPEG v4 system, iTunes AAC-LC -!:mime audio/mp4 ->8 string/W M4V \b, MPEG v4 system, iTunes AVC-LC -!:mime video/mp4 ->8 string/W qt \b, Apple QuickTime movie -!:mime video/quicktime - -# MPEG sequences -# Scans for all common MPEG header start codes -0 belong&0xFFFFFF00 0x00000100 ->3 byte 0xBA MPEG sequence -!:mime video/mpeg -# GRR too general as it catches also FoxPro Memo example NG.FPT ->3 byte 0xB0 MPEG sequence, v4 -!:mime video/mpeg4-generic ->3 byte 0xB5 MPEG sequence, v4 -!:mime video/mpeg4-generic ->3 byte 0xB3 MPEG sequence -!:mime video/mpeg - -# MPEG ADTS Audio (*.mpx/mxa/aac) -# from dreesen@math.fu-berlin.de -# modified to fully support MPEG ADTS - -# MP3, M1A -# modified by Joerg Jenderek -# GRR the original test are too common for many DOS files -# so don't accept as MP3 until we've tested the rate -0 beshort&0xFFFE 0xFFFA -# rates ->2 byte&0xF0 0x10 MPEG ADTS, layer III, v1, 32 kbps -!:mime audio/mpeg ->2 byte&0xF0 0x20 MPEG ADTS, layer III, v1, 40 kbps -!:mime audio/mpeg ->2 byte&0xF0 0x30 MPEG ADTS, layer III, v1, 48 kbps -!:mime audio/mpeg ->2 byte&0xF0 0x40 MPEG ADTS, layer III, v1, 56 kbps -!:mime audio/mpeg ->2 byte&0xF0 0x50 MPEG ADTS, layer III, v1, 64 kbps -!:mime audio/mpeg ->2 byte&0xF0 0x60 MPEG ADTS, layer III, v1, 80 kbps -!:mime audio/mpeg ->2 byte&0xF0 0x70 MPEG ADTS, layer III, v1, 96 kbps -!:mime audio/mpeg ->2 byte&0xF0 0x80 MPEG ADTS, layer III, v1, 112 kbps -!:mime audio/mpeg ->2 byte&0xF0 0x90 MPEG ADTS, layer III, v1, 128 kbps -!:mime 
audio/mpeg ->2 byte&0xF0 0xA0 MPEG ADTS, layer III, v1, 160 kbps -!:mime audio/mpeg ->2 byte&0xF0 0xB0 MPEG ADTS, layer III, v1, 192 kbps -!:mime audio/mpeg ->2 byte&0xF0 0xC0 MPEG ADTS, layer III, v1, 224 kbps -!:mime audio/mpeg ->2 byte&0xF0 0xD0 MPEG ADTS, layer III, v1, 256 kbps -!:mime audio/mpeg ->2 byte&0xF0 0xE0 MPEG ADTS, layer III, v1, 320 kbps -!:mime audio/mpeg - -# MP2, M1A -0 beshort&0xFFFE 0xFFFC MPEG ADTS, layer II, v1 -!:mime audio/mpeg - -# MP3, M2A -0 beshort&0xFFFE 0xFFF2 MPEG ADTS, layer III, v2 -!:mime audio/mpeg - -# MPA, M2A -0 beshort&0xFFFE 0xFFF6 MPEG ADTS, layer I, v2 -!:mime audio/mpeg - -# MP3, M25A -0 beshort&0xFFFE 0xFFE2 MPEG ADTS, layer III, v2.5 -!:mime audio/mpeg - -# Stored AAC streams (instead of the MP4 format) -0 string ADIF MPEG ADIF, AAC -!:mime audio/x-hx-aac-adif - -# Live or stored single AAC stream (used with MPEG-2 systems) -0 beshort&0xFFF6 0xFFF0 MPEG ADTS, AAC -!:mime audio/x-hx-aac-adts - -# Live MPEG-4 audio streams (instead of RTP FlexMux) -0 beshort&0xFFE0 0x56E0 MPEG-4 LOAS -!:mime audio/x-mp4a-latm - -# This magic isn't strong enough (matches plausible ISO-8859-1 text) -#0 beshort 0x4DE1 MPEG-4 LO-EP audio stream -#!:mime audio/x-mp4a-latm - -# Summary: FLI animation format -# Created by: Daniel Quinlan -# Modified by (1): Abel Cheung (avoid over-generic detection) -4 leshort 0xAF11 -# standard FLI always has 320x200 resolution and 8 bit color ->8 leshort 320 ->>10 leshort 200 ->>>12 leshort 8 FLI animation, 320x200x8 -!:mime video/x-fli - -# Summary: FLC animation format -# Created by: Daniel Quinlan -# Modified by (1): Abel Cheung (avoid over-generic detection) -4 leshort 0xAF12 -# standard FLC always use 8 bit color ->12 leshort 8 FLC animation -!:mime video/x-flc - -# Microsoft Advanced Streaming Format (ASF) -0 belong 0x3026b275 Microsoft ASF -!:mime video/x-ms-asf - -# MNG Video Format, -0 string \x8aMNG MNG video data, -!:mime video/x-mng - -# JNG Video Format, -0 string \x8bJNG JNG video data, -!:mime video/x-jng - -# VRML (Virtual Reality Modelling Language) -0 string/w #VRML\ V1.0\ ascii VRML 1 file -!:mime model/vrml -0 string/w #VRML\ V2.0\ utf8 ISO/IEC 14772 VRML 97 file -!:mime model/vrml - -# X3D (Extensible 3D) [http://www.web3d.org/specifications/x3d-3.0.dtd] -# From Michel Briand -0 string/t \20 search/1000/cw \4 byte &0x40 -!:mime video/mp2p ->4 byte ^0x40 -!:mime video/mpeg -0 belong 0x000001BB -!:mime video/mpeg -0 belong 0x000001B0 -!:mime video/mp4v-es -0 belong 0x000001B5 -!:mime video/mp4v-es -0 belong 0x000001B3 -!:mime video/mpv -0 belong&0xFF5FFF1F 0x47400010 -!:mime video/mp2t -0 belong 0x00000001 ->4 byte&0x1F 0x07 -!:mime video/h264 diff --git a/magic/archive b/magic/archive deleted file mode 100644 index 35cbef4012..0000000000 --- a/magic/archive +++ /dev/null @@ -1,242 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: archive,v 1.78 2013/02/06 14:18:52 christos Exp $ -# archive: file(1) magic for archive formats (see also "msdos" for self- -# extracting compressed archives) -# -# cpio, ar, arc, arj, hpack, lha/lharc, rar, squish, uc2, zip, zoo, etc. -# pre-POSIX "tar" archives are handled in the C code. 
- -# POSIX tar archives -257 string ustar\0 POSIX tar archive -!:mime application/x-tar # encoding: posix -257 string ustar\040\040\0 GNU tar archive -!:mime application/x-tar # encoding: gnu - -# cpio archives -# -# Yes, the top two "cpio archive" formats *are* supposed to just be "short". -# The idea is to indicate archives produced on machines with the same -# byte order as the machine running "file" with "cpio archive", and -# to indicate archives produced on machines with the opposite byte order -# from the machine running "file" with "byte-swapped cpio archive". -# -# The SVR4 "cpio(4)" hints that there are additional formats, but they -# are defined as "short"s; I think all the new formats are -# character-header formats and thus are strings, not numbers. -0 short 070707 cpio archive -!:mime application/x-cpio -0 short 0143561 byte-swapped cpio archive -!:mime application/x-cpio # encoding: swapped - -# -# System V Release 1 portable(?) archive format. -# -0 string = System V Release 1 ar archive -!:mime application/x-archive - -# -# Debian package; it's in the portable archive format, and needs to go -# before the entry for regular portable archives, as it's recognized as -# a portable archive whose first member has a name beginning with -# "debian". -# -0 string =!\ndebian -!:mime application/x-debian-package - -# -# MIPS archive; they're in the portable archive format, and need to go -# before the entry for regular portable archives, as it's recognized as -# a portable archive whose first member has a name beginning with -# "__________E". -# -0 string =!\n__________E MIPS archive -!:mime application/x-archive - -# -# BSD/SVR2-and-later portable archive formats. -# -0 string =! current ar archive -!:mime application/x-archive - -# ARC archiver, from Daniel Quinlan (quinlan@yggdrasil.com) -# -# The first byte is the magic (0x1a), byte 2 is the compression type for -# the first file (0x01 through 0x09), and bytes 3 to 15 are the MS-DOS -# filename of the first file (null terminated). Since some types collide -# we only test some types on basis of frequency: 0x08 (83%), 0x09 (5%), -# 0x02 (5%), 0x03 (3%), 0x04 (2%), 0x06 (2%). 0x01 collides with terminfo. 
-0 lelong&0x8080ffff 0x0000081a ARC archive data, dynamic LZW -!:mime application/x-arc -0 lelong&0x8080ffff 0x0000091a ARC archive data, squashed -!:mime application/x-arc -0 lelong&0x8080ffff 0x0000021a ARC archive data, uncompressed -!:mime application/x-arc -0 lelong&0x8080ffff 0x0000031a ARC archive data, packed -!:mime application/x-arc -0 lelong&0x8080ffff 0x0000041a ARC archive data, squeezed -!:mime application/x-arc -0 lelong&0x8080ffff 0x0000061a ARC archive data, crunched -!:mime application/x-arc -# [JW] stuff taken from idarc, obviously ARC successors: -0 lelong&0x8080ffff 0x00000a1a PAK archive data -!:mime application/x-arc -0 lelong&0x8080ffff 0x0000141a ARC+ archive data -!:mime application/x-arc -0 lelong&0x8080ffff 0x0000481a HYP archive data -!:mime application/x-arc - -# ARJ archiver (jason@jarthur.Claremont.EDU) -0 leshort 0xea60 ARJ archive data -!:mime application/x-arj - -# LHARC/LHA archiver (Greg Roelofs, newt@uchicago.edu) -2 string -lh0- LHarc 1.x/ARX archive data [lh0] -!:mime application/x-lharc -2 string -lh1- LHarc 1.x/ARX archive data [lh1] -!:mime application/x-lharc -2 string -lz4- LHarc 1.x archive data [lz4] -!:mime application/x-lharc -2 string -lz5- LHarc 1.x archive data [lz5] -!:mime application/x-lharc -# [never seen any but the last; -lh4- reported in comp.compression:] -2 string -lzs- LHa/LZS archive data [lzs] -!:mime application/x-lha -2 string -lh\40- LHa 2.x? archive data [lh ] -!:mime application/x-lha -2 string -lhd- LHa 2.x? archive data [lhd] -!:mime application/x-lha -2 string -lh2- LHa 2.x? archive data [lh2] -!:mime application/x-lha -2 string -lh3- LHa 2.x? archive data [lh3] -!:mime application/x-lha -2 string -lh4- LHa (2.x) archive data [lh4] -!:mime application/x-lha -2 string -lh5- LHa (2.x) archive data [lh5] -!:mime application/x-lha -2 string -lh6- LHa (2.x) archive data [lh6] -!:mime application/x-lha -2 string -lh7- LHa (2.x)/LHark archive data [lh7] -!:mime application/x-lha - -# RAR archiver (Greg Roelofs, newt@uchicago.edu) -0 string Rar! RAR archive data, -!:mime application/x-rar - -# PKZIP multi-volume archive -0 string PK\x07\x08PK\x03\x04 Zip multi-volume archive data, at least PKZIP v2.50 to extract -!:mime application/zip - -# Zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu) -0 string PK\003\004 - -# Specialised zip formats which start with a member named 'mimetype' -# (stored uncompressed, with no 'extra field') containing the file's MIME type. -# Check for have 8-byte name, 0-byte extra field, name "mimetype", and -# contents starting with "application/": ->26 string \x8\0\0\0mimetypeapplication/ - -# OpenDocument formats (for OpenOffice 2.x / StarOffice >= 8) -# http://lists.oasis-open.org/archives/office/200505/msg00006.html -# (mimetype contains "application/vnd.oasis.opendocument.") ->>50 string vnd.oasis.opendocument. 
OpenDocument ->>>73 string text ->>>>77 byte !0x2d Text -!:mime application/vnd.oasis.opendocument.text ->>>>77 string -template Text Template -!:mime application/vnd.oasis.opendocument.text-template ->>>>77 string -web HTML Document Template -!:mime application/vnd.oasis.opendocument.text-web ->>>>77 string -master Master Document -!:mime application/vnd.oasis.opendocument.text-master ->>>73 string graphics ->>>>81 byte !0x2d Drawing -!:mime application/vnd.oasis.opendocument.graphics ->>>>81 string -template Template -!:mime application/vnd.oasis.opendocument.graphics-template ->>>73 string presentation ->>>>85 byte !0x2d Presentation -!:mime application/vnd.oasis.opendocument.presentation ->>>>85 string -template Template -!:mime application/vnd.oasis.opendocument.presentation-template ->>>73 string spreadsheet ->>>>84 byte !0x2d Spreadsheet -!:mime application/vnd.oasis.opendocument.spreadsheet ->>>>84 string -template Template -!:mime application/vnd.oasis.opendocument.spreadsheet-template ->>>73 string chart ->>>>78 byte !0x2d Chart -!:mime application/vnd.oasis.opendocument.chart ->>>>78 string -template Template -!:mime application/vnd.oasis.opendocument.chart-template ->>>73 string formula ->>>>80 byte !0x2d Formula -!:mime application/vnd.oasis.opendocument.formula ->>>>80 string -template Template -!:mime application/vnd.oasis.opendocument.formula-template ->>>73 string database Database -!:mime application/vnd.oasis.opendocument.database ->>>73 string image ->>>>78 byte !0x2d Image -!:mime application/vnd.oasis.opendocument.image ->>>>78 string -template Template -!:mime application/vnd.oasis.opendocument.image-template - -# EPUB (OEBPS) books using OCF (OEBPS Container Format) -# http://www.idpf.org/ocf/ocf1.0/download/ocf10.htm, section 4. -# From: Ralf Brown ->0x1E string mimetypeapplication/epub+zip EPUB document -!:mime application/epub+zip - -# Catch other ZIP-with-mimetype formats -# In a ZIP file, the bytes immediately after a member's contents are -# always "PK". The 2 regex rules here print the "mimetype" member's -# contents up to the first 'P'. Luckily, most MIME types don't contain -# any capital 'P's. This is a kludge. -# (mimetype contains "application/") ->>50 string !epub+zip ->>>50 string !vnd.oasis.opendocument. ->>>>50 string !vnd.sun.xml. ->>>>>50 string !vnd.kde. ->>>>>>38 regex [!-OQ-~]+ Zip data (MIME type "%s"?) -!:mime application/zip -# (mimetype contents other than "application/*") ->26 string \x8\0\0\0mimetype ->>38 string !application/ ->>>38 regex [!-OQ-~]+ Zip data (MIME type "%s"?) 
-!:mime application/zip - -# Java Jar files ->(26.s+30) leshort 0xcafe Java Jar file data (zip) -!:mime application/jar - -# Generic zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu) -# Next line excludes specialized formats: ->(26.s+30) leshort !0xcafe ->>26 string !\x8\0\0\0mimetype Zip archive data -!:mime application/zip - -# Zoo archiver -20 lelong 0xfdc4a7dc Zoo archive data -!:mime application/x-zoo - -# Shell archives -10 string #\ This\ is\ a\ shell\ archive shell archive text -!:mime application/octet-stream - -# Felix von Leitner -0 string d8:announce BitTorrent file -!:mime application/x-bittorrent - -# EET archive -# From: Tilman Sauerbeck -0 belong 0x1ee7ff00 EET archive -!:mime application/x-eet - -# Symbian installation files -# http://www.thouky.co.uk/software/psifs/sis.html -# http://developer.symbian.com/main/downloads/papers/SymbianOSv91/softwareinstallsis.pdf -8 lelong 0x10000419 Symbian installation file -!:mime application/vnd.symbian.install -0 lelong 0x10201A7A Symbian installation file (Symbian OS 9.x) -!:mime x-epoc/x-sisx-app diff --git a/magic/assembler b/magic/assembler deleted file mode 100644 index 242b6e19e2..0000000000 --- a/magic/assembler +++ /dev/null @@ -1,19 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: assembler,v 1.3 2013/01/04 17:23:28 christos Exp $ -# make: file(1) magic for assembler source -# -0 regex \^[\020\t]*\\.asciiz assembler source text -!:mime text/x-asm -0 regex \^[\020\t]*\\.byte assembler source text -!:mime text/x-asm -0 regex \^[\020\t]*\\.even assembler source text -!:mime text/x-asm -0 regex \^[\020\t]*\\.globl assembler source text -!:mime text/x-asm -0 regex \^[\020\t]*\\.text assembler source text -!:mime text/x-asm -0 regex \^[\020\t]*\\.file assembler source text -!:mime text/x-asm -0 regex \^[\020\t]*\\.type assembler source text -!:mime text/x-asm diff --git a/magic/audio b/magic/audio deleted file mode 100644 index 75a9dc536c..0000000000 --- a/magic/audio +++ /dev/null @@ -1,149 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: audio,v 1.65 2012/10/31 13:38:40 christos Exp $ -# audio: file(1) magic for sound formats (see also "iff") -# -# Jan Nicolai Langfeldt (janl@ifi.uio.no), Dan Quinlan (quinlan@yggdrasil.com), -# and others -# - -# Sun/NeXT audio data -0 string .snd Sun/NeXT audio data: ->12 belong 1 8-bit ISDN mu-law, -!:mime audio/basic ->12 belong 2 8-bit linear PCM [REF-PCM], -!:mime audio/basic ->12 belong 3 16-bit linear PCM, -!:mime audio/basic ->12 belong 4 24-bit linear PCM, -!:mime audio/basic ->12 belong 5 32-bit linear PCM, -!:mime audio/basic ->12 belong 6 32-bit IEEE floating point, -!:mime audio/basic ->12 belong 7 64-bit IEEE floating point, -!:mime audio/basic ->12 belong 23 8-bit ISDN mu-law compressed (CCITT G.721 ADPCM voice enc.), -!:mime audio/x-adpcm - -# DEC systems (e.g. 
DECstation 5000) use a variant of the Sun/NeXT format -# that uses little-endian encoding and has a different magic number -0 lelong 0x0064732E DEC audio data: ->12 lelong 1 8-bit ISDN mu-law, -!:mime audio/x-dec-basic ->12 lelong 2 8-bit linear PCM [REF-PCM], -!:mime audio/x-dec-basic ->12 lelong 3 16-bit linear PCM, -!:mime audio/x-dec-basic ->12 lelong 4 24-bit linear PCM, -!:mime audio/x-dec-basic ->12 lelong 5 32-bit linear PCM, -!:mime audio/x-dec-basic ->12 lelong 6 32-bit IEEE floating point, -!:mime audio/x-dec-basic ->12 lelong 7 64-bit IEEE floating point, -!:mime audio/x-dec-basic ->12 lelong 23 8-bit ISDN mu-law compressed (CCITT G.721 ADPCM voice enc.), -!:mime audio/x-dec-basic - -# Creative Labs AUDIO stuff -0 string MThd Standard MIDI data -!:mime audio/midi - -0 string CTMF Creative Music (CMF) data -!:mime audio/x-unknown -0 string SBI SoundBlaster instrument data -!:mime audio/x-unknown -0 string Creative\ Voice\ File Creative Labs voice data -!:mime audio/x-unknown - -# Real Audio (Magic .ra\0375) -0 belong 0x2e7261fd RealAudio sound file -!:mime audio/x-pn-realaudio -0 string .RMF\0\0\0 RealMedia file -!:mime application/vnd.rn-realmedia - -# mime types according to http://www.geocities.com/nevilo/mod.htm: -# audio/it .it -# audio/x-zipped-it .itz -# audio/xm fasttracker modules -# audio/x-s3m screamtracker modules -# audio/s3m screamtracker modules -# audio/x-zipped-mod mdz -# audio/mod mod -# audio/x-mod All modules (mod, s3m, 669, mtm, med, xm, it, mdz, stm, itz, xmz, s3z) - -# -# Taken from loader code from mikmod version 2.14 -# by Steve McIntyre (stevem@chiark.greenend.org.uk) -# added title printing on 2003-06-24 -0 string MAS_UTrack_V00 ->14 string >/0 ultratracker V1.%.1s module sound data -!:mime audio/x-mod -#audio/x-tracker-module - -0 string Extended\ Module: Fasttracker II module sound data -!:mime audio/x-mod -#audio/x-tracker-module - -21 string/c =!SCREAM! Screamtracker 2 module sound data -!:mime audio/x-mod -#audio/x-screamtracker-module -21 string BMOD2STM Screamtracker 2 module sound data -!:mime audio/x-mod -#audio/x-screamtracker-module -1080 string M.K. 4-channel Protracker module sound data -!:mime audio/x-mod -#audio/x-protracker-module -1080 string M!K! 4-channel Protracker module sound data -!:mime audio/x-mod -#audio/x-protracker-module -1080 string FLT4 4-channel Startracker module sound data -!:mime audio/x-mod -#audio/x-startracker-module -1080 string FLT8 8-channel Startracker module sound data -!:mime audio/x-mod -#audio/x-startracker-module -1080 string 4CHN 4-channel Fasttracker module sound data -!:mime audio/x-mod -#audio/x-fasttracker-module -1080 string 6CHN 6-channel Fasttracker module sound data -!:mime audio/x-mod -#audio/x-fasttracker-module -1080 string 8CHN 8-channel Fasttracker module sound data -!:mime audio/x-mod -#audio/x-fasttracker-module -1080 string CD81 8-channel Octalyser module sound data -!:mime audio/x-mod -#audio/x-octalysertracker-module -1080 string OKTA 8-channel Octalyzer module sound data -!:mime audio/x-mod -#audio/x-octalysertracker-module -# Not good enough. 
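For illustration only (this sketch is not part of the deleted magic files): the module-tracker entries above all key on a four-byte tag at the fixed offset 1080, so the whole family reduces to a single seek-and-compare. A minimal Python sketch, with the tag table mirroring a few of the entries listed above:

    # Sketch: classify tracker modules by the 4-byte tag at offset 1080,
    # mirroring the "1080 string ..." entries above (all audio/x-mod).
    TRACKER_TAGS = {
        b"M.K.": "4-channel Protracker module",
        b"M!K!": "4-channel Protracker module",
        b"FLT4": "4-channel Startracker module",
        b"FLT8": "8-channel Startracker module",
        b"4CHN": "4-channel Fasttracker module",
        b"6CHN": "6-channel Fasttracker module",
        b"8CHN": "8-channel Fasttracker module",
    }

    def sniff_tracker_module(path):
        """Return a description for known module tags, else None."""
        with open(path, "rb") as f:
            f.seek(1080)
            tag = f.read(4)
        return TRACKER_TAGS.get(tag)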
-#1082 string CH -#>1080 string >/0 %.2s-channel Fasttracker "oktalyzer" module sound data -1080 string 16CN 16-channel Taketracker module sound data -!:mime audio/x-mod -#audio/x-taketracker-module -1080 string 32CN 32-channel Taketracker module sound data -!:mime audio/x-mod -#audio/x-taketracker-module - -# Impulse tracker module (audio/x-it) -0 string IMPM Impulse Tracker module sound data - -!:mime audio/x-mod - -# Free lossless audio codec -# From: Przemyslaw Augustyniak -0 string fLaC FLAC audio bitstream data -!:mime audio/x-flac - -# Monkey's Audio compressed audio format (.ape) -# From danny.milo@gmx.net (Danny Milosavljevic) -# New version from Abel Cheung -0 string MAC\040 Monkey's Audio compressed format -!:mime audio/x-ape - -# musepak support From: "Jiri Pejchal" -0 string MP+ Musepack audio -!:mime audio/x-musepack diff --git a/magic/c-lang b/magic/c-lang deleted file mode 100644 index 525dc6b599..0000000000 --- a/magic/c-lang +++ /dev/null @@ -1,47 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: c-lang,v 1.16 2011/12/09 08:02:16 rrt Exp $ -# c-lang: file(1) magic for C and related languages programs -# - -# BCPL -0 search/8192 "libhdr" BCPL source text -!:mime text/x-bcpl -0 search/8192 "LIBHDR" BCPL source text -!:mime text/x-bcpl - -# C -0 regex \^#include C source text -!:mime text/x-c -0 regex \^char C source text -!:mime text/x-c -0 regex \^double C source text -!:mime text/x-c -0 regex \^extern C source text -!:mime text/x-c -0 regex \^float C source text -!:mime text/x-c -0 regex \^struct C source text -!:mime text/x-c -0 regex \^union C source text -!:mime text/x-c -0 search/8192 main( C source text -!:mime text/x-c - -# C++ -# The strength of these rules is increased so they beat the C rules above -0 regex \^template C++ source text -!:strength + 5 -!:mime text/x-c++ -0 regex \^virtual C++ source text -!:strength + 5 -!:mime text/x-c++ -0 regex \^class C++ source text -!:strength + 5 -!:mime text/x-c++ -0 regex \^public: C++ source text -!:strength + 5 -!:mime text/x-c++ -0 regex \^private: C++ source text -!:strength + 5 -!:mime text/x-c++ diff --git a/magic/cafebabe b/magic/cafebabe deleted file mode 100644 index 29fefd5f1e..0000000000 --- a/magic/cafebabe +++ /dev/null @@ -1,31 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: cafebabe,v 1.13 2013/02/26 21:04:38 christos Exp $ -# Cafe Babes unite! -# -# Since Java bytecode and Mach-O universal binaries have the same magic number, -# the test must be performed in the same "magic" sequence to get both right. -# The long at offset 4 in a Mach-O universal binary tells the number of -# architectures; the short at offset 4 in a Java bytecode file is the JVM minor -# version and the short at offset 6 is the JVM major version. Since there are only -# only 18 labeled Mach-O architectures at current, and the first released -# Java class format was version 43.0, we can safely choose any number -# between 18 and 39 to test the number of architectures against -# (and use as a hack). Let's not use 18, because the Mach-O people -# might add another one or two as time goes by... -# -### JAVA START ### -0 belong 0xcafebabe -!:mime application/x-java-applet - -0 belong 0xcafed00d JAR compressed with pack200, ->5 byte x version %d. 
->4 byte x \b%d -!:mime application/x-java-pack200 - -0 belong 0xcafed00d JAR compressed with pack200, ->5 byte x version %d. ->4 byte x \b%d -!:mime application/x-java-pack200 - -### JAVA END ### diff --git a/magic/commands b/magic/commands deleted file mode 100644 index 6ad7699c5e..0000000000 --- a/magic/commands +++ /dev/null @@ -1,82 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: commands,v 1.44 2013/02/05 15:20:47 christos Exp $ -# commands: file(1) magic for various shells and interpreters -# -#0 string/w : shell archive or script for antique kernel text -0 string/wt #!\ /bin/sh POSIX shell script text executable -!:mime text/x-shellscript -0 string/wt #!\ /bin/csh C shell script text executable -!:mime text/x-shellscript -# korn shell magic, sent by George Wu, gwu@clyde.att.com -0 string/wt #!\ /bin/ksh Korn shell script text executable -!:mime text/x-shellscript -0 string/wt #!\ /bin/tcsh Tenex C shell script text executable -!:mime text/x-shellscript -0 string/wt #!\ /usr/bin/tcsh Tenex C shell script text executable -!:mime text/x-shellscript -0 string/wt #!\ /usr/local/tcsh Tenex C shell script text executable -!:mime text/x-shellscript -0 string/wt #!\ /usr/local/bin/tcsh Tenex C shell script text executable -!:mime text/x-shellscript - -# -# zsh/ash/ae/nawk/gawk magic from cameron@cs.unsw.oz.au (Cameron Simpson) -0 string/wt #!\ /bin/zsh Paul Falstad's zsh script text executable -!:mime text/x-shellscript -0 string/wt #!\ /usr/bin/zsh Paul Falstad's zsh script text executable -!:mime text/x-shellscript -0 string/wt #!\ /usr/local/bin/zsh Paul Falstad's zsh script text executable -!:mime text/x-shellscript -0 string/wt #!\ /usr/local/bin/ash Neil Brown's ash script text executable -!:mime text/x-shellscript -0 string/wt #!\ /usr/local/bin/ae Neil Brown's ae script text executable -!:mime text/x-shellscript -0 string/wt #!\ /bin/nawk new awk script text executable -!:mime text/x-nawk -0 string/wt #!\ /usr/bin/nawk new awk script text executable -!:mime text/x-nawk -0 string/wt #!\ /usr/local/bin/nawk new awk script text executable -!:mime text/x-nawk -0 string/wt #!\ /bin/gawk GNU awk script text executable -!:mime text/x-gawk -0 string/wt #!\ /usr/bin/gawk GNU awk script text executable -!:mime text/x-gawk -0 string/wt #!\ /usr/local/bin/gawk GNU awk script text executable -!:mime text/x-gawk -# -0 string/wt #!\ /bin/awk awk script text executable -!:mime text/x-awk -0 string/wt #!\ /usr/bin/awk awk script text executable -!:mime text/x-awk - -# bash shell magic, from Peter Tobias (tobias@server.et-inf.fho-emden.de) -0 string/wt #!\ /bin/bash Bourne-Again shell script text executable -!:mime text/x-shellscript -0 string/wt #!\ /usr/bin/bash Bourne-Again shell script text executable -!:mime text/x-shellscript -0 string/wt #!\ /usr/local/bash Bourne-Again shell script text executable -!:mime text/x-shellscript -0 string/wt #!\ /usr/local/bin/bash Bourne-Again shell script text executable -!:mime text/x-shellscript - -# PHP scripts -# Ulf Harnhammar -0 search/1/c = -0 string =24 regex [0-9.]+ \b, version %s -!:mime text/x-php diff --git a/magic/compress b/magic/compress deleted file mode 100644 index f2598b783f..0000000000 --- a/magic/compress +++ /dev/null @@ -1,77 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. 
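Stepping back to the cafebabe note a little earlier: Mach-O universal binaries and Java class files share the 0xcafebabe magic, and the suggested tie-breaker is the big-endian word at offset 4 (architecture count for Mach-O, minor/major version shorts for class files). A rough Python sketch of that heuristic, illustrative only; the cutoff of 30 is just an arbitrary value inside the 18-39 window the comment describes:

    import struct

    # Sketch of the cafebabe tie-breaker described above: Mach-O fat binaries
    # keep a small architecture count at offset 4, while Java class files keep
    # minor/major version shorts there.
    CUTOFF = 30  # any value between 18 and 39 works, per the comment above

    def classify_cafebabe(path):
        with open(path, "rb") as f:
            header = f.read(8)
        if len(header) < 8 or header[:4] != b"\xca\xfe\xba\xbe":
            return None
        word = struct.unpack(">I", header[4:8])[0]
        if word < CUTOFF:
            return "Mach-O universal binary (%d architectures)" % word
        minor, major = struct.unpack(">HH", header[4:8])
        return "Java class data, version %d.%d" % (major, minor)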
-#------------------------------------------------------------------------------ -# $File: compress,v 1.48 2011/12/07 18:39:43 christos Exp $ -# compress: file(1) magic for pure-compression formats (no archives) -# -# compress, gzip, pack, compact, huf, squeeze, crunch, freeze, yabba, etc. -# -# Formats for various forms of compressed data -# Formats for "compress" proper have been moved into "compress.c", -# because it tries to uncompress it to figure out what's inside. - -# standard unix compress -0 string \037\235 compress'd data -!:mime application/x-compress -!:apple LZIVZIVU - -# gzip (GNU zip, not to be confused with Info-ZIP or PKWARE zip archiver) -# Edited by Chris Chittleborough , March 2002 -# * Original filename is only at offset 10 if "extra field" absent -# * Produce shorter output - notably, only report compression methods -# other than 8 ("deflate", the only method defined in RFC 1952). -0 string \037\213 gzip compressed data -!:mime application/x-gzip - -# packed data, Huffman (minimum redundancy) codes on a byte-by-byte basis -0 string \037\036 packed data -!:mime application/octet-stream - -# -# This magic number is byte-order-independent. -0 short 0x1f1f old packed data -!:mime application/octet-stream - -# XXX - why *two* entries for "compacted data", one of which is -# byte-order independent, and one of which is byte-order dependent? -# -0 short 0x1fff compacted data -!:mime application/octet-stream -# This string is valid for SunOS (BE) and a matching "short" is listed -# in the Ultrix (LE) magic file. -0 string \377\037 compacted data -!:mime application/octet-stream -0 short 0145405 huf output -!:mime application/octet-stream - -# bzip2 -0 string BZh bzip2 compressed data -!:mime application/x-bzip2 - -# lzip -0 string LZIP lzip compressed data -!:mime application/x-lzip - -# 7-zip archiver, from Thomas Klausner (wiz@danbala.tuwien.ac.at) -# http://www.7-zip.org or DOC/7zFormat.txt -# -0 string 7z\274\257\047\034 7-zip archive data, ->6 byte x version %d ->7 byte x \b.%d -!:mime application/x-7z-compressed - -# Type: LZMA -0 lelong&0xffffff =0x5d ->12 leshort =0xff LZMA compressed data, ->>5 lequad =0xffffffffffffffff streamed ->>5 lequad !0xffffffffffffffff non-streamed, size %lld -!:mime application/x-lzma - -# http://tukaani.org/xz/xz-file-format.txt -0 ustring \xFD7zXZ\x00 XZ compressed data -!:mime application/x-xz - -# https://github.com/ckolivas/lrzip/blob/master/doc/magic.header.txt -0 string LRZI LRZIP compressed data ->4 byte x - version %d ->5 byte x \b.%d -!:mime application/x-lrzip diff --git a/magic/database b/magic/database deleted file mode 100644 index f1c09c0629..0000000000 --- a/magic/database +++ /dev/null @@ -1,47 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: database,v 1.32 2013/02/06 14:18:52 christos Exp $ -# database: file(1) magic for various databases -# -# extracted from header/code files by Graeme Wilford (eep2gw@ee.surrey.ac.uk) -# -# -# GDBM magic numbers -# Will be maintained as part of the GDBM distribution in the future. -# -0 belong 0x13579ace GNU dbm 1.x or ndbm database, big endian -!:mime application/x-gdbm -0 lelong 0x13579ace GNU dbm 1.x or ndbm database, little endian -!:mime application/x-gdbm -0 string GDBM GNU dbm 2.x database -!:mime application/x-gdbm -# -# Berkeley DB -# -# Ian Darwin's file /etc/magic files: big/little-endian version. 
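As an aside on the compression entries just above (not part of the deleted files): nearly all of them are fixed byte prefixes at offset 0, so a crude sniffer can mirror them directly. A minimal sketch, with the prefixes copied from the signatures listed above:

    # Sketch: map the fixed leading bytes from the compression entries above
    # to their MIME types; longer prefixes are tried first.
    COMPRESSION_PREFIXES = [
        (b"\xfd7zXZ\x00", "application/x-xz"),
        (b"7z\xbc\xaf\x27\x1c", "application/x-7z-compressed"),
        (b"\x1f\x8b", "application/x-gzip"),
        (b"\x1f\x9d", "application/x-compress"),
        (b"BZh", "application/x-bzip2"),
        (b"LZIP", "application/x-lzip"),
        (b"LRZI", "application/x-lrzip"),
    ]

    def sniff_compression(path):
        with open(path, "rb") as f:
            head = f.read(8)
        for prefix, mime in COMPRESSION_PREFIXES:
            if head.startswith(prefix):
                return mime
        return None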
-# -# Hash 1.85/1.86 databases store metadata in network byte order. -# Btree 1.85/1.86 databases store the metadata in host byte order. -# Hash and Btree 2.X and later databases store the metadata in host byte order. - -0 long 0x00061561 Berkeley DB -!:mime application/x-dbm - -# MS Access database -4 string Standard\ Jet\ DB Microsoft Access Database -!:mime application/x-msaccess -4 string Standard\ ACE\ DB Microsoft Access Database -!:mime application/x-msaccess - -# Tokyo Cabinet magic data -# http://tokyocabinet.sourceforge.net/index.html -0 string ToKyO\ CaBiNeT\n Tokyo Cabinet ->14 string x \b (%s) ->32 byte 0 \b, Hash -!:mime application/x-tokyocabinet-hash ->32 byte 1 \b, B+ tree -!:mime application/x-tokyocabinet-btree ->32 byte 2 \b, Fixed-length -!:mime application/x-tokyocabinet-fixed ->32 byte 3 \b, Table -!:mime application/x-tokyocabinet-table diff --git a/magic/diff b/magic/diff deleted file mode 100644 index b6504f17a0..0000000000 --- a/magic/diff +++ /dev/null @@ -1,25 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: diff,v 1.13 2012/06/16 14:43:36 christos Exp $ -# diff: file(1) magic for diff(1) output -# -0 search/1 diff\ diff output text -!:mime text/x-diff -0 search/1 ***\ diff output text -!:mime text/x-diff -0 search/1 Only\ in\ diff output text -!:mime text/x-diff -0 search/1 Common\ subdirectories:\ diff output text -!:mime text/x-diff - -0 search/1 Index: RCS/CVS diff output text -!:mime text/x-diff - -# unified diff -0 search/4096 ---\ ->&0 search/1024 \n ->>&0 search/1 +++\ ->>>&0 search/1024 \n ->>>>&0 search/1 @@ unified diff output text -!:mime text/x-diff -!:strength + 90 diff --git a/magic/elf b/magic/elf deleted file mode 100644 index aaf80cf10e..0000000000 --- a/magic/elf +++ /dev/null @@ -1,43 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# elf: file(1) magic for ELF executables -# -# We have to check the byte order flag to see what byte order all the -# other stuff in the header is in. -# -# What're the correct byte orders for the nCUBE and the Fujitsu VPP500? -# -# Created by: unknown -# Modified by (1): Daniel Quinlan -# Modified by (2): Peter Tobias (core support) -# Modified by (3): Christian 'Dr. Disk' Hechelmann (fix of core support) -# Modified by (4): (VMS Itanium) -# Modified by (5): Matthias Urlichs (Listing of many architectures) -0 string \177ELF ELF ->4 byte 0 invalid class ->4 byte 1 32-bit ->4 byte 2 64-bit ->5 byte 0 invalid byte order ->5 byte 1 LSB ->>16 leshort 0 no file type, -!:strength *2 -!:mime application/octet-stream ->>16 leshort 1 relocatable, -!:mime application/x-object ->>16 leshort 2 executable, -!:mime application/x-executable ->>16 leshort 3 shared object, -!:mime application/x-sharedlib ->>16 leshort 4 core file -!:mime application/x-coredump ->5 byte 2 MSB ->>16 beshort 0 no file type, -!:mime application/octet-stream ->>16 beshort 1 relocatable, -!:mime application/x-object ->>16 beshort 2 executable, -!:mime application/x-executable ->>16 beshort 3 shared object, -!:mime application/x-sharedlib ->>16 beshort 4 core file, -!:mime application/x-coredump diff --git a/magic/epoc b/magic/epoc deleted file mode 100644 index d7397145fb..0000000000 --- a/magic/epoc +++ /dev/null @@ -1,34 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. 
-#------------------------------------------------------------------------------ -# $File: epoc,v 1.7 2009/09/19 16:28:09 christos Exp $ -# EPOC : file(1) magic for EPOC documents [Psion Series 5/Osaris/Geofox 1] -# Stefan Praszalowicz and Peter Breitenlohner -# Useful information for improving this file can be found at: -# http://software.frodo.looijaard.name/psiconv/formats/Index.html -#------------------------------------------------------------------------------ -0 lelong 0x10000037 Psion Series 5 ->4 lelong 0x10000042 multi-bitmap image -!:mime image/x-epoc-mbm ->4 lelong 0x1000006D ->>8 lelong 0x1000007D Sketch image -!:mime image/x-epoc-sketch ->>8 lelong 0x1000007F Word file -!:mime application/x-epoc-word ->>8 lelong 0x10000085 OPL program (TextEd) -!:mime application/x-epoc-opl ->>8 lelong 0x10000088 Sheet file -!:mime application/x-epoc-sheet ->4 lelong 0x10000073 OPO module -!:mime application/x-epoc-opo ->4 lelong 0x10000074 OPL application -!:mime application/x-epoc-app - - -0 lelong 0x10000050 Psion Series 5 ->4 lelong 0x1000006D database ->>8 lelong 0x10000084 Agenda file -!:mime application/x-epoc-agenda ->>8 lelong 0x10000086 Data file -!:mime application/x-epoc-data ->>8 lelong 0x10000CEA Jotter file -!:mime application/x-epoc-jotter diff --git a/magic/filesystems b/magic/filesystems deleted file mode 100644 index d2178296e0..0000000000 --- a/magic/filesystems +++ /dev/null @@ -1,12 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: filesystems,v 1.76 2013/02/18 18:45:41 christos Exp $ -# filesystems: file(1) magic for different filesystems -# - -# CDROM Filesystems -# Modified for UDF by gerardo.cacciari@gmail.com -32769 string CD001 # -!:mime application/x-iso9660-image -37633 string CD001 ISO 9660 CD-ROM filesystem data (raw 2352 byte sectors) -!:mime application/x-iso9660-image diff --git a/magic/flash b/magic/flash deleted file mode 100644 index b64761b12d..0000000000 --- a/magic/flash +++ /dev/null @@ -1,18 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: flash,v 1.8 2009/09/19 16:28:09 christos Exp $ -# flash: file(1) magic for Macromedia Flash file format -# -# See -# -# http://www.macromedia.com/software/flash/open/ -# -0 string FWS Macromedia Flash data, ->3 byte x version %d -!:mime application/x-shockwave-flash -0 string CWS Macromedia Flash data (compressed), -!:mime application/x-shockwave-flash - -# From: Cal Peake -0 string FLV Macromedia Flash Video -!:mime video/x-flv diff --git a/magic/fonts b/magic/fonts deleted file mode 100644 index 8189131d15..0000000000 --- a/magic/fonts +++ /dev/null @@ -1,32 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. 
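A short illustrative aside on the two ISO 9660 entries a little further up (not part of the deleted files): both probe for the same "CD001" tag, once at offset 32769 and once at offset 37633 for raw 2352-byte sectors. A minimal sketch that mirrors those two fixed offsets:

    # Sketch: the ISO 9660 entries above look for "CD001" at two fixed offsets.
    ISO_OFFSETS = {
        32769: "application/x-iso9660-image",
        37633: "application/x-iso9660-image (raw 2352 byte sectors)",
    }

    def looks_like_iso9660(path):
        with open(path, "rb") as f:
            for offset, label in ISO_OFFSETS.items():
                f.seek(offset)
                if f.read(5) == b"CD001":
                    return label
        return None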
-#------------------------------------------------------------------------------ -# $File: fonts,v 1.25 2013/02/06 14:18:52 christos Exp $ -# fonts: file(1) magic for font data -# - -# X11 font files in SNF (Server Natural Format) format -# updated by Joerg Jenderek at Feb 2013 -# http://computer-programming-forum.com/51-perl/8f22fb96d2e34bab.htm -0 belong 00000004 X11 SNF font data, MSB first -#>104 belong 00000004 X11 SNF font data, MSB first -!:mime application/x-font-sfn -# GRR: line below too general as it catches also Xbase index file t3-CHAR.NDX -0 lelong 00000004 ->104 lelong 00000004 X11 SNF font data, LSB first -!:mime application/x-font-sfn - -# True Type fonts -0 string \000\001\000\000\000 TrueType font data -!:mime application/x-font-ttf - -# Opentype font data from Avi Bercovich -0 string OTTO OpenType font data -!:mime application/vnd.ms-opentype - -# Gurkan Sengun , www.linuks.mine.nu -0 string SplineFontDB: Spline Font Database -!:mime application/vnd.font-fontforge-sfd - -# EOT -34 string LP Embedded OpenType (EOT) -!:mime application/vnd.ms-fontobject diff --git a/magic/fortran b/magic/fortran deleted file mode 100644 index 498eeacf8a..0000000000 --- a/magic/fortran +++ /dev/null @@ -1,7 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: fortran,v 1.6 2009/09/19 16:28:09 christos Exp $ -# FORTRAN source -0 regex/100 \^[Cc][\ \t] FORTRAN program -!:mime text/x-fortran -!:strength - 5 diff --git a/magic/frame b/magic/frame deleted file mode 100644 index b42943bfcd..0000000000 --- a/magic/frame +++ /dev/null @@ -1,31 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File$ -# frame: file(1) magic for FrameMaker files -# -# This stuff came on a FrameMaker demo tape, most of which is -# copyright, but this file is "published" as witness the following: -# -# Note that this is the Framemaker Maker Interchange Format, not the -# Normal format which would be application/vnd.framemaker. -# -0 string \6 string 3.0 (3.0) -#>6 string 2.0 (2.0) -#>6 string 1.0 (1.0) -0 string \ - -#------------------------------------------------------------------------------ -# XCF: file(1) magic for the XCF image format used in the GIMP developed -# by Spencer Kimball and Peter Mattis -# ('Bucky' LaDieu, nega@vt.edu) - -0 string gimp\ xcf GIMP XCF image data, -!:mime image/x-xcf diff --git a/magic/gnu b/magic/gnu deleted file mode 100644 index bf1f631751..0000000000 --- a/magic/gnu +++ /dev/null @@ -1,23 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: gnu,v 1.13 2012/01/03 17:16:54 christos Exp $ -# gnu: file(1) magic for various GNU tools -# -# GNU nlsutils message catalog file format -# -# GNU message catalog (.mo and .gmo files) - -# GnuPG -# The format is very similar to pgp -# Note: magic.mime had 0x8501 for the next line instead of 0x8502 -0 beshort 0x8502 GPG encrypted data -!:mime text/PGP # encoding: data - -# This magic is not particularly good, as the keyrings don't have true -# magic. Nevertheless, it covers many keyrings. 
-0 beshort 0x9901 GPG key public ring -!:mime application/x-gnupg-keyring - -# gettext message catalogue -0 regex \^msgid\ GNU gettext message catalogue text -!:mime text/x-po diff --git a/magic/gnumeric b/magic/gnumeric deleted file mode 100644 index b5edca93c1..0000000000 --- a/magic/gnumeric +++ /dev/null @@ -1,8 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File$ -# gnumeric: file(1) magic for Gnumeric spreadsheet -# This entry is only semi-helpful, as Gnumeric compresses its files, so -# they will ordinarily reported as "compressed", but at least -z helps -39 string =4 belong x \b, FORM is %d bytes long -# audio formats ->8 string AIFF \b, AIFF audio -!:mime audio/x-aiff ->8 string AIFC \b, AIFF-C compressed audio -!:mime audio/x-aiff ->8 string 8SVX \b, 8SVX 8-bit sampled sound voice -!:mime audio/x-aiff diff --git a/magic/images b/magic/images deleted file mode 100644 index 281aba4706..0000000000 --- a/magic/images +++ /dev/null @@ -1,255 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: images,v 1.80 2013/02/06 14:18:52 christos Exp $ -# images: file(1) magic for image formats (see also "iff", and "c-lang" for -# XPM bitmaps) -# -# originally from jef@helios.ee.lbl.gov (Jef Poskanzer), -# additions by janl@ifi.uio.no as well as others. Jan also suggested -# merging several one- and two-line files into here. -# -# little magic: PCX (first byte is 0x0a) - -# PBMPLUS images -# The next byte following the magic is always whitespace. -# strength is changed to try these patterns before "x86 boot sector" -0 search/1 P1 ->3 regex =[0-9]*\ [0-9]* Netpbm PBM image text ->3 regex =[0-9]+\ \b, size = %sx ->>3 regex =\ [0-9]+ \b%s -!:strength + 45 -!:mime image/x-portable-bitmap -0 search/1 P2 ->3 regex =[0-9]*\ [0-9]* Netpbm PGM image text ->3 regex =[0-9]+\ \b, size = %sx ->>3 regex =\ [0-9]+ \b%s -!:strength + 45 -!:mime image/x-portable-greymap -0 search/1 P3 Netpbm PPM image text ->3 regex =[0-9]*\ [0-9]* Netpbm PPM image text ->3 regex =[0-9]+\ \b, size = %sx ->>3 regex =\ [0-9]+ \b%s -!:strength + 45 -!:mime image/x-portable-pixmap -0 string P4 ->3 regex =[0-9]*\ [0-9]* Netpbm PBM "rawbits" image data ->3 regex =[0-9]+\ \b, size = %sx ->>3 regex =\ [0-9]+ \b%s -!:strength + 45 -!:mime image/x-portable-bitmap -0 string P5 ->3 regex =[0-9]*\ [0-9]* Netpbm PGM "rawbits" image data ->3 regex =[0-9]+\ \b, size = %sx ->>3 regex =\ [0-9]+ \b%s -!:strength + 45 -!:mime image/x-portable-greymap -0 string P6 ->3 regex =[0-9]*\ [0-9]* Netpbm PPM "rawbits" image data ->3 regex =[0-9]+\ \b, size = %sx ->>3 regex =\ [0-9]+ \b%s -!:strength + 45 -!:mime image/x-portable-pixmap -0 string P7 Netpbm PAM image file -!:mime image/x-portable-pixmap - -# NIFF (Navy Interchange File Format, a modification of TIFF) images -# [GRR: this *must* go before TIFF] -0 string IIN1 NIFF image data -!:mime image/x-niff - -# Canon RAW version 1 (CRW) files are a type of Canon Image File Format -# (CIFF) file. These are apparently all little-endian. -# From: Adam Buchbinder -# URL: http://www.sno.phy.queensu.ca/~phil/exiftool/canon_raw.html -0 string II\x1a\0\0\0HEAPCCDR Canon CIFF raw image data -!:mime image/x-canon-crw - -# Canon RAW version 2 (CR2) files are a kind of TIFF with an extra magic -# number. Put this above the TIFF test to make sure we detect them. 
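The CR2 remark just above ("put this above the TIFF test") and the !:strength bumps on the Netpbm entries make the same point: when several patterns can match, the more specific one must be tried first. In a hand-rolled sniffer that usually just means a first-match-wins list ordered by specificity; a rough sketch (illustrative only, signatures copied from the CR2/CRW/TIFF entries around this point):

    # Sketch: first-match-wins, most specific signature first, mirroring the
    # "put this above the TIFF test" ordering note above.
    SIGNATURES = [
        (b"II\x2a\x00\x10\x00\x00\x00CR", "image/x-canon-cr2"),  # CR2: TIFF + extra magic
        (b"II\x1a\x00\x00\x00HEAPCCDR",   "image/x-canon-crw"),  # Canon CIFF/CRW
        (b"II\x2a\x00",                   "image/tiff"),         # little-endian TIFF
        (b"MM\x00\x2a",                   "image/tiff"),         # big-endian TIFF
    ]

    def sniff_image(path):
        with open(path, "rb") as f:
            head = f.read(16)
        for prefix, mime in SIGNATURES:
            if head.startswith(prefix):
                return mime
        return None

Reversing the first and third entries would make every CR2 file report as plain TIFF, which is exactly the failure the ordering note guards against.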
-# These are apparently all little-endian. -# From: Adam Buchbinder -# URL: http://libopenraw.freedesktop.org/wiki/Canon_CR2 -0 string II\x2a\0\x10\0\0\0CR Canon CR2 raw image data -!:mime image/x-canon-cr2 - -# Tag Image File Format, from Daniel Quinlan (quinlan@yggdrasil.com) -# The second word of TIFF files is the TIFF version number, 42, which has -# never changed. The TIFF specification recommends testing for it. -0 string MM\x00\x2a TIFF image data, big-endian -!:mime image/tiff -0 string II\x2a\x00 TIFF image data, little-endian -!:mime image/tiff - -0 string MM\x00\x2b Big TIFF image data, big-endian -!:mime image/tiff -0 string II\x2b\x00 Big TIFF image data, little-endian -!:mime image/tiff - -# PNG [Portable Network Graphics, or "PNG's Not GIF"] images -# (Greg Roelofs, newt@uchicago.edu) -# (Albert Cahalan, acahalan@cs.uml.edu) -# -# 137 P N G \r \n ^Z \n [4-byte length] H E A D [HEAD data] [HEAD crc] ... -# -0 string \x89PNG\x0d\x0a\x1a\x0a PNG image data -!:mime image/png - -# possible GIF replacements; none yet released! -# (Greg Roelofs, newt@uchicago.edu) -# -# GRR 950115: this was mine ("Zip GIF"): -0 string GIF94z ZIF image (GIF+deflate alpha) -!:mime image/x-unknown -# -# GRR 950115: this is Jeremy Wohl's Free Graphics Format (better): -# -0 string FGF95a FGF image (GIF+deflate beta) -!:mime image/x-unknown -# -# GRR 950115: this is Thomas Boutell's Portable Bitmap Format proposal -# (best; not yet implemented): -# -0 string PBF PBF image (deflate compression) -!:mime image/x-unknown - -# GIF -0 string GIF8 GIF image data -!:mime image/gif -!:apple 8BIMGIFf - -# From: Joerg Jenderek -# most files with the extension .EPA and some with .BMP -0 string \x11\x06 Award BIOS Logo, 136 x 84 -!:mime image/x-award-bioslogo -0 string \x11\x09 Award BIOS Logo, 136 x 126 -!:mime image/x-award-bioslogo -#0 string \x07\x1f BIOS Logo corrupted? -# http://www.blackfiveservices.co.uk/awbmtools.shtml -# http://biosgfx.narod.ru/v3/ -# http://biosgfx.narod.ru/abr-2/ -0 string AWBM ->4 leshort <1981 Award BIOS bitmap -!:mime image/x-award-bmp - -# PC bitmaps (OS/2, Windows BMP files) (Greg Roelofs, newt@uchicago.edu) -0 string BM ->14 leshort 12 PC bitmap, OS/2 1.x format -!:mime image/x-ms-bmp ->14 leshort 64 PC bitmap, OS/2 2.x format -!:mime image/x-ms-bmp ->14 leshort 40 PC bitmap, Windows 3.x format -!:mime image/x-ms-bmp ->14 leshort 128 PC bitmap, Windows NT/2000 format -!:mime image/x-ms-bmp - -# XPM icons (Greg Roelofs, newt@uchicago.edu) -0 search/1 /*\ XPM\ */ X pixmap image text -!:mime image/x-xpmi - -# DICOM medical imaging data -128 string DICM DICOM medical imaging data -!:mime application/dicom - -# XWD - X Window Dump file. -# As described in /usr/X11R6/include/X11/XWDFile.h -# used by the xwd program. -# Bradford Castalia, idaeim, 1/01 -# updated by Adam Buchbinder, 2/09 -# The following assumes version 7 of the format; the first long is the length -# of the header, which is at least 25 4-byte longs, and the one at offset 8 -# is a constant which is always either 1 or 2. Offset 12 is the pixmap depth, -# which is a maximum of 32. 
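Since XWD has no real magic number, the entry that follows ANDs together the plausibility checks just described on the first four big-endian header words. The same conjunction as a rough Python sketch (illustrative only, not part of the deleted file):

    import struct

    def looks_like_xwd(path):
        """Mirror the XWD plausibility test described above: header length > 100,
        file version == 7, pixmap format < 3, pixmap depth < 33 (big-endian words)."""
        with open(path, "rb") as f:
            head = f.read(16)
        if len(head) < 16:
            return False
        header_size, file_version, pixmap_format, pixmap_depth = struct.unpack(">4I", head)
        return (header_size > 100 and file_version == 7
                and pixmap_format < 3 and pixmap_depth < 33)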
-0 belong >100 ->8 belong <3 ->>12 belong <33 ->>>4 belong 7 XWD X Window Dump image data -!:mime image/x-xwindowdump - -# PCX image files -# From: Dan Fandrich -# updated by Joerg Jenderek at Feb 2013 by http://de.wikipedia.org/wiki/PCX -# http://web.archive.org/web/20100206055706/http://www.qzx.com/pc-gpe/pcx.txt -# GRR: original test was still too general as it catches xbase examples T5.DBT,T6.DBT with 0xa000000 -# test for bytes 0x0a,version byte (0,2,3,4,5),compression byte flag(0,1), bit depth (>0) of PCX or T5.DBT,T6.DBT -0 ubelong&0xffF8fe00 0x0a000000 -# for PCX bit depth > 0 ->3 ubyte >0 -# test for valid versions ->>1 ubyte <6 ->>>1 ubyte !1 PCX -!:mime image/x-pcx - -# Adobe Photoshop -# From: Asbjoern Sloth Toennesen -0 string 8BPS Adobe Photoshop Image -!:mime image/vnd.adobe.photoshop - -# Summary: DjVu image / document -# Extension: .djvu -# Reference: http://djvu.org/docs/DjVu3Spec.djvu -# Submitted by: Stephane Loeuillet -# Modified by (1): Abel Cheung -0 string AT&TFORM ->12 string DJVM DjVu multiple page document -!:mime image/vnd.djvu ->12 string DJVU DjVu image or single page document -!:mime image/vnd.djvu ->12 string DJVI DjVu shared document -!:mime image/vnd.djvu ->12 string THUM DjVu page thumbnails -!:mime image/vnd.djvu - -# Originally by Marc Espie -# Modified by Robert Minsk -# http://www.openexr.com/openexrfilelayout.pdf -0 lelong 20000630 OpenEXR image data, -!:mime image/x-exr - -# SMPTE Digital Picture Exchange Format, SMPTE DPX -# -# ANSI/SMPTE 268M-1994, SMPTE Standard for File Format for Digital -# Moving-Picture Exchange (DPX), v1.0, 18 February 1994 -# Robert Minsk -0 string SDPX DPX image data, big-endian, -!:mime image/x-dpx - -#----------------------------------------------------------------------- -# Hierarchical Data Format, used to facilitate scientific data exchange -# specifications at http://hdf.ncsa.uiuc.edu/ -0 belong 0x0e031301 Hierarchical Data Format (version 4) data -!:mime application/x-hdf -0 string \211HDF\r\n\032\n Hierarchical Data Format (version 5) data -!:mime application/x-hdf - -# http://www.cartesianinc.com/Tech/ -0 string CPC\262 Cartesian Perceptual Compression image -!:mime image/x-cpi - - -# Polar Monitor Bitmap (.pmb) used as logo for Polar Electro watches -# From: Markus Heidelberg -0 string/t [BitmapInfo2] Polar Monitor Bitmap text -!:mime image/x-polar-monitor-bitmap - -# Type: Olympus ORF raw images. -# URL: http://libopenraw.freedesktop.org/wiki/Olympus_ORF -# From: Adam Buchbinder -0 string MMOR Olympus ORF raw image data, big-endian -!:mime image/x-olympus-orf -0 string IIRO Olympus ORF raw image data, little-endian -!:mime image/x-olympus-orf -0 string IIRS Olympus ORF raw image data, little-endian -!:mime image/x-olympus-orf - -# Type: Foveon X3F -# URL: http://www.photofo.com/downloads/x3f-raw-format.pdf -# From: Adam Buchbinder -# Note that the MIME type isn't defined anywhere that I can find; if -# there's a canonical type for this format, it should replace this one. -0 string FOVb Foveon X3F raw image data -!:mime image/x-x3f - -# Paint.NET file -# From Adam Buchbinder -0 string PDN3 Paint.NET image data -!:mime image/x-paintnet diff --git a/magic/java b/magic/java deleted file mode 100644 index 481ffec160..0000000000 --- a/magic/java +++ /dev/null @@ -1,16 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. 
-#------------------------------------------------------------ -# $File: java,v 1.13 2011/12/08 12:12:46 rrt Exp $ -# Java ByteCode and Mach-O binaries (e.g., Mac OS X) use the -# same magic number, 0xcafebabe, so they are both handled -# in the entry called "cafebabe". -#------------------------------------------------------------ - -0 belong 0xfeedfeed Java KeyStore -!:mime application/x-java-keystore -0 belong 0xcececece Java JCE KeyStore -!:mime application/x-java-jce-keystore - -# Java source -0 regex ^import.*;$ Java source -!:mime text/x-java diff --git a/magic/javascript b/magic/javascript deleted file mode 100644 index a1311d0e71..0000000000 --- a/magic/javascript +++ /dev/null @@ -1,17 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: $ -# javascript: magic for javascript and node.js scripts. -# -0 search/1/w #!/bin/node Node.js script text executable -!:mime application/javascript -0 search/1/w #!/usr/bin/node Node.js script text executable -!:mime application/javascript -0 search/1/w #!/bin/nodejs Node.js script text executable -!:mime application/javascript -0 search/1/w #!/usr/bin/nodejs Node.js script text executable -!:mime application/javascript -0 search/1 #!/usr/bin/env\ node Node.js script text executable -!:mime application/javascript -0 search/1 #!/usr/bin/env\ nodejs Node.js script text executable -!:mime application/javascript diff --git a/magic/jpeg b/magic/jpeg deleted file mode 100644 index 55fedae4b4..0000000000 --- a/magic/jpeg +++ /dev/null @@ -1,31 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: jpeg,v 1.18 2012/08/01 12:12:36 christos Exp $ -# JPEG images -# SunOS 5.5.1 had -# -# 0 string \377\330\377\340 JPEG file -# 0 string \377\330\377\356 JPG file -# -# both of which turn into "JPEG image data" here. -# -0 beshort 0xffd8 JPEG image data -!:mime image/jpeg -!:apple 8BIMJPEG -!:strength +2 - -# From: David Santinoli -0 string \x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A JPEG 2000 -# From: Johan van der Knijff -# Added sub-entries for JP2, JPX, JPM and MJ2 formats; added mimetypes -# https://github.com/bitsgalore/jp2kMagic -# -# Now read value of 'Brand' field, which yields a few possibilities: ->20 string \x6a\x70\x32\x20 Part 1 (JP2) -!:mime image/jp2 ->20 string \x6a\x70\x78\x20 Part 2 (JPX) -!:mime image/jpx ->20 string \x6a\x70\x6d\x20 Part 6 (JPM) -!:mime image/jpm ->20 string \x6d\x6a\x70\x32 Part 3 (MJ2) -!:mime video/mj2 diff --git a/magic/kde b/magic/kde deleted file mode 100644 index 2b66ee611d..0000000000 --- a/magic/kde +++ /dev/null @@ -1,11 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: kde,v 1.4 2009/09/19 16:28:10 christos Exp $ -# kde: file(1) magic for KDE - -0 string/t [KDE\ Desktop\ Entry] KDE desktop entry -!:mime application/x-kdelnk -0 string/t #\ KDE\ Config\ File KDE config file -!:mime application/x-kdelnk -0 string/t #\ xmcd xmcd database file for kscd -!:mime text/x-xmcd diff --git a/magic/kml b/magic/kml deleted file mode 100644 index 608ff0e1b0..0000000000 --- a/magic/kml +++ /dev/null @@ -1,30 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. 
-#------------------------------------------------------------------------------ -# $File: kml,v 1.2 2009/09/19 16:28:10 christos Exp $ -# Type: Google KML, formerly Keyhole Markup Language -# Future development of this format has been handed -# over to the Open Geospatial Consortium. -# http://www.opengeospatial.org/standards/kml/ -# From: Asbjoern Sloth Toennesen -0 string/t \20 search/400 \ xmlns= ->>&0 regex ['"]http://earth.google.com/kml Google KML document -!:mime application/vnd.google-earth.kml+xml - -#------------------------------------------------------------------------------ -# Type: OpenGIS KML, formerly Keyhole Markup Language -# This standard is maintained by the -# Open Geospatial Consortium. -# http://www.opengeospatial.org/standards/kml/ -# From: Asbjoern Sloth Toennesen ->>&0 regex ['"]http://www.opengis.net/kml OpenGIS KML document -!:mime application/vnd.google-earth.kml+xml - -#------------------------------------------------------------------------------ -# Type: Google KML Archive (ZIP based) -# http://code.google.com/apis/kml/documentation/kml_tut.html -# From: Asbjoern Sloth Toennesen -0 string PK\003\004 ->4 byte 0x14 ->>30 string doc.kml Compressed Google KML Document, including resources. -!:mime application/vnd.google-earth.kmz diff --git a/magic/linux b/magic/linux deleted file mode 100644 index 4a5c935760..0000000000 --- a/magic/linux +++ /dev/null @@ -1,22 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: linux,v 1.46 2013/01/06 21:26:48 christos Exp $ -# linux: file(1) magic for Linux files -# -# Values for Linux/i386 binaries, from Daniel Quinlan -# The following basic Linux magic is useful for reference, but using -# "long" magic is a better practice in order to avoid collisions. -# -# 2 leshort 100 Linux/i386 -# >0 leshort 0407 impure executable (OMAGIC) -# >0 leshort 0410 pure executable (NMAGIC) -# >0 leshort 0413 demand-paged executable (ZMAGIC) -# >0 leshort 0314 demand-paged executable (QMAGIC) -# - -# SYSLINUX boot logo files (from 'ppmtolss16' sources) -# http://www.syslinux.org/wiki/index.php/SYSLINUX#Display_graphic_from_filename: -# file extension .lss .16 -0 lelong =0x1413f33d SYSLINUX' LSS16 image data -# syslinux-4.05/mime/image/x-lss16.xml -!:mime image/x-lss16 diff --git a/magic/lisp b/magic/lisp deleted file mode 100644 index f5a06c8964..0000000000 --- a/magic/lisp +++ /dev/null @@ -1,42 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. 
-#------------------------------------------------------------------------------ -# $File$ -# lisp: file(1) magic for lisp programs -# -# various lisp types, from Daniel Quinlan (quinlan@yggdrasil.com) - -# updated by Joerg Jenderek -# GRR: This lot is too weak -#0 string ;; -# windows INF files often begin with semicolon and use CRLF as line end -# lisp files are mainly created on unix system with LF as line end -#>2 search/4096 !\r Lisp/Scheme program text -#>2 search/4096 \r Windows INF file - -0 search/4096 (setq\ Lisp/Scheme program text -!:mime text/x-lisp -0 search/4096 (defvar\ Lisp/Scheme program text -!:mime text/x-lisp -0 search/4096 (defparam\ Lisp/Scheme program text -!:mime text/x-lisp -0 search/4096 (defun\ Lisp/Scheme program text -!:mime text/x-lisp -0 search/4096 (autoload\ Lisp/Scheme program text -!:mime text/x-lisp -0 search/4096 (custom-set-variables\ Lisp/Scheme program text -!:mime text/x-lisp - -# Emacs 18 - this is always correct, but not very magical. -0 string \012( Emacs v18 byte-compiled Lisp data -!:mime application/x-elc -# Emacs 19+ - ver. recognition added by Ian Springer -# Also applies to XEmacs 19+ .elc files; could tell them apart with regexs -# - Chris Chittleborough -0 string ;ELC ->4 byte >18 ->4 byte <32 Emacs/XEmacs v%d byte-compiled Lisp data -!:mime application/x-elc - -# From: David Allouche -0 search/1 \, Seo Sanghyeon - -# Lua scripts -0 search/1/w #!\ /usr/bin/lua Lua script text executable -!:mime text/x-lua -0 search/1/w #!\ /usr/local/bin/lua Lua script text executable -!:mime text/x-lua -0 search/1 #!/usr/bin/env\ lua Lua script text executable -!:mime text/x-lua -0 search/1 #!\ /usr/bin/env\ lua Lua script text executable -!:mime text/x-lua - diff --git a/magic/m4 b/magic/m4 deleted file mode 100644 index 7262fca81b..0000000000 --- a/magic/m4 +++ /dev/null @@ -1,7 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File$ -# make: file(1) magic for M4 scripts -# -0 regex \^dnl\ M4 macro processor script text -!:mime text/x-m4 diff --git a/magic/macintosh b/magic/macintosh deleted file mode 100644 index 6398fc2ff2..0000000000 --- a/magic/macintosh +++ /dev/null @@ -1,21 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: macintosh,v 1.21 2010/09/20 19:19:17 rrt Exp $ -# macintosh description -# -# BinHex is the Macintosh ASCII-encoded file format (see also "apple") -# Daniel Quinlan, quinlan@yggdrasil.com -11 string must\ be\ converted\ with\ BinHex BinHex binary text -!:mime application/mac-binhex40 - -# Stuffit archives are the de facto standard of compression for Macintosh -# files obtained from most archives. (franklsm@tuns.ca) -0 string SIT! StuffIt Archive (data) -!:mime application/x-stuffit -!:apple SIT!SIT! - -# Newer StuffIt archives (grant@netbsd.org) -0 string StuffIt StuffIt Archive -!:mime application/x-stuffit -!:apple SIT!SIT! -#>162 string >0 : %s diff --git a/magic/mail.news b/magic/mail.news deleted file mode 100644 index c1a446d4ca..0000000000 --- a/magic/mail.news +++ /dev/null @@ -1,35 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. 
-#------------------------------------------------------------------------------ -# $File: mail.news,v 1.21 2012/06/21 01:44:52 christos Exp $ -# mail.news: file(1) magic for mail and news -# -# Unfortunately, saved netnews also has From line added in some news software. -#0 string From mail text -0 string/t Relay-Version: old news text -!:mime message/rfc822 -0 string/t #!\ rnews batched news text -!:mime message/rfc822 -0 string/t N#!\ rnews mailed, batched news text -!:mime message/rfc822 -0 string/t Forward\ to mail forwarding text -!:mime message/rfc822 -0 string/t Pipe\ to mail piping text -!:mime message/rfc822 -0 string/tc delivered-to: SMTP mail text -!:mime message/rfc822 -0 string/tc return-path: SMTP mail text -!:mime message/rfc822 -0 string/t Path: news text -!:mime message/news -0 string/t Xref: news text -!:mime message/news -0 string/t From: news or mail text -!:mime message/rfc822 -0 string/t Article saved news text -!:mime message/news -0 string/t Received: RFC 822 mail text -!:mime message/rfc822 - -# TNEF files... -0 lelong 0x223E9F78 Transport Neutral Encapsulation Format -!:mime application/vnd.ms-tnef diff --git a/magic/make b/magic/make deleted file mode 100644 index 83d6a012dd..0000000000 --- a/magic/make +++ /dev/null @@ -1,16 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File$ -# make: file(1) magic for makefiles -# -0 regex \^CFLAGS makefile script text -!:mime text/x-makefile -0 regex \^LDFLAGS makefile script text -!:mime text/x-makefile -0 regex \^all: makefile script text -!:mime text/x-makefile -0 regex \^.PRECIOUS makefile script text -!:mime text/x-makefile - -0 regex \^SUBDIRS automake makefile script text -!:mime text/x-makefile diff --git a/magic/marc21 b/magic/marc21 deleted file mode 100644 index 26899d2e70..0000000000 --- a/magic/marc21 +++ /dev/null @@ -1,29 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#-------------------------------------------- -# marc21: file(1) magic for MARC 21 Format -# -# Kevin Ford (kefo@loc.gov) -# -# MARC21 formats are for the representation and communication -# of bibliographic and related information in machine-readable -# form. For more info, see http://www.loc.gov/marc/ - - -# leader position 20-21 must be 45 -20 string 45 - -# leader starts with 5 digits, followed by codes specific to MARC format ->0 regex/1 (^[0-9]{5})[acdnp][^bhlnqsu-z] MARC21 Bibliographic -!:mime application/marc ->0 regex/1 (^[0-9]{5})[acdnosx][z] MARC21 Authority -!:mime application/marc ->0 regex/1 (^[0-9]{5})[cdn][uvxy] MARC21 Holdings -!:mime application/marc -0 regex/1 (^[0-9]{5})[acdn][w] MARC21 Classification -!:mime application/marc ->0 regex/1 (^[0-9]{5})[cdn][q] MARC21 Community -!:mime application/marc - -# leader position 22-23, should be "00" but is it? ->0 regex/1 (^.{21})([^0]{2}) (non-conforming) -!:mime application/marc diff --git a/magic/matroska b/magic/matroska deleted file mode 100644 index c1791413cb..0000000000 --- a/magic/matroska +++ /dev/null @@ -1,17 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. 
-#------------------------------------------------------------------------------ -# $File: matroska,v 1.7 2012/08/26 10:06:15 christos Exp $ -# matroska: file(1) magic for Matroska files -# -# See http://www.matroska.org/ -# - -# EBML id: -0 belong 0x1a45dfa3 -# DocType id: ->4 search/4096 \x42\x82 -# DocType contents: ->>&1 string webm WebM -!:mime video/webm ->>&1 string matroska Matroska data -!:mime video/x-matroska diff --git a/magic/misctools b/magic/misctools deleted file mode 100644 index 35fddaa61a..0000000000 --- a/magic/misctools +++ /dev/null @@ -1,9 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#----------------------------------------------------------------------------- -# $File: misctools,v 1.12 2010/09/29 18:36:49 rrt Exp $ -# misctools: file(1) magic for miscellaneous UNIX tools. -# -0 string/c BEGIN:VCALENDAR vCalendar calendar file -!:mime text/calendar -0 string/c BEGIN:VCARD vCard visiting card -!:mime text/x-vcard diff --git a/magic/msdos b/magic/msdos deleted file mode 100644 index cc411aeeb7..0000000000 --- a/magic/msdos +++ /dev/null @@ -1,369 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: msdos,v 1.84 2013/02/05 13:55:22 christos Exp $ -# msdos: file(1) magic for MS-DOS files -# - -# .BAT files (Daniel Quinlan, quinlan@yggdrasil.com) -# updated by Joerg Jenderek at Oct 2008,Apr 2011 -0 string/t @ ->1 string/cW \ echo\ off DOS batch file text -!:mime text/x-msdos-batch ->1 string/cW echo\ off DOS batch file text -!:mime text/x-msdos-batch ->1 string/cW rem DOS batch file text -!:mime text/x-msdos-batch ->1 string/cW set\ DOS batch file text -!:mime text/x-msdos-batch - -# Tests for various EXE types. -# -# Many of the compressed formats were extraced from IDARC 1.23 source code. -# -0 string/b MZ DOS MZ -!:mime application/x-dosexec -# All non-DOS EXE extensions have the relocation table more than 0x40 bytes into the file. ->0x18 leshort <0x40 MS-DOS executable -# These traditional tests usually work but not always. When test quality support is -# implemented these can be turned on. -#>>0x18 leshort 0x1c (Borland compiler) -#>>0x18 leshort 0x1e (MS compiler) - -# If the relocation table is 0x40 or more bytes into the file, it's definitely -# not a DOS EXE. ->0x18 leshort >0x3f - -# Maybe it's a PE? 
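Before the entries that answer that question: the "(0x3c.l)" indirection used below simply means "read a 32-bit little-endian offset at 0x3c (e_lfanew) and continue matching there". A minimal Python version of the MZ-to-PE walk described around this point, for illustration only:

    import struct

    def classify_exe(path):
        """Rough MZ -> PE walk mirroring the msdos entries around this point."""
        with open(path, "rb") as f:
            data = f.read()
        if len(data) < 0x40 or data[:2] != b"MZ":
            return None
        # Relocation table inside the old DOS header area => plain MS-DOS executable.
        if struct.unpack_from("<H", data, 0x18)[0] < 0x40:
            return "MS-DOS executable"
        # Otherwise follow e_lfanew at 0x3c -- the "(0x3c.l)" indirection below.
        e_lfanew = struct.unpack_from("<I", data, 0x3c)[0]
        if data[e_lfanew:e_lfanew + 4] != b"PE\x00\x00" or len(data) < e_lfanew + 26:
            return "MS-DOS executable (extended header, but not PE)"
        machine = struct.unpack_from("<H", data, e_lfanew + 4)[0]
        opt_magic = struct.unpack_from("<H", data, e_lfanew + 24)[0]
        kind = {0x10b: "PE32 executable", 0x20b: "PE32+ executable"}.get(opt_magic, "PE image")
        return "%s, machine type 0x%04x" % (kind, machine)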
->>(0x3c.l) string PE\0\0 PE ->>>(0x3c.l+24) leshort 0x010b \b32 executable ->>>(0x3c.l+24) leshort 0x020b \b32+ executable ->>>(0x3c.l+24) leshort 0x0107 ROM image ->>>(0x3c.l+24) default x Unknown PE signature ->>>>&0 leshort x 0x%x ->>>(0x3c.l+22) leshort&0x2000 >0 (DLL) ->>>(0x3c.l+92) leshort 1 (native) ->>>(0x3c.l+92) leshort 2 (GUI) ->>>(0x3c.l+92) leshort 3 (console) ->>>(0x3c.l+92) leshort 7 (POSIX) ->>>(0x3c.l+92) leshort 9 (Windows CE) ->>>(0x3c.l+92) leshort 10 (EFI application) ->>>(0x3c.l+92) leshort 11 (EFI boot service driver) ->>>(0x3c.l+92) leshort 12 (EFI runtime driver) ->>>(0x3c.l+92) leshort 13 (EFI ROM) ->>>(0x3c.l+92) leshort 14 (XBOX) ->>>(0x3c.l+92) leshort 15 (Windows boot application) ->>>(0x3c.l+92) default x (Unknown subsystem ->>>>&0 leshort x 0x%x) ->>>(0x3c.l+4) leshort 0x14c Intel 80386 ->>>(0x3c.l+4) leshort 0x166 MIPS R4000 ->>>(0x3c.l+4) leshort 0x168 MIPS R10000 ->>>(0x3c.l+4) leshort 0x184 Alpha ->>>(0x3c.l+4) leshort 0x1a2 Hitachi SH3 ->>>(0x3c.l+4) leshort 0x1a6 Hitachi SH4 ->>>(0x3c.l+4) leshort 0x1c0 ARM ->>>(0x3c.l+4) leshort 0x1c2 ARM Thumb ->>>(0x3c.l+4) leshort 0x1c4 ARMv7 Thumb ->>>(0x3c.l+4) leshort 0x1f0 PowerPC ->>>(0x3c.l+4) leshort 0x200 Intel Itanium ->>>(0x3c.l+4) leshort 0x266 MIPS16 ->>>(0x3c.l+4) leshort 0x268 Motorola 68000 ->>>(0x3c.l+4) leshort 0x290 PA-RISC ->>>(0x3c.l+4) leshort 0x366 MIPSIV ->>>(0x3c.l+4) leshort 0x466 MIPS16 with FPU ->>>(0x3c.l+4) leshort 0xebc EFI byte code ->>>(0x3c.l+4) leshort 0x8664 x86-64 ->>>(0x3c.l+4) leshort 0xc0ee MSIL ->>>(0x3c.l+4) default x Unknown processor type ->>>>&0 leshort x 0x%x ->>>(0x3c.l+22) leshort&0x0200 >0 (stripped to external PDB) ->>>(0x3c.l+22) leshort&0x1000 >0 system file ->>>(0x3c.l+24) leshort 0x010b ->>>>(0x3c.l+232) lelong >0 Mono/.Net assembly ->>>(0x3c.l+24) leshort 0x020b ->>>>(0x3c.l+248) lelong >0 Mono/.Net assembly - -# hooray, there's a DOS extender using the PE format, with a valid PE -# executable inside (which just prints a message and exits if run in win) ->>>(8.s*16) string 32STUB \b, 32rtm DOS extender ->>>(8.s*16) string !32STUB \b, for MS Windows ->>>(0x3c.l+0xf8) string UPX0 \b, UPX compressed ->>>(0x3c.l+0xf8) search/0x140 PEC2 \b, PECompact2 compressed ->>>(0x3c.l+0xf8) search/0x140 UPX2 ->>>>(&0x10.l+(-4)) string PK\3\4 \b, ZIP self-extracting archive (Info-Zip) ->>>(0x3c.l+0xf8) search/0x140 .idata ->>>>(&0xe.l+(-4)) string PK\3\4 \b, ZIP self-extracting archive (Info-Zip) ->>>>(&0xe.l+(-4)) string ZZ0 \b, ZZip self-extracting archive ->>>>(&0xe.l+(-4)) string ZZ1 \b, ZZip self-extracting archive ->>>(0x3c.l+0xf8) search/0x140 .rsrc ->>>>(&0x0f.l+(-4)) string a\\\4\5 \b, WinHKI self-extracting archive ->>>>(&0x0f.l+(-4)) string Rar! \b, RAR self-extracting archive ->>>>(&0x0f.l+(-4)) search/0x3000 MSCF \b, InstallShield self-extracting archive ->>>>(&0x0f.l+(-4)) search/32 Nullsoft \b, Nullsoft Installer self-extracting archive ->>>(0x3c.l+0xf8) search/0x140 .data ->>>>(&0x0f.l) string WEXTRACT \b, MS CAB-Installer self-extracting archive ->>>(0x3c.l+0xf8) search/0x140 .petite\0 \b, Petite compressed ->>>>(0x3c.l+0xf7) byte x ->>>>>(&0x104.l+(-4)) string =!sfx! 
\b, ACE self-extracting archive ->>>(0x3c.l+0xf8) search/0x140 .WISE \b, WISE installer self-extracting archive ->>>(0x3c.l+0xf8) search/0x140 .dz\0\0\0 \b, Dzip self-extracting archive ->>>&(0x3c.l+0xf8) search/0x100 _winzip_ \b, ZIP self-extracting archive (WinZip) ->>>&(0x3c.l+0xf8) search/0x100 SharedD \b, Microsoft Installer self-extracting archive ->>>0x30 string Inno \b, InnoSetup self-extracting archive - -# Hmm, not a PE but the relocation table is too high for a traditional DOS exe, -# must be one of the unusual subformats. ->>(0x3c.l) string !PE\0\0 MS-DOS executable - ->>(0x3c.l) string NE \b, NE ->>>(0x3c.l+0x36) byte 1 for OS/2 1.x ->>>(0x3c.l+0x36) byte 2 for MS Windows 3.x ->>>(0x3c.l+0x36) byte 3 for MS-DOS ->>>(0x3c.l+0x36) byte 4 for Windows 386 ->>>(0x3c.l+0x36) byte 5 for Borland Operating System Services ->>>(0x3c.l+0x36) default x ->>>>(0x3c.l+0x36) byte x (unknown OS %x) ->>>(0x3c.l+0x36) byte 0x81 for MS-DOS, Phar Lap DOS extender ->>>(0x3c.l+0x0c) leshort&0x8003 0x8002 (DLL) ->>>(0x3c.l+0x0c) leshort&0x8003 0x8001 (driver) ->>>&(&0x24.s-1) string ARJSFX \b, ARJ self-extracting archive ->>>(0x3c.l+0x70) search/0x80 WinZip(R)\ Self-Extractor \b, ZIP self-extracting archive (WinZip) - ->>(0x3c.l) string LX\0\0 \b, LX ->>>(0x3c.l+0x0a) leshort <1 (unknown OS) ->>>(0x3c.l+0x0a) leshort 1 for OS/2 ->>>(0x3c.l+0x0a) leshort 2 for MS Windows ->>>(0x3c.l+0x0a) leshort 3 for DOS ->>>(0x3c.l+0x0a) leshort >3 (unknown OS) ->>>(0x3c.l+0x10) lelong&0x28000 =0x8000 (DLL) ->>>(0x3c.l+0x10) lelong&0x20000 >0 (device driver) ->>>(0x3c.l+0x10) lelong&0x300 0x300 (GUI) ->>>(0x3c.l+0x10) lelong&0x28300 <0x300 (console) ->>>(0x3c.l+0x08) leshort 1 i80286 ->>>(0x3c.l+0x08) leshort 2 i80386 ->>>(0x3c.l+0x08) leshort 3 i80486 ->>>(8.s*16) string emx \b, emx ->>>>&1 string x %s ->>>&(&0x54.l-3) string arjsfx \b, ARJ self-extracting archive - -# MS Windows system file, supposedly a collection of LE executables ->>(0x3c.l) string W3 \b, W3 for MS Windows - ->>(0x3c.l) string LE\0\0 \b, LE executable ->>>(0x3c.l+0x0a) leshort 1 -# some DOS extenders use LE files with OS/2 header ->>>>0x240 search/0x100 DOS/4G for MS-DOS, DOS4GW DOS extender ->>>>0x240 search/0x200 WATCOM\ C/C++ for MS-DOS, DOS4GW DOS extender ->>>>0x440 search/0x100 CauseWay\ DOS\ Extender for MS-DOS, CauseWay DOS extender ->>>>0x40 search/0x40 PMODE/W for MS-DOS, PMODE/W DOS extender ->>>>0x40 search/0x40 STUB/32A for MS-DOS, DOS/32A DOS extender (stub) ->>>>0x40 search/0x80 STUB/32C for MS-DOS, DOS/32A DOS extender (configurable stub) ->>>>0x40 search/0x80 DOS/32A for MS-DOS, DOS/32A DOS extender (embedded) -# this is a wild guess; hopefully it is a specific signature ->>>>&0x24 lelong <0x50 ->>>>>(&0x4c.l) string \xfc\xb8WATCOM ->>>>>>&0 search/8 3\xdbf\xb9 \b, 32Lite compressed -# another wild guess: if real OS/2 LE executables exist, they probably have higher start EIP -#>>>>(0x3c.l+0x1c) lelong >0x10000 for OS/2 -# fails with DOS-Extenders. ->>>(0x3c.l+0x0a) leshort 2 for MS Windows ->>>(0x3c.l+0x0a) leshort 3 for DOS ->>>(0x3c.l+0x0a) leshort 4 for MS Windows (VxD) ->>>(&0x7c.l+0x26) string UPX \b, UPX compressed ->>>&(&0x54.l-3) string UNACE \b, ACE self-extracting archive - -# looks like ASCII, probably some embedded copyright message. 
-# and definitely not NE/LE/LX/PE ->>0x3c lelong >0x20000000 ->>>(4.s*512) leshort !0x014c \b, MZ for MS-DOS -# header data too small for extended executable ->2 long !0 ->>0x18 leshort <0x40 ->>>(4.s*512) leshort !0x014c - ->>>>&(2.s-514) string !LE ->>>>>&-2 string !BW \b, MZ for MS-DOS ->>>>&(2.s-514) string LE \b, LE ->>>>>0x240 search/0x100 DOS/4G for MS-DOS, DOS4GW DOS extender -# educated guess since indirection is still not capable enough for complex offset -# calculations (next embedded executable would be at &(&2*512+&0-2) -# I suspect there are only LE executables in these multi-exe files ->>>>&(2.s-514) string BW ->>>>>0x240 search/0x100 DOS/4G ,\b LE for MS-DOS, DOS4GW DOS extender (embedded) ->>>>>0x240 search/0x100 !DOS/4G ,\b BW collection for MS-DOS - -# This sequence skips to the first COFF segment, usually .text ->(4.s*512) leshort 0x014c \b, COFF ->>(8.s*16) string go32stub for MS-DOS, DJGPP go32 DOS extender ->>(8.s*16) string emx ->>>&1 string x for DOS, Win or OS/2, emx %s ->>&(&0x42.l-3) byte x ->>>&0x26 string UPX \b, UPX compressed -# and yet another guess: small .text, and after large .data is unusal, could be 32lite ->>&0x2c search/0xa0 .text ->>>&0x0b lelong <0x2000 ->>>>&0 lelong >0x6000 \b, 32lite compressed - ->(8.s*16) string $WdX \b, WDos/X DOS extender - -# By now an executable type should have been printed out. The executable -# may be a self-uncompressing archive, so look for evidence of that and -# print it out. -# -# Some signatures below from Greg Roelofs, newt@uchicago.edu. -# ->0x35 string \x8e\xc0\xb9\x08\x00\xf3\xa5\x4a\x75\xeb\x8e\xc3\x8e\xd8\x33\xff\xbe\x30\x00\x05 \b, aPack compressed ->0xe7 string LH/2\ Self-Extract \b, %s ->0x1c string UC2X \b, UCEXE compressed ->0x1c string WWP\ \b, WWPACK compressed ->0x1c string RJSX \b, ARJ self-extracting archive ->0x1c string diet \b, diet compressed ->0x1c string LZ09 \b, LZEXE v0.90 compressed ->0x1c string LZ91 \b, LZEXE v0.91 compressed ->0x1c string tz \b, TinyProg compressed ->0x1e string Copyright\ 1989-1990\ PKWARE\ Inc. Self-extracting PKZIP archive -!:mime application/zip -# Yes, this really is "Copr", not "Corp." ->0x1e string PKLITE\ Copr. Self-extracting PKZIP archive -!:mime application/zip -# winarj stores a message in the stub instead of the sig in the MZ header ->0x20 search/0xe0 aRJsfX \b, ARJ self-extracting archive ->0x20 string AIN ->>0x23 string 2 \b, AIN 2.x compressed ->>0x23 string <2 \b, AIN 1.x compressed ->>0x23 string >2 \b, AIN 1.x compressed ->0x24 string LHa's\ SFX \b, LHa self-extracting archive -!:mime application/x-lha ->0x24 string LHA's\ SFX \b, LHa self-extracting archive -!:mime application/x-lha ->0x24 string \ $ARX \b, ARX self-extracting archive ->0x24 string \ $LHarc \b, LHarc self-extracting archive ->0x20 string SFX\ by\ LARC \b, LARC self-extracting archive ->0x40 string aPKG \b, aPackage self-extracting archive ->0x64 string W\ Collis\0\0 \b, Compack compressed ->0x7a string Windows\ self-extracting\ ZIP \b, ZIP self-extracting archive ->>&0xf4 search/0x140 \x0\x40\x1\x0 ->>>(&0.l+(4)) string MSCF \b, WinHKI CAB self-extracting archive ->1638 string -lh5- \b, LHa self-extracting archive v2.13S ->0x17888 string Rar! \b, RAR self-extracting archive - -# Skip to the end of the EXE. This will usually work fine in the PE case -# because the MZ image is hardcoded into the toolchain and almost certainly -# won't match any of these signatures. ->(4.s*512) long x ->>&(2.s-517) byte x ->>>&0 string PK\3\4 \b, ZIP self-extracting archive ->>>&0 string Rar! 
\b, RAR self-extracting archive ->>>&0 string =!\x11 \b, AIN 2.x self-extracting archive ->>>&0 string =!\x12 \b, AIN 2.x self-extracting archive ->>>&0 string =!\x17 \b, AIN 1.x self-extracting archive ->>>&0 string =!\x18 \b, AIN 1.x self-extracting archive ->>>&7 search/400 **ACE** \b, ACE self-extracting archive ->>>&0 search/0x480 UC2SFX\ Header \b, UC2 self-extracting archive - -# a few unknown ZIP sfxes, no idea if they are needed or if they are -# already captured by the generic patterns above ->(8.s*16) search/0x20 PKSFX \b, ZIP self-extracting archive (PKZIP) -# TODO: how to add this? >FileSize-34 string Windows\ Self-Installing\ Executable \b, ZIP self-extracting archive -# - -# TELVOX Teleinformatica CODEC self-extractor for OS/2: ->49801 string \x79\xff\x80\xff\x76\xff \b, CODEC archive v3.21 ->>49824 leshort =1 \b, 1 file ->>49824 leshort >1 \b, %u files - -# Popular applications -2080 string Microsoft\ Word\ 6.0\ Document %s -!:mime application/msword -2080 string Documento\ Microsoft\ Word\ 6 Spanish Microsoft Word 6 document data -!:mime application/msword -# Pawel Wiecek (for polish Word) -2112 string MSWordDoc Microsoft Word document data -!:mime application/msword -# -0 belong 0x31be0000 Microsoft Word Document -!:mime application/msword -# -0 string/b PO^Q` Microsoft Word 6.0 Document -!:mime application/msword -# -0 string/b \376\067\0\043 Microsoft Office Document -!:mime application/msword -0 string/b \333\245-\0\0\0 Microsoft Office Document -!:mime application/msword -512 string/b \354\245\301 Microsoft Word Document -!:mime application/msword - -# -0 string/b \xDB\xA5\x2D\x00 Microsoft WinWord 2.0 Document -!:mime application/msword -# -2080 string Microsoft\ Excel\ 5.0\ Worksheet %s -!:mime application/vnd.ms-excel -# -0 string/b \xDB\xA5\x2D\x00 Microsoft WinWord 2.0 Document -!:mime application/msword - -2080 string Foglio\ di\ lavoro\ Microsoft\ Exce %s -!:mime application/vnd.ms-excel -# -# Pawel Wiecek (for polish Excel) -2114 string Biff5 Microsoft Excel 5.0 Worksheet -!:mime application/vnd.ms-excel -# Italian MS-Excel -2121 string Biff5 Microsoft Excel 5.0 Worksheet -!:mime application/vnd.ms-excel -0 string/b \x09\x04\x06\x00\x00\x00\x10\x00 Microsoft Excel Worksheet -!:mime application/vnd.ms-excel -# -0 belong 0x00001a00 Lotus 1-2-3 -!:mime application/x-123 -# -0 belong 0x00000200 Lotus 1-2-3 -!:mime application/x-123 -0 string/b WordPro\0 Lotus WordPro -!:mime application/vnd.lotus-wordpro -0 string/b WordPro\r\373 Lotus WordPro -!:mime application/vnd.lotus-wordpro - -# Windows icons (Ian Springer ) -0 string/b \000\000\001\000 MS Windows icon resource -!:mime image/x-icon - -# .PIF files added by Joerg Jenderek from http://smsoft.ru/en/pifdoc.htm -# only for windows versions equal or greater 3.0 -0x171 string MICROSOFT\ PIFEX\0 Windows Program Information File -!:mime application/x-dosexec - -# TNEF magic From "Joomy" -# Microsoft Outlook's Transport Neutral Encapsulation Format (TNEF) -0 leshort 0x223e9f78 TNEF -!:mime application/vnd.ms-tnef - -#------------------------------------------------------------------------------ -# From Stuart Caie (developer of cabextract) -# Microsoft Cabinet files -0 string/b MSCF\0\0\0\0 Microsoft Cabinet archive data -!:mime application/vnd.ms-cab-compressed - -# from http://filext.com by Derek M Jones -# False positive with PPT (also currently this string is too long) -#0 string/b \xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x3E\x00\x03\x00\xFE\xFF\x09\x00\x06 
Microsoft Installer -0 string/b \320\317\021\340\241\261\032\341 Microsoft Office Document -!:mime application/msword -#>48 byte 0x1B Excel Document -#!:mime application/vnd.ms-excel -#>546 string bjbj Microsoft Word Document -#!:mime application/msword -#>546 string jbjb Microsoft Word Document -#!:mime application/msword - -0 string/b \224\246\056 Microsoft Word Document -!:mime application/msword - -512 string R\0o\0o\0t\0\ \0E\0n\0t\0r\0y Microsoft Word Document -!:mime application/msword - -# MS eBook format (.lit) -0 string/b ITOLITLS Microsoft Reader eBook Data ->8 lelong x \b, version %u -!:mime application/x-ms-reader diff --git a/magic/neko b/magic/neko deleted file mode 100644 index 50163a0861..0000000000 --- a/magic/neko +++ /dev/null @@ -1,12 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------ -# $File: java,v 1.12 2009/09/19 16:28:10 christos Exp $ - -# From: Mikhail Gusarov -# NekoVM (http://nekovm.org/) bytecode -0 string NEKO NekoVM bytecode ->4 lelong x (%d global symbols, ->8 lelong x %d global fields, ->12 lelong x %d bytecode ops) -!:mime application/x-nekovm-bytecode - diff --git a/magic/pascal b/magic/pascal deleted file mode 100644 index 911eea3c0c..0000000000 --- a/magic/pascal +++ /dev/null @@ -1,11 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File$ -# pascal: file(1) magic for Pascal source -# -0 search/8192 (input, Pascal source text -!:mime text/x-pascal -0 regex \^program Pascal source text -!:mime text/x-pascal -0 regex \^record Pascal source text -!:mime text/x-pascal diff --git a/magic/pdf b/magic/pdf deleted file mode 100644 index 761006ffe6..0000000000 --- a/magic/pdf +++ /dev/null @@ -1,8 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File$ -# pdf: file(1) magic for Portable Document Format -# - -0 string %PDF- PDF document -!:mime application/pdf diff --git a/magic/perl b/magic/perl deleted file mode 100644 index 12ec33b73a..0000000000 --- a/magic/perl +++ /dev/null @@ -1,26 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: perl,v 1.19 2012/06/20 21:16:25 christos Exp $ -# perl: file(1) magic for Larry Wall's perl language. -# -# The `eval' lines recognizes an outrageously clever hack. 
-# Keith Waclena -# Send additions to -0 search/1/w #!\ /bin/perl Perl script text executable -!:mime text/x-perl -0 search/1 eval\ "exec\ /bin/perl Perl script text -!:mime text/x-perl -0 search/1/w #!\ /usr/bin/perl Perl script text executable -!:mime text/x-perl -0 search/1 eval\ "exec\ /usr/bin/perl Perl script text -!:mime text/x-perl -0 search/1/w #!\ /usr/local/bin/perl Perl script text executable -!:mime text/x-perl -0 search/1 eval\ "exec\ /usr/local/bin/perl Perl script text -!:mime text/x-perl -0 search/1 eval\ '(exit\ $?0)'\ &&\ eval\ 'exec Perl script text -!:mime text/x-perl -0 search/1 #!/usr/bin/env\ perl Perl script text executable -!:mime text/x-perl -0 search/1 #!\ /usr/bin/env\ perl Perl script text executable -!:mime text/x-perl diff --git a/magic/pgp b/magic/pgp deleted file mode 100644 index 2bdfb77981..0000000000 --- a/magic/pgp +++ /dev/null @@ -1,27 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File$ -# pgp: file(1) magic for Pretty Good Privacy -# see http://lists.gnupg.org/pipermail/gnupg-devel/1999-September/016052.html -# -0 beshort 0x9900 PGP key public ring -!:mime application/x-pgp-keyring -0 beshort 0x9501 PGP key security ring -!:mime application/x-pgp-keyring -0 beshort 0x9500 PGP key security ring -!:mime application/x-pgp-keyring -0 beshort 0xa600 PGP encrypted data -#!:mime application/pgp-encrypted -#0 string -----BEGIN\040PGP text/PGP armored data -!:mime text/PGP # encoding: armored data -#>15 string PUBLIC\040KEY\040BLOCK- public key block -#>15 string MESSAGE- message -#>15 string SIGNED\040MESSAGE- signed message -#>15 string PGP\040SIGNATURE- signature - -2 string ---BEGIN\ PGP\ PUBLIC\ KEY\ BLOCK- PGP public key block -!:mime application/pgp-keys -0 string -----BEGIN\040PGP\40MESSAGE- PGP message -!:mime application/pgp -0 string -----BEGIN\040PGP\40SIGNATURE- PGP signature -!:mime application/pgp-signature diff --git a/magic/pkgadd b/magic/pkgadd deleted file mode 100644 index 602b4ec21d..0000000000 --- a/magic/pkgadd +++ /dev/null @@ -1,7 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File$ -# pkgadd: file(1) magic for SysV R4 PKG Datastreams -# -0 string #\ PaCkAgE\ DaTaStReAm pkg Datastream (SVR4) -!:mime application/x-svr4-package diff --git a/magic/printer b/magic/printer deleted file mode 100644 index cdce275b12..0000000000 --- a/magic/printer +++ /dev/null @@ -1,14 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: printer,v 1.24 2011/05/08 16:34:51 christos Exp $ -# printer: file(1) magic for printer-formatted files -# - -# PostScript, updated by Daniel Quinlan (quinlan@yggdrasil.com) -0 string %! PostScript document text -!:mime application/postscript -!:apple ASPSTEXT -# Some PCs have the annoying habit of adding a ^D as a document separator -0 string \004%! PostScript document text -!:mime application/postscript -!:apple ASPSTEXT diff --git a/magic/python b/magic/python deleted file mode 100644 index 1cd724bc59..0000000000 --- a/magic/python +++ /dev/null @@ -1,46 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. 
-#------------------------------------------------------------------------------ -# $File: python,v 1.21 2012/06/21 01:12:51 christos Exp $ -# python: file(1) magic for python -# - -0 search/1/w #!\ /usr/bin/python Python script text executable -!:mime text/x-python -0 search/1/w #!\ /usr/local/bin/python Python script text executable -!:mime text/x-python -0 search/1 #!/usr/bin/env\ python Python script text executable -!:mime text/x-python -0 search/1 #!\ /usr/bin/env\ python Python script text executable -!:mime text/x-python - -# from module.submodule import func1, func2 -0 regex \^from\\s+(\\w|\\.)+\\s+import.*$ Python script text executable -!:mime text/x-python - -# def __init__ (self, ...): -0 search/4096 def\ __init__ ->&0 search/64 self Python script text executable -!:mime text/x-python - -# comments -0 search/4096 ''' ->&0 regex .*'''$ Python script text executable -!:mime text/x-python - -0 search/4096 """ ->&0 regex .*"""$ Python script text executable -!:mime text/x-python - -# try: -# except: or finally: -# block -0 search/4096 try: ->&0 regex \^\\s*except.*: Python script text executable -!:mime text/x-python ->&0 search/4096 finally: Python script text executable -!:mime text/x-python - -# def name(args, args): -0 regex \^(\ |\\t)*def\ +[a-zA-Z]+ ->&0 regex \ *\\(([a-zA-Z]|,|\ )*\\):$ Python script text executable -!:mime text/x-python diff --git a/magic/riff b/magic/riff deleted file mode 100644 index 929dc9aa89..0000000000 --- a/magic/riff +++ /dev/null @@ -1,36 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: riff,v 1.22 2011/09/06 11:00:06 christos Exp $ -# riff: file(1) magic for RIFF format -# See -# -# http://www.seanet.com/users/matts/riffmci/riffmci.htm -# -# AVI section extended by Patrik Radman -# -0 string RIFF RIFF (little-endian) data -# Microsoft WAVE format (*.wav) ->8 string WAVE \b, WAVE audio -!:mime audio/x-wav -# Corel Draw Picture ->8 string CDRA \b, Corel Draw Picture -!:mime image/x-coreldraw -# AVI == Audio Video Interleave ->8 string AVI\040 \b, AVI -!:mime video/x-msvideo - -#------------------------------------------------------------------------------ -# Sony Wave64 -# see http://www.vcs.de/fileadmin/user_upload/MBS/PDF/Whitepaper/Informations_about_Sony_Wave64.pdf -# 128 bit RIFF-GUID { 66666972-912E-11CF-A5D6-28DB04C10000 } in little-endian -0 string riff\x2E\x91\xCF\x11\xA5\xD6\x28\xDB\x04\xC1\x00\x00 Sony Wave64 RIFF data -# 128 bit + total file size (64 bits) so 24 bytes -# then WAVE-GUID { 65766177-ACF3-11D3-8CD1-00C04F8EDB8A } ->24 string wave\xF3\xAC\xD3\x11\x8C\xD1\x00\xC0\x4F\x8E\xDB\x8A \b, WAVE 64 audio -!:mime audio/x-w64 - -#------------------------------------------------------------------------------ -# MBWF/RF64 -# see EBU TECH 3306 http://tech.ebu.ch/docs/tech/tech3306-2009.pdf -0 string RF64\xff\xff\xff\xffWAVEds64 MBWF/RF64 audio -!:mime audio/x-wav diff --git a/magic/rpm b/magic/rpm deleted file mode 100644 index 2558ebeef1..0000000000 --- a/magic/rpm +++ /dev/null @@ -1,12 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. 
-#------------------------------------------------------------------------------ -# $File: rpm,v 1.11 2011/06/14 12:47:41 christos Exp $ -# -# RPM: file(1) magic for Red Hat Packages Erik Troan (ewt@redhat.com) -# -0 belong 0xedabeedb RPM -!:mime application/x-rpm - -#delta RPM Daniel Novotny (dnovotny@redhat.com) -0 string drpm Delta RPM -!:mime application/x-rpm diff --git a/magic/rtf b/magic/rtf deleted file mode 100644 index 0719264e47..0000000000 --- a/magic/rtf +++ /dev/null @@ -1,9 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File$ -# rtf: file(1) magic for Rich Text Format (RTF) -# -# Duncan P. Simpson, D.P.Simpson@dcs.warwick.ac.uk -# -0 string {\\rtf Rich Text Format data, -!:mime text/rtf diff --git a/magic/ruby b/magic/ruby deleted file mode 100644 index 41682a89ad..0000000000 --- a/magic/ruby +++ /dev/null @@ -1,28 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: ruby,v 1.4 2010/07/08 20:24:13 christos Exp $ -# ruby: file(1) magic for Ruby scripting language -# URL: http://www.ruby-lang.org/ -# From: Reuben Thomas - -# Ruby scripts -0 search/1/w #!\ /usr/bin/ruby Ruby script text executable -!:mime text/x-ruby -0 search/1/w #!\ /usr/local/bin/ruby Ruby script text executable -!:mime text/x-ruby -0 search/1 #!/usr/bin/env\ ruby Ruby script text executable -!:mime text/x-ruby -0 search/1 #!\ /usr/bin/env\ ruby Ruby script text executable -!:mime text/x-ruby - -# What looks like ruby, but does not have a shebang -# (modules and such) -# From: Lubomir Rintel -0 regex \^[\ \t]*require[\ \t]'[A-Za-z_/]+' ->0 regex include\ [A-Z]|def\ [a-z]|\ do$ ->>0 regex \^[\ \t]*end([\ \t]*[;#].*)?$ Ruby script text -!:mime text/x-ruby -0 regex \^[\ \t]*(class|module)[\ \t][A-Z] ->0 regex (modul|includ)e\ [A-Z]|def\ [a-z] ->>0 regex \^[\ \t]*end([\ \t]*[;#].*)?$ Ruby module source text -!:mime text/x-ruby diff --git a/magic/sc b/magic/sc deleted file mode 100644 index 75333b3916..0000000000 --- a/magic/sc +++ /dev/null @@ -1,7 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File$ -# sc: file(1) magic for "sc" spreadsheet -# -38 string Spreadsheet sc spreadsheet file -!:mime application/x-sc diff --git a/magic/sgml b/magic/sgml deleted file mode 100644 index 64efa2c153..0000000000 --- a/magic/sgml +++ /dev/null @@ -1,82 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: sgml,v 1.28 2012/04/28 21:20:26 christos Exp $ -# Type: SVG Vectorial Graphics -# From: Noel Torres -0 string \15 string >\0 ->>19 search/4096 \>19 search/4096 \15 string >\0 ->>19 search/4096/cWbt \15 string >\0 ->>19 search/4096/cWbt \15 string >\0 ->>19 search/4096/cWbt \ - -# Although we may know the offset of certain text fields in TeX DVI -# and font files, we can't use them reliably because they are not -# zero terminated. [but we do anyway, christos] -0 string \367\002 TeX DVI file -!:mime application/x-dvi - -# There is no way to detect TeX Font Metric (*.tfm) files without -# breaking them apart and reading the data. The following patterns -# match most *.tfm files generated by METAFONT or afm2tfm. 
-2 string \000\021 TeX font metric data -!:mime application/x-tex-tfm -2 string \000\022 TeX font metric data -!:mime application/x-tex-tfm - -# Texinfo and GNU Info, from Daniel Quinlan (quinlan@yggdrasil.com) -0 search/1 \\input\ texinfo Texinfo source text -!:mime text/x-texinfo -0 search/1 This\ is\ Info\ file GNU Info text -!:mime text/x-info - -# TeX documents, from Daniel Quinlan (quinlan@yggdrasil.com) -0 search/4096 \\input TeX document text -!:mime text/x-tex -!:strength + 15 -0 search/4096 \\section LaTeX document text -!:mime text/x-tex -!:strength + 18 -0 search/4096 \\setlength LaTeX document text -!:mime text/x-tex -!:strength + 15 -0 search/4096 \\documentstyle LaTeX document text -!:mime text/x-tex -!:strength + 18 -0 search/4096 \\chapter LaTeX document text -!:mime text/x-tex -!:strength + 18 -0 search/4096 \\documentclass LaTeX 2e document text -!:mime text/x-tex -!:strength + 15 -0 search/4096 \\relax LaTeX auxiliary file -!:mime text/x-tex -!:strength + 15 -0 search/4096 \\contentsline LaTeX table of contents -!:mime text/x-tex -!:strength + 15 -0 search/4096 %\ -*-latex-*- LaTeX document text -!:mime text/x-tex diff --git a/magic/troff b/magic/troff deleted file mode 100644 index 7f60b1d9b3..0000000000 --- a/magic/troff +++ /dev/null @@ -1,22 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File$ -# troff: file(1) magic for *roff -# -# updated by Daniel Quinlan (quinlan@yggdrasil.com) - -# troff input -0 search/1 .\\" troff or preprocessor input text -!:mime text/troff -0 search/1 '\\" troff or preprocessor input text -!:mime text/troff -0 search/1 '.\\" troff or preprocessor input text -!:mime text/troff -0 search/1 \\" troff or preprocessor input text -!:mime text/troff -0 search/1 ''' troff or preprocessor input text -!:mime text/troff -0 regex/20 \^\\.[A-Za-z0-9][A-Za-z0-9][\ \t] troff or preprocessor input text -!:mime text/troff -0 regex/20 \^\\.[A-Za-z0-9][A-Za-z0-9]$ troff or preprocessor input text -!:mime text/troff diff --git a/magic/vorbis b/magic/vorbis deleted file mode 100644 index 4d25c3c3cd..0000000000 --- a/magic/vorbis +++ /dev/null @@ -1,26 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File$ -# vorbis: file(1) magic for Ogg/Vorbis files -# -# From Felix von Leitner -# Extended by Beni Cherniavsky -# Further extended by Greg Wooledge -# -# Most (everything but the number of channels and bitrate) is commented -# out with `##' as it's not interesting to the average user. The most -# probable things advanced users would want to uncomment are probably -# the number of comments and the encoder version. -# -# FIXME: The first match has been made a search, so that it can skip -# over prepended ID3 tags. This will work for MIME type detection, but -# won't work for detecting other properties of the file (they all need -# to be made relative to the search). In any case, if the file has ID3 -# tags, the ID3 information will be printed, not the Ogg information, -# so until that's fixed, this doesn't matter. -# FIXME[2]: Disable the above for now, since search assumes text mode. 
-# -# --- Ogg Framing --- -#0 search/1000 OggS Ogg data -0 string OggS Ogg data -!:mime application/ogg diff --git a/magic/warc b/magic/warc deleted file mode 100644 index 2a2aeb6fae..0000000000 --- a/magic/warc +++ /dev/null @@ -1,14 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: warc,v 1.2 2009/09/19 16:28:13 christos Exp $ -# warc: file(1) magic for WARC files - -0 string WARC/ WARC Archive ->5 string x version %.4s -!:mime application/warc - -#------------------------------------------------------------------------------ -# Arc File Format from Internet Archive -# see http://www.archive.org/web/researcher/ArcFileFormat.php -0 string filedesc:// Internet Archive File -!:mime application/x-ia-arc diff --git a/magic/windows b/magic/windows deleted file mode 100644 index 6a529782a9..0000000000 --- a/magic/windows +++ /dev/null @@ -1,19 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: windows,v 1.4 2009/09/19 16:28:13 christos Exp $ -# windows: file(1) magic for Microsoft Windows -# -# This file is mainly reserved for files where programs -# using them are run almost always on MS Windows 3.x or -# above, or files only used exclusively in Windows OS, -# where there is no better category to allocate for. -# For example, even though WinZIP almost run on Windows -# only, it is better to treat them as "archive" instead. -# For format usable in DOS, such as generic executable -# format, please specify under "msdos" file. -# - -# From: Pal Tamas -# Autorun File -0 string/c [autorun]\r\n Microsoft Windows Autorun file. -!:mime application/x-setupscript. diff --git a/magic/wordprocessors b/magic/wordprocessors deleted file mode 100644 index 7de3413c0a..0000000000 --- a/magic/wordprocessors +++ /dev/null @@ -1,43 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: wordprocessors,v 1.16 2012/10/29 17:36:49 christos Exp $ -# wordprocessors: file(1) magic fo word processors. -# - -# Hangul (Korean) Word Processor File -# From: Won-Kyu Park -512 string R\0o\0o\0t\0 Hangul (Korean) Word Processor File 2000 -!:mime application/x-hwp - -# Quark Express from http://www.garykessler.net/library/file_sigs.html -2 string MMXPR3 Motorola Quark Express Document (English) -!:mime application/x-quark-xpress-3 - -#------------------------------------------------------------------------------ -# ichitaro456: file(1) magic for Just System Word Processor Ichitaro -# -# Contributor kenzo-: -# Reversed-engineered JS Ichitaro magic numbers -# - -0 string DOC ->43 byte 0x14 Just System Word Processor Ichitaro v4 -!:mime application/x-ichitaro4 - -0 string DOC ->43 byte 0x15 Just System Word Processor Ichitaro v5 -!:mime application/x-ichitaro5 - -0 string DOC ->43 byte 0x16 Just System Word Processor Ichitaro v6 -!:mime application/x-ichitaro6 - -# Type: Freemind mindmap documents -# From: Jamie Thompson -0 string/w \ -0 string \ Date: Wed, 10 Jul 2013 16:29:07 -0400 Subject: [PATCH 059/118] Added support for files to the notice framework. 
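
For illustration, a minimal sketch of how a script can hand a file record to the notice framework after this change. The module name, notice type, and trigger condition below are invented for the example; what the patch itself provides is that passing $f lets the policy-application step fill in the fuid, mime_type and, when the file maps to exactly one connection, conn fields automatically before the fa_file reference is dropped.

  @load base/frameworks/notice

  module ExampleFileNotice;

  export {
      ## Hypothetical notice type, defined only for this sketch.
      redef enum Notice::Type += { Interesting_File };
  }

  event file_new(f: fa_file)
      {
      # Raise a notice carrying the fa_file record itself; the framework
      # derives $fuid, $mime_type and (if unambiguous) $conn from it while
      # applying the notice policy.
      if ( f?$mime_type && f$mime_type == "application/x-dosexec" )
          NOTICE([$note=Interesting_File,
                  $msg="illustrative notice tied to a file",
                  $f=f]);
      }
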
--- scripts/base/frameworks/notice/main.bro | 34 ++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/scripts/base/frameworks/notice/main.bro b/scripts/base/frameworks/notice/main.bro index 30e0013517..f47ed79940 100644 --- a/scripts/base/frameworks/notice/main.bro +++ b/scripts/base/frameworks/notice/main.bro @@ -68,6 +68,19 @@ export { ## the notice policy. iconn: icmp_conn &optional; + ## A file record if the notice is relted to a file. The + ## reference to the actual fa_file record will be deleted after applying + ## the notice policy. + f: fa_file &optional; + + ## A file unique ID if this notice is related to a file. If the $f + ## field is provided, this will be automatically filled out. + fuid: string &log &optional; + + ## A mime type if the notice is related to a file. If the $f field + ## is provided, this will be automatically filled out. + mime_type: string &log &optional; + ## The transport protocol. Filled automatically when either conn, iconn ## or p is specified. proto: transport_proto &log &optional; @@ -460,6 +473,19 @@ function apply_policy(n: Notice::Info) if ( ! n?$ts ) n$ts = network_time(); + if ( n?$f ) + { + if ( ! n?$fuid ) + n$fuid = n$f$id; + if ( ! n?$mime_type && n$f?$mime_type ) + n$mime_type = n$f$mime_type; + if ( |n$f$conns| == 1 ) + { + for ( id in n$f$conns ) + n$conn = n$f$conns[id]; + } + } + if ( n?$conn ) { if ( ! n?$id ) @@ -513,13 +539,15 @@ function apply_policy(n: Notice::Info) if ( ! n?$suppress_for ) n$suppress_for = default_suppression_interval; - # Delete the connection record if it's there so we aren't sending that - # to remote machines. It can cause problems due to the size of the - # connection record. + # Delete the connection and file records if they're there so we + # aren't sending that to remote machines. It can cause problems + # due to the size of those records. if ( n?$conn ) delete n$conn; if ( n?$iconn ) delete n$iconn; + if ( n?$f ) + delete n$f; } function internal_NOTICE(n: Notice::Info) From 22b4f8dd90f1b5b1262240efb94a3f65546ace04 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Wed, 10 Jul 2013 16:51:22 -0400 Subject: [PATCH 060/118] Fix a small issue with finding smtp entities. --- scripts/base/protocols/smtp/entities.bro | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/base/protocols/smtp/entities.bro b/scripts/base/protocols/smtp/entities.bro index ec43b39ce1..159c10b5ab 100644 --- a/scripts/base/protocols/smtp/entities.bro +++ b/scripts/base/protocols/smtp/entities.bro @@ -33,12 +33,12 @@ event mime_begin_entity(c: connection) &priority=10 event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=5 { - if ( f$source != "SMTP" ) - return; - - if ( c$smtp$entity?$filename ) - f$info$filename = c$smtp$entity$filename; - f$info$depth = c$smtp_state$mime_depth; + if ( f$source == "SMTP" && c?$smtp ) + { + if ( c$smtp?$entity && c$smtp$entity?$filename ) + f$info$filename = c$smtp$entity$filename; + f$info$depth = c$smtp_state$mime_depth; + } } event mime_one_header(c: connection, h: mime_header_rec) &priority=5 From bf4f57383f5f0639257ecac3651c7b01004a3a02 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Wed, 10 Jul 2013 16:52:39 -0400 Subject: [PATCH 061/118] Improve malware hash registry script. - Include a link to a virustotal search in the notice sub message field. - Give all information returned from Team Cymru in the notice message. - Add more file types to match on to the default set. 
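
A usage sketch for tuning this detection locally; the values are illustrative only, and it is assumed here that notice_threshold is declared &redef in the same way match_file_types is:

  @load frameworks/files/detect-MHR

  # Replace (not extend) the default set of MIME types sent for hash lookup.
  redef TeamCymruMalwareHashRegistry::match_file_types =
      /application\/x-dosexec/ |
      /application\/pdf/ |
      /application\/zip/;

  # Only raise a Match notice when at least half of the A/V engines reported
  # by Team Cymru flagged the sample (assumes the constant is &redef).
  redef TeamCymruMalwareHashRegistry::notice_threshold = 50;

Because redef with "=" overwrites the shipped pattern, a site that only wants to add one type needs to repeat the defaults it still cares about in the new pattern.
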
--- .../policy/frameworks/files/detect-MHR.bro | 34 +++++++------------ 1 file changed, 13 insertions(+), 21 deletions(-) diff --git a/scripts/policy/frameworks/files/detect-MHR.bro b/scripts/policy/frameworks/files/detect-MHR.bro index c896bd56fd..ebfc97fd26 100644 --- a/scripts/policy/frameworks/files/detect-MHR.bro +++ b/scripts/policy/frameworks/files/detect-MHR.bro @@ -5,7 +5,7 @@ @load base/frameworks/notice @load frameworks/files/hash-all-files -module MalwareHashRegistery; +module TeamCymruMalwareHashRegistry; export { redef enum Notice::Type += { @@ -14,16 +14,12 @@ export { Match }; - redef record Files::Info += { - ## Team Cymru Malware Hash Registry date of first detection. - mhr_first_detected: time &log &optional; - ## Team Cymru Malware Hash Registry percent of detection - ## among malware scanners. - mhr_detect_rate: count &log &optional; - }; - ## File types to attempt matching against the Malware Hash Registry. - const match_file_types = /^application\/x-dosexec/ &redef; + const match_file_types = /application\/x-dosexec/ | + /application\/pdf/ | + /application\/x-shockwave-flash/ | + /application\/x-java-applet/ | + /video\/mp4/ &redef; ## The malware hash registry runs each malware sample through several A/V engines. ## Team Cymru returns a percentage to indicate how many A/V engines flagged the @@ -43,19 +39,15 @@ event file_hash(f: fa_file, kind: string, hash: string) local MHR_answer = split1(MHR_result, / /); if ( |MHR_answer| == 2 ) { - f$info$mhr_first_detected = double_to_time(to_double(MHR_answer[1])); - f$info$mhr_detect_rate = to_count(MHR_answer[2]); + local mhr_first_detected = double_to_time(to_double(MHR_answer[1])); + local mhr_detect_rate = to_count(MHR_answer[2]); - #print strftime("%Y-%m-%d %H:%M:%S", f$info$mhr_first_detected); - if ( f$info$mhr_detect_rate >= notice_threshold ) + local readable_first_detected = strftime("%Y-%m-%d %H:%M:%S", mhr_first_detected); + if ( mhr_detect_rate >= notice_threshold ) { - local url = ""; - # TODO: Create a generic mechanism for creating file "urls". - #if ( f$source == "HTTP" ) - # url = HTTP::build_url_http(f); - local message = fmt("%s %s", hash, url); - #local message = fmt("Host(s) %s sent a file with SHA1 hash %s to host %s", f$src_host, hash, f$dst_host); - NOTICE([$note=Match, $msg=message]); + local message = fmt("Detection rate: %d%% Last seen: %s", mhr_detect_rate, readable_first_detected); + local virustotal_url = fmt("https://www.virustotal.com/en/file/%s/analysis/", hash); + NOTICE([$note=Match, $msg=message, $sub=virustotal_url, $f=f]); } } } From be8c947c040ac828036a70938bcc3f721a5a480d Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Wed, 10 Jul 2013 17:04:09 -0400 Subject: [PATCH 062/118] Adding CAB files for MHR checking. --- scripts/policy/frameworks/files/detect-MHR.bro | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/policy/frameworks/files/detect-MHR.bro b/scripts/policy/frameworks/files/detect-MHR.bro index ebfc97fd26..18875ade4c 100644 --- a/scripts/policy/frameworks/files/detect-MHR.bro +++ b/scripts/policy/frameworks/files/detect-MHR.bro @@ -16,6 +16,7 @@ export { ## File types to attempt matching against the Malware Hash Registry. 
const match_file_types = /application\/x-dosexec/ | + /application\/vnd.ms-cab-compressed/ | /application\/pdf/ | /application\/x-shockwave-flash/ | /application\/x-java-applet/ | From 06287966a166d9a2d33a84084898009bfe86eea3 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Wed, 10 Jul 2013 14:19:00 -0700 Subject: [PATCH 063/118] Bringing the DPD POP3 signature back. This also avoids the need for updating the external test suite. --- scripts/base/init-default.bro | 1 + scripts/base/protocols/pop3/__load__.bro | 2 ++ scripts/base/protocols/pop3/dpd.sig | 13 +++++++++++++ .../canonified_loaded_scripts.log | 5 +++-- 4 files changed, 19 insertions(+), 2 deletions(-) create mode 100644 scripts/base/protocols/pop3/__load__.bro create mode 100644 scripts/base/protocols/pop3/dpd.sig diff --git a/scripts/base/init-default.bro b/scripts/base/init-default.bro index 6c40a7547f..6aa8ff5e26 100644 --- a/scripts/base/init-default.bro +++ b/scripts/base/init-default.bro @@ -41,6 +41,7 @@ @load base/protocols/http @load base/protocols/irc @load base/protocols/modbus +@load base/protocols/pop3 @load base/protocols/smtp @load base/protocols/socks @load base/protocols/ssh diff --git a/scripts/base/protocols/pop3/__load__.bro b/scripts/base/protocols/pop3/__load__.bro new file mode 100644 index 0000000000..c5ddf0e788 --- /dev/null +++ b/scripts/base/protocols/pop3/__load__.bro @@ -0,0 +1,2 @@ + +@load-sigs ./dpd.sig diff --git a/scripts/base/protocols/pop3/dpd.sig b/scripts/base/protocols/pop3/dpd.sig new file mode 100644 index 0000000000..8d7e3567da --- /dev/null +++ b/scripts/base/protocols/pop3/dpd.sig @@ -0,0 +1,13 @@ +signature dpd_pop3_server { + ip-proto == tcp + payload /^\+OK/ + requires-reverse-signature dpd_pop3_client + enable "pop3" + tcp-state responder +} + +signature dpd_pop3_client { + ip-proto == tcp + payload /(|.*[\r\n])[[:space:]]*([uU][sS][eE][rR][[:space:]]|[aA][pP][oO][pP][[:space:]]|[cC][aA][pP][aA]|[aA][uU][tT][hH])/ + tcp-state originator +} diff --git a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log index 6d6906d924..999fd7c841 100644 --- a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log +++ b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log @@ -3,7 +3,7 @@ #empty_field (empty) #unset_field - #path loaded_scripts -#open 2013-07-10-03-19-58 +#open 2013-07-10-21-18-31 #fields name #types string scripts/base/init-bare.bro @@ -178,6 +178,7 @@ scripts/base/init-default.bro scripts/base/protocols/modbus/__load__.bro scripts/base/protocols/modbus/consts.bro scripts/base/protocols/modbus/main.bro + scripts/base/protocols/pop3/__load__.bro scripts/base/protocols/smtp/__load__.bro scripts/base/protocols/smtp/main.bro scripts/base/protocols/smtp/entities.bro @@ -194,4 +195,4 @@ scripts/base/init-default.bro scripts/base/protocols/tunnels/__load__.bro scripts/base/misc/find-checksum-offloading.bro scripts/policy/misc/loaded-scripts.bro -#close 2013-07-10-03-19-58 +#close 2013-07-10-21-18-31 From 3d5c17e9e01b812398d5cc928c63d883d2a89d55 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Wed, 10 Jul 2013 23:46:01 -0400 Subject: [PATCH 064/118] Add jar files to the default MHR lookups. 
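
A small illustrative check, not part of the patch, of whether a given file would fall into the default lookup set once this type is added; the event handler and print output are purely for demonstration:

  @load frameworks/files/detect-MHR

  event file_new(f: fa_file)
      {
      # Purely illustrative: report files whose MIME type falls into the set
      # that the MHR policy script considers for hash lookups.
      if ( f?$mime_type &&
           TeamCymruMalwareHashRegistry::match_file_types in f$mime_type )
          print fmt("file %s (%s) is eligible for an MHR lookup",
                    f$id, f$mime_type);
      }
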
--- scripts/policy/frameworks/files/detect-MHR.bro | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/policy/frameworks/files/detect-MHR.bro b/scripts/policy/frameworks/files/detect-MHR.bro index 18875ade4c..71d73217e0 100644 --- a/scripts/policy/frameworks/files/detect-MHR.bro +++ b/scripts/policy/frameworks/files/detect-MHR.bro @@ -20,6 +20,7 @@ export { /application\/pdf/ | /application\/x-shockwave-flash/ | /application\/x-java-applet/ | + /application\/jar/ | /video\/mp4/ &redef; ## The malware hash registry runs each malware sample through several A/V engines. From 1a60fae41c057bb150604d53fa6a15ed3bf2b629 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Thu, 11 Jul 2013 11:28:55 -0500 Subject: [PATCH 065/118] Clean up queued but unused file_over_new_connections event args. --- src/file_analysis/File.cc | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/file_analysis/File.cc b/src/file_analysis/File.cc index 9a06fa3db9..7189d90932 100644 --- a/src/file_analysis/File.cc +++ b/src/file_analysis/File.cc @@ -100,7 +100,12 @@ File::~File() { DBG_LOG(DBG_FILE_ANALYSIS, "Destroying File object %s", id.c_str()); Unref(val); - assert(fonc_queue.empty()); + // Queue may not be empty in the case where only content gaps were seen. + while ( ! fonc_queue.empty() ) + { + delete_vals(fonc_queue.front().second); + fonc_queue.pop(); + } } void File::UpdateLastActivityTime() From e01678d132a7fcb90c45701d110733bcc6ab84e4 Mon Sep 17 00:00:00 2001 From: Bernhard Amann Date: Fri, 12 Jul 2013 21:09:13 +0200 Subject: [PATCH 066/118] yep, freebsd still needs this fix --- src/3rdparty/sqlite3.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/3rdparty/sqlite3.c b/src/3rdparty/sqlite3.c index deef460899..8d473d32b7 100644 --- a/src/3rdparty/sqlite3.c +++ b/src/3rdparty/sqlite3.c @@ -23442,6 +23442,9 @@ static int posixFchown(int fd, uid_t uid, gid_t gid){ /* Forward reference */ static int openDirectory(const char*, int*); +/* fix compile on FreeBSD, not sure why needed... */ +int fchmod(int, mode_t); + /* ** Many system calls are accessed through pointer-to-functions so that ** they may be overridden at runtime to facilitate fault injection during From b14f5a853eb67a5e312bc612a062889b594d1a58 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Fri, 12 Jul 2013 16:06:40 -0400 Subject: [PATCH 067/118] Added mime types to http.log --- scripts/base/protocols/http/entities.bro | 54 +++++++++++++++++++++--- scripts/base/protocols/http/files.bro | 19 --------- 2 files changed, 47 insertions(+), 26 deletions(-) diff --git a/scripts/base/protocols/http/entities.bro b/scripts/base/protocols/http/entities.bro index fc8ab753ae..dcddf6fc4f 100644 --- a/scripts/base/protocols/http/entities.bro +++ b/scripts/base/protocols/http/entities.bro @@ -9,14 +9,23 @@ module HTTP; export { type Entity: record { - ## Depth of the entity if multiple entities are sent in a single transaction. - depth: count &default=0; - ## Filename for the entity if discovered from a header. filename: string &optional; }; redef record Info += { + ## An ordered vector of file unique IDs. + orig_fuids: vector of string &log &optional; + + ## An ordered vector of mime types. + orig_mime_types: vector of string &log &optional; + + ## An ordered vector of file unique IDs. + resp_fuids: vector of string &log &optional; + + ## An ordered vector of mime types. + resp_mime_types: vector of string &log &optional; + ## The current entity being seen. 
entity: Entity &optional; @@ -36,7 +45,7 @@ event http_begin_entity(c: connection, is_orig: bool) &priority=10 else ++c$http$resp_mime_depth; - c$http$entity = Entity($depth = is_orig ? c$http$orig_mime_depth : c$http$resp_mime_depth); + c$http$entity = Entity(); } event http_header(c: connection, is_orig: bool, name: string, value: string) &priority=3 @@ -55,12 +64,43 @@ event http_header(c: connection, is_orig: bool, name: string, value: string) &pr event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=5 { - if ( f$source == "HTTP" && c$http?$entity ) + if ( f$source == "HTTP" && c?$http ) { - f$info$depth = c$http$entity$depth; - if ( c$http$entity?$filename ) + if ( c$http?$entity && c$http$entity?$filename ) f$info$filename = c$http$entity$filename; + + if ( f$is_orig ) + { + if ( ! c$http?$resp_mime_types ) + c$http$resp_fuids = string_vec(f$id); + else + c$http$orig_fuids[|c$http$orig_fuids|] = f$id; + + if ( f?$mime_type ) + { + if ( ! c$http?$orig_mime_types ) + c$http$orig_mime_types = string_vec(f$mime_type); + else + c$http$orig_mime_types[|c$http$orig_mime_types|] = f$mime_type; + } + } + else + { + if ( ! c$http?$resp_mime_types ) + c$http$resp_fuids = string_vec(f$id); + else + c$http$resp_fuids[|c$http$resp_fuids|] = f$id; + + if ( f?$mime_type ) + { + if ( ! c$http?$resp_mime_types ) + c$http$resp_mime_types = string_vec(f$mime_type); + else + c$http$resp_mime_types[|c$http$resp_mime_types|] = f$mime_type; + } + } } + } event http_end_entity(c: connection, is_orig: bool) &priority=5 diff --git a/scripts/base/protocols/http/files.bro b/scripts/base/protocols/http/files.bro index e45ff8cadb..09324b5f45 100644 --- a/scripts/base/protocols/http/files.bro +++ b/scripts/base/protocols/http/files.bro @@ -6,14 +6,6 @@ module HTTP; export { - redef record Info += { - ## An ordered vector of file unique IDs seen sent by the originator (client). - orig_fuids: vector of string &log &default=string_vec(); - - ## An ordered vector of file unique IDs seen sent by the responder (server). - resp_fuids: vector of string &log &default=string_vec(); - }; - ## Default file handle provider for HTTP. global get_file_handle: function(c: connection, is_orig: bool): string; } @@ -39,14 +31,3 @@ event bro_init() &priority=5 { Files::register_protocol(Analyzer::ANALYZER_HTTP, HTTP::get_file_handle); } - -event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=5 - { - if ( c?$http ) - { - if ( f$is_orig ) - c$http$orig_fuids[|c$http$orig_fuids|] = f$id; - else - c$http$resp_fuids[|c$http$resp_fuids|] = f$id; - } - } From 4dd4c5344e071cf2f9996852369fa2a5a90909bd Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Fri, 12 Jul 2013 16:12:26 -0400 Subject: [PATCH 068/118] Fix a bug where orig file information in http wasn't working right. --- scripts/base/protocols/http/entities.bro | 31 ++++++++++++------------ 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/scripts/base/protocols/http/entities.bro b/scripts/base/protocols/http/entities.bro index dcddf6fc4f..e9376a0c0c 100644 --- a/scripts/base/protocols/http/entities.bro +++ b/scripts/base/protocols/http/entities.bro @@ -15,24 +15,23 @@ export { redef record Info += { ## An ordered vector of file unique IDs. - orig_fuids: vector of string &log &optional; + orig_fuids: vector of string &log &optional; ## An ordered vector of mime types. orig_mime_types: vector of string &log &optional; ## An ordered vector of file unique IDs. 
- resp_fuids: vector of string &log &optional; + resp_fuids: vector of string &log &optional; ## An ordered vector of mime types. resp_mime_types: vector of string &log &optional; - ## The current entity being seen. - entity: Entity &optional; - + ## The current entity. + current_entity: Entity &optional; ## Current number of MIME entities in the HTTP request message body. - orig_mime_depth: count &default=0; + orig_mime_depth: count &default=0; ## Current number of MIME entities in the HTTP response message body. - resp_mime_depth: count &default=0; + resp_mime_depth: count &default=0; }; } @@ -45,7 +44,7 @@ event http_begin_entity(c: connection, is_orig: bool) &priority=10 else ++c$http$resp_mime_depth; - c$http$entity = Entity(); + c$http$current_entity = Entity(); } event http_header(c: connection, is_orig: bool, name: string, value: string) &priority=3 @@ -53,12 +52,12 @@ event http_header(c: connection, is_orig: bool, name: string, value: string) &pr if ( name == "CONTENT-DISPOSITION" && /[fF][iI][lL][eE][nN][aA][mM][eE]/ in value ) { - c$http$entity$filename = extract_filename_from_content_disposition(value); + c$http$current_entity$filename = extract_filename_from_content_disposition(value); } else if ( name == "CONTENT-TYPE" && /[nN][aA][mM][eE][:blank:]*=/ in value ) { - c$http$entity$filename = extract_filename_from_content_disposition(value); + c$http$current_entity$filename = extract_filename_from_content_disposition(value); } } @@ -66,13 +65,13 @@ event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priori { if ( f$source == "HTTP" && c?$http ) { - if ( c$http?$entity && c$http$entity?$filename ) - f$info$filename = c$http$entity$filename; + if ( c$http?$current_entity && c$http$current_entity?$filename ) + f$info$filename = c$http$current_entity$filename; if ( f$is_orig ) { - if ( ! c$http?$resp_mime_types ) - c$http$resp_fuids = string_vec(f$id); + if ( ! c$http?$orig_mime_types ) + c$http$orig_fuids = string_vec(f$id); else c$http$orig_fuids[|c$http$orig_fuids|] = f$id; @@ -105,6 +104,6 @@ event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priori event http_end_entity(c: connection, is_orig: bool) &priority=5 { - if ( c?$http && c$http?$entity ) - delete c$http$entity; + if ( c?$http && c$http?$current_entity ) + delete c$http$current_entity; } From 58290d6fc0436677df760792a9cda9b0c99def11 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Sun, 14 Jul 2013 08:42:35 -0700 Subject: [PATCH 069/118] Updating NEWS. --- NEWS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS b/NEWS index b5fea869e4..1fce6b1d9d 100644 --- a/NEWS +++ b/NEWS @@ -189,6 +189,8 @@ Changed Functionality - PacketFilter::all_packets has been replaced with PacketFilter::enable_auto_protocol_capture_filters. +- We removed the BitTorrent DPD signatures pending further updates to + that analyzer. Bro 2.1 ------- From 7427ce511b78c8ae5656762ad8c229976dd33fd3 Mon Sep 17 00:00:00 2001 From: Bernhard Amann Date: Mon, 15 Jul 2013 13:50:40 -0700 Subject: [PATCH 070/118] Small raw reader fixes * crash when accessing nonexistant file. * memory leak when reading from file. Addresses #1038. 
--- src/input/readers/Raw.cc | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/src/input/readers/Raw.cc b/src/input/readers/Raw.cc index 46cb3656a3..98f1dfcab6 100644 --- a/src/input/readers/Raw.cc +++ b/src/input/readers/Raw.cc @@ -55,6 +55,13 @@ void Raw::DoClose() if ( file != 0 ) CloseInput(); + if ( buf != 0 ) + { + // we still have output that has not been flushed. Throw away. + delete buf; + buf = 0; + } + if ( execute && childpid > 0 && kill(childpid, 0) == 0 ) { // kill child process @@ -157,13 +164,13 @@ bool Raw::OpenInput() else { file = fopen(fname.c_str(), "r"); - fcntl(fileno(file), F_SETFD, FD_CLOEXEC); if ( ! file ) { Error(Fmt("Init: cannot open %s", fname.c_str())); return false; } } + fcntl(fileno(file), F_SETFD, FD_CLOEXEC); return true; } @@ -322,12 +329,14 @@ int64_t Raw::GetLine(FILE* arg_file) // but first check if we encountered the file end - because if we did this was it. if ( feof(arg_file) != 0 ) { - outbuf = buf; - buf = 0; if ( pos == 0 ) return -1; // signal EOF - and that we had no more data. else + { + outbuf = buf; + buf = 0; return pos; + } } repeats++; @@ -342,15 +351,13 @@ int64_t Raw::GetLine(FILE* arg_file) { outbuf = buf; buf = 0; - buf = new char[block_size]; - if ( found < pos ) { // we have leftovers. copy them into the buffer for the next line buf = new char[block_size]; memcpy(buf, outbuf + found + sep_length, pos - found - sep_length); - bufpos = pos - found - sep_length; + bufpos = pos - found - sep_length; } return found; @@ -368,7 +375,7 @@ int64_t Raw::GetLine(FILE* arg_file) return -3; } - InternalError("Internal control flow execution"); + InternalError("Internal control flow execution error in raw reader"); assert(false); } @@ -461,7 +468,7 @@ bool Raw::DoUpdate() if ( length == -3 ) return false; - else if ( length == -2 || length == -1 ) + else if ( length == -2 || length == -1 ) // no data ready or eof break; From 0bfdcc1fbca326e563ea4a6db5e69be05f2fbed5 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Tue, 16 Jul 2013 12:01:50 -0400 Subject: [PATCH 071/118] Added protocol description functions that provide a super compressed log representation. --- scripts/base/frameworks/analyzer/main.bro | 12 +++++ scripts/base/frameworks/files/main.bro | 50 ++++++++++++++++---- scripts/base/frameworks/notice/main.bro | 19 ++++++-- scripts/base/protocols/ftp/__load__.bro | 1 + scripts/base/protocols/ftp/files.bro | 21 +++++++- scripts/base/protocols/ftp/main.bro | 50 +++++++------------- scripts/base/protocols/http/files.bro | 21 +++++++- scripts/base/protocols/http/utils.bro | 8 ++++ scripts/base/protocols/irc/file-analysis.bro | 23 --------- scripts/base/protocols/irc/files.bro | 3 +- scripts/base/protocols/smtp/files.bro | 21 +++++++- scripts/base/protocols/smtp/main.bro | 31 +++++++++++- src/analyzer/analyzer.bif | 5 ++ 13 files changed, 190 insertions(+), 75 deletions(-) delete mode 100644 scripts/base/protocols/irc/file-analysis.bro diff --git a/scripts/base/frameworks/analyzer/main.bro b/scripts/base/frameworks/analyzer/main.bro index c4ee5c943b..e266eb8c7a 100644 --- a/scripts/base/frameworks/analyzer/main.bro +++ b/scripts/base/frameworks/analyzer/main.bro @@ -81,6 +81,13 @@ export { ## Returns: The analyzer name corresponding to the tag. global name: function(tag: Analyzer::Tag) : string; + ## Translates an analyzer's name to a tag enum value. + ## + ## name: The analyzer name. + ## + ## Returns: The analyzer tag corresponding to the name. 
+ global get_tag: function(name: string): Analyzer::Tag; + ## Schedules an analyzer for a future connection originating from a given IP ## address and port. ## @@ -187,6 +194,11 @@ function name(atype: Analyzer::Tag) : string return __name(atype); } +function get_tag(name: string): Analyzer::Tag + { + return __tag(name); + } + function schedule_analyzer(orig: addr, resp: addr, resp_p: port, analyzer: Analyzer::Tag, tout: interval) : bool { diff --git a/scripts/base/frameworks/files/main.bro b/scripts/base/frameworks/files/main.bro index 8dd07fcb53..cc92932bbf 100644 --- a/scripts/base/frameworks/files/main.bro +++ b/scripts/base/frameworks/files/main.bro @@ -2,6 +2,7 @@ ##! any network protocol over which they're transported. @load base/bif/file_analysis.bif +@load base/frameworks/analyzer @load base/frameworks/logging @load base/utils/site @@ -173,17 +174,36 @@ export { ## Returns: The analyzer name corresponding to the tag. global analyzer_name: function(tag: Files::Tag): string; + ## Provides a text description regarding metadata of the file. + ## For example, with HTTP it would return a URL. + ## + ## f: The file to be described. + ## + ## Returns a text description regarding metadata of the file. + global describe: function(f: fa_file): string; + + type ProtoRegistration: record { + ## A callback to generate a file handle on demand when + ## one is needed by the core. + get_file_handle: function(c: connection, is_orig: bool): string; + + ## A callback to "describe" a file. In the case of an HTTP + ## transfer the most obvious description would be the URL. + ## It's like an extremely compressed version of the normal log. + describe: function(f: fa_file): string + &default=function(f: fa_file): string { return ""; }; + }; + ## Register callbacks for protocols that work with the Files framework. ## The callbacks must uniquely identify a file and each protocol can ## only have a single callback registered for it. ## ## tag: Tag for the protocol analyzer having a callback being registered. ## - ## callback: Function that can generate a file handle for the protocol analyzer - ## defined previously. + ## reg: A :bro:see:`ProtoRegistration` record. ## ## Returns: true if the protocol being registered was not previously registered. - global register_protocol: function(tag: Files::Tag, callback: function(c: connection, is_orig: bool): string): bool; + global register_protocol: function(tag: Analyzer::Tag, reg: ProtoRegistration): bool; ## Register a callback for file analyzers to use if they need to do some manipulation ## when they are being added to a file before the core code takes over. This is @@ -210,8 +230,7 @@ redef record AnalyzerArgs += { }; # Store the callbacks for protocol analyzers that have files. 
-global registered_protocols: table[Files::Tag] of function(c: connection, is_orig: bool): string = table() - &default=function(c: connection, is_orig: bool): string { return cat(c$uid, is_orig); }; +global registered_protocols: table[Analyzer::Tag] of ProtoRegistration = table(); global analyzer_add_callbacks: table[Files::Tag] of function(f: fa_file, args: AnalyzerArgs) = table(); @@ -321,15 +340,28 @@ event file_state_remove(f: fa_file) &priority=-10 Log::write(Files::LOG, f$info); } -function register_protocol(tag: Files::Tag, callback: function(c: connection, is_orig: bool): string): bool +function register_protocol(tag: Analyzer::Tag, reg: ProtoRegistration): bool { local result = (tag !in registered_protocols); - registered_protocols[tag] = callback; + registered_protocols[tag] = reg; return result; } -event get_file_handle(tag: Files::Tag, c: connection, is_orig: bool) &priority=5 +function describe(f: fa_file): string { + local tag = Analyzer::get_tag(f$source); + if ( tag !in registered_protocols ) + return ""; + local handler = registered_protocols[tag]; - set_file_handle(handler(c, is_orig)); + return handler$describe(f); + } + +event get_file_handle(tag: Analyzer::Tag, c: connection, is_orig: bool) &priority=5 + { + if ( tag !in registered_protocols ) + return; + + local handler = registered_protocols[tag]; + set_file_handle(handler$get_file_handle(c, is_orig)); } diff --git a/scripts/base/frameworks/notice/main.bro b/scripts/base/frameworks/notice/main.bro index f47ed79940..5bd01e0982 100644 --- a/scripts/base/frameworks/notice/main.bro +++ b/scripts/base/frameworks/notice/main.bro @@ -79,7 +79,13 @@ export { ## A mime type if the notice is related to a file. If the $f field ## is provided, this will be automatically filled out. - mime_type: string &log &optional; + file_mime_type: string &log &optional; + + ## Frequently files can be "described" to give a bit more context. + ## This field will typically be automatically filled out from an + ## fa_file record. For example, if a notice was related to a + ## file over HTTP, the URL of the request would be shown. + file_desc: string &log &optional; ## The transport protocol. Filled automatically when either conn, iconn ## or p is specified. @@ -477,9 +483,13 @@ function apply_policy(n: Notice::Info) { if ( ! n?$fuid ) n$fuid = n$f$id; - if ( ! n?$mime_type && n$f?$mime_type ) - n$mime_type = n$f$mime_type; - if ( |n$f$conns| == 1 ) + + if ( ! n?$file_mime_type && n$f?$mime_type ) + n$file_mime_type = n$f$mime_type; + + n$file_desc = Files::describe(n$f); + + if ( n$f?$conns && |n$f$conns| == 1 ) { for ( id in n$f$conns ) n$conn = n$f$conns[id]; @@ -490,6 +500,7 @@ function apply_policy(n: Notice::Info) { if ( ! n?$id ) n$id = n$conn$id; + if ( ! n?$uid ) n$uid = n$conn$uid; } diff --git a/scripts/base/protocols/ftp/__load__.bro b/scripts/base/protocols/ftp/__load__.bro index bc68f61cea..ebb09e702c 100644 --- a/scripts/base/protocols/ftp/__load__.bro +++ b/scripts/base/protocols/ftp/__load__.bro @@ -1,5 +1,6 @@ @load ./utils-commands @load ./main +@load ./utils @load ./files @load ./gridftp diff --git a/scripts/base/protocols/ftp/files.bro b/scripts/base/protocols/ftp/files.bro index c68717c8a2..1d7b7670f4 100644 --- a/scripts/base/protocols/ftp/files.bro +++ b/scripts/base/protocols/ftp/files.bro @@ -12,6 +12,9 @@ export { ## Default file handle provider for FTP. global get_file_handle: function(c: connection, is_orig: bool): string; + + ## Describe the file being transferred. 
+ global describe_file: function(f: fa_file): string; } function get_file_handle(c: connection, is_orig: bool): string @@ -22,9 +25,25 @@ function get_file_handle(c: connection, is_orig: bool): string return cat(Analyzer::ANALYZER_FTP_DATA, c$start_time, c$id, is_orig); } +function describe_file(f: fa_file): string + { + # This shouldn't be needed, but just in case... + if ( f$source != "FTP" ) + return ""; + + for ( cid in f$conns ) + { + if ( f$conns[cid]?$ftp ) + return FTP::describe(f$conns[cid]$ftp); + } + return ""; + } + event bro_init() &priority=5 { - Files::register_protocol(Analyzer::ANALYZER_FTP_DATA, FTP::get_file_handle); + Files::register_protocol(Analyzer::ANALYZER_FTP_DATA, + [$get_file_handle = FTP::get_file_handle, + $describe = FTP::describe_file]); } diff --git a/scripts/base/protocols/ftp/main.bro b/scripts/base/protocols/ftp/main.bro index 7bf9d6cc4c..f525c7792b 100644 --- a/scripts/base/protocols/ftp/main.bro +++ b/scripts/base/protocols/ftp/main.bro @@ -63,8 +63,6 @@ export { reply_code: count &log &optional; ## Reply message from the server in response to the command. reply_msg: string &log &optional; - ## Arbitrary tags that may indicate a particular attribute of this command. - tags: set[string] &log; ## Expected FTP data channel. data_channel: ExpectedDataChannel &log &optional; @@ -171,37 +169,22 @@ function set_ftp_session(c: connection) function ftp_message(s: Info) { - # If it either has a tag associated with it (something detected) - # or it's a deliberately logged command. - if ( |s$tags| > 0 || (s?$cmdarg && s$cmdarg$cmd in logged_commands) ) + s$ts=s$cmdarg$ts; + s$command=s$cmdarg$cmd; + s$arg=s$cmdarg$arg; + if ( s$arg == "" ) + delete s$arg; + + if ( s?$password && + ! s$capture_password && + to_lower(s$user) !in guest_ids ) { - if ( s?$password && - ! s$capture_password && - to_lower(s$user) !in guest_ids ) - { - s$password = ""; - } - - local arg = s$cmdarg$arg; - if ( s$cmdarg$cmd in file_cmds ) - { - local comp_path = build_path_compressed(s$cwd, arg); - if ( comp_path[0] != "/" ) - comp_path = cat("/", comp_path); - - arg = fmt("ftp://%s%s", addr_to_uri(s$id$resp_h), comp_path); - } - - s$ts=s$cmdarg$ts; - s$command=s$cmdarg$cmd; - if ( arg == "" ) - delete s$arg; - else - s$arg=arg; - - Log::write(FTP::LOG, s); + s$password = ""; } + if ( s?$cmdarg && s$command in logged_commands) + Log::write(FTP::LOG, s); + # The MIME and file_size fields are specific to file transfer commands # and may not be used in all commands so they need reset to "blank" # values after logging. @@ -209,8 +192,6 @@ function ftp_message(s: Info) delete s$file_size; # Same with data channel. delete s$data_channel; - # Tags are cleared everytime too. 
- s$tags = set(); } function add_expected_data_channel(s: Info, chan: ExpectedDataChannel) @@ -218,8 +199,9 @@ function add_expected_data_channel(s: Info, chan: ExpectedDataChannel) s$passive = chan$passive; s$data_channel = chan; ftp_data_expected[chan$resp_h, chan$resp_p] = s; - Analyzer::schedule_analyzer(chan$orig_h, chan$resp_h, chan$resp_p, Analyzer::ANALYZER_FTP_DATA, - 5mins); + Analyzer::schedule_analyzer(chan$orig_h, chan$resp_h, chan$resp_p, + Analyzer::ANALYZER_FTP_DATA, + 5mins); } event ftp_request(c: connection, command: string, arg: string) &priority=5 diff --git a/scripts/base/protocols/http/files.bro b/scripts/base/protocols/http/files.bro index 09324b5f45..fd07dc096a 100644 --- a/scripts/base/protocols/http/files.bro +++ b/scripts/base/protocols/http/files.bro @@ -8,6 +8,9 @@ module HTTP; export { ## Default file handle provider for HTTP. global get_file_handle: function(c: connection, is_orig: bool): string; + + ## Default file describer for HTTP. + global describe_file: function(f: fa_file): string; } function get_file_handle(c: connection, is_orig: bool): string @@ -27,7 +30,23 @@ function get_file_handle(c: connection, is_orig: bool): string } } +function describe_file(f: fa_file): string + { + # This shouldn't be needed, but just in case... + if ( f$source != "HTTP" ) + return ""; + + for ( cid in f$conns ) + { + if ( f$conns[cid]?$http ) + return build_url_http(f$conns[cid]$http); + } + return ""; + } + event bro_init() &priority=5 { - Files::register_protocol(Analyzer::ANALYZER_HTTP, HTTP::get_file_handle); + Files::register_protocol(Analyzer::ANALYZER_HTTP, + [$get_file_handle = HTTP::get_file_handle, + $describe = HTTP::describe_file]); } diff --git a/scripts/base/protocols/http/utils.bro b/scripts/base/protocols/http/utils.bro index a74a2fe696..fe8c076780 100644 --- a/scripts/base/protocols/http/utils.bro +++ b/scripts/base/protocols/http/utils.bro @@ -32,6 +32,9 @@ export { ## ## Returns: A URL prefixed with "http://". global build_url_http: function(rec: Info): string; + + ## Create an extremely shortened representation of a log line. + global describe: function(rec: Info): string; } @@ -62,3 +65,8 @@ function build_url_http(rec: Info): string { return fmt("http://%s", build_url(rec)); } + +function describe(rec: Info): string + { + return build_url_http(rec); + } diff --git a/scripts/base/protocols/irc/file-analysis.bro b/scripts/base/protocols/irc/file-analysis.bro deleted file mode 100644 index f2e84fbc22..0000000000 --- a/scripts/base/protocols/irc/file-analysis.bro +++ /dev/null @@ -1,23 +0,0 @@ -@load ./dcc-send -@load base/utils/conn-ids -@load base/frameworks/files - -module IRC; - -export { - ## Default file handle provider for IRC. 
- global get_file_handle: function(c: connection, is_orig: bool): string; -} - -function get_file_handle(c: connection, is_orig: bool): string - { - if ( [c$id$resp_h, c$id$resp_p] !in dcc_expected_transfers ) - return ""; - - return cat(ANALYZER_IRC_DATA, c$start_time, c$id, is_orig); - } - -event bro_init() &priority=5 - { - Files::register_protocol(ANALYZER_IRC_DATA, IRC::get_file_handle); - } diff --git a/scripts/base/protocols/irc/files.bro b/scripts/base/protocols/irc/files.bro index 8708270bfd..a6321d3f2f 100644 --- a/scripts/base/protocols/irc/files.bro +++ b/scripts/base/protocols/irc/files.bro @@ -24,7 +24,8 @@ function get_file_handle(c: connection, is_orig: bool): string event bro_init() &priority=5 { - Files::register_protocol(Analyzer::ANALYZER_IRC_DATA, IRC::get_file_handle); + Files::register_protocol(Analyzer::ANALYZER_IRC_DATA, + [$get_file_handle = IRC::get_file_handle]); } event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=5 diff --git a/scripts/base/protocols/smtp/files.bro b/scripts/base/protocols/smtp/files.bro index 1cf9ec01e1..f9ae2ab05f 100644 --- a/scripts/base/protocols/smtp/files.bro +++ b/scripts/base/protocols/smtp/files.bro @@ -14,6 +14,9 @@ export { ## Default file handle provider for SMTP. global get_file_handle: function(c: connection, is_orig: bool): string; + + ## Default file describer for SMTP. + global describe_file: function(f: fa_file): string; } function get_file_handle(c: connection, is_orig: bool): string @@ -22,9 +25,25 @@ function get_file_handle(c: connection, is_orig: bool): string c$smtp_state$mime_depth); } +function describe_file(f: fa_file): string + { + # This shouldn't be needed, but just in case... + if ( f$source != "SMTP" ) + return ""; + + for ( cid in f$conns ) + { + local c = f$conns[cid]; + return SMTP::describe(c$smtp); + } + return ""; + } + event bro_init() &priority=5 { - Files::register_protocol(Analyzer::ANALYZER_SMTP, SMTP::get_file_handle); + Files::register_protocol(Analyzer::ANALYZER_SMTP, + [$get_file_handle = SMTP::get_file_handle, + $describe = SMTP::describe_file]); } event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=5 diff --git a/scripts/base/protocols/smtp/main.bro b/scripts/base/protocols/smtp/main.bro index d53128b06c..702cb9fc0e 100644 --- a/scripts/base/protocols/smtp/main.bro +++ b/scripts/base/protocols/smtp/main.bro @@ -72,7 +72,10 @@ export { ## ALL_HOSTS - always capture the entire path. ## NO_HOSTS - never capture the path. const mail_path_capture = ALL_HOSTS &redef; - + + ## Create an extremely shortened representation of a log line. + global describe: function(rec: Info): string; + global log_smtp: event(rec: Info); } @@ -268,3 +271,29 @@ event connection_state_remove(c: connection) &priority=-5 if ( c?$smtp ) smtp_message(c); } + +function describe(rec: Info): string + { + if ( rec?$mailfrom && rec?$rcptto ) + { + local one_to = ""; + for ( to in rec$rcptto ) + { + one_to = to; + break; + } + local abbrev_subject = ""; + if ( rec?$subject ) + { + if ( |rec$subject| > 20 ) + { + abbrev_subject = rec$subject[0:20] + "..."; + } + } + + return fmt("%s -> %s%s%s", rec$mailfrom, one_to, + (|rec$rcptto|>1 ? fmt(" (plus %d others)", |rec$rcptto|-1) : ""), + (abbrev_subject != "" ? 
fmt(": %s", abbrev_subject) : "")); + } + return ""; + } \ No newline at end of file diff --git a/src/analyzer/analyzer.bif b/src/analyzer/analyzer.bif index 7f3cc6ed94..8b5a85956c 100644 --- a/src/analyzer/analyzer.bif +++ b/src/analyzer/analyzer.bif @@ -43,3 +43,8 @@ function __name%(atype: Analyzer::Tag%) : string %{ return new StringVal(analyzer_mgr->GetAnalyzerName(atype)); %} + +function __tag%(name: string%) : Analyzer::Tag + %{ + return new Val(analyzer_mgr->GetAnalyzerTag(name->CheckString()), TYPE_ENUM); + %} From eb7ceb3e9ac4bc49f7a337bcc56046350aaa89d5 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Tue, 16 Jul 2013 12:07:33 -0400 Subject: [PATCH 072/118] Forgot a file. --- scripts/base/protocols/ftp/utils.bro | 47 ++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 scripts/base/protocols/ftp/utils.bro diff --git a/scripts/base/protocols/ftp/utils.bro b/scripts/base/protocols/ftp/utils.bro new file mode 100644 index 0000000000..629b87e5a8 --- /dev/null +++ b/scripts/base/protocols/ftp/utils.bro @@ -0,0 +1,47 @@ +##! Utilities specific for FTP processing. + +@load ./main +@load base/utils/addrs + +module FTP; + +export { + ## Creates a URL from an :bro:type:`FTP::Info` record. + ## + ## rec: An :bro:type:`FTP::Info` record. + ## + ## Returns: A URL, not prefixed by "ftp://". + global build_url: function(rec: Info): string; + + ## Creates a URL from an :bro:type:`FTP::Info` record. + ## + ## rec: An :bro:type:`FTP::Info` record. + ## + ## Returns: A URL prefixed with "ftp://". + global build_url_ftp: function(rec: Info): string; + + ## Create an extremely shortened representation of a log line. + global describe: function(rec: Info): string; +} + +function build_url(rec: Info): string + { + if ( !rec?$arg ) + return ""; + + local comp_path = build_path_compressed(rec$cwd, rec$arg); + if ( comp_path[0] != "/" ) + comp_path = cat("/", comp_path); + + return fmt("%s%s", addr_to_uri(rec$id$resp_h), comp_path); + } + +function build_url_ftp(rec: Info): string + { + return fmt("ftp://%s", build_url(rec)); + } + +function describe(rec: Info): string + { + return build_url_ftp(rec); + } \ No newline at end of file From 57b05a2989d32e87147686b39480240e5162e405 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Wed, 17 Jul 2013 17:30:35 -0700 Subject: [PATCH 073/118] Small raw reader tweaks that I forgot to commit earlier. --- src/input/readers/Raw.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/input/readers/Raw.cc b/src/input/readers/Raw.cc index 98f1dfcab6..2820923a25 100644 --- a/src/input/readers/Raw.cc +++ b/src/input/readers/Raw.cc @@ -55,7 +55,7 @@ void Raw::DoClose() if ( file != 0 ) CloseInput(); - if ( buf != 0 ) + if ( buf != 0 ) { // we still have output that has not been flushed. Throw away. delete buf; @@ -169,8 +169,8 @@ bool Raw::OpenInput() Error(Fmt("Init: cannot open %s", fname.c_str())); return false; } - } fcntl(fileno(file), F_SETFD, FD_CLOEXEC); + } return true; } @@ -468,7 +468,7 @@ bool Raw::DoUpdate() if ( length == -3 ) return false; - else if ( length == -2 || length == -1 ) + else if ( length == -2 || length == -1 ) // no data ready or eof break; From d8801bb9c4bc8c898a6c0b51ddb7a647076237bc Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Wed, 17 Jul 2013 17:31:16 -0700 Subject: [PATCH 074/118] Canonifying internal order for plugins and their components to make it deterministic. 
--- aux/btest | 2 +- src/analyzer/Component.h | 2 +- src/file_analysis/Component.h | 2 +- src/plugin/Component.h | 6 +++++ src/plugin/Manager.cc | 9 +++++++ src/plugin/Plugin.cc | 9 +++++++ .../Baseline/core.print-bpf-filters/conn.log | 4 ++-- .../Baseline/core.print-bpf-filters/output | 24 +++++++++---------- 8 files changed, 41 insertions(+), 17 deletions(-) diff --git a/aux/btest b/aux/btest index c2e73c9e1e..b1d4faf239 160000 --- a/aux/btest +++ b/aux/btest @@ -1 +1 @@ -Subproject commit c2e73c9e1efed6bfdf2d977d716c97773c39492e +Subproject commit b1d4faf23900d4753e93a68abbba45ae3bf96d03 diff --git a/src/analyzer/Component.h b/src/analyzer/Component.h index f3d91c7f90..9e12ed347e 100644 --- a/src/analyzer/Component.h +++ b/src/analyzer/Component.h @@ -72,7 +72,7 @@ public: * from what's passed to the constructor but upper-cased and * canonified to allow being part of a script-level ID. */ - const char* Name() const { return name; } + virtual const char* Name() const { return name; } /** * Returns a canonocalized version of the analyzer's name. The diff --git a/src/file_analysis/Component.h b/src/file_analysis/Component.h index 8b79436991..3cdc69efdf 100644 --- a/src/file_analysis/Component.h +++ b/src/file_analysis/Component.h @@ -64,7 +64,7 @@ public: * from what's passed to the constructor but upper-cased and * canonified to allow being part of a script-level ID. */ - const char* Name() const { return name; } + virtual const char* Name() const { return name; } /** * Returns a canonocalized version of the analyzer's name. The diff --git a/src/plugin/Component.h b/src/plugin/Component.h index 4ac448e466..ad02dc7e4b 100644 --- a/src/plugin/Component.h +++ b/src/plugin/Component.h @@ -45,6 +45,12 @@ public: */ component::Type Type() const; + /** + * Returns a descriptive name for the analyzer. This name must be + * unique across all components of the same type. + */ + virtual const char* Name() const = 0; + /** * Returns a textual representation of the component. The default * version just output the type. Derived version should call the diff --git a/src/plugin/Manager.cc b/src/plugin/Manager.cc index 93ed3f2b97..67f4dea2bd 100644 --- a/src/plugin/Manager.cc +++ b/src/plugin/Manager.cc @@ -30,9 +30,18 @@ bool Manager::LoadPluginsFrom(const std::string& dir) return false; } +static bool plugin_cmp(const Plugin* a, const Plugin* b) + { + return a->Name() < b->Name(); + } + bool Manager::RegisterPlugin(Plugin *plugin) { Manager::PluginsInternal()->push_back(plugin); + + // Sort plugins by name to make sure we have a deterministic order. + PluginsInternal()->sort(plugin_cmp); + return true; } diff --git a/src/plugin/Plugin.cc b/src/plugin/Plugin.cc index 084c49f51e..eaac8a3b25 100644 --- a/src/plugin/Plugin.cc +++ b/src/plugin/Plugin.cc @@ -156,9 +156,18 @@ Plugin::component_list Plugin::Components() const return components; } +static bool component_cmp(const Component* a, const Component* b) + { + return a->Name() < b->Name(); + } + void Plugin::AddComponent(Component* c) { components.push_back(c); + + // Sort components by name to make sure we have a deterministic + // order. 
+ components.sort(component_cmp); } void Plugin::AddBifInitFunction(bif_init_func c) diff --git a/testing/btest/Baseline/core.print-bpf-filters/conn.log b/testing/btest/Baseline/core.print-bpf-filters/conn.log index 745673c027..166286203e 100644 --- a/testing/btest/Baseline/core.print-bpf-filters/conn.log +++ b/testing/btest/Baseline/core.print-bpf-filters/conn.log @@ -3,8 +3,8 @@ #empty_field (empty) #unset_field - #path conn -#open 2013-07-08-20-05-18 +#open 2013-07-18-00-18-33 #fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p proto service duration orig_bytes resp_bytes conn_state local_orig missed_bytes history orig_pkts orig_ip_bytes resp_pkts resp_ip_bytes tunnel_parents #types time string addr port addr port enum string interval count count string bool count string count count count count table[string] 1278600802.069419 UWkUyAuUGXf 10.20.80.1 50343 10.0.0.15 80 tcp - 0.004152 9 3429 SF - 0 ShADadfF 7 381 7 3801 (empty) -#close 2013-07-08-20-05-18 +#close 2013-07-18-00-18-33 diff --git a/testing/btest/Baseline/core.print-bpf-filters/output b/testing/btest/Baseline/core.print-bpf-filters/output index 8ccc04b1a7..871719bba8 100644 --- a/testing/btest/Baseline/core.print-bpf-filters/output +++ b/testing/btest/Baseline/core.print-bpf-filters/output @@ -3,38 +3,38 @@ #empty_field (empty) #unset_field - #path packet_filter -#open 2013-07-08-20-05-17 +#open 2013-07-18-00-18-33 #fields ts node filter init success #types time string string bool bool -1373313917.926565 - ip or not ip T T -#close 2013-07-08-20-05-17 +1374106713.105591 - ip or not ip T T +#close 2013-07-18-00-18-33 #separator \x09 #set_separator , #empty_field (empty) #unset_field - #path packet_filter -#open 2013-07-08-20-05-18 +#open 2013-07-18-00-18-33 #fields ts node filter init success #types time string string bool bool -1373313918.205206 - port 42 T T -#close 2013-07-08-20-05-18 +1374106713.385541 - port 42 T T +#close 2013-07-18-00-18-33 #separator \x09 #set_separator , #empty_field (empty) #unset_field - #path packet_filter -#open 2013-07-08-20-05-18 +#open 2013-07-18-00-18-33 #fields ts node filter init success #types time string string bool bool -1373313918.491383 - (vlan) and (ip or not ip) T T -#close 2013-07-08-20-05-18 +1374106713.664282 - (vlan) and (ip or not ip) T T +#close 2013-07-18-00-18-33 #separator \x09 #set_separator , #empty_field (empty) #unset_field - #path packet_filter -#open 2013-07-08-20-05-18 +#open 2013-07-18-00-18-33 #fields ts node filter init success #types time string string bool bool -1373313918.795264 - ((((((((((((udp and port 3544) or (udp and port 514)) or ((tcp and port 2811) or (tcp and port 21))) or (tcp and port 502)) or ((((tcp and port 6669) or (tcp and port 6666)) or (tcp and port 6668)) or (tcp and port 6667))) or (tcp and port 1080)) or ((udp and port 2152) or (udp and port 2123))) or ((((((((tcp and port 631) or (tcp and port 8888)) or (tcp and port 3128)) or (tcp and port 80)) or (tcp and port 1080)) or (tcp and port 8000)) or (tcp and port 81)) or (tcp and port 8080))) or (udp and port 5072)) or ((tcp and port 25) or (tcp and port 587))) or (((((((((((tcp and port 5223) or (tcp and port 585)) or (tcp and port 614)) or (tcp and port 993)) or (tcp and port 636)) or (tcp and port 989)) or (tcp and port 995)) or (tcp and port 443)) or (tcp and port 563)) or (tcp and port 990)) or (tcp and port 992))) or (((((udp and port 5355) or (tcp and port 53)) or (udp and port 5353)) or (udp and port 137)) or (udp and port 53))) or (tcp and port 22) T T -#close 2013-07-08-20-05-18 
+1374106713.957005 - ((((((((((((((((((((((tcp and port 5223) or (tcp and port 585)) or (tcp and port 614)) or (tcp and port 993)) or (tcp and port 636)) or (tcp and port 989)) or (tcp and port 995)) or (tcp and port 443)) or (tcp and port 563)) or (tcp and port 990)) or (tcp and port 992)) or ((tcp and port 2811) or (tcp and port 21))) or ((((tcp and port 6669) or (tcp and port 6666)) or (tcp and port 6668)) or (tcp and port 6667))) or ((udp and port 2152) or (udp and port 2123))) or (tcp and port 22)) or (tcp and port 1080)) or ((((((((tcp and port 631) or (tcp and port 8888)) or (tcp and port 3128)) or (tcp and port 80)) or (tcp and port 1080)) or (tcp and port 8000)) or (tcp and port 81)) or (tcp and port 8080))) or (udp and port 5072)) or ((tcp and port 25) or (tcp and port 587))) or (tcp and port 502)) or (udp and port 514)) or (((((udp and port 5355) or (tcp and port 53)) or (udp and port 5353)) or (udp and port 137)) or (udp and port 53))) or (udp and port 3544) T T +#close 2013-07-18-00-18-33 From efd343af8d0122975536308b7a98689f6def42d1 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Wed, 17 Jul 2013 21:55:36 -0700 Subject: [PATCH 075/118] Extending external canonifier to remove fractional values from capture_loss.log. --- testing/scripts/diff-canonifier-external | 9 ++++++++- testing/scripts/diff-remove-fractions | 6 ++++++ 2 files changed, 14 insertions(+), 1 deletion(-) create mode 100755 testing/scripts/diff-remove-fractions diff --git a/testing/scripts/diff-canonifier-external b/testing/scripts/diff-canonifier-external index f4356154e4..37a51fa72f 100755 --- a/testing/scripts/diff-canonifier-external +++ b/testing/scripts/diff-canonifier-external @@ -2,10 +2,17 @@ # # Default canonifier used with the trace-based tests in testing/external/*. +addl="cat" + +if [ "$1" == "capture_loss.log" ]; then + addl="`dirname $0`/diff-remove-fractions" +fi + `dirname $0`/diff-remove-timestamps \ | `dirname $0`/diff-remove-uids \ | `dirname $0`/diff-remove-file-ids \ | `dirname $0`/diff-remove-x509-names \ | `dirname $0`/diff-canon-notice-policy \ - | `dirname $0`/diff-sort + | `dirname $0`/diff-sort \ + | eval $addl diff --git a/testing/scripts/diff-remove-fractions b/testing/scripts/diff-remove-fractions new file mode 100755 index 0000000000..975157913c --- /dev/null +++ b/testing/scripts/diff-remove-fractions @@ -0,0 +1,6 @@ +#! /usr/bin/env bash +# +# Replace fractions of double value (i.e., 3.14 -> 3.x). + +sed 's/\.[0-9]\{1,\}/.X/g' + From c373f93c4f8922c31e8676ba7aa139e594b41bcf Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Wed, 17 Jul 2013 21:57:25 -0700 Subject: [PATCH 076/118] Updating submodule(s). [nomail] --- aux/btest | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aux/btest b/aux/btest index b1d4faf239..ce366206e3 160000 --- a/aux/btest +++ b/aux/btest @@ -1 +1 @@ -Subproject commit b1d4faf23900d4753e93a68abbba45ae3bf96d03 +Subproject commit ce366206e3407e534a786ad572c342e9f9fef26b From 1e32100fed2eac8639453739c376dc070befc9c0 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Thu, 18 Jul 2013 09:24:22 -0400 Subject: [PATCH 077/118] Fixing a dns reporter message in master. 
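As an illustration (not part of the diff below): the change makes DNS::do_reply flag replies that arrive without any matching request state by raising the "dns_unmatched_reply" weird before creating a session for them. A site script could watch for that weird with the same conn_weird signature the patch uses; a minimal sketch:

    # Sketch only; the weird name matches the one raised by the patched
    # DNS::do_reply handler.
    event conn_weird(name: string, c: connection, addl: string)
        {
        if ( name == "dns_unmatched_reply" )
            print fmt("unmatched DNS reply: %s -> %s", c$id$orig_h, c$id$resp_h);
        }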
--- scripts/base/protocols/dns/main.bro | 5 +++++ .../dns.log | 11 +++++++++++ .../weird.log | 11 +++++++++++ testing/btest/Traces/dns-two-responses.trace | Bin 0 -> 1006 bytes .../base/protocols/dns/duplicate-reponses.bro | 5 +++++ 5 files changed, 32 insertions(+) create mode 100644 testing/btest/Baseline/scripts.base.protocols.dns.duplicate-reponses/dns.log create mode 100644 testing/btest/Baseline/scripts.base.protocols.dns.duplicate-reponses/weird.log create mode 100644 testing/btest/Traces/dns-two-responses.trace create mode 100644 testing/btest/scripts/base/protocols/dns/duplicate-reponses.bro diff --git a/scripts/base/protocols/dns/main.bro b/scripts/base/protocols/dns/main.bro index ea3ec016de..bf47519cd8 100644 --- a/scripts/base/protocols/dns/main.bro +++ b/scripts/base/protocols/dns/main.bro @@ -207,6 +207,11 @@ event DNS::do_reply(c: connection, msg: dns_msg, ans: dns_answer, reply: string) { if ( ans$answer_type == DNS_ANS ) { + if ( ! c?$dns ) + { + event conn_weird("dns_unmatched_reply", c, ""); + hook set_session(c, msg, F); + } c$dns$AA = msg$AA; c$dns$RA = msg$RA; diff --git a/testing/btest/Baseline/scripts.base.protocols.dns.duplicate-reponses/dns.log b/testing/btest/Baseline/scripts.base.protocols.dns.duplicate-reponses/dns.log new file mode 100644 index 0000000000..ca071ee8ef --- /dev/null +++ b/testing/btest/Baseline/scripts.base.protocols.dns.duplicate-reponses/dns.log @@ -0,0 +1,11 @@ +#separator \x09 +#set_separator , +#empty_field (empty) +#unset_field - +#path dns +#open 2013-07-18-13-21-52 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p proto trans_id query qclass qclass_name qtype qtype_name rcode rcode_name AA TC RD RA Z answers TTLs rejected +#types time string addr port addr port enum count string count string count string count string bool bool bool bool count vector[string] vector[interval] bool +1363716396.798072 UWkUyAuUGXf 55.247.223.174 27285 222.195.43.124 53 udp 21140 www.cmu.edu 1 C_INTERNET 1 A 0 NOERROR T F F F 1 www-cmu.andrew.cmu.edu,www-cmu-2.andrew.cmu.edu,128.2.10.163,www-cmu.andrew.cmu.edu 86400.000000,5.000000,21600.000000,86400.000000 F +1363716396.798374 UWkUyAuUGXf 55.247.223.174 27285 222.195.43.124 53 udp 21140 - - - - - 0 NOERROR T F F F 0 www-cmu-2.andrew.cmu.edu,128.2.10.163 5.000000,21600.000000 F +#close 2013-07-18-13-21-52 diff --git a/testing/btest/Baseline/scripts.base.protocols.dns.duplicate-reponses/weird.log b/testing/btest/Baseline/scripts.base.protocols.dns.duplicate-reponses/weird.log new file mode 100644 index 0000000000..c7de92f894 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.protocols.dns.duplicate-reponses/weird.log @@ -0,0 +1,11 @@ +#separator \x09 +#set_separator , +#empty_field (empty) +#unset_field - +#path weird +#open 2013-07-18-13-21-52 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p name addl notice peer +#types time string addr port addr port string string bool string +1363716396.798286 UWkUyAuUGXf 55.247.223.174 27285 222.195.43.124 53 DNS_RR_unknown_type - F bro +1363716396.798374 UWkUyAuUGXf 55.247.223.174 27285 222.195.43.124 53 dns_unmatched_reply - F bro +#close 2013-07-18-13-21-52 diff --git a/testing/btest/Traces/dns-two-responses.trace b/testing/btest/Traces/dns-two-responses.trace new file mode 100644 index 0000000000000000000000000000000000000000..627b0d2ebe091fcec6ffbbcbe8c96019235d718b GIT binary patch literal 1006 zcmca|c+)~A1{MYw`2U}Qff2~jS?Lj2q07S%1Z0CSgWArB$9EYdN*v}ea4@(sFt{8u zVPG&6T-a~^{r7O<@oK8v!(kxxBobIXSnKIW?seWG>KTO#z@` 
z4L}UC!B3BeVHHpigdt`_>;c)pI7dvGfx$u0(Gz4k$QB0Etf`D0FD3*{X#v^5!T@wQ zh7AXJ7+66z1vW4UvWJI<>pJ^}vN`&>1i6ME5CBT*K_zE1u(C6Qlm*6YiVifDsFGHO zyEMB*YExpCz3~R!=||*l#~<*%XH>z~zW2P!4u#%zr2&tl#auLguaxB}dLF)<_q4?% z-vV8hr4whT+p@C7sj%&|o_X5qrt6X1P%e$Pdk&s{cH8jyj#RD`jWxfv=+v2VaV*~8gd{4Nrnq5V{}mVpeR^BC~ScG899J_ i8K`~%HTK^cz({}rmV^38YTr&-ffjSiMq_RW#vA~4#T8xv literal 0 HcmV?d00001 diff --git a/testing/btest/scripts/base/protocols/dns/duplicate-reponses.bro b/testing/btest/scripts/base/protocols/dns/duplicate-reponses.bro new file mode 100644 index 0000000000..a16235b9a5 --- /dev/null +++ b/testing/btest/scripts/base/protocols/dns/duplicate-reponses.bro @@ -0,0 +1,5 @@ +# This tests the case where the DNS server responded with zero RRs. +# +# @TEST-EXEC: bro -r $TRACES/dns-two-responses.trace +# @TEST-EXEC: btest-diff dns.log +# @TEST-EXEC: btest-diff weird.log \ No newline at end of file From 006e370ee04775c6196b52368c93897402115992 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Thu, 18 Jul 2013 19:58:19 -0700 Subject: [PATCH 078/118] Canonyfying the output of core.print-bpf-filters. I couldn't figure out why it's not stable but it doesn't seem to matter for now unless more such situations show up. --- .../Baseline/core.print-bpf-filters/output | 28 ++++-------- .../Baseline/core.print-bpf-filters/output2 | 43 +++++++++++++++++++ testing/btest/core/print-bpf-filters.bro | 9 +++- 3 files changed, 59 insertions(+), 21 deletions(-) create mode 100644 testing/btest/Baseline/core.print-bpf-filters/output2 diff --git a/testing/btest/Baseline/core.print-bpf-filters/output b/testing/btest/Baseline/core.print-bpf-filters/output index 871719bba8..2f7a1d9386 100644 --- a/testing/btest/Baseline/core.print-bpf-filters/output +++ b/testing/btest/Baseline/core.print-bpf-filters/output @@ -3,38 +3,28 @@ #empty_field (empty) #unset_field - #path packet_filter -#open 2013-07-18-00-18-33 +#open 2013-07-19-02-54-13 #fields ts node filter init success #types time string string bool bool -1374106713.105591 - ip or not ip T T -#close 2013-07-18-00-18-33 +1374202453.158981 - ip or not ip T T +#close 2013-07-19-02-54-13 #separator \x09 #set_separator , #empty_field (empty) #unset_field - #path packet_filter -#open 2013-07-18-00-18-33 +#open 2013-07-19-02-54-13 #fields ts node filter init success #types time string string bool bool -1374106713.385541 - port 42 T T -#close 2013-07-18-00-18-33 +1374202453.437816 - port 42 T T +#close 2013-07-19-02-54-13 #separator \x09 #set_separator , #empty_field (empty) #unset_field - #path packet_filter -#open 2013-07-18-00-18-33 +#open 2013-07-19-02-54-13 #fields ts node filter init success #types time string string bool bool -1374106713.664282 - (vlan) and (ip or not ip) T T -#close 2013-07-18-00-18-33 -#separator \x09 -#set_separator , -#empty_field (empty) -#unset_field - -#path packet_filter -#open 2013-07-18-00-18-33 -#fields ts node filter init success -#types time string string bool bool -1374106713.957005 - ((((((((((((((((((((((tcp and port 5223) or (tcp and port 585)) or (tcp and port 614)) or (tcp and port 993)) or (tcp and port 636)) or (tcp and port 989)) or (tcp and port 995)) or (tcp and port 443)) or (tcp and port 563)) or (tcp and port 990)) or (tcp and port 992)) or ((tcp and port 2811) or (tcp and port 21))) or ((((tcp and port 6669) or (tcp and port 6666)) or (tcp and port 6668)) or (tcp and port 6667))) or ((udp and port 2152) or (udp and port 2123))) or (tcp and port 22)) or (tcp and port 1080)) or ((((((((tcp and port 631) or (tcp and port 8888)) or (tcp and port 3128)) 
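For context, what produces the unstable expression (illustration only, not part of the change): enabling the automatically generated per-protocol capture filters makes Bro OR them together into the single BPF string recorded in packet_filter.log, e.g.

    # Sketch of the option the test flips; assumed to sit in a test or
    # local script and to be redef-able as usual for PacketFilter options.
    # The resulting composite filter is what gets logged and canonified.
    redef PacketFilter::enable_auto_protocol_capture_filters = T;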
or (tcp and port 80)) or (tcp and port 1080)) or (tcp and port 8000)) or (tcp and port 81)) or (tcp and port 8080))) or (udp and port 5072)) or ((tcp and port 25) or (tcp and port 587))) or (tcp and port 502)) or (udp and port 514)) or (((((udp and port 5355) or (tcp and port 53)) or (udp and port 5353)) or (udp and port 137)) or (udp and port 53))) or (udp and port 3544) T T -#close 2013-07-18-00-18-33 +1374202453.715717 - (vlan) and (ip or not ip) T T +#close 2013-07-19-02-54-13 diff --git a/testing/btest/Baseline/core.print-bpf-filters/output2 b/testing/btest/Baseline/core.print-bpf-filters/output2 new file mode 100644 index 0000000000..460b02e055 --- /dev/null +++ b/testing/btest/Baseline/core.print-bpf-filters/output2 @@ -0,0 +1,43 @@ + 2 1080 + 1 137 + 1 21 + 1 2123 + 1 2152 + 1 22 + 1 25 + 1 2811 + 1 3128 + 1 3544 + 1 443 + 1 502 + 1 5072 + 1 514 + 1 5223 + 2 53 + 1 5353 + 1 5355 + 1 563 + 1 585 + 1 587 + 1 614 + 1 631 + 1 636 + 1 6666 + 1 6667 + 1 6668 + 1 6669 + 1 80 + 1 8000 + 1 8080 + 1 81 + 1 8888 + 1 989 + 1 990 + 1 992 + 1 993 + 1 995 + 40 and + 39 or + 40 port + 31 tcp + 9 udp diff --git a/testing/btest/core/print-bpf-filters.bro b/testing/btest/core/print-bpf-filters.bro index 2c3d761cca..410db14b5d 100644 --- a/testing/btest/core/print-bpf-filters.bro +++ b/testing/btest/core/print-bpf-filters.bro @@ -4,7 +4,12 @@ # @TEST-EXEC: cat packet_filter.log >>output # @TEST-EXEC: bro -r $TRACES/mixed-vlan-mpls.trace PacketFilter::restricted_filter="vlan" >>output # @TEST-EXEC: cat packet_filter.log >>output -# @TEST-EXEC: bro -r $TRACES/empty.trace PacketFilter::enable_auto_protocol_capture_filters=T >>output -# @TEST-EXEC: cat packet_filter.log >>output # @TEST-EXEC: btest-diff output # @TEST-EXEC: btest-diff conn.log +# +# The order in the output of enable_auto_protocol_capture_filters isn't +# stable, for reasons not clear. We canonify it first. +# @TEST-EXEC: bro -r $TRACES/empty.trace PacketFilter::enable_auto_protocol_capture_filters=T +# @TEST-EXEC: cat packet_filter.log | bro-cut filter | sed 's#[()]##g' | tr ' ' '\n' | sort | uniq -c >output2 +# @TEST-EXEC: btest-diff output2 + From d3495207453aa5f10edef51699606856e9829987 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Thu, 18 Jul 2013 21:34:02 -0700 Subject: [PATCH 079/118] Another test fix. The classic "uniq -c" is not portable ... 
--- .../Baseline/core.print-bpf-filters/output2 | 86 +++++++++---------- testing/btest/core/print-bpf-filters.bro | 2 +- 2 files changed, 44 insertions(+), 44 deletions(-) diff --git a/testing/btest/Baseline/core.print-bpf-filters/output2 b/testing/btest/Baseline/core.print-bpf-filters/output2 index 460b02e055..99ad929fbf 100644 --- a/testing/btest/Baseline/core.print-bpf-filters/output2 +++ b/testing/btest/Baseline/core.print-bpf-filters/output2 @@ -1,43 +1,43 @@ - 2 1080 - 1 137 - 1 21 - 1 2123 - 1 2152 - 1 22 - 1 25 - 1 2811 - 1 3128 - 1 3544 - 1 443 - 1 502 - 1 5072 - 1 514 - 1 5223 - 2 53 - 1 5353 - 1 5355 - 1 563 - 1 585 - 1 587 - 1 614 - 1 631 - 1 636 - 1 6666 - 1 6667 - 1 6668 - 1 6669 - 1 80 - 1 8000 - 1 8080 - 1 81 - 1 8888 - 1 989 - 1 990 - 1 992 - 1 993 - 1 995 - 40 and - 39 or - 40 port - 31 tcp - 9 udp +2 1080 +1 137 +1 21 +1 2123 +1 2152 +1 22 +1 25 +1 2811 +1 3128 +1 3544 +1 443 +1 502 +1 5072 +1 514 +1 5223 +2 53 +1 5353 +1 5355 +1 563 +1 585 +1 587 +1 614 +1 631 +1 636 +1 6666 +1 6667 +1 6668 +1 6669 +1 80 +1 8000 +1 8080 +1 81 +1 8888 +1 989 +1 990 +1 992 +1 993 +1 995 +40 and +39 or +40 port +31 tcp +9 udp diff --git a/testing/btest/core/print-bpf-filters.bro b/testing/btest/core/print-bpf-filters.bro index 410db14b5d..6e4a4d5c30 100644 --- a/testing/btest/core/print-bpf-filters.bro +++ b/testing/btest/core/print-bpf-filters.bro @@ -10,6 +10,6 @@ # The order in the output of enable_auto_protocol_capture_filters isn't # stable, for reasons not clear. We canonify it first. # @TEST-EXEC: bro -r $TRACES/empty.trace PacketFilter::enable_auto_protocol_capture_filters=T -# @TEST-EXEC: cat packet_filter.log | bro-cut filter | sed 's#[()]##g' | tr ' ' '\n' | sort | uniq -c >output2 +# @TEST-EXEC: cat packet_filter.log | bro-cut filter | sed 's#[()]##g' | tr ' ' '\n' | sort | uniq -c | awk '{print $1, $2}' >output2 # @TEST-EXEC: btest-diff output2 From 9b444b2617c0a910a24ea938a3064eb092f26537 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Fri, 19 Jul 2013 13:16:12 -0400 Subject: [PATCH 080/118] Updates for the Intel Framework. - Intel importing format has changed (refer to docs). - All string matching is now case insensitive. - SMTP intel script has been updated to extract email addresses correctly. - Small fix sneaking into the smtp base script to actually extract individual email addresses in the To: field correctly. 
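For reference, a minimal sketch of the reworked script-level API (illustration only, assembled from the fields and enum values used in the diff below; the concrete indicator values are made up):

    # An item now carries a single indicator string plus an indicator_type,
    # and sightings are reported the same way; string matching is case
    # insensitive.
    @load base/frameworks/intel

    # Sites can extend the set of "where" locations for their own sightings.
    redef enum Intel::Where += { SOMEWHERE };

    event bro_init()
        {
        Intel::insert([$indicator="1.2.3.4",
                       $indicator_type=Intel::ADDR,
                       $meta=[$source="source1"]]);

        Intel::seen([$indicator="e@mail.com",
                     $indicator_type=Intel::EMAIL,
                     $where=SOMEWHERE]);
        }

Data files follow the same shift and now start with "#fields indicator indicator_type meta.source meta.desc meta.url", as shown in the updated doc/intel.rst below.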
--- doc/intel.rst | 20 +-- scripts/base/frameworks/intel/main.bro | 141 ++++++++---------- scripts/base/protocols/smtp/main.bro | 5 +- .../frameworks/intel/conn-established.bro | 8 +- scripts/policy/frameworks/intel/dns.bro | 4 +- .../frameworks/intel/http-host-header.bro | 4 +- scripts/policy/frameworks/intel/http-url.bro | 4 +- .../frameworks/intel/http-user-agents.bro | 4 +- .../frameworks/intel/smtp-url-extraction.bro | 4 +- scripts/policy/frameworks/intel/smtp.bro | 70 ++++++--- scripts/policy/frameworks/intel/ssl.bro | 12 +- .../manager-1.intel.log | 10 +- .../broproc.intel.log | 12 +- .../manager-1.intel.log | 16 +- .../frameworks/intel/cluster-transparency.bro | 8 +- .../base/frameworks/intel/input-and-match.bro | 12 +- .../intel/read-file-dist-cluster.bro | 10 +- 17 files changed, 178 insertions(+), 166 deletions(-) diff --git a/doc/intel.rst b/doc/intel.rst index 390313461a..2a59a98974 100644 --- a/doc/intel.rst +++ b/doc/intel.rst @@ -29,9 +29,6 @@ Framework to be checked by loading this script in local.bro:: @load policy/frameworks/intel -(TODO: find some good mechanism for getting setup with good data -quickly) - Refer to the "Loading Intelligence" section below to see the format for Intelligence Framework text files, then load those text files with this line in local.bro:: @@ -61,16 +58,14 @@ data out to all of the nodes that need it. Here is an example of the intelligence data format. Note that all whitespace separators are literal tabs and fields containing only a -hyphen a considered to be null values.:: +hyphen are considered to be null values.:: - #fields host net str str_type meta.source meta.desc meta.url - 1.2.3.4 - - - source1 Sending phishing email http://source1.com/badhosts/1.2.3.4 - - 31.131.248.0/21 - - spamhaus-drop SBL154982 - - - - - a.b.com Intel::DOMAIN source2 Name used for data exfiltration - + #fields indicator indicator_type meta.source meta.desc meta.url + 1.2.3.4 Intel::ADDR source1 Sending phishing email http://source1.com/badhosts/1.2.3.4 + a.b.com Intel::DOMAIN source2 Name used for data exfiltration - -For more examples of built in `str_type` values, please refer to the -autogenerated documentation for the intelligence framework (TODO: -figure out how to do this link). +For more examples of built in `indicator_type` values, please refer to the +autogenerated documentation for the intelligence framework. To load the data once files are created, use the following example code to define files to load with your own file names of course:: @@ -90,8 +85,7 @@ When some bit of data is extracted (such as an email address in the "From" header in a message over SMTP), the Intelligence Framework needs to be informed that this data was discovered and it's presence should be checked within the intelligence data set. This is -accomplished through the Intel::seen (TODO: do a reference link) -function. +accomplished through the Intel::seen function. Typically users won't need to work with this function due to built in hook scripts that Bro ships with that will "see" data and send it into diff --git a/scripts/base/frameworks/intel/main.bro b/scripts/base/frameworks/intel/main.bro index aeb7bf4bfc..1b740f538d 100644 --- a/scripts/base/frameworks/intel/main.bro +++ b/scripts/base/frameworks/intel/main.bro @@ -10,13 +10,14 @@ module Intel; export { redef enum Log::ID += { LOG }; - ## String data needs to be further categoried since it could represent - ## and number of types of data. - type StrType: enum { + ## Enum type to represent various types of intelligence data. 
+ type Type: enum { + ## An IP address. + ADDR, ## A complete URL without the prefix "http://". URL, - ## User-Agent string, typically HTTP or mail message body. - USER_AGENT, + ## Software name. + SOFTWARE, ## Email address. EMAIL, ## DNS domain name. @@ -44,18 +45,15 @@ export { ## Represents a piece of intelligence. type Item: record { - ## The IP address if the intelligence is about an IP address. - host: addr &optional; - ## The network if the intelligence is about a CIDR block. - net: subnet &optional; - ## The string if the intelligence is about a string. - str: string &optional; - ## The type of data that is in the string if the $str field is set. - str_type: StrType &optional; + ## The intelligence indicator. + indicator: string; + + ## The type of data that the indicator field represents. + indicator_type: Type; - ## Metadata for the item. Typically represents more deeply \ + ## Metadata for the item. Typically represents more deeply ## descriptive data for a piece of intelligence. - meta: MetaData; + meta: MetaData; }; ## Enum to represent where data came from when it was discovered. @@ -69,19 +67,22 @@ export { ## exclusive. These records *must* represent either an IP address being ## seen or a string being seen. type Seen: record { - ## The IP address if the data seen is an IP address. - host: addr &log &optional; ## The string if the data is about a string. - str: string &log &optional; - ## The type of data that is in the string if the $str field is set. - str_type: StrType &log &optional; + indicator: string &log &optional; + + ## The type of data that the indicator represents. + indicator_type: Type &log &optional; + + ## If the indicator type was :bro:enum:`Intel::ADDR`, then this + ## field will be present. + host: addr &optional; ## Where the data was discovered. - where: Where &log; + where: Where &log; ## If the data was discovered within a connection, the ## connection record should go into get to give context to the data. - conn: connection &optional; + conn: connection &optional; }; ## Record used for the logging framework representing a positive @@ -100,7 +101,7 @@ export { ## Where the data was seen. seen: Seen &log; ## Sources which supplied data that resulted in this match. - sources: set[string] &log; + sources: set[string] &log &default=string_set(); }; ## Intelligence data manipulation functions. @@ -135,8 +136,8 @@ const have_full_data = T &redef; # The in memory data structure for holding intelligence. type DataStore: record { - net_data: table[subnet] of set[MetaData]; - string_data: table[string, StrType] of set[MetaData]; + host_data: table[addr] of set[MetaData]; + string_data: table[string, Type] of set[MetaData]; }; global data_store: DataStore &redef; @@ -144,8 +145,8 @@ global data_store: DataStore &redef; # This is primarily for workers to do the initial quick matches and store # a minimal amount of data for the full match to happen on the manager. 
type MinDataStore: record { - net_data: set[subnet]; - string_data: set[string, StrType]; + host_data: set[addr]; + string_data: set[string, Type]; }; global min_data_store: MinDataStore &redef; @@ -157,15 +158,13 @@ event bro_init() &priority=5 function find(s: Seen): bool { - if ( s?$host && - ((have_full_data && s$host in data_store$net_data) || - (s$host in min_data_store$net_data))) + if ( s?$host ) { - return T; + return ((s$host in min_data_store$host_data) || + (have_full_data && s$host in data_store$host_data)); } - else if ( s?$str && s?$str_type && - ((have_full_data && [s$str, s$str_type] in data_store$string_data) || - ([s$str, s$str_type] in min_data_store$string_data))) + else if ( ([to_lower(s$indicator), s$indicator_type] in min_data_store$string_data) || + (have_full_data && [to_lower(s$indicator), s$indicator_type] in data_store$string_data) ) { return T; } @@ -177,8 +176,7 @@ function find(s: Seen): bool function get_items(s: Seen): set[Item] { - local item: Item; - local return_data: set[Item] = set(); + local return_data: set[Item]; if ( ! have_full_data ) { @@ -191,26 +189,23 @@ function get_items(s: Seen): set[Item] if ( s?$host ) { # See if the host is known about and it has meta values - if ( s$host in data_store$net_data ) + if ( s$host in data_store$host_data ) { - for ( m in data_store$net_data[s$host] ) + for ( m in data_store$host_data[s$host] ) { - # TODO: the lookup should be finding all and not just most specific - # and $host/$net should have the correct value. - item = [$host=s$host, $meta=m]; - add return_data[item]; + add return_data[Item($indicator=cat(s$host), $indicator_type=ADDR, $meta=m)]; } } } - else if ( s?$str && s?$str_type ) + else { + local lower_indicator = to_lower(s$indicator); # See if the string is known about and it has meta values - if ( [s$str, s$str_type] in data_store$string_data ) + if ( [lower_indicator, s$indicator_type] in data_store$string_data ) { - for ( m in data_store$string_data[s$str, s$str_type] ) + for ( m in data_store$string_data[lower_indicator, s$indicator_type] ) { - item = [$str=s$str, $str_type=s$str_type, $meta=m]; - add return_data[item]; + add return_data[Item($indicator=s$indicator, $indicator_type=s$indicator_type, $meta=m)]; } } } @@ -222,6 +217,12 @@ function Intel::seen(s: Seen) { if ( find(s) ) { + if ( s?$host ) + { + s$indicator = cat(s$host); + s$indicator_type = Intel::ADDR; + } + if ( have_full_data ) { local items = get_items(s); @@ -250,8 +251,7 @@ function has_meta(check: MetaData, metas: set[MetaData]): bool event Intel::match(s: Seen, items: set[Item]) &priority=5 { - local empty_set: set[string] = set(); - local info: Info = [$ts=network_time(), $seen=s, $sources=empty_set]; + local info: Info = [$ts=network_time(), $seen=s]; if ( s?$conn ) { @@ -267,52 +267,37 @@ event Intel::match(s: Seen, items: set[Item]) &priority=5 function insert(item: Item) { - if ( item?$str && !item?$str_type ) - { - event reporter_warning(network_time(), fmt("You must provide a str_type for strings or this item doesn't make sense. Item: %s", item), ""); - return; - } - # Create and fill out the meta data item. local meta = item$meta; local metas: set[MetaData]; - if ( item?$host ) + # All intelligence is case insensitive at the moment. + local lower_indicator = to_lower(item$indicator); + + if ( item$indicator_type == ADDR ) { - local host = mask_addr(item$host, is_v4_addr(item$host) ? 
32 : 128); + local host = to_addr(item$indicator); if ( have_full_data ) { - if ( host !in data_store$net_data ) - data_store$net_data[host] = set(); + if ( host !in data_store$host_data ) + data_store$host_data[host] = set(); - metas = data_store$net_data[host]; + metas = data_store$host_data[host]; } - add min_data_store$net_data[host]; + add min_data_store$host_data[host]; } - else if ( item?$net ) + else { if ( have_full_data ) { - if ( item$net !in data_store$net_data ) - data_store$net_data[item$net] = set(); + if ( [lower_indicator, item$indicator_type] !in data_store$string_data ) + data_store$string_data[lower_indicator, item$indicator_type] = set(); - metas = data_store$net_data[item$net]; + metas = data_store$string_data[lower_indicator, item$indicator_type]; } - add min_data_store$net_data[item$net]; - } - else if ( item?$str ) - { - if ( have_full_data ) - { - if ( [item$str, item$str_type] !in data_store$string_data ) - data_store$string_data[item$str, item$str_type] = set(); - - metas = data_store$string_data[item$str, item$str_type]; - } - - add min_data_store$string_data[item$str, item$str_type]; + add min_data_store$string_data[lower_indicator, item$indicator_type]; } local updated = F; diff --git a/scripts/base/protocols/smtp/main.bro b/scripts/base/protocols/smtp/main.bro index d53128b06c..0d510e645d 100644 --- a/scripts/base/protocols/smtp/main.bro +++ b/scripts/base/protocols/smtp/main.bro @@ -223,7 +223,10 @@ event mime_one_header(c: connection, h: mime_header_rec) &priority=5 { if ( ! c$smtp?$to ) c$smtp$to = set(); - add c$smtp$to[h$value]; + + local to_parts = split(h$value, /[[:blank:]]*,[[:blank:]]*/); + for ( i in to_parts ) + add c$smtp$to[to_parts[i]]; } else if ( h$name == "X-ORIGINATING-IP" ) diff --git a/scripts/policy/frameworks/intel/conn-established.bro b/scripts/policy/frameworks/intel/conn-established.bro index a2e67b292b..20cec43e04 100644 --- a/scripts/policy/frameworks/intel/conn-established.bro +++ b/scripts/policy/frameworks/intel/conn-established.bro @@ -3,6 +3,10 @@ event connection_established(c: connection) { - Intel::seen([$host=c$id$orig_h, $conn=c, $where=Conn::IN_ORIG]); - Intel::seen([$host=c$id$resp_h, $conn=c, $where=Conn::IN_RESP]); + if ( c$orig$state == TCP_ESTABLISHED && + c$resp$state == TCP_ESTABLISHED ) + { + Intel::seen([$host=c$id$orig_h, $conn=c, $where=Conn::IN_ORIG]); + Intel::seen([$host=c$id$resp_h, $conn=c, $where=Conn::IN_RESP]); + } } diff --git a/scripts/policy/frameworks/intel/dns.bro b/scripts/policy/frameworks/intel/dns.bro index a0dee47acf..9218586c95 100644 --- a/scripts/policy/frameworks/intel/dns.bro +++ b/scripts/policy/frameworks/intel/dns.bro @@ -3,8 +3,8 @@ event dns_request(c: connection, msg: dns_msg, query: string, qtype: count, qclass: count) { - Intel::seen([$str=query, - $str_type=Intel::DOMAIN, + Intel::seen([$indicator=query, + $indicator_type=Intel::DOMAIN, $conn=c, $where=DNS::IN_REQUEST]); } diff --git a/scripts/policy/frameworks/intel/http-host-header.bro b/scripts/policy/frameworks/intel/http-host-header.bro index f16b1628aa..3fd28b8ef9 100644 --- a/scripts/policy/frameworks/intel/http-host-header.bro +++ b/scripts/policy/frameworks/intel/http-host-header.bro @@ -4,8 +4,8 @@ event http_header(c: connection, is_orig: bool, name: string, value: string) { if ( is_orig && name == "HOST" ) - Intel::seen([$str=value, - $str_type=Intel::DOMAIN, + Intel::seen([$indicator=value, + $indicator_type=Intel::DOMAIN, $conn=c, $where=HTTP::IN_HOST_HEADER]); } diff --git 
a/scripts/policy/frameworks/intel/http-url.bro b/scripts/policy/frameworks/intel/http-url.bro index feef4f0dac..340ae3c5ab 100644 --- a/scripts/policy/frameworks/intel/http-url.bro +++ b/scripts/policy/frameworks/intel/http-url.bro @@ -5,8 +5,8 @@ event http_message_done(c: connection, is_orig: bool, stat: http_message_stat) { if ( is_orig && c?$http ) - Intel::seen([$str=HTTP::build_url(c$http), - $str_type=Intel::URL, + Intel::seen([$indicator=HTTP::build_url(c$http), + $indicator_type=Intel::URL, $conn=c, $where=HTTP::IN_URL]); } diff --git a/scripts/policy/frameworks/intel/http-user-agents.bro b/scripts/policy/frameworks/intel/http-user-agents.bro index 93445c1e43..7c4558d2a5 100644 --- a/scripts/policy/frameworks/intel/http-user-agents.bro +++ b/scripts/policy/frameworks/intel/http-user-agents.bro @@ -4,8 +4,8 @@ event http_header(c: connection, is_orig: bool, name: string, value: string) { if ( is_orig && name == "USER-AGENT" ) - Intel::seen([$str=value, - $str_type=Intel::USER_AGENT, + Intel::seen([$indicator=value, + $indicator_type=Intel::SOFTWARE, $conn=c, $where=HTTP::IN_USER_AGENT_HEADER]); } diff --git a/scripts/policy/frameworks/intel/smtp-url-extraction.bro b/scripts/policy/frameworks/intel/smtp-url-extraction.bro index 2b87f809a6..a3ba410641 100644 --- a/scripts/policy/frameworks/intel/smtp-url-extraction.bro +++ b/scripts/policy/frameworks/intel/smtp-url-extraction.bro @@ -13,8 +13,8 @@ event intel_mime_data(f: fa_file, data: string) local urls = find_all_urls_without_scheme(data); for ( url in urls ) { - Intel::seen([$str=url, - $str_type=Intel::URL, + Intel::seen([$indicator=url, + $indicator_type=Intel::URL, $conn=c, $where=SMTP::IN_MESSAGE]); } diff --git a/scripts/policy/frameworks/intel/smtp.bro b/scripts/policy/frameworks/intel/smtp.bro index 02e97ea54a..d760995e51 100644 --- a/scripts/policy/frameworks/intel/smtp.bro +++ b/scripts/policy/frameworks/intel/smtp.bro @@ -18,8 +18,8 @@ event mime_end_entity(c: connection) } if ( c$smtp?$user_agent ) - Intel::seen([$str=c$smtp$user_agent, - $str_type=Intel::USER_AGENT, + Intel::seen([$indicator=c$smtp$user_agent, + $indicator_type=Intel::SOFTWARE, $conn=c, $where=SMTP::IN_HEADER]); @@ -29,43 +29,69 @@ event mime_end_entity(c: connection) $where=SMTP::IN_X_ORIGINATING_IP_HEADER]); if ( c$smtp?$mailfrom ) - Intel::seen([$str=c$smtp$mailfrom, - $str_type=Intel::EMAIL, - $conn=c, - $where=SMTP::IN_MAIL_FROM]); + { + local mailfromparts = split_n(c$smtp$mailfrom, /<.+>/, T, 1); + if ( |mailfromparts| > 2 ) + { + Intel::seen([$indicator=mailfromparts[2][1:-2], + $indicator_type=Intel::EMAIL, + $conn=c, + $where=SMTP::IN_MAIL_FROM]); + } + } if ( c$smtp?$rcptto ) { for ( rcptto in c$smtp$rcptto ) { - Intel::seen([$str=rcptto, - $str_type=Intel::EMAIL, - $conn=c, - $where=SMTP::IN_RCPT_TO]); + local rcpttoparts = split_n(rcptto, /<.+>/, T, 1); + if ( |rcpttoparts| > 2 ) + { + Intel::seen([$indicator=rcpttoparts[2][1:-2], + $indicator_type=Intel::EMAIL, + $conn=c, + $where=SMTP::IN_RCPT_TO]); + } } } if ( c$smtp?$from ) - Intel::seen([$str=c$smtp$from, - $str_type=Intel::EMAIL, - $conn=c, - $where=SMTP::IN_FROM]); + { + local fromparts = split_n(c$smtp$from, /<.+>/, T, 1); + if ( |fromparts| > 2 ) + { + Intel::seen([$indicator=fromparts[2][1:-2], + $indicator_type=Intel::EMAIL, + $conn=c, + $where=SMTP::IN_FROM]); + } + } if ( c$smtp?$to ) { for ( email_to in c$smtp$to ) { - Intel::seen([$str=email_to, - $str_type=Intel::EMAIL, - $conn=c, - $where=SMTP::IN_TO]); + local toparts = split_n(email_to, /<.+>/, T, 1); + if ( |toparts| 
> 2 ) + { + Intel::seen([$indicator=toparts[2][1:-2], + $indicator_type=Intel::EMAIL, + $conn=c, + $where=SMTP::IN_TO]); + } } } if ( c$smtp?$reply_to ) - Intel::seen([$str=c$smtp$reply_to, - $str_type=Intel::EMAIL, - $conn=c, - $where=SMTP::IN_REPLY_TO]); + { + local replytoparts = split_n(c$smtp$reply_to, /<.+>/, T, 1); + if ( |replytoparts| > 2 ) + { + Intel::seen([$indicator=replytoparts[2][1:-2], + $indicator_type=Intel::EMAIL, + $conn=c, + $where=SMTP::IN_REPLY_TO]); + } + } } } diff --git a/scripts/policy/frameworks/intel/ssl.bro b/scripts/policy/frameworks/intel/ssl.bro index 3f18a11e6e..e404c39e5b 100644 --- a/scripts/policy/frameworks/intel/ssl.bro +++ b/scripts/policy/frameworks/intel/ssl.bro @@ -10,14 +10,14 @@ event x509_certificate(c: connection, is_orig: bool, cert: X509, chain_idx: coun { local email = sub(cert$subject, /^.*emailAddress=/, ""); email = sub(email, /,.*$/, ""); - Intel::seen([$str=email, - $str_type=Intel::EMAIL, + Intel::seen([$indicator=email, + $indicator_type=Intel::EMAIL, $conn=c, $where=(is_orig ? SSL::IN_CLIENT_CERT : SSL::IN_SERVER_CERT)]); } - Intel::seen([$str=sha1_hash(der_cert), - $str_type=Intel::CERT_HASH, + Intel::seen([$indicator=sha1_hash(der_cert), + $indicator_type=Intel::CERT_HASH, $conn=c, $where=(is_orig ? SSL::IN_CLIENT_CERT : SSL::IN_SERVER_CERT)]); } @@ -27,8 +27,8 @@ event ssl_extension(c: connection, is_orig: bool, code: count, val: string) { if ( is_orig && SSL::extensions[code] == "server_name" && c?$ssl && c$ssl?$server_name ) - Intel::seen([$str=c$ssl$server_name, - $str_type=Intel::DOMAIN, + Intel::seen([$indicator=c$ssl$server_name, + $indicator_type=Intel::DOMAIN, $conn=c, $where=SSL::IN_SERVER_NAME]); } diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/manager-1.intel.log b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/manager-1.intel.log index 26efc039c4..00871e7d93 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/manager-1.intel.log +++ b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/manager-1.intel.log @@ -3,8 +3,8 @@ #empty_field (empty) #unset_field - #path intel -#open 2012-10-03-20-20-39 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p seen.host seen.str seen.str_type seen.where sources -#types time string addr port addr port addr string enum enum table[string] -1349295639.424940 - - - - - 123.123.123.123 - - Intel::IN_ANYWHERE worker-1 -#close 2012-10-03-20-20-49 +#open 2013-07-19-17-05-48 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p seen.indicator seen.indicator_type seen.where sources +#types time string addr port addr port string enum enum table[string] +1374253548.038580 - - - - - 123.123.123.123 Intel::ADDR Intel::IN_ANYWHERE worker-1 +#close 2013-07-19-17-05-57 diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.input-and-match/broproc.intel.log b/testing/btest/Baseline/scripts.base.frameworks.intel.input-and-match/broproc.intel.log index d72e9efed3..8c01ae5c27 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.intel.input-and-match/broproc.intel.log +++ b/testing/btest/Baseline/scripts.base.frameworks.intel.input-and-match/broproc.intel.log @@ -3,9 +3,9 @@ #empty_field (empty) #unset_field - #path intel -#open 2012-10-03-20-18-05 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p seen.host seen.str seen.str_type seen.where sources -#types time string addr port addr port addr string enum enum table[string] -1349295485.114156 - - - - - - 
e@mail.com Intel::EMAIL SOMEWHERE source1 -1349295485.114156 - - - - - 1.2.3.4 - - SOMEWHERE source1 -#close 2012-10-03-20-18-05 +#open 2013-07-19-17-04-26 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p seen.indicator seen.indicator_type seen.where sources +#types time string addr port addr port string enum enum table[string] +1374253466.857185 - - - - - e@mail.com Intel::EMAIL SOMEWHERE source1 +1374253466.857185 - - - - - 1.2.3.4 Intel::ADDR SOMEWHERE source1 +#close 2013-07-19-17-04-26 diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.read-file-dist-cluster/manager-1.intel.log b/testing/btest/Baseline/scripts.base.frameworks.intel.read-file-dist-cluster/manager-1.intel.log index 8069bad528..70d92a3604 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.intel.read-file-dist-cluster/manager-1.intel.log +++ b/testing/btest/Baseline/scripts.base.frameworks.intel.read-file-dist-cluster/manager-1.intel.log @@ -3,11 +3,11 @@ #empty_field (empty) #unset_field - #path intel -#open 2012-10-10-15-05-23 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p seen.host seen.str seen.str_type seen.where sources -#types time string addr port addr port addr string enum enum table[string] -1349881523.548946 - - - - - 1.2.3.4 - - Intel::IN_A_TEST source1 -1349881523.548946 - - - - - - e@mail.com Intel::EMAIL Intel::IN_A_TEST source1 -1349881524.567896 - - - - - 1.2.3.4 - - Intel::IN_A_TEST source1 -1349881524.567896 - - - - - - e@mail.com Intel::EMAIL Intel::IN_A_TEST source1 -#close 2012-10-10-15-05-24 +#open 2013-07-19-17-06-57 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p seen.indicator seen.indicator_type seen.where sources +#types time string addr port addr port string enum enum table[string] +1374253617.312158 - - - - - 1.2.3.4 Intel::ADDR Intel::IN_A_TEST source1 +1374253617.312158 - - - - - e@mail.com Intel::EMAIL Intel::IN_A_TEST source1 +1374253618.332565 - - - - - 1.2.3.4 Intel::ADDR Intel::IN_A_TEST source1 +1374253618.332565 - - - - - e@mail.com Intel::EMAIL Intel::IN_A_TEST source1 +#close 2013-07-19-17-07-06 diff --git a/testing/btest/scripts/base/frameworks/intel/cluster-transparency.bro b/testing/btest/scripts/base/frameworks/intel/cluster-transparency.bro index 3810de5d4b..4d977d475d 100644 --- a/testing/btest/scripts/base/frameworks/intel/cluster-transparency.bro +++ b/testing/btest/scripts/base/frameworks/intel/cluster-transparency.bro @@ -28,7 +28,7 @@ event remote_connection_handshake_done(p: event_peer) # Insert the data once both workers are connected. if ( Cluster::local_node_type() == Cluster::MANAGER && Cluster::worker_count == 2 ) { - Intel::insert([$host=1.2.3.4,$meta=[$source="manager"]]); + Intel::insert([$indicator="1.2.3.4", $indicator_type=Intel::ADDR, $meta=[$source="manager"]]); } } @@ -39,7 +39,7 @@ event Intel::cluster_new_item(item: Intel::Item) if ( ! is_remote_event() ) return; - print fmt("cluster_new_item: %s inserted by %s (from peer: %s)", item$host, item$meta$source, get_event_peer()$descr); + print fmt("cluster_new_item: %s inserted by %s (from peer: %s)", item$indicator, item$meta$source, get_event_peer()$descr); if ( ! sent_data ) { @@ -47,9 +47,9 @@ event Intel::cluster_new_item(item: Intel::Item) # full cluster is constructed. 
sent_data = T; if ( Cluster::node == "worker-1" ) - Intel::insert([$host=123.123.123.123,$meta=[$source="worker-1"]]); + Intel::insert([$indicator="123.123.123.123", $indicator_type=Intel::ADDR, $meta=[$source="worker-1"]]); if ( Cluster::node == "worker-2" ) - Intel::insert([$host=4.3.2.1,$meta=[$source="worker-2"]]); + Intel::insert([$indicator="4.3.2.1", $indicator_type=Intel::ADDR, $meta=[$source="worker-2"]]); } # We're forcing worker-2 to do a lookup when it has three intelligence items diff --git a/testing/btest/scripts/base/frameworks/intel/input-and-match.bro b/testing/btest/scripts/base/frameworks/intel/input-and-match.bro index f77f5c0f1d..7150d30993 100644 --- a/testing/btest/scripts/base/frameworks/intel/input-and-match.bro +++ b/testing/btest/scripts/base/frameworks/intel/input-and-match.bro @@ -5,10 +5,10 @@ # @TEST-EXEC: btest-diff broproc/intel.log @TEST-START-FILE intel.dat -#fields host net str str_type meta.source meta.desc meta.url -1.2.3.4 - - - source1 this host is just plain baaad http://some-data-distributor.com/1234 -1.2.3.4 - - - source1 this host is just plain baaad http://some-data-distributor.com/1234 -- - e@mail.com Intel::EMAIL source1 Phishing email source http://some-data-distributor.com/100000 +#fields indicator indicator_type meta.source meta.desc meta.url +1.2.3.4 Intel::ADDR source1 this host is just plain baaad http://some-data-distributor.com/1234 +1.2.3.4 Intel::ADDR source1 this host is just plain baaad http://some-data-distributor.com/1234 +e@mail.com Intel::EMAIL source1 Phishing email source http://some-data-distributor.com/100000 @TEST-END-FILE @load frameworks/communication/listen @@ -18,8 +18,8 @@ redef enum Intel::Where += { SOMEWHERE }; event do_it() { - Intel::seen([$str="e@mail.com", - $str_type=Intel::EMAIL, + Intel::seen([$indicator="e@mail.com", + $indicator_type=Intel::EMAIL, $where=SOMEWHERE]); Intel::seen([$host=1.2.3.4, diff --git a/testing/btest/scripts/base/frameworks/intel/read-file-dist-cluster.bro b/testing/btest/scripts/base/frameworks/intel/read-file-dist-cluster.bro index 6838736249..f336fe24b3 100644 --- a/testing/btest/scripts/base/frameworks/intel/read-file-dist-cluster.bro +++ b/testing/btest/scripts/base/frameworks/intel/read-file-dist-cluster.bro @@ -19,10 +19,10 @@ redef Cluster::nodes = { @TEST-END-FILE @TEST-START-FILE intel.dat -#fields host net str str_type meta.source meta.desc meta.url -1.2.3.4 - - - source1 this host is just plain baaad http://some-data-distributor.com/1234 -1.2.3.4 - - - source1 this host is just plain baaad http://some-data-distributor.com/1234 -- - e@mail.com Intel::EMAIL source1 Phishing email source http://some-data-distributor.com/100000 +#fields indicator indicator_type meta.source meta.desc meta.url +1.2.3.4 Intel::ADDR source1 this host is just plain baaad http://some-data-distributor.com/1234 +1.2.3.4 Intel::ADDR source1 this host is just plain baaad http://some-data-distributor.com/1234 +e@mail.com Intel::EMAIL source1 Phishing email source http://some-data-distributor.com/100000 @TEST-END-FILE @load base/frameworks/control @@ -41,7 +41,7 @@ redef enum Intel::Where += { event do_it() { Intel::seen([$host=1.2.3.4, $where=Intel::IN_A_TEST]); - Intel::seen([$str="e@mail.com", $str_type=Intel::EMAIL, $where=Intel::IN_A_TEST]); + Intel::seen([$indicator="e@mail.com", $indicator_type=Intel::EMAIL, $where=Intel::IN_A_TEST]); } event bro_init() From 9dae9dd3e26627d50c3a3620205eee3db88b2e4b Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Fri, 19 Jul 2013 13:53:15 -0400 Subject: [PATCH 
081/118] Remove the intel insertion after heuristically detecting ssh bruteforcing. --- scripts/policy/protocols/ssh/detect-bruteforcing.bro | 4 ---- 1 file changed, 4 deletions(-) diff --git a/scripts/policy/protocols/ssh/detect-bruteforcing.bro b/scripts/policy/protocols/ssh/detect-bruteforcing.bro index 309905e939..ada418e61f 100644 --- a/scripts/policy/protocols/ssh/detect-bruteforcing.bro +++ b/scripts/policy/protocols/ssh/detect-bruteforcing.bro @@ -58,10 +58,6 @@ event bro_init() $msg=fmt("%s appears to be guessing SSH passwords (seen in %d connections).", key$host, r$num), $src=key$host, $identifier=cat(key$host)]); - # Insert the guesser into the intel framework. - Intel::insert([$host=key$host, - $meta=[$source="local", - $desc=fmt("Bro observed %d apparently failed SSH connections.", r$num)]]); }]); } From fd2e155d1af26086d40e12d38f564b7954f4597e Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Sun, 21 Jul 2013 17:34:25 +0200 Subject: [PATCH 082/118] Tweak hasher interface. --- src/BloomFilter.cc | 34 +++++++------- src/BloomFilter.h | 31 +++++++------ src/CMakeLists.txt | 2 +- src/HashPolicy.cc | 77 -------------------------------- src/HashPolicy.h | 97 ---------------------------------------- src/Hasher.cc | 79 ++++++++++++++++++++++++++++++++ src/Hasher.h | 109 +++++++++++++++++++++++++++++++++++++++++++++ src/bro.bif | 8 ++-- 8 files changed, 225 insertions(+), 212 deletions(-) delete mode 100644 src/HashPolicy.cc delete mode 100644 src/HashPolicy.h create mode 100644 src/Hasher.cc create mode 100644 src/Hasher.h diff --git a/src/BloomFilter.cc b/src/BloomFilter.cc index c59092b1e4..f399bddeca 100644 --- a/src/BloomFilter.cc +++ b/src/BloomFilter.cc @@ -6,19 +6,19 @@ #include "Serializer.h" BloomFilter::BloomFilter() - : hash_(NULL) + : hasher_(NULL) { } -BloomFilter::BloomFilter(const HashPolicy* hash_policy) - : hash_(hash_policy) +BloomFilter::BloomFilter(const Hasher* hasher) + : hasher_(hasher) { } BloomFilter::~BloomFilter() { - if ( hash_ ) - delete hash_; + if ( hasher_ ) + delete hasher_; } bool BloomFilter::Serialize(SerialInfo* info) const @@ -35,9 +35,9 @@ BloomFilter* BloomFilter::Unserialize(UnserialInfo* info) bool BloomFilter::DoSerialize(SerialInfo* info) const { DO_SERIALIZE(SER_BLOOMFILTER, SerialObj); - if ( ! SERIALIZE(static_cast(hash_->K())) ) + if ( ! SERIALIZE(static_cast(hasher_->K())) ) return false; - return SERIALIZE_STR(hash_->Name().c_str(), hash_->Name().size()); + return SERIALIZE_STR(hasher_->Name().c_str(), hasher_->Name().size()); } bool BloomFilter::DoUnserialize(UnserialInfo* info) @@ -49,7 +49,7 @@ bool BloomFilter::DoUnserialize(UnserialInfo* info) const char* name; if ( ! UNSERIALIZE_STR(&name, 0) ) return false; - hash_ = HashPolicy::Create(k, name); + hasher_ = Hasher::Create(k, name); delete [] name; return true; } @@ -70,7 +70,7 @@ size_t BasicBloomFilter::K(size_t cells, size_t capacity) BasicBloomFilter* BasicBloomFilter::Merge(const BasicBloomFilter* x, const BasicBloomFilter* y) { - // TODO: Ensure that x and y use the same HashPolicy before proceeding. + // TODO: Ensure that x and y use the same Hasher before proceeding. 
BasicBloomFilter* result = new BasicBloomFilter(); result->bits_ = new BitVector(*x->bits_ | *y->bits_); return result; @@ -81,8 +81,8 @@ BasicBloomFilter::BasicBloomFilter() { } -BasicBloomFilter::BasicBloomFilter(const HashPolicy* hash_policy, size_t cells) - : BloomFilter(hash_policy), +BasicBloomFilter::BasicBloomFilter(const Hasher* hasher, size_t cells) + : BloomFilter(hasher), bits_(new BitVector(cells)) { } @@ -102,13 +102,13 @@ bool BasicBloomFilter::DoUnserialize(UnserialInfo* info) return bits_ != NULL; } -void BasicBloomFilter::AddImpl(const HashPolicy::hash_vector& h) +void BasicBloomFilter::AddImpl(const Hasher::digest_vector& h) { for ( size_t i = 0; i < h.size(); ++i ) bits_->Set(h[i] % bits_->Size()); } -size_t BasicBloomFilter::CountImpl(const HashPolicy::hash_vector& h) const +size_t BasicBloomFilter::CountImpl(const Hasher::digest_vector& h) const { for ( size_t i = 0; i < h.size(); ++i ) if ( ! (*bits_)[h[i] % bits_->Size()] ) @@ -129,9 +129,9 @@ CountingBloomFilter::CountingBloomFilter() { } -CountingBloomFilter::CountingBloomFilter(const HashPolicy* hash_policy, +CountingBloomFilter::CountingBloomFilter(const Hasher* hasher, size_t cells, size_t width) - : BloomFilter(hash_policy) + : BloomFilter(hasher) { cells_ = new CounterVector(width, cells); } @@ -152,13 +152,13 @@ bool CountingBloomFilter::DoUnserialize(UnserialInfo* info) return cells_ != NULL; } -void CountingBloomFilter::AddImpl(const HashPolicy::hash_vector& h) +void CountingBloomFilter::AddImpl(const Hasher::digest_vector& h) { for ( size_t i = 0; i < h.size(); ++i ) cells_->Increment(h[i] % cells_->Size(), 1); } -size_t CountingBloomFilter::CountImpl(const HashPolicy::hash_vector& h) const +size_t CountingBloomFilter::CountImpl(const Hasher::digest_vector& h) const { CounterVector::size_type min = std::numeric_limits::max(); diff --git a/src/BloomFilter.h b/src/BloomFilter.h index 189f4920b7..92f15c6070 100644 --- a/src/BloomFilter.h +++ b/src/BloomFilter.h @@ -3,7 +3,7 @@ #include #include "BitVector.h" -#include "HashPolicy.h" +#include "Hasher.h" class CounterVector; @@ -12,7 +12,7 @@ class CounterVector; */ class BloomFilter : public SerialObj { public: - // At this point we won't let the user choose the hash policy, but we might + // At this point we won't let the user choose the hasher, but we might // open up the interface in the future. virtual ~BloomFilter(); @@ -23,7 +23,7 @@ public: template void Add(const T& x) { - AddImpl(hash_->Hash(&x, sizeof(x))); + AddImpl((*hasher_)(x)); } /** @@ -36,7 +36,7 @@ public: template size_t Count(const T& x) const { - return CountImpl(hash_->Hash(&x, sizeof(x))); + return CountImpl((*hasher_)(x)); } bool Serialize(SerialInfo* info) const; @@ -50,15 +50,15 @@ protected: /** * Constructs a Bloom filter. * - * @param hash_policy The hash policy to use for this Bloom filter. + * @param hasher The hasher to use for this Bloom filter. */ - BloomFilter(const HashPolicy* hash_policy); + BloomFilter(const Hasher* hasher); - virtual void AddImpl(const HashPolicy::hash_vector& hashes) = 0; - virtual size_t CountImpl(const HashPolicy::hash_vector& hashes) const = 0; + virtual void AddImpl(const Hasher::digest_vector& hashes) = 0; + virtual size_t CountImpl(const Hasher::digest_vector& hashes) const = 0; private: - const HashPolicy* hash_; + const Hasher* hasher_; }; /** @@ -98,15 +98,15 @@ public: /** * Constructs a basic Bloom filter with a given number of cells and capacity. 
*/ - BasicBloomFilter(const HashPolicy* hash_policy, size_t cells); + BasicBloomFilter(const Hasher* hasher, size_t cells); protected: DECLARE_SERIAL(BasicBloomFilter); BasicBloomFilter(); - virtual void AddImpl(const HashPolicy::hash_vector& h); - virtual size_t CountImpl(const HashPolicy::hash_vector& h) const; + virtual void AddImpl(const Hasher::digest_vector& h); + virtual size_t CountImpl(const Hasher::digest_vector& h) const; private: BitVector* bits_; @@ -120,16 +120,15 @@ public: static CountingBloomFilter* Merge(const CountingBloomFilter* x, const CountingBloomFilter* y); - CountingBloomFilter(const HashPolicy* hash_policy, size_t cells, - size_t width); + CountingBloomFilter(const Hasher* hasher, size_t cells, size_t width); protected: DECLARE_SERIAL(CountingBloomFilter); CountingBloomFilter(); - virtual void AddImpl(const HashPolicy::hash_vector& h); - virtual size_t CountImpl(const HashPolicy::hash_vector& h) const; + virtual void AddImpl(const Hasher::digest_vector& h); + virtual size_t CountImpl(const Hasher::digest_vector& h) const; private: CounterVector* cells_; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f2c7ce6bad..87a3db3b62 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -279,7 +279,7 @@ set(bro_SRCS Frame.cc Func.cc Hash.cc - HashPolicy.cc + Hasher.cc ID.cc IntSet.cc IOSource.cc diff --git a/src/HashPolicy.cc b/src/HashPolicy.cc deleted file mode 100644 index 7ce754be3c..0000000000 --- a/src/HashPolicy.cc +++ /dev/null @@ -1,77 +0,0 @@ -#include "HashPolicy.h" - -#include "digest.h" - -Hasher::Hasher(size_t seed, const std::string& extra) - : h_(compute_seed(seed, extra)) - { - } - -Hasher::hash_type Hasher::operator()(const void* x, size_t n) const - { - return n == 0 ? 0 : h_(x, n); - } - -size_t Hasher::compute_seed(size_t seed, const std::string& extra) - { - u_char digest[SHA256_DIGEST_LENGTH]; - SHA256_CTX ctx; - sha256_init(&ctx); - if ( extra.empty() ) - { - unsigned int first_seed = initial_seed(); - sha256_update(&ctx, &first_seed, sizeof(first_seed)); - } - else - { - sha256_update(&ctx, extra.c_str(), extra.size()); - } - sha256_update(&ctx, &seed, sizeof(seed)); - sha256_final(&ctx, digest); - return *reinterpret_cast(digest); - } - - -HashPolicy* HashPolicy::Create(size_t k, const std::string& name) - { - return new DefaultHashing(k, name); - } - -HashPolicy::HashPolicy(size_t k, const std::string& name) - : k_(k), name_(name) - { - } - -DefaultHashing::DefaultHashing(size_t k, const std::string& name) - : HashPolicy(k, name) - { - for ( size_t i = 0; i < k; ++i ) - hashers_.push_back(Hasher(i, name)); - } - -HashPolicy::hash_vector DefaultHashing::Hash(const void* x, size_t n) const - { - hash_vector h(K(), 0); - for ( size_t i = 0; i < h.size(); ++i ) - h[i] = hashers_[i](x, n); - return h; - } - -DoubleHashing::DoubleHashing(size_t k, const std::string& name) - : HashPolicy(k, name), - hasher1_(1, name), - hasher2_(2, name) - { - } - -HashPolicy::hash_vector DoubleHashing::Hash(const void* x, size_t n) const - { - hash_type h1 = hasher1_(x, n); - hash_type h2 = hasher2_(x, n); - hash_vector h(K(), 0); - for ( size_t i = 0; i < h.size(); ++i ) - h[i] = h1 + i * h2; - return h; - } - - diff --git a/src/HashPolicy.h b/src/HashPolicy.h deleted file mode 100644 index 7bdb968bfe..0000000000 --- a/src/HashPolicy.h +++ /dev/null @@ -1,97 +0,0 @@ -#ifndef HashPolicy_h -#define HashPolicy_h - -#include "Hash.h" -#include "H3.h" - -/** - * A functor that computes a universal hash function. 
- */ -class Hasher { -public: - typedef hash_t hash_type; - - /** - * Constructs a hasher seeded by a given seed and optionally an extra - * descriptor. - * - * @param seed The seed to use. - * - * @param extra If not `NULL`, the hasher will not mix in the initial seed - * but instead use this NUL-terminated string as additional seed. - */ - Hasher(size_t seed, const std::string& extra = ""); - - /** - * Computes the hash digest of contiguous data. - * - * @param x A pointer to the beginning of the byte sequence to hash. - * - * @param n The length of the sequence pointed to by *x*. - */ - hash_type operator()(const void* x, size_t n) const; - -private: - static size_t compute_seed(size_t seed, const std::string& extra); - - H3 h_; -}; - -/** - * The abstract base class for hash policies that hash elements *k* times. - */ -class HashPolicy { -public: - /** - * Constructs the hashing policy used by the implementation. This factory - * function exists because the HashingPolicy class hierachy is not yet - * serializable. - */ - static HashPolicy* Create(size_t k, const std::string& name); - - typedef Hasher::hash_type hash_type; - typedef std::vector hash_vector; - - virtual ~HashPolicy() { } - - virtual hash_vector Hash(const void* x, size_t n) const = 0; - - size_t K() const { return k_; } - const std::string& Name() const { return name_; } - -protected: - HashPolicy(size_t k, const std::string& name); - -private: - const size_t k_; - std::string name_; -}; - -/** - * The default hashing policy. Performs *k* hash function computations. - */ -class DefaultHashing : public HashPolicy { -public: - DefaultHashing(size_t k, const std::string& name); - - virtual hash_vector Hash(const void* x, size_t n) const /* override */; - -private: - std::vector hashers_; -}; - -/** - * The *double-hashing* policy. Uses a linear combination of two hash functions. - */ -class DoubleHashing : public HashPolicy { -public: - DoubleHashing(size_t k, const std::string& name); - - virtual hash_vector Hash(const void* x, size_t n) const; - -private: - Hasher hasher1_; - Hasher hasher2_; -}; - -#endif diff --git a/src/Hasher.cc b/src/Hasher.cc new file mode 100644 index 0000000000..045adcd174 --- /dev/null +++ b/src/Hasher.cc @@ -0,0 +1,79 @@ +#include "Hasher.h" + +#include "digest.h" + +Hasher::UHF::UHF(size_t seed, const std::string& extra) + : h_(compute_seed(seed, extra)) + { + } + +Hasher::digest Hasher::UHF::hash(const void* x, size_t n) const + { + assert(n <= UHASH_KEY_SIZE); + return n == 0 ? 0 : h_(x, n); + } + +size_t Hasher::UHF::compute_seed(size_t seed, const std::string& extra) + { + u_char buf[SHA256_DIGEST_LENGTH]; + SHA256_CTX ctx; + sha256_init(&ctx); + if ( extra.empty() ) + { + unsigned int first_seed = initial_seed(); + sha256_update(&ctx, &first_seed, sizeof(first_seed)); + } + else + { + sha256_update(&ctx, extra.c_str(), extra.size()); + } + sha256_update(&ctx, &seed, sizeof(seed)); + sha256_final(&ctx, buf); + // Take the first sizeof(size_t) bytes as seed. 
+ return *reinterpret_cast(buf); + } + + +Hasher* Hasher::Create(size_t k, const std::string& name) + { + return new DefaultHasher(k, name); + } + +Hasher::Hasher(size_t k, const std::string& name) + : k_(k), name_(name) + { + } + +DefaultHasher::DefaultHasher(size_t k, const std::string& name) + : Hasher(k, name) + { + for ( size_t i = 0; i < k; ++i ) + hash_functions_.push_back(UHF(i, name)); + } + +Hasher::digest_vector DefaultHasher::Hash(const void* x, size_t n) const + { + digest_vector h(K(), 0); + for ( size_t i = 0; i < h.size(); ++i ) + h[i] = hash_functions_[i](x, n); + return h; + } + +DoubleHasher::DoubleHasher(size_t k, const std::string& name) + : Hasher(k, name), + h1_(1, name), + h2_(2, name) + { + } + +Hasher::digest_vector DoubleHasher::Hash(const void* x, size_t n) const + { + digest h1 = h1_(x, n); + digest h2 = h2_(x, n); + digest_vector h(K(), 0); + for ( size_t i = 0; i < h.size(); ++i ) + h[i] = h1 + i * h2; + return h; + } + + diff --git a/src/Hasher.h b/src/Hasher.h new file mode 100644 index 0000000000..8d0af6b03f --- /dev/null +++ b/src/Hasher.h @@ -0,0 +1,109 @@ +#ifndef Hasher_h +#define Hasher_h + +#include "Hash.h" +#include "H3.h" + +/** + * The abstract base class for hashers, i.e., constructs which hash elements + * *k* times. + */ +class Hasher { +public: + typedef hash_t digest; + typedef std::vector digest_vector; + + /** + * Constructs the hashing policy used by the implementation. + * + * @todo This factory function exists because the HashingPolicy class + * hierachy is not yet serializable. + */ + static Hasher* Create(size_t k, const std::string& name); + + virtual ~Hasher() { } + + template + digest_vector operator()(const T& x) const + { + return Hash(&x, sizeof(T)); + } + + virtual digest_vector Hash(const void* x, size_t n) const = 0; + + size_t K() const { return k_; } + const std::string& Name() const { return name_; } + +protected: + /** + * A universal hash function family. + */ + class UHF { + public: + /** + * Constructs an H3 hash function seeded with a given seed and an optional + * extra seed to replace the initial Bro seed. + * + * @param seed The seed to use for this instance. + * + * @param extra If not empty, this parameter replaces the initial seed to + * compute the seed for t to compute the + * seed + * NUL-terminated string as additional seed. + */ + UHF(size_t seed, const std::string& extra = ""); + + template + digest operator()(const T& x) const + { + return hash(&x, sizeof(T)); + } + + digest operator()(const void* x, size_t n) const + { + return hash(x, n); + } + + digest hash(const void* x, size_t n) const; + + private: + static size_t compute_seed(size_t seed, const std::string& extra); + + H3 h_; + }; + + Hasher(size_t k, const std::string& name); + +private: + const size_t k_; + std::string name_; +}; + +/** + * The default hashing policy. Performs *k* hash function computations. + */ +class DefaultHasher : public Hasher { +public: + DefaultHasher(size_t k, const std::string& name); + + virtual digest_vector Hash(const void* x, size_t n) const /* final */; + +private: + std::vector hash_functions_; +}; + +/** + * The *double-hashing* policy. Uses a linear combination of two hash functions. 
+ */ +class DoubleHasher : public Hasher { +public: + DoubleHasher(size_t k, const std::string& name); + + virtual digest_vector Hash(const void* x, size_t n) const /* final */; + +private: + UHF h1_; + UHF h2_; +}; + +#endif diff --git a/src/bro.bif b/src/bro.bif index d0ce066139..71f8c0716f 100644 --- a/src/bro.bif +++ b/src/bro.bif @@ -5008,8 +5008,8 @@ function bloomfilter_basic_init%(fp: double, capacity: count, size_t cells = BasicBloomFilter::M(fp, capacity); size_t optimal_k = BasicBloomFilter::K(cells, capacity); - const HashPolicy* hp = HashPolicy::Create(optimal_k, name->CheckString()); - return new BloomFilterVal(new BasicBloomFilter(hp, cells)); + const Hasher* h = Hasher::Create(optimal_k, name->CheckString()); + return new BloomFilterVal(new BasicBloomFilter(h, cells)); %} ## Creates a counting Bloom filter. @@ -5029,11 +5029,11 @@ function bloomfilter_basic_init%(fp: double, capacity: count, function bloomfilter_counting_init%(k: count, cells: count, max: count, name: string &default=""%): opaque of bloomfilter %{ - const HashPolicy* hp = HashPolicy::Create(k, name->CheckString()); + const Hasher* h = Hasher::Create(k, name->CheckString()); uint16 width = 0; while ( max >>= 1 ) ++width; - return new BloomFilterVal(new CountingBloomFilter(hp, cells, width)); + return new BloomFilterVal(new CountingBloomFilter(h, cells, width)); %} ## Adds an element to a Bloom filter. From 79a2e4b5d5c28076a8db1857d3ea6a8891e1ef7c Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Sun, 21 Jul 2013 22:41:48 +0200 Subject: [PATCH 083/118] Implement missing CounterVector functions. --- src/CounterVector.cc | 66 ++++++++++++++++++++++++++++++++++++++------ src/CounterVector.h | 15 ++++++++++ 2 files changed, 73 insertions(+), 8 deletions(-) diff --git a/src/CounterVector.cc b/src/CounterVector.cc index 8ed4c30427..a661492313 100644 --- a/src/CounterVector.cc +++ b/src/CounterVector.cc @@ -1,5 +1,6 @@ #include "CounterVector.h" +#include #include "BitVector.h" #include "Serializer.h" @@ -15,23 +16,66 @@ CounterVector::~CounterVector() bool CounterVector::Increment(size_type cell, count_type value) { - // TODO - assert(! "not yet implemented"); + assert(cell < Size()); + assert(value != 0); + size_t lsb = cell * width_; + if (value >= Max()) + { + bool r = false; + for (size_t i = 0; i < width_; ++i) + if (! (*bits_)[lsb + i]) + { + bits_->Set(lsb + i); + if (! r) + r = true; + } + return r; + } + bool carry = false; + for (size_t i = 0; i < width_; ++i) + { + bool b1 = (*bits_)[lsb + i]; + bool b2 = value & (1 << i); + (*bits_)[lsb + i] ^= b2 != carry; // bit1 ^ bit2 ^ carry + carry = carry ? b1 || b2 : b1 && b2; + } + if (! carry) + return true; + for (size_t i = 0; i < width_; ++i) + bits_->Set(lsb + i); return false; } bool CounterVector::Decrement(size_type cell, count_type value) { - // TODO - assert(! "not yet implemented"); - return false; + assert(cell < Size()); + size_t lsb = cell * width_; + bool success; + while (value --> 0) + { + success = false; + for (size_t i = lsb; i < lsb + width_; ++i) + if ((*bits_)[i]) + { + bits_->Reset(i); + while (i && i > lsb) + bits_->Set(--i); + success = true; + break; + } + } + return success; } CounterVector::count_type CounterVector::Count(size_type cell) const { - // TODO - assert(! 
"not yet implemented"); - return 0; + assert(cell < Size()); + size_t cnt = 0, order = 1; + size_t lsb = cell * width_; + for (size_t i = lsb; i < lsb + width_; ++i, order <<= 1) + if ((*bits_)[i]) + cnt |= order; + return cnt; } CounterVector::size_type CounterVector::Size() const @@ -39,6 +83,12 @@ CounterVector::size_type CounterVector::Size() const return bits_->Blocks() / width_; } +size_t CounterVector::Max() const + { + return std::numeric_limits::max() + >> (std::numeric_limits::digits - width_); + } + bool CounterVector::Serialize(SerialInfo* info) const { return SerialObj::Serialize(info); diff --git a/src/CounterVector.h b/src/CounterVector.h index ecc8fe90e0..868beaca9b 100644 --- a/src/CounterVector.h +++ b/src/CounterVector.h @@ -19,6 +19,8 @@ public: * @param width The number of bits that each cell occupies. * * @param cells The number of cells in the bitvector. + * + * @pre `cells > 0 && width > 0` */ CounterVector(size_t width, size_t cells = 1024); @@ -32,6 +34,8 @@ public: * @param value The value to add to the current counter in *cell*. * * @return `true` if adding *value* to the counter in *cell* succeeded. + * + * @pre `cell < Size()` */ bool Increment(size_type cell, count_type value); @@ -43,6 +47,8 @@ public: * @param value The value to subtract from the current counter in *cell*. * * @return `true` if subtracting *value* from the counter in *cell* succeeded. + * + * @pre `cell < Size()` */ bool Decrement(size_type cell, count_type value); @@ -52,6 +58,8 @@ public: * @param cell The cell index to retrieve the count for. * * @return The counter associated with *cell*. + * + * @pre `cell < Size()` */ count_type Count(size_type cell) const; @@ -62,6 +70,13 @@ public: */ size_type Size() const; + /** + * Computes the maximum counter value. + * + * @return The maximum counter value based on the width. + */ + size_t Max() const; + bool Serialize(SerialInfo* info) const; static CounterVector* Unserialize(UnserialInfo* info); From 7a0240694ec69506b0789029ba48bb56ae703206 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Mon, 22 Jul 2013 14:07:47 +0200 Subject: [PATCH 084/118] Fix and test counting Bloom filter. --- src/BloomFilter.cc | 9 ++++--- src/CounterVector.cc | 5 ++-- src/CounterVector.h | 4 +-- src/bro.bif | 8 +++++- .../btest/Baseline/bifs.bloomfilter/output | 6 +++++ testing/btest/bifs/bloomfilter.bro | 26 ++++++++++++++++++- 6 files changed, 48 insertions(+), 10 deletions(-) diff --git a/src/BloomFilter.cc b/src/BloomFilter.cc index f399bddeca..3c7bac80f1 100644 --- a/src/BloomFilter.cc +++ b/src/BloomFilter.cc @@ -131,9 +131,9 @@ CountingBloomFilter::CountingBloomFilter() CountingBloomFilter::CountingBloomFilter(const Hasher* hasher, size_t cells, size_t width) - : BloomFilter(hasher) + : BloomFilter(hasher), + cells_(new CounterVector(width, cells)) { - cells_ = new CounterVector(width, cells); } @@ -152,10 +152,12 @@ bool CountingBloomFilter::DoUnserialize(UnserialInfo* info) return cells_ != NULL; } +// TODO: Use partitioning in add/count to allow for reusing CMS bounds. + void CountingBloomFilter::AddImpl(const Hasher::digest_vector& h) { for ( size_t i = 0; i < h.size(); ++i ) - cells_->Increment(h[i] % cells_->Size(), 1); + cells_->Increment(h[i] % cells_->Size()); } size_t CountingBloomFilter::CountImpl(const Hasher::digest_vector& h) const @@ -164,7 +166,6 @@ size_t CountingBloomFilter::CountImpl(const Hasher::digest_vector& h) const std::numeric_limits::max(); for ( size_t i = 0; i < h.size(); ++i ) { - // TODO: Use partitioning. 
CounterVector::size_type cnt = cells_->Count(h[i] % cells_->Size()); if ( cnt < min ) min = cnt; diff --git a/src/CounterVector.cc b/src/CounterVector.cc index a661492313..831b95386f 100644 --- a/src/CounterVector.cc +++ b/src/CounterVector.cc @@ -5,7 +5,8 @@ #include "Serializer.h" CounterVector::CounterVector(size_t width, size_t cells) - : bits_(new BitVector(width * cells)), width_(width) + : bits_(new BitVector(width * cells)), + width_(width) { } @@ -80,7 +81,7 @@ CounterVector::count_type CounterVector::Count(size_type cell) const CounterVector::size_type CounterVector::Size() const { - return bits_->Blocks() / width_; + return bits_->Size() / width_; } size_t CounterVector::Max() const diff --git a/src/CounterVector.h b/src/CounterVector.h index 868beaca9b..2d99bb44d8 100644 --- a/src/CounterVector.h +++ b/src/CounterVector.h @@ -37,7 +37,7 @@ public: * * @pre `cell < Size()` */ - bool Increment(size_type cell, count_type value); + bool Increment(size_type cell, count_type value = 1); /** * Decrements a given cell. @@ -50,7 +50,7 @@ public: * * @pre `cell < Size()` */ - bool Decrement(size_type cell, count_type value); + bool Decrement(size_type cell, count_type value = 1); /** * Retrieves the counter of a given cell. diff --git a/src/bro.bif b/src/bro.bif index 71f8c0716f..a33a2248dd 100644 --- a/src/bro.bif +++ b/src/bro.bif @@ -5029,8 +5029,14 @@ function bloomfilter_basic_init%(fp: double, capacity: count, function bloomfilter_counting_init%(k: count, cells: count, max: count, name: string &default=""%): opaque of bloomfilter %{ + if ( max == 0 ) + { + reporter->Error("max counter value must be greater than 0"); + return NULL; + } + const Hasher* h = Hasher::Create(k, name->CheckString()); - uint16 width = 0; + uint16 width = 1; while ( max >>= 1 ) ++width; return new BloomFilterVal(new CountingBloomFilter(h, cells, width)); diff --git a/testing/btest/Baseline/bifs.bloomfilter/output b/testing/btest/Baseline/bifs.bloomfilter/output index 65aaa8b07c..80847a81b9 100644 --- a/testing/btest/Baseline/bifs.bloomfilter/output +++ b/testing/btest/Baseline/bifs.bloomfilter/output @@ -6,3 +6,9 @@ 1 1 1 +1 +2 +3 +3 +2 +3 diff --git a/testing/btest/bifs/bloomfilter.bro b/testing/btest/bifs/bloomfilter.bro index 3ff6a6668e..ab0bf86c22 100644 --- a/testing/btest/bifs/bloomfilter.bro +++ b/testing/btest/bifs/bloomfilter.bro @@ -1,7 +1,7 @@ # @TEST-EXEC: bro -b %INPUT >output # @TEST-EXEC: btest-diff output -event bro_init() +function test_basic_bloom_filter() { # Basic usage with counts. 
local bf_cnt = bloomfilter_basic_init(0.1, 1000); @@ -36,3 +36,27 @@ event bro_init() local bf_bug0 = bloomfilter_basic_init(-0.5, 42); local bf_bug1 = bloomfilter_basic_init(1.1, 42); } + +function test_counting_bloom_filter() + { + local bf = bloomfilter_counting_init(3, 16, 3); + bloomfilter_add(bf, "foo"); + print bloomfilter_lookup(bf, "foo"); # 1 + bloomfilter_add(bf, "foo"); + print bloomfilter_lookup(bf, "foo"); # 2 + bloomfilter_add(bf, "foo"); + print bloomfilter_lookup(bf, "foo"); # 3 + bloomfilter_add(bf, "foo"); + print bloomfilter_lookup(bf, "foo"); # still 3 + + bloomfilter_add(bf, "bar"); + bloomfilter_add(bf, "bar"); + print bloomfilter_lookup(bf, "bar"); # 2 + print bloomfilter_lookup(bf, "foo"); # still 3 + } + +event bro_init() + { + test_basic_bloom_filter(); + test_counting_bloom_filter(); + } From a3c61fe7eb6c43622de17df0e818def20cab7e90 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Mon, 22 Jul 2013 15:39:13 +0200 Subject: [PATCH 085/118] Use half adder for bitwise addition and subtraction. --- src/CounterVector.cc | 53 +++++++++++++++----------------------------- 1 file changed, 18 insertions(+), 35 deletions(-) diff --git a/src/CounterVector.cc b/src/CounterVector.cc index 831b95386f..f46fae1b98 100644 --- a/src/CounterVector.cc +++ b/src/CounterVector.cc @@ -20,52 +20,35 @@ bool CounterVector::Increment(size_type cell, count_type value) assert(cell < Size()); assert(value != 0); size_t lsb = cell * width_; - if (value >= Max()) - { - bool r = false; - for (size_t i = 0; i < width_; ++i) - if (! (*bits_)[lsb + i]) - { - bits_->Set(lsb + i); - if (! r) - r = true; - } - return r; - } bool carry = false; - for (size_t i = 0; i < width_; ++i) - { + for ( size_t i = 0; i < width_; ++i ) + { bool b1 = (*bits_)[lsb + i]; bool b2 = value & (1 << i); - (*bits_)[lsb + i] ^= b2 != carry; // bit1 ^ bit2 ^ carry - carry = carry ? b1 || b2 : b1 && b2; - } - if (! carry) - return true; - for (size_t i = 0; i < width_; ++i) - bits_->Set(lsb + i); - return false; + (*bits_)[lsb + i] = b1 ^ b2 ^ carry; + carry = ( b1 && b2 ) || ( carry && ( b1 != b2 ) ); + } + if ( carry ) + for ( size_t i = 0; i < width_; ++i ) + bits_->Set(lsb + i); + return ! carry; } bool CounterVector::Decrement(size_type cell, count_type value) { assert(cell < Size()); + assert(value != 0); + value = ~value + 1; // A - B := A + ~B + 1 + bool carry = false; size_t lsb = cell * width_; - bool success; - while (value --> 0) + for ( size_t i = 0; i < width_; ++i ) { - success = false; - for (size_t i = lsb; i < lsb + width_; ++i) - if ((*bits_)[i]) - { - bits_->Reset(i); - while (i && i > lsb) - bits_->Set(--i); - success = true; - break; - } + bool b1 = bits_[lsb + i]; + bool b2 = value & (1 << i); + bits_[lsb + i] = b1 ^ b2 ^ carry; + carry = ( b1 && b2 ) || ( carry && ( b1 != b2 ) ); } - return success; + return carry; } CounterVector::count_type CounterVector::Count(size_type cell) const From 9c2f57a9d9d5667d05e43efd3c8541ff9d33382a Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Mon, 22 Jul 2013 16:36:54 +0200 Subject: [PATCH 086/118] Make counter vectors mergeable. 
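Both the Increment/Decrement rewrite above and the Merge introduced below do their per-cell arithmetic as a bitwise ripple-carry (full) adder over the packed bit vector; Increment and Merge additionally clamp a cell to its maximum when the final carry signals overflow. A minimal standalone sketch of the same idea, using a plain std::vector<bool> instead of Bro's BitVector/CounterVector classes (the function name and signature are illustrative only):

    #include <cstddef>
    #include <vector>

    // Saturating add of `value` into the w-bit counter whose least significant
    // bit sits at position `lsb` of a packed bit vector. Returns false when the
    // counter overflows and is clamped to all ones.
    static bool add_saturating(std::vector<bool>& bits, std::size_t lsb,
                               std::size_t w, unsigned long long value)
        {
        bool carry = false;
        for ( std::size_t i = 0; i < w; ++i )
            {
            bool b1 = bits[lsb + i];
            bool b2 = (value >> i) & 1;
            bits[lsb + i] = b1 ^ b2 ^ carry;              // sum bit of the full adder
            carry = (b1 && b2) || (carry && (b1 != b2));  // carry out
            }

        if ( carry )  // overflow: clamp the counter to its maximum
            for ( std::size_t i = 0; i < w; ++i )
                bits[lsb + i] = true;

        return ! carry;
        }

Decrement in the patch reuses the same loop on the two's complement of the operand (A - B = A + ~B + 1), and the Merge that follows runs the addition cell by cell across two vectors of identical width.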
--- src/CounterVector.cc | 42 ++++++++++++++++++++++++++++++++++++++++-- src/CounterVector.h | 27 +++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 2 deletions(-) diff --git a/src/CounterVector.cc b/src/CounterVector.cc index f46fae1b98..75c62b208a 100644 --- a/src/CounterVector.cc +++ b/src/CounterVector.cc @@ -43,9 +43,9 @@ bool CounterVector::Decrement(size_type cell, count_type value) size_t lsb = cell * width_; for ( size_t i = 0; i < width_; ++i ) { - bool b1 = bits_[lsb + i]; + bool b1 = (*bits_)[lsb + i]; bool b2 = value & (1 << i); - bits_[lsb + i] = b1 ^ b2 ^ carry; + (*bits_)[lsb + i] = b1 ^ b2 ^ carry; carry = ( b1 && b2 ) || ( carry && ( b1 != b2 ) ); } return carry; @@ -67,12 +67,50 @@ CounterVector::size_type CounterVector::Size() const return bits_->Size() / width_; } +size_t CounterVector::Width() const + { + return width_; + } + size_t CounterVector::Max() const { return std::numeric_limits::max() >> (std::numeric_limits::digits - width_); } +CounterVector& CounterVector::Merge(const CounterVector& other) + { + assert(Size() == other.Size()); + assert(Width() == other.Width()); + for ( size_t cell = 0; cell < Size(); ++cell ) + { + size_t lsb = cell * width_; + bool carry = false; + for ( size_t i = 0; i < width_; ++i ) + { + bool b1 = (*bits_)[lsb + i]; + bool b2 = (*other.bits_)[lsb + i]; + (*bits_)[lsb + i] = b1 ^ b2 ^ carry; + carry = ( b1 && b2 ) || ( carry && ( b1 != b2 ) ); + } + if ( carry ) + for ( size_t i = 0; i < width_; ++i ) + bits_->Set(lsb + i); + } + return *this; + } + +CounterVector& CounterVector::operator|=(const CounterVector& other) +{ + return Merge(other); +} + +CounterVector operator|(const CounterVector& x, const CounterVector& y) +{ + CounterVector cv(x); + return cv |= y; +} + bool CounterVector::Serialize(SerialInfo* info) const { return SerialObj::Serialize(info); diff --git a/src/CounterVector.h b/src/CounterVector.h index 2d99bb44d8..4ab221ff6b 100644 --- a/src/CounterVector.h +++ b/src/CounterVector.h @@ -70,6 +70,13 @@ public: */ size_type Size() const; + /** + * Retrieves the counter width. + * + * @return The number of bits per counter. + */ + size_t Width() const; + /** * Computes the maximum counter value. * @@ -77,6 +84,26 @@ public: */ size_t Max() const; + /** + * Merges another counter vector into this instance by *adding* the counters + * of each cells. + * + * @param other The counter vector to merge into this instance. + * + * @return A reference to `*this`. + * + * @pre `Size() == other.Size() && Width() == other.Width()` + */ + CounterVector& Merge(const CounterVector& other); + + /** + * An alias for ::Merge. + */ + CounterVector& operator|=(const CounterVector& other); + + friend CounterVector operator|(const CounterVector& x, + const CounterVector& y); + bool Serialize(SerialInfo* info) const; static CounterVector* Unserialize(UnserialInfo* info); From eb64f5f9616e84295bc17537e8db57ae4f089c41 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Mon, 22 Jul 2013 18:03:55 +0200 Subject: [PATCH 087/118] Make hash functions equality comparable. --- src/H3.h | 12 ++++++ src/Hasher.cc | 101 +++++++++++++++++++++++++++++++------------------- src/Hasher.h | 18 +++++++++ 3 files changed, 93 insertions(+), 38 deletions(-) diff --git a/src/H3.h b/src/H3.h index e2dc865147..123dd6f374 100644 --- a/src/H3.h +++ b/src/H3.h @@ -58,6 +58,7 @@ #define H3_H #include +#include // The number of values representable by a byte. 
#define H3_BYTE_RANGE (UCHAR_MAX+1) @@ -112,6 +113,17 @@ public: return result; } + + friend bool operator==(const H3& x, const H3& y) + { + return ! std::memcmp(x.byte_lookup, y.byte_lookup, N * H3_BYTE_RANGE); + } + + friend bool operator!=(const H3& x, const H3& y) + { + return ! (x == y); + } + private: T byte_lookup[N][H3_BYTE_RANGE]; }; diff --git a/src/Hasher.cc b/src/Hasher.cc index 045adcd174..7a8d9a67e0 100644 --- a/src/Hasher.cc +++ b/src/Hasher.cc @@ -8,56 +8,69 @@ Hasher::UHF::UHF(size_t seed, const std::string& extra) } Hasher::digest Hasher::UHF::hash(const void* x, size_t n) const - { - assert(n <= UHASH_KEY_SIZE); - return n == 0 ? 0 : h_(x, n); - } + { + assert(n <= UHASH_KEY_SIZE); + return n == 0 ? 0 : h_(x, n); + } size_t Hasher::UHF::compute_seed(size_t seed, const std::string& extra) - { - u_char buf[SHA256_DIGEST_LENGTH]; - SHA256_CTX ctx; - sha256_init(&ctx); - if ( extra.empty() ) + { + u_char buf[SHA256_DIGEST_LENGTH]; + SHA256_CTX ctx; + sha256_init(&ctx); + if ( extra.empty() ) { unsigned int first_seed = initial_seed(); sha256_update(&ctx, &first_seed, sizeof(first_seed)); } else { - sha256_update(&ctx, extra.c_str(), extra.size()); + sha256_update(&ctx, extra.c_str(), extra.size()); + } + sha256_update(&ctx, &seed, sizeof(seed)); + sha256_final(&ctx, buf); + // Take the first sizeof(size_t) bytes as seed. + return *reinterpret_cast(buf); } - sha256_update(&ctx, &seed, sizeof(seed)); - sha256_final(&ctx, buf); - // Take the first sizeof(size_t) bytes as seed. - return *reinterpret_cast(buf); - } Hasher* Hasher::Create(size_t k, const std::string& name) - { - return new DefaultHasher(k, name); - } + { + return new DefaultHasher(k, name); + } Hasher::Hasher(size_t k, const std::string& name) - : k_(k), name_(name) + : k_(k), name_(name) { } DefaultHasher::DefaultHasher(size_t k, const std::string& name) - : Hasher(k, name) - { - for ( size_t i = 0; i < k; ++i ) - hash_functions_.push_back(UHF(i, name)); - } + : Hasher(k, name) + { + for ( size_t i = 0; i < k; ++i ) + hash_functions_.push_back(UHF(i, name)); + } Hasher::digest_vector DefaultHasher::Hash(const void* x, size_t n) const - { - digest_vector h(K(), 0); - for ( size_t i = 0; i < h.size(); ++i ) - h[i] = hash_functions_[i](x, n); - return h; - } + { + digest_vector h(K(), 0); + for ( size_t i = 0; i < h.size(); ++i ) + h[i] = hash_functions_[i](x, n); + return h; + } + +DefaultHasher* DefaultHasher::Clone() const + { + return new DefaultHasher(*this); + } + +bool DefaultHasher::Equals(const Hasher* other) const /* final */ + { + if ( typeid(*this) != typeid(*other) ) + return false; + const DefaultHasher* o = static_cast(other); + return hash_functions_ == o->hash_functions_; + } DoubleHasher::DoubleHasher(size_t k, const std::string& name) : Hasher(k, name), @@ -67,13 +80,25 @@ DoubleHasher::DoubleHasher(size_t k, const std::string& name) } Hasher::digest_vector DoubleHasher::Hash(const void* x, size_t n) const - { - digest h1 = h1_(x, n); - digest h2 = h2_(x, n); - digest_vector h(K(), 0); - for ( size_t i = 0; i < h.size(); ++i ) - h[i] = h1 + i * h2; - return h; - } + { + digest h1 = h1_(x, n); + digest h2 = h2_(x, n); + digest_vector h(K(), 0); + for ( size_t i = 0; i < h.size(); ++i ) + h[i] = h1 + i * h2; + return h; + } +DoubleHasher* DoubleHasher::Clone() const + { + return new DoubleHasher(*this); + } + +bool DoubleHasher::Equals(const Hasher* other) const /* final */ + { + if ( typeid(*this) != typeid(*other) ) + return false; + const DoubleHasher* o = static_cast(other); + return h1_ == 
o->h1_ && h2_ == o->h2_; + } diff --git a/src/Hasher.h b/src/Hasher.h index 8d0af6b03f..12393e7217 100644 --- a/src/Hasher.h +++ b/src/Hasher.h @@ -31,6 +31,10 @@ public: virtual digest_vector Hash(const void* x, size_t n) const = 0; + virtual Hasher* Clone() const = 0; + + virtual bool Equals(const Hasher* other) const = 0; + size_t K() const { return k_; } const std::string& Name() const { return name_; } @@ -64,6 +68,16 @@ protected: return hash(x, n); } + friend bool operator==(const UHF& x, const UHF& y) + { + return x.h_ == y.h_; + } + + friend bool operator!=(const UHF& x, const UHF& y) + { + return ! (x == y); + } + digest hash(const void* x, size_t n) const; private: @@ -87,6 +101,8 @@ public: DefaultHasher(size_t k, const std::string& name); virtual digest_vector Hash(const void* x, size_t n) const /* final */; + virtual DefaultHasher* Clone() const /* final */; + virtual bool Equals(const Hasher* other) const /* final */; private: std::vector hash_functions_; @@ -100,6 +116,8 @@ public: DoubleHasher(size_t k, const std::string& name); virtual digest_vector Hash(const void* x, size_t n) const /* final */; + virtual DoubleHasher* Clone() const /* final */; + virtual bool Equals(const Hasher* other) const /* final */; private: UHF h1_; From a39f980cd493e64a6bb4016c47923e8754b059dc Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Mon, 22 Jul 2013 18:11:12 +0200 Subject: [PATCH 088/118] Implement and test Bloom filter merging. --- src/BloomFilter.cc | 22 ++++++++++++++---- src/BloomFilter.h | 1 - src/CounterVector.cc | 6 +++++ src/CounterVector.h | 8 +++++++ src/Hasher.cc | 4 ++-- src/OpaqueVal.cc | 2 +- src/OpaqueVal.h | 21 ++++++++++++++--- .../btest/Baseline/bifs.bloomfilter/output | 7 ++++++ testing/btest/bifs/bloomfilter.bro | 23 ++++++++++++++++++- 9 files changed, 81 insertions(+), 13 deletions(-) diff --git a/src/BloomFilter.cc b/src/BloomFilter.cc index 3c7bac80f1..889c7bafe1 100644 --- a/src/BloomFilter.cc +++ b/src/BloomFilter.cc @@ -70,8 +70,13 @@ size_t BasicBloomFilter::K(size_t cells, size_t capacity) BasicBloomFilter* BasicBloomFilter::Merge(const BasicBloomFilter* x, const BasicBloomFilter* y) { - // TODO: Ensure that x and y use the same Hasher before proceeding. + if ( ! x->hasher_->Equals(y->hasher_) ) + { + reporter->InternalError("incompatible hashers during Bloom filter merge"); + return NULL; + } BasicBloomFilter* result = new BasicBloomFilter(); + result->hasher_ = x->hasher_->Clone(); result->bits_ = new BitVector(*x->bits_ | *y->bits_); return result; } @@ -119,10 +124,17 @@ size_t BasicBloomFilter::CountImpl(const Hasher::digest_vector& h) const CountingBloomFilter* CountingBloomFilter::Merge(const CountingBloomFilter* x, const CountingBloomFilter* y) -{ - assert(! "not yet implemented"); - return NULL; -} + { + if ( ! 
x->hasher_->Equals(y->hasher_) ) + { + reporter->InternalError("incompatible hashers during Bloom filter merge"); + return NULL; + } + CountingBloomFilter* result = new CountingBloomFilter(); + result->hasher_ = x->hasher_->Clone(); + result->cells_ = new CounterVector(*x->cells_ | *y->cells_); + return result; + } CountingBloomFilter::CountingBloomFilter() : cells_(NULL) diff --git a/src/BloomFilter.h b/src/BloomFilter.h index 92f15c6070..070aa2dc25 100644 --- a/src/BloomFilter.h +++ b/src/BloomFilter.h @@ -57,7 +57,6 @@ protected: virtual void AddImpl(const Hasher::digest_vector& hashes) = 0; virtual size_t CountImpl(const Hasher::digest_vector& hashes) const = 0; -private: const Hasher* hasher_; }; diff --git a/src/CounterVector.cc b/src/CounterVector.cc index 75c62b208a..cf3083de9e 100644 --- a/src/CounterVector.cc +++ b/src/CounterVector.cc @@ -10,6 +10,12 @@ CounterVector::CounterVector(size_t width, size_t cells) { } +CounterVector::CounterVector(const CounterVector& other) + : bits_(new BitVector(*other.bits_)), + width_(other.width_) + { + } + CounterVector::~CounterVector() { delete bits_; diff --git a/src/CounterVector.h b/src/CounterVector.h index 4ab221ff6b..eced5956d4 100644 --- a/src/CounterVector.h +++ b/src/CounterVector.h @@ -9,6 +9,7 @@ class BitVector; * A vector of counters, each of which have a fixed number of bits. */ class CounterVector : public SerialObj { + CounterVector& operator=(const CounterVector&); public: typedef size_t size_type; typedef uint64 count_type; @@ -24,6 +25,13 @@ public: */ CounterVector(size_t width, size_t cells = 1024); + /** + * Copy-constructs a counter vector. + * + * @param other The counter vector to copy. + */ + CounterVector(const CounterVector& other); + ~CounterVector(); /** diff --git a/src/Hasher.cc b/src/Hasher.cc index 7a8d9a67e0..2a889c7e09 100644 --- a/src/Hasher.cc +++ b/src/Hasher.cc @@ -64,7 +64,7 @@ DefaultHasher* DefaultHasher::Clone() const return new DefaultHasher(*this); } -bool DefaultHasher::Equals(const Hasher* other) const /* final */ +bool DefaultHasher::Equals(const Hasher* other) const { if ( typeid(*this) != typeid(*other) ) return false; @@ -94,7 +94,7 @@ DoubleHasher* DoubleHasher::Clone() const return new DoubleHasher(*this); } -bool DoubleHasher::Equals(const Hasher* other) const /* final */ +bool DoubleHasher::Equals(const Hasher* other) const { if ( typeid(*this) != typeid(*other) ) return false; diff --git a/src/OpaqueVal.cc b/src/OpaqueVal.cc index 5a673c4a40..36038d679a 100644 --- a/src/OpaqueVal.cc +++ b/src/OpaqueVal.cc @@ -1,6 +1,5 @@ #include "OpaqueVal.h" -#include "BloomFilter.h" #include "NetVar.h" #include "Reporter.h" #include "Serializer.h" @@ -587,6 +586,7 @@ BloomFilterVal* BloomFilterVal::Merge(const BloomFilterVal* x, else if ( (result = DoMerge(x, y)) ) return result; + reporter->InternalError("failed to merge Bloom filters"); return NULL; } diff --git a/src/OpaqueVal.h b/src/OpaqueVal.h index 2362fdacfc..22c3dbfade 100644 --- a/src/OpaqueVal.h +++ b/src/OpaqueVal.h @@ -3,6 +3,7 @@ #ifndef OPAQUEVAL_H #define OPAQUEVAL_H +#include "BloomFilter.h" #include "RandTest.h" #include "Val.h" #include "digest.h" @@ -137,9 +138,23 @@ private: static BloomFilterVal* DoMerge(const BloomFilterVal* x, const BloomFilterVal* y) { - const T* a = dynamic_cast(x->bloom_filter_); - const T* b = dynamic_cast(y->bloom_filter_); - return a && b ? 
new BloomFilterVal(T::Merge(a, b)) : NULL; + if ( typeid(*x->bloom_filter_) != typeid(*y->bloom_filter_) ) + { + reporter->InternalError("cannot merge different Bloom filter types"); + return NULL; + } + if ( typeid(T) != typeid(*x->bloom_filter_) ) + return NULL; + const T* a = static_cast(x->bloom_filter_); + const T* b = static_cast(y->bloom_filter_); + BloomFilterVal* merged = new BloomFilterVal(T::Merge(a, b)); + assert(merged); + if ( ! merged->Typify(x->Type()) ) + { + reporter->InternalError("failed to set type on merged Bloom filter"); + return NULL; + } + return merged; } BroType* type_; diff --git a/testing/btest/Baseline/bifs.bloomfilter/output b/testing/btest/Baseline/bifs.bloomfilter/output index 80847a81b9..4fe2ae1ecc 100644 --- a/testing/btest/Baseline/bifs.bloomfilter/output +++ b/testing/btest/Baseline/bifs.bloomfilter/output @@ -7,8 +7,15 @@ 1 1 1 +1 +1 +1 +1 2 3 3 2 3 +3 +3 +2 diff --git a/testing/btest/bifs/bloomfilter.bro b/testing/btest/bifs/bloomfilter.bro index ab0bf86c22..f69ddbda0c 100644 --- a/testing/btest/bifs/bloomfilter.bro +++ b/testing/btest/bifs/bloomfilter.bro @@ -35,11 +35,21 @@ function test_basic_bloom_filter() # Invalid parameters. local bf_bug0 = bloomfilter_basic_init(-0.5, 42); local bf_bug1 = bloomfilter_basic_init(1.1, 42); + + # Merging + local bf_cnt2 = bloomfilter_basic_init(0.1, 1000); + bloomfilter_add(bf_cnt2, 42); + bloomfilter_add(bf_cnt, 100); + local bf_merged = bloomfilter_merge(bf_cnt, bf_cnt2); + print bloomfilter_lookup(bf_merged, 42); + print bloomfilter_lookup(bf_merged, 84); + print bloomfilter_lookup(bf_merged, 100); + print bloomfilter_lookup(bf_merged, 168); } function test_counting_bloom_filter() { - local bf = bloomfilter_counting_init(3, 16, 3); + local bf = bloomfilter_counting_init(3, 32, 3); bloomfilter_add(bf, "foo"); print bloomfilter_lookup(bf, "foo"); # 1 bloomfilter_add(bf, "foo"); @@ -49,10 +59,21 @@ function test_counting_bloom_filter() bloomfilter_add(bf, "foo"); print bloomfilter_lookup(bf, "foo"); # still 3 + bloomfilter_add(bf, "bar"); bloomfilter_add(bf, "bar"); print bloomfilter_lookup(bf, "bar"); # 2 print bloomfilter_lookup(bf, "foo"); # still 3 + + # Merging + local bf2 = bloomfilter_counting_init(3, 32, 3); + bloomfilter_add(bf2, "baz"); + bloomfilter_add(bf2, "baz"); + bloomfilter_add(bf2, "bar"); + local bf_merged = bloomfilter_merge(bf, bf2); + print bloomfilter_lookup(bf_merged, "foo"); + print bloomfilter_lookup(bf_merged, "bar"); + print bloomfilter_lookup(bf_merged, "baz"); } event bro_init() From 5c3bf14d168cca9af75e0ac642de8049f89cf525 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Mon, 22 Jul 2013 14:02:56 -0400 Subject: [PATCH 089/118] Fixed a scriptland state issue that manifested especially badly on proxies. 
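The merging support added above is only well defined when both filters were created with identical hash functions, which is why both Merge implementations check Hasher::Equals() before combining cells. For basic filters the union of the represented sets is a bitwise OR of the bit vectors; counting filters add the cells instead, saturating at the maximum counter value. A minimal sketch of the basic case with plain standard containers (TinyBloom and its fields are illustrative stand-ins, not the Bro classes):

    #include <cstddef>
    #include <stdexcept>
    #include <string>
    #include <vector>

    // A basic Bloom filter reduced to its essentials: the bit vector plus an
    // identifier for the hash-function family it was built with.
    struct TinyBloom
        {
        std::vector<bool> bits;
        std::string hasher_id;  // stand-in for the Hasher::Equals() check
        };

    // Cell-wise OR yields the filter for the union of the two underlying sets,
    // provided both filters use the same hash functions and the same size.
    static TinyBloom merge_basic(const TinyBloom& x, const TinyBloom& y)
        {
        if ( x.hasher_id != y.hasher_id || x.bits.size() != y.bits.size() )
            throw std::runtime_error("incompatible Bloom filters");

        TinyBloom result;
        result.hasher_id = x.hasher_id;
        result.bits.resize(x.bits.size());

        for ( std::size_t i = 0; i < result.bits.size(); ++i )
            result.bits[i] = x.bits[i] || y.bits[i];

        return result;
        }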
--- scripts/base/protocols/irc/dcc-send.bro | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/base/protocols/irc/dcc-send.bro b/scripts/base/protocols/irc/dcc-send.bro index 0a7f27e438..3194766946 100644 --- a/scripts/base/protocols/irc/dcc-send.bro +++ b/scripts/base/protocols/irc/dcc-send.bro @@ -185,5 +185,6 @@ event expected_connection_seen(c: connection, a: Analyzer::Tag) &priority=10 event connection_state_remove(c: connection) &priority=-5 { - delete dcc_expected_transfers[c$id$resp_h, c$id$resp_p]; + if ( [c$id$resp_h, c$id$resp_p] in dcc_expected_transfers ) + delete dcc_expected_transfers[c$id$resp_h, c$id$resp_p]; } From 325f0c2a3f087508dc0817739b9c312bcc5873d5 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Mon, 22 Jul 2013 14:15:35 -0500 Subject: [PATCH 090/118] Coverage test fixes and whitespace/doc tweaks. --- doc/scripts/DocSourcesList.cmake | 3 ++ scripts/base/utils/active-http.bro | 26 ++++++------ scripts/base/utils/exec.bro | 40 +++++++++---------- .../canonified_loaded_scripts.log | 13 +++--- 4 files changed, 42 insertions(+), 40 deletions(-) diff --git a/doc/scripts/DocSourcesList.cmake b/doc/scripts/DocSourcesList.cmake index 529b03ca83..bd264bfcb4 100644 --- a/doc/scripts/DocSourcesList.cmake +++ b/doc/scripts/DocSourcesList.cmake @@ -164,9 +164,12 @@ rest_target(${psd} base/protocols/ssl/main.bro) rest_target(${psd} base/protocols/ssl/mozilla-ca-list.bro) rest_target(${psd} base/protocols/syslog/consts.bro) rest_target(${psd} base/protocols/syslog/main.bro) +rest_target(${psd} base/utils/active-http.bro) rest_target(${psd} base/utils/addrs.bro) rest_target(${psd} base/utils/conn-ids.bro) +rest_target(${psd} base/utils/dir.bro) rest_target(${psd} base/utils/directions-and-hosts.bro) +rest_target(${psd} base/utils/exec.bro) rest_target(${psd} base/utils/files.bro) rest_target(${psd} base/utils/numbers.bro) rest_target(${psd} base/utils/paths.bro) diff --git a/scripts/base/utils/active-http.bro b/scripts/base/utils/active-http.bro index 5522cc108a..3f475a378b 100644 --- a/scripts/base/utils/active-http.bro +++ b/scripts/base/utils/active-http.bro @@ -1,21 +1,21 @@ -##! A module for performing active HTTP requests and +##! A module for performing active HTTP requests and ##! getting the reply at runtime. @load ./exec module ActiveHTTP; - + export { ## The default timeout for HTTP requests. const default_max_time = 1min &redef; - + ## The default HTTP method/verb to use for requests. const default_method = "GET" &redef; - - type Response: record { + + type Response: record { ## Numeric response code from the server. code: count; - ## String response messgae from the server. + ## String response message from the server. msg: string; ## Full body of the response. body: string &optional; @@ -29,24 +29,24 @@ export { ## The HTTP method/verb to use for the request. method: string &default=default_method; ## Data to send to the server in the client body. Keep in - ## mind that you will probably need to set the $method field + ## mind that you will probably need to set the *method* field ## to "POST" or "PUT". client_data: string &optional; - ## Arbitrary headers to pass to the server. Some headers + ## Arbitrary headers to pass to the server. Some headers ## will be included by libCurl. #custom_headers: table[string] of string &optional; ## Timeout for the request. max_time: interval &default=default_max_time; - ## Additional curl command line arguments. Be very careful + ## Additional curl command line arguments. 
Be very careful ## with this option since shell injection could take place ## if careful handling of untrusted data is not applied. addl_curl_args: string &optional; }; ## Perform an HTTP request according to the :bro:type:`Request` record. - ## This is an asynchronous function and must be called within a "when" + ## This is an asynchronous function and must be called within a "when" ## statement. - ## + ## ## req: A record instance representing all options for an HTTP request. ## ## Returns: A record with the full response message. @@ -55,7 +55,7 @@ export { function request2curl(r: Request, bodyfile: string, headersfile: string): string { - local cmd = fmt("curl -s -g -o \"%s\" -D \"%s\" -X \"%s\"", + local cmd = fmt("curl -s -g -o \"%s\" -D \"%s\" -X \"%s\"", str_shell_escape(bodyfile), str_shell_escape(headersfile), str_shell_escape(r$method)); @@ -91,7 +91,7 @@ function request(req: Request): ActiveHTTP::Response # If there is no response line then nothing else will work either. if ( ! (result?$files && headersfile in result$files) ) Reporter::error(fmt("There was a failure when requesting \"%s\" with ActiveHTTP.", req$url)); - + local headers = result$files[headersfile]; for ( i in headers ) { diff --git a/scripts/base/utils/exec.bro b/scripts/base/utils/exec.bro index 45cd8cb287..f896a68064 100644 --- a/scripts/base/utils/exec.bro +++ b/scripts/base/utils/exec.bro @@ -1,6 +1,4 @@ ##! A module for executing external command line programs. -##! This requires code that is still in topic branches and -##! definitely won't currently work on any released version of Bro. @load base/frameworks/input @@ -8,15 +6,13 @@ module Exec; export { type Command: record { - ## The command line to execute. - ## Use care to avoid injection attacks! + ## The command line to execute. Use care to avoid injection attacks. + ## I.e. if the command uses untrusted/variable data, sanitize it. cmd: string; - ## Provide standard in to the program as a - ## string. + ## Provide standard in to the program as a string. stdin: string &default=""; - ## If additional files are required to be read - ## in as part of the output of the command they - ## can be defined here. + ## If additional files are required to be read in as part of the output + ## of the command they can be defined here. read_files: set[string] &optional; }; @@ -27,7 +23,7 @@ export { signal_exit: bool &default=F; ## Each line of standard out. stdout: vector of string &optional; - ## Each line of standard error. + ## Each line of standard error. stderr: vector of string &optional; ## If additional files were requested to be read in ## the content of the files will be available here. @@ -35,7 +31,7 @@ export { }; ## Function for running command line programs and getting - ## output. This is an asynchronous function which is meant + ## output. This is an asynchronous function which is meant ## to be run with the `when` statement. ## ## cmd: The command to run. Use care to avoid injection attacks! @@ -56,12 +52,12 @@ redef record Command += { global results: table[string] of Result = table(); global finished_commands: set[string]; global currently_tracked_files: set[string] = set(); -type OneLine: record { +type OneLine: record { s: string; is_stderr: bool; }; -type FileLine: record { +type FileLine: record { s: string; }; @@ -93,7 +89,7 @@ event Exec::file_line(description: Input::EventDescription, tpe: Input::Event, s local result = results[name]; if ( ! 
result?$files ) result$files = table(); - + if ( track_file !in result$files ) result$files[track_file] = vector(s); else @@ -136,16 +132,16 @@ function run(cmd: Command): Result } } - local config_strings: table[string] of string = { + local config_strings: table[string] of string = { ["stdin"] = cmd$stdin, ["read_stderr"] = "1", }; - Input::add_event([$name=cmd$uid, - $source=fmt("%s |", cmd$cmd), - $reader=Input::READER_RAW, - $fields=Exec::OneLine, - $ev=Exec::line, - $want_record=F, + Input::add_event([$name=cmd$uid, + $source=fmt("%s |", cmd$cmd), + $reader=Input::READER_RAW, + $fields=Exec::OneLine, + $ev=Exec::line, + $want_record=F, $config=config_strings]); return when ( cmd$uid in finished_commands ) @@ -164,4 +160,4 @@ event bro_done() { system(fmt("rm \"%s\"", str_shell_escape(fname))); } - } \ No newline at end of file + } diff --git a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log index 999fd7c841..37f1c739f8 100644 --- a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log +++ b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log @@ -3,7 +3,7 @@ #empty_field (empty) #unset_field - #path loaded_scripts -#open 2013-07-10-21-18-31 +#open 2013-07-22-16-01-22 #fields name #types string scripts/base/init-bare.bro @@ -90,12 +90,17 @@ scripts/base/init-bare.bro scripts/base/init-default.bro scripts/base/utils/site.bro scripts/base/utils/patterns.bro + scripts/base/utils/active-http.bro + scripts/base/utils/exec.bro scripts/base/utils/addrs.bro scripts/base/utils/conn-ids.bro + scripts/base/utils/dir.bro + scripts/base/frameworks/reporter/__load__.bro + scripts/base/frameworks/reporter/main.bro + scripts/base/utils/paths.bro scripts/base/utils/directions-and-hosts.bro scripts/base/utils/files.bro scripts/base/utils/numbers.bro - scripts/base/utils/paths.bro scripts/base/utils/queue.bro scripts/base/utils/strings.bro scripts/base/utils/thresholds.bro @@ -129,8 +134,6 @@ scripts/base/init-default.bro scripts/base/frameworks/intel/__load__.bro scripts/base/frameworks/intel/main.bro scripts/base/frameworks/intel/input.bro - scripts/base/frameworks/reporter/__load__.bro - scripts/base/frameworks/reporter/main.bro scripts/base/frameworks/sumstats/__load__.bro scripts/base/frameworks/sumstats/main.bro scripts/base/frameworks/sumstats/plugins/__load__.bro @@ -195,4 +198,4 @@ scripts/base/init-default.bro scripts/base/protocols/tunnels/__load__.bro scripts/base/misc/find-checksum-offloading.bro scripts/policy/misc/loaded-scripts.bro -#close 2013-07-10-21-18-31 +#close 2013-07-22-16-01-22 From 636914b8f12a27145ce2fcb2b4e1e4be8f6ad381 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Mon, 22 Jul 2013 17:01:31 -0400 Subject: [PATCH 091/118] Some tests work now (at least they all don't fail anymore!) 
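As the documentation cleanup above notes, Exec::run() resolves asynchronously through the raw input reader (and ActiveHTTP::request() builds on it in the same way), so callers have to invoke it from inside a `when` statement. A minimal usage sketch in Bro script; the command string and the handling of the optional stdout field are illustrative only, not part of the patch:

    # Run an external command and print its standard output once it finishes.
    event bro_init()
        {
        when ( local result = Exec::run([$cmd="ls -1 /tmp"]) )
            {
            if ( result?$stdout )
                {
                for ( i in result$stdout )
                    print result$stdout[i];
                }
            }
        }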
--- testing/btest/btest.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/btest/btest.cfg b/testing/btest/btest.cfg index 4a13833094..7ccf99eea8 100644 --- a/testing/btest/btest.cfg +++ b/testing/btest/btest.cfg @@ -7,7 +7,7 @@ IgnoreFiles = *.tmp *.swp #* *.trace .DS_Store [environment] BROPATH=`bash -c %(testbase)s/../../build/bro-path-dev` -BROMAGIC=%(testbase)s/../../magic +BROMAGIC=%(testbase)s/../../magic/database BRO_SEED_FILE=%(testbase)s/random.seed TZ=UTC LC_ALL=C From f098b17429151d2169aff30ead87801146fb376f Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Tue, 23 Jul 2013 11:18:49 -0400 Subject: [PATCH 092/118] A few test updates. --- scripts/base/protocols/irc/files.bro | 3 -- .../policy/frameworks/files/detect-MHR.bro | 2 +- .../Baseline/core.tunnels.ayiya/http.log | 10 +++--- .../canonified_loaded_scripts.log | 31 ++++++++++--------- .../out | 3 +- .../out | 15 +++++++++ .../http.log | 8 ++--- .../notice.log | 10 +++--- .../smtp_entities.log | 12 ------- .../scripts/base/protocols/smtp/mime.test | 6 ---- testing/scripts/file-analysis-test.bro | 18 +++++------ 11 files changed, 57 insertions(+), 61 deletions(-) delete mode 100644 testing/btest/Baseline/scripts.base.protocols.smtp.mime/smtp_entities.log delete mode 100644 testing/btest/scripts/base/protocols/smtp/mime.test diff --git a/scripts/base/protocols/irc/files.bro b/scripts/base/protocols/irc/files.bro index a6321d3f2f..7e077c8331 100644 --- a/scripts/base/protocols/irc/files.bro +++ b/scripts/base/protocols/irc/files.bro @@ -16,9 +16,6 @@ export { function get_file_handle(c: connection, is_orig: bool): string { - if ( [c$id$resp_h, c$id$resp_p] !in dcc_expected_transfers ) - return ""; - return cat(Analyzer::ANALYZER_IRC_DATA, c$start_time, c$id, is_orig); } diff --git a/scripts/policy/frameworks/files/detect-MHR.bro b/scripts/policy/frameworks/files/detect-MHR.bro index 71d73217e0..8a2e33b7f4 100644 --- a/scripts/policy/frameworks/files/detect-MHR.bro +++ b/scripts/policy/frameworks/files/detect-MHR.bro @@ -47,7 +47,7 @@ event file_hash(f: fa_file, kind: string, hash: string) local readable_first_detected = strftime("%Y-%m-%d %H:%M:%S", mhr_first_detected); if ( mhr_detect_rate >= notice_threshold ) { - local message = fmt("Detection rate: %d%% Last seen: %s", mhr_detect_rate, readable_first_detected); + local message = fmt("Malware Hash Registry Detection rate: %d%% Last seen: %s", mhr_detect_rate, readable_first_detected); local virustotal_url = fmt("https://www.virustotal.com/en/file/%s/analysis/", hash); NOTICE([$note=Match, $msg=message, $sub=virustotal_url, $f=f]); } diff --git a/testing/btest/Baseline/core.tunnels.ayiya/http.log b/testing/btest/Baseline/core.tunnels.ayiya/http.log index cd49c4cc89..04692a3547 100644 --- a/testing/btest/Baseline/core.tunnels.ayiya/http.log +++ b/testing/btest/Baseline/core.tunnels.ayiya/http.log @@ -3,10 +3,10 @@ #empty_field (empty) #unset_field - #path http -#open 2013-05-21-21-11-20 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied mime_type md5 extracted_request_files extracted_response_files -#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] string string vector[string] vector[string] -1257655301.652206 5OKnoww6xl4 2001:4978:f:4c::2 53382 2001:4860:b002::68 80 1 GET 
ipv6.google.com / - Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en; rv:1.9.0.15pre) Gecko/2009091516 Camino/2.0b4 (like Firefox/3.0.15pre) 0 10102 200 OK - - - (empty) - - - text/html - - - +#open 2013-07-23-05-12-58 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied orig_fuids orig_mime_types resp_fuids resp_mime_types +#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] vector[string] vector[string] vector[string] vector[string] +1257655301.652206 5OKnoww6xl4 2001:4978:f:4c::2 53382 2001:4860:b002::68 80 1 GET ipv6.google.com / - Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en; rv:1.9.0.15pre) Gecko/2009091516 Camino/2.0b4 (like Firefox/3.0.15pre) 0 10102 200 OK - - - (empty) - - - - - meGKu6goEyd application/octet-stream 1257655302.514424 5OKnoww6xl4 2001:4978:f:4c::2 53382 2001:4860:b002::68 80 2 GET ipv6.google.com /csi?v=3&s=webhp&action=&tran=undefined&e=17259,19771,21517,21766,21887,22212&ei=BUz2Su7PMJTglQfz3NzCAw&rt=prt.77,xjs.565,ol.645 http://ipv6.google.com/ Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en; rv:1.9.0.15pre) Gecko/2009091516 Camino/2.0b4 (like Firefox/3.0.15pre) 0 0 204 No Content - - - (empty) - - - - - - - 1257655303.603569 5OKnoww6xl4 2001:4978:f:4c::2 53382 2001:4860:b002::68 80 3 GET ipv6.google.com /gen_204?atyp=i&ct=fade&cad=1254&ei=BUz2Su7PMJTglQfz3NzCAw&zx=1257655303600 http://ipv6.google.com/ Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en; rv:1.9.0.15pre) Gecko/2009091516 Camino/2.0b4 (like Firefox/3.0.15pre) 0 0 204 No Content - - - (empty) - - - - - - - -#close 2013-05-21-21-11-20 +#close 2013-07-23-05-12-58 diff --git a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log index 999fd7c841..f67d4b6158 100644 --- a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log +++ b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log @@ -3,7 +3,7 @@ #empty_field (empty) #unset_field - #path loaded_scripts -#open 2013-07-10-21-18-31 +#open 2013-07-23-05-48-10 #fields name #types string scripts/base/init-bare.bro @@ -84,12 +84,12 @@ scripts/base/init-bare.bro scripts/base/frameworks/analyzer/main.bro scripts/base/frameworks/packet-filter/utils.bro build/scripts/base/bif/analyzer.bif.bro - scripts/base/frameworks/file-analysis/__load__.bro - scripts/base/frameworks/file-analysis/main.bro + scripts/base/frameworks/files/__load__.bro + scripts/base/frameworks/files/main.bro build/scripts/base/bif/file_analysis.bif.bro + scripts/base/utils/site.bro + scripts/base/utils/patterns.bro scripts/base/init-default.bro - scripts/base/utils/site.bro - scripts/base/utils/patterns.bro scripts/base/utils/addrs.bro scripts/base/utils/conn-ids.bro scripts/base/utils/directions-and-hosts.bro @@ -157,8 +157,8 @@ scripts/base/init-default.bro scripts/base/protocols/ftp/__load__.bro scripts/base/protocols/ftp/utils-commands.bro scripts/base/protocols/ftp/main.bro - scripts/base/protocols/ftp/file-analysis.bro - scripts/base/protocols/ftp/file-extract.bro + scripts/base/protocols/ftp/utils.bro + scripts/base/protocols/ftp/files.bro scripts/base/protocols/ftp/gridftp.bro scripts/base/protocols/ssl/__load__.bro 
scripts/base/protocols/ssl/consts.bro @@ -166,15 +166,13 @@ scripts/base/init-default.bro scripts/base/protocols/ssl/mozilla-ca-list.bro scripts/base/protocols/http/__load__.bro scripts/base/protocols/http/main.bro + scripts/base/protocols/http/entities.bro scripts/base/protocols/http/utils.bro - scripts/base/protocols/http/file-analysis.bro - scripts/base/protocols/http/file-ident.bro - scripts/base/protocols/http/file-hash.bro - scripts/base/protocols/http/file-extract.bro + scripts/base/protocols/http/files.bro scripts/base/protocols/irc/__load__.bro scripts/base/protocols/irc/main.bro scripts/base/protocols/irc/dcc-send.bro - scripts/base/protocols/irc/file-analysis.bro + scripts/base/protocols/irc/files.bro scripts/base/protocols/modbus/__load__.bro scripts/base/protocols/modbus/consts.bro scripts/base/protocols/modbus/main.bro @@ -182,8 +180,7 @@ scripts/base/init-default.bro scripts/base/protocols/smtp/__load__.bro scripts/base/protocols/smtp/main.bro scripts/base/protocols/smtp/entities.bro - scripts/base/protocols/smtp/entities-excerpt.bro - scripts/base/protocols/smtp/file-analysis.bro + scripts/base/protocols/smtp/files.bro scripts/base/protocols/socks/__load__.bro scripts/base/protocols/socks/consts.bro scripts/base/protocols/socks/main.bro @@ -193,6 +190,10 @@ scripts/base/init-default.bro scripts/base/protocols/syslog/consts.bro scripts/base/protocols/syslog/main.bro scripts/base/protocols/tunnels/__load__.bro + scripts/base/files/hash/__load__.bro + scripts/base/files/hash/main.bro + scripts/base/files/extract/__load__.bro + scripts/base/files/extract/main.bro scripts/base/misc/find-checksum-offloading.bro scripts/policy/misc/loaded-scripts.bro -#close 2013-07-10-21-18-31 +#close 2013-07-23-05-48-10 diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.ftp/out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.ftp/out index 4463db6958..c810ce15e5 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.ftp/out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.ftp/out @@ -3,7 +3,8 @@ file #0, 0, 0 FILE_BOF_BUFFER The Nationa MIME_TYPE -text/x-pascal +application/octet-stream +FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #0, 16557, 0 [orig_h=141.142.228.5, orig_p=50737/tcp, resp_h=141.142.192.162, resp_p=38141/tcp] diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.irc/out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.irc/out index 36da7bdeed..fcd30b2253 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.irc/out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.irc/out @@ -4,6 +4,21 @@ FILE_BOF_BUFFER PK^C^D^T\0\0\0^H\0\xae MIME_TYPE application/zip +FILE_OVER_NEW_CONNECTION +FILE_NEW +file #1, 0, 0 +FILE_BOF_BUFFER +\0\0^Ex\0\0^J\xf0\0\0^P +MIME_TYPE +application/octet-stream +FILE_OVER_NEW_CONNECTION +FILE_STATE_REMOVE +file #1, 124, 0 +[orig_h=192.168.1.77, orig_p=57655/tcp, resp_h=209.197.168.151, resp_p=1024/tcp] +source: IRC_DATA +MD5: 35288fd50a74c7d675909ff83424d7a1 +SHA1: 8a98f177cb47e6bf771bf57c2f7e94c4b5e79ffa +SHA256: b24dde52b933a0d76e885ab418cb6d697b14a4e2fef45fce66e12ecc5a6a81aa FILE_STATE_REMOVE file #0, 42208, 0 [orig_h=192.168.1.77, orig_p=57655/tcp, resp_h=209.197.168.151, resp_p=1024/tcp] diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.writer-path-conflict/http.log b/testing/btest/Baseline/scripts.base.frameworks.logging.writer-path-conflict/http.log index 6b7bea88c9..8f9d553d9a 100644 --- 
a/testing/btest/Baseline/scripts.base.frameworks.logging.writer-path-conflict/http.log +++ b/testing/btest/Baseline/scripts.base.frameworks.logging.writer-path-conflict/http.log @@ -3,9 +3,9 @@ #empty_field (empty) #unset_field - #path http -#open 2013-05-21-21-11-23 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied mime_type md5 extracted_request_files extracted_response_files -#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] string string vector[string] vector[string] +#open 2013-07-23-05-48-35 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied orig_fuids orig_mime_types resp_fuids resp_mime_types +#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] vector[string] vector[string] vector[string] vector[string] 1300475168.784020 j4u32Pc5bif 141.142.220.118 48649 208.80.152.118 80 1 GET bits.wikimedia.org /skins-1.5/monobook/main.css http://www.wikipedia.org/ Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.15) Gecko/20110303 Ubuntu/10.04 (lucid) Firefox/3.6.15 0 0 304 Not Modified - - - (empty) - - - - - - - 1300475168.916018 VW0XPVINV8a 141.142.220.118 49997 208.80.152.3 80 1 GET upload.wikimedia.org /wikipedia/commons/6/63/Wikipedia-logo.png http://www.wikipedia.org/ Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.15) Gecko/20110303 Ubuntu/10.04 (lucid) Firefox/3.6.15 0 0 304 Not Modified - - - (empty) - - - - - - - 1300475168.916183 3PKsZ2Uye21 141.142.220.118 49996 208.80.152.3 80 1 GET upload.wikimedia.org /wikipedia/commons/thumb/b/bb/Wikipedia_wordmark.svg/174px-Wikipedia_wordmark.svg.png http://www.wikipedia.org/ Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.15) Gecko/20110303 Ubuntu/10.04 (lucid) Firefox/3.6.15 0 0 304 Not Modified - - - (empty) - - - - - - - @@ -20,4 +20,4 @@ 1300475169.014619 Tw8jXtpTGu6 141.142.220.118 50000 208.80.152.3 80 2 GET upload.wikimedia.org /wikipedia/commons/thumb/4/4a/Commons-logo.svg/35px-Commons-logo.svg.png http://www.wikipedia.org/ Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.15) Gecko/20110303 Ubuntu/10.04 (lucid) Firefox/3.6.15 0 0 304 Not Modified - - - (empty) - - - - - - - 1300475169.014593 P654jzLoe3a 141.142.220.118 49999 208.80.152.3 80 2 GET upload.wikimedia.org /wikipedia/commons/thumb/9/91/Wikiversity-logo.svg/35px-Wikiversity-logo.svg.png http://www.wikipedia.org/ Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.15) Gecko/20110303 Ubuntu/10.04 (lucid) Firefox/3.6.15 0 0 304 Not Modified - - - (empty) - - - - - - - 1300475169.014927 0Q4FH8sESw5 141.142.220.118 50001 208.80.152.3 80 2 GET upload.wikimedia.org /wikipedia/commons/thumb/7/75/Wikimedia_Community_Logo.svg/35px-Wikimedia_Community_Logo.svg.png http://www.wikipedia.org/ Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.15) Gecko/20110303 Ubuntu/10.04 (lucid) Firefox/3.6.15 0 0 304 Not Modified - - - (empty) - - - - - - - -#close 2013-05-21-21-11-23 +#close 2013-07-23-05-48-35 diff --git a/testing/btest/Baseline/scripts.base.protocols.ftp.gridftp/notice.log 
b/testing/btest/Baseline/scripts.base.protocols.ftp.gridftp/notice.log index 051f1c6266..04c80407f6 100644 --- a/testing/btest/Baseline/scripts.base.protocols.ftp.gridftp/notice.log +++ b/testing/btest/Baseline/scripts.base.protocols.ftp.gridftp/notice.log @@ -3,8 +3,8 @@ #empty_field (empty) #unset_field - #path notice -#open 2013-04-02-02-19-21 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p proto note msg sub src dst p n peer_descr actions suppress_for dropped remote_location.country_code remote_location.region remote_location.city remote_location.latitude remote_location.longitude -#types time string addr port addr port enum enum string string addr addr port count string table[enum] interval bool string string string double double -1348168976.558309 arKYeMETxOg 192.168.57.103 35391 192.168.57.101 55968 tcp GridFTP::Data_Channel GridFTP data channel over threshold 2 bytes - 192.168.57.103 192.168.57.101 55968 - bro Notice::ACTION_LOG 3600.000000 F - - - - - -#close 2013-04-02-02-19-21 +#open 2013-07-23-05-19-25 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p fuid file_mime_type file_desc proto note msg sub src dst p n peer_descr actions suppress_for dropped remote_location.country_code remote_location.region remote_location.city remote_location.latitude remote_location.longitude +#types time string addr port addr port string string string enum enum string string addr addr port count string table[enum] interval bool string string string double double +1348168976.558309 arKYeMETxOg 192.168.57.103 35391 192.168.57.101 55968 - - - tcp GridFTP::Data_Channel GridFTP data channel over threshold 2 bytes - 192.168.57.103 192.168.57.101 55968 - bro Notice::ACTION_LOG 3600.000000 F - - - - - +#close 2013-07-23-05-19-25 diff --git a/testing/btest/Baseline/scripts.base.protocols.smtp.mime/smtp_entities.log b/testing/btest/Baseline/scripts.base.protocols.smtp.mime/smtp_entities.log deleted file mode 100644 index 135c644855..0000000000 --- a/testing/btest/Baseline/scripts.base.protocols.smtp.mime/smtp_entities.log +++ /dev/null @@ -1,12 +0,0 @@ -#separator \x09 -#set_separator , -#empty_field (empty) -#unset_field - -#path smtp_entities -#open 2013-03-26-20-39-07 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth filename content_len mime_type md5 extraction_file excerpt -#types time string addr port addr port count string count string string string string -1254722770.692743 arKYeMETxOg 10.10.1.4 1470 74.53.140.153 25 1 - 79 text/plain 92bca2e6cdcde73647125da7dccbdd07 - (empty) -1254722770.692743 arKYeMETxOg 10.10.1.4 1470 74.53.140.153 25 1 - 1918 text/html - - (empty) -1254722770.692804 arKYeMETxOg 10.10.1.4 1470 74.53.140.153 25 1 NEWS.txt 10823 text/plain a968bb0f9f9d95835b2e74c845877e87 - (empty) -#close 2013-03-26-20-39-07 diff --git a/testing/btest/scripts/base/protocols/smtp/mime.test b/testing/btest/scripts/base/protocols/smtp/mime.test deleted file mode 100644 index 8e7a336987..0000000000 --- a/testing/btest/scripts/base/protocols/smtp/mime.test +++ /dev/null @@ -1,6 +0,0 @@ -# @TEST-EXEC: bro -r $TRACES/smtp.trace %INPUT -# @TEST-EXEC: btest-diff smtp_entities.log - -@load base/protocols/smtp - -redef SMTP::generate_md5=/text\/plain/; diff --git a/testing/scripts/file-analysis-test.bro b/testing/scripts/file-analysis-test.bro index cf2bbf2d59..8fe78b218e 100644 --- a/testing/scripts/file-analysis-test.bro +++ b/testing/scripts/file-analysis-test.bro @@ -1,7 +1,7 @@ global test_file_analysis_source: string = "" &redef; -global test_file_analyzers: 
set[Files::AnalyzerArgs]; +global test_file_analyzers: set[Files::Tag]; global test_get_file_name: function(f: fa_file): string = function(f: fa_file): string { return ""; } &redef; @@ -46,11 +46,11 @@ event file_new(f: fa_file) local filename: string = test_get_file_name(f); if ( filename != "" ) - Files::add_analyzer(f, [$tag=Files::ANALYZER_EXTRACT, - $extract_filename=filename]); - Files::add_analyzer(f, [$tag=Files::ANALYZER_DATA_EVENT, - $chunk_event=file_chunk, - $stream_event=file_stream]); + Files::add_analyzer(f, Files::ANALYZER_EXTRACT, + [$extract_filename=filename]); + Files::add_analyzer(f, Files::ANALYZER_DATA_EVENT, + [$chunk_event=file_chunk, + $stream_event=file_stream]); } if ( f?$bof_buffer ) @@ -106,7 +106,7 @@ event file_state_remove(f: fa_file) event bro_init() { - add test_file_analyzers[[$tag=Files::ANALYZER_MD5]]; - add test_file_analyzers[[$tag=Files::ANALYZER_SHA1]]; - add test_file_analyzers[[$tag=Files::ANALYZER_SHA256]]; + add test_file_analyzers[Files::ANALYZER_MD5]; + add test_file_analyzers[Files::ANALYZER_SHA1]; + add test_file_analyzers[Files::ANALYZER_SHA256]; } From 73eb87a41ef5d79f5f84d8aebe42ce9b61aadc5a Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Tue, 23 Jul 2013 14:16:39 -0500 Subject: [PATCH 093/118] Exec module changes/fixes. - Give Dir::monitor() a param for the polling interval, so different dirs can be monitored at different frequencies. - Fix race in Exec::run() when reading extra output files produced by a process -- it was possible for Exec::run() to return before all extra output files had been fully read. - Add test cases. --- scripts/base/utils/active-http.bro | 3 + scripts/base/utils/dir.bro | 34 +++++--- scripts/base/utils/exec.bro | 85 ++++++++++++------- .../bro..stdout | 5 ++ .../scripts.base.utils.dir/bro..stdout | 10 +++ .../scripts.base.utils.exec/bro..stdout | 7 ++ .../btest/scripts/base/utils/active-http.test | 25 ++++++ testing/btest/scripts/base/utils/dir.test | 58 +++++++++++++ testing/btest/scripts/base/utils/exec.test | 74 ++++++++++++++++ testing/scripts/httpd.py | 40 +++++++++ 10 files changed, 299 insertions(+), 42 deletions(-) create mode 100644 testing/btest/Baseline/scripts.base.utils.active-http/bro..stdout create mode 100644 testing/btest/Baseline/scripts.base.utils.dir/bro..stdout create mode 100644 testing/btest/Baseline/scripts.base.utils.exec/bro..stdout create mode 100644 testing/btest/scripts/base/utils/active-http.test create mode 100644 testing/btest/scripts/base/utils/dir.test create mode 100644 testing/btest/scripts/base/utils/exec.test create mode 100755 testing/scripts/httpd.py diff --git a/scripts/base/utils/active-http.bro b/scripts/base/utils/active-http.bro index 3f475a378b..eb9a212221 100644 --- a/scripts/base/utils/active-http.bro +++ b/scripts/base/utils/active-http.bro @@ -90,7 +90,10 @@ function request(req: Request): ActiveHTTP::Response { # If there is no response line then nothing else will work either. if ( ! (result?$files && headersfile in result$files) ) + { Reporter::error(fmt("There was a failure when requesting \"%s\" with ActiveHTTP.", req$url)); + return resp; + } local headers = result$files[headersfile]; for ( i in headers ) diff --git a/scripts/base/utils/dir.bro b/scripts/base/utils/dir.bro index b154fe000e..3329dc6306 100644 --- a/scripts/base/utils/dir.bro +++ b/scripts/base/utils/dir.bro @@ -5,6 +5,10 @@ module Dir; export { + ## The default interval this module checks for files in directories when + ## using the :bro:see:`Dir::monitor` function. 
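# Illustrative sketch only, not part of the patch; the directory paths and
# callback below are hypothetical.  With the poll_interval parameter that the
# commit message above describes (and that this hunk adds to Dir::monitor),
# different directories can be watched at different frequencies:
function on_new_file(fname: string)
	{
	print "new file", fname;
	}

event bro_init()
	{
	Dir::monitor("/tmp/spool", on_new_file, 5sec);
	Dir::monitor("/tmp/archive", on_new_file, 1min);
	}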
+ const polling_interval = 30sec &redef; + ## Register a directory to monitor with a callback that is called ## every time a previously unseen file is seen. If a file is deleted ## and seen to be gone, the file is available for being seen again in @@ -14,14 +18,15 @@ export { ## ## callback: Callback that gets executed with each file name ## that is found. Filenames are provided with the full path. - global monitor: function(dir: string, callback: function(fname: string)); - - ## The interval this module checks for files in directories when using - ## the :bro:see:`Dir::monitor` function. - const polling_interval = 30sec &redef; + ## + ## poll_interval: An interval at which to check for new files. + global monitor: function(dir: string, callback: function(fname: string), + poll_interval: interval &default=polling_interval); } -event Dir::monitor_ev(dir: string, last_files: set[string], callback: function(fname: string)) +event Dir::monitor_ev(dir: string, last_files: set[string], + callback: function(fname: string), + poll_interval: interval) { when ( local result = Exec::run([$cmd=fmt("ls -i \"%s/\"", str_shell_escape(dir))]) ) { @@ -32,7 +37,11 @@ event Dir::monitor_ev(dir: string, last_files: set[string], callback: function(f } local current_files: set[string] = set(); - local files = result$stdout; + local files: vector of string = vector(); + + if ( result?$stdout ) + files = result$stdout; + for ( i in files ) { local parts = split1(files[i], / /); @@ -40,13 +49,18 @@ event Dir::monitor_ev(dir: string, last_files: set[string], callback: function(f callback(build_path_compressed(dir, parts[2])); add current_files[parts[1]]; } - schedule polling_interval { Dir::monitor_ev(dir, current_files, callback) }; + + schedule poll_interval + { + Dir::monitor_ev(dir, current_files, callback, poll_interval) + }; } } -function monitor(dir: string, callback: function(fname: string)) +function monitor(dir: string, callback: function(fname: string), + poll_interval: interval &default=polling_interval) { - event Dir::monitor_ev(dir, set(), callback); + event Dir::monitor_ev(dir, set(), callback, poll_interval); } diff --git a/scripts/base/utils/exec.bro b/scripts/base/utils/exec.bro index f896a68064..4ffae29303 100644 --- a/scripts/base/utils/exec.bro +++ b/scripts/base/utils/exec.bro @@ -14,6 +14,8 @@ export { ## If additional files are required to be read in as part of the output ## of the command they can be defined here. read_files: set[string] &optional; + # The unique id for tracking executors. + uid: string &default=unique_id(""); }; type Result: record { @@ -44,14 +46,11 @@ export { const tmp_dir = "/tmp" &redef; } -redef record Command += { - # The unique id for tracking executors. - uid: string &optional; -}; +# Indexed by command uid. 
+global results: table[string] of Result; +global pending_commands: set[string]; +global pending_files: table[string] of set[string]; -global results: table[string] of Result = table(); -global finished_commands: set[string]; -global currently_tracked_files: set[string] = set(); type OneLine: record { s: string; is_stderr: bool; @@ -96,39 +95,63 @@ event Exec::file_line(description: Input::EventDescription, tpe: Input::Event, s result$files[track_file][|result$files[track_file]|] = s; } +event Input::end_of_data(name: string, source:string) + { + local parts = split1(name, /_/); + name = parts[1]; + + if ( name !in pending_commands || |parts| < 2 ) + return; + + local track_file = parts[2]; + + Input::remove(name); + + if ( name !in pending_files ) + delete pending_commands[name]; + else + { + delete pending_files[name][track_file]; + if ( |pending_files[name]| == 0 ) + delete pending_commands[name]; + system(fmt("rm \"%s\"", str_shell_escape(track_file))); + } + } + event InputRaw::process_finished(name: string, source:string, exit_code:count, signal_exit:bool) { + if ( name !in pending_commands ) + return; + + Input::remove(name); results[name]$exit_code = exit_code; results[name]$signal_exit = signal_exit; - Input::remove(name); - # Indicate to the "when" async watcher that this command is done. - add finished_commands[name]; - } - -event Exec::start_watching_file(uid: string, read_file: string) - { - Input::add_event([$source=fmt("%s", read_file), - $name=fmt("%s_%s", uid, read_file), - $reader=Input::READER_RAW, - $mode=Input::STREAM, - $want_record=F, - $fields=FileLine, - $ev=Exec::file_line]); + if ( name !in pending_files || |pending_files[name]| == 0 ) + # No extra files to read, command is done. + delete pending_commands[name]; + else + for ( read_file in pending_files[name] ) + Input::add_event([$source=fmt("%s", read_file), + $name=fmt("%s_%s", name, read_file), + $reader=Input::READER_RAW, + $want_record=F, + $fields=FileLine, + $ev=Exec::file_line]); } function run(cmd: Command): Result { - cmd$uid = unique_id(""); + add pending_commands[cmd$uid]; results[cmd$uid] = []; if ( cmd?$read_files ) { for ( read_file in cmd$read_files ) { - add currently_tracked_files[read_file]; - system(fmt("touch \"%s\" 2>/dev/null", str_shell_escape(read_file))); - schedule 1msec { Exec::start_watching_file(cmd$uid, read_file) }; + if ( cmd$uid !in pending_files ) + pending_files[cmd$uid] = set(); + add pending_files[cmd$uid][read_file]; } } @@ -144,9 +167,8 @@ function run(cmd: Command): Result $want_record=F, $config=config_strings]); - return when ( cmd$uid in finished_commands ) + return when ( cmd$uid !in pending_commands ) { - delete finished_commands[cmd$uid]; local result = results[cmd$uid]; delete results[cmd$uid]; return result; @@ -155,9 +177,8 @@ function run(cmd: Command): Result event bro_done() { - # We are punting here and just deleting any files that haven't been processed yet. - for ( fname in currently_tracked_files ) - { - system(fmt("rm \"%s\"", str_shell_escape(fname))); - } + # We are punting here and just deleting any unprocessed files. 
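# Illustrative sketch only, not part of the patch; the script name and output
# file are hypothetical.  Exec::run() is asynchronous and must be used inside
# a "when" statement, and with the race fix in this patch the files listed in
# $read_files are fully read before the result is delivered:
event bro_init()
	{
	when ( local res = Exec::run([$cmd="bash myscript.sh", $read_files=set("extra.out")]) )
		{
		print res$exit_code;

		if ( res?$stdout )
			print res$stdout;

		if ( res?$files && "extra.out" in res$files )
			print res$files["extra.out"];
		}
	}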
+ for ( uid in pending_files ) + for ( fname in pending_files[uid] ) + system(fmt("rm \"%s\"", str_shell_escape(fname))); } diff --git a/testing/btest/Baseline/scripts.base.utils.active-http/bro..stdout b/testing/btest/Baseline/scripts.base.utils.active-http/bro..stdout new file mode 100644 index 0000000000..0284eb19b3 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.utils.active-http/bro..stdout @@ -0,0 +1,5 @@ +[code=200, msg=OK^M, body=It works!, headers={ +[Server] = 1.0, +[Content-type] = text/plain, +[Date] = July 22, 2013 +}] diff --git a/testing/btest/Baseline/scripts.base.utils.dir/bro..stdout b/testing/btest/Baseline/scripts.base.utils.dir/bro..stdout new file mode 100644 index 0000000000..c3103b7f64 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.utils.dir/bro..stdout @@ -0,0 +1,10 @@ +new_file1, ../testdir/bye +new_file1, ../testdir/hi +new_file1, ../testdir/howsitgoing +new_file2, ../testdir/bye +new_file2, ../testdir/hi +new_file2, ../testdir/howsitgoing +new_file1, ../testdir/bye +new_file1, ../testdir/newone +new_file2, ../testdir/bye +new_file2, ../testdir/newone diff --git a/testing/btest/Baseline/scripts.base.utils.exec/bro..stdout b/testing/btest/Baseline/scripts.base.utils.exec/bro..stdout new file mode 100644 index 0000000000..5352d15d18 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.utils.exec/bro..stdout @@ -0,0 +1,7 @@ +test1, [exit_code=0, signal_exit=F, stdout=[done, exit, stop], stderr=, files={ +[out1] = [insert text here, and here], +[out2] = [insert more text here, and there] +}] +test2, [exit_code=1, signal_exit=F, stdout=[here's something on stdout, some more stdout, last stdout], stderr=[and some stderr, more stderr, last stderr], files=] +test3, [exit_code=9, signal_exit=F, stdout=[FML], stderr=, files=] +test4, [exit_code=0, signal_exit=F, stdout=[hibye], stderr=, files=] diff --git a/testing/btest/scripts/base/utils/active-http.test b/testing/btest/scripts/base/utils/active-http.test new file mode 100644 index 0000000000..9ac762b9b7 --- /dev/null +++ b/testing/btest/scripts/base/utils/active-http.test @@ -0,0 +1,25 @@ +# @TEST-EXEC: btest-bg-run httpd python $SCRIPTS/httpd.py --max 1 +# @TEST-EXEC: sleep 3 +# @TEST-EXEC: btest-bg-run bro bro -b %INPUT +# @TEST-EXEC: btest-bg-wait 15 +# @TEST-EXEC: btest-diff bro/.stdout + +@load base/utils/active-http + +redef exit_only_after_terminate = T; + +event bro_init() + { + local req = ActiveHTTP::Request($url="localhost:32123"); + + when ( local resp = ActiveHTTP::request(req) ) + { + print resp; + terminate(); + } + timeout 1min + { + print "HTTP request timeout"; + terminate(); + } + } diff --git a/testing/btest/scripts/base/utils/dir.test b/testing/btest/scripts/base/utils/dir.test new file mode 100644 index 0000000000..44fee3860f --- /dev/null +++ b/testing/btest/scripts/base/utils/dir.test @@ -0,0 +1,58 @@ +# @TEST-EXEC: btest-bg-run bro bro -b ../dirtest.bro +# @TEST-EXEC: btest-bg-wait 10 +# @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-sort btest-diff bro/.stdout + +@TEST-START-FILE dirtest.bro + +@load base/utils/dir + +redef exit_only_after_terminate = T; + +global c: count = 0; + +function check_terminate_condition() + { + c += 1; + + if ( c == 10 ) + terminate(); + } + +function new_file1(fname: string) + { + print "new_file1", fname; + check_terminate_condition(); + } + +function new_file2(fname: string) + { + print "new_file2", fname; + check_terminate_condition(); + } + +event change_things() + { + system("touch ../testdir/newone"); + system("rm ../testdir/bye && touch 
../testdir/bye"); + } + +event bro_init() + { + Dir::monitor("../testdir", new_file1, .5sec); + Dir::monitor("../testdir", new_file2, 1sec); + schedule 1sec { change_things() }; + } + +@TEST-END-FILE + +@TEST-START-FILE testdir/hi +123 +@TEST-END-FILE + +@TEST-START-FILE testdir/howsitgoing +abc +@TEST-END-FILE + +@TEST-START-FILE testdir/bye +!@# +@TEST-END-FILE diff --git a/testing/btest/scripts/base/utils/exec.test b/testing/btest/scripts/base/utils/exec.test new file mode 100644 index 0000000000..8876f0f49b --- /dev/null +++ b/testing/btest/scripts/base/utils/exec.test @@ -0,0 +1,74 @@ +# @TEST-EXEC: btest-bg-run bro bro -b ../exectest.bro +# @TEST-EXEC: btest-bg-wait 10 +# @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-sort btest-diff bro/.stdout + +@TEST-START-FILE exectest.bro + +@load base/utils/exec + +redef exit_only_after_terminate = T; + +global c: count = 0; + +function check_exit_condition() + { + c += 1; + + if ( c == 4 ) + terminate(); + } + +function test_cmd(label: string, cmd: Exec::Command) + { + when ( local result = Exec::run(cmd) ) + { + print label, result; + check_exit_condition(); + } + } + +event bro_init() + { + test_cmd("test1", [$cmd="bash ../somescript.sh", + $read_files=set("out1", "out2")]); + test_cmd("test2", [$cmd="bash ../nofiles.sh"]); + test_cmd("test3", [$cmd="bash ../suicide.sh"]); + test_cmd("test4", [$cmd="bash ../stdin.sh", $stdin="hibye"]); + } + +@TEST-END-FILE + +@TEST-START-FILE somescript.sh +#! /usr/bin/env bash +echo "insert text here" > out1 +echo "and here" >> out1 +echo "insert more text here" > out2 +echo "and there" >> out2 +echo "done" +echo "exit" +echo "stop" +@TEST-END-FILE + +@TEST-START-FILE nofiles.sh +#! /usr/bin/env bash +echo "here's something on stdout" +echo "some more stdout" +echo "last stdout" +echo "and some stderr" 1>&2 +echo "more stderr" 1>&2 +echo "last stderr" 1>&2 +exit 1 +@TEST-END-FILE + +@TEST-START-FILE suicide.sh +#! /usr/bin/env bash +echo "FML" +kill -9 $$ +echo "nope" +@TEST-END-FILE + +@TEST-START-FILE stdin.sh +#! /usr/bin/env bash +read -r line +echo "$line" +@TEST-END-FILE diff --git a/testing/scripts/httpd.py b/testing/scripts/httpd.py new file mode 100755 index 0000000000..0732614bc2 --- /dev/null +++ b/testing/scripts/httpd.py @@ -0,0 +1,40 @@ +#! 
/usr/bin/env python + +import BaseHTTPServer + +class MyRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler): + + def do_GET(self): + self.send_response(200) + self.send_header("Content-type", "text/plain") + self.end_headers() + self.wfile.write("It works!") + + def version_string(self): + return "1.0" + + def date_time_string(self): + return "July 22, 2013" + + +if __name__ == "__main__": + from optparse import OptionParser + p = OptionParser() + p.add_option("-a", "--addr", type="string", default="localhost", + help=("listen on given address (numeric IP or host name), " + "an empty string (the default) means INADDR_ANY")) + p.add_option("-p", "--port", type="int", default=32123, + help="listen on given TCP port number") + p.add_option("-m", "--max", type="int", default=-1, + help="max number of requests to respond to, -1 means no max") + options, args = p.parse_args() + + httpd = BaseHTTPServer.HTTPServer((options.addr, options.port), + MyRequestHandler) + if options.max == -1: + httpd.serve_forever() + else: + served_count = 0 + while served_count != options.max: + httpd.handle_request() + served_count += 1 From 474107fe40c22dec977d4e9ee3dad0edcbc02344 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Tue, 23 Jul 2013 17:16:57 -0700 Subject: [PATCH 094/118] Broifying the code. Also extending API documentation a bit more and fixing a memory leak. --- src/Func.cc | 4 +- src/H3.h | 4 +- src/OpaqueVal.cc | 159 ++-- src/OpaqueVal.h | 67 +- src/Type.cc | 1 + src/probabilistic/BitVector.cc | 777 ++++++++++-------- src/probabilistic/BitVector.h | 575 +++++++------ src/probabilistic/BloomFilter.cc | 229 +++--- src/probabilistic/BloomFilter.h | 229 ++++-- src/probabilistic/CounterVector.cc | 244 +++--- src/probabilistic/CounterVector.h | 208 ++--- src/probabilistic/Hasher.cc | 63 +- src/probabilistic/Hasher.h | 262 +++--- src/probabilistic/bloom-filter.bif | 122 +-- src/util.cc | 20 +- src/util.h | 8 +- .../btest/Baseline/bifs.bloomfilter/output | 6 + testing/btest/bifs/bloomfilter.bro | 2 +- 18 files changed, 1651 insertions(+), 1329 deletions(-) diff --git a/src/Func.cc b/src/Func.cc index a0d2299933..483699668f 100644 --- a/src/Func.cc +++ b/src/Func.cc @@ -560,7 +560,7 @@ void builtin_error(const char* msg, BroObj* arg) #include "reporter.bif.func_def" #include "strings.bif.func_def" -// TODO: Add a nicer mechanism to pull subdirectory bifs automatically. +// TODO: Add a nicer mechanism to pull in subdirectory bifs automatically. #include "probabilistic/bloom-filter.bif.h" void init_builtin_funcs() @@ -577,7 +577,7 @@ void init_builtin_funcs() #include "reporter.bif.func_init" #include "strings.bif.func_init" -// TODO: Add a nicer mechanism to pull subdirectory bifs automatically. +// TODO: Add a nicer mechanism to pull in subdirectory bifs automatically. 
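The bloom-filter.bif pulled in here is what exposes the probabilistic data structures to the script layer. For orientation, a rough sketch of that script-level interface follows; the bif names and signatures are assumed from the API as it was later released (the diffstat's testing/btest/bifs/bloomfilter.bro exercises them) and may not match this exact commit:

	# Hypothetical usage; function names assumed, not taken from this patch.
	event bro_init()
		{
		local bf1 = bloomfilter_basic_init(0.01, 1000);  # target FP rate, capacity
		local bf2 = bloomfilter_basic_init(0.01, 1000);

		bloomfilter_add(bf1, "foo");
		bloomfilter_add(bf2, "bar");

		# Lookups return a count; 0 means "definitely not in the set".
		print bloomfilter_lookup(bf1, "foo");
		print bloomfilter_lookup(bf1, "baz");

		# Filters built with identical parameters and element type can be merged.
		local both = bloomfilter_merge(bf1, bf2);
		print bloomfilter_lookup(both, "bar");
		}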
#include "probabilistic/bloom-filter.bif.init.cc" did_builtin_init = true; diff --git a/src/H3.h b/src/H3.h index 123dd6f374..8ea5848816 100644 --- a/src/H3.h +++ b/src/H3.h @@ -100,8 +100,8 @@ public: // loop optmized with Duff's Device register unsigned n = (size + 7) / 8; switch ( size % 8 ) { - case 0: do { result ^= byte_lookup[offset++][*p++]; - case 7: result ^= byte_lookup[offset++][*p++]; + case 0: do { result ^= byte_lookup[offset++][*p++]; + case 7: result ^= byte_lookup[offset++][*p++]; case 6: result ^= byte_lookup[offset++][*p++]; case 5: result ^= byte_lookup[offset++][*p++]; case 4: result ^= byte_lookup[offset++][*p++]; diff --git a/src/OpaqueVal.cc b/src/OpaqueVal.cc index 04032b2cfc..efdd890f70 100644 --- a/src/OpaqueVal.cc +++ b/src/OpaqueVal.cc @@ -1,5 +1,6 @@ -#include "OpaqueVal.h" +// See the file "COPYING" in the main distribution directory for copyright. +#include "OpaqueVal.h" #include "NetVar.h" #include "Reporter.h" #include "Serializer.h" @@ -518,87 +519,89 @@ bool EntropyVal::DoUnserialize(UnserialInfo* info) } BloomFilterVal::BloomFilterVal() - : OpaqueVal(bloomfilter_type), - type_(NULL), - hash_(NULL), - bloom_filter_(NULL) + : OpaqueVal(bloomfilter_type) { + type = 0; + hash = 0; + bloom_filter = 0; } BloomFilterVal::BloomFilterVal(OpaqueType* t) - : OpaqueVal(t), - type_(NULL), - hash_(NULL), - bloom_filter_(NULL) + : OpaqueVal(t) { + type = 0; + hash = 0; + bloom_filter = 0; } BloomFilterVal::BloomFilterVal(probabilistic::BloomFilter* bf) - : OpaqueVal(bloomfilter_type), - type_(NULL), - hash_(NULL), - bloom_filter_(bf) + : OpaqueVal(bloomfilter_type) { + type = 0; + hash = 0; + bloom_filter = bf; } -bool BloomFilterVal::Typify(BroType* type) - { - if ( type_ ) - return false; - type_ = type; - type_->Ref(); - TypeList* tl = new TypeList(type_); - tl->Append(type_); - hash_ = new CompositeHash(tl); - Unref(tl); - return true; - } +bool BloomFilterVal::Typify(BroType* arg_type) + { + if ( type ) + return false; + + type = arg_type; + type->Ref(); + + TypeList* tl = new TypeList(type); + tl->Append(type); + hash = new CompositeHash(tl); + Unref(tl); + + return true; + } BroType* BloomFilterVal::Type() const - { - return type_; - } + { + return type; + } void BloomFilterVal::Add(const Val* val) - { - HashKey* key = hash_->ComputeHash(val, 1); - bloom_filter_->Add(key->Hash()); - } + { + HashKey* key = hash->ComputeHash(val, 1); + bloom_filter->Add(key->Hash()); + delete key; + } size_t BloomFilterVal::Count(const Val* val) const - { - HashKey* key = hash_->ComputeHash(val, 1); - return bloom_filter_->Count(key->Hash()); - } + { + HashKey* key = hash->ComputeHash(val, 1); + size_t cnt = bloom_filter->Count(key->Hash()); + delete key; + return cnt; + } BloomFilterVal* BloomFilterVal::Merge(const BloomFilterVal* x, const BloomFilterVal* y) - { - if ( x->Type() != y->Type() ) - { - reporter->InternalError("cannot merge Bloom filters with different types"); - return NULL; - } + { + if ( ! 
same_type(x->Type(), y->Type()) ) + reporter->InternalError("cannot merge Bloom filters with different types"); - BloomFilterVal* result; - if ( (result = DoMerge(x, y)) ) - return result; - else if ( (result = DoMerge(x, y)) ) - return result; + BloomFilterVal* result; - reporter->InternalError("failed to merge Bloom filters"); - return NULL; - } + if ( (result = DoMerge(x, y)) ) + return result; + + else if ( (result = DoMerge(x, y)) ) + return result; + + reporter->InternalError("failed to merge Bloom filters"); + return 0; + } BloomFilterVal::~BloomFilterVal() - { - if ( type_ ) - Unref(type_); - if ( hash_ ) - delete hash_; - if ( bloom_filter_ ) - delete bloom_filter_; - } + { + Unref(type); + delete hash; + delete bloom_filter; + } IMPLEMENT_SERIAL(BloomFilterVal, SER_BLOOMFILTER_VAL); @@ -606,14 +609,16 @@ bool BloomFilterVal::DoSerialize(SerialInfo* info) const { DO_SERIALIZE(SER_BLOOMFILTER_VAL, OpaqueVal); - bool is_typed = type_ != NULL; - if ( ! SERIALIZE(is_typed) ) - return false; - if ( is_typed && ! type_->Serialize(info) ) - return false; + bool is_typed = (type != 0); - return bloom_filter_->Serialize(info); - } + if ( ! SERIALIZE(is_typed) ) + return false; + + if ( is_typed && ! type->Serialize(info) ) + return false; + + return bloom_filter->Serialize(info); + } bool BloomFilterVal::DoUnserialize(UnserialInfo* info) { @@ -621,15 +626,17 @@ bool BloomFilterVal::DoUnserialize(UnserialInfo* info) bool is_typed; if ( ! UNSERIALIZE(&is_typed) ) - return false; - if ( is_typed ) - { - BroType* type = BroType::Unserialize(info); - if ( ! Typify(type) ) - return false; - Unref(type); - } + return false; - bloom_filter_ = probabilistic::BloomFilter::Unserialize(info); - return bloom_filter_ != NULL; - } + if ( is_typed ) + { + BroType* type = BroType::Unserialize(info); + if ( ! Typify(type) ) + return false; + + Unref(type); + } + + bloom_filter = probabilistic::BloomFilter::Unserialize(info); + return bloom_filter != 0; + } diff --git a/src/OpaqueVal.h b/src/OpaqueVal.h index 5ccf73e11f..ea704cb70a 100644 --- a/src/OpaqueVal.h +++ b/src/OpaqueVal.h @@ -116,21 +116,19 @@ private: }; class BloomFilterVal : public OpaqueVal { - BloomFilterVal(const BloomFilterVal&); - BloomFilterVal& operator=(const BloomFilterVal&); public: - static BloomFilterVal* Merge(const BloomFilterVal* x, - const BloomFilterVal* y); - explicit BloomFilterVal(probabilistic::BloomFilter* bf); - ~BloomFilterVal(); + virtual ~BloomFilterVal(); - bool Typify(BroType* type); BroType* Type() const; + bool Typify(BroType* type); void Add(const Val* val); size_t Count(const Val* val) const; + static BloomFilterVal* Merge(const BloomFilterVal* x, + const BloomFilterVal* y); + protected: friend class Val; BloomFilterVal(); @@ -139,32 +137,35 @@ protected: DECLARE_SERIAL(BloomFilterVal); private: - template - static BloomFilterVal* DoMerge(const BloomFilterVal* x, - const BloomFilterVal* y) - { - if ( typeid(*x->bloom_filter_) != typeid(*y->bloom_filter_) ) - { - reporter->InternalError("cannot merge different Bloom filter types"); - return NULL; - } - if ( typeid(T) != typeid(*x->bloom_filter_) ) - return NULL; - const T* a = static_cast(x->bloom_filter_); - const T* b = static_cast(y->bloom_filter_); - BloomFilterVal* merged = new BloomFilterVal(T::Merge(a, b)); - assert(merged); - if ( ! merged->Typify(x->Type()) ) - { - reporter->InternalError("failed to set type on merged Bloom filter"); - return NULL; - } - return merged; - } + // Disable. 
+ BloomFilterVal(const BloomFilterVal&); + BloomFilterVal& operator=(const BloomFilterVal&); - BroType* type_; - CompositeHash* hash_; - probabilistic::BloomFilter* bloom_filter_; -}; + template + static BloomFilterVal* DoMerge(const BloomFilterVal* x, + const BloomFilterVal* y) + { + if ( typeid(*x->bloom_filter) != typeid(*y->bloom_filter) ) + reporter->InternalError("cannot merge different Bloom filter types"); + + if ( typeid(T) != typeid(*x->bloom_filter) ) + return 0; + + const T* a = static_cast(x->bloom_filter); + const T* b = static_cast(y->bloom_filter); + + BloomFilterVal* merged = new BloomFilterVal(T::Merge(a, b)); + assert(merged); + + if ( ! merged->Typify(x->Type()) ) + reporter->InternalError("failed to set type on merged Bloom filter"); + + return merged; + } + + BroType* type; + CompositeHash* hash; + probabilistic::BloomFilter* bloom_filter; + }; #endif diff --git a/src/Type.cc b/src/Type.cc index 57d9d0e6e5..563bc5afbd 100644 --- a/src/Type.cc +++ b/src/Type.cc @@ -1321,6 +1321,7 @@ bool OpaqueType::DoUnserialize(UnserialInfo* info) const char* n; if ( ! UNSERIALIZE_STR(&n, 0) ) return false; + name = n; delete [] n; diff --git a/src/probabilistic/BitVector.cc b/src/probabilistic/BitVector.cc index 67714fe7d0..98f008b24b 100644 --- a/src/probabilistic/BitVector.cc +++ b/src/probabilistic/BitVector.cc @@ -1,3 +1,5 @@ +// See the file "COPYING" in the main distribution directory for copyright. + #include "BitVector.h" #include @@ -8,505 +10,558 @@ using namespace probabilistic; BitVector::size_type BitVector::npos = static_cast(-1); BitVector::block_type BitVector::bits_per_block = - std::numeric_limits::digits; + std::numeric_limits::digits; namespace { uint8_t count_table[] = { - 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, - 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, - 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, - 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, - 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, - 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, - 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, - 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5, - 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3, - 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, - 6, 7, 6, 7, 7, 8 + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, + 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, + 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, + 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, + 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, + 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, + 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, + 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5, + 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3, + 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, + 6, 7, 6, 7, 7, 8 }; } // namespace BitVector::Reference::Reference(block_type& block, block_type i) - : block_(block), - mask_(block_type(1) << i) - { - assert(i < bits_per_block); - } + : block(block), mask((block_type(1) << i)) + { + assert(i < bits_per_block); + 
} BitVector::Reference& BitVector::Reference::Flip() - { - block_ ^= mask_; - return *this; - } + { + block ^= mask; + return *this; + } BitVector::Reference::operator bool() const - { - return (block_ & mask_) != 0; - } + { + return (block & mask) != 0; + } bool BitVector::Reference::operator~() const - { - return (block_ & mask_) == 0; - } + { + return (block & mask) == 0; + } BitVector::Reference& BitVector::Reference::operator=(bool x) - { - x ? block_ |= mask_ : block_ &= ~mask_; - return *this; - } + { + if ( x ) + block |= mask; + else + block &= ~mask; -BitVector::Reference& BitVector::Reference::operator=(Reference const& other) - { - other ? block_ |= mask_ : block_ &= ~mask_; - return *this; - } + return *this; + } + +BitVector::Reference& BitVector::Reference::operator=(const Reference& other) + { + if ( other ) + block |= mask; + else + block &= ~mask; + + return *this; + } BitVector::Reference& BitVector::Reference::operator|=(bool x) - { - if (x) - block_ |= mask_; - return *this; - } + { + if ( x ) + block |= mask; + + return *this; + } BitVector::Reference& BitVector::Reference::operator&=(bool x) - { - if (! x) - block_ &= ~mask_; - return *this; - } + { + if ( ! x ) + block &= ~mask; + + return *this; + } BitVector::Reference& BitVector::Reference::operator^=(bool x) - { - if (x) - block_ ^= mask_; - return *this; - } + { + if ( x ) + block ^= mask; + + return *this; + } BitVector::Reference& BitVector::Reference::operator-=(bool x) - { - if (x) - block_ &= ~mask_; - return *this; - } + { + if ( x ) + block &= ~mask; + return *this; + } -BitVector::BitVector() : num_bits_(0) { } +BitVector::BitVector() + { + num_bits = 0; + } BitVector::BitVector(size_type size, bool value) - : bits_(bits_to_blocks(size), value ? ~block_type(0) : 0), - num_bits_(size) -{ } + : bits(bits_to_blocks(size), value ? 
~block_type(0) : 0) + { + num_bits = size; + } BitVector::BitVector(BitVector const& other) - : bits_(other.bits_), - num_bits_(other.num_bits_) -{ } + : bits(other.bits) + { + num_bits = other.num_bits; + } BitVector BitVector::operator~() const - { - BitVector b(*this); - b.Flip(); - return b; - } + { + BitVector b(*this); + b.Flip(); + return b; + } BitVector& BitVector::operator=(BitVector const& other) - { - bits_ = other.bits_; - return *this; - } + { + bits = other.bits; + return *this; + } BitVector BitVector::operator<<(size_type n) const - { - BitVector b(*this); - return b <<= n; - } + { + BitVector b(*this); + return b <<= n; + } BitVector BitVector::operator>>(size_type n) const - { - BitVector b(*this); - return b >>= n; - } + { + BitVector b(*this); + return b >>= n; + } BitVector& BitVector::operator<<=(size_type n) - { - if (n >= num_bits_) - return Reset(); + { + if ( n >= num_bits ) + return Reset(); - if (n > 0) - { - size_type last = Blocks() - 1; - size_type div = n / bits_per_block; - block_type r = bit_index(n); - block_type* b = &bits_[0]; - assert(Blocks() >= 1); - assert(div <= last); + if ( n > 0 ) + { + size_type last = Blocks() - 1; + size_type div = n / bits_per_block; + block_type r = bit_index(n); + block_type* b = &bits[0]; - if (r != 0) - { - for (size_type i = last - div; i > 0; --i) - b[i + div] = (b[i] << r) | (b[i - 1] >> (bits_per_block - r)); - b[div] = b[0] << r; - } - else - { - for (size_type i = last-div; i > 0; --i) - b[i + div] = b[i]; - b[div] = b[0]; - } + assert(Blocks() >= 1); + assert(div <= last); - std::fill_n(b, div, block_type(0)); - zero_unused_bits(); - } + if ( r != 0 ) + { + for ( size_type i = last - div; i > 0; --i ) + b[i + div] = (b[i] << r) | (b[i - 1] >> (bits_per_block - r)); - return *this; - } + b[div] = b[0] << r; + } + + else + { + for (size_type i = last-div; i > 0; --i) + b[i + div] = b[i]; + + b[div] = b[0]; + } + + std::fill_n(b, div, block_type(0)); + zero_unused_bits(); + } + + return *this; + } BitVector& BitVector::operator>>=(size_type n) - { - if (n >= num_bits_) - return Reset(); + { + if ( n >= num_bits ) + return Reset(); - if (n > 0) - { - size_type last = Blocks() - 1; - size_type div = n / bits_per_block; - block_type r = bit_index(n); - block_type* b = &bits_[0]; - assert(Blocks() >= 1); - assert(div <= last); + if ( n > 0 ) + { + size_type last = Blocks() - 1; + size_type div = n / bits_per_block; + block_type r = bit_index(n); + block_type* b = &bits[0]; - if (r != 0) - { - for (size_type i = last - div; i > 0; --i) - b[i - div] = (b[i] >> r) | (b[i + 1] << (bits_per_block - r)); - b[last - div] = b[last] >> r; - } - else - { - for (size_type i = div; i <= last; ++i) - b[i-div] = b[i]; - } + assert(Blocks() >= 1); + assert(div <= last); - std::fill_n(b + (Blocks() - div), div, block_type(0)); - } - return *this; - } + if ( r != 0 ) + { + for (size_type i = last - div; i > 0; --i) + b[i - div] = (b[i] >> r) | (b[i + 1] << (bits_per_block - r)); + + b[last - div] = b[last] >> r; + } + + else + { + for (size_type i = div; i <= last; ++i) + b[i-div] = b[i]; + } + + std::fill_n(b + (Blocks() - div), div, block_type(0)); + } + + return *this; + } BitVector& BitVector::operator&=(BitVector const& other) - { - assert(Size() >= other.Size()); - for (size_type i = 0; i < Blocks(); ++i) - bits_[i] &= other.bits_[i]; - return *this; - } + { + assert(Size() >= other.Size()); + + for ( size_type i = 0; i < Blocks(); ++i ) + bits[i] &= other.bits[i]; + + return *this; + } BitVector& BitVector::operator|=(BitVector 
const& other) - { - assert(Size() >= other.Size()); - for (size_type i = 0; i < Blocks(); ++i) - bits_[i] |= other.bits_[i]; - return *this; - } + { + assert(Size() >= other.Size()); + + for ( size_type i = 0; i < Blocks(); ++i ) + bits[i] |= other.bits[i]; + + return *this; + } BitVector& BitVector::operator^=(BitVector const& other) - { - assert(Size() >= other.Size()); - for (size_type i = 0; i < Blocks(); ++i) - bits_[i] ^= other.bits_[i]; - return *this; - } + { + assert(Size() >= other.Size()); + + for ( size_type i = 0; i < Blocks(); ++i ) + bits[i] ^= other.bits[i]; + + return *this; + } BitVector& BitVector::operator-=(BitVector const& other) - { - assert(Size() >= other.Size()); - for (size_type i = 0; i < Blocks(); ++i) - bits_[i] &= ~other.bits_[i]; - return *this; - } + { + assert(Size() >= other.Size()); + + for ( size_type i = 0; i < Blocks(); ++i ) + bits[i] &= ~other.bits[i]; + + return *this; + } namespace probabilistic { BitVector operator&(BitVector const& x, BitVector const& y) - { - BitVector b(x); - return b &= y; - } + { + BitVector b(x); + return b &= y; + } BitVector operator|(BitVector const& x, BitVector const& y) - { - BitVector b(x); - return b |= y; - } + { + BitVector b(x); + return b |= y; + } BitVector operator^(BitVector const& x, BitVector const& y) - { - BitVector b(x); - return b ^= y; - } + { + BitVector b(x); + return b ^= y; + } BitVector operator-(BitVector const& x, BitVector const& y) - { - BitVector b(x); - return b -= y; - } + { + BitVector b(x); + return b -= y; + } bool operator==(BitVector const& x, BitVector const& y) - { - return x.num_bits_ == y.num_bits_ && x.bits_ == y.bits_; - } + { + return x.num_bits == y.num_bits && x.bits == y.bits; + } bool operator!=(BitVector const& x, BitVector const& y) - { - return ! (x == y); - } + { + return ! (x == y); + } bool operator<(BitVector const& x, BitVector const& y) - { - assert(x.Size() == y.Size()); - for (BitVector::size_type r = x.Blocks(); r > 0; --r) - { - BitVector::size_type i = r - 1; - if (x.bits_[i] < y.bits_[i]) - return true; - else if (x.bits_[i] > y.bits_[i]) - return false; - } - return false; - } + { + assert(x.Size() == y.Size()); + + for ( BitVector::size_type r = x.Blocks(); r > 0; --r ) + { + BitVector::size_type i = r - 1; + + if ( x.bits[i] < y.bits[i] ) + return true; + + else if ( x.bits[i] > y.bits[i] ) + return false; + + } + + return false; + } } void BitVector::Resize(size_type n, bool value) - { - size_type old = Blocks(); - size_type required = bits_to_blocks(n); - block_type block_value = value ? ~block_type(0) : block_type(0); + { + size_type old = Blocks(); + size_type required = bits_to_blocks(n); + block_type block_value = value ? ~block_type(0) : block_type(0); - if (required != old) - bits_.resize(required, block_value); + if ( required != old ) + bits.resize(required, block_value); - if (value && (n > num_bits_) && extra_bits()) - bits_[old - 1] |= (block_value << extra_bits()); + if ( value && (n > num_bits) && extra_bits() ) + bits[old - 1] |= (block_value << extra_bits()); - num_bits_ = n; - zero_unused_bits(); - } + num_bits = n; + zero_unused_bits(); + } void BitVector::Clear() - { - bits_.clear(); - num_bits_ = 0; - } + { + bits.clear(); + num_bits = 0; + } void BitVector::PushBack(bool bit) - { - size_type s = Size(); - Resize(s + 1); - Set(s, bit); - } + { + size_type s = Size(); + Resize(s + 1); + Set(s, bit); + } void BitVector::Append(block_type block) - { - size_type excess = extra_bits(); - if (excess) - { - assert(! 
Empty()); - bits_.push_back(block >> (bits_per_block - excess)); - bits_[Blocks() - 2] |= (block << excess); - } - else - { - bits_.push_back(block); - } - num_bits_ += bits_per_block; - } + { + size_type excess = extra_bits(); + + if ( excess ) + { + assert(! Empty()); + bits.push_back(block >> (bits_per_block - excess)); + bits[Blocks() - 2] |= (block << excess); + } + + else + { + bits.push_back(block); + } + + num_bits += bits_per_block; + } BitVector& BitVector::Set(size_type i, bool bit) - { - assert(i < num_bits_); - if (bit) - bits_[block_index(i)] |= bit_mask(i); - else - Reset(i); - return *this; - } + { + assert(i < num_bits); + + if ( bit ) + bits[block_index(i)] |= bit_mask(i); + else + Reset(i); + + return *this; + } BitVector& BitVector::Set() - { - std::fill(bits_.begin(), bits_.end(), ~block_type(0)); - zero_unused_bits(); - return *this; - } + { + std::fill(bits.begin(), bits.end(), ~block_type(0)); + zero_unused_bits(); + return *this; + } BitVector& BitVector::Reset(size_type i) - { - assert(i < num_bits_); - bits_[block_index(i)] &= ~bit_mask(i); - return *this; - } + { + assert(i < num_bits); + bits[block_index(i)] &= ~bit_mask(i); + return *this; + } BitVector& BitVector::Reset() - { - std::fill(bits_.begin(), bits_.end(), block_type(0)); - return *this; - } + { + std::fill(bits.begin(), bits.end(), block_type(0)); + return *this; + } BitVector& BitVector::Flip(size_type i) - { - assert(i < num_bits_); - bits_[block_index(i)] ^= bit_mask(i); - return *this; - } + { + assert(i < num_bits); + bits[block_index(i)] ^= bit_mask(i); + return *this; + } BitVector& BitVector::Flip() - { - for (size_type i = 0; i < Blocks(); ++i) - bits_[i] = ~bits_[i]; - zero_unused_bits(); - return *this; - } + { + for (size_type i = 0; i < Blocks(); ++i) + bits[i] = ~bits[i]; + + zero_unused_bits(); + return *this; + } bool BitVector::operator[](size_type i) const - { - assert(i < num_bits_); - return (bits_[block_index(i)] & bit_mask(i)) != 0; - } + { + assert(i < num_bits); + return (bits[block_index(i)] & bit_mask(i)) != 0; + } BitVector::Reference BitVector::operator[](size_type i) - { - assert(i < num_bits_); - return Reference(bits_[block_index(i)], bit_index(i)); - } + { + assert(i < num_bits); + return Reference(bits[block_index(i)], bit_index(i)); + } BitVector::size_type BitVector::Count() const - { - std::vector::const_iterator first = bits_.begin(); - size_t n = 0; - size_type length = Blocks(); - while (length) - { - block_type block = *first; - while (block) - { - // TODO: use __popcnt if available. - n += count_table[block & ((1u << 8) - 1)]; - block >>= 8; - } - ++first; - --length; - } - return n; - } + { + std::vector::const_iterator first = bits.begin(); + size_t n = 0; + size_type length = Blocks(); + + while ( length ) + { + block_type block = *first; + + while ( block ) + { + // TODO: use _popcnt if available. 
+ n += count_table[block & ((1u << 8) - 1)]; + block >>= 8; + } + + ++first; + --length; + } + + return n; + } BitVector::size_type BitVector::Blocks() const - { - return bits_.size(); - } + { + return bits.size(); + } BitVector::size_type BitVector::Size() const - { - return num_bits_; - } + { + return num_bits; + } bool BitVector::Empty() const - { - return bits_.empty(); - } + { + return bits.empty(); + } BitVector::size_type BitVector::FindFirst() const - { - return find_from(0); - } + { + return find_from(0); + } BitVector::size_type BitVector::FindNext(size_type i) const - { - if (i >= (Size() - 1) || Size() == 0) - return npos; - ++i; - size_type bi = block_index(i); - block_type block = bits_[bi] & (~block_type(0) << bit_index(i)); - return block ? bi * bits_per_block + lowest_bit(block) : find_from(bi + 1); - } + { + if ( i >= (Size() - 1) || Size() == 0 ) + return npos; + + ++i; + size_type bi = block_index(i); + block_type block = bits[bi] & (~block_type(0) << bit_index(i)); + return block ? bi * bits_per_block + lowest_bit(block) : find_from(bi + 1); + } BitVector::size_type BitVector::lowest_bit(block_type block) - { - block_type x = block - (block & (block - 1)); - size_type log = 0; - while (x >>= 1) - ++log; - return log; - } + { + block_type x = block - (block & (block - 1)); + size_type log = 0; + + while (x >>= 1) + ++log; + + return log; + } BitVector::block_type BitVector::extra_bits() const - { - return bit_index(Size()); - } + { + return bit_index(Size()); + } void BitVector::zero_unused_bits() - { - if (extra_bits()) - bits_.back() &= ~(~block_type(0) << extra_bits()); - } + { + if ( extra_bits() ) + bits.back() &= ~(~block_type(0) << extra_bits()); + } BitVector::size_type BitVector::find_from(size_type i) const - { - while (i < Blocks() && bits_[i] == 0) - ++i; - if (i >= Blocks()) - return npos; - return i * bits_per_block + lowest_bit(bits_[i]); - } + { + while (i < Blocks() && bits[i] == 0) + ++i; + + if ( i >= Blocks() ) + return npos; + + return i * bits_per_block + lowest_bit(bits[i]); + } bool BitVector::Serialize(SerialInfo* info) const - { - return SerialObj::Serialize(info); - } + { + return SerialObj::Serialize(info); + } BitVector* BitVector::Unserialize(UnserialInfo* info) - { - return reinterpret_cast( - SerialObj::Unserialize(info, SER_BITVECTOR)); - } + { + return reinterpret_cast(SerialObj::Unserialize(info, SER_BITVECTOR)); + } IMPLEMENT_SERIAL(BitVector, SER_BITVECTOR); bool BitVector::DoSerialize(SerialInfo* info) const - { - DO_SERIALIZE(SER_BITVECTOR, SerialObj); + { + DO_SERIALIZE(SER_BITVECTOR, SerialObj); - if ( ! SERIALIZE(static_cast(bits_.size())) ) - return false; + if ( ! SERIALIZE(static_cast(bits.size())) ) + return false; - for ( size_t i = 0; i < bits_.size(); ++i ) - if ( ! SERIALIZE(static_cast(bits_[i])) ) - return false; + for ( size_t i = 0; i < bits.size(); ++i ) + if ( ! SERIALIZE(static_cast(bits[i])) ) + return false; - return SERIALIZE(static_cast(num_bits_)); - } + return SERIALIZE(static_cast(num_bits)); + } bool BitVector::DoUnserialize(UnserialInfo* info) - { - DO_UNSERIALIZE(SerialObj); + { + DO_UNSERIALIZE(SerialObj); - uint64 size; - if ( ! UNSERIALIZE(&size) ) - return false; + uint64 size; + if ( ! UNSERIALIZE(&size) ) + return false; - bits_.resize(static_cast(size)); - uint64 block; - for ( size_t i = 0; i < bits_.size(); ++i ) - { - if ( ! UNSERIALIZE(&block) ) - return false; - bits_[i] = static_cast(block); - } + bits.resize(static_cast(size)); - uint64 num_bits; - if ( ! 
UNSERIALIZE(&num_bits) ) - return false; - num_bits_ = static_cast(num_bits); + for ( size_t i = 0; i < bits.size(); ++i ) + { + uint64 block; + if ( ! UNSERIALIZE(&block) ) + return false; - return true; - } + bits[i] = static_cast(block); + } + + uint64 num_bits; + if ( ! UNSERIALIZE(&num_bits) ) + return false; + + num_bits = static_cast(num_bits); + + return true; + } diff --git a/src/probabilistic/BitVector.h b/src/probabilistic/BitVector.h index 8832c24cbe..9eefe1b633 100644 --- a/src/probabilistic/BitVector.h +++ b/src/probabilistic/BitVector.h @@ -1,8 +1,11 @@ -#ifndef BitVector_h -#define BitVector_h +// See the file "COPYING" in the main distribution directory for copyright. + +#ifndef PROBABILISTIC_BITVECTOR_H +#define PROBABILISTIC_BITVECTOR_H #include #include + #include "SerialObj.h" namespace probabilistic { @@ -12,322 +15,348 @@ namespace probabilistic { */ class BitVector : public SerialObj { public: - typedef size_t block_type; - typedef size_t size_type; - static size_type npos; - static block_type bits_per_block; + typedef size_t block_type; + typedef size_t size_type; + typedef bool const_reference; -public: - /** - * An lvalue proxy for single bits. - */ - class Reference { - friend class BitVector; - Reference(block_type& block, block_type i); + static size_type npos; + static block_type bits_per_block; - public: - Reference& Flip(); - operator bool() const; - bool operator~() const; - Reference& operator=(bool x); - Reference& operator=(Reference const& other); - Reference& operator|=(bool x); - Reference& operator&=(bool x); - Reference& operator^=(bool x); - Reference& operator-=(bool x); + /** + * An lvalue proxy for individual bits. + */ + class Reference { + public: + /** + * Inverts the bits' values. + */ + Reference& Flip(); - private: - void operator&(); - block_type& block_; - block_type const mask_; - }; + operator bool() const; + bool operator~() const; + Reference& operator=(bool x); + Reference& operator=(const Reference& other); + Reference& operator|=(bool x); + Reference& operator&=(bool x); + Reference& operator^=(bool x); + Reference& operator-=(bool x); - typedef bool const_reference; + private: + friend class BitVector; - /** - * Default-constructs an empty bit vector. - */ - BitVector(); + Reference(block_type& block, block_type i); + void operator&(); - /** - * Constructs a bit vector of a given size. - * @param size The number of bits. - * @param value The value for each bit. - */ - explicit BitVector(size_type size, bool value = false); + block_type& block; + const block_type mask; + }; - /** - * Constructs a bit vector from a sequence of blocks. - */ - template - BitVector(InputIterator first, InputIterator last) - { - bits_.insert(bits_.end(), first, last); - num_bits_ = bits_.size() * bits_per_block; - } + /** + * Default-constructs an empty bit vector. + */ + BitVector(); - /** - * Copy-constructs a bit vector. - * @param other The bit vector to copy. - */ - BitVector(const BitVector& other); + /** + * Constructs a bit vector of a given size. + * @param size The number of bits. + * @param value The value for each bit. + */ + explicit BitVector(size_type size, bool value = false); - /** - * Assigns another bit vector to this instance. - * @param other The RHS of the assignment. - */ - BitVector& operator=(const BitVector& other); + /** + * Constructs a bit vector from a sequence of blocks. + * + * @param first Start of range + * @param last End of range. 
+ * + */ + template + BitVector(InputIterator first, InputIterator last) + { + bits.insert(bits.end(), first, last); + num_bits = bits.size() * bits_per_block; + } - // - // Bitwise operations - // - BitVector operator~() const; - BitVector operator<<(size_type n) const; - BitVector operator>>(size_type n) const; - BitVector& operator<<=(size_type n); - BitVector& operator>>=(size_type n); - BitVector& operator&=(BitVector const& other); - BitVector& operator|=(BitVector const& other); - BitVector& operator^=(BitVector const& other); - BitVector& operator-=(BitVector const& other); - friend BitVector operator&(BitVector const& x, BitVector const& y); - friend BitVector operator|(BitVector const& x, BitVector const& y); - friend BitVector operator^(BitVector const& x, BitVector const& y); - friend BitVector operator-(BitVector const& x, BitVector const& y); + /** + * Copy-constructs a bit vector. + * @param other The bit vector to copy. + */ + BitVector(const BitVector& other); - // - // Relational operators - // - friend bool operator==(BitVector const& x, BitVector const& y); - friend bool operator!=(BitVector const& x, BitVector const& y); - friend bool operator<(BitVector const& x, BitVector const& y); + /** + * Assigns another bit vector to this instance. + * @param other The RHS of the assignment. + */ + BitVector& operator=(const BitVector& other); - // - // Basic operations - // - /** Appends the bits in a sequence of values. - * @tparam Iterator A forward iterator. - * @param first An iterator pointing to the first element of the sequence. - * @param last An iterator pointing to one past the last element of the - * sequence. - */ - template - void Append(ForwardIterator first, ForwardIterator last) - { - if (first == last) - return; + // + // Bitwise operations. + // + BitVector operator~() const; + BitVector operator<<(size_type n) const; + BitVector operator>>(size_type n) const; + BitVector& operator<<=(size_type n); + BitVector& operator>>=(size_type n); + BitVector& operator&=(BitVector const& other); + BitVector& operator|=(BitVector const& other); + BitVector& operator^=(BitVector const& other); + BitVector& operator-=(BitVector const& other); + friend BitVector operator&(BitVector const& x, BitVector const& y); + friend BitVector operator|(BitVector const& x, BitVector const& y); + friend BitVector operator^(BitVector const& x, BitVector const& y); + friend BitVector operator-(BitVector const& x, BitVector const& y); - block_type excess = extra_bits(); - typename std::iterator_traits::difference_type delta = - std::distance(first, last); + // + // Relational operators + // + friend bool operator==(BitVector const& x, BitVector const& y); + friend bool operator!=(BitVector const& x, BitVector const& y); + friend bool operator<(BitVector const& x, BitVector const& y); - bits_.reserve(Blocks() + delta); - if (excess == 0) - { - bits_.back() |= (*first << excess); - do - { - block_type b = *first++ >> (bits_per_block - excess); - bits_.push_back(b | (first == last ? 0 : *first << excess)); - } while (first != last); - } - else - { - bits_.insert(bits_.end(), first, last); - } - num_bits_ += bits_per_block * delta; - } + // + // Basic operations + // - /** - * Appends the bits in a given block. - * @param block The block containing bits to append. - */ - void Append(block_type block); + /** Appends the bits in a sequence of values. + * @tparam Iterator A forward iterator. + * @param first An iterator pointing to the first element of the sequence. 
+ * @param last An iterator pointing to one past the last element of the + * sequence. + */ + template + void Append(ForwardIterator first, ForwardIterator last) + { + if ( first == last ) + return; - /** Appends a single bit to the end of the bit vector. - * @param bit The value of the bit. - */ - void PushBack(bool bit); + block_type excess = extra_bits(); + typename std::iterator_traits::difference_type delta = + std::distance(first, last); - /** - * Clears all bits in the bitvector. - */ - void Clear(); + bits.reserve(Blocks() + delta); - /** - * Resizes the bit vector to a new number of bits. - * @param n The new number of bits of the bit vector. - * @param value The bit value of new values, if the vector expands. - */ - void Resize(size_type n, bool value = false); + if ( excess == 0 ) + { + bits.back() |= (*first << excess); - /** - * Sets a bit at a specific position to a given value. - * @param i The bit position. - * @param bit The value assigned to position *i*. - * @return A reference to the bit vector instance. - */ - BitVector& Set(size_type i, bool bit = true); + do { + block_type b = *first++ >> (bits_per_block - excess); + bits.push_back(b | (first == last ? 0 : *first << excess)); + } while (first != last); - /** - * Sets all bits to 1. - * @return A reference to the bit vector instance. - */ - BitVector& Set(); + } - /** - * Resets a bit at a specific position, i.e., sets it to 0. - * @param i The bit position. - * @return A reference to the bit vector instance. - */ - BitVector& Reset(size_type i); + else + bits.insert(bits.end(), first, last); - /** - * Sets all bits to 0. - * @return A reference to the bit vector instance. - */ - BitVector& Reset(); + num_bits += bits_per_block * delta; + } - /** - * Toggles/flips a bit at a specific position. - * @param i The bit position. - * @return A reference to the bit vector instance. - */ - BitVector& Flip(size_type i); + /** + * Appends the bits in a given block. + * @param block The block containing bits to append. + */ + void Append(block_type block); - /** - * Computes the complement. - * @return A reference to the bit vector instance. - */ - BitVector& Flip(); + /** Appends a single bit to the end of the bit vector. + * @param bit The value of the bit. + */ + void PushBack(bool bit); - /** Retrieves a single bit. - * @param i The bit position. - * @return A mutable reference to the bit at position *i*. - */ - Reference operator[](size_type i); + /** + * Clears all bits in the bitvector. + */ + void Clear(); - /** - * Retrieves a single bit. - * @param i The bit position. - * @return A const-reference to the bit at position *i*. - */ - const_reference operator[](size_type i) const; + /** + * Resizes the bit vector to a new number of bits. + * @param n The new number of bits of the bit vector. + * @param value The bit value of new values, if the vector expands. + */ + void Resize(size_type n, bool value = false); - /** - * Counts the number of 1-bits in the bit vector. Also known as *population - * count* or *Hamming weight*. - * @return The number of bits set to 1. - */ - size_type Count() const; + /** + * Sets a bit at a specific position to a given value. + * @param i The bit position. + * @param bit The value assigned to position *i*. + * @return A reference to the bit vector instance. + */ + BitVector& Set(size_type i, bool bit = true); - /** - * Retrieves the number of blocks of the underlying storage. - * @param The number of blocks that represent `Size()` bits. 
- */ - size_type Blocks() const; + /** + * Sets all bits to 1. + * @return A reference to the bit vector instance. + */ + BitVector& Set(); - /** - * Retrieves the number of bits the bitvector consist of. - * @return The length of the bit vector in bits. - */ - size_type Size() const; + /** + * Resets a bit at a specific position, i.e., sets it to 0. + * @param i The bit position. + * @return A reference to the bit vector instance. + */ + BitVector& Reset(size_type i); - /** - * Checks whether the bit vector is empty. - * @return `true` iff the bitvector has zero length. - */ - bool Empty() const; + /** + * Sets all bits to 0. + * @return A reference to the bit vector instance. + */ + BitVector& Reset(); - /** - * Finds the bit position of of the first 1-bit. - * @return The position of the first bit that equals to one or `npos` if no - * such bit exists. - */ - size_type FindFirst() const; + /** + * Toggles/flips a bit at a specific position. + * @param i The bit position. + * @return A reference to the bit vector instance. + */ + BitVector& Flip(size_type i); - /** - * Finds the next 1-bit from a given starting position. - * - * @param i The index where to start looking. - * - * @return The position of the first bit that equals to 1 after position - * *i* or `npos` if no such bit exists. - */ - size_type FindNext(size_type i) const; + /** + * Computes the complement. + * @return A reference to the bit vector instance. + */ + BitVector& Flip(); - bool Serialize(SerialInfo* info) const; - static BitVector* Unserialize(UnserialInfo* info); + /** Retrieves a single bit. + * @param i The bit position. + * @return A mutable reference to the bit at position *i*. + */ + Reference operator[](size_type i); + + /** + * Retrieves a single bit. + * @param i The bit position. + * @return A const-reference to the bit at position *i*. + */ + const_reference operator[](size_type i) const; + + /** + * Counts the number of 1-bits in the bit vector. Also known as *population + * count* or *Hamming weight*. + * @return The number of bits set to 1. + */ + size_type Count() const; + + /** + * Retrieves the number of blocks of the underlying storage. + * @param The number of blocks that represent `Size()` bits. + */ + size_type Blocks() const; + + /** + * Retrieves the number of bits the bitvector consist of. + * @return The length of the bit vector in bits. + */ + size_type Size() const; + + /** + * Checks whether the bit vector is empty. + * @return `true` iff the bitvector has zero length. + */ + bool Empty() const; + + /** + * Finds the bit position of of the first 1-bit. + * @return The position of the first bit that equals to one or `npos` if no + * such bit exists. + */ + size_type FindFirst() const; + + /** + * Finds the next 1-bit from a given starting position. + * + * @param i The index where to start looking. + * + * @return The position of the first bit that equals to 1 after position + * *i* or `npos` if no such bit exists. + */ + size_type FindNext(size_type i) const; + + /** + * Serializes the bit vector. + * + * @param info The serializaton informationt to use. + * + * @return True if successful. + */ + bool Serialize(SerialInfo* info) const; + + /** + * Unserialize the bit vector. + * + * @param info The serializaton informationt to use. + * + * @return The unserialized bit vector, or null if an error occured. 
+ */ + static BitVector* Unserialize(UnserialInfo* info); protected: - DECLARE_SERIAL(BitVector); + DECLARE_SERIAL(BitVector); private: - /** - * Computes the block index for a given bit position. - */ - static size_type block_index(size_type i) - { - return i / bits_per_block; - } + /** + * Computes the number of excess/unused bits in the bit vector. + */ + block_type extra_bits() const; - /** - * Computes the bit index within a given block for a given bit position. - */ - static block_type bit_index(size_type i) - { - return i % bits_per_block; - } + /** + * If the number of bits in the vector are not not a multiple of + * bitvector::bits_per_block, then the last block exhibits unused bits which + * this function resets. + */ + void zero_unused_bits(); - /** - * Computes the bitmask block to extract a bit a given bit position. - */ - static block_type bit_mask(size_type i) - { - return block_type(1) << bit_index(i); - } + /** + * Looks for the first 1-bit starting at a given position. + * @param i The block index to start looking. + * @return The block index of the first 1-bit starting from *i* or + * `bitvector::npos` if no 1-bit exists. + */ + size_type find_from(size_type i) const; - /** - * Computes the number of blocks needed to represent a given number of - * bits. - * @param bits the number of bits. - * @return The number of blocks to represent *bits* number of bits. - */ - static size_type bits_to_blocks(size_type bits) - { - return bits / bits_per_block - + static_cast(bits % bits_per_block != 0); - } + /** + * Computes the block index for a given bit position. + */ + static size_type block_index(size_type i) + { + return i / bits_per_block; + } - /** - * Computes the bit position first 1-bit in a given block. - * @param block The block to inspect. - * @return The bit position where *block* has its first bit set to 1. - */ - static size_type lowest_bit(block_type block); + /** + * Computes the bit index within a given block for a given bit position. + */ + static block_type bit_index(size_type i) + { + return i % bits_per_block; + } - /** - * Computes the number of excess/unused bits in the bit vector. - */ - block_type extra_bits() const; + /** + * Computes the bitmask block to extract a bit a given bit position. + */ + static block_type bit_mask(size_type i) + { + return block_type(1) << bit_index(i); + } - /** - * If the number of bits in the vector are not not a multiple of - * bitvector::bits_per_block, then the last block exhibits unused bits which - * this function resets. - */ - void zero_unused_bits(); + /** + * Computes the number of blocks needed to represent a given number of + * bits. + * @param bits the number of bits. + * @return The number of blocks to represent *bits* number of bits. + */ + static size_type bits_to_blocks(size_type bits) + { + return bits / bits_per_block + + static_cast(bits % bits_per_block != 0); + } - /** - * Looks for the first 1-bit starting at a given position. - * @param i The block index to start looking. - * @return The block index of the first 1-bit starting from *i* or - * `bitvector::npos` if no 1-bit exists. - */ - size_type find_from(size_type i) const; + /** + * Computes the bit position first 1-bit in a given block. + * @param block The block to inspect. + * @return The bit position where *block* has its first bit set to 1. 
+ */ + static size_type lowest_bit(block_type block); - std::vector bits_; - size_type num_bits_; + std::vector bits; + size_type num_bits; }; } diff --git a/src/probabilistic/BloomFilter.cc b/src/probabilistic/BloomFilter.cc index 1b86ea1441..5613dcce05 100644 --- a/src/probabilistic/BloomFilter.cc +++ b/src/probabilistic/BloomFilter.cc @@ -1,3 +1,5 @@ +// See the file "COPYING" in the main distribution directory for copyright. + #include "BloomFilter.h" #include @@ -8,181 +10,184 @@ using namespace probabilistic; BloomFilter::BloomFilter() - : hasher_(NULL) - { - } + { + hasher = 0; + } -BloomFilter::BloomFilter(const Hasher* hasher) - : hasher_(hasher) - { - } +BloomFilter::BloomFilter(const Hasher* arg_hasher) + { + hasher = arg_hasher; + } BloomFilter::~BloomFilter() - { - if ( hasher_ ) - delete hasher_; - } + { + delete hasher; + } bool BloomFilter::Serialize(SerialInfo* info) const - { - return SerialObj::Serialize(info); - } + { + return SerialObj::Serialize(info); + } BloomFilter* BloomFilter::Unserialize(UnserialInfo* info) - { - return reinterpret_cast( - SerialObj::Unserialize(info, SER_BLOOMFILTER)); - } + { + return reinterpret_cast(SerialObj::Unserialize(info, SER_BLOOMFILTER)); + } bool BloomFilter::DoSerialize(SerialInfo* info) const { DO_SERIALIZE(SER_BLOOMFILTER, SerialObj); - if ( ! SERIALIZE(static_cast(hasher_->K())) ) - return false; - return SERIALIZE_STR(hasher_->Name().c_str(), hasher_->Name().size()); - } + + if ( ! SERIALIZE(static_cast(hasher->K())) ) + return false; + + return SERIALIZE_STR(hasher->Name().c_str(), hasher->Name().size()); + } bool BloomFilter::DoUnserialize(UnserialInfo* info) { DO_UNSERIALIZE(SerialObj); + uint16 k; if ( ! UNSERIALIZE(&k) ) - return false; - const char* name; - if ( ! UNSERIALIZE_STR(&name, 0) ) - return false; - hasher_ = Hasher::Create(k, name); + return false; + + const char* name; + if ( ! UNSERIALIZE_STR(&name, 0) ) + return false; + + hasher = Hasher::Create(k, name); + delete [] name; return true; - } - + } size_t BasicBloomFilter::M(double fp, size_t capacity) - { - double ln2 = std::log(2); - return std::ceil(-(capacity * std::log(fp) / ln2 / ln2)); - } + { + double ln2 = std::log(2); + return std::ceil(-(capacity * std::log(fp) / ln2 / ln2)); + } size_t BasicBloomFilter::K(size_t cells, size_t capacity) - { - double frac = static_cast(cells) / static_cast(capacity); - return std::ceil(frac * std::log(2)); - } + { + double frac = static_cast(cells) / static_cast(capacity); + return std::ceil(frac * std::log(2)); + } BasicBloomFilter* BasicBloomFilter::Merge(const BasicBloomFilter* x, const BasicBloomFilter* y) - { - if ( ! x->hasher_->Equals(y->hasher_) ) - { - reporter->InternalError("incompatible hashers during Bloom filter merge"); - return NULL; - } - BasicBloomFilter* result = new BasicBloomFilter(); - result->hasher_ = x->hasher_->Clone(); - result->bits_ = new BitVector(*x->bits_ | *y->bits_); - return result; - } + { + if ( ! 
x->hasher->Equals(y->hasher) ) + reporter->InternalError("incompatible hashers during BasicBloomFilter merge"); + + BasicBloomFilter* result = new BasicBloomFilter(); + result->hasher = x->hasher->Clone(); + result->bits = new BitVector(*x->bits | *y->bits); + + return result; + } BasicBloomFilter::BasicBloomFilter() - : bits_(NULL) - { - } + { + bits = 0; + } BasicBloomFilter::BasicBloomFilter(const Hasher* hasher, size_t cells) - : BloomFilter(hasher), - bits_(new BitVector(cells)) - { - } + : BloomFilter(hasher) + { + bits = new BitVector(cells); + } IMPLEMENT_SERIAL(BasicBloomFilter, SER_BASICBLOOMFILTER) bool BasicBloomFilter::DoSerialize(SerialInfo* info) const { DO_SERIALIZE(SER_BASICBLOOMFILTER, BloomFilter); - return bits_->Serialize(info); - } + return bits->Serialize(info); + } bool BasicBloomFilter::DoUnserialize(UnserialInfo* info) { DO_UNSERIALIZE(BloomFilter); - bits_ = BitVector::Unserialize(info); - return bits_ != NULL; - } + bits = BitVector::Unserialize(info); + return (bits != 0); + } void BasicBloomFilter::AddImpl(const Hasher::digest_vector& h) - { - for ( size_t i = 0; i < h.size(); ++i ) - bits_->Set(h[i] % bits_->Size()); - } + { + for ( size_t i = 0; i < h.size(); ++i ) + bits->Set(h[i] % bits->Size()); + } size_t BasicBloomFilter::CountImpl(const Hasher::digest_vector& h) const - { - for ( size_t i = 0; i < h.size(); ++i ) - if ( ! (*bits_)[h[i] % bits_->Size()] ) - return 0; - return 1; - } + { + for ( size_t i = 0; i < h.size(); ++i ) + { + if ( ! (*bits)[h[i] % bits->Size()] ) + return 0; + } + return 1; + } CountingBloomFilter* CountingBloomFilter::Merge(const CountingBloomFilter* x, - const CountingBloomFilter* y) - { - if ( ! x->hasher_->Equals(y->hasher_) ) - { - reporter->InternalError("incompatible hashers during Bloom filter merge"); - return NULL; - } - CountingBloomFilter* result = new CountingBloomFilter(); - result->hasher_ = x->hasher_->Clone(); - result->cells_ = new CounterVector(*x->cells_ | *y->cells_); - return result; - } + const CountingBloomFilter* y) + { + if ( ! x->hasher->Equals(y->hasher) ) + reporter->InternalError("incompatible hashers during CountingBloomFilter merge"); + + CountingBloomFilter* result = new CountingBloomFilter(); + result->hasher = x->hasher->Clone(); + result->cells = new CounterVector(*x->cells | *y->cells); + + return result; + } CountingBloomFilter::CountingBloomFilter() - : cells_(NULL) - { - } + { + cells = 0; + } CountingBloomFilter::CountingBloomFilter(const Hasher* hasher, - size_t cells, size_t width) - : BloomFilter(hasher), - cells_(new CounterVector(width, cells)) - { - } - + size_t arg_cells, size_t width) + : BloomFilter(hasher) + { + cells = new CounterVector(width, arg_cells); + } IMPLEMENT_SERIAL(CountingBloomFilter, SER_COUNTINGBLOOMFILTER) bool CountingBloomFilter::DoSerialize(SerialInfo* info) const { DO_SERIALIZE(SER_COUNTINGBLOOMFILTER, BloomFilter); - return cells_->Serialize(info); - } + return cells->Serialize(info); + } bool CountingBloomFilter::DoUnserialize(UnserialInfo* info) { DO_UNSERIALIZE(BloomFilter); - cells_ = CounterVector::Unserialize(info); - return cells_ != NULL; - } + cells = CounterVector::Unserialize(info); + return (cells != 0); + } // TODO: Use partitioning in add/count to allow for reusing CMS bounds. 
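As a rough illustration of the sizing math implemented by BasicBloomFilter::M() and BasicBloomFilter::K() above (worked by hand, not part of the patch): a false-positive rate of 0.01 for 1,000 expected elements yields ceil(1000 * ln(100) / ln(2)^2) = 9586 cells and ceil((9586/1000) * ln(2)) = 7 hash functions. At the script layer that corresponds to something like:

    # Hypothetical usage sketch; parameter values chosen only for the example.
    local bf = bloomfilter_basic_init(0.01, 1000);  # ~9586 cells, 7 hash functions
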
-
 void CountingBloomFilter::AddImpl(const Hasher::digest_vector& h)
-	{
-	for ( size_t i = 0; i < h.size(); ++i )
-		cells_->Increment(h[i] % cells_->Size());
-	}
+	{
+	for ( size_t i = 0; i < h.size(); ++i )
+		cells->Increment(h[i] % cells->Size());
+	}
 
 size_t CountingBloomFilter::CountImpl(const Hasher::digest_vector& h) const
-	{
-	CounterVector::size_type min =
-		std::numeric_limits<CounterVector::size_type>::max();
-	for ( size_t i = 0; i < h.size(); ++i )
-		{
-		CounterVector::size_type cnt = cells_->Count(h[i] % cells_->Size());
-		if ( cnt < min )
-			min = cnt;
-		}
-	return min;
-	}
+	{
+	CounterVector::size_type min =
+		std::numeric_limits<CounterVector::size_type>::max();
+
+	for ( size_t i = 0; i < h.size(); ++i )
+		{
+		CounterVector::size_type cnt = cells->Count(h[i] % cells->Size());
+		if ( cnt < min )
+			min = cnt;
+		}
+
+	return min;
+	}
diff --git a/src/probabilistic/BloomFilter.h b/src/probabilistic/BloomFilter.h
index 2fa849505d..4a6b01c484 100644
--- a/src/probabilistic/BloomFilter.h
+++ b/src/probabilistic/BloomFilter.h
@@ -1,5 +1,7 @@
-#ifndef BloomFilter_h
-#define BloomFilter_h
+// See the file "COPYING" in the main distribution directory for copyright.
+
+#ifndef PROBABILISTIC_BLOOMFILTER_H
+#define PROBABILISTIC_BLOOMFILTER_H
 
 #include
 #include "BitVector.h"
@@ -11,42 +13,65 @@ class CounterVector;
 
 /**
  * The abstract base class for Bloom filters.
+ *
+ * At this point we won't let the user choose the hasher, but we might open
+ * up the interface in the future.
  */
 class BloomFilter : public SerialObj {
 public:
-	// At this point we won't let the user choose the hasher, but we might
-	// open up the interface in the future.
-	virtual ~BloomFilter();
+	/**
+	 * Destructor.
+	 */
+	virtual ~BloomFilter();
 
-	/**
-	 * Adds an element of type T to the Bloom filter.
-	 * @param x The element to add
-	 */
-	template <typename T>
-	void Add(const T& x)
-		{
-		AddImpl((*hasher_)(x));
-		}
+	/**
+	 * Adds an element of type T to the Bloom filter.
+	 * @param x The element to add
+	 */
+	template <typename T>
+	void Add(const T& x)
+		{
+		AddImpl((*hasher)(x));
+		}
 
-	/**
-	 * Retrieves the associated count of a given value.
-	 *
-	 * @param x The value of type `T` to check.
-	 *
-	 * @return The counter associated with *x*.
-	 */
-	template <typename T>
-	size_t Count(const T& x) const
-		{
-		return CountImpl((*hasher_)(x));
-		}
+	/**
+	 * Retrieves the associated count of a given value.
+	 *
+	 * @param x The value of type `T` to check.
+	 *
+	 * @return The counter associated with *x*.
+	 */
+	template <typename T>
+	size_t Count(const T& x) const
+		{
+		return CountImpl((*hasher)(x));
+		}
 
-	bool Serialize(SerialInfo* info) const;
-	static BloomFilter* Unserialize(UnserialInfo* info);
+	/**
+	 * Serializes the Bloom filter.
+	 *
+	 * @param info The serialization information to use.
+	 *
+	 * @return True if successful.
+	 */
+	bool Serialize(SerialInfo* info) const;
+
+	/**
+	 * Unserializes a Bloom filter.
+	 *
+	 * @param info The serialization information to use.
+	 *
+	 * @return The unserialized Bloom filter, or null if an error
+	 * occurred.
+	 */
+	static BloomFilter* Unserialize(UnserialInfo* info);
 
 protected:
-	DECLARE_ABSTRACT_SERIAL(BloomFilter);
+	DECLARE_ABSTRACT_SERIAL(BloomFilter);
 
+	/**
+	 * Default constructor.
+	 */
 	BloomFilter();
 
 	/**
@@ -54,12 +79,28 @@ protected:
 	 *
 	 * @param hasher The hasher to use for this Bloom filter.
 	 */
-	BloomFilter(const Hasher* hasher);
+	BloomFilter(const Hasher* hasher);
 
-	virtual void AddImpl(const Hasher::digest_vector& hashes) = 0;
-	virtual size_t CountImpl(const Hasher::digest_vector& hashes) const = 0;
+	/**
+	 * Abstract method for implementing the *Add* operation.
+ * + * @param hashes A set of *k* hashes for the item to add, computed by + * the internal hasher object. + * + */ + virtual void AddImpl(const Hasher::digest_vector& hashes) = 0; - const Hasher* hasher_; + /** + * Abstract method for implementing the *Count* operation. + * + * @param hashes A set of *k* hashes for the item to add, computed by + * the internal hasher object. + * + * @return Returns the counter associated with the hashed element. + */ + virtual size_t CountImpl(const Hasher::digest_vector& hashes) const = 0; + + const Hasher* hasher; }; /** @@ -67,50 +108,67 @@ protected: */ class BasicBloomFilter : public BloomFilter { public: - /** - * Computes the number of cells based a given false-positive rate and - * capacity. In the literature, this parameter often has the name *M*. - * - * @param fp The false-positive rate. - * - * @param capacity The number of exepected elements. - * - * Returns: The number cells needed to support a false-positive rate of *fp* - * with at most *capacity* elements. - */ - static size_t M(double fp, size_t capacity); + /** + * Constructs a basic Bloom filter with a given number of cells. The + * ideal number of cells can be computed with *M*. + * + * @param hasher The hasher to use. The ideal number of hash + * functions can be computed with *K*. + * + * @param cells The number of cells. + */ + BasicBloomFilter(const Hasher* hasher, size_t cells); - /** - * Computes the optimal number of hash functions based on the number cells - * and expected number of elements. - * - * @param cells The number of cells (*m*). - * - * @param capacity The maximum number of elements. - * - * Returns: the optimal number of hash functions for a false-positive rate of - * *fp* for at most *capacity* elements. - */ - static size_t K(size_t cells, size_t capacity); + /** + * Computes the number of cells based on a given false positive rate + * and capacity. In the literature, this parameter often has the name + * *M*. + * + * @param fp The false positive rate. + * + * @param capacity The expected number of elements that will be + * stored. + * + * Returns: The number cells needed to support a false positive rate + * of *fp* with at most *capacity* elements. + */ + static size_t M(double fp, size_t capacity); - static BasicBloomFilter* Merge(const BasicBloomFilter* x, - const BasicBloomFilter* y); + /** + * Computes the optimal number of hash functions based on the number cells + * and expected number of elements. + * + * @param cells The number of cells (*m*). + * + * @param capacity The maximum number of elements. + * + * Returns: the optimal number of hash functions for a false-positive + * rate of *fp* for at most *capacity* elements. + */ + static size_t K(size_t cells, size_t capacity); - /** - * Constructs a basic Bloom filter with a given number of cells and capacity. - */ - BasicBloomFilter(const Hasher* hasher, size_t cells); + /** + * Merges two basic Bloom filters. + * + * @return The merged Bloom filter. + */ + static BasicBloomFilter* Merge(const BasicBloomFilter* x, + const BasicBloomFilter* y); protected: - DECLARE_SERIAL(BasicBloomFilter); + DECLARE_SERIAL(BasicBloomFilter); - BasicBloomFilter(); + /** + * Default constructor. + */ + BasicBloomFilter(); - virtual void AddImpl(const Hasher::digest_vector& h); - virtual size_t CountImpl(const Hasher::digest_vector& h) const; + // Overridden from BloomFilter. 
+ virtual void AddImpl(const Hasher::digest_vector& h); + virtual size_t CountImpl(const Hasher::digest_vector& h) const; private: - BitVector* bits_; + BitVector* bits; }; /** @@ -118,21 +176,40 @@ private: */ class CountingBloomFilter : public BloomFilter { public: - static CountingBloomFilter* Merge(const CountingBloomFilter* x, - const CountingBloomFilter* y); + /** + * Constructs a counting Bloom filter. + * + * @param hasher The hasher to use. The ideal number of hash + * functions can be computed with *K*. + * + * @param cells The number of cells to use. + * + * @param width The maximal bit-width of counter values. + */ + CountingBloomFilter(const Hasher* hasher, size_t cells, size_t width); - CountingBloomFilter(const Hasher* hasher, size_t cells, size_t width); + /** + * Merges two counting Bloom filters. + * + * @return The merged Bloom filter. + */ + static CountingBloomFilter* Merge(const CountingBloomFilter* x, + const CountingBloomFilter* y); protected: - DECLARE_SERIAL(CountingBloomFilter); + DECLARE_SERIAL(CountingBloomFilter); - CountingBloomFilter(); + /** + * Default constructor. + */ + CountingBloomFilter(); - virtual void AddImpl(const Hasher::digest_vector& h); - virtual size_t CountImpl(const Hasher::digest_vector& h) const; + // Overridden from BloomFilter. + virtual void AddImpl(const Hasher::digest_vector& h); + virtual size_t CountImpl(const Hasher::digest_vector& h) const; private: - CounterVector* cells_; + CounterVector* cells; }; } diff --git a/src/probabilistic/CounterVector.cc b/src/probabilistic/CounterVector.cc index 943749ad46..570ed1f8ea 100644 --- a/src/probabilistic/CounterVector.cc +++ b/src/probabilistic/CounterVector.cc @@ -1,3 +1,5 @@ +// See the file "COPYING" in the main distribution directory for copyright. + #include "CounterVector.h" #include @@ -6,154 +8,176 @@ using namespace probabilistic; -CounterVector::CounterVector(size_t width, size_t cells) - : bits_(new BitVector(width * cells)), - width_(width) - { - } +CounterVector::CounterVector(size_t arg_width, size_t cells) + { + bits = new BitVector(arg_width * cells); + width = arg_width; + } CounterVector::CounterVector(const CounterVector& other) - : bits_(new BitVector(*other.bits_)), - width_(other.width_) - { - } + { + bits = new BitVector(*other.bits); + width = other.width; + } CounterVector::~CounterVector() - { - delete bits_; - } + { + delete bits; + } bool CounterVector::Increment(size_type cell, count_type value) - { - assert(cell < Size()); - assert(value != 0); - size_t lsb = cell * width_; - bool carry = false; - for ( size_t i = 0; i < width_; ++i ) - { - bool b1 = (*bits_)[lsb + i]; - bool b2 = value & (1 << i); - (*bits_)[lsb + i] = b1 ^ b2 ^ carry; - carry = ( b1 && b2 ) || ( carry && ( b1 != b2 ) ); - } - if ( carry ) - for ( size_t i = 0; i < width_; ++i ) - bits_->Set(lsb + i); - return ! carry; - } + { + assert(cell < Size()); + assert(value != 0); + + size_t lsb = cell * width; + bool carry = false; + + for ( size_t i = 0; i < width; ++i ) + { + bool b1 = (*bits)[lsb + i]; + bool b2 = value & (1 << i); + (*bits)[lsb + i] = b1 ^ b2 ^ carry; + carry = ( b1 && b2 ) || ( carry && ( b1 != b2 ) ); + } + + if ( carry ) + { + for ( size_t i = 0; i < width; ++i ) + bits->Set(lsb + i); + } + + return ! 
carry; + } bool CounterVector::Decrement(size_type cell, count_type value) - { - assert(cell < Size()); - assert(value != 0); - value = ~value + 1; // A - B := A + ~B + 1 - bool carry = false; - size_t lsb = cell * width_; - for ( size_t i = 0; i < width_; ++i ) - { - bool b1 = (*bits_)[lsb + i]; - bool b2 = value & (1 << i); - (*bits_)[lsb + i] = b1 ^ b2 ^ carry; - carry = ( b1 && b2 ) || ( carry && ( b1 != b2 ) ); - } - return carry; - } + { + assert(cell < Size()); + assert(value != 0); + + value = ~value + 1; // A - B := A + ~B + 1 + bool carry = false; + size_t lsb = cell * width; + + for ( size_t i = 0; i < width; ++i ) + { + bool b1 = (*bits)[lsb + i]; + bool b2 = value & (1 << i); + (*bits)[lsb + i] = b1 ^ b2 ^ carry; + carry = ( b1 && b2 ) || ( carry && ( b1 != b2 ) ); + } + + return carry; + } CounterVector::count_type CounterVector::Count(size_type cell) const - { - assert(cell < Size()); - size_t cnt = 0, order = 1; - size_t lsb = cell * width_; - for (size_t i = lsb; i < lsb + width_; ++i, order <<= 1) - if ((*bits_)[i]) - cnt |= order; - return cnt; - } + { + assert(cell < Size()); + + size_t cnt = 0, order = 1; + size_t lsb = cell * width; + + for ( size_t i = lsb; i < lsb + width; ++i, order <<= 1 ) + if ( (*bits)[i] ) + cnt |= order; + + return cnt; + } CounterVector::size_type CounterVector::Size() const - { - return bits_->Size() / width_; - } + { + return bits->Size() / width; + } size_t CounterVector::Width() const - { - return width_; - } + { + return width; + } size_t CounterVector::Max() const - { - return std::numeric_limits::max() - >> (std::numeric_limits::digits - width_); - } + { + return std::numeric_limits::max() + >> (std::numeric_limits::digits - width); + } CounterVector& CounterVector::Merge(const CounterVector& other) - { - assert(Size() == other.Size()); - assert(Width() == other.Width()); - for ( size_t cell = 0; cell < Size(); ++cell ) - { - size_t lsb = cell * width_; - bool carry = false; - for ( size_t i = 0; i < width_; ++i ) - { - bool b1 = (*bits_)[lsb + i]; - bool b2 = (*other.bits_)[lsb + i]; - (*bits_)[lsb + i] = b1 ^ b2 ^ carry; - carry = ( b1 && b2 ) || ( carry && ( b1 != b2 ) ); - } - if ( carry ) - for ( size_t i = 0; i < width_; ++i ) - bits_->Set(lsb + i); - } - return *this; - } + { + assert(Size() == other.Size()); + assert(Width() == other.Width()); + + for ( size_t cell = 0; cell < Size(); ++cell ) + { + size_t lsb = cell * width; + bool carry = false; + + for ( size_t i = 0; i < width; ++i ) + { + bool b1 = (*bits)[lsb + i]; + bool b2 = (*other.bits)[lsb + i]; + (*bits)[lsb + i] = b1 ^ b2 ^ carry; + carry = ( b1 && b2 ) || ( carry && ( b1 != b2 ) ); + } + + if ( carry ) + { + for ( size_t i = 0; i < width; ++i ) + bits->Set(lsb + i); + } + } + + return *this; + } namespace probabilistic { CounterVector& CounterVector::operator|=(const CounterVector& other) -{ - return Merge(other); -} + { + return Merge(other); + } CounterVector operator|(const CounterVector& x, const CounterVector& y) -{ - CounterVector cv(x); - return cv |= y; -} + { + CounterVector cv(x); + return cv |= y; + } } bool CounterVector::Serialize(SerialInfo* info) const - { - return SerialObj::Serialize(info); - } + { + return SerialObj::Serialize(info); + } CounterVector* CounterVector::Unserialize(UnserialInfo* info) - { - return reinterpret_cast( - SerialObj::Unserialize(info, SER_COUNTERVECTOR)); - } + { + return reinterpret_cast(SerialObj::Unserialize(info, SER_COUNTERVECTOR)); + } IMPLEMENT_SERIAL(CounterVector, SER_COUNTERVECTOR) bool 
CounterVector::DoSerialize(SerialInfo* info) const { DO_SERIALIZE(SER_COUNTERVECTOR, SerialObj); - if ( ! bits_->Serialize(info) ) - return false; - return SERIALIZE(static_cast(width_)); - } + + if ( ! bits->Serialize(info) ) + return false; + + return SERIALIZE(static_cast(width)); + } bool CounterVector::DoUnserialize(UnserialInfo* info) { DO_UNSERIALIZE(SerialObj); - bits_ = BitVector::Unserialize(info); - if ( ! bits_ ) - return false; - uint64 width; - if ( ! UNSERIALIZE(&width) ) - return false; - width_ = static_cast(width); - return true; - } + bits = BitVector::Unserialize(info); + if ( ! bits ) + return false; + + uint64 width; + if ( ! UNSERIALIZE(&width) ) + return false; + + width = static_cast(width); + + return true; + } diff --git a/src/probabilistic/CounterVector.h b/src/probabilistic/CounterVector.h index 63445ec12d..178a68e8f2 100644 --- a/src/probabilistic/CounterVector.h +++ b/src/probabilistic/CounterVector.h @@ -1,5 +1,7 @@ -#ifndef CounterVector_h -#define CounterVector_h +// See the file "COPYING" in the main distribution directory for copyright. + +#ifndef PROBABILISTIC_COUNTERVECTOR_H +#define PROBABILISTIC_COUNTERVECTOR_H #include "SerialObj.h" @@ -8,123 +10,143 @@ namespace probabilistic { class BitVector; /** - * A vector of counters, each of which have a fixed number of bits. + * A vector of counters, each of which has a fixed number of bits. */ class CounterVector : public SerialObj { - CounterVector& operator=(const CounterVector&); public: - typedef size_t size_type; - typedef uint64 count_type; + typedef size_t size_type; + typedef uint64 count_type; - /** - * Constructs a counter vector having cells of a given width. - * - * @param width The number of bits that each cell occupies. - * - * @param cells The number of cells in the bitvector. - * - * @pre `cells > 0 && width > 0` - */ - CounterVector(size_t width, size_t cells = 1024); + /** + * Constructs a counter vector having cells of a given width. + * + * @param width The number of bits that each cell occupies. + * + * @param cells The number of cells in the bitvector. + * + * @pre `cells > 0 && width > 0` + */ + CounterVector(size_t width, size_t cells = 1024); /** * Copy-constructs a counter vector. * * @param other The counter vector to copy. */ - CounterVector(const CounterVector& other); + CounterVector(const CounterVector& other); - ~CounterVector(); + /** + * Destructor. + */ + ~CounterVector(); - /** - * Increments a given cell. - * - * @param cell The cell to increment. - * - * @param value The value to add to the current counter in *cell*. - * - * @return `true` if adding *value* to the counter in *cell* succeeded. - * - * @pre `cell < Size()` - */ - bool Increment(size_type cell, count_type value = 1); + /** + * Increments a given cell. + * + * @param cell The cell to increment. + * + * @param value The value to add to the current counter in *cell*. + * + * @return `true` if adding *value* to the counter in *cell* succeeded. + * + * @pre `cell < Size()` + */ + bool Increment(size_type cell, count_type value = 1); - /** - * Decrements a given cell. - * - * @param cell The cell to decrement. - * - * @param value The value to subtract from the current counter in *cell*. - * - * @return `true` if subtracting *value* from the counter in *cell* succeeded. - * - * @pre `cell < Size()` - */ - bool Decrement(size_type cell, count_type value = 1); + /** + * Decrements a given cell. + * + * @param cell The cell to decrement. 
+ * + * @param value The value to subtract from the current counter in *cell*. + * + * @return `true` if subtracting *value* from the counter in *cell* succeeded. + * + * @pre `cell < Size()` + */ + bool Decrement(size_type cell, count_type value = 1); - /** - * Retrieves the counter of a given cell. - * - * @param cell The cell index to retrieve the count for. - * - * @return The counter associated with *cell*. - * - * @pre `cell < Size()` - */ - count_type Count(size_type cell) const; + /** + * Retrieves the counter of a given cell. + * + * @param cell The cell index to retrieve the count for. + * + * @return The counter associated with *cell*. + * + * @pre `cell < Size()` + */ + count_type Count(size_type cell) const; - /** - * Retrieves the number of cells in the storage. - * - * @return The number of cells. - */ - size_type Size() const; + /** + * Retrieves the number of cells in the storage. + * + * @return The number of cells. + */ + size_type Size() const; - /** - * Retrieves the counter width. - * - * @return The number of bits per counter. - */ - size_t Width() const; + /** + * Retrieves the counter width. + * + * @return The number of bits per counter. + */ + size_t Width() const; - /** - * Computes the maximum counter value. - * - * @return The maximum counter value based on the width. - */ - size_t Max() const; + /** + * Computes the maximum counter value. + * + * @return The maximum counter value based on the width. + */ + size_t Max() const; - /** - * Merges another counter vector into this instance by *adding* the counters - * of each cells. - * - * @param other The counter vector to merge into this instance. - * - * @return A reference to `*this`. - * - * @pre `Size() == other.Size() && Width() == other.Width()` - */ - CounterVector& Merge(const CounterVector& other); + /** + * Merges another counter vector into this instance by *adding* the + * counters of each cells. + * + * @param other The counter vector to merge into this instance. + * + * @return A reference to `*this`. + * + * @pre `Size() == other.Size() && Width() == other.Width()` + */ + CounterVector& Merge(const CounterVector& other); - /** - * An alias for ::Merge. - */ - CounterVector& operator|=(const CounterVector& other); + /** + * An alias for ::Merge. + */ + CounterVector& operator|=(const CounterVector& other); - friend CounterVector operator|(const CounterVector& x, - const CounterVector& y); + /** + * Serializes the bit vector. + * + * @param info The serializaton information to use. + * + * @return True if successful. + */ + bool Serialize(SerialInfo* info) const; - bool Serialize(SerialInfo* info) const; - static CounterVector* Unserialize(UnserialInfo* info); + /** + * Unserialize the counter vector. + * + * @param info The serializaton information to use. + * + * @return The unserialized counter vector, or null if an error + * occured. + */ + static CounterVector* Unserialize(UnserialInfo* info); protected: - DECLARE_SERIAL(CounterVector); + friend CounterVector operator|(const CounterVector& x, + const CounterVector& y); - CounterVector() { } + CounterVector() { } + + DECLARE_SERIAL(CounterVector); private: - BitVector* bits_; - size_t width_; + CounterVector& operator=(const CounterVector&); // Disable. 
+ + BitVector* bits; + size_t width; }; } diff --git a/src/probabilistic/Hasher.cc b/src/probabilistic/Hasher.cc index c2f1110ecd..f9ce7bdd6b 100644 --- a/src/probabilistic/Hasher.cc +++ b/src/probabilistic/Hasher.cc @@ -1,66 +1,70 @@ +// See the file "COPYING" in the main distribution directory for copyright. #include #include "Hasher.h" - #include "digest.h" using namespace probabilistic; -Hasher::UHF::UHF(size_t seed, const std::string& extra) - : h_(compute_seed(seed, extra)) +UHF::UHF(size_t seed, const std::string& extra) + : h(compute_seed(seed, extra)) { } -Hasher::digest Hasher::UHF::hash(const void* x, size_t n) const +Hasher::digest UHF::hash(const void* x, size_t n) const { assert(n <= UHASH_KEY_SIZE); - return n == 0 ? 0 : h_(x, n); + return n == 0 ? 0 : h(x, n); } -size_t Hasher::UHF::compute_seed(size_t seed, const std::string& extra) +size_t UHF::compute_seed(size_t seed, const std::string& extra) { u_char buf[SHA256_DIGEST_LENGTH]; SHA256_CTX ctx; sha256_init(&ctx); + if ( extra.empty() ) { unsigned int first_seed = initial_seed(); sha256_update(&ctx, &first_seed, sizeof(first_seed)); } - else - { - sha256_update(&ctx, extra.c_str(), extra.size()); - } - sha256_update(&ctx, &seed, sizeof(seed)); - sha256_final(&ctx, buf); - // Take the first sizeof(size_t) bytes as seed. - return *reinterpret_cast(buf); - } + else + sha256_update(&ctx, extra.c_str(), extra.size()); + + sha256_update(&ctx, &seed, sizeof(seed)); + sha256_final(&ctx, buf); + + // Take the first sizeof(size_t) bytes as seed. + return *reinterpret_cast(buf); + } Hasher* Hasher::Create(size_t k, const std::string& name) { return new DefaultHasher(k, name); } -Hasher::Hasher(size_t k, const std::string& name) - : k_(k), name_(name) +Hasher::Hasher(size_t k, const std::string& arg_name) + : k(k) { + name = arg_name; } DefaultHasher::DefaultHasher(size_t k, const std::string& name) : Hasher(k, name) { for ( size_t i = 0; i < k; ++i ) - hash_functions_.push_back(UHF(i, name)); + hash_functions.push_back(UHF(i, name)); } Hasher::digest_vector DefaultHasher::Hash(const void* x, size_t n) const { digest_vector h(K(), 0); + for ( size_t i = 0; i < h.size(); ++i ) - h[i] = hash_functions_[i](x, n); + h[i] = hash_functions[i](x, n); + return h; } @@ -73,24 +77,25 @@ bool DefaultHasher::Equals(const Hasher* other) const { if ( typeid(*this) != typeid(*other) ) return false; + const DefaultHasher* o = static_cast(other); - return hash_functions_ == o->hash_functions_; + return hash_functions == o->hash_functions; } DoubleHasher::DoubleHasher(size_t k, const std::string& name) - : Hasher(k, name), - h1_(1, name), - h2_(2, name) + : Hasher(k, name), h1(1, name), h2(2, name) { } Hasher::digest_vector DoubleHasher::Hash(const void* x, size_t n) const { - digest h1 = h1_(x, n); - digest h2 = h2_(x, n); + digest d1 = h1(x, n); + digest d2 = h2(x, n); digest_vector h(K(), 0); + for ( size_t i = 0; i < h.size(); ++i ) - h[i] = h1 + i * h2; + h[i] = d1 + i * d2; + return h; } @@ -103,7 +108,7 @@ bool DoubleHasher::Equals(const Hasher* other) const { if ( typeid(*this) != typeid(*other) ) return false; - const DoubleHasher* o = static_cast(other); - return h1_ == o->h1_ && h2_ == o->h2_; - } + const DoubleHasher* o = static_cast(other); + return h1 == o->h1 && h2 == o->h2; + } diff --git a/src/probabilistic/Hasher.h b/src/probabilistic/Hasher.h index 0231343dcd..62c5d58d1f 100644 --- a/src/probabilistic/Hasher.h +++ b/src/probabilistic/Hasher.h @@ -1,5 +1,7 @@ -#ifndef Hasher_h -#define Hasher_h +// See the file "COPYING" in the main 
distribution directory for copyright. + +#ifndef PROBABILISTIC_HASHER_H +#define PROBABILISTIC_HASHER_H #include "Hash.h" #include "H3.h" @@ -7,123 +9,197 @@ namespace probabilistic { /** - * The abstract base class for hashers, i.e., constructs which hash elements - * *k* times. + * Abstract base class for hashers. A hasher creates a family of hash + * functions to hash an element *k* times. */ class Hasher { public: - typedef hash_t digest; - typedef std::vector digest_vector; + typedef hash_t digest; + typedef std::vector digest_vector; - /** - * Constructs the hashing policy used by the implementation. - * - * @todo This factory function exists because the HashingPolicy class - * hierachy is not yet serializable. - */ + /** + * Destructor. + */ + virtual ~Hasher() { } + + /** + * Computes hash values for an element. + * + * @param x The element to hash. + * + * @return Vector of *k* hash values. + */ + template + digest_vector operator()(const T& x) const + { + return Hash(&x, sizeof(T)); + } + + /** + * Computes the hashes for a set of bytes. + * + * @param x Pointer to first byte to hash. + * + * @param n Number of bytes to hash. + * + * @return Vector of *k* hash values. + * + */ + virtual digest_vector Hash(const void* x, size_t n) const = 0; + + /** + * Returns a deep copy of the hasher. + */ + virtual Hasher* Clone() const = 0; + + /** + * Returns true if two hashers are identical. + */ + virtual bool Equals(const Hasher* other) const = 0; + + /** + * Returns the number *k* of hash functions the hashers applies. + */ + size_t K() const { return k; } + + /** + * Returns the hasher's name. TODO: What's this? + */ + const std::string& Name() const { return name; } + + /** + * Constructs the hasher used by the implementation. This hardcodes a + * specific hashing policy. It exists only because the HashingPolicy + * class hierachy is not yet serializable. + * + * @param k The number of hash functions to apply. + * + * @param name The hasher's name. + * + * @return Returns a new hasher instance. + */ static Hasher* Create(size_t k, const std::string& name); - virtual ~Hasher() { } - - template - digest_vector operator()(const T& x) const - { - return Hash(&x, sizeof(T)); - } - - virtual digest_vector Hash(const void* x, size_t n) const = 0; - - virtual Hasher* Clone() const = 0; - - virtual bool Equals(const Hasher* other) const = 0; - - size_t K() const { return k_; } - const std::string& Name() const { return name_; } - protected: - /** - * A universal hash function family. - */ - class UHF { - public: - /** - * Constructs an H3 hash function seeded with a given seed and an optional - * extra seed to replace the initial Bro seed. - * - * @param seed The seed to use for this instance. - * - * @param extra If not empty, this parameter replaces the initial seed to - * compute the seed for t to compute the - * seed - * NUL-terminated string as additional seed. - */ - UHF(size_t seed, const std::string& extra = ""); + Hasher(size_t k, const std::string& name); - template - digest operator()(const T& x) const - { - return hash(&x, sizeof(T)); - } - - digest operator()(const void* x, size_t n) const - { - return hash(x, n); - } - - friend bool operator==(const UHF& x, const UHF& y) - { - return x.h_ == y.h_; - } - - friend bool operator!=(const UHF& x, const UHF& y) - { - return ! 
(x == y); - } - - digest hash(const void* x, size_t n) const; - - private: - static size_t compute_seed(size_t seed, const std::string& extra); - - H3 h_; - }; - - Hasher(size_t k, const std::string& name); - -private: - const size_t k_; - std::string name_; + private: + const size_t k; + std::string name; }; /** - * The default hashing policy. Performs *k* hash function computations. + * A universal hash function family. This is a helper class that Hasher + * implementations can use in their implementation. + */ +class UHF { +public: + /** + * Constructs an H3 hash function seeded with a given seed and an + * optional extra seed to replace the initial Bro seed. + * + * @param seed The seed to use for this instance. + * + * @param extra If not empty, this parameter replaces the initial + * seed to compute the seed for t to compute the seed NUL-terminated + * string as additional seed. + */ + UHF(size_t seed, const std::string& extra = ""); + + template + Hasher::digest operator()(const T& x) const + { + return hash(&x, sizeof(T)); + } + + /** + * Computes hash values for an element. + * + * @param x The element to hash. + * + * @return Vector of *k* hash values. + */ + Hasher::digest operator()(const void* x, size_t n) const + { + return hash(x, n); + } + + /** + * Computes the hashes for a set of bytes. + * + * @param x Pointer to first byte to hash. + * + * @param n Number of bytes to hash. + * + * @return Vector of *k* hash values. + * + */ + Hasher::digest hash(const void* x, size_t n) const; + + friend bool operator==(const UHF& x, const UHF& y) + { + return x.h == y.h; + } + + friend bool operator!=(const UHF& x, const UHF& y) + { + return ! (x == y); + } + +private: + static size_t compute_seed(size_t seed, const std::string& extra); + + H3 h; +}; + + +/** + * A hasher implementing the default hashing policy. Uses *k* separate hash + * functions internally. */ class DefaultHasher : public Hasher { public: - DefaultHasher(size_t k, const std::string& name); + /** + * Constructor for a hasher with *k* hash functions. + * + * @param k The number of hash functions to use. + * + * @param name The name of the hasher. + */ + DefaultHasher(size_t k, const std::string& name); - virtual digest_vector Hash(const void* x, size_t n) const /* final */; - virtual DefaultHasher* Clone() const /* final */; - virtual bool Equals(const Hasher* other) const /* final */; + // Overridden from Hasher. + virtual digest_vector Hash(const void* x, size_t n) const /* final */; + virtual DefaultHasher* Clone() const /* final */; + virtual bool Equals(const Hasher* other) const /* final */; private: - std::vector hash_functions_; + std::vector hash_functions; }; /** - * The *double-hashing* policy. Uses a linear combination of two hash functions. + * The *double-hashing* policy. Uses a linear combination of two hash + * functions. */ class DoubleHasher : public Hasher { public: - DoubleHasher(size_t k, const std::string& name); + /** + * Constructor for a double hasher with *k* hash functions. + * + * @param k The number of hash functions to use. + * + * @param name The name of the hasher. + */ + DoubleHasher(size_t k, const std::string& name); - virtual digest_vector Hash(const void* x, size_t n) const /* final */; - virtual DoubleHasher* Clone() const /* final */; - virtual bool Equals(const Hasher* other) const /* final */; + // Overridden from Hasher. 
+ virtual digest_vector Hash(const void* x, size_t n) const /* final */; + virtual DoubleHasher* Clone() const /* final */; + virtual bool Equals(const Hasher* other) const /* final */; private: - UHF h1_; - UHF h2_; + UHF h1; + UHF h2; }; } diff --git a/src/probabilistic/bloom-filter.bif b/src/probabilistic/bloom-filter.bif index 3c409b1b0f..cbbff85d7d 100644 --- a/src/probabilistic/bloom-filter.bif +++ b/src/probabilistic/bloom-filter.bif @@ -31,18 +31,19 @@ module GLOBAL; ## Returns: A Bloom filter handle. function bloomfilter_basic_init%(fp: double, capacity: count, name: string &default=""%): opaque of bloomfilter - %{ - if ( fp < 0.0 || fp > 1.0 ) - { - reporter->Error("false-positive rate must take value between 0 and 1"); - return NULL; - } + %{ + if ( fp < 0.0 || fp > 1.0 ) + { + reporter->Error("false-positive rate must take value between 0 and 1"); + return 0; + } - size_t cells = BasicBloomFilter::M(fp, capacity); - size_t optimal_k = BasicBloomFilter::K(cells, capacity); - const Hasher* h = Hasher::Create(optimal_k, name->CheckString()); - return new BloomFilterVal(new BasicBloomFilter(h, cells)); - %} + size_t cells = BasicBloomFilter::M(fp, capacity); + size_t optimal_k = BasicBloomFilter::K(cells, capacity); + const Hasher* h = Hasher::Create(optimal_k, name->CheckString()); + + return new BloomFilterVal(new BasicBloomFilter(h, cells)); + %} ## Creates a counting Bloom filter. ## @@ -59,20 +60,22 @@ function bloomfilter_basic_init%(fp: double, capacity: count, ## ## Returns: A Bloom filter handle. function bloomfilter_counting_init%(k: count, cells: count, max: count, - name: string &default=""%): opaque of bloomfilter - %{ - if ( max == 0 ) - { - reporter->Error("max counter value must be greater than 0"); - return NULL; - } + name: string &default=""%): opaque of bloomfilter + %{ + if ( max == 0 ) + { + reporter->Error("max counter value must be greater than 0"); + return 0; + } - const Hasher* h = Hasher::Create(k, name->CheckString()); - uint16 width = 1; - while ( max >>= 1 ) - ++width; - return new BloomFilterVal(new CountingBloomFilter(h, cells, width)); - %} + const Hasher* h = Hasher::Create(k, name->CheckString()); + + uint16 width = 1; + while ( max >>= 1 ) + ++width; + + return new BloomFilterVal(new CountingBloomFilter(h, cells, width)); + %} ## Adds an element to a Bloom filter. ## @@ -80,16 +83,20 @@ function bloomfilter_counting_init%(k: count, cells: count, max: count, ## ## x: The element to add. function bloomfilter_add%(bf: opaque of bloomfilter, x: any%): any - %{ - BloomFilterVal* bfv = static_cast(bf); - if ( ! bfv->Type() && ! bfv->Typify(x->Type()) ) - reporter->Error("failed to set Bloom filter type"); - else if ( bfv->Type() != x->Type() ) - reporter->Error("incompatible Bloom filter types"); - else - bfv->Add(x); - return NULL; - %} + %{ + BloomFilterVal* bfv = static_cast(bf); + + if ( ! bfv->Type() && ! bfv->Typify(x->Type()) ) + reporter->Error("failed to set Bloom filter type"); + + else if ( ! same_type(bfv->Type(), x->Type()) ) + reporter->Error("incompatible Bloom filter types"); + + else + bfv->Add(x); + + return 0; + %} ## Retrieves the counter for a given element in a Bloom filter. ## @@ -99,16 +106,20 @@ function bloomfilter_add%(bf: opaque of bloomfilter, x: any%): any ## ## Returns: the counter associated with *x* in *bf*. function bloomfilter_lookup%(bf: opaque of bloomfilter, x: any%): count - %{ - const BloomFilterVal* bfv = static_cast(bf); - if ( ! 
bfv->Type() ) - reporter->Error("cannot perform lookup on untyped Bloom filter"); - else if ( bfv->Type() != x->Type() ) - reporter->Error("incompatible Bloom filter types"); - else - return new Val(static_cast(bfv->Count(x)), TYPE_COUNT); - return new Val(0, TYPE_COUNT); - %} + %{ + const BloomFilterVal* bfv = static_cast(bf); + + if ( ! bfv->Type() ) + reporter->Error("cannot perform lookup on untyped Bloom filter"); + + else if ( ! same_type(bfv->Type(), x->Type()) ) + reporter->Error("incompatible Bloom filter types"); + + else + return new Val(static_cast(bfv->Count(x)), TYPE_COUNT); + + return new Val(0, TYPE_COUNT); + %} ## Merges two Bloom filters. ## @@ -118,13 +129,16 @@ function bloomfilter_lookup%(bf: opaque of bloomfilter, x: any%): count ## ## Returns: The union of *bf1* and *bf2*. function bloomfilter_merge%(bf1: opaque of bloomfilter, - bf2: opaque of bloomfilter%): opaque of bloomfilter - %{ - const BloomFilterVal* bfv1 = static_cast(bf1); - const BloomFilterVal* bfv2 = static_cast(bf2); - if ( bfv1->Type() != bfv2->Type() ) - reporter->Error("incompatible Bloom filter types"); - else - return BloomFilterVal::Merge(bfv1, bfv2); - return NULL; - %} + bf2: opaque of bloomfilter%): opaque of bloomfilter + %{ + const BloomFilterVal* bfv1 = static_cast(bf1); + const BloomFilterVal* bfv2 = static_cast(bf2); + + if ( ! same_type(bfv1->Type(), bfv2->Type()) ) + { + reporter->Error("incompatible Bloom filter types"); + return 0; + } + + return BloomFilterVal::Merge(bfv1, bfv2); + %} diff --git a/src/util.cc b/src/util.cc index 81ec135f98..6bea2eb7f1 100644 --- a/src/util.cc +++ b/src/util.cc @@ -803,10 +803,10 @@ void init_random_seed(uint32 seed, const char* read_file, const char* write_file bro_srandom(seed, seeds_done); if ( ! first_seed_saved ) - { - first_seed = seed; - first_seed_saved = true; - } + { + first_seed = seed; + first_seed_saved = true; + } if ( ! hmac_key_set ) { @@ -820,9 +820,9 @@ void init_random_seed(uint32 seed, const char* read_file, const char* write_file } unsigned int initial_seed() - { - return first_seed; -} + { + return first_seed; + } bool have_random_seed() { @@ -830,7 +830,7 @@ bool have_random_seed() } long int bro_prng(long int state) - { + { // Use our own simple linear congruence PRNG to make sure we are // predictable across platforms. static const long int m = 2147483647; @@ -844,14 +844,14 @@ long int bro_prng(long int state) state += m; return state; - } + } long int bro_random() { if ( ! bro_rand_determistic ) return random(); // Use system PRNG. - bro_rand_state = bro_prng(bro_rand_state); + bro_rand_state = bro_prng(bro_rand_state); return bro_rand_state; } diff --git a/src/util.h b/src/util.h index 5689253d95..aaad2d9403 100644 --- a/src/util.h +++ b/src/util.h @@ -166,15 +166,15 @@ extern void init_random_seed(uint32 seed, const char* load_file, const char* write_file); // Retrieves the initial seed computed after the very first call to -// init_random_seed(). Repeated calls to init_random_seed() will not affect the -// return value of this function. +// init_random_seed(). Repeated calls to init_random_seed() will not affect +// the return value of this function. unsigned int initial_seed(); // Returns true if the user explicitly set a seed via init_random_seed(); extern bool have_random_seed(); -// A simple linear congruence PRNG. It takes its state as argument and returns -// a new random value, which can serve as state for subsequent calls. +// A simple linear congruence PRNG. 
It takes its state as argument and +// returns a new random value, which can serve as state for subsequent calls. long int bro_prng(long int state); // Replacement for the system random(), to which is normally falls back diff --git a/testing/btest/Baseline/bifs.bloomfilter/output b/testing/btest/Baseline/bifs.bloomfilter/output index 4fe2ae1ecc..14e1f038c0 100644 --- a/testing/btest/Baseline/bifs.bloomfilter/output +++ b/testing/btest/Baseline/bifs.bloomfilter/output @@ -1,3 +1,9 @@ +error: incompatible Bloom filter types +error: incompatible Bloom filter types +error: incompatible Bloom filter types +error: incompatible Bloom filter types +error: false-positive rate must take value between 0 and 1 +error: false-positive rate must take value between 0 and 1 0 1 1 diff --git a/testing/btest/bifs/bloomfilter.bro b/testing/btest/bifs/bloomfilter.bro index f69ddbda0c..3b40f29553 100644 --- a/testing/btest/bifs/bloomfilter.bro +++ b/testing/btest/bifs/bloomfilter.bro @@ -1,4 +1,4 @@ -# @TEST-EXEC: bro -b %INPUT >output +# @TEST-EXEC: bro -b %INPUT >output 2>&1 # @TEST-EXEC: btest-diff output function test_basic_bloom_filter() From c89f61917b8b7a6ab8014fad211c879681c3ad5f Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Tue, 23 Jul 2013 18:44:22 -0700 Subject: [PATCH 095/118] Updating NEWS. --- NEWS | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/NEWS b/NEWS index 1fce6b1d9d..b1a5adc12b 100644 --- a/NEWS +++ b/NEWS @@ -108,6 +108,18 @@ New Functionality shunting, and sampling; plus plugin support to customize filters dynamically. +- Bro now provides Bloom filters of two kinds: basic Bloom filters + supporting membership tests, and counting Bloom filters that track + the frequency of elements. The corresponding functions are: + + bloomfilter_basic_init(fp: double, capacity: count, name: string &default=""): opaque of bloomfilter + bloomfilter_counting_init(k: count, cells: count, max: count, name: string &default=""): opaque of bloomfilter + bloomfilter_add(bf: opaque of bloomfilter, x: any) + bloomfilter_lookup(bf: opaque of bloomfilter, x: any): count + bloomfilter_merge(bf1: opaque of bloomfilter, bf2: opaque of bloomfilter): opaque of bloomfilter + + See TODO for full documentation. + Changed Functionality ~~~~~~~~~~~~~~~~~~~~~ From 75814e58e481f723868b644ba9fd06dba2fffa20 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Wed, 24 Jul 2013 00:35:46 -0400 Subject: [PATCH 096/118] Fix a bug with getting analyzer tags. --- src/analyzer/analyzer.bif | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/analyzer/analyzer.bif b/src/analyzer/analyzer.bif index 8b5a85956c..4d70816075 100644 --- a/src/analyzer/analyzer.bif +++ b/src/analyzer/analyzer.bif @@ -46,5 +46,6 @@ function __name%(atype: Analyzer::Tag%) : string function __tag%(name: string%) : Analyzer::Tag %{ - return new Val(analyzer_mgr->GetAnalyzerTag(name->CheckString()), TYPE_ENUM); + analyzer::Tag t = analyzer_mgr->GetAnalyzerTag(name->CheckString()); + return t.AsEnumVal()->Ref(); %} From 5383e8f75bae11bc5da30acf0b77493b90e5f71c Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Wed, 24 Jul 2013 11:21:10 +0200 Subject: [PATCH 097/118] Add bloomfilter_clear() BiF. 
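
A minimal Bro-script sketch of how the new BiF combines with the existing
Bloom filter functions; the parameter values are illustrative only:

    event bro_init()
        {
        local bf = bloomfilter_basic_init(0.01, 1000);
        bloomfilter_add(bf, "foo");
        print bloomfilter_lookup(bf, "foo");   # prints 1
        bloomfilter_clear(bf);
        # The element type and hasher seed are kept; only the contents go away.
        print bloomfilter_lookup(bf, "foo");   # prints 0
        }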
--- src/OpaqueVal.cc | 5 +++++ src/OpaqueVal.h | 1 + src/probabilistic/BloomFilter.cc | 10 ++++++++++ src/probabilistic/BloomFilter.h | 11 +++++++++++ src/probabilistic/CounterVector.cc | 5 +++++ src/probabilistic/CounterVector.h | 5 +++++ src/probabilistic/bloom-filter.bif | 16 ++++++++++++++++ 7 files changed, 53 insertions(+) diff --git a/src/OpaqueVal.cc b/src/OpaqueVal.cc index efdd890f70..19a372c005 100644 --- a/src/OpaqueVal.cc +++ b/src/OpaqueVal.cc @@ -578,6 +578,11 @@ size_t BloomFilterVal::Count(const Val* val) const return cnt; } +void BloomFilterVal::Clear() + { + bloom_filter->Clear(); + } + BloomFilterVal* BloomFilterVal::Merge(const BloomFilterVal* x, const BloomFilterVal* y) { diff --git a/src/OpaqueVal.h b/src/OpaqueVal.h index ea704cb70a..cfb184fc77 100644 --- a/src/OpaqueVal.h +++ b/src/OpaqueVal.h @@ -125,6 +125,7 @@ public: void Add(const Val* val); size_t Count(const Val* val) const; + void Clear(); static BloomFilterVal* Merge(const BloomFilterVal* x, const BloomFilterVal* y); diff --git a/src/probabilistic/BloomFilter.cc b/src/probabilistic/BloomFilter.cc index 5613dcce05..c78cd4193d 100644 --- a/src/probabilistic/BloomFilter.cc +++ b/src/probabilistic/BloomFilter.cc @@ -74,6 +74,11 @@ size_t BasicBloomFilter::K(size_t cells, size_t capacity) return std::ceil(frac * std::log(2)); } +void BasicBloomFilter::Clear() + { + bits->Clear(); + } + BasicBloomFilter* BasicBloomFilter::Merge(const BasicBloomFilter* x, const BasicBloomFilter* y) { @@ -191,3 +196,8 @@ size_t CountingBloomFilter::CountImpl(const Hasher::digest_vector& h) const return min; } + +void CountingBloomFilter::Clear() + { + cells->Clear(); + } diff --git a/src/probabilistic/BloomFilter.h b/src/probabilistic/BloomFilter.h index 4a6b01c484..55bc76fca7 100644 --- a/src/probabilistic/BloomFilter.h +++ b/src/probabilistic/BloomFilter.h @@ -47,6 +47,11 @@ public: return CountImpl((*hasher)(x)); } + /** + * Removes all elements, i.e., resets all bits in the underlying bit vector. + */ + virtual void Clear() = 0; + /** * Serializes the Bloom filter. * @@ -147,6 +152,9 @@ public: */ static size_t K(size_t cells, size_t capacity); + // Overridden from BloomFilter. + virtual void Clear(); + /** * Merges two basic Bloom filters. * @@ -188,6 +196,9 @@ public: */ CountingBloomFilter(const Hasher* hasher, size_t cells, size_t width); + // Overridden from BloomFilter. + virtual void Clear(); + /** * Merges two counting Bloom filters. * diff --git a/src/probabilistic/CounterVector.cc b/src/probabilistic/CounterVector.cc index 570ed1f8ea..00fa7fb8c0 100644 --- a/src/probabilistic/CounterVector.cc +++ b/src/probabilistic/CounterVector.cc @@ -70,6 +70,11 @@ bool CounterVector::Decrement(size_type cell, count_type value) return carry; } +void CounterVector::Clear() + { + bits->Clear(); + } + CounterVector::count_type CounterVector::Count(size_type cell) const { assert(cell < Size()); diff --git a/src/probabilistic/CounterVector.h b/src/probabilistic/CounterVector.h index 178a68e8f2..896f98ef1e 100644 --- a/src/probabilistic/CounterVector.h +++ b/src/probabilistic/CounterVector.h @@ -77,6 +77,11 @@ public: */ count_type Count(size_type cell) const; + /** + * Sets all counters to 0. + */ + void Clear(); + /** * Retrieves the number of cells in the storage. 
*
diff --git a/src/probabilistic/bloom-filter.bif b/src/probabilistic/bloom-filter.bif
index cbbff85d7d..9df168be0e 100644
--- a/src/probabilistic/bloom-filter.bif
+++ b/src/probabilistic/bloom-filter.bif
@@ -121,6 +121,22 @@ function bloomfilter_lookup%(bf: opaque of bloomfilter, x: any%): count
 	return new Val(0, TYPE_COUNT);
 	%}
 
+## Removes all elements from a Bloom filter. This function resets all bits
+## in the underlying bit vector to 0 but does not change the parameterization
+## of the Bloom filter, such as the element type and the hasher seed.
+##
+## bf: The Bloom filter handle.
+function bloomfilter_clear%(bf: opaque of bloomfilter%): any
+	%{
+	BloomFilterVal* bfv = static_cast<BloomFilterVal*>(bf);
+
+	if ( bfv->Type() ) // Untyped Bloom filters are already empty.
+		bfv->Clear();
+
+	return 0;
+	%}
+
+
 ## Merges two Bloom filters.
 ##
 ## bf1: The first Bloom filter handle.
From 5736aef440574389dda6555642ee7e938156dcf1 Mon Sep 17 00:00:00 2001
From: Matthias Vallentin
Date: Wed, 24 Jul 2013 13:05:38 +0200
Subject: [PATCH 098/118] Refactor Bloom filter merging.

---
 src/OpaqueVal.cc | 31 ++++++++---
 src/OpaqueVal.h | 22 --------
 src/probabilistic/BloomFilter.cc | 92 +++++++++++++++++++++++---------
 src/probabilistic/BloomFilter.h | 36 +++++++------
 4 files changed, 109 insertions(+), 72 deletions(-)

diff --git a/src/OpaqueVal.cc b/src/OpaqueVal.cc
index 19a372c005..feff4f3cc0 100644
--- a/src/OpaqueVal.cc
+++ b/src/OpaqueVal.cc
@@ -584,21 +584,36 @@ void BloomFilterVal::Clear()
 	}
 
 BloomFilterVal* BloomFilterVal::Merge(const BloomFilterVal* x,
-				      const BloomFilterVal* y)
+                                      const BloomFilterVal* y)
 	{
 	if ( ! same_type(x->Type(), y->Type()) )
+		{
 		reporter->InternalError("cannot merge Bloom filters with different types");
+		return 0;
+		}
 
-	BloomFilterVal* result;
+	if ( typeid(*x->bloom_filter) != typeid(*y->bloom_filter) )
+		{
+		reporter->InternalError("cannot merge different Bloom filter types");
+		return 0;
+		}
 
-	if ( (result = DoMerge(x, y)) )
-		return result;
+	probabilistic::BloomFilter* copy = x->bloom_filter->Clone();
+	bool success = copy->Merge(y->bloom_filter);
+	if ( ! success )
+		{
+		reporter->InternalError("failed to merge Bloom filter");
+		return 0;
+		}
 
-	else if ( (result = DoMerge(x, y)) )
-		return result;
+	BloomFilterVal* merged = new BloomFilterVal(copy);
+	if ( ! merged->Typify(x->Type()) )
+		{
+		reporter->InternalError("failed to set type on merged Bloom filter");
+		return 0;
+		}
 
-	reporter->InternalError("failed to merge Bloom filters");
-	return 0;
+	return merged;
 	}
 
 BloomFilterVal::~BloomFilterVal()
diff --git a/src/OpaqueVal.h b/src/OpaqueVal.h
index cfb184fc77..360bb69803 100644
--- a/src/OpaqueVal.h
+++ b/src/OpaqueVal.h
@@ -142,28 +142,6 @@ private:
 	BloomFilterVal(const BloomFilterVal&);
 	BloomFilterVal& operator=(const BloomFilterVal&);
 
-	template <typename T>
-	static BloomFilterVal* DoMerge(const BloomFilterVal* x,
-				       const BloomFilterVal* y)
-		{
-		if ( typeid(*x->bloom_filter) != typeid(*y->bloom_filter) )
-			reporter->InternalError("cannot merge different Bloom filter types");
-
-		if ( typeid(T) != typeid(*x->bloom_filter) )
-			return 0;
-
-		const T* a = static_cast<const T*>(x->bloom_filter);
-		const T* b = static_cast<const T*>(y->bloom_filter);
-
-		BloomFilterVal* merged = new BloomFilterVal(T::Merge(a, b));
-		assert(merged);
-
-		if ( ! 
merged->Typify(x->Type()) ) - reporter->InternalError("failed to set type on merged Bloom filter"); - - return merged; - } - BroType* type; CompositeHash* hash; probabilistic::BloomFilter* bloom_filter; diff --git a/src/probabilistic/BloomFilter.cc b/src/probabilistic/BloomFilter.cc index c78cd4193d..132cf376ec 100644 --- a/src/probabilistic/BloomFilter.cc +++ b/src/probabilistic/BloomFilter.cc @@ -79,17 +79,37 @@ void BasicBloomFilter::Clear() bits->Clear(); } -BasicBloomFilter* BasicBloomFilter::Merge(const BasicBloomFilter* x, - const BasicBloomFilter* y) +bool BasicBloomFilter::Merge(const BloomFilter* other) { - if ( ! x->hasher->Equals(y->hasher) ) - reporter->InternalError("incompatible hashers during BasicBloomFilter merge"); + if ( typeid(*this) != typeid(*other) ) + return 0; - BasicBloomFilter* result = new BasicBloomFilter(); - result->hasher = x->hasher->Clone(); - result->bits = new BitVector(*x->bits | *y->bits); + const BasicBloomFilter* o = static_cast(other); - return result; + if ( ! hasher->Equals(o->hasher) ) + { + reporter->InternalError("incompatible hashers in BasicBloomFilter merge"); + return false; + } + else if ( bits->Size() != o->bits->Size() ) + { + reporter->InternalError("different bitvector size in BasicBloomFilter merge"); + return false; + } + + (*bits) |= *o->bits; + + return true; + } + +BasicBloomFilter* BasicBloomFilter::Clone() const + { + BasicBloomFilter* copy = new BasicBloomFilter(); + + copy->hasher = hasher->Clone(); + copy->bits = new BitVector(*bits); + + return copy; } BasicBloomFilter::BasicBloomFilter() @@ -135,19 +155,6 @@ size_t BasicBloomFilter::CountImpl(const Hasher::digest_vector& h) const return 1; } -CountingBloomFilter* CountingBloomFilter::Merge(const CountingBloomFilter* x, - const CountingBloomFilter* y) - { - if ( ! x->hasher->Equals(y->hasher) ) - reporter->InternalError("incompatible hashers during CountingBloomFilter merge"); - - CountingBloomFilter* result = new CountingBloomFilter(); - result->hasher = x->hasher->Clone(); - result->cells = new CounterVector(*x->cells | *y->cells); - - return result; - } - CountingBloomFilter::CountingBloomFilter() { cells = 0; @@ -160,6 +167,44 @@ CountingBloomFilter::CountingBloomFilter(const Hasher* hasher, cells = new CounterVector(width, arg_cells); } +void CountingBloomFilter::Clear() + { + cells->Clear(); + } + +bool CountingBloomFilter::Merge(const BloomFilter* other) + { + if ( typeid(*this) != typeid(*other) ) + return 0; + + const CountingBloomFilter* o = static_cast(other); + + if ( ! 
hasher->Equals(o->hasher) ) + { + reporter->InternalError("incompatible hashers in CountingBloomFilter merge"); + return false; + } + else if ( cells->Size() != o->cells->Size() ) + { + reporter->InternalError("different bitvector size in CountingBloomFilter merge"); + return false; + } + + (*cells) |= *o->cells; + + return true; + } + +CountingBloomFilter* CountingBloomFilter::Clone() const + { + CountingBloomFilter* copy = new CountingBloomFilter(); + + copy->hasher = hasher->Clone(); + copy->cells = new CounterVector(*cells); + + return copy; + } + IMPLEMENT_SERIAL(CountingBloomFilter, SER_COUNTINGBLOOMFILTER) bool CountingBloomFilter::DoSerialize(SerialInfo* info) const @@ -196,8 +241,3 @@ size_t CountingBloomFilter::CountImpl(const Hasher::digest_vector& h) const return min; } - -void CountingBloomFilter::Clear() - { - cells->Clear(); - } diff --git a/src/probabilistic/BloomFilter.h b/src/probabilistic/BloomFilter.h index 55bc76fca7..2ab5b89941 100644 --- a/src/probabilistic/BloomFilter.h +++ b/src/probabilistic/BloomFilter.h @@ -52,6 +52,22 @@ public: */ virtual void Clear() = 0; + /** + * Merges another Bloom filter into a copy of this one. + * + * @param other The other Bloom filter. + * + * @return `true` on success. + */ + virtual bool Merge(const BloomFilter* other) = 0; + + /** + * Constructs a copy of this Bloom filter. + * + * @return A copy of `*this`. + */ + virtual BloomFilter* Clone() const = 0; + /** * Serializes the Bloom filter. * @@ -154,14 +170,8 @@ public: // Overridden from BloomFilter. virtual void Clear(); - - /** - * Merges two basic Bloom filters. - * - * @return The merged Bloom filter. - */ - static BasicBloomFilter* Merge(const BasicBloomFilter* x, - const BasicBloomFilter* y); + virtual bool Merge(const BloomFilter* other); + virtual BasicBloomFilter* Clone() const; protected: DECLARE_SERIAL(BasicBloomFilter); @@ -198,14 +208,8 @@ public: // Overridden from BloomFilter. virtual void Clear(); - - /** - * Merges two counting Bloom filters. - * - * @return The merged Bloom filter. - */ - static CountingBloomFilter* Merge(const CountingBloomFilter* x, - const CountingBloomFilter* y); + virtual bool Merge(const BloomFilter* other); + virtual CountingBloomFilter* Clone() const; protected: DECLARE_SERIAL(CountingBloomFilter); From 5769c32f1eeb319e599996e05e0e63b30af34823 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Wed, 24 Jul 2013 13:18:19 +0200 Subject: [PATCH 099/118] Support emptiness check on Bloom filters. 
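
At the script level the visible effect is in bloomfilter_lookup(), which now
short-circuits on an empty filter instead of walking through the type checks.
A small illustrative sketch:

    event bro_init()
        {
        local bf = bloomfilter_basic_init(0.01, 1000);
        # Nothing has been added yet, so the filter is still untyped; the
        # lookup now simply returns 0 instead of reporting an error.
        print bloomfilter_lookup(bf, 42);
        }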
--- src/OpaqueVal.cc | 5 +++++ src/OpaqueVal.h | 1 + src/probabilistic/BitVector.cc | 8 ++++++++ src/probabilistic/BitVector.h | 6 ++++++ src/probabilistic/BloomFilter.cc | 10 ++++++++++ src/probabilistic/BloomFilter.h | 9 +++++++++ src/probabilistic/CounterVector.cc | 5 +++++ src/probabilistic/CounterVector.h | 6 ++++++ src/probabilistic/bloom-filter.bif | 3 +++ 9 files changed, 53 insertions(+) diff --git a/src/OpaqueVal.cc b/src/OpaqueVal.cc index feff4f3cc0..a42892e2b2 100644 --- a/src/OpaqueVal.cc +++ b/src/OpaqueVal.cc @@ -583,6 +583,11 @@ void BloomFilterVal::Clear() bloom_filter->Clear(); } +bool BloomFilterVal::Empty() const + { + return bloom_filter->Empty(); + } + BloomFilterVal* BloomFilterVal::Merge(const BloomFilterVal* x, const BloomFilterVal* y) { diff --git a/src/OpaqueVal.h b/src/OpaqueVal.h index 360bb69803..52c9583fc7 100644 --- a/src/OpaqueVal.h +++ b/src/OpaqueVal.h @@ -126,6 +126,7 @@ public: void Add(const Val* val); size_t Count(const Val* val) const; void Clear(); + bool Empty() const; static BloomFilterVal* Merge(const BloomFilterVal* x, const BloomFilterVal* y); diff --git a/src/probabilistic/BitVector.cc b/src/probabilistic/BitVector.cc index 98f008b24b..13cd1aa3bb 100644 --- a/src/probabilistic/BitVector.cc +++ b/src/probabilistic/BitVector.cc @@ -463,6 +463,14 @@ bool BitVector::Empty() const return bits.empty(); } +bool BitVector::AllZero() const + { + for ( size_t i = 0; i < bits.size(); ++i ) + if ( bits[i] ) + return false; + return true; + } + BitVector::size_type BitVector::FindFirst() const { return find_from(0); diff --git a/src/probabilistic/BitVector.h b/src/probabilistic/BitVector.h index 9eefe1b633..d9c55d53c6 100644 --- a/src/probabilistic/BitVector.h +++ b/src/probabilistic/BitVector.h @@ -253,6 +253,12 @@ public: */ bool Empty() const; + /** + * Checks whether all bits are 0. + * @return `true` iff all bits in all blocks are 0. + */ + bool AllZero() const; + /** * Finds the bit position of of the first 1-bit. * @return The position of the first bit that equals to one or `npos` if no diff --git a/src/probabilistic/BloomFilter.cc b/src/probabilistic/BloomFilter.cc index 132cf376ec..7f769cbf7c 100644 --- a/src/probabilistic/BloomFilter.cc +++ b/src/probabilistic/BloomFilter.cc @@ -74,6 +74,11 @@ size_t BasicBloomFilter::K(size_t cells, size_t capacity) return std::ceil(frac * std::log(2)); } +bool BasicBloomFilter::Empty() const + { + return bits->AllZero(); + } + void BasicBloomFilter::Clear() { bits->Clear(); @@ -167,6 +172,11 @@ CountingBloomFilter::CountingBloomFilter(const Hasher* hasher, cells = new CounterVector(width, arg_cells); } +bool CountingBloomFilter::Empty() const + { + return cells->AllZero(); + } + void CountingBloomFilter::Clear() { cells->Clear(); diff --git a/src/probabilistic/BloomFilter.h b/src/probabilistic/BloomFilter.h index 2ab5b89941..b6cf18672f 100644 --- a/src/probabilistic/BloomFilter.h +++ b/src/probabilistic/BloomFilter.h @@ -47,6 +47,13 @@ public: return CountImpl((*hasher)(x)); } + /** + * Checks whether the Bloom filter is empty. + * + * @return `true` if the Bloom filter contains no elements. + */ + virtual bool Empty() const = 0; + /** * Removes all elements, i.e., resets all bits in the underlying bit vector. */ @@ -169,6 +176,7 @@ public: static size_t K(size_t cells, size_t capacity); // Overridden from BloomFilter. 
+ virtual bool Empty() const; virtual void Clear(); virtual bool Merge(const BloomFilter* other); virtual BasicBloomFilter* Clone() const; @@ -207,6 +215,7 @@ public: CountingBloomFilter(const Hasher* hasher, size_t cells, size_t width); // Overridden from BloomFilter. + virtual bool Empty() const; virtual void Clear(); virtual bool Merge(const BloomFilter* other); virtual CountingBloomFilter* Clone() const; diff --git a/src/probabilistic/CounterVector.cc b/src/probabilistic/CounterVector.cc index 00fa7fb8c0..24c9ff3638 100644 --- a/src/probabilistic/CounterVector.cc +++ b/src/probabilistic/CounterVector.cc @@ -70,6 +70,11 @@ bool CounterVector::Decrement(size_type cell, count_type value) return carry; } +bool CounterVector::AllZero() const + { + return bits->AllZero(); + } + void CounterVector::Clear() { bits->Clear(); diff --git a/src/probabilistic/CounterVector.h b/src/probabilistic/CounterVector.h index 896f98ef1e..df6fc57ac2 100644 --- a/src/probabilistic/CounterVector.h +++ b/src/probabilistic/CounterVector.h @@ -77,6 +77,12 @@ public: */ count_type Count(size_type cell) const; + /** + * Checks whether all counters are 0. + * @return `true` iff all counters have the value 0. + */ + bool AllZero() const; + /** * Sets all counters to 0. */ diff --git a/src/probabilistic/bloom-filter.bif b/src/probabilistic/bloom-filter.bif index 9df168be0e..dd21688fdd 100644 --- a/src/probabilistic/bloom-filter.bif +++ b/src/probabilistic/bloom-filter.bif @@ -109,6 +109,9 @@ function bloomfilter_lookup%(bf: opaque of bloomfilter, x: any%): count %{ const BloomFilterVal* bfv = static_cast(bf); + if ( bfv->Empty() ) + return new Val(0, TYPE_COUNT); + if ( ! bfv->Type() ) reporter->Error("cannot perform lookup on untyped Bloom filter"); From d8226169b8266b554c73b2804d480d10c4a9e456 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Wed, 24 Jul 2013 16:34:52 -0700 Subject: [PATCH 100/118] Fixing random number generation so that it returns same numbers as before. That broke a lot of tests. --- src/H3.h | 16 ++++++++++++++-- src/util.cc | 2 +- src/util.h | 2 +- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/src/H3.h b/src/H3.h index 8ea5848816..321fda924b 100644 --- a/src/H3.h +++ b/src/H3.h @@ -66,17 +66,29 @@ template class H3 { public: - H3(T seed = bro_random()) + H3() + { + Init(false, 0); + } + + H3(T seed) + { + Init(true, seed); + } + + void Init(bool have_seed, T seed) { T bit_lookup[N * CHAR_BIT]; for ( size_t bit = 0; bit < N * CHAR_BIT; bit++ ) { bit_lookup[bit] = 0; - seed = bro_prng(seed); for ( size_t i = 0; i < sizeof(T)/2; i++ ) + { + seed = have_seed ? bro_prng(seed) : bro_random(); // assume random() returns at least 16 random bits bit_lookup[bit] = (bit_lookup[bit] << 16) | (seed & 0xFFFF); + } } for ( size_t byte = 0; byte < N; byte++ ) diff --git a/src/util.cc b/src/util.cc index 6bea2eb7f1..23abbacc3f 100644 --- a/src/util.cc +++ b/src/util.cc @@ -829,7 +829,7 @@ bool have_random_seed() return bro_rand_determistic; } -long int bro_prng(long int state) +unsigned int bro_prng(unsigned int state) { // Use our own simple linear congruence PRNG to make sure we are // predictable across platforms. diff --git a/src/util.h b/src/util.h index aaad2d9403..05b3f032d0 100644 --- a/src/util.h +++ b/src/util.h @@ -175,7 +175,7 @@ extern bool have_random_seed(); // A simple linear congruence PRNG. It takes its state as argument and // returns a new random value, which can serve as state for subsequent calls. 
-long int bro_prng(long int state); +unsigned int bro_prng(unsigned int state); // Replacement for the system random(), to which is normally falls back // except when a seed has been given. In that case, the function bro_prng. From 33e6435329c9c629b47069fd48fd97139f21a2e4 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Wed, 24 Jul 2013 16:39:22 -0700 Subject: [PATCH 101/118] Updating tests. --- doc/scripts/DocSourcesList.cmake | 1 + .../canonified_loaded_scripts.log | 5 +++-- .../canonified_loaded_scripts.log | 5 +++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/doc/scripts/DocSourcesList.cmake b/doc/scripts/DocSourcesList.cmake index 529b03ca83..26a88027ef 100644 --- a/doc/scripts/DocSourcesList.cmake +++ b/doc/scripts/DocSourcesList.cmake @@ -17,6 +17,7 @@ rest_target(${psd} base/init-default.bro internal) rest_target(${psd} base/init-bare.bro internal) rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/analyzer.bif.bro) +rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/bloom-filter.bif.bro) rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/bro.bif.bro) rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/const.bif.bro) rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/event.bif.bro) diff --git a/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log b/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log index b7585a1477..04316da023 100644 --- a/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log +++ b/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log @@ -3,7 +3,7 @@ #empty_field (empty) #unset_field - #path loaded_scripts -#open 2013-07-05-05-20-50 +#open 2013-07-24-23-38-28 #fields name #types string scripts/base/init-bare.bro @@ -12,6 +12,7 @@ scripts/base/init-bare.bro build/scripts/base/bif/strings.bif.bro build/scripts/base/bif/bro.bif.bro build/scripts/base/bif/reporter.bif.bro + build/scripts/base/bif/bloom-filter.bif.bro build/scripts/base/bif/event.bif.bro build/scripts/base/bif/plugins/__load__.bro build/scripts/base/bif/plugins/Bro_ARP.events.bif.bro @@ -89,4 +90,4 @@ scripts/base/init-bare.bro build/scripts/base/bif/file_analysis.bif.bro scripts/policy/misc/loaded-scripts.bro scripts/base/utils/paths.bro -#close 2013-07-05-05-20-50 +#close 2013-07-24-23-38-28 diff --git a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log index 999fd7c841..66212643f3 100644 --- a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log +++ b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log @@ -3,7 +3,7 @@ #empty_field (empty) #unset_field - #path loaded_scripts -#open 2013-07-10-21-18-31 +#open 2013-07-24-23-38-33 #fields name #types string scripts/base/init-bare.bro @@ -12,6 +12,7 @@ scripts/base/init-bare.bro build/scripts/base/bif/strings.bif.bro build/scripts/base/bif/bro.bif.bro build/scripts/base/bif/reporter.bif.bro + build/scripts/base/bif/bloom-filter.bif.bro build/scripts/base/bif/event.bif.bro build/scripts/base/bif/plugins/__load__.bro build/scripts/base/bif/plugins/Bro_ARP.events.bif.bro @@ -195,4 +196,4 @@ scripts/base/init-default.bro scripts/base/protocols/tunnels/__load__.bro scripts/base/misc/find-checksum-offloading.bro scripts/policy/misc/loaded-scripts.bro -#close 2013-07-10-21-18-31 +#close 2013-07-24-23-38-33 From e482897f885e2f1039b96782d5e4bc080d74a535 Mon Sep 17 
00:00:00 2001 From: Matthias Vallentin Date: Thu, 25 Jul 2013 15:16:53 +0200 Subject: [PATCH 102/118] Add docs and use default value for hasher names. --- src/probabilistic/Hasher.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/probabilistic/Hasher.h b/src/probabilistic/Hasher.h index 62c5d58d1f..d266565284 100644 --- a/src/probabilistic/Hasher.h +++ b/src/probabilistic/Hasher.h @@ -63,7 +63,9 @@ public: size_t K() const { return k; } /** - * Returns the hasher's name. TODO: What's this? + * Returns the hasher's name. If not empty, the hasher uses this descriptor + * to seed its *k* hash functions. Otherwise the hasher mixes in the initial + * seed derived from the environment variable `$BRO_SEED`. */ const std::string& Name() const { return name; } @@ -83,7 +85,7 @@ public: protected: Hasher(size_t k, const std::string& name); - private: +private: const size_t k; std::string name; }; @@ -166,7 +168,7 @@ public: * * @param name The name of the hasher. */ - DefaultHasher(size_t k, const std::string& name); + DefaultHasher(size_t k, const std::string& name = ""); // Overridden from Hasher. virtual digest_vector Hash(const void* x, size_t n) const /* final */; @@ -190,7 +192,7 @@ public: * * @param name The name of the hasher. */ - DoubleHasher(size_t k, const std::string& name); + DoubleHasher(size_t k, const std::string& name = ""); // Overridden from Hasher. virtual digest_vector Hash(const void* x, size_t n) const /* final */; From 2fc5ca53ff8f90aa959b2bc65626b319a1dee529 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Thu, 25 Jul 2013 17:35:35 +0200 Subject: [PATCH 103/118] Make hashers serializable. There exists still a small bug that I could not find; the unit test istate/opaque.bro fails. If someone sees why, please chime in. --- src/SerialTypes.h | 6 ++ src/probabilistic/BloomFilter.cc | 19 +----- src/probabilistic/BloomFilter.h | 3 - src/probabilistic/Hasher.cc | 99 ++++++++++++++++++++++++++---- src/probabilistic/Hasher.h | 33 +++++----- src/probabilistic/bloom-filter.bif | 4 +- 6 files changed, 117 insertions(+), 47 deletions(-) diff --git a/src/SerialTypes.h b/src/SerialTypes.h index 85aed10bda..9933d005f0 100644 --- a/src/SerialTypes.h +++ b/src/SerialTypes.h @@ -52,6 +52,7 @@ SERIAL_IS(RE_MATCHER, 0x1400) SERIAL_IS(BITVECTOR, 0x1500) SERIAL_IS(COUNTERVECTOR, 0x1600) SERIAL_IS(BLOOMFILTER, 0x1700) +SERIAL_IS(HASHER, 0x1800) // These are the externally visible types. const SerialType SER_NONE = 0; @@ -206,6 +207,11 @@ SERIAL_BLOOMFILTER(BLOOMFILTER, 1) SERIAL_BLOOMFILTER(BASICBLOOMFILTER, 2) SERIAL_BLOOMFILTER(COUNTINGBLOOMFILTER, 3) +#define SERIAL_HASHER(name, val) SERIAL_CONST(name, val, HASHER) +SERIAL_HASHER(HASHER, 1) +SERIAL_HASHER(DEFAULTHASHER, 2) +SERIAL_HASHER(DOUBLEHASHER, 3) + SERIAL_CONST2(ID) SERIAL_CONST2(STATE_ACCESS) SERIAL_CONST2(CASE) diff --git a/src/probabilistic/BloomFilter.cc b/src/probabilistic/BloomFilter.cc index 7f769cbf7c..d446643ed3 100644 --- a/src/probabilistic/BloomFilter.cc +++ b/src/probabilistic/BloomFilter.cc @@ -38,28 +38,15 @@ bool BloomFilter::DoSerialize(SerialInfo* info) const { DO_SERIALIZE(SER_BLOOMFILTER, SerialObj); - if ( ! SERIALIZE(static_cast(hasher->K())) ) - return false; - - return SERIALIZE_STR(hasher->Name().c_str(), hasher->Name().size()); + return hasher->Serialize(info); } bool BloomFilter::DoUnserialize(UnserialInfo* info) { DO_UNSERIALIZE(SerialObj); - uint16 k; - if ( ! UNSERIALIZE(&k) ) - return false; - - const char* name; - if ( ! 
UNSERIALIZE_STR(&name, 0) ) - return false; - - hasher = Hasher::Create(k, name); - - delete [] name; - return true; + hasher = Hasher::Unserialize(info); + return hasher != 0; } size_t BasicBloomFilter::M(double fp, size_t capacity) diff --git a/src/probabilistic/BloomFilter.h b/src/probabilistic/BloomFilter.h index b6cf18672f..4865ae145c 100644 --- a/src/probabilistic/BloomFilter.h +++ b/src/probabilistic/BloomFilter.h @@ -13,9 +13,6 @@ class CounterVector; /** * The abstract base class for Bloom filters. - * - * At this point we won't let the user choose the hasher, but we might open - * up the interface in the future. */ class BloomFilter : public SerialObj { public: diff --git a/src/probabilistic/Hasher.cc b/src/probabilistic/Hasher.cc index f9ce7bdd6b..7db363142d 100644 --- a/src/probabilistic/Hasher.cc +++ b/src/probabilistic/Hasher.cc @@ -4,9 +4,56 @@ #include "Hasher.h" #include "digest.h" +#include "Serializer.h" using namespace probabilistic; +bool Hasher::Serialize(SerialInfo* info) const + { + return SerialObj::Serialize(info); + } + +Hasher* Hasher::Unserialize(UnserialInfo* info) + { + return reinterpret_cast(SerialObj::Unserialize(info, SER_HASHER)); + } + +bool Hasher::DoSerialize(SerialInfo* info) const + { + DO_SERIALIZE(SER_HASHER, SerialObj); + + if ( ! SERIALIZE(static_cast(k)) ) + return false; + + return SERIALIZE_STR(name.c_str(), name.size()); + } + +bool Hasher::DoUnserialize(UnserialInfo* info) + { + DO_UNSERIALIZE(SerialObj); + + uint16 serial_k; + if ( ! UNSERIALIZE(&serial_k) ) + return false; + k = serial_k; + assert(k > 0); + + const char* serial_name; + if ( ! UNSERIALIZE_STR(&serial_name, 0) ) + return false; + name = serial_name; + delete [] serial_name; + + return true; + } + +Hasher::Hasher(size_t k, const std::string& arg_name) + : k(k) + { + name = arg_name; + } + + UHF::UHF(size_t seed, const std::string& extra) : h(compute_seed(seed, extra)) { @@ -40,17 +87,6 @@ size_t UHF::compute_seed(size_t seed, const std::string& extra) return *reinterpret_cast(buf); } -Hasher* Hasher::Create(size_t k, const std::string& name) - { - return new DefaultHasher(k, name); - } - -Hasher::Hasher(size_t k, const std::string& arg_name) - : k(k) - { - name = arg_name; - } - DefaultHasher::DefaultHasher(size_t k, const std::string& name) : Hasher(k, name) { @@ -82,6 +118,27 @@ bool DefaultHasher::Equals(const Hasher* other) const return hash_functions == o->hash_functions; } +IMPLEMENT_SERIAL(DefaultHasher, SER_DEFAULTHASHER) + +bool DefaultHasher::DoSerialize(SerialInfo* info) const + { + DO_SERIALIZE(SER_DEFAULTHASHER, Hasher); + + // Nothing to do here, the base class has all we need serialized already. + return true; + } + +bool DefaultHasher::DoUnserialize(UnserialInfo* info) + { + DO_UNSERIALIZE(Hasher); + + hash_functions.clear(); + for ( size_t i = 0; i < K(); ++i ) + hash_functions.push_back(UHF(i, Name())); + + return true; + } + DoubleHasher::DoubleHasher(size_t k, const std::string& name) : Hasher(k, name), h1(1, name), h2(2, name) { @@ -112,3 +169,23 @@ bool DoubleHasher::Equals(const Hasher* other) const const DoubleHasher* o = static_cast(other); return h1 == o->h1 && h2 == o->h2; } + +IMPLEMENT_SERIAL(DoubleHasher, SER_DOUBLEHASHER) + +bool DoubleHasher::DoSerialize(SerialInfo* info) const + { + DO_SERIALIZE(SER_DOUBLEHASHER, Hasher); + + // Nothing to do here, the base class has all we need serialized already. 
+ return true; + } + +bool DoubleHasher::DoUnserialize(UnserialInfo* info) + { + DO_UNSERIALIZE(Hasher); + + h1 = UHF(1, Name()); + h2 = UHF(2, Name()); + + return true; + } diff --git a/src/probabilistic/Hasher.h b/src/probabilistic/Hasher.h index d266565284..7e6a8ba134 100644 --- a/src/probabilistic/Hasher.h +++ b/src/probabilistic/Hasher.h @@ -5,6 +5,7 @@ #include "Hash.h" #include "H3.h" +#include "SerialObj.h" namespace probabilistic { @@ -12,7 +13,7 @@ namespace probabilistic { * Abstract base class for hashers. A hasher creates a family of hash * functions to hash an element *k* times. */ -class Hasher { +class Hasher : public SerialObj { public: typedef hash_t digest; typedef std::vector digest_vector; @@ -69,24 +70,18 @@ public: */ const std::string& Name() const { return name; } - /** - * Constructs the hasher used by the implementation. This hardcodes a - * specific hashing policy. It exists only because the HashingPolicy - * class hierachy is not yet serializable. - * - * @param k The number of hash functions to apply. - * - * @param name The hasher's name. - * - * @return Returns a new hasher instance. - */ - static Hasher* Create(size_t k, const std::string& name); + bool Serialize(SerialInfo* info) const; + static Hasher* Unserialize(UnserialInfo* info); protected: + DECLARE_ABSTRACT_SERIAL(Hasher); + + Hasher() { } + Hasher(size_t k, const std::string& name); private: - const size_t k; + size_t k; std::string name; }; @@ -106,7 +101,7 @@ public: * seed to compute the seed for t to compute the seed NUL-terminated * string as additional seed. */ - UHF(size_t seed, const std::string& extra = ""); + UHF(size_t seed = 0, const std::string& extra = ""); template Hasher::digest operator()(const T& x) const @@ -175,7 +170,11 @@ public: virtual DefaultHasher* Clone() const /* final */; virtual bool Equals(const Hasher* other) const /* final */; + DECLARE_SERIAL(DefaultHasher); + private: + DefaultHasher() { } + std::vector hash_functions; }; @@ -199,7 +198,11 @@ public: virtual DoubleHasher* Clone() const /* final */; virtual bool Equals(const Hasher* other) const /* final */; + DECLARE_SERIAL(DoubleHasher); + private: + DoubleHasher() { } + UHF h1; UHF h2; }; diff --git a/src/probabilistic/bloom-filter.bif b/src/probabilistic/bloom-filter.bif index dd21688fdd..f03e3d149b 100644 --- a/src/probabilistic/bloom-filter.bif +++ b/src/probabilistic/bloom-filter.bif @@ -40,7 +40,7 @@ function bloomfilter_basic_init%(fp: double, capacity: count, size_t cells = BasicBloomFilter::M(fp, capacity); size_t optimal_k = BasicBloomFilter::K(cells, capacity); - const Hasher* h = Hasher::Create(optimal_k, name->CheckString()); + const Hasher* h = new DefaultHasher(optimal_k, name->CheckString()); return new BloomFilterVal(new BasicBloomFilter(h, cells)); %} @@ -68,7 +68,7 @@ function bloomfilter_counting_init%(k: count, cells: count, max: count, return 0; } - const Hasher* h = Hasher::Create(k, name->CheckString()); + const Hasher* h = new DefaultHasher(k, name->CheckString()); uint16 width = 1; while ( max >>= 1 ) From febb7e83957aa14fbc14d59782b33ac3690388b3 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Thu, 25 Jul 2013 09:55:15 -0700 Subject: [PATCH 104/118] Covenience make target to update the three coverage tests that usually need tweaking when scripts get added/removed. 
--- testing/btest/Makefile | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/testing/btest/Makefile b/testing/btest/Makefile index ff63bdb601..47451fbf27 100644 --- a/testing/btest/Makefile +++ b/testing/btest/Makefile @@ -24,4 +24,11 @@ cleanup: update-doc-sources: ../../doc/scripts/genDocSourcesList.sh ../../doc/scripts/DocSourcesList.cmake +# Updates the three coverage tests that usually need tweaking when +# scripts get added/removed. +update-coverage-tests: update-doc-sources + btest -qU coverage.bare-load-baseline + btest -qU coverage.default-load-baseline + @echo "Use 'git diff' to check updates look right." + .PHONY: all btest-verbose brief btest-brief coverage cleanup From 4a7046848caf6f0b97149c91902e42b770c97b3c Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Thu, 25 Jul 2013 09:45:10 -0700 Subject: [PATCH 105/118] bif files declared with bif_target() are now automatically compiled in. No more manual includes to pull them in. (It doesn't quite work fully automatically yet for some bifs that need script-level types defined, like the input and logging frameworks. They still do a manual "@load foo.bif" in their main.bro to get the order right. It's a bit tricky to fix that and would probably need splitting main.bro into two parts; not sure that's worth it.) --- CHANGES | 10 ++++++++++ VERSION | 2 +- aux/binpac | 2 +- cmake | 2 +- scripts/base/init-bare.bro | 2 ++ src/CMakeLists.txt | 18 +++++++++++++++++- src/Func.cc | 4 ++++ src/analyzer/Manager.cc | 1 - src/file_analysis/Manager.cc | 1 - .../canonified_loaded_scripts.log | 5 +++-- .../canonified_loaded_scripts.log | 5 +++-- 11 files changed, 42 insertions(+), 10 deletions(-) diff --git a/CHANGES b/CHANGES index 7cbbc74e4f..92d16d7776 100644 --- a/CHANGES +++ b/CHANGES @@ -1,4 +1,14 @@ +2.1-826 | 2013-07-25 10:12:26 -0700 + + * bif files declared with bif_target() are now automatically + compiled in. No more manual includes to pull them in. (Robin + Sommer) + + * Covenience make target in testing/btest to update the three + coverage tests that usually need tweaking when scripts get + added/removed. (Robin Sommer) + 2.1-824 | 2013-07-22 14:25:14 -0400 * Fixed a scriptland state issue that manifested especially badly on proxies. (Seth Hall) diff --git a/VERSION b/VERSION index d35eaf1454..71d91b2ea8 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.1-824 +2.1-826 diff --git a/aux/binpac b/aux/binpac index c39bd478b9..0c91feea55 160000 --- a/aux/binpac +++ b/aux/binpac @@ -1 +1 @@ -Subproject commit c39bd478b9d0ecd05b1b83aa9d09a7887893977c +Subproject commit 0c91feea55d00d3a1787203b3a43e3f9044d66e0 diff --git a/cmake b/cmake index 0187b33a29..026639f836 160000 --- a/cmake +++ b/cmake @@ -1 +1 @@ -Subproject commit 0187b33a29d5ec824f940feff60dc5d8c2fe314f +Subproject commit 026639f8368e56742c0cb5d9fb390ea64e60ec50 diff --git a/scripts/base/init-bare.bro b/scripts/base/init-bare.bro index 60ed0d2fd1..cffa6d80f1 100644 --- a/scripts/base/init-bare.bro +++ b/scripts/base/init-bare.bro @@ -3050,3 +3050,5 @@ const snaplen = 8192 &redef; @load base/frameworks/input @load base/frameworks/analyzer @load base/frameworks/file-analysis + +@load base/bif diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index e353dd4695..4644bab80a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -6,6 +6,9 @@ include_directories(BEFORE # This collects generated bif and pac files from subdirectories. set(bro_ALL_GENERATED_OUTPUTS CACHE INTERNAL "automatically generated files" FORCE) +# This collects bif inputs that we'll load automatically. 
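As a sketch of the manual case that remains, the pattern looks like this
(the .bif name below is only an example, not taken from this change):

    # In the framework's main.bro, pull in the compiled-in BiF declarations
    # explicitly so they are loaded at the right point:
    @load base/bif/logging.bif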
+set(bro_AUTO_BIFS CACHE INTERNAL "BIFs for automatic inclusion" FORCE) + # If TRUE, use CMake's object libraries for sub-directories instead of # static libraries. This requires CMake >= 2.8.8. set(bro_HAVE_OBJECT_LIBRARIES FALSE) @@ -382,8 +385,21 @@ set(BRO_EXE bro CACHE STRING "Bro executable binary" FORCE) # Target to create all the autogenerated files. +add_custom_target(generate_outputs_stage1) +add_dependencies(generate_outputs_stage1 ${bro_ALL_GENERATED_OUTPUTS}) + +# Target to create the joint includes files that pull in the bif code. +bro_bif_create_includes(generate_outputs_stage2 ${CMAKE_CURRENT_BINARY_DIR} "${bro_AUTO_BIFS}") +add_dependencies(generate_outputs_stage2 generate_outputs_stage1) + +# Global target to trigger creation of autogenerated code. add_custom_target(generate_outputs) -add_dependencies(generate_outputs ${bro_ALL_GENERATED_OUTPUTS}) +add_dependencies(generate_outputs generate_outputs_stage2) + +# Build __load__.bro files for standard *.bif.bro. +bro_bif_create_loader(bif_loader ${CMAKE_BINARY_DIR}/scripts/base/bif) +add_dependencies(bif_loader ${bro_SUBDIRS}) +add_dependencies(bro bif_loader) # Build __load__.bro files for plugins/*.bif.bro. bro_bif_create_loader(bif_loader_plugins ${CMAKE_BINARY_DIR}/scripts/base/bif/plugins) diff --git a/src/Func.cc b/src/Func.cc index f3718fe231..7859e8d2ad 100644 --- a/src/Func.cc +++ b/src/Func.cc @@ -560,6 +560,8 @@ void builtin_error(const char* msg, BroObj* arg) #include "reporter.bif.func_def" #include "strings.bif.func_def" +#include "__all__.bif.cc" // Autogenerated for compiling in the bif_target() code. + void init_builtin_funcs() { bro_resources = internal_type("bro_resources")->AsRecordType(); @@ -574,6 +576,8 @@ void init_builtin_funcs() #include "reporter.bif.func_init" #include "strings.bif.func_init" +#include "__all__.bif.init.cc" // Autogenerated for compiling in the bif_target() code. 
+ did_builtin_init = true; } diff --git a/src/analyzer/Manager.cc b/src/analyzer/Manager.cc index 5695dec625..8b290e2341 100644 --- a/src/analyzer/Manager.cc +++ b/src/analyzer/Manager.cc @@ -103,7 +103,6 @@ void Manager::InitPreScript() void Manager::InitPostScript() { - #include "analyzer.bif.init.cc" } void Manager::DumpDebug() diff --git a/src/file_analysis/Manager.cc b/src/file_analysis/Manager.cc index ea1ed954ed..a7f7a29c18 100644 --- a/src/file_analysis/Manager.cc +++ b/src/file_analysis/Manager.cc @@ -60,7 +60,6 @@ void Manager::RegisterAnalyzerComponent(Component* component) void Manager::InitPostScript() { - #include "file_analysis.bif.init.cc" } void Manager::Terminate() diff --git a/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log b/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log index b7585a1477..724de75027 100644 --- a/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log +++ b/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log @@ -3,7 +3,7 @@ #empty_field (empty) #unset_field - #path loaded_scripts -#open 2013-07-05-05-20-50 +#open 2013-07-25-17-10-49 #fields name #types string scripts/base/init-bare.bro @@ -87,6 +87,7 @@ scripts/base/init-bare.bro scripts/base/frameworks/file-analysis/__load__.bro scripts/base/frameworks/file-analysis/main.bro build/scripts/base/bif/file_analysis.bif.bro + build/scripts/base/bif/__load__.bro scripts/policy/misc/loaded-scripts.bro scripts/base/utils/paths.bro -#close 2013-07-05-05-20-50 +#close 2013-07-25-17-10-49 diff --git a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log index 999fd7c841..a3e89b4d60 100644 --- a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log +++ b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log @@ -3,7 +3,7 @@ #empty_field (empty) #unset_field - #path loaded_scripts -#open 2013-07-10-21-18-31 +#open 2013-07-25-17-10-50 #fields name #types string scripts/base/init-bare.bro @@ -87,6 +87,7 @@ scripts/base/init-bare.bro scripts/base/frameworks/file-analysis/__load__.bro scripts/base/frameworks/file-analysis/main.bro build/scripts/base/bif/file_analysis.bif.bro + build/scripts/base/bif/__load__.bro scripts/base/init-default.bro scripts/base/utils/site.bro scripts/base/utils/patterns.bro @@ -195,4 +196,4 @@ scripts/base/init-default.bro scripts/base/protocols/tunnels/__load__.bro scripts/base/misc/find-checksum-offloading.bro scripts/policy/misc/loaded-scripts.bro -#close 2013-07-10-21-18-31 +#close 2013-07-25-17-10-50 From c11bf3d9226fed28dbf2676c123cadd52bd13a68 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Thu, 25 Jul 2013 11:28:30 -0700 Subject: [PATCH 106/118] Fixing serialization bug introduced during earlier merge. --- src/OpaqueVal.cc | 6 +++--- src/probabilistic/BitVector.cc | 6 +++--- src/probabilistic/CounterVector.cc | 6 +++--- .../canonified_loaded_scripts.log | 14 +++++++------- .../canonified_loaded_scripts.log | 14 +++++++------- 5 files changed, 23 insertions(+), 23 deletions(-) diff --git a/src/OpaqueVal.cc b/src/OpaqueVal.cc index b70cfee086..66b3c081e7 100644 --- a/src/OpaqueVal.cc +++ b/src/OpaqueVal.cc @@ -656,11 +656,11 @@ bool BloomFilterVal::DoUnserialize(UnserialInfo* info) if ( is_typed ) { - BroType* type = BroType::Unserialize(info); - if ( ! 
Typify(type) ) + BroType* t = BroType::Unserialize(info); + if ( ! Typify(t) ) return false; - Unref(type); + Unref(t); } bloom_filter = probabilistic::BloomFilter::Unserialize(info); diff --git a/src/probabilistic/BitVector.cc b/src/probabilistic/BitVector.cc index c0285eced3..6e642e62c1 100644 --- a/src/probabilistic/BitVector.cc +++ b/src/probabilistic/BitVector.cc @@ -568,11 +568,11 @@ bool BitVector::DoUnserialize(UnserialInfo* info) bits[i] = static_cast(block); } - uint64 num_bits; - if ( ! UNSERIALIZE(&num_bits) ) + uint64 n; + if ( ! UNSERIALIZE(&n) ) return false; - num_bits = static_cast(num_bits); + num_bits = static_cast(n); return true; } diff --git a/src/probabilistic/CounterVector.cc b/src/probabilistic/CounterVector.cc index 24c9ff3638..d5635fc0f2 100644 --- a/src/probabilistic/CounterVector.cc +++ b/src/probabilistic/CounterVector.cc @@ -183,11 +183,11 @@ bool CounterVector::DoUnserialize(UnserialInfo* info) if ( ! bits ) return false; - uint64 width; - if ( ! UNSERIALIZE(&width) ) + uint64 w; + if ( ! UNSERIALIZE(&w) ) return false; - width = static_cast(width); + width = static_cast(w); return true; } diff --git a/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log b/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log index 3236b39acd..5879c504e2 100644 --- a/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log +++ b/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log @@ -3,7 +3,7 @@ #empty_field (empty) #unset_field - #path loaded_scripts -#open 2013-07-25-17-17-10 +#open 2013-07-25-17-54-33 #fields name #types string scripts/base/init-bare.bro @@ -23,28 +23,28 @@ scripts/base/init-bare.bro build/scripts/base/bif/plugins/Bro_DCE_RPC.events.bif.bro build/scripts/base/bif/plugins/Bro_DHCP.events.bif.bro build/scripts/base/bif/plugins/Bro_DNS.events.bif.bro + build/scripts/base/bif/plugins/Bro_FTP.events.bif.bro + build/scripts/base/bif/plugins/Bro_FTP.functions.bif.bro build/scripts/base/bif/plugins/Bro_File.events.bif.bro build/scripts/base/bif/plugins/Bro_FileHash.events.bif.bro build/scripts/base/bif/plugins/Bro_Finger.events.bif.bro - build/scripts/base/bif/plugins/Bro_FTP.events.bif.bro - build/scripts/base/bif/plugins/Bro_FTP.functions.bif.bro - build/scripts/base/bif/plugins/Bro_Gnutella.events.bif.bro build/scripts/base/bif/plugins/Bro_GTPv1.events.bif.bro + build/scripts/base/bif/plugins/Bro_Gnutella.events.bif.bro build/scripts/base/bif/plugins/Bro_HTTP.events.bif.bro build/scripts/base/bif/plugins/Bro_HTTP.functions.bif.bro build/scripts/base/bif/plugins/Bro_ICMP.events.bif.bro + build/scripts/base/bif/plugins/Bro_IRC.events.bif.bro build/scripts/base/bif/plugins/Bro_Ident.events.bif.bro build/scripts/base/bif/plugins/Bro_InterConn.events.bif.bro - build/scripts/base/bif/plugins/Bro_IRC.events.bif.bro build/scripts/base/bif/plugins/Bro_Login.events.bif.bro build/scripts/base/bif/plugins/Bro_Login.functions.bif.bro build/scripts/base/bif/plugins/Bro_MIME.events.bif.bro build/scripts/base/bif/plugins/Bro_Modbus.events.bif.bro build/scripts/base/bif/plugins/Bro_NCP.events.bif.bro + build/scripts/base/bif/plugins/Bro_NTP.events.bif.bro build/scripts/base/bif/plugins/Bro_NetBIOS.events.bif.bro build/scripts/base/bif/plugins/Bro_NetBIOS.functions.bif.bro build/scripts/base/bif/plugins/Bro_NetFlow.events.bif.bro - build/scripts/base/bif/plugins/Bro_NTP.events.bif.bro build/scripts/base/bif/plugins/Bro_PIA.events.bif.bro 
build/scripts/base/bif/plugins/Bro_POP3.events.bif.bro build/scripts/base/bif/plugins/Bro_RPC.events.bif.bro @@ -91,4 +91,4 @@ scripts/base/init-bare.bro build/scripts/base/bif/__load__.bro scripts/policy/misc/loaded-scripts.bro scripts/base/utils/paths.bro -#close 2013-07-25-17-17-10 +#close 2013-07-25-17-54-33 diff --git a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log index cb4ccba850..2a820f4270 100644 --- a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log +++ b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log @@ -3,7 +3,7 @@ #empty_field (empty) #unset_field - #path loaded_scripts -#open 2013-07-25-17-17-11 +#open 2013-07-25-17-54-33 #fields name #types string scripts/base/init-bare.bro @@ -23,28 +23,28 @@ scripts/base/init-bare.bro build/scripts/base/bif/plugins/Bro_DCE_RPC.events.bif.bro build/scripts/base/bif/plugins/Bro_DHCP.events.bif.bro build/scripts/base/bif/plugins/Bro_DNS.events.bif.bro + build/scripts/base/bif/plugins/Bro_FTP.events.bif.bro + build/scripts/base/bif/plugins/Bro_FTP.functions.bif.bro build/scripts/base/bif/plugins/Bro_File.events.bif.bro build/scripts/base/bif/plugins/Bro_FileHash.events.bif.bro build/scripts/base/bif/plugins/Bro_Finger.events.bif.bro - build/scripts/base/bif/plugins/Bro_FTP.events.bif.bro - build/scripts/base/bif/plugins/Bro_FTP.functions.bif.bro - build/scripts/base/bif/plugins/Bro_Gnutella.events.bif.bro build/scripts/base/bif/plugins/Bro_GTPv1.events.bif.bro + build/scripts/base/bif/plugins/Bro_Gnutella.events.bif.bro build/scripts/base/bif/plugins/Bro_HTTP.events.bif.bro build/scripts/base/bif/plugins/Bro_HTTP.functions.bif.bro build/scripts/base/bif/plugins/Bro_ICMP.events.bif.bro + build/scripts/base/bif/plugins/Bro_IRC.events.bif.bro build/scripts/base/bif/plugins/Bro_Ident.events.bif.bro build/scripts/base/bif/plugins/Bro_InterConn.events.bif.bro - build/scripts/base/bif/plugins/Bro_IRC.events.bif.bro build/scripts/base/bif/plugins/Bro_Login.events.bif.bro build/scripts/base/bif/plugins/Bro_Login.functions.bif.bro build/scripts/base/bif/plugins/Bro_MIME.events.bif.bro build/scripts/base/bif/plugins/Bro_Modbus.events.bif.bro build/scripts/base/bif/plugins/Bro_NCP.events.bif.bro + build/scripts/base/bif/plugins/Bro_NTP.events.bif.bro build/scripts/base/bif/plugins/Bro_NetBIOS.events.bif.bro build/scripts/base/bif/plugins/Bro_NetBIOS.functions.bif.bro build/scripts/base/bif/plugins/Bro_NetFlow.events.bif.bro - build/scripts/base/bif/plugins/Bro_NTP.events.bif.bro build/scripts/base/bif/plugins/Bro_PIA.events.bif.bro build/scripts/base/bif/plugins/Bro_POP3.events.bif.bro build/scripts/base/bif/plugins/Bro_RPC.events.bif.bro @@ -197,4 +197,4 @@ scripts/base/init-default.bro scripts/base/protocols/tunnels/__load__.bro scripts/base/misc/find-checksum-offloading.bro scripts/policy/misc/loaded-scripts.bro -#close 2013-07-25-17-17-11 +#close 2013-07-25-17-54-33 From 7dd5771384d6e45693e602efaebc18ffbabe8c47 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Thu, 25 Jul 2013 12:02:41 -0700 Subject: [PATCH 107/118] Protection about broken traces with empty pcap headers. 
--- CHANGES | 5 +++++ VERSION | 2 +- src/PktSrc.cc | 6 ++++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index 3529576088..912d7d301f 100644 --- a/CHANGES +++ b/CHANGES @@ -1,4 +1,9 @@ +2.1-888 | 2013-07-25 12:02:41 -0700 + + * Protection about broken traces with empty pcap headers. (Matt + Thompson) + 2.1-887 | 2013-07-25 11:33:27 -0700 * Support for Bloom filter. (Matthias Vallentin) diff --git a/VERSION b/VERSION index 2ced22d6f4..4f0ea7a5ac 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.1-887 +2.1-888 diff --git a/src/PktSrc.cc b/src/PktSrc.cc index 105dc90d30..48b382565b 100644 --- a/src/PktSrc.cc +++ b/src/PktSrc.cc @@ -77,6 +77,12 @@ int PktSrc::ExtractNextPacket() data = last_data = pcap_next(pd, &hdr); + if ( data && (hdr.len == 0 || hdr.caplen == 0) ) + { + sessions->Weird("empty_pcap_header", &hdr, data); + return 0; + } + if ( data ) next_timestamp = hdr.ts.tv_sec + double(hdr.ts.tv_usec) / 1e6; From 8d729a378bd149206326f470fa76c1d4447e038f Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Thu, 25 Jul 2013 12:08:01 -0700 Subject: [PATCH 108/118] Updating submodule(s). [nomail] --- aux/binpac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aux/binpac b/aux/binpac index 0c91feea55..896ddedde5 160000 --- a/aux/binpac +++ b/aux/binpac @@ -1 +1 @@ -Subproject commit 0c91feea55d00d3a1787203b3a43e3f9044d66e0 +Subproject commit 896ddedde55c48ec2163577fc258b49c418abb3e From 939619889d41b3233e72e0c109301355bee25173 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Thu, 25 Jul 2013 16:51:16 -0500 Subject: [PATCH 109/118] File analysis fixes and test updates. - Several places were just using old variable names or not loading scripts correctly after they'd been renamed/moved. - Revert/adjust a change in how HTTP file handles are generated that broke partial content responses. - Turn some libmagic builtin checks back on; seems some are actually useful (e.g. text detection seems to be a builtin). The rule going forward probably will be only to turn off a builtin if we confirm it causes issues. - Removed some tests that are redundant or not necessary anymore because the generic file analysis tests cover them. - A couple FTP tests still fail that I think need an actual solution via script changes. 
--- doc/scripts/CMakeLists.txt | 4 +- doc/scripts/DocSourcesList.cmake | 23 +- scripts/base/frameworks/files/main.bro | 8 +- scripts/base/protocols/ftp/files.bro | 1 + scripts/base/protocols/http/files.bro | 10 +- scripts/policy/protocols/http/detect-MHR.bro | 44 --- .../protocols/smtp/entities-excerpt.bro | 7 +- scripts/test-all-policy.bro | 4 +- src/util.cc | 14 + src/util.h | 17 - .../Baseline/core.tunnels.ayiya/http.log | 6 +- .../http.log | 12 +- .../core.tunnels.gtp.outer_ip_frag/http.log | 10 +- .../Baseline/core.tunnels.teredo/http.log | 16 +- .../http.log | 12 +- .../canonified_loaded_scripts.log | 10 +- .../istate.events-ssl/receiver.http.log | 10 +- .../istate.events-ssl/sender.http.log | 10 +- .../Baseline/istate.events/receiver.http.log | 10 +- .../Baseline/istate.events/sender.http.log | 10 +- .../out | 1 + .../get.out | 1 + .../bro..stdout | 2 + .../get.out | 1 + .../out | 2 +- .../get-gzip.out | 1 + .../get.out | 1 + .../out | 4 + .../a.out | 1 + .../b.out | 2 + .../c.out | 1 + .../out | 5 + .../out | 2 + .../file_analysis.log | 10 - .../files.log | 10 + .../out | 3 + .../http.log | 10 +- .../manager-1.notice.log | 10 +- .../manager-1.notice.log | 10 +- .../notice.log | 10 +- .../conn.log | 14 - .../extractions | 22 -- .../ftp.log | 21 -- .../http.log | 10 +- .../http-item.dat | 304 ------------------ .../http.log | 10 - .../http.log | 100 +++--- .../http.log | 14 - .../http.log | 18 +- .../http.log | 10 +- .../scripts.base.protocols.irc.basic/irc.log | 6 +- .../irc-dcc-item.dat | Bin 42208 -> 0 bytes .../irc.log | 13 - .../smtp.log | 10 +- .../extractions | 277 ---------------- .../filecount | 1 - .../smtp_entities.log | 12 - .../notice.log | 12 +- testing/btest/istate/events-ssl.bro | 21 +- testing/btest/istate/events.bro | 15 +- .../file-analysis/bifs/remove_action.bro | 4 +- .../base/frameworks/file-analysis/irc.bro | 16 +- .../base/frameworks/file-analysis/logging.bro | 2 +- .../base/protocols/ftp/ftp-extract.bro | 10 - .../protocols/http/http-extract-files.bro | 6 - .../base/protocols/http/http-mime-and-md5.bro | 6 - .../base/protocols/http/multipart-extract.bro | 8 +- .../base/protocols/irc/dcc-extract.test | 11 - .../base/protocols/smtp/mime-extract.test | 11 - testing/external/subdir-btest.cfg | 2 +- testing/scripts/file-analysis-test.bro | 4 + 71 files changed, 293 insertions(+), 1002 deletions(-) delete mode 100644 scripts/policy/protocols/http/detect-MHR.bro delete mode 100644 testing/btest/Baseline/scripts.base.frameworks.file-analysis.logging/file_analysis.log create mode 100644 testing/btest/Baseline/scripts.base.frameworks.file-analysis.logging/files.log delete mode 100644 testing/btest/Baseline/scripts.base.protocols.ftp.ftp-extract/conn.log delete mode 100644 testing/btest/Baseline/scripts.base.protocols.ftp.ftp-extract/extractions delete mode 100644 testing/btest/Baseline/scripts.base.protocols.ftp.ftp-extract/ftp.log delete mode 100644 testing/btest/Baseline/scripts.base.protocols.http.http-extract-files/http-item.dat delete mode 100644 testing/btest/Baseline/scripts.base.protocols.http.http-extract-files/http.log delete mode 100644 testing/btest/Baseline/scripts.base.protocols.http.http-mime-and-md5/http.log delete mode 100644 testing/btest/Baseline/scripts.base.protocols.irc.dcc-extract/irc-dcc-item.dat delete mode 100644 testing/btest/Baseline/scripts.base.protocols.irc.dcc-extract/irc.log delete mode 100644 testing/btest/Baseline/scripts.base.protocols.smtp.mime-extract/extractions delete mode 100644 
testing/btest/Baseline/scripts.base.protocols.smtp.mime-extract/filecount delete mode 100644 testing/btest/Baseline/scripts.base.protocols.smtp.mime-extract/smtp_entities.log delete mode 100644 testing/btest/scripts/base/protocols/ftp/ftp-extract.bro delete mode 100644 testing/btest/scripts/base/protocols/http/http-extract-files.bro delete mode 100644 testing/btest/scripts/base/protocols/http/http-mime-and-md5.bro delete mode 100644 testing/btest/scripts/base/protocols/irc/dcc-extract.test delete mode 100644 testing/btest/scripts/base/protocols/smtp/mime-extract.test diff --git a/doc/scripts/CMakeLists.txt b/doc/scripts/CMakeLists.txt index ddb09bb29c..e7e39d0b3f 100644 --- a/doc/scripts/CMakeLists.txt +++ b/doc/scripts/CMakeLists.txt @@ -99,7 +99,7 @@ macro(REST_TARGET srcDir broInput) COMMAND "${CMAKE_COMMAND}" ARGS -E remove_directory .state # generate the reST documentation using bro - COMMAND BROPATH=${BROPATH}:${srcDir} BROMAGIC=${CMAKE_SOURCE_DIR}/magic ${CMAKE_BINARY_DIR}/src/bro + COMMAND BROPATH=${BROPATH}:${srcDir} BROMAGIC=${CMAKE_SOURCE_DIR}/magic/database ${CMAKE_BINARY_DIR}/src/bro ARGS -b -Z ${broInput} || (rm -rf .state *.log *.rst && exit 1) # move generated doc into a new directory tree that # defines the final structure of documents @@ -130,7 +130,7 @@ add_custom_command(OUTPUT proto-analyzers.rst COMMAND "${CMAKE_COMMAND}" ARGS -E remove_directory .state # generate the reST documentation using bro - COMMAND BROPATH=${BROPATH}:${srcDir} BROMAGIC=${CMAKE_SOURCE_DIR}/magic ${CMAKE_BINARY_DIR}/src/bro + COMMAND BROPATH=${BROPATH}:${srcDir} BROMAGIC=${CMAKE_SOURCE_DIR}/magic/database ${CMAKE_BINARY_DIR}/src/bro ARGS -b -Z base/init-bare.bro || (rm -rf .state *.log *.rst && exit 1) # move generated doc into a new directory tree that # defines the final structure of documents diff --git a/doc/scripts/DocSourcesList.cmake b/doc/scripts/DocSourcesList.cmake index 529b03ca83..b2c932d117 100644 --- a/doc/scripts/DocSourcesList.cmake +++ b/doc/scripts/DocSourcesList.cmake @@ -73,6 +73,8 @@ rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_ZIP.events.bif.bro) rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/reporter.bif.bro) rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/strings.bif.bro) rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/types.bif.bro) +rest_target(${psd} base/files/extract/main.bro) +rest_target(${psd} base/files/hash/main.bro) rest_target(${psd} base/frameworks/analyzer/main.bro) rest_target(${psd} base/frameworks/cluster/main.bro) rest_target(${psd} base/frameworks/cluster/nodes/manager.bro) @@ -82,7 +84,7 @@ rest_target(${psd} base/frameworks/cluster/setup-connections.bro) rest_target(${psd} base/frameworks/communication/main.bro) rest_target(${psd} base/frameworks/control/main.bro) rest_target(${psd} base/frameworks/dpd/main.bro) -rest_target(${psd} base/frameworks/file-analysis/main.bro) +rest_target(${psd} base/frameworks/files/main.bro) rest_target(${psd} base/frameworks/input/main.bro) rest_target(${psd} base/frameworks/input/readers/ascii.bro) rest_target(${psd} base/frameworks/input/readers/benchmark.bro) @@ -136,25 +138,22 @@ rest_target(${psd} base/protocols/conn/main.bro) rest_target(${psd} base/protocols/conn/polling.bro) rest_target(${psd} base/protocols/dns/consts.bro) rest_target(${psd} base/protocols/dns/main.bro) -rest_target(${psd} base/protocols/ftp/file-analysis.bro) -rest_target(${psd} base/protocols/ftp/file-extract.bro) +rest_target(${psd} base/protocols/ftp/files.bro) rest_target(${psd} base/protocols/ftp/gridftp.bro) 
rest_target(${psd} base/protocols/ftp/main.bro) rest_target(${psd} base/protocols/ftp/utils-commands.bro) -rest_target(${psd} base/protocols/http/file-analysis.bro) -rest_target(${psd} base/protocols/http/file-extract.bro) -rest_target(${psd} base/protocols/http/file-hash.bro) -rest_target(${psd} base/protocols/http/file-ident.bro) +rest_target(${psd} base/protocols/ftp/utils.bro) +rest_target(${psd} base/protocols/http/entities.bro) +rest_target(${psd} base/protocols/http/files.bro) rest_target(${psd} base/protocols/http/main.bro) rest_target(${psd} base/protocols/http/utils.bro) rest_target(${psd} base/protocols/irc/dcc-send.bro) -rest_target(${psd} base/protocols/irc/file-analysis.bro) +rest_target(${psd} base/protocols/irc/files.bro) rest_target(${psd} base/protocols/irc/main.bro) rest_target(${psd} base/protocols/modbus/consts.bro) rest_target(${psd} base/protocols/modbus/main.bro) -rest_target(${psd} base/protocols/smtp/entities-excerpt.bro) rest_target(${psd} base/protocols/smtp/entities.bro) -rest_target(${psd} base/protocols/smtp/file-analysis.bro) +rest_target(${psd} base/protocols/smtp/files.bro) rest_target(${psd} base/protocols/smtp/main.bro) rest_target(${psd} base/protocols/socks/consts.bro) rest_target(${psd} base/protocols/socks/main.bro) @@ -182,6 +181,8 @@ rest_target(${psd} policy/frameworks/control/controllee.bro) rest_target(${psd} policy/frameworks/control/controller.bro) rest_target(${psd} policy/frameworks/dpd/detect-protocols.bro) rest_target(${psd} policy/frameworks/dpd/packet-segment-logging.bro) +rest_target(${psd} policy/frameworks/files/detect-MHR.bro) +rest_target(${psd} policy/frameworks/files/hash-all-files.bro) rest_target(${psd} policy/frameworks/intel/conn-established.bro) rest_target(${psd} policy/frameworks/intel/dns.bro) rest_target(${psd} policy/frameworks/intel/http-host-header.bro) @@ -214,7 +215,6 @@ rest_target(${psd} policy/protocols/dns/detect-external-names.bro) rest_target(${psd} policy/protocols/ftp/detect-bruteforcing.bro) rest_target(${psd} policy/protocols/ftp/detect.bro) rest_target(${psd} policy/protocols/ftp/software.bro) -rest_target(${psd} policy/protocols/http/detect-MHR.bro) rest_target(${psd} policy/protocols/http/detect-sqli.bro) rest_target(${psd} policy/protocols/http/detect-webapps.bro) rest_target(${psd} policy/protocols/http/header-names.bro) @@ -226,6 +226,7 @@ rest_target(${psd} policy/protocols/modbus/known-masters-slaves.bro) rest_target(${psd} policy/protocols/modbus/track-memmap.bro) rest_target(${psd} policy/protocols/smtp/blocklists.bro) rest_target(${psd} policy/protocols/smtp/detect-suspicious-orig.bro) +rest_target(${psd} policy/protocols/smtp/entities-excerpt.bro) rest_target(${psd} policy/protocols/smtp/software.bro) rest_target(${psd} policy/protocols/ssh/detect-bruteforcing.bro) rest_target(${psd} policy/protocols/ssh/geo-data.bro) diff --git a/scripts/base/frameworks/files/main.bro b/scripts/base/frameworks/files/main.bro index cc92932bbf..d0c381545b 100644 --- a/scripts/base/frameworks/files/main.bro +++ b/scripts/base/frameworks/files/main.bro @@ -139,7 +139,9 @@ export { ## ## f: the file. ## - ## args: the analyzer type to add along with any arguments it takes. + ## tag: the analyzer type. + ## + ## args: any parameters the analyzer takes. ## ## Returns: true if the analyzer will be added, or false if analysis ## for the *id* isn't currently active or the *args* @@ -156,7 +158,9 @@ export { ## ## Returns: true if the analyzer will be removed, or false if analysis ## for the *id* isn't currently active. 
- global remove_analyzer: function(f: fa_file, tag: Files::Tag, args: AnalyzerArgs): bool; + global remove_analyzer: function(f: fa_file, + tag: Files::Tag, + args: AnalyzerArgs &default=AnalyzerArgs()): bool; ## Stops/ignores any further analysis of a given file. ## diff --git a/scripts/base/protocols/ftp/files.bro b/scripts/base/protocols/ftp/files.bro index 1d7b7670f4..9ed17ab2a4 100644 --- a/scripts/base/protocols/ftp/files.bro +++ b/scripts/base/protocols/ftp/files.bro @@ -1,4 +1,5 @@ @load ./main +@load ./utils @load base/utils/conn-ids @load base/frameworks/files diff --git a/scripts/base/protocols/http/files.bro b/scripts/base/protocols/http/files.bro index fd07dc096a..14dbb12989 100644 --- a/scripts/base/protocols/http/files.bro +++ b/scripts/base/protocols/http/files.bro @@ -1,6 +1,7 @@ @load ./main @load ./entities @load ./utils +@load base/utils/conn-ids @load base/frameworks/files module HTTP; @@ -18,13 +19,16 @@ function get_file_handle(c: connection, is_orig: bool): string if ( ! c?$http ) return ""; - local mime_depth = is_orig ? c$http$orig_mime_depth : c$http$resp_mime_depth; - if ( c$http$range_request ) + if ( c$http$range_request && ! is_orig ) { - return cat(Analyzer::ANALYZER_HTTP, is_orig, c$id$orig_h, mime_depth, build_url(c$http)); + # Any multipart responses from the server are pieces of same file + # that correspond to range requests, so don't use mime depth to + # identify the file. + return cat(Analyzer::ANALYZER_HTTP, is_orig, c$id$orig_h, build_url(c$http)); } else { + local mime_depth = is_orig ? c$http$orig_mime_depth : c$http$resp_mime_depth; return cat(Analyzer::ANALYZER_HTTP, c$start_time, is_orig, c$http$trans_depth, mime_depth, id_string(c$id)); } diff --git a/scripts/policy/protocols/http/detect-MHR.bro b/scripts/policy/protocols/http/detect-MHR.bro deleted file mode 100644 index 0594276c93..0000000000 --- a/scripts/policy/protocols/http/detect-MHR.bro +++ /dev/null @@ -1,44 +0,0 @@ -##! Detect file downloads over HTTP that have MD5 sums matching files in Team -##! Cymru's Malware Hash Registry (http://www.team-cymru.org/Services/MHR/). -##! By default, not all file transfers will have MD5 sums calculated. Read the -##! documentation for the :doc:base/protocols/http/file-hash.bro script to see -##! how to configure which transfers will have hashes calculated. - -@load base/frameworks/notice -@load base/protocols/http - -module HTTP; - -export { - redef enum Notice::Type += { - ## The MD5 sum of a file transferred over HTTP matched in the - ## malware hash registry. - Malware_Hash_Registry_Match - }; - - ## The malware hash registry runs each malware sample through several A/V engines. - ## Team Cymru returns a percentage to indicate how many A/V engines flagged the - ## sample as malicious. This threshold allows you to require a minimum detection - ## rate (default: 50%). 
- const MHR_threshold = 50 &redef; -} - -event log_http(rec: HTTP::Info) - { - if ( rec?$md5 ) - { - local hash_domain = fmt("%s.malware.hash.cymru.com", rec$md5); - when ( local MHR_result = lookup_hostname_txt(hash_domain) ) - { - # Data is returned as " " - local MHR_answer = split1(MHR_result, / /); - if ( |MHR_answer| == 2 && to_count(MHR_answer[2]) >= MHR_threshold ) - { - local url = HTTP::build_url_http(rec); - local message = fmt("%s %s %s", rec$id$orig_h, rec$md5, url); - NOTICE([$note=Malware_Hash_Registry_Match, - $msg=message, $id=rec$id]); - } - } - } - } diff --git a/scripts/policy/protocols/smtp/entities-excerpt.bro b/scripts/policy/protocols/smtp/entities-excerpt.bro index 1ecd100571..423fae1ada 100644 --- a/scripts/policy/protocols/smtp/entities-excerpt.bro +++ b/scripts/policy/protocols/smtp/entities-excerpt.bro @@ -1,12 +1,12 @@ ##! This script is for optionally adding a body excerpt to the SMTP ##! entities log. -@load ./entities +@load base/protocols/smtp/entities module SMTP; export { - redef record SMTP::EntityInfo += { + redef record SMTP::Entity+= { ## The entity body excerpt. excerpt: string &log &default=""; }; @@ -31,7 +31,6 @@ event file_new(f: fa_file) &priority=5 if ( ! c?$smtp ) next; if ( default_entity_excerpt_len > 0 ) - c$smtp$current_entity$excerpt = - f$bof_buffer[0:default_entity_excerpt_len]; + c$smtp$entity$excerpt = f$bof_buffer[0:default_entity_excerpt_len]; } } diff --git a/scripts/test-all-policy.bro b/scripts/test-all-policy.bro index 1fd34d6f2f..2164343d37 100644 --- a/scripts/test-all-policy.bro +++ b/scripts/test-all-policy.bro @@ -14,6 +14,8 @@ # @load frameworks/control/controller.bro @load frameworks/dpd/detect-protocols.bro @load frameworks/dpd/packet-segment-logging.bro +@load frameworks/files/detect-MHR.bro +@load frameworks/files/hash-all-files.bro @load frameworks/intel/__load__.bro @load frameworks/intel/conn-established.bro @load frameworks/intel/dns.bro @@ -50,7 +52,6 @@ @load protocols/ftp/detect-bruteforcing.bro @load protocols/ftp/detect.bro @load protocols/ftp/software.bro -@load protocols/http/detect-MHR.bro @load protocols/http/detect-sqli.bro @load protocols/http/detect-webapps.bro @load protocols/http/header-names.bro @@ -62,6 +63,7 @@ @load protocols/modbus/track-memmap.bro @load protocols/smtp/blocklists.bro @load protocols/smtp/detect-suspicious-orig.bro +@load protocols/smtp/entities-excerpt.bro @load protocols/smtp/software.bro @load protocols/ssh/detect-bruteforcing.bro @load protocols/ssh/geo-data.bro diff --git a/src/util.cc b/src/util.cc index 5a63be22cb..0651925898 100644 --- a/src/util.cc +++ b/src/util.cc @@ -1573,6 +1573,20 @@ void operator delete[](void* v) #endif +// Being selective of which components of MAGIC_NO_CHECK_BUILTIN are actually +// known to be problematic, but keeping rest of libmagic's builtin checks. +#define DISABLE_LIBMAGIC_BUILTIN_CHECKS ( \ +/* MAGIC_NO_CHECK_COMPRESS | */ \ +/* MAGIC_NO_CHECK_TAR | */ \ +/* MAGIC_NO_CHECK_SOFT | */ \ +/* MAGIC_NO_CHECK_APPTYPE | */ \ +/* MAGIC_NO_CHECK_ELF | */ \ +/* MAGIC_NO_CHECK_TEXT | */ \ + MAGIC_NO_CHECK_CDF | \ + MAGIC_NO_CHECK_TOKENS \ +/* MAGIC_NO_CHECK_ENCODING */ \ +) + void bro_init_magic(magic_t* cookie_ptr, int flags) { if ( ! 
cookie_ptr || *cookie_ptr ) diff --git a/src/util.h b/src/util.h index 91ed8f2888..cafa63b7e8 100644 --- a/src/util.h +++ b/src/util.h @@ -377,23 +377,6 @@ struct CompareString } }; -// Older versions of libmagic may not define the MAGIC_NO_CHECK_BUILTIN -// convenience macro and other newer versions seem to have a typo that makes -// it unusable, so just make a different one now with all known flags for -// builtin libmagic components that should be disabled so that Bro only -// uses the custom magic database shipped with it. -#define DISABLE_LIBMAGIC_BUILTIN_CHECKS ( \ - MAGIC_NO_CHECK_COMPRESS | \ - MAGIC_NO_CHECK_TAR | \ -/* MAGIC_NO_CHECK_SOFT | */ \ - MAGIC_NO_CHECK_APPTYPE | \ - MAGIC_NO_CHECK_ELF | \ - MAGIC_NO_CHECK_TEXT | \ - MAGIC_NO_CHECK_CDF | \ - MAGIC_NO_CHECK_TOKENS | \ - MAGIC_NO_CHECK_ENCODING \ -) - extern magic_t magic_desc_cookie; extern magic_t magic_mime_cookie; diff --git a/testing/btest/Baseline/core.tunnels.ayiya/http.log b/testing/btest/Baseline/core.tunnels.ayiya/http.log index 04692a3547..cc0cf32148 100644 --- a/testing/btest/Baseline/core.tunnels.ayiya/http.log +++ b/testing/btest/Baseline/core.tunnels.ayiya/http.log @@ -3,10 +3,10 @@ #empty_field (empty) #unset_field - #path http -#open 2013-07-23-05-12-58 +#open 2013-07-25-21-12-29 #fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied orig_fuids orig_mime_types resp_fuids resp_mime_types #types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] vector[string] vector[string] vector[string] vector[string] -1257655301.652206 5OKnoww6xl4 2001:4978:f:4c::2 53382 2001:4860:b002::68 80 1 GET ipv6.google.com / - Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en; rv:1.9.0.15pre) Gecko/2009091516 Camino/2.0b4 (like Firefox/3.0.15pre) 0 10102 200 OK - - - (empty) - - - - - meGKu6goEyd application/octet-stream +1257655301.652206 5OKnoww6xl4 2001:4978:f:4c::2 53382 2001:4860:b002::68 80 1 GET ipv6.google.com / - Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en; rv:1.9.0.15pre) Gecko/2009091516 Camino/2.0b4 (like Firefox/3.0.15pre) 0 10102 200 OK - - - (empty) - - - - - meGKu6goEyd text/html 1257655302.514424 5OKnoww6xl4 2001:4978:f:4c::2 53382 2001:4860:b002::68 80 2 GET ipv6.google.com /csi?v=3&s=webhp&action=&tran=undefined&e=17259,19771,21517,21766,21887,22212&ei=BUz2Su7PMJTglQfz3NzCAw&rt=prt.77,xjs.565,ol.645 http://ipv6.google.com/ Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en; rv:1.9.0.15pre) Gecko/2009091516 Camino/2.0b4 (like Firefox/3.0.15pre) 0 0 204 No Content - - - (empty) - - - - - - - 1257655303.603569 5OKnoww6xl4 2001:4978:f:4c::2 53382 2001:4860:b002::68 80 3 GET ipv6.google.com /gen_204?atyp=i&ct=fade&cad=1254&ei=BUz2Su7PMJTglQfz3NzCAw&zx=1257655303600 http://ipv6.google.com/ Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en; rv:1.9.0.15pre) Gecko/2009091516 Camino/2.0b4 (like Firefox/3.0.15pre) 0 0 204 No Content - - - (empty) - - - - - - - -#close 2013-07-23-05-12-58 +#close 2013-07-25-21-12-29 diff --git a/testing/btest/Baseline/core.tunnels.gtp.different_dl_and_ul/http.log b/testing/btest/Baseline/core.tunnels.gtp.different_dl_and_ul/http.log index e88be88763..8f9ac07c96 100644 --- a/testing/btest/Baseline/core.tunnels.gtp.different_dl_and_ul/http.log +++ 
b/testing/btest/Baseline/core.tunnels.gtp.different_dl_and_ul/http.log @@ -3,9 +3,9 @@ #empty_field (empty) #unset_field - #path http -#open 2013-05-21-21-11-21 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied mime_type md5 extracted_request_files extracted_response_files -#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] string string vector[string] vector[string] -1333458850.340368 arKYeMETxOg 10.131.17.170 51803 173.199.115.168 80 1 GET cdn.epicgameads.com /ads/flash/728x90_nx8com.swf?clickTAG=http://www.epicgameads.com/ads/bannerclickPage.php?id=e3ubwU6IF&pd=1&adid=0&icpc=1&axid=0&uctt=1&channel=4&cac=1&t=728x90&cb=1333458879 http://www.epicgameads.com/ads/banneriframe.php?id=e3ubwU6IF&t=728x90&channel=4&cb=1333458905296 Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0) 0 31461 200 OK - - - (empty) - - - application/x-shockwave-flash - - - -1333458850.399501 arKYeMETxOg 10.131.17.170 51803 173.199.115.168 80 2 GET cdn.epicgameads.com /ads/flash/728x90_nx8com.swf?clickTAG=http://www.epicgameads.com/ads/bannerclickPage.php?id=e3ubwU6IF&pd=1&adid=0&icpc=1&axid=0&uctt=1&channel=0&cac=1&t=728x90&cb=1333458881 http://www.epicgameads.com/ads/banneriframe.php?id=e3ubwU6IF&t=728x90&cb=1333458920207 Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0) 0 31461 200 OK - - - (empty) - - - application/x-shockwave-flash - - - -#close 2013-05-21-21-11-21 +#open 2013-07-25-16-23-41 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied orig_fuids orig_mime_types resp_fuids resp_mime_types +#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] vector[string] vector[string] vector[string] vector[string] +1333458850.340368 arKYeMETxOg 10.131.17.170 51803 173.199.115.168 80 1 GET cdn.epicgameads.com /ads/flash/728x90_nx8com.swf?clickTAG=http://www.epicgameads.com/ads/bannerclickPage.php?id=e3ubwU6IF&pd=1&adid=0&icpc=1&axid=0&uctt=1&channel=4&cac=1&t=728x90&cb=1333458879 http://www.epicgameads.com/ads/banneriframe.php?id=e3ubwU6IF&t=728x90&channel=4&cb=1333458905296 Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0) 0 31461 200 OK - - - (empty) - - - - - 6jqjOyeITn5 application/x-shockwave-flash +1333458850.399501 arKYeMETxOg 10.131.17.170 51803 173.199.115.168 80 2 GET cdn.epicgameads.com /ads/flash/728x90_nx8com.swf?clickTAG=http://www.epicgameads.com/ads/bannerclickPage.php?id=e3ubwU6IF&pd=1&adid=0&icpc=1&axid=0&uctt=1&channel=0&cac=1&t=728x90&cb=1333458881 http://www.epicgameads.com/ads/banneriframe.php?id=e3ubwU6IF&t=728x90&cb=1333458920207 Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0) 0 31461 200 OK - - - (empty) - - - - - A0xot7xPc22 application/x-shockwave-flash +#close 2013-07-25-16-23-41 diff --git a/testing/btest/Baseline/core.tunnels.gtp.outer_ip_frag/http.log b/testing/btest/Baseline/core.tunnels.gtp.outer_ip_frag/http.log index 8f2893caa7..45b88b7813 100644 --- a/testing/btest/Baseline/core.tunnels.gtp.outer_ip_frag/http.log +++ 
b/testing/btest/Baseline/core.tunnels.gtp.outer_ip_frag/http.log @@ -3,8 +3,8 @@ #empty_field (empty) #unset_field - #path http -#open 2013-05-21-21-11-22 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied mime_type md5 extracted_request_files extracted_response_files -#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] string string vector[string] vector[string] -1333458850.375568 arKYeMETxOg 10.131.47.185 1923 79.101.110.141 80 1 GET o-o.preferred.telekomrs-beg1.v2.lscache8.c.youtube.com /videoplayback?upn=MTU2MDY5NzQ5OTM0NTI3NDY4NDc&sparams=algorithm,burst,cp,factor,id,ip,ipbits,itag,source,upn,expire&fexp=912300,907210&algorithm=throttle-factor&itag=34&ip=212.0.0.0&burst=40&sver=3&signature=832FB1042E20780CFCA77A4DB5EA64AC593E8627.D1166C7E8365732E52DAFD68076DAE0146E0AE01&source=youtube&expire=1333484980&key=yt1&ipbits=8&factor=1.25&cp=U0hSSFRTUl9NSkNOMl9MTVZKOjh5eEN2SG8tZF84&id=ebf1e932d4bd1286&cm2=1 http://s.ytimg.com/yt/swfbin/watch_as3-vflqrJwOA.swf Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko; X-SBLSP) Chrome/17.0.963.83 Safari/535.11 0 56320 206 Partial Content - - - (empty) - - - application/octet-stream - - - -#close 2013-05-21-21-11-22 +#open 2013-07-25-21-12-32 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied orig_fuids orig_mime_types resp_fuids resp_mime_types +#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] vector[string] vector[string] vector[string] vector[string] +1333458850.375568 arKYeMETxOg 10.131.47.185 1923 79.101.110.141 80 1 GET o-o.preferred.telekomrs-beg1.v2.lscache8.c.youtube.com /videoplayback?upn=MTU2MDY5NzQ5OTM0NTI3NDY4NDc&sparams=algorithm,burst,cp,factor,id,ip,ipbits,itag,source,upn,expire&fexp=912300,907210&algorithm=throttle-factor&itag=34&ip=212.0.0.0&burst=40&sver=3&signature=832FB1042E20780CFCA77A4DB5EA64AC593E8627.D1166C7E8365732E52DAFD68076DAE0146E0AE01&source=youtube&expire=1333484980&key=yt1&ipbits=8&factor=1.25&cp=U0hSSFRTUl9NSkNOMl9MTVZKOjh5eEN2SG8tZF84&id=ebf1e932d4bd1286&cm2=1 http://s.ytimg.com/yt/swfbin/watch_as3-vflqrJwOA.swf Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko; X-SBLSP) Chrome/17.0.963.83 Safari/535.11 0 56320 206 Partial Content - - - (empty) - - - - - oypNlaRdgs7 application/octet-stream +#close 2013-07-25-21-12-32 diff --git a/testing/btest/Baseline/core.tunnels.teredo/http.log b/testing/btest/Baseline/core.tunnels.teredo/http.log index 4e3cdfd61d..1ecf0884e2 100644 --- a/testing/btest/Baseline/core.tunnels.teredo/http.log +++ b/testing/btest/Baseline/core.tunnels.teredo/http.log @@ -3,11 +3,11 @@ #empty_field (empty) #unset_field - #path http -#open 2013-05-21-21-11-21 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied mime_type md5 extracted_request_files extracted_response_files -#types time string addr port addr port count string string string string 
string count count count string count string string table[enum] string string table[string] string string vector[string] vector[string] -1210953057.917183 3PKsZ2Uye21 192.168.2.16 1578 75.126.203.78 80 1 POST download913.avast.com /cgi-bin/iavs4stats.cgi - Syncer/4.80 (av_pro-1169;f) 589 0 204 - - - (empty) - - - text/plain - - - -1210953061.585996 70MGiRM1Qf4 2001:0:4137:9e50:8000:f12a:b9c8:2815 1286 2001:4860:0:2001::68 80 1 GET ipv6.google.com / - Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9b5) Gecko/2008032620 Firefox/3.0b5 0 6640 200 OK - - - (empty) - - - text/html - - - -1210953073.381474 70MGiRM1Qf4 2001:0:4137:9e50:8000:f12a:b9c8:2815 1286 2001:4860:0:2001::68 80 2 GET ipv6.google.com /search?hl=en&q=Wireshark+!&btnG=Google+Search http://ipv6.google.com/ Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9b5) Gecko/2008032620 Firefox/3.0b5 0 25119 200 OK - - - (empty) - - - text/html - - - -1210953074.674817 c4Zw9TmAE05 192.168.2.16 1580 67.228.110.120 80 1 GET www.wireshark.org / http://ipv6.google.com/search?hl=en&q=Wireshark+%21&btnG=Google+Search Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9b5) Gecko/2008032620 Firefox/3.0b5 0 11845 200 OK - - - (empty) - - - application/xml - - - -#close 2013-05-21-21-11-21 +#open 2013-07-25-16-23-17 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied orig_fuids orig_mime_types resp_fuids resp_mime_types +#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] vector[string] vector[string] vector[string] vector[string] +1210953057.917183 3PKsZ2Uye21 192.168.2.16 1578 75.126.203.78 80 1 POST download913.avast.com /cgi-bin/iavs4stats.cgi - Syncer/4.80 (av_pro-1169;f) 589 0 204 - - - (empty) - - - tZX578lAmo3 text/plain - - +1210953061.585996 70MGiRM1Qf4 2001:0:4137:9e50:8000:f12a:b9c8:2815 1286 2001:4860:0:2001::68 80 1 GET ipv6.google.com / - Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9b5) Gecko/2008032620 Firefox/3.0b5 0 6640 200 OK - - - (empty) - - - - - nkfWSsPnjX7 text/html +1210953073.381474 70MGiRM1Qf4 2001:0:4137:9e50:8000:f12a:b9c8:2815 1286 2001:4860:0:2001::68 80 2 GET ipv6.google.com /search?hl=en&q=Wireshark+!&btnG=Google+Search http://ipv6.google.com/ Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9b5) Gecko/2008032620 Firefox/3.0b5 0 25119 200 OK - - - (empty) - - - - - fk5lVax7K37 text/html +1210953074.674817 c4Zw9TmAE05 192.168.2.16 1580 67.228.110.120 80 1 GET www.wireshark.org / http://ipv6.google.com/search?hl=en&q=Wireshark+%21&btnG=Google+Search Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9b5) Gecko/2008032620 Firefox/3.0b5 0 11845 200 OK - - - (empty) - - - - - 6wF1NFmBUza application/xml +#close 2013-07-25-16-23-17 diff --git a/testing/btest/Baseline/core.tunnels.teredo_bubble_with_payload/http.log b/testing/btest/Baseline/core.tunnels.teredo_bubble_with_payload/http.log index 65ec33186e..0c8c448e30 100644 --- a/testing/btest/Baseline/core.tunnels.teredo_bubble_with_payload/http.log +++ b/testing/btest/Baseline/core.tunnels.teredo_bubble_with_payload/http.log @@ -3,9 +3,9 @@ #empty_field (empty) #unset_field - #path http -#open 2013-05-21-21-11-22 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code 
status_msg info_code info_msg filename tags username password proxied mime_type md5 extracted_request_files extracted_response_files -#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] string string vector[string] vector[string] -1340127577.361683 FrJExwHcSal 2001:0:4137:9e50:8000:f12a:b9c8:2815 1286 2001:4860:0:2001::68 80 1 GET ipv6.google.com / - Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9b5) Gecko/2008032620 Firefox/3.0b5 0 6640 200 OK - - - (empty) - - - text/html - - - -1340127577.379360 FrJExwHcSal 2001:0:4137:9e50:8000:f12a:b9c8:2815 1286 2001:4860:0:2001::68 80 2 GET ipv6.google.com /search?hl=en&q=Wireshark+!&btnG=Google+Search http://ipv6.google.com/ Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9b5) Gecko/2008032620 Firefox/3.0b5 0 25119 200 OK - - - (empty) - - - text/html - - - -#close 2013-05-21-21-11-22 +#open 2013-07-25-16-22-21 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied orig_fuids orig_mime_types resp_fuids resp_mime_types +#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] vector[string] vector[string] vector[string] vector[string] +1340127577.361683 FrJExwHcSal 2001:0:4137:9e50:8000:f12a:b9c8:2815 1286 2001:4860:0:2001::68 80 1 GET ipv6.google.com / - Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9b5) Gecko/2008032620 Firefox/3.0b5 0 6640 200 OK - - - (empty) - - - - - RzAMHHXJral text/html +1340127577.379360 FrJExwHcSal 2001:0:4137:9e50:8000:f12a:b9c8:2815 1286 2001:4860:0:2001::68 80 2 GET ipv6.google.com /search?hl=en&q=Wireshark+!&btnG=Google+Search http://ipv6.google.com/ Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9b5) Gecko/2008032620 Firefox/3.0b5 0 25119 200 OK - - - (empty) - - - - - vOmb3ToMKRg text/html +#close 2013-07-25-16-22-21 diff --git a/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log b/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log index b7585a1477..4bcda86272 100644 --- a/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log +++ b/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log @@ -3,7 +3,7 @@ #empty_field (empty) #unset_field - #path loaded_scripts -#open 2013-07-05-05-20-50 +#open 2013-07-25-19-59-47 #fields name #types string scripts/base/init-bare.bro @@ -84,9 +84,11 @@ scripts/base/init-bare.bro scripts/base/frameworks/analyzer/main.bro scripts/base/frameworks/packet-filter/utils.bro build/scripts/base/bif/analyzer.bif.bro - scripts/base/frameworks/file-analysis/__load__.bro - scripts/base/frameworks/file-analysis/main.bro + scripts/base/frameworks/files/__load__.bro + scripts/base/frameworks/files/main.bro build/scripts/base/bif/file_analysis.bif.bro + scripts/base/utils/site.bro + scripts/base/utils/patterns.bro scripts/policy/misc/loaded-scripts.bro scripts/base/utils/paths.bro -#close 2013-07-05-05-20-50 +#close 2013-07-25-19-59-47 diff --git a/testing/btest/Baseline/istate.events-ssl/receiver.http.log b/testing/btest/Baseline/istate.events-ssl/receiver.http.log index be7e6e5692..dd61de5424 100644 --- a/testing/btest/Baseline/istate.events-ssl/receiver.http.log +++ 
b/testing/btest/Baseline/istate.events-ssl/receiver.http.log @@ -3,8 +3,8 @@ #empty_field (empty) #unset_field - #path http -#open 2013-05-21-21-11-32 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied mime_type md5 extracted_request_files extracted_response_files -#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] string string vector[string] vector[string] -1369170691.550143 arKYeMETxOg 141.42.64.125 56730 125.190.109.199 80 1 GET www.icir.org / - Wget/1.10 0 9130 200 OK - - - (empty) - - - - - - - -#close 2013-05-21-21-11-33 +#open 2013-07-25-21-10-36 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied orig_fuids orig_mime_types resp_fuids resp_mime_types +#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] vector[string] vector[string] vector[string] vector[string] +1374786635.573905 arKYeMETxOg 141.42.64.125 56730 125.190.109.199 80 1 GET www.icir.org / - Wget/1.10 0 9130 200 OK - - - (empty) - - - - - - - +#close 2013-07-25-21-10-37 diff --git a/testing/btest/Baseline/istate.events-ssl/sender.http.log b/testing/btest/Baseline/istate.events-ssl/sender.http.log index be7e6e5692..dd61de5424 100644 --- a/testing/btest/Baseline/istate.events-ssl/sender.http.log +++ b/testing/btest/Baseline/istate.events-ssl/sender.http.log @@ -3,8 +3,8 @@ #empty_field (empty) #unset_field - #path http -#open 2013-05-21-21-11-32 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied mime_type md5 extracted_request_files extracted_response_files -#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] string string vector[string] vector[string] -1369170691.550143 arKYeMETxOg 141.42.64.125 56730 125.190.109.199 80 1 GET www.icir.org / - Wget/1.10 0 9130 200 OK - - - (empty) - - - - - - - -#close 2013-05-21-21-11-33 +#open 2013-07-25-21-10-36 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied orig_fuids orig_mime_types resp_fuids resp_mime_types +#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] vector[string] vector[string] vector[string] vector[string] +1374786635.573905 arKYeMETxOg 141.42.64.125 56730 125.190.109.199 80 1 GET www.icir.org / - Wget/1.10 0 9130 200 OK - - - (empty) - - - - - - - +#close 2013-07-25-21-10-37 diff --git a/testing/btest/Baseline/istate.events/receiver.http.log b/testing/btest/Baseline/istate.events/receiver.http.log index ae693399c3..aebe4dea7b 100644 --- a/testing/btest/Baseline/istate.events/receiver.http.log +++ b/testing/btest/Baseline/istate.events/receiver.http.log @@ 
-3,8 +3,8 @@ #empty_field (empty) #unset_field - #path http -#open 2013-05-21-21-11-40 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied mime_type md5 extracted_request_files extracted_response_files -#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] string string vector[string] vector[string] -1369170699.511968 arKYeMETxOg 141.42.64.125 56730 125.190.109.199 80 1 GET www.icir.org / - Wget/1.10 0 9130 200 OK - - - (empty) - - - - - - - -#close 2013-05-21-21-11-41 +#open 2013-07-25-20-26-59 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied orig_fuids orig_mime_types resp_fuids resp_mime_types +#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] vector[string] vector[string] vector[string] vector[string] +1374784018.898860 arKYeMETxOg 141.42.64.125 56730 125.190.109.199 80 1 GET www.icir.org / - Wget/1.10 0 9130 200 OK - - - (empty) - - - - - - - +#close 2013-07-25-20-27-00 diff --git a/testing/btest/Baseline/istate.events/sender.http.log b/testing/btest/Baseline/istate.events/sender.http.log index ae693399c3..b70ba733bd 100644 --- a/testing/btest/Baseline/istate.events/sender.http.log +++ b/testing/btest/Baseline/istate.events/sender.http.log @@ -3,8 +3,8 @@ #empty_field (empty) #unset_field - #path http -#open 2013-05-21-21-11-40 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied mime_type md5 extracted_request_files extracted_response_files -#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] string string vector[string] vector[string] -1369170699.511968 arKYeMETxOg 141.42.64.125 56730 125.190.109.199 80 1 GET www.icir.org / - Wget/1.10 0 9130 200 OK - - - (empty) - - - - - - - -#close 2013-05-21-21-11-41 +#open 2013-07-25-21-05-37 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied orig_fuids orig_mime_types resp_fuids resp_mime_types +#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] vector[string] vector[string] vector[string] vector[string] +1374786336.338273 arKYeMETxOg 141.42.64.125 56730 125.190.109.199 80 1 GET www.icir.org / - Wget/1.10 0 9130 200 OK - - - (empty) - - - - - - - +#close 2013-07-25-21-05-38 diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.actions.data_event/out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.actions.data_event/out index ddc3449a4c..cbd60840bf 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.actions.data_event/out +++ 
b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.actions.data_event/out @@ -4,6 +4,7 @@ FILE_BOF_BUFFER ^J0.26 | 201 MIME_TYPE text/plain +FILE_OVER_NEW_CONNECTION file_stream, file #0, 1500, ^J0.26 | 2012-08-24 15:10:04 -0700^J^J * Fixing update-changes, which could pick the wrong control file. (Robin Sommer)^J^J * Fixing GPG signing script. (Robin Sommer)^J^J0.25 | 2012-08-01 13:55:46 -0500^J^J * Fix configure script to exit with non-zero status on error (Jon Siwek)^J^J0.24 | 2012-07-05 12:50:43 -0700^J^J * Raise minimum required CMake version to 2.6.3 (Jon Siwek)^J^J * Adding script to delete old fully-merged branches. (Robin Sommer)^J^J0.23-2 | 2012-01-25 13:24:01 -0800^J^J * Fix a bro-cut error message. (Daniel Thayer)^J^J0.23 | 2012-01-11 12:16:11 -0800^J^J * Tweaks to release scripts, plus a new one for signing files.^J (Robin Sommer)^J^J0.22 | 2012-01-10 16:45:19 -0800^J^J * Tweaks for OpenBSD support. (Jon Siwek)^J^J * bro-cut extensions and fixes. (Robin Sommer)^J ^J - If no field names are given on the command line, we now pass through^J all fields. Adresses #657.^J^J - Removing some GNUism from awk script. Addresses #653.^J^J - Added option for time output in UTC. Addresses #668.^J^J - Added output field separator option -F. Addresses #649.^J^J - Fixing option -c: only some header lines were passed through^J rather than all. (Robin Sommer)^J^J * Fix parallel make portability. (Jon Siwek)^J^J0.21-9 | 2011-11-07 05:44:14 -0800^J^J * Fixing compiler warnings. Addresses #388. (Jon Siwek)^J^J0.21-2 | 2011-11-02 18:12:13 -0700^J^J * Fix for misnaming temp file in update-changes script. (Robin Sommer)^J^J0.21-1 | 2011-11-02 18:10:39 -0700^J^J * Little fix for make-relea file_chunk, file #0, 1500, 0, ^J0.26 | 2012-08-24 15:10:04 -0700^J^J * Fixing update-changes, which could pick the wrong control file. (Robin Sommer)^J^J * Fixing GPG signing script. (Robin Sommer)^J^J0.25 | 2012-08-01 13:55:46 -0500^J^J * Fix configure script to exit with non-zero status on error (Jon Siwek)^J^J0.24 | 2012-07-05 12:50:43 -0700^J^J * Raise minimum required CMake version to 2.6.3 (Jon Siwek)^J^J * Adding script to delete old fully-merged branches. (Robin Sommer)^J^J0.23-2 | 2012-01-25 13:24:01 -0800^J^J * Fix a bro-cut error message. (Daniel Thayer)^J^J0.23 | 2012-01-11 12:16:11 -0800^J^J * Tweaks to release scripts, plus a new one for signing files.^J (Robin Sommer)^J^J0.22 | 2012-01-10 16:45:19 -0800^J^J * Tweaks for OpenBSD support. (Jon Siwek)^J^J * bro-cut extensions and fixes. (Robin Sommer)^J ^J - If no field names are given on the command line, we now pass through^J all fields. Adresses #657.^J^J - Removing some GNUism from awk script. Addresses #653.^J^J - Added option for time output in UTC. Addresses #668.^J^J - Added output field separator option -F. Addresses #649.^J^J - Fixing option -c: only some header lines were passed through^J rather than all. (Robin Sommer)^J^J * Fix parallel make portability. (Jon Siwek)^J^J0.21-9 | 2011-11-07 05:44:14 -0800^J^J * Fixing compiler warnings. Addresses #388. (Jon Siwek)^J^J0.21-2 | 2011-11-02 18:12:13 -0700^J^J * Fix for misnaming temp file in update-changes script. (Robin Sommer)^J^J0.21-1 | 2011-11-02 18:10:39 -0700^J^J * Little fix for make-relea file_stream, file #0, 1024, se script, which could pick out the wrong^J tag. (Robin Sommer)^J^J0.21 | 2011-10-27 17:40:45 -0700^J^J * Fixing bro-cut's usage message and argument error handling. (Robin Sommer)^J^J * Bugfix in update-changes script. 
(Robin Sommer)^J^J * update-changes now ignores commits it did itself. (Robin Sommer)^J^J * Fix a bug in the update-changes script. (Robin Sommer)^J^J * bro-cut now always installs to $prefix/bin by `make install`. (Jon Siwek)^J^J * Options to adjust time format for bro-cut. (Robin Sommer)^J^J The default with -d is now ISO format. The new option "-D "^J specifies a custom strftime()-style format string. Alternatively,^J the environment variable BRO_CUT_TIMEFMT can set the format as^J well.^J^J * bro-cut now understands the field separator header. (Robin Sommer)^J^J * Renaming options -h/-H -> -c/-C, and doing some general cleanup.^J^J0.2 | 2011-10-25 19:53:57 -0700^J^J * Adding support for replacing version string in a setup.py. (Robin^J Sommer)^J^J * Change generated root cert DN indices f diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.remove_action/get.out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.remove_action/get.out index 4b572d5df9..eb62690f91 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.remove_action/get.out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.remove_action/get.out @@ -4,6 +4,7 @@ FILE_BOF_BUFFER ^J0.26 | 201 MIME_TYPE text/plain +FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #0, 4705, 0 [orig_h=141.142.228.5, orig_p=59856/tcp, resp_h=192.150.187.43, resp_p=80/tcp] diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.set_timeout_interval/bro..stdout b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.set_timeout_interval/bro..stdout index 160a51a543..e78f5c8c17 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.set_timeout_interval/bro..stdout +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.set_timeout_interval/bro..stdout @@ -2,6 +2,7 @@ FILE_NEW file #0, 0, 0 MIME_TYPE application/x-dosexec +FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #0, 1022920, 0 [orig_h=192.168.72.14, orig_p=3254/tcp, resp_h=65.54.95.206, resp_p=80/tcp] @@ -11,6 +12,7 @@ FILE_NEW file #1, 0, 0 MIME_TYPE application/octet-stream +FILE_OVER_NEW_CONNECTION FILE_TIMEOUT FILE_TIMEOUT FILE_STATE_REMOVE diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.stop/get.out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.stop/get.out index f7182027aa..13cfe5de58 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.stop/get.out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.stop/get.out @@ -4,3 +4,4 @@ FILE_BOF_BUFFER ^J0.26 | 201 MIME_TYPE text/plain +FILE_OVER_NEW_CONNECTION diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.ftp/out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.ftp/out index c810ce15e5..eba43b94a4 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.ftp/out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.ftp/out @@ -3,7 +3,7 @@ file #0, 0, 0 FILE_BOF_BUFFER The Nationa MIME_TYPE -application/octet-stream +text/x-pascal FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #0, 16557, 0 diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.get/get-gzip.out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.get/get-gzip.out index 2b46d02042..d42db4b90a 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.get/get-gzip.out +++ 
b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.get/get-gzip.out @@ -4,6 +4,7 @@ FILE_BOF_BUFFER {^J "origin MIME_TYPE text/plain +FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #0, 197, 0 [orig_h=141.142.228.5, orig_p=50153/tcp, resp_h=54.243.118.187, resp_p=80/tcp] diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.get/get.out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.get/get.out index bb2f622969..219aad4eff 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.get/get.out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.get/get.out @@ -4,6 +4,7 @@ FILE_BOF_BUFFER ^J0.26 | 201 MIME_TYPE text/plain +FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #0, 4705, 0 [orig_h=141.142.228.5, orig_p=59856/tcp, resp_h=192.150.187.43, resp_p=80/tcp] diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.multipart/out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.multipart/out index 4b6fa76c0c..da42f4fd68 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.multipart/out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.multipart/out @@ -4,6 +4,7 @@ FILE_BOF_BUFFER test^M^J MIME_TYPE text/plain +FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #0, 6, 0 [orig_h=141.142.228.5, orig_p=57262/tcp, resp_h=54.243.88.146, resp_p=80/tcp] @@ -17,6 +18,7 @@ FILE_BOF_BUFFER test2^M^J MIME_TYPE text/plain +FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #1, 7, 0 [orig_h=141.142.228.5, orig_p=57262/tcp, resp_h=54.243.88.146, resp_p=80/tcp] @@ -30,6 +32,7 @@ FILE_BOF_BUFFER test3^M^J MIME_TYPE text/plain +FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #2, 7, 0 [orig_h=141.142.228.5, orig_p=57262/tcp, resp_h=54.243.88.146, resp_p=80/tcp] @@ -43,6 +46,7 @@ FILE_BOF_BUFFER {^J "data": MIME_TYPE text/plain +FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #3, 465, 0 [orig_h=141.142.228.5, orig_p=57262/tcp, resp_h=54.243.88.146, resp_p=80/tcp] diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/a.out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/a.out index f8f2538e92..077fb5282c 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/a.out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/a.out @@ -3,6 +3,7 @@ file #0, 0, 0 MIME_TYPE application/pdf FILE_OVER_NEW_CONNECTION +FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #0, 555523, 0 [orig_h=10.101.84.70, orig_p=10978/tcp, resp_h=129.174.93.161, resp_p=80/tcp] diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/b.out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/b.out index b2a0cb66a2..9c05f311f3 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/b.out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/b.out @@ -2,6 +2,7 @@ FILE_NEW file #0, 0, 0 MIME_TYPE application/x-dosexec +FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #0, 1022920, 0 [orig_h=192.168.72.14, orig_p=3254/tcp, resp_h=65.54.95.206, resp_p=80/tcp] @@ -11,6 +12,7 @@ FILE_NEW file #1, 0, 0 MIME_TYPE application/octet-stream +FILE_OVER_NEW_CONNECTION FILE_TIMEOUT FILE_STATE_REMOVE file #1, 206024, 0 diff --git 
a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/c.out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/c.out index 7c5e9dfeca..d85a9de314 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/c.out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/c.out @@ -3,6 +3,7 @@ file #0, 0, 0 MIME_TYPE application/octet-stream FILE_OVER_NEW_CONNECTION +FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #0, 498702, 0 [orig_h=10.45.179.94, orig_p=19950/tcp, resp_h=129.174.93.170, resp_p=80/tcp] diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.pipeline/out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.pipeline/out index 02ac2f0a7e..b85485cd1a 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.pipeline/out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.pipeline/out @@ -4,6 +4,7 @@ FILE_BOF_BUFFER /*^J******** MIME_TYPE text/plain +FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #0, 2675, 0 [orig_h=192.168.1.104, orig_p=1673/tcp, resp_h=63.245.209.11, resp_p=80/tcp] @@ -17,6 +18,7 @@ FILE_BOF_BUFFER //-- Google MIME_TYPE text/plain +FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #1, 21421, 0 [orig_h=192.168.1.104, orig_p=1673/tcp, resp_h=63.245.209.11, resp_p=80/tcp] @@ -30,6 +32,7 @@ FILE_BOF_BUFFER GIF89a^D\0^D\0\xb3 MIME_TYPE image/gif +FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #2, 94, 0 [orig_h=192.168.1.104, orig_p=1673/tcp, resp_h=63.245.209.11, resp_p=80/tcp] @@ -44,6 +47,7 @@ FILE_BOF_BUFFER \x89PNG^M^J^Z^J\0\0\0 MIME_TYPE image/png +FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #3, 2349, 0 [orig_h=192.168.1.104, orig_p=1673/tcp, resp_h=63.245.209.11, resp_p=80/tcp] @@ -58,6 +62,7 @@ FILE_BOF_BUFFER \x89PNG^M^J^Z^J\0\0\0 MIME_TYPE image/png +FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #4, 27579, 0 [orig_h=192.168.1.104, orig_p=1673/tcp, resp_h=63.245.209.11, resp_p=80/tcp] diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.post/out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.post/out index 3103ecb39e..cedc396254 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.post/out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.post/out @@ -4,6 +4,7 @@ FILE_BOF_BUFFER hello world MIME_TYPE text/plain +FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #0, 11, 0 [orig_h=141.142.228.5, orig_p=53595/tcp, resp_h=54.243.55.129, resp_p=80/tcp] @@ -18,6 +19,7 @@ FILE_BOF_BUFFER {^J "origin MIME_TYPE text/plain +FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #1, 366, 0 [orig_h=141.142.228.5, orig_p=53595/tcp, resp_h=54.243.55.129, resp_p=80/tcp] diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.logging/file_analysis.log b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.logging/file_analysis.log deleted file mode 100644 index f95a70d50a..0000000000 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.logging/file_analysis.log +++ /dev/null @@ -1,10 +0,0 @@ -#separator \x09 -#set_separator , -#empty_field (empty) -#unset_field - -#path file_analysis -#open 2013-06-07-18-51-45 -#fields id parent_id source is_orig last_active seen_bytes total_bytes missing_bytes overflow_bytes timeout_interval bof_buffer_size mime_type timedout conn_uids extracted_files md5 sha1 sha256 -#types string string string bool time 
count count count count interval count string bool table[string] table[string] string string string -BYYd1GSNX5c - HTTP F 1362692527.009775 4705 4705 0 0 120.000000 1024 text/plain F UWkUyAuUGXf BYYd1GSNX5c-file 397168fd09991a0e712254df7bc639ac 1dd7ac0398df6cbc0696445a91ec681facf4dc47 4e7c7ef0984119447e743e3ec77e1de52713e345cde03fe7df753a35849bed18 -#close 2013-06-07-18-51-46 diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.logging/files.log b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.logging/files.log new file mode 100644 index 0000000000..2663184b88 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.logging/files.log @@ -0,0 +1,10 @@ +#separator \x09 +#set_separator , +#empty_field (empty) +#unset_field - +#path files +#open 2013-07-25-16-57-31 +#fields ts fuid tx_hosts rx_hosts conn_uids source depth analyzers mime_type filename duration local_orig is_orig seen_bytes total_bytes missing_bytes overflow_bytes timedout parent_fuid md5 sha1 sha256 extracted +#types time string table[addr] table[addr] table[string] string count table[string] string string interval bool bool count count count count bool string string string string string +1362692527.009721 G75mcAsU764 192.150.187.43 141.142.228.5 UWkUyAuUGXf HTTP 0 SHA256,DATA_EVENT,MD5,EXTRACT,SHA1 text/plain - 0.000054 - F 4705 4705 0 0 F - 397168fd09991a0e712254df7bc639ac 1dd7ac0398df6cbc0696445a91ec681facf4dc47 4e7c7ef0984119447e743e3ec77e1de52713e345cde03fe7df753a35849bed18 G75mcAsU764-file +#close 2013-07-25-16-57-31 diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.smtp/out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.smtp/out index ac4e6e50fa..57f1f97b9c 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.smtp/out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.smtp/out @@ -4,6 +4,7 @@ FILE_BOF_BUFFER Hello^M^J^M^J ^M MIME_TYPE text/plain +FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #0, 79, 0 [orig_h=10.10.1.4, orig_p=1470/tcp, resp_h=74.53.140.153, resp_p=25/tcp] @@ -17,6 +18,7 @@ FILE_BOF_BUFFER pub/NetBSD/README.export-control -lrwxrwxr-x 1 root wheel 32 Aug 16 2009 .message -> pub/NetBSD/README.export-control -total 98028 -total 98028 diff --git a/testing/btest/Baseline/scripts.base.protocols.ftp.ftp-extract/ftp.log b/testing/btest/Baseline/scripts.base.protocols.ftp.ftp-extract/ftp.log deleted file mode 100644 index e77f59dc44..0000000000 --- a/testing/btest/Baseline/scripts.base.protocols.ftp.ftp-extract/ftp.log +++ /dev/null @@ -1,21 +0,0 @@ -#separator \x09 -#set_separator , -#empty_field (empty) -#unset_field - -#path ftp -#open 2013-06-07-18-57-22 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p user password command arg mime_type file_size reply_code reply_msg tags data_channel.passive data_channel.orig_h data_channel.resp_h data_channel.resp_p extraction_file -#types time string addr port addr port string string string string string count count string table[string] bool addr addr port string -1329843175.680248 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test PASV - - - 227 Entering Passive Mode (199,233,217,249,221,90) (empty) T 141.142.220.235 199.233.217.249 56666 - -1329843175.791528 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test LIST - - - 226 Transfer complete. 
(empty) - - - - - -1329843179.815947 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test PASV - - - 227 Entering Passive Mode (199,233,217,249,221,91) (empty) T 141.142.220.235 199.233.217.249 56667 - -1329843193.984222 arKYeMETxOg 141.142.220.235 37604 199.233.217.249 56666 - - - - - - - (empty) - - - - ftp-item-pVhQhhFsB2b.dat -1329843193.984222 k6kgXLOoSKl 141.142.220.235 59378 199.233.217.249 56667 - - - - - - - (empty) - - - - ftp-item-fFCPkV1sEsc.dat -1329843179.926563 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test RETR ftp://199.233.217.249/./robots.txt text/plain 77 226 Transfer complete. (empty) - - - - - -1329843194.040188 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test PORT 141,142,220,235,131,46 - - 200 PORT command successful. (empty) F 199.233.217.249 141.142.220.235 33582 - -1329843194.095782 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test LIST - - - 226 Transfer complete. (empty) - - - - - -1329843197.672179 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test PORT 141,142,220,235,147,203 - - 200 PORT command successful. (empty) F 199.233.217.249 141.142.220.235 37835 - -1329843199.968212 nQcgTWjvg4c 199.233.217.249 61920 141.142.220.235 33582 - - - - - - - (empty) - - - - ftp-item-g3zS3MuJFh.dat -1329843197.727769 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test RETR ftp://199.233.217.249/./robots.txt text/plain 77 226 Transfer complete. (empty) - - - - - -1329843200.079930 j4u32Pc5bif 199.233.217.249 61918 141.142.220.235 37835 - - - - - - - (empty) - - - - ftp-item-lMf4UWRkEO5.dat -#close 2013-06-07-18-57-22 diff --git a/testing/btest/Baseline/scripts.base.protocols.http.100-continue/http.log b/testing/btest/Baseline/scripts.base.protocols.http.100-continue/http.log index edbee28991..a81c0d4a2d 100644 --- a/testing/btest/Baseline/scripts.base.protocols.http.100-continue/http.log +++ b/testing/btest/Baseline/scripts.base.protocols.http.100-continue/http.log @@ -3,8 +3,8 @@ #empty_field (empty) #unset_field - #path http -#open 2013-05-21-21-11-24 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied mime_type md5 extracted_request_files extracted_response_files -#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] string string vector[string] vector[string] -1237440095.634312 UWkUyAuUGXf 192.168.3.103 54102 128.146.216.51 80 1 POST www.osu.edu / - curl/7.17.1 (i386-apple-darwin8.11.1) libcurl/7.17.1 zlib/1.2.3 2001 60731 200 OK 100 Continue - (empty) - - - text/html - - - -#close 2013-05-21-21-11-24 +#open 2013-07-25-19-39-08 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied orig_fuids orig_mime_types resp_fuids resp_mime_types +#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] vector[string] vector[string] vector[string] vector[string] +1237440095.634312 UWkUyAuUGXf 192.168.3.103 54102 128.146.216.51 80 1 POST www.osu.edu / - curl/7.17.1 (i386-apple-darwin8.11.1) libcurl/7.17.1 zlib/1.2.3 2001 60731 200 OK 
100 Continue - (empty) - - - 8TXBHVmBGD7 text/plain ATGo7hdUXdi text/html +#close 2013-07-25-19-39-08 diff --git a/testing/btest/Baseline/scripts.base.protocols.http.http-extract-files/http-item.dat b/testing/btest/Baseline/scripts.base.protocols.http.http-extract-files/http-item.dat deleted file mode 100644 index 73c369dd14..0000000000 --- a/testing/btest/Baseline/scripts.base.protocols.http.http-extract-files/http-item.dat +++ /dev/null @@ -1,304 +0,0 @@ - -ICIR - -ICIR
    -

    -ICIR (The ICSI Center for Internet Research) -is a -non-profit -research institute at -ICSI -in -Berkeley, -California.
    -For the three years from 1999 to 2001 we were named -ACIRI, the AT&T Center for Internet Research at ICSI, -and were funded by AT&T.
    - -The goals of ICIR are to: -

      -
    • Pursue research on the Internet architecture and related networking issues, -
    • -Participate actively in the research (SIGCOMM and IRTF) and -standards (IETF) communities, -
    • Bridge the gap between the Internet research community and commercial -interests by providing a neutral forum where topics of mutual technical -interest can be addressed. -
    -

    - -


    - -
    - - - - - - - - - - -
    - -

    -People -

    - - -
    - -

    -Publications -

    - - -

    -Projects -

    - - - -
    - -

    Research

    -   Transport and Congestion - - -   Traffic and Topology -
      -
    • -IDMaps -(Internet Distance Mapping). -
    • The -Internet Traffic Archive. -
    • -MINC -(Multicast-based Inference of Network-internal Characteristics). -
    • -NIMI -(National Internet Measurement Infrastructure). -
    - -

    - -Collaborators -

    - - - -
    -
    - -
    -

    Information for visitors and local users.

    -
    -Last modified: June 2004. Copyright notice. - -Older versions of this web page, in its ACIRI incarnation.. -
    -For more information about this server, mail www@aciri.org. -
    -To report unusual activity by any of our hosts, mail abuse@aciri.org. - diff --git a/testing/btest/Baseline/scripts.base.protocols.http.http-extract-files/http.log b/testing/btest/Baseline/scripts.base.protocols.http.http-extract-files/http.log deleted file mode 100644 index 53b80e5e9e..0000000000 --- a/testing/btest/Baseline/scripts.base.protocols.http.http-extract-files/http.log +++ /dev/null @@ -1,10 +0,0 @@ -#separator \x09 -#set_separator , -#empty_field (empty) -#unset_field - -#path http -#open 2013-06-07-19-04-27 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied mime_type md5 extracted_request_files extracted_response_files -#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] string string vector[string] vector[string] -1128727435.634189 arKYeMETxOg 141.42.64.125 56730 125.190.109.199 80 1 GET www.icir.org / - Wget/1.10 0 9130 200 OK - - - (empty) - - - text/html - - http-item-54zlJFqn0x6.dat -#close 2013-06-07-19-04-27 diff --git a/testing/btest/Baseline/scripts.base.protocols.http.http-methods/http.log b/testing/btest/Baseline/scripts.base.protocols.http.http-methods/http.log index 54a75f4697..674e355631 100644 --- a/testing/btest/Baseline/scripts.base.protocols.http.http-methods/http.log +++ b/testing/btest/Baseline/scripts.base.protocols.http.http-methods/http.log @@ -3,56 +3,56 @@ #empty_field (empty) #unset_field - #path http -#open 2013-05-21-21-11-25 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied mime_type md5 extracted_request_files extracted_response_files -#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] string string vector[string] vector[string] -1354328870.191989 UWkUyAuUGXf 128.2.6.136 46562 173.194.75.103 80 1 OPTIONS www.google.com * - - 0 962 405 Method Not Allowed - - - (empty) - - - text/html - - - -1354328874.237327 arKYeMETxOg 128.2.6.136 46563 173.194.75.103 80 1 OPTIONS www.google.com HTTP/1.1 - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328874.299063 k6kgXLOoSKl 128.2.6.136 46564 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328874.342591 nQcgTWjvg4c 128.2.6.136 46565 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328874.364020 j4u32Pc5bif 128.2.6.136 46566 173.194.75.103 80 1 GET www.google.com / - - 0 43911 200 OK - - - (empty) - - - text/html - - - -1354328878.470424 TEfuqmmG4bh 128.2.6.136 46567 173.194.75.103 80 1 GET www.google.com / - - 0 43983 200 OK - - - (empty) - - - text/html - - - -1354328882.575456 FrJExwHcSal 128.2.6.136 46568 173.194.75.103 80 1 GET www.google.com /HTTP/1.1 - - 0 1207 403 Forbidden - - - (empty) - - - text/html - - - -1354328882.928027 5OKnoww6xl4 128.2.6.136 46569 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328882.968948 3PKsZ2Uye21 128.2.6.136 46570 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328882.990373 VW0XPVINV8a 128.2.6.136 46571 
173.194.75.103 80 1 GET www.google.com / - - 0 43913 200 OK - - - (empty) - - - text/html - - - -1354328887.114613 fRFu0wcOle6 128.2.6.136 46572 173.194.75.103 80 0 - - - - - 0 961 405 Method Not Allowed - - - (empty) - - - text/html - - - -1354328891.161077 qSsw6ESzHV4 128.2.6.136 46573 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328891.204740 iE6yhOq3SF 128.2.6.136 46574 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328891.245592 GSxOnSLghOa 128.2.6.136 46575 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328891.287655 qCaWGmzFtM5 128.2.6.136 46576 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328891.309065 70MGiRM1Qf4 128.2.6.136 46577 173.194.75.103 80 1 CCM_POST www.google.com / - - 0 963 405 Method Not Allowed - - - (empty) - - - text/html - - - -1354328895.355012 h5DsfNtYzi1 128.2.6.136 46578 173.194.75.103 80 1 CCM_POST www.google.com /HTTP/1.1 - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328895.416133 P654jzLoe3a 128.2.6.136 46579 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328895.459490 Tw8jXtpTGu6 128.2.6.136 46580 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328895.480865 c4Zw9TmAE05 128.2.6.136 46581 173.194.75.103 80 1 CCM_POST www.google.com / - - 0 963 405 Method Not Allowed - - - (empty) - - - text/html - - - -1354328899.526682 EAr0uf4mhq 128.2.6.136 46582 173.194.75.103 80 1 CONNECT www.google.com / - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328903.572533 GvmoxJFXdTa 128.2.6.136 46583 173.194.75.103 80 1 CONNECT www.google.com /HTTP/1.1 - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328903.634196 0Q4FH8sESw5 128.2.6.136 46584 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328903.676395 slFea8xwSmb 128.2.6.136 46585 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328903.697693 UfGkYA2HI2g 128.2.6.136 46586 173.194.75.103 80 1 CONNECT www.google.com / - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328907.743696 i2rO3KD1Syg 128.2.6.136 46587 173.194.75.103 80 1 TRACE www.google.com / - - 0 960 405 Method Not Allowed - - - (empty) - - - text/html - - - -1354328911.790590 2cx26uAvUPl 128.2.6.136 46588 173.194.75.103 80 1 TRACE www.google.com /HTTP/1.1 - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328911.853464 BWaU4aSuwkc 128.2.6.136 46589 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328911.897044 10XodEwRycf 128.2.6.136 46590 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328911.918511 zno26fFZkrh 128.2.6.136 46591 173.194.75.103 80 1 TRACE www.google.com / - - 0 960 405 Method Not Allowed - - - (empty) - - - text/html - - - -1354328915.964678 v5rgkJBig5l 128.2.6.136 46592 173.194.75.103 80 1 DELETE www.google.com / - - 0 961 405 Method Not Allowed - - - (empty) - - - text/html - - - -1354328920.010458 eWZCH7OONC1 128.2.6.136 46593 173.194.75.103 80 1 DELETE www.google.com /HTTP/1.1 - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328920.072101 0Pwk3ntf8O3 128.2.6.136 46594 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328920.114526 0HKorjr8Zp7 
128.2.6.136 46595 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328920.136714 yC2d6kVg709 128.2.6.136 46596 173.194.75.103 80 1 DELETE www.google.com / - - 0 961 405 Method Not Allowed - - - (empty) - - - text/html - - - -1354328924.183211 VcgagLjnO92 128.2.6.136 46597 173.194.75.103 80 1 PUT www.google.com / - - 0 934 411 Length Required - - - (empty) - - - text/html - - - -1354328924.224567 bdRoHfaPBo3 128.2.6.136 46598 173.194.75.103 80 1 PUT www.google.com /HTTP/1.1 - - 0 934 411 Length Required - - - (empty) - - - text/html - - - -1354328924.287402 zHqb7t7kv28 128.2.6.136 46599 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328924.328257 rrZWoMUQpv8 128.2.6.136 46600 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328924.350343 xNYSS2hJkle 128.2.6.136 46601 173.194.75.103 80 1 PUT www.google.com / - - 0 934 411 Length Required - - - (empty) - - - text/html - - - -1354328924.391728 vMVjlplKKbd 128.2.6.136 46602 173.194.75.103 80 1 POST www.google.com / - - 0 934 411 Length Required - - - (empty) - - - text/html - - - -1354328924.433150 3omNawSNrxj 128.2.6.136 46603 173.194.75.103 80 1 POST www.google.com /HTTP/1.1 - - 0 934 411 Length Required - - - (empty) - - - text/html - - - -1354328924.496732 Rv8AJVfi9Zi 128.2.6.136 46604 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328924.537671 wEyF3OvvcQe 128.2.6.136 46605 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - +#open 2013-07-25-19-41-27 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied orig_fuids orig_mime_types resp_fuids resp_mime_types +#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] vector[string] vector[string] vector[string] vector[string] +1354328870.191989 UWkUyAuUGXf 128.2.6.136 46562 173.194.75.103 80 1 OPTIONS www.google.com * - - 0 962 405 Method Not Allowed - - - (empty) - - - - - VTrFjxi3V27 text/html +1354328874.237327 arKYeMETxOg 128.2.6.136 46563 173.194.75.103 80 1 OPTIONS www.google.com HTTP/1.1 - - 0 925 400 Bad Request - - - (empty) - - - - - jeoiUX9q8v9 text/html +1354328874.299063 k6kgXLOoSKl 128.2.6.136 46564 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - - - 6dL7NPgFhil text/html +1354328874.342591 nQcgTWjvg4c 128.2.6.136 46565 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - - - cix6gzDRCob text/html +1354328874.364020 j4u32Pc5bif 128.2.6.136 46566 173.194.75.103 80 1 GET www.google.com / - - 0 43911 200 OK - - - (empty) - - - - - tCZHDKUkBdi text/html +1354328878.470424 TEfuqmmG4bh 128.2.6.136 46567 173.194.75.103 80 1 GET www.google.com / - - 0 43983 200 OK - - - (empty) - - - - - iVzFNTeQnnc text/html +1354328882.575456 FrJExwHcSal 128.2.6.136 46568 173.194.75.103 80 1 GET www.google.com /HTTP/1.1 - - 0 1207 403 Forbidden - - - (empty) - - - - - boBAqw2JcFi text/html +1354328882.928027 5OKnoww6xl4 128.2.6.136 46569 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - - - r3w183FJvW3 text/html +1354328882.968948 3PKsZ2Uye21 128.2.6.136 46570 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - - - bncugeoItlf text/html 
+1354328882.990373 VW0XPVINV8a 128.2.6.136 46571 173.194.75.103 80 1 GET www.google.com / - - 0 43913 200 OK - - - (empty) - - - - - NkYD5vo8Gy text/html +1354328887.114613 fRFu0wcOle6 128.2.6.136 46572 173.194.75.103 80 0 - - - - - 0 961 405 Method Not Allowed - - - (empty) - - - - - S85THffBTLh text/html +1354328891.161077 qSsw6ESzHV4 128.2.6.136 46573 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - - - 2m6kUZZS0wd text/html +1354328891.204740 iE6yhOq3SF 128.2.6.136 46574 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - - - UoqtpOgJZSk text/html +1354328891.245592 GSxOnSLghOa 128.2.6.136 46575 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - - - mqs8p4wwsS7 text/html +1354328891.287655 qCaWGmzFtM5 128.2.6.136 46576 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - - - S36eCQJUY5k text/html +1354328891.309065 70MGiRM1Qf4 128.2.6.136 46577 173.194.75.103 80 1 CCM_POST www.google.com / - - 0 963 405 Method Not Allowed - - - (empty) - - - - - LeNRDWYrpS7 text/html +1354328895.355012 h5DsfNtYzi1 128.2.6.136 46578 173.194.75.103 80 1 CCM_POST www.google.com /HTTP/1.1 - - 0 925 400 Bad Request - - - (empty) - - - - - ZwKUASlWzYk text/html +1354328895.416133 P654jzLoe3a 128.2.6.136 46579 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - - - uj62KNQhsG3 text/html +1354328895.459490 Tw8jXtpTGu6 128.2.6.136 46580 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - - - taBxWzrYquk text/html +1354328895.480865 c4Zw9TmAE05 128.2.6.136 46581 173.194.75.103 80 1 CCM_POST www.google.com / - - 0 963 405 Method Not Allowed - - - (empty) - - - - - bHBxZULKI0k text/html +1354328899.526682 EAr0uf4mhq 128.2.6.136 46582 173.194.75.103 80 1 CONNECT www.google.com / - - 0 925 400 Bad Request - - - (empty) - - - - - t6k8zHaGZk5 text/html +1354328903.572533 GvmoxJFXdTa 128.2.6.136 46583 173.194.75.103 80 1 CONNECT www.google.com /HTTP/1.1 - - 0 925 400 Bad Request - - - (empty) - - - - - c11un7ZO6nc text/html +1354328903.634196 0Q4FH8sESw5 128.2.6.136 46584 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - - - iWCHzW5XJWk text/html +1354328903.676395 slFea8xwSmb 128.2.6.136 46585 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - - - dzvHktkjD9a text/html +1354328903.697693 UfGkYA2HI2g 128.2.6.136 46586 173.194.75.103 80 1 CONNECT www.google.com / - - 0 925 400 Bad Request - - - (empty) - - - - - vEO9iYqh3Zc text/html +1354328907.743696 i2rO3KD1Syg 128.2.6.136 46587 173.194.75.103 80 1 TRACE www.google.com / - - 0 960 405 Method Not Allowed - - - (empty) - - - - - 8seYaeRVuV2 text/html +1354328911.790590 2cx26uAvUPl 128.2.6.136 46588 173.194.75.103 80 1 TRACE www.google.com /HTTP/1.1 - - 0 925 400 Bad Request - - - (empty) - - - - - 0kkHkmLHFl3 text/html +1354328911.853464 BWaU4aSuwkc 128.2.6.136 46589 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - - - koHEYsvMVBa text/html +1354328911.897044 10XodEwRycf 128.2.6.136 46590 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - - - 50tlwxQjBCb text/html +1354328911.918511 zno26fFZkrh 128.2.6.136 46591 173.194.75.103 80 1 TRACE www.google.com / - - 0 960 405 Method Not Allowed - - - (empty) - - - - - DdECXqOZjXh text/html +1354328915.964678 v5rgkJBig5l 128.2.6.136 46592 173.194.75.103 80 1 DELETE www.google.com / - - 0 961 405 Method Not Allowed - - - (empty) - - - - - LIZQeBP0Coi text/html +1354328920.010458 eWZCH7OONC1 128.2.6.136 46593 173.194.75.103 
80 1 DELETE www.google.com /HTTP/1.1 - - 0 925 400 Bad Request - - - (empty) - - - - - hjPo0BdP973 text/html +1354328920.072101 0Pwk3ntf8O3 128.2.6.136 46594 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - - - d6K2onvteNa text/html +1354328920.114526 0HKorjr8Zp7 128.2.6.136 46595 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - - - BY1g634OMv6 text/html +1354328920.136714 yC2d6kVg709 128.2.6.136 46596 173.194.75.103 80 1 DELETE www.google.com / - - 0 961 405 Method Not Allowed - - - (empty) - - - - - 5aAa2m40fZd text/html +1354328924.183211 VcgagLjnO92 128.2.6.136 46597 173.194.75.103 80 1 PUT www.google.com / - - 0 934 411 Length Required - - - (empty) - - - - - y3Syn85ve8e text/html +1354328924.224567 bdRoHfaPBo3 128.2.6.136 46598 173.194.75.103 80 1 PUT www.google.com /HTTP/1.1 - - 0 934 411 Length Required - - - (empty) - - - - - P92nMD5z6D4 text/html +1354328924.287402 zHqb7t7kv28 128.2.6.136 46599 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - - - qIPObDBIhSj text/html +1354328924.328257 rrZWoMUQpv8 128.2.6.136 46600 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - - - su86MWxyjne text/html +1354328924.350343 xNYSS2hJkle 128.2.6.136 46601 173.194.75.103 80 1 PUT www.google.com / - - 0 934 411 Length Required - - - (empty) - - - - - r2aysGE6ve8 text/html +1354328924.391728 vMVjlplKKbd 128.2.6.136 46602 173.194.75.103 80 1 POST www.google.com / - - 0 934 411 Length Required - - - (empty) - - - - - Zosv3c0p2Zb text/html +1354328924.433150 3omNawSNrxj 128.2.6.136 46603 173.194.75.103 80 1 POST www.google.com /HTTP/1.1 - - 0 934 411 Length Required - - - (empty) - - - - - L02QmCl2lX4 text/html +1354328924.496732 Rv8AJVfi9Zi 128.2.6.136 46604 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - - - uh9TwTMdWI9 text/html +1354328924.537671 wEyF3OvvcQe 128.2.6.136 46605 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - - - 4gLQ9WVkuYd text/html 1354328924.559704 E490YZTUozc 128.2.6.136 46606 173.194.75.103 80 1 HEAD www.google.com / - - 0 0 200 OK - - - (empty) - - - - - - - 1354328928.625437 YIeWJmXWNWj 128.2.6.136 46607 173.194.75.103 80 1 HEAD www.google.com / - - 0 0 200 OK - - - (empty) - - - - - - - 1354328932.692706 ydiZblvsYri 128.2.6.136 46608 173.194.75.103 80 1 HEAD www.google.com /HTTP/1.1 - - 0 0 400 Bad Request - - - (empty) - - - - - - - -1354328932.754657 HFYOnBqSE5e 128.2.6.136 46609 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328932.796568 JcUvhfWUMgd 128.2.6.136 46610 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -#close 2013-05-21-21-11-25 +1354328932.754657 HFYOnBqSE5e 128.2.6.136 46609 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - - - NIV5LGdqSk2 text/html +1354328932.796568 JcUvhfWUMgd 128.2.6.136 46610 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - - - SlC7NZIgx1d text/html +#close 2013-07-25-19-41-27 diff --git a/testing/btest/Baseline/scripts.base.protocols.http.http-mime-and-md5/http.log b/testing/btest/Baseline/scripts.base.protocols.http.http-mime-and-md5/http.log deleted file mode 100644 index 97e797b4fb..0000000000 --- a/testing/btest/Baseline/scripts.base.protocols.http.http-mime-and-md5/http.log +++ /dev/null @@ -1,14 +0,0 @@ -#separator \x09 -#set_separator , -#empty_field (empty) -#unset_field - -#path http -#open 2013-05-21-21-11-25 -#fields ts uid id.orig_h id.orig_p id.resp_h 
id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied mime_type md5 extracted_request_files extracted_response_files -#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] string string vector[string] vector[string] -1258577884.844956 UWkUyAuUGXf 192.168.1.104 1673 63.245.209.11 80 1 GET www.mozilla.org /style/enhanced.css http://www.mozilla.org/projects/calendar/ Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5 0 2675 200 OK - - - (empty) - - - text/plain - - - -1258577884.960135 UWkUyAuUGXf 192.168.1.104 1673 63.245.209.11 80 2 GET www.mozilla.org /script/urchin.js http://www.mozilla.org/projects/calendar/ Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5 0 21421 200 OK - - - (empty) - - - text/plain - - - -1258577885.317160 UWkUyAuUGXf 192.168.1.104 1673 63.245.209.11 80 3 GET www.mozilla.org /images/template/screen/bullet_utility.png http://www.mozilla.org/style/screen.css Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5 0 94 200 OK - - - (empty) - - - image/gif - - - -1258577885.349639 UWkUyAuUGXf 192.168.1.104 1673 63.245.209.11 80 4 GET www.mozilla.org /images/template/screen/key-point-top.png http://www.mozilla.org/style/screen.css Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5 0 2349 200 OK - - - (empty) - - - image/png e0029eea80812e9a8e57b8d05d52938a - - -1258577885.394612 UWkUyAuUGXf 192.168.1.104 1673 63.245.209.11 80 5 GET www.mozilla.org /projects/calendar/images/header-sunbird.png http://www.mozilla.org/projects/calendar/calendar.css Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5 0 27579 200 OK - - - (empty) - - - image/png 30aa926344f58019d047e85ba049ca1e - - -#close 2013-05-21-21-11-25 diff --git a/testing/btest/Baseline/scripts.base.protocols.http.http-pipelining/http.log b/testing/btest/Baseline/scripts.base.protocols.http.http-pipelining/http.log index e22fb53103..6779485f91 100644 --- a/testing/btest/Baseline/scripts.base.protocols.http.http-pipelining/http.log +++ b/testing/btest/Baseline/scripts.base.protocols.http.http-pipelining/http.log @@ -3,12 +3,12 @@ #empty_field (empty) #unset_field - #path http -#open 2013-05-21-21-11-25 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied md5 extracted_request_files extracted_response_files -#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] string vector[string] vector[string] -1258577884.844956 UWkUyAuUGXf 192.168.1.104 1673 63.245.209.11 80 1 GET www.mozilla.org /style/enhanced.css http://www.mozilla.org/projects/calendar/ Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5 0 2675 200 OK - - - (empty) - - - - - - -1258577884.960135 UWkUyAuUGXf 192.168.1.104 1673 63.245.209.11 80 2 GET www.mozilla.org /script/urchin.js http://www.mozilla.org/projects/calendar/ Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5 0 21421 200 OK - - - (empty) - - - 
- - - -1258577885.317160 UWkUyAuUGXf 192.168.1.104 1673 63.245.209.11 80 3 GET www.mozilla.org /images/template/screen/bullet_utility.png http://www.mozilla.org/style/screen.css Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5 0 94 200 OK - - - (empty) - - - - - - -1258577885.349639 UWkUyAuUGXf 192.168.1.104 1673 63.245.209.11 80 4 GET www.mozilla.org /images/template/screen/key-point-top.png http://www.mozilla.org/style/screen.css Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5 0 2349 200 OK - - - (empty) - - - - - - -1258577885.394612 UWkUyAuUGXf 192.168.1.104 1673 63.245.209.11 80 5 GET www.mozilla.org /projects/calendar/images/header-sunbird.png http://www.mozilla.org/projects/calendar/calendar.css Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5 0 27579 200 OK - - - (empty) - - - - - - -#close 2013-05-21-21-11-25 +#open 2013-07-25-19-43-06 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied orig_fuids orig_mime_types resp_fuids resp_mime_types +#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] vector[string] vector[string] vector[string] vector[string] +1258577884.844956 UWkUyAuUGXf 192.168.1.104 1673 63.245.209.11 80 1 GET www.mozilla.org /style/enhanced.css http://www.mozilla.org/projects/calendar/ Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5 0 2675 200 OK - - - (empty) - - - - - XRu8VItOvLc text/plain +1258577884.960135 UWkUyAuUGXf 192.168.1.104 1673 63.245.209.11 80 2 GET www.mozilla.org /script/urchin.js http://www.mozilla.org/projects/calendar/ Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5 0 21421 200 OK - - - (empty) - - - - - m1D1wMxW9y8 text/plain +1258577885.317160 UWkUyAuUGXf 192.168.1.104 1673 63.245.209.11 80 3 GET www.mozilla.org /images/template/screen/bullet_utility.png http://www.mozilla.org/style/screen.css Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5 0 94 200 OK - - - (empty) - - - - - ZwnCaxWANNb image/gif +1258577885.349639 UWkUyAuUGXf 192.168.1.104 1673 63.245.209.11 80 4 GET www.mozilla.org /images/template/screen/key-point-top.png http://www.mozilla.org/style/screen.css Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5 0 2349 200 OK - - - (empty) - - - - - 3WVi9g0Caei image/png +1258577885.394612 UWkUyAuUGXf 192.168.1.104 1673 63.245.209.11 80 5 GET www.mozilla.org /projects/calendar/images/header-sunbird.png http://www.mozilla.org/projects/calendar/calendar.css Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5 0 27579 200 OK - - - (empty) - - - - - ta9bGBff1Wl image/png +#close 2013-07-25-19-43-06 diff --git a/testing/btest/Baseline/scripts.base.protocols.http.multipart-extract/http.log b/testing/btest/Baseline/scripts.base.protocols.http.multipart-extract/http.log index 0bd15badef..ae71680dfa 100644 --- a/testing/btest/Baseline/scripts.base.protocols.http.multipart-extract/http.log +++ b/testing/btest/Baseline/scripts.base.protocols.http.multipart-extract/http.log @@ -3,8 +3,8 @@ #empty_field (empty) #unset_field - #path http -#open 2013-06-07-19-57-15 
-#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied mime_type md5 extracted_request_files extracted_response_files -#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] string string vector[string] vector[string] -1369159408.455878 UWkUyAuUGXf 141.142.228.5 57262 54.243.88.146 80 1 POST httpbin.org /post - curl/7.30.0 370 465 200 OK - - - (empty) - - - text/plain - http-item-lcf92jVphSl.dat,http-item-z8gOS6arddh.dat,http-item-tBYz7eElzTb.dat http-item-GVJrSB2Vxk6.dat -#close 2013-06-07-19-57-15 +#open 2013-07-25-19-50-23 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied orig_fuids orig_mime_types resp_fuids resp_mime_types +#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] vector[string] vector[string] vector[string] vector[string] +1369159408.455878 UWkUyAuUGXf 141.142.228.5 57262 54.243.88.146 80 1 POST httpbin.org /post - curl/7.30.0 370 465 200 OK - - - (empty) - - - UB09X6VFGTd,wFP689pOsIa,g5yDIGBH4i5 text/plain,text/plain,text/plain yv4qm3EsdOc text/plain +#close 2013-07-25-19-50-23 diff --git a/testing/btest/Baseline/scripts.base.protocols.irc.basic/irc.log b/testing/btest/Baseline/scripts.base.protocols.irc.basic/irc.log index 64bdb41861..8249c94938 100644 --- a/testing/btest/Baseline/scripts.base.protocols.irc.basic/irc.log +++ b/testing/btest/Baseline/scripts.base.protocols.irc.basic/irc.log @@ -3,11 +3,11 @@ #empty_field (empty) #unset_field - #path irc -#open 2013-03-27-18-51-40 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p nick user command value addl dcc_file_name dcc_file_size extraction_file +#open 2013-07-25-19-51-43 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p nick user command value addl dcc_file_name dcc_file_size fuid #types time string addr port addr port string string string string string string count string 1311189164.119437 UWkUyAuUGXf 192.168.1.77 57640 66.198.80.67 6667 - - NICK bloed - - - - 1311189164.119437 UWkUyAuUGXf 192.168.1.77 57640 66.198.80.67 6667 bloed - USER sdkfje sdkfje Montreal.QC.CA.Undernet.org dkdkrwq - - - 1311189174.474127 UWkUyAuUGXf 192.168.1.77 57640 66.198.80.67 6667 bloed sdkfje JOIN #easymovies (empty) - - - 1311189316.326025 UWkUyAuUGXf 192.168.1.77 57640 66.198.80.67 6667 bloed sdkfje DCC #easymovies (empty) ladyvampress-default(2011-07-07)-OS.zip 42208 - -#close 2013-03-27-18-51-40 +#close 2013-07-25-19-51-43 diff --git a/testing/btest/Baseline/scripts.base.protocols.irc.dcc-extract/irc-dcc-item.dat b/testing/btest/Baseline/scripts.base.protocols.irc.dcc-extract/irc-dcc-item.dat deleted file mode 100644 index d4ec9e374b118f65fbb1f67c14ee1a15a26e58e7..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 42208 zcmV(&K;geoO9KQH000080IopiK0{>y#G<1B069wo04D%_0Bm7od3IrKaB^jHb1h_L zW?^+~bSN?~F)=MLH!UzXDJ@S^E_8TwQ$%P0tN;K200;o*oh`XzWMF9#VPIe|V1NK0 zAZBC*Gg(D}MCT7p)zh*&8`5UIR(ipVTaJ;5fq{u3fq?}iDT0Cw3 z@#jCTYGsv|y{cBut{(bk^&f*-ZSL?FwO!raeWd@b@rldD|Ec`yS5-P&5&5sb`iEP2 z^*@8cXXI*!zw*thblvIOPKI8sdbjEuwJIBFtujK0)gS(eWBvTk)n9-3@%x{@`|10Y 
zs$c%_Cs{db_Y9kAyRy*(7xLOwOpjJm!1l=pzJ^lAj7ytSCtAGFaTPfS{&8iQ-efjVHU}SYV z$hPw;gjH!|XoTAdP!_%Jr40hiyA$BPJm}Txx{bbOZB^*^CkPY@2>xLBfZB5T;D4%?&IMaX?CJ8&Q+rG z+VZ5{Go*Cgh)_1p8PN_wm(qx>2+C^b#fi?pxx0J3CiZZ5C+@C~u5G2Im0+4xSYoTV zxjjUna_#vdN`DGHz7zI?F+AxH3~BGIXeI6$*jYIvRX4AThA@pT;2Plc)DJ+ zhxQ8@!i}uoTXzIwcm^FlDV)fxn3Rrw5Lw+*0m39lk&K>f%NGTrkpl!rMFr} zq6c@9wK|`ul1ED$lohPNZF{6;op+rJP(SSJiL+^soE$_JIaZvAuAf8#BB)}M82@Usq zp>1T8ce0we&M4eHr!{E;*J`)r31n>sdl*uF?l4e&MO!%n%P4QipX4~gx+KVosL3z( z4Xo{Sp#`&kEk0b|X93G2`({!&pS#jWpaJIl1!UKolW4RtuwIRB28sWTZfwnyVlhHt zB>YPix07S#6DL^X;&8l>DNdppJ6I6!Aora9(1(GAw$R3Aj$CdsAbAb#O z%SI_UFj4`aH4@}To#0@|t3k3uZXfGMPB2UD;bX|Ac2duong?dRvj<)F+SyB;;tb6b zhs#pSug%N@dy}pcZpa%I7M&v!X-Cf*y`c9X-y^q46GTo*h`qV zO+pS5=B!dJfdc`*8qRB%IEYFeH0)4k#b5s%nU1`Ld zp1cjkAMH@?M_9<2W+K5zAWve$lAyCNyHM2#E6BFPtc2&w;0+Ta3mFQWQQ|1TL7W7I z()p`kIG{cQCpiyB(3P}6yaWDxMk<&s>}0DU(Dg9wK@+wDxRf{E#Ew!4ez_v;01O~R z2H#eplpV00hh^+Ekp0($hM< zac&vRX*1Y433}xGK;wGR4~6Jq)d87-uWU|a_#_D6c9fVRrY)9V8=`Tr9O?o;dP3MB zDlco7gIW}BbEg3ph}2%6Z;v;JtlVi~NMEo4(Gl9jy4HEGL&C$bywIg2@yjb6BYd9- zU!D^_g+lPwOOua)*o4C;XJrHFos)k?jEHIK9J2lQ^=X+*+TK!9C7STXC*H?6C!YD) zb|)PNbJFB`4goQU*u*Sg1f!HfjmEo|+SD+ez%{c6GM7%ej7?UbT~=jv28^O67)R+7Vp7#aMDBiBtg2_aM9 z4Z+o;3a+Ktv6Y{7`xYw)s0AklS_tCY8=wKyQd;L*Y*HfjIcQSSESY;YE2?Da%A9q* ze_C^DLcq#4a{0j#FIcG9hon|Ma8iw5;gRVwDMYx<0pZt5_Rhb`)bLRyw@-fcwMl*A zOlmiDog(pwXcj^eCVF^3)MG)+y9p?HHFs{>-LaMSrWsj zv?nRw6GS0ruZ}VPFe;rGx^p3f&q3dFl_l49(U+Yl!rR~qTUEN2GEWKr$Y$ol;1Bu$ z*K;l)DVfjPuobyp?v>A^(Gyq?SMQx4Qwoc{5AF6r$m)QYJSI4B0$fCq-G8FQLbT+& zUddDfHd#3<5aPVU{gE%shIVXTq z<&7Ej*y^QA@xu?x0aou2j*6YsK9@h?ndAr3G2JiL&7(Fk8Kg?1F9Y$V>#aJ97NKw2 zTru1lY^6gt^srM}Ye;vc+HS^1B|`bZQq!OA&sGyCr2}eH*HX5vAV2@ z8Z3ZP*Tq~|O0pSr1z*-^yMI}hOgS*ZLch@jNWw4^jz4m`acsaUam=I11fDp02mBXJ zAmO>ZZYGL>$0M|Q^d|W8iVR|j*y3y#uQl~?!zOKK>0DX&?fa1*FP%73lg{1KZC?Qk9giyLnW%N z{<8Tf_4WHFe)Fc#*1?Ad9*m`u0W~p~b>cL8*kCHIA9FgfDI+A9!+LCK!Oe4xA$_UuIjXBt5ilNPapVfjwdYD4j3n`|qt(=@Oexp!3=M|#%pbQBcb}`|93e`}NhgzXe zg7R6^A7NZuXgPB#j+m}RA?xUA5Rad-6xiH8QP|16A7y=U0YkcQ*6R}89J=C7h~OM0%^Ic;`oJ#`FGLb9~G4a;17Q%SyZ7! 
zwDIhnGM1U8Nj5L84jpd1 zC~|_G(pR{X;Ova6XPG(`GLJG3+*mwc_`$l&EpmsA4~?FiE`}_;A@j=8psf6k8nAW| z^--_AIj5)=YDdQ4E|R&tH8jf_31FZsS43D#$pjd5y5{uxm!shS=2 zID6uR+Qro{b#oAbYw0y^s8006r-5apmyBf$idz^tD%GTrS;A+OW+)1k6QTrED4my6 zp=lvJD&5c_dVYt1i7YezzoTLUZ$_7_zqK#kOKP5t`!>3b37q7~xt?}UK0U(xgEPIX z^E}}D(a+t~?r;(Ka;TWaiRIXnOk(ejbB2%;qQPqlnrr1ewl78O$^|&u10B8O@AGoa zX~k>0wF7RO*Gtw}Rt60AQn*P#D%s{QC6SPbVP{EYAD_pF?~|)zJ`(KHUj%8N z54vKDUmLbv{036j!p=TiKpaB&Xa8(o8N zZ@)a!O)fLOL&zJPL>2V>03miX>aPJtzx$QbH$bp{(*8(x;vzu;e^}sM6V9W`n#0*spYG zW!`$Ry#)gYWPU}_75`G&ZgTBu3S zUV7PcaD*i=_BSkyjPW-Q^tkl72|ITaoOSWfS+1-nO}*%le%6*SyQUWA@WnHfG>H$K zNl+%JIkcL94-x4W4j8N&%j^qzPH&PZK(AvnpSi8qLKm|QFOq*v$^BReGE=Jlran!bKRP1-Z1^RKjQ2!bz?b9M!*7o%=Zhj4( zCvs!;{M9?c9QxsI6Y5)NTg~O%_}x?D!@Aj&DR=z{T`WCXI?5nE#fdfByV$HSzCGYf z?h*sp=n;S*#vB(9HqH}ECj>d>I1^_L%rKVCJd9EWA>FGt z<$P$nOVGjPl2~Dg9kw_0iSRS95cmlEqVqnVpF1TQYLBE2DAG{4{5)t%WEL)7g0|Tb zUE(nxB?2g26h{o5yT#!wf~1i+S1%tDT0vC$R%NeCn6vN($EfWNMtQCblXxDc?Nmf7%%Em`5<9esp8vdtdC9Mg#J#`h+(`5pq>M+}V z#fVTG%+@!jsY;(0LGBr=EMVto9?M*&nfbVpFgdt+ud))Cc;6H2+kP%OW}{rAVvI*P z+RKnis*-PE{KL!p%<+Xm{?1u*XY@2|5LY+^?Jk(dqgw9xl+LY&SdKnQAIAwS&Z-jl zH>a0*S4%I@xhVV;xKhK@4N!`cp*$6?*@ry}#DP+1`HmMh^AMenrAEYZF0IY-p_P@A zW2FS*{7LG38jj^cD+3Kq)ez~KiZbhn+la)V!V#MRqs0`}Vk|&UvsXJgn7lNwH9=Y{ zLPJR_dR|Q>Bu%fYUD>;Cn6nRzX>64@^qTG5FEBj>g2fBj1Z~2}f*cjdWF>1E%52NS z`hhJEwfKeR`Vl;cTOu;5^x0%pqHVv`Wh0dl>(H@bUfEGDAY)Ux^sa?~^>YIqgDzM= zW!2|C#!ynvB?TDvO%qzCk|5&Pp@mw-BG-ghLaX}*imIuMdsFAHr8{V8P&WYXTlxV( z;FHW0uDQt=&Qn{G6){aWXX_cQpzJr?1f{!fisacn#3XM`Q>*7r&6p)Sv;`#ZDN{aS z3$qaQ*hI43}mF5tMao2-Of>K0+n;$@Gz!xNAk{Nc4cXB zgI~*7>M0}{m1+K!MaX0XV0}Z?${@S0$#+qb$KozB4*v&&>%5C^ra&LRg*LwBoV1;| zs$tK^V!t@l6lyRITyLp$OzND!NK7pD^ZjZJcgJBeG*ME^mS*M4j?w`%FXHm`#hj3` z;R;QEIp~Z7;-;RIu|iil_Z)Wx33Mm52uC;bpoPuNT~Lgub*P`F>e)(Mk)97cq3NPbuB)*!CQ5Hr1(Y7=mVQmn^L{;>C*W4<6_`o z33G~&>n+D5s`PZ>858kHl=BbV+8alTj%%a4JP-R%?ffyJa6JUBoeA*R#7h`#h{JnJ zqvT9GbfOhG43wOpG1A$BW1@)>!47~@oI9NguGtDcy$mfyFEoCYBA+DYCqu>A6O^v+ z8=8lmKQ}khO76$r-}BQKJJx^mgt((n4^uB^TXDFtDTo5JrO}Jj;~1nlybcAJpc%mi zPlyJEeb-MSxFbwpNKqG?(fDk=sDSt0@|q{Ew&4hodM={h$tN5b_FRnB7iX(BGFS$xL z*LwRt#hCwV?_G8!Ns=tFeFOauF$;EP109C&x8DZ1XmSQM$-w}`G}-7y0i?RQy1T}; zhnGFBUytwQieEuFqH6v!5jC@0&0P(qvoa%7#Ld)HMdfki$blS+<^=!g3mGBbz}ezN zE@Ea>u0(jklj6wDbgH28?l*uSd23{X3M3M4&i^amcl7!26OaHz8$vsZ9bl}YtZEx~ zFCyfL%&w1n_E=_F8=|-0_wM5pnIv2NBX zinvcE!F#U6Oj-v(UGTdDd=?5g?nZ_X#wHZO>pjGC>tgr$O&9%BIS~7c0kNm@+s9GB zu@@-^&{<-G^i8s<+-@k0QGPi$6<6cFX5&z$#V3TTJ9u9yTL?w9{g{-1Y9q^?$qiZB z)Sd@ZYGRu^k6;RPQ+0+;!Y^&;GZ1@cQ=rJDUw~rR!Q!;_qD^-ZqlUlpWRR{NPJbyG z15YZshv)PWy*HJ{43C|tOq1Rb!u^PCt1^4~V0a+ABj(Up`W>G?jowjW!G(o0lhdLh zV#o(~kYZA6X7dF%DrS2XUr15~E$4&rTWAFELB#AZO{VqMb#lJR3wL4E7a+5KaD|DZ zXHl>7ZFKBd6o&L zQJ+Be7QV;hyoidXZgv3sG3A?h9WQEURF+jc*FkS-LRn@{jnFFDKJ@Fz!n$u-rE~O@lO;MKCBn z8+D_13Zs0@LxyT}$Z>JpAjZ8_;+LtPqO{pc?{Rgdq#M-_vDC*ESC)02|ei z8z?4YF7B79_MCpd7(X?b0@Kuvs+cGf{la~9DdUYmiaoxR6&iGpAQeJ2YCp)RASq#f zPFkBHStx=(R^q;Cce`J7FQGOwTrd*d!%GW?4qxMBAFywu!St%UR8f)M{rX{}zK-&{ zpWks_%gS+8pX~;Ri>kXK92(DfT*S_ZVk>_k75YGUDa=kC&f9SRR&czD8}y8 z%!&@=8$@bk9xaN4NUm)zx5M0rz&(n0HwRoVhyl$dTVZ1S*WeW%!^(|y3SI@1+ zSdBXR;G!39(lWb-8e3%fBp%>B1$p#$Z3T)Q8(%bq%DK&&j*KB9D!sKQdV;G=u`)$K zhpsq($HS_xBNpvEQ$TPIS+R3BEpoP2($X;_H+JA5#Kgl*{KJ)*Z&^}*U!izzIU(B& zY+>oQA9$S~_ON`HZ$&F<3dd|o(H9pZFLMTBv#ia)l1r~#`OaEyR@p~?Tss3*|v}j9W5!p6ZmRO|u8?F+Kg3z?(1 zF%+?g7=5*)Gg}^qK8Q$ZJ#YnzQ4mQT-^=%h`2s>~t<0!vYRfcrp~p7-s(59|B;HJ5 z#BJ4}$HKUc>R{6NOURls5X_e6TDvf%O<0ykd&RZAm~JglQ>UYS==zBxmxQ$DaGL$y z11wsIDkKLf_PEm-#P z-f-C`pWi3>_%$JBR8tH@cfNllA95Q-`SLyPV~ItH^h6<(OEce84IYB~U}RO*R%CVa 
zq`Zg3cJ=hgmz3B`0mkxB#sLE?ZNRKFe%57uGKiq!CqOjfDUeK_C~Epeo1zuNql)jK z6m|W?WET&ei~7k*rn?iYN;1_A#HP5aB;Dq__QwAa*>=*lFN+$Ze$y|>R)wv)>rBMC zOZ()yE34xzX{u~7v5UjsC&@3k!;68lB9h{g>wF+zu8Loy3ZpJN^aYvsGQK-#cS^LI zkgu#A^4%oHOfPz_koh0@OxlxkdM0|P7aDCWPg0q@Iri4XrNm?Csabb69wdn8z>9)R z6oi6bi1|9A+f)pZm9q3*D7vYX#NbmsToo0^8#0-0$WTspHTL%4y7sRv-u$(iicf#0HOGZJU~Oe4WQRd>aSI?4r|un^t*og6+$n^lF-03O zuS|*#?$}lXfq)Soow_xIrVb;c#sfOdf zD4&$F1>|~fj>iF?MC3++cq~_s>$c)S^{HHSJ8VF+69BHislUC#xMKa_Dsg^4EaAiA`W)dz+%Mm=|7(1V6tW#t6 zOmsQm!HbDCi@|L+u&vlVzIij~U`UTZK4fg4XlqVZ%wz!1$Ipl27BQc5a_i}%-8OW{ zBqg5|^wnqkmCT6!Y$|$(d(~&!^9M@k26adJlj~5nSvV;NppdJfrI!Rzb-7V#PKy~1 zyi+H;l74U!lU4j)IkIW{e~g|rF<3(#g1boIXT{T^;+)?Xc{R--{&ZyzOyU*N;YU;L z2h-WocV~b^O*iO3)FgT!2N&3YnLfrM!$samUKYyDB%VBakBXtwTq-)0Rr8f6Da)+B z5eY>0pF~y8l-2wQwW+{zYbpxAvrOh~ly)IltFvo-{TI^3c%X}A=c+Fp_)v=n_x@0p zLXiGb^1mnlsvjANaG7^l-O;!d9bwQkKLXs~bsn0Q`x`{WYTjgOx`*4;=a#apyzcpa zL)&0kG*uakvotnnCd2MXa;8PH82m8cN{L$)?f@X*Oi~s1q@=|_G45>3Gg#Wq4cIZ+ zFJiV4fTD3@h3SgZoX}(Ce*FUABFtIsf6b*#hFxBmNja9>RNRZ>HSemp(dS)?l*8me zmH-cqu?b|xU`w#m-doyxiLKEk=^NCJtzjDMyaZ*~^tu2yxOK#l)3WgpHFV9IUthRr znX$R-iZU>kWI)JXUnN7gkU5|0M+e}5G2a~m`C!&)GYY0eTEIYceFyvK z&$L*|l=N-BqjS)BpFMu2R-&P3i{Xe^+BbnIX4;l<*ys%#{dbV2nt@wb<84lub{8Ln ze9tR7%<52~FOiPndTrh2`GB1jyUyIs#oZY(s|4z6&;t#H=+X}!2TzYgI-Z{~Z|YpS z0VsxuZUJWydGBU0-Vw>B4?}Pq1r|ynvMF3+D%VB}TXNo?{<{EaoVHuQtjP!UC#WPo zJnzjIo2Tgt69>9h2~XRBE<^KvVoOkj-rD%ylhk`Yif$xssHz5+^)@JKn=~Ke9Q&Vi zTl$OIvu2r7;5V5TVf<|KGyAICG%y)`0Pz7EBi>E8sd26{3$C!F>tG`vrfA$(jMlq# z&5+d+j@rBZpfL3Wuixc~Nsc^!qnmv;uqDL`7;~1_o-k|j9+ZS-e2rkI0w7~3&7ydI zLHZosM4GG*xv(9>r#rihT!_rZ^ABno%l7Uy{c^uNWprdeOCQ2-osQ{50(FTj@n%2ZHprW{grj}5L*+jC z_##8%?=9Z-w+8NMiCR{}J=gb}THJm@pKu>g7)WOsNSj&b<^%)AbMGJ4s-vq6`woL4SlK!UrBfwB!#cctrC`Xu&~0p!dTwKD_$oO}X;iX$17x*0@i3f41sNO^D{ z%J@7m^X%_XVsJdBav&?l+$MX{%hg z5U7)V$Lw8Kc!&{tBCnaNtQzAxZk3M3S&vOS(WJauv`UX(oT8Q5H&@DYXLD{PHL>Blnf z!pQZeY6rt{aWuh)$2QjkfIE3A!8OGo)WYn`j@n{!6yxbx+dBCuug3G|d8!Hu=f)(K zq`3uw@zqa}gP|IzuE`JPtLY_knw+r9Jg{+c&WwiwI39wK-j2zXgIBiAy{JC9Ohj&h zsmr`@aW_%isb_PaF%-R{B>wZjOoReY9DAgAZl0ZqKAk;4(a=>REZmvmOmjvZ!G7|e z=+3<;T``|2Xx!+#nNstUrDrD#TV~1Sjh*VH4+QchR%E$jp>-HEpko=`BAF~nij|)p*amQWKL0IUrrkyaH-&rEjaVo=D>&rwZca@%01kx05_LOI$*eDCpa>zRPU{m7FshSF^i>rbTA$(_ABtax*(0~~$0%o% zC9*HH#?()!3fXs*UhMr!HejGPT|823{w^kakJpp5i64k2pMO;*nv%AVBs((GS!ZpI z9u~BeAPw@E4ch0c}7`*=9)|g?sCg+@G+w>1$8aiY}@D))zQuL%nqh*swldlR7<>QCMzd z?g?mOn$u7_p*)uY;=lp=9C;Yg5joFm`CK(FBd59?*wo_l`=Kj?El0B5LJl#s!3?-| z;zSY@mQx!|I;hicD%1%B@nK=>p~Y7xv4jaY6O6SNyC8ZQfO9F2-jes)#ZU1Pvvj)* zf!*{OEV%CnH@bF%xAoZ;12s{lfs`(9a+UYpc-XB51d%vpO`L!!!alrsW7^-LjiF5k zNmGttDCj%2^)8;qjm;>@&L^|uZN`9_$tHr8cv7KrpgoN|g{GhhB+e>^M5PY)3`So$ z-z<$eicLxOG5K)SG5Y)wI&)ybL;p|>z6E-m^Yy0*gD9rmXRHtNfG&?VPY&R;NhT*U zJJmoJlgjT-C$RRTC!$|OWfOm95GhQBJGbA5xrF;JW*QapbVGTb2FXmz{IH6)D4b;wCmgNR zne5g|$gHcv6Jda$Hs!|;VBMG1JA0{>tA|5p<8P$g;?OJPqF-nj|+7?dYqDzqwkADNjFA2-E4x^!>$c zlT#LoRn?V@oZIN<3eh#Tci^?)PNI0yP8+3TewnJ}>Cr!J`UO5vKMJTY$7)G>+*b{fdrU3RVodO$@Xp7L6aJI*v@WX@17F z^FHU{}5*j;X`;yu81^ zJLLXArB=+9vaZhqW{`VA>*1WM{!TGHk_}Vdtpi-(*whTu`_lD{%6mCTz@^Th1k>hA zj&L;ER+X0q9Vqs*dbkPtpHlnL))nzcRh`#K2ERsvZIBX9O@yG)@{INNDStRH9CMga z88G!j)Yf1K>LAijF@Irb5nm)6OATT@l@D^Cegu|4lSbj+u@PMkc8m$ggJEpITi_G# z7++Qo#APB5G$cIC1IS7K{K_)%ykD-|)^z@?Y2!#>)d(~kl3*0lK$S|3j647j5k+A? 
z8}f(0(nsz`S%HM4FzySWIS<`KM=`Tn3@|J!<#<19D$9_P*qLfaU**7%*MG*` zo9YJ<51RvIml-24gJBSMBd!Prtsz@H1S(WED8$jR03fvItw03%1?RXtTJ|Q)5zeKb zK;Dg+8pMyt4~GFs)j-Wco{d_5#?#MoJHXQvv-^Cy=^uskGy~{g*|3x4tuYsh9wJ6x ztKBaUuN_$k7j2_Z9~-|ZAJ0n-TBN_W4ICS&^%8nnz|C@ znRY;P@^jq2Am7@Du3Ar>O`(03T`xZ0hfb(3aL_hwQP?c0%pibQ`ap~cBiS+yU( zxU84=luV~QGg+3FoKfjWUP_LD2TCBK1NMM^@4P{dRj#Sh;H9{iZPB1Z+w2DN&SK^I z!({K*s@sg>6eC#*$tPxBKMsS+uIySb}or6)esCDEKDq$2Y!gp zhDWIlRN$q-VWz^R>ghpvDS$JXp2`BKEX;a|2WQfRIh&=Rabhl~s-!5(CO>WQ9*^|2 z#f6`+FKu1;Ix{om#I*>Ym$mprFSiwZ*kiQ5s_YV0!_-?|myC7QhSKpcz){k0%*&m3aEu2Im}_ad2((%)$6OOJ4wl zA!%jk&YmrN`)qfFLhbnr%jgFp_HUd~nQ-KHhZ%i@S7E-kvOuDgVjSk(#B2O2Pozy} zDejM@oB03wlkTSRcngE^CO^URN+81dzuZ3AyI+sxiyzD6M{m_`Y~sbCd@Od)B;ee? z+NM1x_R~K8mrHE~;8$PfuUt<0=ReuvuV2;M^E|JCSO2;Qs5$?);Z^9roChy5;JF%l z=GRBb77Tj)e0iApoUW1Ahv8o~+~x7-=RbeeIs4(Sxe(-Uv?C~zD$K}I)1^|@ldRZm z4a1YZv-w&p*Yi@u4J{ISUT`(Geyo`KxT3{az}MD)QqC+deI_yXV`6}~2}IhFwGpnB zEr7`UYYLcV_G2z~QkWfTR=${bNoH?v)BGMDHdUWMEy`etzqGg&>25{0`n9ZH8AlaZ z4M+_-aDf`m3jvGCkUhU9o%0!4YH2hd{B?Tg?Q)Wj=Ea<&|JoKuCb8}mttJI~ogx?V zC}{mm$-_ZzfRs%OBFwo=O?EvurQ)G#(pYm`sOJ)>{0J-)o>US2=8{Nl$$%hjG; z3KY*b}2Jpo%^lvU;G??{`XRmr|u*R7mscog;-0kpl) zO$`9-sJi=;2C@w)#c{B^*7Q|uX7WtBM-@Cq_)^j#%Tv0n_v(yrdCuIUs*Vi^UGMBL z_MxjA<%2}E%)^*lGP(>8d(pWRGYtbe7vqvyPg;B`Cu@vfcX^@SW@5%7C2z`lhF6UB z^YBT!X=%y90yxS&NXuPi;0UZ=H``(<&KdG!$jVF77OGTPAJNfnEFwvWGtN-r2G@&vM#R^_ zt$A@#7han8bC+Xp-$N+{ZE~8i_I-pzu&x*0p-yiEW zbhW{E#k^%(MI!=v8qSku<)LOG1o8mf4~*L(^hC@P{-gQr4@Mb@n;MVX_LA}|EJgHz zsGupY?dP>trL}eGa*f-v&Q8BEjsGC*_szcBcd#voMBc;TJKU;JNQDD9A?=NwyXSSgm zB)M2~=KO}b4J7XPCt1aZP~7`xS>=9KE}CcANs=%xw6OKrXjv15*6Y^8lpLPieh?M?#ACqnzFQm%IG=^TrB|Af&tWv#IWIWKl$ zzR^d=k8O#g;vkmAZ~j5wY!iWNN~~o!SqN`` zOo~X=%7}fH`XfwnER%l#8^=kzeB|aeOMz8p zFo_z;wC_0>ZQZW!?R>5EnBKz`97{(Ehf@2+gP=>;t;x|lBt$PWxeN1AfqQ~(=LSVn z+t0g+6p&o;F~vKD7rW^<8xusZPd33#@%#m*LRh24#cGcf_smZ@IbO&U7hrD5b>J

    o&!pvowVs zbr_et92a%?{!M1Hjzs^;%5V=<*!TLE)r3JF<%^aFTNO&pDbg-Vo2zWo6Ivg%a{NrC zg9wec*hrhV8}y<>UW|7kIw%2qXur?iR@;AkeS?*JsLFijx&NjFM@;vh6rqLWTIoDa1hF?laO zh{g6kZ>eXHGUJNTp0ang{`2dxh!OBxiTh( z;pBQb&SN$g=SN%~u&h(AHj`qimfc5y!mEYo{=?S8R-u(Jj?@rIz{ND29aC4Ds^K2N?YYwk^KZuHp2azz1Mc0 zNH($qX$q&jc*3c=X+^*?U(j?`j#(Lle~2(?Ak6pr{k#=9MR#TS?$jKwT+vO zd1hFjLDmnXa145yxSf{5Za<^88v&rI9P^euv7t^d#IoTDj&RFLwZUZUleTBB@Xe^m20aFld!WFK^m|&LA?7*5+B0W#rji7Y0ja zWm)%e2Vw-3W-VrltDi?KaB>Sk0SpU&YPz=z!DACMD=B?FQ;|$NlpvLatCD^i9fEtL z6Vz`e+oV@v{mvBc9k$i}z;;9JWRoI0bfMG_0;NNXsbBF{Gcmy1^Pr#{+}-AvdTWYN z_P*WQe_{>+x-)ak=u(f442W;v{Z)4|zkUJD{M_6iuf#?i5y(?18uImhU>ruFtLq>f zzZVTZjoRO}6_4+We3<+&C1V#f1Ud#&<&oQ87q1-r2hT$_#ry zrCd)!r?9`g`(Ii@Vz-5FDA;#{oqy_u)wziK3`o@!E>-cJ+p%4Z^d4!=%36Ri%H%{r zPgsPXZE@7v)-nW0Mbx=dSuulGPl zpBgeUWfdk*M@S}wCBQ7YqULen7dy0<1o<2pBwB{t6erU(dh@wBi7q!k%Ud2vDohF2 z*^|tuYsgES&9zqpR)3=CTm_5*iMJFfFIGH_T2OKZ;>OCcfswKS)v@B}uhyd1nA78C z@E8m8aYfNiYE=*pPmazWPF*sqvzD(|iMhF(@R{5jwJCc&6X}>>sgdmm{`%Wk+~1>+ z?g~kd>d1F=_7EmsPw`<9Nnq47sw2#heA^VGY02^`Ik?a7PBWo%d3h@y5T2HPQgGyW zODn|_JK9~t%$u|!B48y+oVn6RAxESqhv!k(|2I8?X)&=^IS1SILNYD4$^d!2=duLW z)$GXu<|%z2QoJmDHc3h0atuS`a?0xt7lC$ylASrmy(-0Ki>fM{T@&AB_p&8cVsb)I zvS*}<`6VrjU?`#*ddF}r?`J`YZL9KZfsr$*@0U`XDUl{uPKTy+hh~ydFgPr`9R1iDM6!3ZL1ICC&tG~!X#|` z>7V{o?*9`INfB!o5oDr24w`j}r?<5TfB}5vklBmRl(PFl5mZ^?LPz}nVm5z>05+t( z(=ryc2`4Y)%FEoG;!dQPD?8ic@tV}+j~yu{$pPvTdZ5IE3R=yKOh)8O<3kCHb>=SD zftyKTia(dt`}H@~w-WP-olH1rp8Ve^G1O`+f-delT+;nLc$Y8$_SNz$FI_j$Or!$8 zJfj2JrCbL*x>R?X#4ZbWbBFV?EgItn4wmQ=SN2@$RY)bg=oTmV(uJ)wdJCbk6Af&O|Udac#RfE|?D5>iuLL;72Rt}NwN z^+!nsvI0q|o8dun@lD?x5-R>dbQ#arD*%j;6bAUPPQhAP1ybzwusUfPHf5f1&*^@UyJTAFQen!ib3est zG8=qNx5>%RI@r165lE2|b6^?phe~!;91{cd*UfV31f}ERrXcU;DF_?PaxjZ?3gyE zh_)X&&qw3q<~n|&Fho&o+Mw9&>{3R2!~sVV%fF?Vp9#dKTzd^C3er=;jYshx)dn`3 z_W@2Bbv&K-0{2|>$vMRqKlz%P*HT`5^?#^?)D9KCo+koV9zva~!wd*$GMkz z{A$|?ngRC!Nsn{Ko<;g0KZPzzE6t-+Pi#Hcc3;sphn*y3|4Gp-ZHPo~ncLl3iWQ~fi~Mdq3@iZ%s<4Q{g|rd7?o zk}AOeCVPBd{DNf8vDN5>zSbL7qh?GdR$6gOr9sE19z8V zfs5R|3`2|cM7{X_43YwQat9wUcsPllcE_XQ0SRfTemfRpCJnX;-hdzy80O& zwU(V;%*6Lys$HiFH~t`_zSLon94W*(`y_6h3`*+t0J3=p{uEPnGtQ^kE#v3-ge+`e zkiCzv8Fda#60k;|eJH-M#J;H9`5$Hw(4~+M#%eB3Xfa`kS%r2nvZD+?)dL>U@i*Nx z)e~(|ioV~!&`xL`=A8~NgqVadX;s#B*JKAeqoMvE>5BrcR{QUX;*7c<&R&QT_hM zAFsXQY7>rW8+T|k58wCUsCp_y`|J@5so7)du9AgU8&F6zjAEUZ`@glW$jC#vYvt%{ zm+X;$Yas$0cDN(KPlkl@Nh*s(uy-#y>Qk${?O5lNy7n@*P8y3c7ovdup?k z2S2p?{nZcgWX_0btwpKvuEebUYd-yJKTCG)KseU+;koD=ogS3O>5~*UGq3BbIKK%W@W8X9nNRs>;nd&hFY1n zl|&{GvgZJt$v?IhUA7c;k5Fow*i%%VuW59ouGaL${S0@5IBp%=>dw=H&S5%ohYYR2 zll-c^2j_7HWpLJ&n9km#GLf668O$h3TcNDJM17yRUuI{+)W)!TF+EpAD2p{t`+0`TxJM}wQE8UBqo2rv z;$Ez#B{g+?o?SHn*cHo>2jCaG=~)^AVj=qU2QGtFnv1bnzi$fBEZmzyh3>~%c?JIOATiKBm6+nVkFU$f=oeD`bh#D@k-^}V zg~VaJB}zP38Ndyod#aPVJTzH^Mn>YihBPf(%TZ+T6(*f2))0;-`#0#$p}5diF;wM7 zq3Kp3+tL z9v##ncP`Lh@aq_+Mmvx<)TH00(3npRMdjqTaN0-xeW_WatzF7B0~a@s;;oYV925EY zEPntL*W`H*j%ks_ z{S=K_bFd}kbBCI4v_?8`p$XgloYaX+`0yev5kILCx)$+J>GmG;rf{c-a#!R|$;-Ux z1IOlK_Lcq$I7DPDrHTYr<1UTV+w5IQe%+Pl1FIhN4-xr+c^F}C$8aHBL=6XiF>g{E zqf)tTW$di2xgO4~lHAM`zf^>!roUj`McQ=ppVAZks{sJCNRCe5D!mPn(pg6Vpp#&g z#*;}*uIbejDt4gpLGvNWH}*_>6!XATzWUWnq#E->!#qEk??6*qA{1s^I$ECAb&C0D z=>F>1*f`r$gFzs^a3VvtxsR`@Tef47aasy;2KhY4@L&8>eA{RsM7X_ZRjx&9CIw&M zJ-=O>VHljpF~(+5tK7-D_ZUI!9eG&H-tNp(04(7f#B2#8W#e>XDr)GRwm4@*qg|Q` z7UVXn2;9K<_#Pth_}%*B>l8P7l&=0XJJBKW?B<@G0=KJ4c!)vY3M+f4pbexCTaWNd zcTl;IW{fCMKI6Hq>ee+L!g@Ym3#-8!VZ@dF^q1D^4f^Wh`?h+Xw3A1All7C=X5`0% zN$41vqA2FBApK#uMocfB4Pak2s(c2rd3;eF!Tsp~<}5dS{gM$-qnXV@YyfCL3R`nm zcw6k_SC5J^<)XwT9D@)V@l_DF2$C+`{^XN$Tf2OR#Hg%!JG3=wPPLA@6i)#wXv&Ms 
z=q_INZM8$}Q(jOJo$^~ClMV@ThLB%q|D@$-s4Gt1Ko@U*D_6(iKuR9ra&j0(oVdD6 zyiY6aGDfWC0Y&lXQG&Sf_kI|fxKm>NM{lVbtm6-IrX*fGHBnb58sD?^tsvzhgWam~3&bDGAAV}}sK)p%K zp%`S}T(5_DA!F~tf84cF`x$g(ynh;$K3;xsA$kvY9WJ^3c$iw)@wIe=pI-LTN(Q@t zp#_HXQo;&}jO1bsnw>VFGmj-);V8q+TErJTXz;{qK}UqhT6}?zhc!q~?woTwf6SK1 zz%4~l_fMb*jySi{qy42dxv5+cZ$H*?L`Pe=wpIsUD0m!`?|OX`GHPLGs-1fndK+f7 zr>1VQ81zkH8w%P0fQP2!!k1AA3gmGU2(M z8F28?^yPZ1&}s*DQ|>&*C0uW-wzHFXTP=Dko7WiE;(Ak5;)_PX!}}$i961PO%I*C@Ld|(sV5lT;K03jbFpi?s-XMMW zM~OfF7-y5fugMbigNOBeb&5~DU5V2Q9@JqUUVsf8V1O3syl+~{@?6tS#%=1p(1`P( ziSNAoKe}!VOtHV^2M*R|IwEW#dmE2GQ3fxlPJ2Xa>y2o~QeOGqT`vBY99B$wdw8l1 zT2~@Wi(Kzzp>Z!y>9v|G24ioN6*Sm z_BZa1slJip^-s_L@>SoPn0tHH=QsrSq@25}jcc4Q#A@z~#)e`_dP@wxteS6ii^kl6 zCmr+VuK5j_YLs5Tbs77C$DND0EBwVU#*dvbQKN%`CHHh%&Z(l7AYmSq~IqG;Em6AR%6Rm=ui)Iq#e(i$kjoNVrYzn zs^>EVjIO^Yg%vvZjC99~L^4Mns4742R_q2iWNnS1H0Gl5U@2$wK$gTaV)&f~h`{&% zJm7ju3x@s=?lM!ExO7SMSHoHNBeCudBe#{w8LGKhV}8)Jc=Va%FSwbDJLaxK$?AzX z6VR38ydO-FVjb+OQCqx;6CsYKVq7i-t59*Ubv*D{N*8#i#6LmT>6f6R0Dl778SrgI zEt)v@gORJdwr+GP+J#I3{wS!nanHmra4Z-%#y3S(Vp0>EEZ+x;lD{s zQv$7b zsU5{QEJX^6(0OX_e$bTD4hP|ZoV-2n%1Cxp4AOs^^!DN`HEQ|1{uP2@fAeSTLMesUg6>5hUMfa~g7Klb`*f(^f_vy`$md&?aZHaprqx zI66FZh)52Y&!b;YGugR(QazA`7opwzyXB=&H*b=MpoJLzcsF3 z@Y~Kr{3+vSId4zr^lVdb(%XH6)p;X&i4STVCf@E0c$9gji>ziPuKJ@Imz)fJ1=KGZ zqfMFCFxox@%;~bz{R=qg(u!c~(ov@uXu{CvDT!G@91+o^r|+w1N;O!JvB@R}v*Q2| zQl!A=B@4{yszWEC(7VMZlb4Kh&~yCAsnwWmG}QQ=aw3GX*!1VJJk{Y}WJs&K*nnf{ z&99u_pX3~sXCCh2@z35${Eyce>dL!6imKkA*yBQi#}-~BgGJeqK(BNYuOf)q@3U=~ z`oHfOi8s^Pk!^2$raYP?mr=>CDAV-hN<44liZ&-p9-j+Yc#Up^ZHuh%ZQ&YL! z4lM{gGB!6T%>jiGX;%^-P0^|jaaEYbnLQB|M!{?^mSQA9E}bDSr!l=O&PHsIKhw^+ zs$@zcs^eV2d(C~bPLzW{e!?elxppbCX7vT7;-yGOO(wr=2bmAV+--%O;Cy^~u~r?m zEb?I90U_-5KgODq`{#nLrcBCTKt$EJ*|gg?fGc~z#nuBq%Joz8C!o>;Ipufr!MAg zp1%&&R;YIG8j|_<+kMv-V98JiH!C0JL09J|BF1#V|B0D$8HL~1$Md8yUnAZmER#`J z?_8Rml=7n(&8i%LGXFG7OdR{?h1jz`x#|l8t!fIo&j^v;ZBd|-&jent86@!%SIymM(j`!4b;h&zL39|Iy^O zL$r)=qiyJQ@m(M>HFD=lrv|R-j^*<6@+DrBUaTproXLS=M5z*7oe>uu9tt4Kohjl_ z4)JztO!Z6UER``5oP;KKHv*E0^k^7>WyxD;q=%fv6(%i z#4;T(W0l^q!`uacVk=A7KeapdQxjq456igYCSoO4<^?n;s$ zdabTK#efw#_q(hIfd%Qb^2L9eU%#F^Abu|_*wsBGawLV@4J5O(U?kd|QcRO4D3*GG9%b_Wrd#N!q-u;U08RgUmPS1Ud zn(}{+V1Shee3n<+=(J9hXZb02k#ByYFKc~u)%Or!a*#F|WxArsawx`EQ52carzj$S z4AYlT_!ULp^t^rmJPTFQddwJ=iJ-M zyCNHolJBlQzeKw*FsIq+W_R3FgK^-+^G9zczV7}?}-L7hmAXqv$>DGI)DFnfBNS?pUSE#t3N;g`STCYW(oa& ze``0>V4LJuF7Q|iZHrRaSp@2#@-SlBh4`V@>vh-BK!@{pc)op8U75WJ%-k64-k0@jwNj4NH=z65YWd7TT{$oXkb`P4y z7}`_nx6^f=&^+0wC=rd)UQt-02=xz1fjDY1r-h=lEx;Ug8o>RDHD*;~6{#3ZdEtv= z`mx}shtBBfyUFDy2BH-?+z+6NJi&9p7-^4e+PTOqMWr7Qn>2*&{CZOyH80IW7)~+h zzhpsj*IfOajIf}%7gv$PEq#^3U>t+-7`<=h;o#Bc!Xrp)-$_ro%R8Yu1 zi-N>^S)Xh+PFUm`eNBFG@H{~c(MiB)%Z3p=`#ZJJ2iGJfZved-olWHgA z<8m`FDo^V}V;wab^rk1UN!V3BzmMRC(Kdk2oy;DK_=2^ViABLj9p|e23HlHDtFJ|L z8*)4KgBz>hFh#tcKNm52TJ3AI;NI3Lh1Ff!dg51S6?sYV=+L}-^#Ph{GQ5BVYo@lG zHdTDb_{u?E2&sKn1C6X^;_Zl$0QMTEfZ57kRq*N2Wu3O!YbUPW!{~}3r&wz>tw8D8 zlt?N7nI6*l`1gH$=4QJP*T?rCa&eU8{imHAJ?z>zYochdQ}Q&gs*}B1i#gJpkebg} ziCN^-oE!Jyx*^&FgFR)+dehJ~m+yw6D~S@`=0KTTIi`qAhSlZ?oD9>Na1nFbkTkYS zHR9!~2C>n&?;j;K(Tzw7^DL^!ZWnTI>!a)khUn}Ay(rlYErYWV59Fh&4}UukDVizs z7+eUC*}SfB1s5OMYR_uJSG9fg+DX zA%2y6XVU33;H*S3TC0T!iQw?1>nwZ_I5llFkq!{9aWoH$U3goihIF);B! 
zbY@v=Q4X-2iw$`!TDy$LG{kQ=am)vbtX8XUFXbAGA27na?jD?jsBz-F*S+PTLX96; zKgBjaOaAf4^pxb@lK<>)l~ENHj_~ls-SGr(ZIp}0LbN3{%dhy$x5+8D!!ViAY)@BL zTkQr}Fr=9s{4p4TIO%IX$T|PN0CO2_bc-@`^HGaSD755-fNfzu#!(IzMwoyA3WL=kH^irlwNJ8=jjABur*x8=MFq)M z(bY;EzK!-^w9Ot{m(fii)`qlpSH^eT$4TlaVYz?gfS|Tfyu&w{J_Sj~{S_6;pNwRG zjbj#aA1t@>;u&WP(A2$pqH{4d*%RvzCRg6;Rjwzv5?LmCf**m|5?`16igVNJ7TqkC zV;Cqwi$}He&LD)ErNZr`Vf3H-U$d?CcffV!va;MX%(z7BPz~w{PqB zq(LOWHYxOy$v-bo)|J%Elg56UI=Q7}G#+5(B9nA3LKUMpdM9IVT3R!mNKv%nte7YB z_D_zqv@KwyM}ywt|1e_aaM1l4|bQjaH-+Z+E2MO zy%$oS+*vRBOTE}GCtFp&+iYmgni#_zJ=Uu(C{ok!OR>`MEb1OW*rP7F7jeh@cHjMF zB_BzxrMqlS?%imsCWiS(x?bVyp9iR`F3T_~44<#xKpnklZ!DNKu; zIG(ASY<8>|V;NTJl%Z+-X*m)Gpcdts^-K7x#SV$&ys5~ zS$JZ0*Kq?nM7%Q30VWxgJv54UF+I=;cFwc-ql>wiw>w!2Hprj+KRqa>oX~{b+l8UM z5!9{pT}YCgh}u@)0d)J;>vGx1JK#lZ{+%B|fb1$AKd%ZFtl(Q&eB(-S&yDt~l7Lc-_%`ZVQ*1Nz-A?D3qHEh1aaqjyl$~kG?ARd!8~*_;})Uz9~vH(gbw9XW3*`3UNbycKe~4vdGdD zkP~^?`UFpI_PR5+m7SBIqkD#njbrb`pod8mz!3IccWv-VotbOYO;fITx;@T#5KeD% zhxeq7-WQL!dV*Qtp@6BW&OH3P4#Qp=EjPd3%9Iu_T6~9yf~VHrM9{$)A3fbKeBM_r zp=LRamgi@A9l$0^AF@S#*6g%-)k=c+XIq&XlhSzfRfM`oa3)@4Ju?r_iO_BqkyPYa z$u#z)ccaiRC?)=PfRMmP|LfPJWr7^X4VCkDUCrT(?PTwTJJ2TJDQpVMjKr9M*`$CQ zjmVJ++bFa2zbQOSd{_7JN=CGj8So#)L^)+QJjprjzLWSx9t1ylTHd)o7-u^Zi76gr zKW+_aj6dmJ@FFA2UD}6<4D>QyMObTbIK9!zYS2-HNG79t}zRZ(B8D6}=-sXKiq% z12A%m$uA7>s9edOT+1kK?2)&vs+u&hD(4l{@1cFMzr~juc_=32c2xvYC|<0^H9w!? zH5)PVBJUp*B%5hWIV=V1XL9G|w+{?Tk@Slay#;;lF~?jMhs>p6*Nj#!oI?7~!-lLW zsy0JlGKoWE9x+u)PP3aTmZBB4KM6x$+c zreaSG=2OQ2)e3g&Ma3enLaevEs8C-(I%E*NCP>(eQ^=$ZpSecuCR;5Q zXL8U~w-`C@rtz}Y6rs^o9S=WbUJfR0Vq6Bd997qIqHs+;cmr-PmcH}{;iTdqQ9u3cOU~k7P3wt=IEppKxeo>%? zr!z7C!yC-%!`ang+7N{3?Pivr-dvgn0WGFI!QI(*%JmR0I%WNS9lDSmF!@IQ5mR;7J_b4CWOcem8QtUN4} z{Cs?TczGb4AUqd@W%^n{c=6W=FZ;r?`AU6fFkpm^#fgMOOfgpu)cjS3BoD>3x=o2t zt1(XvAeb8o*{d995+Z}C{+z~j$Se`g7%UR20_ zIZQ&2QA4Q)mAiwdmE}3XSz$CzdhG+$Bg^$5Dbdo>@TKR1q-x!|*OZ`FX={3o(uHjIANd1SHQCcefqT6{47uH02^L4hMbnNki^PPR_1(0xu!nF&{fi7 zS2izopxbICKW!&Pb{{lO^@)j?I6ZMPm)D_27^M@{65u9HNwmZ=aP;_=f! 
zUx^aKTuu}@81T{??)6F8h)l5p7xLXvqd|Fd9!3if@qLQE^rbQ`5CK*t1w!vb`DkI8 zFI5SDi+w!(L<%0ugJAsSA4yjG|L^br?!W%?pUHdxs=EKSyJEPuWfFd8>pu_4K0ngp zN0PguZ9p_fcJjm7izkiavD##-DO#*Ggv&ziFWh2qWLfCr1!Wq)d$8)jlv@-qvypm; zOd#43S9^bOatjn1ooQpxvu7xub2~z;v??OcF~ljibO=pxf2l7qlbPL>`Tqm(?e{S`V zf_M?@m&(MhLT>iBm}`LyZu-i6ve=5vJMVN0nt=`i;Y)>E^8g0ltDjh+H6F>(fV}1L z#Nmq{T%}>XHw$s%_}NrQ^X9a}O!`PUr}!)4_o1k%Y=CjAM>tzj>;tCl+>dzYj&A-0 zds|@chd1>74=%;Jq=%>Q?r=4O^8M!hDSYutiy-db9l5Z)7V1pgm6jktx}mN+A`v=) zKap8YelpttuZ}cMPHa9HtFFJdMVOT@$b!~7c-dKu!blrX6biFD%WxPqJtJO;7i5Ml z7a2~**GPqZwiHu|-n++8EUmb@U5gA>zHy0Oc#k4$gtuwFkR5AwrBUMYN3`cW_uWgE zd?Uxw9UK7QFsU}9%?QS-Z{eq(D`wjt;$$8};azgE;8A=)Km<+@?)JXi492s<%|=Wf zKae*6@rOtEdPp<&){6%Ew3^=sJOB=Dc_+ktDIdx(4&Tal;E6t>+;Ss2HmTwH$bOW& z{DWJuF(C-jYE>3|6PomDB~sX(OnECSzdYzlMC~z)V`cfA=2lyrxi~6eZ{XkVUqD(f zqOud_@wDfvEDzf9C}e2%>?emQQsR`FJ$X9xElibKd2ujnRj?jF|76?>ft&AD=7&gF z!ib@;?@v7X_Jx`1^+Spc(3sC|d}DDp3B=rIz3b{gEg@`2o?C&@IQKcx&vGj{eS9DK z@baH;#mnSiT!tnL^2aiY4{0OIt1TlOJie7~=CsU}!?qS}#DN@wmfV@uOnW(n>xf*4 zRy&`omFNL^BAzEAp_iANiLSDzzd=831*E<$_V<|ijcY%JQs5maKQ*ZPJ`t^|wThIs8Kxlh6^`mY{yae@hyT4%$+K-DW+hz)HNer1(uw7t6&<81s$cvoMV zmxbpdMO^NaAYk0U|H2=;@*oiF`8bqy6Aw{t$qy)yy+T*}&z25F8hIMwi+63m%HEXM z%u{605vI?s-NJIb+OcaWSFxB($g7ThkCR^jjy~s~8n~NR6oVStRS@a&tvjx$tsnu&v=F#lNjCT-i zQ@P#FVjA9KFQSh=&g9x;@};)fiv9vKuRSp>j}&8e2e^wAG9BGxKhEntA@{1%y=efq zb~Rfk+0T%tw2(XhQKA`Kjq$dkWjMF_nN0TyFZ;*81!()hw)>~lPX7XzhS8H1=24jR z<5t^rvN`3MG?h7O+h8s4dVgM_KZRA`-mF`HfO1y z3z8=cJ&1VN%eBZMO_%MV#&r8B36aL+rZS3g--x>q&FScOJGKb5UhfhO9Q^G}2KnG@ z-x`XQAZ$+Z9+c5A;LROpXs@c{?#BfU>7V z55ZTujwgmoG&Vms>?~{1)0^IYQ&uPpYgt;5%qu^CEccCDO+>%$B z`jnhYr+vKjR$Yf%uYj|emit)Ct{tPJ=ns5lxlV+<;zEqiz2N&p*eTq43l-iUppEqX ze*pof1u|;=Dg!vGK9$;Ht;Ac*OO}s3T>W2EY!EYOrOtOe3Cpw=ORJr0@<y_+qgnzKu*wq4?jB{p#}Tz9VLf3n@1oml@WQ`%w>pj`I*-K!q)Thmt-PQcN;rkySNbLGGU#WzcIZMXzqfTzI=scrn;eQ>KpT63|C&h!kpw?7ac`!*02 zyS$O-TT+ziQ;%tO-9y8=|KnnDG_@H|XSw%XDL*WZKIL8zAidHU5t}@=4Rh(pd>M6Q zlbA3}>6~ao*XAM+kNki_A<;&bP?o4q`kof6sYuQ5EX8zJ{e?793jb=rCx+&3i4RMb z;zo=(%>vrOqOmYcP&eu=gkg44v9>sxBmHhuvb&_ewD9riZ7XxmAN=Z37a#o}`NkZ8 z$YM(|3Bx_B7f*=8%#&|7+>(SC5=(AT9U~!d$i{GA1A+qmkSo;gz-U}lh>pTs>7i@f zSfz}e6gI|z4kpEyU}DKd2=dl;S`!vrGvt2Ku&qu%3y2;ntXg|%rp&M z73eCTWWFhSI9M0M*(rD7p=`N|;hSV9AHxILr|`fU3|fZM9ZeW?)0}K%*x}~KLM+s0 zD3?B!xL*6A#0%q5E_CP~{m>!-3JZk31jsd2Uh}l#_+pEq4MIDYM?6Mlq5|_YdAi>T z?h4}z_y_AKY*7}b5XWeg1Im$giOIX*L>ZeYp^K4P4=~f4UHO-P`(t?jal1#h7pZ!* z83P4lhWsc0t){@cY$yQI`0Nz)csQ;fC;@o~C^PZ)1sS7IFxQznLs|)BNUY2YowiB5 zNq)A)6Beg`eJBw0zM#Y~BnZAEF1^CSU>0MHw@!?f8EWue#<)3zA7w7}ojsw{AEB6+ zKn9PJ4ulE2HuDHuvaj01k5cA3qWp#|$29tf+``p(9Y3>@huwj%fzE84t53}7=8o$J zewe{zj*Hw?Ce7>(;6bM`a-Jm6`t>&Btq%3h!9_{dfLzGrhyDSpz%^{ixa z;1iZ>N9I$BHx#@$_#l1dW~`MwmkhmeAFrT)1wi=l--Dz88ReHvxReX?k8&<>4_s(F z?pv{KTfQ1lE5+2#SYARV!nL)nKD@!<%*14_e5Nx>{Mej3X_(S~vXoixtF~Zg<7Zoi z3wgr*Uz>`qL7*D2S^+8e&``pMYl>q#x{pK|>PYez4q0eX48{5Yuu8$iTCQ_~=MuLN zmpQps!EvYhRYi)$qp2Q+cQ2G=;k@2R%E0Joa7 zS>tlkw6WBul8tUW!(IcJ^K_2UTkhT#)6$S$5Ch({jkKMM7A|yzDMZW>s-0e4Y*c>e zwrl9~L>lF_ikfXg*t1b0m(35A2Is3G;w>huPxed^LXQA+t`@bsxhzf{t}+u(w=bSy zn9_}_b~?>jSICNQVWnux)vT&4bJ*&p*Gw6qrF)T-65?99@L6&(Ub1jS=iz>pISPN* zI5oIkh?(lZ@Rb9Q_0Ch!rZ_Q7SOv1(CV2*OjXn=`n+Mb(^OV=OSt2(A*Wc=!`=^Kd zTm5S`Jm-eiGq2Dgo2j%~axuV1#I*nQX=)pJvI$GN+WE5m*Esq-i}%-vWk0_k9JkW3 zm%#?F<&YOMvbWldY{jZOru(74WG!91yEwCxOr2M9v&C%)^mL0*0V1zRHS?mHz4=H_@FC8&=_TrEWa#j|K{wEb3eZXbmPn9BkMs><-fNC2e4Xh}HnqP4ZE zcz__xKI-?`QlrJ7u%SPE=SkL_8;l(A0~N-Fluz<0d_pS->y+(}!cEQ5+ZTTKq0Q6# zg`TWu+=+crRiTfln1(EPR)IO4h<+01Il;fUn0GZjIxo5WtcpuKnuBlA{?rPlBbIS* z>4prXfx_#L*be(LgDt@DIgp_Ahe(gn0RoovPL)0~aC=mN@)>Zxk;zG7o>)~PevF}~ 
zIHGOvii?Tt_Mwks5JB%x8W`ijX4W(rDn|-G_n4<4KZ+g4-nRT9&ms|J`@&)-`bju6 zbOB1lUDEw!7}}S)`E)s)Il61F81>SpvDt|6 zkuKi%6=j^klZL;v6rlzyfSpGj0DL3vNcRGj z(}-tyke7;+QnQdI-_Jydu>L3cngZv%PO|d#-@?&u(_5wB9_wqdXVQw*n|9ANU>*YwKpt%P#%KB3q-mt66o&k2NXe)Gd^(V&(g z!KFqludGViWJ9VvH7f?2nC$R{oB9To`BnBD(YFrhJs?kB*RjN9-M{I7418TLekpPQ zmN;mB$t%h2K0O_I;Fy}ayI3~lR>!3_d*WS3WB=i`IgGCw>XW(9{}u1B7U(lkVv8Rl z+DcI8e2eAFRA6rgUzE=0Kn~)$0#3MhyRLGW#S`fm%soj(~r^x`Sd2`y@V3 z$a>kHVUY zGlxAx3~>op{wThtCH&yZ=U-aBO0w5;v5WaBDf ze=Ua=;SDvus>1MhbcFOYh<(J58fHH{)yjQ1P(QjDSjyQfIkmgWy&eE1D0>pcPHcOF ztj9<$!a#whKfQY)gMFUFmwprbWI2ZU^HfPJg2}?uiRCfFuPHx8Q?< z`H$pS+5U!DD%{r(k~4CN=%v9f)3qr{4=|)=-d*E;!z!ZRo`e@{R4Q}^_T1aS<)6c@ z^L8}*j5)t1o{Bjv`@y8#+Xr@HX;VcH#vB%YZwhC#SI=$mY+ZdKoFF#RQ# z^a(zKC7Wtn=P>SNoZ zoUHFVm>hj)>8rRng7QgxmSA!{KnJ(J7@u38e1gaf?@+%!RL%;PaCisJ7Y``z{z62h z7cj(u6=dR9Sil57f^7N^9z8e<7+{dYXzMog>q{hfOVOPedgm3gUAb!-!5Dl; zwPBd$*)$d4BfEA7@XbW?w}1iW5+689roY6NuAibe271iOy@5K{R(`f}-x(j)KxP-c zWbbr^@!svPVbo?VV0gDoYV(D!NA`Aa8}uNF=59!uZhgCJr42XmwaCtbgn$|~yFZn^^7EL7r(pCx>ZpHN z4L$#=GrhIpQMJ?T&XR3+YS05a(O+4@STkQw-_Vbc-7@bz>h6<6oh9rwVy!I9y6S5f z{ARG%WyKt#0tjL--dVz4^KQp|8Tv#-MFW@T62|+zXw!E(`~6FAD223y^}bE5nt14! zFyPySw(r0E_b)8KDs-)uK99nX%`be*+4|PU{C#(|9#TCq8MkCRergW~09%mFOHW4d z!AGpfHo>%{EQ3MtDflmZlQ;5!50-n|CI(MSm~x+6$8JC*SPD~;Mi>?tbRhU9UdKz= z@zhku$vLfM=XZNLAHI$z?}qsjZ=VUd$f2KK`3wp;pgbqW#NS@|y8s=oWxu{QxCK|Z z2DZVq$uQ%YdotlDw@&iyXBjE1wsK|7}phc4XcrAIKGMgbVXJy*u$8qMLt( zTY-CpYwik~XjWFZ7$|zh*du!Mq#F!6@f@#kLmsCK zbQRQQS2pq+M?HZRE&|V8C4KUtXI>^ufeJbTGo(8cZxThs4TAWC*jh)M=x@HLE3IE^Tz-_F{ph+%{TP|a z4?tGvBH&@iNX`KRYsxUuYP$Twv&CWu7xpqag}sI~@|?46eG;H9W1ayS+=g3^&Q;ln zL(9jgStmT&M69I@eZdWUK)OLH-^Mi2p08n&xDs&WnRwHGpF$+JvsE`4ZSpZ>Ka|cO zzM~Wu55EC9Nt%*gAT76l3~QL9Kw+&1K283VWXesJ!$Afa%3dgL4R1&?=QV}b4fp3N5yA@;uEWah zF%GXs?ELy;{=f1Nx$yU+EZ$vi4jZ4Ad)L1(RhO%B2QVaj(Mbh_>&pWF25UsKVD`CQCj6cMY2)brm?Z zz>^|Za^;BXu3}3YrgFu5a;c1z8WqQJcEWGnfN%`|QmS(D)cQ`v?8iUV*5Z>2J65ia z-DPKp<(d=yE-j%LOp1K8o%r1IhVqe}vha)}a?NkRG`jvtACjHZeee7bf-MK&$=bph>MeQX+Km3``Id{rg1KfT4-Uk_ainp+#*>BUnL~If0kQ_^oV?@l+hjtIW z3mpqb=V`ChpN8H5U=aR&PNH{?5E+kSFOw2i#fA0kPM@X~!*oA_%j=cRjVrLP=`Q?M z_gfF*3(Dnn9(-!KpUxi_;j=^?w52bt@ZWc}g;SZiKGtWo5{UutdVu_{nyYB>247{J zxWn!1o=l$iEzVM8?7yo(nj5_rjq4w==+5GzJE!M`^4KcsPI1Nk)D}m2<9Rrjw!;7wWgn~6aFF3wwxTYA0>;;bMLE*KZ z!|PoI@;ksWmm?q3^QZ?^-2#eM+C};XU{6JT06VZ!V~XnVl`gY$nG->qQQ=kMTWQ5=wd~s6h1W@2WON;z#MDOYkjyrG z9g6MAgWn28t-a`04@th=;RQwM5hBuZ3*P8W$NV0@<|t58Y+kTQK z6%Kv5x1=s;M=D7S-F$OW_%&%y zaw2bEqEL3W%-O|sMpEWw=pj~#VdTYPwiLk^(k!o=@H$z)tyanV>DTXdyZc9B<#fxk z=o$me4s$2Q_H*B*w+WA^+Z*w>XChJ1UHXkqPeqm=znAWlR2(z z>dsY}$_`y@+33yEvPk*m8$VIf)E&`>_%s81iA=Q1$>!c22 zPi=F$W+s=7w(dG{C)b)R51n_RU^(y#p@)l8*Az-87whVgpozPVKt4q4cd>X8ss6kb z5(XO2jn8s^yc`1UL1$&@qJfcD>-@|)OBGHLywxOSKi@D20`!i6f*K}WRF zzN?TPY);=Q+ZIyzKlROm-kfL+`VD6TzYYGVa$n0`M$#d<2`+-0*K_RUa==}1$%7y( zpIR8%?0c790gO3S2zfgDdA8#n>AUO(Sdp_Sq@o>AG|?a4GSshjq8BZh-J9fF@*~;& zPZM7h_(TAfJO#qBU#J-R*{*>d{O%kmP86T>*ta^9&r|f#7YQZ=04dj?P*WuI!Lo8M zGj+zbaXt}4jlYF+!E@S$8nON^oJY*uFIor*avS~a1vm}&Qo?UQ(C2a>%v~4dp`)V_ zIgE6(F!%LFPJ&j+L8|l*Dp=nD<5ML!37{WfuaPgzsPLA*Tg;vfD0oX!!<0qf!N4e^ zsY{3Wl|hFH9VwKo|I(siKvJV$Yn)T$LRMx4w=nhui>_0mrW=71<92@0c?R@3+q(NZQY&Rrq7So7cr5hJG9k3=iN-9kPsY$$$UK^mc1SIf zdks9tI@w>bmVsSKJ{to%Hi-pU<6l~|7PQh*=sSZ`Gsv@&t0m>Bg}D4K`Nqs@ilY$U zR3R;6)6AVkR;Ab*hhB_HLBTr zCbDBC=7Mw=o0Bm7%yl7#6S1vAZU}dqnfjWE=PUNrk$hn=EPW`1fq`kclDI4ej2}5A zv$u7x)nw@DgRP^77x|E)i?;H}Z_L5lb-lAAtMWk!Acx;!{Yx|qgii3EZGKM9=mizp zc$5JxcYSF7>k}0Z2+`q7Q+xJk=_#wg_`uT@<{i*24_qvSdsK$Z-L;SpV=%ovAs1X? 
z0nUK)G#XNL*Rl=#T>S>V zfrE&aISl+_Yz8xuXz#A1qPt9(D=_k%{>me~wZ!%7bD-G|Qep5nXUJau>?Np^SE8?n zdx>kFS0(b~ip$bhv!I0urB!rLh*9q-MHKo~{s@n|;s()w^d<>)eG&oY^mXZUS6qeB0bUx6*!l^L0|vy&43vR~R)x1blhv3v!t zk*^#a&b(CZ4*nK@F@qd}*qtkw`Mxr8;f`&A{uy$!AK9RI^%rnt(%Xw-u9Xc`l zJxa-p?4`?gM&NuNsR6V@f$^Lh9lxq)0Y8PDw4?V{%t^{Qk2!7R@v!r067rjVIXN3Q z@}X9typ+;sI+PU~qiok=?#Ew5E|B4K5-6fM7jT5LaglqAhhLR5T|C8^w^0th9}1IC zGF`zaeGBAR4EPUon_1C9jncCOime7CtxsxyO$&0l6u8m4r)|SkFDaNArD_eRcADAT z7ODS~h|mRwtFa0-)`^^H8nZje zuJEW{CLtpSrDy>J6O$-o7Aj=WxF4O&L(vFWqOq#xxTR{FGBHcSSBJDqvg;=v$1p?8A^BXz6VHfIQFWrmZSnzLP-i>daSAV;kltQP84pq8TT=1GM&84| z&r~A=?j%KYTrZYcI0C*9J}S~rGTLx6_A_+;7kB-q0-xS|+%xZUmwcxscn3 z_jcDeU9~Tmz}Mf!f8p=tt0~^C{pOotMmg4j|AA_UmtUD$~)lA7+%a^!u} zNqGcIhnFAOINJ$(%AvHaL@yxnsv%EF4=nv+0Bm8);H(hZc zIt?B~!ij18jYKUkdsUr+ty%;HfV5mVv;)Y z%A3&H3@8!(37W6J7AcIH`S6PTR8_dqChwE6PN{iEBSO@8DIuB`*#L zkMXu%?jwlFEvterr1NYWFi z4MqZoJF!{tH7)}5H2$>AJ#{9h@%aBpIHicGo_DD19E`!|_SY}5BBGr@&KY-<-{Y+$ z=Z90%GTapAMr8Plyzd&UpNZsV$A5LhDB-K+%P{=y`0JmWjCYPqqoa>B62;z0zA-ZN z>?)Hoo0d$pqW?76SB`D1kfWYOcH?};sQ{)$K#_i2RM833=D0W!@ff*;=D;&C@X|vN zMo*++X-@O|>%MaGoOvY|*on_y#HEa&wxcsXV z_5s}laTNvy(Ebp(h!@S(7R!`VBZ-hYu|H#DZ5iQ@0YM zKkK}FAIm`0NzFXvzLcQQ_By|vDREqv$tAo*7AksMm()5v?ft~acXZtd~^|g~* z-FVgifeTb3c~E3&*Y`mCq6XTL->1#=ZZq=eroQkNwsUl6TqpGgBtPvzx{RFI!l27C zo*({@$SW8ZoptbIF8P^CMhzG@wmqv8UZYT#+e$*aMZ_?q z<}a=5_mjmJY0K1ZM^Ay;nCd8aInm#J3s4pGh$y^=x-@bEWpyqqlTnn71P++qrm4Va z1Gf|V)+17LRBGO>e9dY%G>PQ6T8c?I?e^jMg`Rnw8-FG}>Nfe=?vJ@|5PLndpkCFUl}8q`Gm!J35aX%r$A2Fl zT4;{Eg$`LMj>?Pt_<|yYvun!C&&w5yCy}3C`m~zF${SfQo={kzgpoO!eIfG1>m+Y0 z1e_A3R^%7C366WYX+L%}^-~mV(Z(?;dB{@;_vSddVK7LMiO^WW2Lxw6J1X<$56EKZ z@NrV6GK?B5!2`N;Le)Jgx@4E>+dq7hQY2AHxbN6GmC_v+5nVG=?Nq4^FabaYUj5%l zaMQSkEr#G|Gs5))MlP497oqc2TPwMz`j3*&#eN5w!uL|51&TPAYd^jEQ{)bf{N$Y6 zfFlS&lx$t>XwMfbZhxDP>naBE7t~tnB2Dm52|;V+lD^$f(EyB!d%51w5RPPC{lb zz8&8c-~Hc@-kv<6kah|(v(p}Y>zNhg;(1i^&7;X`U@@n-LdQ7=&H(m*YEYaOh%;r=$2EMJc|7` zc;g^o3nLo-9c>}pP05~AZYE(bww%^Hm2`_81Xyz9#Yu8ml~)spO0w{`Q+~NiZaT@U z(vneI4#F$IOQ(F^E$ndc08LemrLgW~!82=f&{tzF8=1RlKmB$S-=krpA!kKoERAHl zx868swT%?7wFJ?Y9qyAm->~RY-R>>yE#{W(#cX>-Y#C;34Rv0`wP3FtoZ?37AC4?9 z$0mSTiZcKrqJB8MXGl$j?%ha{VGFz4@k8NESIow9p5**&nc~}K*>}0eOS|K?JiOpi za&}-!GJFfG8ys=EnN{XGea*;())%A)%1P#a3nO~IRv^LEo>%Mr=-{U0P}8s0-`WC0 zJPWl~G2$Gh0E3=%XKtK|Ss>=D zO{+uh_6?tPphig-?jqTw8rDjzlzG4O)N@v3L#bEj94 zUS=(ZiLWZwN&P++Ecz-7JZ||up1k^1ehYe>k%^;MS*Mx#8NfW9Lv?Pp&c{# zqv0{LNtoC;;zP5W)lKU`#=On;&U%IJYt4QtM_NXQ}jy2Iy%ZvbI-$GFGeVjZ*V1-5o)cGqldmGM>4 zTXM)sx=6q;1!gaCyD!9u`9&3713V3G*LU9^$C_o!^Pqos7e1%IjBc8`K8t zlkB~W`(R+*eEPHWL`RBz)RCm%9I+*Fr!aRsiVyH1r8&kj%J)?hdqNekkI1L^l>2zVz>~J6$36d|-BTxM zHxP#wj?gk^I|N>JUu5}{a8ShdrdLIaF8Q#tM^}u8OTOv>%U@&Y*q><&h%m$v53_nc zvtQD&POjL&m*imO8QLRsY7|}Qn%Am9M18o z=Z&sc6vg~yj!(ZQ-`*n%T8v@*f?vG!oSlX*Mhs38n(Jvp4&CA0_!bh_mCeOLp<~tx zVC;%V$8zsHwh{1Z>B;T{VBV`Nloex=TZI<8z71<5^64fl##H+cwG$&;4wt%A6Y+i8%Zq#UOvQU){$n`J!QFTDHOFM)>+Q1IOEf(roC5B~W4JUfm#YjL8ll7^y-?iP z`5TKzk$ol4w(7{zMvvZn6<_fv9}7O=QUn+dSrqQtb8wOzeQ^&*=<+zYk$GBID{ZN9 z54h?0+#JXRn8v^}h!k{-6l4g+M&?nfw?mif&?a4V73K=$#O&(rT5+J{=o?yB}Ir)Y+5FyAM5F=x*MGzwMps=yau8Lie`g8TsJOfEqkK3wzjp z1X=G!9X^xj7vf3O-D{D>8c8z!qA6Sp-e$sBvB&k5ZHY#rOF3rL@wf+@>tM(c&%+Iu z_7m;V2#1(e`zsH^eV2GI#>3O9X9sb{?{E@*-60k*&3vzv{6@Uil8%La=Nq9Rr((Ce ze&x$^Zzhk};mZnF=f!Ul&yL5q!(lJl*4!-|a9_j6wtmJY(eoNqVn-?hz{94Ie2hSS18Hs3FY5+JAln5XDSqbyq zNs@w{c$QoS@+a)KJlR*~SQa9TU?tZ&pS7)$`b-X5&0Smlp6OBVvi1XHkCnjCG3u2z z3`4~R_3uF*mGW_f?fR_B=KwIAoNkDmXS8;o>r3D1jh)}xRmuK0llPhI#wY<=K1^mJ zcd9UwgEvYK`ozP$6!z|z_2vtD=I|7LL+Q(Q$ZND#3)O&Obqtv(N)JQXdG=(niZV*o zoTr}pYz<0rS2D?c!?ltP*R!ex5Q(Ni{Q8wASt8LBnl%m%46o;2=k}=QScU2*jA(BA 
z!_P)3Stsxdzw=cEj|JQ4)pD-nJMUQ$hw8{J- z_-(QQ}^#!TlK7DnE>|U z`aG_~Uzdx+P361AzpxHSlO0iwyn~xLt?5ltAJ;HyQ8vlRHBF~Rz2pWTZX3Wf9fG@+ zK9S(xJ!^UMm@;o}k$?_^%`9Om-Wqj#McBD+UGY%B_I@ifqyrn0A(a}MNu@T&A`MPrf5E- zp$}7*ZML|VJ-UuT-w4%D-;Un?vn8gaWZ0^u41XT~wku5CB~OIwSEFZjD$3R&!ND_E z*^XE`@Vl})$@rx4yJf?kD5rJ8ePjiq?MdJHPE20V?LCqzU(xl(6HORhzTA_Yh(wCq&ERz{0`-i zEXwCJelu(_B5PzpQ&I7jyrUt^6;cXY3hzWU;YPZ3_Ww1@gWKrp@eYrklu|?-uXY|X zA9X5l!cN&#AZ<1K{gq5ZEkLIyJ=7ATySbmI%nn~S$@4DfzIR1IcG+iAzI^x2!*vw0 zbH`TBO^?^M@fk26MnEexawdzS%xC-eQ5d-8=F?zaN{n{Lcs-_vmao+K0)_i!sGn`s6>xTe=D7g zM=?1H+sNHlkAk6bZWw{8`oJt-B<(s1e$<@olEkr29qt?wW`RhY@&TfF?tL)<; zW()BgJ*uNSto4v!Zc4)wpZmiCW^ZCY|D&R}Fff23+mldlo-Z~7Z^k1Wn*@VBD*Jkj z=Yx9?wc+4N0Zru&2a5uz{q{So@%Py)b=vGr>~VY4IFD+a_ce;q%cDlW)7!$OWzT<{ zy={gbXcD{~ZRS73!q20c^}Qds?vS74(4xe4Dx03e z4rHU?%{*${@JSJ-j?CT$Uuqj(N=T{TU4-cLQTi>}xnuAt`i~u72)cYQUM<)OBpF^i zM)3FpI%W^e)y{W>2@eu~3t)!;Uk7Pk4?Q?1TNGcy`N9{h!;1)XMtFn)9iF%k?kuo_ zeLk5v-4pRcJYV^tyrEf!{`AAv;k^Cm9bpA8`>1an(bqe`&?oo`kNP(Bbuc$AYkM{l zZx^?b^DU-~T&3$|PS@6e@LOVX^9vuzk1FYJGgk2b0Z>Z;0v8Ju000080IopiK0{>y z#G<1B069wo04D$u000000096X0Jebu0001NVPtuBVQp}7Wpi^aWMyVyb!>DfGB7bQ zEigANFgGbJPg5>*cyv=l1OTi60031~1po&Kp%qS0O9ci1000010097I0001FqyPW_ E0J|a8F8}}l diff --git a/testing/btest/Baseline/scripts.base.protocols.irc.dcc-extract/irc.log b/testing/btest/Baseline/scripts.base.protocols.irc.dcc-extract/irc.log deleted file mode 100644 index 28ca448e05..0000000000 --- a/testing/btest/Baseline/scripts.base.protocols.irc.dcc-extract/irc.log +++ /dev/null @@ -1,13 +0,0 @@ -#separator \x09 -#set_separator , -#empty_field (empty) -#unset_field - -#path irc -#open 2013-06-07-19-08-42 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p nick user command value addl dcc_file_name dcc_file_size dcc_mime_type extraction_file -#types time string addr port addr port string string string string string string count string string -1311189164.119437 UWkUyAuUGXf 192.168.1.77 57640 66.198.80.67 6667 - - NICK bloed - - - - - -1311189164.119437 UWkUyAuUGXf 192.168.1.77 57640 66.198.80.67 6667 bloed - USER sdkfje sdkfje Montreal.QC.CA.Undernet.org dkdkrwq - - - - -1311189174.474127 UWkUyAuUGXf 192.168.1.77 57640 66.198.80.67 6667 bloed sdkfje JOIN #easymovies (empty) - - - - -1311189316.326025 UWkUyAuUGXf 192.168.1.77 57640 66.198.80.67 6667 bloed sdkfje DCC #easymovies (empty) ladyvampress-default(2011-07-07)-OS.zip 42208 application/zip irc-dcc-item-A3OSdqG9zvk.dat -#close 2013-06-07-19-08-42 diff --git a/testing/btest/Baseline/scripts.base.protocols.smtp.basic/smtp.log b/testing/btest/Baseline/scripts.base.protocols.smtp.basic/smtp.log index ba16578dfb..b56b8afab6 100644 --- a/testing/btest/Baseline/scripts.base.protocols.smtp.basic/smtp.log +++ b/testing/btest/Baseline/scripts.base.protocols.smtp.basic/smtp.log @@ -3,8 +3,8 @@ #empty_field (empty) #unset_field - #path smtp -#open 2009-10-05-06-06-12 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth helo mailfrom rcptto date from to reply_to msg_id in_reply_to subject x_originating_ip first_received second_received last_reply path user_agent -#types time string addr port addr port count string string table[string] string string table[string] string string string string addr string string string vector[addr] string -1254722768.219663 arKYeMETxOg 10.10.1.4 1470 74.53.140.153 25 1 GP Mon, 5 Oct 2009 11:36:07 +0530 "Gurpartap Singh" - <000301ca4581$ef9e57f0$cedb07d0$@in> - SMTP - - - 250 OK id=1Mugho-0003Dg-Un 74.53.140.153,10.10.1.4 Microsoft Office Outlook 12.0 -#close 2009-10-05-06-06-16 +#open 
2013-07-25-19-52-35 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth helo mailfrom rcptto date from to reply_to msg_id in_reply_to subject x_originating_ip first_received second_received last_reply path user_agent fuids +#types time string addr port addr port count string string table[string] string string table[string] string string string string addr string string string vector[addr] string vector[string] +1254722768.219663 arKYeMETxOg 10.10.1.4 1470 74.53.140.153 25 1 GP Mon, 5 Oct 2009 11:36:07 +0530 "Gurpartap Singh" - <000301ca4581$ef9e57f0$cedb07d0$@in> - SMTP - - - 250 OK id=1Mugho-0003Dg-Un 74.53.140.153,10.10.1.4 Microsoft Office Outlook 12.0 A1IqG95k9Tk,VUcocHqaWva,JJPHrvZaGJj +#close 2013-07-25-19-52-35 diff --git a/testing/btest/Baseline/scripts.base.protocols.smtp.mime-extract/extractions b/testing/btest/Baseline/scripts.base.protocols.smtp.mime-extract/extractions deleted file mode 100644 index 45d776a8e9..0000000000 --- a/testing/btest/Baseline/scripts.base.protocols.smtp.mime-extract/extractions +++ /dev/null @@ -1,277 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - k6, k6-2, k6-3, athlon, athlon-tbird, athlon-4, athlon-xp, athlon-mp, winchip-c6, winchip2, k8, c3 and c3-2) - "windows.h", he gets all the WinAPI! If he adds "wx/wx.h", he gets all of - #included directly or indirectly)! - (available when right-clicking the class-browser - (still can be overriden by using "-c" command line parameter). - (the same filename as the project's but with extension ".layout"). If you - - Enable use of processor specific built-in functions (mmmx, sse, sse2, pni, 3dnow) - - Generate instructions for a specific machine (i386, i486, i586, i686, pentium, pentium-mmx, pentiumpro, pentium2, pentium3, pentium4, 20 - - Strip executable - -c - 20 - Instead open the file in an already launched Dev-C++. - It used to be a checkbox, allowing only two states (on or off), but there is - The user can define this in the class browser's context menu under "View mode". - Well, it adds caching to code-completion. Depending on the cache size, - a third relevant option now: "Project classes" so it didn't fit the purpose... - and selecting "View mode"). - cause of many errors (although it should be fixed by now), we are giving the - class inheritance and visibility (shows items only from files - code-completion and the user has all the commands (belonging to the files - compiler: -D__DEBUG__ - displayed in the editor when the mouse moves over a word. Since this was the - have your project under CVS control, you ''ll know why this had to happen... - he added in the cache) at his fingertips. If, for example, the user adds - include files can now be generated. - information definitions - it in the private resource) - its counterpart are highlighted - resource) - the program may take a bit longer to start-up, but provides very fast - the requested CVS action - then we even get a stack trace in the bug report! - user the option to disable this feature. - wxWindows! You get the picture... 
-* "Build priority" per-unit -* "Compile as C++" per-unit -* "Default" button in Compiler Options is back -* "Include file in compilation process" per-unit -* "Include file in linking process" per-unit -* Added "Add Library" button in Project Options -* Added "Classes" toolbar -* Added "External programs" in Tools/Environment Options (for units "Open with") -* Added "Files" tab in CVS form to allow selection of more than one file for -* Added "Open with" in project units context menu -* Added "Tip of the day" system. -* Added *working* function arguments hint -* Added CVS "login" and "logout" commands -* Added CVS commands "Add" and "Remove" -* Added ExceptionsAnalyzer. If the devcpp.map file is in the devcpp.exe directory -* Added bracket highlighting. When the caret is on a bracket, that bracket and -* Added configuration option for "Templates Directory" in "Environment Options" -* Added display of project filename, project output and a summary of the project files in Project Options General tab. -* Added doxygen-style comments in NewClass, NewMemberFunction and NewMemberVariable wizards -* Added file's date/time stamp in File/Properties window -* Added new WebUpdate module (inactive temporarily). -* Added new code for code-completion caching of files (disabled - work in progress). -* Added new compiler/linker options: 20 -* Added new file menu entry: Save Project As -* Added new option in class-browser: Use colors -* Added possibility to include in a Template the Project's directories (include, libs and ressources) -* Added support for GCC > 3.2 -* Added support for macros in the "default source code" (Tools/Editor Options/Code) -* Added support for the "interface" keyword -* Added support for the '::' member access operator in code-completion -* Added the possibility to modify the value of a variable during debugging (right click on a watch variable and select "Modify value") -* Added the possibility to specify an include directory for the code completion cache to be created at Dev-C++ first startup -* Added two new macros: and -* Allow customizing of per-unit compile command in projects -* Allow user to specify an alternate configuration file in Environment Options 20 -* Backtrace in debugging -* Big speed up in function parameters listing while editing -* Bug fixes -* Bug fixes -* Bug fixes -* Bug fixes -* Bug fixes -* Bug fixes -* Bug fixes -* Bug fixes -* Bug fixes -* Bug fixes -* Bug fixes -* Bug fixes -* Bug fixes -* Bug fixes -* Bug fixes -* Bug fixes -* Bug-fix for double quotes in devcpp.cfg file read by vUpdate -* CPU Window (still in development) -* CVS support -* Caching of result set of code-completion for speed-up. -* Changed position of compiler/linker parameters in Project Options. -* Changed tint of Class browser pictures colors to match the New Look style -* Class-parser speed-up (50% to 85% improvement timed!!!) -* Code-completion updates -* Compiler set per-project -* Compiler settings per-project -* Compiling progress window -* Current windows listing in Window menu -* Debug variable browser -* Debug variables are now resent during next debug session -* Dev-C++ now traps access violation of your programs (and of itself too ;) -* During Dev-C++ First Time COnfiguration window, users can now choose between using or not class browser and code completion features. 
-* Each project creates a _private.h file containing version -* Editor colors are initialized properly on Dev-C++ first-run -* Environment options : "Show progress window" and "Auto-close progress window" -* Error messages parsing improved -* Fixed many class browser bugs, including some that had to do with class folders. -* Fixed pre-compilation dependency checks to work correctly -* Fixed the "compiler-dirs-with-spaces" bug that crept-in in 4.9.7.0 -* Fixed the dreaded "Clock skew detected" compiler warning! -* Folders in Project and Class Browser -* Implemented "compiler sets" infrastructure to switch between different compilers easily (e.g. gcc-2.95 and gcc-3.2) -* Implemented new compiler settings framework -* Implemented search in help files for the word at cursor (context sensitive help) -* Implemented the "File/Export/Project to HTML" function. -* Improved Indent/Unindent and Remove Comment -* Improved WebUpdate module -* Improved automatic indent -* Improved code completion cache -* Improved editor -* Improved help file -* Improved installer -* Lots of bug fixes. -* Lots of bugfixes -* MSVC import now creates the folders structure of the original VC project -* Made whole bottom report control floating instead of only debug output. -* Makefile can now be customized. -* Many bug fixes -* Many bug fixes -* Many bug fixes -* Many bug fixes -* Many bug fixes -* Many bug fixes -* Many bug fixes -* Many code-completion updates. Now takes into account context, -* Modified the behaviour of the -c param : 20 -* Multi-select files in project-view (when "double-click to open" is configured in Environment Settings) -* Necessary UI changes in Project Options -* Nested folders in project view -* New "Abort compilation" button -* New WebUpdater module. -* New class browser option: "Show inherited members" -* New code tooltip display -* New debug feature for DLLs: attach to a running process -* New environment options : "watch variable under mouse" and "Report watch errors" -* New feature: compile current file only -* New option "Execution/Parameters" (and "Debug/Parameters"). -* New option in Editor Options (code-completion): Use code-completion cache. -* New option in Editor Options: Show editor hints. User can disable the hints -* New project option: Use custom Makefile. 20 -* New splash screen and association icons -* Now checks for vRoach existance when sending a crash report -* On Dev-C++ first time configuration dialog, a code completion cache of all the standard 20 -* Other bug fixes -* Possibility of changing compilers and tools filename. -* Printing settings are now saved -* Profiling support -* Project manager and debugging window (in Debug tab) can now be trasnformed into floating windows. -* Project version info (creates the relevant VERSIONINFO struct in the private -* Removed "Only show classes from current file" option in class browser settings. -* Resource errors are now reported in the Resource sheet -* Resource files are treated as ordinary files now -* Run to cursor -* Saving of custom syntax parameter group -* Send custom commands to GDB -* Separated C++ compiler options from C compiler options in Makefile (see bug report #654744) -* Separated C++ include dirs from C include dirs in Makefile (see bug report #654744) -* Separated layout info from project file. 
It is now kept in a different file -* Support XP Themes (creates the CommonControls 6.0 manifest file and includes -* Support for latest Mingw compiler system builds -* ToDo list -* Under NT, 2000 and XP, user application data directory will be used to store config files (i.e : C:\Documents and Settings\Username\Local Settings\Application Data) -* Updates in "Project Options/Files" code -* Watched Variables not in correct context are now kept and updated when it is needed -* WebUpdate should now report installation problems from PackMan -* WebUpdate will now backup downloaded DevPaks in Dev-C++\Packages directory, and Dev-C++ executable in devcpp.exe.BACKUP -* When adding debugging symbols on request, remove "-s" option from linker -* When compiling the current file only, no dependency checks are performed -* When compiling with debugging symbols, an extra definition is passed to the -* When creating a DLL, the created static lib respects now the project-defined output directory -* When running a source file in explorer, don't spawn new instance. -* Window list (in Window menu) -* XP Theme support -* added ENTER key for opening file in project browser, DEL to delete from the project. -* back to gcc 2.95.3 -* bug fixes -* bug fixes -* new update/packages checker (vUpdate) -* support for DLL application hosting, for debugging and executing DLLs under Dev-C++. -* ~300% Speed-up in class parser -Find the attachment -GPS -Hello -I send u smtp pcap file -Version 4.9.4.1 (5.0 beta 4.1): -Version 4.9.5.0 (5.0 beta 5): -Version 4.9.5.1 -Version 4.9.5.2 -Version 4.9.5.3 -Version 4.9.5.4 -Version 4.9.5.5 -Version 4.9.6.5 -Version 4.9.6.6 -Version 4.9.6.7 -Version 4.9.6.8 -Version 4.9.6.9 -Version 4.9.7.0 -Version 4.9.7.1 -Version 4.9.7.2 -Version 4.9.7.3 -Version 4.9.7.4 -Version 4.9.7.5 -Version 4.9.7.6 -Version 4.9.7.7 -Version 4.9.7.8 -Version 4.9.7.9 -Version 4.9.8.0 -Version 4.9.8.1 -Version 4.9.8.2 -Version 4.9.8.3 -Version 4.9.8.4 -Version 4.9.8.5 -Version 4.9.8.7 -Version 4.9.8.9 -Version 4.9.9.0 -Version 4.9.9.1 -version 4.9.6.1 -version 4.9.6.2 -version 4.9.6.3 -version 4.9.6.4 diff --git a/testing/btest/Baseline/scripts.base.protocols.smtp.mime-extract/filecount b/testing/btest/Baseline/scripts.base.protocols.smtp.mime-extract/filecount deleted file mode 100644 index 0cfbf08886..0000000000 --- a/testing/btest/Baseline/scripts.base.protocols.smtp.mime-extract/filecount +++ /dev/null @@ -1 +0,0 @@ -2 diff --git a/testing/btest/Baseline/scripts.base.protocols.smtp.mime-extract/smtp_entities.log b/testing/btest/Baseline/scripts.base.protocols.smtp.mime-extract/smtp_entities.log deleted file mode 100644 index 865694e8a2..0000000000 --- a/testing/btest/Baseline/scripts.base.protocols.smtp.mime-extract/smtp_entities.log +++ /dev/null @@ -1,12 +0,0 @@ -#separator \x09 -#set_separator , -#empty_field (empty) -#unset_field - -#path smtp_entities -#open 2013-06-07-19-32-56 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth filename content_len mime_type md5 extraction_file excerpt -#types time string addr port addr port count string count string string string string -1254722770.692743 arKYeMETxOg 10.10.1.4 1470 74.53.140.153 25 1 - 79 text/plain - smtp-entity-mR3f2AAKo11.dat (empty) -1254722770.692743 arKYeMETxOg 10.10.1.4 1470 74.53.140.153 25 1 - 1918 text/html - - (empty) -1254722770.692804 arKYeMETxOg 10.10.1.4 1470 74.53.140.153 25 1 NEWS.txt 10823 text/plain - smtp-entity-ZNp0KBSLByc.dat (empty) -#close 2013-06-07-19-32-56 diff --git 
a/testing/btest/Baseline/scripts.policy.frameworks.software.vulnerable/notice.log b/testing/btest/Baseline/scripts.policy.frameworks.software.vulnerable/notice.log index f2cf09cab6..54b04aafae 100644 --- a/testing/btest/Baseline/scripts.policy.frameworks.software.vulnerable/notice.log +++ b/testing/btest/Baseline/scripts.policy.frameworks.software.vulnerable/notice.log @@ -3,9 +3,9 @@ #empty_field (empty) #unset_field - #path notice -#open 2013-04-28-22-36-26 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p proto note msg sub src dst p n peer_descr actions suppress_for dropped remote_location.country_code remote_location.region remote_location.city remote_location.latitude remote_location.longitude -#types time string addr port addr port enum enum string string addr addr port count string table[enum] interval bool string string string double double -1367188586.649122 - - - - - - Software::Vulnerable_Version 1.2.3.4 is running Java 1.7.0.15 which is vulnerable. Java 1.7.0.15 1.2.3.4 - - - bro Notice::ACTION_LOG 3600.000000 F - - - - - -1367188586.649122 - - - - - - Software::Vulnerable_Version 1.2.3.5 is running Java 1.6.0.43 which is vulnerable. Java 1.6.0.43 1.2.3.5 - - - bro Notice::ACTION_LOG 3600.000000 F - - - - - -#close 2013-04-28-22-36-26 +#open 2013-07-25-19-54-45 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p fuid file_mime_type file_desc proto note msg sub src dst p n peer_descr actions suppress_for dropped remote_location.country_code remote_location.region remote_location.city remote_location.latitude remote_location.longitude +#types time string addr port addr port string string string enum enum string string addr addr port count string table[enum] interval bool string string string double double +1374782085.726121 - - - - - - - - - Software::Vulnerable_Version 1.2.3.4 is running Java 1.7.0.15 which is vulnerable. Java 1.7.0.15 1.2.3.4 - - - bro Notice::ACTION_LOG 3600.000000 F - - - - - +1374782085.726121 - - - - - - - - - Software::Vulnerable_Version 1.2.3.5 is running Java 1.6.0.43 which is vulnerable. Java 1.6.0.43 1.2.3.5 - - - bro Notice::ACTION_LOG 3600.000000 F - - - - - +#close 2013-07-25-19-54-45 diff --git a/testing/btest/istate/events-ssl.bro b/testing/btest/istate/events-ssl.bro index 249ebc3754..d227417c15 100644 --- a/testing/btest/istate/events-ssl.bro +++ b/testing/btest/istate/events-ssl.bro @@ -41,16 +41,25 @@ redef ssl_ca_certificate = "../ca_cert.pem"; redef ssl_private_key = "../bro.pem"; redef ssl_passphrase = "my-password"; +# Make sure the HTTP connection really gets out. +# (We still miss one final connection event because we shutdown before +# it gets propagated but that's ok.) +redef tcp_close_delay = 0secs; + # File-analysis fields in http.log won't get set on receiver side correctly, # one problem is with the way serialization may send a unique ID in place # of a full value and expect the remote side to associate that unique ID with -# a value it received at an earlier time. So sometimes modifications the sender -# makes to the value aren't seen on the receiver (in this case, the mime_type -# field). -event file_new(f: fa_file) &priority=10 +# a value it received at an earlier time. So sometimes modifications the sender# makes to the value aren't seen on the receiver. +function myfh(c: connection, is_orig: bool): string { - delete f$mime_type; - FileAnalysis::stop(f); + return ""; + } + +event bro_init() + { + # Ignore all http files. 
+ Files::register_protocol(Analyzer::ANALYZER_HTTP, + [$get_file_handle = myfh]); } @TEST-END-FILE diff --git a/testing/btest/istate/events.bro b/testing/btest/istate/events.bro index 21f46cf4b3..1edf14fee7 100644 --- a/testing/btest/istate/events.bro +++ b/testing/btest/istate/events.bro @@ -39,12 +39,17 @@ redef tcp_close_delay = 0secs; # File-analysis fields in http.log won't get set on receiver side correctly, # one problem is with the way serialization may send a unique ID in place # of a full value and expect the remote side to associate that unique ID with -# a value it received at an earlier time. So sometimes modifications the sender# makes to the value aren't seen on the receiver (in this case, the mime_type -# field). -event file_new(f: fa_file) &priority=10 +# a value it received at an earlier time. So sometimes modifications the sender# makes to the value aren't seen on the receiver. +function myfh(c: connection, is_orig: bool): string { - delete f$mime_type; - FileAnalysis::stop(f); + return ""; + } + +event bro_init() + { + # Ignore all http files. + Files::register_protocol(Analyzer::ANALYZER_HTTP, + [$get_file_handle = myfh]); } @TEST-END-FILE diff --git a/testing/btest/scripts/base/frameworks/file-analysis/bifs/remove_action.bro b/testing/btest/scripts/base/frameworks/file-analysis/bifs/remove_action.bro index e31abe5ea3..a3704618bd 100644 --- a/testing/btest/scripts/base/frameworks/file-analysis/bifs/remove_action.bro +++ b/testing/btest/scripts/base/frameworks/file-analysis/bifs/remove_action.bro @@ -13,6 +13,6 @@ event file_new(f: fa_file) &priority=-10 for ( tag in test_file_analyzers ) Files::remove_analyzer(f, tag); local filename = test_get_file_name(f); - Files::remove_analyzer(f, [$tag=Files::ANALYZER_EXTRACT, - $extract_filename=filename]); + Files::remove_analyzer(f, Files::ANALYZER_EXTRACT, + [$extract_filename=filename]); } diff --git a/testing/btest/scripts/base/frameworks/file-analysis/irc.bro b/testing/btest/scripts/base/frameworks/file-analysis/irc.bro index 2b93a59a8f..9fd8e06613 100644 --- a/testing/btest/scripts/base/frameworks/file-analysis/irc.bro +++ b/testing/btest/scripts/base/frameworks/file-analysis/irc.bro @@ -4,7 +4,17 @@ redef test_file_analysis_source = "IRC_DATA"; -redef test_get_file_name = function(f: fa_file): string +global first: bool = T; + +function myfile(f: fa_file): string { - return "thefile"; - }; + if ( first ) + { + first = F; + return "thefile"; + } + else + return ""; + } + +redef test_get_file_name = myfile; diff --git a/testing/btest/scripts/base/frameworks/file-analysis/logging.bro b/testing/btest/scripts/base/frameworks/file-analysis/logging.bro index 9792017962..1d1f5fd721 100644 --- a/testing/btest/scripts/base/frameworks/file-analysis/logging.bro +++ b/testing/btest/scripts/base/frameworks/file-analysis/logging.bro @@ -1,5 +1,5 @@ # @TEST-EXEC: bro -r $TRACES/http/get.trace $SCRIPTS/file-analysis-test.bro %INPUT -# @TEST-EXEC: btest-diff file_analysis.log +# @TEST-EXEC: btest-diff files.log redef test_file_analysis_source = "HTTP"; diff --git a/testing/btest/scripts/base/protocols/ftp/ftp-extract.bro b/testing/btest/scripts/base/protocols/ftp/ftp-extract.bro deleted file mode 100644 index 8cbacdbf6f..0000000000 --- a/testing/btest/scripts/base/protocols/ftp/ftp-extract.bro +++ /dev/null @@ -1,10 +0,0 @@ -# This tests FTP file extraction. 
-# -# @TEST-EXEC: bro -r $TRACES/ftp/ipv4.trace %INPUT -# @TEST-EXEC: btest-diff conn.log -# @TEST-EXEC: btest-diff ftp.log -# @TEST-EXEC: cat ftp-item-*.dat | sort > extractions -# @TEST-EXEC: btest-diff extractions - -redef FTP::logged_commands += {"LIST"}; -redef FTP::extract_file_types=/.*/; diff --git a/testing/btest/scripts/base/protocols/http/http-extract-files.bro b/testing/btest/scripts/base/protocols/http/http-extract-files.bro deleted file mode 100644 index 6156009821..0000000000 --- a/testing/btest/scripts/base/protocols/http/http-extract-files.bro +++ /dev/null @@ -1,6 +0,0 @@ -# @TEST-EXEC: bro -C -r $TRACES/web.trace %INPUT -# @TEST-EXEC: btest-diff http.log -# @TEST-EXEC: mv http-item-*.dat http-item.dat -# @TEST-EXEC: btest-diff http-item.dat - -redef HTTP::extract_file_types += /text\/html/; diff --git a/testing/btest/scripts/base/protocols/http/http-mime-and-md5.bro b/testing/btest/scripts/base/protocols/http/http-mime-and-md5.bro deleted file mode 100644 index b35e491b4d..0000000000 --- a/testing/btest/scripts/base/protocols/http/http-mime-and-md5.bro +++ /dev/null @@ -1,6 +0,0 @@ -# This tests md5 calculation for a specified mime type. - -# @TEST-EXEC: bro -r $TRACES/http/pipelined-requests.trace %INPUT > output -# @TEST-EXEC: btest-diff http.log - -redef HTTP::generate_md5 += /image\/png/; diff --git a/testing/btest/scripts/base/protocols/http/multipart-extract.bro b/testing/btest/scripts/base/protocols/http/multipart-extract.bro index c2789750a3..a919a844b2 100644 --- a/testing/btest/scripts/base/protocols/http/multipart-extract.bro +++ b/testing/btest/scripts/base/protocols/http/multipart-extract.bro @@ -1,5 +1,9 @@ # @TEST-EXEC: bro -C -r $TRACES/http/multipart.trace %INPUT # @TEST-EXEC: btest-diff http.log -# @TEST-EXEC: cat http-item-* | sort > extractions +# @TEST-EXEC: cat extract_files/http-item-* | sort > extractions -redef HTTP::extract_file_types += /.*/; +event file_new(f: fa_file) + { + local fname = fmt("http-item-%s", f$id); + Files::add_analyzer(f, Files::ANALYZER_EXTRACT, [$extract_filename=fname]); + } diff --git a/testing/btest/scripts/base/protocols/irc/dcc-extract.test b/testing/btest/scripts/base/protocols/irc/dcc-extract.test deleted file mode 100644 index cbfc6890da..0000000000 --- a/testing/btest/scripts/base/protocols/irc/dcc-extract.test +++ /dev/null @@ -1,11 +0,0 @@ -# This tests that the contents of a DCC transfer negotiated with IRC can be -# correctly extracted. 
- -# @TEST-EXEC: bro -r $TRACES/irc-dcc-send.trace %INPUT -# @TEST-EXEC: btest-diff irc.log -# @TEST-EXEC: mv irc-dcc-item-*.dat irc-dcc-item.dat -# @TEST-EXEC: btest-diff irc-dcc-item.dat -# @TEST-EXEC: bro -r $TRACES/irc-dcc-send.trace %INPUT IRC::extraction_prefix="test" -# @TEST-EXEC: test -e test-*.dat - -redef IRC::extract_file_types=/.*/; diff --git a/testing/btest/scripts/base/protocols/smtp/mime-extract.test b/testing/btest/scripts/base/protocols/smtp/mime-extract.test deleted file mode 100644 index 0caa5d530c..0000000000 --- a/testing/btest/scripts/base/protocols/smtp/mime-extract.test +++ /dev/null @@ -1,11 +0,0 @@ -# @TEST-EXEC: bro -r $TRACES/smtp.trace %INPUT -# @TEST-EXEC: btest-diff smtp_entities.log -# @TEST-EXEC: cat smtp-entity-*.dat | sort > extractions -# @TEST-EXEC: btest-diff extractions -# @TEST-EXEC: bro -r $TRACES/smtp.trace %INPUT SMTP::extraction_prefix="test" -# @TEST-EXEC: cnt=0 && for f in test-*.dat; do cnt=$((cnt+1)); done && echo $cnt >filecount -# @TEST-EXEC: btest-diff filecount - -@load base/protocols/smtp - -redef SMTP::extract_file_types=/text\/plain/; diff --git a/testing/external/subdir-btest.cfg b/testing/external/subdir-btest.cfg index b631ba2457..31fce50adc 100644 --- a/testing/external/subdir-btest.cfg +++ b/testing/external/subdir-btest.cfg @@ -7,7 +7,7 @@ IgnoreFiles = *.tmp *.swp #* *.trace .gitignore *.skeleton [environment] BROPATH=`bash -c %(testbase)s/../../../build/bro-path-dev`:%(testbase)s/../scripts -BROMAGIC=%(testbase)s/../../../magic +BROMAGIC=%(testbase)s/../../magic/database BRO_SEED_FILE=%(testbase)s/../random.seed TZ=UTC LC_ALL=C diff --git a/testing/scripts/file-analysis-test.bro b/testing/scripts/file-analysis-test.bro index 8fe78b218e..d84fadae5c 100644 --- a/testing/scripts/file-analysis-test.bro +++ b/testing/scripts/file-analysis-test.bro @@ -1,3 +1,7 @@ +@load base/files/extract +@load base/files/hash + +redef FileExtract::prefix = "./"; global test_file_analysis_source: string = "" &redef; From fb029617a4a8695f5ffffa75721ff978eed58d35 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Fri, 26 Jul 2013 16:38:18 -0400 Subject: [PATCH 110/118] Update the last two btest FAF tests. - Small changes were done to the ftp log. 
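The deleted tests above relied on per-protocol redefs such as SMTP::extract_file_types and IRC::extraction_prefix; their replacements drive extraction through the generic file analysis API instead. A minimal sketch of that pattern, using only the calls already shown in these diffs (the "extract-%s" file name is illustrative, not taken from any test):

    @load base/files/extract

    # Attach the extraction analyzer to every file, independent of the
    # carrying protocol (HTTP, SMTP, IRC DCC, FTP, ...).
    event file_new(f: fa_file)
        {
        local fname = fmt("extract-%s", f$id);
        Files::add_analyzer(f, Files::ANALYZER_EXTRACT, [$extract_filename=fname]);
        }

Extracted files then end up under the directory given by FileExtract::prefix, which the shared test script above redefines to "./".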
--- .../ftp.log | 20 ++++++++--------- .../ftp.log | 22 +++++++++---------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/testing/btest/Baseline/scripts.base.protocols.ftp.ftp-ipv4/ftp.log b/testing/btest/Baseline/scripts.base.protocols.ftp.ftp-ipv4/ftp.log index afa4c97830..b75d6955ba 100644 --- a/testing/btest/Baseline/scripts.base.protocols.ftp.ftp-ipv4/ftp.log +++ b/testing/btest/Baseline/scripts.base.protocols.ftp.ftp-ipv4/ftp.log @@ -3,13 +3,13 @@ #empty_field (empty) #unset_field - #path ftp -#open 2013-04-12-16-32-25 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p user password command arg mime_type file_size reply_code reply_msg tags data_channel.passive data_channel.orig_h data_channel.resp_h data_channel.resp_p extraction_file -#types time string addr port addr port string string string string string count count string table[string] bool addr addr port string -1329843175.680248 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test PASV - - - 227 Entering Passive Mode (199,233,217,249,221,90) (empty) T 141.142.220.235 199.233.217.249 56666 - -1329843179.815947 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test PASV - - - 227 Entering Passive Mode (199,233,217,249,221,91) (empty) T 141.142.220.235 199.233.217.249 56667 - -1329843179.926563 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test RETR ftp://199.233.217.249/./robots.txt text/plain 77 226 Transfer complete. (empty) - - - - - -1329843194.040188 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test PORT 141,142,220,235,131,46 - - 200 PORT command successful. (empty) F 199.233.217.249 141.142.220.235 33582 - -1329843197.672179 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test PORT 141,142,220,235,147,203 - - 200 PORT command successful. (empty) F 199.233.217.249 141.142.220.235 37835 - -1329843197.727769 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test RETR ftp://199.233.217.249/./robots.txt text/plain 77 226 Transfer complete. (empty) - - - - - -#close 2013-04-12-16-32-25 +#open 2013-07-26-20-37-01 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p user password command arg mime_type file_size reply_code reply_msg data_channel.passive data_channel.orig_h data_channel.resp_h data_channel.resp_p fuid +#types time string addr port addr port string string string string string count count string bool addr addr port string +1329843175.680248 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test PASV - - - 227 Entering Passive Mode (199,233,217,249,221,90) T 141.142.220.235 199.233.217.249 56666 - +1329843179.815947 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test PASV - - - 227 Entering Passive Mode (199,233,217,249,221,91) T 141.142.220.235 199.233.217.249 56667 - +1329843179.926563 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test RETR robots.txt text/plain 77 226 Transfer complete. - - - - 4VAnSiNGSQh +1329843194.040188 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test PORT 141,142,220,235,131,46 - - 200 PORT command successful. F 199.233.217.249 141.142.220.235 33582 4VAnSiNGSQh +1329843197.672179 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test PORT 141,142,220,235,147,203 - - 200 PORT command successful. F 199.233.217.249 141.142.220.235 37835 4VAnSiNGSQh +1329843197.727769 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test RETR robots.txt text/plain 77 226 Transfer complete. 
- - - - aJg8mtdsS86 +#close 2013-07-26-20-37-01 diff --git a/testing/btest/Baseline/scripts.base.protocols.ftp.ftp-ipv6/ftp.log b/testing/btest/Baseline/scripts.base.protocols.ftp.ftp-ipv6/ftp.log index 85207806c4..4177c52e1f 100644 --- a/testing/btest/Baseline/scripts.base.protocols.ftp.ftp-ipv6/ftp.log +++ b/testing/btest/Baseline/scripts.base.protocols.ftp.ftp-ipv6/ftp.log @@ -3,14 +3,14 @@ #empty_field (empty) #unset_field - #path ftp -#open 2013-04-12-16-32-25 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p user password command arg mime_type file_size reply_code reply_msg tags data_channel.passive data_channel.orig_h data_channel.resp_h data_channel.resp_p extraction_file -#types time string addr port addr port string string string string string count count string table[string] bool addr addr port string -1329327783.207785 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test EPSV - - - 229 Entering Extended Passive Mode (|||57086|) (empty) T 2001:470:1f11:81f:c999:d94:aa7c:2e3e 2001:470:4867:99::21 57086 - -1329327786.415755 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test EPSV - - - 229 Entering Extended Passive Mode (|||57087|) (empty) T 2001:470:1f11:81f:c999:d94:aa7c:2e3e 2001:470:4867:99::21 57087 - -1329327787.180814 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test EPSV - - - 229 Entering Extended Passive Mode (|||57088|) (empty) T 2001:470:1f11:81f:c999:d94:aa7c:2e3e 2001:470:4867:99::21 57088 - -1329327787.396984 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test RETR ftp://[2001:470:4867:99::21]/robots.txt - 77 226 Transfer complete. (empty) - - - - - -1329327795.355248 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test EPRT |2|2001:470:1f11:81f:c999:d94:aa7c:2e3e|49189| - - 200 EPRT command successful. (empty) F 2001:470:4867:99::21 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49189 - -1329327795.463946 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test RETR ftp://[2001:470:4867:99::21]/robots.txt - 77 226 Transfer complete. (empty) - - - - - -1329327799.799327 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test EPRT |2|2001:470:1f11:81f:c999:d94:aa7c:2e3e|49190| - - 200 EPRT command successful. 
(empty) F 2001:470:4867:99::21 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49190 - -#close 2013-04-12-16-32-25 +#open 2013-07-26-20-37-22 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p user password command arg mime_type file_size reply_code reply_msg data_channel.passive data_channel.orig_h data_channel.resp_h data_channel.resp_p fuid +#types time string addr port addr port string string string string string count count string bool addr addr port string +1329327783.207785 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test EPSV - - - 229 Entering Extended Passive Mode (|||57086|) T 2001:470:1f11:81f:c999:d94:aa7c:2e3e 2001:470:4867:99::21 57086 - +1329327786.415755 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test EPSV - - - 229 Entering Extended Passive Mode (|||57087|) T 2001:470:1f11:81f:c999:d94:aa7c:2e3e 2001:470:4867:99::21 57087 - +1329327787.180814 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test EPSV - - - 229 Entering Extended Passive Mode (|||57088|) T 2001:470:1f11:81f:c999:d94:aa7c:2e3e 2001:470:4867:99::21 57088 - +1329327787.396984 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test RETR robots.txt - 77 226 Transfer complete. - - - - - +1329327795.355248 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test EPRT |2|2001:470:1f11:81f:c999:d94:aa7c:2e3e|49189| - - 200 EPRT command successful. F 2001:470:4867:99::21 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49189 4YhNtGvCehl +1329327795.463946 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test RETR robots.txt - 77 226 Transfer complete. - - - - 4YhNtGvCehl +1329327799.799327 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test EPRT |2|2001:470:1f11:81f:c999:d94:aa7c:2e3e|49190| - - 200 EPRT command successful. F 2001:470:4867:99::21 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49190 4YhNtGvCehl +#close 2013-07-26-20-37-22 From 1238e5bcf2b6b05471a2b0599c75f9a9e6a4a5ed Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Fri, 26 Jul 2013 21:50:19 -0400 Subject: [PATCH 111/118] Undoing the FTP tests I updated earlier. - Fixed the external tests btest config too. 
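The change below re-adds URL construction for file-transfer commands (those in file_cmds), so the arg column of ftp.log again carries a full ftp:// URL rather than the bare path. The real helper is build_url_ftp() from scripts/base/protocols/ftp/utils.bro, whose body is not part of this patch; the following is only a rough sketch of the idea, and its field usage is an assumption rather than the actual implementation:

    # Sketch only: compose an ftp:// URL from the responder address and the
    # command argument. addr_to_uri() brackets IPv6 addresses, matching
    # baseline entries such as ftp://[2001:470:4867:99::21]/robots.txt.
    function sketch_build_url_ftp(rec: FTP::Info): string
        {
        return fmt("ftp://%s/%s", addr_to_uri(rec$id$resp_h), rec$cmdarg$arg);
        }

Judging from the restored baselines (e.g. ftp://199.233.217.249/./robots.txt), the real implementation also folds in the session's current working directory, which this sketch omits.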
--- scripts/base/protocols/ftp/main.bro | 8 +++++++- .../Baseline/scripts.base.protocols.ftp.ftp-ipv4/ftp.log | 8 ++++---- .../Baseline/scripts.base.protocols.ftp.ftp-ipv6/ftp.log | 8 ++++---- testing/external/subdir-btest.cfg | 2 +- 4 files changed, 16 insertions(+), 10 deletions(-) diff --git a/scripts/base/protocols/ftp/main.bro b/scripts/base/protocols/ftp/main.bro index f525c7792b..c9549a14ec 100644 --- a/scripts/base/protocols/ftp/main.bro +++ b/scripts/base/protocols/ftp/main.bro @@ -102,6 +102,8 @@ export { global log_ftp: event(rec: Info); } +@load ./utils + # Add the state tracking information variable to the connection record redef record connection += { ftp: Info &optional; @@ -171,7 +173,11 @@ function ftp_message(s: Info) { s$ts=s$cmdarg$ts; s$command=s$cmdarg$cmd; - s$arg=s$cmdarg$arg; + + s$arg = s$cmdarg$arg; + if ( s$cmdarg$cmd in file_cmds ) + s$arg = build_url_ftp(s); + if ( s$arg == "" ) delete s$arg; diff --git a/testing/btest/Baseline/scripts.base.protocols.ftp.ftp-ipv4/ftp.log b/testing/btest/Baseline/scripts.base.protocols.ftp.ftp-ipv4/ftp.log index b75d6955ba..4cc6d67761 100644 --- a/testing/btest/Baseline/scripts.base.protocols.ftp.ftp-ipv4/ftp.log +++ b/testing/btest/Baseline/scripts.base.protocols.ftp.ftp-ipv4/ftp.log @@ -3,13 +3,13 @@ #empty_field (empty) #unset_field - #path ftp -#open 2013-07-26-20-37-01 +#open 2013-07-27-01-49-02 #fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p user password command arg mime_type file_size reply_code reply_msg data_channel.passive data_channel.orig_h data_channel.resp_h data_channel.resp_p fuid #types time string addr port addr port string string string string string count count string bool addr addr port string 1329843175.680248 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test PASV - - - 227 Entering Passive Mode (199,233,217,249,221,90) T 141.142.220.235 199.233.217.249 56666 - 1329843179.815947 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test PASV - - - 227 Entering Passive Mode (199,233,217,249,221,91) T 141.142.220.235 199.233.217.249 56667 - -1329843179.926563 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test RETR robots.txt text/plain 77 226 Transfer complete. - - - - 4VAnSiNGSQh +1329843179.926563 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test RETR ftp://199.233.217.249/./robots.txt text/plain 77 226 Transfer complete. - - - - 4VAnSiNGSQh 1329843194.040188 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test PORT 141,142,220,235,131,46 - - 200 PORT command successful. F 199.233.217.249 141.142.220.235 33582 4VAnSiNGSQh 1329843197.672179 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test PORT 141,142,220,235,147,203 - - 200 PORT command successful. F 199.233.217.249 141.142.220.235 37835 4VAnSiNGSQh -1329843197.727769 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test RETR robots.txt text/plain 77 226 Transfer complete. - - - - aJg8mtdsS86 -#close 2013-07-26-20-37-01 +1329843197.727769 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test RETR ftp://199.233.217.249/./robots.txt text/plain 77 226 Transfer complete. 
- - - - aJg8mtdsS86 +#close 2013-07-27-01-49-02 diff --git a/testing/btest/Baseline/scripts.base.protocols.ftp.ftp-ipv6/ftp.log b/testing/btest/Baseline/scripts.base.protocols.ftp.ftp-ipv6/ftp.log index 4177c52e1f..d6f57bcf45 100644 --- a/testing/btest/Baseline/scripts.base.protocols.ftp.ftp-ipv6/ftp.log +++ b/testing/btest/Baseline/scripts.base.protocols.ftp.ftp-ipv6/ftp.log @@ -3,14 +3,14 @@ #empty_field (empty) #unset_field - #path ftp -#open 2013-07-26-20-37-22 +#open 2013-07-27-01-49-13 #fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p user password command arg mime_type file_size reply_code reply_msg data_channel.passive data_channel.orig_h data_channel.resp_h data_channel.resp_p fuid #types time string addr port addr port string string string string string count count string bool addr addr port string 1329327783.207785 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test EPSV - - - 229 Entering Extended Passive Mode (|||57086|) T 2001:470:1f11:81f:c999:d94:aa7c:2e3e 2001:470:4867:99::21 57086 - 1329327786.415755 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test EPSV - - - 229 Entering Extended Passive Mode (|||57087|) T 2001:470:1f11:81f:c999:d94:aa7c:2e3e 2001:470:4867:99::21 57087 - 1329327787.180814 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test EPSV - - - 229 Entering Extended Passive Mode (|||57088|) T 2001:470:1f11:81f:c999:d94:aa7c:2e3e 2001:470:4867:99::21 57088 - -1329327787.396984 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test RETR robots.txt - 77 226 Transfer complete. - - - - - +1329327787.396984 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test RETR ftp://[2001:470:4867:99::21]/robots.txt - 77 226 Transfer complete. - - - - - 1329327795.355248 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test EPRT |2|2001:470:1f11:81f:c999:d94:aa7c:2e3e|49189| - - 200 EPRT command successful. F 2001:470:4867:99::21 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49189 4YhNtGvCehl -1329327795.463946 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test RETR robots.txt - 77 226 Transfer complete. - - - - 4YhNtGvCehl +1329327795.463946 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test RETR ftp://[2001:470:4867:99::21]/robots.txt - 77 226 Transfer complete. - - - - 4YhNtGvCehl 1329327799.799327 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test EPRT |2|2001:470:1f11:81f:c999:d94:aa7c:2e3e|49190| - - 200 EPRT command successful. 
F 2001:470:4867:99::21 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49190 4YhNtGvCehl -#close 2013-07-26-20-37-22 +#close 2013-07-27-01-49-13 diff --git a/testing/external/subdir-btest.cfg b/testing/external/subdir-btest.cfg index 31fce50adc..fb5873418a 100644 --- a/testing/external/subdir-btest.cfg +++ b/testing/external/subdir-btest.cfg @@ -7,7 +7,7 @@ IgnoreFiles = *.tmp *.swp #* *.trace .gitignore *.skeleton [environment] BROPATH=`bash -c %(testbase)s/../../../build/bro-path-dev`:%(testbase)s/../scripts -BROMAGIC=%(testbase)s/../../magic/database +BROMAGIC=%(testbase)s/../../../magic/database BRO_SEED_FILE=%(testbase)s/../random.seed TZ=UTC LC_ALL=C From 32f1c736f7d425b0d03deb93d5d057075737c3c1 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Mon, 29 Jul 2013 16:40:16 -0400 Subject: [PATCH 112/118] Some script reorg and a new intel extension script. - policy/frameworks/intel/seen is the new location for the scripts that push data into the intel framework for checking. - The new policy/frameworks/intel/do_notice script adds an example mechanism for data driven notices. --- doc/intel.rst | 4 +- doc/scripts/DocSourcesList.cmake | 19 ++++---- scripts/base/frameworks/intel/main.bro | 3 -- scripts/policy/frameworks/intel/do_notice.bro | 44 +++++++++++++++++++ .../frameworks/intel/{ => seen}/__load__.bro | 0 .../intel/{ => seen}/conn-established.bro | 0 .../frameworks/intel/{ => seen}/dns.bro | 0 .../intel/{ => seen}/http-host-header.bro | 0 .../frameworks/intel/{ => seen}/http-url.bro | 0 .../intel/{ => seen}/http-user-agents.bro | 0 .../intel/{ => seen}/smtp-url-extraction.bro | 0 .../frameworks/intel/{ => seen}/smtp.bro | 0 .../frameworks/intel/{ => seen}/ssl.bro | 0 .../intel/{ => seen}/where-locations.bro | 0 scripts/test-all-policy.bro | 21 ++++----- 15 files changed, 67 insertions(+), 24 deletions(-) create mode 100644 scripts/policy/frameworks/intel/do_notice.bro rename scripts/policy/frameworks/intel/{ => seen}/__load__.bro (100%) rename scripts/policy/frameworks/intel/{ => seen}/conn-established.bro (100%) rename scripts/policy/frameworks/intel/{ => seen}/dns.bro (100%) rename scripts/policy/frameworks/intel/{ => seen}/http-host-header.bro (100%) rename scripts/policy/frameworks/intel/{ => seen}/http-url.bro (100%) rename scripts/policy/frameworks/intel/{ => seen}/http-user-agents.bro (100%) rename scripts/policy/frameworks/intel/{ => seen}/smtp-url-extraction.bro (100%) rename scripts/policy/frameworks/intel/{ => seen}/smtp.bro (100%) rename scripts/policy/frameworks/intel/{ => seen}/ssl.bro (100%) rename scripts/policy/frameworks/intel/{ => seen}/where-locations.bro (100%) diff --git a/doc/intel.rst b/doc/intel.rst index 2a59a98974..787524a417 100644 --- a/doc/intel.rst +++ b/doc/intel.rst @@ -27,7 +27,7 @@ Quick Start Load the package of scripts that sends data into the Intelligence Framework to be checked by loading this script in local.bro:: - @load policy/frameworks/intel + @load policy/frameworks/intel/seen Refer to the "Loading Intelligence" section below to see the format for Intelligence Framework text files, then load those text files with @@ -100,7 +100,7 @@ The full package of hook scripts that Bro ships with for sending this "seen" data into the intelligence framework can be loading by adding this line to local.bro:: - @load policy/frameworks/intel + @load policy/frameworks/intel/seen Intelligence Matches ******************** diff --git a/doc/scripts/DocSourcesList.cmake b/doc/scripts/DocSourcesList.cmake index 26a88027ef..f507172161 100644 --- a/doc/scripts/DocSourcesList.cmake 
+++ b/doc/scripts/DocSourcesList.cmake @@ -183,15 +183,16 @@ rest_target(${psd} policy/frameworks/control/controllee.bro) rest_target(${psd} policy/frameworks/control/controller.bro) rest_target(${psd} policy/frameworks/dpd/detect-protocols.bro) rest_target(${psd} policy/frameworks/dpd/packet-segment-logging.bro) -rest_target(${psd} policy/frameworks/intel/conn-established.bro) -rest_target(${psd} policy/frameworks/intel/dns.bro) -rest_target(${psd} policy/frameworks/intel/http-host-header.bro) -rest_target(${psd} policy/frameworks/intel/http-url.bro) -rest_target(${psd} policy/frameworks/intel/http-user-agents.bro) -rest_target(${psd} policy/frameworks/intel/smtp-url-extraction.bro) -rest_target(${psd} policy/frameworks/intel/smtp.bro) -rest_target(${psd} policy/frameworks/intel/ssl.bro) -rest_target(${psd} policy/frameworks/intel/where-locations.bro) +rest_target(${psd} policy/frameworks/intel/do_notice.bro) +rest_target(${psd} policy/frameworks/intel/seen/conn-established.bro) +rest_target(${psd} policy/frameworks/intel/seen/dns.bro) +rest_target(${psd} policy/frameworks/intel/seen/http-host-header.bro) +rest_target(${psd} policy/frameworks/intel/seen/http-url.bro) +rest_target(${psd} policy/frameworks/intel/seen/http-user-agents.bro) +rest_target(${psd} policy/frameworks/intel/seen/smtp-url-extraction.bro) +rest_target(${psd} policy/frameworks/intel/seen/smtp.bro) +rest_target(${psd} policy/frameworks/intel/seen/ssl.bro) +rest_target(${psd} policy/frameworks/intel/seen/where-locations.bro) rest_target(${psd} policy/frameworks/packet-filter/shunt.bro) rest_target(${psd} policy/frameworks/software/version-changes.bro) rest_target(${psd} policy/frameworks/software/vulnerable.bro) diff --git a/scripts/base/frameworks/intel/main.bro b/scripts/base/frameworks/intel/main.bro index 1b740f538d..a201a7a041 100644 --- a/scripts/base/frameworks/intel/main.bro +++ b/scripts/base/frameworks/intel/main.bro @@ -63,9 +63,6 @@ export { IN_ANYWHERE, }; - ## The $host field and combination of $str and $str_type fields are mutually - ## exclusive. These records *must* represent either an IP address being - ## seen or a string being seen. type Seen: record { ## The string if the data is about a string. indicator: string &log &optional; diff --git a/scripts/policy/frameworks/intel/do_notice.bro b/scripts/policy/frameworks/intel/do_notice.bro new file mode 100644 index 0000000000..720e29c35c --- /dev/null +++ b/scripts/policy/frameworks/intel/do_notice.bro @@ -0,0 +1,44 @@ + +@load base/frameworks/intel +@load base/frameworks/notice + +module Intel; + +export { + redef enum Notice::Type += { + ## Intel::Notice is a notice that happens when an intelligence + ## indicator is denoted to be notice-worthy. + Intel::Notice + }; + + redef record Intel::MetaData += { + ## A boolean value to allow the data itself to represent + ## if the indicator that this metadata is attached to + ## is notice worthy. + do_notice: bool &default=F; + + ## Restrictions on when notices are created to only create + ## them if the do_notice field is T and the notice was + ## seen in the indicated location. + if_in: Intel::Where &optional; + }; +} + +event Intel::match(s: Seen, items: set[Item]) + { + for ( item in items ) + { + if ( item$meta$do_notice && + (! 
item$meta?$if_in || s$where == item$meta$if_in) ) + { + local n = Notice::Info($note=Intel::Notice, + $msg=fmt("Intel hit on %s at %s", s$indicator, s$where), + $sub=s$indicator); + + if ( s?$conn ) + n$conn = s$conn; + + NOTICE(n); + } + } + } diff --git a/scripts/policy/frameworks/intel/__load__.bro b/scripts/policy/frameworks/intel/seen/__load__.bro similarity index 100% rename from scripts/policy/frameworks/intel/__load__.bro rename to scripts/policy/frameworks/intel/seen/__load__.bro diff --git a/scripts/policy/frameworks/intel/conn-established.bro b/scripts/policy/frameworks/intel/seen/conn-established.bro similarity index 100% rename from scripts/policy/frameworks/intel/conn-established.bro rename to scripts/policy/frameworks/intel/seen/conn-established.bro diff --git a/scripts/policy/frameworks/intel/dns.bro b/scripts/policy/frameworks/intel/seen/dns.bro similarity index 100% rename from scripts/policy/frameworks/intel/dns.bro rename to scripts/policy/frameworks/intel/seen/dns.bro diff --git a/scripts/policy/frameworks/intel/http-host-header.bro b/scripts/policy/frameworks/intel/seen/http-host-header.bro similarity index 100% rename from scripts/policy/frameworks/intel/http-host-header.bro rename to scripts/policy/frameworks/intel/seen/http-host-header.bro diff --git a/scripts/policy/frameworks/intel/http-url.bro b/scripts/policy/frameworks/intel/seen/http-url.bro similarity index 100% rename from scripts/policy/frameworks/intel/http-url.bro rename to scripts/policy/frameworks/intel/seen/http-url.bro diff --git a/scripts/policy/frameworks/intel/http-user-agents.bro b/scripts/policy/frameworks/intel/seen/http-user-agents.bro similarity index 100% rename from scripts/policy/frameworks/intel/http-user-agents.bro rename to scripts/policy/frameworks/intel/seen/http-user-agents.bro diff --git a/scripts/policy/frameworks/intel/smtp-url-extraction.bro b/scripts/policy/frameworks/intel/seen/smtp-url-extraction.bro similarity index 100% rename from scripts/policy/frameworks/intel/smtp-url-extraction.bro rename to scripts/policy/frameworks/intel/seen/smtp-url-extraction.bro diff --git a/scripts/policy/frameworks/intel/smtp.bro b/scripts/policy/frameworks/intel/seen/smtp.bro similarity index 100% rename from scripts/policy/frameworks/intel/smtp.bro rename to scripts/policy/frameworks/intel/seen/smtp.bro diff --git a/scripts/policy/frameworks/intel/ssl.bro b/scripts/policy/frameworks/intel/seen/ssl.bro similarity index 100% rename from scripts/policy/frameworks/intel/ssl.bro rename to scripts/policy/frameworks/intel/seen/ssl.bro diff --git a/scripts/policy/frameworks/intel/where-locations.bro b/scripts/policy/frameworks/intel/seen/where-locations.bro similarity index 100% rename from scripts/policy/frameworks/intel/where-locations.bro rename to scripts/policy/frameworks/intel/seen/where-locations.bro diff --git a/scripts/test-all-policy.bro b/scripts/test-all-policy.bro index 1fd34d6f2f..809fc1d1ec 100644 --- a/scripts/test-all-policy.bro +++ b/scripts/test-all-policy.bro @@ -14,16 +14,17 @@ # @load frameworks/control/controller.bro @load frameworks/dpd/detect-protocols.bro @load frameworks/dpd/packet-segment-logging.bro -@load frameworks/intel/__load__.bro -@load frameworks/intel/conn-established.bro -@load frameworks/intel/dns.bro -@load frameworks/intel/http-host-header.bro -@load frameworks/intel/http-url.bro -@load frameworks/intel/http-user-agents.bro -@load frameworks/intel/smtp-url-extraction.bro -@load frameworks/intel/smtp.bro -@load frameworks/intel/ssl.bro -@load 
frameworks/intel/where-locations.bro +@load frameworks/intel/do_notice.bro +@load frameworks/intel/seen/__load__.bro +@load frameworks/intel/seen/conn-established.bro +@load frameworks/intel/seen/dns.bro +@load frameworks/intel/seen/http-host-header.bro +@load frameworks/intel/seen/http-url.bro +@load frameworks/intel/seen/http-user-agents.bro +@load frameworks/intel/seen/smtp-url-extraction.bro +@load frameworks/intel/seen/smtp.bro +@load frameworks/intel/seen/ssl.bro +@load frameworks/intel/seen/where-locations.bro @load frameworks/packet-filter/shunt.bro @load frameworks/software/version-changes.bro @load frameworks/software/vulnerable.bro From 64fc80d7e4a4c1a653a16bf3d3892c50982fcffa Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Thu, 25 Jul 2013 13:31:57 -0700 Subject: [PATCH 113/118] Adding a trace with a DNSKEY RR. Still had this sitting in my inbox, but seems Bro is doing everything right. --- CHANGES | 4 ++++ VERSION | 2 +- .../scripts.base.protocols.dns.dns-key/dns.log | 10 ++++++++++ testing/btest/Traces/dns-dnskey.trace | Bin 0 -> 1110 bytes .../btest/scripts/base/protocols/dns/dns-key.bro | 4 ++++ 5 files changed, 19 insertions(+), 1 deletion(-) create mode 100644 testing/btest/Baseline/scripts.base.protocols.dns.dns-key/dns.log create mode 100644 testing/btest/Traces/dns-dnskey.trace create mode 100644 testing/btest/scripts/base/protocols/dns/dns-key.bro diff --git a/CHANGES b/CHANGES index f4b7e43a7e..0c7235bd47 100644 --- a/CHANGES +++ b/CHANGES @@ -1,4 +1,8 @@ +2.1-895 | 2013-07-29 14:07:35 -0700 + + * Adding a test for a DNSKEY RR. (Robin Sommer) + 2.1-894 | 2013-07-29 16:44:41 -0400 * Updates for the Intel Framework. (Seth Hall) diff --git a/VERSION b/VERSION index 3131a2159f..9e4a84ae0a 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.1-894 +2.1-895 diff --git a/testing/btest/Baseline/scripts.base.protocols.dns.dns-key/dns.log b/testing/btest/Baseline/scripts.base.protocols.dns.dns-key/dns.log new file mode 100644 index 0000000000..722d2c3912 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.protocols.dns.dns-key/dns.log @@ -0,0 +1,10 @@ +#separator \x09 +#set_separator , +#empty_field (empty) +#unset_field - +#path dns +#open 2013-07-25-20-29-44 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p proto trans_id query qclass qclass_name qtype qtype_name rcode rcode_name AA TC RD RA Z answers TTLs rejected +#types time string addr port addr port enum count string count string count string count string bool bool bool bool count vector[string] vector[interval] bool +1359565680.761790 UWkUyAuUGXf 192.168.6.10 53209 192.168.129.36 53 udp 41477 paypal.com 1 C_INTERNET 48 DNSKEY 0 NOERROR F F T F 1 - - F +#close 2013-07-25-20-29-44 diff --git a/testing/btest/Traces/dns-dnskey.trace b/testing/btest/Traces/dns-dnskey.trace new file mode 100644 index 0000000000000000000000000000000000000000..c7a6448e7990c6717c5f8da73cb1c300bcb4f534 GIT binary patch literal 1110 zcmca|c+)~A1{MYw`2U}Qff2|l2<8mjH=mm!5Xc5$1_qw_pW+x)Crs=x;$U!PU~s++ zROui%Py4_MHm(CJ8dc8UWH4pWcg2l zAU9)JaDWG54dZb}4n{_1R%S*9#;dtA^fp+P2bQoGp4N|geL&lL*-Vp7o5g}J zOiDO>LZzj#t#jUXgDH3N+L@~@Bs=(DJ$&)9_txXvJ9M5Uv@@M)`XV5jrn9D|M{BEK zj=kgJuFnx?+O1ZkHpCv_)Yx<#=q5c-a$r2pFpt52m5G6YDI!ogN+96($re3jDJs_1%i&kO`s>{Z8M|mvw@lF;N;3?2080u$S?4>&_C<&yL@pE_lvLnHLi$pWcCLAeH59Sd_SVCU72I2Nv_F-U(Y3u zuF+j~iP2hWx@U{{ansb?GncbZ>{+uk<(5eOq;(DongKd%=l!cz_v1ZwpL^2O!*+=_ zQNB;P&ZV6-%1D@y_C%nUKX3u>mkPzOzfFq-YEE7~c%^ENUE}|3m&h%;YpkYC^3+iV LW^UFVNah9rRu9Wl literal 0 HcmV?d00001 diff --git a/testing/btest/scripts/base/protocols/dns/dns-key.bro 
b/testing/btest/scripts/base/protocols/dns/dns-key.bro new file mode 100644 index 0000000000..c51788c605 --- /dev/null +++ b/testing/btest/scripts/base/protocols/dns/dns-key.bro @@ -0,0 +1,4 @@ +# Making sure DNSKEY gets logged as such. +# +# @TEST-EXEC: bro -r $TRACES/dns-dnskey.trace +# @TEST-EXEC: btest-diff dns.log From c7676c5e695b0a4590a2fa18e96241455ff4970e Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Mon, 29 Jul 2013 14:29:45 -0700 Subject: [PATCH 114/118] The new magic submodule didn't get merged. --- magic | 1 + 1 file changed, 1 insertion(+) create mode 160000 magic diff --git a/magic b/magic new file mode 160000 index 0000000000..e87fe13a7b --- /dev/null +++ b/magic @@ -0,0 +1 @@ +Subproject commit e87fe13a7b776182ffc8c75076d42702f5c28fed From b76d1d07ca0d0175f57f83379612009c8c09400a Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Mon, 29 Jul 2013 15:06:07 -0700 Subject: [PATCH 115/118] Test updates. BIT-1044 #merged --- CHANGES | 40 +++++++++++++++++++ NEWS | 2 +- VERSION | 2 +- .../canonified_loaded_scripts.log | 5 ++- .../canonified_loaded_scripts.log | 7 ++-- .../http.ds.txt | 18 ++++----- testing/btest/coverage/bare-mode-errors.test | 5 ++- 7 files changed, 62 insertions(+), 17 deletions(-) diff --git a/CHANGES b/CHANGES index 0c7235bd47..1f64cc908a 100644 --- a/CHANGES +++ b/CHANGES @@ -1,4 +1,44 @@ +2.1-930 | 2013-07-29 15:06:07 -0700 + + * Major file analysis overhaul in naming and appearance, along with + fixes and test updates. (Seth Hall and Jon Siwek) + + Includes: + + * Added protocol description functions that provide a super + compressed log representation. (Seth Hall) + + * Added mime types to http.log (Seth Hall) + + * Add jar files to the default MHR lookups. (Seth Hall) + + * Adding CAB files for MHR checking. (Seth Hall) + + * Improve malware hash registry script. + + - Include a link to a virustotal search in the notice sub message field. + - Give all information returned from Team Cymru in the notice message. + - Add more file types to match on to the default set. + + * Make the custom libmagic database a git submodule. + + * Add an is_orig parameter to file_over_new_connection event. + + * Recorrected the module name to Files. + + * Added Files::analyzer_name to get a more readable name for a + file analyzer. + + * Improved and just overall better handled multipart mime + transfers in HTTP and SMTP. HTTP now has orig_fuids and + resp_fuids log fields since multiple "files" can be transferred + with multipart mime in a single request/response pair. SMTP has + an fuids field which has file unique IDs for all parts + transferred. FTP and IRC have a log field named fuid added + because only a single file can be transferred per irc and ftp + log line. + 2.1-895 | 2013-07-29 14:07:35 -0700 * Adding a test for a DNSKEY RR. (Robin Sommer) diff --git a/NEWS b/NEWS index c3eabf5554..de2ee1b684 100644 --- a/NEWS +++ b/NEWS @@ -80,7 +80,7 @@ New Functionality with the following user-visibible functionality (some of that was already available before, but done differently): - [TODO: This will probably change with further script updates.] + [TODO: Update with changes from 984e9793db56.] 
- A binary input reader interfaces the input framework with file analysis, allowing to inject files on disk into Bro's diff --git a/VERSION b/VERSION index 9e4a84ae0a..cacffbfffc 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.1-895 +2.1-930 diff --git a/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log b/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log index 0caafdf107..e28efc9563 100644 --- a/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log +++ b/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log @@ -3,7 +3,7 @@ #empty_field (empty) #unset_field - #path loaded_scripts -#open 2013-07-25-17-54-33 +#open 2013-07-29-21-31-47 #fields name #types string scripts/base/init-bare.bro @@ -90,6 +90,7 @@ scripts/base/init-bare.bro build/scripts/base/bif/file_analysis.bif.bro scripts/base/utils/site.bro scripts/base/utils/patterns.bro + build/scripts/base/bif/__load__.bro scripts/policy/misc/loaded-scripts.bro scripts/base/utils/paths.bro -#close 2013-07-25-19-59-47 +#close 2013-07-29-21-31-47 diff --git a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log index deffbe364b..faf372222b 100644 --- a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log +++ b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log @@ -3,7 +3,7 @@ #empty_field (empty) #unset_field - #path loaded_scripts -#open 2013-07-23-05-48-10 +#open 2013-07-29-21-31-48 #fields name #types string scripts/base/init-bare.bro @@ -90,6 +90,7 @@ scripts/base/init-bare.bro build/scripts/base/bif/file_analysis.bif.bro scripts/base/utils/site.bro scripts/base/utils/patterns.bro + build/scripts/base/bif/__load__.bro scripts/base/init-default.bro scripts/base/utils/addrs.bro scripts/base/utils/conn-ids.bro @@ -158,7 +159,7 @@ scripts/base/init-default.bro scripts/base/protocols/ftp/__load__.bro scripts/base/protocols/ftp/utils-commands.bro scripts/base/protocols/ftp/main.bro - scripts/base/protocols/ftp/utils.bro + scripts/base/protocols/ftp/utils.bro scripts/base/protocols/ftp/files.bro scripts/base/protocols/ftp/gridftp.bro scripts/base/protocols/ssl/__load__.bro @@ -197,4 +198,4 @@ scripts/base/init-default.bro scripts/base/files/extract/main.bro scripts/base/misc/find-checksum-offloading.bro scripts/policy/misc/loaded-scripts.bro -#close 2013-07-23-05-48-10 +#close 2013-07-29-21-31-48 diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/http.ds.txt b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/http.ds.txt index e919233b79..fd998057f3 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/http.ds.txt +++ b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/http.ds.txt @@ -32,10 +32,10 @@ - - - - + + + + @@ -60,13 +60,13 @@ - - - - + + + + # Extent, type='http' -ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied mime_type md5 extracted_request_files extracted_response_files +ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags 
username password proxied orig_fuids orig_mime_types resp_fuids resp_mime_types 1300475168.784020 j4u32Pc5bif 141.142.220.118 48649 208.80.152.118 80 1 GET bits.wikimedia.org /skins-1.5/monobook/main.css http://www.wikipedia.org/ Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.15) Gecko/20110303 Ubuntu/10.04 (lucid) Firefox/3.6.15 0 0 304 Not Modified 0 1300475168.916018 VW0XPVINV8a 141.142.220.118 49997 208.80.152.3 80 1 GET upload.wikimedia.org /wikipedia/commons/6/63/Wikipedia-logo.png http://www.wikipedia.org/ Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.15) Gecko/20110303 Ubuntu/10.04 (lucid) Firefox/3.6.15 0 0 304 Not Modified 0 1300475168.916183 3PKsZ2Uye21 141.142.220.118 49996 208.80.152.3 80 1 GET upload.wikimedia.org /wikipedia/commons/thumb/b/bb/Wikipedia_wordmark.svg/174px-Wikipedia_wordmark.svg.png http://www.wikipedia.org/ Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.15) Gecko/20110303 Ubuntu/10.04 (lucid) Firefox/3.6.15 0 0 304 Not Modified 0 diff --git a/testing/btest/coverage/bare-mode-errors.test b/testing/btest/coverage/bare-mode-errors.test index 34ba063081..1910ef8e17 100644 --- a/testing/btest/coverage/bare-mode-errors.test +++ b/testing/btest/coverage/bare-mode-errors.test @@ -10,5 +10,8 @@ # # @TEST-EXEC: test -d $DIST/scripts # @TEST-EXEC: for script in `find $DIST/scripts/ -name \*\.bro -not -path '*/site/*'`; do echo "=== $script" >>allerrors; if echo "$script" | egrep -q 'communication/listen|controllee'; then rm -rf load_attempt .bgprocs; btest-bg-run load_attempt bro -b $script; btest-bg-wait -k 2; cat load_attempt/.stderr >>allerrors; else bro -b $script 2>>allerrors; fi done || exit 0 -# @TEST-EXEC: cat allerrors | grep -v "received termination signal" | grep -v '===' | sort | uniq > unique_errors +# @TEST-EXEC: cat allerrors | grep -v "received termination signal" | fgrep -v -f %INPUT | grep -v '===' | sort | uniq > unique_errors # @TEST-EXEC: btest-diff unique_errors + +# White-list of tests to exclude because of cyclic load dependencies. +scripts/base/protocols/ftp/utils.bro From c30fa36d14382c03d08f545002a33f21eb778cfe Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Mon, 29 Jul 2013 16:39:40 -0700 Subject: [PATCH 116/118] Updating submodule(s). [nomail] --- aux/binpac | 2 +- aux/bro-aux | 2 +- aux/broccoli | 2 +- aux/broctl | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/aux/binpac b/aux/binpac index 896ddedde5..314fa8f65f 160000 --- a/aux/binpac +++ b/aux/binpac @@ -1 +1 @@ -Subproject commit 896ddedde55c48ec2163577fc258b49c418abb3e +Subproject commit 314fa8f65fc240e960c23c3bba98623436a72b98 diff --git a/aux/bro-aux b/aux/bro-aux index a9942558c7..91d258cc8b 160000 --- a/aux/bro-aux +++ b/aux/bro-aux @@ -1 +1 @@ -Subproject commit a9942558c7d3dfd80148b8aaded64c82ade3d117 +Subproject commit 91d258cc8b2f74cd02fc93dfe61f73ec9f0dd489 diff --git a/aux/broccoli b/aux/broccoli index 889f9c6594..d59c73b6e0 160000 --- a/aux/broccoli +++ b/aux/broccoli @@ -1 +1 @@ -Subproject commit 889f9c65944ceac20ad9230efc39d33e6e1221c3 +Subproject commit d59c73b6e0966ad63bbc63a35741b5f68263e7b1 diff --git a/aux/broctl b/aux/broctl index 0cd102805e..52fd91261f 160000 --- a/aux/broctl +++ b/aux/broctl @@ -1 +1 @@ -Subproject commit 0cd102805e73343cab3f9fd4a76552e13940dad9 +Subproject commit 52fd91261f41fa1528f7b964837a364d7991889e From 43825212db25ce540c6a12905844d246f8784c05 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Tue, 30 Jul 2013 12:17:53 +0200 Subject: [PATCH 117/118] Update submodules. 
--- aux/binpac | 2 +- aux/bro-aux | 2 +- aux/broccoli | 2 +- aux/broctl | 2 +- cmake | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/aux/binpac b/aux/binpac index c39bd478b9..314fa8f65f 160000 --- a/aux/binpac +++ b/aux/binpac @@ -1 +1 @@ -Subproject commit c39bd478b9d0ecd05b1b83aa9d09a7887893977c +Subproject commit 314fa8f65fc240e960c23c3bba98623436a72b98 diff --git a/aux/bro-aux b/aux/bro-aux index a9942558c7..91d258cc8b 160000 --- a/aux/bro-aux +++ b/aux/bro-aux @@ -1 +1 @@ -Subproject commit a9942558c7d3dfd80148b8aaded64c82ade3d117 +Subproject commit 91d258cc8b2f74cd02fc93dfe61f73ec9f0dd489 diff --git a/aux/broccoli b/aux/broccoli index 889f9c6594..d59c73b6e0 160000 --- a/aux/broccoli +++ b/aux/broccoli @@ -1 +1 @@ -Subproject commit 889f9c65944ceac20ad9230efc39d33e6e1221c3 +Subproject commit d59c73b6e0966ad63bbc63a35741b5f68263e7b1 diff --git a/aux/broctl b/aux/broctl index 0cd102805e..52fd91261f 160000 --- a/aux/broctl +++ b/aux/broctl @@ -1 +1 @@ -Subproject commit 0cd102805e73343cab3f9fd4a76552e13940dad9 +Subproject commit 52fd91261f41fa1528f7b964837a364d7991889e diff --git a/cmake b/cmake index 0187b33a29..026639f836 160000 --- a/cmake +++ b/cmake @@ -1 +1 @@ -Subproject commit 0187b33a29d5ec824f940feff60dc5d8c2fe314f +Subproject commit 026639f8368e56742c0cb5d9fb390ea64e60ec50 From af9e181731b82167187b7a9ec8995b991920c0e1 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Tue, 30 Jul 2013 10:29:27 -0700 Subject: [PATCH 118/118] Updating submodule(s). [nomail] --- magic | 1 + 1 file changed, 1 insertion(+) create mode 160000 magic diff --git a/magic b/magic new file mode 160000 index 0000000000..e87fe13a7b --- /dev/null +++ b/magic @@ -0,0 +1 @@ +Subproject commit e87fe13a7b776182ffc8c75076d42702f5c28fed
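Patches 114 and 118 above record the new magic submodule only as a gitlink pointing at commit e87fe13a7b776182ffc8c75076d42702f5c28fed, so a fresh checkout will not contain the submodule's contents until it is initialized. The following is a minimal sketch of the usual commands, assuming a matching .gitmodules entry is already present in the tree; these are standard git commands, not part of the patches themselves:

    # Fetch and check out the magic submodule at the recorded commit.
    git submodule update --init magic

    # Confirm the working tree matches the gitlink recorded by the patch.
    git submodule status magic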