Merge remote-tracking branch 'origin/master' into topic/seth/file-entropy

This commit is contained in:
Seth Hall 2015-01-30 00:52:41 -05:00
commit 8e53e719f3
1894 changed files with 189157 additions and 279280 deletions

View file

@ -3,9 +3,17 @@
#include "Analyzer.h"
#include "Manager.h"
file_analysis::ID file_analysis::Analyzer::id_counter = 0;
file_analysis::Analyzer::~Analyzer()
{
DBG_LOG(DBG_FILE_ANALYSIS, "Destroy file analyzer %s",
file_mgr->GetComponentName(tag));
file_mgr->GetComponentName(tag).c_str());
Unref(args);
}
void file_analysis::Analyzer::SetAnalyzerTag(const file_analysis::Tag& arg_tag)
{
assert(! tag || tag == arg_tag);
tag = arg_tag;
}

View file

@ -13,6 +13,8 @@ namespace file_analysis {
class File;
typedef uint32 ID;
/**
* Base class for analyzers that can be attached to file_analysis::File objects.
*/
@ -25,6 +27,18 @@ public:
*/
virtual ~Analyzer();
/**
* Initializes the analyzer before input processing starts.
*/
virtual void Init()
{ }
/**
* Finishes the analyzer's operation after all input has been parsed.
*/
virtual void Done()
{ }
/**
* Subclasses may override this method to receive file data non-sequentially.
* @param data points to start of a chunk of file data.
@ -72,6 +86,13 @@ public:
*/
file_analysis::Tag Tag() const { return tag; }
/**
* Returns the analyzer instance's internal ID. These IDs are unique
* across all analyzers instantiated and can thus be used to
* identify a specific instance.
*/
ID GetID() const { return id; }
/**
* @return the AnalyzerArgs associated with the analyzer.
*/
@ -82,10 +103,31 @@ public:
*/
File* GetFile() const { return file; }
/**
* Sets the tag associated with the analyzer's type. Note that this
* can be called only right after construction, if the constructor
* did not receive a name or tag. The method cannot be used to change
* an existing tag.
*/
void SetAnalyzerTag(const file_analysis::Tag& tag);
/**
* @return true if the analyzer has ever seen a stream-wise delivery.
*/
bool GotStreamDelivery() const
{ return got_stream_delivery; }
/**
* Flag the analyzer as having seen a stream-wise delivery.
*/
void SetGotStreamDelivery()
{ got_stream_delivery = true; }
protected:
/**
* Constructor. Only derived classes are meant to be instantiated.
* @param arg_tag the tag defining the analyzer's type.
* @param arg_args an \c AnalyzerArgs (script-layer type) value specifying
* tunable options, if any, related to a particular analyzer type.
* @param arg_file the file to which the analyzer is being attached.
@ -93,14 +135,39 @@ protected:
Analyzer(file_analysis::Tag arg_tag, RecordVal* arg_args, File* arg_file)
: tag(arg_tag),
args(arg_args->Ref()->AsRecordVal()),
file(arg_file)
{}
file(arg_file),
got_stream_delivery(false)
{
id = ++id_counter;
}
/**
* Constructor. Only derived classes are meant to be instantiated.
* As this version of the constructor does not receive a name or tag,
* SetAnalyzerTag() must be called before the instance can be used.
*
* @param arg_args an \c AnalyzerArgs (script-layer type) value specifying
* tunable options, if any, related to a particular analyzer type.
* @param arg_file the file to which the analyzer is being attached.
*/
Analyzer(RecordVal* arg_args, File* arg_file)
: tag(),
args(arg_args->Ref()->AsRecordVal()),
file(arg_file),
got_stream_delivery(false)
{
id = ++id_counter;
}
private:
ID id; /**< Unique instance ID. */
file_analysis::Tag tag; /**< The particular type of the analyzer instance. */
RecordVal* args; /**< \c AnalyzerArgs val gives tunable analyzer params. */
File* file; /**< The file to which the analyzer is attached. */
bool got_stream_delivery;
static ID id_counter;
};
} // namespace file_analysis
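
The hooks above can be combined into a derived analyzer. The following is a minimal sketch, not part of this commit; the class name, the "MY_ANALYZER" component tag, and its registration are hypothetical, but the overridden signatures follow the declarations shown above.

#include "file_analysis/Analyzer.h"
#include "file_analysis/Manager.h"

class MyAnalyzer : public file_analysis::Analyzer {
public:
	// Factory used by the component system; just allocates an instance.
	static file_analysis::Analyzer* Instantiate(RecordVal* args, file_analysis::File* file)
		{ return new MyAnalyzer(args, file); }

	virtual void Init()
		{ /* Set up per-file state before any input arrives. */ }

	virtual bool DeliverStream(const u_char* data, uint64 len)
		{ return true; } // Returning false detaches the analyzer.

	virtual void Done()
		{ /* Emit results once all input has been parsed. */ }

protected:
	MyAnalyzer(RecordVal* args, file_analysis::File* file)
		: file_analysis::Analyzer(file_mgr->GetComponentTag("MY_ANALYZER"),
		                          args, file)
		{ }
};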

View file

@ -9,7 +9,10 @@ using namespace file_analysis;
static void analyzer_del_func(void* v)
{
delete (file_analysis::Analyzer*) v;
file_analysis::Analyzer* a = (file_analysis::Analyzer*)v;
a->Done();
delete a;
}
AnalyzerSet::AnalyzerSet(File* arg_file) : file(arg_file)
@ -35,6 +38,14 @@ AnalyzerSet::~AnalyzerSet()
delete analyzer_hash;
}
Analyzer* AnalyzerSet::Find(file_analysis::Tag tag, RecordVal* args)
{
HashKey* key = GetKey(tag, args);
Analyzer* rval = analyzer_map.Lookup(key);
delete key;
return rval;
}
bool AnalyzerSet::Add(file_analysis::Tag tag, RecordVal* args)
{
HashKey* key = GetKey(tag, args);
@ -42,7 +53,7 @@ bool AnalyzerSet::Add(file_analysis::Tag tag, RecordVal* args)
if ( analyzer_map.Lookup(key) )
{
DBG_LOG(DBG_FILE_ANALYSIS, "Instantiate analyzer %s skipped for file id"
" %s: already exists", file_mgr->GetComponentName(tag),
" %s: already exists", file_mgr->GetComponentName(tag).c_str(),
file->GetID().c_str());
delete key;
return true;
@ -61,7 +72,7 @@ bool AnalyzerSet::Add(file_analysis::Tag tag, RecordVal* args)
return true;
}
bool AnalyzerSet::QueueAdd(file_analysis::Tag tag, RecordVal* args)
Analyzer* AnalyzerSet::QueueAdd(file_analysis::Tag tag, RecordVal* args)
{
HashKey* key = GetKey(tag, args);
file_analysis::Analyzer* a = InstantiateAnalyzer(tag, args);
@ -69,12 +80,12 @@ bool AnalyzerSet::QueueAdd(file_analysis::Tag tag, RecordVal* args)
if ( ! a )
{
delete key;
return false;
return 0;
}
mod_queue.push(new AddMod(a, key));
return true;
return a;
}
bool AnalyzerSet::AddMod::Perform(AnalyzerSet* set)
@ -82,7 +93,7 @@ bool AnalyzerSet::AddMod::Perform(AnalyzerSet* set)
if ( set->analyzer_map.Lookup(key) )
{
DBG_LOG(DBG_FILE_ANALYSIS, "Add analyzer %s skipped for file id"
" %s: already exists", file_mgr->GetComponentName(a->Tag()),
" %s: already exists", file_mgr->GetComponentName(a->Tag()).c_str(),
a->GetFile()->GetID().c_str());
Abort();
@ -90,6 +101,7 @@ bool AnalyzerSet::AddMod::Perform(AnalyzerSet* set)
}
set->Insert(a, key);
return true;
}
@ -108,15 +120,17 @@ bool AnalyzerSet::Remove(file_analysis::Tag tag, HashKey* key)
if ( ! a )
{
DBG_LOG(DBG_FILE_ANALYSIS, "Skip remove analyzer %s for file id %s",
file_mgr->GetComponentName(tag), file->GetID().c_str());
file_mgr->GetComponentName(tag).c_str(), file->GetID().c_str());
return false;
}
DBG_LOG(DBG_FILE_ANALYSIS, "Remove analyzer %s for file id %s",
file_mgr->GetComponentName(tag),
file_mgr->GetComponentName(tag).c_str(),
file->GetID().c_str());
a->Done();
delete a;
return true;
}
@ -155,7 +169,7 @@ file_analysis::Analyzer* AnalyzerSet::InstantiateAnalyzer(Tag tag,
if ( ! a )
{
reporter->Error("Failed file analyzer %s instantiation for file id %s",
file_mgr->GetComponentName(tag), file->GetID().c_str());
file_mgr->GetComponentName(tag).c_str(), file->GetID().c_str());
return 0;
}
@ -165,9 +179,11 @@ file_analysis::Analyzer* AnalyzerSet::InstantiateAnalyzer(Tag tag,
void AnalyzerSet::Insert(file_analysis::Analyzer* a, HashKey* key)
{
DBG_LOG(DBG_FILE_ANALYSIS, "Add analyzer %s for file id %s",
file_mgr->GetComponentName(a->Tag()), file->GetID().c_str());
file_mgr->GetComponentName(a->Tag()).c_str(), file->GetID().c_str());
analyzer_map.Insert(key, a);
delete key;
a->Init();
}
void AnalyzerSet::DrainModifications()

View file

@ -37,6 +37,14 @@ public:
*/
~AnalyzerSet();
/**
* Looks up an analyzer by its tag and arguments.
* @param tag an analyzer tag.
* @param args an \c AnalyzerArgs record.
* @return pointer to an analyzer instance, or a null pointer if not found.
*/
Analyzer* Find(file_analysis::Tag tag, RecordVal* args);
/**
* Attach an analyzer to #file immediately.
* @param tag the analyzer tag of the file analyzer to add.
@ -49,9 +57,10 @@ public:
* Queue the attachment of an analyzer to #file.
* @param tag the analyzer tag of the file analyzer to add.
* @param args an \c AnalyzerArgs value which specifies an analyzer.
* @return true if analyzer was able to be instantiated, else false.
* @return if successful, a pointer to a newly instantiated analyzer, else
* a null pointer. The caller does *not* take ownership of the memory.
*/
bool QueueAdd(file_analysis::Tag tag, RecordVal* args);
file_analysis::Analyzer* QueueAdd(file_analysis::Tag tag, RecordVal* args);
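
Since QueueAdd() now returns the queued instance instead of a bool, callers can hold on to it before the pending modifications are drained. A hedged sketch of the new calling pattern, assuming a tag and args are in scope:

file_analysis::Analyzer* a = analyzers.QueueAdd(tag, args);

if ( ! a )
	return false;  // Instantiation failed; nothing was queued.

// 'a' stays owned by the set; it may be configured here before
// DrainModifications() actually attaches it to the file.
return true;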
/**
* Remove an analyzer from #file immediately.

View file

@ -11,6 +11,7 @@ set(file_analysis_SRCS
Manager.cc
File.cc
FileTimer.cc
FileReassembler.cc
Analyzer.cc
AnalyzerSet.cc
Component.cc

View file

@ -8,54 +8,24 @@
using namespace file_analysis;
Component::Component(const char* arg_name, factory_callback arg_factory)
: plugin::Component(plugin::component::FILE_ANALYZER),
plugin::TaggedComponent<file_analysis::Tag>()
Component::Component(const std::string& name, factory_callback arg_factory, Tag::subtype_t subtype)
: plugin::Component(plugin::component::FILE_ANALYZER, name),
plugin::TaggedComponent<file_analysis::Tag>(subtype)
{
name = copy_string(arg_name);
canon_name = canonify_name(arg_name);
factory = arg_factory;
}
Component::Component(const Component& other)
: plugin::Component(Type()),
plugin::TaggedComponent<file_analysis::Tag>(other)
{
name = copy_string(other.name);
canon_name = copy_string(other.canon_name);
factory = other.factory;
file_mgr->RegisterComponent(this, "ANALYZER_");
}
Component::~Component()
{
delete [] name;
delete [] canon_name;
}
void Component::Describe(ODesc* d) const
void Component::DoDescribe(ODesc* d) const
{
plugin::Component::Describe(d);
d->Add(name);
d->Add(" (");
if ( factory )
{
d->Add("ANALYZER_");
d->Add(canon_name);
d->Add(CanonicalName());
}
d->Add(")");
}
Component& Component::operator=(const Component& other)
{
plugin::TaggedComponent<file_analysis::Tag>::operator=(other);
if ( &other != this )
{
name = copy_string(other.name);
factory = other.factory;
}
return *this;
}

View file

@ -1,7 +1,7 @@
// See the file "COPYING" in the main distribution directory for copyright.
#ifndef FILE_ANALYZER_PLUGIN_COMPONENT_H
#define FILE_ANALYZER_PLUGIN_COMPONENT_H
#ifndef FILE_ANALYZER_COMPONENT_H
#define FILE_ANALYZER_COMPONENT_H
#include "Tag.h"
#include "plugin/Component.h"
@ -40,51 +40,32 @@ public:
* from file_analysis::Analyzer. This is typically a static \c
* Instantiate() method inside the class that just allocates and
* returns a new instance.
*
* @param subtype A subtype associated with this component that
* further distinguishes it. The subtype will be integrated into the
* analyzer::Tag that the manager associates with this analyzer, and
* analyzer instances can accordingly access it via analyzer::Tag().
* If not used, leave at zero.
*/
Component(const char* name, factory_callback factory);
/**
* Copy constructor.
*/
Component(const Component& other);
Component(const std::string& name, factory_callback factory, Tag::subtype_t subtype = 0);
/**
* Destructor.
*/
~Component();
/**
* Returns the name of the analyzer. This name is unique across all
* analyzers and used to identify it. The returned name is derived
* from what's passed to the constructor but upper-cased and
* canonified to allow being part of a script-level ID.
*/
virtual const char* Name() const { return name; }
/**
* Returns a canonicalized version of the analyzer's name. The
* returned name is derived from what's passed to the constructor but
* upper-cased and transformed to allow being part of a script-level
* ID.
*/
const char* CanonicalName() const { return canon_name; }
/**
* Returns the analyzer's factory function.
*/
factory_callback Factory() const { return factory; }
protected:
/**
* Generates a human-readable description of the component's main
* parameters. This goes into the output of \c "bro -NN".
*/
virtual void Describe(ODesc* d) const;
Component& operator=(const Component& other);
* Overridden from plugin::Component.
*/
virtual void DoDescribe(ODesc* d) const;
private:
const char* name; // The analyzer's name.
const char* canon_name; // The analyzer's canonical name.
factory_callback factory; // The analyzer's factory callback.
};

View file

@ -1,6 +1,7 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include <string>
#include <algorithm>
#include "File.h"
#include "FileTimer.h"
@ -10,10 +11,13 @@
#include "Val.h"
#include "Type.h"
#include "Event.h"
#include "RuleMatcher.h"
#include "analyzer/Analyzer.h"
#include "analyzer/Manager.h"
#include "analyzer/extract/Extract.h"
using namespace file_analysis;
static Val* empty_connection_table()
@ -49,7 +53,6 @@ int File::overflow_bytes_idx = -1;
int File::timeout_interval_idx = -1;
int File::bof_buffer_size_idx = -1;
int File::bof_buffer_idx = -1;
int File::mime_type_idx = -1;
void File::StaticInit()
{
@ -69,26 +72,25 @@ void File::StaticInit()
timeout_interval_idx = Idx("timeout_interval");
bof_buffer_size_idx = Idx("bof_buffer_size");
bof_buffer_idx = Idx("bof_buffer");
mime_type_idx = Idx("mime_type");
}
File::File(const string& file_id, Connection* conn, analyzer::Tag tag,
bool is_orig)
: id(file_id), val(0), postpone_timeout(false), first_chunk(true),
missed_bof(false), need_reassembly(false), done(false),
did_file_new_event(false), analyzers(this)
File::File(const string& file_id, const string& source_name, Connection* conn,
analyzer::Tag tag, bool is_orig)
: id(file_id), val(0), file_reassembler(0), stream_offset(0),
reassembly_max_buffer(0), did_mime_type(false),
reassembly_enabled(false), postpone_timeout(false), done(false),
analyzers(this)
{
StaticInit();
DBG_LOG(DBG_FILE_ANALYSIS, "Creating new File object %s", file_id.c_str());
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Creating new File object", file_id.c_str());
val = new RecordVal(fa_file_type);
val->Assign(id_idx, new StringVal(file_id.c_str()));
SetSource(source_name);
if ( conn )
{
// add source, connection, is_orig fields
SetSource(analyzer_mgr->GetComponentName(tag));
val->Assign(is_orig_idx, new Val(is_orig, TYPE_BOOL));
UpdateConnectionFields(conn, is_orig);
}
@ -98,15 +100,9 @@ File::File(const string& file_id, Connection* conn, analyzer::Tag tag,
File::~File()
{
DBG_LOG(DBG_FILE_ANALYSIS, "Destroying File object %s", id.c_str());
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Destroying File object", id.c_str());
Unref(val);
// Queue may not be empty in the case where only content gaps were seen.
while ( ! fonc_queue.empty() )
{
delete_vals(fonc_queue.front().second);
fonc_queue.pop();
}
delete file_reassembler;
}
void File::UpdateLastActivityTime()
@ -119,10 +115,10 @@ double File::GetLastActivityTime() const
return val->Lookup(last_active_idx)->AsTime();
}
void File::UpdateConnectionFields(Connection* conn, bool is_orig)
bool File::UpdateConnectionFields(Connection* conn, bool is_orig)
{
if ( ! conn )
return;
return false;
Val* conns = val->Lookup(conns_idx);
@ -133,27 +129,28 @@ void File::UpdateConnectionFields(Connection* conn, bool is_orig)
}
Val* idx = get_conn_id_val(conn);
if ( ! conns->AsTableVal()->Lookup(idx) )
if ( conns->AsTableVal()->Lookup(idx) )
{
Val* conn_val = conn->BuildConnVal();
conns->AsTableVal()->Assign(idx, conn_val);
if ( FileEventAvailable(file_over_new_connection) )
{
val_list* vl = new val_list();
vl->append(val->Ref());
vl->append(conn_val->Ref());
vl->append(new Val(is_orig, TYPE_BOOL));
if ( did_file_new_event )
FileEvent(file_over_new_connection, vl);
else
fonc_queue.push(pair<EventHandlerPtr, val_list*>(
file_over_new_connection, vl));
}
Unref(idx);
return false;
}
conns->AsTableVal()->Assign(idx, conn->BuildConnVal());
Unref(idx);
return true;
}
void File::RaiseFileOverNewConnection(Connection* conn, bool is_orig)
{
if ( conn && FileEventAvailable(file_over_new_connection) )
{
val_list* vl = new val_list();
vl->append(val->Ref());
vl->append(conn->BuildConnVal());
vl->append(new Val(is_orig, TYPE_BOOL));
FileEvent(file_over_new_connection, vl);
}
}
uint64 File::LookupFieldDefaultCount(int idx) const
@ -203,6 +200,22 @@ void File::SetTimeoutInterval(double interval)
val->Assign(timeout_interval_idx, new Val(interval, TYPE_INTERVAL));
}
bool File::SetExtractionLimit(RecordVal* args, uint64 bytes)
{
Analyzer* a = analyzers.Find(file_mgr->GetComponentTag("EXTRACT"), args);
if ( ! a )
return false;
Extract* e = dynamic_cast<Extract*>(a);
if ( ! e )
return false;
e->SetLimit(bytes);
return true;
}
void File::IncrementByteCount(uint64 size, int field_idx)
{
uint64 old = LookupFieldDefaultCount(field_idx);
@ -211,6 +224,7 @@ void File::IncrementByteCount(uint64 size, int field_idx)
void File::SetTotalBytes(uint64 size)
{
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Total bytes %" PRIu64, id.c_str(), size);
val->Assign(total_bytes_idx, new Val(size, TYPE_COUNT));
}
@ -220,7 +234,7 @@ bool File::IsComplete() const
if ( ! total )
return false;
if ( LookupFieldDefaultCount(seen_bytes_idx) >= total->AsCount() )
if ( stream_offset >= total->AsCount() )
return true;
return false;
@ -233,17 +247,87 @@ void File::ScheduleInactivityTimer() const
bool File::AddAnalyzer(file_analysis::Tag tag, RecordVal* args)
{
return done ? false : analyzers.QueueAdd(tag, args);
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Queuing addition of %s analyzer",
id.c_str(), file_mgr->GetComponentName(tag).c_str());
if ( done )
return false;
return analyzers.QueueAdd(tag, args) != 0;
}
bool File::RemoveAnalyzer(file_analysis::Tag tag, RecordVal* args)
{
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Queuing remove of %s analyzer",
id.c_str(), file_mgr->GetComponentName(tag).c_str());
return done ? false : analyzers.QueueRemove(tag, args);
}
void File::EnableReassembly()
{
reassembly_enabled = true;
}
void File::DisableReassembly()
{
reassembly_enabled = false;
delete file_reassembler;
file_reassembler = 0;
}
void File::SetReassemblyBuffer(uint64 max)
{
reassembly_max_buffer = max;
}
bool File::DetectMIME()
{
did_mime_type = true;
Val* bof_buffer_val = val->Lookup(bof_buffer_idx);
if ( ! bof_buffer_val )
{
if ( bof_buffer.size == 0 )
return false;
BroString* bs = concatenate(bof_buffer.chunks);
bof_buffer_val = new StringVal(bs);
val->Assign(bof_buffer_idx, bof_buffer_val);
}
RuleMatcher::MIME_Matches matches;
const u_char* data = bof_buffer_val->AsString()->Bytes();
uint64 len = bof_buffer_val->AsString()->Len();
len = min(len, LookupFieldDefaultCount(bof_buffer_size_idx));
file_mgr->DetectMIME(data, len, &matches);
if ( matches.empty() )
return false;
if ( FileEventAvailable(file_mime_type) )
{
val_list* vl = new val_list();
vl->append(val->Ref());
vl->append(new StringVal(*(matches.begin()->second.begin())));
FileEvent(file_mime_type, vl);
}
if ( FileEventAvailable(file_mime_types) )
{
val_list* vl = new val_list();
vl->append(val->Ref());
vl->append(file_analysis::GenMIMEMatchesVal(matches));
FileEvent(file_mime_types, vl);
}
return true;
}
bool File::BufferBOF(const u_char* data, uint64 len)
{
if ( bof_buffer.full || bof_buffer.replayed )
if ( bof_buffer.full )
return false;
uint64 desired_size = LookupFieldDefaultCount(bof_buffer_size_idx);
@ -251,135 +335,174 @@ bool File::BufferBOF(const u_char* data, uint64 len)
bof_buffer.chunks.push_back(new BroString(data, len, 0));
bof_buffer.size += len;
if ( bof_buffer.size >= desired_size )
if ( bof_buffer.size < desired_size )
return true;
bof_buffer.full = true;
if ( bof_buffer.size > 0 )
{
bof_buffer.full = true;
ReplayBOF();
BroString* bs = concatenate(bof_buffer.chunks);
val->Assign(bof_buffer_idx, new StringVal(bs));
}
return true;
return false;
}
bool File::DetectMIME(const u_char* data, uint64 len)
void File::DeliverStream(const u_char* data, uint64 len)
{
const char* mime = bro_magic_buffer(magic_mime_cookie, data, len);
bool bof_was_full = bof_buffer.full;
// Buffer enough data for the BOF buffer
BufferBOF(data, len);
if ( mime )
if ( ! did_mime_type && bof_buffer.full &&
LookupFieldDefaultCount(missing_bytes_idx) == 0 )
DetectMIME();
DBG_LOG(DBG_FILE_ANALYSIS,
"[%s] %" PRIu64 " stream bytes in at offset %" PRIu64 "; %s [%s%s]",
id.c_str(), len, stream_offset,
IsComplete() ? "complete" : "incomplete",
fmt_bytes((const char*) data, min((uint64)40, len)),
len > 40 ? "..." : "");
file_analysis::Analyzer* a = 0;
IterCookie* c = analyzers.InitForIteration();
while ( (a = analyzers.NextEntry(c)) )
{
const char* mime_end = strchr(mime, ';');
if ( ! a->GotStreamDelivery() )
{
int num_bof_chunks_behind = bof_buffer.chunks.size();
if ( mime_end )
// strip off charset
val->Assign(mime_type_idx, new StringVal(mime_end - mime, mime));
else
val->Assign(mime_type_idx, new StringVal(mime));
if ( ! bof_was_full )
// We just added a chunk to the BOF buffer, don't count it
// as it will get delivered on its own.
num_bof_chunks_behind -= 1;
uint64 bytes_delivered = 0;
// Catch this analyzer up with the BOF buffer.
for ( int i = 0; i < num_bof_chunks_behind; ++i )
{
if ( ! a->DeliverStream(bof_buffer.chunks[i]->Bytes(),
bof_buffer.chunks[i]->Len()) )
analyzers.QueueRemove(a->Tag(), a->Args());
bytes_delivered += bof_buffer.chunks[i]->Len();
}
a->SetGotStreamDelivery();
// May need to catch analyzer up on missed gap?
// Analyzer should be fully caught up to stream_offset now.
}
if ( ! a->DeliverStream(data, len) )
analyzers.QueueRemove(a->Tag(), a->Args());
}
return mime;
stream_offset += len;
IncrementByteCount(len, seen_bytes_idx);
}
void File::ReplayBOF()
void File::DeliverChunk(const u_char* data, uint64 len, uint64 offset)
{
if ( bof_buffer.replayed )
return;
bof_buffer.replayed = true;
if ( bof_buffer.chunks.empty() )
// Potentially handle reassembly and deliver to the stream analyzers.
if ( file_reassembler )
{
// Since we missed the beginning, try file type detect on next data in.
missed_bof = true;
return;
if ( reassembly_max_buffer > 0 &&
reassembly_max_buffer < file_reassembler->TotalSize() )
{
uint64 current_offset = stream_offset;
uint64 gap_bytes = file_reassembler->Flush();
IncrementByteCount(gap_bytes, overflow_bytes_idx);
if ( FileEventAvailable(file_reassembly_overflow) )
{
val_list* vl = new val_list();
vl->append(val->Ref());
vl->append(new Val(current_offset, TYPE_COUNT));
vl->append(new Val(gap_bytes, TYPE_COUNT));
FileEvent(file_reassembly_overflow, vl);
}
}
// Forward data to the reassembler.
file_reassembler->NewBlock(network_time, offset, len, data);
}
else if ( stream_offset == offset )
{
// This is the normal case where a file is transferred linearly.
// Nothing special should be done here.
DeliverStream(data, len);
}
else if ( reassembly_enabled )
{
// This data doesn't arrive at the expected stream offset, but reassembly
// is enabled, so create the reassembler now to buffer it.
file_reassembler = new FileReassembler(this, stream_offset);
file_reassembler->NewBlock(network_time, offset, len, data);
}
else
{
// We can't reassemble so we throw out the data for streaming.
IncrementByteCount(len, overflow_bytes_idx);
}
BroString* bs = concatenate(bof_buffer.chunks);
val->Assign(bof_buffer_idx, new StringVal(bs));
DBG_LOG(DBG_FILE_ANALYSIS,
"[%s] %" PRIu64 " chunk bytes in at offset %" PRIu64 "; %s [%s%s]",
id.c_str(), len, offset,
IsComplete() ? "complete" : "incomplete",
fmt_bytes((const char*) data, min((uint64)40, len)),
len > 40 ? "..." : "");
DetectMIME(bs->Bytes(), bs->Len());
file_analysis::Analyzer* a = 0;
IterCookie* c = analyzers.InitForIteration();
FileEvent(file_new);
while ( (a = analyzers.NextEntry(c)) )
{
if ( ! a->DeliverChunk(data, len, offset) )
{
analyzers.QueueRemove(a->Tag(), a->Args());
}
}
for ( size_t i = 0; i < bof_buffer.chunks.size(); ++i )
DataIn(bof_buffer.chunks[i]->Bytes(), bof_buffer.chunks[i]->Len());
if ( IsComplete() )
EndOfFile();
}
void File::DataIn(const u_char* data, uint64 len, uint64 offset)
{
analyzers.DrainModifications();
if ( first_chunk )
{
// TODO: this should all really be delayed until we attempt reassembly
DetectMIME(data, len);
FileEvent(file_new);
first_chunk = false;
}
file_analysis::Analyzer* a = 0;
IterCookie* c = analyzers.InitForIteration();
while ( (a = analyzers.NextEntry(c)) )
{
if ( ! a->DeliverChunk(data, len, offset) )
analyzers.QueueRemove(a->Tag(), a->Args());
}
DeliverChunk(data, len, offset);
analyzers.DrainModifications();
// TODO: check reassembly requirement based on buffer size in record
if ( need_reassembly )
reporter->InternalError("file_analyzer::File TODO: reassembly not yet supported");
// TODO: reassembly overflow stuff, increment overflow count, eval trigger
IncrementByteCount(len, seen_bytes_idx);
}
void File::DataIn(const u_char* data, uint64 len)
{
analyzers.DrainModifications();
if ( BufferBOF(data, len) )
return;
if ( missed_bof )
{
DetectMIME(data, len);
FileEvent(file_new);
missed_bof = false;
}
file_analysis::Analyzer* a = 0;
IterCookie* c = analyzers.InitForIteration();
while ( (a = analyzers.NextEntry(c)) )
{
if ( ! a->DeliverStream(data, len) )
{
analyzers.QueueRemove(a->Tag(), a->Args());
continue;
}
uint64 offset = LookupFieldDefaultCount(seen_bytes_idx) +
LookupFieldDefaultCount(missing_bytes_idx);
if ( ! a->DeliverChunk(data, len, offset) )
analyzers.QueueRemove(a->Tag(), a->Args());
}
DeliverChunk(data, len, stream_offset);
analyzers.DrainModifications();
IncrementByteCount(len, seen_bytes_idx);
}
void File::EndOfFile()
{
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] End of file", id.c_str());
if ( done )
return;
if ( ! did_mime_type &&
LookupFieldDefaultCount(missing_bytes_idx) == 0 )
DetectMIME();
analyzers.DrainModifications();
// Send along anything that's been buffered, but never flushed.
ReplayBOF();
if ( file_reassembler )
{
file_reassembler->Flush();
analyzers.DrainModifications();
}
done = true;
@ -399,11 +522,17 @@ void File::EndOfFile()
void File::Gap(uint64 offset, uint64 len)
{
analyzers.DrainModifications();
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Gap of size %" PRIu64 " at offset %," PRIu64,
id.c_str(), len, offset);
// If we were buffering the beginning of the file, a gap means we've got
// as much contiguous stuff at the beginning as possible, so work with that.
ReplayBOF();
if ( file_reassembler && ! file_reassembler->IsCurrentlyFlushing() )
{
file_reassembler->FlushTo(offset + len);
// The reassembler will call us back with all the gaps we need to know.
return;
}
analyzers.DrainModifications();
file_analysis::Analyzer* a = 0;
IterCookie* c = analyzers.InitForIteration();
@ -424,6 +553,8 @@ void File::Gap(uint64 offset, uint64 len)
}
analyzers.DrainModifications();
stream_offset += len;
IncrementByteCount(len, missing_bytes_idx);
}
@ -446,19 +577,9 @@ void File::FileEvent(EventHandlerPtr h, val_list* vl)
{
mgr.QueueEvent(h, vl);
if ( h == file_new )
{
did_file_new_event = true;
while ( ! fonc_queue.empty() )
{
pair<EventHandlerPtr, val_list*> p = fonc_queue.front();
mgr.QueueEvent(p.first, p.second);
fonc_queue.pop();
}
}
if ( h == file_new || h == file_timeout )
if ( h == file_new || h == file_over_new_connection ||
h == file_mime_type ||
h == file_timeout || h == file_extraction_limit )
{
// immediate feedback is required for these events.
mgr.Drain();

View file

@ -3,11 +3,11 @@
#ifndef FILE_ANALYSIS_FILE_H
#define FILE_ANALYSIS_FILE_H
#include <queue>
#include <string>
#include <utility>
#include <vector>
#include "FileReassembler.h"
#include "Conn.h"
#include "Val.h"
#include "Tag.h"
@ -16,6 +16,8 @@
namespace file_analysis {
class FileReassembler;
/**
* Wrapper class around \c fa_file record values from script layer.
*/
@ -56,6 +58,14 @@ public:
*/
void SetTimeoutInterval(double interval);
/**
* Change the maximum number of bytes that an attached extraction analyzer is allowed to write.
* @param args the \c AnalyzerArgs of the extraction analyzer whose limit needs changing.
* @param bytes new limit.
* @return false if no extraction analyzer is active, else true.
*/
bool SetExtractionLimit(RecordVal* args, uint64 bytes);
/**
* @return value of the "id" field from #val record.
*/
@ -78,10 +88,10 @@ public:
void SetTotalBytes(uint64 size);
/**
* Compares "seen_bytes" field to "total_bytes" field of #val record to
* determine if the full file has been seen.
* @return false if "total_bytes" hasn't been set yet or "seen_bytes" is
* less than it, else true.
* @return true if file analysis is complete for the file, else false.
* It is incomplete if the total size is unknown or if the number of bytes
* streamed to analyzers (either as data deliveries or gap information)
* is less than the known total size.
*/
bool IsComplete() const;
@ -158,18 +168,20 @@ public:
protected:
friend class Manager;
friend class FileReassembler;
/**
* Constructor; only file_analysis::Manager should be creating these.
* @param file_id an identifier string for the file in pretty hash form
* (similar to connection uids).
* @param source_name the value for the source field to fill in.
* @param conn a network connection over which the file is transferred.
* @param tag the network protocol over which the file is transferred.
* @param is_orig true if the file is being transferred from the originator
* of the connection to the responder. False indicates the other
* direction.
*/
File(const string& file_id, Connection* conn = 0,
File(const string& file_id, const string& source_name, Connection* conn = 0,
analyzer::Tag tag = analyzer::Tag::Error, bool is_orig = false);
/**
@ -177,8 +189,14 @@ protected:
* \c conn_id and UID taken from \a conn.
* @param conn the connection over which a part of the file has been seen.
* @param is_orig true if the connection originator is sending the file.
* @return true if the connection was previously unknown.
*/
void UpdateConnectionFields(Connection* conn, bool is_orig);
bool UpdateConnectionFields(Connection* conn, bool is_orig);
/**
* Raise the file_over_new_connection event with given arguments.
*/
void RaiseFileOverNewConnection(Connection* conn, bool is_orig);
/**
* Increment a byte count field of #val record by \a size.
@ -212,18 +230,39 @@ protected:
bool BufferBOF(const u_char* data, uint64 len);
/**
* Forward any beginning-of-file buffered data on to DataIn stream.
* Does mime type detection via file magic signatures and assigns
* strongest matching mime type (if available) to \c mime_type
* field in #val. It uses the data in the BOF buffer.
* @return whether a mime type match was found.
*/
void ReplayBOF();
bool DetectMIME();
/**
* Does mime type detection and assigns type (if available) to \c mime_type
* field in #val.
* @param data pointer to a chunk of file data.
* @param len number of bytes in the data chunk.
* @return whether mime type was available.
* Enables reassembly on the file.
*/
bool DetectMIME(const u_char* data, uint64 len);
void EnableReassembly();
/**
* Disables reassembly on the file. If there is an existing reassembler
* for the file, this will cause it to be deleted and won't allow a new
* one to be created until reassembly is reenabled.
*/
void DisableReassembly();
/**
* Set the maximum number of bytes of memory allowed for reassembling this file.
*/
void SetReassemblyBuffer(uint64 max);
/**
* Perform stream-wise delivery for analyzers that need it.
*/
void DeliverStream(const u_char* data, uint64 len);
/**
* Perform chunk-wise delivery for analyzers that need it.
*/
void DeliverChunk(const u_char* data, uint64 len, uint64 offset);
/**
* Lookup a record field index/offset by name.
@ -237,25 +276,24 @@ protected:
*/
static void StaticInit();
private:
protected:
string id; /**< A pretty hash that likely identifies file */
RecordVal* val; /**< \c fa_file from script layer. */
FileReassembler* file_reassembler; /**< A reassembler for the file if it's needed. */
uint64 stream_offset; /**< The offset of the file which has been forwarded. */
uint64 reassembly_max_buffer; /**< Maximum allowed buffer for reassembly. */
bool did_mime_type; /**< Whether the mime type ident has already been attempted. */
bool reassembly_enabled; /**< Whether file stream reassembly is enabled. */
bool postpone_timeout; /**< Whether postponing timeout is requested. */
bool first_chunk; /**< Track first non-linear chunk. */
bool missed_bof; /**< Flags that we missed start of file. */
bool need_reassembly; /**< Whether file stream reassembly is needed. */
bool done; /**< If this object is about to be deleted. */
bool did_file_new_event; /**< Whether the file_new event has been done. */
AnalyzerSet analyzers; /**< A set of attached file analyzer. */
queue<pair<EventHandlerPtr, val_list*> > fonc_queue;
AnalyzerSet analyzers; /**< A set of attached file analyzers. */
struct BOF_Buffer {
BOF_Buffer() : full(false), replayed(false), size(0) {}
BOF_Buffer() : full(false), size(0) {}
~BOF_Buffer()
{ for ( size_t i = 0; i < chunks.size(); ++i ) delete chunks[i]; }
bool full;
bool replayed;
uint64 size;
BroString::CVec chunks;
} bof_buffer; /**< Beginning of file buffer. */
@ -274,6 +312,7 @@ private:
static int bof_buffer_size_idx;
static int bof_buffer_idx;
static int mime_type_idx;
static int mime_types_idx;
};
} // namespace file_analysis

View file

@ -0,0 +1,128 @@
#include "FileReassembler.h"
#include "File.h"
namespace file_analysis {
class File;
FileReassembler::FileReassembler(File *f, uint64 starting_offset)
: Reassembler(starting_offset), the_file(f), flushing(false)
{
}
FileReassembler::FileReassembler()
: Reassembler(), the_file(0), flushing(false)
{
}
FileReassembler::~FileReassembler()
{
}
uint64 FileReassembler::Flush()
{
if ( flushing )
return 0;
if ( last_block )
{
// This is expected to call back into FileReassembler::Undelivered().
flushing = true;
uint64 rval = TrimToSeq(last_block->upper);
flushing = false;
return rval;
}
return 0;
}
uint64 FileReassembler::FlushTo(uint64 sequence)
{
if ( flushing )
return 0;
flushing = true;
uint64 rval = TrimToSeq(sequence);
flushing = false;
last_reassem_seq = sequence;
return rval;
}
void FileReassembler::BlockInserted(DataBlock* start_block)
{
if ( start_block->seq > last_reassem_seq ||
start_block->upper <= last_reassem_seq )
return;
for ( DataBlock* b = start_block;
b && b->seq <= last_reassem_seq; b = b->next )
{
if ( b->seq == last_reassem_seq )
{ // New stuff.
uint64 len = b->Size();
last_reassem_seq += len;
the_file->DeliverStream(b->block, len);
}
}
// Throw out forwarded data
TrimToSeq(last_reassem_seq);
}
void FileReassembler::Undelivered(uint64 up_to_seq)
{
// If we have blocks that begin below up_to_seq, deliver them.
DataBlock* b = blocks;
while ( b )
{
if ( b->seq < last_reassem_seq )
{
// Already delivered this block.
b = b->next;
continue;
}
if ( b->seq >= up_to_seq )
// Block is beyond what we need to process at this point.
break;
uint64 gap_at_seq = last_reassem_seq;
uint64 gap_len = b->seq - last_reassem_seq;
the_file->Gap(gap_at_seq, gap_len);
last_reassem_seq += gap_len;
BlockInserted(b);
// Inserting a block may cause trimming of what's buffered,
// so have to assume 'b' is invalid, hence re-assign to start.
b = blocks;
}
if ( up_to_seq > last_reassem_seq )
{
the_file->Gap(last_reassem_seq, up_to_seq - last_reassem_seq);
last_reassem_seq = up_to_seq;
}
}
void FileReassembler::Overlap(const u_char* b1, const u_char* b2, uint64 n)
{
// Not doing anything here yet.
}
IMPLEMENT_SERIAL(FileReassembler, SER_FILE_REASSEMBLER);
bool FileReassembler::DoSerialize(SerialInfo* info) const
{
reporter->InternalError("FileReassembler::DoSerialize not implemented");
return false; // Cannot be reached.
}
bool FileReassembler::DoUnserialize(UnserialInfo* info)
{
reporter->InternalError("FileReassembler::DoUnserialize not implemented");
return false; // Cannot be reached.
}
} // end file_analysis

View file

@ -0,0 +1,65 @@
#ifndef FILE_ANALYSIS_FILEREASSEMBLER_H
#define FILE_ANALYSIS_FILEREASSEMBLER_H
#include "Reassem.h"
#include "File.h"
class BroFile;
class Connection;
namespace file_analysis {
class File;
class FileReassembler : public Reassembler {
public:
FileReassembler(File* f, uint64 starting_offset);
virtual ~FileReassembler();
void Done();
// Checks if we have delivered all contents that we can possibly
// deliver for this endpoint.
void CheckEOF();
/**
* Discards all contents of the reassembly buffer. This will spin through
* the buffer and call File::DeliverStream() and File::Gap() wherever
* appropriate.
* @return the number of new bytes now detected as gaps in the file.
*/
uint64 Flush();
/**
* Discards all contents of the reassembly buffer up to a given sequence
* number. This will spin through the buffer and call
* File::DeliverStream() and File::Gap() wherever appropriate.
* @param sequence the sequence number to flush until.
* @return the number of new bytes now detected as gaps in the file.
*/
uint64 FlushTo(uint64 sequence);
/**
* @return whether the reassembler is currently in the process of flushing
* out the contents of its buffer.
*/
bool IsCurrentlyFlushing() const
{ return flushing; }
protected:
FileReassembler();
DECLARE_SERIAL(FileReassembler);
void Undelivered(uint64 up_to_seq);
void BlockInserted(DataBlock* b);
void Overlap(const u_char* b1, const u_char* b2, uint64 n);
File* the_file;
bool flushing;
};
} // namespace file_analysis
#endif
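
A brief hedged sketch of how the owning File might drive these flush entry points when a gap arrives, mirroring the File::Gap() changes elsewhere in this commit; offset and len are the hypothetical gap parameters:

if ( file_reassembler && ! file_reassembler->IsCurrentlyFlushing() )
	{
	// Flush everything buffered up to the end of the gap. The reassembler
	// calls back into File::DeliverStream() and File::Gap() as needed and
	// returns the number of bytes newly identified as gaps, which the
	// caller could account against the file's missing/overflow counters.
	uint64 new_gap_bytes = file_reassembler->FlushTo(offset + len);
	}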

View file

@ -9,47 +9,80 @@
#include "Analyzer.h"
#include "Var.h"
#include "Event.h"
#include "UID.h"
#include "plugin/Manager.h"
#include "analyzer/Manager.h"
using namespace file_analysis;
TableVal* Manager::disabled = 0;
TableType* Manager::tag_set_type = 0;
string Manager::salt;
Manager::Manager()
: plugin::ComponentManager<file_analysis::Tag,
file_analysis::Component>("Files")
file_analysis::Component>("Files", "Tag"),
id_map(), ignored(), current_file_id(), magic_state()
{
}
Manager::~Manager()
{
Terminate();
for ( MIMEMap::iterator i = mime_types.begin(); i != mime_types.end(); i++ )
delete i->second;
// Have to assume that too much of Bro has been shut down by this point
// to do anything more than reclaim memory.
File* f;
bool* b;
IterCookie* it = id_map.InitForIteration();
while ( (f = id_map.NextEntry(it)) )
delete f;
it = ignored.InitForIteration();
while( (b = ignored.NextEntry(it)) )
delete b;
delete magic_state;
}
void Manager::InitPreScript()
{
std::list<Component*> analyzers = plugin_mgr->Components<Component>();
for ( std::list<Component*>::const_iterator i = analyzers.begin();
i != analyzers.end(); ++i )
RegisterComponent(*i, "ANALYZER_");
}
void Manager::InitPostScript()
{
}
void Manager::InitMagic()
{
delete magic_state;
magic_state = rule_matcher->InitFileMagic();
}
void Manager::Terminate()
{
vector<string> keys;
for ( IDMap::iterator it = id_map.begin(); it != id_map.end(); ++it )
keys.push_back(it->first);
IterCookie* it = id_map.InitForIteration();
HashKey* key;
while ( id_map.NextEntry(key, it) )
{
keys.push_back(string(static_cast<const char*>(key->Key()),
key->Size()));
delete key;
}
for ( size_t i = 0; i < keys.size(); ++i )
Timeout(keys[i], true);
mgr.Drain();
}
string Manager::HashHandle(const string& handle) const
@ -57,15 +90,13 @@ string Manager::HashHandle(const string& handle) const
if ( salt.empty() )
salt = BifConst::Files::salt->CheckString();
char tmp[20];
uint64 hash[2];
string msg(handle + salt);
MD5(reinterpret_cast<const u_char*>(msg.data()), msg.size(),
reinterpret_cast<u_char*>(hash));
uitoa_n(hash[0], tmp, sizeof(tmp), 62);
return tmp;
return Bro::UID(bits_per_uid, hash, 2).Base62("F");
}
void Manager::SetHandle(const string& handle)
@ -73,52 +104,62 @@ void Manager::SetHandle(const string& handle)
if ( handle.empty() )
return;
DBG_LOG(DBG_FILE_ANALYSIS, "Set current handle to %s", handle.c_str());
current_file_id = HashHandle(handle);
}
void Manager::DataIn(const u_char* data, uint64 len, uint64 offset,
analyzer::Tag tag, Connection* conn, bool is_orig)
string Manager::DataIn(const u_char* data, uint64 len, uint64 offset,
analyzer::Tag tag, Connection* conn, bool is_orig,
const string& precomputed_id)
{
GetFileHandle(tag, conn, is_orig);
File* file = GetFile(current_file_id, conn, tag, is_orig);
string id = precomputed_id.empty() ? GetFileID(tag, conn, is_orig) : precomputed_id;
File* file = GetFile(id, conn, tag, is_orig);
if ( ! file )
return;
return "";
file->DataIn(data, len, offset);
if ( file->IsComplete() )
{
RemoveFile(file->GetID());
return "";
}
return id;
}
void Manager::DataIn(const u_char* data, uint64 len, analyzer::Tag tag,
Connection* conn, bool is_orig)
string Manager::DataIn(const u_char* data, uint64 len, analyzer::Tag tag,
Connection* conn, bool is_orig, const string& precomputed_id)
{
GetFileHandle(tag, conn, is_orig);
string id = precomputed_id.empty() ? GetFileID(tag, conn, is_orig) : precomputed_id;
// Sequential data input shouldn't be going over multiple conns, so don't
// do the check to update connection set.
File* file = GetFile(current_file_id, conn, tag, is_orig, false);
File* file = GetFile(id, conn, tag, is_orig, false);
if ( ! file )
return;
return "";
file->DataIn(data, len);
if ( file->IsComplete() )
{
RemoveFile(file->GetID());
return "";
}
return id;
}
void Manager::DataIn(const u_char* data, uint64 len, const string& file_id,
const string& source)
{
File* file = GetFile(file_id);
File* file = GetFile(file_id, 0, analyzer::Tag::Error, false, false,
source.c_str());
if ( ! file )
return;
if ( file->GetSource().empty() )
file->SetSource(source);
file->DataIn(data, len);
if ( file->IsComplete() )
@ -134,8 +175,7 @@ void Manager::EndOfFile(analyzer::Tag tag, Connection* conn)
void Manager::EndOfFile(analyzer::Tag tag, Connection* conn, bool is_orig)
{
// Don't need to create a file if we're just going to remove it right away.
GetFileHandle(tag, conn, is_orig);
RemoveFile(current_file_id);
RemoveFile(GetFileID(tag, conn, is_orig));
}
void Manager::EndOfFile(const string& file_id)
@ -143,31 +183,37 @@ void Manager::EndOfFile(const string& file_id)
RemoveFile(file_id);
}
void Manager::Gap(uint64 offset, uint64 len, analyzer::Tag tag,
Connection* conn, bool is_orig)
string Manager::Gap(uint64 offset, uint64 len, analyzer::Tag tag,
Connection* conn, bool is_orig, const string& precomputed_id)
{
GetFileHandle(tag, conn, is_orig);
File* file = GetFile(current_file_id, conn, tag, is_orig);
string id = precomputed_id.empty() ? GetFileID(tag, conn, is_orig) : precomputed_id;
File* file = GetFile(id, conn, tag, is_orig);
if ( ! file )
return;
return "";
file->Gap(offset, len);
return id;
}
void Manager::SetSize(uint64 size, analyzer::Tag tag, Connection* conn,
bool is_orig)
string Manager::SetSize(uint64 size, analyzer::Tag tag, Connection* conn,
bool is_orig, const string& precomputed_id)
{
GetFileHandle(tag, conn, is_orig);
File* file = GetFile(current_file_id, conn, tag, is_orig);
string id = precomputed_id.empty() ? GetFileID(tag, conn, is_orig) : precomputed_id;
File* file = GetFile(id, conn, tag, is_orig);
if ( ! file )
return;
return "";
file->SetTotalBytes(size);
if ( file->IsComplete() )
{
RemoveFile(file->GetID());
return "";
}
return id;
}
bool Manager::SetTimeoutInterval(const string& file_id, double interval) const
@ -184,6 +230,50 @@ bool Manager::SetTimeoutInterval(const string& file_id, double interval) const
return true;
}
bool Manager::EnableReassembly(const string& file_id)
{
File* file = LookupFile(file_id);
if ( ! file )
return false;
file->EnableReassembly();
return true;
}
bool Manager::DisableReassembly(const string& file_id)
{
File* file = LookupFile(file_id);
if ( ! file )
return false;
file->DisableReassembly();
return true;
}
bool Manager::SetReassemblyBuffer(const string& file_id, uint64 max)
{
File* file = LookupFile(file_id);
if ( ! file )
return false;
file->SetReassemblyBuffer(max);
return true;
}
bool Manager::SetExtractionLimit(const string& file_id, RecordVal* args,
uint64 n) const
{
File* file = LookupFile(file_id);
if ( ! file )
return false;
return file->SetExtractionLimit(args, n);
}
bool Manager::AddAnalyzer(const string& file_id, file_analysis::Tag tag,
RecordVal* args) const
{
@ -207,7 +297,8 @@ bool Manager::RemoveAnalyzer(const string& file_id, file_analysis::Tag tag,
}
File* Manager::GetFile(const string& file_id, Connection* conn,
analyzer::Tag tag, bool is_orig, bool update_conn)
analyzer::Tag tag, bool is_orig, bool update_conn,
const char* source_name)
{
if ( file_id.empty() )
return 0;
@ -215,13 +306,23 @@ File* Manager::GetFile(const string& file_id, Connection* conn,
if ( IsIgnored(file_id) )
return 0;
File* rval = id_map[file_id];
File* rval = id_map.Lookup(file_id.c_str());
if ( ! rval )
{
rval = id_map[file_id] = new File(file_id, conn, tag, is_orig);
rval = new File(file_id,
source_name ? source_name
: analyzer_mgr->GetComponentName(tag),
conn, tag, is_orig);
id_map.Insert(file_id.c_str(), rval);
rval->ScheduleInactivityTimer();
// Generate file_new after inserting it into manager's mapping
// in case script-layer calls back in to core from the event.
rval->FileEvent(file_new);
// Same for file_over_new_connection.
rval->RaiseFileOverNewConnection(conn, is_orig);
if ( IsIgnored(file_id) )
return 0;
}
@ -229,8 +330,8 @@ File* Manager::GetFile(const string& file_id, Connection* conn,
{
rval->UpdateLastActivityTime();
if ( update_conn )
rval->UpdateConnectionFields(conn, is_orig);
if ( update_conn && rval->UpdateConnectionFields(conn, is_orig) )
rval->RaiseFileOverNewConnection(conn, is_orig);
}
return rval;
@ -238,12 +339,7 @@ File* Manager::GetFile(const string& file_id, Connection* conn,
File* Manager::LookupFile(const string& file_id) const
{
IDMap::const_iterator it = id_map.find(file_id);
if ( it == id_map.end() )
return 0;
return it->second;
return id_map.Lookup(file_id.c_str());
}
void Manager::Timeout(const string& file_id, bool is_terminating)
@ -274,48 +370,52 @@ void Manager::Timeout(const string& file_id, bool is_terminating)
bool Manager::IgnoreFile(const string& file_id)
{
if ( id_map.find(file_id) == id_map.end() )
if ( ! id_map.Lookup(file_id.c_str()) )
return false;
DBG_LOG(DBG_FILE_ANALYSIS, "Ignore FileID %s", file_id.c_str());
ignored.insert(file_id);
delete ignored.Insert(file_id.c_str(), new bool);
return true;
}
bool Manager::RemoveFile(const string& file_id)
{
IDMap::iterator it = id_map.find(file_id);
HashKey key(file_id.c_str());
// Can't remove from the dictionary/map right away as invoking EndOfFile
// may cause some events to be executed which actually depend on the file
// still being in the dictionary/map.
File* f = static_cast<File*>(id_map.Lookup(&key));
if ( it == id_map.end() )
if ( ! f )
return false;
DBG_LOG(DBG_FILE_ANALYSIS, "Remove FileID %s", file_id.c_str());
it->second->EndOfFile();
delete it->second;
id_map.erase(file_id);
ignored.erase(file_id);
f->EndOfFile();
delete f;
id_map.Remove(&key);
delete static_cast<bool*>(ignored.Remove(&key));
return true;
}
bool Manager::IsIgnored(const string& file_id)
{
return ignored.find(file_id) != ignored.end();
return ignored.Lookup(file_id.c_str()) != 0;
}
void Manager::GetFileHandle(analyzer::Tag tag, Connection* c, bool is_orig)
string Manager::GetFileID(analyzer::Tag tag, Connection* c, bool is_orig)
{
current_file_id.clear();
if ( IsDisabled(tag) )
return;
return "";
if ( ! get_file_handle )
return;
return "";
DBG_LOG(DBG_FILE_ANALYSIS, "Raise get_file_handle() for protocol analyzer %s",
analyzer_mgr->GetComponentName(tag).c_str());
EnumVal* tagval = tag.AsEnumVal();
Ref(tagval);
@ -327,6 +427,7 @@ void Manager::GetFileHandle(analyzer::Tag tag, Connection* c, bool is_orig)
mgr.QueueEvent(get_file_handle, vl);
mgr.Drain(); // need file handle immediately so we don't have to buffer data
return current_file_id;
}
bool Manager::IsDisabled(analyzer::Tag tag)
@ -352,12 +453,73 @@ Analyzer* Manager::InstantiateAnalyzer(Tag tag, RecordVal* args, File* f) const
Component* c = Lookup(tag);
if ( ! c )
reporter->InternalError("cannot instantiate unknown file analyzer: %s",
tag.AsString().c_str());
{
reporter->InternalWarning(
"unknown file analyzer instantiation request: %s",
tag.AsString().c_str());
return 0;
}
if ( ! c->Factory() )
reporter->InternalError("file analyzer %s cannot be instantiated "
"dynamically", c->CanonicalName());
{
reporter->InternalWarning("file analyzer %s cannot be instantiated "
"dynamically", c->CanonicalName().c_str());
return 0;
}
return c->Factory()(args, f);
DBG_LOG(DBG_FILE_ANALYSIS, "Instantiate analyzer %s for file %s",
GetComponentName(tag).c_str(), f->id.c_str());
Analyzer* a = c->Factory()(args, f);
if ( ! a )
reporter->InternalError("file analyzer instantiation failed");
a->SetAnalyzerTag(tag);
return a;
}
RuleMatcher::MIME_Matches* Manager::DetectMIME(const u_char* data, uint64 len,
RuleMatcher::MIME_Matches* rval) const
{
if ( ! magic_state )
reporter->InternalError("file magic signature state not initialized");
rval = rule_matcher->Match(magic_state, data, len, rval);
rule_matcher->ClearFileMagicState(magic_state);
return rval;
}
string Manager::DetectMIME(const u_char* data, uint64 len) const
{
RuleMatcher::MIME_Matches matches;
DetectMIME(data, len, &matches);
if ( matches.empty() )
return "";
return *(matches.begin()->second.begin());
}
VectorVal* file_analysis::GenMIMEMatchesVal(const RuleMatcher::MIME_Matches& m)
{
VectorVal* rval = new VectorVal(mime_matches);
for ( RuleMatcher::MIME_Matches::const_iterator it = m.begin();
it != m.end(); ++it )
{
RecordVal* element = new RecordVal(mime_match);
for ( set<string>::const_iterator it2 = it->second.begin();
it2 != it->second.end(); ++it2 )
{
element->Assign(0, new Val(it->first, TYPE_INT));
element->Assign(1, new StringVal(*it2));
}
rval->Assign(rval->Size(), element);
}
return rval;
}

View file

@ -4,16 +4,16 @@
#define FILE_ANALYSIS_MANAGER_H
#include <string>
#include <map>
#include <set>
#include <queue>
#include "Dict.h"
#include "Net.h"
#include "Conn.h"
#include "Val.h"
#include "Analyzer.h"
#include "Timer.h"
#include "EventHandler.h"
#include "RuleMatcher.h"
#include "File.h"
#include "FileTimer.h"
@ -26,6 +26,9 @@
namespace file_analysis {
declare(PDict,bool);
declare(PDict,File);
/**
* Main entry point for interacting with file analysis.
*/
@ -54,6 +57,12 @@ public:
*/
void InitPostScript();
/**
* Initializes the state required to match against file magic signatures
* for MIME type identification.
*/
void InitMagic();
/**
* Times out any active file analysis to prepare for shutdown.
*/
@ -62,7 +71,7 @@ public:
/**
* Creates a file identifier from a unique file handle string.
* @param handle a unique string which identifies a single file.
* @return a prettified MD5 hash of \a handle, truncated to 64-bits.
* @return a prettified MD5 hash of \a handle, truncated to *bits_per_uid* bits.
*/
string HashHandle(const string& handle) const;
@ -82,9 +91,17 @@ public:
* @param conn network connection over which the file data is transferred.
* @param is_orig true if the file is being sent from connection originator
* or false if is being sent in the opposite direction.
* @param precomputed_file_id may be set to a previous return value in order to
* bypass costly file handle lookups.
* @return a unique file ID string which, in certain contexts, may be
* cached and passed back in to a subsequent function call in order
* to avoid costly file handle lookups (which have to go through
* the \c get_file_handle script-layer event). An empty string
* indicates the associated file is not going to be analyzed further.
*/
void DataIn(const u_char* data, uint64 len, uint64 offset,
analyzer::Tag tag, Connection* conn, bool is_orig);
std::string DataIn(const u_char* data, uint64 len, uint64 offset,
analyzer::Tag tag, Connection* conn, bool is_orig,
const std::string& precomputed_file_id = "");
/**
* Pass in sequential file data.
@ -94,9 +111,17 @@ public:
* @param conn network connection over which the file data is transferred.
* @param is_orig true if the file is being sent from connection originator
* or false if is being sent in the opposite direction.
* @param precomputed_file_id may be set to a previous return value in order to
* bypass costly file handle lookups.
* @return a unique file ID string which, in certain contexts, may be
* cached and passed back in to a subsequent function call in order
* to avoid costly file handle lookups (which have to go through
* the \c get_file_handle script-layer event). An empty string
* indicates the associated file is not going to be analyzed further.
*/
void DataIn(const u_char* data, uint64 len, analyzer::Tag tag,
Connection* conn, bool is_orig);
std::string DataIn(const u_char* data, uint64 len, analyzer::Tag tag,
Connection* conn, bool is_orig,
const std::string& precomputed_file_id = "");
/**
* Pass in sequential file data from external source (e.g. input framework).
@ -140,9 +165,17 @@ public:
* @param conn network connection over which the file data is transferred.
* @param is_orig true if the file is being sent from connection originator
* or false if is being sent in the opposite direction.
* @param precomputed_file_id may be set to a previous return value in order to
* bypass costly file handle lookups.
* @return a unique file ID string which, in certain contexts, may be
* cached and passed back in to a subsequent function call in order
* to avoid costly file handle lookups (which have to go through
* the \c get_file_handle script-layer event). An empty string
* indicates the associated file is not going to be analyzed further.
*/
void Gap(uint64 offset, uint64 len, analyzer::Tag tag, Connection* conn,
bool is_orig);
std::string Gap(uint64 offset, uint64 len, analyzer::Tag tag,
Connection* conn, bool is_orig,
const std::string& precomputed_file_id = "");
/**
* Provide the expected number of bytes that comprise a file.
@ -151,9 +184,16 @@ public:
* @param conn network connection over which the file data is transferred.
* @param is_orig true if the file is being sent from connection originator
* or false if is being sent in the opposite direction.
* @param precomputed_file_id may be set to a previous return value in order to
* bypass costly file handle lookups.
* @return a unique file ID string which, in certain contexts, may be
* cached and passed back in to a subsequent function call in order
* to avoid costly file handle lookups (which have to go through
* the \c get_file_handle script-layer event). An empty string
* indicates the associated file is not going to be analyzed further.
*/
void SetSize(uint64 size, analyzer::Tag tag, Connection* conn,
bool is_orig);
std::string SetSize(uint64 size, analyzer::Tag tag, Connection* conn,
bool is_orig, const std::string& precomputed_file_id = "");
/**
* Starts ignoring a file, which will finally be removed from internal
@ -173,6 +213,34 @@ public:
*/
bool SetTimeoutInterval(const string& file_id, double interval) const;
/**
* Enable the reassembler for a file.
*/
bool EnableReassembly(const string& file_id);
/**
* Disable the reassembler for a file.
*/
bool DisableReassembly(const string& file_id);
/**
* Set the reassembly buffer size for a file, in bytes.
*/
bool SetReassemblyBuffer(const string& file_id, uint64 max);
/**
* Sets a limit on the maximum size allowed for extracting the file
* to local disk.
* @param file_id the file identifier/hash.
* @param args a \c AnalyzerArgs value which describes a file analyzer,
* which should be a file extraction analyzer.
* @param n the new extraction limit, in bytes.
* @return false if file identifier and analyzer did not map to anything,
* else true.
*/
bool SetExtractionLimit(const string& file_id, RecordVal* args,
uint64 n) const;
/**
* Queue attachment of an analyzer to the file identifier. Multiple
* analyzers of a given type can be attached per file identifier at a time
@ -211,11 +279,34 @@ public:
*/
Analyzer* InstantiateAnalyzer(Tag tag, RecordVal* args, File* f) const;
/**
* Returns a set of all matching MIME magic signatures for a given
* chunk of data.
* @param data A chunk of bytes to match magic MIME signatures against.
* @param len The number of bytes in \a data.
* @param rval An optional pre-existing structure in which to insert
* new matches. If it's a null pointer, an object is
* allocated and returned from the method.
* @return Set of all matching file magic signatures, which may be
* an object allocated by the method if \a rval is a null pointer.
*/
RuleMatcher::MIME_Matches* DetectMIME(const u_char* data, uint64 len,
RuleMatcher::MIME_Matches* rval) const;
/**
* Returns the strongest MIME magic signature match for a given data chunk.
* @param data A chunk of bytes to match magic MIME signatures against.
* @param len The number of bytes in \a data.
* @returns The MIME type string of the strongest file magic signature
* match, or an empty string if nothing matched.
*/
std::string DetectMIME(const u_char* data, uint64 len) const;
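
A hedged usage sketch of the two DetectMIME() overloads, assuming data and len describe a buffered chunk; the first entry of the match set is what the manager itself treats as the strongest match:

RuleMatcher::MIME_Matches matches;
file_mgr->DetectMIME(data, len, &matches);

if ( ! matches.empty() )
	{
	// Equivalent to the single-argument overload's return value.
	std::string strongest = *matches.begin()->second.begin();
	}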
protected:
friend class FileTimer;
typedef set<string> IDSet;
typedef map<string, File*> IDMap;
typedef PDict(bool) IDSet;
typedef PDict(File) IDMap;
/**
* Create a new file to be analyzed or retrieve an existing one.
@ -228,6 +319,7 @@ protected:
* this file isn't related to a connection).
* @param update_conn whether we need to update connection-related field
* in the \c fa_file record value associated with the file.
* @param source_name an optional value of the source field to fill in.
* @return the File object mapped to \a file_id or a null pointer if
* analysis is being ignored for the associated file. An File
* object may be created if a mapping doesn't exist, and if it did
@ -236,7 +328,8 @@ protected:
*/
File* GetFile(const string& file_id, Connection* conn = 0,
analyzer::Tag tag = analyzer::Tag::Error,
bool is_orig = false, bool update_conn = true);
bool is_orig = false, bool update_conn = true,
const char* source_name = 0);
/**
* Try to retrieve a file that's being analyzed, using its identifier/hash.
@ -270,8 +363,10 @@ protected:
* @param conn network connection over which the file is transferred.
* @param is_orig true if the file is being sent from connection originator
* or false if is being sent in the opposite direction.
* @return #current_file_id, which is a hash of a unique file handle string
* set by a \c get_file_handle event handler.
*/
void GetFileHandle(analyzer::Tag tag, Connection* c, bool is_orig);
std::string GetFileID(analyzer::Tag tag, Connection* c, bool is_orig);
/**
* Check if analysis is available for files transferred over a given
@ -284,15 +379,28 @@ protected:
static bool IsDisabled(analyzer::Tag tag);
private:
typedef set<Tag> TagSet;
typedef map<string, TagSet*> MIMEMap;
IDMap id_map; /**< Map file ID to file_analysis::File records. */
IDSet ignored; /**< Ignored files. Will be finally removed on EOF. */
TagSet* LookupMIMEType(const string& mtype, bool add_if_not_found);
PDict(File) id_map; /**< Map file ID to file_analysis::File records. */
PDict(bool) ignored; /**< Ignored files. Will be finally removed on EOF. */
string current_file_id; /**< Hash of what get_file_handle event sets. */
RuleFileMagicState* magic_state; /**< File magic signature match state. */
MIMEMap mime_types;/**< Mapping of MIME types to analyzers. */
static TableVal* disabled; /**< Table of disabled analyzers. */
static TableType* tag_set_type; /**< Type for set[tag]. */
static string salt; /**< A salt added to file handles before hashing. */
};
/**
* Returns a script-layer value corresponding to the \c mime_matches type.
* @param m The MIME match information with which to populate the value.
*/
VectorVal* GenMIMEMatchesVal(const RuleMatcher::MIME_Matches& m);
} // namespace file_analysis
extern file_analysis::Manager* file_mgr;

View file

@ -2,3 +2,5 @@ add_subdirectory(data_event)
add_subdirectory(entropy)
add_subdirectory(extract)
add_subdirectory(hash)
add_subdirectory(unified2)
add_subdirectory(x509)

View file

@ -20,13 +20,8 @@ DataEvent::DataEvent(RecordVal* args, File* file,
file_analysis::Analyzer* DataEvent::Instantiate(RecordVal* args, File* file)
{
using BifType::Record::Files::AnalyzerArgs;
int chunk_off = AnalyzerArgs->FieldOffset("chunk_event");
int stream_off = AnalyzerArgs->FieldOffset("stream_event");
Val* chunk_val = args->Lookup(chunk_off);
Val* stream_val = args->Lookup(stream_off);
Val* chunk_val = args->Lookup("chunk_event");
Val* stream_val = args->Lookup("stream_event");
if ( ! chunk_val && ! stream_val ) return 0;

View file

@ -1,26 +1,24 @@
// See the file in the main distribution directory for copyright.
#include "plugin/Plugin.h"
#include "file_analysis/Component.h"
#include "DataEvent.h"
namespace plugin { namespace Bro_FileDataEvent {
namespace plugin {
namespace Bro_FileDataEvent {
class Plugin : public plugin::Plugin {
protected:
void InitPreScript()
public:
plugin::Configuration Configure()
{
SetName("Bro::FileDataEvent");
SetVersion(-1);
SetAPIVersion(BRO_PLUGIN_API_VERSION);
SetDynamicPlugin(false);
AddComponent(new ::file_analysis::Component("DATA_EVENT", ::file_analysis::DataEvent::Instantiate));
SetDescription("Delivers file content via events");
AddComponent(new ::file_analysis::Component("DATA_EVENT",
::file_analysis::DataEvent::Instantiate));
plugin::Configuration config;
config.name = "Bro::FileDataEvent";
config.description = "Delivers file content";
return config;
}
};
} plugin;
Plugin __plugin;
} }
}
}

View file

@ -5,4 +5,6 @@ include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}
bro_plugin_begin(Bro FileExtract)
bro_plugin_cc(Extract.cc Plugin.cc ../../Analyzer.cc)
bro_plugin_bif(events.bif)
bro_plugin_bif(functions.bif)
bro_plugin_end()

View file

@ -4,15 +4,17 @@
#include "Extract.h"
#include "util.h"
#include "Event.h"
#include "file_analysis/Manager.h"
using namespace file_analysis;
Extract::Extract(RecordVal* args, File* file, const string& arg_filename)
Extract::Extract(RecordVal* args, File* file, const string& arg_filename,
uint64 arg_limit)
: file_analysis::Analyzer(file_mgr->GetComponentTag("EXTRACT"), args, file),
filename(arg_filename)
filename(arg_filename), limit(arg_limit), depth(0)
{
fd = open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0666);
fd = open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_APPEND, 0666);
if ( fd < 0 )
{
@ -29,22 +31,94 @@ Extract::~Extract()
safe_close(fd);
}
file_analysis::Analyzer* Extract::Instantiate(RecordVal* args, File* file)
static Val* get_extract_field_val(RecordVal* args, const char* name)
{
using BifType::Record::Files::AnalyzerArgs;
Val* v = args->Lookup(AnalyzerArgs->FieldOffset("extract_filename"));
Val* rval = args->Lookup(name);
if ( ! v )
return 0;
if ( ! rval )
reporter->Error("File extraction analyzer missing arg field: %s", name);
return new Extract(args, file, v->AsString()->CheckString());
return rval;
}
bool Extract::DeliverChunk(const u_char* data, uint64 len, uint64 offset)
file_analysis::Analyzer* Extract::Instantiate(RecordVal* args, File* file)
{
Val* fname = get_extract_field_val(args, "extract_filename");
Val* limit = get_extract_field_val(args, "extract_limit");
if ( ! fname || ! limit )
return 0;
return new Extract(args, file, fname->AsString()->CheckString(),
limit->AsCount());
}
static bool check_limit_exceeded(uint64 lim, uint64 depth, uint64 len, uint64* n)
{
if ( lim == 0 )
{
*n = len;
return false;
}
if ( depth >= lim )
{
*n = 0;
return true;
}
else if ( depth + len > lim )
{
*n = lim - depth;
return true;
}
else
{
*n = len;
}
return false;
}
bool Extract::DeliverStream(const u_char* data, uint64 len)
{
if ( ! fd )
return false;
safe_pwrite(fd, data, len, offset);
uint64 towrite = 0;
bool limit_exceeded = check_limit_exceeded(limit, depth, len, &towrite);
if ( limit_exceeded && file_extraction_limit )
{
File* f = GetFile();
val_list* vl = new val_list();
vl->append(f->GetVal()->Ref());
vl->append(Args()->Ref());
vl->append(new Val(limit, TYPE_COUNT));
vl->append(new Val(len, TYPE_COUNT));
f->FileEvent(file_extraction_limit, vl);
// Limit may have been modified by a BIF, re-check it.
limit_exceeded = check_limit_exceeded(limit, depth, len, &towrite);
}
if ( towrite > 0 )
{
safe_write(fd, reinterpret_cast<const char*>(data), towrite);
depth += towrite;
}
return ( ! limit_exceeded );
}
bool Extract::Undelivered(uint64 offset, uint64 len)
{
if ( depth == offset )
{
char* tmp = new char[len]();
safe_write(fd, tmp, len);
delete [] tmp;
depth += len;
}
return true;
}

View file

@ -9,6 +9,8 @@
#include "File.h"
#include "Analyzer.h"
#include "analyzer/extract/events.bif.h"
namespace file_analysis {
/**
@ -26,11 +28,18 @@ public:
* Write a chunk of file data to the local extraction file.
* @param data pointer to a chunk of file data.
* @param len number of bytes in the data chunk.
* @param offset number of bytes from start of file at which chunk starts.
* @return false if there was no extraction file open and the data couldn't
* be written, else true.
*/
virtual bool DeliverChunk(const u_char* data, uint64 len, uint64 offset);
virtual bool DeliverStream(const u_char* data, uint64 len);
/**
* Report undelivered bytes.
* @param offset distance into the file where the gap occurred.
* @param len number of bytes undelivered.
* @return true
*/
virtual bool Undelivered(uint64 offset, uint64 len);
/**
* Create a new instance of an Extract analyzer.
@ -41,6 +50,13 @@ public:
*/
static file_analysis::Analyzer* Instantiate(RecordVal* args, File* file);
/**
* Sets the maximum allowed extracted file size. A value of zero means
* "no limit".
* @param bytes number of bytes allowed to be extracted
*/
void SetLimit(uint64 bytes) { limit = bytes; }
protected:
/**
@ -49,12 +65,16 @@ protected:
* @param file the file to which the analyzer will be attached.
* @param arg_filename a file system path which specifies the local file
* to which the contents of the file will be extracted/written.
* @param arg_limit the maximum allowed file size.
*/
Extract(RecordVal* args, File* file, const string& arg_filename);
Extract(RecordVal* args, File* file, const string& arg_filename,
uint64 arg_limit);
private:
string filename;
int fd;
uint64 limit;
uint64 depth;
};
} // namespace file_analysis

View file

@ -1,26 +1,24 @@
// See the file in the main distribution directory for copyright.
#include "plugin/Plugin.h"
#include "file_analysis/Component.h"
#include "Extract.h"
namespace plugin { namespace Bro_FileExtract {
namespace plugin {
namespace Bro_FileExtract {
class Plugin : public plugin::Plugin {
protected:
void InitPreScript()
public:
plugin::Configuration Configure()
{
SetName("Bro::FileExtract");
SetVersion(-1);
SetAPIVersion(BRO_PLUGIN_API_VERSION);
SetDynamicPlugin(false);
AddComponent(new ::file_analysis::Component("EXTRACT", ::file_analysis::Extract::Instantiate));
SetDescription("Extract file content to local file system");
AddComponent(new ::file_analysis::Component("EXTRACT",
::file_analysis::Extract::Instantiate));
plugin::Configuration config;
config.name = "Bro::FileExtract";
config.description = "Extract file content";
return config;
}
};
} plugin;
Plugin __plugin;
} }
}
}

View file

@ -0,0 +1,17 @@
## This event is generated when a file extraction analyzer is about
## to exceed the maximum permitted file size allowed by the
## *extract_limit* field of :bro:see:`Files::AnalyzerArgs`.
## The analyzer is automatically removed from file *f*.
##
## f: The file.
##
## args: Arguments that identify a particular file extraction analyzer.
## This is only provided to be able to pass along to
## :bro:see:`FileExtract::set_limit`.
##
## limit: The limit, in bytes, the extracted file is about to breach.
##
## len: The length of the file chunk about to be written.
##
## .. bro:see:: Files::add_analyzer Files::ANALYZER_EXTRACT
event file_extraction_limit%(f: fa_file, args: any, limit: count, len: count%);
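A minimal script-layer sketch of how this event is meant to be used, assuming the values and helper names here are illustrative only. The AnalyzerArgs field names extract_filename and extract_limit come from the Extract analyzer above, file_new is the standard file-analysis entry point, and the internal __set_limit BIF (whose signature appears in functions.bif below) stands in for the FileExtract::set_limit wrapper.
event file_new(f: fa_file)
	{
	# Attach an extraction analyzer capped at 1 MB (illustrative values).
	Files::add_analyzer(f, Files::ANALYZER_EXTRACT,
	                    [$extract_filename="extract_me", $extract_limit=1048576]);
	}
event file_extraction_limit(f: fa_file, args: any, limit: count, len: count)
	{
	# Grow the limit once to 10 MB; otherwise the analyzer is removed
	# automatically as described above. Normally one would go through the
	# FileExtract::set_limit wrapper; the internal BIF is used here because
	# its exact signature is what this change adds.
	if ( limit < 10485760 )
		FileExtract::__set_limit(f$id, args, 10485760);
	}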

View file

@ -0,0 +1,19 @@
##! Internal functions used by the extraction file analyzer.
module FileExtract;
%%{
#include "file_analysis/Manager.h"
%%}
## :bro:see:`FileExtract::set_limit`.
function FileExtract::__set_limit%(file_id: string, args: any, n: count%): bool
%{
using BifType::Record::Files::AnalyzerArgs;
RecordVal* rv = args->AsRecordVal()->CoerceTo(AnalyzerArgs);
bool result = file_mgr->SetExtractionLimit(file_id->CheckString(), rv, n);
Unref(rv);
return new Val(result, TYPE_BOOL);
%}
module GLOBAL;

View file

@ -1,33 +1,26 @@
// See the file in the main distribution directory for copyright.
#include "plugin/Plugin.h"
#include "file_analysis/Component.h"
#include "Hash.h"
namespace plugin { namespace Bro_FileHash {
namespace plugin {
namespace Bro_FileHash {
class Plugin : public plugin::Plugin {
protected:
void InitPreScript()
public:
plugin::Configuration Configure()
{
SetName("Bro::FileHash");
SetVersion(-1);
SetAPIVersion(BRO_PLUGIN_API_VERSION);
SetDynamicPlugin(false);
AddComponent(new ::file_analysis::Component("MD5", ::file_analysis::MD5::Instantiate));
AddComponent(new ::file_analysis::Component("SHA1", ::file_analysis::SHA1::Instantiate));
AddComponent(new ::file_analysis::Component("SHA256", ::file_analysis::SHA256::Instantiate));
SetDescription("Hash file content");
AddComponent(new ::file_analysis::Component("MD5",
::file_analysis::MD5::Instantiate));
AddComponent(new ::file_analysis::Component("SHA1",
::file_analysis::SHA1::Instantiate));
AddComponent(new ::file_analysis::Component("SHA256",
::file_analysis::SHA256::Instantiate));
extern std::list<std::pair<const char*, int> > __bif_events_init();
AddBifInitFunction(&__bif_events_init);
plugin::Configuration config;
config.name = "Bro::FileHash";
config.description = "Hash file content";
return config;
}
};
} plugin;
Plugin __plugin;
} }
}
}

View file

@ -0,0 +1,11 @@
include(BroPlugin)
include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_BINARY_DIR})
bro_plugin_begin(Bro Unified2)
bro_plugin_cc(Unified2.cc Plugin.cc ../../Analyzer.cc)
bro_plugin_bif(events.bif types.bif)
bro_plugin_pac(unified2.pac unified2-file.pac unified2-analyzer.pac)
bro_plugin_end()

View file

@ -0,0 +1,26 @@
// See the file in the main distribution directory for copyright.
// See the file "COPYING" in the main distribution directory for copyright.
#include "plugin/Plugin.h"
#include "Unified2.h"
namespace plugin {
namespace Bro_Unified2 {
class Plugin : public plugin::Plugin {
public:
plugin::Configuration Configure()
{
AddComponent(new ::file_analysis::Component("UNIFIED2", ::file_analysis::Unified2::Instantiate));
plugin::Configuration config;
config.name = "Bro::Unified2";
config.description = "Analyze Unified2 alert files.";
return config;
}
} plugin;
}
}

View file

@ -0,0 +1,38 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include "Unified2.h"
#include "file_analysis/Manager.h"
using namespace file_analysis;
Unified2::Unified2(RecordVal* args, File* file)
: file_analysis::Analyzer(file_mgr->GetComponentTag("UNIFIED2"), args, file)
{
interp = new binpac::Unified2::Unified2_Analyzer(this);
}
Unified2::~Unified2()
{
delete interp;
}
file_analysis::Analyzer* Unified2::Instantiate(RecordVal* args, File* file)
{
return new Unified2(args, file);
}
bool Unified2::DeliverStream(const u_char* data, uint64 len)
{
try
{
interp->NewData(true, data, data + len);
}
catch ( const binpac::Exception& e )
{
printf("Binpac exception: %s\n", e.c_msg());
return false;
}
return true;
}

View file

@ -0,0 +1,37 @@
// See the file "COPYING" in the main distribution directory for copyright.
#ifndef FILE_ANALYSIS_UNIFIED2_H
#define FILE_ANALYSIS_UNIFIED2_H
#include <string>
#include "Val.h"
#include "File.h"
#include "Analyzer.h"
#include "unified2_pac.h"
namespace file_analysis {
/**
* An analyzer for files in the Snort unified2 alert format.
*/
class Unified2 : public file_analysis::Analyzer {
public:
virtual ~Unified2();
virtual bool DeliverStream(const u_char* data, uint64 len);
static file_analysis::Analyzer* Instantiate(RecordVal* args, File* file);
protected:
Unified2(RecordVal* args, File* file);
private:
binpac::Unified2::Unified2_Analyzer* interp;
string filename;
};
} // namespace file_analysis
#endif

View file

@ -0,0 +1,17 @@
## Abstract all of the various Unified2 event formats into
## a single event.
##
## f: The file.
##
## ev: The IDS event record parsed from the unified2 file.
##
event unified2_event%(f: fa_file, ev: Unified2::IDSEvent%);
## The Unified2 packet format event.
##
## f: The file.
##
## pkt: The packet record parsed from the unified2 file.
##
event unified2_packet%(f: fa_file, pkt: Unified2::Packet%);
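A minimal handler sketch for these two events. The concrete fields of Unified2::IDSEvent and Unified2::Packet are declared script-side (see types.bif below), so the records are only printed whole here; fa_file's standard id field is assumed.
event unified2_event(f: fa_file, ev: Unified2::IDSEvent)
	{
	print fmt("unified2 IDS event in file %s", f$id), ev;
	}
event unified2_packet(f: fa_file, pkt: Unified2::Packet)
	{
	print fmt("unified2 packet record in file %s", f$id), pkt;
	}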

View file

@ -0,0 +1,2 @@
type Unified2::IDSEvent: record;
type Unified2::Packet: record;

View file

@ -0,0 +1,170 @@
%extern{
#include "Event.h"
#include "file_analysis/File.h"
#include "events.bif.h"
#include "types.bif.h"
#include "IPAddr.h"
%}
refine flow Flow += {
%member{
%}
%init{
%}
%eof{
%}
%cleanup{
%}
function ts_to_double(ts: Time): double
%{
// Use a floating-point divisor so the fractional seconds are not truncated.
double t = ${ts.seconds} + (${ts.microseconds} / 1000000.0);
return t;
%}
function unified2_addr_to_bro_addr(a: uint32[]): AddrVal
%{
if ( a->size() == 1 )
{
return new AddrVal(IPAddr(IPv4, &(a->at(0)), IPAddr::Host));
}
else if ( a->size() == 4 )
{
uint32 tmp[4] = { a->at(0), a->at(1), a->at(2), a->at(3) };
return new AddrVal(IPAddr(IPv6, tmp, IPAddr::Host));
}
else
{
// Should never reach here.
return new AddrVal(1);
}
%}
function to_port(n: uint16, p: uint8): PortVal
%{
TransportProto proto = TRANSPORT_UNKNOWN;
switch ( p ) {
case 1: proto = TRANSPORT_ICMP; break;
case 6: proto = TRANSPORT_TCP; break;
case 17: proto = TRANSPORT_UDP; break;
}
return new PortVal(n, proto);
%}
#function proc_record(rec: Record) : bool
# %{
# return true;
# %}
function proc_ids_event(ev: IDS_Event) : bool
%{
if ( ::unified2_event )
{
RecordVal* ids_event = new RecordVal(BifType::Record::Unified2::IDSEvent);
ids_event->Assign(0, new Val(${ev.sensor_id}, TYPE_COUNT));
ids_event->Assign(1, new Val(${ev.event_id}, TYPE_COUNT));
ids_event->Assign(2, new Val(ts_to_double(${ev.ts}), TYPE_TIME));
ids_event->Assign(3, new Val(${ev.signature_id}, TYPE_COUNT));
ids_event->Assign(4, new Val(${ev.generator_id}, TYPE_COUNT));
ids_event->Assign(5, new Val(${ev.signature_revision}, TYPE_COUNT));
ids_event->Assign(6, new Val(${ev.classification_id}, TYPE_COUNT));
ids_event->Assign(7, new Val(${ev.priority_id}, TYPE_COUNT));
ids_event->Assign(8, unified2_addr_to_bro_addr(${ev.src_ip}));
ids_event->Assign(9, unified2_addr_to_bro_addr(${ev.dst_ip}));
ids_event->Assign(10, to_port(${ev.src_p}, ${ev.protocol}));
ids_event->Assign(11, to_port(${ev.dst_p}, ${ev.protocol}));
ids_event->Assign(17, new Val(${ev.packet_action}, TYPE_COUNT));
val_list* vl = new val_list();
vl->append(connection()->bro_analyzer()->GetFile()->GetVal()->Ref());
vl->append(ids_event);
mgr.QueueEvent(::unified2_event, vl, SOURCE_LOCAL);
}
return true;
%}
function proc_ids_event_2(ev: IDS_Event_2) : bool
%{
if ( ::unified2_event )
{
RecordVal* ids_event = new RecordVal(BifType::Record::Unified2::IDSEvent);
ids_event->Assign(0, new Val(${ev.sensor_id}, TYPE_COUNT));
ids_event->Assign(1, new Val(${ev.event_id}, TYPE_COUNT));
ids_event->Assign(2, new Val(ts_to_double(${ev.ts}), TYPE_TIME));
ids_event->Assign(3, new Val(${ev.signature_id}, TYPE_COUNT));
ids_event->Assign(4, new Val(${ev.generator_id}, TYPE_COUNT));
ids_event->Assign(5, new Val(${ev.signature_revision}, TYPE_COUNT));
ids_event->Assign(6, new Val(${ev.classification_id}, TYPE_COUNT));
ids_event->Assign(7, new Val(${ev.priority_id}, TYPE_COUNT));
ids_event->Assign(8, unified2_addr_to_bro_addr(${ev.src_ip}));
ids_event->Assign(9, unified2_addr_to_bro_addr(${ev.dst_ip}));
ids_event->Assign(10, to_port(${ev.src_p}, ${ev.protocol}));
ids_event->Assign(11, to_port(${ev.dst_p}, ${ev.protocol}));
ids_event->Assign(12, new Val(${ev.impact_flag}, TYPE_COUNT));
ids_event->Assign(13, new Val(${ev.impact}, TYPE_COUNT));
ids_event->Assign(14, new Val(${ev.blocked}, TYPE_COUNT));
ids_event->Assign(15, new Val(${ev.mpls_label}, TYPE_COUNT));
ids_event->Assign(16, new Val(${ev.vlan_id}, TYPE_COUNT));
val_list* vl = new val_list();
vl->append(connection()->bro_analyzer()->GetFile()->GetVal()->Ref());
vl->append(ids_event);
mgr.QueueEvent(::unified2_event, vl, SOURCE_LOCAL);
}
return true;
%}
function proc_packet(pkt: Packet) : bool
%{
if ( ::unified2_packet )
{
RecordVal* packet = new RecordVal(BifType::Record::Unified2::Packet);
packet->Assign(0, new Val(${pkt.sensor_id}, TYPE_COUNT));
packet->Assign(1, new Val(${pkt.event_id}, TYPE_COUNT));
packet->Assign(2, new Val(${pkt.event_second}, TYPE_COUNT));
packet->Assign(3, new Val(ts_to_double(${pkt.packet_ts}), TYPE_TIME));
packet->Assign(4, new Val(${pkt.link_type}, TYPE_COUNT));
packet->Assign(5, bytestring_to_val(${pkt.packet_data}));
val_list* vl = new val_list();
vl->append(connection()->bro_analyzer()->GetFile()->GetVal()->Ref());
vl->append(packet);
mgr.QueueEvent(::unified2_packet, vl, SOURCE_LOCAL);
}
return true;
%}
#function proc_unknown_record_type(rec: UnknownRecordType) : bool
# %{
# printf("unknown packet type\n");
# return true;
# %}
};
#refine typeattr Record += &let {
# proc : bool = $context.flow.proc_record(this);
#};
refine typeattr IDS_Event += &let {
proc : bool = $context.flow.proc_ids_event(this);
};
refine typeattr IDS_Event_2 += &let {
proc : bool = $context.flow.proc_ids_event_2(this);
};
refine typeattr Packet += &let {
proc : bool = $context.flow.proc_packet(this);
};
#refine typeattr UnknownRecordType += &let {
# proc : bool = $context.flow.proc_unknown_record_type(this);
#};

View file

@ -0,0 +1,91 @@
enum Types {
PACKET = 2,
IDS_EVENT = 7,
IDS_EVENT_IPV6 = 72,
IDS_EVENT_2 = 104,
IDS_EVENT_IPV6_2 = 105,
EXTRA_DATA = 110,
};
type Time = record {
seconds: uint32;
microseconds: uint32;
} &byteorder=bigendian;
type Record = record {
rtype: uint32;
length: uint32;
data: case rtype of {
PACKET -> packet: Packet(this);
IDS_EVENT -> ids_event: IDS_Event(this, 1);
IDS_EVENT_IPV6 -> ids_event_ipv6: IDS_Event(this, 4);
IDS_EVENT_2 -> ids_event_vlan: IDS_Event_2(this, 1);
IDS_EVENT_IPV6_2 -> ids_event_ipv6_vlan: IDS_Event_2(this, 4);
#EXTRA_DATA -> extra_data: ExtraData(this);
default -> unknown_record_type: UnknownRecordType(this);
};
} &byteorder=bigendian &length=length+8;
type IDS_Event(rec: Record, ip_len: int) = record {
sensor_id: uint32;
event_id: uint32;
ts: Time;
signature_id: uint32;
generator_id: uint32;
signature_revision: uint32;
classification_id: uint32;
priority_id: uint32;
src_ip: uint32[ip_len];
dst_ip: uint32[ip_len];
src_p: uint16;
dst_p: uint16;
protocol: uint8;
packet_action: uint8;
} &byteorder=bigendian;
type IDS_Event_2(rec: Record, ip_len: int) = record {
sensor_id: uint32;
event_id: uint32;
ts: Time;
signature_id: uint32;
generator_id: uint32;
signature_revision: uint32;
classification_id: uint32;
priority_id: uint32;
src_ip: uint32[ip_len];
dst_ip: uint32[ip_len];
src_p: uint16;
dst_p: uint16;
protocol: uint8;
impact_flag: uint8;
impact: uint8;
blocked: uint8;
mpls_label: uint32;
vlan_id: uint16;
pad: uint16;
} &byteorder=bigendian;
type Packet(rec: Record) = record {
sensor_id: uint32;
event_id: uint32;
event_second: uint32;
packet_ts: Time;
link_type: uint32;
packet_len: uint32;
packet_data: bytestring &length=packet_len;
} &byteorder=bigendian;
type ExtraData(rec: Record) = record {
sensor_id: uint32;
event_id: uint32;
event_second: uint32;
extra_type: uint32;
data_type: uint32;
blob_len: uint32;
blob: bytestring &length=blob_len;
} &byteorder=bigendian &length=rec.length;
type UnknownRecordType(rec: Record) = record {
data: bytestring &transient &length=rec.length;
} &byteorder=bigendian &length=rec.length;

View file

@ -0,0 +1,21 @@
%include binpac.pac
%include bro.pac
analyzer Unified2 withcontext {
analyzer: Unified2_Analyzer;
flow: Flow;
};
analyzer Unified2_Analyzer(bro_analyzer: BroFileAnalyzer) {
downflow = Flow;
upflow = Flow;
};
%include unified2-file.pac
flow Flow {
flowunit = Record withcontext(connection, this);
};
%include unified2-analyzer.pac

View file

@ -0,0 +1,10 @@
include(BroPlugin)
include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_BINARY_DIR})
bro_plugin_begin(Bro X509)
bro_plugin_cc(X509.cc Plugin.cc)
bro_plugin_bif(events.bif types.bif functions.bif)
bro_plugin_end()

View file

@ -0,0 +1,25 @@
// See the file in the main distribution directory for copyright.
#include "plugin/Plugin.h"
#include "X509.h"
namespace plugin {
namespace Bro_X509 {
class Plugin : public plugin::Plugin {
public:
plugin::Configuration Configure()
{
AddComponent(new ::file_analysis::Component("X509", ::file_analysis::X509::Instantiate));
plugin::Configuration config;
config.name = "Bro::X509";
config.description = "X509 analyzer";
return config;
}
} plugin;
}
}

View file

@ -0,0 +1,635 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include <string>
#include "X509.h"
#include "Event.h"
#include "events.bif.h"
#include "types.bif.h"
#include "file_analysis/Manager.h"
#include <openssl/x509.h>
#include <openssl/x509v3.h>
#include <openssl/asn1.h>
#include <openssl/opensslconf.h>
using namespace file_analysis;
IMPLEMENT_SERIAL(X509Val, SER_X509_VAL);
file_analysis::X509::X509(RecordVal* args, file_analysis::File* file)
: file_analysis::Analyzer(file_mgr->GetComponentTag("X509"), args, file)
{
cert_data.clear();
}
bool file_analysis::X509::DeliverStream(const u_char* data, uint64 len)
{
// just add it to the data we have so far, since we cannot do anything else anyways...
cert_data.append(reinterpret_cast<const char*>(data), len);
return true;
}
bool file_analysis::X509::Undelivered(uint64 offset, uint64 len)
{
return false;
}
bool file_analysis::X509::EndOfFile()
{
// ok, now we can try to parse the certificate with openssl. Should
// be rather straightforward...
const unsigned char* cert_char = reinterpret_cast<const unsigned char*>(cert_data.data());
::X509* ssl_cert = d2i_X509(NULL, &cert_char, cert_data.size());
if ( ! ssl_cert )
{
reporter->Weird(fmt("Could not parse X509 certificate (fuid %s)", GetFile()->GetID().c_str()));
return false;
}
X509Val* cert_val = new X509Val(ssl_cert); // cert_val takes ownership of ssl_cert
RecordVal* cert_record = ParseCertificate(cert_val); // parse basic information into record
// and send the record on to scriptland
val_list* vl = new val_list();
vl->append(GetFile()->GetVal()->Ref());
vl->append(cert_val->Ref());
vl->append(cert_record->Ref()); // we Ref it here, because we want to keep a copy around for now...
mgr.QueueEvent(x509_certificate, vl);
// after parsing the certificate - parse the extensions...
int num_ext = X509_get_ext_count(ssl_cert);
for ( int k = 0; k < num_ext; ++k )
{
X509_EXTENSION* ex = X509_get_ext(ssl_cert, k);
if ( ! ex )
continue;
ParseExtension(ex);
}
// X509_free(ssl_cert); We do _not_ free the certificate here. It is refcounted
// inside the X509Val that is sent on in the cert record to scriptland.
//
// The certificate will be freed when the last X509Val is Unref'd.
Unref(cert_record); // Unref the RecordVal that we kept around from ParseCertificate
Unref(cert_val); // Same for cert_val
return false;
}
RecordVal* file_analysis::X509::ParseCertificate(X509Val* cert_val)
{
::X509* ssl_cert = cert_val->GetCertificate();
char buf[2048]; // we need a buffer for some of the openssl functions
memset(buf, 0, sizeof(buf));
RecordVal* pX509Cert = new RecordVal(BifType::Record::X509::Certificate);
BIO *bio = BIO_new(BIO_s_mem());
pX509Cert->Assign(0, new Val((uint64) X509_get_version(ssl_cert) + 1, TYPE_COUNT));
i2a_ASN1_INTEGER(bio, X509_get_serialNumber(ssl_cert));
int len = BIO_read(bio, buf, sizeof(buf));
pX509Cert->Assign(1, new StringVal(len, buf));
BIO_reset(bio);
X509_NAME_print_ex(bio, X509_get_subject_name(ssl_cert), 0, XN_FLAG_RFC2253);
len = BIO_gets(bio, buf, sizeof(buf));
pX509Cert->Assign(2, new StringVal(len, buf));
BIO_reset(bio);
X509_NAME_print_ex(bio, X509_get_issuer_name(ssl_cert), 0, XN_FLAG_RFC2253);
len = BIO_gets(bio, buf, sizeof(buf));
pX509Cert->Assign(3, new StringVal(len, buf));
BIO_free(bio);
pX509Cert->Assign(4, new Val(GetTimeFromAsn1(X509_get_notBefore(ssl_cert)), TYPE_TIME));
pX509Cert->Assign(5, new Val(GetTimeFromAsn1(X509_get_notAfter(ssl_cert)), TYPE_TIME));
// we only read 255 bytes because byte 256 is always 0.
// if the string is longer than 255, that will be our null-termination,
// otherwise i2t does null-terminate.
if ( ! i2t_ASN1_OBJECT(buf, 255, ssl_cert->cert_info->key->algor->algorithm) )
buf[0] = 0;
pX509Cert->Assign(6, new StringVal(buf));
if ( ! i2t_ASN1_OBJECT(buf, 255, ssl_cert->sig_alg->algorithm) )
buf[0] = 0;
pX509Cert->Assign(7, new StringVal(buf));
// Things we can do when we have the key...
EVP_PKEY *pkey = X509_extract_key(ssl_cert);
if ( pkey != NULL )
{
if ( pkey->type == EVP_PKEY_DSA )
pX509Cert->Assign(8, new StringVal("dsa"));
else if ( pkey->type == EVP_PKEY_RSA )
{
pX509Cert->Assign(8, new StringVal("rsa"));
char *exponent = BN_bn2dec(pkey->pkey.rsa->e);
if ( exponent != NULL )
{
pX509Cert->Assign(10, new StringVal(exponent));
OPENSSL_free(exponent);
exponent = NULL;
}
}
#ifndef OPENSSL_NO_EC
else if ( pkey->type == EVP_PKEY_EC )
{
pX509Cert->Assign(8, new StringVal("ecdsa"));
pX509Cert->Assign(11, KeyCurve(pkey));
}
#endif
unsigned int length = KeyLength(pkey);
if ( length > 0 )
pX509Cert->Assign(9, new Val(length, TYPE_COUNT));
EVP_PKEY_free(pkey);
}
return pX509Cert;
}
StringVal* file_analysis::X509::GetExtensionFromBIO(BIO* bio)
{
BIO_flush(bio);
ERR_clear_error();
int length = BIO_pending(bio);
if ( ERR_peek_error() != 0 )
{
char tmp[120];
ERR_error_string_n(ERR_get_error(), tmp, sizeof(tmp));
reporter->Weird(fmt("X509::GetExtensionFromBIO: %s", tmp));
BIO_free_all(bio);
return 0;
}
if ( length == 0 )
{
BIO_free_all(bio);
return new StringVal("");
}
char* buffer = (char*) malloc(length);
if ( ! buffer )
{
// Just emit an error here and try to continue instead of aborting
// because it's unclear how reliable the length value is.
reporter->Error("X509::GetExtensionFromBIO malloc(%d) failed", length);
BIO_free_all(bio);
return 0;
}
BIO_read(bio, (void*) buffer, length);
StringVal* ext_val = new StringVal(length, buffer);
free(buffer);
BIO_free_all(bio);
return ext_val;
}
void file_analysis::X509::ParseExtension(X509_EXTENSION* ex)
{
char name[256];
char oid[256];
ASN1_OBJECT* ext_asn = X509_EXTENSION_get_object(ex);
const char* short_name = OBJ_nid2sn(OBJ_obj2nid(ext_asn));
OBJ_obj2txt(name, 255, ext_asn, 0);
OBJ_obj2txt(oid, 255, ext_asn, 1);
int critical = 0;
if ( X509_EXTENSION_get_critical(ex) != 0 )
critical = 1;
BIO *bio = BIO_new(BIO_s_mem());
if( ! X509V3_EXT_print(bio, ex, 0, 0))
M_ASN1_OCTET_STRING_print(bio,ex->value);
StringVal* ext_val = GetExtensionFromBIO(bio);
if ( ! ext_val )
ext_val = new StringVal(0, "");
RecordVal* pX509Ext = new RecordVal(BifType::Record::X509::Extension);
pX509Ext->Assign(0, new StringVal(name));
if ( short_name and strlen(short_name) > 0 )
pX509Ext->Assign(1, new StringVal(short_name));
pX509Ext->Assign(2, new StringVal(oid));
pX509Ext->Assign(3, new Val(critical, TYPE_BOOL));
pX509Ext->Assign(4, ext_val);
// send off generic extension event
//
// and then look if we have a specialized event for the extension we just
// parsed. And if we have it, we send the specialized event on top of the
// generic event that we just had. I know, that is... kind of not nice,
// but I am not sure if there is a better way to do it...
val_list* vl = new val_list();
vl->append(GetFile()->GetVal()->Ref());
vl->append(pX509Ext);
mgr.QueueEvent(x509_extension, vl);
// look if we have a specialized handler for this event...
if ( OBJ_obj2nid(ext_asn) == NID_basic_constraints )
ParseBasicConstraints(ex);
else if ( OBJ_obj2nid(ext_asn) == NID_subject_alt_name )
ParseSAN(ex);
}
void file_analysis::X509::ParseBasicConstraints(X509_EXTENSION* ex)
{
assert(OBJ_obj2nid(X509_EXTENSION_get_object(ex)) == NID_basic_constraints);
BASIC_CONSTRAINTS *constr = (BASIC_CONSTRAINTS *) X509V3_EXT_d2i(ex);
if ( constr )
{
RecordVal* pBasicConstraint = new RecordVal(BifType::Record::X509::BasicConstraints);
pBasicConstraint->Assign(0, new Val(constr->ca ? 1 : 0, TYPE_BOOL));
if ( constr->pathlen )
pBasicConstraint->Assign(1, new Val((int32_t) ASN1_INTEGER_get(constr->pathlen), TYPE_COUNT));
val_list* vl = new val_list();
vl->append(GetFile()->GetVal()->Ref());
vl->append(pBasicConstraint);
mgr.QueueEvent(x509_ext_basic_constraints, vl);
BASIC_CONSTRAINTS_free(constr);
}
else
reporter->Weird(fmt("Certificate with invalid BasicConstraint. fuid %s", GetFile()->GetID().c_str()));
}
void file_analysis::X509::ParseSAN(X509_EXTENSION* ext)
{
assert(OBJ_obj2nid(X509_EXTENSION_get_object(ext)) == NID_subject_alt_name);
GENERAL_NAMES *altname = (GENERAL_NAMES*)X509V3_EXT_d2i(ext);
if ( ! altname )
{
reporter->Weird(fmt("Could not parse subject alternative names. fuid %s", GetFile()->GetID().c_str()));
return;
}
VectorVal* names = 0;
VectorVal* emails = 0;
VectorVal* uris = 0;
VectorVal* ips = 0;
unsigned int otherfields = 0;
for ( int i = 0; i < sk_GENERAL_NAME_num(altname); i++ )
{
GENERAL_NAME *gen = sk_GENERAL_NAME_value(altname, i);
assert(gen);
if ( gen->type == GEN_DNS || gen->type == GEN_URI || gen->type == GEN_EMAIL )
{
if ( ASN1_STRING_type(gen->d.ia5) != V_ASN1_IA5STRING )
{
reporter->Weird(fmt("DNS-field does not contain an IA5String. fuid %s", GetFile()->GetID().c_str()));
continue;
}
const char* name = (const char*) ASN1_STRING_data(gen->d.ia5);
StringVal* bs = new StringVal(name);
switch ( gen->type )
{
case GEN_DNS:
if ( names == 0 )
names = new VectorVal(internal_type("string_vec")->AsVectorType());
names->Assign(names->Size(), bs);
break;
case GEN_URI:
if ( uris == 0 )
uris = new VectorVal(internal_type("string_vec")->AsVectorType());
uris->Assign(uris->Size(), bs);
break;
case GEN_EMAIL:
if ( emails == 0 )
emails = new VectorVal(internal_type("string_vec")->AsVectorType());
emails->Assign(emails->Size(), bs);
break;
}
}
else if ( gen->type == GEN_IPADD )
{
if ( ips == 0 )
ips = new VectorVal(internal_type("addr_vec")->AsVectorType());
uint32* addr = (uint32*) gen->d.ip->data;
if( gen->d.ip->length == 4 )
ips->Assign(ips->Size(), new AddrVal(*addr));
else if ( gen->d.ip->length == 16 )
ips->Assign(ips->Size(), new AddrVal(addr));
else
{
reporter->Weird(fmt("Weird IP address length %d in subject alternative name. fuid %s", gen->d.ip->length, GetFile()->GetID().c_str()));
continue;
}
}
else
{
// reporter->Error("Subject alternative name contained unsupported fields. fuid %s", GetFile()->GetID().c_str());
// This happens quite often - just mark it
otherfields = 1;
continue;
}
}
RecordVal* sanExt = new RecordVal(BifType::Record::X509::SubjectAlternativeName);
if ( names != 0 )
sanExt->Assign(0, names);
if ( uris != 0 )
sanExt->Assign(1, uris);
if ( emails != 0 )
sanExt->Assign(2, emails);
if ( ips != 0 )
sanExt->Assign(3, ips);
sanExt->Assign(4, new Val(otherfields, TYPE_BOOL));
val_list* vl = new val_list();
vl->append(GetFile()->GetVal()->Ref());
vl->append(sanExt);
mgr.QueueEvent(x509_ext_subject_alternative_name, vl);
GENERAL_NAMES_free(altname);
}
StringVal* file_analysis::X509::KeyCurve(EVP_PKEY *key)
{
assert(key != NULL);
#ifdef OPENSSL_NO_EC
// well, we do not have EC-Support...
return NULL;
#else
if ( key->type != EVP_PKEY_EC )
{
// no EC-key - no curve name
return NULL;
}
const EC_GROUP *group;
int nid;
if ( (group = EC_KEY_get0_group(key->pkey.ec)) == NULL)
// I guess we could not parse this
return NULL;
nid = EC_GROUP_get_curve_name(group);
if ( nid == 0 )
// and an invalid nid...
return NULL;
const char * curve_name = OBJ_nid2sn(nid);
if ( curve_name == NULL )
return NULL;
return new StringVal(curve_name);
#endif
}
unsigned int file_analysis::X509::KeyLength(EVP_PKEY *key)
{
assert(key != NULL);
switch(key->type) {
case EVP_PKEY_RSA:
return BN_num_bits(key->pkey.rsa->n);
case EVP_PKEY_DSA:
return BN_num_bits(key->pkey.dsa->p);
#ifndef OPENSSL_NO_EC
case EVP_PKEY_EC:
{
BIGNUM* ec_order = BN_new();
if ( ! ec_order )
// could not malloc bignum?
return 0;
const EC_GROUP *group = EC_KEY_get0_group(key->pkey.ec);
if ( ! group )
{
// unknown EC group
BN_free(ec_order);
return 0;
}
if ( ! EC_GROUP_get_order(group, ec_order, NULL) )
{
// could not get ec-group-order
BN_free(ec_order);
return 0;
}
unsigned int length = BN_num_bits(ec_order);
BN_free(ec_order);
return length;
}
#endif
default:
return 0; // unknown public key type
}
reporter->InternalError("cannot be reached");
}
double file_analysis::X509::GetTimeFromAsn1(const ASN1_TIME* atime)
{
time_t lResult = 0;
char lBuffer[24];
char* pBuffer = lBuffer;
size_t lTimeLength = atime->length;
char * pString = (char *) atime->data;
if ( atime->type == V_ASN1_UTCTIME )
{
if ( lTimeLength < 11 || lTimeLength > 17 )
return 0;
memcpy(pBuffer, pString, 10);
pBuffer += 10;
pString += 10;
}
else
{
if ( lTimeLength < 13 )
return 0;
memcpy(pBuffer, pString, 12);
pBuffer += 12;
pString += 12;
}
if ((*pString == 'Z') || (*pString == '-') || (*pString == '+'))
{
*(pBuffer++) = '0';
*(pBuffer++) = '0';
}
else
{
*(pBuffer++) = *(pString++);
*(pBuffer++) = *(pString++);
// Skip any fractional seconds...
if (*pString == '.')
{
pString++;
while ((*pString >= '0') && (*pString <= '9'))
pString++;
}
}
*(pBuffer++) = 'Z';
*(pBuffer++) = '\0';
time_t lSecondsFromUTC;
if ( *pString == 'Z' )
lSecondsFromUTC = 0;
else
{
if ((*pString != '+') && (*pString != '-'))
return 0;
lSecondsFromUTC = ((pString[1]-'0') * 10 + (pString[2]-'0')) * 60;
lSecondsFromUTC += (pString[3]-'0') * 10 + (pString[4]-'0');
if (*pString == '-')
lSecondsFromUTC = -lSecondsFromUTC;
}
tm lTime;
lTime.tm_sec = ((lBuffer[10] - '0') * 10) + (lBuffer[11] - '0');
lTime.tm_min = ((lBuffer[8] - '0') * 10) + (lBuffer[9] - '0');
lTime.tm_hour = ((lBuffer[6] - '0') * 10) + (lBuffer[7] - '0');
lTime.tm_mday = ((lBuffer[4] - '0') * 10) + (lBuffer[5] - '0');
lTime.tm_mon = (((lBuffer[2] - '0') * 10) + (lBuffer[3] - '0')) - 1;
lTime.tm_year = ((lBuffer[0] - '0') * 10) + (lBuffer[1] - '0');
if ( lTime.tm_year < 50 )
lTime.tm_year += 100; // RFC 2459
lTime.tm_wday = 0;
lTime.tm_yday = 0;
lTime.tm_isdst = 0; // No DST adjustment requested
lResult = mktime(&lTime);
if ( lResult )
{
if ( 0 != lTime.tm_isdst )
lResult -= 3600; // mktime may adjust for DST (OS dependent)
lResult += lSecondsFromUTC;
}
else
lResult = 0;
return lResult;
}
X509Val::X509Val(::X509* arg_certificate) : OpaqueVal(x509_opaque_type)
{
certificate = arg_certificate;
}
X509Val::X509Val() : OpaqueVal(x509_opaque_type)
{
certificate = 0;
}
X509Val::~X509Val()
{
if ( certificate )
X509_free(certificate);
}
::X509* X509Val::GetCertificate() const
{
return certificate;
}
bool X509Val::DoSerialize(SerialInfo* info) const
{
DO_SERIALIZE(SER_X509_VAL, OpaqueVal);
unsigned char *buf = NULL;
int length = i2d_X509(certificate, &buf);
if ( length < 0 )
return false;
bool res = SERIALIZE_STR(reinterpret_cast<const char*>(buf), length);
OPENSSL_free(buf);
return res;
}
bool X509Val::DoUnserialize(UnserialInfo* info)
{
DO_UNSERIALIZE(OpaqueVal)
int length;
unsigned char *certbuf, *opensslbuf;
if ( ! UNSERIALIZE_STR(reinterpret_cast<char **>(&certbuf), &length) )
return false;
opensslbuf = certbuf; // OpenSSL likes to shift pointers around. really.
certificate = d2i_X509(NULL, const_cast<const unsigned char**>(&opensslbuf), length);
delete[] certbuf;
if ( !certificate )
return false;
return true;
}

View file

@ -0,0 +1,112 @@
// See the file "COPYING" in the main distribution directory for copyright.
#ifndef FILE_ANALYSIS_X509_H
#define FILE_ANALYSIS_X509_H
#include <string>
#include "Val.h"
#include "../File.h"
#include "Analyzer.h"
#include <openssl/x509.h>
#include <openssl/asn1.h>
namespace file_analysis {
class X509Val;
class X509 : public file_analysis::Analyzer {
public:
virtual bool DeliverStream(const u_char* data, uint64 len);
virtual bool Undelivered(uint64 offset, uint64 len);
virtual bool EndOfFile();
/**
* Converts an X509 certificate into a \c X509::Certificate record
* value. This is a static function that can be called externally;
* it doesn't depend on the state of any particular file analyzer.
*
* @param cert_val The certificate to convert.
*
* @return The new record value; ownership is passed to the
* caller.
*/
static RecordVal* ParseCertificate(X509Val* cert_val);
static file_analysis::Analyzer* Instantiate(RecordVal* args, File* file)
{ return new X509(args, file); }
/**
* Retrieve an X509 extension value from an OpenSSL BIO to which it was
* written.
*
* @param bio the OpenSSL BIO to read. It will be freed by the function,
* including when an error occurs.
*
* @return The X509 extension value.
*/
static StringVal* GetExtensionFromBIO(BIO* bio);
protected:
X509(RecordVal* args, File* file);
private:
void ParseExtension(X509_EXTENSION* ex);
void ParseBasicConstraints(X509_EXTENSION* ex);
void ParseSAN(X509_EXTENSION* ex);
std::string cert_data;
// Helpers for ParseCertificate.
static double GetTimeFromAsn1(const ASN1_TIME * atime);
static StringVal* KeyCurve(EVP_PKEY *key);
static unsigned int KeyLength(EVP_PKEY *key);
};
/**
* This class wraps an OpenSSL X509 data structure.
*
* We need these to be able to pass OpenSSL pointers around in Bro
* script-land. Otherwise, we cannot verify certificates from Bro
* scriptland
*/
class X509Val : public OpaqueVal {
public:
/**
* Construct an X509Val.
*
* @param certificate specifies the wrapped OpenSSL certificate
*
* @return A newly initialized X509Val.
*/
explicit X509Val(::X509* certificate);
/**
* Destructor.
*/
~X509Val();
/**
* Get the wrapped X509 certificate. Please take care, that the
* internal OpenSSL reference counting stays the same.
*
* @return The wrapped OpenSSL X509 certificate.
*/
::X509* GetCertificate() const;
protected:
/**
* Construct an empty X509Val. Only used for deserialization.
*/
X509Val();
private:
::X509* certificate; // the wrapped certificate
DECLARE_SERIAL(X509Val);
};
}
#endif

View file

@ -0,0 +1,57 @@
## Generated for encountered X509 certificates, e.g., in the clear SSL/TLS
## connection handshake.
##
## See `Wikipedia <http://en.wikipedia.org/wiki/X.509>`__ for more information
## about the X.509 format.
##
## f: The file.
##
## cert_ref: An opaque pointer to the underlying OpenSSL data structure of the
## certificate.
##
## cert: The parsed certificate information.
##
## .. bro:see:: x509_extension x509_ext_basic_constraints
## x509_ext_subject_alternative_name x509_parse x509_verify
## x509_get_certificate_string
event x509_certificate%(f: fa_file, cert_ref: opaque of x509, cert: X509::Certificate%);
## Generated for X509 extensions seen in a certificate.
##
## See `Wikipedia <http://en.wikipedia.org/wiki/X.509>`__ for more information
## about the X.509 format.
##
## f: The file.
##
## ext: The parsed extension.
##
## .. bro:see:: x509_certificate x509_ext_basic_constraints
## x509_ext_subject_alternative_name x509_parse x509_verify
## x509_get_certificate_string
event x509_extension%(f: fa_file, ext: X509::Extension%);
## Generated for the X509 basic constraints extension seen in a certificate.
## This extension can be used to identify the subject of a certificate as a CA.
##
## f: The file.
##
## ext: The parsed basic constraints extension.
##
## .. bro:see:: x509_certificate x509_extension
## x509_ext_subject_alternative_name x509_parse x509_verify
## x509_get_certificate_string
event x509_ext_basic_constraints%(f: fa_file, ext: X509::BasicConstraints%);
## Generated for the X509 subject alternative name extension seen in a certificate.
## This extension can be used to allow additional entities to be bound to the
## subject of the certificate. Usually it is used to specify one or multiple DNS
## names for which a certificate is valid.
##
## f: The file.
##
## ext: The parsed subject alternative name extension.
##
## .. bro:see:: x509_certificate x509_extension x509_ext_basic_constraints
## x509_parse x509_verify
## x509_get_certificate_string
event x509_ext_subject_alternative_name%(f: fa_file, ext: X509::SubjectAlternativeName%);
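As a usage sketch, script-layer handlers for these events could look as follows. The concrete fields of X509::Certificate and X509::SubjectAlternativeName are declared in types.bif and are not assumed here, so the records are printed whole; fa_file's standard id field is assumed.
event x509_certificate(f: fa_file, cert_ref: opaque of x509, cert: X509::Certificate)
	{
	print fmt("certificate in file %s", f$id), cert;
	}
event x509_ext_subject_alternative_name(f: fa_file, ext: X509::SubjectAlternativeName)
	{
	print "subject alternative names", ext;
	}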

View file

@ -0,0 +1,539 @@
%%{
#include "file_analysis/analyzer/x509/X509.h"
#include "types.bif.h"
#include <openssl/x509.h>
#include <openssl/asn1.h>
#include <openssl/x509_vfy.h>
#include <openssl/ocsp.h>
// This is the indexed map of X509 certificate stores.
static map<Val*, X509_STORE*> x509_stores;
// ### NOTE: while d2i_X509 does not take a const u_char** pointer,
// here we assume d2i_X509 does not write to <data>, so it is safe to
// convert data to a non-const pointer. Could some X509 guru verify
// this?
X509* d2i_X509_(X509** px, const u_char** in, int len)
{
#ifdef OPENSSL_D2I_X509_USES_CONST_CHAR
return d2i_X509(px, in, len);
#else
return d2i_X509(px, (u_char**)in, len);
#endif
}
// construct an error record
RecordVal* x509_result_record(uint64_t num, const char* reason, Val* chainVector = 0)
{
RecordVal* rrecord = new RecordVal(BifType::Record::X509::Result);
rrecord->Assign(0, new Val(num, TYPE_INT));
rrecord->Assign(1, new StringVal(reason));
if ( chainVector )
rrecord->Assign(2, chainVector);
return rrecord;
}
X509_STORE* x509_get_root_store(TableVal* root_certs)
{
// If this certificate store was built previously, just reuse the old one.
if ( x509_stores.count(root_certs) > 0 )
return x509_stores[root_certs];
X509_STORE* ctx = X509_STORE_new();
ListVal* idxs = root_certs->ConvertToPureList();
// Build the validation store
for ( int i = 0; i < idxs->Length(); ++i )
{
Val* key = idxs->Index(i);
StringVal *sv = root_certs->Lookup(key)->AsStringVal();
assert(sv);
const uint8* data = sv->Bytes();
X509* x = d2i_X509_(NULL, &data, sv->Len());
if ( ! x )
{
builtin_error(fmt("Root CA error: %s", ERR_error_string(ERR_get_error(),NULL)));
return 0;
}
X509_STORE_add_cert(ctx, x);
X509_free(x);
}
delete idxs;
// Save the newly constructed certificate store into the caching map.
x509_stores[root_certs] = ctx;
return ctx;
}
// Get all certificates starting at the second one (assuming the first one is the host certificate).
STACK_OF(X509)* x509_get_untrusted_stack(VectorVal* certs_vec)
{
STACK_OF(X509)* untrusted_certs = sk_X509_new_null();
if ( ! untrusted_certs )
{
builtin_error(fmt("Untrusted certificate stack initialization error: %s", ERR_error_string(ERR_get_error(),NULL)));
return 0;
}
for ( int i = 1; i < (int) certs_vec->Size(); ++i ) // start at 1 - 0 is host cert
{
Val *sv = certs_vec->Lookup(i);
if ( ! sv )
continue;
// Fixme: check type
X509* x = ((file_analysis::X509Val*) sv)->GetCertificate();
if ( ! x )
{
sk_X509_free(untrusted_certs);
builtin_error(fmt("No certificate in opaque in stack"));
return 0;
}
sk_X509_push(untrusted_certs, x);
}
return untrusted_certs;
}
// We need this function to be able to identify the signer certificate of an
// OCSP request out of a list of possible certificates.
X509* x509_get_ocsp_signer(STACK_OF(X509) *certs, OCSP_RESPID *rid)
{
// We support two lookup types - either by response id or by key.
if ( rid->type == V_OCSP_RESPID_NAME )
return X509_find_by_subject(certs, rid->value.byName);
// Only the name and key response ID types should occur - but let's be sure...
if ( rid->type != V_OCSP_RESPID_KEY )
return 0;
// Just like OpenSSL, we only support SHA-1 key lookups and bail out otherwise.
if ( rid->value.byKey->length != SHA_DIGEST_LENGTH )
return 0;
unsigned char* key_hash = rid->value.byKey->data;
for ( int i = 0; i < sk_X509_num(certs); ++i )
{
unsigned char digest[SHA_DIGEST_LENGTH];
X509* cert = sk_X509_value(certs, i);
if ( ! X509_pubkey_digest(cert, EVP_sha1(), digest, NULL) )
// digest failed for this certificate, try with next
continue;
if ( memcmp(digest, key_hash, SHA_DIGEST_LENGTH) == 0 )
// keys match, return certificate
return cert;
}
return 0;
}
%%}
## Parses a certificate into an X509::Certificate structure.
##
## cert: The X509 certificate opaque handle.
##
## Returns: A X509::Certificate structure.
##
## .. bro:see:: x509_certificate x509_extension x509_ext_basic_constraints
## x509_ext_subject_alternative_name x509_verify
## x509_get_certificate_string
function x509_parse%(cert: opaque of x509%): X509::Certificate
%{
assert(cert);
file_analysis::X509Val* h = (file_analysis::X509Val*) cert;
return file_analysis::X509::ParseCertificate(h);
%}
## Returns the string form of a certificate.
##
## cert: The X509 certificate opaque handle.
##
## pem: A boolean that specifies if the certificate is returned
## in pem-form (true), or as the raw ASN1 encoded binary
## (false).
##
## Returns: X509 certificate as a string.
##
## .. bro:see:: x509_certificate x509_extension x509_ext_basic_constraints
## x509_ext_subject_alternative_name x509_parse x509_verify
function x509_get_certificate_string%(cert: opaque of x509, pem: bool &default=F%): string
%{
assert(cert);
file_analysis::X509Val* h = (file_analysis::X509Val*) cert;
BIO *bio = BIO_new(BIO_s_mem());
if ( pem )
PEM_write_bio_X509(bio, h->GetCertificate());
else
i2d_X509_bio(bio, h->GetCertificate());
StringVal* ext_val = file_analysis::X509::GetExtensionFromBIO(bio);
if ( ! ext_val )
ext_val = new StringVal("");
return ext_val;
%}
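A short sketch combining this function with the x509_certificate event documented earlier, dumping each certificate in PEM form:
event x509_certificate(f: fa_file, cert_ref: opaque of x509, cert: X509::Certificate)
	{
	local pem = x509_get_certificate_string(cert_ref, T);
	print pem;
	}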
## Verifies an OCSP reply.
##
## certs: Specifies the certificate chain to use. Server certificate first.
##
## ocsp_reply: the ocsp reply to validate.
##
## root_certs: A list of root certificates to validate the certificate chain.
##
## verify_time: Time for the validity check of the certificates.
##
## Returns: A record of type X509::Result containing the result code of the
## verify operation.
##
## .. bro:see:: x509_certificate x509_extension x509_ext_basic_constraints
## x509_ext_subject_alternative_name x509_parse
## x509_get_certificate_string x509_verify
function x509_ocsp_verify%(certs: x509_opaque_vector, ocsp_reply: string, root_certs: table_string_of_string, verify_time: time &default=network_time()%): X509::Result
%{
RecordVal* rval = 0;
X509_STORE* ctx = x509_get_root_store(root_certs->AsTableVal());
if ( ! ctx )
return x509_result_record(-1, "Problem initializing root store");
VectorVal *certs_vec = certs->AsVectorVal();
if ( certs_vec->Size() < 1 )
{
reporter->Error("No certificates given in vector");
return x509_result_record(-1, "no certificates");
}
// host certificate
unsigned int index = 0; // so Lookup() does not resolve to the null-pointer overload
Val *sv = certs_vec->Lookup(index);
if ( ! sv )
{
builtin_error("undefined value in certificate vector");
return x509_result_record(-1, "undefined value in certificate vector");
}
file_analysis::X509Val* cert_handle = (file_analysis::X509Val*) sv;
X509* cert = cert_handle->GetCertificate();
if ( ! cert )
{
builtin_error(fmt("No certificate in opaque"));
return x509_result_record(-1, "No certificate in opaque");
}
const unsigned char* start = ocsp_reply->Bytes();
STACK_OF(X509)* untrusted_certs = x509_get_untrusted_stack(certs_vec);
if ( ! untrusted_certs )
return x509_result_record(-1, "Problem initializing list of untrusted certificates");
// from here, always goto cleanup. Initialize all other required variables...
time_t vtime = (time_t) verify_time;
OCSP_BASICRESP *basic = 0;
OCSP_SINGLERESP *single = 0;
X509_STORE_CTX *csc = 0;
OCSP_CERTID *certid = 0;
int status = -1;
int out = -1;
int result = -1;
X509* issuer_certificate = 0;
X509* signer = 0;
OCSP_RESPONSE *resp = d2i_OCSP_RESPONSE(NULL, &start, ocsp_reply->Len());
if ( ! resp )
{
rval = x509_result_record(-1, "Could not parse OCSP response");
goto x509_ocsp_cleanup;
}
status = OCSP_response_status(resp);
if ( status != OCSP_RESPONSE_STATUS_SUCCESSFUL )
{
rval = x509_result_record(-2, OCSP_response_status_str(status));
goto x509_ocsp_cleanup;
}
basic = OCSP_response_get1_basic(resp);
if ( ! basic )
{
rval = x509_result_record(-1, "Could not parse OCSP response");
goto x509_ocsp_cleanup;
}
// the following code took me _forever_ to get right.
// The OCSP_basic_verify command takes a list of certificates. However (which is not immediately
// visible or understandable), those are only used to find the signer certificate. They are _not_
// used for chain building during the actual verification (this would be stupid). But - if we sneakily
// inject the certificates in the certificate list of the OCSP reply, they actually are used during
// the lookup.
// Yay.
if ( ! basic->certs )
{
basic->certs = sk_X509_new_null();
if ( ! basic->certs )
{
rval = x509_result_record(-1, "Could not allocate basic x509 stack");
goto x509_ocsp_cleanup;
}
}
issuer_certificate = 0;
for ( int i = 0; i < sk_X509_num(untrusted_certs); i++)
{
sk_X509_push(basic->certs, X509_dup(sk_X509_value(untrusted_certs, i)));
if ( X509_NAME_cmp(X509_get_issuer_name(cert), X509_get_subject_name(sk_X509_value(untrusted_certs, i))) == 0 )
issuer_certificate = sk_X509_value(untrusted_certs, i);
}
// Because we actually want to be able to give nice error messages that show why we were
// not able to verify the OCSP response - do our own verification logic first.
signer = x509_get_ocsp_signer(basic->certs, basic->tbsResponseData->responderId);
/*
Do this perhaps - OpenSSL also cannot do it, so I do not really feel bad about it.
Needs a different lookup because the root store is no stack of X509 certs
if ( ! signer )
// if we did not find it in the certificates that were sent, search in the root store
signer = x509_get_ocsp_signer(basic->certs, basic->tbsResponseData->responderId);
*/
if ( ! signer )
{
rval = x509_result_record(-1, "Could not find OCSP responder certificate");
goto x509_ocsp_cleanup;
}
csc = X509_STORE_CTX_new();
X509_STORE_CTX_init(csc, ctx, signer, basic->certs);
X509_STORE_CTX_set_time(csc, 0, (time_t) verify_time);
X509_STORE_CTX_set_purpose(csc, X509_PURPOSE_OCSP_HELPER);
result = X509_verify_cert(csc);
if ( result != 1 )
{
const char *reason = X509_verify_cert_error_string((*csc).error);
rval = x509_result_record(result, reason);
goto x509_ocsp_cleanup;
}
out = OCSP_basic_verify(basic, NULL, ctx, 0);
if ( out < 1 )
{
rval = x509_result_record(out, ERR_error_string(ERR_get_error(),NULL));
goto x509_ocsp_cleanup;
}
// ok, now we verified the OCSP response. This means that we have a valid chain tying it
// to a root that we trust and that the signature also hopefully is valid. This does not yet
// mean that the ocsp response actually matches the certificate the server send us or that
// the OCSP response even says that the certificate is valid.
// let's start this out by checking that the response is actually for the certificate we want
// to validate and not for something completely unrelated that the server is trying to trick us
// into accepting.
if ( issuer_certificate )
certid = OCSP_cert_to_id(NULL, cert, issuer_certificate);
else
{
// issuer not in list sent by server, check store
X509_OBJECT obj;
int lookup = X509_STORE_get_by_subject(csc, X509_LU_X509, X509_get_subject_name(cert), &obj);
if ( lookup <= 0)
{
rval = x509_result_record(lookup, "Could not find issuer of host certificate");
goto x509_ocsp_cleanup;
}
certid = OCSP_cert_to_id(NULL, cert, obj.data.x509);
}
if ( ! certid )
{
rval = x509_result_record(-1, "Certificate ID construction failed");
goto x509_ocsp_cleanup;
}
// for now, assume we have one reply...
single = sk_OCSP_SINGLERESP_value(basic->tbsResponseData->responses, 0);
if ( ! single )
{
rval = x509_result_record(-1, "Could not lookup OCSP response information");
goto x509_ocsp_cleanup;
}
if ( OCSP_id_cmp(certid, single->certId) != 0 )
return x509_result_record(-1, "OCSP reply is not for host certificate");
// next - check freshness of proof...
if ( ! ASN1_GENERALIZEDTIME_check(single->thisUpdate) || ! ASN1_GENERALIZEDTIME_check(single->nextUpdate) )
{
rval = x509_result_record(-1, "OCSP reply contains invalid dates");
goto x509_ocsp_cleanup;
}
// now - nearly done. Check freshness and status code.
// There is a function to check the freshness of the ocsp reply in the ocsp code of OpenSSL. But - it only
// supports comparing it against the current time, not against arbitrary times. Hence it is kind of unusable
// for us...
// Well, we will do it manually.
if ( X509_cmp_time(single->thisUpdate, &vtime) > 0 )
rval = x509_result_record(-1, "OCSP reply specifies time in future");
else if ( X509_cmp_time(single->nextUpdate, &vtime) < 0 )
rval = x509_result_record(-1, "OCSP reply expired");
else if ( single->certStatus->type != V_OCSP_CERTSTATUS_GOOD )
rval = x509_result_record(-1, OCSP_cert_status_str(single->certStatus->type));
// if we have no error so far, we are done.
if ( !rval )
rval = x509_result_record(1, OCSP_cert_status_str(single->certStatus->type));
x509_ocsp_cleanup:
if ( untrusted_certs )
sk_X509_free(untrusted_certs);
if ( resp )
OCSP_RESPONSE_free(resp);
if ( basic )
OCSP_BASICRESP_free(basic);
if ( csc )
{
X509_STORE_CTX_cleanup(csc);
X509_STORE_CTX_free(csc);
}
if ( certid )
OCSP_CERTID_free(certid);
return rval;
%}
## Verifies a certificate.
##
## certs: Specifies a certificate chain that is being used to validate
## the given certificate against the root store given in *root_certs*.
## The host certificate has to be at index 0.
##
## root_certs: A list of root certificates to validate the certificate chain.
##
## verify_time: Time for the validity check of the certificates.
##
## Returns: A record of type X509::Result containing the result code of the
## verify operation. In case of success also returns the full
## certificate chain.
##
## .. bro:see:: x509_certificate x509_extension x509_ext_basic_constraints
## x509_ext_subject_alternative_name x509_parse
## x509_get_certificate_string x509_ocsp_verify
function x509_verify%(certs: x509_opaque_vector, root_certs: table_string_of_string, verify_time: time &default=network_time()%): X509::Result
%{
X509_STORE* ctx = x509_get_root_store(root_certs->AsTableVal());
if ( ! ctx )
return x509_result_record(-1, "Problem initializing root store");
VectorVal *certs_vec = certs->AsVectorVal();
if ( ! certs_vec || certs_vec->Size() < 1 )
{
reporter->Error("No certificates given in vector");
return x509_result_record(-1, "no certificates");
}
// host certificate
unsigned int index = 0; // so Lookup() does not resolve to the null-pointer overload
Val *sv = certs_vec->Lookup(index);
if ( !sv )
{
builtin_error("undefined value in certificate vector");
return x509_result_record(-1, "undefined value in certificate vector");
}
file_analysis::X509Val* cert_handle = (file_analysis::X509Val*) sv;
X509* cert = cert_handle->GetCertificate();
if ( ! cert )
{
builtin_error(fmt("No certificate in opaque"));
return x509_result_record(-1, "No certificate in opaque");
}
STACK_OF(X509)* untrusted_certs = x509_get_untrusted_stack(certs_vec);
if ( ! untrusted_certs )
return x509_result_record(-1, "Problem initializing list of untrusted certificates");
X509_STORE_CTX csc;
X509_STORE_CTX_init(&csc, ctx, cert, untrusted_certs);
X509_STORE_CTX_set_time(&csc, 0, (time_t) verify_time);
X509_STORE_CTX_set_flags(&csc, X509_V_FLAG_USE_CHECK_TIME);
int result = X509_verify_cert(&csc);
VectorVal* chainVector = 0;
if ( result == 1 ) // we have a valid chain. try to get it...
{
STACK_OF(X509)* chain = X509_STORE_CTX_get1_chain(&csc); // get1 = deep copy
if ( ! chain )
{
reporter->Error("Encountered valid chain that could not be resolved");
sk_X509_pop_free(chain, X509_free);
goto x509_verify_chainerror;
}
int num_certs = sk_X509_num(chain);
chainVector = new VectorVal(internal_type("x509_opaque_vector")->AsVectorType());
for ( int i = 0; i < num_certs; i++ )
{
X509* currcert = sk_X509_value(chain, i);
if ( currcert )
// X509Val takes ownership of currcert.
chainVector->Assign(i, new file_analysis::X509Val(currcert));
else
{
reporter->InternalWarning("OpenSSL returned null certificate");
sk_X509_pop_free(chain, X509_free);
goto x509_verify_chainerror;
}
}
sk_X509_free(chain);
}
x509_verify_chainerror:
X509_STORE_CTX_cleanup(&csc);
sk_X509_free(untrusted_certs);
RecordVal* rrecord = x509_result_record(csc.error, X509_verify_cert_error_string(csc.error), chainVector);
return rrecord;
%}
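A hedged sketch of driving this function from script-land. The chain vector is assumed to have been collected elsewhere (host certificate at index 0); table_string_of_string and x509_opaque_vector are the script-level type names used in the declaration above, and in practice the root table would typically be the Mozilla root store shipped with Bro.
function verify_chain(chain: x509_opaque_vector, roots: table_string_of_string): X509::Result
	{
	# roots maps a name to a DER-encoded root certificate, as expected by x509_verify;
	# validation happens at the current network time by default.
	return x509_verify(chain, roots);
	}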

View file

@ -0,0 +1,5 @@
type X509::Certificate: record;
type X509::Extension: record;
type X509::BasicConstraints: record;
type X509::SubjectAlternativeName: record;
type X509::Result: record;

View file

@ -15,6 +15,27 @@ function Files::__set_timeout_interval%(file_id: string, t: interval%): bool
return new Val(result, TYPE_BOOL);
%}
## :bro:see:`Files::enable_reassembly`.
function Files::__enable_reassembly%(file_id: string%): bool
%{
bool result = file_mgr->EnableReassembly(file_id->CheckString());
return new Val(result, TYPE_BOOL);
%}
## :bro:see:`Files::disable_reassembly`.
function Files::__disable_reassembly%(file_id: string%): bool
%{
bool result = file_mgr->DisableReassembly(file_id->CheckString());
return new Val(result, TYPE_BOOL);
%}
## :bro:see:`Files::set_reassembly_buffer`.
function Files::__set_reassembly_buffer%(file_id: string, max: count%): bool
%{
bool result = file_mgr->SetReassemblyBuffer(file_id->CheckString(), max);
return new Val(result, TYPE_BOOL);
%}
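For context, a sketch of the intended script-layer usage. The Files::enable_reassembly and Files::set_reassembly_buffer wrappers referenced above are the intended interface; the internal BIFs are used here because their exact signatures are what this change adds, and the buffer size is illustrative.
event file_new(f: fa_file)
	{
	# Illustrative policy: reassemble out-of-order chunks, buffering up to 512 KB.
	Files::__enable_reassembly(f$id);
	Files::__set_reassembly_buffer(f$id, 524288);
	}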
## :bro:see:`Files::add_analyzer`.
function Files::__add_analyzer%(file_id: string, tag: Files::Tag, args: any%): bool
%{