A set of file analysis extensions.

- Enable manager to associate analyzers with a MIME type. With that,
  one can now say enable all analyzers for, e.g., "image/gif". This is
  exposed to script-land as

    Files::add_analyzers_for_mime_type(f: fa_file, mtype: string)

  For MIME types identified via libmagic, this happens automatically
  (via the file_new() handler in files/main.bro).

- Extend the analyzer API to better match that of protocol analyzers:

    - Adding unique analyzer IDs so that we can refer to instances
      from script-land.

    - Adding subtypes to Components so that a single analyzer
      implementation can support different types of analyzers
      internally.

    - Add an analyzer method SetAnalyzerTag() that allows setting the
      tag after construction.

    - Adding Init() and Done() methods for consistency with what other
      classes offer.

- Add debug logging to the file_analysis stream.

TODO: test cases missing for the new script-land functionality.
This commit is contained in:
Robin Sommer 2013-11-26 11:16:58 -08:00
parent f0fe270029
commit d34f23c8d4
12 changed files with 337 additions and 30 deletions

View file

@ -56,7 +56,7 @@ export {
## local file path which was read, or some other input source. ## local file path which was read, or some other input source.
source: string &log &optional; source: string &log &optional;
## A value to represent the depth of this file in relation ## A value to represent the depth of this file in relation
## to its source. In SMTP, it is the depth of the MIME ## to its source. In SMTP, it is the depth of the MIME
## attachment on the message. In HTTP, it is the depth of the ## attachment on the message. In HTTP, it is the depth of the
## request within the TCP connection. ## request within the TCP connection.
@ -72,7 +72,7 @@ export {
mime_type: string &log &optional; mime_type: string &log &optional;
## A filename for the file if one is available from the source ## A filename for the file if one is available from the source
## for the file. These will frequently come from ## for the file. These will frequently come from
## "Content-Disposition" headers in network protocols. ## "Content-Disposition" headers in network protocols.
filename: string &log &optional; filename: string &log &optional;
@ -148,9 +148,18 @@ export {
## Returns: true if the analyzer will be added, or false if analysis ## Returns: true if the analyzer will be added, or false if analysis
## for the file isn't currently active or the *args* ## for the file isn't currently active or the *args*
## were invalid for the analyzer type. ## were invalid for the analyzer type.
global add_analyzer: function(f: fa_file, global add_analyzer: function(f: fa_file,
tag: Files::Tag, tag: Files::Tag,
args: AnalyzerArgs &default=AnalyzerArgs()): bool; args: AnalyzerArgs &default=AnalyzerArgs()): bool;
## Adds all analyzers associated with a given MIME type to the analysis of
## a file. Note that analyzers added via MIME types cannot take further
## arguments.
##
## f: the file.
##
## mtype: the MIME type; it will be compared case-insensitively.
global add_analyzers_for_mime_type: function(f: fa_file, mtype: string);
## Removes an analyzer from the analysis of a given file. ## Removes an analyzer from the analysis of a given file.
## ##
@ -195,7 +204,7 @@ export {
## A callback to generate a file handle on demand when ## A callback to generate a file handle on demand when
## one is needed by the core. ## one is needed by the core.
get_file_handle: function(c: connection, is_orig: bool): string; get_file_handle: function(c: connection, is_orig: bool): string;
## A callback to "describe" a file. In the case of an HTTP ## A callback to "describe" a file. In the case of an HTTP
## transfer the most obvious description would be the URL. ## transfer the most obvious description would be the URL.
## It's like an extremely compressed version of the normal log. ## It's like an extremely compressed version of the normal log.
@ -206,7 +215,7 @@ export {
## Register callbacks for protocols that work with the Files framework. ## Register callbacks for protocols that work with the Files framework.
## The callbacks must uniquely identify a file and each protocol can ## The callbacks must uniquely identify a file and each protocol can
## only have a single callback registered for it. ## only have a single callback registered for it.
## ##
## tag: Tag for the protocol analyzer having a callback being registered. ## tag: Tag for the protocol analyzer having a callback being registered.
## ##
## reg: A :bro:see:`Files::ProtoRegistration` record. ## reg: A :bro:see:`Files::ProtoRegistration` record.
@ -258,13 +267,13 @@ function set_info(f: fa_file)
f$info$source = f$source; f$info$source = f$source;
f$info$duration = f$last_active - f$info$ts; f$info$duration = f$last_active - f$info$ts;
f$info$seen_bytes = f$seen_bytes; f$info$seen_bytes = f$seen_bytes;
if ( f?$total_bytes ) if ( f?$total_bytes )
f$info$total_bytes = f$total_bytes; f$info$total_bytes = f$total_bytes;
f$info$missing_bytes = f$missing_bytes; f$info$missing_bytes = f$missing_bytes;
f$info$overflow_bytes = f$overflow_bytes; f$info$overflow_bytes = f$overflow_bytes;
if ( f?$is_orig ) if ( f?$is_orig )
f$info$is_orig = f$is_orig; f$info$is_orig = f$is_orig;
if ( f?$mime_type ) if ( f?$mime_type )
f$info$mime_type = f$mime_type; f$info$mime_type = f$mime_type;
} }
@ -288,6 +297,15 @@ function add_analyzer(f: fa_file, tag: Files::Tag, args: AnalyzerArgs): bool
return T; return T;
} }
function add_analyzers_for_mime_type(f: fa_file, mtype: string)
	{
	# Analyzers attached via a MIME type cannot take arguments, so the
	# built-in is handed an AnalyzerArgs record with nothing set explicitly.
	local no_args: AnalyzerArgs;
	local added = __add_analyzers_for_mime_type(f$id, mtype, no_args);

	# Note the name of every analyzer the built-in reports as added in the
	# file's info record.
	for ( t in added )
		add f$info$analyzers[Files::analyzer_name(t)];
	}
function register_analyzer_add_callback(tag: Files::Tag, callback: function(f: fa_file, args: AnalyzerArgs)) function register_analyzer_add_callback(tag: Files::Tag, callback: function(f: fa_file, args: AnalyzerArgs))
{ {
analyzer_add_callbacks[tag] = callback; analyzer_add_callbacks[tag] = callback;
@ -311,6 +329,9 @@ function analyzer_name(tag: Files::Tag): string
event file_new(f: fa_file) &priority=10 event file_new(f: fa_file) &priority=10
{ {
set_info(f); set_info(f);
if ( f?$mime_type )
add_analyzers_for_mime_type(f, f$mime_type);
} }
event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=10 event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=10

View file

@ -60,6 +60,13 @@ type addr_vec: vector of addr;
## directly and then remove this alias. ## directly and then remove this alias.
type table_string_of_string: table[string] of string; type table_string_of_string: table[string] of string;
## A set of file analyzer tags.
##
## .. todo:: We need this type definition only for declaring builtin functions
## via ``bifcl``. We should extend ``bifcl`` to understand composite types
## directly and then remove this alias.
type files_tag_set: set[Files::Tag];
## A connection's transport-layer protocol. Note that Bro uses the term ## A connection's transport-layer protocol. Note that Bro uses the term
## "connection" broadly, using flow semantics for ICMP and UDP. ## "connection" broadly, using flow semantics for ICMP and UDP.
type transport_proto: enum { type transport_proto: enum {

View file

@ -3,9 +3,17 @@
#include "Analyzer.h" #include "Analyzer.h"
#include "Manager.h" #include "Manager.h"
file_analysis::ID file_analysis::Analyzer::id_counter = 0;
file_analysis::Analyzer::~Analyzer() file_analysis::Analyzer::~Analyzer()
{ {
DBG_LOG(DBG_FILE_ANALYSIS, "Destroy file analyzer %s", DBG_LOG(DBG_FILE_ANALYSIS, "Destroy file analyzer %s",
file_mgr->GetComponentName(tag)); file_mgr->GetComponentName(tag));
Unref(args); Unref(args);
} }
// Sets the analyzer's tag after construction. The assertion only lets the
// call through when the current tag evaluates to false or already equals
// arg_tag, i.e. the method cannot be used to change an existing tag.
void file_analysis::Analyzer::SetAnalyzerTag(const file_analysis::Tag& arg_tag)
{
assert(! tag || tag == arg_tag);
tag = arg_tag;
}

View file

@ -13,6 +13,8 @@ namespace file_analysis {
class File; class File;
typedef uint32 ID;
/** /**
* Base class for analyzers that can be attached to file_analysis::File objects. * Base class for analyzers that can be attached to file_analysis::File objects.
*/ */
@ -25,6 +27,18 @@ public:
*/ */
virtual ~Analyzer(); virtual ~Analyzer();
/**
* Initializes the analyzer before input processing starts.
*/
virtual void Init()
{ };
/**
* Finishes the analyzer's operation after all input has been parsed.
*/
virtual void Done()
{ };
/** /**
* Subclasses may override this metod to receive file data non-sequentially. * Subclasses may override this metod to receive file data non-sequentially.
* @param data points to start of a chunk of file data. * @param data points to start of a chunk of file data.
@ -72,6 +86,13 @@ public:
*/ */
file_analysis::Tag Tag() const { return tag; } file_analysis::Tag Tag() const { return tag; }
/**
* Returns the analyzer instance's internal ID. These IDs are unique
* across all analyzers instantiated and can thus be used to
* identify a specific instance.
*/
ID GetID() const { return id; }
/** /**
* @return the AnalyzerArgs associated with the analyzer. * @return the AnalyzerArgs associated with the analyzer.
*/ */
@ -82,10 +103,19 @@ public:
*/ */
File* GetFile() const { return file; } File* GetFile() const { return file; }
/**
* Sets the tag associated with the analyzer's type. Note that this
* can be called only right after construction, if the constructor
* did not receive a name or tag. The method cannot be used to change
* an existing tag.
*/
void SetAnalyzerTag(const file_analysis::Tag& tag);
protected: protected:
/** /**
* Constructor. Only derived classes are meant to be instantiated. * Constructor. Only derived classes are meant to be instantiated.
* @param arg_tag the tag defining the analyzer's type.
* @param arg_args an \c AnalyzerArgs (script-layer type) value specifiying * @param arg_args an \c AnalyzerArgs (script-layer type) value specifiying
* tunable options, if any, related to a particular analyzer type. * tunable options, if any, related to a particular analyzer type.
* @param arg_file the file to which the the analyzer is being attached. * @param arg_file the file to which the the analyzer is being attached.
@ -94,13 +124,35 @@ protected:
: tag(arg_tag), : tag(arg_tag),
args(arg_args->Ref()->AsRecordVal()), args(arg_args->Ref()->AsRecordVal()),
file(arg_file) file(arg_file)
{} {
id = ++id_counter;
}
/**
* Constructor. Only derived classes are meant to be instantiated.
* As this version of the constructor does not receive a name or tag,
* SetAnalyzerTag() must be called before the instance can be used.
*
* @param arg_args an \c AnalyzerArgs (script-layer type) value specifiying
* tunable options, if any, related to a particular analyzer type.
* @param arg_file the file to which the the analyzer is being attached.
*/
Analyzer(RecordVal* arg_args, File* arg_file)
: tag(),
// Take our own reference on the args record.
args(arg_args->Ref()->AsRecordVal()),
file(arg_file)
{
// Draw the next value from the static counter shared by all analyzer
// instances; pre-increment means the stored ID is the updated value.
id = ++id_counter;
}
private: private:
ID id; /**< Unique instance ID. */
file_analysis::Tag tag; /**< The particular type of the analyzer instance. */ file_analysis::Tag tag; /**< The particular type of the analyzer instance. */
RecordVal* args; /**< \c AnalyzerArgs val gives tunable analyzer params. */ RecordVal* args; /**< \c AnalyzerArgs val gives tunable analyzer params. */
File* file; /**< The file to which the analyzer is attached. */ File* file; /**< The file to which the analyzer is attached. */
static ID id_counter;
}; };
} // namespace file_analysis } // namespace file_analysis

View file

@ -9,7 +9,10 @@ using namespace file_analysis;
static void analyzer_del_func(void* v) static void analyzer_del_func(void* v)
{ {
delete (file_analysis::Analyzer*) v; file_analysis::Analyzer* a = (file_analysis::Analyzer*)v;
a->Done();
delete a;
} }
AnalyzerSet::AnalyzerSet(File* arg_file) : file(arg_file) AnalyzerSet::AnalyzerSet(File* arg_file) : file(arg_file)
@ -98,6 +101,7 @@ bool AnalyzerSet::AddMod::Perform(AnalyzerSet* set)
} }
set->Insert(a, key); set->Insert(a, key);
return true; return true;
} }
@ -124,7 +128,9 @@ bool AnalyzerSet::Remove(file_analysis::Tag tag, HashKey* key)
file_mgr->GetComponentName(tag), file_mgr->GetComponentName(tag),
file->GetID().c_str()); file->GetID().c_str());
a->Done();
delete a; delete a;
return true; return true;
} }
@ -176,6 +182,8 @@ void AnalyzerSet::Insert(file_analysis::Analyzer* a, HashKey* key)
file_mgr->GetComponentName(a->Tag()), file->GetID().c_str()); file_mgr->GetComponentName(a->Tag()), file->GetID().c_str());
analyzer_map.Insert(key, a); analyzer_map.Insert(key, a);
delete key; delete key;
a->Init();
} }
void AnalyzerSet::DrainModifications() void AnalyzerSet::DrainModifications()

View file

@ -8,13 +8,15 @@
using namespace file_analysis; using namespace file_analysis;
Component::Component(const char* arg_name, factory_callback arg_factory) Component::Component(const char* arg_name, factory_callback arg_factory, Tag::subtype_t subtype)
: plugin::Component(plugin::component::FILE_ANALYZER), : plugin::Component(plugin::component::FILE_ANALYZER),
plugin::TaggedComponent<file_analysis::Tag>() plugin::TaggedComponent<file_analysis::Tag>(subtype)
{ {
name = copy_string(arg_name); name = copy_string(arg_name);
canon_name = canonify_name(arg_name); canon_name = canonify_name(arg_name);
factory = arg_factory; factory = arg_factory;
file_mgr->RegisterComponent(this, "ANALYZER_");
} }
Component::Component(const Component& other) Component::Component(const Component& other)
@ -24,6 +26,8 @@ Component::Component(const Component& other)
name = copy_string(other.name); name = copy_string(other.name);
canon_name = copy_string(other.canon_name); canon_name = copy_string(other.canon_name);
factory = other.factory; factory = other.factory;
// TODO: Do we need the RegisterComponent() call here?
} }
Component::~Component() Component::~Component()

View file

@ -40,8 +40,14 @@ public:
* from file_analysis::Analyzer. This is typically a static \c * from file_analysis::Analyzer. This is typically a static \c
* Instatiate() method inside the class that just allocates and * Instatiate() method inside the class that just allocates and
* returns a new instance. * returns a new instance.
*
* @param subtype A subtype associated with this component that
* further distinguishes it. The subtype will be integrated into the
* analyzer::Tag that the manager associates with this analyzer, and
* analyzer instances can accordingly access it via analyzer::Tag().
* If not used, leave at zero.
*/ */
Component(const char* name, factory_callback factory); Component(const char* name, factory_callback factory, Tag::subtype_t subtype = 0);
/** /**
* Copy constructor. * Copy constructor.

View file

@ -1,6 +1,7 @@
// See the file "COPYING" in the main distribution directory for copyright. // See the file "COPYING" in the main distribution directory for copyright.
#include <string> #include <string>
#include <algorithm>
#include "File.h" #include "File.h"
#include "FileTimer.h" #include "FileTimer.h"
@ -82,7 +83,7 @@ File::File(const string& file_id, Connection* conn, analyzer::Tag tag,
{ {
StaticInit(); StaticInit();
DBG_LOG(DBG_FILE_ANALYSIS, "Creating new File object %s", file_id.c_str()); DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Creating new File object", file_id.c_str());
val = new RecordVal(fa_file_type); val = new RecordVal(fa_file_type);
val->Assign(id_idx, new StringVal(file_id.c_str())); val->Assign(id_idx, new StringVal(file_id.c_str()));
@ -100,7 +101,7 @@ File::File(const string& file_id, Connection* conn, analyzer::Tag tag,
File::~File() File::~File()
{ {
DBG_LOG(DBG_FILE_ANALYSIS, "Destroying File object %s", id.c_str()); DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Destroying File object", id.c_str());
Unref(val); Unref(val);
// Queue may not be empty in the case where only content gaps were seen. // Queue may not be empty in the case where only content gaps were seen.
@ -229,6 +230,7 @@ void File::IncrementByteCount(uint64 size, int field_idx)
void File::SetTotalBytes(uint64 size) void File::SetTotalBytes(uint64 size)
{ {
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Total bytes %" PRIu64, id.c_str(), size);
val->Assign(total_bytes_idx, new Val(size, TYPE_COUNT)); val->Assign(total_bytes_idx, new Val(size, TYPE_COUNT));
} }
@ -251,11 +253,17 @@ void File::ScheduleInactivityTimer() const
bool File::AddAnalyzer(file_analysis::Tag tag, RecordVal* args) bool File::AddAnalyzer(file_analysis::Tag tag, RecordVal* args)
{ {
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Queuing addition of %s analyzer",
id.c_str(), file_mgr->GetComponentName(tag));
return done ? false : analyzers.QueueAdd(tag, args); return done ? false : analyzers.QueueAdd(tag, args);
} }
bool File::RemoveAnalyzer(file_analysis::Tag tag, RecordVal* args) bool File::RemoveAnalyzer(file_analysis::Tag tag, RecordVal* args)
{ {
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Queuing remove of %s analyzer",
id.c_str(), file_mgr->GetComponentName(tag));
return done ? false : analyzers.QueueRemove(tag, args); return done ? false : analyzers.QueueRemove(tag, args);
} }
@ -284,16 +292,18 @@ bool File::DetectMIME(const u_char* data, uint64 len)
if ( mime ) if ( mime )
{ {
// strip off charset
const char* mime_end = strchr(mime, ';'); const char* mime_end = strchr(mime, ';');
if ( mime_end ) StringVal* mime_val = mime_end ?
// strip off charset new StringVal(mime_end - mime, mime) :
val->Assign(mime_type_idx, new StringVal(mime_end - mime, mime)); new StringVal(mime);
else
val->Assign(mime_type_idx, new StringVal(mime)); val->Assign(mime_type_idx, mime_val);
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Detected MIME type %s", id.c_str(), mime_val->CheckString());
} }
// Report whether a MIME type was actually detected, as the header
// documents; returning true unconditionally would claim success even
// when the lookup above produced nothing.
return mime != 0;
} }
void File::ReplayBOF() void File::ReplayBOF()
@ -314,7 +324,6 @@ void File::ReplayBOF()
val->Assign(bof_buffer_idx, new StringVal(bs)); val->Assign(bof_buffer_idx, new StringVal(bs));
DetectMIME(bs->Bytes(), bs->Len()); DetectMIME(bs->Bytes(), bs->Len());
FileEvent(file_new); FileEvent(file_new);
for ( size_t i = 0; i < bof_buffer.chunks.size(); ++i ) for ( size_t i = 0; i < bof_buffer.chunks.size(); ++i )
@ -333,6 +342,11 @@ void File::DataIn(const u_char* data, uint64 len, uint64 offset)
first_chunk = false; first_chunk = false;
} }
// Log chunk size, offset, completeness and a preview of at most the first
// 40 bytes; the trailing "..." marks a truncated preview. Every argument
// needs its own conversion: the offset requires "%" PRIu64 and the
// preview/suffix pair requires "%s%s".
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] %" PRIu64 " bytes in at offset %" PRIu64 "; %s [%s%s]",
	id.c_str(), len, offset,
	IsComplete() ? "complete" : "incomplete",
	fmt_bytes((const char*) data, min((uint64)40, len)), len > 40 ? "..." : "");
file_analysis::Analyzer* a = 0; file_analysis::Analyzer* a = 0;
IterCookie* c = analyzers.InitForIteration(); IterCookie* c = analyzers.InitForIteration();
@ -367,6 +381,11 @@ void File::DataIn(const u_char* data, uint64 len)
missed_bof = false; missed_bof = false;
} }
// Log chunk size, completeness and a preview of at most the first 40
// bytes. The format needs "%s%s" so that the "..." truncation marker
// passed as the last argument is actually printed.
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] %" PRIu64 " bytes in; %s [%s%s]",
	id.c_str(), len,
	IsComplete() ? "complete" : "incomplete",
	fmt_bytes((const char*) data, min((uint64)40, len)), len > 40 ? "..." : "");
file_analysis::Analyzer* a = 0; file_analysis::Analyzer* a = 0;
IterCookie* c = analyzers.InitForIteration(); IterCookie* c = analyzers.InitForIteration();
@ -391,6 +410,8 @@ void File::DataIn(const u_char* data, uint64 len)
void File::EndOfFile() void File::EndOfFile()
{ {
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] End of file", id.c_str());
if ( done ) if ( done )
return; return;
@ -417,6 +438,9 @@ void File::EndOfFile()
void File::Gap(uint64 offset, uint64 len) void File::Gap(uint64 offset, uint64 len)
{ {
// Arguments follow the order of the message text: gap size first, then
// the offset at which the gap starts.
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Gap of size %" PRIu64 " at offset %" PRIu64,
	id.c_str(), len, offset);
analyzers.DrainModifications(); analyzers.DrainModifications();
// If we were buffering the beginning of the file, a gap means we've got // If we were buffering the beginning of the file, a gap means we've got

View file

@ -229,7 +229,7 @@ protected:
* field in #val. * field in #val.
* @param data pointer to a chunk of file data. * @param data pointer to a chunk of file data.
* @param len number of bytes in the data chunk. * @param len number of bytes in the data chunk.
* @return whether mime type was available. * @return true if mime type available.
*/ */
bool DetectMIME(const u_char* data, uint64 len); bool DetectMIME(const u_char* data, uint64 len);

View file

@ -12,10 +12,12 @@
#include "UID.h" #include "UID.h"
#include "plugin/Manager.h" #include "plugin/Manager.h"
#include "analyzer/Manager.h"
using namespace file_analysis; using namespace file_analysis;
TableVal* Manager::disabled = 0; TableVal* Manager::disabled = 0;
TableType* Manager::tag_set_type = 0;
string Manager::salt; string Manager::salt;
Manager::Manager() Manager::Manager()
@ -27,15 +29,13 @@ Manager::Manager()
Manager::~Manager() Manager::~Manager()
{ {
Terminate(); Terminate();
for ( MIMEMap::iterator i = mime_types.begin(); i != mime_types.end(); i++ )
delete i->second;
} }
void Manager::InitPreScript() void Manager::InitPreScript()
{ {
std::list<Component*> analyzers = plugin_mgr->Components<Component>();
for ( std::list<Component*>::const_iterator i = analyzers.begin();
i != analyzers.end(); ++i )
RegisterComponent(*i, "ANALYZER_");
} }
void Manager::InitPostScript() void Manager::InitPostScript()
@ -72,6 +72,7 @@ void Manager::SetHandle(const string& handle)
if ( handle.empty() ) if ( handle.empty() )
return; return;
DBG_LOG(DBG_FILE_ANALYSIS, "Set current handle to %s", handle.c_str());
current_file_id = HashHandle(handle); current_file_id = HashHandle(handle);
} }
@ -205,6 +206,28 @@ bool Manager::AddAnalyzer(const string& file_id, file_analysis::Tag tag,
return file->AddAnalyzer(tag, args); return file->AddAnalyzer(tag, args);
} }
TableVal* Manager::AddAnalyzersForMIMEType(const string& file_id, const string& mtype,
					   RecordVal* args)
{
	// Resolve the script-level "files_tag_set" type on first use and keep
	// it in the static member for later calls.
	if ( ! tag_set_type )
		tag_set_type = internal_type("files_tag_set")->AsTableType();

	TableVal* added = new TableVal(tag_set_type);

	// Plain lookup: a MIME type nobody registered must not create an entry.
	TagSet* registered = LookupMIMEType(mtype, false);

	if ( registered )
		{
		for ( TagSet::const_iterator it = registered->begin();
		      it != registered->end(); it++ )
			{
			file_analysis::Tag tag = *it;

			if ( ! AddAnalyzer(file_id, tag, args) )
				continue;

			// Only tags for which AddAnalyzer() succeeded are reported.
			added->Assign(tag.AsEnumVal(), 0);
			}
		}

	return added;
}
bool Manager::RemoveAnalyzer(const string& file_id, file_analysis::Tag tag, bool Manager::RemoveAnalyzer(const string& file_id, file_analysis::Tag tag,
RecordVal* args) const RecordVal* args) const
{ {
@ -327,6 +350,9 @@ void Manager::GetFileHandle(analyzer::Tag tag, Connection* c, bool is_orig)
if ( ! get_file_handle ) if ( ! get_file_handle )
return; return;
DBG_LOG(DBG_FILE_ANALYSIS, "Raise get_file_handle() for protocol analyzer %s",
analyzer_mgr->GetComponentName(tag));
EnumVal* tagval = tag.AsEnumVal(); EnumVal* tagval = tag.AsEnumVal();
Ref(tagval); Ref(tagval);
@ -376,5 +402,72 @@ Analyzer* Manager::InstantiateAnalyzer(Tag tag, RecordVal* args, File* f) const
return 0; return 0;
} }
return c->Factory()(args, f); DBG_LOG(DBG_FILE_ANALYSIS, "Instantiate analyzer %s for file %s",
GetComponentName(tag), f->id.c_str());
Analyzer* a = c->Factory()(args, f);
if ( ! a )
reporter->InternalError("file analyzer instantiation failed");
a->SetAnalyzerTag(tag);
return a;
}
// Returns the tag set stored for a MIME type, or 0 if there is none and
// add_if_not_found is false. With add_if_not_found set, a missing entry is
// created as an empty set and returned.
Manager::TagSet* Manager::LookupMIMEType(const string& mtype, bool add_if_not_found)
{
	// Entries are keyed by the upper-cased type, so lookups ignore case.
	const string key = to_upper(mtype);

	MIMEMap::const_iterator pos = mime_types.find(key);

	if ( pos != mime_types.end() )
		return pos->second;

	if ( ! add_if_not_found )
		return 0;

	// The map stores the raw pointer; the sets are deleted in ~Manager().
	TagSet* tags = new TagSet;
	mime_types.insert(std::make_pair(key, tags));
	return tags;
}
// Script-facing overload: resolves the enum value to its component and
// forwards to the Tag-based overload. Fails if no component matches.
bool Manager::RegisterAnalyzerForMIMEType(EnumVal* tag, StringVal* mtype)
{
	Component* comp = Lookup(tag);

	if ( comp )
		return RegisterAnalyzerForMIMEType(comp->Tag(), mtype->CheckString());

	return false;
}
// Adds the tag to the set kept for the MIME type, creating that set if
// this is the first registration for the type. Always returns true.
bool Manager::RegisterAnalyzerForMIMEType(Tag tag, const string& mtype)
{
	TagSet* tags = LookupMIMEType(mtype, true);

	DBG_LOG(DBG_FILE_ANALYSIS, "Register analyzer %s for MIME type %s",
		GetComponentName(tag), mtype.c_str());

	tags->insert(tag);

	return true;
}
// Script-facing overload: resolves the enum value to its component and
// forwards to the Tag-based overload. Fails if no component matches.
bool Manager::UnregisterAnalyzerForMIMEType(EnumVal* tag, StringVal* mtype)
{
	Component* comp = Lookup(tag);

	if ( comp )
		return UnregisterAnalyzerForMIMEType(comp->Tag(), mtype->CheckString());

	return false;
}
// Removes the tag from the set kept for the MIME type. Returns true even
// when the tag (or the MIME type itself) was never registered.
bool Manager::UnregisterAnalyzerForMIMEType(Tag tag, const string& mtype)
{
	DBG_LOG(DBG_FILE_ANALYSIS, "Unregister analyzer %s for MIME type %s",
		GetComponentName(tag), mtype.c_str());

	// Look up without creating: unregistering a MIME type that was never
	// registered should not allocate an empty entry in the map.
	TagSet* tags = LookupMIMEType(mtype, false);

	if ( tags )
		tags->erase(tag);

	return true;
}

View file

@ -198,6 +198,18 @@ public:
bool AddAnalyzer(const string& file_id, file_analysis::Tag tag, bool AddAnalyzer(const string& file_id, file_analysis::Tag tag,
RecordVal* args) const; RecordVal* args) const;
/**
* Queue attachment of all analyzers associated with a given MIME
* type to the file identifier.
*
* @param file_id the file identifier/hash.
* @param mtype the MIME type; comparisons will be performed case-insensitively.
* @param args a \c AnalyzerArgs value which describes a file analyzer.
* @return A ref'ed \c set[Tag] with all added analyzers.
*/
TableVal* AddAnalyzersForMIMEType(const string& file_id, const string& mtype,
RecordVal* args);
/** /**
* Queue removal of an analyzer for a given file identifier. * Queue removal of an analyzer for a given file identifier.
* @param file_id the file identifier/hash. * @param file_id the file identifier/hash.
@ -224,6 +236,62 @@ public:
*/ */
Analyzer* InstantiateAnalyzer(Tag tag, RecordVal* args, File* f) const; Analyzer* InstantiateAnalyzer(Tag tag, RecordVal* args, File* f) const;
/**
* Registers a MIME type for an analyzer. Once registered, files of
* that MIME type will automatically get a corresponding analyzer
* assigned.
*
* @param tag The analyzer's tag as an enum of script type \c
* Files::Tag.
*
* @param mtype The MIME type. It will be matched case-insensitively.
*
* @return True if successful.
*/
bool RegisterAnalyzerForMIMEType(EnumVal* tag, StringVal* mtype);
/**
* Registers a MIME type for an analyzer. Once registered, files of
* that MIME type will automatically get a corresponding analyzer
* assigned.
*
* @param tag The analyzer's tag.
*
* @param mtype The MIME type. It will be matched case-insensitively.
*
* @return True if successful.
*/
bool RegisterAnalyzerForMIMEType(Tag tag, const string& mtype);
/**
* Unregisters a MIME type for an analyzer.
*
* @param tag The analyzer's tag as an enum of script type \c
* Files::Tag.
*
* @param mtype The MIME type. It will be matched case-insensitively.
*
* @return True if successful (incl. when the type wasn't actually
* registered for the analyzer).
*
*/
bool UnregisterAnalyzerForMIMEType(EnumVal* tag, StringVal* mtype);
/**
* Unregisters a MIME type for an analyzer.
*
* @param tag The analyzer's tag.
*
* @param mtype The MIME type. It will be matched case-insensitively.
*
* @return True if successful (incl. when the type wasn't actually
* registered for the analyzer).
*
*/
bool UnregisterAnalyzerForMIMEType(Tag tag, const string& mtype);
protected: protected:
friend class FileTimer; friend class FileTimer;
@ -297,12 +365,18 @@ protected:
static bool IsDisabled(analyzer::Tag tag); static bool IsDisabled(analyzer::Tag tag);
private: private:
typedef set<Tag> TagSet;
typedef map<string, TagSet*> MIMEMap;
TagSet* LookupMIMEType(const string& mtype, bool add_if_not_found);
IDMap id_map; /**< Map file ID to file_analysis::File records. */ IDMap id_map; /**< Map file ID to file_analysis::File records. */
IDSet ignored; /**< Ignored files. Will be finally removed on EOF. */ IDSet ignored; /**< Ignored files. Will be finally removed on EOF. */
string current_file_id; /**< Hash of what get_file_handle event sets. */ string current_file_id; /**< Hash of what get_file_handle event sets. */
MIMEMap mime_types;/**< Mapping of MIME types to analyzers. */
static TableVal* disabled; /**< Table of disabled analyzers. */ static TableVal* disabled; /**< Table of disabled analyzers. */
static TableType* tag_set_type; /**< Type for set[tag]. */
static string salt; /**< A salt added to file handles before hashing. */ static string salt; /**< A salt added to file handles before hashing. */
}; };

View file

@ -26,6 +26,16 @@ function Files::__add_analyzer%(file_id: string, tag: Files::Tag, args: any%): b
return new Val(result, TYPE_BOOL); return new Val(result, TYPE_BOOL);
%} %}
## :bro:see:`Files::add_analyzers_for_mime_type`.
function Files::__add_analyzers_for_mime_type%(file_id: string, mtype: string, args: any%): files_tag_set
	%{
	using BifType::Record::Files::AnalyzerArgs;

	// Coerce the untyped argument into an AnalyzerArgs record. The coerced
	// value is released here once the manager call has returned.
	RecordVal* coerced = args->AsRecordVal()->CoerceTo(AnalyzerArgs);

	Val* added = file_mgr->AddAnalyzersForMIMEType(file_id->CheckString(),
	                                               mtype->CheckString(),
	                                               coerced);
	Unref(coerced);

	// The set of added analyzer tags is returned to the script layer.
	return added;
	%}
## :bro:see:`Files::remove_analyzer`. ## :bro:see:`Files::remove_analyzer`.
function Files::__remove_analyzer%(file_id: string, tag: Files::Tag, args: any%): bool function Files::__remove_analyzer%(file_id: string, tag: Files::Tag, args: any%): bool
%{ %{