Refactor common MIME magic matching code.

Put some methods in file_analysis::Manager that can perform the
matching process and return MIME type results.  Also helps to
centralize the management/re-use of a signature matcher object.
This commit is contained in:
Jon Siwek 2014-03-05 10:49:57 -06:00
parent 9ac8110416
commit 0865b152bb
5 changed files with 71 additions and 17 deletions

View file

@ -852,15 +852,12 @@ function identify_data%(data: string, return_mime: bool &default=T%): string
if ( ! return_mime ) if ( ! return_mime )
reporter->Warning("identify_data() builtin-function only returns MIME types, but verbose file info requested"); reporter->Warning("identify_data() builtin-function only returns MIME types, but verbose file info requested");
static RuleFileMagicState* fms = rule_matcher->InitFileMagic(); string strongest_match = file_mgr->DetectMIME(data->Bytes(), data->Len());
rule_matcher->ClearFileMagicState(fms);
RuleMatcher::MIME_Matches matches;
rule_matcher->Match(fms, data->Bytes(), data->Len(), &matches);
if ( matches.empty() ) if ( strongest_match.empty() )
return new StringVal("<unknown>"); return new StringVal("<unknown>");
return new StringVal(*(matches.begin()->second.begin())); return new StringVal(strongest_match);
%} %}
## Determines the MIME type of a piece of data using Bro's file magic ## Determines the MIME type of a piece of data using Bro's file magic
@ -873,10 +870,8 @@ function identify_data%(data: string, return_mime: bool &default=T%): string
## .. bro:see:: identify_data ## .. bro:see:: identify_data
function file_magic%(data: string%): mime_matches function file_magic%(data: string%): mime_matches
%{ %{
static RuleFileMagicState* fms = rule_matcher->InitFileMagic();
rule_matcher->ClearFileMagicState(fms);
RuleMatcher::MIME_Matches matches; RuleMatcher::MIME_Matches matches;
rule_matcher->Match(fms, data->Bytes(), data->Len(), &matches); file_mgr->DetectMIME(data->Bytes(), data->Len(), &matches);
VectorVal* rval = new VectorVal(mime_matches); VectorVal* rval = new VectorVal(mime_matches);
for ( RuleMatcher::MIME_Matches::const_iterator it = matches.begin(); for ( RuleMatcher::MIME_Matches::const_iterator it = matches.begin();

View file

@ -280,16 +280,12 @@ bool File::BufferBOF(const u_char* data, uint64 len)
bool File::DetectMIME(const u_char* data, uint64 len) bool File::DetectMIME(const u_char* data, uint64 len)
{ {
static RuleFileMagicState* fms = rule_matcher->InitFileMagic(); string strongest_match = file_mgr->DetectMIME(data, len);
rule_matcher->ClearFileMagicState(fms);
RuleMatcher::MIME_Matches matches;
rule_matcher->Match(fms, data, len, &matches);
if ( matches.empty() ) if ( strongest_match.empty() )
return false; return false;
val->Assign(mime_type_idx, new StringVal(*matches.begin()->second.begin())); val->Assign(mime_type_idx, new StringVal(strongest_match));
return true; return true;
} }

View file

@ -20,13 +20,15 @@ string Manager::salt;
Manager::Manager() Manager::Manager()
: plugin::ComponentManager<file_analysis::Tag, : plugin::ComponentManager<file_analysis::Tag,
file_analysis::Component>("Files") file_analysis::Component>("Files"),
id_map(), ignored(), current_file_id(), magic_state()
{ {
} }
Manager::~Manager() Manager::~Manager()
{ {
Terminate(); Terminate();
delete magic_state;
} }
void Manager::InitPreScript() void Manager::InitPreScript()
@ -42,6 +44,12 @@ void Manager::InitPostScript()
{ {
} }
/**
 * (Re)initializes the file magic matching state held by the manager.
 * Any state left over from a previous call is freed before the
 * replacement is obtained from rule_matcher.
 */
void Manager::InitMagic()
{
// Deleting a null pointer is a no-op, so this is fine on the first call.
delete magic_state;
magic_state = rule_matcher->InitFileMagic();
}
void Manager::Terminate() void Manager::Terminate()
{ {
vector<string> keys; vector<string> keys;
@ -395,3 +403,25 @@ Analyzer* Manager::InstantiateAnalyzer(Tag tag, RecordVal* args, File* f) const
return c->Factory()(args, f); return c->Factory()(args, f);
} }
/**
 * Matches a chunk of data against the file magic signatures using the
 * manager's match state.
 * @param data The bytes to match against.
 * @param len The number of bytes in \a data.
 * @param rval Passed through to RuleMatcher::Match() as the structure
 *             in which to collect matches.
 * @return Whatever RuleMatcher::Match() returned for this data.
 */
RuleMatcher::MIME_Matches* Manager::DetectMIME(const u_char* data, uint64 len,
                                               RuleMatcher::MIME_Matches* rval) const
	{
	if ( magic_state == 0 )
		reporter->InternalError("file magic signature state not initialized");

	RuleMatcher::MIME_Matches* matched =
		rule_matcher->Match(magic_state, data, len, rval);

	// The state is reset after every match (presumably so that the next
	// call does not see leftovers from this one).
	rule_matcher->ClearFileMagicState(magic_state);

	return matched;
	}
/**
 * Convenience overload that yields a single MIME type for a data chunk.
 * @param data The bytes to match against.
 * @param len The number of bytes in \a data.
 * @return The first MIME type stored under the first entry of the match
 *         results, or an empty string when there were no matches.
 */
string Manager::DetectMIME(const u_char* data, uint64 len) const
	{
	RuleMatcher::MIME_Matches found;
	DetectMIME(data, len, &found);

	if ( ! found.empty() )
		return *(found.begin()->second.begin());

	return "";
	}

View file

@ -14,6 +14,7 @@
#include "Analyzer.h" #include "Analyzer.h"
#include "Timer.h" #include "Timer.h"
#include "EventHandler.h" #include "EventHandler.h"
#include "RuleMatcher.h"
#include "File.h" #include "File.h"
#include "FileTimer.h" #include "FileTimer.h"
@ -54,6 +55,12 @@ public:
*/ */
void InitPostScript(); void InitPostScript();
/**
* Initializes the state required to match against file magic signatures
* for MIME type identification.
*/
void InitMagic();
/** /**
* Times out any active file analysis to prepare for shutdown. * Times out any active file analysis to prepare for shutdown.
*/ */
@ -255,6 +262,29 @@ public:
*/ */
Analyzer* InstantiateAnalyzer(Tag tag, RecordVal* args, File* f) const; Analyzer* InstantiateAnalyzer(Tag tag, RecordVal* args, File* f) const;
/**
* Returns a set of all matching MIME magic signatures for a given
* chunk of data.
* @param data A chunk of bytes to match magic MIME signatures against.
* @param len The number of bytes in \a data.
* @param rval An optional pre-existing structure in which to insert
* new matches. If it's a null pointer, an object is
* allocated and returned from the method.
* @return Set of all matching file magic signatures, which may be
* an object allocated by the method if \a rval is a null pointer.
*/
RuleMatcher::MIME_Matches* DetectMIME(const u_char* data, uint64 len,
RuleMatcher::MIME_Matches* rval) const;
/**
* Returns the strongest MIME magic signature match for a given data chunk.
* @param data A chunk of bytes to match magic MIME signatures against.
* @param len The number of bytes in \a data.
* @returns The MIME type string of the strongest file magic signature
* match, or an empty string if nothing matched.
*/
std::string DetectMIME(const u_char* data, uint64 len) const;
protected: protected:
friend class FileTimer; friend class FileTimer;
@ -334,6 +364,7 @@ private:
IDMap id_map; /**< Map file ID to file_analysis::File records. */ IDMap id_map; /**< Map file ID to file_analysis::File records. */
IDSet ignored; /**< Ignored files. Will be finally removed on EOF. */ IDSet ignored; /**< Ignored files. Will be finally removed on EOF. */
string current_file_id; /**< Hash of what get_file_handle event sets. */ string current_file_id; /**< Hash of what get_file_handle event sets. */
RuleFileMagicState* magic_state; /**< File magic signature match state. */
static TableVal* disabled; /**< Table of disabled analyzers. */ static TableVal* disabled; /**< Table of disabled analyzers. */
static string salt; /**< A salt added to file handles before hashing. */ static string salt; /**< A salt added to file handles before hashing. */

View file

@ -933,6 +933,8 @@ int main(int argc, char** argv)
if ( rule_debug ) if ( rule_debug )
rule_matcher->PrintDebug(); rule_matcher->PrintDebug();
file_mgr->InitMagic();
} }
delete [] script_rule_files; delete [] script_rule_files;