diff --git a/src/bro.bif b/src/bro.bif index aea6377d91..e0bab31542 100644 --- a/src/bro.bif +++ b/src/bro.bif @@ -852,15 +852,12 @@ function identify_data%(data: string, return_mime: bool &default=T%): string if ( ! return_mime ) reporter->Warning("identify_data() builtin-function only returns MIME types, but verbose file info requested"); - static RuleFileMagicState* fms = rule_matcher->InitFileMagic(); - rule_matcher->ClearFileMagicState(fms); - RuleMatcher::MIME_Matches matches; - rule_matcher->Match(fms, data->Bytes(), data->Len(), &matches); + string strongest_match = file_mgr->DetectMIME(data->Bytes(), data->Len()); - if ( matches.empty() ) + if ( strongest_match.empty() ) return new StringVal(""); - return new StringVal(*(matches.begin()->second.begin())); + return new StringVal(strongest_match); %} ## Determines the MIME type of a piece of data using Bro's file magic @@ -873,10 +870,8 @@ function identify_data%(data: string, return_mime: bool &default=T%): string ## .. bro:see:: identify_data function file_magic%(data: string%): mime_matches %{ - static RuleFileMagicState* fms = rule_matcher->InitFileMagic(); - rule_matcher->ClearFileMagicState(fms); RuleMatcher::MIME_Matches matches; - rule_matcher->Match(fms, data->Bytes(), data->Len(), &matches); + file_mgr->DetectMIME(data->Bytes(), data->Len(), &matches); VectorVal* rval = new VectorVal(mime_matches); for ( RuleMatcher::MIME_Matches::const_iterator it = matches.begin(); diff --git a/src/file_analysis/File.cc b/src/file_analysis/File.cc index d7cb6e09c7..ad6e46bf79 100644 --- a/src/file_analysis/File.cc +++ b/src/file_analysis/File.cc @@ -280,16 +280,12 @@ bool File::BufferBOF(const u_char* data, uint64 len) bool File::DetectMIME(const u_char* data, uint64 len) { - static RuleFileMagicState* fms = rule_matcher->InitFileMagic(); - rule_matcher->ClearFileMagicState(fms); - RuleMatcher::MIME_Matches matches; - rule_matcher->Match(fms, data, len, &matches); + string strongest_match = 
file_mgr->DetectMIME(data, len); - if ( matches.empty() ) + if ( strongest_match.empty() ) return false; - val->Assign(mime_type_idx, new StringVal(*matches.begin()->second.begin())); - + val->Assign(mime_type_idx, new StringVal(strongest_match)); return true; } diff --git a/src/file_analysis/Manager.cc b/src/file_analysis/Manager.cc index a6878e7c5d..a674bd6665 100644 --- a/src/file_analysis/Manager.cc +++ b/src/file_analysis/Manager.cc @@ -20,13 +20,15 @@ string Manager::salt; Manager::Manager() : plugin::ComponentManager("Files") + file_analysis::Component>("Files"), + id_map(), ignored(), current_file_id(), magic_state() { } Manager::~Manager() { Terminate(); + delete magic_state; } void Manager::InitPreScript() @@ -42,6 +44,12 @@ void Manager::InitPostScript() { } +void Manager::InitMagic() + { + delete magic_state; + magic_state = rule_matcher->InitFileMagic(); + } + void Manager::Terminate() { vector keys; @@ -395,3 +403,25 @@ Analyzer* Manager::InstantiateAnalyzer(Tag tag, RecordVal* args, File* f) const return c->Factory()(args, f); } + +RuleMatcher::MIME_Matches* Manager::DetectMIME(const u_char* data, uint64 len, + RuleMatcher::MIME_Matches* rval) const + { + if ( ! 
magic_state ) + reporter->InternalError("file magic signature state not initialized"); + + rval = rule_matcher->Match(magic_state, data, len, rval); + rule_matcher->ClearFileMagicState(magic_state); + return rval; + } + +string Manager::DetectMIME(const u_char* data, uint64 len) const + { + RuleMatcher::MIME_Matches matches; + DetectMIME(data, len, &matches); + + if ( matches.empty() ) + return ""; + + return *(matches.begin()->second.begin()); + } diff --git a/src/file_analysis/Manager.h b/src/file_analysis/Manager.h index 649f82c164..7c46a0eeb7 100644 --- a/src/file_analysis/Manager.h +++ b/src/file_analysis/Manager.h @@ -14,6 +14,7 @@ #include "Analyzer.h" #include "Timer.h" #include "EventHandler.h" +#include "RuleMatcher.h" #include "File.h" #include "FileTimer.h" @@ -54,6 +55,12 @@ public: */ void InitPostScript(); + /** + * Initializes the state required to match against file magic signatures + * for MIME type identification. + */ + void InitMagic(); + /** * Times out any active file analysis to prepare for shutdown. */ @@ -255,6 +262,29 @@ public: */ Analyzer* InstantiateAnalyzer(Tag tag, RecordVal* args, File* f) const; + /** + * Returns a set of all matching MIME magic signatures for a given + * chunk of data. + * @param data A chunk of bytes to match magic MIME signatures against. + * @param len The number of bytes in \a data. + * @param rval An optional pre-existing structure in which to insert + * new matches. If it's a null pointer, an object is + * allocated and returned from the method. + * @return Set of all matching file magic signatures, which may be + * an object allocated by the method if \a rval is a null pointer. + */ + RuleMatcher::MIME_Matches* DetectMIME(const u_char* data, uint64 len, + RuleMatcher::MIME_Matches* rval) const; + + /** + * Returns the strongest MIME magic signature match for a given data chunk. + * @param data A chunk of bytes to match magic MIME signatures against. + * @param len The number of bytes in \a data. 
+ * @returns The MIME type string of the strongest file magic signature + * match, or an empty string if nothing matched. + */ + std::string DetectMIME(const u_char* data, uint64 len) const; + protected: friend class FileTimer; @@ -334,6 +364,7 @@ private: IDMap id_map; /**< Map file ID to file_analysis::File records. */ IDSet ignored; /**< Ignored files. Will be finally removed on EOF. */ string current_file_id; /**< Hash of what get_file_handle event sets. */ + RuleFileMagicState* magic_state; /**< File magic signature match state. */ static TableVal* disabled; /**< Table of disabled analyzers. */ static string salt; /**< A salt added to file handles before hashing. */ diff --git a/src/main.cc b/src/main.cc index a701e5abe1..5e59a54ebd 100644 --- a/src/main.cc +++ b/src/main.cc @@ -933,6 +933,8 @@ int main(int argc, char** argv) if ( rule_debug ) rule_matcher->PrintDebug(); + + file_mgr->InitMagic(); } delete [] script_rule_files;