diff --git a/scripts/base/frameworks/file-analysis/main.bro b/scripts/base/frameworks/file-analysis/main.bro index 142709dcc4..3f6f6f10f4 100644 --- a/scripts/base/frameworks/file-analysis/main.bro +++ b/scripts/base/frameworks/file-analysis/main.bro @@ -171,58 +171,6 @@ export { ## rest of it's contents, or false if analysis for the *id* ## isn't currently active. global stop: function(f: fa_file): bool; - - ## Sends a sequential stream of data in for file analysis. - ## Meant for use when providing external file analysis input (e.g. - ## from the input framework). - ## - ## source: a string that uniquely identifies the logical file that the - ## data is a part of and describes its source. - ## - ## data: bytestring contents of the file to analyze. - global data_stream: function(source: string, data: string); - - ## Sends a non-sequential chunk of data in for file analysis. - ## Meant for use when providing external file analysis input (e.g. - ## from the input framework). - ## - ## source: a string that uniquely identifies the logical file that the - ## data is a part of and describes its source. - ## - ## data: bytestring contents of the file to analyze. - ## - ## offset: the offset within the file that this chunk starts. - global data_chunk: function(source: string, data: string, offset: count); - - ## Signals a content gap in the file bytestream. - ## Meant for use when providing external file analysis input (e.g. - ## from the input framework). - ## - ## source: a string that uniquely identifies the logical file that the - ## data is a part of and describes its source. - ## - ## offset: the offset within the file that this gap starts. - ## - ## len: the number of bytes that are missing. - global gap: function(source: string, offset: count, len: count); - - ## Signals the total size of a file. - ## Meant for use when providing external file analysis input (e.g. - ## from the input framework). 
- ## - ## source: a string that uniquely identifies the logical file that the - ## data is a part of and describes its source. - ## - ## size: the number of bytes that comprise the full file. - global set_size: function(source: string, size: count); - - ## Signals the end of a file. - ## Meant for use when providing external file analysis input (e.g. - ## from the input framework). - ## - ## source: a string that uniquely identifies the logical file that the - ## data is a part of and describes its source. - global eof: function(source: string); } redef record fa_file += { @@ -287,31 +235,6 @@ function stop(f: fa_file): bool return __stop(f$id); } -function data_stream(source: string, data: string) - { - __data_stream(source, data); - } - -function data_chunk(source: string, data: string, offset: count) - { - __data_chunk(source, data, offset); - } - -function gap(source: string, offset: count, len: count) - { - __gap(source, offset, len); - } - -function set_size(source: string, size: count) - { - __set_size(source, size); - } - -function eof(source: string) - { - __eof(source); - } - event bro_init() &priority=5 { Log::create_stream(FileAnalysis::LOG, diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 447b7d9ec7..c853c301eb 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -464,7 +464,6 @@ set(bro_SRCS file_analysis/Manager.cc file_analysis/File.cc file_analysis/FileTimer.cc - file_analysis/FileID.h file_analysis/Analyzer.h file_analysis/AnalyzerSet.cc file_analysis/Extract.cc diff --git a/src/file_analysis.bif b/src/file_analysis.bif index cdece0d350..3c720d17b6 100644 --- a/src/file_analysis.bif +++ b/src/file_analysis.bif @@ -30,27 +30,23 @@ enum Analyzer %{ ## :bro:see:`FileAnalysis::postpone_timeout`. 
function FileAnalysis::__postpone_timeout%(file_id: string%): bool %{ - using file_analysis::FileID; - bool result = file_mgr->PostponeTimeout(FileID(file_id->CheckString())); + bool result = file_mgr->PostponeTimeout(file_id->CheckString()); return new Val(result, TYPE_BOOL); %} ## :bro:see:`FileAnalysis::set_timeout_interval`. function FileAnalysis::__set_timeout_interval%(file_id: string, t: interval%): bool %{ - using file_analysis::FileID; - bool result = file_mgr->SetTimeoutInterval(FileID(file_id->CheckString()), - t); + bool result = file_mgr->SetTimeoutInterval(file_id->CheckString(), t); return new Val(result, TYPE_BOOL); %} ## :bro:see:`FileAnalysis::add_analyzer`. function FileAnalysis::__add_analyzer%(file_id: string, args: any%): bool %{ - using file_analysis::FileID; using BifType::Record::FileAnalysis::AnalyzerArgs; RecordVal* rv = args->AsRecordVal()->CoerceTo(AnalyzerArgs); - bool result = file_mgr->AddAnalyzer(FileID(file_id->CheckString()), rv); + bool result = file_mgr->AddAnalyzer(file_id->CheckString(), rv); Unref(rv); return new Val(result, TYPE_BOOL); %} @@ -58,10 +54,9 @@ function FileAnalysis::__add_analyzer%(file_id: string, args: any%): bool ## :bro:see:`FileAnalysis::remove_analyzer`. function FileAnalysis::__remove_analyzer%(file_id: string, args: any%): bool %{ - using file_analysis::FileID; using BifType::Record::FileAnalysis::AnalyzerArgs; RecordVal* rv = args->AsRecordVal()->CoerceTo(AnalyzerArgs); - bool result = file_mgr->RemoveAnalyzer(FileID(file_id->CheckString()), rv); + bool result = file_mgr->RemoveAnalyzer(file_id->CheckString(), rv); Unref(rv); return new Val(result, TYPE_BOOL); %} @@ -69,47 +64,10 @@ function FileAnalysis::__remove_analyzer%(file_id: string, args: any%): bool ## :bro:see:`FileAnalysis::stop`. 
function FileAnalysis::__stop%(file_id: string%): bool %{ - using file_analysis::FileID; - bool result = file_mgr->IgnoreFile(FileID(file_id->CheckString())); + bool result = file_mgr->IgnoreFile(file_id->CheckString()); return new Val(result, TYPE_BOOL); %} -## :bro:see:`FileAnalysis::data_stream`. -function FileAnalysis::__data_stream%(source: string, data: string%): any - %{ - file_mgr->DataIn(data->Bytes(), data->Len(), source->CheckString()); - return 0; - %} - -## :bro:see:`FileAnalysis::data_chunk`. -function FileAnalysis::__data_chunk%(source: string, data: string, - offset: count%): any - %{ - file_mgr->DataIn(data->Bytes(), data->Len(), offset, source->CheckString()); - return 0; - %} - -## :bro:see:`FileAnalysis::gap`. -function FileAnalysis::__gap%(source: string, offset: count, len: count%): any - %{ - file_mgr->Gap(offset, len, source->CheckString()); - return 0; - %} - -## :bro:see:`FileAnalysis::set_size`. -function FileAnalysis::__set_size%(source: string, size: count%): any - %{ - file_mgr->SetSize(size, source->CheckString()); - return 0; - %} - -## :bro:see:`FileAnalysis::eof`. -function FileAnalysis::__eof%(source: string%): any - %{ - file_mgr->EndOfFile(source->CheckString()); - return 0; - %} - module GLOBAL; ## For use within a :bro:see:`get_file_handle` handler to set a unique diff --git a/src/file_analysis/File.cc b/src/file_analysis/File.cc index 17b01f6b39..95ea3c5926 100644 --- a/src/file_analysis/File.cc +++ b/src/file_analysis/File.cc @@ -1,11 +1,9 @@ // See the file "COPYING" in the main distribution directory for copyright. 
#include -#include #include "File.h" #include "FileTimer.h" -#include "FileID.h" #include "Analyzer.h" #include "Manager.h" #include "Reporter.h" @@ -51,8 +49,6 @@ int File::bof_buffer_size_idx = -1; int File::bof_buffer_idx = -1; int File::mime_type_idx = -1; -string File::salt; - void File::StaticInit() { if ( id_idx != -1 ) @@ -72,31 +68,19 @@ void File::StaticInit() bof_buffer_size_idx = Idx("bof_buffer_size"); bof_buffer_idx = Idx("bof_buffer"); mime_type_idx = Idx("mime_type"); - - salt = BifConst::FileAnalysis::salt->CheckString(); } -File::File(const string& unique, Connection* conn, AnalyzerTag::Tag tag, +File::File(const string& file_id, Connection* conn, AnalyzerTag::Tag tag, bool is_orig) - : id(""), unique(unique), val(0), postpone_timeout(false), - first_chunk(true), missed_bof(false), need_reassembly(false), done(false), - analyzers(this) + : id(file_id), val(0), postpone_timeout(false), first_chunk(true), + missed_bof(false), need_reassembly(false), done(false), analyzers(this) { StaticInit(); - char tmp[20]; - uint64 hash[2]; - string msg(unique + salt); - MD5(reinterpret_cast(msg.data()), msg.size(), - reinterpret_cast(hash)); - uitoa_n(hash[0], tmp, sizeof(tmp), 62); - - DBG_LOG(DBG_FILE_ANALYSIS, "Creating new File object %s (%s)", tmp, - unique.c_str()); + DBG_LOG(DBG_FILE_ANALYSIS, "Creating new File object %s", file_id.c_str()); val = new RecordVal(fa_file_type); - val->Assign(id_idx, new StringVal(tmp)); - id = FileID(tmp); + val->Assign(id_idx, new StringVal(file_id.c_str())); if ( conn ) { @@ -106,8 +90,9 @@ File::File(const string& unique, Connection* conn, AnalyzerTag::Tag tag, UpdateConnectionFields(conn); } else - // use the unique file handle as source - val->Assign(source_idx, new StringVal(unique.c_str())); + { + // TODO: what to use as source field? 
(input framework interface) + } UpdateLastActivityTime(); } @@ -423,7 +408,7 @@ void File::Gap(uint64 offset, uint64 len) bool File::FileEventAvailable(EventHandlerPtr h) { - return h && ! file_mgr->IsIgnored(unique); + return h && ! file_mgr->IsIgnored(id); } void File::FileEvent(EventHandlerPtr h) diff --git a/src/file_analysis/File.h b/src/file_analysis/File.h index a31f0bfa41..5fa0f80ec8 100644 --- a/src/file_analysis/File.h +++ b/src/file_analysis/File.h @@ -10,7 +10,6 @@ #include "Conn.h" #include "Val.h" #include "AnalyzerSet.h" -#include "FileID.h" #include "BroString.h" namespace file_analysis { @@ -40,12 +39,7 @@ public: /** * @return value of the "id" field from #val record. */ - FileID GetID() const { return id; } - - /** - * @return the string which uniquely identifies the file. - */ - string GetUnique() const { return unique; } + string GetID() const { return id; } /** * @return value of "last_active" field in #val record; @@ -131,7 +125,7 @@ protected: /** * Constructor; only file_analysis::Manager should be creating these. */ - File(const string& unique, Connection* conn = 0, + File(const string& file_id, Connection* conn = 0, AnalyzerTag::Tag tag = AnalyzerTag::Error, bool is_orig = false); /** @@ -186,8 +180,7 @@ protected: static void StaticInit(); private: - FileID id; /**< A pretty hash that likely identifies file */ - string unique; /**< A string that uniquely identifies file */ + string id; /**< A pretty hash that likely identifies file */ RecordVal* val; /**< \c fa_file from script layer. */ bool postpone_timeout; /**< Whether postponing timeout is requested. */ bool first_chunk; /**< Track first non-linear chunk. */ @@ -207,8 +200,6 @@ private: BroString::CVec chunks; } bof_buffer; /**< Beginning of file buffer. 
*/ - static string salt; - static int id_idx; static int parent_id_idx; static int source_idx; diff --git a/src/file_analysis/FileID.h b/src/file_analysis/FileID.h deleted file mode 100644 index 9816437214..0000000000 --- a/src/file_analysis/FileID.h +++ /dev/null @@ -1,34 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef FILE_ANALYSIS_FILEID_H -#define FILE_ANALYSIS_FILEID_H - -namespace file_analysis { - -/** - * A simple string wrapper class to help enforce some type safety between - * methods of FileAnalysis::Manager, some of which use a unique string to - * identify files, and others which use a pretty hash (the FileID) to identify - * files. A FileID is primarily used in methods which interface with the - * script-layer, while the unique strings are used for methods which interface - * with protocol analyzers or anything that sends data to the file analysis - * framework. - */ -struct FileID { - string id; - - explicit FileID(const string arg_id) : id(arg_id) {} - FileID(const FileID& other) : id(other.id) {} - - const char* c_str() const { return id.c_str(); } - - bool operator==(const FileID& rhs) const { return id == rhs.id; } - bool operator<(const FileID& rhs) const { return id < rhs.id; } - - FileID& operator=(const FileID& rhs) { id = rhs.id; return *this; } - FileID& operator=(const string& rhs) { id = rhs; return *this; } -}; - -} // namespace file_analysis - -#endif diff --git a/src/file_analysis/FileTimer.cc b/src/file_analysis/FileTimer.cc index 84d4138616..575857fd15 100644 --- a/src/file_analysis/FileTimer.cc +++ b/src/file_analysis/FileTimer.cc @@ -5,7 +5,7 @@ using namespace file_analysis; -FileTimer::FileTimer(double t, const FileID& id, double interval) +FileTimer::FileTimer(double t, const string& id, double interval) : Timer(t + interval, TIMER_FILE_ANALYSIS_INACTIVITY), file_id(id) { DBG_LOG(DBG_FILE_ANALYSIS, "New %f second timeout timer for %s", diff --git a/src/file_analysis/FileTimer.h 
b/src/file_analysis/FileTimer.h index 6ab2638e5f..32d4e63254 100644 --- a/src/file_analysis/FileTimer.h +++ b/src/file_analysis/FileTimer.h @@ -5,7 +5,6 @@ #include #include "Timer.h" -#include "FileID.h" namespace file_analysis { @@ -14,7 +13,7 @@ namespace file_analysis { */ class FileTimer : public Timer { public: - FileTimer(double t, const FileID& id, double interval); + FileTimer(double t, const string& id, double interval); /** * Check inactivity of file_analysis::File corresponding to #file_id, @@ -23,7 +22,7 @@ public: void Dispatch(double t, int is_expire); private: - FileID file_id; + string file_id; }; } // namespace file_analysis diff --git a/src/file_analysis/Manager.cc b/src/file_analysis/Manager.cc index d6f00e1856..91df333523 100644 --- a/src/file_analysis/Manager.cc +++ b/src/file_analysis/Manager.cc @@ -2,6 +2,7 @@ #include #include +#include #include "Manager.h" #include "File.h" @@ -24,7 +25,7 @@ Manager::~Manager() void Manager::Terminate() { - vector keys; + vector keys; for ( IDMap::iterator it = id_map.begin(); it != id_map.end(); ++it ) keys.push_back(it->first); @@ -32,66 +33,60 @@ void Manager::Terminate() Timeout(keys[i], true); } +string Manager::HashHandle(const string& handle) const + { + static string salt; + + if ( salt.empty() ) + salt = BifConst::FileAnalysis::salt->CheckString(); + + char tmp[20]; + uint64 hash[2]; + string msg(handle + salt); + + MD5(reinterpret_cast(msg.data()), msg.size(), + reinterpret_cast(hash)); + uitoa_n(hash[0], tmp, sizeof(tmp), 62); + + return tmp; + } + void Manager::SetHandle(const string& handle) { - current_handle = handle; + if ( handle.empty() ) + return; + + current_file_id = HashHandle(handle); } void Manager::DataIn(const u_char* data, uint64 len, uint64 offset, AnalyzerTag::Tag tag, Connection* conn, bool is_orig) { - if ( IsDisabled(tag) ) - return; + File* file = GetFile(conn, tag, is_orig); - GetFileHandle(tag, conn, is_orig); - DataIn(data, len, offset, GetFile(current_handle, conn, tag, 
is_orig)); - } - -void Manager::DataIn(const u_char* data, uint64 len, uint64 offset, - const string& unique) - { - DataIn(data, len, offset, GetFile(unique)); - } - -void Manager::DataIn(const u_char* data, uint64 len, uint64 offset, - File* file) - { if ( ! file ) return; file->DataIn(data, len, offset); if ( file->IsComplete() ) - RemoveFile(file->GetUnique()); + RemoveFile(file->GetID()); } void Manager::DataIn(const u_char* data, uint64 len, AnalyzerTag::Tag tag, Connection* conn, bool is_orig) { - if ( IsDisabled(tag) ) - return; - - GetFileHandle(tag, conn, is_orig); - // Sequential data input shouldn't be going over multiple conns, so don't // do the check to update connection set. - DataIn(data, len, GetFile(current_handle, conn, tag, is_orig, false)); - } + File* file = GetFile(conn, tag, is_orig, false); -void Manager::DataIn(const u_char* data, uint64 len, const string& unique) - { - DataIn(data, len, GetFile(unique)); - } - -void Manager::DataIn(const u_char* data, uint64 len, File* file) - { if ( ! file ) return; file->DataIn(data, len); if ( file->IsComplete() ) - RemoveFile(file->GetUnique()); + RemoveFile(file->GetID()); } void Manager::EndOfFile(AnalyzerTag::Tag tag, Connection* conn) @@ -102,35 +97,16 @@ void Manager::EndOfFile(AnalyzerTag::Tag tag, Connection* conn) void Manager::EndOfFile(AnalyzerTag::Tag tag, Connection* conn, bool is_orig) { - if ( IsDisabled(tag) ) - return; - + // Don't need to create a file if we're just going to remove it right away. 
GetFileHandle(tag, conn, is_orig); - EndOfFile(current_handle); - } - -void Manager::EndOfFile(const string& unique) - { - RemoveFile(unique); + RemoveFile(current_file_id); } void Manager::Gap(uint64 offset, uint64 len, AnalyzerTag::Tag tag, Connection* conn, bool is_orig) { - if ( IsDisabled(tag) ) - return; + File* file = GetFile(conn, tag, is_orig); - GetFileHandle(tag, conn, is_orig); - Gap(offset, len, GetFile(current_handle, conn, tag, is_orig)); - } - -void Manager::Gap(uint64 offset, uint64 len, const string& unique) - { - Gap(offset, len, GetFile(unique)); - } - -void Manager::Gap(uint64 offset, uint64 len, File* file) - { if ( ! file ) return; @@ -140,30 +116,18 @@ void Manager::Gap(uint64 offset, uint64 len, File* file) void Manager::SetSize(uint64 size, AnalyzerTag::Tag tag, Connection* conn, bool is_orig) { - if ( IsDisabled(tag) ) - return; + File* file = GetFile(conn, tag, is_orig); - GetFileHandle(tag, conn, is_orig); - SetSize(size, GetFile(current_handle, conn, tag, is_orig)); - } - -void Manager::SetSize(uint64 size, const string& unique) - { - SetSize(size, GetFile(unique)); - } - -void Manager::SetSize(uint64 size, File* file) - { if ( ! 
file ) return; file->SetTotalBytes(size); if ( file->IsComplete() ) - RemoveFile(file->GetUnique()); + RemoveFile(file->GetID()); } -bool Manager::PostponeTimeout(const FileID& file_id) const +bool Manager::PostponeTimeout(const string& file_id) const { File* file = Lookup(file_id); @@ -174,7 +138,7 @@ bool Manager::PostponeTimeout(const FileID& file_id) const return true; } -bool Manager::SetTimeoutInterval(const FileID& file_id, double interval) const +bool Manager::SetTimeoutInterval(const string& file_id, double interval) const { File* file = Lookup(file_id); @@ -185,7 +149,7 @@ bool Manager::SetTimeoutInterval(const FileID& file_id, double interval) const return true; } -bool Manager::AddAnalyzer(const FileID& file_id, RecordVal* args) const +bool Manager::AddAnalyzer(const string& file_id, RecordVal* args) const { File* file = Lookup(file_id); @@ -195,7 +159,7 @@ bool Manager::AddAnalyzer(const FileID& file_id, RecordVal* args) const return file->AddAnalyzer(args); } -bool Manager::RemoveAnalyzer(const FileID& file_id, const RecordVal* args) const +bool Manager::RemoveAnalyzer(const string& file_id, const RecordVal* args) const { File* file = Lookup(file_id); @@ -205,32 +169,27 @@ bool Manager::RemoveAnalyzer(const FileID& file_id, const RecordVal* args) const return file->RemoveAnalyzer(args); } -File* Manager::GetFile(const string& unique, Connection* conn, - AnalyzerTag::Tag tag, bool is_orig, bool update_conn) +File* Manager::GetFile(Connection* conn, AnalyzerTag::Tag tag, bool is_orig, + bool update_conn) { - if ( unique.empty() ) + // sets current_file_id for us + GetFileHandle(tag, conn, is_orig); + + if ( current_file_id.empty() ) return 0; - if ( IsIgnored(unique) ) + if ( IsIgnored(current_file_id) ) return 0; - File* rval = str_map[unique]; + File* rval = id_map[current_file_id]; if ( ! 
rval ) { - rval = str_map[unique] = new File(unique, conn, tag, is_orig); - FileID id = rval->GetID(); - - if ( id_map[id] ) - { - reporter->Error("Evicted duplicate file ID: %s", id.c_str()); - RemoveFile(unique); - } - - id_map[id] = rval; + rval = id_map[current_file_id] = new File(current_file_id, conn, tag, + is_orig); rval->ScheduleInactivityTimer(); - if ( IsIgnored(unique) ) + if ( IsIgnored(current_file_id) ) return 0; } else @@ -244,7 +203,7 @@ File* Manager::GetFile(const string& unique, Connection* conn, return rval; } -File* Manager::Lookup(const FileID& file_id) const +File* Manager::Lookup(const string& file_id) const { IDMap::const_iterator it = id_map.find(file_id); @@ -254,7 +213,7 @@ File* Manager::Lookup(const FileID& file_id) const return it->second; } -void Manager::Timeout(const FileID& file_id, bool is_terminating) +void Manager::Timeout(const string& file_id, bool is_terminating) { File* file = Lookup(file_id); @@ -277,53 +236,50 @@ void Manager::Timeout(const FileID& file_id, bool is_terminating) DBG_LOG(DBG_FILE_ANALYSIS, "File analysis timeout for %s", file->GetID().c_str()); - RemoveFile(file->GetUnique()); + RemoveFile(file->GetID()); } -bool Manager::IgnoreFile(const FileID& file_id) +bool Manager::IgnoreFile(const string& file_id) + { + if ( id_map.find(file_id) == id_map.end() ) + return false; + + DBG_LOG(DBG_FILE_ANALYSIS, "Ignore FileID %s", file_id.c_str()); + + ignored.insert(file_id); + + return true; + } + +bool Manager::RemoveFile(const string& file_id) { IDMap::iterator it = id_map.find(file_id); if ( it == id_map.end() ) return false; - DBG_LOG(DBG_FILE_ANALYSIS, "Ignore FileID %s", file_id.c_str()); - - ignored.insert(it->second->GetUnique()); - - return true; - } - -bool Manager::RemoveFile(const string& unique) - { - StrMap::iterator it = str_map.find(unique); - - if ( it == str_map.end() ) - return false; + DBG_LOG(DBG_FILE_ANALYSIS, "Remove FileID %s", file_id.c_str()); it->second->EndOfFile(); - FileID id = 
it->second->GetID(); - - DBG_LOG(DBG_FILE_ANALYSIS, "Remove FileID %s", id.c_str()); - - if ( ! id_map.erase(id) ) - reporter->Error("No mapping for fileID %s", id.c_str()); - - ignored.erase(unique); delete it->second; - str_map.erase(unique); + id_map.erase(file_id); + ignored.erase(file_id); + return true; } -bool Manager::IsIgnored(const string& unique) +bool Manager::IsIgnored(const string& file_id) { - return ignored.find(unique) != ignored.end(); + return ignored.find(file_id) != ignored.end(); } void Manager::GetFileHandle(AnalyzerTag::Tag tag, Connection* c, bool is_orig) { - current_handle.clear(); + current_file_id.clear(); + + if ( IsDisabled(tag) ) + return; if ( ! get_file_handle ) return; diff --git a/src/file_analysis/Manager.h b/src/file_analysis/Manager.h index d2f8f6f1bf..3697f3c9b2 100644 --- a/src/file_analysis/Manager.h +++ b/src/file_analysis/Manager.h @@ -18,7 +18,6 @@ #include "File.h" #include "FileTimer.h" -#include "FileID.h" namespace file_analysis { @@ -36,7 +35,12 @@ public: void Terminate(); /** - * Take in a unique file handle string to identifiy incoming file data. + * @return a prettified MD5 hash of \a handle, truncated to 64-bits. + */ + string HashHandle(const string& handle) const; + + /** + * Take in a unique file handle string to identify incoming file data. */ void SetHandle(const string& handle); @@ -45,59 +49,48 @@ public: */ void DataIn(const u_char* data, uint64 len, uint64 offset, AnalyzerTag::Tag tag, Connection* conn, bool is_orig); - void DataIn(const u_char* data, uint64 len, uint64 offset, - const string& unique); - void DataIn(const u_char* data, uint64 len, uint64 offset, - File* file); /** * Pass in sequential file data. */ void DataIn(const u_char* data, uint64 len, AnalyzerTag::Tag tag, Connection* conn, bool is_orig); - void DataIn(const u_char* data, uint64 len, const string& unique); - void DataIn(const u_char* data, uint64 len, File* file); /** * Signal the end of file data. 
*/ void EndOfFile(AnalyzerTag::Tag tag, Connection* conn); void EndOfFile(AnalyzerTag::Tag tag, Connection* conn, bool is_orig); - void EndOfFile(const string& unique); /** * Signal a gap in the file data stream. */ void Gap(uint64 offset, uint64 len, AnalyzerTag::Tag tag, Connection* conn, bool is_orig); - void Gap(uint64 offset, uint64 len, const string& unique); - void Gap(uint64 offset, uint64 len, File* file); /** * Provide the expected number of bytes that comprise a file. */ void SetSize(uint64 size, AnalyzerTag::Tag tag, Connection* conn, bool is_orig); - void SetSize(uint64 size, const string& unique); - void SetSize(uint64 size, File* file); /** * Starts ignoring a file, which will finally be removed from internal * mappings on EOF or TIMEOUT. * @return false if file identifier did not map to anything, else true. */ - bool IgnoreFile(const FileID& file_id); + bool IgnoreFile(const string& file_id); /** * If called during a \c file_timeout event handler, requests deferral of * analysis timeout. */ - bool PostponeTimeout(const FileID& file_id) const; + bool PostponeTimeout(const string& file_id) const; /** * Set's an inactivity threshold for the file. */ - bool SetTimeoutInterval(const FileID& file_id, double interval) const; + bool SetTimeoutInterval(const string& file_id, double interval) const; /** * Queue attachment of an analzer to the file identifier. Multiple @@ -105,34 +98,33 @@ public: * as long as the arguments differ. * @return false if the analyzer failed to be instantiated, else true. */ - bool AddAnalyzer(const FileID& file_id, RecordVal* args) const; + bool AddAnalyzer(const string& file_id, RecordVal* args) const; /** * Queue removal of an analyzer for a given file identifier. * @return true if the analyzer is active at the time of call, else false. 
*/ - bool RemoveAnalyzer(const FileID& file_id, const RecordVal* args) const; + bool RemoveAnalyzer(const string& file_id, const RecordVal* args) const; /** - * @return whether the file mapped to \a unique is being ignored. + * @return whether the file mapped to \a file_id is being ignored. */ - bool IsIgnored(const string& unique); + bool IsIgnored(const string& file_id); protected: friend class FileTimer; - typedef map StrMap; - typedef set StrSet; - typedef map IDMap; + typedef set IDSet; + typedef map IDMap; /** - * @return the File object mapped to \a unique or a null pointer if analysis - * is being ignored for the associated file. An File object may be - * created if a mapping doesn't exist, and if it did exist, the - * activity time is refreshed along with any connection-related - * fields. + * @return the File object mapped to #current_file_id or a null pointer if + * analysis is being ignored for the associated file. A File + * object may be created if a mapping doesn't exist, and if it did + * exist, the activity time is refreshed along with any + * connection-related fields. */ - File* GetFile(const string& unique, Connection* conn = 0, + File* GetFile(Connection* conn = 0, AnalyzerTag::Tag tag = AnalyzerTag::Error, bool is_orig = false, bool update_conn = true); @@ -140,24 +132,24 @@ protected: * @return the File object mapped to \a file_id, or a null pointer if no * mapping exists. */ - File* Lookup(const FileID& file_id) const; + File* Lookup(const string& file_id) const; /** * Evaluate timeout policy for a file and remove the File object mapped to * \a file_id if needed. */ - void Timeout(const FileID& file_id, bool is_terminating = ::terminating); + void Timeout(const string& file_id, bool is_terminating = ::terminating); /** - * Immediately remove file_analysis::File object associated with \a unique. - * @return false if file string did not map to anything, else true. + * Immediately remove file_analysis::File object associated with \a file_id. 
+ * @return false if file id string did not map to anything, else true. */ - bool RemoveFile(const string& unique); + bool RemoveFile(const string& file_id); /** - * Sets #current_handle to a unique file handle string based on what the - * \c get_file_handle event derives from the connection params. The - * event queue is flushed so that we can get the handle value immediately. + * Sets #current_file_id to a hash of a unique file handle string based on + * what the \c get_file_handle event derives from the connection params. + * Event queue is flushed so that we can get the handle value immediately. */ void GetFileHandle(AnalyzerTag::Tag tag, Connection* c, bool is_orig); @@ -167,10 +159,9 @@ protected: static bool IsDisabled(AnalyzerTag::Tag tag); private: - StrMap str_map; /**< Map unique string to file_analysis::File. */ IDMap id_map; /**< Map file ID to file_analysis::File records. */ - StrSet ignored; /**< Ignored files. Will be finally removed on EOF. */ - string current_handle; /**< Last file handle set by get_file_handle event.*/ + IDSet ignored; /**< Ignored files. Will be finally removed on EOF. */ + string current_file_id; /**< Hash of what get_file_handle event sets.*/ static TableVal* disabled; /**< Table of disabled analyzers. */ };