diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 16de055e11..489bfe39b4 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -451,6 +451,7 @@ set(bro_SRCS file_analysis/Manager.cc file_analysis/Info.cc file_analysis/InfoTimer.cc + file_analysis/FileID.h file_analysis/Action.h file_analysis/Extract.cc diff --git a/src/FileAnalyzer.cc b/src/FileAnalyzer.cc index 8e994adbf1..27592ea34c 100644 --- a/src/FileAnalyzer.cc +++ b/src/FileAnalyzer.cc @@ -21,15 +21,15 @@ File_Analyzer::File_Analyzer(Connection* conn) char op[256], rp[256]; modp_ulitoa10(ntohs(conn->OrigPort()), op); modp_ulitoa10(ntohs(conn->RespPort()), rp); - file_id = "TCPFile " + conn->OrigAddr().AsString() + ":" + op + "->" + - conn->RespAddr().AsString() + ":" + rp; + unique_file = "TCPFile " + conn->OrigAddr().AsString() + ":" + op + "->" + + conn->RespAddr().AsString() + ":" + rp; } void File_Analyzer::DeliverStream(int len, const u_char* data, bool orig) { TCP_ApplicationAnalyzer::DeliverStream(len, data, orig); - file_mgr->DataIn(file_id, data, len, Conn()); + file_mgr->DataIn(unique_file, data, len, Conn()); int n = min(len, BUFFER_SIZE - buffer_len); @@ -48,14 +48,14 @@ void File_Analyzer::Undelivered(int seq, int len, bool orig) { TCP_ApplicationAnalyzer::Undelivered(seq, len, orig); - file_mgr->Gap(file_id, seq, len); + file_mgr->Gap(unique_file, seq, len); } void File_Analyzer::Done() { TCP_ApplicationAnalyzer::Done(); - file_mgr->EndOfFile(file_id, Conn()); + file_mgr->EndOfFile(unique_file, Conn()); if ( buffer_len && buffer_len != BUFFER_SIZE ) Identify(); diff --git a/src/FileAnalyzer.h b/src/FileAnalyzer.h index e0a402daf2..9b737856fc 100644 --- a/src/FileAnalyzer.h +++ b/src/FileAnalyzer.h @@ -37,7 +37,7 @@ protected: static magic_t magic; static magic_t magic_mime; - string file_id; + string unique_file; }; #endif diff --git a/src/file_analysis.bif b/src/file_analysis.bif index 546ac5103c..bb8584cf84 100644 --- a/src/file_analysis.bif +++ b/src/file_analysis.bif @@ -61,7 
+61,8 @@ enum Action %{ function FileAnalysis::postpone_timeout%(file_id: string%): bool %{ - bool result = file_mgr->PostponeTimeout(file_id->CheckString()); + using namespace file_analysis; + bool result = file_mgr->PostponeTimeout(FileID(file_id->CheckString())); return new Val(result, TYPE_BOOL); %} @@ -69,9 +70,10 @@ function FileAnalysis::add_action%(file_id: string, action: FileAnalysis::Action, args: any%): bool %{ + using namespace file_analysis; RecordVal* rv = args->AsRecordVal()->CoerceTo( BifType::Record::FileAnalysis::ActionArgs); - bool result = file_mgr->AddAction(file_id->CheckString(), + bool result = file_mgr->AddAction(FileID(file_id->CheckString()), action->AsEnumVal(), rv); Unref(rv); return new Val(result, TYPE_BOOL); @@ -80,13 +82,15 @@ function FileAnalysis::add_action%(file_id: string, function FileAnalysis::remove_action%(file_id: string, action: FileAnalysis::Action%): bool %{ - bool result = file_mgr->RemoveAction(file_id->CheckString(), + using namespace file_analysis; + bool result = file_mgr->RemoveAction(FileID(file_id->CheckString()), action->AsEnumVal()); return new Val(result, TYPE_BOOL); %} function FileAnalysis::stop%(file_id: string%): bool %{ - bool result = file_mgr->RemoveFile(file_id->CheckString()); + using namespace file_analysis; + bool result = file_mgr->RemoveFile(FileID(file_id->CheckString())); return new Val(result, TYPE_BOOL); %} diff --git a/src/file_analysis/FileID.h b/src/file_analysis/FileID.h new file mode 100644 index 0000000000..c339445ea8 --- /dev/null +++ b/src/file_analysis/FileID.h @@ -0,0 +1,32 @@ +#ifndef FILE_ANALYSIS_FILEID_H +#define FILE_ANALYSIS_FILEID_H + +namespace file_analysis { + +/** + * A simple string wrapper class to help enforce some type safety between + * methods of FileAnalysis::Manager, some of which use a unique string to + * identify files, and others which use a pretty hash (the FileID) to identify + * files. 
A FileID is primarily used in methods which interface with the + * script-layer, while the unique strings are used for methods which interface + * with protocol analyzers (to better accommodate the possibility that a file + * can be distributed over different connections and thus analyzer instances). + */ +struct FileID { + string id; + + explicit FileID(const string arg_id) : id(arg_id) {} + FileID(const FileID& other) : id(other.id) {} + + const char* c_str() const { return id.c_str(); } + + bool operator==(const FileID& rhs) const { return id == rhs.id; } + bool operator<(const FileID& rhs) const { return id < rhs.id; } + + FileID& operator=(const FileID& rhs) { id = rhs.id; return *this; } + FileID& operator=(const string& rhs) { id = rhs; return *this; } +}; + +} // namespace file_analysis + +#endif diff --git a/src/file_analysis/Info.cc b/src/file_analysis/Info.cc index 60729cd590..0578612825 100644 --- a/src/file_analysis/Info.cc +++ b/src/file_analysis/Info.cc @@ -2,6 +2,7 @@ #include "Info.h" #include "InfoTimer.h" +#include "FileID.h" #include "Reporter.h" #include "Val.h" @@ -70,17 +71,20 @@ void Info::InitFieldIndices() action_args_idx = Idx("action_args"); } -Info::Info(const string& file_id, Connection* conn, const string& protocol) - : val(0), last_activity_time(network_time), postpone_timeout(false), - need_reassembly(false) +Info::Info(const string& unique, Connection* conn, const string& protocol) + : file_id(unique), unique(unique), val(0), last_activity_time(network_time), + postpone_timeout(false), need_reassembly(false) { - DBG_LOG(DBG_FILE_ANALYSIS, "Creating new Info object %s", file_id.c_str()); - InitFieldIndices(); + char id[20]; + uitoa_n(calculate_unique_id(), id, sizeof(id), 62); + + DBG_LOG(DBG_FILE_ANALYSIS, "Creating new Info object %s", id); + val = new RecordVal(BifType::Record::FileAnalysis::Info); - // TODO: hash/prettify file_id for script layer presentation - val->Assign(file_id_idx, new StringVal(file_id.c_str())); +
val->Assign(file_id_idx, new StringVal(id)); + file_id = FileID(id); UpdateConnectionFields(conn); @@ -96,7 +100,7 @@ Info::~Info() for ( it = actions.begin(); it != actions.end(); ++it ) delete it->second; - DBG_LOG(DBG_FILE_ANALYSIS, "Destroying Info object %s", FileID().c_str()); + DBG_LOG(DBG_FILE_ANALYSIS, "Destroying Info object %s",file_id.c_str()); Unref(val); } @@ -145,11 +149,6 @@ double Info::TimeoutInterval() const return LookupFieldDefaultInterval(timeout_interval_idx); } -string Info::FileID() const - { - return val->Lookup(file_id_idx)->AsString()->CheckString(); - } - void Info::IncrementByteCount(uint64 size, int field_idx) { uint64 old = LookupFieldDefaultCount(field_idx); @@ -172,7 +171,7 @@ bool Info::IsComplete() const void Info::ScheduleInactivityTimer() const { - timer_mgr->Add(new InfoTimer(network_time, FileID(), TimeoutInterval())); + timer_mgr->Add(new InfoTimer(network_time, file_id, TimeoutInterval())); } bool Info::AddAction(EnumVal* act, RecordVal* args) @@ -184,7 +183,7 @@ bool Info::AddAction(EnumVal* act, RecordVal* args) if ( ! a ) return false; DBG_LOG(DBG_FILE_ANALYSIS, "Add action %d for file id %s", act->AsEnum(), - FileID().c_str()); + file_id.c_str()); actions[act->AsEnum()] = a; VectorVal* av = val->LookupWithDefault(actions_idx)->AsVectorVal(); @@ -206,7 +205,7 @@ bool Info::RemoveAction(EnumVal* act) if ( it == actions.end() ) return false; DBG_LOG(DBG_FILE_ANALYSIS, "Remove action %d for file id %s", act->AsEnum(), - FileID().c_str()); + file_id.c_str()); delete it->second; actions.erase(it); return true; diff --git a/src/file_analysis/Info.h b/src/file_analysis/Info.h index 2823fa2d2c..aca33fa622 100644 --- a/src/file_analysis/Info.h +++ b/src/file_analysis/Info.h @@ -7,6 +7,7 @@ #include "Conn.h" #include "Val.h" #include "Action.h" +#include "FileID.h" namespace file_analysis { @@ -26,7 +27,12 @@ public: /** * @return value of the "file_id" field from #val record. 
*/ - string FileID() const; + FileID GetFileID() const { return file_id; } + + /** + * @return the string which uniquely identifies the file. + */ + string Unique() const { return unique; } /** * @return #last_activity_time @@ -96,7 +102,7 @@ protected: /** * Constructor; only file_analysis::Manager should be creating these. */ - Info(const string& file_id, Connection* conn = 0, + Info(const string& unique, Connection* conn = 0, const string& protocol = ""); /** @@ -122,6 +128,8 @@ protected: */ double LookupFieldDefaultInterval(int idx) const; + FileID file_id; /**< A pretty hash that likely identifies file*/ + string unique; /**< A string that uniquely identifies file */ RecordVal* val; /**< \c FileAnalysis::Info from script layer. */ double last_activity_time; /**< Time of last activity. */ bool postpone_timeout; /**< Whether postponing timeout is requested. */ diff --git a/src/file_analysis/InfoTimer.h b/src/file_analysis/InfoTimer.h index d5432e0ebc..ac0d8b6b00 100644 --- a/src/file_analysis/InfoTimer.h +++ b/src/file_analysis/InfoTimer.h @@ -1,8 +1,9 @@ #ifndef FILE_ANALYSIS_INFOTIMER_H #define FILE_ANALYSIS_INFOTIMER_H -#include "Timer.h" #include +#include "Timer.h" +#include "FileID.h" namespace file_analysis { @@ -12,7 +13,7 @@ namespace file_analysis { class InfoTimer : public Timer { public: - InfoTimer(double t, const string& id, double interval) + InfoTimer(double t, const FileID& id, double interval) : Timer(t + interval, TIMER_FILE_ANALYSIS_INACTIVITY), file_id(id) {} ~InfoTimer() {} @@ -25,7 +26,7 @@ public: protected: - string file_id; + FileID file_id; }; } // namespace file_analysis diff --git a/src/file_analysis/Manager.cc b/src/file_analysis/Manager.cc index 33011a6ec6..37396de0de 100644 --- a/src/file_analysis/Manager.cc +++ b/src/file_analysis/Manager.cc @@ -17,8 +17,8 @@ Manager::~Manager() void Manager::Terminate() { - vector keys; - for ( FileMap::iterator it = file_map.begin(); it != file_map.end(); ++it ) + vector keys; + for ( 
IDMap::iterator it = id_map.begin(); it != id_map.end(); ++it ) keys.push_back(it->first); for ( size_t i = 0; i < keys.size(); ++i ) Timeout(keys[i], true); @@ -29,46 +29,46 @@ static void check_file_done(Info* info) if ( info->IsComplete() ) { Manager::EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_DONE, info); - file_mgr->RemoveFile(info->FileID()); + file_mgr->RemoveFile(info->GetFileID()); } } -void Manager::DataIn(const string& file_id, const u_char* data, uint64 len, +void Manager::DataIn(const string& unique, const u_char* data, uint64 len, uint64 offset, Connection* conn, const string& protocol) { - Info* info = IDtoInfo(file_id, conn, protocol); + Info* info = GetInfo(unique, conn, protocol); info->DataIn(data, len, offset); check_file_done(info); } -void Manager::DataIn(const string& file_id, const u_char* data, uint64 len, +void Manager::DataIn(const string& unique, const u_char* data, uint64 len, Connection* conn, const string& protocol) { - Info* info = IDtoInfo(file_id, conn, protocol); + Info* info = GetInfo(unique, conn, protocol); info->DataIn(data, len); check_file_done(info); } -void Manager::EndOfFile(const string& file_id, Connection* conn, +void Manager::EndOfFile(const string& unique, Connection* conn, const string& protocol) { - Info* info = IDtoInfo(file_id, conn, protocol); + Info* info = GetInfo(unique, conn, protocol); info->EndOfFile(); Manager::EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_EOF, info); } -void Manager::Gap(const string& file_id, uint64 offset, uint64 len, +void Manager::Gap(const string& unique, uint64 offset, uint64 len, Connection* conn, const string& protocol) { - Info* info = IDtoInfo(file_id, conn, protocol); + Info* info = GetInfo(unique, conn, protocol); info->Gap(offset, len); Manager::EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_GAP, info); } -void Manager::SetSize(const string& file_id, uint64 size, +void Manager::SetSize(const string& unique, uint64 size, Connection* conn, const string& protocol) { - Info* 
info = IDtoInfo(file_id, conn, protocol); + Info* info = GetInfo(unique, conn, protocol); info->SetTotalBytes(size); check_file_done(info); } @@ -89,7 +89,7 @@ void Manager::EvaluatePolicy(BifEnum::FileAnalysis::Trigger t, Info* info) Unref(result); } -bool Manager::PostponeTimeout(const string& file_id) const +bool Manager::PostponeTimeout(const FileID& file_id) const { Info* info = Lookup(file_id); @@ -99,7 +99,7 @@ bool Manager::PostponeTimeout(const string& file_id) const return true; } -bool Manager::AddAction(const string& file_id, EnumVal* act, +bool Manager::AddAction(const FileID& file_id, EnumVal* act, RecordVal* args) const { Info* info = Lookup(file_id); @@ -109,7 +109,7 @@ bool Manager::AddAction(const string& file_id, EnumVal* act, return info->AddAction(act, args); } -bool Manager::RemoveAction(const string& file_id, EnumVal* act) const +bool Manager::RemoveAction(const FileID& file_id, EnumVal* act) const { Info* info = Lookup(file_id); @@ -118,14 +118,23 @@ bool Manager::RemoveAction(const string& file_id, EnumVal* act) const return info->RemoveAction(act); } -Info* Manager::IDtoInfo(const string& file_id, Connection* conn, - const string& protocol) +Info* Manager::GetInfo(const string& unique, Connection* conn, + const string& protocol) { - Info* rval = file_map[file_id]; + Info* rval = str_map[unique]; if ( ! 
rval ) { - rval = file_map[file_id] = new Info(file_id, conn, protocol); + rval = str_map[unique] = new Info(unique, conn, protocol); + FileID id = rval->GetFileID(); + + if ( id_map[id] ) + { + reporter->Error("Evicted duplicate file ID: %s", id.c_str()); + RemoveFile(id); + } + + id_map[id] = rval; Manager::EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_NEW, rval); } else @@ -137,16 +146,16 @@ Info* Manager::IDtoInfo(const string& file_id, Connection* conn, return rval; } -Info* Manager::Lookup(const string& file_id) const +Info* Manager::Lookup(const FileID& file_id) const { - FileMap::const_iterator it = file_map.find(file_id); + IDMap::const_iterator it = id_map.find(file_id); - if ( it == file_map.end() ) return 0; + if ( it == id_map.end() ) return 0; return it->second; } -void Manager::Timeout(const string& file_id, bool is_terminating) +void Manager::Timeout(const FileID& file_id, bool is_terminating) { Info* info = Lookup(file_id); @@ -157,25 +166,27 @@ void Manager::Timeout(const string& file_id, bool is_terminating) if ( info->postpone_timeout && ! is_terminating ) { DBG_LOG(DBG_FILE_ANALYSIS, "Postpone file analysis timeout for %s", - info->FileID().c_str()); + info->GetFileID().c_str()); info->UpdateLastActivityTime(); info->ScheduleInactivityTimer(); return; } DBG_LOG(DBG_FILE_ANALYSIS, "File analysis timeout for %s", - info->FileID().c_str()); + info->GetFileID().c_str()); RemoveFile(file_id); } -bool Manager::RemoveFile(const string& file_id) +bool Manager::RemoveFile(const FileID& file_id) { - FileMap::iterator it = file_map.find(file_id); + IDMap::iterator it = id_map.find(file_id); - if ( it == file_map.end() ) return false; + if ( it == id_map.end() ) return false; + if ( ! 
str_map.erase(it->second->Unique()) ) + reporter->Error("No string mapping for file ID %s", file_id.c_str()); delete it->second; - file_map.erase(it); + id_map.erase(it); return true; } diff --git a/src/file_analysis/Manager.h b/src/file_analysis/Manager.h index 3601c8b43f..7dfaf5a665 100644 --- a/src/file_analysis/Manager.h +++ b/src/file_analysis/Manager.h @@ -10,6 +10,7 @@ #include "Info.h" #include "InfoTimer.h" +#include "FileID.h" namespace file_analysis { @@ -31,58 +32,58 @@ public: /** * Pass in non-sequential file data. */ - void DataIn(const string& file_id, const u_char* data, uint64 len, + void DataIn(const string& unique, const u_char* data, uint64 len, uint64 offset, Connection* conn = 0, const string& protocol = ""); /** * Pass in sequential file data. */ - void DataIn(const string& file_id, const u_char* data, uint64 len, + void DataIn(const string& unique, const u_char* data, uint64 len, Connection* conn = 0, const string& protocol = ""); /** * Signal the end of file data. */ - void EndOfFile(const string& file_id, Connection* conn = 0, + void EndOfFile(const string& unique, Connection* conn = 0, const string& protocol = ""); /** * Signal a gap in the file data stream. */ - void Gap(const string& file_id, uint64 offset, uint64 len, + void Gap(const string& unique, uint64 offset, uint64 len, Connection* conn = 0, const string& protocol = ""); /** * Provide the expected number of bytes that comprise a file. */ - void SetSize(const string& file_id, uint64 size, Connection* conn = 0, + void SetSize(const string& unique, uint64 size, Connection* conn = 0, const string& protocol = ""); /** * Discard the file_analysis::Info object associated with \a file_id. * @return false if file identifier did not map to anything, else true. */ - bool RemoveFile(const string& file_id); + bool RemoveFile(const FileID& file_id); /** * If called during \c FileAnalysis::policy evaluation for a * \c FileAnalysis::TRIGGER_TIMEOUT, requests deferral of analysis timeout. 
*/ - bool PostponeTimeout(const string& file_id) const; + bool PostponeTimeout(const FileID& file_id) const; /** * Attaches an action to the file identifier. Only one action of a given * type can be attached per file identifier at a time. * @return true if the action was attached, else false. */ - bool AddAction(const string& file_id, EnumVal* act, RecordVal* args) const; + bool AddAction(const FileID& file_id, EnumVal* act, RecordVal* args) const; /** * Removes an action for a given file identifier. * @return true if the action was removed, else false. */ - bool RemoveAction(const string& file_id, EnumVal* act) const; + bool RemoveAction(const FileID& file_id, EnumVal* act) const; /** * Calls the \c FileAnalysis::policy hook. @@ -93,29 +94,31 @@ protected: friend class InfoTimer; - typedef map FileMap; + typedef map StrMap; + typedef map IDMap; /** - * @return the Info object mapped to \a file_id. One is created if mapping + * @return the Info object mapped to \a unique. One is created if mapping * doesn't exist. If it did exist, the activity time is refreshed * and connection-related fields of the record value may be updated. */ - Info* IDtoInfo(const string& file_id, Connection* conn = 0, - const string& protocol = ""); + Info* GetInfo(const string& unique, Connection* conn = 0, + const string& protocol = ""); /** * @return the Info object mapped to \a file_id, or a null pointer if no * mapping exists. */ - Info* Lookup(const string& file_id) const; + Info* Lookup(const FileID& file_id) const; /** * Evaluate timeout policy for a file and remove the Info object mapped to * \a file_id if needed. */ - void Timeout(const string& file_id, bool is_terminating = ::terminating); + void Timeout(const FileID& file_id, bool is_terminating = ::terminating); - FileMap file_map; /**< Map strings to \c FileAnalysis::Info records. */ + StrMap str_map; /**< Map unique strings to \c FileAnalysis::Info records. */ + IDMap id_map; /**< Map file IDs to \c FileAnalysis::Info records. 
*/ }; } // namespace file_analysis