From ceb471fb365b41cf4a1981b4cb9a315f07009e70 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Wed, 20 Feb 2013 22:09:39 -0600 Subject: [PATCH 1/2] Prettify file analysis IDs to be more like connection uids. --- src/CMakeLists.txt | 1 + src/FileAnalyzer.cc | 10 ++--- src/FileAnalyzer.h | 2 +- src/file_analysis.bif | 12 ++++-- src/file_analysis/FileID.h | 32 ++++++++++++++++ src/file_analysis/Info.cc | 31 ++++++++------- src/file_analysis/Info.h | 12 +++++- src/file_analysis/InfoTimer.h | 7 ++-- src/file_analysis/Manager.cc | 71 ++++++++++++++++++++--------------- src/file_analysis/Manager.h | 35 +++++++++-------- 10 files changed, 136 insertions(+), 77 deletions(-) create mode 100644 src/file_analysis/FileID.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 16de055e11..489bfe39b4 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -451,6 +451,7 @@ set(bro_SRCS file_analysis/Manager.cc file_analysis/Info.cc file_analysis/InfoTimer.cc + file_analysis/FileID.h file_analysis/Action.h file_analysis/Extract.cc diff --git a/src/FileAnalyzer.cc b/src/FileAnalyzer.cc index 8e994adbf1..27592ea34c 100644 --- a/src/FileAnalyzer.cc +++ b/src/FileAnalyzer.cc @@ -21,15 +21,15 @@ File_Analyzer::File_Analyzer(Connection* conn) char op[256], rp[256]; modp_ulitoa10(ntohs(conn->OrigPort()), op); modp_ulitoa10(ntohs(conn->RespPort()), rp); - file_id = "TCPFile " + conn->OrigAddr().AsString() + ":" + op + "->" + - conn->RespAddr().AsString() + ":" + rp; + unique_file = "TCPFile " + conn->OrigAddr().AsString() + ":" + op + "->" + + conn->RespAddr().AsString() + ":" + rp; } void File_Analyzer::DeliverStream(int len, const u_char* data, bool orig) { TCP_ApplicationAnalyzer::DeliverStream(len, data, orig); - file_mgr->DataIn(file_id, data, len, Conn()); + file_mgr->DataIn(unique_file, data, len, Conn()); int n = min(len, BUFFER_SIZE - buffer_len); @@ -48,14 +48,14 @@ void File_Analyzer::Undelivered(int seq, int len, bool orig) { TCP_ApplicationAnalyzer::Undelivered(seq, len, orig); - file_mgr->Gap(file_id, seq, len); + file_mgr->Gap(unique_file, seq, len); } void File_Analyzer::Done() { TCP_ApplicationAnalyzer::Done(); - file_mgr->EndOfFile(file_id, Conn()); + file_mgr->EndOfFile(unique_file, Conn()); if ( buffer_len && buffer_len != BUFFER_SIZE ) Identify(); diff --git a/src/FileAnalyzer.h b/src/FileAnalyzer.h index e0a402daf2..9b737856fc 100644 --- a/src/FileAnalyzer.h +++ b/src/FileAnalyzer.h @@ -37,7 +37,7 @@ protected: static magic_t magic; static magic_t magic_mime; - string file_id; + string unique_file; }; #endif diff --git a/src/file_analysis.bif b/src/file_analysis.bif index 546ac5103c..bb8584cf84 100644 --- a/src/file_analysis.bif +++ b/src/file_analysis.bif @@ -61,7 +61,8 @@ enum Action %{ function FileAnalysis::postpone_timeout%(file_id: string%): bool %{ - bool result = file_mgr->PostponeTimeout(file_id->CheckString()); + using namespace file_analysis; + bool result = file_mgr->PostponeTimeout(FileID(file_id->CheckString())); return new Val(result, TYPE_BOOL); %} @@ -69,9 +70,10 @@ function FileAnalysis::add_action%(file_id: string, action: FileAnalysis::Action, args: any%): bool %{ + using namespace file_analysis; RecordVal* rv = args->AsRecordVal()->CoerceTo( BifType::Record::FileAnalysis::ActionArgs); - bool result = file_mgr->AddAction(file_id->CheckString(), + bool result = file_mgr->AddAction(FileID(file_id->CheckString()), action->AsEnumVal(), rv); Unref(rv); return new Val(result, TYPE_BOOL); @@ -80,13 +82,15 @@ function FileAnalysis::add_action%(file_id: string, function FileAnalysis::remove_action%(file_id: string, action: FileAnalysis::Action%): bool %{ - bool result = file_mgr->RemoveAction(file_id->CheckString(), + using namespace file_analysis; + bool result = file_mgr->RemoveAction(FileID(file_id->CheckString()), action->AsEnumVal()); return new Val(result, TYPE_BOOL); %} function FileAnalysis::stop%(file_id: string%): bool %{ - bool result = file_mgr->RemoveFile(file_id->CheckString()); + using namespace file_analysis; + bool result = file_mgr->RemoveFile(FileID(file_id->CheckString())); return new Val(result, TYPE_BOOL); %} diff --git a/src/file_analysis/FileID.h b/src/file_analysis/FileID.h new file mode 100644 index 0000000000..c339445ea8 --- /dev/null +++ b/src/file_analysis/FileID.h @@ -0,0 +1,32 @@ +#ifndef FILE_ANALYSIS_FILEID_H +#define FILE_ANALYSIS_FILEID_H + +namespace file_analysis { + +/** + * A simple string wrapper class to help enforce some type safety between + * methods of FileAnalysis::Manager, some of which use a unique string to + * identify files, and others which use a pretty hash (the FileID) to identify + * files. A FileID is primarily used in methods which interface with the + * script-layer, while the unique strings are used for methods which interface + * with protocol analyzers (to better accomodate the possibility that a file + * can be distributed over different connections and thus analyzer instances). + */ +struct FileID { + string id; + + explicit FileID(const string arg_id) : id(arg_id) {} + FileID(const FileID& other) : id(other.id) {} + + const char* c_str() const { return id.c_str(); } + + bool operator==(const FileID& rhs) const { return id == rhs.id; } + bool operator<(const FileID& rhs) const { return id < rhs.id; } + + FileID& operator=(const FileID& rhs) { id = rhs.id; return *this; } + FileID& operator=(const string& rhs) { id = rhs; return *this; } +}; + +} // namespace file_analysis + +#endif diff --git a/src/file_analysis/Info.cc b/src/file_analysis/Info.cc index 60729cd590..0578612825 100644 --- a/src/file_analysis/Info.cc +++ b/src/file_analysis/Info.cc @@ -2,6 +2,7 @@ #include "Info.h" #include "InfoTimer.h" +#include "FileID.h" #include "Reporter.h" #include "Val.h" @@ -70,17 +71,20 @@ void Info::InitFieldIndices() action_args_idx = Idx("action_args"); } -Info::Info(const string& file_id, Connection* conn, const string& protocol) - : val(0), last_activity_time(network_time), postpone_timeout(false), - need_reassembly(false) +Info::Info(const string& unique, Connection* conn, const string& protocol) + : file_id(unique), unique(unique), val(0), last_activity_time(network_time), + postpone_timeout(false), need_reassembly(false) { - DBG_LOG(DBG_FILE_ANALYSIS, "Creating new Info object %s", file_id.c_str()); - InitFieldIndices(); + char id[20]; + uitoa_n(calculate_unique_id(), id, sizeof(id), 62); + + DBG_LOG(DBG_FILE_ANALYSIS, "Creating new Info object %s", id); + val = new RecordVal(BifType::Record::FileAnalysis::Info); - // TODO: hash/prettify file_id for script layer presentation - val->Assign(file_id_idx, new StringVal(file_id.c_str())); + val->Assign(file_id_idx, new StringVal(id)); + file_id = FileID(id); UpdateConnectionFields(conn); @@ -96,7 +100,7 @@ Info::~Info() for ( it = actions.begin(); it != actions.end(); ++it ) delete it->second; - DBG_LOG(DBG_FILE_ANALYSIS, "Destroying Info object %s", FileID().c_str()); + DBG_LOG(DBG_FILE_ANALYSIS, "Destroying Info object %s",file_id.c_str()); Unref(val); } @@ -145,11 +149,6 @@ double Info::TimeoutInterval() const return LookupFieldDefaultInterval(timeout_interval_idx); } -string Info::FileID() const - { - return val->Lookup(file_id_idx)->AsString()->CheckString(); - } - void Info::IncrementByteCount(uint64 size, int field_idx) { uint64 old = LookupFieldDefaultCount(field_idx); @@ -172,7 +171,7 @@ bool Info::IsComplete() const void Info::ScheduleInactivityTimer() const { - timer_mgr->Add(new InfoTimer(network_time, FileID(), TimeoutInterval())); + timer_mgr->Add(new InfoTimer(network_time, file_id, TimeoutInterval())); } bool Info::AddAction(EnumVal* act, RecordVal* args) @@ -184,7 +183,7 @@ bool Info::AddAction(EnumVal* act, RecordVal* args) if ( ! a ) return false; DBG_LOG(DBG_FILE_ANALYSIS, "Add action %d for file id %s", act->AsEnum(), - FileID().c_str()); + file_id.c_str()); actions[act->AsEnum()] = a; VectorVal* av = val->LookupWithDefault(actions_idx)->AsVectorVal(); @@ -206,7 +205,7 @@ bool Info::RemoveAction(EnumVal* act) if ( it == actions.end() ) return false; DBG_LOG(DBG_FILE_ANALYSIS, "Remove action %d for file id %s", act->AsEnum(), - FileID().c_str()); + file_id.c_str()); delete it->second; actions.erase(it); return true; diff --git a/src/file_analysis/Info.h b/src/file_analysis/Info.h index 2823fa2d2c..aca33fa622 100644 --- a/src/file_analysis/Info.h +++ b/src/file_analysis/Info.h @@ -7,6 +7,7 @@ #include "Conn.h" #include "Val.h" #include "Action.h" +#include "FileID.h" namespace file_analysis { @@ -26,7 +27,12 @@ public: /** * @return value of the "file_id" field from #val record. */ - string FileID() const; + FileID GetFileID() const { return file_id; } + + /** + * @return the string which uniquely identifies the file. + */ + string Unique() const { return unique; } /** * @return #last_activity_time @@ -96,7 +102,7 @@ protected: /** * Constructor; only file_analysis::Manager should be creating these. */ - Info(const string& file_id, Connection* conn = 0, + Info(const string& unique, Connection* conn = 0, const string& protocol = ""); /** @@ -122,6 +128,8 @@ protected: */ double LookupFieldDefaultInterval(int idx) const; + FileID file_id; /**< A pretty hash that likely identifies file*/ + string unique; /**< A string that uniquely identifies file */ RecordVal* val; /**< \c FileAnalysis::Info from script layer. */ double last_activity_time; /**< Time of last activity. */ bool postpone_timeout; /**< Whether postponing timeout is requested. */ diff --git a/src/file_analysis/InfoTimer.h b/src/file_analysis/InfoTimer.h index d5432e0ebc..ac0d8b6b00 100644 --- a/src/file_analysis/InfoTimer.h +++ b/src/file_analysis/InfoTimer.h @@ -1,8 +1,9 @@ #ifndef FILE_ANALYSIS_INFOTIMER_H #define FILE_ANALYSIS_INFOTIMER_H -#include "Timer.h" #include +#include "Timer.h" +#include "FileID.h" namespace file_analysis { @@ -12,7 +13,7 @@ namespace file_analysis { class InfoTimer : public Timer { public: - InfoTimer(double t, const string& id, double interval) + InfoTimer(double t, const FileID& id, double interval) : Timer(t + interval, TIMER_FILE_ANALYSIS_INACTIVITY), file_id(id) {} ~InfoTimer() {} @@ -25,7 +26,7 @@ public: protected: - string file_id; + FileID file_id; }; } // namespace file_analysis diff --git a/src/file_analysis/Manager.cc b/src/file_analysis/Manager.cc index 33011a6ec6..37396de0de 100644 --- a/src/file_analysis/Manager.cc +++ b/src/file_analysis/Manager.cc @@ -17,8 +17,8 @@ Manager::~Manager() void Manager::Terminate() { - vector keys; - for ( FileMap::iterator it = file_map.begin(); it != file_map.end(); ++it ) + vector keys; + for ( IDMap::iterator it = id_map.begin(); it != id_map.end(); ++it ) keys.push_back(it->first); for ( size_t i = 0; i < keys.size(); ++i ) Timeout(keys[i], true); @@ -29,46 +29,46 @@ static void check_file_done(Info* info) if ( info->IsComplete() ) { Manager::EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_DONE, info); - file_mgr->RemoveFile(info->FileID()); + file_mgr->RemoveFile(info->GetFileID()); } } -void Manager::DataIn(const string& file_id, const u_char* data, uint64 len, +void Manager::DataIn(const string& unique, const u_char* data, uint64 len, uint64 offset, Connection* conn, const string& protocol) { - Info* info = IDtoInfo(file_id, conn, protocol); + Info* info = GetInfo(unique, conn, protocol); info->DataIn(data, len, offset); check_file_done(info); } -void Manager::DataIn(const string& file_id, const u_char* data, uint64 len, +void Manager::DataIn(const string& unique, const u_char* data, uint64 len, Connection* conn, const string& protocol) { - Info* info = IDtoInfo(file_id, conn, protocol); + Info* info = GetInfo(unique, conn, protocol); info->DataIn(data, len); check_file_done(info); } -void Manager::EndOfFile(const string& file_id, Connection* conn, +void Manager::EndOfFile(const string& unique, Connection* conn, const string& protocol) { - Info* info = IDtoInfo(file_id, conn, protocol); + Info* info = GetInfo(unique, conn, protocol); info->EndOfFile(); Manager::EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_EOF, info); } -void Manager::Gap(const string& file_id, uint64 offset, uint64 len, +void Manager::Gap(const string& unique, uint64 offset, uint64 len, Connection* conn, const string& protocol) { - Info* info = IDtoInfo(file_id, conn, protocol); + Info* info = GetInfo(unique, conn, protocol); info->Gap(offset, len); Manager::EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_GAP, info); } -void Manager::SetSize(const string& file_id, uint64 size, +void Manager::SetSize(const string& unique, uint64 size, Connection* conn, const string& protocol) { - Info* info = IDtoInfo(file_id, conn, protocol); + Info* info = GetInfo(unique, conn, protocol); info->SetTotalBytes(size); check_file_done(info); } @@ -89,7 +89,7 @@ void Manager::EvaluatePolicy(BifEnum::FileAnalysis::Trigger t, Info* info) Unref(result); } -bool Manager::PostponeTimeout(const string& file_id) const +bool Manager::PostponeTimeout(const FileID& file_id) const { Info* info = Lookup(file_id); @@ -99,7 +99,7 @@ bool Manager::PostponeTimeout(const string& file_id) const return true; } -bool Manager::AddAction(const string& file_id, EnumVal* act, +bool Manager::AddAction(const FileID& file_id, EnumVal* act, RecordVal* args) const { Info* info = Lookup(file_id); @@ -109,7 +109,7 @@ bool Manager::AddAction(const string& file_id, EnumVal* act, return info->AddAction(act, args); } -bool Manager::RemoveAction(const string& file_id, EnumVal* act) const +bool Manager::RemoveAction(const FileID& file_id, EnumVal* act) const { Info* info = Lookup(file_id); @@ -118,14 +118,23 @@ bool Manager::RemoveAction(const string& file_id, EnumVal* act) const return info->RemoveAction(act); } -Info* Manager::IDtoInfo(const string& file_id, Connection* conn, - const string& protocol) +Info* Manager::GetInfo(const string& unique, Connection* conn, + const string& protocol) { - Info* rval = file_map[file_id]; + Info* rval = str_map[unique]; if ( ! rval ) { - rval = file_map[file_id] = new Info(file_id, conn, protocol); + rval = str_map[unique] = new Info(unique, conn, protocol); + FileID id = rval->GetFileID(); + + if ( id_map[id] ) + { + reporter->Error("Evicted duplicate file ID: %s", id.c_str()); + RemoveFile(id); + } + + id_map[id] = rval; Manager::EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_NEW, rval); } else @@ -137,16 +146,16 @@ Info* Manager::IDtoInfo(const string& file_id, Connection* conn, return rval; } -Info* Manager::Lookup(const string& file_id) const +Info* Manager::Lookup(const FileID& file_id) const { - FileMap::const_iterator it = file_map.find(file_id); + IDMap::const_iterator it = id_map.find(file_id); - if ( it == file_map.end() ) return 0; + if ( it == id_map.end() ) return 0; return it->second; } -void Manager::Timeout(const string& file_id, bool is_terminating) +void Manager::Timeout(const FileID& file_id, bool is_terminating) { Info* info = Lookup(file_id); @@ -157,25 +166,27 @@ void Manager::Timeout(const string& file_id, bool is_terminating) if ( info->postpone_timeout && ! is_terminating ) { DBG_LOG(DBG_FILE_ANALYSIS, "Postpone file analysis timeout for %s", - info->FileID().c_str()); + info->GetFileID().c_str()); info->UpdateLastActivityTime(); info->ScheduleInactivityTimer(); return; } DBG_LOG(DBG_FILE_ANALYSIS, "File analysis timeout for %s", - info->FileID().c_str()); + info->GetFileID().c_str()); RemoveFile(file_id); } -bool Manager::RemoveFile(const string& file_id) +bool Manager::RemoveFile(const FileID& file_id) { - FileMap::iterator it = file_map.find(file_id); + IDMap::iterator it = id_map.find(file_id); - if ( it == file_map.end() ) return false; + if ( it == id_map.end() ) return false; + if ( ! str_map.erase(it->second->Unique()) ) + reporter->Error("No string mapping for file ID %s", file_id.c_str()); delete it->second; - file_map.erase(it); + id_map.erase(it); return true; } diff --git a/src/file_analysis/Manager.h b/src/file_analysis/Manager.h index 3601c8b43f..7dfaf5a665 100644 --- a/src/file_analysis/Manager.h +++ b/src/file_analysis/Manager.h @@ -10,6 +10,7 @@ #include "Info.h" #include "InfoTimer.h" +#include "FileID.h" namespace file_analysis { @@ -31,58 +32,58 @@ public: /** * Pass in non-sequential file data. */ - void DataIn(const string& file_id, const u_char* data, uint64 len, + void DataIn(const string& unique, const u_char* data, uint64 len, uint64 offset, Connection* conn = 0, const string& protocol = ""); /** * Pass in sequential file data. */ - void DataIn(const string& file_id, const u_char* data, uint64 len, + void DataIn(const string& unique, const u_char* data, uint64 len, Connection* conn = 0, const string& protocol = ""); /** * Signal the end of file data. */ - void EndOfFile(const string& file_id, Connection* conn = 0, + void EndOfFile(const string& unique, Connection* conn = 0, const string& protocol = ""); /** * Signal a gap in the file data stream. */ - void Gap(const string& file_id, uint64 offset, uint64 len, + void Gap(const string& unique, uint64 offset, uint64 len, Connection* conn = 0, const string& protocol = ""); /** * Provide the expected number of bytes that comprise a file. */ - void SetSize(const string& file_id, uint64 size, Connection* conn = 0, + void SetSize(const string& unique, uint64 size, Connection* conn = 0, const string& protocol = ""); /** * Discard the file_analysis::Info object associated with \a file_id. * @return false if file identifier did not map to anything, else true. */ - bool RemoveFile(const string& file_id); + bool RemoveFile(const FileID& file_id); /** * If called during \c FileAnalysis::policy evaluation for a * \c FileAnalysis::TRIGGER_TIMEOUT, requests deferral of analysis timeout. */ - bool PostponeTimeout(const string& file_id) const; + bool PostponeTimeout(const FileID& file_id) const; /** * Attaches an action to the file identifier. Only one action of a given * type can be attached per file identifier at a time. * @return true if the action was attached, else false. */ - bool AddAction(const string& file_id, EnumVal* act, RecordVal* args) const; + bool AddAction(const FileID& file_id, EnumVal* act, RecordVal* args) const; /** * Removes an action for a given file identifier. * @return true if the action was removed, else false. */ - bool RemoveAction(const string& file_id, EnumVal* act) const; + bool RemoveAction(const FileID& file_id, EnumVal* act) const; /** * Calls the \c FileAnalysis::policy hook. @@ -93,29 +94,31 @@ protected: friend class InfoTimer; - typedef map FileMap; + typedef map StrMap; + typedef map IDMap; /** - * @return the Info object mapped to \a file_id. One is created if mapping + * @return the Info object mapped to \a unique. One is created if mapping * doesn't exist. If it did exist, the activity time is refreshed * and connection-related fields of the record value may be updated. */ - Info* IDtoInfo(const string& file_id, Connection* conn = 0, - const string& protocol = ""); + Info* GetInfo(const string& unique, Connection* conn = 0, + const string& protocol = ""); /** * @return the Info object mapped to \a file_id, or a null pointer if no * mapping exists. */ - Info* Lookup(const string& file_id) const; + Info* Lookup(const FileID& file_id) const; /** * Evaluate timeout policy for a file and remove the Info object mapped to * \a file_id if needed. */ - void Timeout(const string& file_id, bool is_terminating = ::terminating); + void Timeout(const FileID& file_id, bool is_terminating = ::terminating); - FileMap file_map; /**< Map strings to \c FileAnalysis::Info records. */ + StrMap str_map; /**< Map unique strings to \c FileAnalysis::Info records. */ + IDMap id_map; /**< Map file IDs to \c FileAnalysis::Info records. */ }; } // namespace file_analysis From 85410a76578a665dcd0dc856576cc8d595d8ab13 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Thu, 21 Feb 2013 21:05:01 -0600 Subject: [PATCH 2/2] Add MD5/SHA1/SHA256 file analysis hashing actions. --- .../base/frameworks/file-analysis/main.bro | 9 ++ src/CMakeLists.txt | 1 + src/file_analysis.bif | 4 + src/file_analysis/Action.h | 28 ++++-- src/file_analysis/Extract.cc | 8 +- src/file_analysis/Extract.h | 7 +- src/file_analysis/Hash.cc | 54 ++++++++++++ src/file_analysis/Hash.h | 88 +++++++++++++++++++ src/file_analysis/Info.cc | 86 +++++++++++++----- src/file_analysis/Info.h | 23 +++-- src/file_analysis/Manager.cc | 5 +- 11 files changed, 271 insertions(+), 42 deletions(-) create mode 100644 src/file_analysis/Hash.cc create mode 100644 src/file_analysis/Hash.h diff --git a/scripts/base/frameworks/file-analysis/main.bro b/scripts/base/frameworks/file-analysis/main.bro index 0aaf67c07b..43c0c7a3ac 100644 --- a/scripts/base/frameworks/file-analysis/main.bro +++ b/scripts/base/frameworks/file-analysis/main.bro @@ -37,6 +37,12 @@ export { extract_filename: string &optional; }; + type ActionResults: record { + md5: string &optional; + sha1: string &optional; + sha256: string &optional; + }; + ## Contains all metadata related to the analysis of a given file, some ## of which is logged. type Info: record { @@ -81,6 +87,9 @@ export { ## The corresponding arguments supplied to each element of *actions*. action_args: vector of ActionArgs &default=vector(); + + ## Some actions may directly yield results in this record. + action_results: ActionResults; } &redef; ## TODO: document diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 489bfe39b4..b1efabf60e 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -454,6 +454,7 @@ set(bro_SRCS file_analysis/FileID.h file_analysis/Action.h file_analysis/Extract.cc + file_analysis/Hash.cc nb_dns.c digest.h diff --git a/src/file_analysis.bif b/src/file_analysis.bif index bb8584cf84..3ee8865b8f 100644 --- a/src/file_analysis.bif +++ b/src/file_analysis.bif @@ -8,6 +8,7 @@ module FileAnalysis; type Info: record; type ActionArgs: record; +type ActionResults: record; ## An enumeration of possibly-interesting "events" that can occur over ## the course of analyzing files. The :bro:see:`FileAnalysis::policy` @@ -57,6 +58,9 @@ enum Trigger %{ enum Action %{ ACTION_EXTRACT, + ACTION_MD5, + ACTION_SHA1, + ACTION_SHA256, %} function FileAnalysis::postpone_timeout%(file_id: string%): bool diff --git a/src/file_analysis/Action.h b/src/file_analysis/Action.h index ac8713f81e..78611b4ef1 100644 --- a/src/file_analysis/Action.h +++ b/src/file_analysis/Action.h @@ -2,9 +2,12 @@ #define FILE_ANALYSIS_ACTION_H #include "Val.h" +#include "NetVar.h" namespace file_analysis { +typedef BifEnum::FileAnalysis::Action ActionTag; + class Info; /** @@ -17,29 +20,44 @@ public: /** * Subclasses may override this to receive file data non-sequentially. + * @return true if the action is still in a valid state to continue + * receiving data/events or false if it's essentially "done". */ - virtual void DeliverChunk(const u_char* data, uint64 len, uint64 offset) {} + virtual bool DeliverChunk(const u_char* data, uint64 len, uint64 offset) + { return true; } /** * Subclasses may override this to receive file sequentially. + * @return true if the action is still in a valid state to continue + * receiving data/events or false if it's essentially "done". */ - virtual void DeliverStream(const u_char* data, uint64 len) {} + virtual bool DeliverStream(const u_char* data, uint64 len) + { return true; } /** * Subclasses may override this to specifically handle the end of a file. + * @return true if the action is still in a valid state to continue + * receiving data/events or false if it's essentially "done". */ - virtual void EndOfFile() {} + virtual bool EndOfFile() + { return true; } /** * Subclasses may override this to handle missing data in a file stream. + * @return true if the action is still in a valid state to continue + * receiving data/events or false if it's essentially "done". */ - virtual void Undelivered(uint64 offset, uint64 len) {} + virtual bool Undelivered(uint64 offset, uint64 len) + { return true; } + + ActionTag Tag() const { return tag; } protected: - Action(Info* arg_info) {} + Action(Info* arg_info, ActionTag arg_tag) : info(arg_info), tag(arg_tag) {} Info* info; + ActionTag tag; }; typedef Action* (*ActionInstantiator)(const RecordVal* args, Info* info); diff --git a/src/file_analysis/Extract.cc b/src/file_analysis/Extract.cc index c580aaa0dd..3a4897e5cb 100644 --- a/src/file_analysis/Extract.cc +++ b/src/file_analysis/Extract.cc @@ -6,7 +6,8 @@ using namespace file_analysis; Extract::Extract(Info* arg_info, const string& arg_filename) - : Action(arg_info), filename(arg_filename) + : Action(arg_info, BifEnum::FileAnalysis::ACTION_EXTRACT), + filename(arg_filename) { fd = open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0666); @@ -36,11 +37,12 @@ Action* Extract::Instantiate(const RecordVal* args, Info* info) return new Extract(info, v->AsString()->CheckString()); } -void Extract::DeliverChunk(const u_char* data, uint64 len, uint64 offset) +bool Extract::DeliverChunk(const u_char* data, uint64 len, uint64 offset) { Action::DeliverChunk(data, len, offset); - if ( ! fd ) return; + if ( ! fd ) return false; safe_pwrite(fd, data, len, offset); + return true; } diff --git a/src/file_analysis/Extract.h b/src/file_analysis/Extract.h index 17c3a959ad..df03d40967 100644 --- a/src/file_analysis/Extract.h +++ b/src/file_analysis/Extract.h @@ -5,20 +5,21 @@ #include "Val.h" #include "Info.h" +#include "Action.h" namespace file_analysis { /** * An action to simply extract files to disk. */ -class Extract : Action { +class Extract : public Action { public: static Action* Instantiate(const RecordVal* args, Info* info); - ~Extract(); + virtual ~Extract(); - virtual void DeliverChunk(const u_char* data, uint64 len, uint64 offset); + virtual bool DeliverChunk(const u_char* data, uint64 len, uint64 offset); protected: diff --git a/src/file_analysis/Hash.cc b/src/file_analysis/Hash.cc new file mode 100644 index 0000000000..1bb8e48e5d --- /dev/null +++ b/src/file_analysis/Hash.cc @@ -0,0 +1,54 @@ +#include + +#include "Hash.h" +#include "util.h" + +using namespace file_analysis; + +Hash::Hash(Info* arg_info, ActionTag tag, HashVal* hv) + : Action(arg_info, tag), hash(hv) + { + hash->Init(); + } + +Hash::~Hash() + { + // maybe it's all there... + Finalize(); + delete hash; + } + +bool Hash::DeliverStream(const u_char* data, uint64 len) + { + Action::DeliverStream(data, len); + + if ( ! hash->IsValid() ) return false; + + hash->Feed(data, len); + return true; + } + +bool Hash::EndOfFile() + { + Action::EndOfFile(); + Finalize(); + return false; + } + +bool Hash::Undelivered(uint64 offset, uint64 len) + { + return false; + } + +void Hash::Finalize() + { + if ( ! hash->IsValid() ) return; + + StringVal* sv = hash->Get(); + int i = GetResultFieldOffset(); + + if ( i < 0 ) + reporter->InternalError("Hash Action result field not found"); + + info->Results()->Assign(i, sv); + } diff --git a/src/file_analysis/Hash.h b/src/file_analysis/Hash.h new file mode 100644 index 0000000000..8b101ac7b6 --- /dev/null +++ b/src/file_analysis/Hash.h @@ -0,0 +1,88 @@ +#ifndef FILE_ANALYSIS_HASH_H +#define FILE_ANALYSIS_HASH_H + +#include + +#include "Val.h" +#include "OpaqueVal.h" +#include "Info.h" +#include "Action.h" + +namespace file_analysis { + +/** + * An action to produce a hash of file contents. + */ +class Hash : public Action { +public: + + virtual ~Hash(); + + virtual bool DeliverStream(const u_char* data, uint64 len); + + virtual bool EndOfFile(); + + virtual bool Undelivered(uint64 offset, uint64 len); + +protected: + + Hash(Info* arg_info, ActionTag arg_tag, HashVal* hv); + + void Finalize(); + + virtual int GetResultFieldOffset() const = 0; + + HashVal* hash; +}; + +class MD5 : public Hash { +public: + + static Action* Instantiate(const RecordVal* args, Info* info) + { return new MD5(info); } + +protected: + + MD5(Info* arg_info) + : Hash(arg_info, BifEnum::FileAnalysis::ACTION_MD5, new MD5Val()) {} + + virtual int GetResultFieldOffset() const + { return BifType::Record::FileAnalysis::ActionResults-> + FieldOffset("md5"); } +}; + +class SHA1 : public Hash { +public: + + static Action* Instantiate(const RecordVal* args, Info* info) + { return new SHA1(info); } + +protected: + + SHA1(Info* arg_info) + : Hash(arg_info, BifEnum::FileAnalysis::ACTION_SHA1, new SHA1Val()) {} + + virtual int GetResultFieldOffset() const + { return BifType::Record::FileAnalysis::ActionResults-> + FieldOffset("sha1"); } +}; + +class SHA256 : public Hash { +public: + + static Action* Instantiate(const RecordVal* args, Info* info) + { return new SHA256(info); } + +protected: + + SHA256(Info* arg_info) + : Hash(arg_info, BifEnum::FileAnalysis::ACTION_SHA256, new SHA256Val()) {} + + virtual int GetResultFieldOffset() const + { return BifType::Record::FileAnalysis::ActionResults-> + FieldOffset("sha256"); } +}; + +} // namespace file_analysis + +#endif diff --git a/src/file_analysis/Info.cc b/src/file_analysis/Info.cc index 0578612825..e2e0961c28 100644 --- a/src/file_analysis/Info.cc +++ b/src/file_analysis/Info.cc @@ -8,12 +8,16 @@ #include "Action.h" #include "Extract.h" +#include "Hash.h" using namespace file_analysis; // keep in order w/ declared enum values in file_analysis.bif static ActionInstantiator action_factory[] = { Extract::Instantiate, + MD5::Instantiate, + SHA1::Instantiate, + SHA256::Instantiate, }; static TableVal* empty_conn_id_set() @@ -53,6 +57,7 @@ int Info::overflow_bytes_idx = -1; int Info::timeout_interval_idx = -1; int Info::actions_idx = -1; int Info::action_args_idx = -1; +int Info::action_results_idx = -1; void Info::InitFieldIndices() { @@ -69,6 +74,7 @@ void Info::InitFieldIndices() timeout_interval_idx = Idx("timeout_interval"); actions_idx = Idx("actions"); action_args_idx = Idx("action_args"); + action_results_idx = Idx("action_results"); } Info::Info(const string& unique, Connection* conn, const string& protocol) @@ -149,6 +155,11 @@ double Info::TimeoutInterval() const return LookupFieldDefaultInterval(timeout_interval_idx); } +RecordVal* Info::Results() const + { + return val->Lookup(action_results_idx)->AsRecordVal(); + } + void Info::IncrementByteCount(uint64 size, int field_idx) { uint64 old = LookupFieldDefaultCount(field_idx); @@ -174,22 +185,25 @@ void Info::ScheduleInactivityTimer() const timer_mgr->Add(new InfoTimer(network_time, file_id, TimeoutInterval())); } -bool Info::AddAction(EnumVal* act, RecordVal* args) +bool Info::AddAction(ActionTag act, RecordVal* args) { - if ( actions.find(act->AsEnum()) != actions.end() ) return false; + if ( actions.find(act) != actions.end() ) return false; - Action* a = action_factory[act->AsEnum()](args, this); + ActionTag tag = static_cast(act); + + Action* a = action_factory[act](args, this); if ( ! a ) return false; - DBG_LOG(DBG_FILE_ANALYSIS, "Add action %d for file id %s", act->AsEnum(), + DBG_LOG(DBG_FILE_ANALYSIS, "Add action %d for file id %s", act, file_id.c_str()); - actions[act->AsEnum()] = a; + actions[act] = a; VectorVal* av = val->LookupWithDefault(actions_idx)->AsVectorVal(); VectorVal* aav = val->LookupWithDefault(action_args_idx)->AsVectorVal(); - av->Assign(av->Size(), act->Ref(), 0); + EnumVal* ev = new EnumVal(act, BifType::Enum::FileAnalysis::Action); + av->Assign(av->Size(), ev, 0); aav->Assign(aav->Size(), args->Ref(), 0); Unref(av); @@ -198,13 +212,18 @@ bool Info::AddAction(EnumVal* act, RecordVal* args) return true; } -bool Info::RemoveAction(EnumVal* act) +bool Info::RemoveAction(ActionTag act) { - ActionMap::iterator it = actions.find(act->AsEnum()); + ActionMap::iterator it = actions.find(act); if ( it == actions.end() ) return false; - DBG_LOG(DBG_FILE_ANALYSIS, "Remove action %d for file id %s", act->AsEnum(), + return RemoveAction(it); + } + +bool Info::RemoveAction(const ActionMap::iterator& it) + { + DBG_LOG(DBG_FILE_ANALYSIS, "Remove action %d for file id %s", it->first, file_id.c_str()); delete it->second; actions.erase(it); @@ -213,12 +232,18 @@ bool Info::RemoveAction(EnumVal* act) void Info::DataIn(const u_char* data, uint64 len, uint64 offset) { - ActionMap::const_iterator it; - for ( it = actions.begin(); it != actions.end(); ++it ) - it->second->DeliverChunk(data, len, offset); + ActionMap::iterator it = actions.begin(); + while ( it != actions.end() ) + if ( ! it->second->DeliverChunk(data, len, offset) ) + RemoveAction(it++); + else + ++it; // TODO: check reassembly requirement based on buffer size in record - if ( ! need_reassembly ) return; + if ( need_reassembly ) + { + // TODO + } // TODO: reassembly stuff, possibly having to deliver chunks if buffer full // and incrememt overflow bytes @@ -228,13 +253,22 @@ void Info::DataIn(const u_char* data, uint64 len, uint64 offset) void Info::DataIn(const u_char* data, uint64 len) { - ActionMap::const_iterator it; - for ( it = actions.begin(); it != actions.end(); ++it ) + ActionMap::iterator it = actions.begin(); + while ( it != actions.end() ) { - it->second->DeliverStream(data, len); + if ( ! it->second->DeliverStream(data, len) ) + { + RemoveAction(it++); + continue; + } + uint64 offset = LookupFieldDefaultCount(seen_bytes_idx) + LookupFieldDefaultCount(missing_bytes_idx); - it->second->DeliverChunk(data, len, offset); + + if ( ! it->second->DeliverChunk(data, len, offset) ) + RemoveAction(it++); + else + ++it; } IncrementByteCount(len, seen_bytes_idx); @@ -242,16 +276,22 @@ void Info::DataIn(const u_char* data, uint64 len) void Info::EndOfFile() { - ActionMap::const_iterator it; - for ( it = actions.begin(); it != actions.end(); ++it ) - it->second->EndOfFile(); + ActionMap::iterator it = actions.begin(); + while ( it != actions.end() ) + if ( ! it->second->EndOfFile() ) + RemoveAction(it++); + else + ++it; } void Info::Gap(uint64 offset, uint64 len) { - ActionMap::const_iterator it; - for ( it = actions.begin(); it != actions.end(); ++it ) - it->second->Undelivered(offset, len); + ActionMap::iterator it = actions.begin(); + while ( it != actions.end() ) + if ( ! it->second->Undelivered(offset, len) ) + RemoveAction(it++); + else + ++it; IncrementByteCount(len, missing_bytes_idx); } diff --git a/src/file_analysis/Info.h b/src/file_analysis/Info.h index aca33fa622..7a89e7d898 100644 --- a/src/file_analysis/Info.h +++ b/src/file_analysis/Info.h @@ -29,6 +29,11 @@ public: */ FileID GetFileID() const { return file_id; } + /** + * @return record val of the "action_results" field from #val record. + */ + RecordVal* Results() const; + /** * @return the string which uniquely identifies the file. */ @@ -67,13 +72,13 @@ public: * Attaches an action. Only one action per type can be attached at a time. * @return true if the action was attached, else false. */ - bool AddAction(EnumVal* act, RecordVal* args); + bool AddAction(ActionTag act, RecordVal* args); /** * Removes an action. * @return true if the action was removed, else false. */ - bool RemoveAction(EnumVal* act); + bool RemoveAction(ActionTag act); /** * Pass in non-sequential data and deliver to attached actions/analyzers. @@ -99,6 +104,8 @@ protected: friend class Manager; + typedef map ActionMap; + /** * Constructor; only file_analysis::Manager should be creating these. */ @@ -128,16 +135,19 @@ protected: */ double LookupFieldDefaultInterval(int idx) const; + /** + * Removes an action. + * @return true if the action was removed, else false. + */ + bool RemoveAction(const ActionMap::iterator& it); + FileID file_id; /**< A pretty hash that likely identifies file*/ string unique; /**< A string that uniquely identifies file */ RecordVal* val; /**< \c FileAnalysis::Info from script layer. */ double last_activity_time; /**< Time of last activity. */ bool postpone_timeout; /**< Whether postponing timeout is requested. */ bool need_reassembly; /**< Whether file stream reassembly is needed. */ - - typedef map ActionMap; - - ActionMap actions; + ActionMap actions; /**< Actions/analysis to perform on file. */ /** * @return the field offset in #val record corresponding to \a field_name. @@ -161,6 +171,7 @@ protected: static int timeout_interval_idx; static int actions_idx; static int action_args_idx; + static int action_results_idx; }; } // namespace file_analysis diff --git a/src/file_analysis/Manager.cc b/src/file_analysis/Manager.cc index 37396de0de..dbb8366ded 100644 --- a/src/file_analysis/Manager.cc +++ b/src/file_analysis/Manager.cc @@ -3,6 +3,7 @@ #include "Manager.h" #include "Info.h" +#include "Action.h" using namespace file_analysis; @@ -106,7 +107,7 @@ bool Manager::AddAction(const FileID& file_id, EnumVal* act, if ( ! info ) return false; - return info->AddAction(act, args); + return info->AddAction(static_cast(act->AsEnum()), args); } bool Manager::RemoveAction(const FileID& file_id, EnumVal* act) const @@ -115,7 +116,7 @@ bool Manager::RemoveAction(const FileID& file_id, EnumVal* act) const if ( ! info ) return false; - return info->RemoveAction(act); + return info->RemoveAction(static_cast(act->AsEnum())); } Info* Manager::GetInfo(const string& unique, Connection* conn,