diff --git a/scripts/base/frameworks/file-analysis/main.bro b/scripts/base/frameworks/file-analysis/main.bro index 7e0096f78b..3133ab43b9 100644 --- a/scripts/base/frameworks/file-analysis/main.bro +++ b/scripts/base/frameworks/file-analysis/main.bro @@ -18,8 +18,7 @@ export { const default_reassembly_buffer_size: count = 1024*1024 &redef; ## The default buffer size used for storing the beginning of files. - # TODO: what's a reasonable default? - const default_bof_buffer_size: count = 256 &redef; + const default_bof_buffer_size: count = 1024 &redef; ## The default amount of time file analysis will wait for new file data ## before giving up. @@ -27,12 +26,6 @@ export { #const default_timeout_interval: interval = 2 mins &redef; const default_timeout_interval: interval = 10 sec &redef; - ## The default amount of data that a user is allowed to extract - ## from a file to an event with the - ## :bro:see:`FileAnalysis::ACTION_DATA_EVENT` action. - ## TODO: what's a reasonable default? - const default_data_event_len: count = 1024*1024 &redef; - # Needed a forward declaration for event parameters... type Info: record {}; @@ -88,6 +81,20 @@ export { ## the analysis engine will wait before giving up on it. timeout_interval: interval &log &default=default_timeout_interval; + ## The number of bytes at the beginning of a file to save for later + ## inspection in *bof_buffer* field of + ## :bro:see:`FileAnalysis::Info`. + bof_buffer_size: count &default=default_bof_buffer_size; + + ## The content of the beginning of a file up to *bof_buffer_size* bytes. + ## This is also the buffer that's used for file/mime type detection. + bof_buffer: string &optional; + + ## An initial guess at file type. + file_type: string &optional; + ## An initial guess at mime type. + mime_type: string &optional; + ## Actions that have been added to the analysis of this file. ## Not meant to be modified directly by scripts. 
actions: table[ActionArgs] of ActionResults; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 3f8fd07be5..dbabaebff2 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -453,6 +453,7 @@ set(bro_SRCS file_analysis/InfoTimer.cc file_analysis/FileID.h file_analysis/Action.h + file_analysis/ActionSet.cc file_analysis/Extract.cc file_analysis/Hash.cc file_analysis/DataEvent.cc diff --git a/src/file_analysis.bif b/src/file_analysis.bif index 81a3490d47..cb3c017311 100644 --- a/src/file_analysis.bif +++ b/src/file_analysis.bif @@ -35,13 +35,13 @@ enum Trigger %{ ## Raised when the beginning of a file is available and that beginning ## is at least the number of bytes indicated by the *bof_buffer_size* ## field of :bro:see:`FileAnalysis::Info`. - TRIGGER_BOF_BUFFER_AVAIL, - ## Raised when the mime type of a file is matched based on magic - ## numbers. TODO: re-purposing protocols/http/file-ident.sig for - ## doing this is tricky since the signature engine doesn't expect + TRIGGER_BOF_BUFFER, + ## Raised when an initial guess at the file/mime type of a file is matched + ## based on magic numbers. TODO: re-purposing protocols/http/file-ident.sig + ## for doing this is tricky since the signature engine doesn't expect ## to be decoupled from connections, so figure out what work needs ## done there. - TRIGGER_MIME_TYPE, + TRIGGER_TYPE, ## Raised when the end of a file is detected. If the file is not ## being transferred linearly, then this doesn't have to mean the full ## file has been transferred. diff --git a/src/file_analysis/Action.h b/src/file_analysis/Action.h index 8461b4b4d3..6ddb67367b 100644 --- a/src/file_analysis/Action.h +++ b/src/file_analysis/Action.h @@ -39,7 +39,9 @@ public: { return true; } /** - * Subclasses may override this to specifically handle the end of a file. 
+ * Subclasses may override this to specifically handle an EOF signal, + * which means no more data is going to be incoming and the action/analyzer + * may be deleted/cleaned up soon. * @return true if the action is still in a valid state to continue * receiving data/events or false if it's essentially "done". */ @@ -64,6 +66,11 @@ public: */ RecordVal* Args() const { return args; } + /** + * @return the file_analysis::Info object to which the action is attached. + */ + Info* GetInfo() const { return info; } + /** * @return the action tag equivalent of the 'act' field from the ActionArgs * value \a args. diff --git a/src/file_analysis/ActionSet.cc b/src/file_analysis/ActionSet.cc new file mode 100644 index 0000000000..c615484509 --- /dev/null +++ b/src/file_analysis/ActionSet.cc @@ -0,0 +1,186 @@ +#include "ActionSet.h" +#include "Info.h" +#include "Action.h" +#include "Extract.h" +#include "DataEvent.h" +#include "Hash.h" + +using namespace file_analysis; + +// keep in order w/ declared enum values in file_analysis.bif +static ActionInstantiator action_factory[] = { + Extract::Instantiate, + MD5::Instantiate, + SHA1::Instantiate, + SHA256::Instantiate, + DataEvent::Instantiate, +}; + +static void action_del_func(void* v) + { + delete (Action*) v; + } + +ActionSet::ActionSet(Info* arg_info) : info(arg_info) + { + TypeList* t = new TypeList(); + t->Append(BifType::Record::FileAnalysis::ActionArgs->Ref()); + action_hash = new CompositeHash(t); + Unref(t); + action_map.SetDeleteFunc(action_del_func); + } + +ActionSet::~ActionSet() + { + while ( ! 
mod_queue.empty() ) + { + Modification* mod = mod_queue.front(); + mod->Abort(); + delete mod; + mod_queue.pop(); + } + delete action_hash; + } + +bool ActionSet::AddAction(RecordVal* args) + { + HashKey* key = GetKey(args); + + if ( action_map.Lookup(key) ) + { + DBG_LOG(DBG_FILE_ANALYSIS, "Instantiate action %d skipped for file id" + " %s: already exists", Action::ArgsTag(args), + info->GetFileID().c_str()); + delete key; + return true; + } + + Action* act = InstantiateAction(args); + + if ( ! act ) + { + delete key; + return false; + } + + InsertAction(act, key); + + return true; + } + +bool ActionSet::QueueAddAction(RecordVal* args) + { + HashKey* key = GetKey(args); + Action* act = InstantiateAction(args); + + if ( ! act ) + { + delete key; + return false; + } + + mod_queue.push(new Add(act, key)); + + return true; + } + +bool ActionSet::Add::Perform(ActionSet* set) + { + if ( set->action_map.Lookup(key) ) + { + DBG_LOG(DBG_FILE_ANALYSIS, "Add action %d skipped for file id" + " %s: already exists", act->Tag(), + act->GetInfo()->GetFileID().c_str()); + Abort(); + return true; + } + + set->InsertAction(act, key); + return true; + } + +bool ActionSet::RemoveAction(const RecordVal* args) + { + return RemoveAction(Action::ArgsTag(args), GetKey(args)); + } + +bool ActionSet::RemoveAction(ActionTag tag, HashKey* key) + { + Action* act = (Action*) action_map.Remove(key); + delete key; + + if ( ! 
act ) + { + DBG_LOG(DBG_FILE_ANALYSIS, "Skip remove action %d for file id %s", + tag, info->GetFileID().c_str()); + return false; + } + + DBG_LOG(DBG_FILE_ANALYSIS, "Remove action %d for file id %s", act->Tag(), + info->GetFileID().c_str()); + delete act; + return true; + } + +bool ActionSet::QueueRemoveAction(const RecordVal* args) + { + HashKey* key = GetKey(args); + ActionTag tag = Action::ArgsTag(args); + + mod_queue.push(new Remove(tag, key)); + + return action_map.Lookup(key); + } + +bool ActionSet::Remove::Perform(ActionSet* set) + { + return set->RemoveAction(tag, key); + } + +HashKey* ActionSet::GetKey(const RecordVal* args) const + { + HashKey* key = action_hash->ComputeHash(args, 1); + if ( ! key ) + reporter->InternalError("ActionArgs type mismatch"); + return key; + } + +Action* ActionSet::InstantiateAction(RecordVal* args) const + { + Action* act = action_factory[Action::ArgsTag(args)](args, info); + + if ( ! act ) + { + DBG_LOG(DBG_FILE_ANALYSIS, "Instantiate action %d failed for file id" + " %s", Action::ArgsTag(args), info->GetFileID().c_str()); // adjacent literals form one format string + return 0; + } + + return act; + } + +void ActionSet::InsertAction(Action* act, HashKey* key) + { + DBG_LOG(DBG_FILE_ANALYSIS, "Add action %d for file id %s", act->Tag(), + info->GetFileID().c_str()); + action_map.Insert(key, act); + info->GetVal()->Lookup(Info::actions_idx)->AsTableVal()->Assign(act->Args(), + new RecordVal(BifType::Record::FileAnalysis::ActionResults)); + } + +void ActionSet::FlushQueuedModifications() + { + if ( mod_queue.empty() ) return; + + DBG_LOG(DBG_FILE_ANALYSIS, "Start flushing action mod queue of file id %s", + info->GetFileID().c_str()); + do + { + Modification* mod = mod_queue.front(); + mod->Perform(this); + delete mod; + mod_queue.pop(); + } while ( ! 
mod_queue.empty() ); + DBG_LOG(DBG_FILE_ANALYSIS, "End flushing action mod queue of file id %s", + info->GetFileID().c_str()); + } diff --git a/src/file_analysis/ActionSet.h b/src/file_analysis/ActionSet.h new file mode 100644 index 0000000000..506fdf5c77 --- /dev/null +++ b/src/file_analysis/ActionSet.h @@ -0,0 +1,104 @@ +#ifndef FILE_ANALYSIS_ACTIONSET_H +#define FILE_ANALYSIS_ACTIONSET_H + +#include <queue> + +#include "Action.h" +#include "Dict.h" +#include "CompHash.h" +#include "Val.h" + +namespace file_analysis { + +class Info; +declare(PDict,Action); + +class ActionSet { +public: + + ActionSet(Info* arg_info); + + ~ActionSet(); + + /** + * @return true if action was instantiated/attached or already exists, else false. + */ + bool AddAction(RecordVal* args); + + /** + * @return true if action was able to be instantiated, else false. + */ + bool QueueAddAction(RecordVal* args); + + /** + * @return false if action didn't exist and so wasn't removed, else true. + */ + bool RemoveAction(const RecordVal* args); + + /** + * @return true if action exists at time of call, else false. + */ + bool QueueRemoveAction(const RecordVal* args); + + /** + * Perform all queued modifications to the currently active actions. + */ + void FlushQueuedModifications(); + + IterCookie* InitForIteration() const + { return action_map.InitForIteration(); } + + Action* NextEntry(IterCookie* c) + { return action_map.NextEntry(c); } + +protected: + + HashKey* GetKey(const RecordVal* args) const; + Action* InstantiateAction(RecordVal* args) const; + void InsertAction(Action* act, HashKey* key); + bool RemoveAction(ActionTag tag, HashKey* key); + + Info* info; + CompositeHash* action_hash; /**< ActionArgs hashes Action map lookup. */ + PDict(Action) action_map; /**< Actions indexed by ActionArgs. 
*/ + + class Modification { + public: + virtual ~Modification() {} + virtual bool Perform(ActionSet* set) = 0; + virtual void Abort() = 0; + }; + + class Add : public Modification { + public: + Add(Action* arg_act, HashKey* arg_key) + : Modification(), act(arg_act), key(arg_key) {} + virtual ~Add() {} + virtual bool Perform(ActionSet* set); + virtual void Abort() { delete act; delete key; } + + protected: + Action* act; + HashKey* key; + }; + + class Remove : public Modification { + public: + Remove(ActionTag arg_tag, HashKey* arg_key) + : Modification(), tag(arg_tag), key(arg_key) {} + virtual ~Remove() {} + virtual bool Perform(ActionSet* set); + virtual void Abort() { delete key; } + + protected: + ActionTag tag; + HashKey* key; + }; + + typedef queue<Modification*> ModQueue; + ModQueue mod_queue; /**< Pending adds/removes, applied in queue order by FlushQueuedModifications(). */ +}; + +} // namespace file_analysis + +#endif diff --git a/src/file_analysis/DataEvent.cc b/src/file_analysis/DataEvent.cc index 9c1084c1cf..d901801f82 100644 --- a/src/file_analysis/DataEvent.cc +++ b/src/file_analysis/DataEvent.cc @@ -41,8 +41,6 @@ Action* DataEvent::Instantiate(RecordVal* args, Info* info) bool DataEvent::DeliverChunk(const u_char* data, uint64 len, uint64 offset) { - Action::DeliverChunk(data, len, offset); - if ( ! chunk_event ) return true; val_list* args = new val_list; @@ -56,8 +54,6 @@ bool DataEvent::DeliverChunk(const u_char* data, uint64 len, uint64 offset) bool DataEvent::DeliverStream(const u_char* data, uint64 len) { - Action::DeliverStream(data, len); - if ( ! stream_event ) return true; val_list* args = new val_list; diff --git a/src/file_analysis/Extract.cc b/src/file_analysis/Extract.cc index 76bd9bdee2..c05d37e7ce 100644 --- a/src/file_analysis/Extract.cc +++ b/src/file_analysis/Extract.cc @@ -38,8 +38,6 @@ Action* Extract::Instantiate(RecordVal* args, Info* info) bool Extract::DeliverChunk(const u_char* data, uint64 len, uint64 offset) { - Action::DeliverChunk(data, len, offset); - if ( ! 
fd ) return false; safe_pwrite(fd, data, len, offset); diff --git a/src/file_analysis/Hash.cc b/src/file_analysis/Hash.cc index 43cfcb74a7..28662559af 100644 --- a/src/file_analysis/Hash.cc +++ b/src/file_analysis/Hash.cc @@ -16,15 +16,11 @@ Hash::Hash(RecordVal* args, Info* info, HashVal* hv, const char* field) Hash::~Hash() { - // maybe it's all there... - Finalize(); Unref(hash); } bool Hash::DeliverStream(const u_char* data, uint64 len) { - Action::DeliverStream(data, len); - if ( ! hash->IsValid() ) return false; hash->Feed(data, len); @@ -33,7 +29,6 @@ bool Hash::DeliverStream(const u_char* data, uint64 len) bool Hash::EndOfFile() { - Action::EndOfFile(); Finalize(); return false; } diff --git a/src/file_analysis/Info.cc b/src/file_analysis/Info.cc index 6b135bab6c..c4b8e3f192 100644 --- a/src/file_analysis/Info.cc +++ b/src/file_analysis/Info.cc @@ -3,26 +3,13 @@ #include "Info.h" #include "InfoTimer.h" #include "FileID.h" +#include "Manager.h" #include "Reporter.h" #include "Val.h" #include "Type.h" -#include "Action.h" -#include "Extract.h" -#include "Hash.h" -#include "DataEvent.h" - using namespace file_analysis; -// keep in order w/ declared enum values in file_analysis.bif -static ActionInstantiator action_factory[] = { - Extract::Instantiate, - MD5::Instantiate, - SHA1::Instantiate, - SHA256::Instantiate, - DataEvent::Instantiate, -}; - static TableVal* empty_conn_id_set() { TypeList* set_index = new TypeList(conn_id); @@ -58,6 +45,10 @@ int Info::total_bytes_idx = -1; int Info::missing_bytes_idx = -1; int Info::overflow_bytes_idx = -1; int Info::timeout_interval_idx = -1; +int Info::bof_buffer_size_idx = -1; +int Info::bof_buffer_idx = -1; +int Info::file_type_idx = -1; +int Info::mime_type_idx = -1; int Info::actions_idx = -1; void Info::InitFieldIndices() @@ -73,17 +64,17 @@ void Info::InitFieldIndices() missing_bytes_idx = Idx("missing_bytes"); overflow_bytes_idx = Idx("overflow_bytes"); timeout_interval_idx = Idx("timeout_interval"); + 
bof_buffer_size_idx = Idx("bof_buffer_size"); + bof_buffer_idx = Idx("bof_buffer"); + file_type_idx = Idx("file_type"); + mime_type_idx = Idx("mime_type"); actions_idx = Idx("actions"); } -static void action_del_func(void* v) - { - delete (Action*) v; - } - Info::Info(const string& unique, Connection* conn, const string& protocol) : file_id(unique), unique(unique), val(0), last_activity_time(network_time), - postpone_timeout(false), need_reassembly(false) + postpone_timeout(false), need_reassembly(false), done(false), + actions(this) { InitFieldIndices(); @@ -96,24 +87,15 @@ Info::Info(const string& unique, Connection* conn, const string& protocol) val->Assign(file_id_idx, new StringVal(id)); file_id = FileID(id); - TypeList* t = new TypeList(); - t->Append(BifType::Record::FileAnalysis::ActionArgs->Ref()); - action_hash = new CompositeHash(t); - Unref(t); - action_map.SetDeleteFunc(action_del_func); - UpdateConnectionFields(conn); if ( protocol != "" ) val->Assign(protocol_idx, new StringVal(protocol.c_str())); - - ScheduleInactivityTimer(); } Info::~Info() { DBG_LOG(DBG_FILE_ANALYSIS, "Destroying Info object %s", file_id.c_str()); - delete action_hash; Unref(val); } @@ -203,89 +185,90 @@ void Info::ScheduleInactivityTimer() const bool Info::AddAction(RecordVal* args) { - HashKey* key = action_hash->ComputeHash(args, 1); - - if ( ! key ) - reporter->InternalError("ActionArgs type mismatch in add_action"); - - Action* act = action_map.Lookup(key); - - if ( act ) - { - DBG_LOG(DBG_FILE_ANALYSIS, "Add action %d skipped for already active" - " action on file id %s", act->Tag(), file_id.c_str()); - delete key; - return false; - } - - act = action_factory[Action::ArgsTag(args)](args, this); - - if ( ! 
act ) - { - DBG_LOG(DBG_FILE_ANALYSIS, "Failed to instantiate action %d" - " on file id %s", Action::ArgsTag(args), file_id.c_str()); - delete key; - return false; - } - - DBG_LOG(DBG_FILE_ANALYSIS, "Add action %d for file id %s", act->Tag(), - file_id.c_str()); - - action_map.Insert(key, act); - val->Lookup(actions_idx)->AsTableVal()->Assign(args, - new RecordVal(BifType::Record::FileAnalysis::ActionResults)); - - return true; - } - -void Info::ScheduleRemoval(const Action* act) - { - removing.push_back(act->Args()); - } - -void Info::DoActionRemoval() - { - ActionArgList::iterator it; - for ( it = removing.begin(); it != removing.end(); ++it ) - RemoveAction(*it); - removing.clear(); + return done ? false : actions.QueueAddAction(args); } bool Info::RemoveAction(const RecordVal* args) { - HashKey* key = action_hash->ComputeHash(args, 1); + return done ? false : actions.QueueRemoveAction(args); + } - if ( ! key ) - reporter->InternalError("ActionArgs type mismatch in remove_action"); +bool Info::BufferBOF(const u_char* data, uint64 len) + { + if ( bof_buffer.full || bof_buffer.replayed ) return false; - Action* act = (Action*) action_map.Remove(key); - delete key; + using BifEnum::FileAnalysis::TRIGGER_BOF; + using BifEnum::FileAnalysis::TRIGGER_BOF_BUFFER; - if ( ! act ) + if ( bof_buffer.chunks.size() == 0 ) + Manager::EvaluatePolicy(TRIGGER_BOF, this); + + if ( ! 
data ) { - DBG_LOG(DBG_FILE_ANALYSIS, "Skip remove action %d for file id %s", - Action::ArgsTag(args), file_id.c_str()); + // A gap means we're done seeing as much as the start of the file + // as possible, replay anything that we have + bof_buffer.full = true; + ReplayBOF(); + // TODO: libmagic stuff return false; } - DBG_LOG(DBG_FILE_ANALYSIS, "Remove action %d for file id %s", act->Tag(), - file_id.c_str()); - delete act; + uint64 desired_size = LookupFieldDefaultCount(bof_buffer_size_idx); + + // If no buffer is desired or if the first chunk satisfies desired size, + // just do everything we need with the first chunk without copying. + if ( desired_size == 0 || + (bof_buffer.chunks.empty() && len >= desired_size) ) + { + bof_buffer.full = bof_buffer.replayed = true; + val->Assign(bof_buffer_idx, new StringVal(new BroString(data, len, 0))); + Manager::EvaluatePolicy(TRIGGER_BOF_BUFFER, this); + // TODO: libmagic stuff + return false; + } + + bof_buffer.chunks.push_back(new BroString(data, len, 0)); + bof_buffer.size += len; + + if ( bof_buffer.size >= desired_size ) + { + bof_buffer.full = true; + // TODO: libmagic stuff + ReplayBOF(); + } + return true; } +void Info::ReplayBOF() + { + if ( bof_buffer.replayed ) return; + bof_buffer.replayed = true; + + val->Assign(bof_buffer_idx, new StringVal(concatenate(bof_buffer.chunks))); + + using BifEnum::FileAnalysis::TRIGGER_BOF_BUFFER; + Manager::EvaluatePolicy(TRIGGER_BOF_BUFFER, this); + + for ( size_t i = 0; i < bof_buffer.chunks.size(); ++i ) + DataIn(bof_buffer.chunks[i]->Bytes(), bof_buffer.chunks[i]->Len()); + } + void Info::DataIn(const u_char* data, uint64 len, uint64 offset) { - Action* act = 0; - IterCookie* c = action_map.InitForIteration(); + actions.FlushQueuedModifications(); + // TODO: attempt libmagic stuff here before doing reassembly? - while ( (act = action_map.NextEntry(c)) ) + Action* act = 0; + IterCookie* c = actions.InitForIteration(); + + while ( (act = actions.NextEntry(c)) ) { if ( ! 
act->DeliverChunk(data, len, offset) ) - ScheduleRemoval(act); + actions.QueueRemoveAction(act->Args()); } - DoActionRemoval(); + actions.FlushQueuedModifications(); // TODO: check reassembly requirement based on buffer size in record if ( need_reassembly ) @@ -293,62 +276,73 @@ void Info::DataIn(const u_char* data, uint64 len, uint64 offset) // TODO } - // TODO: reassembly stuff, possibly having to deliver chunks if buffer full - // and incrememt overflow bytes + // TODO: reassembly overflow stuff, increment overflow count, eval trigger IncrementByteCount(len, seen_bytes_idx); } void Info::DataIn(const u_char* data, uint64 len) { - Action* act = 0; - IterCookie* c = action_map.InitForIteration(); + actions.FlushQueuedModifications(); + if ( BufferBOF(data, len) ) return; - while ( (act = action_map.NextEntry(c)) ) + Action* act = 0; + IterCookie* c = actions.InitForIteration(); + + while ( (act = actions.NextEntry(c)) ) { if ( ! act->DeliverStream(data, len) ) { - ScheduleRemoval(act); + actions.QueueRemoveAction(act->Args()); continue; } uint64 offset = LookupFieldDefaultCount(seen_bytes_idx) + LookupFieldDefaultCount(missing_bytes_idx); - if ( ! act->DeliverChunk(data, len, offset) ) - ScheduleRemoval(act); + actions.QueueRemoveAction(act->Args()); } - DoActionRemoval(); + actions.FlushQueuedModifications(); IncrementByteCount(len, seen_bytes_idx); } void Info::EndOfFile() { - Action* act = 0; - IterCookie* c = action_map.InitForIteration(); + if ( done ) return; + done = true; + actions.FlushQueuedModifications(); - while ( (act = action_map.NextEntry(c)) ) + // send along anything that's been buffered, but never flushed + ReplayBOF(); + + Action* act = 0; + IterCookie* c = actions.InitForIteration(); + + while ( (act = actions.NextEntry(c)) ) { if ( ! 
act->EndOfFile() ) - ScheduleRemoval(act); + actions.QueueRemoveAction(act->Args()); } - DoActionRemoval(); + actions.FlushQueuedModifications(); } void Info::Gap(uint64 offset, uint64 len) { - Action* act = 0; - IterCookie* c = action_map.InitForIteration(); + actions.FlushQueuedModifications(); + if ( BufferBOF(0, len) ) return; - while ( (act = action_map.NextEntry(c)) ) + Action* act = 0; + IterCookie* c = actions.InitForIteration(); + + while ( (act = actions.NextEntry(c)) ) { if ( ! act->Undelivered(offset, len) ) - ScheduleRemoval(act); + actions.QueueRemoveAction(act->Args()); } - DoActionRemoval(); + actions.FlushQueuedModifications(); IncrementByteCount(len, missing_bytes_idx); } diff --git a/src/file_analysis/Info.h b/src/file_analysis/Info.h index c8a1c6bf58..bfb6b22015 100644 --- a/src/file_analysis/Info.h +++ b/src/file_analysis/Info.h @@ -2,19 +2,16 @@ #define FILE_ANALYSIS_INFO_H #include -#include +#include -#include "CompHash.h" -#include "Dict.h" #include "Conn.h" #include "Val.h" -#include "Action.h" +#include "ActionSet.h" #include "FileID.h" +#include "BroString.h" namespace file_analysis { -declare(PDict,Action); - /** * Wrapper class around \c FileAnalysis::Info record values from script layer. */ @@ -80,15 +77,15 @@ public: void ScheduleInactivityTimer() const; /** - * Attaches an action. Only one action per type can be attached at a time, - * unless the arguments differ. - * @return true if the action was attached, else false. + * Queues attaching an action. Only one action per type can be attached at + * a time unless the arguments differ. + * @return false if action can't be instantiated, else true. */ bool AddAction(RecordVal* args); /** - * Removes an action. - * @return true if the action was removed, else false. + * Queues removal of an action. + * @return true if action was active at time of call, else false. 
*/ bool RemoveAction(const RecordVal* args); @@ -146,15 +143,16 @@ protected: double LookupFieldDefaultInterval(int idx) const; /** - * Adds file_analysis::Action associated with \a args to list of actions - * to remove, #removing. + * Buffers incoming data at the beginning of a file. If \a data is a null + * pointer, that signifies a gap and the buffering cannot continue. + * @return true if buffering is still required, else false */ - void ScheduleRemoval(const Action* act); + bool BufferBOF(const u_char* data, uint64 len); /** - * Deletes/removes all actions in #removing. + * Forward any beginning-of-file buffered data on to DataIn stream. */ - void DoActionRemoval(); + void ReplayBOF(); FileID file_id; /**< A pretty hash that likely identifies file*/ string unique; /**< A string that uniquely identifies file */ @@ -162,10 +160,19 @@ protected: double last_activity_time; /**< Time of last activity. */ bool postpone_timeout; /**< Whether postponing timeout is requested. */ bool need_reassembly; /**< Whether file stream reassembly is needed. */ - CompositeHash* action_hash;/**< ActionArgs hashes Action map lookup. */ - PDict(Action) action_map; /**< Actions indexed by ActionArgs. */ - typedef list ActionArgList; - ActionArgList removing; /**< Actions pending removal. */ + bool done; /**< If this object is about to be deleted. */ + ActionSet actions; + + struct BOF_Buffer { + BOF_Buffer() : full(false), replayed(false), size(0) {} + ~BOF_Buffer() + { for ( size_t i = 0; i < chunks.size(); ++i ) delete chunks[i]; } + + bool full; + bool replayed; + uint64 size; + BroString::CVec chunks; + } bof_buffer; /**< Beginning of file buffer. */ /** * @return the field offset in #val record corresponding to \a field_name. 
@@ -177,6 +184,7 @@ protected: */ static void InitFieldIndices(); +public: static int file_id_idx; static int parent_file_id_idx; static int protocol_idx; @@ -187,6 +195,10 @@ protected: static int missing_bytes_idx; static int overflow_bytes_idx; static int timeout_interval_idx; + static int bof_buffer_size_idx; + static int bof_buffer_idx; + static int file_type_idx; + static int mime_type_idx; static int actions_idx; }; diff --git a/src/file_analysis/Manager.cc b/src/file_analysis/Manager.cc index 0c718cce3d..67566f56b4 100644 --- a/src/file_analysis/Manager.cc +++ b/src/file_analysis/Manager.cc @@ -29,8 +29,8 @@ static void check_file_done(Info* info) { if ( info->IsComplete() ) { - Manager::EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_DONE, info); file_mgr->RemoveFile(info->GetFileID()); + Manager::EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_DONE, info); } } @@ -40,6 +40,7 @@ void Manager::DataIn(const string& unique, const u_char* data, uint64 len, Info* info = GetInfo(unique, conn, protocol); info->DataIn(data, len, offset); check_file_done(info); + DoRemoveFiles(); } void Manager::DataIn(const string& unique, const u_char* data, uint64 len, @@ -48,6 +49,7 @@ void Manager::DataIn(const string& unique, const u_char* data, uint64 len, Info* info = GetInfo(unique, conn, protocol); info->DataIn(data, len); check_file_done(info); + DoRemoveFiles(); } void Manager::EndOfFile(const string& unique, Connection* conn, @@ -56,6 +58,7 @@ void Manager::EndOfFile(const string& unique, Connection* conn, Info* info = GetInfo(unique, conn, protocol); info->EndOfFile(); Manager::EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_EOF, info); + DoRemoveFiles(); } void Manager::Gap(const string& unique, uint64 offset, uint64 len, @@ -64,6 +67,7 @@ void Manager::Gap(const string& unique, uint64 offset, uint64 len, Info* info = GetInfo(unique, conn, protocol); info->Gap(offset, len); Manager::EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_GAP, info); + DoRemoveFiles(); } void 
Manager::SetSize(const string& unique, uint64 size, @@ -72,6 +76,7 @@ void Manager::SetSize(const string& unique, uint64 size, Info* info = GetInfo(unique, conn, protocol); info->SetTotalBytes(size); check_file_done(info); + DoRemoveFiles(); } void Manager::EvaluatePolicy(BifEnum::FileAnalysis::Trigger t, Info* info) @@ -131,11 +136,12 @@ Info* Manager::GetInfo(const string& unique, Connection* conn, if ( id_map[id] ) { reporter->Error("Evicted duplicate file ID: %s", id.c_str()); - RemoveFile(id); + DoRemoveFile(id); } id_map[id] = rval; Manager::EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_NEW, rval); + rval->ScheduleInactivityTimer(); } else { @@ -175,7 +181,24 @@ void Manager::Timeout(const FileID& file_id, bool is_terminating) DBG_LOG(DBG_FILE_ANALYSIS, "File analysis timeout for %s", info->GetFileID().c_str()); - RemoveFile(file_id); + DoRemoveFile(file_id); + } + +bool Manager::DoRemoveFile(const FileID& file_id) + { + IDMap::iterator it = id_map.find(file_id); + + if ( it == id_map.end() ) return false; + + if ( ! str_map.erase(it->second->GetUnique()) ) + reporter->Error("No string mapping for file ID %s", file_id.c_str()); + + DBG_LOG(DBG_FILE_ANALYSIS, "Remove FileID %s", it->first.c_str()); + + it->second->EndOfFile(); + delete it->second; + id_map.erase(it); + return true; } bool Manager::RemoveFile(const FileID& file_id) @@ -184,9 +207,18 @@ bool Manager::RemoveFile(const FileID& file_id) if ( it == id_map.end() ) return false; - if ( ! 
str_map.erase(it->second->GetUnique()) ) - reporter->Error("No string mapping for file ID %s", file_id.c_str()); - delete it->second; - id_map.erase(it); + DBG_LOG(DBG_FILE_ANALYSIS, "Queue removal of FileID %s", + it->first.c_str()); + + it->second->EndOfFile(); + removing.push_back(it->first); return true; } + +void Manager::DoRemoveFiles() + { + IDList::iterator it; + for ( it = removing.begin(); it != removing.end(); ++it ) + DoRemoveFile(*it); + removing.clear(); + } diff --git a/src/file_analysis/Manager.h b/src/file_analysis/Manager.h index 300f351d9f..98f9a469d7 100644 --- a/src/file_analysis/Manager.h +++ b/src/file_analysis/Manager.h @@ -3,6 +3,7 @@ #include #include +#include #include "Net.h" #include "Conn.h" @@ -61,7 +62,9 @@ public: const string& protocol = ""); /** - * Discard the file_analysis::Info object associated with \a file_id. + * Queue the file_analysis::Info object associated with \a file_id to + * be discarded. It will be discarded at the end of DataIn, EndOfFile, Gap, + * or SetSize functions. * @return false if file identifier did not map to anything, else true. */ bool RemoveFile(const FileID& file_id); @@ -73,16 +76,16 @@ public: bool PostponeTimeout(const FileID& file_id) const; /** - * Attaches an action to the file identifier. Multiple actions of a given - * type can be attached per file identifier at a time as long as the - * arguments differ. - * @return true if the action was attached, else false. + * Queue attachment of an action to the file identifier. Multiple actions + * of a given type can be attached per file identifier at a time as long as + * the arguments differ. + * @return false if the action failed to be instantiated, else true. */ bool AddAction(const FileID& file_id, RecordVal* args) const; /** - * Removes an action for a given file identifier. - * @return true if the action was removed, else false. + * Queue removal of an action for a given file identifier. 
+ * @return true if the action is active at the time of call, else false. */ bool RemoveAction(const FileID& file_id, const RecordVal* args) const; @@ -97,6 +100,7 @@ protected: typedef map StrMap; typedef map IDMap; + typedef list IDList; /** * @return the Info object mapped to \a unique. One is created if mapping @@ -118,8 +122,20 @@ protected: */ void Timeout(const FileID& file_id, bool is_terminating = ::terminating); + /** + * Immediately remove file_analysis::Info object associated with \a file_id. + * @return false if file identifier did not map to anything, else true. + */ + bool DoRemoveFile(const FileID& file_id); + + /** + * Clean up all pending file analysis for file IDs in #removing. + */ + void DoRemoveFiles(); + StrMap str_map; /**< Map unique strings to \c FileAnalysis::Info records. */ IDMap id_map; /**< Map file IDs to \c FileAnalysis::Info records. */ + IDList removing;/**< File IDs that are about to be removed. */ }; } // namespace file_analysis