From 9425c2508f9749a2156802d88adbf11f706dce0c Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Fri, 1 Mar 2013 14:03:37 -0600 Subject: [PATCH] Change semantics of FileAnalysis::stop BIF to internally mean "ignore". The manager has to remember that the file is being ignored until either EOF or timeout. --- src/file_analysis.bif | 12 ++-- src/file_analysis/Info.cc | 18 ++++-- src/file_analysis/Manager.cc | 109 +++++++++++++++++++---------------- src/file_analysis/Manager.h | 33 ++++++----- 4 files changed, 93 insertions(+), 79 deletions(-) diff --git a/src/file_analysis.bif b/src/file_analysis.bif index cb3c017311..abe7dbbd3c 100644 --- a/src/file_analysis.bif +++ b/src/file_analysis.bif @@ -37,14 +37,10 @@ enum Trigger %{ ## field of :bro:see:`FileAnalysis::Info`. TRIGGER_BOF_BUFFER, ## Raised when an initial guess at the file/mime type of a file is matched - ## based on magic numbers. TODO: re-purposing protocols/http/file-ident.sig - ## for doing this is tricky since the signature engine doesn't expect - ## to be decoupled from connections, so figure out what work needs - ## done there. + ## based on magic numbers. TRIGGER_TYPE, - ## Raised when the end of a file is detected. If the file is not - ## being transferred linearly, then this doesn't have to mean the full - ## file has been transferred. + ## Raised to signal that no more file data is incoming and it couldn't be + ## determined whether the full file was actually seen. TRIGGER_EOF, ## The reassembly buffer for the file filled and had to be discarded. ## The *undelivered* field of :bro:see:`FileAnalysis::Info` will @@ -94,6 +90,6 @@ function FileAnalysis::remove_action%(file_id: string, args: any%): bool function FileAnalysis::stop%(file_id: string%): bool %{ using file_analysis::FileID; - bool result = file_mgr->RemoveFile(FileID(file_id->CheckString())); + bool result = file_mgr->IgnoreFile(FileID(file_id->CheckString())); return new Val(result, TYPE_BOOL); %} diff --git a/src/file_analysis/Info.cc b/src/file_analysis/Info.cc index d98920604d..c3d2a7f175 100644 --- a/src/file_analysis/Info.cc +++ b/src/file_analysis/Info.cc @@ -197,11 +197,8 @@ bool Info::BufferBOF(const u_char* data, uint64 len) { if ( bof_buffer.full || bof_buffer.replayed ) return false; - using BifEnum::FileAnalysis::TRIGGER_BOF; - using BifEnum::FileAnalysis::TRIGGER_BOF_BUFFER; - if ( bof_buffer.chunks.size() == 0 ) - Manager::EvaluatePolicy(TRIGGER_BOF, this); + file_mgr->EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_BOF, this); uint64 desired_size = LookupFieldDefaultCount(bof_buffer_size_idx); @@ -211,7 +208,7 @@ bool Info::BufferBOF(const u_char* data, uint64 len) { bof_buffer.full = bof_buffer.replayed = true; val->Assign(bof_buffer_idx, new StringVal(new BroString(data, len, 0))); - Manager::EvaluatePolicy(TRIGGER_BOF_BUFFER, this); + file_mgr->EvaluatePolicy(TRIGGER_BOF_BUFFER, this); // TODO: libmagic stuff return false; } @@ -234,10 +231,12 @@ void Info::ReplayBOF() if ( bof_buffer.replayed ) return; bof_buffer.replayed = true; + if ( bof_buffer.chunks.empty() ) return; + val->Assign(bof_buffer_idx, new StringVal(concatenate(bof_buffer.chunks))); using BifEnum::FileAnalysis::TRIGGER_BOF_BUFFER; - Manager::EvaluatePolicy(TRIGGER_BOF_BUFFER, this); + file_mgr->EvaluatePolicy(TRIGGER_BOF_BUFFER, this); // TODO: libmagic stuff @@ -319,6 +318,11 @@ void Info::EndOfFile() actions.QueueRemoveAction(act->Args()); } + if ( IsComplete() ) + file_mgr->EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_DONE, this); + else + file_mgr->EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_EOF, this); + actions.FlushQueuedModifications(); } @@ -339,6 +343,8 @@ void Info::Gap(uint64 offset, uint64 len) actions.QueueRemoveAction(act->Args()); } + file_mgr->EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_GAP, this); + actions.FlushQueuedModifications(); IncrementByteCount(len, missing_bytes_idx); } diff --git a/src/file_analysis/Manager.cc b/src/file_analysis/Manager.cc index 67566f56b4..fa46f4b04c 100644 --- a/src/file_analysis/Manager.cc +++ b/src/file_analysis/Manager.cc @@ -25,62 +25,70 @@ void Manager::Terminate() Timeout(keys[i], true); } -static void check_file_done(Info* info) - { - if ( info->IsComplete() ) - { - file_mgr->RemoveFile(info->GetFileID()); - Manager::EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_DONE, info); - } - } - void Manager::DataIn(const string& unique, const u_char* data, uint64 len, uint64 offset, Connection* conn, const string& protocol) { + if ( IsIgnored(unique) ) return; + Info* info = GetInfo(unique, conn, protocol); + + if ( ! info ) return; + info->DataIn(data, len, offset); - check_file_done(info); - DoRemoveFiles(); + + if ( info->IsComplete() ) + RemoveFile(unique); } void Manager::DataIn(const string& unique, const u_char* data, uint64 len, Connection* conn, const string& protocol) { Info* info = GetInfo(unique, conn, protocol); + + if ( ! info ) return; + info->DataIn(data, len); - check_file_done(info); - DoRemoveFiles(); + + if ( info->IsComplete() ) + RemoveFile(unique); } void Manager::EndOfFile(const string& unique, Connection* conn, const string& protocol) { - Info* info = GetInfo(unique, conn, protocol); - info->EndOfFile(); - Manager::EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_EOF, info); - DoRemoveFiles(); + // Just call GetInfo because maybe the conn/protocol args will update + // something in the Info record. + GetInfo(unique, conn, protocol); + RemoveFile(unique); } void Manager::Gap(const string& unique, uint64 offset, uint64 len, Connection* conn, const string& protocol) { Info* info = GetInfo(unique, conn, protocol); + + if ( ! info ) return; + info->Gap(offset, len); - Manager::EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_GAP, info); - DoRemoveFiles(); } void Manager::SetSize(const string& unique, uint64 size, Connection* conn, const string& protocol) { Info* info = GetInfo(unique, conn, protocol); + + if ( ! info ) return; + info->SetTotalBytes(size); - check_file_done(info); - DoRemoveFiles(); + + if ( info->IsComplete() ) + RemoveFile(unique); } void Manager::EvaluatePolicy(BifEnum::FileAnalysis::Trigger t, Info* info) { + if ( IsIgnored(info->GetUnique()) ) return; + const ID* id = global_scope()->Lookup("FileAnalysis::policy"); assert(id); const Func* hook = id->ID_Val()->AsFunc(); @@ -126,6 +134,8 @@ bool Manager::RemoveAction(const FileID& file_id, const RecordVal* args) const Info* Manager::GetInfo(const string& unique, Connection* conn, const string& protocol) { + if ( IsIgnored(unique) ) return 0; + Info* rval = str_map[unique]; if ( ! rval ) @@ -136,12 +146,13 @@ Info* Manager::GetInfo(const string& unique, Connection* conn, if ( id_map[id] ) { reporter->Error("Evicted duplicate file ID: %s", id.c_str()); - DoRemoveFile(id); + RemoveFile(unique); } id_map[id] = rval; - Manager::EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_NEW, rval); + file_mgr->EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_NEW, rval); rval->ScheduleInactivityTimer(); + if ( IsIgnored(unique) ) return 0; } else { @@ -167,7 +178,7 @@ void Manager::Timeout(const FileID& file_id, bool is_terminating) if ( ! info ) return; - Manager::EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_TIMEOUT, info); + file_mgr->EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_TIMEOUT, info); if ( info->postpone_timeout && ! is_terminating ) { @@ -181,44 +192,44 @@ void Manager::Timeout(const FileID& file_id, bool is_terminating) DBG_LOG(DBG_FILE_ANALYSIS, "File analysis timeout for %s", info->GetFileID().c_str()); - DoRemoveFile(file_id); + RemoveFile(info->GetUnique()); } -bool Manager::DoRemoveFile(const FileID& file_id) +bool Manager::IgnoreFile(const FileID& file_id) { IDMap::iterator it = id_map.find(file_id); if ( it == id_map.end() ) return false; - if ( ! str_map.erase(it->second->GetUnique()) ) - reporter->Error("No string mapping for file ID %s", file_id.c_str()); + DBG_LOG(DBG_FILE_ANALYSIS, "Ignore FileID %s", file_id.c_str()); - DBG_LOG(DBG_FILE_ANALYSIS, "Remove FileID %s", it->first.c_str()); + ignored.insert(it->second->GetUnique()); + + return true; + } + +bool Manager::RemoveFile(const string& unique) + { + StrMap::iterator it = str_map.find(unique); + + if ( it == str_map.end() ) return false; it->second->EndOfFile(); + + FileID id = it->second->GetFileID(); + + DBG_LOG(DBG_FILE_ANALYSIS, "Remove FileID %s", id.c_str()); + + if ( ! id_map.erase(id) ) + reporter->Error("No mapping for fileID %s", id.c_str()); + + ignored.erase(unique); + str_map.erase(unique); delete it->second; - id_map.erase(it); return true; } -bool Manager::RemoveFile(const FileID& file_id) +bool Manager::IsIgnored(const string& unique) { - IDMap::iterator it = id_map.find(file_id); - - if ( it == id_map.end() ) return false; - - DBG_LOG(DBG_FILE_ANALYSIS, "Queue removal of FileID %s", - it->first.c_str()); - - it->second->EndOfFile(); - removing.push_back(it->first); - return true; - } - -void Manager::DoRemoveFiles() - { - IDList::iterator it; - for ( it = removing.begin(); it != removing.end(); ++it ) - DoRemoveFile(*it); - removing.clear(); + return ignored.find(unique) != ignored.end(); } diff --git a/src/file_analysis/Manager.h b/src/file_analysis/Manager.h index 98f9a469d7..257060f406 100644 --- a/src/file_analysis/Manager.h +++ b/src/file_analysis/Manager.h @@ -3,7 +3,7 @@ #include #include -#include +#include #include "Net.h" #include "Conn.h" @@ -62,12 +62,11 @@ public: const string& protocol = ""); /** - * Queue the file_analysis::Info object associated with \a file_id to - * be discarded. It will be discarded at the end of DataIn, EndOfFile, Gap, - * or SetSize functions. + * Starts ignoring a file, which will finally be removed from internal + * mappings on EOF or TIMEOUT. * @return false if file identifier did not map to anything, else true. */ - bool RemoveFile(const FileID& file_id); + bool IgnoreFile(const FileID& file_id); /** * If called during \c FileAnalysis::policy evaluation for a @@ -92,20 +91,22 @@ public: /** * Calls the \c FileAnalysis::policy hook. */ - static void EvaluatePolicy(BifEnum::FileAnalysis::Trigger t, Info* info); + void EvaluatePolicy(BifEnum::FileAnalysis::Trigger t, Info* info); protected: friend class InfoTimer; typedef map StrMap; + typedef set StrSet; typedef map IDMap; - typedef list IDList; /** - * @return the Info object mapped to \a unique. One is created if mapping - * doesn't exist. If it did exist, the activity time is refreshed - * and connection-related fields of the record value may be updated. + * @return the Info object mapped to \a unique or a null pointer if analysis + * is being ignored for the associated file. An Info object may be + * created if a mapping doesn't exist, and if it did exist, the + * activity time is refreshed and connection-related fields of the + * record value may be updated. */ Info* GetInfo(const string& unique, Connection* conn = 0, const string& protocol = ""); @@ -123,19 +124,19 @@ protected: void Timeout(const FileID& file_id, bool is_terminating = ::terminating); /** - * Immediately remove file_analysis::Info object associated with \a file_id. - * @return false if file identifier did not map to anything, else true. + * Immediately remove file_analysis::Info object associated with \a unique. + * @return false if file string did not map to anything, else true. */ - bool DoRemoveFile(const FileID& file_id); + bool RemoveFile(const string& unique); /** - * Clean up all pending file analysis for file IDs in #removing. + * @return whether the file mapped to \a unique is being ignored. */ - void DoRemoveFiles(); + bool IsIgnored(const string& unique); StrMap str_map; /**< Map unique strings to \c FileAnalysis::Info records. */ IDMap id_map; /**< Map file IDs to \c FileAnalysis::Info records. */ - IDList removing;/**< File IDs that are about to be removed. */ + StrSet ignored; /**< Ignored files. Will be finally removed on EOF. */ }; } // namespace file_analysis