diff --git a/CHANGES b/CHANGES index 28b5c96abb..17f90dfc22 100644 --- a/CHANGES +++ b/CHANGES @@ -1,4 +1,15 @@ +2.2-131 | 2014-01-30 16:11:11 -0800 + + * Extend file analysis API to allow file ID caching. This allows an + analyzer to either provide file IDs associated with some file + content or to cache a file ID that was already determined by + script-layer logic so that subsequent calls to the file analysis + interface can bypass costly detours through script-layer. This + can yield a decent performance improvement for analyzers that are + able to take advantage of it and deal with streaming content (like + HTTP, which has been adapted accordingly). (Jon Siwek) + 2.2-128 | 2014-01-30 15:58:47 -0800 * Add leak test for Exec module. (Bernhard Amann) diff --git a/NEWS b/NEWS index 07d8e53c52..9b87de3e41 100644 --- a/NEWS +++ b/NEWS @@ -41,6 +41,12 @@ Changed Functionality event x509_extension(c: connection, is_orig: bool, cert: X509, ext: X509_extension_info); +- Bro no longer special-cases SYN/FIN/RST-filtered traces by not + reporting missing data. The old behavior can be reverted by + redef'ing "detect_filtered_trace". + + TODO: Update if we add a detector for filtered traces. 
+ Bro 2.2 ======= diff --git a/VERSION b/VERSION index 76549ddacf..c09ed8703c 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.2-128 +2.2-131 diff --git a/aux/btest b/aux/btest index 58b9c8bf76..36b96eb9c1 160000 --- a/aux/btest +++ b/aux/btest @@ -1 +1 @@ -Subproject commit 58b9c8bf762024136ec2c9bbcea16d417282af8d +Subproject commit 36b96eb9c13d1011bbc8be3581fd0f1c0bd8de44 diff --git a/src/analyzer/protocol/http/HTTP.cc b/src/analyzer/protocol/http/HTTP.cc index ffdcad226f..f605dce402 100644 --- a/src/analyzer/protocol/http/HTTP.cc +++ b/src/analyzer/protocol/http/HTTP.cc @@ -242,10 +242,17 @@ int HTTP_Entity::Undelivered(int64_t len) if ( end_of_data && in_header ) return 0; - file_mgr->Gap(body_length, len, - http_message->MyHTTP_Analyzer()->GetAnalyzerTag(), - http_message->MyHTTP_Analyzer()->Conn(), - http_message->IsOrig()); + if ( is_partial_content ) + file_mgr->Gap(body_length, len, + http_message->MyHTTP_Analyzer()->GetAnalyzerTag(), + http_message->MyHTTP_Analyzer()->Conn(), + http_message->IsOrig()); + else + precomputed_file_id = file_mgr->Gap(body_length, len, + http_message->MyHTTP_Analyzer()->GetAnalyzerTag(), + http_message->MyHTTP_Analyzer()->Conn(), + http_message->IsOrig(), + precomputed_file_id); if ( chunked_transfer_state != NON_CHUNKED_TRANSFER ) { @@ -314,15 +321,18 @@ void HTTP_Entity::SubmitData(int len, const char* buf) else { if ( send_size && content_length > 0 ) - file_mgr->SetSize(content_length, + precomputed_file_id = file_mgr->SetSize(content_length, http_message->MyHTTP_Analyzer()->GetAnalyzerTag(), http_message->MyHTTP_Analyzer()->Conn(), - http_message->IsOrig()); + http_message->IsOrig(), + precomputed_file_id); - file_mgr->DataIn(reinterpret_cast(buf), len, + precomputed_file_id = file_mgr->DataIn(reinterpret_cast(buf), + len, http_message->MyHTTP_Analyzer()->GetAnalyzerTag(), http_message->MyHTTP_Analyzer()->Conn(), - http_message->IsOrig()); + http_message->IsOrig(), + precomputed_file_id); } send_size = false; diff --git 
a/src/analyzer/protocol/http/HTTP.h b/src/analyzer/protocol/http/HTTP.h index 8339e48e3b..a1fedee41d 100644 --- a/src/analyzer/protocol/http/HTTP.h +++ b/src/analyzer/protocol/http/HTTP.h @@ -64,6 +64,7 @@ protected: uint64_t offset; int64_t instance_length; // total length indicated by content-range bool send_size; // whether to send size indication to FAF + std::string precomputed_file_id; MIME_Entity* NewChildEntity() { return new HTTP_Entity(http_message, this, 1); } diff --git a/src/file_analysis/Manager.cc b/src/file_analysis/Manager.cc index 0337dbb098..a6878e7c5d 100644 --- a/src/file_analysis/Manager.cc +++ b/src/file_analysis/Manager.cc @@ -75,36 +75,47 @@ void Manager::SetHandle(const string& handle) current_file_id = HashHandle(handle); } -void Manager::DataIn(const u_char* data, uint64 len, uint64 offset, - analyzer::Tag tag, Connection* conn, bool is_orig) +string Manager::DataIn(const u_char* data, uint64 len, uint64 offset, + analyzer::Tag tag, Connection* conn, bool is_orig, + const string& precomputed_id) { - GetFileHandle(tag, conn, is_orig); - File* file = GetFile(current_file_id, conn, tag, is_orig); + string id = precomputed_id.empty() ? GetFileID(tag, conn, is_orig) : precomputed_id; + File* file = GetFile(id, conn, tag, is_orig); if ( ! file ) - return; + return ""; file->DataIn(data, len, offset); if ( file->IsComplete() ) + { RemoveFile(file->GetID()); + return ""; + } + + return id; } -void Manager::DataIn(const u_char* data, uint64 len, analyzer::Tag tag, - Connection* conn, bool is_orig) +string Manager::DataIn(const u_char* data, uint64 len, analyzer::Tag tag, + Connection* conn, bool is_orig, const string& precomputed_id) { - GetFileHandle(tag, conn, is_orig); + string id = precomputed_id.empty() ? GetFileID(tag, conn, is_orig) : precomputed_id; // Sequential data input shouldn't be going over multiple conns, so don't // do the check to update connection set. 
- File* file = GetFile(current_file_id, conn, tag, is_orig, false); + File* file = GetFile(id, conn, tag, is_orig, false); if ( ! file ) - return; + return ""; file->DataIn(data, len); if ( file->IsComplete() ) + { RemoveFile(file->GetID()); + return ""; + } + + return id; } void Manager::DataIn(const u_char* data, uint64 len, const string& file_id, @@ -133,8 +144,7 @@ void Manager::EndOfFile(analyzer::Tag tag, Connection* conn) void Manager::EndOfFile(analyzer::Tag tag, Connection* conn, bool is_orig) { // Don't need to create a file if we're just going to remove it right away. - GetFileHandle(tag, conn, is_orig); - RemoveFile(current_file_id); + RemoveFile(GetFileID(tag, conn, is_orig)); } void Manager::EndOfFile(const string& file_id) @@ -142,31 +152,37 @@ void Manager::EndOfFile(const string& file_id) RemoveFile(file_id); } -void Manager::Gap(uint64 offset, uint64 len, analyzer::Tag tag, - Connection* conn, bool is_orig) +string Manager::Gap(uint64 offset, uint64 len, analyzer::Tag tag, + Connection* conn, bool is_orig, const string& precomputed_id) { - GetFileHandle(tag, conn, is_orig); - File* file = GetFile(current_file_id, conn, tag, is_orig); + string id = precomputed_id.empty() ? GetFileID(tag, conn, is_orig) : precomputed_id; + File* file = GetFile(id, conn, tag, is_orig); if ( ! file ) - return; + return ""; file->Gap(offset, len); + return id; } -void Manager::SetSize(uint64 size, analyzer::Tag tag, Connection* conn, - bool is_orig) +string Manager::SetSize(uint64 size, analyzer::Tag tag, Connection* conn, + bool is_orig, const string& precomputed_id) { - GetFileHandle(tag, conn, is_orig); - File* file = GetFile(current_file_id, conn, tag, is_orig); + string id = precomputed_id.empty() ? GetFileID(tag, conn, is_orig) : precomputed_id; + File* file = GetFile(id, conn, tag, is_orig); if ( ! 
file ) - return; + return ""; file->SetTotalBytes(size); if ( file->IsComplete() ) + { RemoveFile(file->GetID()); + return ""; + } + + return id; } bool Manager::SetTimeoutInterval(const string& file_id, double interval) const @@ -317,15 +333,15 @@ bool Manager::IsIgnored(const string& file_id) return ignored.find(file_id) != ignored.end(); } -void Manager::GetFileHandle(analyzer::Tag tag, Connection* c, bool is_orig) +string Manager::GetFileID(analyzer::Tag tag, Connection* c, bool is_orig) { current_file_id.clear(); if ( IsDisabled(tag) ) - return; + return ""; if ( ! get_file_handle ) - return; + return ""; EnumVal* tagval = tag.AsEnumVal(); Ref(tagval); @@ -337,6 +353,7 @@ void Manager::GetFileHandle(analyzer::Tag tag, Connection* c, bool is_orig) mgr.QueueEvent(get_file_handle, vl); mgr.Drain(); // need file handle immediately so we don't have to buffer data + return current_file_id; } bool Manager::IsDisabled(analyzer::Tag tag) diff --git a/src/file_analysis/Manager.h b/src/file_analysis/Manager.h index cf73c6b52d..649f82c164 100644 --- a/src/file_analysis/Manager.h +++ b/src/file_analysis/Manager.h @@ -82,9 +82,17 @@ public: * @param conn network connection over which the file data is transferred. * @param is_orig true if the file is being sent from connection originator * or false if is being sent in the opposite direction. + * @param precomputed_file_id may be set to a previous return value in order to + * bypass costly file handle lookups. + * @return a unique file ID string which, in certain contexts, may be + * cached and passed back in to a subsequent function call in order + * to avoid costly file handle lookups (which have to go through + * the \c get_file_handle script-layer event). An empty string + * indicates the associated file is not going to be analyzed further. 
*/ - void DataIn(const u_char* data, uint64 len, uint64 offset, - analyzer::Tag tag, Connection* conn, bool is_orig); + std::string DataIn(const u_char* data, uint64 len, uint64 offset, + analyzer::Tag tag, Connection* conn, bool is_orig, + const std::string& precomputed_file_id = ""); /** * Pass in sequential file data. @@ -94,9 +102,17 @@ public: * @param conn network connection over which the file data is transferred. * @param is_orig true if the file is being sent from connection originator * or false if is being sent in the opposite direction. + * @param precomputed_file_id may be set to a previous return value in order to + * bypass costly file handle lookups. + * @return a unique file ID string which, in certain contexts, may be + * cached and passed back in to a subsequent function call in order + * to avoid costly file handle lookups (which have to go through + * the \c get_file_handle script-layer event). An empty string + * indicates the associated file is not going to be analyzed further. */ - void DataIn(const u_char* data, uint64 len, analyzer::Tag tag, - Connection* conn, bool is_orig); + std::string DataIn(const u_char* data, uint64 len, analyzer::Tag tag, + Connection* conn, bool is_orig, + const std::string& precomputed_file_id = ""); /** * Pass in sequential file data from external source (e.g. input framework). @@ -140,9 +156,17 @@ public: * @param conn network connection over which the file data is transferred. * @param is_orig true if the file is being sent from connection originator * or false if is being sent in the opposite direction. + * @param precomputed_file_id may be set to a previous return value in order to + * bypass costly file handle lookups. + * @return a unique file ID string which, in certain contexts, may be + * cached and passed back in to a subsequent function call in order + * to avoid costly file handle lookups (which have to go through + * the \c get_file_handle script-layer event). 
An empty string + * indicates the associated file is not going to be analyzed further. */ - void Gap(uint64 offset, uint64 len, analyzer::Tag tag, Connection* conn, - bool is_orig); + std::string Gap(uint64 offset, uint64 len, analyzer::Tag tag, + Connection* conn, bool is_orig, + const std::string& precomputed_file_id = ""); /** * Provide the expected number of bytes that comprise a file. @@ -151,9 +175,16 @@ public: * @param conn network connection over which the file data is transferred. * @param is_orig true if the file is being sent from connection originator * or false if is being sent in the opposite direction. + * @param precomputed_file_id may be set to a previous return value in order to + * bypass costly file handle lookups. + * @return a unique file ID string which, in certain contexts, may be + * cached and passed back in to a subsequent function call in order + * to avoid costly file handle lookups (which have to go through + * the \c get_file_handle script-layer event). An empty string + * indicates the associated file is not going to be analyzed further. */ - void SetSize(uint64 size, analyzer::Tag tag, Connection* conn, - bool is_orig); + std::string SetSize(uint64 size, analyzer::Tag tag, Connection* conn, + bool is_orig, const std::string& precomputed_file_id = ""); /** * Starts ignoring a file, which will finally be removed from internal @@ -283,8 +314,10 @@ protected: * @param conn network connection over which the file is transferred. * @param is_orig true if the file is being sent from connection originator * or false if is being sent in the opposite direction. + * @return #current_file_id, which is a hash of a unique file handle string + * set by a \c get_file_handle event handler. */ - void GetFileHandle(analyzer::Tag tag, Connection* c, bool is_orig); + std::string GetFileID(analyzer::Tag tag, Connection* c, bool is_orig); /** * Check if analysis is available for files transferred over a given