diff --git a/scripts/base/init-bare.bro b/scripts/base/init-bare.bro index 798eb387b3..81b25d9568 100644 --- a/scripts/base/init-bare.bro +++ b/scripts/base/init-bare.bro @@ -3025,6 +3025,26 @@ export { } module GLOBAL; +module FileAnalysis; +export { + ## When the file analysis framework receives input regarding a file + ## transferred over the network, and a unique handle string cannot + ## be determined immediately from :bro:see:`FileAnalysis::handle_callbacks`, + ## that input is buffered. This is the interval at which to automatically + ## check back on any currently buffered inputs to see if a handle is + ## available so that the input can be processed. Since any input + ## triggers the check for all buffered inputs, this option only helps + ## cases where the file analysis framework is getting little input. + const pending_file_drain_interval = 10 sec &redef; + + ## This is the interval at which to give up checking for a unique handle + ## string for files transferred over the network that were initially + ## buffered because no handle was available yet (e.g. when the necessary + ## events to construct the handle may not have been flushed yet). + const pending_file_timeout = 10 sec &redef; +} +module GLOBAL; + ## Number of bytes per packet to capture from live interfaces. const snaplen = 8192 &redef; diff --git a/scripts/base/protocols/http/file-analysis.bro b/scripts/base/protocols/http/file-analysis.bro index 901d82b74c..a0390710a8 100644 --- a/scripts/base/protocols/http/file-analysis.bro +++ b/scripts/base/protocols/http/file-analysis.bro @@ -12,6 +12,7 @@ function get_file_handle(c: connection, is_orig: bool): string if ( c$http$range_request ) return fmt("%s http(%s): %s: %s", c$start_time, is_orig, c$id$orig_h, build_url(c$http)); + return fmt("%s http(%s, %s): %s", c$start_time, is_orig, c$http$trans_depth, id_string(c$id)); } diff --git a/src/Net.cc b/src/Net.cc index d69337bd63..73c618b8af 100644 --- a/src/Net.cc +++ b/src/Net.cc @@ -30,7 +30,6 @@ #include "PacketSort.h" #include "Serializer.h" #include "PacketDumper.h" -#include "file_analysis/Manager.h" extern "C" { #include "setsignal.h" @@ -353,7 +352,6 @@ void net_packet_dispatch(double t, const struct pcap_pkthdr* hdr, sessions->DispatchPacket(t, hdr, pkt, hdr_size, src_ps, pkt_elem); mgr.Drain(); - file_mgr->DrainPending(); if ( sp ) { diff --git a/src/RemoteSerializer.cc b/src/RemoteSerializer.cc index 80c839a1da..66f8def489 100644 --- a/src/RemoteSerializer.cc +++ b/src/RemoteSerializer.cc @@ -192,7 +192,6 @@ #include "logging/Manager.h" #include "IPAddr.h" #include "bro_inet_ntop.h" -#include "file_analysis/Manager.h" extern "C" { #include "setsignal.h" @@ -1463,7 +1462,6 @@ void RemoteSerializer::Process() current_iosrc = this; sessions->NextPacket(p->time, p->hdr, p->pkt, p->hdr_size, 0); mgr.Drain(); - file_mgr->DrainPending(); current_hdr = 0; // done with these current_pkt = 0; diff --git a/src/Timer.h b/src/Timer.h index 615c8bf69a..f3192cbd79 100644 --- a/src/Timer.h +++ b/src/Timer.h @@ -23,6 +23,7 @@ enum TimerType { TIMER_CONN_STATUS_UPDATE, TIMER_DNS_EXPIRE, TIMER_FILE_ANALYSIS_INACTIVITY, + TIMER_FILE_ANALYSIS_DRAIN, TIMER_FRAG, TIMER_INCREMENTAL_SEND, TIMER_INCREMENTAL_WRITE, diff --git a/src/const.bif b/src/const.bif index ea84b3363d..0b843e34a9 100644 --- a/src/const.bif +++ b/src/const.bif @@ -23,3 +23,6 @@ const Tunnel::delay_gtp_confirmation: bool; const Tunnel::ip_tunnel_timeout: interval; const Threading::heartbeat_interval: interval; + +const FileAnalysis::pending_file_drain_interval: interval; +const FileAnalysis::pending_file_timeout: interval; diff --git a/src/file_analysis/InfoTimer.h b/src/file_analysis/InfoTimer.h index ac0d8b6b00..250dbedddf 100644 --- a/src/file_analysis/InfoTimer.h +++ b/src/file_analysis/InfoTimer.h @@ -16,8 +16,6 @@ public: InfoTimer(double t, const FileID& id, double interval) : Timer(t + interval, TIMER_FILE_ANALYSIS_INACTIVITY), file_id(id) {} - ~InfoTimer() {} - /** * Check inactivity of file_analysis::Info corresponding to #file_id, * reschedule if active, else call file_analysis::Manager::Timeout. diff --git a/src/file_analysis/Manager.cc b/src/file_analysis/Manager.cc index 1c2fcc4889..a6d2dfa3fc 100644 --- a/src/file_analysis/Manager.cc +++ b/src/file_analysis/Manager.cc @@ -8,8 +8,19 @@ using namespace file_analysis; -Manager::Manager() +void DrainTimer::Dispatch(double t, int is_expire) { + using BifConst::FileAnalysis::pending_file_drain_interval; + DBG_LOG(DBG_FILE_ANALYSIS, "DrainTimer dispatched"); + file_mgr->DrainPending(); + if ( ! is_expire ) + timer_mgr->Add(new DrainTimer(pending_file_drain_interval)); + } + +Manager::Manager() : is_draining(false) + { + using BifConst::FileAnalysis::pending_file_drain_interval; + timer_mgr->Add(new DrainTimer(pending_file_drain_interval)); } Manager::~Manager() @@ -38,10 +49,13 @@ string Manager::GetFileHandle(Analyzer* root, Connection* conn, vl.append(new Val(is_orig, TYPE_BOOL)); Val* result = callback->AsFunc()->Call(&vl); - string rval = result->AsString()->CheckString(); - Unref(result); - if ( ! rval.empty() ) return rval; + if ( result ) + { + string rval = result->AsString()->CheckString(); + Unref(result); + if ( ! rval.empty() ) return rval; + } } for ( analyzer_list::const_iterator it = root->GetChildren().begin(); @@ -63,14 +77,29 @@ string Manager::GetFileHandle(Connection* conn, bool is_orig) const void Manager::DrainPending() { - for ( size_t i = 0; i < pending.size(); ++i ) - pending[i].Retry(); + if ( is_draining ) return; - pending.clear(); + is_draining = true; + PendingList::iterator it = pending.begin(); + + while ( it != pending.end() ) + { + if ( (*it)->Retry() || (*it)->IsStale() ) + { + delete *it; + pending.erase(it++); + } + else + ++it; + } + + is_draining = false; } void Manager::Terminate() { + DrainPending(); + vector keys; for ( IDMap::iterator it = id_map.begin(); it != id_map.end(); ++it ) keys.push_back(it->first); @@ -78,19 +107,24 @@ void Manager::Terminate() Timeout(keys[i], true); } -void Manager::DataIn(const u_char* data, uint64 len, uint64 offset, - Connection* conn, bool is_orig, bool allow_retry) +bool Manager::DataIn(const u_char* data, uint64 len, uint64 offset, + Connection* conn, bool is_orig) { + DrainPending(); + string unique = GetFileHandle(conn, is_orig); if ( ! unique.empty() ) { DataIn(data, len, offset, GetInfo(unique, conn)); - return; + return true; } - if ( allow_retry ) - pending.push_back(PendingFile(data, len, offset, conn, is_orig)); + if ( ! is_draining ) + pending.push_back(new PendingDataInChunk(data, len, offset, conn, + is_orig)); + + return false; } void Manager::DataIn(const u_char* data, uint64 len, uint64 offset, @@ -102,6 +136,8 @@ void Manager::DataIn(const u_char* data, uint64 len, uint64 offset, void Manager::DataIn(const u_char* data, uint64 len, uint64 offset, Info* info) { + DrainPending(); + if ( ! info ) return; info->DataIn(data, len, offset); @@ -110,19 +146,23 @@ void Manager::DataIn(const u_char* data, uint64 len, uint64 offset, RemoveFile(info->GetUnique()); } -void Manager::DataIn(const u_char* data, uint64 len, Connection* conn, - bool is_orig, bool allow_retry) +bool Manager::DataIn(const u_char* data, uint64 len, Connection* conn, + bool is_orig) { + DrainPending(); + string unique = GetFileHandle(conn, is_orig); if ( ! unique.empty() ) { DataIn(data, len, GetInfo(unique, conn)); - return; + return true; } - if ( allow_retry ) - pending.push_back(PendingFile(data, len, conn, is_orig)); + if ( ! is_draining ) + pending.push_back(new PendingDataInStream(data, len, conn, is_orig)); + + return false; } void Manager::DataIn(const u_char* data, uint64 len, const string& unique) @@ -132,6 +172,8 @@ void Manager::DataIn(const u_char* data, uint64 len, const string& unique) void Manager::DataIn(const u_char* data, uint64 len, Info* info) { + DrainPending(); + if ( ! info ) return; info->DataIn(data, len); @@ -146,27 +188,46 @@ void Manager::EndOfFile(Connection* conn) EndOfFile(conn, false); } -void Manager::EndOfFile(Connection* conn, bool is_orig) +bool Manager::EndOfFile(Connection* conn, bool is_orig) { + DrainPending(); + string unique = GetFileHandle(conn, is_orig); - if ( unique.empty() ) return; // nothing to do + if ( ! unique.empty() ) + { + RemoveFile(unique); + return true; + } - RemoveFile(unique); + if ( ! is_draining ) + pending.push_back(new PendingEOF(conn, is_orig)); + + return false; } void Manager::EndOfFile(const string& unique) { + DrainPending(); RemoveFile(unique); } -void Manager::Gap(uint64 offset, uint64 len, Connection* conn, bool is_orig) +bool Manager::Gap(uint64 offset, uint64 len, Connection* conn, bool is_orig) { + DrainPending(); + string unique = GetFileHandle(conn, is_orig); - if ( unique.empty() ) return; // nothing to do since no data has been seen + if ( ! unique.empty() ) + { + Gap(offset, len, GetInfo(unique, conn)); + return true; + } - Gap(offset, len, GetInfo(unique, conn)); + if ( ! is_draining ) + pending.push_back(new PendingGap(offset, len, conn, is_orig)); + + return false; } void Manager::Gap(uint64 offset, uint64 len, const string& unique) @@ -176,18 +237,29 @@ void Manager::Gap(uint64 offset, uint64 len, const string& unique) void Manager::Gap(uint64 offset, uint64 len, Info* info) { + DrainPending(); + if ( ! info ) return; info->Gap(offset, len); } -void Manager::SetSize(uint64 size, Connection* conn, bool is_orig) +bool Manager::SetSize(uint64 size, Connection* conn, bool is_orig) { + DrainPending(); + string unique = GetFileHandle(conn, is_orig); - if ( unique.empty() ) return; // ok assuming this always follows a DataIn() + if ( ! unique.empty() ) + { + SetSize(size, GetInfo(unique, conn)); + return true; + } - SetSize(size, GetInfo(unique, conn)); + if ( ! is_draining ) + pending.push_back(new PendingSize(size, conn, is_orig)); + + return false; } void Manager::SetSize(uint64 size, const string& unique) @@ -197,6 +269,8 @@ void Manager::SetSize(uint64 size, const string& unique) void Manager::SetSize(uint64 size, Info* info) { + DrainPending(); + if ( ! info ) return; info->SetTotalBytes(size); @@ -294,6 +368,8 @@ Info* Manager::Lookup(const FileID& file_id) const void Manager::Timeout(const FileID& file_id, bool is_terminating) { + DrainPending(); + Info* info = Lookup(file_id); if ( ! info ) return; diff --git a/src/file_analysis/Manager.h b/src/file_analysis/Manager.h index db9be04a06..6f92553c3b 100644 --- a/src/file_analysis/Manager.h +++ b/src/file_analysis/Manager.h @@ -4,12 +4,13 @@ #include #include #include -#include +#include #include "Net.h" #include "Conn.h" #include "Val.h" #include "Analyzer.h" +#include "Timer.h" #include "Info.h" #include "InfoTimer.h" @@ -18,6 +19,15 @@ namespace file_analysis { +class DrainTimer : public Timer { +public: + + DrainTimer(double interval) + : Timer(network_time + interval, TIMER_FILE_ANALYSIS_DRAIN) {} + + void Dispatch(double t, int is_expire); +}; + /** * Main entry point for interacting with file analysis. */ @@ -28,17 +38,6 @@ public: ~Manager(); - /** - * Attempts to forward the data from any pending file contents, i.e. - * those for which a unique file handle string could not immediately - * be determined. If again a file handle can't be determined, give up. - * The assumption for this to work correctly is that the EventMgr would - * have always drained between packet boundaries, so calling this method - * at that time may mean the script-layer function for generating file - * handles can now come up with a result. - */ - void DrainPending(); - /** * Times out any active file analysis to prepare for shutdown. */ @@ -47,8 +46,8 @@ public: /** * Pass in non-sequential file data. */ - void DataIn(const u_char* data, uint64 len, uint64 offset, - Connection* conn, bool is_orig, bool allow_retry = true); + bool DataIn(const u_char* data, uint64 len, uint64 offset, + Connection* conn, bool is_orig); void DataIn(const u_char* data, uint64 len, uint64 offset, const string& unique); void DataIn(const u_char* data, uint64 len, uint64 offset, @@ -57,8 +56,7 @@ public: /** * Pass in sequential file data. */ - void DataIn(const u_char* data, uint64 len, Connection* conn, bool is_orig, - bool allow_retry = true); + bool DataIn(const u_char* data, uint64 len, Connection* conn, bool is_orig); void DataIn(const u_char* data, uint64 len, const string& unique); void DataIn(const u_char* data, uint64 len, Info* info); @@ -66,20 +64,20 @@ public: * Signal the end of file data. */ void EndOfFile(Connection* conn); - void EndOfFile(Connection* conn, bool is_orig); + bool EndOfFile(Connection* conn, bool is_orig); void EndOfFile(const string& unique); /** * Signal a gap in the file data stream. */ - void Gap(uint64 offset, uint64 len, Connection* conn, bool is_orig); + bool Gap(uint64 offset, uint64 len, Connection* conn, bool is_orig); void Gap(uint64 offset, uint64 len, const string& unique); void Gap(uint64 offset, uint64 len, Info* info); /** * Provide the expected number of bytes that comprise a file. */ - void SetSize(uint64 size, Connection* conn, bool is_orig); + bool SetSize(uint64 size, Connection* conn, bool is_orig); void SetSize(uint64 size, const string& unique); void SetSize(uint64 size, Info* info); @@ -118,12 +116,13 @@ public: protected: friend class InfoTimer; + friend class DrainTimer; friend class PendingFile; typedef map StrMap; typedef set StrSet; typedef map IDMap; - typedef vector PendingList; + typedef list PendingList; /** * @return the Info object mapped to \a unique or a null pointer if analysis @@ -169,10 +168,19 @@ protected: */ bool IsIgnored(const string& unique); + /** + * Attempts to forward the data from any pending file contents, i.e. + * those for which a unique file handle string could not immediately + * be determined. + */ + void DrainPending(); + StrMap str_map; /**< Map unique strings to \c FileAnalysis::Info records. */ IDMap id_map; /**< Map file IDs to \c FileAnalysis::Info records. */ StrSet ignored; /**< Ignored files. Will be finally removed on EOF. */ - PendingList pending; /**< Files waiting for next Tick to return a handle */ + PendingList pending; /**< Files awaiting a unique handle. */ + + bool is_draining; }; } // namespace file_analysis diff --git a/src/file_analysis/PendingFile.cc b/src/file_analysis/PendingFile.cc index 90029703b1..5d94c441a2 100644 --- a/src/file_analysis/PendingFile.cc +++ b/src/file_analysis/PendingFile.cc @@ -3,58 +3,112 @@ using namespace file_analysis; -PendingFile::PendingFile(const u_char* arg_data, uint64 arg_len, - uint64 arg_offset, Connection* arg_conn, - bool arg_is_orig) - : is_linear(false), data(arg_data), len(arg_len), offset(arg_offset), - conn(arg_conn), is_orig(arg_is_orig) +static void copy_data(const u_char** dst, const u_char* src, uint64 len) { - Ref(conn); + u_char* tmp = new u_char[len]; + memcpy(tmp, src, len); + *dst = tmp; } -PendingFile::PendingFile(const u_char* arg_data, uint64 arg_len, - Connection* arg_conn, bool arg_is_orig) - : is_linear(true), data(arg_data), len(arg_len), offset(0), - conn(arg_conn), is_orig(arg_is_orig) +static string conn_str(Connection* c) { - Ref(conn); + char op[256], rp[256]; + modp_ulitoa10(ntohs(c->OrigPort()), op); + modp_ulitoa10(ntohs(c->RespPort()), rp); + string rval = c->OrigAddr().AsString() + ":" + op + "->" + + c->RespAddr().AsString() + ":" + rp; + return rval; } -PendingFile::PendingFile(const PendingFile& other) - : is_linear(other.is_linear), data(other.data), len(other.len), - offset(other.offset), conn(other.conn), is_orig(other.is_orig) +PendingFile::PendingFile(Connection* arg_conn, bool arg_is_orig) + : conn(arg_conn), is_orig(arg_is_orig), creation_time(network_time) { Ref(conn); - } - -PendingFile& PendingFile::operator=(const PendingFile& other) - { - // handle self-assign for correct reference counting - if ( this == &other ) return *this; - - Unref(conn); - - is_linear = other.is_linear; - data = other.data; - len = other.len; - offset = other.offset; - conn = other.conn; - is_orig = other.is_orig; - - Ref(conn); - - return *this; + DBG_LOG(DBG_FILE_ANALYSIS, "New pending file: %s", conn_str(conn).c_str()); } PendingFile::~PendingFile() { Unref(conn); + DBG_LOG(DBG_FILE_ANALYSIS, "Delete pending file: %s", + conn_str(conn).c_str()); } -void PendingFile::Retry() const +bool PendingFile::IsStale() const { - if ( is_linear ) - file_mgr->DataIn(data, len, conn, is_orig, false); - else - file_mgr->DataIn(data, len, offset, conn, is_orig, false); + using BifConst::FileAnalysis::pending_file_timeout; + if ( creation_time + pending_file_timeout < network_time ) + { + DBG_LOG(DBG_FILE_ANALYSIS, "Stale pending file: %s", + conn_str(conn).c_str()); + return true; + } + return false; + } + +PendingDataInChunk::PendingDataInChunk(const u_char* arg_data, uint64 arg_len, + uint64 arg_offset, Connection* arg_conn, + bool arg_is_orig) + : PendingFile(arg_conn, arg_is_orig), len(arg_len), offset(arg_offset) + { + copy_data(&data, arg_data, len); + } + +bool PendingDataInChunk::Retry() const + { + return file_mgr->DataIn(data, len, offset, conn, is_orig); + } + +PendingDataInChunk::~PendingDataInChunk() + { + delete [] data; + } + +PendingDataInStream::PendingDataInStream(const u_char* arg_data, uint64 arg_len, + Connection* arg_conn, bool arg_is_orig) + : PendingFile(arg_conn, arg_is_orig), len(arg_len) + { + copy_data(&data, arg_data, len); + } + +bool PendingDataInStream::Retry() const + { + return file_mgr->DataIn(data, len, conn, is_orig); + } + +PendingDataInStream::~PendingDataInStream() + { + delete [] data; + } + +PendingGap::PendingGap(uint64 arg_offset, uint64 arg_len, Connection* arg_conn, + bool arg_is_orig) + : PendingFile(arg_conn, arg_is_orig), offset(arg_offset), len(arg_len) + { + } + +bool PendingGap::Retry() const + { + return file_mgr->Gap(offset, len, conn, is_orig); + } + +PendingEOF::PendingEOF(Connection* arg_conn, bool arg_is_orig) + : PendingFile(arg_conn, arg_is_orig) + { + } + +bool PendingEOF::Retry() const + { + return file_mgr->EndOfFile(conn, is_orig); + } + +PendingSize::PendingSize(uint64 arg_size, Connection* arg_conn, + bool arg_is_orig) + : PendingFile(arg_conn, arg_is_orig), size(arg_size) + { + } + +bool PendingSize::Retry() const + { + return file_mgr->SetSize(size, conn, is_orig); } diff --git a/src/file_analysis/PendingFile.h b/src/file_analysis/PendingFile.h index 81a6ee51d3..53e760e998 100644 --- a/src/file_analysis/PendingFile.h +++ b/src/file_analysis/PendingFile.h @@ -8,28 +8,87 @@ namespace file_analysis { class PendingFile { public: - PendingFile(const u_char* arg_data, uint64 arg_len, uint64 arg_offset, - Connection* arg_conn, bool arg_is_orig); + virtual ~PendingFile(); - PendingFile(const u_char* arg_data, uint64 arg_len, - Connection* arg_conn, bool arg_is_orig); + virtual bool Retry() const = 0; - PendingFile(const PendingFile& other); + bool IsStale() const; - PendingFile& operator=(const PendingFile& other); +protected: - ~PendingFile(); + PendingFile(Connection* arg_conn, bool arg_is_orig); - void Retry() const; + Connection* conn; + bool is_orig; + double creation_time; +}; -private: +class PendingDataInChunk : public PendingFile { +public: + + PendingDataInChunk(const u_char* arg_data, uint64 arg_len, + uint64 arg_offset, Connection* arg_conn, + bool arg_is_orig); + + virtual ~PendingDataInChunk(); + + virtual bool Retry() const; + +protected: - bool is_linear; const u_char* data; uint64 len; uint64 offset; - Connection* conn; - bool is_orig; +}; + +class PendingDataInStream : public PendingFile { +public: + + PendingDataInStream(const u_char* arg_data, uint64 arg_len, + Connection* arg_conn, bool arg_is_orig); + + virtual ~PendingDataInStream(); + + virtual bool Retry() const; + +protected: + + const u_char* data; + uint64 len; +}; + +class PendingGap : public PendingFile { +public: + + PendingGap(uint64 arg_offset, uint64 arg_len, Connection* arg_conn, + bool arg_is_orig); + + virtual bool Retry() const; + +protected: + + uint64 offset; + uint64 len; +}; + +class PendingEOF : public PendingFile { +public: + + PendingEOF(Connection* arg_conn, bool arg_is_orig); + + virtual bool Retry() const; +}; + +class PendingSize : public PendingFile { +public: + + PendingSize(uint64 arg_size, Connection* arg_conn, bool arg_is_orig); + + virtual bool Retry() const; + +protected: + + uint64 size; }; } // namespace file_analysis