From 3dd513e26e1b4ad2d83483b5bd47a4510c56c27f Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Tue, 12 Mar 2013 13:40:18 -0500 Subject: [PATCH] FileAnalysis: move unique file handle string generation to script-layer And add minimal integration with HTTP analyzer. --- .../base/frameworks/file-analysis/main.bro | 35 +++++ scripts/base/init-bare.bro | 2 +- scripts/base/protocols/http/main.bro | 7 + src/CMakeLists.txt | 1 + src/FileAnalyzer.cc | 12 +- src/HTTP.cc | 9 ++ src/Net.cc | 2 + src/RemoteSerializer.cc | 2 + src/file_analysis.bif | 20 +-- src/file_analysis/ActionSet.cc | 2 +- src/file_analysis/ActionSet.h | 2 +- src/file_analysis/FileID.h | 4 +- src/file_analysis/Info.cc | 45 ++++-- src/file_analysis/Info.h | 2 +- src/file_analysis/Manager.cc | 141 +++++++++++++++--- src/file_analysis/Manager.h | 63 ++++++-- src/file_analysis/PendingFile.cc | 60 ++++++++ src/file_analysis/PendingFile.h | 37 +++++ 18 files changed, 365 insertions(+), 81 deletions(-) create mode 100644 src/file_analysis/PendingFile.cc create mode 100644 src/file_analysis/PendingFile.h diff --git a/scripts/base/frameworks/file-analysis/main.bro b/scripts/base/frameworks/file-analysis/main.bro index 68246761cf..7bc62997ae 100644 --- a/scripts/base/frameworks/file-analysis/main.bro +++ b/scripts/base/frameworks/file-analysis/main.bro @@ -5,6 +5,11 @@ # TODO: do logging here? @load base/frameworks/logging +# dependencies for file handle determination +@load base/protocols/http/main +@load base/protocols/http/utils +@load base/protocols/ftp/main + module FileAnalysis; export { @@ -105,5 +110,35 @@ export { ## TODO: document global policy: hook(trig: Trigger, info: Info); + global get_handle: function(c: connection, is_orig: bool): string &redef; + # TODO: wrapper functions for BiFs ? 
} + +function conn_str(c: connection): string + { + return fmt("%s:%s -> %s:%s", c$id$orig_h, c$id$orig_p, + c$id$resp_h, c$id$resp_p); + } + +function get_handle(c: connection, is_orig: bool): string + { + local rval: string = ""; + local cid: conn_id = c$id; + + if ( "ftp-data" in c$service ) + rval = fmt("%s: %s", "ftp-data", conn_str(c)); + + else if ( c?$http ) + { + if ( c$http$range_request ) + rval = fmt("http(%s): %s: %s", is_orig, c$id$orig_h, + HTTP::build_url(c$http)); + else + rval = fmt("http(%s, %s): %s", is_orig, c$http$trans_depth, + conn_str(c)); + } + + #print fmt("file handle: %s", rval); + return rval; + } diff --git a/scripts/base/init-bare.bro b/scripts/base/init-bare.bro index bad8be6062..798eb387b3 100644 --- a/scripts/base/init-bare.bro +++ b/scripts/base/init-bare.bro @@ -300,7 +300,7 @@ type connection: record { ## one protocol analyzer is able to parse the same data. If so, all will ## be recorded. Also note that the recorced services are independent of any ## transport-level protocols. - service: set[string]; + service: set[string]; addl: string; ##< Deprecated. hot: count; ##< Deprecated. history: string; ##< State history of connections. See *history* in :bro:see:`Conn::Info`. diff --git a/scripts/base/protocols/http/main.bro b/scripts/base/protocols/http/main.bro index 660386f901..260c0e525a 100644 --- a/scripts/base/protocols/http/main.bro +++ b/scripts/base/protocols/http/main.bro @@ -71,6 +71,10 @@ export { ## All of the headers that may indicate if the request was proxied. proxied: set[string] &log &optional; + + ## Indicates if this request can assume 206 partial content in + ## response. + range_request: bool &default=F; }; ## Structure to maintain state for an HTTP connection with multiple @@ -235,6 +239,9 @@ event http_header(c: connection, is_orig: bool, name: string, value: string) &pr else if ( name == "HOST" ) # The split is done to remove the occasional port value that shows up here. 
c$http$host = split1(value, /:/)[1]; + + else if ( name == "RANGE" ) + c$http$range_request = T; else if ( name == "USER-AGENT" ) c$http$user_agent = value; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index e60a68a5a3..41e2a2bec5 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -452,6 +452,7 @@ set(bro_SRCS file_analysis/Manager.cc file_analysis/Info.cc file_analysis/InfoTimer.cc + file_analysis/PendingFile.cc file_analysis/FileID.h file_analysis/Action.h file_analysis/ActionSet.cc diff --git a/src/FileAnalyzer.cc b/src/FileAnalyzer.cc index 1220b0ec87..c736ebf5c4 100644 --- a/src/FileAnalyzer.cc +++ b/src/FileAnalyzer.cc @@ -15,19 +15,13 @@ File_Analyzer::File_Analyzer(Connection* conn) bro_init_magic(&magic, MAGIC_NONE); bro_init_magic(&magic_mime, MAGIC_MIME); - - char op[256], rp[256]; - modp_ulitoa10(ntohs(conn->OrigPort()), op); - modp_ulitoa10(ntohs(conn->RespPort()), rp); - unique_file = "TCPFile " + conn->OrigAddr().AsString() + ":" + op + "->" + - conn->RespAddr().AsString() + ":" + rp; } void File_Analyzer::DeliverStream(int len, const u_char* data, bool orig) { TCP_ApplicationAnalyzer::DeliverStream(len, data, orig); - file_mgr->DataIn(unique_file, data, len, Conn()); + file_mgr->DataIn(data, len, Conn(), orig); int n = min(len, BUFFER_SIZE - buffer_len); @@ -46,14 +40,14 @@ void File_Analyzer::Undelivered(int seq, int len, bool orig) { TCP_ApplicationAnalyzer::Undelivered(seq, len, orig); - file_mgr->Gap(unique_file, seq, len); + file_mgr->Gap(seq, len, Conn(), orig); } void File_Analyzer::Done() { TCP_ApplicationAnalyzer::Done(); - file_mgr->EndOfFile(unique_file, Conn()); + file_mgr->EndOfFile(Conn()); if ( buffer_len && buffer_len != BUFFER_SIZE ) Identify(); diff --git a/src/HTTP.cc b/src/HTTP.cc index 7e4079b853..292db83765 100644 --- a/src/HTTP.cc +++ b/src/HTTP.cc @@ -12,6 +12,7 @@ #include "HTTP.h" #include "Event.h" #include "MIME.h" +#include "file_analysis/Manager.h" const bool DEBUG_http = false; @@ -194,6 +195,12 @@ 
void HTTP_Entity::DeliverBody(int len, const char* data, int trailing_CRLF) } else DeliverBodyClear(len, data, trailing_CRLF); + + file_mgr->DataIn(reinterpret_cast(data), len, + http_message->MyHTTP_Analyzer()->Conn(), + http_message->IsOrig()); + // TODO: set size if we have content_length? + // TODO: handle partial content and multipart/byteranges } void HTTP_Entity::DeliverBodyClear(int len, const char* data, int trailing_CRLF) @@ -586,6 +593,8 @@ void HTTP_Message::EndEntity(MIME_Entity* entity) // SubmitAllHeaders (through EndOfData). if ( entity == top_level ) Done(); + + file_mgr->EndOfFile(MyHTTP_Analyzer()->Conn(), is_orig); } void HTTP_Message::SubmitHeader(MIME_Header* h) diff --git a/src/Net.cc b/src/Net.cc index 73c618b8af..d69337bd63 100644 --- a/src/Net.cc +++ b/src/Net.cc @@ -30,6 +30,7 @@ #include "PacketSort.h" #include "Serializer.h" #include "PacketDumper.h" +#include "file_analysis/Manager.h" extern "C" { #include "setsignal.h" @@ -352,6 +353,7 @@ void net_packet_dispatch(double t, const struct pcap_pkthdr* hdr, sessions->DispatchPacket(t, hdr, pkt, hdr_size, src_ps, pkt_elem); mgr.Drain(); + file_mgr->DrainPending(); if ( sp ) { diff --git a/src/RemoteSerializer.cc b/src/RemoteSerializer.cc index 66f8def489..80c839a1da 100644 --- a/src/RemoteSerializer.cc +++ b/src/RemoteSerializer.cc @@ -192,6 +192,7 @@ #include "logging/Manager.h" #include "IPAddr.h" #include "bro_inet_ntop.h" +#include "file_analysis/Manager.h" extern "C" { #include "setsignal.h" @@ -1462,6 +1463,7 @@ void RemoteSerializer::Process() current_iosrc = this; sessions->NextPacket(p->time, p->hdr, p->pkt, p->hdr_size, 0); mgr.Drain(); + file_mgr->DrainPending(); current_hdr = 0; // done with these current_pkt = 0; diff --git a/src/file_analysis.bif b/src/file_analysis.bif index 91c235edfe..9bea289444 100644 --- a/src/file_analysis.bif +++ b/src/file_analysis.bif @@ -96,41 +96,31 @@ function FileAnalysis::stop%(file_id: string%): bool function FileAnalysis::input_data%(source: 
string, data: string%): any %{ - string s = source->CheckString(); - string unique = "BIF " + s; - file_mgr->DataIn(unique, data->Bytes(), data->Len(), 0, s); + file_mgr->DataIn(data->Bytes(), data->Len(), source->CheckString()); return 0; %} function FileAnalysis::input_data_chunk%(source: string, data: string, offset: count%): any %{ - string s = source->CheckString(); - string unique = "BIF " + s; - file_mgr->DataIn(unique, data->Bytes(), data->Len(), offset, 0, s); + file_mgr->DataIn(data->Bytes(), data->Len(), offset, source->CheckString()); return 0; %} function FileAnalysis::gap%(source: string, offset: count, len: count%): any %{ - string s = source->CheckString(); - string unique = "BIF " + s; - file_mgr->Gap(unique, offset, len, 0, s); + file_mgr->Gap(offset, len, source->CheckString()); return 0; %} function FileAnalysis::set_size%(source: string, size: count%): any %{ - string s = source->CheckString(); - string unique = "BIF " + s; - file_mgr->SetSize(unique, size, 0, s); + file_mgr->SetSize(size, source->CheckString()); return 0; %} function FileAnalysis::input_eof%(source: string%): any %{ - string s = source->CheckString(); - string unique = "BIF "+ s; - file_mgr->EndOfFile(unique, 0, s); + file_mgr->EndOfFile(source->CheckString()); return 0; %} diff --git a/src/file_analysis/ActionSet.cc b/src/file_analysis/ActionSet.cc index c615484509..2e055a5c24 100644 --- a/src/file_analysis/ActionSet.cc +++ b/src/file_analysis/ActionSet.cc @@ -168,7 +168,7 @@ void ActionSet::InsertAction(Action* act, HashKey* key) new RecordVal(BifType::Record::FileAnalysis::ActionResults)); } -void ActionSet::FlushQueuedModifications() +void ActionSet::DrainModifications() { if ( mod_queue.empty() ) return; diff --git a/src/file_analysis/ActionSet.h b/src/file_analysis/ActionSet.h index 506fdf5c77..e1f1355aa9 100644 --- a/src/file_analysis/ActionSet.h +++ b/src/file_analysis/ActionSet.h @@ -43,7 +43,7 @@ public: /** * Perform all queued modifications to the currently active 
actions. */ - void FlushQueuedModifications(); + void DrainModifications(); IterCookie* InitForIteration() const { return action_map.InitForIteration(); } diff --git a/src/file_analysis/FileID.h b/src/file_analysis/FileID.h index c339445ea8..6d594d6b37 100644 --- a/src/file_analysis/FileID.h +++ b/src/file_analysis/FileID.h @@ -9,8 +9,8 @@ namespace file_analysis { * identify files, and others which use a pretty hash (the FileID) to identify * files. A FileID is primarily used in methods which interface with the * script-layer, while the unique strings are used for methods which interface - * with protocol analyzers (to better accomodate the possibility that a file - * can be distributed over different connections and thus analyzer instances). + * with protocol analyzers or anything that sends data to the file analysis + * framework. */ struct FileID { string id; diff --git a/src/file_analysis/Info.cc b/src/file_analysis/Info.cc index 5929be966f..f1af9ffff1 100644 --- a/src/file_analysis/Info.cc +++ b/src/file_analysis/Info.cc @@ -74,7 +74,7 @@ void Info::InitFieldIndices() actions_idx = Idx("actions"); } -Info::Info(const string& unique, Connection* conn, const string& source) +Info::Info(const string& unique, Connection* conn) : file_id(unique), unique(unique), val(0), last_activity_time(network_time), postpone_timeout(false), need_reassembly(false), done(false), actions(this) @@ -93,10 +93,31 @@ Info::Info(const string& unique, Connection* conn, const string& source) val->Assign(file_id_idx, new StringVal(id)); file_id = FileID(id); - UpdateConnectionFields(conn); + if ( conn ) + { + // update source and connection fields + RecordVal* cval = conn->BuildConnVal(); + ListVal* services = cval->Lookup(5)->AsTableVal()->ConvertToPureList(); + Unref(cval); + string source; - if ( ! 
source.empty() ) - val->Assign(source_idx, new StringVal(source.c_str())); + for ( int i = 0; i < services->Length(); ++i ) + { + if ( i > 0 ) + source += ", "; + source += services->Index(i)->AsStringVal()->CheckString(); + } + + Unref(services); + + if ( ! source.empty() ) + val->Assign(source_idx, new StringVal(source.c_str())); + + UpdateConnectionFields(conn); + } + else + // use the unique file handle as source + val->Assign(source_idx, new StringVal(unique.c_str())); } Info::~Info() @@ -263,7 +284,7 @@ void Info::ReplayBOF() void Info::DataIn(const u_char* data, uint64 len, uint64 offset) { - actions.FlushQueuedModifications(); + actions.DrainModifications(); // TODO: attempt libmagic stuff here before doing reassembly? Action* act = 0; @@ -275,7 +296,7 @@ void Info::DataIn(const u_char* data, uint64 len, uint64 offset) actions.QueueRemoveAction(act->Args()); } - actions.FlushQueuedModifications(); + actions.DrainModifications(); // TODO: check reassembly requirement based on buffer size in record if ( need_reassembly ) @@ -290,7 +311,7 @@ void Info::DataIn(const u_char* data, uint64 len, uint64 offset) void Info::DataIn(const u_char* data, uint64 len) { - actions.FlushQueuedModifications(); + actions.DrainModifications(); if ( BufferBOF(data, len) ) return; @@ -312,7 +333,7 @@ void Info::DataIn(const u_char* data, uint64 len) actions.QueueRemoveAction(act->Args()); } - actions.FlushQueuedModifications(); + actions.DrainModifications(); IncrementByteCount(len, seen_bytes_idx); } @@ -321,7 +342,7 @@ void Info::EndOfFile() if ( done ) return; done = true; - actions.FlushQueuedModifications(); + actions.DrainModifications(); // Send along anything that's been buffered, but never flushed. 
ReplayBOF(); @@ -340,12 +361,12 @@ void Info::EndOfFile() else file_mgr->EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_EOF, this); - actions.FlushQueuedModifications(); + actions.DrainModifications(); } void Info::Gap(uint64 offset, uint64 len) { - actions.FlushQueuedModifications(); + actions.DrainModifications(); // If we were buffering the beginning of the file, a gap means we've got // as much contiguous stuff at the beginning as possible, so work with that. @@ -362,6 +383,6 @@ void Info::Gap(uint64 offset, uint64 len) file_mgr->EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_GAP, this); - actions.FlushQueuedModifications(); + actions.DrainModifications(); IncrementByteCount(len, missing_bytes_idx); } diff --git a/src/file_analysis/Info.h b/src/file_analysis/Info.h index a02262c99d..8b4c10473c 100644 --- a/src/file_analysis/Info.h +++ b/src/file_analysis/Info.h @@ -117,7 +117,7 @@ protected: /** * Constructor; only file_analysis::Manager should be creating these. */ - Info(const string& unique, Connection* conn = 0, const string& source = ""); + Info(const string& unique, Connection* conn = 0); /** * Updates the "conn_ids" and "conn_uids" fields in #val record with the diff --git a/src/file_analysis/Manager.cc b/src/file_analysis/Manager.cc index 85dbe8ff39..81fa9824c2 100644 --- a/src/file_analysis/Manager.cc +++ b/src/file_analysis/Manager.cc @@ -16,6 +16,32 @@ Manager::~Manager() Terminate(); } +string Manager::GetFileHandle(Connection* conn, bool is_orig) + { + if ( ! 
conn ) return ""; + + const ID* id = global_scope()->Lookup("FileAnalysis::get_handle"); + assert(id); + const Func* func = id->ID_Val()->AsFunc(); + + val_list vl(2); + vl.append(conn->BuildConnVal()); + vl.append(new Val(is_orig, TYPE_BOOL)); + + Val* result = func->Call(&vl); + string rval = result->AsString()->CheckString(); + Unref(result); + return rval; + } + +void Manager::DrainPending() + { + for ( size_t i = 0; i < pending.size(); ++i ) + pending[i].Retry(); + + pending.clear(); + } + void Manager::Terminate() { vector keys; @@ -25,66 +51,134 @@ void Manager::Terminate() Timeout(keys[i], true); } -void Manager::DataIn(const string& unique, const u_char* data, uint64 len, - uint64 offset, Connection* conn, const string& source) +void Manager::DataIn(const u_char* data, uint64 len, uint64 offset, + Connection* conn, bool is_orig, bool allow_retry) { - if ( IsIgnored(unique) ) return; + string unique = GetFileHandle(conn, is_orig); - Info* info = GetInfo(unique, conn, source); + if ( ! unique.empty() ) + { + DataIn(data, len, offset, GetInfo(unique, conn)); + return; + } + if ( allow_retry ) + pending.push_back(PendingFile(data, len, offset, conn, is_orig)); + } + +void Manager::DataIn(const u_char* data, uint64 len, uint64 offset, + const string& unique) + { + DataIn(data, len, offset, GetInfo(unique)); + } + +void Manager::DataIn(const u_char* data, uint64 len, uint64 offset, + Info* info) + { if ( ! info ) return; info->DataIn(data, len, offset); if ( info->IsComplete() ) - RemoveFile(unique); + RemoveFile(info->GetUnique()); } -void Manager::DataIn(const string& unique, const u_char* data, uint64 len, - Connection* conn, const string& source) +void Manager::DataIn(const u_char* data, uint64 len, Connection* conn, + bool is_orig, bool allow_retry) { - Info* info = GetInfo(unique, conn, source); + string unique = GetFileHandle(conn, is_orig); + if ( ! 
unique.empty() ) + { + DataIn(data, len, GetInfo(unique, conn)); + return; + } + + if ( allow_retry ) + pending.push_back(PendingFile(data, len, conn, is_orig)); + } + +void Manager::DataIn(const u_char* data, uint64 len, const string& unique) + { + DataIn(data, len, GetInfo(unique)); + } + +void Manager::DataIn(const u_char* data, uint64 len, Info* info) + { if ( ! info ) return; info->DataIn(data, len); if ( info->IsComplete() ) - RemoveFile(unique); + RemoveFile(info->GetUnique()); } -void Manager::EndOfFile(const string& unique, Connection* conn, - const string& source) +void Manager::EndOfFile(Connection* conn) { - // Just call GetInfo because maybe the conn/source args will update - // something in the Info record. - GetInfo(unique, conn, source); + EndOfFile(conn, true); + EndOfFile(conn, false); + } + +void Manager::EndOfFile(Connection* conn, bool is_orig) + { + string unique = GetFileHandle(conn, is_orig); + + if ( unique.empty() ) return; // nothing to do + RemoveFile(unique); } -void Manager::Gap(const string& unique, uint64 offset, uint64 len, - Connection* conn, const string& source) +void Manager::EndOfFile(const string& unique) { - Info* info = GetInfo(unique, conn, source); + RemoveFile(unique); + } +void Manager::Gap(uint64 offset, uint64 len, Connection* conn, bool is_orig) + { + string unique = GetFileHandle(conn, is_orig); + + if ( unique.empty() ) return; // nothing to do since no data has been seen + + Gap(offset, len, GetInfo(unique, conn)); + } + +void Manager::Gap(uint64 offset, uint64 len, const string& unique) + { + Gap(offset, len, GetInfo(unique)); + } + +void Manager::Gap(uint64 offset, uint64 len, Info* info) + { if ( ! 
info ) return; info->Gap(offset, len); } -void Manager::SetSize(const string& unique, uint64 size, - Connection* conn, const string& source) +void Manager::SetSize(uint64 size, Connection* conn, bool is_orig) { - Info* info = GetInfo(unique, conn, source); + string unique = GetFileHandle(conn, is_orig); + if ( unique.empty() ) return; // ok assuming this always follows a DataIn() + + SetSize(size, GetInfo(unique, conn)); + } + +void Manager::SetSize(uint64 size, const string& unique) + { + SetSize(size, GetInfo(unique)); + } + +void Manager::SetSize(uint64 size, Info* info) + { if ( ! info ) return; info->SetTotalBytes(size); if ( info->IsComplete() ) - RemoveFile(unique); + RemoveFile(info->GetUnique()); } + void Manager::EvaluatePolicy(BifEnum::FileAnalysis::Trigger t, Info* info) { if ( IsIgnored(info->GetUnique()) ) return; @@ -131,8 +225,7 @@ bool Manager::RemoveAction(const FileID& file_id, const RecordVal* args) const return info->RemoveAction(args); } -Info* Manager::GetInfo(const string& unique, Connection* conn, - const string& source) +Info* Manager::GetInfo(const string& unique, Connection* conn) { if ( IsIgnored(unique) ) return 0; @@ -140,7 +233,7 @@ Info* Manager::GetInfo(const string& unique, Connection* conn, if ( ! rval ) { - rval = str_map[unique] = new Info(unique, conn, source); + rval = str_map[unique] = new Info(unique, conn); FileID id = rval->GetFileID(); if ( id_map[id] ) diff --git a/src/file_analysis/Manager.h b/src/file_analysis/Manager.h index f17bdef540..0a660add26 100644 --- a/src/file_analysis/Manager.h +++ b/src/file_analysis/Manager.h @@ -4,6 +4,7 @@ #include #include #include +#include #include "Net.h" #include "Conn.h" @@ -12,6 +13,7 @@ #include "Info.h" #include "InfoTimer.h" #include "FileID.h" +#include "PendingFile.h" namespace file_analysis { @@ -25,6 +27,17 @@ public: ~Manager(); + /** + * Attempts to forward the data from any pending file contents, i.e. 
+ * those for which a unique file handle string could not immediately + * be determined. If again a file handle can't be determined, give up. + * The assumption for this to work correctly is that the EventMgr would + * have always drained between packet boundaries, so calling this method + * at that time may mean the script-layer function for generating file + * handles can now come up with a result. + */ + void DrainPending(); + /** * Times out any active file analysis to prepare for shutdown. */ @@ -33,33 +46,41 @@ public: /** * Pass in non-sequential file data. */ - void DataIn(const string& unique, const u_char* data, uint64 len, - uint64 offset, Connection* conn = 0, - const string& source = ""); + void DataIn(const u_char* data, uint64 len, uint64 offset, + Connection* conn, bool is_orig, bool allow_retry = true); + void DataIn(const u_char* data, uint64 len, uint64 offset, + const string& unique); + void DataIn(const u_char* data, uint64 len, uint64 offset, + Info* info); /** * Pass in sequential file data. */ - void DataIn(const string& unique, const u_char* data, uint64 len, - Connection* conn = 0, const string& source = ""); + void DataIn(const u_char* data, uint64 len, Connection* conn, bool is_orig, + bool allow_retry = true); + void DataIn(const u_char* data, uint64 len, const string& unique); + void DataIn(const u_char* data, uint64 len, Info* info); /** * Signal the end of file data. */ - void EndOfFile(const string& unique, Connection* conn = 0, - const string& source = ""); + void EndOfFile(Connection* conn); + void EndOfFile(Connection* conn, bool is_orig); + void EndOfFile(const string& unique); /** * Signal a gap in the file data stream. 
*/ - void Gap(const string& unique, uint64 offset, uint64 len, - Connection* conn = 0, const string& source = ""); + void Gap(uint64 offset, uint64 len, Connection* conn, bool is_orig); + void Gap(uint64 offset, uint64 len, const string& unique); + void Gap(uint64 offset, uint64 len, Info* info); /** * Provide the expected number of bytes that comprise a file. */ - void SetSize(const string& unique, uint64 size, Connection* conn = 0, - const string& source = ""); + void SetSize(uint64 size, Connection* conn, bool is_orig); + void SetSize(uint64 size, const string& unique); + void SetSize(uint64 size, Info* info); /** * Starts ignoring a file, which will finally be removed from internal @@ -96,20 +117,31 @@ public: protected: friend class InfoTimer; + friend class PendingFile; typedef map StrMap; typedef set StrSet; typedef map IDMap; + typedef vector PendingList; /** * @return the Info object mapped to \a unique or a null pointer if analysis * is being ignored for the associated file. An Info object may be * created if a mapping doesn't exist, and if it did exist, the - * activity time is refreshed and connection-related fields of the - * record value may be updated. + * activity time is refreshed along with any connection-related + * fields. */ - Info* GetInfo(const string& unique, Connection* conn = 0, - const string& source = ""); + Info* GetInfo(const string& unique, Connection* conn = 0); + + /** + * @return a string which can uniquely identify the file being transported + * over the connection. A script-layer function is evaluated in + * order to determine the unique string. An empty string means + * a unique handle for the file couldn't be determined at the + * time the function was evaluated (possibly because some events + * have not yet been drained from the queue). 
+ */ + string GetFileHandle(Connection* conn, bool is_orig); /** * @return the Info object mapped to \a file_id, or a null pointer if no @@ -137,6 +169,7 @@ protected: StrMap str_map; /**< Map unique strings to \c FileAnalysis::Info records. */ IDMap id_map; /**< Map file IDs to \c FileAnalysis::Info records. */ StrSet ignored; /**< Ignored files. Will be finally removed on EOF. */ + PendingList pending; /**< Files waiting for next Tick to return a handle */ }; } // namespace file_analysis diff --git a/src/file_analysis/PendingFile.cc b/src/file_analysis/PendingFile.cc new file mode 100644 index 0000000000..90029703b1 --- /dev/null +++ b/src/file_analysis/PendingFile.cc @@ -0,0 +1,60 @@ +#include "PendingFile.h" +#include "Manager.h" + +using namespace file_analysis; + +PendingFile::PendingFile(const u_char* arg_data, uint64 arg_len, + uint64 arg_offset, Connection* arg_conn, + bool arg_is_orig) + : is_linear(false), data(arg_data), len(arg_len), offset(arg_offset), + conn(arg_conn), is_orig(arg_is_orig) + { + Ref(conn); + } + +PendingFile::PendingFile(const u_char* arg_data, uint64 arg_len, + Connection* arg_conn, bool arg_is_orig) + : is_linear(true), data(arg_data), len(arg_len), offset(0), + conn(arg_conn), is_orig(arg_is_orig) + { + Ref(conn); + } + +PendingFile::PendingFile(const PendingFile& other) + : is_linear(other.is_linear), data(other.data), len(other.len), + offset(other.offset), conn(other.conn), is_orig(other.is_orig) + { + Ref(conn); + } + +PendingFile& PendingFile::operator=(const PendingFile& other) + { + // handle self-assign for correct reference counting + if ( this == &other ) return *this; + + Unref(conn); + + is_linear = other.is_linear; + data = other.data; + len = other.len; + offset = other.offset; + conn = other.conn; + is_orig = other.is_orig; + + Ref(conn); + + return *this; + } + +PendingFile::~PendingFile() + { + Unref(conn); + } + +void PendingFile::Retry() const + { + if ( is_linear ) + file_mgr->DataIn(data, len, conn, is_orig, 
false); + else + file_mgr->DataIn(data, len, offset, conn, is_orig, false); + } diff --git a/src/file_analysis/PendingFile.h b/src/file_analysis/PendingFile.h new file mode 100644 index 0000000000..81a6ee51d3 --- /dev/null +++ b/src/file_analysis/PendingFile.h @@ -0,0 +1,37 @@ +#ifndef FILE_ANALYSIS_PENDINGFILE_H +#define FILE_ANALYSIS_PENDINGFILE_H + +#include "Conn.h" + +namespace file_analysis { + +class PendingFile { +public: + + PendingFile(const u_char* arg_data, uint64 arg_len, uint64 arg_offset, + Connection* arg_conn, bool arg_is_orig); + + PendingFile(const u_char* arg_data, uint64 arg_len, + Connection* arg_conn, bool arg_is_orig); + + PendingFile(const PendingFile& other); + + PendingFile& operator=(const PendingFile& other); + + ~PendingFile(); + + void Retry() const; + +private: + + bool is_linear; + const u_char* data; + uint64 len; + uint64 offset; + Connection* conn; + bool is_orig; +}; + +} // namespace file_analysis + +#endif