From 38dbba762275e7ce2281445b56344eb6c82d49e1 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Sun, 5 Jan 2014 04:58:01 -0500 Subject: [PATCH] More file reassembly work. - The reassembly behavior can be modified per-file by enabling or disabling the reassembler and/or modifying the size of the reassembly buffer. - Changed the file extraction analyzer to use the stream to avoid issues with the chunk based approach not immediately triggering the file_new event due to mime-type detection delay. Early chunks frequently ended up lost before. - Generally things are working now and I'd consider this in testing. --- scripts/base/frameworks/files/main.bro | 60 +++++- scripts/base/init-bare.bro | 5 +- src/SerialTypes.h | 1 + src/event.bif | 19 ++ src/file_analysis/File.cc | 195 +++++++++--------- src/file_analysis/File.h | 39 +++- src/file_analysis/FileReassembler.cc | 28 +-- src/file_analysis/FileReassembler.h | 12 +- src/file_analysis/Manager.cc | 33 +++ src/file_analysis/Manager.h | 15 ++ src/file_analysis/analyzer/extract/Extract.cc | 27 ++- src/file_analysis/analyzer/extract/Extract.h | 3 +- src/file_analysis/analyzer/extract/events.bif | 4 +- src/file_analysis/file_analysis.bif | 21 ++ .../out | 1 + .../bro..stdout | 12 +- .../out | 8 + .../a.out | 2 + .../b.out | 12 +- .../c.out | 9 +- .../out | 8 + .../out | 16 ++ .../files.log | 4 +- 23 files changed, 375 insertions(+), 159 deletions(-) diff --git a/scripts/base/frameworks/files/main.bro b/scripts/base/frameworks/files/main.bro index cf2c11be45..14e1228fc4 100644 --- a/scripts/base/frameworks/files/main.bro +++ b/scripts/base/frameworks/files/main.bro @@ -99,8 +99,9 @@ export { ## during the process of analysis e.g. due to dropped packets. missing_bytes: count &log &default=0; - ## The number of not all-in-sequence bytes in the file stream that - ## were delivered to file analyzers due to reassembly buffer overflow. + ## The number of bytes in the file stream that were not delivered to + ## stream file analyzers. This could be overlapping bytes or + ## bytes that couldn't be reassembled. overflow_bytes: count &log &default=0; ## Whether the file analysis timed out at least once for the file. @@ -123,6 +124,33 @@ export { ## generate two handles that would hash to the same file id. const salt = "I recommend changing this." &redef; + ## The default setting for if the file reassembler is enabled for + ## each file. + const enable_reassembler = T &redef; + + ## The default allow per-file reassembly buffer size. + const reassembly_buffer_size = 1048576 &redef; + + ## Allows the file reassembler to be used if it's necessary because the + ## file is transferred out of order. + ## + ## f: the file. + global enable_reassembly: function(f: fa_file); + + ## Disables the file reassembler on this file. If the file is not + ## transferred out of order this will have no effect. + ## + ## f: the file. + global disable_reassembly: function(f: fa_file); + + ## Set the maximum size the reassembly buffer is allowed to grow + ## for the given file. + ## + ## f: the file. + ## + ## max: Maximum allowed size of the reassembly buffer. + global set_reassembly_buffer_size: function(f: fa_file, max: count); + ## Sets the *timeout_interval* field of :bro:see:`fa_file`, which is ## used to determine the length of inactivity that is allowed for a file ## before internal state related to it is cleaned up. When used within @@ -273,6 +301,21 @@ function set_timeout_interval(f: fa_file, t: interval): bool return __set_timeout_interval(f$id, t); } +function enable_reassembly(f: fa_file) + { + __enable_reassembly(f$id); + } + +function disable_reassembly(f: fa_file) + { + __disable_reassembly(f$id); + } + +function set_reassembly_buffer_size(f: fa_file, max: count) + { + __set_reassembly_buffer(f$id, max); + } + function add_analyzer(f: fa_file, tag: Files::Tag, args: AnalyzerArgs): bool { add f$info$analyzers[Files::analyzer_name(tag)]; @@ -311,11 +354,24 @@ function analyzer_name(tag: Files::Tag): string event file_new(f: fa_file) &priority=10 { set_info(f); + + if ( enable_reassembler ) + { + Files::enable_reassembly(f); + Files::set_reassembly_buffer_size(f, reassembly_buffer_size); + } } event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=10 { set_info(f); + + if ( enable_reassembler ) + { + Files::enable_reassembly(f); + Files::set_reassembly_buffer_size(f, reassembly_buffer_size); + } + add f$info$conn_uids[c$uid]; local cid = c$id; add f$info$tx_hosts[f$is_orig ? cid$orig_h : cid$resp_h]; diff --git a/scripts/base/init-bare.bro b/scripts/base/init-bare.bro index c58559bd9a..5c60ec6690 100644 --- a/scripts/base/init-bare.bro +++ b/scripts/base/init-bare.bro @@ -355,8 +355,9 @@ type fa_file: record { ## during the process of analysis e.g. due to dropped packets. missing_bytes: count &default=0; - ## The number of not all-in-sequence bytes in the file stream that - ## were not delivered to file analyzers due to reassembly buffer overflow. + ## The number of bytes in the file stream that were not delivered to + ## stream file analyzers. This could be overlapping bytes or + ## bytes that couldn't be reassembled. overflow_bytes: count &default=0; ## The amount of time between receiving new data for this file that diff --git a/src/SerialTypes.h b/src/SerialTypes.h index 69927afb74..7f79328083 100644 --- a/src/SerialTypes.h +++ b/src/SerialTypes.h @@ -87,6 +87,7 @@ SERIAL_TCP_CONTENTS(TCP_NVT, 3) #define SERIAL_REASSEMBLER(name, val) SERIAL_CONST(name, val, REASSEMBLER) SERIAL_REASSEMBLER(REASSEMBLER, 1) SERIAL_REASSEMBLER(TCP_REASSEMBLER, 2) +SERIAL_REASSEMBLER(FILE_REASSEMBLER, 3) #define SERIAL_VAL(name, val) SERIAL_CONST(name, val, VAL) SERIAL_VAL(VAL, 1) diff --git a/src/event.bif b/src/event.bif index 4237bebc7b..25df2d823f 100644 --- a/src/event.bif +++ b/src/event.bif @@ -935,8 +935,27 @@ event file_timeout%(f: fa_file%); ## len: The number of missing bytes. ## ## .. bro:see:: file_new file_over_new_connection file_timeout file_state_remove +## file_reassembly_buffer_overflow event file_gap%(f: fa_file, offset: count, len: count%); +## Indicates that the file had an overflow of the reassembly buffer. +## This is a specialization of the :bro:id:`file_gap` event. +## +## f: The file. +## +## offset: The byte offset from the start of the file at which the reassembly +## couldn't continue due to running out of reassembly buffer space. +## +## skipped: The number of bytes of the file skipped over to flush some +## file data and get back under the reassembly buffer size limit. +## This value will also be represented as a gap. +## +## .. bro:see:: file_new file_over_new_connection file_timeout file_state_remove +## file_gap Files::enable_reassembler Files::reassembly_buffer_size +## Files::enable_reassembly Files::disable_reassembly +## Files::set_reassembly_buffer_size +event file_reassembly_buffer_overflow%(f: fa_file, offset: count, skipped: count%); + ## This event is generated each time file analysis is ending for a given file. ## ## f: The file. diff --git a/src/file_analysis/File.cc b/src/file_analysis/File.cc index d53c45fe06..269a78e396 100644 --- a/src/file_analysis/File.cc +++ b/src/file_analysis/File.cc @@ -3,7 +3,6 @@ #include #include "File.h" -#include "FileReassembler.h" #include "FileTimer.h" #include "Analyzer.h" #include "Manager.h" @@ -77,8 +76,8 @@ void File::StaticInit() File::File(const string& file_id, Connection* conn, analyzer::Tag tag, bool is_orig) - : id(file_id), val(0), postpone_timeout(false), first_chunk(true), - missed_bof(false), need_reassembly(false), done(false), + : id(file_id), val(0), stream_offset(0), reassembly_max_buffer(0), + reassembly_enabled(false), postpone_timeout(false), done(false), did_file_new_event(false), analyzers(this) { StaticInit(); @@ -88,7 +87,6 @@ File::File(const string& file_id, Connection* conn, analyzer::Tag tag, val = new RecordVal(fa_file_type); val->Assign(id_idx, new StringVal(file_id.c_str())); - forwarded_offset = 0; file_reassembler = 0; if ( conn ) { @@ -244,7 +242,7 @@ bool File::IsComplete() const if ( ! total ) return false; - if ( LookupFieldDefaultCount(seen_bytes_idx) >= total->AsCount() ) + if ( stream_offset >= total->AsCount() ) return true; return false; @@ -302,6 +300,26 @@ bool File::DetectMIME(const u_char* data, uint64 len) return mime; } +void File::EnableReassembly() + { + reassembly_enabled = true; + } + +void File::DisableReassembly() + { + reassembly_enabled = false; + if ( file_reassembler ) + { + delete file_reassembler; + file_reassembler = NULL; + } + } + +void File::SetReassemblyBuffer(uint64 max) + { + reassembly_max_buffer = max; + } + void File::ReplayBOF() { if ( bof_buffer.replayed ) @@ -311,141 +329,122 @@ void File::ReplayBOF() if ( bof_buffer.chunks.empty() ) { - // Since we missed the beginning, try file type detect on next data in. - missed_bof = true; + // We definitely can't do anything if we don't have any chunks. return; } BroString* bs = concatenate(bof_buffer.chunks); val->Assign(bof_buffer_idx, new StringVal(bs)); - DetectMIME(bs->Bytes(), bs->Len()); - - FileEvent(file_new); - for ( size_t i = 0; i < bof_buffer.chunks.size(); ++i ) DataIn(bof_buffer.chunks[i]->Bytes(), bof_buffer.chunks[i]->Len()); } -void File::DataIn(const u_char* data, uint64 len, uint64 offset) +void File::DeliverStream(const u_char* data, uint64 len) { - analyzers.DrainModifications(); + // Buffer enough data send to libmagic. + if ( BufferBOF(data, len) ) + return; - if ( file_reassembler ) + if ( stream_offset == 0 ) { - // If there is a file reassembler we must forward any data there. - // But this only happens if the incoming data doesn't happen - // to align with the current forwarded_offset - file_reassembler->NewBlock(network_time, offset, len, data); + DetectMIME(data, len); + FileEvent(file_new); + } - if ( !file_reassembler->HasBlocks() ) + file_analysis::Analyzer* a = 0; + IterCookie* c = analyzers.InitForIteration(); + while ( (a = analyzers.NextEntry(c)) ) + { + if ( !a->DeliverStream(data, len) ) { - delete file_reassembler; - file_reassembler = 0; + analyzers.QueueRemove(a->Tag(), a->Args()); } } - else if ( forwarded_offset == offset ) + + stream_offset += len; + IncrementByteCount(len, seen_bytes_idx); + } + +void File::DeliverChunk(const u_char* data, uint64 len, uint64 offset) + { + // Potentially handle reassembly and deliver to the stream analyzers. + if ( file_reassembler ) + { + if ( reassembly_max_buffer > 0 && + reassembly_max_buffer < file_reassembler->TotalSize() ) + { + uint64 first_offset = file_reassembler->GetFirstBlockOffset(); + int gap_bytes = file_reassembler->TrimToSeq(first_offset); + + if ( FileEventAvailable(file_reassembly_buffer_overflow) ) + { + val_list* vl = new val_list(); + vl->append(val->Ref()); + vl->append(new Val(stream_offset, TYPE_COUNT)); + vl->append(new Val(gap_bytes, TYPE_COUNT)); + FileEvent(file_reassembly_buffer_overflow, vl); + } + + Gap(stream_offset, gap_bytes); + } + + // Forward data to the reassembler. + file_reassembler->NewBlock(network_time, offset, len, data); + } + else if ( stream_offset == offset ) { // This is the normal case where a file is transferred linearly. - // Nothing should be done here. + // Nothing special should be done here. + DeliverStream(data, len); } - else if ( forwarded_offset > offset && forwarded_offset < offset+len ) + else if ( reassembly_enabled ) { - // This is a segment that begins before the forwarded_offset - // but proceeds past the forwarded_offset. It needs - // trimmed but the reassembler is not enabled. - uint64 adjustment = forwarded_offset - offset; - data = data + adjustment; - len = len - adjustment; - offset = forwarded_offset; - IncrementByteCount(adjustment, overflow_bytes_idx); - } - else if ( forwarded_offset < offset ) - { - // This is data past a gap and the reassembler needs to be enabled. - file_reassembler = new FileReassembler(this, forwarded_offset); + // This is data that doesn't match the offset and the reassembler + // needs to be enabled. + file_reassembler = new FileReassembler(this, stream_offset); file_reassembler->NewBlock(network_time, offset, len, data); - return; } else { - // This is data that was already seen so it can be completely ignored. + // We can't reassemble so we throw out the data for streaming. IncrementByteCount(len, overflow_bytes_idx); - return; } - if ( first_chunk ) + // Deliver to the chunk analyzers. + file_analysis::Analyzer* a = 0; + IterCookie* c = analyzers.InitForIteration(); + while ( (a = analyzers.NextEntry(c)) ) { - // TODO: this should all really be delayed until we attempt reassembly. - DetectMIME(data, len); - FileEvent(file_new); - first_chunk = false; + if ( !a->DeliverChunk(data, len, offset) ) + { + analyzers.QueueRemove(a->Tag(), a->Args()); + } } if ( IsComplete() ) { + // If the file is complete we can automatically go and close out the file from here. EndOfFile(); } - else - { - file_analysis::Analyzer* a = 0; - IterCookie* c = analyzers.InitForIteration(); + } - while ( (a = analyzers.NextEntry(c)) ) - { - //if ( ! a->DeliverChunk(data, len, offset) ) - // { - // analyzers.QueueRemove(a->Tag(), a->Args()); - // } - if ( ! a->DeliverStream(data, len) ) - { - analyzers.QueueRemove(a->Tag(), a->Args()); - } - - } - - analyzers.DrainModifications(); - - forwarded_offset += len; - IncrementByteCount(len, seen_bytes_idx); - } +void File::DataIn(const u_char* data, uint64 len, uint64 offset) + { + analyzers.DrainModifications(); + DeliverChunk(data, len, offset); + analyzers.DrainModifications(); } void File::DataIn(const u_char* data, uint64 len) { analyzers.DrainModifications(); - - if ( BufferBOF(data, len) ) - return; - - if ( missed_bof ) - { - DetectMIME(data, len); - FileEvent(file_new); - missed_bof = false; - } - - file_analysis::Analyzer* a = 0; - IterCookie* c = analyzers.InitForIteration(); - - while ( (a = analyzers.NextEntry(c)) ) - { - if ( ! a->DeliverStream(data, len) ) - { - analyzers.QueueRemove(a->Tag(), a->Args()); - continue; - } - - uint64 offset = LookupFieldDefaultCount(seen_bytes_idx) + - LookupFieldDefaultCount(missing_bytes_idx); - - if ( ! a->DeliverChunk(data, len, offset) ) - analyzers.QueueRemove(a->Tag(), a->Args()); - } - + + uint64 offset = LookupFieldDefaultCount(seen_bytes_idx) + + LookupFieldDefaultCount(missing_bytes_idx); + DeliverChunk(data, len, offset); analyzers.DrainModifications(); - IncrementByteCount(len, seen_bytes_idx); } void File::EndOfFile() @@ -501,6 +500,8 @@ void File::Gap(uint64 offset, uint64 len) } analyzers.DrainModifications(); + + stream_offset += len; IncrementByteCount(len, missing_bytes_idx); } diff --git a/src/file_analysis/File.h b/src/file_analysis/File.h index 3422982303..14a168d0f9 100644 --- a/src/file_analysis/File.h +++ b/src/file_analysis/File.h @@ -169,6 +169,7 @@ public: protected: friend class Manager; + friend class FileReassembler; /** * Constructor; only file_analysis::Manager should be creating these. @@ -236,6 +237,33 @@ protected: */ bool DetectMIME(const u_char* data, uint64 len); + /** + * Enables reassembly on the file. + */ + void EnableReassembly(); + + /** + * Disables reassembly on the file. If there is an existing reassembler + * for the file, this will cause it to be deleted and won't allow a new + * one to be created until reassembly is reenabled. + */ + void DisableReassembly(); + + /** + * Set a maximum allowed bytes of memory for file reassembly for this file. + */ + void SetReassemblyBuffer(uint64 max); + + /** + * Perform stream-wise delivery for analyzers that need it. + */ + void DeliverStream(const u_char* data, uint64 len); + + /** + * Perform chunk-wise delivery for analyzers that need it. + */ + void DeliverChunk(const u_char* data, uint64 len, uint64 offset); + /** * Lookup a record field index/offset by name. * @param field_name the name of the \c fa_file record field. @@ -248,18 +276,17 @@ protected: */ static void StaticInit(); -private: +protected: string id; /**< A pretty hash that likely identifies file */ RecordVal* val; /**< \c fa_file from script layer. */ - uint64 forwarded_offset; /**< The offset of the file which has been forwarded. */ FileReassembler *file_reassembler; /**< A reassembler for the file if it's needed. */ + uint64 stream_offset; /**< The offset of the file which has been forwarded. */ + uint64 reassembly_max_buffer; /**< Maximum allowed buffer for reassembly. */ + bool reassembly_enabled; /**< Whether file stream reassembly is needed. */ bool postpone_timeout; /**< Whether postponing timeout is requested. */ - bool first_chunk; /**< Track first non-linear chunk. */ - bool missed_bof; /**< Flags that we missed start of file. */ - bool need_reassembly; /**< Whether file stream reassembly is needed. */ bool done; /**< If this object is about to be deleted. */ bool did_file_new_event; /**< Whether the file_new event has been done. */ - AnalyzerSet analyzers; /**< A set of attached file analyzer. */ + AnalyzerSet analyzers; /**< A set of attached file analyzers. */ queue > fonc_queue; struct BOF_Buffer { diff --git a/src/file_analysis/FileReassembler.cc b/src/file_analysis/FileReassembler.cc index 8440fdca83..d05a573682 100644 --- a/src/file_analysis/FileReassembler.cc +++ b/src/file_analysis/FileReassembler.cc @@ -22,13 +22,6 @@ void FileReassembler::BlockInserted(DataBlock* start_block) seq_delta(start_block->upper, last_reassem_seq) <= 0 ) return; - - // We've filled a leading hole. Deliver as much as possible. - // Note that the new block may include both some old stuff - // and some new stuff. AddAndCheck() will have split the - // new stuff off into its own block(s), but in the following - // loop we have to take care not to deliver already-delivered - // data. for ( DataBlock* b = start_block; b && seq_delta(b->seq, last_reassem_seq) <= 0; b = b->next ) { @@ -36,23 +29,34 @@ void FileReassembler::BlockInserted(DataBlock* start_block) { // New stuff. int len = b->Size(); int seq = last_reassem_seq; + the_file->DeliverStream(b->block, len); last_reassem_seq += len; - the_file->DataIn(b->block, len, seq); } } - - //CheckEOF(); } void FileReassembler::Undelivered(int up_to_seq) { - //reporter->Warning("should probably do something here (file reassembler undelivered)\n"); + // Not doing anything here yet. } void FileReassembler::Overlap(const u_char* b1, const u_char* b2, int n) { - //reporter->Warning("should probably do something here (file reassembler overlap)\n"); + // Not doing anything here yet. } +IMPLEMENT_SERIAL(FileReassembler, SER_FILE_REASSEMBLER); + +bool FileReassembler::DoSerialize(SerialInfo* info) const + { + reporter->InternalError("FileReassembler::DoSerialize not implemented"); + return false; // Cannot be reached. + } + +bool FileReassembler::DoUnserialize(UnserialInfo* info) + { + reporter->InternalError("FileReassembler::DoUnserialize not implemented"); + return false; // Cannot be reached. + } } // end file_analysis diff --git a/src/file_analysis/FileReassembler.h b/src/file_analysis/FileReassembler.h index 7f73ec6fa4..146171b6ed 100644 --- a/src/file_analysis/FileReassembler.h +++ b/src/file_analysis/FileReassembler.h @@ -21,14 +21,16 @@ public: virtual ~FileReassembler(); void Done(); + uint64 GetFirstBlockOffset() { return blocks->seq; } // Checks if we have delivered all contents that we can possibly - // deliver for this endpoint. Calls TCP_Analyzer::EndpointEOF() - // when so. - //void CheckEOF(); + // deliver for this endpoint. + void CheckEOF(); -private: - //DECLARE_SERIAL(FileReassembler); +protected: + FileReassembler() { } + + DECLARE_SERIAL(FileReassembler); void Undelivered(int up_to_seq); void BlockInserted(DataBlock* b); diff --git a/src/file_analysis/Manager.cc b/src/file_analysis/Manager.cc index 0337dbb098..5585c6c33c 100644 --- a/src/file_analysis/Manager.cc +++ b/src/file_analysis/Manager.cc @@ -183,6 +183,39 @@ bool Manager::SetTimeoutInterval(const string& file_id, double interval) const return true; } +bool Manager::EnableReassembly(const string& file_id) + { + File* file = LookupFile(file_id); + + if ( ! file ) + return false; + + file->EnableReassembly(); + return true; + } + +bool Manager::DisableReassembly(const string& file_id) + { + File* file = LookupFile(file_id); + + if ( ! file ) + return false; + + file->DisableReassembly(); + return true; + } + +bool Manager::SetReassemblyBuffer(const string& file_id, uint64 max) + { + File* file = LookupFile(file_id); + + if ( ! file ) + return false; + + file->SetReassemblyBuffer(max); + return true; + } + bool Manager::SetExtractionLimit(const string& file_id, RecordVal* args, uint64 n) const { diff --git a/src/file_analysis/Manager.h b/src/file_analysis/Manager.h index cf73c6b52d..1d30e73e8a 100644 --- a/src/file_analysis/Manager.h +++ b/src/file_analysis/Manager.h @@ -173,6 +173,21 @@ public: */ bool SetTimeoutInterval(const string& file_id, double interval) const; + /** + * Enable the reassembler for a file. + */ + bool EnableReassembly(const string& file_id); + + /** + * Disable the reassembler for a file. + */ + bool DisableReassembly(const string& file_id); + + /** + * Set the reassembly for a file in bytes. + */ + bool SetReassemblyBuffer(const string& file_id, uint64 max); + /** * Sets a limit on the maximum size allowed for extracting the file * to local disk; diff --git a/src/file_analysis/analyzer/extract/Extract.cc b/src/file_analysis/analyzer/extract/Extract.cc index 1a3917cd0e..032a176564 100644 --- a/src/file_analysis/analyzer/extract/Extract.cc +++ b/src/file_analysis/analyzer/extract/Extract.cc @@ -14,7 +14,7 @@ Extract::Extract(RecordVal* args, File* file, const string& arg_filename, : file_analysis::Analyzer(file_mgr->GetComponentTag("EXTRACT"), args, file), filename(arg_filename), limit(arg_limit) { - fd = open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0666); + fd = open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_APPEND, 0666); if ( fd < 0 ) { @@ -53,7 +53,7 @@ file_analysis::Analyzer* Extract::Instantiate(RecordVal* args, File* file) limit->AsCount()); } -static bool check_limit_exceeded(uint64 lim, uint64 off, uint64 len, uint64* n) +static bool check_limit_exceeded(uint64 lim, uint64 len, uint64* n) { if ( lim == 0 ) { @@ -61,13 +61,13 @@ static bool check_limit_exceeded(uint64 lim, uint64 off, uint64 len, uint64* n) return false; } - if ( off >= lim ) - { - *n = 0; - return true; - } - - *n = lim - off; + //if ( off >= lim ) + // { + // *n = 0; + // return true; + // } + // + //*n = lim - off; if ( len > *n ) return true; @@ -77,13 +77,13 @@ static bool check_limit_exceeded(uint64 lim, uint64 off, uint64 len, uint64* n) return false; } -bool Extract::DeliverChunk(const u_char* data, uint64 len, uint64 offset) +bool Extract::DeliverStream(const u_char* data, uint64 len) { if ( ! fd ) return false; uint64 towrite = 0; - bool limit_exceeded = check_limit_exceeded(limit, offset, len, &towrite); + bool limit_exceeded = check_limit_exceeded(limit, len, &towrite); if ( limit_exceeded && file_extraction_limit ) { @@ -92,16 +92,15 @@ bool Extract::DeliverChunk(const u_char* data, uint64 len, uint64 offset) vl->append(f->GetVal()->Ref()); vl->append(Args()->Ref()); vl->append(new Val(limit, TYPE_COUNT)); - vl->append(new Val(offset, TYPE_COUNT)); vl->append(new Val(len, TYPE_COUNT)); f->FileEvent(file_extraction_limit, vl); // Limit may have been modified by BIF, re-check it. - limit_exceeded = check_limit_exceeded(limit, offset, len, &towrite); + limit_exceeded = check_limit_exceeded(limit, len, &towrite); } if ( towrite > 0 ) - safe_pwrite(fd, data, towrite, offset); + safe_write(fd, (const char *) data, towrite); return ( ! limit_exceeded ); } diff --git a/src/file_analysis/analyzer/extract/Extract.h b/src/file_analysis/analyzer/extract/Extract.h index 00c4dbe2b7..59130fa230 100644 --- a/src/file_analysis/analyzer/extract/Extract.h +++ b/src/file_analysis/analyzer/extract/Extract.h @@ -28,11 +28,10 @@ public: * Write a chunk of file data to the local extraction file. * @param data pointer to a chunk of file data. * @param len number of bytes in the data chunk. - * @param offset number of bytes from start of file at which chunk starts. * @return false if there was no extraction file open and the data couldn't * be written, else true. */ - virtual bool DeliverChunk(const u_char* data, uint64 len, uint64 offset); + virtual bool DeliverStream(const u_char* data, uint64 len); /** * Create a new instance of an Extract analyzer. diff --git a/src/file_analysis/analyzer/extract/events.bif b/src/file_analysis/analyzer/extract/events.bif index 1c08736416..f5ebb6816b 100644 --- a/src/file_analysis/analyzer/extract/events.bif +++ b/src/file_analysis/analyzer/extract/events.bif @@ -11,9 +11,7 @@ ## ## limit: The limit, in bytes, the extracted file is about to breach. ## -## offset: The offset at which a file chunk is about to be written. -## ## len: The length of the file chunk about to be written. ## ## .. bro:see:: Files::add_analyzer Files::ANALYZER_EXTRACT -event file_extraction_limit%(f: fa_file, args: any, limit: count, offset: count, len: count%); +event file_extraction_limit%(f: fa_file, args: any, limit: count, len: count%); diff --git a/src/file_analysis/file_analysis.bif b/src/file_analysis/file_analysis.bif index 0e904f298f..4e4b4c6cdb 100644 --- a/src/file_analysis/file_analysis.bif +++ b/src/file_analysis/file_analysis.bif @@ -15,6 +15,27 @@ function Files::__set_timeout_interval%(file_id: string, t: interval%): bool return new Val(result, TYPE_BOOL); %} +## :bro:see:`Files::enable_reassembly`. +function Files::__enable_reassembly%(file_id: string%): bool + %{ + bool result = file_mgr->EnableReassembly(file_id->CheckString()); + return new Val(result, TYPE_BOOL); + %} + +## :bro:see:`Files::disable_reassembly`. +function Files::__disable_reassembly%(file_id: string%): bool + %{ + bool result = file_mgr->DisableReassembly(file_id->CheckString()); + return new Val(result, TYPE_BOOL); + %} + +## :bro:see:`Files::set_reassembly_buffer`. +function Files::__set_reassembly_buffer%(file_id: string, max: count%): bool + %{ + bool result = file_mgr->SetReassemblyBuffer(file_id->CheckString(), max); + return new Val(result, TYPE_BOOL); + %} + ## :bro:see:`Files::add_analyzer`. function Files::__add_analyzer%(file_id: string, tag: Files::Tag, args: any%): bool %{ diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.actions.data_event/out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.actions.data_event/out index cbd60840bf..c9b5e20466 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.actions.data_event/out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.actions.data_event/out @@ -7,6 +7,7 @@ text/plain FILE_OVER_NEW_CONNECTION file_stream, file #0, 1500, ^J0.26 | 2012-08-24 15:10:04 -0700^J^J * Fixing update-changes, which could pick the wrong control file. (Robin Sommer)^J^J * Fixing GPG signing script. (Robin Sommer)^J^J0.25 | 2012-08-01 13:55:46 -0500^J^J * Fix configure script to exit with non-zero status on error (Jon Siwek)^J^J0.24 | 2012-07-05 12:50:43 -0700^J^J * Raise minimum required CMake version to 2.6.3 (Jon Siwek)^J^J * Adding script to delete old fully-merged branches. (Robin Sommer)^J^J0.23-2 | 2012-01-25 13:24:01 -0800^J^J * Fix a bro-cut error message. (Daniel Thayer)^J^J0.23 | 2012-01-11 12:16:11 -0800^J^J * Tweaks to release scripts, plus a new one for signing files.^J (Robin Sommer)^J^J0.22 | 2012-01-10 16:45:19 -0800^J^J * Tweaks for OpenBSD support. (Jon Siwek)^J^J * bro-cut extensions and fixes. (Robin Sommer)^J ^J - If no field names are given on the command line, we now pass through^J all fields. Adresses #657.^J^J - Removing some GNUism from awk script. Addresses #653.^J^J - Added option for time output in UTC. Addresses #668.^J^J - Added output field separator option -F. Addresses #649.^J^J - Fixing option -c: only some header lines were passed through^J rather than all. (Robin Sommer)^J^J * Fix parallel make portability. (Jon Siwek)^J^J0.21-9 | 2011-11-07 05:44:14 -0800^J^J * Fixing compiler warnings. Addresses #388. (Jon Siwek)^J^J0.21-2 | 2011-11-02 18:12:13 -0700^J^J * Fix for misnaming temp file in update-changes script. (Robin Sommer)^J^J0.21-1 | 2011-11-02 18:10:39 -0700^J^J * Little fix for make-relea file_chunk, file #0, 1500, 0, ^J0.26 | 2012-08-24 15:10:04 -0700^J^J * Fixing update-changes, which could pick the wrong control file. (Robin Sommer)^J^J * Fixing GPG signing script. (Robin Sommer)^J^J0.25 | 2012-08-01 13:55:46 -0500^J^J * Fix configure script to exit with non-zero status on error (Jon Siwek)^J^J0.24 | 2012-07-05 12:50:43 -0700^J^J * Raise minimum required CMake version to 2.6.3 (Jon Siwek)^J^J * Adding script to delete old fully-merged branches. (Robin Sommer)^J^J0.23-2 | 2012-01-25 13:24:01 -0800^J^J * Fix a bro-cut error message. (Daniel Thayer)^J^J0.23 | 2012-01-11 12:16:11 -0800^J^J * Tweaks to release scripts, plus a new one for signing files.^J (Robin Sommer)^J^J0.22 | 2012-01-10 16:45:19 -0800^J^J * Tweaks for OpenBSD support. (Jon Siwek)^J^J * bro-cut extensions and fixes. (Robin Sommer)^J ^J - If no field names are given on the command line, we now pass through^J all fields. Adresses #657.^J^J - Removing some GNUism from awk script. Addresses #653.^J^J - Added option for time output in UTC. Addresses #668.^J^J - Added output field separator option -F. Addresses #649.^J^J - Fixing option -c: only some header lines were passed through^J rather than all. (Robin Sommer)^J^J * Fix parallel make portability. (Jon Siwek)^J^J0.21-9 | 2011-11-07 05:44:14 -0800^J^J * Fixing compiler warnings. Addresses #388. (Jon Siwek)^J^J0.21-2 | 2011-11-02 18:12:13 -0700^J^J * Fix for misnaming temp file in update-changes script. (Robin Sommer)^J^J0.21-1 | 2011-11-02 18:10:39 -0700^J^J * Little fix for make-relea +file_chunk, file #0, 1500, 0, ^J0.26 | 2012-08-24 15:10:04 -0700^J^J * Fixing update-changes, which could pick the wrong control file. (Robin Sommer)^J^J * Fixing GPG signing script. (Robin Sommer)^J^J0.25 | 2012-08-01 13:55:46 -0500^J^J * Fix configure script to exit with non-zero status on error (Jon Siwek)^J^J0.24 | 2012-07-05 12:50:43 -0700^J^J * Raise minimum required CMake version to 2.6.3 (Jon Siwek)^J^J * Adding script to delete old fully-merged branches. (Robin Sommer)^J^J0.23-2 | 2012-01-25 13:24:01 -0800^J^J * Fix a bro-cut error message. (Daniel Thayer)^J^J0.23 | 2012-01-11 12:16:11 -0800^J^J * Tweaks to release scripts, plus a new one for signing files.^J (Robin Sommer)^J^J0.22 | 2012-01-10 16:45:19 -0800^J^J * Tweaks for OpenBSD support. (Jon Siwek)^J^J * bro-cut extensions and fixes. (Robin Sommer)^J ^J - If no field names are given on the command line, we now pass through^J all fields. Adresses #657.^J^J - Removing some GNUism from awk script. Addresses #653.^J^J - Added option for time output in UTC. Addresses #668.^J^J - Added output field separator option -F. Addresses #649.^J^J - Fixing option -c: only some header lines were passed through^J rather than all. (Robin Sommer)^J^J * Fix parallel make portability. (Jon Siwek)^J^J0.21-9 | 2011-11-07 05:44:14 -0800^J^J * Fixing compiler warnings. Addresses #388. (Jon Siwek)^J^J0.21-2 | 2011-11-02 18:12:13 -0700^J^J * Fix for misnaming temp file in update-changes script. (Robin Sommer)^J^J0.21-1 | 2011-11-02 18:10:39 -0700^J^J * Little fix for make-relea file_stream, file #0, 1024, se script, which could pick out the wrong^J tag. (Robin Sommer)^J^J0.21 | 2011-10-27 17:40:45 -0700^J^J * Fixing bro-cut's usage message and argument error handling. (Robin Sommer)^J^J * Bugfix in update-changes script. (Robin Sommer)^J^J * update-changes now ignores commits it did itself. (Robin Sommer)^J^J * Fix a bug in the update-changes script. (Robin Sommer)^J^J * bro-cut now always installs to $prefix/bin by `make install`. (Jon Siwek)^J^J * Options to adjust time format for bro-cut. (Robin Sommer)^J^J The default with -d is now ISO format. The new option "-D "^J specifies a custom strftime()-style format string. Alternatively,^J the environment variable BRO_CUT_TIMEFMT can set the format as^J well.^J^J * bro-cut now understands the field separator header. (Robin Sommer)^J^J * Renaming options -h/-H -> -c/-C, and doing some general cleanup.^J^J0.2 | 2011-10-25 19:53:57 -0700^J^J * Adding support for replacing version string in a setup.py. (Robin^J Sommer)^J^J * Change generated root cert DN indices f file_chunk, file #0, 1024, 1500, se script, which could pick out the wrong^J tag. (Robin Sommer)^J^J0.21 | 2011-10-27 17:40:45 -0700^J^J * Fixing bro-cut's usage message and argument error handling. (Robin Sommer)^J^J * Bugfix in update-changes script. (Robin Sommer)^J^J * update-changes now ignores commits it did itself. (Robin Sommer)^J^J * Fix a bug in the update-changes script. (Robin Sommer)^J^J * bro-cut now always installs to $prefix/bin by `make install`. (Jon Siwek)^J^J * Options to adjust time format for bro-cut. (Robin Sommer)^J^J The default with -d is now ISO format. The new option "-D "^J specifies a custom strftime()-style format string. Alternatively,^J the environment variable BRO_CUT_TIMEFMT can set the format as^J well.^J^J * bro-cut now understands the field separator header. (Robin Sommer)^J^J * Renaming options -h/-H -> -c/-C, and doing some general cleanup.^J^J0.2 | 2011-10-25 19:53:57 -0700^J^J * Adding support for replacing version string in a setup.py. (Robin^J Sommer)^J^J * Change generated root cert DN indices f file_stream, file #0, 476, ormat for RFC2253^J compliance. (Jon Siwek)^J^J * New tool devel-tools/check-release to run before making releases.^J (Robin Sommer)^J^J * devel-tools/update-changes gets a new option -a to amend to^J previous commit if possible. Default is now not to (used to be the^J opposite). (Robin Sommer)^J^J * Change Mozilla trust root generation to index certs by subject DN. (Jon Siwek)^J^J * Change distclean to only remove build dir. (Jon Siwek)^J^J * Make dist now cleans the diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.set_timeout_interval/bro..stdout b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.set_timeout_interval/bro..stdout index e78f5c8c17..8b2826a925 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.set_timeout_interval/bro..stdout +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.set_timeout_interval/bro..stdout @@ -1,5 +1,7 @@ FILE_NEW file #0, 0, 0 +FILE_BOF_BUFFER +MZ\x90\0^C\0\0\0^D\0\0 MIME_TYPE application/x-dosexec FILE_OVER_NEW_CONNECTION @@ -8,15 +10,13 @@ file #0, 1022920, 0 [orig_h=192.168.72.14, orig_p=3254/tcp, resp_h=65.54.95.206, resp_p=80/tcp] total bytes: 1022920 source: HTTP -FILE_NEW -file #1, 0, 0 -MIME_TYPE -application/octet-stream -FILE_OVER_NEW_CONNECTION +MD5: fc13fee1d44ef737a3133f1298b21d28 +SHA1: 7d99803eaf3b6e8dfa3581348bc694089579d25a +SHA256: dcb87a62a2b5d449abc138776000fd1b14edc690e9da6ea325b8f352ab033202 FILE_TIMEOUT FILE_TIMEOUT FILE_STATE_REMOVE -file #1, 206024, 0 +file #0, 0, 0 [orig_h=192.168.72.14, orig_p=3257/tcp, resp_h=65.54.95.14, resp_p=80/tcp] total bytes: 1022920 source: HTTP diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.multipart/out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.multipart/out index da42f4fd68..9870cd8888 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.multipart/out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.multipart/out @@ -55,3 +55,11 @@ source: HTTP MD5: 226244811006caf4ac904344841168dd SHA1: 7222902b8b8e68e25c0422e7f8bdf344efeda54d SHA256: dd485ecf240e12807516b0a27718fc3ab9a17c1158a452967343c98cefba07a0 +FILE_STATE_REMOVE +file #3, 465, 0 +[orig_h=141.142.228.5, orig_p=57262/tcp, resp_h=54.243.88.146, resp_p=80/tcp] +total bytes: 465 +source: HTTP +MD5: 226244811006caf4ac904344841168dd +SHA1: 7222902b8b8e68e25c0422e7f8bdf344efeda54d +SHA256: dd485ecf240e12807516b0a27718fc3ab9a17c1158a452967343c98cefba07a0 diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/a.out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/a.out index 0eace71c67..47c42efd13 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/a.out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/a.out @@ -1,5 +1,7 @@ FILE_NEW file #0, 0, 0 +FILE_BOF_BUFFER +%PDF-1.4^J%\xd0 MIME_TYPE application/pdf FILE_OVER_NEW_CONNECTION diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/b.out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/b.out index 9c05f311f3..9c123887e7 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/b.out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/b.out @@ -1,5 +1,7 @@ FILE_NEW file #0, 0, 0 +FILE_BOF_BUFFER +MZ\x90\0^C\0\0\0^D\0\0 MIME_TYPE application/x-dosexec FILE_OVER_NEW_CONNECTION @@ -8,14 +10,12 @@ file #0, 1022920, 0 [orig_h=192.168.72.14, orig_p=3254/tcp, resp_h=65.54.95.206, resp_p=80/tcp] total bytes: 1022920 source: HTTP -FILE_NEW -file #1, 0, 0 -MIME_TYPE -application/octet-stream -FILE_OVER_NEW_CONNECTION +MD5: fc13fee1d44ef737a3133f1298b21d28 +SHA1: 7d99803eaf3b6e8dfa3581348bc694089579d25a +SHA256: dcb87a62a2b5d449abc138776000fd1b14edc690e9da6ea325b8f352ab033202 FILE_TIMEOUT FILE_STATE_REMOVE -file #1, 206024, 0 +file #0, 0, 0 [orig_h=192.168.72.14, orig_p=3257/tcp, resp_h=65.54.95.14, resp_p=80/tcp] total bytes: 1022920 source: HTTP diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/c.out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/c.out index d85a9de314..b344249aa1 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/c.out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/c.out @@ -1,12 +1,17 @@ FILE_NEW file #0, 0, 0 +FILE_BOF_BUFFER +%PDF-1.4^M%\xe2 MIME_TYPE -application/octet-stream +application/pdf FILE_OVER_NEW_CONNECTION FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE -file #0, 498702, 0 +file #0, 498668, 0 [orig_h=10.45.179.94, orig_p=19950/tcp, resp_h=129.174.93.170, resp_p=80/tcp] [orig_h=10.45.179.94, orig_p=19953/tcp, resp_h=129.174.93.170, resp_p=80/tcp] total bytes: 498668 source: HTTP +MD5: 94046a5fb1c5802d0f1e6d704cf3e10e +SHA1: 250aa71dd1594363bc7083d25cfd0240e441b119 +SHA256: 5c3bc213c9eff85f98feceac8810b955f8415564e50e3889b447e847c50c5ba7 diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.pipeline/out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.pipeline/out index b85485cd1a..b3c4bd3e31 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.pipeline/out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.pipeline/out @@ -41,6 +41,14 @@ source: HTTP MD5: d903de7e30db1691d3130ba5eae6b9a7 SHA1: 81f5f056ce5e97d940854bb0c48017b45dd9f15e SHA256: 6fb22aa9d780ea63bd7a2e12b92b16fcbf1c4874f1d3e11309a5ba984433c315 +FILE_STATE_REMOVE +file #2, 94, 0 +[orig_h=192.168.1.104, orig_p=1673/tcp, resp_h=63.245.209.11, resp_p=80/tcp] +total bytes: 94 +source: HTTP +MD5: d903de7e30db1691d3130ba5eae6b9a7 +SHA1: 81f5f056ce5e97d940854bb0c48017b45dd9f15e +SHA256: 6fb22aa9d780ea63bd7a2e12b92b16fcbf1c4874f1d3e11309a5ba984433c315 FILE_NEW file #3, 0, 0 FILE_BOF_BUFFER diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.post/out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.post/out index cedc396254..f130f2d270 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.post/out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.post/out @@ -13,6 +13,14 @@ source: HTTP MD5: 5eb63bbbe01eeed093cb22bb8f5acdc3 SHA1: 2aae6c35c94fcfb415dbe95f408b9ce91ee846ed SHA256: b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9 +FILE_STATE_REMOVE +file #0, 11, 0 +[orig_h=141.142.228.5, orig_p=53595/tcp, resp_h=54.243.55.129, resp_p=80/tcp] +total bytes: 11 +source: HTTP +MD5: 5eb63bbbe01eeed093cb22bb8f5acdc3 +SHA1: 2aae6c35c94fcfb415dbe95f408b9ce91ee846ed +SHA256: b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9 FILE_NEW file #1, 0, 0 FILE_BOF_BUFFER @@ -28,3 +36,11 @@ source: HTTP MD5: c9337794df612aeaa901dcf9fa446bca SHA1: 6a1582672c203210c6d18d700322060b676365e7 SHA256: 8eb24c16df7cb45cb6a1790b0d26ad2571f754228d0ac111b3ac59adbfecbeb8 +FILE_STATE_REMOVE +file #1, 366, 0 +[orig_h=141.142.228.5, orig_p=53595/tcp, resp_h=54.243.55.129, resp_p=80/tcp] +total bytes: 366 +source: HTTP +MD5: c9337794df612aeaa901dcf9fa446bca +SHA1: 6a1582672c203210c6d18d700322060b676365e7 +SHA256: 8eb24c16df7cb45cb6a1790b0d26ad2571f754228d0ac111b3ac59adbfecbeb8 diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.logging/files.log b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.logging/files.log index 447d991f3e..643176a6b3 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.logging/files.log +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.logging/files.log @@ -3,8 +3,8 @@ #empty_field (empty) #unset_field - #path files -#open 2013-08-26-18-39-03 +#open 2014-01-05-09-08-10 #fields ts fuid tx_hosts rx_hosts conn_uids source depth analyzers mime_type filename duration local_orig is_orig seen_bytes total_bytes missing_bytes overflow_bytes timedout parent_fuid md5 sha1 sha256 extracted #types time string table[addr] table[addr] table[string] string count table[string] string string interval bool bool count count count count bool string string string string string 1362692527.009721 FakNcS1Jfe01uljb3 192.150.187.43 141.142.228.5 CXWv6p3arKYeMETxOg HTTP 0 SHA256,DATA_EVENT,MD5,EXTRACT,SHA1 text/plain - 0.000054 - F 4705 4705 0 0 F - 397168fd09991a0e712254df7bc639ac 1dd7ac0398df6cbc0696445a91ec681facf4dc47 4e7c7ef0984119447e743e3ec77e1de52713e345cde03fe7df753a35849bed18 FakNcS1Jfe01uljb3-file -#close 2013-08-26-18-39-03 +#close 2014-01-05-09-08-10