diff --git a/scripts/base/init-bare.bro b/scripts/base/init-bare.bro index 9f8c9f42ac..c58559bd9a 100644 --- a/scripts/base/init-bare.bro +++ b/scripts/base/init-bare.bro @@ -356,7 +356,7 @@ type fa_file: record { missing_bytes: count &default=0; ## The number of not all-in-sequence bytes in the file stream that - ## were delivered to file analyzers due to reassembly buffer overflow. + ## were not delivered to file analyzers due to reassembly buffer overflow. overflow_bytes: count &default=0; ## The amount of time between receiving new data for this file that diff --git a/src/Frag.cc b/src/Frag.cc index b1efb41594..4669471227 100644 --- a/src/Frag.cc +++ b/src/Frag.cc @@ -28,7 +28,7 @@ void FragTimer::Dispatch(double t, int /* is_expire */) FragReassembler::FragReassembler(NetSessions* arg_s, const IP_Hdr* ip, const u_char* pkt, HashKey* k, double t) - : Reassembler(0, REASSEM_IP) + : Reassembler(0) { s = arg_s; key = k; diff --git a/src/Reassem.cc b/src/Reassem.cc index 19beaa0a16..e2664b59b9 100644 --- a/src/Reassem.cc +++ b/src/Reassem.cc @@ -40,7 +40,7 @@ DataBlock::DataBlock(const u_char* data, int size, int arg_seq, unsigned int Reassembler::total_size = 0; -Reassembler::Reassembler(int init_seq, ReassemblerType arg_type) +Reassembler::Reassembler(int init_seq) { blocks = last_block = 0; trim_seq = last_reassem_seq = init_seq; diff --git a/src/Reassem.h b/src/Reassem.h index 1f65059e02..d9dd7d72e5 100644 --- a/src/Reassem.h +++ b/src/Reassem.h @@ -22,11 +22,10 @@ public: }; -enum ReassemblerType { REASSEM_IP, REASSEM_TCP }; class Reassembler : public BroObj { public: - Reassembler(int init_seq, ReassemblerType arg_type); + Reassembler(int init_seq); virtual ~Reassembler(); void NewBlock(double t, int seq, int len, const u_char* data); diff --git a/src/analyzer/protocol/tcp/TCP_Reassembler.cc b/src/analyzer/protocol/tcp/TCP_Reassembler.cc index a1e20dc0e6..06c9c06e6c 100644 --- a/src/analyzer/protocol/tcp/TCP_Reassembler.cc +++ 
b/src/analyzer/protocol/tcp/TCP_Reassembler.cc @@ -33,7 +33,7 @@ TCP_Reassembler::TCP_Reassembler(analyzer::Analyzer* arg_dst_analyzer, TCP_Analyzer* arg_tcp_analyzer, TCP_Reassembler::Type arg_type, bool arg_is_orig, TCP_Endpoint* arg_endp) - : Reassembler(1, REASSEM_TCP) + : Reassembler(1) { dst_analyzer = arg_dst_analyzer; tcp_analyzer = arg_tcp_analyzer; diff --git a/src/file_analysis/CMakeLists.txt b/src/file_analysis/CMakeLists.txt index 846fc4bf15..34dc8d5387 100644 --- a/src/file_analysis/CMakeLists.txt +++ b/src/file_analysis/CMakeLists.txt @@ -11,6 +11,7 @@ set(file_analysis_SRCS Manager.cc File.cc FileTimer.cc + FileReassembler.cc Analyzer.cc AnalyzerSet.cc Component.cc diff --git a/src/file_analysis/File.cc b/src/file_analysis/File.cc index 55b28763c8..d53c45fe06 100644 --- a/src/file_analysis/File.cc +++ b/src/file_analysis/File.cc @@ -3,6 +3,7 @@ #include #include "File.h" +#include "FileReassembler.h" #include "FileTimer.h" #include "Analyzer.h" #include "Manager.h" @@ -87,6 +88,8 @@ File::File(const string& file_id, Connection* conn, analyzer::Tag tag, val = new RecordVal(fa_file_type); val->Assign(id_idx, new StringVal(file_id.c_str())); + forwarded_offset = 0; + file_reassembler = 0; if ( conn ) { // add source, connection, is_orig fields @@ -109,6 +112,9 @@ File::~File() delete_vals(fonc_queue.front().second); fonc_queue.pop(); } + + if ( file_reassembler ) + delete file_reassembler; } void File::UpdateLastActivityTime() @@ -325,32 +331,85 @@ void File::DataIn(const u_char* data, uint64 len, uint64 offset) { analyzers.DrainModifications(); + if ( file_reassembler ) + { + // If there is a file reassembler we must forward any data there. 
+ // But this only happens if the incoming data doesn't happen + // to align with the current forwarded_offset + file_reassembler->NewBlock(network_time, offset, len, data); + + if ( !file_reassembler->HasBlocks() ) + { + delete file_reassembler; + file_reassembler = 0; + } + } + else if ( forwarded_offset == offset ) + { + // This is the normal case where a file is transferred linearly. + // Nothing should be done here. + } + else if ( forwarded_offset > offset && forwarded_offset < offset+len ) + { + // This is a segment that begins before the forwarded_offset + // but proceeds past the forwarded_offset. It needs to be + // trimmed, but the reassembler is not enabled. + uint64 adjustment = forwarded_offset - offset; + data = data + adjustment; + len = len - adjustment; + offset = forwarded_offset; + IncrementByteCount(adjustment, overflow_bytes_idx); + } + else if ( forwarded_offset < offset ) + { + // This is data past a gap and the reassembler needs to be enabled. + file_reassembler = new FileReassembler(this, forwarded_offset); + file_reassembler->NewBlock(network_time, offset, len, data); + return; + } + else + { + // This is data that was already seen so it can be completely ignored. + IncrementByteCount(len, overflow_bytes_idx); + return; + } + if ( first_chunk ) { - // TODO: this should all really be delayed until we attempt reassembly + // TODO: this should all really be delayed until we attempt reassembly. DetectMIME(data, len); FileEvent(file_new); first_chunk = false; } - file_analysis::Analyzer* a = 0; - IterCookie* c = analyzers.InitForIteration(); - - while ( (a = analyzers.NextEntry(c)) ) + if ( IsComplete() ) { - if ( ! a->DeliverChunk(data, len, offset) ) - analyzers.QueueRemove(a->Tag(), a->Args()); + EndOfFile(); } + else + { + file_analysis::Analyzer* a = 0; + IterCookie* c = analyzers.InitForIteration(); - analyzers.DrainModifications(); + while ( (a = analyzers.NextEntry(c)) ) + { + //if ( ! 
a->DeliverChunk(data, len, offset) ) + // { + // analyzers.QueueRemove(a->Tag(), a->Args()); + // } - // TODO: check reassembly requirement based on buffer size in record - if ( need_reassembly ) - reporter->InternalError("file_analyzer::File TODO: reassembly not yet supported"); + if ( ! a->DeliverStream(data, len) ) + { + analyzers.QueueRemove(a->Tag(), a->Args()); + } - // TODO: reassembly overflow stuff, increment overflow count, eval trigger + } - IncrementByteCount(len, seen_bytes_idx); + analyzers.DrainModifications(); + + forwarded_offset += len; + IncrementByteCount(len, seen_bytes_idx); + } } void File::DataIn(const u_char* data, uint64 len) diff --git a/src/file_analysis/File.h b/src/file_analysis/File.h index 6354c1c7e9..3422982303 100644 --- a/src/file_analysis/File.h +++ b/src/file_analysis/File.h @@ -8,6 +8,7 @@ #include #include +#include "FileReassembler.h" #include "Conn.h" #include "Val.h" #include "Tag.h" @@ -16,6 +17,8 @@ namespace file_analysis { +class FileReassembler; + /** * Wrapper class around \c fa_file record values from script layer. */ @@ -248,6 +251,8 @@ protected: private: string id; /**< A pretty hash that likely identifies file */ RecordVal* val; /**< \c fa_file from script layer. */ + uint64 forwarded_offset; /**< The offset of the file which has been forwarded. */ + FileReassembler *file_reassembler; /**< A reassembler for the file if it's needed. */ bool postpone_timeout; /**< Whether postponing timeout is requested. */ bool first_chunk; /**< Track first non-linear chunk. */ bool missed_bof; /**< Flags that we missed start of file. 
*/ diff --git a/src/file_analysis/FileReassembler.cc b/src/file_analysis/FileReassembler.cc new file mode 100644 index 0000000000..8440fdca83 --- /dev/null +++ b/src/file_analysis/FileReassembler.cc @@ -0,0 +1,58 @@ + +#include "FileReassembler.h" +#include "File.h" + + +namespace file_analysis { + +class File; + +FileReassembler::FileReassembler(File *f, int starting_offset) + : Reassembler(starting_offset), the_file(f) + { + } + +FileReassembler::~FileReassembler() + { + } + +void FileReassembler::BlockInserted(DataBlock* start_block) + { + if ( seq_delta(start_block->seq, last_reassem_seq) > 0 || + seq_delta(start_block->upper, last_reassem_seq) <= 0 ) + return; + + + // We've filled a leading hole. Deliver as much as possible. + // Note that the new block may include both some old stuff + // and some new stuff. AddAndCheck() will have split the + // new stuff off into its own block(s), but in the following + // loop we have to take care not to deliver already-delivered + // data. + for ( DataBlock* b = start_block; + b && seq_delta(b->seq, last_reassem_seq) <= 0; b = b->next ) + { + if ( b->seq == last_reassem_seq ) + { // New stuff. 
+ int len = b->Size(); + int seq = last_reassem_seq; + last_reassem_seq += len; + the_file->DataIn(b->block, len, seq); + } + } + + //CheckEOF(); + } + +void FileReassembler::Undelivered(int up_to_seq) + { + //reporter->Warning("should probably do something here (file reassembler undelivered)\n"); + } + +void FileReassembler::Overlap(const u_char* b1, const u_char* b2, int n) + { + //reporter->Warning("should probably do something here (file reassembler overlap)\n"); + } + + +} // end file_analysis diff --git a/src/file_analysis/FileReassembler.h b/src/file_analysis/FileReassembler.h new file mode 100644 index 0000000000..7f73ec6fa4 --- /dev/null +++ b/src/file_analysis/FileReassembler.h @@ -0,0 +1,45 @@ +#ifndef FILE_ANALYSIS_FILEREASSEMBLER_H +#define FILE_ANALYSIS_FILEREASSEMBLER_H + +#include "Reassem.h" +#include "File.h" + +class BroFile; +class Connection; + +namespace file_analysis { + +class File; + +//const int STOP_ON_GAP = 1; +//const int PUNT_ON_PARTIAL = 1; + +class FileReassembler : public Reassembler { +public: + + FileReassembler(File* f, int starting_offset); + virtual ~FileReassembler(); + + void Done(); + + // Checks if we have delivered all contents that we can possibly + // deliver for this endpoint. Calls TCP_Analyzer::EndpointEOF() + // when so. 
+ //void CheckEOF(); + +private: + //DECLARE_SERIAL(FileReassembler); + + void Undelivered(int up_to_seq); + void BlockInserted(DataBlock* b); + void Overlap(const u_char* b1, const u_char* b2, int n); + + unsigned int had_gap:1; + unsigned int did_EOF:1; + unsigned int skip_deliveries:1; + File* the_file; +}; + +} // namespace file_analysis + +#endif diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/a.out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/a.out index 077fb5282c..0eace71c67 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/a.out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/a.out @@ -10,3 +10,6 @@ file #0, 555523, 0 [orig_h=10.101.84.70, orig_p=10977/tcp, resp_h=129.174.93.161, resp_p=80/tcp] total bytes: 555523 source: HTTP +MD5: 5a484ada9c816c0e8b6d2d3978e3f503 +SHA1: 54e7d39e99eb9d40d6251c0361a1090a0d278571 +SHA256: 61c0718bd534ab55716eba161e91bb49155562ddc7c08f0c20f6359d7b808b66