diff --git a/doc/frameworks/file_analysis_02.bro b/doc/frameworks/file_analysis_02.bro index 3ef32d865c..141b11fca6 100644 --- a/doc/frameworks/file_analysis_02.bro +++ b/doc/frameworks/file_analysis_02.bro @@ -1,7 +1,7 @@ -event file_new(f: fa_file) +event file_mime_type(f: fa_file, mime_type: string) { print "new file", f$id; - if ( f?$mime_type && f$mime_type == "text/plain" ) + if ( mime_type == "text/plain" ) Files::add_analyzer(f, Files::ANALYZER_MD5); } diff --git a/doc/httpmonitor/file_extraction.bro b/doc/httpmonitor/file_extraction.bro index b2318c595e..3860cb361e 100644 --- a/doc/httpmonitor/file_extraction.bro +++ b/doc/httpmonitor/file_extraction.bro @@ -7,18 +7,15 @@ global mime_to_ext: table[string] of string = { ["text/html"] = "html", }; -event file_new(f: fa_file) +event file_mime_type(f: fa_file, mime_type: string) { if ( f$source != "HTTP" ) return; - if ( ! f?$mime_type ) + if ( mime_type !in mime_to_ext ) return; - if ( f$mime_type !in mime_to_ext ) - return; - - local fname = fmt("%s-%s.%s", f$source, f$id, mime_to_ext[f$mime_type]); + local fname = fmt("%s-%s.%s", f$source, f$id, mime_to_ext[mime_type]); print fmt("Extracting file %s", fname); Files::add_analyzer(f, Files::ANALYZER_EXTRACT, [$extract_filename=fname]); - } + } \ No newline at end of file diff --git a/doc/scripting/index.rst b/doc/scripting/index.rst index d18f87ac38..fb1c1b67a1 100644 --- a/doc/scripting/index.rst +++ b/doc/scripting/index.rst @@ -103,9 +103,9 @@ In the ``file_hash`` event handler, there is an ``if`` statement that is used to check for the correct type of hash, in this case a SHA1 hash. It also checks for a mime type we've defined as being of interest as defined in the constant ``match_file_types``. -The comparison is made against the expression ``f$mime_type``, which uses +The comparison is made against the expression ``f$info$mime_type``, which uses the ``$`` dereference operator to check the value ``mime_type`` -inside the variable ``f``. 
If the entire expression evaluates to true, +inside the variable ``f$info``. If the entire expression evaluates to true, then a helper function is called to do the rest of the work. In that function, a local variable is defined to hold a string comprised of the SHA1 hash concatenated with ``.malware.hash.cymru.com``; this diff --git a/scripts/base/frameworks/files/main.bro b/scripts/base/frameworks/files/main.bro index d680c467b6..f1f381a141 100644 --- a/scripts/base/frameworks/files/main.bro +++ b/scripts/base/frameworks/files/main.bro @@ -100,8 +100,9 @@ export { ## during the process of analysis e.g. due to dropped packets. missing_bytes: count &log &default=0; - ## The number of not all-in-sequence bytes in the file stream that - ## were delivered to file analyzers due to reassembly buffer overflow. + ## The number of bytes in the file stream that were not delivered to + ## stream file analyzers. This could be overlapping bytes or + ## bytes that couldn't be reassembled. overflow_bytes: count &log &default=0; ## Whether the file analysis timed out at least once for the file. @@ -124,6 +125,37 @@ export { ## generate two handles that would hash to the same file id. const salt = "I recommend changing this." &redef; + ## Decide whether to automatically attach analyzers to + ## files based on the detected mime type of the file. + const analyze_by_mime_type_automatically = T &redef; + + ## The default setting for whether the file reassembler is enabled for + ## each file. + const enable_reassembler = T &redef; + + ## The default maximum per-file reassembly buffer size. + const reassembly_buffer_size = 1048576 &redef; + + ## Allows the file reassembler to be used if it's necessary because the + ## file is transferred out of order. + ## + ## f: the file. + global enable_reassembly: function(f: fa_file); + + ## Disables the file reassembler on this file. If the file is not + ## transferred out of order this will have no effect. + ## + ## f: the file. 
+ global disable_reassembly: function(f: fa_file); + + ## Set the maximum size the reassembly buffer is allowed to grow + ## for the given file. + ## + ## f: the file. + ## + ## max: Maximum allowed size of the reassembly buffer. + global set_reassembly_buffer_size: function(f: fa_file, max: count); + ## Sets the *timeout_interval* field of :bro:see:`fa_file`, which is ## used to determine the length of inactivity that is allowed for a file ## before internal state related to it is cleaned up. When used within @@ -153,15 +185,6 @@ export { tag: Files::Tag, args: AnalyzerArgs &default=AnalyzerArgs()): bool; - ## Adds all analyzers associated with a give MIME type to the analysis of - ## a file. Note that analyzers added via MIME types cannot take further - ## arguments. - ## - ## f: the file. - ## - ## mtype: the MIME type; it will be compared case-insensitive. - global add_analyzers_for_mime_type: function(f: fa_file, mtype: string); - ## Removes an analyzer from the analysis of a given file. ## ## f: the file. @@ -284,6 +307,7 @@ global registered_protocols: table[Analyzer::Tag] of ProtoRegistration = table() # Store the MIME type to analyzer mappings. 
global mime_types: table[Analyzer::Tag] of set[string]; +global mime_type_to_analyzers: table[string] of set[Analyzer::Tag]; global analyzer_add_callbacks: table[Files::Tag] of function(f: fa_file, args: AnalyzerArgs) = table(); @@ -313,8 +337,6 @@ function set_info(f: fa_file) f$info$overflow_bytes = f$overflow_bytes; if ( f?$is_orig ) f$info$is_orig = f$is_orig; - if ( f?$mime_type ) - f$info$mime_type = f$mime_type; } function set_timeout_interval(f: fa_file, t: interval): bool @@ -322,6 +344,21 @@ function set_timeout_interval(f: fa_file, t: interval): bool return __set_timeout_interval(f$id, t); } +function enable_reassembly(f: fa_file) + { + __enable_reassembly(f$id); + } + +function disable_reassembly(f: fa_file) + { + __disable_reassembly(f$id); + } + +function set_reassembly_buffer_size(f: fa_file, max: count) + { + __set_reassembly_buffer(f$id, max); + } + function add_analyzer(f: fa_file, tag: Files::Tag, args: AnalyzerArgs): bool { add f$info$analyzers[Files::analyzer_name(tag)]; @@ -337,15 +374,6 @@ function add_analyzer(f: fa_file, tag: Files::Tag, args: AnalyzerArgs): bool return T; } -function add_analyzers_for_mime_type(f: fa_file, mtype: string) - { - local dummy_args: AnalyzerArgs; - local analyzers = __add_analyzers_for_mime_type(f$id, mtype, dummy_args); - - for ( tag in analyzers ) - add f$info$analyzers[Files::analyzer_name(tag)]; - } - function register_analyzer_add_callback(tag: Files::Tag, callback: function(f: fa_file, args: AnalyzerArgs)) { analyzer_add_callbacks[tag] = callback; @@ -366,42 +394,6 @@ function analyzer_name(tag: Files::Tag): string return __analyzer_name(tag); } -event file_new(f: fa_file) &priority=10 - { - set_info(f); - - if ( f?$mime_type ) - add_analyzers_for_mime_type(f, f$mime_type); - } - -event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=10 - { - set_info(f); - add f$info$conn_uids[c$uid]; - local cid = c$id; - add f$info$tx_hosts[f$is_orig ? 
cid$orig_h : cid$resp_h]; - if( |Site::local_nets| > 0 ) - f$info$local_orig=Site::is_local_addr(f$is_orig ? cid$orig_h : cid$resp_h); - - add f$info$rx_hosts[f$is_orig ? cid$resp_h : cid$orig_h]; - } - -event file_timeout(f: fa_file) &priority=10 - { - set_info(f); - f$info$timedout = T; - } - -event file_state_remove(f: fa_file) &priority=10 - { - set_info(f); - } - -event file_state_remove(f: fa_file) &priority=-10 - { - Log::write(Files::LOG, f$info); - } - function register_protocol(tag: Analyzer::Tag, reg: ProtoRegistration): bool { local result = (tag !in registered_protocols); @@ -424,13 +416,18 @@ function register_for_mime_types(tag: Analyzer::Tag, mime_types: set[string]) : function register_for_mime_type(tag: Analyzer::Tag, mt: string) : bool { - if ( ! __register_for_mime_type(tag, mt) ) - return F; - if ( tag !in mime_types ) + { mime_types[tag] = set(); - + } add mime_types[tag][mt]; + + if ( mt !in mime_type_to_analyzers ) + { + mime_type_to_analyzers[mt] = set(); + } + add mime_type_to_analyzers[mt][tag]; + return T; } @@ -462,3 +459,62 @@ event get_file_handle(tag: Analyzer::Tag, c: connection, is_orig: bool) &priorit local handler = registered_protocols[tag]; set_file_handle(handler$get_file_handle(c, is_orig)); } + +event file_new(f: fa_file) &priority=10 + { + set_info(f); + + if ( enable_reassembler ) + { + Files::enable_reassembly(f); + Files::set_reassembly_buffer_size(f, reassembly_buffer_size); + } + } + +event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=10 + { + set_info(f); + + add f$info$conn_uids[c$uid]; + local cid = c$id; + add f$info$tx_hosts[f$is_orig ? cid$orig_h : cid$resp_h]; + if( |Site::local_nets| > 0 ) + f$info$local_orig=Site::is_local_addr(f$is_orig ? cid$orig_h : cid$resp_h); + + add f$info$rx_hosts[f$is_orig ? 
cid$resp_h : cid$orig_h]; + } + +event file_mime_type(f: fa_file, mime_type: string) &priority=10 + { + set_info(f); + + f$info$mime_type = mime_type; + + + if ( analyze_by_mime_type_automatically && + mime_type in mime_type_to_analyzers ) + { + local analyzers = mime_type_to_analyzers[mime_type]; + for ( a in analyzers ) + { + add f$info$analyzers[Files::analyzer_name(a)]; + Files::add_analyzer(f, a); + } + } + } + +event file_timeout(f: fa_file) &priority=10 + { + set_info(f); + f$info$timedout = T; + } + +event file_state_remove(f: fa_file) &priority=10 + { + set_info(f); + } + +event file_state_remove(f: fa_file) &priority=-10 + { + Log::write(Files::LOG, f$info); + } diff --git a/scripts/base/frameworks/intel/main.bro b/scripts/base/frameworks/intel/main.bro index 5b31dd964e..b5f305a7fc 100644 --- a/scripts/base/frameworks/intel/main.bro +++ b/scripts/base/frameworks/intel/main.bro @@ -289,8 +289,8 @@ event Intel::match(s: Seen, items: set[Item]) &priority=5 if ( ! info?$fuid ) info$fuid = s$f$id; - if ( ! info?$file_mime_type && s$f?$mime_type ) - info$file_mime_type = s$f$mime_type; + if ( ! info?$file_mime_type && s$f?$info && s$f$info?$mime_type ) + info$file_mime_type = s$f$info$mime_type; if ( ! 
info?$file_desc ) info$file_desc = Files::describe(s$f); diff --git a/scripts/base/frameworks/notice/main.bro b/scripts/base/frameworks/notice/main.bro index 4790245de0..d7d9bd61c9 100644 --- a/scripts/base/frameworks/notice/main.bro +++ b/scripts/base/frameworks/notice/main.bro @@ -531,8 +531,8 @@ function create_file_info(f: fa_file): Notice::FileInfo local fi: Notice::FileInfo = Notice::FileInfo($fuid = f$id, $desc = Files::describe(f)); - if ( f?$mime_type ) - fi$mime = f$mime_type; + if ( f?$info && f$info?$mime_type ) + fi$mime = f$info$mime_type; if ( f?$conns && |f$conns| == 1 ) for ( id in f$conns ) diff --git a/scripts/base/init-bare.bro b/scripts/base/init-bare.bro index efce524fc5..b112d3ea0f 100644 --- a/scripts/base/init-bare.bro +++ b/scripts/base/init-bare.bro @@ -353,9 +353,9 @@ type connection: record { ## gives up and discards any internal state related to the file. const default_file_timeout_interval: interval = 2 mins &redef; -## Default amount of bytes that file analysis will buffer before raising -## :bro:see:`file_new`. -const default_file_bof_buffer_size: count = 1024 &redef; +## Default amount of bytes that file analysis will buffer to provide +## data back in time to attached analyzers +const default_file_bof_buffer_size: count = 4096 &redef; ## A file that Bro is analyzing. This is Bro's type for describing the basic ## internal metadata collected about a "file", which is essentially just a @@ -394,8 +394,9 @@ type fa_file: record { ## during the process of analysis e.g. due to dropped packets. missing_bytes: count &default=0; - ## The number of not all-in-sequence bytes in the file stream that - ## were delivered to file analyzers due to reassembly buffer overflow. + ## The number of bytes in the file stream that were not delivered to + ## stream file analyzers. This could be overlapping bytes or + ## bytes that couldn't be reassembled. 
overflow_bytes: count &default=0; ## The amount of time between receiving new data for this file that @@ -409,16 +410,6 @@ type fa_file: record { ## The content of the beginning of a file up to *bof_buffer_size* bytes. ## This is also the buffer that's used for file/mime type detection. bof_buffer: string &optional; - - ## The mime type of the strongest file magic signature matches against - ## the data chunk in *bof_buffer*, or in the cases where no buffering - ## of the beginning of file occurs, an initial guess of the mime type - ## based on the first data seen. - mime_type: string &optional; - - ## All mime types that matched file magic signatures against the data - ## chunk in *bof_buffer*, in order of their strength value. - mime_types: mime_matches &optional; } &redef; ## Fields of a SYN packet. diff --git a/scripts/base/protocols/ftp/files.bro b/scripts/base/protocols/ftp/files.bro index b507ca32a7..617b57348b 100644 --- a/scripts/base/protocols/ftp/files.bro +++ b/scripts/base/protocols/ftp/files.bro @@ -17,6 +17,10 @@ export { ## Describe the file being transferred. global describe_file: function(f: fa_file): string; + + redef record fa_file += { + ftp: FTP::Info &optional; + }; } function get_file_handle(c: connection, is_orig: bool): string @@ -48,7 +52,6 @@ event bro_init() &priority=5 $describe = FTP::describe_file]); } - event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=5 { if ( [c$id$resp_h, c$id$resp_p] !in ftp_data_expected ) @@ -56,6 +59,14 @@ event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priori local ftp = ftp_data_expected[c$id$resp_h, c$id$resp_p]; ftp$fuid = f$id; - if ( f?$mime_type ) - ftp$mime_type = f$mime_type; + + f$ftp = ftp; + } + +event file_mime_type(f: fa_file, mime_type: string) &priority=5 + { + if ( ! 
f?$ftp ) + return; + + f$ftp$mime_type = mime_type; } diff --git a/scripts/base/protocols/http/entities.bro b/scripts/base/protocols/http/entities.bro index ff5c915801..9fcf7f24f7 100644 --- a/scripts/base/protocols/http/entities.bro +++ b/scripts/base/protocols/http/entities.bro @@ -35,6 +35,10 @@ export { ## body. resp_mime_depth: count &default=0; }; + + redef record fa_file += { + http: HTTP::Info &optional; + }; } event http_begin_entity(c: connection, is_orig: bool) &priority=10 @@ -67,6 +71,8 @@ event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priori { if ( f$source == "HTTP" && c?$http ) { + f$http = c$http; + if ( c$http?$current_entity && c$http$current_entity?$filename ) f$info$filename = c$http$current_entity$filename; @@ -76,14 +82,6 @@ event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priori c$http$orig_fuids = string_vec(f$id); else c$http$orig_fuids[|c$http$orig_fuids|] = f$id; - - if ( f?$mime_type ) - { - if ( ! c$http?$orig_mime_types ) - c$http$orig_mime_types = string_vec(f$mime_type); - else - c$http$orig_mime_types[|c$http$orig_mime_types|] = f$mime_type; - } } else { @@ -91,17 +89,29 @@ event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priori c$http$resp_fuids = string_vec(f$id); else c$http$resp_fuids[|c$http$resp_fuids|] = f$id; - - if ( f?$mime_type ) - { - if ( ! c$http?$resp_mime_types ) - c$http$resp_mime_types = string_vec(f$mime_type); - else - c$http$resp_mime_types[|c$http$resp_mime_types|] = f$mime_type; - } } } + } +event file_mime_type(f: fa_file, mime_type: string) &priority=5 + { + if ( ! f?$http || ! f?$is_orig ) + return; + + if ( f$is_orig ) + { + if ( ! f$http?$orig_mime_types ) + f$http$orig_mime_types = string_vec(mime_type); + else + f$http$orig_mime_types[|f$http$orig_mime_types|] = mime_type; + } + else + { + if ( ! 
f$http?$resp_mime_types ) + f$http$resp_mime_types = string_vec(mime_type); + else + f$http$resp_mime_types[|f$http$resp_mime_types|] = mime_type; + } } event http_end_entity(c: connection, is_orig: bool) &priority=5 diff --git a/scripts/base/protocols/irc/files.bro b/scripts/base/protocols/irc/files.bro index 7e077c8331..518775abb4 100644 --- a/scripts/base/protocols/irc/files.bro +++ b/scripts/base/protocols/irc/files.bro @@ -12,6 +12,10 @@ export { ## Default file handle provider for IRC. global get_file_handle: function(c: connection, is_orig: bool): string; + + redef record fa_file += { + irc: IRC::Info &optional; + }; } function get_file_handle(c: connection, is_orig: bool): string @@ -34,6 +38,12 @@ event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priori irc$fuid = f$id; if ( irc?$dcc_file_name ) f$info$filename = irc$dcc_file_name; - if ( f?$mime_type ) - irc$dcc_mime_type = f$mime_type; + + f$irc = irc; } + +event file_mime_type(f: fa_file, mime_type: string) &priority=5 + { + if ( f?$irc ) + f$irc$dcc_mime_type = mime_type; + } \ No newline at end of file diff --git a/scripts/policy/frameworks/files/detect-MHR.bro b/scripts/policy/frameworks/files/detect-MHR.bro index 3000d2d83e..d0b8a852e6 100644 --- a/scripts/policy/frameworks/files/detect-MHR.bro +++ b/scripts/policy/frameworks/files/detect-MHR.bro @@ -66,6 +66,7 @@ function do_mhr_lookup(hash: string, fi: Notice::FileInfo) event file_hash(f: fa_file, kind: string, hash: string) { - if ( kind == "sha1" && f?$mime_type && match_file_types in f$mime_type ) + if ( kind == "sha1" && f?$info && f$info?$mime_type && + match_file_types in f$info$mime_type ) do_mhr_lookup(hash, Notice::create_file_info(f)); } diff --git a/src/Frag.cc b/src/Frag.cc index d0389c264a..8ada148750 100644 --- a/src/Frag.cc +++ b/src/Frag.cc @@ -28,7 +28,7 @@ void FragTimer::Dispatch(double t, int /* is_expire */) FragReassembler::FragReassembler(NetSessions* arg_s, const IP_Hdr* ip, const u_char* pkt, 
HashKey* k, double t) - : Reassembler(0, REASSEM_IP) + : Reassembler(0) { s = arg_s; key = k; diff --git a/src/Reassem.cc b/src/Reassem.cc index 1ad0cb2717..8bf965427b 100644 --- a/src/Reassem.cc +++ b/src/Reassem.cc @@ -31,7 +31,7 @@ DataBlock::DataBlock(const u_char* data, uint64 size, uint64 arg_seq, uint64 Reassembler::total_size = 0; -Reassembler::Reassembler(uint64 init_seq, ReassemblerType arg_type) +Reassembler::Reassembler(uint64 init_seq) { blocks = last_block = 0; trim_seq = last_reassem_seq = init_seq; diff --git a/src/Reassem.h b/src/Reassem.h index 7b77a628d8..39617f7816 100644 --- a/src/Reassem.h +++ b/src/Reassem.h @@ -22,11 +22,10 @@ public: }; -enum ReassemblerType { REASSEM_IP, REASSEM_TCP }; class Reassembler : public BroObj { public: - Reassembler(uint64 init_seq, ReassemblerType arg_type); + Reassembler(uint64 init_seq); virtual ~Reassembler(); void NewBlock(double t, uint64 seq, uint64 len, const u_char* data); diff --git a/src/SerialTypes.h b/src/SerialTypes.h index 81ccbc030e..d2f227838c 100644 --- a/src/SerialTypes.h +++ b/src/SerialTypes.h @@ -87,6 +87,7 @@ SERIAL_TCP_CONTENTS(TCP_NVT, 3) #define SERIAL_REASSEMBLER(name, val) SERIAL_CONST(name, val, REASSEMBLER) SERIAL_REASSEMBLER(REASSEMBLER, 1) SERIAL_REASSEMBLER(TCP_REASSEMBLER, 2) +SERIAL_REASSEMBLER(FILE_REASSEMBLER, 3) #define SERIAL_VAL(name, val) SERIAL_CONST(name, val, VAL) SERIAL_VAL(VAL, 1) diff --git a/src/analyzer/protocol/tcp/TCP_Reassembler.cc b/src/analyzer/protocol/tcp/TCP_Reassembler.cc index e00e32ef1b..16bb9cc56d 100644 --- a/src/analyzer/protocol/tcp/TCP_Reassembler.cc +++ b/src/analyzer/protocol/tcp/TCP_Reassembler.cc @@ -28,7 +28,7 @@ TCP_Reassembler::TCP_Reassembler(analyzer::Analyzer* arg_dst_analyzer, TCP_Analyzer* arg_tcp_analyzer, TCP_Reassembler::Type arg_type, TCP_Endpoint* arg_endp) - : Reassembler(1, REASSEM_TCP) + : Reassembler(1) { dst_analyzer = arg_dst_analyzer; tcp_analyzer = arg_tcp_analyzer; diff --git a/src/event.bif b/src/event.bif index 
4006888eab..dd941b6736 100644 --- a/src/event.bif +++ b/src/event.bif @@ -905,7 +905,8 @@ event get_file_handle%(tag: Analyzer::Tag, c: connection, is_orig: bool%); ## ## f: The file. ## -## .. bro:see:: file_over_new_connection file_timeout file_gap file_state_remove +## .. bro:see:: file_over_new_connection file_timeout file_gap file_mime_type +## file_state_remove event file_new%(f: fa_file%); ## Indicates that a file has been seen being transferred over a connection @@ -917,16 +918,39 @@ event file_new%(f: fa_file%); ## ## is_orig: true if the originator of *c* is the one sending the file. ## -## .. bro:see:: file_new file_timeout file_gap file_state_remove +## .. bro:see:: file_new file_timeout file_gap file_mime_type +## file_state_remove event file_over_new_connection%(f: fa_file, c: connection, is_orig: bool%); +## Provide the most likely matching MIME type for this file. The analysis +## can be augmented at this time via :bro:see:`Files::add_analyzer`. +## +## f: The file. +## +## mime_type: The mime type that was discovered. +## +## .. bro:see:: file_new file_over_new_connection file_timeout file_gap +## file_mime_types file_state_remove +event file_mime_type%(f: fa_file, mime_type: string%); + +## Provide all matching MIME types for this file. The analysis can be +## augmented at this time via :bro:see:`Files::add_analyzer`. +## +## f: The file. +## +## mime_types: The mime types that were discovered. +## +## .. bro:see:: file_new file_over_new_connection file_timeout file_gap +## file_mime_type file_state_remove +event file_mime_types%(f: fa_file, mime_types: mime_matches%); + ## Indicates that file analysis has timed out because no activity was seen ## for the file in a while. ## ## f: The file. ## -## .. bro:see:: file_new file_over_new_connection file_gap file_state_remove -## default_file_timeout_interval Files::set_timeout_interval +## .. 
bro:see:: file_new file_over_new_connection file_gap file_mime_type +## file_mime_types file_state_remove default_file_timeout_interval ## Files::set_timeout_interval event file_timeout%(f: fa_file%); @@ -938,14 +962,34 @@ event file_timeout%(f: fa_file%); ## ## len: The number of missing bytes. ## -## .. bro:see:: file_new file_over_new_connection file_timeout file_state_remove +## .. bro:see:: file_new file_over_new_connection file_timeout file_mime_type +## file_mime_types file_state_remove file_reassembly_overflow event file_gap%(f: fa_file, offset: count, len: count%); +## Indicates that the file had an overflow of the reassembly buffer. +## This is a specialization of the :bro:id:`file_gap` event. +## +## f: The file. +## +## offset: The byte offset from the start of the file at which the reassembly +## couldn't continue due to running out of reassembly buffer space. +## +## skipped: The number of bytes of the file skipped over to flush some +## file data and get back under the reassembly buffer size limit. +## This value will also be represented as a gap. +## +## .. bro:see:: file_new file_over_new_connection file_timeout file_mime_type +## file_mime_types file_state_remove file_gap Files::enable_reassembler +## Files::reassembly_buffer_size Files::enable_reassembly +## Files::disable_reassembly Files::set_reassembly_buffer_size +event file_reassembly_overflow%(f: fa_file, offset: count, skipped: count%); + ## This event is generated each time file analysis is ending for a given file. ## ## f: The file. ## ## .. bro:see:: file_new file_over_new_connection file_timeout file_gap +## file_mime_type file_mime_types event file_state_remove%(f: fa_file%); ## Generated when an internal DNS lookup produces the same result as last time. 
diff --git a/src/file_analysis/AnalyzerSet.cc b/src/file_analysis/AnalyzerSet.cc
index b3f11b6816..2657a5b709 100644
--- a/src/file_analysis/AnalyzerSet.cc
+++ b/src/file_analysis/AnalyzerSet.cc
@@ -72,10 +72,10 @@ bool AnalyzerSet::Add(file_analysis::Tag tag, RecordVal* args)
 	return true;
 	}
 
-bool AnalyzerSet::QueueAdd(file_analysis::Tag tag, RecordVal* args)
+bool AnalyzerSet::QueueAdd(file_analysis::Tag tag, RecordVal* args, file_analysis::Analyzer*& a)
 	{
 	HashKey* key = GetKey(tag, args);
-	file_analysis::Analyzer* a = InstantiateAnalyzer(tag, args);
+	a = InstantiateAnalyzer(tag, args);
 
 	if ( ! a )
 		{
diff --git a/src/file_analysis/AnalyzerSet.h b/src/file_analysis/AnalyzerSet.h
index 38eddb8967..839425980c 100644
--- a/src/file_analysis/AnalyzerSet.h
+++ b/src/file_analysis/AnalyzerSet.h
@@ -57,9 +57,10 @@ public:
 	 * Queue the attachment of an analyzer to #file.
 	 * @param tag the analyzer tag of the file analyzer to add.
 	 * @param args an \c AnalyzerArgs value which specifies an analyzer.
+	 * @param a out-parameter that receives the instantiated analyzer, or null if instantiation failed.
 	 * @return true if analyzer was able to be instantiated, else false.
 	 */
-	bool QueueAdd(file_analysis::Tag tag, RecordVal* args);
+	bool QueueAdd(file_analysis::Tag tag, RecordVal* args, file_analysis::Analyzer*& a);
 
 	/**
 	 * Remove an analyzer from #file immediately.
diff --git a/src/file_analysis/CMakeLists.txt b/src/file_analysis/CMakeLists.txt index 846fc4bf15..34dc8d5387 100644 --- a/src/file_analysis/CMakeLists.txt +++ b/src/file_analysis/CMakeLists.txt @@ -11,6 +11,7 @@ set(file_analysis_SRCS Manager.cc File.cc FileTimer.cc + FileReassembler.cc Analyzer.cc AnalyzerSet.cc Component.cc diff --git a/src/file_analysis/File.cc b/src/file_analysis/File.cc index 4509fc7d42..e4e9b6dc9d 100644 --- a/src/file_analysis/File.cc +++ b/src/file_analysis/File.cc @@ -53,8 +53,6 @@ int File::overflow_bytes_idx = -1; int File::timeout_interval_idx = -1; int File::bof_buffer_size_idx = -1; int File::bof_buffer_idx = -1; -int File::mime_type_idx = -1; -int File::mime_types_idx = -1; void File::StaticInit() { @@ -74,15 +72,14 @@ void File::StaticInit() timeout_interval_idx = Idx("timeout_interval"); bof_buffer_size_idx = Idx("bof_buffer_size"); bof_buffer_idx = Idx("bof_buffer"); - mime_type_idx = Idx("mime_type"); - mime_types_idx = Idx("mime_types"); } File::File(const string& file_id, Connection* conn, analyzer::Tag tag, bool is_orig) - : id(file_id), val(0), postpone_timeout(false), first_chunk(true), - missed_bof(false), need_reassembly(false), done(false), - did_file_new_event(false), analyzers(this) + : id(file_id), val(0), file_reassembler(0), stream_offset(0), + reassembly_max_buffer(0), did_mime_type(false), + reassembly_enabled(false), postpone_timeout(false), done(false), + analyzers(this) { StaticInit(); @@ -96,7 +93,6 @@ File::File(const string& file_id, Connection* conn, analyzer::Tag tag, // add source, connection, is_orig fields SetSource(analyzer_mgr->GetComponentName(tag)); val->Assign(is_orig_idx, new Val(is_orig, TYPE_BOOL)); - UpdateConnectionFields(conn, is_orig); } UpdateLastActivityTime(); @@ -107,10 +103,9 @@ File::~File() DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Destroying File object", id.c_str()); Unref(val); - while ( ! 
fonc_queue.empty() ) + if ( file_reassembler ) { - delete_vals(fonc_queue.front().second); - fonc_queue.pop(); + delete file_reassembler; } } @@ -150,11 +145,7 @@ void File::UpdateConnectionFields(Connection* conn, bool is_orig) vl->append(conn_val->Ref()); vl->append(new Val(is_orig, TYPE_BOOL)); - if ( did_file_new_event ) - FileEvent(file_over_new_connection, vl); - else - fonc_queue.push(pair( - file_over_new_connection, vl)); + FileEvent(file_over_new_connection, vl); } } @@ -242,7 +233,7 @@ bool File::IsComplete() const if ( ! total ) return false; - if ( LookupFieldDefaultCount(seen_bytes_idx) >= total->AsCount() ) + if ( stream_offset >= total->AsCount() ) return true; return false; @@ -258,7 +249,23 @@ bool File::AddAnalyzer(file_analysis::Tag tag, RecordVal* args) DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Queuing addition of %s analyzer", id.c_str(), file_mgr->GetComponentName(tag).c_str()); - return done ? false : analyzers.QueueAdd(tag, args); + if ( done ) + return false; + + file_analysis::Analyzer *a = 0; + bool success = analyzers.QueueAdd(tag, args, a); + if ( success && a ) + { + // Replay the buffered beginning-of-file chunks so the new analyzer catches up. + for ( size_t i = 0; i < bof_buffer.chunks.size(); ++i ) + { + if ( a->DeliverStream(bof_buffer.chunks[i]->Bytes(), bof_buffer.chunks[i]->Len()) ) + continue; + analyzers.QueueRemove(a->Tag(), a->Args()); // analyzer rejected the data + break; // queue the removal once and stop replaying + } + } + return success; } bool File::RemoveAnalyzer(file_analysis::Tag tag, RecordVal* args) @@ -269,9 +276,61 @@ bool File::RemoveAnalyzer(file_analysis::Tag tag, RecordVal* args) return done ? 
false : analyzers.QueueRemove(tag, args); } +void File::EnableReassembly() + { + reassembly_enabled = true; + } + +void File::DisableReassembly() + { + reassembly_enabled = false; + if ( file_reassembler ) + { + delete file_reassembler; + file_reassembler = NULL; + } + } + +void File::SetReassemblyBuffer(uint64 max) + { + reassembly_max_buffer = max; + } + +bool File::DetectMIME() + { + RuleMatcher::MIME_Matches matches; + + BroString *bs = concatenate(bof_buffer.chunks); + const u_char* data = bs->Bytes(); + uint64 len = min((uint64) bs->Len(), LookupFieldDefaultCount(bof_buffer_size_idx)); + file_mgr->DetectMIME(data, len, &matches); + delete bs; // matching is done; free the temporary concatenation of the BOF chunks + + if ( matches.empty() ) + return false; + + if ( FileEventAvailable(file_mime_type) ) + { + val_list* vl = new val_list(); + vl->append(val->Ref()); + vl->append(new StringVal(*(matches.begin()->second.begin()))); + FileEvent(file_mime_type, vl); + } + + if ( FileEventAvailable(file_mime_types) ) + { + val_list* vl = new val_list(); + vl->append(val->Ref()); + vl->append(file_analysis::GenMIMEMatchesVal(matches)); + FileEvent(file_mime_types, vl); + } + + return true; + } + bool File::BufferBOF(const u_char* data, uint64 len) { - if ( bof_buffer.full || bof_buffer.replayed ) + if ( bof_buffer.full ) return false; uint64 desired_size = LookupFieldDefaultCount(bof_buffer_size_idx); @@ -282,101 +341,91 @@ bool File::BufferBOF(const u_char* data, uint64 len) if ( bof_buffer.size >= desired_size ) { bof_buffer.full = true; - ReplayBOF(); } return true; } -bool File::DetectMIME(const u_char* data, uint64 len) +void File::DeliverStream(const u_char* data, uint64 len) { - RuleMatcher::MIME_Matches matches; - len = min(len, LookupFieldDefaultCount(bof_buffer_size_idx)); - file_mgr->DetectMIME(data, len, &matches); + // Buffer enough data for the BOF buffer + BufferBOF(data, len); - if ( matches.empty() ) - return false; - - val->Assign(mime_type_idx, - new StringVal(*(matches.begin()->second.begin()))); - val->Assign(mime_types_idx, 
file_analysis::GenMIMEMatchesVal(matches)); - - return true; - } - -void File::ReplayBOF() - { - if ( bof_buffer.replayed ) - return; - - bof_buffer.replayed = true; - - if ( bof_buffer.chunks.empty() ) + // TODO: mime matching size needs defined. + if ( ! did_mime_type && + bof_buffer.size >= 1024 && + LookupFieldDefaultCount(missing_bytes_idx) == 0 ) { - // Since we missed the beginning, try file type detect on next data in. - missed_bof = true; - return; + did_mime_type = true; + DetectMIME(); + + // TODO: this needs to be done elsewhere. For now it's here. + BroString* bs = concatenate(bof_buffer.chunks); + val->Assign(bof_buffer_idx, new StringVal(bs)); } - BroString* bs = concatenate(bof_buffer.chunks); - val->Assign(bof_buffer_idx, new StringVal(bs)); - - DetectMIME(bs->Bytes(), bs->Len()); - FileEvent(file_new); - - for ( size_t i = 0; i < bof_buffer.chunks.size(); ++i ) - DataIn(bof_buffer.chunks[i]->Bytes(), bof_buffer.chunks[i]->Len()); - } - -void File::DataIn(const u_char* data, uint64 len, uint64 offset) - { - analyzers.DrainModifications(); - - if ( first_chunk ) - { - // TODO: this should all really be delayed until we attempt reassembly - DetectMIME(data, len); - FileEvent(file_new); - first_chunk = false; - } - - DBG_LOG(DBG_FILE_ANALYSIS, "[%s] %" PRIu64 " bytes in at offset" PRIu64 "; %s [%s]", - id.c_str(), len, offset, + DBG_LOG(DBG_FILE_ANALYSIS, "[%s] %" PRIu64 " bytes in at offset %" PRIu64 "; %s [%s]", + id.c_str(), len, stream_offset, IsComplete() ? "complete" : "incomplete", fmt_bytes((const char*) data, min((uint64)40, len)), len > 40 ? "..." : ""); file_analysis::Analyzer* a = 0; IterCookie* c = analyzers.InitForIteration(); - while ( (a = analyzers.NextEntry(c)) ) { - if ( ! 
a->DeliverChunk(data, len, offset) ) + if ( !a->DeliverStream(data, len) ) + { analyzers.QueueRemove(a->Tag(), a->Args()); + } } - analyzers.DrainModifications(); - - // TODO: check reassembly requirement based on buffer size in record - if ( need_reassembly ) - reporter->InternalError("file_analyzer::File TODO: reassembly not yet supported"); - - // TODO: reassembly overflow stuff, increment overflow count, eval trigger - + stream_offset += len; IncrementByteCount(len, seen_bytes_idx); } -void File::DataIn(const u_char* data, uint64 len) +void File::DeliverChunk(const u_char* data, uint64 len, uint64 offset) { - analyzers.DrainModifications(); - - if ( BufferBOF(data, len) ) - return; - - if ( missed_bof ) + // Potentially handle reassembly and deliver to the stream analyzers. + if ( file_reassembler ) { - DetectMIME(data, len); - FileEvent(file_new); - missed_bof = false; + if ( reassembly_max_buffer > 0 && + reassembly_max_buffer < file_reassembler->TotalSize() ) + { + uint64 first_offset = file_reassembler->GetFirstBlockOffset(); + int gap_bytes = file_reassembler->TrimToSeq(first_offset); + + if ( FileEventAvailable(file_reassembly_overflow) ) + { + val_list* vl = new val_list(); + vl->append(val->Ref()); + vl->append(new Val(stream_offset, TYPE_COUNT)); + vl->append(new Val(gap_bytes, TYPE_COUNT)); + FileEvent(file_reassembly_overflow, vl); + } + + Gap(stream_offset, gap_bytes); + } + + // Forward data to the reassembler. + file_reassembler->NewBlock(network_time, offset, len, data); + } + else if ( stream_offset == offset ) + { + // This is the normal case where a file is transferred linearly. + // Nothing special should be done here. + DeliverStream(data, len); + } + else if ( reassembly_enabled ) + { + // This is data that doesn't match the offset and the reassembler + // needs to be enabled. 
+ file_reassembler = new FileReassembler(this, stream_offset); + file_reassembler->NewBlock(network_time, offset, len, data); + } + else + { + // We can't reassemble so we throw out the data for streaming. + IncrementByteCount(len, overflow_bytes_idx); } DBG_LOG(DBG_FILE_ANALYSIS, "[%s] %" PRIu64 " bytes in; %s [%s]", @@ -386,24 +435,37 @@ void File::DataIn(const u_char* data, uint64 len) file_analysis::Analyzer* a = 0; IterCookie* c = analyzers.InitForIteration(); - while ( (a = analyzers.NextEntry(c)) ) { - if ( ! a->DeliverStream(data, len) ) + if ( !a->DeliverChunk(data, len, offset) ) { analyzers.QueueRemove(a->Tag(), a->Args()); - continue; } - - uint64 offset = LookupFieldDefaultCount(seen_bytes_idx) + - LookupFieldDefaultCount(missing_bytes_idx); - - if ( ! a->DeliverChunk(data, len, offset) ) - analyzers.QueueRemove(a->Tag(), a->Args()); } + if ( IsComplete() ) + { + // If the file is complete we can automatically go and close out the file from here. + EndOfFile(); + } + } + + +void File::DataIn(const u_char* data, uint64 len, uint64 offset) + { + analyzers.DrainModifications(); + DeliverChunk(data, len, offset); + analyzers.DrainModifications(); + } + +void File::DataIn(const u_char* data, uint64 len) + { + analyzers.DrainModifications(); + + uint64 offset = LookupFieldDefaultCount(seen_bytes_idx) + + LookupFieldDefaultCount(missing_bytes_idx); + DeliverChunk(data, len, offset); analyzers.DrainModifications(); - IncrementByteCount(len, seen_bytes_idx); } void File::EndOfFile() @@ -413,10 +475,19 @@ void File::EndOfFile() if ( done ) return; - analyzers.DrainModifications(); + if ( ! did_mime_type ) + { + DetectMIME(); - // Send along anything that's been buffered, but never flushed. - ReplayBOF(); + // TODO: this also needs to be done elsewhere. 
+ if ( bof_buffer.size > 0 ) + { + BroString* bs = concatenate(bof_buffer.chunks); + val->Assign(bof_buffer_idx, new StringVal(bs)); + } + } + + analyzers.DrainModifications(); done = true; @@ -441,10 +512,6 @@ void File::Gap(uint64 offset, uint64 len) analyzers.DrainModifications(); - // If we were buffering the beginning of the file, a gap means we've got - // as much contiguous stuff at the beginning as possible, so work with that. - ReplayBOF(); - file_analysis::Analyzer* a = 0; IterCookie* c = analyzers.InitForIteration(); @@ -464,6 +531,8 @@ void File::Gap(uint64 offset, uint64 len) } analyzers.DrainModifications(); + + stream_offset += len; IncrementByteCount(len, missing_bytes_idx); } @@ -482,30 +551,13 @@ void File::FileEvent(EventHandlerPtr h) FileEvent(h, vl); } -static void flush_file_event_queue(queue >& q) - { - while ( ! q.empty() ) - { - pair p = q.front(); - mgr.QueueEvent(p.first, p.second); - q.pop(); - } - } - void File::FileEvent(EventHandlerPtr h, val_list* vl) { - if ( h == file_state_remove ) - flush_file_event_queue(fonc_queue); - mgr.QueueEvent(h, vl); - if ( h == file_new ) - { - did_file_new_event = true; - flush_file_event_queue(fonc_queue); - } - - if ( h == file_new || h == file_timeout || h == file_extraction_limit ) + if ( h == file_new || h == file_over_new_connection || + h == file_mime_type || + h == file_timeout || h == file_extraction_limit ) { // immediate feedback is required for these events. mgr.Drain(); diff --git a/src/file_analysis/File.h b/src/file_analysis/File.h index 86f60caf9f..bfd38a263c 100644 --- a/src/file_analysis/File.h +++ b/src/file_analysis/File.h @@ -8,6 +8,7 @@ #include #include +#include "FileReassembler.h" #include "Conn.h" #include "Val.h" #include "Tag.h" @@ -16,6 +17,8 @@ namespace file_analysis { +class FileReassembler; + /** * Wrapper class around \c fa_file record values from script layer. 
*/ @@ -166,6 +169,7 @@ public: protected: friend class Manager; + friend class FileReassembler; /** * Constructor; only file_analysis::Manager should be creating these. @@ -227,12 +231,37 @@ protected: /** * Does mime type detection via file magic signatures and assigns * strongest matching mime type (if available) to \c mime_type - * field in #val. - * @param data pointer to a chunk of file data. - * @param len number of bytes in the data chunk. + * field in #val. It uses the data in the BOF buffer * @return whether a mime type match was found. */ - bool DetectMIME(const u_char* data, uint64 len); + bool DetectMIME(); + + /** + * Enables reassembly on the file. + */ + void EnableReassembly(); + + /** + * Disables reassembly on the file. If there is an existing reassembler + * for the file, this will cause it to be deleted and won't allow a new + * one to be created until reassembly is reenabled. + */ + void DisableReassembly(); + + /** + * Set a maximum allowed bytes of memory for file reassembly for this file. + */ + void SetReassemblyBuffer(uint64 max); + + /** + * Perform stream-wise delivery for analyzers that need it. + */ + void DeliverStream(const u_char* data, uint64 len); + + /** + * Perform chunk-wise delivery for analyzers that need it. + */ + void DeliverChunk(const u_char* data, uint64 len, uint64 offset); /** * Lookup a record field index/offset by name. @@ -246,25 +275,24 @@ protected: */ static void StaticInit(); -private: +protected: string id; /**< A pretty hash that likely identifies file */ RecordVal* val; /**< \c fa_file from script layer. */ + FileReassembler *file_reassembler; /**< A reassembler for the file if it's needed. */ + uint64 stream_offset; /**< The offset of the file which has been forwarded. */ + uint64 reassembly_max_buffer; /**< Maximum allowed buffer for reassembly. */ + bool did_mime_type; /**< Whether the mime type ident has already been attempted. */ + bool reassembly_enabled; /**< Whether file stream reassembly is needed. 
*/ bool postpone_timeout; /**< Whether postponing timeout is requested. */ - bool first_chunk; /**< Track first non-linear chunk. */ - bool missed_bof; /**< Flags that we missed start of file. */ - bool need_reassembly; /**< Whether file stream reassembly is needed. */ bool done; /**< If this object is about to be deleted. */ - bool did_file_new_event; /**< Whether the file_new event has been done. */ - AnalyzerSet analyzers; /**< A set of attached file analyzer. */ - queue > fonc_queue; + AnalyzerSet analyzers; /**< A set of attached file analyzers. */ struct BOF_Buffer { - BOF_Buffer() : full(false), replayed(false), size(0) {} + BOF_Buffer() : full(false), size(0) {} ~BOF_Buffer() { for ( size_t i = 0; i < chunks.size(); ++i ) delete chunks[i]; } bool full; - bool replayed; uint64 size; BroString::CVec chunks; } bof_buffer; /**< Beginning of file buffer. */ diff --git a/src/file_analysis/FileReassembler.cc b/src/file_analysis/FileReassembler.cc new file mode 100644 index 0000000000..71e4c30bca --- /dev/null +++ b/src/file_analysis/FileReassembler.cc @@ -0,0 +1,65 @@ + +#include "FileReassembler.h" +#include "File.h" + + +namespace file_analysis { + +class File; + +FileReassembler::FileReassembler(File *f, uint64 starting_offset) + : Reassembler(starting_offset), the_file(f) + { + } + +FileReassembler::~FileReassembler() + { + } + +void FileReassembler::BlockInserted(DataBlock* start_block) + { + if ( start_block->seq > last_reassem_seq || + start_block->upper <= last_reassem_seq ) + return; + + for ( DataBlock* b = start_block; + b && b->seq <= last_reassem_seq; b = b->next ) + { + if ( b->seq == last_reassem_seq ) + { // New stuff. + uint64 len = b->Size(); + uint64 seq = last_reassem_seq; + last_reassem_seq += len; + the_file->DeliverStream(b->block, len); + } + } + + // Throw out forwarded data + TrimToSeq(last_reassem_seq); + } + +void FileReassembler::Undelivered(uint64 up_to_seq) + { + // Not doing anything here yet. 
+ } + +void FileReassembler::Overlap(const u_char* b1, const u_char* b2, uint64 n) + { + // Not doing anything here yet. + } + +IMPLEMENT_SERIAL(FileReassembler, SER_FILE_REASSEMBLER); + +bool FileReassembler::DoSerialize(SerialInfo* info) const + { + reporter->InternalError("FileReassembler::DoSerialize not implemented"); + return false; // Cannot be reached. + } + +bool FileReassembler::DoUnserialize(UnserialInfo* info) + { + reporter->InternalError("FileReassembler::DoUnserialize not implemented"); + return false; // Cannot be reached. + } + +} // end file_analysis diff --git a/src/file_analysis/FileReassembler.h b/src/file_analysis/FileReassembler.h new file mode 100644 index 0000000000..c29563efc8 --- /dev/null +++ b/src/file_analysis/FileReassembler.h @@ -0,0 +1,47 @@ +#ifndef FILE_ANALYSIS_FILEREASSEMBLER_H +#define FILE_ANALYSIS_FILEREASSEMBLER_H + +#include "Reassem.h" +#include "File.h" + +class BroFile; +class Connection; + +namespace file_analysis { + +class File; + +//const int STOP_ON_GAP = 1; +//const int PUNT_ON_PARTIAL = 1; + +class FileReassembler : public Reassembler { +public: + + FileReassembler(File* f, uint64 starting_offset); + virtual ~FileReassembler(); + + void Done(); + uint64 GetFirstBlockOffset() { return blocks->seq; } + + // Checks if we have delivered all contents that we can possibly + // deliver for this endpoint. 
+ void CheckEOF(); + +protected: + FileReassembler() { } + + DECLARE_SERIAL(FileReassembler); + + void Undelivered(uint64 up_to_seq); + void BlockInserted(DataBlock* b); + void Overlap(const u_char* b1, const u_char* b2, uint64 n); + + unsigned int had_gap:1; + unsigned int did_EOF:1; + unsigned int skip_deliveries:1; + File* the_file; +}; + +} // namespace file_analysis + +#endif diff --git a/src/file_analysis/Manager.cc b/src/file_analysis/Manager.cc index 59c0fa0023..191bd1e1e4 100644 --- a/src/file_analysis/Manager.cc +++ b/src/file_analysis/Manager.cc @@ -232,6 +232,39 @@ bool Manager::SetTimeoutInterval(const string& file_id, double interval) const return true; } +bool Manager::EnableReassembly(const string& file_id) + { + File* file = LookupFile(file_id); + + if ( ! file ) + return false; + + file->EnableReassembly(); + return true; + } + +bool Manager::DisableReassembly(const string& file_id) + { + File* file = LookupFile(file_id); + + if ( ! file ) + return false; + + file->DisableReassembly(); + return true; + } + +bool Manager::SetReassemblyBuffer(const string& file_id, uint64 max) + { + File* file = LookupFile(file_id); + + if ( ! file ) + return false; + + file->SetReassemblyBuffer(max); + return true; + } + bool Manager::SetExtractionLimit(const string& file_id, RecordVal* args, uint64 n) const { @@ -254,28 +287,6 @@ bool Manager::AddAnalyzer(const string& file_id, file_analysis::Tag tag, return file->AddAnalyzer(tag, args); } -TableVal* Manager::AddAnalyzersForMIMEType(const string& file_id, const string& mtype, - RecordVal* args) - { - if ( ! tag_set_type ) - tag_set_type = internal_type("files_tag_set")->AsTableType(); - - TableVal* sval = new TableVal(tag_set_type); - TagSet* l = LookupMIMEType(mtype, false); - - if ( ! 
l ) - return sval; - - for ( TagSet::const_iterator i = l->begin(); i != l->end(); i++ ) - { - file_analysis::Tag tag = *i; - if ( AddAnalyzer(file_id, tag, args) ) - sval->Assign(tag.AsEnumVal(), 0); - } - - return sval; - } - bool Manager::RemoveAnalyzer(const string& file_id, file_analysis::Tag tag, RecordVal* args) const { @@ -304,6 +315,12 @@ File* Manager::GetFile(const string& file_id, Connection* conn, id_map.Insert(file_id.c_str(), rval); rval->ScheduleInactivityTimer(); + // Generate file_new here so the manager knows about the file. + rval->FileEvent(file_new); + // Same for file_over_new_connection which is generated by + // updating the connection fields. + rval->UpdateConnectionFields(conn, is_orig); + if ( IsIgnored(file_id) ) return 0; } @@ -461,63 +478,6 @@ Analyzer* Manager::InstantiateAnalyzer(Tag tag, RecordVal* args, File* f) const return a; } -Manager::TagSet* Manager::LookupMIMEType(const string& mtype, bool add_if_not_found) - { - MIMEMap::const_iterator i = mime_types.find(to_upper(mtype)); - - if ( i != mime_types.end() ) - return i->second; - - if ( ! add_if_not_found ) - return 0; - - TagSet* l = new TagSet; - mime_types.insert(std::make_pair(to_upper(mtype), l)); - return l; - } - -bool Manager::RegisterAnalyzerForMIMEType(EnumVal* tag, StringVal* mtype) - { - Component* p = Lookup(tag); - - if ( ! p ) - return false; - - return RegisterAnalyzerForMIMEType(p->Tag(), mtype->CheckString()); - } - -bool Manager::RegisterAnalyzerForMIMEType(Tag tag, const string& mtype) - { - TagSet* l = LookupMIMEType(mtype, true); - - DBG_LOG(DBG_FILE_ANALYSIS, "Register analyzer %s for MIME type %s", - GetComponentName(tag).c_str(), mtype.c_str()); - - l->insert(tag); - return true; - } - -bool Manager::UnregisterAnalyzerForMIMEType(EnumVal* tag, StringVal* mtype) - { - Component* p = Lookup(tag); - - if ( ! 
p ) - return false; - - return UnregisterAnalyzerForMIMEType(p->Tag(), mtype->CheckString()); - } - -bool Manager::UnregisterAnalyzerForMIMEType(Tag tag, const string& mtype) - { - TagSet* l = LookupMIMEType(mtype, true); - - DBG_LOG(DBG_FILE_ANALYSIS, "Unregister analyzer %s for MIME type %s", - GetComponentName(tag).c_str(), mtype.c_str()); - - l->erase(tag); - return true; - } - RuleMatcher::MIME_Matches* Manager::DetectMIME(const u_char* data, uint64 len, RuleMatcher::MIME_Matches* rval) const { diff --git a/src/file_analysis/Manager.h b/src/file_analysis/Manager.h index 5a088e86cc..2e8efefcb0 100644 --- a/src/file_analysis/Manager.h +++ b/src/file_analysis/Manager.h @@ -213,6 +213,21 @@ public: */ bool SetTimeoutInterval(const string& file_id, double interval) const; + /** + * Enable the reassembler for a file. + */ + bool EnableReassembly(const string& file_id); + + /** + * Disable the reassembler for a file. + */ + bool DisableReassembly(const string& file_id); + + /** + * Set the maximum reassembly buffer size for a file, in bytes. + */ + bool SetReassemblyBuffer(const string& file_id, uint64 max); + /** * Sets a limit on the maximum size allowed for extracting the file * to local disk; @@ -238,18 +253,6 @@ public: bool AddAnalyzer(const string& file_id, file_analysis::Tag tag, RecordVal* args) const; - /** - * Queue attachment of an all analyzers associated with a given MIME - * type to the file identifier. - * - * @param file_id the file identifier/hash. - * @param mtype the MIME type; comparisions will be performanced case-insensitive. - * @param args a \c AnalyzerArgs value which describes a file analyzer. - * @return A ref'ed \c set[Tag] with all added analyzers. - */ - TableVal* AddAnalyzersForMIMEType(const string& file_id, const string& mtype, - RecordVal* args); - /** * Queue removal of an analyzer for a given file identifier. * @param file_id the file identifier/hash. 
@@ -277,62 +280,6 @@ public: Analyzer* InstantiateAnalyzer(Tag tag, RecordVal* args, File* f) const; /** - * Registers a MIME type for an analyzer. Once registered, files of - * that MIME type will automatically get a corresponding analyzer - * assigned. - * - * @param tag The analyzer's tag as an enum of script type \c - * Files::Tag. - * - * @param mtype The MIME type. It will be matched case-insenistive. - * - * @return True if successful. - */ - bool RegisterAnalyzerForMIMEType(EnumVal* tag, StringVal* mtype); - - /** - * Registers a MIME type for an analyzer. Once registered, files of - * that MIME type will automatically get a corresponding analyzer - * assigned. - * - * @param tag The analyzer's tag as an enum of script type \c - * Files::Tag. - * - * @param mtype The MIME type. It will be matched case-insenistive. - * - * @return True if successful. - */ - bool RegisterAnalyzerForMIMEType(Tag tag, const string& mtype); - - /** - * Unregisters a MIME type for an analyzer. - * - * @param tag The analyzer's tag as an enum of script type \c - * Files::Tag. - * - * @param mtype The MIME type. It will be matched case-insenistive. - * - * @return True if successful (incl. when the type wasn't actually - * registered for the analyzer). - * - */ - bool UnregisterAnalyzerForMIMEType(EnumVal* tag, StringVal* mtype); - - /** - * Unregisters a MIME type for an analyzer. - * - * @param tag The analyzer's tag as an enum of script type \c - * Files::Tag. - * - * @param mtype The MIME type. It will be matched case-insenistive. - * - * @return True if successful (incl. when the type wasn't actually - * registered for the analyzer). - * - */ - bool UnregisterAnalyzerForMIMEType(Tag tag, const string& mtype); - - /** * Returns a set of all matching MIME magic signatures for a given * chunk of data. * @param data A chunk of bytes to match magic MIME signatures against. 
diff --git a/src/file_analysis/analyzer/extract/Extract.cc b/src/file_analysis/analyzer/extract/Extract.cc index 1a3917cd0e..8b3ed4cdad 100644 --- a/src/file_analysis/analyzer/extract/Extract.cc +++ b/src/file_analysis/analyzer/extract/Extract.cc @@ -12,9 +12,9 @@ using namespace file_analysis; Extract::Extract(RecordVal* args, File* file, const string& arg_filename, uint64 arg_limit) : file_analysis::Analyzer(file_mgr->GetComponentTag("EXTRACT"), args, file), - filename(arg_filename), limit(arg_limit) + filename(arg_filename), limit(arg_limit), depth(0) { - fd = open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0666); + fd = open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_APPEND, 0666); if ( fd < 0 ) { @@ -53,7 +53,7 @@ file_analysis::Analyzer* Extract::Instantiate(RecordVal* args, File* file) limit->AsCount()); } -static bool check_limit_exceeded(uint64 lim, uint64 off, uint64 len, uint64* n) +static bool check_limit_exceeded(uint64 lim, uint64 depth, uint64 len, uint64* n) { if ( lim == 0 ) { @@ -61,29 +61,32 @@ static bool check_limit_exceeded(uint64 lim, uint64 off, uint64 len, uint64* n) return false; } - if ( off >= lim ) + if ( depth >= lim ) { *n = 0; return true; } - - *n = lim - off; - - if ( len > *n ) + else if ( depth + len > lim ) + { + printf("exceeded the maximum extraction lenght depth: %llu len: %llu lim: %llu\n", depth, len, lim); + *n = lim - depth; return true; + } else + { *n = len; + } return false; } -bool Extract::DeliverChunk(const u_char* data, uint64 len, uint64 offset) +bool Extract::DeliverStream(const u_char* data, uint64 len) { if ( ! 
fd ) return false; uint64 towrite = 0; - bool limit_exceeded = check_limit_exceeded(limit, offset, len, &towrite); + bool limit_exceeded = check_limit_exceeded(limit, depth, len, &towrite); if ( limit_exceeded && file_extraction_limit ) { @@ -92,16 +95,24 @@ bool Extract::DeliverChunk(const u_char* data, uint64 len, uint64 offset) vl->append(f->GetVal()->Ref()); vl->append(Args()->Ref()); vl->append(new Val(limit, TYPE_COUNT)); - vl->append(new Val(offset, TYPE_COUNT)); vl->append(new Val(len, TYPE_COUNT)); f->FileEvent(file_extraction_limit, vl); - // Limit may have been modified by BIF, re-check it. - limit_exceeded = check_limit_exceeded(limit, offset, len, &towrite); + // Limit may have been modified by a BIF, re-check it. + limit_exceeded = check_limit_exceeded(limit, depth, len, &towrite); } if ( towrite > 0 ) - safe_pwrite(fd, data, towrite, offset); + { + safe_pwrite(fd, (const u_char *) data, towrite, depth); + depth += towrite; + } return ( ! limit_exceeded ); } + +bool Extract::Undelivered(uint64 offset, uint64 len) + { + depth += len; + return true; + } diff --git a/src/file_analysis/analyzer/extract/Extract.h b/src/file_analysis/analyzer/extract/Extract.h index 00c4dbe2b7..cb57950d4c 100644 --- a/src/file_analysis/analyzer/extract/Extract.h +++ b/src/file_analysis/analyzer/extract/Extract.h @@ -28,11 +28,18 @@ public: * Write a chunk of file data to the local extraction file. * @param data pointer to a chunk of file data. * @param len number of bytes in the data chunk. - * @param offset number of bytes from start of file at which chunk starts. * @return false if there was no extraction file open and the data couldn't * be written, else true. */ - virtual bool DeliverChunk(const u_char* data, uint64 len, uint64 offset); + virtual bool DeliverStream(const u_char* data, uint64 len); + + /** + * Report undelivered bytes. + * @param offset distance into the file where the gap occurred. + * @param len number of bytes undelivered. 
+ * @return true + */ + virtual bool Undelivered(uint64 offset, uint64 len); /** * Create a new instance of an Extract analyzer. @@ -67,6 +74,7 @@ private: string filename; int fd; uint64 limit; + uint64 depth; }; } // namespace file_analysis diff --git a/src/file_analysis/analyzer/extract/events.bif b/src/file_analysis/analyzer/extract/events.bif index 1c08736416..f5ebb6816b 100644 --- a/src/file_analysis/analyzer/extract/events.bif +++ b/src/file_analysis/analyzer/extract/events.bif @@ -11,9 +11,7 @@ ## ## limit: The limit, in bytes, the extracted file is about to breach. ## -## offset: The offset at which a file chunk is about to be written. -## ## len: The length of the file chunk about to be written. ## ## .. bro:see:: Files::add_analyzer Files::ANALYZER_EXTRACT -event file_extraction_limit%(f: fa_file, args: any, limit: count, offset: count, len: count%); +event file_extraction_limit%(f: fa_file, args: any, limit: count, len: count%); diff --git a/src/file_analysis/file_analysis.bif b/src/file_analysis/file_analysis.bif index 43efd8ba0f..4e4b4c6cdb 100644 --- a/src/file_analysis/file_analysis.bif +++ b/src/file_analysis/file_analysis.bif @@ -15,6 +15,27 @@ function Files::__set_timeout_interval%(file_id: string, t: interval%): bool return new Val(result, TYPE_BOOL); %} +## :bro:see:`Files::enable_reassembly`. +function Files::__enable_reassembly%(file_id: string%): bool + %{ + bool result = file_mgr->EnableReassembly(file_id->CheckString()); + return new Val(result, TYPE_BOOL); + %} + +## :bro:see:`Files::disable_reassembly`. +function Files::__disable_reassembly%(file_id: string%): bool + %{ + bool result = file_mgr->DisableReassembly(file_id->CheckString()); + return new Val(result, TYPE_BOOL); + %} + +## :bro:see:`Files::set_reassembly_buffer`. 
+function Files::__set_reassembly_buffer%(file_id: string, max: count%): bool + %{ + bool result = file_mgr->SetReassemblyBuffer(file_id->CheckString(), max); + return new Val(result, TYPE_BOOL); + %} + ## :bro:see:`Files::add_analyzer`. function Files::__add_analyzer%(file_id: string, tag: Files::Tag, args: any%): bool %{ @@ -26,16 +47,6 @@ function Files::__add_analyzer%(file_id: string, tag: Files::Tag, args: any%): b return new Val(result, TYPE_BOOL); %} -## :bro:see:`Files::add_analyzers_for_mime_type`. -function Files::__add_analyzers_for_mime_type%(file_id: string, mtype: string, args: any%): files_tag_set - %{ - using BifType::Record::Files::AnalyzerArgs; - RecordVal* rv = args->AsRecordVal()->CoerceTo(AnalyzerArgs); - Val* analyzers = file_mgr->AddAnalyzersForMIMEType(file_id->CheckString(), mtype->CheckString(), rv); - Unref(rv); - return analyzers; - %} - ## :bro:see:`Files::remove_analyzer`. function Files::__remove_analyzer%(file_id: string, tag: Files::Tag, args: any%): bool %{ @@ -60,13 +71,6 @@ function Files::__analyzer_name%(tag: Files::Tag%) : string return new StringVal(file_mgr->GetComponentName(tag)); %} -## :bro:see:`Files::register_for_mime_type`. 
-function Files::__register_for_mime_type%(id: Analyzer::Tag, mt: string%) : bool - %{ - bool result = file_mgr->RegisterAnalyzerForMIMEType(id->AsEnumVal(), mt); - return new Val(result, TYPE_BOOL); - %} - module GLOBAL; ## For use within a :bro:see:`get_file_handle` handler to set a unique diff --git a/testing/btest/Baseline/scripts.base.files.extract.limit/1.out b/testing/btest/Baseline/scripts.base.files.extract.limit/1.out index f767bfcccd..6cb00f1954 100644 --- a/testing/btest/Baseline/scripts.base.files.extract.limit/1.out +++ b/testing/btest/Baseline/scripts.base.files.extract.limit/1.out @@ -1 +1 @@ -file_extraction_limit, 3000, 2896, 1448 +file_extraction_limit, 3000, 1448 diff --git a/testing/btest/Baseline/scripts.base.files.extract.limit/2.out b/testing/btest/Baseline/scripts.base.files.extract.limit/2.out index bdf1f9d171..ecfd786e38 100644 --- a/testing/btest/Baseline/scripts.base.files.extract.limit/2.out +++ b/testing/btest/Baseline/scripts.base.files.extract.limit/2.out @@ -1,3 +1,3 @@ -file_extraction_limit, 3000, 2896, 1448 +file_extraction_limit, 3000, 1448 T -file_extraction_limit, 6000, 5792, 1448 +file_extraction_limit, 6000, 1448 diff --git a/testing/btest/Baseline/scripts.base.files.extract.limit/3.out b/testing/btest/Baseline/scripts.base.files.extract.limit/3.out index b6da9537b7..7f9b69f0ce 100644 --- a/testing/btest/Baseline/scripts.base.files.extract.limit/3.out +++ b/testing/btest/Baseline/scripts.base.files.extract.limit/3.out @@ -1,2 +1,2 @@ -file_extraction_limit, 7000, 5792, 1448 +file_extraction_limit, 7000, 1448 T diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.actions.data_event/out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.actions.data_event/out index d1cc77944c..5e70c0645c 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.actions.data_event/out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.actions.data_event/out @@ -1,9 +1,5 @@ 
FILE_NEW file #0, 0, 0 -FILE_BOF_BUFFER -^J0.26 | 201 -MIME_TYPE -text/plain FILE_OVER_NEW_CONNECTION file_stream, file #0, 1146, ^J0.26 | 2012-08-24 15:10:04 -0700^J^J * Fixing update-changes, which could pick the wrong control file. (Robin Sommer)^J^J * Fixing GPG signing script. (Robin Sommer)^J^J0.25 | 2012-08-01 13:55:46 -0500^J^J * Fix configure script to exit with non-zero status on error (Jon Siwek)^J^J0.24 | 2012-07-05 12:50:43 -0700^J^J * Raise minimum required CMake version to 2.6.3 (Jon Siwek)^J^J * Adding script to delete old fully-merged branches. (Robin Sommer)^J^J0.23-2 | 2012-01-25 13:24:01 -0800^J^J * Fix a bro-cut error message. (Daniel Thayer)^J^J0.23 | 2012-01-11 12:16:11 -0800^J^J * Tweaks to release scripts, plus a new one for signing files.^J (Robin Sommer)^J^J0.22 | 2012-01-10 16:45:19 -0800^J^J * Tweaks for OpenBSD support. (Jon Siwek)^J^J * bro-cut extensions and fixes. (Robin Sommer)^J ^J - If no field names are given on the command line, we now pass through^J all fields. Adresses #657.^J^J - Removing some GNUism from awk script. Addresses #653.^J^J - Added option for time output in UTC. Addresses #668.^J^J - Added output field separator option -F. Addresses #649.^J^J - Fixing option -c: only some header lines were passed through^J file_chunk, file #0, 1146, 0, ^J0.26 | 2012-08-24 15:10:04 -0700^J^J * Fixing update-changes, which could pick the wrong control file. (Robin Sommer)^J^J * Fixing GPG signing script. (Robin Sommer)^J^J0.25 | 2012-08-01 13:55:46 -0500^J^J * Fix configure script to exit with non-zero status on error (Jon Siwek)^J^J0.24 | 2012-07-05 12:50:43 -0700^J^J * Raise minimum required CMake version to 2.6.3 (Jon Siwek)^J^J * Adding script to delete old fully-merged branches. (Robin Sommer)^J^J0.23-2 | 2012-01-25 13:24:01 -0800^J^J * Fix a bro-cut error message. 
(Daniel Thayer)^J^J0.23 | 2012-01-11 12:16:11 -0800^J^J * Tweaks to release scripts, plus a new one for signing files.^J (Robin Sommer)^J^J0.22 | 2012-01-10 16:45:19 -0800^J^J * Tweaks for OpenBSD support. (Jon Siwek)^J^J * bro-cut extensions and fixes. (Robin Sommer)^J ^J - If no field names are given on the command line, we now pass through^J all fields. Adresses #657.^J^J - Removing some GNUism from awk script. Addresses #653.^J^J - Added option for time output in UTC. Addresses #668.^J^J - Added output field separator option -F. Addresses #649.^J^J - Fixing option -c: only some header lines were passed through^J @@ -16,6 +12,10 @@ file_chunk, file #0, 663, 4042, thread library when necessary (e.g.^J PF_RIN FILE_STATE_REMOVE file #0, 4705, 0 [orig_h=141.142.228.5, orig_p=59856/tcp, resp_h=192.150.187.43, resp_p=80/tcp] +FILE_BOF_BUFFER +^J0.26 | 201 +MIME_TYPE +text/plain total bytes: 4705 source: HTTP MD5: 397168fd09991a0e712254df7bc639ac diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.remove_action/get.out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.remove_action/get.out index eb62690f91..4b2bf1e210 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.remove_action/get.out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.remove_action/get.out @@ -1,12 +1,12 @@ FILE_NEW file #0, 0, 0 -FILE_BOF_BUFFER -^J0.26 | 201 -MIME_TYPE -text/plain FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #0, 4705, 0 [orig_h=141.142.228.5, orig_p=59856/tcp, resp_h=192.150.187.43, resp_p=80/tcp] +FILE_BOF_BUFFER +^J0.26 | 201 +MIME_TYPE +text/plain total bytes: 4705 source: HTTP diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.set_timeout_interval/bro..stdout b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.set_timeout_interval/bro..stdout index 1e68c761de..e1e0eb2da4 100644 --- 
a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.set_timeout_interval/bro..stdout +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.set_timeout_interval/bro..stdout @@ -1,20 +1,25 @@ FILE_NEW file #0, 0, 0 -MIME_TYPE -application/x-dosexec FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #0, 1022920, 0 [orig_h=192.168.72.14, orig_p=3254/tcp, resp_h=65.54.95.206, resp_p=80/tcp] +FILE_BOF_BUFFER +MZ\x90\0^C\0\0\0^D\0\0 +MIME_TYPE +application/x-dosexec total bytes: 1022920 source: HTTP +MD5: fc13fee1d44ef737a3133f1298b21d28 +SHA1: 7d99803eaf3b6e8dfa3581348bc694089579d25a +SHA256: dcb87a62a2b5d449abc138776000fd1b14edc690e9da6ea325b8f352ab033202 FILE_NEW file #1, 0, 0 FILE_OVER_NEW_CONNECTION FILE_TIMEOUT FILE_TIMEOUT FILE_STATE_REMOVE -file #1, 206024, 0 +file #1, 0, 0 [orig_h=192.168.72.14, orig_p=3257/tcp, resp_h=65.54.95.14, resp_p=80/tcp] total bytes: 1022920 source: HTTP diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.stop/get.out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.stop/get.out index 13cfe5de58..72bc73e726 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.stop/get.out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.stop/get.out @@ -1,7 +1,2 @@ FILE_NEW file #0, 0, 0 -FILE_BOF_BUFFER -^J0.26 | 201 -MIME_TYPE -text/plain -FILE_OVER_NEW_CONNECTION diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.ftp/out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.ftp/out index ef818618b3..a7ba6981c7 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.ftp/out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.ftp/out @@ -1,13 +1,13 @@ FILE_NEW file #0, 0, 0 -FILE_BOF_BUFFER -The Nationa -MIME_TYPE -text/plain FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #0, 16557, 0 [orig_h=141.142.228.5, orig_p=50737/tcp, resp_h=141.142.192.162, 
resp_p=38141/tcp] +FILE_BOF_BUFFER +The Nationa +MIME_TYPE +text/plain source: FTP_DATA MD5: 7192a8075196267203adb3dfaa5c908d SHA1: 44586aed07cfe19cad25076af98f535585cd5797 diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.get/get-gzip.out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.get/get-gzip.out index d42db4b90a..0ed8262afc 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.get/get-gzip.out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.get/get-gzip.out @@ -1,13 +1,13 @@ FILE_NEW file #0, 0, 0 -FILE_BOF_BUFFER -{^J "origin -MIME_TYPE -text/plain FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #0, 197, 0 [orig_h=141.142.228.5, orig_p=50153/tcp, resp_h=54.243.118.187, resp_p=80/tcp] +FILE_BOF_BUFFER +{^J "origin +MIME_TYPE +text/plain source: HTTP MD5: 5baba7eea57bc8a42a92c817ed566d72 SHA1: e351b8c693c3353716787c02e2923f4d12ebbb31 diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.get/get.out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.get/get.out index 219aad4eff..cc04790c70 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.get/get.out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.get/get.out @@ -1,13 +1,13 @@ FILE_NEW file #0, 0, 0 -FILE_BOF_BUFFER -^J0.26 | 201 -MIME_TYPE -text/plain FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #0, 4705, 0 [orig_h=141.142.228.5, orig_p=59856/tcp, resp_h=192.150.187.43, resp_p=80/tcp] +FILE_BOF_BUFFER +^J0.26 | 201 +MIME_TYPE +text/plain total bytes: 4705 source: HTTP MD5: 397168fd09991a0e712254df7bc639ac diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.multipart/out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.multipart/out index 0bf8d6a0c9..6499401f8d 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.multipart/out +++ 
b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.multipart/out @@ -1,49 +1,49 @@ FILE_NEW file #0, 0, 0 -FILE_BOF_BUFFER -test FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #0, 4, 0 [orig_h=141.142.228.5, orig_p=57262/tcp, resp_h=54.243.88.146, resp_p=80/tcp] +FILE_BOF_BUFFER +test^M^J source: HTTP MD5: 098f6bcd4621d373cade4e832627b4f6 SHA1: a94a8fe5ccb19ba61c4c0873d391e987982fbbd3 SHA256: 9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08 FILE_NEW file #1, 0, 0 -FILE_BOF_BUFFER -test2 FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #1, 5, 0 [orig_h=141.142.228.5, orig_p=57262/tcp, resp_h=54.243.88.146, resp_p=80/tcp] +FILE_BOF_BUFFER +test2^M^J source: HTTP MD5: ad0234829205b9033196ba818f7a872b SHA1: 109f4b3c50d7b0df729d299bc6f8e9ef9066971f SHA256: 60303ae22b998861bce3b28f33eec1be758a213c86c93c076dbe9f558c11c752 FILE_NEW file #2, 0, 0 -FILE_BOF_BUFFER -test3 FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #2, 5, 0 [orig_h=141.142.228.5, orig_p=57262/tcp, resp_h=54.243.88.146, resp_p=80/tcp] +FILE_BOF_BUFFER +test3^M^J source: HTTP MD5: 8ad8757baa8564dc136c1e07507f4a98 SHA1: 3ebfa301dc59196f18593c45e519287a23297589 SHA256: fd61a03af4f77d870fc21e05e7e80678095c92d808cfb3b5c279ee04c74aca13 FILE_NEW file #3, 0, 0 -FILE_BOF_BUFFER -{^J "data": -MIME_TYPE -text/plain FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #3, 465, 0 [orig_h=141.142.228.5, orig_p=57262/tcp, resp_h=54.243.88.146, resp_p=80/tcp] +FILE_BOF_BUFFER +{^J "data": +MIME_TYPE +text/plain total bytes: 465 source: HTTP MD5: 226244811006caf4ac904344841168dd diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/a.out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/a.out index 077fb5282c..5f2e28889e 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/a.out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/a.out @@ -1,12 
+1,17 @@ FILE_NEW file #0, 0, 0 -MIME_TYPE -application/pdf FILE_OVER_NEW_CONNECTION FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #0, 555523, 0 [orig_h=10.101.84.70, orig_p=10978/tcp, resp_h=129.174.93.161, resp_p=80/tcp] [orig_h=10.101.84.70, orig_p=10977/tcp, resp_h=129.174.93.161, resp_p=80/tcp] +FILE_BOF_BUFFER +%PDF-1.4^J%\xd0 +MIME_TYPE +application/pdf total bytes: 555523 source: HTTP +MD5: 5a484ada9c816c0e8b6d2d3978e3f503 +SHA1: 54e7d39e99eb9d40d6251c0361a1090a0d278571 +SHA256: 61c0718bd534ab55716eba161e91bb49155562ddc7c08f0c20f6359d7b808b66 diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/b.out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/b.out index f5698aba23..2b3d76e59d 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/b.out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/b.out @@ -1,19 +1,24 @@ FILE_NEW file #0, 0, 0 -MIME_TYPE -application/x-dosexec FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #0, 1022920, 0 [orig_h=192.168.72.14, orig_p=3254/tcp, resp_h=65.54.95.206, resp_p=80/tcp] +FILE_BOF_BUFFER +MZ\x90\0^C\0\0\0^D\0\0 +MIME_TYPE +application/x-dosexec total bytes: 1022920 source: HTTP +MD5: fc13fee1d44ef737a3133f1298b21d28 +SHA1: 7d99803eaf3b6e8dfa3581348bc694089579d25a +SHA256: dcb87a62a2b5d449abc138776000fd1b14edc690e9da6ea325b8f352ab033202 FILE_NEW file #1, 0, 0 FILE_OVER_NEW_CONNECTION FILE_TIMEOUT FILE_STATE_REMOVE -file #1, 206024, 0 +file #1, 0, 0 [orig_h=192.168.72.14, orig_p=3257/tcp, resp_h=65.54.95.14, resp_p=80/tcp] total bytes: 1022920 source: HTTP diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/c.out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/c.out index 5b213f429a..34cffd7f1e 100644 --- 
a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/c.out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/c.out @@ -3,8 +3,15 @@ file #0, 0, 0 FILE_OVER_NEW_CONNECTION FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE -file #0, 498702, 0 +file #0, 498668, 0 [orig_h=10.45.179.94, orig_p=19950/tcp, resp_h=129.174.93.170, resp_p=80/tcp] [orig_h=10.45.179.94, orig_p=19953/tcp, resp_h=129.174.93.170, resp_p=80/tcp] +FILE_BOF_BUFFER +%PDF-1.4^M%\xe2 +MIME_TYPE +application/pdf total bytes: 498668 source: HTTP +MD5: 94046a5fb1c5802d0f1e6d704cf3e10e +SHA1: 250aa71dd1594363bc7083d25cfd0240e441b119 +SHA256: 5c3bc213c9eff85f98feceac8810b955f8415564e50e3889b447e847c50c5ba7 diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.pipeline/out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.pipeline/out index b85485cd1a..e0880d128c 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.pipeline/out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.pipeline/out @@ -1,41 +1,41 @@ FILE_NEW file #0, 0, 0 -FILE_BOF_BUFFER -/*^J******** -MIME_TYPE -text/plain FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #0, 2675, 0 [orig_h=192.168.1.104, orig_p=1673/tcp, resp_h=63.245.209.11, resp_p=80/tcp] +FILE_BOF_BUFFER +/*^J******** +MIME_TYPE +text/plain source: HTTP MD5: b932c3310ce47e158d1a5a42e0b01279 SHA1: 0e42ae17eea9b074981bd3a34535ad3a22d02706 SHA256: 5b037a2c5e36f56e63a3012c73e46a04b27741d8ff8f8b62c832fb681fc60f42 FILE_NEW file #1, 0, 0 -FILE_BOF_BUFFER -//-- Google -MIME_TYPE -text/plain FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #1, 21421, 0 [orig_h=192.168.1.104, orig_p=1673/tcp, resp_h=63.245.209.11, resp_p=80/tcp] +FILE_BOF_BUFFER +//-- Google +MIME_TYPE +text/plain source: HTTP MD5: e732f7bf1d7cb4eedcb1661697d7bc8c SHA1: 8f241117afaa8ca5f41dc059e66d75c283dcc983 SHA256: 
6a509fd05aa7c8fa05080198894bb19e638554ffcee0e0b3d7bc8ff54afee1da FILE_NEW file #2, 0, 0 -FILE_BOF_BUFFER -GIF89a^D\0^D\0\xb3 -MIME_TYPE -image/gif FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #2, 94, 0 [orig_h=192.168.1.104, orig_p=1673/tcp, resp_h=63.245.209.11, resp_p=80/tcp] +FILE_BOF_BUFFER +GIF89a^D\0^D\0\xb3 +MIME_TYPE +image/gif total bytes: 94 source: HTTP MD5: d903de7e30db1691d3130ba5eae6b9a7 @@ -43,14 +43,14 @@ SHA1: 81f5f056ce5e97d940854bb0c48017b45dd9f15e SHA256: 6fb22aa9d780ea63bd7a2e12b92b16fcbf1c4874f1d3e11309a5ba984433c315 FILE_NEW file #3, 0, 0 -FILE_BOF_BUFFER -\x89PNG^M^J^Z^J\0\0\0 -MIME_TYPE -image/png FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #3, 2349, 0 [orig_h=192.168.1.104, orig_p=1673/tcp, resp_h=63.245.209.11, resp_p=80/tcp] +FILE_BOF_BUFFER +\x89PNG^M^J^Z^J\0\0\0 +MIME_TYPE +image/png total bytes: 2349 source: HTTP MD5: e0029eea80812e9a8e57b8d05d52938a @@ -58,14 +58,14 @@ SHA1: 560eab5a0177246827a94042dd103916d8765ac7 SHA256: e0b4500c1fd1d675da4137461cbe64d3c8489f4180d194e47683b20e7fb876f4 FILE_NEW file #4, 0, 0 -FILE_BOF_BUFFER -\x89PNG^M^J^Z^J\0\0\0 -MIME_TYPE -image/png FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #4, 27579, 0 [orig_h=192.168.1.104, orig_p=1673/tcp, resp_h=63.245.209.11, resp_p=80/tcp] +FILE_BOF_BUFFER +\x89PNG^M^J^Z^J\0\0\0 +MIME_TYPE +image/png total bytes: 27579 source: HTTP MD5: 30aa926344f58019d047e85ba049ca1e diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.post/out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.post/out index cedc396254..deddfbb640 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.post/out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.post/out @@ -1,13 +1,13 @@ FILE_NEW file #0, 0, 0 -FILE_BOF_BUFFER -hello world -MIME_TYPE -text/plain FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #0, 11, 0 [orig_h=141.142.228.5, orig_p=53595/tcp, resp_h=54.243.55.129, resp_p=80/tcp] 
+FILE_BOF_BUFFER +hello world +MIME_TYPE +text/plain total bytes: 11 source: HTTP MD5: 5eb63bbbe01eeed093cb22bb8f5acdc3 @@ -15,14 +15,14 @@ SHA1: 2aae6c35c94fcfb415dbe95f408b9ce91ee846ed SHA256: b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9 FILE_NEW file #1, 0, 0 -FILE_BOF_BUFFER -{^J "origin -MIME_TYPE -text/plain FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #1, 366, 0 [orig_h=141.142.228.5, orig_p=53595/tcp, resp_h=54.243.55.129, resp_p=80/tcp] +FILE_BOF_BUFFER +{^J "origin +MIME_TYPE +text/plain total bytes: 366 source: HTTP MD5: c9337794df612aeaa901dcf9fa446bca diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.input.basic/bro..stdout b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.input.basic/bro..stdout index afeb32b334..a5093d22c2 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.input.basic/bro..stdout +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.input.basic/bro..stdout @@ -1,11 +1,11 @@ FILE_NEW file #0, 0, 0 +FILE_STATE_REMOVE +file #0, 311, 0 FILE_BOF_BUFFER #separator MIME_TYPE text/plain -FILE_STATE_REMOVE -file #0, 311, 0 source: ../input.log MD5: bf4dfa6169b74146da5236e918743599 SHA1: 0a0f20de89c86d7bce1301af6548d6e9ae87b0f1 diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.irc/out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.irc/out index 082fb7e038..906225c051 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.irc/out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.irc/out @@ -1,18 +1,14 @@ FILE_NEW file #0, 0, 0 -FILE_BOF_BUFFER -PK^C^D^T\0\0\0^H\0\xae -MIME_TYPE -application/zip FILE_OVER_NEW_CONNECTION FILE_NEW file #1, 0, 0 -FILE_BOF_BUFFER -\0\0^Ex\0\0^J\xf0\0\0^P FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #1, 124, 0 [orig_h=192.168.1.77, orig_p=57655/tcp, resp_h=209.197.168.151, resp_p=1024/tcp] +FILE_BOF_BUFFER +\0\0^Ex\0\0^J\xf0\0\0^P source: 
IRC_DATA MD5: 35288fd50a74c7d675909ff83424d7a1 SHA1: 8a98f177cb47e6bf771bf57c2f7e94c4b5e79ffa @@ -20,6 +16,10 @@ SHA256: b24dde52b933a0d76e885ab418cb6d697b14a4e2fef45fce66e12ecc5a6a81aa FILE_STATE_REMOVE file #0, 42208, 0 [orig_h=192.168.1.77, orig_p=57655/tcp, resp_h=209.197.168.151, resp_p=1024/tcp] +FILE_BOF_BUFFER +PK^C^D^T\0\0\0^H\0\xae +MIME_TYPE +application/zip source: IRC_DATA MD5: 8c0803242f549c2780cb88b9a9215c65 SHA1: 8abe0239263fd7326eb803d4465cf494f8bea218 diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.smtp/out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.smtp/out index 44c240c7ee..561f3c49f6 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.smtp/out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.smtp/out @@ -1,41 +1,41 @@ FILE_NEW file #0, 0, 0 -FILE_BOF_BUFFER -Hello^M^J^M^J ^M -MIME_TYPE -text/plain FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #0, 77, 0 [orig_h=10.10.1.4, orig_p=1470/tcp, resp_h=74.53.140.153, resp_p=25/tcp] +FILE_BOF_BUFFER +Hello^M^J^M^J ^M +MIME_TYPE +text/plain source: SMTP MD5: 58aff3af22807bc5f4b6357c0038256c SHA1: c39dc8cd0f8d8b1f7fc8b362c41e69fdf20f668a SHA256: 8d057f3af311c20675eea767a9df5fa31ff3597c6d5d50fd0cdc34766c40204d FILE_NEW file #1, 0, 0 -FILE_BOF_BUFFER -, dpd=, conn=, extract_orig=F, extract_resp=F, dhcp=, dnp3=, dns=, dns_state=, ftp=, ftp_data_reuse=F, ssl=, http=, http_state=, irc=, modbus=, radius=, snmp=, smtp=[ts=1254722768.219663, uid=CjhGID4nQcgTWjvg4c, id=[orig_h=10.10.1.4, orig_p=1470/tcp, resp_h=74.53.140.153, resp_p=25/tcp], trans_depth=1, helo=GP, mailfrom=, rcptto={^J^I^J}, date=Mon, 5 Oct 2009 11:36:07 +0530, from="Gurpartap Singh" , to={^J^I^J}, reply_to=, msg_id=<000301ca4581$ef9e57f0$cedb07d0$@in>, in_reply_to=, subject=SMTP, x_originating_ip=, first_received=, second_received=, last_reply=354 Enter message, ending with "." 
on a line by itself, path=[74.53.140.153, 10.10.1.4], user_agent=Microsoft Office Outlook 12.0, tls=F, process_received_from=T, has_client_activity=T, entity=[filename=], fuids=[]], smtp_state=[helo=GP, messages_transferred=0, pending_messages=, mime_depth=3], socks=, ssh=, syslog=] [2] is_orig: bool = F +1254722770.692743 file_new + [0] f: fa_file = [id=Fel9gs4OtNEV6gUJZ5, parent_id=, source=SMTP, is_orig=F, conns=, last_active=1254722770.692743, seen_bytes=0, total_bytes=, missing_bytes=0, overflow_bytes=0, timeout_interval=2.0 mins, bof_buffer_size=20480, bof_buffer=, info=, ftp=, http=, irc=, u2_events=] + +1254722770.692743 file_over_new_connection + [0] f: fa_file = [id=Fel9gs4OtNEV6gUJZ5, parent_id=, source=SMTP, is_orig=F, conns={^J^I[[orig_h=10.10.1.4, orig_p=1470/tcp, resp_h=74.53.140.153, resp_p=25/tcp]] = [id=[orig_h=10.10.1.4, orig_p=1470/tcp, resp_h=74.53.140.153, resp_p=25/tcp], orig=[size=1610, state=4, num_pkts=9, num_bytes_ip=518, flow_label=0], resp=[size=462, state=4, num_pkts=10, num_bytes_ip=870, flow_label=0], start_time=1254722767.529046, duration=3.163697, service={^J^I^ISMTP^J^I}, addl=, hot=0, history=ShAdDa, uid=CjhGID4nQcgTWjvg4c, tunnel=, dpd=, conn=, extract_orig=F, extract_resp=F, dhcp=, dnp3=, dns=, dns_state=, ftp=, ftp_data_reuse=F, ssl=, http=, http_state=, irc=, modbus=, radius=, snmp=, smtp=[ts=1254722768.219663, uid=CjhGID4nQcgTWjvg4c, id=[orig_h=10.10.1.4, orig_p=1470/tcp, resp_h=74.53.140.153, resp_p=25/tcp], trans_depth=1, helo=GP, mailfrom=, rcptto={^J^I^I^J^I}, date=Mon, 5 Oct 2009 11:36:07 +0530, from="Gurpartap Singh" , to={^J^I^I^J^I}, reply_to=, msg_id=<000301ca4581$ef9e57f0$cedb07d0$@in>, in_reply_to=, subject=SMTP, x_originating_ip=, first_received=, second_received=, last_reply=354 Enter message, ending with "." 
on a line by itself, path=[74.53.140.153, 10.10.1.4], user_agent=Microsoft Office Outlook 12.0, tls=F, process_received_from=T, has_client_activity=T, entity=[filename=], fuids=[]], smtp_state=[helo=GP, messages_transferred=0, pending_messages=, mime_depth=3], socks=, ssh=, syslog=]^J}, last_active=1254722770.692743, seen_bytes=0, total_bytes=, missing_bytes=0, overflow_bytes=0, timeout_interval=2.0 mins, bof_buffer_size=20480, bof_buffer=, info=[ts=1254722770.692743, fuid=Fel9gs4OtNEV6gUJZ5, tx_hosts={^J^J}, rx_hosts={^J^J}, conn_uids={^J^J}, source=SMTP, depth=0, analyzers={^J^J}, mime_type=, filename=, duration=0 secs, local_orig=, is_orig=F, seen_bytes=0, total_bytes=, missing_bytes=0, overflow_bytes=0, timedout=F, parent_fuid=, md5=, sha1=, sha256=, x509=, extracted=], ftp=, http=, irc=, u2_events=] + [1] c: connection = [id=[orig_h=10.10.1.4, orig_p=1470/tcp, resp_h=74.53.140.153, resp_p=25/tcp], orig=[size=1610, state=4, num_pkts=9, num_bytes_ip=518, flow_label=0], resp=[size=462, state=4, num_pkts=10, num_bytes_ip=870, flow_label=0], start_time=1254722767.529046, duration=3.163697, service={^J^ISMTP^J}, addl=, hot=0, history=ShAdDa, uid=CjhGID4nQcgTWjvg4c, tunnel=, dpd=, conn=, extract_orig=F, extract_resp=F, dhcp=, dnp3=, dns=, dns_state=, ftp=, ftp_data_reuse=F, ssl=, http=, http_state=, irc=, modbus=, radius=, snmp=, smtp=[ts=1254722768.219663, uid=CjhGID4nQcgTWjvg4c, id=[orig_h=10.10.1.4, orig_p=1470/tcp, resp_h=74.53.140.153, resp_p=25/tcp], trans_depth=1, helo=GP, mailfrom=, rcptto={^J^I^J}, date=Mon, 5 Oct 2009 11:36:07 +0530, from="Gurpartap Singh" , to={^J^I^J}, reply_to=, msg_id=<000301ca4581$ef9e57f0$cedb07d0$@in>, in_reply_to=, subject=SMTP, x_originating_ip=, first_received=, second_received=, last_reply=354 Enter message, ending with "." 
on a line by itself, path=[74.53.140.153, 10.10.1.4], user_agent=Microsoft Office Outlook 12.0, tls=F, process_received_from=T, has_client_activity=T, entity=[filename=], fuids=[]], smtp_state=[helo=GP, messages_transferred=0, pending_messages=, mime_depth=3], socks=, ssh=, syslog=] + [2] is_orig: bool = F + 1254722770.692743 mime_end_entity - [0] c: connection = [id=[orig_h=10.10.1.4, orig_p=1470/tcp, resp_h=74.53.140.153, resp_p=25/tcp], orig=[size=1610, state=4, num_pkts=9, num_bytes_ip=518, flow_label=0], resp=[size=462, state=4, num_pkts=10, num_bytes_ip=870, flow_label=0], start_time=1254722767.529046, duration=3.163697, service={^J^ISMTP^J}, addl=, hot=0, history=ShAdDa, uid=CjhGID4nQcgTWjvg4c, tunnel=, dpd=, conn=, extract_orig=F, extract_resp=F, dhcp=, dnp3=, dns=, dns_state=, ftp=, ftp_data_reuse=F, ssl=, http=, http_state=, irc=, modbus=, radius=, snmp=, smtp=[ts=1254722768.219663, uid=CjhGID4nQcgTWjvg4c, id=[orig_h=10.10.1.4, orig_p=1470/tcp, resp_h=74.53.140.153, resp_p=25/tcp], trans_depth=1, helo=GP, mailfrom=, rcptto={^J^I^J}, date=Mon, 5 Oct 2009 11:36:07 +0530, from="Gurpartap Singh" , to={^J^I^J}, reply_to=, msg_id=<000301ca4581$ef9e57f0$cedb07d0$@in>, in_reply_to=, subject=SMTP, x_originating_ip=, first_received=, second_received=, last_reply=354 Enter message, ending with "." 
on a line by itself, path=[74.53.140.153, 10.10.1.4], user_agent=Microsoft Office Outlook 12.0, tls=F, process_received_from=T, has_client_activity=T, entity=[filename=], fuids=[]], smtp_state=[helo=GP, messages_transferred=0, pending_messages=, mime_depth=3], socks=, ssh=, syslog=] + [0] c: connection = [id=[orig_h=10.10.1.4, orig_p=1470/tcp, resp_h=74.53.140.153, resp_p=25/tcp], orig=[size=1610, state=4, num_pkts=9, num_bytes_ip=518, flow_label=0], resp=[size=462, state=4, num_pkts=10, num_bytes_ip=870, flow_label=0], start_time=1254722767.529046, duration=3.163697, service={^J^ISMTP^J}, addl=, hot=0, history=ShAdDa, uid=CjhGID4nQcgTWjvg4c, tunnel=, dpd=, conn=, extract_orig=F, extract_resp=F, dhcp=, dnp3=, dns=, dns_state=, ftp=, ftp_data_reuse=F, ssl=, http=, http_state=, irc=, modbus=, radius=, snmp=, smtp=[ts=1254722768.219663, uid=CjhGID4nQcgTWjvg4c, id=[orig_h=10.10.1.4, orig_p=1470/tcp, resp_h=74.53.140.153, resp_p=25/tcp], trans_depth=1, helo=GP, mailfrom=, rcptto={^J^I^J}, date=Mon, 5 Oct 2009 11:36:07 +0530, from="Gurpartap Singh" , to={^J^I^J}, reply_to=, msg_id=<000301ca4581$ef9e57f0$cedb07d0$@in>, in_reply_to=, subject=SMTP, x_originating_ip=, first_received=, second_received=, last_reply=354 Enter message, ending with "." 
on a line by itself, path=[74.53.140.153, 10.10.1.4], user_agent=Microsoft Office Outlook 12.0, tls=F, process_received_from=T, has_client_activity=T, entity=[filename=], fuids=[Fel9gs4OtNEV6gUJZ5]], smtp_state=[helo=GP, messages_transferred=0, pending_messages=, mime_depth=3], socks=, ssh=, syslog=] 1254722770.692743 get_file_handle [0] tag: enum = Analyzer::ANALYZER_SMTP - [1] c: connection = [id=[orig_h=10.10.1.4, orig_p=1470/tcp, resp_h=74.53.140.153, resp_p=25/tcp], orig=[size=1610, state=4, num_pkts=9, num_bytes_ip=518, flow_label=0], resp=[size=462, state=4, num_pkts=10, num_bytes_ip=870, flow_label=0], start_time=1254722767.529046, duration=3.163697, service={^J^ISMTP^J}, addl=, hot=0, history=ShAdDa, uid=CjhGID4nQcgTWjvg4c, tunnel=, dpd=, conn=, extract_orig=F, extract_resp=F, dhcp=, dnp3=, dns=, dns_state=, ftp=, ftp_data_reuse=F, ssl=, http=, http_state=, irc=, modbus=, radius=, snmp=, smtp=[ts=1254722768.219663, uid=CjhGID4nQcgTWjvg4c, id=[orig_h=10.10.1.4, orig_p=1470/tcp, resp_h=74.53.140.153, resp_p=25/tcp], trans_depth=1, helo=GP, mailfrom=, rcptto={^J^I^J}, date=Mon, 5 Oct 2009 11:36:07 +0530, from="Gurpartap Singh" , to={^J^I^J}, reply_to=, msg_id=<000301ca4581$ef9e57f0$cedb07d0$@in>, in_reply_to=, subject=SMTP, x_originating_ip=, first_received=, second_received=, last_reply=354 Enter message, ending with "." 
on a line by itself, path=[74.53.140.153, 10.10.1.4], user_agent=Microsoft Office Outlook 12.0, tls=F, process_received_from=T, has_client_activity=T, entity=, fuids=[]], smtp_state=[helo=GP, messages_transferred=0, pending_messages=, mime_depth=3], socks=, ssh=, syslog=] + [1] c: connection = [id=[orig_h=10.10.1.4, orig_p=1470/tcp, resp_h=74.53.140.153, resp_p=25/tcp], orig=[size=1610, state=4, num_pkts=9, num_bytes_ip=518, flow_label=0], resp=[size=462, state=4, num_pkts=10, num_bytes_ip=870, flow_label=0], start_time=1254722767.529046, duration=3.163697, service={^J^ISMTP^J}, addl=, hot=0, history=ShAdDa, uid=CjhGID4nQcgTWjvg4c, tunnel=, dpd=, conn=, extract_orig=F, extract_resp=F, dhcp=, dnp3=, dns=, dns_state=, ftp=, ftp_data_reuse=F, ssl=, http=, http_state=, irc=, modbus=, radius=, snmp=, smtp=[ts=1254722768.219663, uid=CjhGID4nQcgTWjvg4c, id=[orig_h=10.10.1.4, orig_p=1470/tcp, resp_h=74.53.140.153, resp_p=25/tcp], trans_depth=1, helo=GP, mailfrom=, rcptto={^J^I^J}, date=Mon, 5 Oct 2009 11:36:07 +0530, from="Gurpartap Singh" , to={^J^I^J}, reply_to=, msg_id=<000301ca4581$ef9e57f0$cedb07d0$@in>, in_reply_to=, subject=SMTP, x_originating_ip=, first_received=, second_received=, last_reply=354 Enter message, ending with "." 
on a line by itself, path=[74.53.140.153, 10.10.1.4], user_agent=Microsoft Office Outlook 12.0, tls=F, process_received_from=T, has_client_activity=T, entity=, fuids=[Fel9gs4OtNEV6gUJZ5]], smtp_state=[helo=GP, messages_transferred=0, pending_messages=, mime_depth=3], socks=, ssh=, syslog=] [2] is_orig: bool = T 1254722770.692743 file_new @@ -313,7 +321,7 @@ [2] is_orig: bool = F 1254722770.692743 file_state_remove - [0] f: fa_file = [id=Fel9gs4OtNEV6gUJZ5, parent_id=, source=SMTP, is_orig=F, conns={^J^I[[orig_h=10.10.1.4, orig_p=1470/tcp, resp_h=74.53.140.153, resp_p=25/tcp]] = [id=[orig_h=10.10.1.4, orig_p=1470/tcp, resp_h=74.53.140.153, resp_p=25/tcp], orig=[size=1610, state=4, num_pkts=9, num_bytes_ip=518, flow_label=0], resp=[size=462, state=4, num_pkts=10, num_bytes_ip=870, flow_label=0], start_time=1254722767.529046, duration=3.163697, service={^J^I^ISMTP^J^I}, addl=, hot=0, history=ShAdDa, uid=CjhGID4nQcgTWjvg4c, tunnel=, dpd=, conn=, extract_orig=F, extract_resp=F, dhcp=, dnp3=, dns=, dns_state=, ftp=, ftp_data_reuse=F, ssl=, http=, http_state=, irc=, modbus=, radius=, snmp=, smtp=[ts=1254722768.219663, uid=CjhGID4nQcgTWjvg4c, id=[orig_h=10.10.1.4, orig_p=1470/tcp, resp_h=74.53.140.153, resp_p=25/tcp], trans_depth=1, helo=GP, mailfrom=, rcptto={^J^I^I^J^I}, date=Mon, 5 Oct 2009 11:36:07 +0530, from="Gurpartap Singh" , to={^J^I^I^J^I}, reply_to=, msg_id=<000301ca4581$ef9e57f0$cedb07d0$@in>, in_reply_to=, subject=SMTP, x_originating_ip=, first_received=, second_received=, last_reply=354 Enter message, ending with "." 
on a line by itself, path=[74.53.140.153, 10.10.1.4], user_agent=Microsoft Office Outlook 12.0, tls=F, process_received_from=T, has_client_activity=T, entity=, fuids=[Fel9gs4OtNEV6gUJZ5]], smtp_state=[helo=GP, messages_transferred=0, pending_messages=, mime_depth=3], socks=, ssh=, syslog=]^J}, last_active=1254722770.692743, seen_bytes=77, total_bytes=, missing_bytes=0, overflow_bytes=0, timeout_interval=2.0 mins, bof_buffer_size=1024, bof_buffer=Hello^M^J^M^J ^M^J^M^JI send u smtp pcap file ^M^J^M^JFind the attachment^M^J^M^J ^M^J^M^JGPS^M^J^M^J, mime_type=text/plain, mime_types=[[strength=-20, mime=text/plain]], info=[ts=1254722770.692743, fuid=Fel9gs4OtNEV6gUJZ5, tx_hosts={^J^I74.53.140.153^J}, rx_hosts={^J^I10.10.1.4^J}, conn_uids={^J^ICjhGID4nQcgTWjvg4c^J}, source=SMTP, depth=3, analyzers={^J^J}, mime_type=text/plain, filename=, duration=0 secs, local_orig=, is_orig=F, seen_bytes=0, total_bytes=, missing_bytes=0, overflow_bytes=0, timedout=F, parent_fuid=, md5=, sha1=, sha256=, x509=, extracted=], u2_events=] + [0] f: fa_file = [id=Fel9gs4OtNEV6gUJZ5, parent_id=, source=SMTP, is_orig=F, conns={^J^I[[orig_h=10.10.1.4, orig_p=1470/tcp, resp_h=74.53.140.153, resp_p=25/tcp]] = [id=[orig_h=10.10.1.4, orig_p=1470/tcp, resp_h=74.53.140.153, resp_p=25/tcp], orig=[size=1610, state=4, num_pkts=9, num_bytes_ip=518, flow_label=0], resp=[size=462, state=4, num_pkts=10, num_bytes_ip=870, flow_label=0], start_time=1254722767.529046, duration=3.163697, service={^J^I^ISMTP^J^I}, addl=, hot=0, history=ShAdDa, uid=CjhGID4nQcgTWjvg4c, tunnel=, dpd=, conn=, extract_orig=F, extract_resp=F, dhcp=, dnp3=, dns=, dns_state=, ftp=, ftp_data_reuse=F, ssl=, http=, http_state=, irc=, modbus=, radius=, snmp=, smtp=[ts=1254722768.219663, uid=CjhGID4nQcgTWjvg4c, id=[orig_h=10.10.1.4, orig_p=1470/tcp, resp_h=74.53.140.153, resp_p=25/tcp], trans_depth=1, helo=GP, mailfrom=, rcptto={^J^I^I^J^I}, date=Mon, 5 Oct 2009 11:36:07 +0530, from="Gurpartap Singh" , to={^J^I^I^J^I}, reply_to=, 
msg_id=<000301ca4581$ef9e57f0$cedb07d0$@in>, in_reply_to=, subject=SMTP, x_originating_ip=, first_received=, second_received=, last_reply=354 Enter message, ending with "." on a line by itself, path=[74.53.140.153, 10.10.1.4], user_agent=Microsoft Office Outlook 12.0, tls=F, process_received_from=T, has_client_activity=T, entity=, fuids=[Fel9gs4OtNEV6gUJZ5]], smtp_state=[helo=GP, messages_transferred=0, pending_messages=, mime_depth=3], socks=, ssh=, syslog=]^J}, last_active=1254722770.692743, seen_bytes=79, total_bytes=, missing_bytes=0, overflow_bytes=0, timeout_interval=2.0 mins, bof_buffer_size=20480, bof_buffer=Hello^M^J^M^J ^M^J^M^JI send u smtp pcap file ^M^J^M^JFind the attachment^M^J^M^J ^M^J^M^JGPS^M^J^M^J^M^J, info=[ts=1254722770.692743, fuid=Fel9gs4OtNEV6gUJZ5, tx_hosts={^J^I74.53.140.153^J}, rx_hosts={^J^I10.10.1.4^J}, conn_uids={^J^ICjhGID4nQcgTWjvg4c^J}, source=SMTP, depth=3, analyzers={^J^J}, mime_type=text/plain, filename=, duration=0 secs, local_orig=, is_orig=F, seen_bytes=79, total_bytes=, missing_bytes=0, overflow_bytes=0, timedout=F, parent_fuid=, md5=, sha1=, sha256=, x509=, extracted=], ftp=, http=, irc=, u2_events=] 1254722770.692743 get_file_handle [0] tag: enum = Analyzer::ANALYZER_SMTP @@ -336,14 +344,18 @@ [1] c: connection = [id=[orig_h=10.10.1.4, orig_p=1470/tcp, resp_h=74.53.140.153, resp_p=25/tcp], orig=[size=1610, state=4, num_pkts=9, num_bytes_ip=518, flow_label=0], resp=[size=462, state=4, num_pkts=10, num_bytes_ip=870, flow_label=0], start_time=1254722767.529046, duration=3.163697, service={^J^ISMTP^J}, addl=, hot=0, history=ShAdDa, uid=CjhGID4nQcgTWjvg4c, tunnel=, dpd=, conn=, extract_orig=F, extract_resp=F, dhcp=, dnp3=, dns=, dns_state=, ftp=, ftp_data_reuse=F, ssl=, http=, http_state=, irc=, modbus=, radius=, snmp=, smtp=[ts=1254722768.219663, uid=CjhGID4nQcgTWjvg4c, id=[orig_h=10.10.1.4, orig_p=1470/tcp, resp_h=74.53.140.153, resp_p=25/tcp], trans_depth=1, helo=GP, mailfrom=, rcptto={^J^I^J}, date=Mon, 5 Oct 2009 11:36:07 
+0530, from="Gurpartap Singh" , to={^J^I^J}, reply_to=, msg_id=<000301ca4581$ef9e57f0$cedb07d0$@in>, in_reply_to=, subject=SMTP, x_originating_ip=, first_received=, second_received=, last_reply=354 Enter message, ending with "." on a line by itself, path=[74.53.140.153, 10.10.1.4], user_agent=Microsoft Office Outlook 12.0, tls=F, process_received_from=T, has_client_activity=T, entity=[filename=], fuids=[Fel9gs4OtNEV6gUJZ5]], smtp_state=[helo=GP, messages_transferred=0, pending_messages=, mime_depth=4], socks=, ssh=, syslog=] [2] is_orig: bool = F -1254722770.692786 file_new - [0] f: fa_file = [id=Ft4M3f2yMvLlmwtbq9, parent_id=, source=SMTP, is_orig=F, conns={^J^I[[orig_h=10.10.1.4, orig_p=1470/tcp, resp_h=74.53.140.153, resp_p=25/tcp]] = [id=[orig_h=10.10.1.4, orig_p=1470/tcp, resp_h=74.53.140.153, resp_p=25/tcp], orig=[size=1610, state=4, num_pkts=9, num_bytes_ip=518, flow_label=0], resp=[size=462, state=4, num_pkts=10, num_bytes_ip=870, flow_label=0], start_time=1254722767.529046, duration=3.163697, service={^J^I^ISMTP^J^I}, addl=, hot=0, history=ShAdDa, uid=CjhGID4nQcgTWjvg4c, tunnel=, dpd=, conn=, extract_orig=F, extract_resp=F, dhcp=, dnp3=, dns=, dns_state=, ftp=, ftp_data_reuse=F, ssl=, http=, http_state=, irc=, modbus=, radius=, snmp=, smtp=[ts=1254722768.219663, uid=CjhGID4nQcgTWjvg4c, id=[orig_h=10.10.1.4, orig_p=1470/tcp, resp_h=74.53.140.153, resp_p=25/tcp], trans_depth=1, helo=GP, mailfrom=, rcptto={^J^I^I^J^I}, date=Mon, 5 Oct 2009 11:36:07 +0530, from="Gurpartap Singh" , to={^J^I^I^J^I}, reply_to=, msg_id=<000301ca4581$ef9e57f0$cedb07d0$@in>, in_reply_to=, subject=SMTP, x_originating_ip=, first_received=, second_received=, last_reply=354 Enter message, ending with "." 
on a line by itself, path=[74.53.140.153, 10.10.1.4], user_agent=Microsoft Office Outlook 12.0, tls=F, process_received_from=T, has_client_activity=T, entity=[filename=], fuids=[Fel9gs4OtNEV6gUJZ5]], smtp_state=[helo=GP, messages_transferred=0, pending_messages=, mime_depth=4], socks=, ssh=, syslog=]^J}, last_active=1254722770.692786, seen_bytes=0, total_bytes=, missing_bytes=0, overflow_bytes=0, timeout_interval=2.0 mins, bof_buffer_size=1024, bof_buffer=^M^J^M^J^M^J^M^J^M^J