diff --git a/scripts/base/files/extract/__load__.bro b/scripts/base/files/extract/__load__.bro new file mode 100644 index 0000000000..d551be57d3 --- /dev/null +++ b/scripts/base/files/extract/__load__.bro @@ -0,0 +1 @@ +@load ./main \ No newline at end of file diff --git a/scripts/base/files/extract/main.bro b/scripts/base/files/extract/main.bro new file mode 100644 index 0000000000..70e61c8529 --- /dev/null +++ b/scripts/base/files/extract/main.bro @@ -0,0 +1,38 @@ +@load base/frameworks/files +@load base/utils/paths + +module FileExtract; + +export { + ## The prefix where files are extracted to. + const prefix = "./extract_files/" &redef; + + redef record Files::Info += { + ## Local filenames of extracted file. + extracted: string &optional &log; + }; + + redef record Files::AnalyzerArgs += { + ## The local filename to which to write an extracted file. + ## This field is used in the core by the extraction plugin + ## to know where to write the file to. It's also optional + extract_filename: string &optional; + }; +} + +function on_add(f: fa_file, args: Files::AnalyzerArgs) + { + if ( ! args?$extract_filename ) + args$extract_filename = cat("extract-", f$source, "-", f$id); + + f$info$extracted = args$extract_filename; + args$extract_filename = build_path_compressed(prefix, args$extract_filename); + } + +event bro_init() &priority=10 + { + Files::register_analyzer_add_callback(Files::ANALYZER_EXTRACT, on_add); + + # Create the extraction directory. + mkdir(prefix); + } \ No newline at end of file diff --git a/scripts/base/files/hash/main.bro b/scripts/base/files/hash/main.bro index cd50d6b291..926e39865a 100644 --- a/scripts/base/files/hash/main.bro +++ b/scripts/base/files/hash/main.bro @@ -1,13 +1,23 @@ +@load base/frameworks/files -module FilesHash; +module FileHash; export { - + redef record Files::Info += { + ## An MD5 digest of the file contents. + md5: string &log &optional; + + ## A SHA1 digest of the file contents. 
+ sha1: string &log &optional; + + ## A SHA256 digest of the file contents. + sha256: string &log &optional; + }; + } event file_hash(f: fa_file, kind: string, hash: string) &priority=5 { - set_info(f); switch ( kind ) { case "md5": f$info$md5 = hash; diff --git a/scripts/base/frameworks/file-analysis/__load__.bro b/scripts/base/frameworks/files/__load__.bro similarity index 100% rename from scripts/base/frameworks/file-analysis/__load__.bro rename to scripts/base/frameworks/files/__load__.bro diff --git a/scripts/base/frameworks/file-analysis/main.bro b/scripts/base/frameworks/files/main.bro similarity index 52% rename from scripts/base/frameworks/file-analysis/main.bro rename to scripts/base/frameworks/files/main.bro index 7b1bd7d81c..1c0481a87c 100644 --- a/scripts/base/frameworks/file-analysis/main.bro +++ b/scripts/base/frameworks/files/main.bro @@ -3,8 +3,9 @@ @load base/file_analysis.bif @load base/frameworks/logging +@load base/utils/site -module FileAnalysis; +module Files; export { redef enum Log::ID += { @@ -14,21 +15,14 @@ export { ## A structure which represents a desired type of file analysis. type AnalyzerArgs: record { - ## The type of analysis. - tag: Analyzer; - - ## The local filename to which to write an extracted file. Must be - ## set when *tag* is :bro:see:`FileAnalysis::ANALYZER_EXTRACT`. - extract_filename: string &optional; - ## An event which will be generated for all new file contents, ## chunk-wise. Used when *tag* is - ## :bro:see:`FileAnalysis::ANALYZER_DATA_EVENT`. + ## :bro:see:`Files::ANALYZER_DATA_EVENT`. chunk_event: event(f: fa_file, data: string, off: count) &optional; ## An event which will be generated for all new file contents, ## stream-wise. Used when *tag* is - ## :bro:see:`FileAnalysis::ANALYZER_DATA_EVENT`. + ## :bro:see:`Files::ANALYZER_DATA_EVENT`. stream_event: event(f: fa_file, data: string) &optional; } &redef; @@ -40,23 +34,52 @@ export { ts: time &log; ## An identifier associated with a single file. 
- id: string &log; + fuid: string &log; - ## Identifier associated with a container file from which this one was - ## extracted as part of the file analysis. - parent_id: string &log &optional; + ## If this file was transferred over a network + ## connection this should show the host or hosts that + ## the data sourced from. + tx_hosts: set[addr] &log; + + ## If this file was transferred over a network + ## connection this should show the host or hosts that + ## the data traveled to. + rx_hosts: set[addr] &log; + + ## Connection UIDS over which the file was transferred. + conn_uids: set[string] &log; ## An identification of the source of the file data. E.g. it may be ## a network protocol over which it was transferred, or a local file ## path which was read, or some other input source. source: string &log &optional; - ## If the source of this file is is a network connection, this field - ## may be set to indicate the directionality. - is_orig: bool &log &optional; + ## A value to represent the depth of this file in relation + ## to its source. In SMTP, it is the depth of the MIME + ## attachment on the message. In HTTP, it is the depth of the + ## request within the TCP connection. + depth: count &default=0 &log; - ## The time at which the last activity for the file was seen. - last_active: time &log; + ## A set of analysis types done during the file analysis. + analyzers: set[Analyzer] &log; + + ## A mime type provided by libmagic against the *bof_buffer*, or + ## in the cases where no buffering of the beginning of file occurs, + ## an initial guess of the mime type based on the first data seen. + mime_type: string &log &optional; + + ## A filename for the file if one is available from the source + ## for the file. These will frequently come from + ## "Content-Disposition" headers in network protocols. + filename: string &log &optional; + + ## The duration the file was analyzed for. 
+ duration: interval &log &default=0secs; + + ## If the source of this file is a network connection, this field + ## indicates if the data originated from the local network or not as + ## determined by the configured :bro:see:`Site::local_nets`. + local_orig: bool &log &optional; ## Number of bytes provided to the file analysis engine for the file. seen_bytes: count &log &default=0; @@ -72,49 +95,18 @@ export { ## were delivered to file analyzers due to reassembly buffer overflow. overflow_bytes: count &log &default=0; - ## The amount of time between receiving new data for this file that - ## the analysis engine will wait before giving up on it. - timeout_interval: interval &log &optional; - - ## The number of bytes at the beginning of a file to save for later - ## inspection in *bof_buffer* field. - bof_buffer_size: count &log &optional; - - ## A mime type provided by libmagic against the *bof_buffer*, or - ## in the cases where no buffering of the beginning of file occurs, - ## an initial guess of the mime type based on the first data seen. - mime_type: string &log &optional; - ## Whether the file analysis timed out at least once for the file. timedout: bool &log &default=F; - ## Connection UIDS over which the file was transferred. - conn_uids: set[string] &log; - - ## A set of analysis types done during the file analysis. - analyzers: set[Analyzer]; - - ## Local filenames of extracted files. - extracted_files: set[string] &log; - - ## An MD5 digest of the file contents. - md5: string &log &optional; - - ## A SHA1 digest of the file contents. - sha1: string &log &optional; - - ## A SHA256 digest of the file contents. - sha256: string &log &optional; + ## Identifier associated with a container file from which this one was + ## extracted as part of the file analysis. + parent_fuid: string &log &optional; } &redef; ## A table that can be used to disable file analysis completely for ## any files transferred over given network protocol analyzers.
const disable: table[AnalyzerTag] of bool = table() &redef; - ## Event that can be handled to access the Info record as it is sent on - ## to the logging framework. - global log_file_analysis: event(rec: Info); - ## The salt concatenated to unique file handle strings generated by ## :bro:see:`get_file_handle` before hashing them in to a file id ## (the *id* field of :bro:see:`fa_file`). @@ -146,7 +138,9 @@ export { ## Returns: true if the analyzer will be added, or false if analysis ## for the *id* isn't currently active or the *args* ## were invalid for the analyzer type. - global add_analyzer: function(f: fa_file, args: AnalyzerArgs): bool; + global add_analyzer: function(f: fa_file, + tag: Files::Analyzer, + args: AnalyzerArgs &default=AnalyzerArgs()): bool; ## Removes an analyzer from the analysis of a given file. ## @@ -156,7 +150,7 @@ export { ## ## Returns: true if the analyzer will be removed, or false if analysis ## for the *id* isn't currently active. - global remove_analyzer: function(f: fa_file, args: AnalyzerArgs): bool; + global remove_analyzer: function(f: fa_file, tag: Files::Analyzer, args: AnalyzerArgs): bool; ## Stops/ignores any further analysis of a given file. ## @@ -166,45 +160,75 @@ export { ## rest of it's contents, or false if analysis for the *id* ## isn't currently active. global stop: function(f: fa_file): bool; + + ## Register callbacks for protocols that work with the Files framework. + ## The callbacks must uniquely identify a file and each protocol can + ## only have a single callback registered for it. + ## + ## tag: Tag for the protocol analyzer having a callback being registered. + ## + ## callback: Function that can generate a file handle for the protocol analyzer + ## defined previously. + ## + ## Returns: true if the protocol being registered was not previously registered. 
+ global register_protocol: function(tag: AnalyzerTag, callback: function(c: connection, is_orig: bool): string): bool; + + ## Register a callback for file analyzers to use if they need to do some manipulation + ## when they are being added to a file before the core code takes over. This is + ## unlikely to be interesting for users and should only be called by file analyzer + ## authors but it is *not required*. + ## + ## tag: Tag for the file analyzer. + ## + ## callback: Function to execute when the given file analyzer is being added. + global register_analyzer_add_callback: function(tag: Files::Analyzer, callback: function(f: fa_file, args: AnalyzerArgs)); + + ## Event that can be handled to access the Info record as it is sent on + ## to the logging framework. + global log_files: event(rec: Info); } redef record fa_file += { info: Info &optional; }; +redef record AnalyzerArgs += { + # This is used internally for the core file analyzer api. + tag: Files::Analyzer &optional; +}; + +# Store the callbacks for protocol analyzers that have files. +global registered_protocols: table[AnalyzerTag] of function(c: connection, is_orig: bool): string = table() + &default=function(c: connection, is_orig: bool): string { return cat(c$uid, is_orig); }; + +global analyzer_add_callbacks: table[Files::Analyzer] of function(f: fa_file, args: AnalyzerArgs) = table(); + +event bro_init() &priority=5 + { + Log::create_stream(Files::LOG, [$columns=Info, $ev=log_files]); + } + function set_info(f: fa_file) { if ( !
f?$info ) { - local tmp: Info = Info($ts=network_time()); + local tmp: Info = Info($ts=f$last_active, + $fuid=f$id); f$info = tmp; } - f$info$ts = network_time(); - f$info$id = f$id; if ( f?$parent_id ) - f$info$parent_id = f$parent_id; + f$info$parent_fuid = f$parent_id; if ( f?$source ) f$info$source = f$source; - if ( f?$is_orig ) - f$info$is_orig = f$is_orig; - f$info$last_active = f$last_active; + f$info$duration = f$last_active - f$info$ts; f$info$seen_bytes = f$seen_bytes; if ( f?$total_bytes ) f$info$total_bytes = f$total_bytes; f$info$missing_bytes = f$missing_bytes; f$info$overflow_bytes = f$overflow_bytes; - f$info$timeout_interval = f$timeout_interval; - f$info$bof_buffer_size = f$bof_buffer_size; if ( f?$mime_type ) f$info$mime_type = f$mime_type; - if ( f?$conns ) - { - for ( cid in f$conns ) - { - add f$info$conn_uids[f$conns[cid]$uid]; - } - } } function set_timeout_interval(f: fa_file, t: interval): bool @@ -212,21 +236,31 @@ function set_timeout_interval(f: fa_file, t: interval): bool return __set_timeout_interval(f$id, t); } -function add_analyzer(f: fa_file, args: AnalyzerArgs): bool +function add_analyzer(f: fa_file, tag: Analyzer, args: AnalyzerArgs): bool { - if ( ! __add_analyzer(f$id, args) ) return F; + # This is to construct the correct args for the core API. + args$tag = tag; + add f$info$analyzers[tag]; - set_info(f); - add f$info$analyzers[args$tag]; - - if ( args$tag == FileAnalysis::ANALYZER_EXTRACT ) - add f$info$extracted_files[args$extract_filename]; + if ( tag in analyzer_add_callbacks ) + analyzer_add_callbacks[tag](f, args); + if ( ! 
__add_analyzer(f$id, args) ) + { + Reporter::warning(fmt("Analyzer %s not added successfully to file %s.", tag, f$id)); + return F; + } return T; } -function remove_analyzer(f: fa_file, args: AnalyzerArgs): bool +function register_analyzer_add_callback(tag: Files::Analyzer, callback: function(f: fa_file, args: AnalyzerArgs)) { + analyzer_add_callbacks[tag] = callback; + } + +function remove_analyzer(f: fa_file, tag: Files::Analyzer, args: AnalyzerArgs): bool + { + args$tag = tag; return __remove_analyzer(f$id, args); } @@ -235,25 +269,48 @@ function stop(f: fa_file): bool return __stop(f$id); } -event bro_init() &priority=5 +event file_new(f: fa_file) &priority=10 { - Log::create_stream(FileAnalysis::LOG, - [$columns=Info, $ev=log_file_analysis]); + set_info(f); } -event file_timeout(f: fa_file) &priority=5 +event file_over_new_connection(f: fa_file, c: connection) &priority=10 + { + set_info(f); + add f$info$conn_uids[c$uid]; + local cid = c$id; + add f$info$tx_hosts[f$is_orig ? cid$orig_h : cid$resp_h]; + if( |Site::local_nets| > 0 ) + f$info$local_orig=Site::is_local_addr(f$is_orig ? cid$orig_h : cid$resp_h); + + add f$info$rx_hosts[f$is_orig ? 
cid$resp_h : cid$orig_h]; + } + +event file_timeout(f: fa_file) &priority=10 { set_info(f); f$info$timedout = T; } - -event file_state_remove(f: fa_file) &priority=5 +event file_state_remove(f: fa_file) &priority=10 { set_info(f); } -event file_state_remove(f: fa_file) &priority=-5 +event file_state_remove(f: fa_file) &priority=-10 { - Log::write(FileAnalysis::LOG, f$info); + Log::write(Files::LOG, f$info); + } + +function register_protocol(tag: AnalyzerTag, callback: function(c: connection, is_orig: bool): string): bool + { + local result = (tag !in registered_protocols); + registered_protocols[tag] = callback; + return result; + } + +event get_file_handle(tag: AnalyzerTag, c: connection, is_orig: bool) &priority=5 + { + local handler = registered_protocols[tag]; + set_file_handle(handler(c, is_orig)); } diff --git a/scripts/base/init-bare.bro b/scripts/base/init-bare.bro index c4245d9052..4e1a5248c8 100644 --- a/scripts/base/init-bare.bro +++ b/scripts/base/init-bare.bro @@ -339,7 +339,7 @@ type fa_file: record { ## An identification of the source of the file data. E.g. it may be ## a network protocol over which it was transferred, or a local file ## path which was read, or some other input source. - source: string &optional; + source: string; ## If the source of this file is is a network connection, this field ## may be set to indicate the directionality. 
@@ -3101,4 +3101,4 @@ const snaplen = 8192 &redef; @load base/frameworks/input -@load base/frameworks/file-analysis +@load base/frameworks/files diff --git a/scripts/base/init-default.bro b/scripts/base/init-default.bro index 03ba474e0b..719842af09 100644 --- a/scripts/base/init-default.bro +++ b/scripts/base/init-default.bro @@ -47,5 +47,6 @@ @load base/protocols/syslog @load base/files/hash +@load base/files/extract @load base/misc/find-checksum-offloading diff --git a/scripts/base/protocols/ftp/__load__.bro b/scripts/base/protocols/ftp/__load__.bro index 464571dc7d..9c839610ac 100644 --- a/scripts/base/protocols/ftp/__load__.bro +++ b/scripts/base/protocols/ftp/__load__.bro @@ -1,5 +1,4 @@ @load ./utils-commands @load ./main @load ./file-analysis -@load ./file-extract @load ./gridftp diff --git a/scripts/base/protocols/ftp/file-analysis.bro b/scripts/base/protocols/ftp/file-analysis.bro index f8fa2d816b..3710a44cee 100644 --- a/scripts/base/protocols/ftp/file-analysis.bro +++ b/scripts/base/protocols/ftp/file-analysis.bro @@ -1,6 +1,6 @@ @load ./main @load base/utils/conn-ids -@load base/frameworks/file-analysis/main +@load base/frameworks/files module FTP; @@ -9,40 +9,15 @@ export { global get_file_handle: function(c: connection, is_orig: bool): string; } -function get_handle_string(c: connection): string - { - return cat(ANALYZER_FTP_DATA, " ", c$start_time, " ", id_string(c$id)); - } - function get_file_handle(c: connection, is_orig: bool): string { - if ( [c$id$resp_h, c$id$resp_p] !in ftp_data_expected ) return ""; + if ( [c$id$resp_h, c$id$resp_p] !in ftp_data_expected ) + return ""; - local info: FTP::Info = ftp_data_expected[c$id$resp_h, c$id$resp_p]; - - if ( info$passive ) - # FTP client initiates data channel. - if ( is_orig ) - # Don't care about FTP client data. - return ""; - else - # Do care about FTP server data. - return get_handle_string(c); - else - # FTP server initiates dta channel. - if ( is_orig ) - # Do care about FTP server data. 
- return get_handle_string(c); - else - # Don't care about FTP client data. - return ""; + return cat(ANALYZER_FTP_DATA, c$start_time, c$id, is_orig); } -module GLOBAL; - -event get_file_handle(tag: AnalyzerTag, c: connection, is_orig: bool) - &priority=5 +event bro_init() &priority=5 { - if ( tag != ANALYZER_FTP_DATA ) return; - set_file_handle(FTP::get_file_handle(c, is_orig)); + Files::register_protocol(ANALYZER_FTP_DATA, FTP::get_file_handle); } diff --git a/scripts/base/protocols/ftp/file-extract.bro b/scripts/base/protocols/ftp/file-extract.bro deleted file mode 100644 index 2b7bb8cd50..0000000000 --- a/scripts/base/protocols/ftp/file-extract.bro +++ /dev/null @@ -1,90 +0,0 @@ -##! File extraction support for FTP. - -@load ./main -@load base/utils/files - -module FTP; - -export { - ## Pattern of file mime types to extract from FTP transfers. - const extract_file_types = /NO_DEFAULT/ &redef; - - ## The on-disk prefix for files to be extracted from FTP-data transfers. - const extraction_prefix = "ftp-item" &redef; -} - -redef record Info += { - ## On disk file where it was extracted to. - extraction_file: string &log &optional; - - ## Indicates if the current command/response pair should attempt to - ## extract the file if a file was transferred. - extract_file: bool &default=F; -}; - -function get_extraction_name(f: fa_file): string - { - local r = fmt("%s-%s.dat", extraction_prefix, f$id); - return r; - } - -event file_new(f: fa_file) &priority=5 - { - if ( ! f?$source ) return; - if ( f$source != "FTP_DATA" ) return; - - if ( f?$mime_type && extract_file_types in f$mime_type ) - { - FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT, - $extract_filename=get_extraction_name(f)]); - return; - } - - if ( ! f?$conns ) return; - - for ( cid in f$conns ) - { - local c: connection = f$conns[cid]; - - if ( [cid$resp_h, cid$resp_p] !in ftp_data_expected ) next; - - local s = ftp_data_expected[cid$resp_h, cid$resp_p]; - - if ( ! 
s$extract_file ) next; - - FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT, - $extract_filename=get_extraction_name(f)]); - return; - } - } - -event file_state_remove(f: fa_file) &priority=4 - { - if ( ! f?$source ) return; - if ( f$source != "FTP_DATA" ) return; - if ( ! f?$info ) return; - - for ( filename in f$info$extracted_files ) - { - local s: FTP::Info; - s$ts = network_time(); - s$tags = set(); - s$user = ""; - s$extraction_file = filename; - - if ( f?$conns ) - for ( cid in f$conns ) - { - s$uid = f$conns[cid]$uid; - s$id = cid; - } - - Log::write(FTP::LOG, s); - } - } - -event log_ftp(rec: Info) &priority=-10 - { - delete rec$extraction_file; - delete rec$extract_file; - } diff --git a/scripts/base/protocols/http/__load__.bro b/scripts/base/protocols/http/__load__.bro index 58618dedc7..585b815eed 100644 --- a/scripts/base/protocols/http/__load__.bro +++ b/scripts/base/protocols/http/__load__.bro @@ -1,6 +1,6 @@ @load ./main @load ./utils @load ./file-analysis -@load ./file-ident -@load ./file-hash -@load ./file-extract +#@load ./file-ident +#@load ./file-hash +#@load ./file-extract diff --git a/scripts/base/protocols/http/file-analysis.bro b/scripts/base/protocols/http/file-analysis.bro index 769bb509f5..b79ca041b8 100644 --- a/scripts/base/protocols/http/file-analysis.bro +++ b/scripts/base/protocols/http/file-analysis.bro @@ -1,53 +1,58 @@ @load ./main @load ./utils @load base/utils/conn-ids -@load base/frameworks/file-analysis/main +@load base/frameworks/files module HTTP; export { - redef record HTTP::Info += { - ## Number of MIME entities in the HTTP request message body so far. - request_mime_level: count &default=0; - ## Number of MIME entities in the HTTP response message body so far. - response_mime_level: count &default=0; + redef record Info += { + ## The sniffed mime type of the data being sent by the client. + client_mime_type: string &log &optional; + + ## The sniffed mime type of the data being returned by the server. 
+ mime_type: string &log &optional; }; ## Default file handle provider for HTTP. global get_file_handle: function(c: connection, is_orig: bool): string; } -event http_begin_entity(c: connection, is_orig: bool) &priority=5 - { - if ( ! c?$http ) return; - - if ( is_orig ) - ++c$http$request_mime_level; - else - ++c$http$response_mime_level; - } - function get_file_handle(c: connection, is_orig: bool): string { - if ( ! c?$http ) return ""; - - local mime_level: count = - is_orig ? c$http$request_mime_level : c$http$response_mime_level; - local mime_level_str: string = mime_level > 1 ? cat(mime_level) : ""; + if ( ! c?$http ) + return ""; + local mime_depth = is_orig ? c$http$orig_mime_depth : c$http$resp_mime_depth; if ( c$http$range_request ) - return cat(ANALYZER_HTTP, " ", is_orig, " ", c$id$orig_h, " ", - build_url(c$http)); - - return cat(ANALYZER_HTTP, " ", c$start_time, " ", is_orig, " ", - c$http$trans_depth, mime_level_str, " ", id_string(c$id)); + { + return cat(ANALYZER_HTTP, is_orig, c$id$orig_h, mime_depth, build_url(c$http)); + } + else + { + return cat(ANALYZER_HTTP, c$start_time, is_orig, + c$http$trans_depth, mime_depth, id_string(c$id)); + } } -module GLOBAL; - -event get_file_handle(tag: AnalyzerTag, c: connection, is_orig: bool) - &priority=5 +event bro_init() &priority=5 { - if ( tag != ANALYZER_HTTP ) return; - set_file_handle(HTTP::get_file_handle(c, is_orig)); + Files::register_protocol(ANALYZER_HTTP, HTTP::get_file_handle); } + +event file_over_new_connection(f: fa_file, c: connection) &priority=5 + { + if ( c?$http ) + { + #if (!f?$mime_type) + # print f; +# + #if ( f$is_orig ) + # c$http$client_mime_type = f$mime_type; + #else + # c$http$mime_type = f$mime_type; + + if ( c$http?$filename ) + f$info$filename = c$http$filename; + } + } \ No newline at end of file diff --git a/scripts/base/protocols/http/file-extract.bro b/scripts/base/protocols/http/file-extract.bro deleted file mode 100644 index a8c6039395..0000000000 --- 
a/scripts/base/protocols/http/file-extract.bro +++ /dev/null @@ -1,100 +0,0 @@ -##! Extracts the items from HTTP traffic, one per file. At this time only -##! the message body from the server can be extracted with this script. - -@load ./main -@load ./file-analysis - -module HTTP; - -export { - ## Pattern of file mime types to extract from HTTP response entity bodies. - const extract_file_types = /NO_DEFAULT/ &redef; - - ## The on-disk prefix for files to be extracted from HTTP entity bodies. - const extraction_prefix = "http-item" &redef; - - redef record Info += { - ## On-disk location where files in request body were extracted. - extracted_request_files: vector of string &log &optional; - - ## On-disk location where files in response body were extracted. - extracted_response_files: vector of string &log &optional; - - ## Indicates if the response body is to be extracted or not. Must be - ## set before or by the first :bro:see:`file_new` for the file content. - extract_file: bool &default=F; - }; -} - -function get_extraction_name(f: fa_file): string - { - local r = fmt("%s-%s.dat", extraction_prefix, f$id); - return r; - } - -function add_extraction_file(c: connection, is_orig: bool, fn: string) - { - if ( is_orig ) - { - if ( ! c$http?$extracted_request_files ) - c$http$extracted_request_files = vector(); - c$http$extracted_request_files[|c$http$extracted_request_files|] = fn; - } - else - { - if ( ! c$http?$extracted_response_files ) - c$http$extracted_response_files = vector(); - c$http$extracted_response_files[|c$http$extracted_response_files|] = fn; - } - } - -event file_new(f: fa_file) &priority=5 - { - if ( ! f?$source ) return; - if ( f$source != "HTTP" ) return; - if ( ! 
f?$conns ) return; - - local fname: string; - local c: connection; - - if ( f?$mime_type && extract_file_types in f$mime_type ) - { - fname = get_extraction_name(f); - FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT, - $extract_filename=fname]); - - for ( cid in f$conns ) - { - c = f$conns[cid]; - if ( ! c?$http ) next; - add_extraction_file(c, f$is_orig, fname); - } - - return; - } - - local extracting: bool = F; - - for ( cid in f$conns ) - { - c = f$conns[cid]; - - if ( ! c?$http ) next; - - if ( ! c$http$extract_file ) next; - - fname = get_extraction_name(f); - FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT, - $extract_filename=fname]); - extracting = T; - break; - } - - if ( extracting ) - for ( cid in f$conns ) - { - c = f$conns[cid]; - if ( ! c?$http ) next; - add_extraction_file(c, f$is_orig, fname); - } - } diff --git a/scripts/base/protocols/http/file-hash.bro b/scripts/base/protocols/http/file-hash.bro deleted file mode 100644 index 34d91e45bb..0000000000 --- a/scripts/base/protocols/http/file-hash.bro +++ /dev/null @@ -1,68 +0,0 @@ -##! Calculate hashes for HTTP body transfers. - -@load ./main -@load ./file-analysis - -module HTTP; - -export { - redef record Info += { - ## MD5 sum for a file transferred over HTTP calculated from the - ## response body. - md5: string &log &optional; - - ## This value can be set per-transfer to determine per request - ## if a file should have an MD5 sum generated. It must be - ## set to T at the time of or before the first chunk of body data. - calc_md5: bool &default=F; - }; - - ## Generate MD5 sums for these filetypes. - const generate_md5 = /application\/x-dosexec/ # Windows and DOS executables - | /application\/x-executable/ # *NIX executable binary - &redef; -} - -event file_new(f: fa_file) &priority=5 - { - if ( ! 
f?$source ) return; - if ( f$source != "HTTP" ) return; - - if ( f?$mime_type && generate_md5 in f$mime_type ) - { - FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]); - return; - } - - if ( ! f?$conns ) return; - - for ( cid in f$conns ) - { - local c: connection = f$conns[cid]; - - if ( ! c?$http ) next; - - if ( ! c$http$calc_md5 ) next; - - FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]); - return; - } - } - -event file_state_remove(f: fa_file) &priority=4 - { - if ( ! f?$source ) return; - if ( f$source != "HTTP" ) return; - if ( ! f?$conns ) return; - if ( ! f?$info ) return; - if ( ! f$info?$md5 ) return; - - for ( cid in f$conns ) - { - local c: connection = f$conns[cid]; - - if ( ! c?$http ) next; - - c$http$md5 = f$info$md5; - } - } diff --git a/scripts/base/protocols/http/file-ident.bro b/scripts/base/protocols/http/file-ident.bro deleted file mode 100644 index 7ed4b58a37..0000000000 --- a/scripts/base/protocols/http/file-ident.bro +++ /dev/null @@ -1,105 +0,0 @@ -##! Identification of file types in HTTP response bodies with file content sniffing. - -@load base/frameworks/notice -@load ./main -@load ./utils -@load ./file-analysis - -module HTTP; - -export { - redef enum Notice::Type += { - ## Indicates when the file extension doesn't seem to match the file - ## contents. - Incorrect_File_Type, - }; - - redef record Info += { - ## Mime type of response body identified by content sniffing. - mime_type: string &log &optional; - }; - - ## Mapping between mime type strings (without character set) and - ## regular expressions for URLs. - ## The :bro:enum:`HTTP::Incorrect_File_Type` notice is generated if the - ## pattern doesn't match the mime type that was discovered. 
- const mime_types_extensions: table[string] of pattern = { - ["application/x-dosexec"] = /\.([eE][xX][eE]|[dD][lL][lL])/, - } &redef; - - ## A pattern for filtering out :bro:enum:`HTTP::Incorrect_File_Type` urls - ## that are not noteworthy before a notice is created. Each - ## pattern added should match the complete URL (the matched URLs include - ## "http://" at the beginning). - const ignored_incorrect_file_type_urls = /^$/ &redef; -} - -event file_new(f: fa_file) &priority=5 - { - if ( ! f?$source ) return; - if ( f$source != "HTTP" ) return; - if ( ! f?$mime_type ) return; - if ( ! f?$conns ) return; - - for ( cid in f$conns ) - { - local c: connection = f$conns[cid]; - - if ( ! c?$http ) next; - - c$http$mime_type = f$mime_type; - - local mime_str: string = c$http$mime_type; - - if ( mime_str !in mime_types_extensions ) next; - if ( ! c$http?$uri ) next; - if ( mime_types_extensions[mime_str] in c$http$uri ) next; - - local url = build_url_http(c$http); - - if ( url == ignored_incorrect_file_type_urls ) next; - - local message = fmt("%s %s %s", mime_str, c$http$method, url); - NOTICE([$note=Incorrect_File_Type, - $msg=message, - $conn=c]); - } - } - -event file_over_new_connection(f: fa_file, c: connection) &priority=5 - { - if ( ! f?$source ) return; - if ( f$source != "HTTP" ) return; - if ( ! f?$mime_type ) return; - if ( ! c?$http ) return; - - # Spread the mime around (e.g. for partial content, file_type event only - # happens once for the first connection, but if there's subsequent - # connections to transfer the same file, they'll be lacking the mime_type - # field if we don't do this). - c$http$mime_type = f$mime_type; - } - -# Tracks byte-range request / partial content response mime types, indexed -# by [connection, uri] pairs. This is needed because a person can pipeline -# byte-range requests over multiple connections to the same uri. 
Without -# the tracking, only the first request in the pipeline for each connection -# would get a mime_type field assigned to it (by the FileAnalysis policy hooks). -global partial_types: table[conn_id, string] of string &read_expire=5mins; - -# Priority 4 so that it runs before the handler that will write to http.log. -event http_message_done(c: connection, is_orig: bool, stat: http_message_stat) - &priority=4 - { - if ( ! c$http$range_request ) return; - if ( ! c$http?$uri ) return; - - if ( c$http?$mime_type ) - { - partial_types[c$id, c$http$uri] = c$http$mime_type; - return; - } - - if ( [c$id, c$http$uri] in partial_types ) - c$http$mime_type = partial_types[c$id, c$http$uri]; - } diff --git a/scripts/base/protocols/http/main.bro b/scripts/base/protocols/http/main.bro index a1771c8e77..ebf412d36e 100644 --- a/scripts/base/protocols/http/main.bro +++ b/scripts/base/protocols/http/main.bro @@ -71,10 +71,14 @@ export { ## All of the headers that may indicate if the request was proxied. proxied: set[string] &log &optional; - + ## Indicates if this request can assume 206 partial content in ## response. - range_request: bool &default=F; + range_request: bool &default=F; + ## Number of MIME entities in the HTTP request message body so far. + orig_mime_depth: count &default=0; + ## Number of MIME entities in the HTTP response message body so far. 
+ resp_mime_depth: count &default=0; }; ## Structure to maintain state for an HTTP connection with multiple @@ -283,6 +287,16 @@ event http_header(c: connection, is_orig: bool, name: string, value: string) &pr } } +event http_begin_entity(c: connection, is_orig: bool) &priority=5 + { + set_state(c, F, is_orig); + + if ( is_orig ) + ++c$http$orig_mime_depth; + else + ++c$http$resp_mime_depth; + } + event http_message_done(c: connection, is_orig: bool, stat: http_message_stat) &priority = 5 { set_state(c, F, is_orig); diff --git a/scripts/base/protocols/irc/__load__.bro b/scripts/base/protocols/irc/__load__.bro index 5123385b0c..d20550c54f 100644 --- a/scripts/base/protocols/irc/__load__.bro +++ b/scripts/base/protocols/irc/__load__.bro @@ -1,3 +1,3 @@ @load ./main -@load ./dcc-send +#@load ./dcc-send @load ./file-analysis diff --git a/scripts/base/protocols/irc/dcc-send.bro b/scripts/base/protocols/irc/dcc-send.bro index 53381d0302..afe01485a2 100644 --- a/scripts/base/protocols/irc/dcc-send.bro +++ b/scripts/base/protocols/irc/dcc-send.bro @@ -15,12 +15,6 @@ module IRC; export { - ## Pattern of file mime types to extract from IRC DCC file transfers. - const extract_file_types = /NO_DEFAULT/ &redef; - - ## On-disk prefix for files to be extracted from IRC DCC file transfers. - const extraction_prefix = "irc-dcc-item" &redef; - redef record Info += { ## DCC filename requested. dcc_file_name: string &log &optional; @@ -28,101 +22,10 @@ export { dcc_file_size: count &log &optional; ## Sniffed mime type of the file. dcc_mime_type: string &log &optional; - - ## The file handle for the file to be extracted - extraction_file: string &log &optional; - - ## A boolean to indicate if the current file transfer should be extracted. - extract_file: bool &default=F; }; } -global dcc_expected_transfers: table[addr, port] of Info &read_expire=5mins; - -function set_dcc_mime(f: fa_file) - { - if ( ! 
f?$conns ) return; - - for ( cid in f$conns ) - { - local c: connection = f$conns[cid]; - - if ( [cid$resp_h, cid$resp_p] !in dcc_expected_transfers ) next; - - local s = dcc_expected_transfers[cid$resp_h, cid$resp_p]; - - s$dcc_mime_type = f$mime_type; - } - } - -function set_dcc_extraction_file(f: fa_file, filename: string) - { - if ( ! f?$conns ) return; - - for ( cid in f$conns ) - { - local c: connection = f$conns[cid]; - - if ( [cid$resp_h, cid$resp_p] !in dcc_expected_transfers ) next; - - local s = dcc_expected_transfers[cid$resp_h, cid$resp_p]; - - s$extraction_file = filename; - } - } - -function get_extraction_name(f: fa_file): string - { - local r = fmt("%s-%s.dat", extraction_prefix, f$id); - return r; - } - -# this handler sets the IRC::Info mime type -event file_new(f: fa_file) &priority=5 - { - if ( ! f?$source ) return; - if ( f$source != "IRC_DATA" ) return; - if ( ! f?$mime_type ) return; - - set_dcc_mime(f); - } - -# this handler check if file extraction is desired -event file_new(f: fa_file) &priority=5 - { - if ( ! f?$source ) return; - if ( f$source != "IRC_DATA" ) return; - - local fname: string; - - if ( f?$mime_type && extract_file_types in f$mime_type ) - { - fname = get_extraction_name(f); - FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT, - $extract_filename=fname]); - set_dcc_extraction_file(f, fname); - return; - } - - if ( ! f?$conns ) return; - - for ( cid in f$conns ) - { - local c: connection = f$conns[cid]; - - if ( [cid$resp_h, cid$resp_p] !in dcc_expected_transfers ) next; - - local s = dcc_expected_transfers[cid$resp_h, cid$resp_p]; - - if ( ! 
s$extract_file ) next; - - fname = get_extraction_name(f); - FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT, - $extract_filename=fname]); - s$extraction_file = fname; - return; - } - } +global dcc_expected_transfers: table[addr, port] of Info &synchronized &read_expire=5mins; function log_dcc(f: fa_file) { @@ -143,22 +46,17 @@ function log_dcc(f: fa_file) # Delete these values in case another DCC transfer # happens during the IRC session. - delete irc$extract_file; - delete irc$extraction_file; delete irc$dcc_file_name; delete irc$dcc_file_size; delete irc$dcc_mime_type; - return; } } event file_new(f: fa_file) &priority=-5 { - if ( ! f?$source ) return; - if ( f$source != "IRC_DATA" ) return; - - log_dcc(f); + if ( f?$source && f$source == "IRC_DATA" ) + log_dcc(f); } event irc_dcc_message(c: connection, is_orig: bool, diff --git a/scripts/base/protocols/irc/file-analysis.bro b/scripts/base/protocols/irc/file-analysis.bro index 5159064b27..f2e84fbc22 100644 --- a/scripts/base/protocols/irc/file-analysis.bro +++ b/scripts/base/protocols/irc/file-analysis.bro @@ -1,6 +1,6 @@ -@load ./dcc-send.bro +@load ./dcc-send @load base/utils/conn-ids -@load base/frameworks/file-analysis/main +@load base/frameworks/files module IRC; @@ -11,15 +11,13 @@ export { function get_file_handle(c: connection, is_orig: bool): string { - if ( is_orig ) return ""; - return cat(ANALYZER_IRC_DATA, " ", c$start_time, " ", id_string(c$id)); + if ( [c$id$resp_h, c$id$resp_p] !in dcc_expected_transfers ) + return ""; + + return cat(ANALYZER_IRC_DATA, c$start_time, c$id, is_orig); } -module GLOBAL; - -event get_file_handle(tag: AnalyzerTag, c: connection, is_orig: bool) - &priority=5 +event bro_init() &priority=5 { - if ( tag != ANALYZER_IRC_DATA ) return; - set_file_handle(IRC::get_file_handle(c, is_orig)); + Files::register_protocol(ANALYZER_IRC_DATA, IRC::get_file_handle); } diff --git a/scripts/base/protocols/smtp/__load__.bro b/scripts/base/protocols/smtp/__load__.bro 
index bac9cc118f..1e913d8dff 100644 --- a/scripts/base/protocols/smtp/__load__.bro +++ b/scripts/base/protocols/smtp/__load__.bro @@ -1,4 +1,4 @@ @load ./main @load ./entities -@load ./entities-excerpt +#@load ./entities-excerpt @load ./file-analysis diff --git a/scripts/base/protocols/smtp/entities.bro b/scripts/base/protocols/smtp/entities.bro index b58766e51d..dcb53dc0aa 100644 --- a/scripts/base/protocols/smtp/entities.bro +++ b/scripts/base/protocols/smtp/entities.bro @@ -1,5 +1,6 @@ ##! Analysis and logging for MIME entities found in SMTP sessions. +@load base/frameworks/files @load base/utils/strings @load base/utils/files @load ./main @@ -7,217 +8,56 @@ module SMTP; export { - redef enum Log::ID += { ENTITIES_LOG }; - - type EntityInfo: record { - ## This is the timestamp of when the MIME content transfer began. - ts: time &log; - uid: string &log; - id: conn_id &log; - ## A count to represent the depth of this message transaction in a - ## single connection where multiple messages were transferred. - trans_depth: count &log; - ## The filename seen in the Content-Disposition header. - filename: string &log &optional; - ## Track how many bytes of the MIME encoded file have been seen. - content_len: count &log &default=0; - ## The mime type of the entity discovered through magic bytes identification. - mime_type: string &log &optional; - - ## The calculated MD5 sum for the MIME entity. - md5: string &log &optional; - ## Optionally calculate the file's MD5 sum. Must be set prior to the - ## first data chunk being see in an event. - calc_md5: bool &default=F; - - ## Optionally write the file to disk. Must be set prior to first - ## data chunk being seen in an event. - extract_file: bool &default=F; - ## Store the file handle here for the file currently being extracted. - extraction_file: string &log &optional; + type Entity: record { + filename: string &optional; }; redef record Info += { - ## The in-progress entity information. 
- current_entity: EntityInfo &optional; + ## The current entity being seen. + entity: Entity &optional; }; redef record State += { - ## Track the number of MIME encoded files transferred during a session. - mime_level: count &default=0; + ## Track the number of MIME encoded files transferred + ## during a session. + mime_depth: count &default=0; }; - - ## Generate MD5 sums for these filetypes. - const generate_md5 = /application\/x-dosexec/ # Windows and DOS executables - | /application\/x-executable/ # *NIX executable binary - &redef; - - ## Pattern of file mime types to extract from MIME bodies. - const extract_file_types = /NO_DEFAULT/ &redef; - - ## The on-disk prefix for files to be extracted from MIME entity bodies. - const extraction_prefix = "smtp-entity" &redef; - - ## If set, never generate MD5s. This is mainly for testing purposes to create - ## reproducable output in the case that the decision whether to create - ## checksums depends on environment specifics. - const never_calc_md5 = F &redef; - - global log_mime: event(rec: EntityInfo); } -event bro_init() &priority=5 - { - Log::create_stream(SMTP::ENTITIES_LOG, [$columns=EntityInfo, $ev=log_mime]); - } - -function set_session(c: connection, new_entity: bool) - { - if ( ! c$smtp?$current_entity || new_entity ) - { - local info: EntityInfo; - info$ts=network_time(); - info$uid=c$uid; - info$id=c$id; - info$trans_depth=c$smtp$trans_depth; - - c$smtp$current_entity = info; - ++c$smtp_state$mime_level; - } - } - -function get_extraction_name(f: fa_file): string - { - local r = fmt("%s-%s.dat", extraction_prefix, f$id); - return r; - } - event mime_begin_entity(c: connection) &priority=10 { - if ( ! c?$smtp ) return; + if ( ! c?$smtp ) return; - set_session(c, T); + c$smtp$entity = Entity(); + ++c$smtp_state$mime_depth; } -event file_new(f: fa_file) &priority=5 +event file_over_new_connection(f: fa_file, c: connection) &priority=5 { - if ( ! 
f?$source ) return; - if ( f$source != "SMTP" ) return; - if ( ! f?$conns ) return; - - local fname: string; - local extracting: bool = F; - - for ( cid in f$conns ) - { - local c: connection = f$conns[cid]; - - if ( ! c?$smtp ) next; - if ( ! c$smtp?$current_entity ) next; - - if ( c$smtp$current_entity$extract_file ) - { - if ( ! extracting ) - { - fname = get_extraction_name(f); - FileAnalysis::add_analyzer(f, - [$tag=FileAnalysis::ANALYZER_EXTRACT, - $extract_filename=fname]); - extracting = T; - } - - c$smtp$current_entity$extraction_file = fname; - } - - if ( c$smtp$current_entity$calc_md5 ) - FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]); - } - } - -function check_extract_by_type(f: fa_file) - { - if ( extract_file_types !in f$mime_type ) return; - - if ( f?$info && FileAnalysis::ANALYZER_EXTRACT in f$info$analyzers ) + if ( ! f?$source || f$source != "SMTP" || ! c?$smtp ) return; - local fname: string = get_extraction_name(f); - FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT, - $extract_filename=fname]); - - if ( ! f?$conns ) return; - - for ( cid in f$conns ) - { - local c: connection = f$conns[cid]; - if ( ! c?$smtp ) next; - c$smtp$current_entity$extraction_file = fname; - } + if ( c$smtp?$entity && c$smtp$entity?$filename ) + f$info$filename = c$smtp$entity$filename; + f$info$depth = c$smtp_state$mime_depth; } -function check_md5_by_type(f: fa_file) +event mime_one_header(c: connection, h: mime_header_rec) &priority=5 { - if ( never_calc_md5 ) return; - if ( generate_md5 !in f$mime_type ) return; - - FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]); - } - -event file_new(f: fa_file) &priority=5 - { - if ( ! f?$source ) return; - if ( f$source != "SMTP" ) return; - if ( ! f?$mime_type ) return; - - if ( f?$conns ) - for ( cid in f$conns ) - { - local c: connection = f$conns[cid]; - - if ( ! c?$smtp ) next; - if ( ! 
c$smtp?$current_entity ) next; - - c$smtp$current_entity$mime_type = f$mime_type; - } - - check_extract_by_type(f); - check_md5_by_type(f); - } - -event file_state_remove(f: fa_file) &priority=4 - { - if ( ! f?$source ) return; - if ( f$source != "SMTP" ) return; - if ( ! f?$conns ) return; - - for ( cid in f$conns ) - { - local c: connection = f$conns[cid]; - - if ( ! c?$smtp ) next; - if ( ! c$smtp?$current_entity ) next; - # Only log if there was some content. - if ( f$seen_bytes == 0 ) next; - - if ( f?$info && f$info?$md5 ) - c$smtp$current_entity$md5 = f$info$md5; - - c$smtp$current_entity$content_len = f$seen_bytes; - Log::write(SMTP::ENTITIES_LOG, c$smtp$current_entity); - delete c$smtp$current_entity; + if ( ! c?$smtp ) return; - } - } -event mime_one_header(c: connection, h: mime_header_rec) - { - if ( ! c?$smtp ) return; - if ( h$name == "CONTENT-DISPOSITION" && /[fF][iI][lL][eE][nN][aA][mM][eE]/ in h$value ) - c$smtp$current_entity$filename = extract_filename_from_content_disposition(h$value); + c$smtp$entity$filename = extract_filename_from_content_disposition(h$value); if ( h$name == "CONTENT-TYPE" && /[nN][aA][mM][eE][:blank:]*=/ in h$value ) - c$smtp$current_entity$filename = extract_filename_from_content_disposition(h$value); + c$smtp$entity$filename = extract_filename_from_content_disposition(h$value); + } + +event mime_end_entity(c: connection) &priority=5 + { + if ( c?$smtp && c$smtp?$entity ) + delete c$smtp$entity; } diff --git a/scripts/base/protocols/smtp/file-analysis.bro b/scripts/base/protocols/smtp/file-analysis.bro index b893cbef7d..44938c8698 100644 --- a/scripts/base/protocols/smtp/file-analysis.bro +++ b/scripts/base/protocols/smtp/file-analysis.bro @@ -1,7 +1,7 @@ @load ./main @load ./entities @load base/utils/conn-ids -@load base/frameworks/file-analysis/main +@load base/frameworks/files module SMTP; @@ -12,16 +12,11 @@ export { function get_file_handle(c: connection, is_orig: bool): string { - if ( ! 
c?$smtp ) return ""; - return cat(ANALYZER_SMTP, " ", c$start_time, " ", c$smtp$trans_depth, " ", - c$smtp_state$mime_level); + return cat(ANALYZER_SMTP, c$start_time, c$smtp$trans_depth, + c$smtp_state$mime_depth); } -module GLOBAL; - -event get_file_handle(tag: AnalyzerTag, c: connection, is_orig: bool) - &priority=5 +event bro_init() &priority=5 { - if ( tag != ANALYZER_SMTP ) return; - set_file_handle(SMTP::get_file_handle(c, is_orig)); + Files::register_protocol(ANALYZER_SMTP, SMTP::get_file_handle); } diff --git a/scripts/policy/frameworks/files/detect-MHR.bro b/scripts/policy/frameworks/files/detect-MHR.bro new file mode 100644 index 0000000000..c896bd56fd --- /dev/null +++ b/scripts/policy/frameworks/files/detect-MHR.bro @@ -0,0 +1,63 @@ +##! Detect file downloads that have hash values matching files in Team +##! Cymru's Malware Hash Registry (http://www.team-cymru.org/Services/MHR/). + +@load base/frameworks/files +@load base/frameworks/notice +@load frameworks/files/hash-all-files + +module MalwareHashRegistery; + +export { + redef enum Notice::Type += { + ## The hash value of a file transferred over HTTP matched in the + ## malware hash registry. + Match + }; + + redef record Files::Info += { + ## Team Cymru Malware Hash Registry date of first detection. + mhr_first_detected: time &log &optional; + ## Team Cymru Malware Hash Registry percent of detection + ## among malware scanners. + mhr_detect_rate: count &log &optional; + }; + + ## File types to attempt matching against the Malware Hash Registry. + const match_file_types = /^application\/x-dosexec/ &redef; + + ## The malware hash registry runs each malware sample through several A/V engines. + ## Team Cymru returns a percentage to indicate how many A/V engines flagged the + ## sample as malicious. This threshold allows you to require a minimum detection + ## rate. 
+ const notice_threshold = 10 &redef; +} + +event file_hash(f: fa_file, kind: string, hash: string) + { + if ( kind=="sha1" && f?$mime_type && match_file_types in f$mime_type ) + { + local hash_domain = fmt("%s.malware.hash.cymru.com", hash); + when ( local MHR_result = lookup_hostname_txt(hash_domain) ) + { + # Data is returned as "<dateFirstDetected> <detectionRate>" + local MHR_answer = split1(MHR_result, / /); + if ( |MHR_answer| == 2 ) + { + f$info$mhr_first_detected = double_to_time(to_double(MHR_answer[1])); + f$info$mhr_detect_rate = to_count(MHR_answer[2]); + + #print strftime("%Y-%m-%d %H:%M:%S", f$info$mhr_first_detected); + if ( f$info$mhr_detect_rate >= notice_threshold ) + { + local url = ""; + # TODO: Create a generic mechanism for creating file "urls". + #if ( f$source == "HTTP" ) + # url = HTTP::build_url_http(f); + local message = fmt("%s %s", hash, url); + #local message = fmt("Host(s) %s sent a file with SHA1 hash %s to host %s", f$src_host, hash, f$dst_host); + NOTICE([$note=Match, $msg=message]); + } + } + } + } + } diff --git a/scripts/policy/frameworks/files/hash-all-files.bro b/scripts/policy/frameworks/files/hash-all-files.bro new file mode 100644 index 0000000000..931857c2bc --- /dev/null +++ b/scripts/policy/frameworks/files/hash-all-files.bro @@ -0,0 +1,7 @@ +# Perform MD5 and SHA1 hashing on all files. + +event file_new(f: fa_file) + { + Files::add_analyzer(f, Files::ANALYZER_MD5); + Files::add_analyzer(f, Files::ANALYZER_SHA1); + } diff --git a/scripts/policy/frameworks/intel/smtp-url-extraction.bro b/scripts/policy/frameworks/intel/smtp-url-extraction.bro index 2b87f809a6..b4ab32a915 100644 --- a/scripts/policy/frameworks/intel/smtp-url-extraction.bro +++ b/scripts/policy/frameworks/intel/smtp-url-extraction.bro @@ -26,6 +26,6 @@ event file_new(f: fa_file) &priority=5 if ( ! 
f?$source ) return; if ( f$source != "SMTP" ) return; - FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_DATA_EVENT, + Files::add_analyzer(f, [$tag=Files::ANALYZER_DATA_EVENT, $stream_event=intel_mime_data]); } diff --git a/scripts/base/protocols/smtp/entities-excerpt.bro b/scripts/policy/protocols/smtp/entities-excerpt.bro similarity index 100% rename from scripts/base/protocols/smtp/entities-excerpt.bro rename to scripts/policy/protocols/smtp/entities-excerpt.bro diff --git a/scripts/site/local.bro b/scripts/site/local.bro index dfebd9923a..e4b3a44e7a 100644 --- a/scripts/site/local.bro +++ b/scripts/site/local.bro @@ -64,7 +64,14 @@ # Detect logins using "interesting" hostnames. @load protocols/ssh/interesting-hostnames -# Detect MD5 sums in Team Cymru's Malware Hash Registry. -@load protocols/http/detect-MHR # Detect SQL injection attacks. @load protocols/http/detect-sqli + +#### Network File Handling #### + +# Enable MD5 and SHA1 hashing for all files. +@load frameworks/files/hash-all-files + +# Detect SHA1 sums in Team Cymru's Malware Hash Registry. +@load frameworks/files/detect-MHR + diff --git a/src/const.bif b/src/const.bif index 31e6ccee1a..10dceda6ff 100644 --- a/src/const.bif +++ b/src/const.bif @@ -24,4 +24,4 @@ const Tunnel::ip_tunnel_timeout: interval; const Threading::heartbeat_interval: interval; -const FileAnalysis::salt: string; +const Files::salt: string; diff --git a/src/event.bif b/src/event.bif index 5b14c05933..23ebc0591b 100644 --- a/src/event.bif +++ b/src/event.bif @@ -7001,7 +7001,7 @@ event event_queue_flush_point%(%); event get_file_handle%(tag: count, c: connection, is_orig: bool%); ## Indicates that an analysis of a new file has begun. The analysis can be -## augmented at this time via :bro:see:`FileAnalysis::add_analyzer`. +## augmented at this time via :bro:see:`Files::add_analyzer`. ## ## f: The file. ## @@ -7024,8 +7024,8 @@ event file_over_new_connection%(f: fa_file, c: connection%); ## f: The file. ## ## .. 
bro:see:: file_new file_over_new_connection file_gap file_state_remove -## default_file_timeout_interval FileAnalysis::set_timeout_interval -## FileAnalysis::set_timeout_interval +## default_file_timeout_interval Files::set_timeout_interval +## Files::set_timeout_interval event file_timeout%(f: fa_file%); ## Indicates that a chunk of the file is missing. @@ -7055,8 +7055,8 @@ event file_state_remove%(f: fa_file%); ## ## hash: The result of the hashing. ## -## .. bro:see:: FileAnalysis::add_analyzer FileAnalysis::ANALYZER_MD5 -## FileAnalysis::ANALYZER_SHA1 FileAnalysis::ANALYZER_SHA256 +## .. bro:see:: Files::add_analyzer Files::ANALYZER_MD5 +## Files::ANALYZER_SHA1 Files::ANALYZER_SHA256 event file_hash%(f: fa_file, kind: string, hash: string%); ## Deprecated. Will be removed. diff --git a/src/file_analysis.bif b/src/file_analysis.bif index ef46ccf9c1..648c031221 100644 --- a/src/file_analysis.bif +++ b/src/file_analysis.bif @@ -1,6 +1,6 @@ ##! Internal functions and types used by the logging framework. -module FileAnalysis; +module Files; %%{ #include "file_analysis/Manager.h" @@ -27,35 +27,35 @@ enum Analyzer %{ ANALYZER_DATA_EVENT, %} -## :bro:see:`FileAnalysis::set_timeout_interval`. -function FileAnalysis::__set_timeout_interval%(file_id: string, t: interval%): bool +## :bro:see:`Files::set_timeout_interval`. +function Files::__set_timeout_interval%(file_id: string, t: interval%): bool %{ bool result = file_mgr->SetTimeoutInterval(file_id->CheckString(), t); return new Val(result, TYPE_BOOL); %} -## :bro:see:`FileAnalysis::add_analyzer`. -function FileAnalysis::__add_analyzer%(file_id: string, args: any%): bool +## :bro:see:`Files::add_analyzer`. 
+function Files::__add_analyzer%(file_id: string, args: any%): bool %{ - using BifType::Record::FileAnalysis::AnalyzerArgs; + using BifType::Record::Files::AnalyzerArgs; RecordVal* rv = args->AsRecordVal()->CoerceTo(AnalyzerArgs); bool result = file_mgr->AddAnalyzer(file_id->CheckString(), rv); Unref(rv); return new Val(result, TYPE_BOOL); %} -## :bro:see:`FileAnalysis::remove_analyzer`. -function FileAnalysis::__remove_analyzer%(file_id: string, args: any%): bool +## :bro:see:`Files::remove_analyzer`. +function Files::__remove_analyzer%(file_id: string, args: any%): bool %{ - using BifType::Record::FileAnalysis::AnalyzerArgs; + using BifType::Record::Files::AnalyzerArgs; RecordVal* rv = args->AsRecordVal()->CoerceTo(AnalyzerArgs); bool result = file_mgr->RemoveAnalyzer(file_id->CheckString(), rv); Unref(rv); return new Val(result, TYPE_BOOL); %} -## :bro:see:`FileAnalysis::stop`. -function FileAnalysis::__stop%(file_id: string%): bool +## :bro:see:`Files::stop`. +function Files::__stop%(file_id: string%): bool %{ bool result = file_mgr->IgnoreFile(file_id->CheckString()); return new Val(result, TYPE_BOOL); diff --git a/src/file_analysis/Analyzer.h b/src/file_analysis/Analyzer.h index d32532b264..c348ab358b 100644 --- a/src/file_analysis/Analyzer.h +++ b/src/file_analysis/Analyzer.h @@ -8,7 +8,7 @@ namespace file_analysis { -typedef BifEnum::FileAnalysis::Analyzer FA_Tag; +typedef BifEnum::Files::Analyzer FA_Tag; class File; @@ -93,7 +93,7 @@ public: */ static FA_Tag ArgsTag(const RecordVal* args) { - using BifType::Record::FileAnalysis::AnalyzerArgs; + using BifType::Record::Files::AnalyzerArgs; return static_cast<FA_Tag>( args->Lookup(AnalyzerArgs->FieldOffset("tag"))->AsEnum()); } diff --git a/src/file_analysis/AnalyzerSet.cc b/src/file_analysis/AnalyzerSet.cc index 83c60d9abe..d10e78d338 100644 --- a/src/file_analysis/AnalyzerSet.cc +++ b/src/file_analysis/AnalyzerSet.cc @@ -26,7 +26,7 @@ static void analyzer_del_func(void* v) AnalyzerSet::AnalyzerSet(File* arg_file) : 
file(arg_file) { TypeList* t = new TypeList(); - t->Append(BifType::Record::FileAnalysis::AnalyzerArgs->Ref()); + t->Append(BifType::Record::Files::AnalyzerArgs->Ref()); analyzer_hash = new CompositeHash(t); Unref(t); analyzer_map.SetDeleteFunc(analyzer_del_func); diff --git a/src/file_analysis/DataEvent.cc b/src/file_analysis/DataEvent.cc index 159c8c19cd..1b04111c44 100644 --- a/src/file_analysis/DataEvent.cc +++ b/src/file_analysis/DataEvent.cc @@ -17,7 +17,7 @@ DataEvent::DataEvent(RecordVal* args, File* file, file_analysis::Analyzer* DataEvent::Instantiate(RecordVal* args, File* file) { - using BifType::Record::FileAnalysis::AnalyzerArgs; + using BifType::Record::Files::AnalyzerArgs; int chunk_off = AnalyzerArgs->FieldOffset("chunk_event"); int stream_off = AnalyzerArgs->FieldOffset("stream_event"); diff --git a/src/file_analysis/Extract.cc b/src/file_analysis/Extract.cc index cbe176d4ca..ef37425003 100644 --- a/src/file_analysis/Extract.cc +++ b/src/file_analysis/Extract.cc @@ -29,7 +29,7 @@ Extract::~Extract() file_analysis::Analyzer* Extract::Instantiate(RecordVal* args, File* file) { - using BifType::Record::FileAnalysis::AnalyzerArgs; + using BifType::Record::Files::AnalyzerArgs; Val* v = args->Lookup(AnalyzerArgs->FieldOffset("extract_filename")); if ( ! v ) diff --git a/src/file_analysis/Manager.cc b/src/file_analysis/Manager.cc index b247f23efc..61f9f7a10d 100644 --- a/src/file_analysis/Manager.cc +++ b/src/file_analysis/Manager.cc @@ -38,7 +38,7 @@ string Manager::HashHandle(const string& handle) const static string salt; if ( salt.empty() ) - salt = BifConst::FileAnalysis::salt->CheckString(); + salt = BifConst::Files::salt->CheckString(); char tmp[20]; uint64 hash[2]; @@ -310,7 +310,7 @@ void Manager::GetFileHandle(AnalyzerTag::Tag tag, Connection* c, bool is_orig) bool Manager::IsDisabled(AnalyzerTag::Tag tag) { if ( ! 
disabled ) - disabled = internal_const_val("FileAnalysis::disable")->AsTableVal(); + disabled = internal_const_val("Files::disable")->AsTableVal(); Val* index = new Val(tag, TYPE_COUNT); Val* yield = disabled->Lookup(index); diff --git a/testing/btest/scripts/base/frameworks/file-analysis/bifs/remove_action.bro b/testing/btest/scripts/base/frameworks/file-analysis/bifs/remove_action.bro index 1f15a4221f..e31abe5ea3 100644 --- a/testing/btest/scripts/base/frameworks/file-analysis/bifs/remove_action.bro +++ b/testing/btest/scripts/base/frameworks/file-analysis/bifs/remove_action.bro @@ -11,8 +11,8 @@ redef test_get_file_name = function(f: fa_file): string event file_new(f: fa_file) &priority=-10 { for ( tag in test_file_analyzers ) - FileAnalysis::remove_analyzer(f, tag); + Files::remove_analyzer(f, tag); local filename = test_get_file_name(f); - FileAnalysis::remove_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT, + Files::remove_analyzer(f, [$tag=Files::ANALYZER_EXTRACT, $extract_filename=filename]); } diff --git a/testing/btest/scripts/base/frameworks/file-analysis/bifs/set_timeout_interval.bro b/testing/btest/scripts/base/frameworks/file-analysis/bifs/set_timeout_interval.bro index 8ec4704cdb..c9eac4c31d 100644 --- a/testing/btest/scripts/base/frameworks/file-analysis/bifs/set_timeout_interval.bro +++ b/testing/btest/scripts/base/frameworks/file-analysis/bifs/set_timeout_interval.bro @@ -20,7 +20,7 @@ redef default_file_timeout_interval = 2sec; event file_timeout(f: fa_file) { if ( timeout_cnt < 1 ) - FileAnalysis::set_timeout_interval(f, f$timeout_interval); + Files::set_timeout_interval(f, f$timeout_interval); else terminate(); ++timeout_cnt; diff --git a/testing/btest/scripts/base/frameworks/file-analysis/bifs/stop.bro b/testing/btest/scripts/base/frameworks/file-analysis/bifs/stop.bro index e994706010..dd40c69684 100644 --- a/testing/btest/scripts/base/frameworks/file-analysis/bifs/stop.bro +++ 
b/testing/btest/scripts/base/frameworks/file-analysis/bifs/stop.bro @@ -4,5 +4,5 @@ event file_new(f: fa_file) { - FileAnalysis::stop(f); + Files::stop(f); } diff --git a/testing/scripts/file-analysis-test.bro b/testing/scripts/file-analysis-test.bro index 15929dd4f6..cb1027d8f1 100644 --- a/testing/scripts/file-analysis-test.bro +++ b/testing/scripts/file-analysis-test.bro @@ -1,7 +1,7 @@ global test_file_analysis_source: string = "" &redef; -global test_file_analyzers: set[FileAnalysis::AnalyzerArgs]; +global test_file_analyzers: set[Files::AnalyzerArgs]; global test_get_file_name: function(f: fa_file): string = function(f: fa_file): string { return ""; } &redef; @@ -30,13 +30,13 @@ event file_new(f: fa_file) f$source == test_file_analysis_source ) { for ( tag in test_file_analyzers ) - FileAnalysis::add_analyzer(f, tag); + Files::add_analyzer(f, tag); local filename: string = test_get_file_name(f); if ( filename != "" ) - FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT, + Files::add_analyzer(f, [$tag=Files::ANALYZER_EXTRACT, $extract_filename=filename]); - FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_DATA_EVENT, + Files::add_analyzer(f, [$tag=Files::ANALYZER_DATA_EVENT, $chunk_event=file_chunk, $stream_event=file_stream]); } @@ -94,7 +94,7 @@ event file_state_remove(f: fa_file) event bro_init() { - add test_file_analyzers[[$tag=FileAnalysis::ANALYZER_MD5]]; - add test_file_analyzers[[$tag=FileAnalysis::ANALYZER_SHA1]]; - add test_file_analyzers[[$tag=FileAnalysis::ANALYZER_SHA256]]; + add test_file_analyzers[[$tag=Files::ANALYZER_MD5]]; + add test_file_analyzers[[$tag=Files::ANALYZER_SHA1]]; + add test_file_analyzers[[$tag=Files::ANALYZER_SHA256]]; }