##! An interface for driving the analysis of files, possibly independent of
##! any network protocol over which they're transported.

@load base/bif/file_analysis.bif
@load base/frameworks/logging

module FileAnalysis;

export {
	redef enum Log::ID += {
		## Logging stream for file analysis.
		LOG
	};

	## A structure which represents a desired type of file analysis.
	type AnalyzerArgs: record {
		## The type of analysis.
		tag: FileAnalysis::Tag;

		## The local filename to which to write an extracted file.  Must be
		## set when *tag* is :bro:see:`FileAnalysis::ANALYZER_EXTRACT`.
		extract_filename: string &optional;

		## An event which will be generated for all new file contents,
		## chunk-wise.  Used when *tag* is
		## :bro:see:`FileAnalysis::ANALYZER_DATA_EVENT`.
		chunk_event: event(f: fa_file, data: string, off: count) &optional;

		## An event which will be generated for all new file contents,
		## stream-wise.  Used when *tag* is
		## :bro:see:`FileAnalysis::ANALYZER_DATA_EVENT`.
		stream_event: event(f: fa_file, data: string) &optional;
	} &redef;

	## Contains all metadata related to the analysis of a given file.
	## For the most part, fields here are derived from ones of the same name
	## in :bro:see:`fa_file`.
	type Info: record {
		## An identifier associated with a single file.
		id: string &log;

		## Identifier associated with a container file from which this one was
		## extracted as part of the file analysis.
		parent_id: string &log &optional;

		## An identification of the source of the file data.  E.g. it may be
		## a network protocol over which it was transferred, or a local file
		## path which was read, or some other input source.
		source: string &log &optional;

		## If the source of this file is a network connection, this field
		## may be set to indicate the directionality.
		is_orig: bool &log &optional;

		## The time at which the last activity for the file was seen.
		last_active: time &log;

		## Number of bytes provided to the file analysis engine for the file.
		seen_bytes: count &log &default=0;

		## Total number of bytes that are supposed to comprise the full file.
		total_bytes: count &log &optional;

		## The number of bytes in the file stream that were completely missed
		## during the process of analysis, e.g. due to dropped packets.
		missing_bytes: count &log &default=0;

		## The number of not all-in-sequence bytes in the file stream that
		## were delivered to file analyzers due to reassembly buffer overflow.
		overflow_bytes: count &log &default=0;

		## The amount of time between receiving new data for this file that
		## the analysis engine will wait before giving up on it.
		timeout_interval: interval &log &optional;

		## The number of bytes at the beginning of a file to save for later
		## inspection in *bof_buffer* field.
		bof_buffer_size: count &log &optional;

		## A mime type provided by libmagic against the *bof_buffer*, or
		## in the cases where no buffering of the beginning of file occurs,
		## an initial guess of the mime type based on the first data seen.
		mime_type: string &log &optional;

		## Whether the file analysis timed out at least once for the file.
		timedout: bool &log &default=F;

		## Connection UIDs over which the file was transferred.
		conn_uids: set[string] &log;

		## A set of analysis types done during the file analysis.
		## Note that this field is tracked but not written to the log
		## (it carries no ``&log`` attribute).
		analyzers: set[FileAnalysis::Tag];

		## Local filenames of extracted files.
		extracted_files: set[string] &log;

		## An MD5 digest of the file contents.
		md5: string &log &optional;

		## A SHA1 digest of the file contents.
		sha1: string &log &optional;

		## A SHA256 digest of the file contents.
		sha256: string &log &optional;
	} &redef;

	## A table that can be used to disable file analysis completely for
	## any files transferred over given network protocol analyzers.
	const disable: table[Analyzer::Tag] of bool = table() &redef;

	## Event that can be handled to access the Info record as it is sent on
	## to the logging framework.
	global log_file_analysis: event(rec: Info);

	## The salt concatenated to unique file handle strings generated by
	## :bro:see:`get_file_handle` before hashing them in to a file id
	## (the *id* field of :bro:see:`fa_file`).
	## Provided to help mitigate the possibility of manipulating parts of
	## network connections that factor in to the file handle in order to
	## generate two handles that would hash to the same file id.
	const salt = "I recommend changing this." &redef;

	## Sets the *timeout_interval* field of :bro:see:`fa_file`, which is
	## used to determine the length of inactivity that is allowed for a file
	## before internal state related to it is cleaned up.  When used within a
	## :bro:see:`file_timeout` handler, the analysis will delay timing out
	## again for the period specified by *t*.
	##
	## f: the file.
	##
	## t: the amount of time the file can remain inactive before discarding.
	##
	## Returns: true if the timeout interval was set, or false if analysis
	##          for the file isn't currently active.
	global set_timeout_interval: function(f: fa_file, t: interval): bool;

	## Adds an analyzer to the analysis of a given file.
	##
	## f: the file.
	##
	## args: the analyzer type to add along with any arguments it takes.
	##
	## Returns: true if the analyzer will be added, or false if analysis
	##          for the file isn't currently active or the *args*
	##          were invalid for the analyzer type.
	global add_analyzer: function(f: fa_file, args: AnalyzerArgs): bool;

	## Removes an analyzer from the analysis of a given file.
	##
	## f: the file.
	##
	## args: the analyzer (type and args) to remove.
	##
	## Returns: true if the analyzer will be removed, or false if analysis
	##          for the file isn't currently active.
	global remove_analyzer: function(f: fa_file, args: AnalyzerArgs): bool;

	## Stops/ignores any further analysis of a given file.
	##
	## f: the file.
	##
	## Returns: true if analysis for the given file will be ignored for the
	##          rest of its contents, or false if analysis for the file
	##          isn't currently active.
	global stop: function(f: fa_file): bool;
}

# Attach the logging record to every file so handlers can accumulate state
# in it over the lifetime of the file.
redef record fa_file += {
	info: Info &optional;
};

# Creates f$info if it does not exist yet and refreshes it from the current
# values of the corresponding fa_file fields.  Optional fa_file fields are
# only copied when they are set, so a previously stored value is never
# replaced by a missing one.
function set_info(f: fa_file)
	{
	if ( ! f?$info )
		{
		local tmp: Info;
		f$info = tmp;
		}

	f$info$id = f$id;

	if ( f?$parent_id )
		f$info$parent_id = f$parent_id;

	if ( f?$source )
		f$info$source = f$source;

	if ( f?$is_orig )
		f$info$is_orig = f$is_orig;

	f$info$last_active = f$last_active;
	f$info$seen_bytes = f$seen_bytes;

	if ( f?$total_bytes )
		f$info$total_bytes = f$total_bytes;

	f$info$missing_bytes = f$missing_bytes;
	f$info$overflow_bytes = f$overflow_bytes;
	f$info$timeout_interval = f$timeout_interval;
	f$info$bof_buffer_size = f$bof_buffer_size;

	if ( f?$mime_type )
		f$info$mime_type = f$mime_type;

	# Record the UID of every connection currently associated with the file.
	# UIDs are only ever added, so ones seen on earlier calls are retained.
	if ( f?$conns )
		{
		for ( cid in f$conns )
			add f$info$conn_uids[f$conns[cid]$uid];
		}
	}

# Thin wrapper that forwards the file's id to the __set_timeout_interval
# built-in.
function set_timeout_interval(f: fa_file, t: interval): bool
	{
	return __set_timeout_interval(f$id, t);
	}

# Asks the __add_analyzer built-in to attach the analyzer and, only when that
# succeeds, records the analyzer tag (and the extraction filename, if any)
# in f$info.
function add_analyzer(f: fa_file, args: AnalyzerArgs): bool
	{
	if ( ! __add_analyzer(f$id, args) )
		return F;

	set_info(f);
	add f$info$analyzers[args$tag];

	# extract_filename is &optional: check that it is present before reading
	# it, since accessing an unset field is a runtime error.
	if ( args$tag == FileAnalysis::ANALYZER_EXTRACT && args?$extract_filename )
		add f$info$extracted_files[args$extract_filename];

	return T;
	}

# Thin wrapper that forwards the file's id to the __remove_analyzer built-in.
# The tag is intentionally left in f$info$analyzers, which tracks the analysis
# types applied at any point during the file's analysis.
function remove_analyzer(f: fa_file, args: AnalyzerArgs): bool
	{
	return __remove_analyzer(f$id, args);
	}

# Thin wrapper that forwards the file's id to the __stop built-in.
function stop(f: fa_file): bool
	{
	return __stop(f$id);
	}

# Register the file analysis logging stream.
event bro_init() &priority=5
	{
	Log::create_stream(FileAnalysis::LOG, [$columns=Info, $ev=log_file_analysis]);
	}

# Remember that the file timed out at least once.
event file_timeout(f: fa_file) &priority=5
	{
	set_info(f);
	f$info$timedout = T;
	}

# Store a computed digest in the matching Info field.  Hash kinds other than
# "md5", "sha1" and "sha256" are ignored.
event file_hash(f: fa_file, kind: string, hash: string) &priority=5
	{
	set_info(f);

	switch ( kind ) {
	case "md5":
		f$info$md5 = hash;
		break;
	case "sha1":
		f$info$sha1 = hash;
		break;
	case "sha256":
		f$info$sha256 = hash;
		break;
	}
	}

# Take a final snapshot of the file's state.  This runs at a higher priority
# than the logging handler below, so f$info exists by the time it is written.
event file_state_remove(f: fa_file) &priority=5
	{
	set_info(f);
	}

# Write the accumulated record to the log.  The low priority gives other
# handlers of this event a chance to update f$info first.
event file_state_remove(f: fa_file) &priority=-5
	{
	Log::write(FileAnalysis::LOG, f$info);
	}