##! An interface for driving the analysis of files, possibly independent of
##! any network protocol over which they're transported.

@load base/file_analysis.bif
@load base/frameworks/logging

module FileAnalysis;

export {
	redef enum Log::ID += {
		## Logging stream for file analysis.
		LOG
	};

	## A structure which represents a desired file analysis action to take.
	type ActionArgs: record {
		## The type of action.
		act: Action;
		## The local filename to which to write an extracted file.  Must be
		## set when *act* is :bro:see:`FileAnalysis::ACTION_EXTRACT`.
		extract_filename: string &optional;
		## An event which will be generated for all new file contents,
		## chunk-wise.
		chunk_event: event(f: fa_file, data: string, off: count) &optional;
		## An event which will be generated for all new file contents,
		## stream-wise.
		stream_event: event(f: fa_file, data: string) &optional;
	} &redef;

	## Contains all metadata related to the analysis of a given file.
	## For the most part, fields here are derived from ones of the same name
	## in :bro:see:`fa_file`.
	type Info: record {
		## An identifier associated with a single file.
		id: string &log;

		## Identifier associated with a container file from which this one was
		## extracted as part of the file analysis.
		parent_id: string &log &optional;

		## An identification of the source of the file data.  E.g. it may be
		## a network protocol over which it was transferred, or a local file
		## path which was read, or some other input source.
		source: string &log &optional;

		## The time at which the last activity for the file was seen.
		last_active: time &log;

		## Number of bytes provided to the file analysis engine for the file.
		seen_bytes: count &log &default=0;

		## Total number of bytes that are supposed to comprise the full file.
		total_bytes: count &log &optional;

		## The number of bytes in the file stream that were completely missed
		## during the process of analysis e.g. due to dropped packets.
		missing_bytes: count &log &default=0;

		## The number of not all-in-sequence bytes in the file stream that
		## were delivered to file actions/analyzers due to reassembly buffer
		## overflow.
		overflow_bytes: count &log &default=0;

		## The amount of time between receiving new data for this file that
		## the analysis engine will wait before giving up on it.
		timeout_interval: interval &log &optional;

		## The number of bytes at the beginning of a file to save for later
		## inspection in *bof_buffer* field.
		bof_buffer_size: count &log &optional;

		## A file type provided by libmagic against the *bof_buffer*, or
		## in the cases where no buffering of the beginning of file occurs,
		## an initial guess of the file type based on the first data seen.
		file_type: string &log &optional;

		## A mime type provided by libmagic against the *bof_buffer*, or
		## in the cases where no buffering of the beginning of file occurs,
		## an initial guess of the mime type based on the first data seen.
		mime_type: string &log &optional;

		## Whether the file analysis timed out at least once for the file.
		timedout: bool &log &default=F;

		## Connection UIDs over which the file was transferred.
		conn_uids: set[string] &log;

		## A set of action types taken during the file analysis.
		actions_taken: set[Action] &log;

		## Local filenames of file extraction actions.
		extracted_files: set[string] &log;

		## An MD5 digest of the file contents.
		md5: string &log &optional;

		## A SHA1 digest of the file contents.
		sha1: string &log &optional;

		## A SHA256 digest of the file contents.
		sha256: string &log &optional;
	} &redef;

	## A table that can be used to disable file analysis completely for
	## any files transferred over given network protocol analyzers.
	const disable: table[AnalyzerTag] of bool = table() &redef;

	## Event that can be handled to access the Info record as it is sent on
	## to the logging framework.
	global log_file_analysis: event(rec: Info);

	## The salt concatenated to unique file handle strings generated by
	## :bro:see:`get_file_handle` before hashing them into a file id
	## (the *id* field of :bro:see:`fa_file`).
	## Provided to help mitigate the possibility of manipulating parts of
	## network connections that factor into the file handle in order to
	## generate two handles that would hash to the same file id.
	const salt = "I recommend changing this." &redef;

	## Postpones the timeout of file analysis for a given file.
	## When used within a :bro:see:`file_timeout` handler, the analysis
	## will delay timing out for the period of time indicated by
	## the *timeout_interval* field of :bro:see:`fa_file`.
	##
	## f: the file.
	##
	## Returns: true if the timeout will be postponed, or false if analysis
	##          for the *id* isn't currently active.
	global postpone_timeout: function(f: fa_file): bool;

	## Adds an action to the analysis of a given file.
	##
	## f: the file.
	##
	## args: the action type to add along with any arguments it takes.
	##
	## Returns: true if the action will be added, or false if analysis
	##          for the *id* isn't currently active or the *args*
	##          were invalid for the action type.
	global add_action: function(f: fa_file, args: ActionArgs): bool;

	## Removes an action from the analysis of a given file.
	##
	## f: the file.
	##
	## args: the action (type and args) to remove.
	##
	## Returns: true if the action will be removed, or false if analysis
	##          for the *id* isn't currently active.
	global remove_action: function(f: fa_file, args: ActionArgs): bool;

	## Stops/ignores any further analysis of a given file.
	##
	## f: the file.
	##
	## Returns: true if analysis for the given file will be ignored for the
	##          rest of its contents, or false if analysis for the *id*
	##          isn't currently active.
	global stop: function(f: fa_file): bool;

	## Sends a sequential stream of data in for file analysis.
	## Meant for use when providing external file analysis input (e.g.
	## from the input framework).
	##
	## source: a string that uniquely identifies the logical file that the
	##         data is a part of and describes its source.
	##
	## data: bytestring contents of the file to analyze.
	global data_stream: function(source: string, data: string);

	## Sends a non-sequential chunk of data in for file analysis.
	## Meant for use when providing external file analysis input (e.g.
	## from the input framework).
	##
	## source: a string that uniquely identifies the logical file that the
	##         data is a part of and describes its source.
	##
	## data: bytestring contents of the file to analyze.
	##
	## offset: the offset within the file that this chunk starts.
	global data_chunk: function(source: string, data: string, offset: count);

	## Signals a content gap in the file bytestream.
	## Meant for use when providing external file analysis input (e.g.
	## from the input framework).
	##
	## source: a string that uniquely identifies the logical file that the
	##         data is a part of and describes its source.
	##
	## offset: the offset within the file that this gap starts.
	##
	## len: the number of bytes that are missing.
	global gap: function(source: string, offset: count, len: count);

	## Signals the total size of a file.
	## Meant for use when providing external file analysis input (e.g.
	## from the input framework).
	##
	## source: a string that uniquely identifies the logical file that the
	##         data is a part of and describes its source.
	##
	## size: the number of bytes that comprise the full file.
	global set_size: function(source: string, size: count);

	## Signals the end of a file.
	## Meant for use when providing external file analysis input (e.g.
	## from the input framework).
	##
	## source: a string that uniquely identifies the logical file that the
	##         data is a part of and describes its source.
	global eof: function(source: string);
}

# Attach the loggable Info record to every fa_file so that handlers can
# accumulate state on it until it is written to the log.
redef record fa_file += {
	info: Info &optional;
};

# Synchronizes f$info with the current state of the file record.
# Creates f$info on first use, then copies the fields of the same name from
# *f*.  Fields that are checked with ?$ below are copied only when they are
# present on *f*; fields already stored in f$info by other code (e.g.
# actions_taken, hashes, timedout) are left untouched.
function set_info(f: fa_file)
	{
	if ( ! f?$info )
		{
		local tmp: Info;
		f$info = tmp;
		}

	f$info$id = f$id;

	if ( f?$parent_id )
		f$info$parent_id = f$parent_id;

	if ( f?$source )
		f$info$source = f$source;

	f$info$last_active = f$last_active;
	f$info$seen_bytes = f$seen_bytes;

	if ( f?$total_bytes )
		f$info$total_bytes = f$total_bytes;

	f$info$missing_bytes = f$missing_bytes;
	f$info$overflow_bytes = f$overflow_bytes;
	f$info$timeout_interval = f$timeout_interval;
	f$info$bof_buffer_size = f$bof_buffer_size;

	if ( f?$file_type )
		f$info$file_type = f$file_type;

	if ( f?$mime_type )
		f$info$mime_type = f$mime_type;

	# Record the UID of every connection currently associated with the file.
	if ( f?$conns )
		for ( cid in f$conns )
			add f$info$conn_uids[f$conns[cid]$uid];
	}

# Thin wrapper: forwards the file's id to __postpone_timeout.
function postpone_timeout(f: fa_file): bool
	{
	return __postpone_timeout(f$id);
	}

# Adds the action via __add_action and, only on success, records it in
# f$info so that it shows up in the log.
function add_action(f: fa_file, args: ActionArgs): bool
	{
	if ( ! __add_action(f$id, args) )
		return F;

	set_info(f);
	add f$info$actions_taken[args$act];

	# extract_filename is &optional: check for its presence before reading it
	# so a missing filename can never trigger a runtime error here.
	if ( args$act == FileAnalysis::ACTION_EXTRACT && args?$extract_filename )
		add f$info$extracted_files[args$extract_filename];

	return T;
	}

# Thin wrapper: forwards the file's id and the action args to __remove_action.
# Note that f$info$actions_taken is not modified; it records actions that
# were taken at any point during the analysis.
function remove_action(f: fa_file, args: ActionArgs): bool
	{
	return __remove_action(f$id, args);
	}

# Thin wrapper: forwards the file's id to __stop.
function stop(f: fa_file): bool
	{
	return __stop(f$id);
	}

# The following functions forward their arguments unchanged to the
# identically named __-prefixed functions.

function data_stream(source: string, data: string)
	{
	__data_stream(source, data);
	}

function data_chunk(source: string, data: string, offset: count)
	{
	__data_chunk(source, data, offset);
	}

function gap(source: string, offset: count, len: count)
	{
	__gap(source, offset, len);
	}

function set_size(source: string, size: count)
	{
	__set_size(source, size);
	}

function eof(source: string)
	{
	__eof(source);
	}

# Create the log stream, using Info as the column layout and raising
# log_file_analysis for each record written.
event bro_init() &priority=5
	{
	Log::create_stream(FileAnalysis::LOG,
	                   [$columns=Info, $ev=log_file_analysis]);
	}

# Remember that the file timed out at least once.
event file_timeout(f: fa_file) &priority=5
	{
	set_info(f);
	f$info$timedout = T;
	}

# Store a computed digest in the matching Info field.  Only the kinds
# "md5", "sha1" and "sha256" have a case below.
event file_hash(f: fa_file, kind: string, hash: string) &priority=5
	{
	set_info(f);

	switch ( kind ) {
	case "md5":
		f$info$md5 = hash;
		break;
	case "sha1":
		f$info$sha1 = hash;
		break;
	case "sha256":
		f$info$sha256 = hash;
		break;
	}
	}

# Refresh f$info one last time before the file's state is removed.
event file_state_remove(f: fa_file) &priority=5
	{
	set_info(f);
	}

# Write the log entry.  Runs at a lower priority than the handler above, so
# f$info has already been populated by the time it is written.
event file_state_remove(f: fa_file) &priority=-5
	{
	Log::write(FileAnalysis::LOG, f$info);
	}