diff --git a/scripts/base/frameworks/file-analysis/main.bro b/scripts/base/frameworks/file-analysis/main.bro index 772bb35159..0fbffcef9f 100644 --- a/scripts/base/frameworks/file-analysis/main.bro +++ b/scripts/base/frameworks/file-analysis/main.bro @@ -1,8 +1,7 @@ -##! TODO add some comments here +##! An interface for driving the analysis of files, possibly independent of +##! any network protocol over which they're transported. @load base/file_analysis.bif - -# TODO: do logging here? @load base/frameworks/logging module FileAnalysis; @@ -13,10 +12,6 @@ export { LOG }; - ## The default buffer size used to reassemble files. - # TODO: what's a reasonable default? - const default_reassembly_buffer_size: count = 1024*1024 &redef; - ## The default buffer size used for storing the beginning of files. const default_bof_buffer_size: count = 1024 &redef; @@ -24,29 +19,32 @@ export { ## before giving up. const default_timeout_interval: interval = 2 mins &redef; - # Needed a forward declaration for event parameters... - type Info: record {}; - + ## A structure which represents a desired file analysis action to take. type ActionArgs: record { + ## The type of action. act: Action; + ## The local filename to which to write an extracted file. Must be + ## set when *act* is :bro:see:`FileAnalysis::ACTION_EXTRACT`. extract_filename: string &optional; - chunk_event: event(info: Info, data: string, off: count) &optional; - stream_event: event(info: Info, data: string) &optional; }; + ## A structure which contains the results of certain file analysis actions. type ActionResults: record { + ## An MD5 digest of the file contents. md5: string &optional; + ## An SHA1 digest of the file contents. sha1: string &optional; + ## An SHA256 digest of the file contents. sha256: string &optional; }; - ## Contains all metadata related to the analysis of a given file, some - ## of which is logged. + ## Contains all metadata related to the analysis of a given file. 
type Info: record { - ## Unique identifier associated with a single file. + ## An identifier associated with a single file. file_id: string &log; - ## Unique identifier associated with the file if it was extracted - ## from a container file as part of the analysis. + + ## Identifier associated with a container file from which this one was + ## extracted as part of the file analysis. parent_file_id: string &log &optional; ## An identification of the source of the file data. E.g. it may be @@ -62,18 +60,17 @@ export { ## Number of bytes provided to the file analysis engine for the file. seen_bytes: count &log &default=0; - ## Total number of bytes that are supposed to comprise the file content. + + ## Total number of bytes that are supposed to comprise the full file. total_bytes: count &log &optional; ## The number of bytes in the file stream that were completely missed ## during the process of analysis e.g. due to dropped packets. - ## analysis that had to be discarded due to a reassembly buffer size - ## of *reassembly_buffer_size* being filled. missing_bytes: count &log &default=0; ## The number of not all-in-sequence bytes in the file stream that ## were delivered to file actions/analyzers due to reassembly buffer - ## size of *reassembly_buffer_size* being filled. + ## overflow. overflow_bytes: count &log &default=0; ## The amount of time between receiving new data for this file that @@ -81,60 +78,237 @@ export { timeout_interval: interval &log &default=default_timeout_interval; ## The number of bytes at the beginning of a file to save for later - ## inspection in *bof_buffer* field of - ## :bro:see:`FileAnalysis::ActionResults`. + ## inspection in *bof_buffer* field. bof_buffer_size: count &log &default=default_bof_buffer_size; ## The content of the beginning of a file up to *bof_buffer_size* bytes. ## This is also the buffer that's used for file/mime type detection. bof_buffer: string &optional; - ## An initial guess at file type. 
+ ## A file type provided by libmagic against the *bof_buffer*, or + ## in the cases where no buffering of the beginning of file occurs, + ## an initial guess of the file type based on the first data seen. file_type: string &log &optional; - ## An initial guess at mime type. + + ## A mime type provided by libmagic against the *bof_buffer*, or + ## in the cases where no buffering of the beginning of file occurs, + ## an initial guess of the mime type based on the first data seen. mime_type: string &log &optional; ## Actions that have been added to the analysis of this file. - ## Not meant to be modified directly by scripts. + ## Only meant for inspection by user scripts, not direct modification. actions: table[ActionArgs] of ActionResults; } &redef; - ## TODO: document + ## Fields that are derived from existing ones, and are set just in time + ## for logging purposes. + redef record FileAnalysis::Info += { + ## Whether the file analysis timed out at least once for the file. + timedout: bool &log &default=F; + + ## Connection UIDs over which the file was transferred. + conn_uids: set[string] &log &optional; + + ## A set of action types taken during the file analysis. + actions_taken: set[Action] &log &optional; + + ## Local filenames of file extraction actions. + extracted_files: set[string] &log &optional; + + ## An MD5 digest of the file contents. + md5: string &log &optional; + + ## A SHA1 digest of the file contents. + sha1: string &log &optional; + + ## A SHA256 digest of the file contents. + sha256: string &log &optional; + }; + + ## Redefined here just so the *info* parameters of the events have the + ## right type information. + redef record ActionArgs += { + ## An event which will be generated for all new file contents, + ## chunk-wise. + chunk_event: event(info: Info, data: string, off: count) &optional; + + ## An event which will be generated for all new file contents, + ## stream-wise.
+ stream_event: event(info: Info, data: string) &optional; + }; + + ## Evaluated every time a significant event occurs during the course of + ## file analysis. Fields of the *info* argument may be modified or + ## other actions may be added or removed inside the body of any handlers + ## of this hook. global policy: hook(trig: Trigger, info: Info); + ## A table that can be used to disable file analysis completely for + ## any files transferred over given network protocol analyzers. const disable: table[AnalyzerTag] of bool = table() &redef; - # TODO: wrapper functions for BiFs ? - ## Event that can be handled to access the Info record as it is sent on ## to the logging framework. global log_file_analysis: event(rec: Info); ## The salt concatenated to unique file handle strings generated by - ## :bro:see:`FileAnalysis::handle_callbacks` before hashing them - ## in to a file id (the *file_id* field of :bro:see:`FileAnalysis::Info`). + ## :bro:see:`get_file_handle` before hashing them in to a file id + ## (the *file_id* field of :bro:see:`FileAnalysis::Info`). ## Provided to help mitigate the possiblility of manipulating parts of ## network connections that factor in to the file handle in order to ## generate two handles that would hash to the same file id. const salt = "I recommend changing this." &redef; + + ## Postpones the timeout of file analysis for a given file. + ## When used within a :bro:see:`FileAnalysis::policy` handler for + ## :bro:see:`FileAnalysis::TRIGGER_TIMEOUT`, the analysis will delay + ## timing out for the period of time indicated by the *timeout_interval* + ## field of :bro:see:`FileAnalysis::Info`. + ## + ## file_id: the file identifier string from the *file_id* field of + ## :bro:see:`FileAnalysis::Info`. + ## + ## Returns: true if the timeout will be postponed, or false if analysis + ## for the *file_id* isn't currently active. + global postpone_timeout: function(file_id: string): bool; + + ## Adds an action to the analysis of a given file. 
+ ## + ## file_id: the file identifier string from the *file_id* field of + ## :bro:see:`FileAnalysis::Info`. + ## + ## args: the action type to add along with any arguments it takes. + ## + ## Returns: true if the action will be added, or false if analysis + ## for the *file_id* isn't currently active or the *args* + ## were invalid for the action type. + global add_action: function(file_id: string, args: ActionArgs): bool; + + ## Removes an action from the analysis of a given file. + ## + ## file_id: the file identifier string from the *file_id* field of + ## :bro:see:`FileAnalysis::Info`. + ## + ## args: the action (type and args) to remove. + ## + ## Returns: true if the action will be removed, or false if analysis + ## for the *file_id* isn't currently active. + global remove_action: function(file_id: string, args: ActionArgs): bool; + + ## Stops/ignores any further analysis of a given file. + ## + ## file_id: the file identifier string from the *file_id* field of + ## :bro:see:`FileAnalysis::Info`. + ## + ## Returns: true if analysis for the given file will be ignored for the + ## rest of its contents, or false if analysis for the *file_id* + ## isn't currently active. + global stop: function(file_id: string): bool; + + ## Sends a sequential stream of data in for file analysis. + ## Meant for use when providing external file analysis input (e.g. + ## from the input framework). + ## + ## source: a string that uniquely identifies the logical file that the + ## data is a part of and describes its source. + ## + ## data: bytestring contents of the file to analyze. + global data_stream: function(source: string, data: string); + + ## Sends a non-sequential chunk of data in for file analysis. + ## Meant for use when providing external file analysis input (e.g. + ## from the input framework). + ## + ## source: a string that uniquely identifies the logical file that the + ## data is a part of and describes its source.
+ ## + ## data: bytestring contents of the file to analyze. + ## + ## offset: the offset within the file that this chunk starts. + global data_chunk: function(source: string, data: string, offset: count); + + ## Signals a content gap in the file bytestream. + ## Meant for use when providing external file analysis input (e.g. + ## from the input framework). + ## + ## source: a string that uniquely identifies the logical file that the + ## data is a part of and describes its source. + ## + ## offset: the offset within the file that this gap starts. + ## + ## len: the number of bytes that are missing. + global gap: function(source: string, offset: count, len: count); + + ## Signals the total size of a file. + ## Meant for use when providing external file analysis input (e.g. + ## from the input framework). + ## + ## source: a string that uniquely identifies the logical file that the + ## data is a part of and describes its source. + ## + ## size: the number of bytes that comprise the full file. + global set_size: function(source: string, size: count); + + ## Signals the end of a file. + ## Meant for use when providing external file analysis input (e.g. + ## from the input framework). + ## + ## source: a string that uniquely identifies the logical file that the + ## data is a part of and describes its source. 
+ global eof: function(source: string); } +function postpone_timeout(file_id: string): bool + { + return __postpone_timeout(file_id); + } + +function add_action(file_id: string, args: ActionArgs): bool + { + return __add_action(file_id, args); + } + +function remove_action(file_id: string, args: ActionArgs): bool + { + return __remove_action(file_id, args); + } + +function stop(file_id: string): bool + { + return __stop(file_id); + } + +function data_stream(source: string, data: string) + { + __data_stream(source, data); + } + +function data_chunk(source: string, data: string, offset: count) + { + __data_chunk(source, data, offset); + } + +function gap(source: string, offset: count, len: count) + { + __gap(source, offset, len); + } + +function set_size(source: string, size: count) + { + __set_size(source, size); + } + +function eof(source: string) + { + __eof(source); + } + event bro_init() &priority=5 { Log::create_stream(FileAnalysis::LOG, [$columns=Info, $ev=log_file_analysis]); } -redef record FileAnalysis::Info += { - timedout: bool &log &default=F; - conn_uids: set[string] &log &optional; - actions_taken: set[Action] &log &optional; - extracted_files: set[string] &log &optional; - md5: string &log &optional; - sha1: string &log &optional; - sha256: string &log &optional; -}; - hook FileAnalysis::policy(trig: FileAnalysis::Trigger, info: FileAnalysis::Info) &priority=5 { diff --git a/src/file_analysis.bif b/src/file_analysis.bif index e00c1c8d05..826089bdbb 100644 --- a/src/file_analysis.bif +++ b/src/file_analysis.bif @@ -10,21 +10,23 @@ type Info: record; type ActionArgs: record; type ActionResults: record; -## An enumeration of possibly-interesting "events" that can occur over -## the course of analyzing files. The :bro:see:`FileAnalysis::policy` -## hook is called each time a trigger occurs. +## An enumeration of significant things that can occur over the course of +## analyzing files. 
The :bro:see:`FileAnalysis::policy` hook is called each +## time a trigger occurs. enum Trigger %{ + ## Raised when any part of a new file is detected. TRIGGER_NEW, + ## Raised when file is detected being transported over a new network ## connection (other than the first). TRIGGER_NEW_CONN, + ## Raised when file analysis has likely seen a complete file. That ## is when a number of bytes indicated by the *total_bytes* field of - ## :bro:see:`FileAnalysis::Info` have been processed. Note that - ## the *undelivered* field does not have to be zero for this to have - ## occurred. + ## :bro:see:`FileAnalysis::Info` have been processed. TRIGGER_DONE, + ## Raised when file analysis for a given file is aborted due ## to not seeing any data for it recently. Note that this doesn't ## necessarily mean the full file wasn't seen (e.g. if the @@ -33,44 +35,55 @@ enum Trigger %{ ## during a :bro:see:`FileAnalysis::policy` handler for this trigger to ## defer the timeout until later. TRIGGER_TIMEOUT, + ## Raised when the beginning of a file is detected. TRIGGER_BOF, - ## Raised when the beginning of a file is available and that beginning - ## is at least the number of bytes indicated by the *bof_buffer_size* - ## field of :bro:see:`FileAnalysis::Info`. + + ## Raised when the beginning of a file is available in the *bof_buffer* + ## field of :bro:see:`FileAnalysis::Info` and that beginning + ## is at least the number of bytes indicated by the *bof_buffer_size* field. TRIGGER_BOF_BUFFER, - ## Raised when an initial guess at the file/mime type of a file is matched - ## based on magic numbers. + + ## Raised when an initial guess at the file/mime type of a file is matched. TRIGGER_TYPE, + ## Raised to signal that no more file data is incoming and it couldn't be - ## determined whether the full file was actually seen. + ## determined whether the full file was actually seen and analyzed. TRIGGER_EOF, - ## The reassembly buffer for the file filled and had to be discarded. 
- ## The *undelivered* field of :bro:see:`FileAnalysis::Info` will - ## indicate the number of bytes, if any, that were not all-in-sequence. - ## TODO: Is it possible to extend the reassembly buffer when "handling" - ## this trigger? - TRIGGER_REASSEMBLY_BUFFER_FULL, + ## Raised when there's a missing chunk of data in the file stream. TRIGGER_GAP, %} +## An enumeration of various file analysis actions that can be taken. enum Action %{ + + ## Extract a file to local filesystem ACTION_EXTRACT, + + ## Calculate an MD5 digest of the file's contents. ACTION_MD5, + + ## Calculate an SHA1 digest of the file's contents. ACTION_SHA1, + + ## Calculate an SHA256 digest of the file's contents. ACTION_SHA256, + + ## Deliver the file contents to the script-layer in an event. ACTION_DATA_EVENT, %} -function FileAnalysis::postpone_timeout%(file_id: string%): bool +## :bro:see:`FileAnalysis::postpone_timeout`. +function FileAnalysis::__postpone_timeout%(file_id: string%): bool %{ using file_analysis::FileID; bool result = file_mgr->PostponeTimeout(FileID(file_id->CheckString())); return new Val(result, TYPE_BOOL); %} -function FileAnalysis::add_action%(file_id: string, args: any%): bool +## :bro:see:`FileAnalysis::add_action`. +function FileAnalysis::__add_action%(file_id: string, args: any%): bool %{ using file_analysis::FileID; using BifType::Record::FileAnalysis::ActionArgs; @@ -80,7 +93,8 @@ function FileAnalysis::add_action%(file_id: string, args: any%): bool return new Val(result, TYPE_BOOL); %} -function FileAnalysis::remove_action%(file_id: string, args: any%): bool +## :bro:see:`FileAnalysis::remove_action`. 
+function FileAnalysis::__remove_action%(file_id: string, args: any%): bool %{ using file_analysis::FileID; using BifType::Record::FileAnalysis::ActionArgs; @@ -90,39 +104,45 @@ function FileAnalysis::remove_action%(file_id: string, args: any%): bool return new Val(result, TYPE_BOOL); %} -function FileAnalysis::stop%(file_id: string%): bool +## :bro:see:`FileAnalysis::stop`. +function FileAnalysis::__stop%(file_id: string%): bool %{ using file_analysis::FileID; bool result = file_mgr->IgnoreFile(FileID(file_id->CheckString())); return new Val(result, TYPE_BOOL); %} -function FileAnalysis::data_stream%(source: string, data: string%): any +## :bro:see:`FileAnalysis::data_stream`. +function FileAnalysis::__data_stream%(source: string, data: string%): any %{ file_mgr->DataIn(data->Bytes(), data->Len(), source->CheckString()); return 0; %} -function FileAnalysis::data_chunk%(source: string, data: string, - offset: count%): any +## :bro:see:`FileAnalysis::data_chunk`. +function FileAnalysis::__data_chunk%(source: string, data: string, + offset: count%): any %{ file_mgr->DataIn(data->Bytes(), data->Len(), offset, source->CheckString()); return 0; %} -function FileAnalysis::gap%(source: string, offset: count, len: count%): any +## :bro:see:`FileAnalysis::gap`. +function FileAnalysis::__gap%(source: string, offset: count, len: count%): any %{ file_mgr->Gap(offset, len, source->CheckString()); return 0; %} -function FileAnalysis::set_size%(source: string, size: count%): any +## :bro:see:`FileAnalysis::set_size`. +function FileAnalysis::__set_size%(source: string, size: count%): any %{ file_mgr->SetSize(size, source->CheckString()); return 0; %} -function FileAnalysis::eof%(source: string%): any +## :bro:see:`FileAnalysis::eof`. 
+function FileAnalysis::__eof%(source: string%): any %{ file_mgr->EndOfFile(source->CheckString()); return 0; diff --git a/src/file_analysis/ActionSet.h b/src/file_analysis/ActionSet.h index e1f1355aa9..aab3c1f94e 100644 --- a/src/file_analysis/ActionSet.h +++ b/src/file_analysis/ActionSet.h @@ -13,6 +13,11 @@ namespace file_analysis { class Info; declare(PDict,Action); +/** + * A set of file analysis actions indexed by ActionArgs. Allows queueing + * of addition/removals so that those modifications can happen at well-defined + * times (e.g. to make sure a loop iterator isn't invalidated). + */ class ActionSet { public: diff --git a/src/file_analysis/PendingFile.h b/src/file_analysis/PendingFile.h index 58b842d969..678d7aff94 100644 --- a/src/file_analysis/PendingFile.h +++ b/src/file_analysis/PendingFile.h @@ -7,6 +7,10 @@ namespace file_analysis { +/** + * Provides buffering for file contents until the script-layer is able to + * return a unique file handle for it. + */ class PendingFile { public: diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.logging/file_analysis.log b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.logging/file_analysis.log index 5d9322a8ae..482ac7216d 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.logging/file_analysis.log +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.logging/file_analysis.log @@ -3,8 +3,8 @@ #empty_field (empty) #unset_field - #path file_analysis -#open 2013-03-28-21-35-46 +#open 2013-03-29-18-28-57 #fields file_id parent_file_id source last_active seen_bytes total_bytes missing_bytes overflow_bytes timeout_interval bof_buffer_size file_type mime_type timedout conn_uids actions_taken extracted_files md5 sha1 sha256 #types string string string time count count count count interval count string string bool table[string] table[enum] table[string] string string string -Cx92a0ym5R8 - HTTP 1362692527.009775 4705 4705 0 0 120.000000 1024 set set 
F UWkUyAuUGXf FileAnalysis::ACTION_SHA1,FileAnalysis::ACTION_EXTRACT,FileAnalysis::ACTION_DATA_EVENT,FileAnalysis::ACTION_MD5,FileAnalysis::ACTION_SHA256 Cx92a0ym5R8-file 397168fd09991a0e712254df7bc639ac 1dd7ac0398df6cbc0696445a91ec681facf4dc47 4e7c7ef0984119447e743e3ec77e1de52713e345cde03fe7df753a35849bed18 -#close 2013-03-28-21-35-46 +Cx92a0ym5R8 - HTTP 1362692527.009775 4705 4705 0 0 120.000000 1024 set set F UWkUyAuUGXf FileAnalysis::ACTION_SHA1,FileAnalysis::ACTION_DATA_EVENT,FileAnalysis::ACTION_EXTRACT,FileAnalysis::ACTION_MD5,FileAnalysis::ACTION_SHA256 Cx92a0ym5R8-file 397168fd09991a0e712254df7bc639ac 1dd7ac0398df6cbc0696445a91ec681facf4dc47 4e7c7ef0984119447e743e3ec77e1de52713e345cde03fe7df753a35849bed18 +#close 2013-03-29-18-28-57