FileAnalysis: first pass over documentation.

This commit is contained in:
Jon Siwek 2013-03-29 13:41:37 -05:00
parent 3642ecc73e
commit 83f47d6f7a
5 changed files with 275 additions and 72 deletions

View file

@ -1,8 +1,7 @@
##! TODO add some comments here
##! An interface for driving the analysis of files, possibly independent of
##! any network protocol over which they're transported.
@load base/file_analysis.bif
# TODO: do logging here?
@load base/frameworks/logging
module FileAnalysis;
@ -13,10 +12,6 @@ export {
LOG
};
## The default buffer size used to reassemble files.
# TODO: what's a reasonable default?
const default_reassembly_buffer_size: count = 1024*1024 &redef;
## The default buffer size used for storing the beginning of files.
const default_bof_buffer_size: count = 1024 &redef;
@ -24,29 +19,32 @@ export {
## before giving up.
const default_timeout_interval: interval = 2 mins &redef;
# Needed a forward declaration for event parameters...
type Info: record {};
## A structure which represents a desired file analysis action to take.
type ActionArgs: record {
## The type of action.
act: Action;
## The local filename to which to write an extracted file. Must be
## set when *act* is :bro:see:`FileAnalysis::ACTION_EXTRACT`.
extract_filename: string &optional;
chunk_event: event(info: Info, data: string, off: count) &optional;
stream_event: event(info: Info, data: string) &optional;
};
## A structure which contains the results of certain file analysis actions.
type ActionResults: record {
## An MD5 digest of the file contents.
md5: string &optional;
## An SHA1 digest of the file contents.
sha1: string &optional;
## An SHA256 digest of the file contents.
sha256: string &optional;
};
## Contains all metadata related to the analysis of a given file, some
## of which is logged.
## Contains all metadata related to the analysis of a given file.
type Info: record {
## Unique identifier associated with a single file.
## An identifier associated with a single file.
file_id: string &log;
## Unique identifier associated with the file if it was extracted
## from a container file as part of the analysis.
## Identifier associated with a container file from which this one was
## extracted as part of the file analysis.
parent_file_id: string &log &optional;
## An identification of the source of the file data. E.g. it may be
@ -62,18 +60,17 @@ export {
## Number of bytes provided to the file analysis engine for the file.
seen_bytes: count &log &default=0;
## Total number of bytes that are supposed to comprise the file content.
## Total number of bytes that are supposed to comprise the full file.
total_bytes: count &log &optional;
## The number of bytes in the file stream that were completely missed
## during the process of analysis e.g. due to dropped packets.
## analysis that had to be discarded due to a reassembly buffer size
## of *reassembly_buffer_size* being filled.
missing_bytes: count &log &default=0;
## The number of not all-in-sequence bytes in the file stream that
## were delivered to file actions/analyzers due to reassembly buffer
## size of *reassembly_buffer_size* being filled.
## overflow.
overflow_bytes: count &log &default=0;
## The amount of time between receiving new data for this file that
@ -81,60 +78,237 @@ export {
timeout_interval: interval &log &default=default_timeout_interval;
## The number of bytes at the beginning of a file to save for later
## inspection in *bof_buffer* field of
## :bro:see:`FileAnalysis::ActionResults`.
## inspection in *bof_buffer* field.
bof_buffer_size: count &log &default=default_bof_buffer_size;
## The content of the beginning of a file up to *bof_buffer_size* bytes.
## This is also the buffer that's used for file/mime type detection.
bof_buffer: string &optional;
## An initial guess at file type.
## A file type provided by libmagic against the *bof_buffer*, or
## in the cases where no buffering of the beginning of file occurs,
## an initial guess of the file type based on the first data seen.
file_type: string &log &optional;
## An initial guess at mime type.
## A mime type provided by libmagic against the *bof_buffer*, or
## in the cases where no buffering of the beginning of file occurs,
## an initial guess of the mime type based on the first data seen.
mime_type: string &log &optional;
## Actions that have been added to the analysis of this file.
## Not meant to be modified directly by scripts.
## Only meant for inspection by user scripts, not direct modification.
actions: table[ActionArgs] of ActionResults;
} &redef;
## TODO: document
## Fields that are derived from existing ones, and are set just in time
## for logging purposes.
redef record FileAnalysis::Info += {
## Whether the file analysis timed out at least once for the file.
timedout: bool &log &default=F;
## Connection UIDs over which the file was transferred.
conn_uids: set[string] &log &optional;
## A set of action types taken during the file analysis.
actions_taken: set[Action] &log &optional;
## Local filenames of file extraction actions.
extracted_files: set[string] &log &optional;
## An MD5 digest of the file contents.
md5: string &log &optional;
## A SHA1 digest of the file contents.
sha1: string &log &optional;
## A SHA256 digest of the file contents.
sha256: string &log &optional;
};
## Redefined here just so the *info* parameters of the events have the
## right type information.
redef record ActionArgs += {
## An event which will be generated for all new file contents,
## chunk-wise.
chunk_event: event(info: Info, data: string, off: count) &optional;
## An event which will be generated for all new file contents,
## stream-wise.
stream_event: event(info: Info, data: string) &optional;
};
## Evaluated every time a significant event occurs during the course of
## file analysis. Fields of the *info* argument may be modified or
## other actions may be added or removed inside the body of any handlers
## of this hook.
global policy: hook(trig: Trigger, info: Info);
## A table that can be used to disable file analysis completely for
## any files transferred over given network protocol analyzers.
const disable: table[AnalyzerTag] of bool = table() &redef;
# TODO: wrapper functions for BiFs ?
## Event that can be handled to access the Info record as it is sent on
## to the logging framework.
global log_file_analysis: event(rec: Info);
## The salt concatenated to unique file handle strings generated by
## :bro:see:`FileAnalysis::handle_callbacks` before hashing them
## in to a file id (the *file_id* field of :bro:see:`FileAnalysis::Info`).
## :bro:see:`get_file_handle` before hashing them in to a file id
## (the *file_id* field of :bro:see:`FileAnalysis::Info`).
## Provided to help mitigate the possibility of manipulating parts of
## network connections that factor in to the file handle in order to
## generate two handles that would hash to the same file id.
const salt = "I recommend changing this." &redef;
## Postpones the timeout of file analysis for a given file.
## When used within a :bro:see:`FileAnalysis::policy` handler for
## :bro:see:`FileAnalysis::TRIGGER_TIMEOUT`, the analysis will delay
## timing out for the period of time indicated by the *timeout_interval*
## field of :bro:see:`FileAnalysis::Info`.
##
## file_id: the file identifier string from the *file_id* field of
## :bro:see:`FileAnalysis::Info`.
##
## Returns: true if the timeout will be postponed, or false if analysis
## for the *file_id* isn't currently active.
global postpone_timeout: function(file_id: string): bool;
## Adds an action to the analysis of a given file.
##
## file_id: the file identifier string from the *file_id* field of
## :bro:see:`FileAnalysis::Info`.
##
## args: the action type to add along with any arguments it takes.
##
## Returns: true if the action will be added, or false if analysis
## for the *file_id* isn't currently active or the *args*
## were invalid for the action type.
global add_action: function(file_id: string, args: ActionArgs): bool;
## Removes an action from the analysis of a given file.
##
## file_id: the file identifier string from the *file_id* field of
## :bro:see:`FileAnalysis::Info`.
##
## args: the action (type and args) to remove.
##
## Returns: true if the action will be removed, or false if analysis
## for the *file_id* isn't currently active.
global remove_action: function(file_id: string, args: ActionArgs): bool;
## Stops/ignores any further analysis of a given file.
##
## file_id: the file identifier string from the *file_id* field of
## :bro:see:`FileAnalysis::Info`.
##
## Returns: true if analysis for the given file will be ignored for the
## rest of its contents, or false if analysis for the *file_id*
## isn't currently active.
global stop: function(file_id: string): bool;
## Sends a sequential stream of data in for file analysis.
## Meant for use when providing external file analysis input (e.g.
## from the input framework).
##
## source: a string that uniquely identifies the logical file that the
## data is a part of and describes its source.
##
## data: bytestring contents of the file to analyze.
global data_stream: function(source: string, data: string);
## Sends a non-sequential chunk of data in for file analysis.
## Meant for use when providing external file analysis input (e.g.
## from the input framework).
##
## source: a string that uniquely identifies the logical file that the
## data is a part of and describes its source.
##
## data: bytestring contents of the file to analyze.
##
## offset: the offset within the file that this chunk starts.
global data_chunk: function(source: string, data: string, offset: count);
## Signals a content gap in the file bytestream.
## Meant for use when providing external file analysis input (e.g.
## from the input framework).
##
## source: a string that uniquely identifies the logical file that the
## data is a part of and describes its source.
##
## offset: the offset within the file that this gap starts.
##
## len: the number of bytes that are missing.
global gap: function(source: string, offset: count, len: count);
## Signals the total size of a file.
## Meant for use when providing external file analysis input (e.g.
## from the input framework).
##
## source: a string that uniquely identifies the logical file that the
## data is a part of and describes its source.
##
## size: the number of bytes that comprise the full file.
global set_size: function(source: string, size: count);
## Signals the end of a file.
## Meant for use when providing external file analysis input (e.g.
## from the input framework).
##
## source: a string that uniquely identifies the logical file that the
## data is a part of and describes its source.
global eof: function(source: string);
}
# Script-layer wrapper: forwards *file_id* unchanged to the
# __postpone_timeout built-in and returns that call's boolean result.
function postpone_timeout(file_id: string): bool
{
return __postpone_timeout(file_id);
}
# Script-layer wrapper: forwards *file_id* and *args* unchanged to the
# __add_action built-in and returns that call's boolean result.
function add_action(file_id: string, args: ActionArgs): bool
{
return __add_action(file_id, args);
}
# Script-layer wrapper: forwards *file_id* and *args* unchanged to the
# __remove_action built-in and returns that call's boolean result.
function remove_action(file_id: string, args: ActionArgs): bool
{
return __remove_action(file_id, args);
}
# Script-layer wrapper: forwards *file_id* unchanged to the __stop
# built-in and returns that call's boolean result.
function stop(file_id: string): bool
{
return __stop(file_id);
}
# Script-layer wrapper: passes *source* and *data* unchanged to the
# __data_stream built-in. Nothing is returned to the caller.
function data_stream(source: string, data: string)
{
__data_stream(source, data);
}
# Script-layer wrapper: passes *source*, *data* and *offset* unchanged to
# the __data_chunk built-in. Nothing is returned to the caller.
function data_chunk(source: string, data: string, offset: count)
{
__data_chunk(source, data, offset);
}
# Script-layer wrapper: passes *source*, *offset* and *len* unchanged to
# the __gap built-in. Nothing is returned to the caller.
function gap(source: string, offset: count, len: count)
{
__gap(source, offset, len);
}
# Script-layer wrapper: passes *source* and *size* unchanged to the
# __set_size built-in. Nothing is returned to the caller.
function set_size(source: string, size: count)
{
__set_size(source, size);
}
# Script-layer wrapper: passes *source* unchanged to the __eof built-in.
# Nothing is returned to the caller.
function eof(source: string)
{
__eof(source);
}
# At startup, register the FileAnalysis::LOG stream with the logging
# framework: Info supplies the log columns and log_file_analysis is the
# event associated with the stream.
event bro_init() &priority=5
{
Log::create_stream(FileAnalysis::LOG,
[$columns=Info, $ev=log_file_analysis]);
}
redef record FileAnalysis::Info += {
timedout: bool &log &default=F;
conn_uids: set[string] &log &optional;
actions_taken: set[Action] &log &optional;
extracted_files: set[string] &log &optional;
md5: string &log &optional;
sha1: string &log &optional;
sha256: string &log &optional;
};
hook FileAnalysis::policy(trig: FileAnalysis::Trigger, info: FileAnalysis::Info)
&priority=5
{

View file

@ -10,21 +10,23 @@ type Info: record;
type ActionArgs: record;
type ActionResults: record;
## An enumeration of possibly-interesting "events" that can occur over
## the course of analyzing files. The :bro:see:`FileAnalysis::policy`
## hook is called each time a trigger occurs.
## An enumeration of significant things that can occur over the course of
## analyzing files. The :bro:see:`FileAnalysis::policy` hook is called each
## time a trigger occurs.
enum Trigger %{
## Raised when any part of a new file is detected.
TRIGGER_NEW,
## Raised when a file is detected being transported over a new network
## connection (other than the first).
TRIGGER_NEW_CONN,
## Raised when file analysis has likely seen a complete file. That
## is when a number of bytes indicated by the *total_bytes* field of
## :bro:see:`FileAnalysis::Info` have been processed. Note that
## the *undelivered* field does not have to be zero for this to have
## occurred.
## :bro:see:`FileAnalysis::Info` have been processed.
TRIGGER_DONE,
## Raised when file analysis for a given file is aborted due
## to not seeing any data for it recently. Note that this doesn't
## necessarily mean the full file wasn't seen (e.g. if the
@ -33,44 +35,55 @@ enum Trigger %{
## during a :bro:see:`FileAnalysis::policy` handler for this trigger to
## defer the timeout until later.
TRIGGER_TIMEOUT,
## Raised when the beginning of a file is detected.
TRIGGER_BOF,
## Raised when the beginning of a file is available and that beginning
## is at least the number of bytes indicated by the *bof_buffer_size*
## field of :bro:see:`FileAnalysis::Info`.
## Raised when the beginning of a file is available in the *bof_buffer*
## field of :bro:see:`FileAnalysis::Info` and that beginning
## is at least the number of bytes indicated by the *bof_buffer_size* field.
TRIGGER_BOF_BUFFER,
## Raised when an initial guess at the file/mime type of a file is matched
## based on magic numbers.
## Raised when an initial guess at the file/mime type of a file is matched.
TRIGGER_TYPE,
## Raised to signal that no more file data is incoming and it couldn't be
## determined whether the full file was actually seen.
## determined whether the full file was actually seen and analyzed.
TRIGGER_EOF,
## The reassembly buffer for the file filled and had to be discarded.
## The *undelivered* field of :bro:see:`FileAnalysis::Info` will
## indicate the number of bytes, if any, that were not all-in-sequence.
## TODO: Is it possible to extend the reassembly buffer when "handling"
## this trigger?
TRIGGER_REASSEMBLY_BUFFER_FULL,
## Raised when there's a missing chunk of data in the file stream.
TRIGGER_GAP,
%}
## An enumeration of various file analysis actions that can be taken.
enum Action %{
## Extract a file to the local filesystem.
ACTION_EXTRACT,
## Calculate an MD5 digest of the file's contents.
ACTION_MD5,
## Calculate an SHA1 digest of the file's contents.
ACTION_SHA1,
## Calculate an SHA256 digest of the file's contents.
ACTION_SHA256,
## Deliver the file contents to the script-layer in an event.
ACTION_DATA_EVENT,
%}
function FileAnalysis::postpone_timeout%(file_id: string%): bool
## :bro:see:`FileAnalysis::postpone_timeout`.
function FileAnalysis::__postpone_timeout%(file_id: string%): bool
%{
// Build a FileID from the script-level string and ask the file manager to
// postpone the timeout; the C++ bool outcome is wrapped in a boolean Val.
using file_analysis::FileID;
bool result = file_mgr->PostponeTimeout(FileID(file_id->CheckString()));
return new Val(result, TYPE_BOOL);
%}
function FileAnalysis::add_action%(file_id: string, args: any%): bool
## :bro:see:`FileAnalysis::add_action`.
function FileAnalysis::__add_action%(file_id: string, args: any%): bool
%{
using file_analysis::FileID;
using BifType::Record::FileAnalysis::ActionArgs;
@ -80,7 +93,8 @@ function FileAnalysis::add_action%(file_id: string, args: any%): bool
return new Val(result, TYPE_BOOL);
%}
function FileAnalysis::remove_action%(file_id: string, args: any%): bool
## :bro:see:`FileAnalysis::remove_action`.
function FileAnalysis::__remove_action%(file_id: string, args: any%): bool
%{
using file_analysis::FileID;
using BifType::Record::FileAnalysis::ActionArgs;
@ -90,39 +104,45 @@ function FileAnalysis::remove_action%(file_id: string, args: any%): bool
return new Val(result, TYPE_BOOL);
%}
function FileAnalysis::stop%(file_id: string%): bool
## :bro:see:`FileAnalysis::stop`.
function FileAnalysis::__stop%(file_id: string%): bool
%{
// Build a FileID from the script-level string and ask the file manager to
// ignore that file; the C++ bool outcome is wrapped in a boolean Val.
using file_analysis::FileID;
bool result = file_mgr->IgnoreFile(FileID(file_id->CheckString()));
return new Val(result, TYPE_BOOL);
%}
function FileAnalysis::data_stream%(source: string, data: string%): any
## :bro:see:`FileAnalysis::data_stream`.
function FileAnalysis::__data_stream%(source: string, data: string%): any
%{
// Hand the raw bytes and their length to the file manager, identified by
// the source string (no offset is given in this variant).
file_mgr->DataIn(data->Bytes(), data->Len(), source->CheckString());
// Returns 0: no Val is handed back to the script layer.
return 0;
%}
function FileAnalysis::data_chunk%(source: string, data: string,
## :bro:see:`FileAnalysis::data_chunk`.
function FileAnalysis::__data_chunk%(source: string, data: string,
offset: count%): any
%{
// Hand the raw bytes, their length and the chunk's offset to the file
// manager, identified by the source string.
file_mgr->DataIn(data->Bytes(), data->Len(), offset, source->CheckString());
// Returns 0: no Val is handed back to the script layer.
return 0;
%}
function FileAnalysis::gap%(source: string, offset: count, len: count%): any
## :bro:see:`FileAnalysis::gap`.
function FileAnalysis::__gap%(source: string, offset: count, len: count%): any
%{
// Report a gap of len bytes starting at offset to the file manager,
// identified by the source string.
file_mgr->Gap(offset, len, source->CheckString());
// Returns 0: no Val is handed back to the script layer.
return 0;
%}
function FileAnalysis::set_size%(source: string, size: count%): any
## :bro:see:`FileAnalysis::set_size`.
function FileAnalysis::__set_size%(source: string, size: count%): any
%{
// Pass the given size to the file manager for the file identified by the
// source string.
file_mgr->SetSize(size, source->CheckString());
// Returns 0: no Val is handed back to the script layer.
return 0;
%}
function FileAnalysis::eof%(source: string%): any
## :bro:see:`FileAnalysis::eof`.
function FileAnalysis::__eof%(source: string%): any
%{
file_mgr->EndOfFile(source->CheckString());
return 0;

View file

@ -13,6 +13,11 @@ namespace file_analysis {
class Info;
declare(PDict,Action);
/**
* A set of file analysis actions indexed by ActionArgs. Allows queueing
* of addition/removals so that those modifications can happen at well-defined
* times (e.g. to make sure a loop iterator isn't invalidated).
*/
class ActionSet {
public:

View file

@ -7,6 +7,10 @@
namespace file_analysis {
/**
* Provides buffering for file contents until the script-layer is able to
* return a unique file handle for it.
*/
class PendingFile {
public:

View file

@ -3,8 +3,8 @@
#empty_field (empty)
#unset_field -
#path file_analysis
#open 2013-03-28-21-35-46
#open 2013-03-29-18-28-57
#fields file_id parent_file_id source last_active seen_bytes total_bytes missing_bytes overflow_bytes timeout_interval bof_buffer_size file_type mime_type timedout conn_uids actions_taken extracted_files md5 sha1 sha256
#types string string string time count count count count interval count string string bool table[string] table[enum] table[string] string string string
Cx92a0ym5R8 - HTTP 1362692527.009775 4705 4705 0 0 120.000000 1024 set set F UWkUyAuUGXf FileAnalysis::ACTION_SHA1,FileAnalysis::ACTION_EXTRACT,FileAnalysis::ACTION_DATA_EVENT,FileAnalysis::ACTION_MD5,FileAnalysis::ACTION_SHA256 Cx92a0ym5R8-file 397168fd09991a0e712254df7bc639ac 1dd7ac0398df6cbc0696445a91ec681facf4dc47 4e7c7ef0984119447e743e3ec77e1de52713e345cde03fe7df753a35849bed18
#close 2013-03-28-21-35-46
Cx92a0ym5R8 - HTTP 1362692527.009775 4705 4705 0 0 120.000000 1024 set set F UWkUyAuUGXf FileAnalysis::ACTION_SHA1,FileAnalysis::ACTION_DATA_EVENT,FileAnalysis::ACTION_EXTRACT,FileAnalysis::ACTION_MD5,FileAnalysis::ACTION_SHA256 Cx92a0ym5R8-file 397168fd09991a0e712254df7bc639ac 1dd7ac0398df6cbc0696445a91ec681facf4dc47 4e7c7ef0984119447e743e3ec77e1de52713e345cde03fe7df753a35849bed18
#close 2013-03-29-18-28-57