Merge remote-tracking branch 'origin/topic/jsiwek/file-analysis' into topic/seth/file-analysis-exe-analyzer

Conflicts:
	src/file_analysis/ActionSet.cc
	src/types.bif
This commit is contained in:
Seth Hall 2013-04-24 13:01:39 -04:00
commit d72980828f
141 changed files with 3754 additions and 888 deletions

View file

@ -12,13 +12,13 @@ export {
LOG
};
## A structure which represents a desired file analysis action to take.
type ActionArgs: record {
## The type of action.
act: Action;
## A structure which represents a desired type of file analysis.
type AnalyzerArgs: record {
## The type of analysis.
tag: Analyzer;
## The local filename to which to write an extracted file. Must be
## set when *act* is :bro:see:`FileAnalysis::ACTION_EXTRACT`.
## set when *tag* is :bro:see:`FileAnalysis::ANALYZER_EXTRACT`.
extract_filename: string &optional;
## An event which will be generated for all new file contents,
@ -46,6 +46,10 @@ export {
## path which was read, or some other input source.
source: string &log &optional;
## If the source of this file is a network connection, this field
## may be set to indicate the directionality.
is_orig: bool &log &optional;
## The time at which the last activity for the file was seen.
last_active: time &log;
@ -60,8 +64,7 @@ export {
missing_bytes: count &log &default=0;
## The number of not all-in-sequence bytes in the file stream that
## were delivered to file actions/analyzers due to reassembly buffer
## overflow.
## were delivered to file analyzers due to reassembly buffer overflow.
overflow_bytes: count &log &default=0;
## The amount of time between receiving new data for this file that
@ -72,11 +75,6 @@ export {
## inspection in *bof_buffer* field.
bof_buffer_size: count &log &optional;
## A file type provided by libmagic against the *bof_buffer*, or
## in the cases where no buffering of the beginning of file occurs,
## an initial guess of the file type based on the first data seen.
file_type: string &log &optional;
## A mime type provided by libmagic against the *bof_buffer*, or
## in the cases where no buffering of the beginning of file occurs,
## an initial guess of the mime type based on the first data seen.
@ -88,10 +86,10 @@ export {
## Connection UIDs over which the file was transferred.
conn_uids: set[string] &log;
## A set of action types taken during the file analysis.
actions_taken: set[Action] &log;
## A set of analysis types done during the file analysis.
analyzers: set[Analyzer] &log;
## Local filenames of file extraction actions.
## Local filenames of extracted files.
extracted_files: set[string] &log;
## An MD5 digest of the file contents.
@ -120,10 +118,23 @@ export {
## generate two handles that would hash to the same file id.
const salt = "I recommend changing this." &redef;
## Sets the *timeout_interval* field of :bro:see:`fa_file`, which is
## used to determine the length of inactivity that is allowed for a file
## before internal state related to it is cleaned up.
##
## f: the file.
##
## t: the amount of time the file can remain inactive before discarding.
##
## Returns: true if the timeout interval was set, or false if analysis
## for the *id* isn't currently active.
global set_timeout_interval: function(f: fa_file, t: interval): bool;
## Postpones the timeout of file analysis for a given file.
## When used within a :bro:see:`file_timeout` handler, the analysis
## will delay timing out for the period of time indicated by
## the *timeout_interval* field of :bro:see:`fa_file`.
## the *timeout_interval* field of :bro:see:`fa_file`, which can be set
## with :bro:see:`FileAnalysis::set_timeout_interval`.
##
## f: the file.
##
@ -131,26 +142,26 @@ export {
## for the *id* isn't currently active.
global postpone_timeout: function(f: fa_file): bool;
## Adds an action to the analysis of a given file.
## Adds an analyzer to the analysis of a given file.
##
## f: the file.
##
## args: the action type to add along with any arguments it takes.
## args: the analyzer type to add along with any arguments it takes.
##
## Returns: true if the action will be added, or false if analysis
## Returns: true if the analyzer will be added, or false if analysis
## for the *id* isn't currently active or the *args*
## were invalid for the action type.
global add_action: function(f: fa_file, args: ActionArgs): bool;
## were invalid for the analyzer type.
global add_analyzer: function(f: fa_file, args: AnalyzerArgs): bool;
## Removes an action from the analysis of a given file.
## Removes an analyzer from the analysis of a given file.
##
## f: the file.
##
## args: the action (type and args) to remove.
## args: the analyzer (type and args) to remove.
##
## Returns: true if the action will be removed, or false if analysis
## Returns: true if the analyzer will be removed, or false if analysis
## for the *id* isn't currently active.
global remove_action: function(f: fa_file, args: ActionArgs): bool;
global remove_analyzer: function(f: fa_file, args: AnalyzerArgs): bool;
## Stops/ignores any further analysis of a given file.
##
@ -229,6 +240,7 @@ function set_info(f: fa_file)
f$info$id = f$id;
if ( f?$parent_id ) f$info$parent_id = f$parent_id;
if ( f?$source ) f$info$source = f$source;
if ( f?$is_orig ) f$info$is_orig = f$is_orig;
f$info$last_active = f$last_active;
f$info$seen_bytes = f$seen_bytes;
if ( f?$total_bytes ) f$info$total_bytes = f$total_bytes;
@ -236,34 +248,38 @@ function set_info(f: fa_file)
f$info$overflow_bytes = f$overflow_bytes;
f$info$timeout_interval = f$timeout_interval;
f$info$bof_buffer_size = f$bof_buffer_size;
if ( f?$file_type ) f$info$file_type = f$file_type;
if ( f?$mime_type ) f$info$mime_type = f$mime_type;
if ( f?$conns )
for ( cid in f$conns )
add f$info$conn_uids[f$conns[cid]$uid];
}
function set_timeout_interval(f: fa_file, t: interval): bool
{
return __set_timeout_interval(f$id, t);
}
function postpone_timeout(f: fa_file): bool
{
return __postpone_timeout(f$id);
}
function add_action(f: fa_file, args: ActionArgs): bool
function add_analyzer(f: fa_file, args: AnalyzerArgs): bool
{
if ( ! __add_action(f$id, args) ) return F;
if ( ! __add_analyzer(f$id, args) ) return F;
set_info(f);
add f$info$actions_taken[args$act];
add f$info$analyzers[args$tag];
if ( args$act == FileAnalysis::ACTION_EXTRACT )
if ( args$tag == FileAnalysis::ANALYZER_EXTRACT )
add f$info$extracted_files[args$extract_filename];
return T;
}
function remove_action(f: fa_file, args: ActionArgs): bool
function remove_analyzer(f: fa_file, args: AnalyzerArgs): bool
{
return __remove_action(f$id, args);
return __remove_analyzer(f$id, args);
}
function stop(f: fa_file): bool

View file

@ -316,7 +316,12 @@ type connection: record {
tunnel: EncapsulatingConnVector &optional;
};
## Default amount of time a file can be inactive before the file analysis
## gives up and discards any internal state related to the file.
const default_file_timeout_interval: interval = 2 mins &redef;
## Default amount of bytes that file analysis will buffer before raising
## :bro:see:`file_new`.
const default_file_bof_buffer_size: count = 1024 &redef;
## A file that Bro is analyzing. This is Bro's type for describing the basic
@ -336,6 +341,10 @@ type fa_file: record {
## path which was read, or some other input source.
source: string &optional;
## If the source of this file is a network connection, this field
## may be set to indicate the directionality.
is_orig: bool &optional;
## The set of connections over which the file was transferred.
conns: table[conn_id] of connection &optional;
@ -353,8 +362,7 @@ type fa_file: record {
missing_bytes: count &default=0;
## The number of not all-in-sequence bytes in the file stream that
## were delivered to file actions/analyzers due to reassembly buffer
## overflow.
## were delivered to file analyzers due to reassembly buffer overflow.
overflow_bytes: count &default=0;
## The amount of time between receiving new data for this file that
@ -369,11 +377,6 @@ type fa_file: record {
## This is also the buffer that's used for file/mime type detection.
bof_buffer: string &optional;
## A file type provided by libmagic against the *bof_buffer*, or
## in the cases where no buffering of the beginning of file occurs,
## an initial guess of the file type based on the first data seen.
file_type: string &optional;
## A mime type provided by libmagic against the *bof_buffer*, or
## in the cases where no buffering of the beginning of file occurs,
## an initial guess of the mime type based on the first data seen.

View file

@ -11,7 +11,7 @@ export {
function get_handle_string(c: connection): string
{
return fmt("%s %s %s", ANALYZER_FTP_DATA, c$start_time, id_string(c$id));
return cat(ANALYZER_FTP_DATA, " ", c$start_time, " ", id_string(c$id));
}
function get_file_handle(c: connection, is_orig: bool): string

View file

@ -38,8 +38,8 @@ event file_new(f: fa_file) &priority=5
if ( f?$mime_type && extract_file_types in f$mime_type )
{
FileAnalysis::add_action(f, [$act=FileAnalysis::ACTION_EXTRACT,
$extract_filename=get_extraction_name(f)]);
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=get_extraction_name(f)]);
return;
}
@ -55,8 +55,8 @@ event file_new(f: fa_file) &priority=5
if ( ! s$extract_file ) next;
FileAnalysis::add_action(f, [$act=FileAnalysis::ACTION_EXTRACT,
$extract_filename=get_extraction_name(f)]);
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=get_extraction_name(f)]);
return;
}
}

View file

@ -56,8 +56,6 @@ export {
## Libmagic "sniffed" file type if the command indicates a file transfer.
mime_type: string &log &optional;
## Libmagic "sniffed" file description if the command indicates a file transfer.
mime_desc: string &log &optional;
## Size of the file if the command indicates a file transfer.
file_size: count &log &optional;
@ -205,7 +203,6 @@ function ftp_message(s: Info)
# and may not be used in all commands so they need to be reset to "blank"
# values after logging.
delete s$mime_type;
delete s$mime_desc;
delete s$file_size;
# Same with data channel.
delete s$data_channel;
@ -353,7 +350,6 @@ event file_transferred(c: connection, prefix: string, descr: string,
{
local s = ftp_data_expected[id$resp_h, id$resp_p];
s$mime_type = split1(mime_type, /;/)[1];
s$mime_desc = descr;
}
}

View file

@ -15,11 +15,11 @@ function get_file_handle(c: connection, is_orig: bool): string
if ( ! c?$http ) return "";
if ( c$http$range_request )
return fmt("%s %s %s %s", ANALYZER_HTTP, is_orig, c$id$orig_h,
return cat(ANALYZER_HTTP, " ", is_orig, " ", c$id$orig_h, " ",
build_url(c$http));
return fmt("%s %s %s %s %s", ANALYZER_HTTP, c$start_time, is_orig,
c$http$trans_depth, id_string(c$id));
return cat(ANALYZER_HTTP, " ", c$start_time, " ", is_orig, " ",
c$http$trans_depth, " ", id_string(c$id));
}
module GLOBAL;

View file

@ -44,8 +44,8 @@ event file_new(f: fa_file) &priority=5
if ( f?$mime_type && extract_file_types in f$mime_type )
{
fname = get_extraction_name(f);
FileAnalysis::add_action(f, [$act=FileAnalysis::ACTION_EXTRACT,
$extract_filename=fname]);
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]);
for ( cid in f$conns )
{
@ -68,8 +68,8 @@ event file_new(f: fa_file) &priority=5
if ( ! c$http$extract_file ) next;
fname = get_extraction_name(f);
FileAnalysis::add_action(f, [$act=FileAnalysis::ACTION_EXTRACT,
$extract_filename=fname]);
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]);
extracting = T;
break;
}

View file

@ -30,7 +30,7 @@ event file_new(f: fa_file) &priority=5
if ( f?$mime_type && generate_md5 in f$mime_type )
{
FileAnalysis::add_action(f, [$act=FileAnalysis::ACTION_MD5]);
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]);
return;
}
@ -44,7 +44,7 @@ event file_new(f: fa_file) &priority=5
if ( ! c$http$calc_md5 ) next;
FileAnalysis::add_action(f, [$act=FileAnalysis::ACTION_MD5]);
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]);
return;
}
}

View file

@ -49,7 +49,7 @@ event file_new(f: fa_file) &priority=5
c$http$mime_type = f$mime_type;
local mime_str: string = split1(f$mime_type, /;/)[1];
local mime_str: string = c$http$mime_type;
if ( mime_str !in mime_types_extensions ) next;
if ( ! c$http?$uri ) next;
@ -66,23 +66,18 @@ event file_new(f: fa_file) &priority=5
}
}
event file_over_new_connection(f: fa_file) &priority=5
event file_over_new_connection(f: fa_file, c: connection) &priority=5
{
if ( ! f?$source ) return;
if ( f$source != "HTTP" ) return;
if ( ! f?$mime_type ) return;
if ( ! f?$conns ) return;
if ( ! c?$http ) return;
# Spread the mime around (e.g. for partial content, file_type event only
# happens once for the first connection, but if there's subsequent
# connections to transfer the same file, they'll be lacking the mime_type
# field if we don't do this).
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( ! c?$http ) next;
c$http$mime_type = f$mime_type;
}
c$http$mime_type = f$mime_type;
}
# Tracks byte-range request / partial content response mime types, indexed

View file

@ -101,8 +101,8 @@ event file_new(f: fa_file) &priority=5
if ( f?$mime_type && extract_file_types in f$mime_type )
{
fname = get_extraction_name(f);
FileAnalysis::add_action(f, [$act=FileAnalysis::ACTION_EXTRACT,
$extract_filename=fname]);
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]);
set_dcc_extraction_file(f, fname);
return;
}
@ -120,8 +120,8 @@ event file_new(f: fa_file) &priority=5
if ( ! s$extract_file ) next;
fname = get_extraction_name(f);
FileAnalysis::add_action(f, [$act=FileAnalysis::ACTION_EXTRACT,
$extract_filename=fname]);
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]);
s$extraction_file = fname;
return;
}

View file

@ -12,7 +12,7 @@ export {
function get_file_handle(c: connection, is_orig: bool): string
{
if ( is_orig ) return "";
return fmt("%s %s %s", ANALYZER_IRC_DATA, c$start_time, id_string(c$id));
return cat(ANALYZER_IRC_DATA, " ", c$start_time, " ", id_string(c$id));
}
module GLOBAL;

View file

@ -123,8 +123,9 @@ event file_new(f: fa_file) &priority=5
if ( ! extracting )
{
fname = get_extraction_name(f);
FileAnalysis::add_action(f, [$act=FileAnalysis::ACTION_EXTRACT,
$extract_filename=fname]);
FileAnalysis::add_analyzer(f,
[$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]);
extracting = T;
++extract_count;
}
@ -133,7 +134,7 @@ event file_new(f: fa_file) &priority=5
}
if ( c$smtp$current_entity$calc_md5 )
FileAnalysis::add_action(f, [$act=FileAnalysis::ACTION_MD5]);
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]);
}
}
@ -141,12 +142,12 @@ function check_extract_by_type(f: fa_file)
{
if ( extract_file_types !in f$mime_type ) return;
if ( f?$info && FileAnalysis::ACTION_EXTRACT in f$info$actions_taken )
if ( f?$info && FileAnalysis::ANALYZER_EXTRACT in f$info$analyzers )
return;
local fname: string = get_extraction_name(f);
FileAnalysis::add_action(f, [$act=FileAnalysis::ACTION_EXTRACT,
$extract_filename=fname]);
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]);
if ( ! f?$conns ) return;
@ -163,7 +164,7 @@ function check_md5_by_type(f: fa_file)
if ( never_calc_md5 ) return;
if ( generate_md5 !in f$mime_type ) return;
FileAnalysis::add_action(f, [$act=FileAnalysis::ACTION_MD5]);
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]);
}
event file_new(f: fa_file) &priority=5

View file

@ -13,8 +13,8 @@ export {
function get_file_handle(c: connection, is_orig: bool): string
{
if ( ! c?$smtp ) return "";
return fmt("%s %s %s %s", ANALYZER_SMTP, c$start_time,
c$smtp$trans_depth, c$smtp_state$mime_level);
return cat(ANALYZER_SMTP, " ", c$start_time, " ", c$smtp$trans_depth, " ",
c$smtp_state$mime_level);
}
module GLOBAL;