Merge remote-tracking branch 'origin/topic/jsiwek/faf-cleanup'

Closes #1002.

* origin/topic/jsiwek/faf-cleanup:
  Move file analyzers to new plugin infrastructure.
  Add a general file analysis overview/how-to document.
  Improve file analysis doxygen comments.
  Improve tracking of HTTP file extraction (addresses #988).
  Fix HTTP multipart body file analysis.
  Remove logging of analyzers field of FileAnalysis::Info.
  Remove extraction counter in default file extraction scripts.
  Remove FileAnalysis::postpone_timeout.
  Make default get_file_handle handlers &priority=5.
  Add input interface to forward data for file analysis.
  File analysis framework interface simplifications.
This commit is contained in:
Robin Sommer 2013-07-03 16:22:43 -07:00
commit d8b05af7e5
127 changed files with 2458 additions and 1412 deletions

View file

@ -15,18 +15,20 @@ export {
## A structure which represents a desired type of file analysis.
type AnalyzerArgs: record {
## The type of analysis.
tag: Analyzer;
tag: FileAnalysis::Tag;
## The local filename to which to write an extracted file. Must be
## set when *tag* is :bro:see:`FileAnalysis::ANALYZER_EXTRACT`.
extract_filename: string &optional;
## An event which will be generated for all new file contents,
## chunk-wise.
## chunk-wise. Used when *tag* is
## :bro:see:`FileAnalysis::ANALYZER_DATA_EVENT`.
chunk_event: event(f: fa_file, data: string, off: count) &optional;
## An event which will be generated for all new file contents,
## stream-wise.
## stream-wise. Used when *tag* is
## :bro:see:`FileAnalysis::ANALYZER_DATA_EVENT`.
stream_event: event(f: fa_file, data: string) &optional;
} &redef;
@ -87,7 +89,7 @@ export {
conn_uids: set[string] &log;
## A set of analysis types done during the file analysis.
analyzers: set[Analyzer] &log;
analyzers: set[FileAnalysis::Tag];
## Local filenames of extracted files.
extracted_files: set[string] &log;
@ -120,7 +122,9 @@ export {
## Sets the *timeout_interval* field of :bro:see:`fa_file`, which is
## used to determine the length of inactivity that is allowed for a file
## before internal state related to it is cleaned up.
## before internal state related to it is cleaned up. When used within a
## :bro:see:`file_timeout` handler, the analysis will delay timing out
## again for the period specified by *t*.
##
## f: the file.
##
@ -130,18 +134,6 @@ export {
## for the *id* isn't currently active.
global set_timeout_interval: function(f: fa_file, t: interval): bool;
## Postpones the timeout of file analysis for a given file.
## When used within a :bro:see:`file_timeout` handler for, the analysis
## the analysis will delay timing out for the period of time indicated by
## the *timeout_interval* field of :bro:see:`fa_file`, which can be set
## with :bro:see:`FileAnalysis::set_timeout_interval`.
##
## f: the file.
##
## Returns: true if the timeout will be postponed, or false if analysis
## for the *id* isn't currently active.
global postpone_timeout: function(f: fa_file): bool;
## Adds an analyzer to the analysis of a given file.
##
## f: the file.
@ -171,58 +163,6 @@ export {
## rest of it's contents, or false if analysis for the *id*
## isn't currently active.
global stop: function(f: fa_file): bool;
## Sends a sequential stream of data in for file analysis.
## Meant for use when providing external file analysis input (e.g.
## from the input framework).
##
## source: a string that uniquely identifies the logical file that the
## data is a part of and describes its source.
##
## data: bytestring contents of the file to analyze.
global data_stream: function(source: string, data: string);
## Sends a non-sequential chunk of data in for file analysis.
## Meant for use when providing external file analysis input (e.g.
## from the input framework).
##
## source: a string that uniquely identifies the logical file that the
## data is a part of and describes its source.
##
## data: bytestring contents of the file to analyze.
##
## offset: the offset within the file that this chunk starts.
global data_chunk: function(source: string, data: string, offset: count);
## Signals a content gap in the file bytestream.
## Meant for use when providing external file analysis input (e.g.
## from the input framework).
##
## source: a string that uniquely identifies the logical file that the
## data is a part of and describes its source.
##
## offset: the offset within the file that this gap starts.
##
## len: the number of bytes that are missing.
global gap: function(source: string, offset: count, len: count);
## Signals the total size of a file.
## Meant for use when providing external file analysis input (e.g.
## from the input framework).
##
## source: a string that uniquely identifies the logical file that the
## data is a part of and describes its source.
##
## size: the number of bytes that comprise the full file.
global set_size: function(source: string, size: count);
## Signals the end of a file.
## Meant for use when providing external file analysis input (e.g.
## from the input framework).
##
## source: a string that uniquely identifies the logical file that the
## data is a part of and describes its source.
global eof: function(source: string);
}
redef record fa_file += {
@ -259,11 +199,6 @@ function set_timeout_interval(f: fa_file, t: interval): bool
return __set_timeout_interval(f$id, t);
}
function postpone_timeout(f: fa_file): bool
{
return __postpone_timeout(f$id);
}
function add_analyzer(f: fa_file, args: AnalyzerArgs): bool
{
if ( ! __add_analyzer(f$id, args) ) return F;
@ -287,31 +222,6 @@ function stop(f: fa_file): bool
return __stop(f$id);
}
function data_stream(source: string, data: string)
{
__data_stream(source, data);
}
function data_chunk(source: string, data: string, offset: count)
{
__data_chunk(source, data, offset);
}
function gap(source: string, offset: count, len: count)
{
__gap(source, offset, len);
}
function set_size(source: string, size: count)
{
__set_size(source, size);
}
function eof(source: string)
{
__eof(source);
}
event bro_init() &priority=5
{
Log::create_stream(FileAnalysis::LOG,

View file

@ -122,6 +122,34 @@ export {
config: table[string] of string &default=table();
};
## A file analyis input stream type used to forward input data to the
## file analysis framework.
type AnalysisDescription: record {
## String that allows the reader to find the source.
## For `READER_ASCII`, this is the filename.
source: string;
## Reader to use for this steam. Compatible readers must be
## able to accept a filter of a single string type (i.e.
## they read a byte stream).
reader: Reader &default=Input::READER_BINARY;
## Read mode to use for this stream
mode: Mode &default=default_mode;
## Descriptive name that uniquely identifies the input source.
## Can be used used to remove a stream at a later time.
## This will also be used for the unique *source* field of
## :bro:see:`fa_file`. Most of the time, the best choice for this
## field will be the same value as the *source* field.
name: string;
## A key/value table that will be passed on the reader.
## Interpretation of the values is left to the writer, but
## usually they will be used for configuration purposes.
config: table[string] of string &default=table();
};
## Create a new table input from a given source. Returns true on success.
##
## description: `TableDescription` record describing the source.
@ -132,6 +160,14 @@ export {
## description: `TableDescription` record describing the source.
global add_event: function(description: Input::EventDescription) : bool;
## Create a new file analysis input from a given source. Data read from
## the source is automatically forwarded to the file analysis framework.
##
## description: A record describing the source
##
## Returns: true on sucess.
global add_analysis: function(description: Input::AnalysisDescription) : bool;
## Remove a input stream. Returns true on success and false if the named stream was
## not found.
##
@ -164,6 +200,11 @@ function add_event(description: Input::EventDescription) : bool
return __create_event_stream(description);
}
function add_analysis(description: Input::AnalysisDescription) : bool
{
return __create_analysis_stream(description);
}
function remove(id: string) : bool
{
return __remove_stream(id);

View file

@ -222,17 +222,6 @@ type endpoint_stats: record {
endian_type: count;
};
## A unique analyzer instance ID. Each time instantiates a protocol analyzers
## for a connection, it assigns it a unique ID that can be used to reference
## that instance.
##
## .. bro:see:: Analyzer::name Analyzer::disable_analyzer protocol_confirmation
## protocol_violation
##
## .. todo::While we declare an alias for the type here, the events/functions still
## use ``count``. That should be changed.
type AnalyzerID: count;
module Tunnel;
export {
## Records the identity of an encapsulating parent of a tunneled connection.
@ -3065,12 +3054,12 @@ module GLOBAL;
## Number of bytes per packet to capture from live interfaces.
const snaplen = 8192 &redef;
# Load BiFs defined by plugins.
@load base/bif/plugins
# Load these frameworks here because they use fairly deep integration with
# BiFs and script-land defined types.
@load base/frameworks/logging
@load base/frameworks/input
@load base/frameworks/analyzer
@load base/frameworks/file-analysis
# Load BiFs defined by plugins.
@load base/bif/plugins

View file

@ -41,6 +41,7 @@ function get_file_handle(c: connection, is_orig: bool): string
module GLOBAL;
event get_file_handle(tag: Analyzer::Tag, c: connection, is_orig: bool)
&priority=5
{
if ( tag != Analyzer::ANALYZER_FTP_DATA ) return;
set_file_handle(FTP::get_file_handle(c, is_orig));

View file

@ -13,8 +13,6 @@ export {
const extraction_prefix = "ftp-item" &redef;
}
global extract_count: count = 0;
redef record Info += {
## On disk file where it was extracted to.
extraction_file: string &log &optional;
@ -26,8 +24,7 @@ redef record Info += {
function get_extraction_name(f: fa_file): string
{
local r = fmt("%s-%s-%d.dat", extraction_prefix, f$id, extract_count);
++extract_count;
local r = fmt("%s-%s.dat", extraction_prefix, f$id);
return r;
}

View file

@ -6,25 +6,48 @@
module HTTP;
export {
redef record HTTP::Info += {
## Number of MIME entities in the HTTP request message body so far.
request_mime_level: count &default=0;
## Number of MIME entities in the HTTP response message body so far.
response_mime_level: count &default=0;
};
## Default file handle provider for HTTP.
global get_file_handle: function(c: connection, is_orig: bool): string;
}
event http_begin_entity(c: connection, is_orig: bool) &priority=5
{
if ( ! c?$http )
return;
if ( is_orig )
++c$http$request_mime_level;
else
++c$http$response_mime_level;
}
function get_file_handle(c: connection, is_orig: bool): string
{
if ( ! c?$http ) return "";
local mime_level: count =
is_orig ? c$http$request_mime_level : c$http$response_mime_level;
local mime_level_str: string = mime_level > 1 ? cat(mime_level) : "";
if ( c$http$range_request )
return cat(Analyzer::ANALYZER_HTTP, " ", is_orig, " ", c$id$orig_h, " ",
build_url(c$http));
return cat(Analyzer::ANALYZER_HTTP, " ", c$start_time, " ", is_orig, " ",
c$http$trans_depth, " ", id_string(c$id));
c$http$trans_depth, mime_level_str, " ", id_string(c$id));
}
module GLOBAL;
event get_file_handle(tag: Analyzer::Tag, c: connection, is_orig: bool)
&priority=5
{
if ( tag != Analyzer::ANALYZER_HTTP ) return;
set_file_handle(HTTP::get_file_handle(c, is_orig));

View file

@ -14,8 +14,11 @@ export {
const extraction_prefix = "http-item" &redef;
redef record Info += {
## On-disk file where the response body was extracted to.
extraction_file: string &log &optional;
## On-disk location where files in request body were extracted.
extracted_request_files: vector of string &log &optional;
## On-disk location where files in response body were extracted.
extracted_response_files: vector of string &log &optional;
## Indicates if the response body is to be extracted or not. Must be
## set before or by the first :bro:see:`file_new` for the file content.
@ -23,15 +26,28 @@ export {
};
}
global extract_count: count = 0;
function get_extraction_name(f: fa_file): string
{
local r = fmt("%s-%s-%d.dat", extraction_prefix, f$id, extract_count);
++extract_count;
local r = fmt("%s-%s.dat", extraction_prefix, f$id);
return r;
}
function add_extraction_file(c: connection, is_orig: bool, fn: string)
{
if ( is_orig )
{
if ( ! c$http?$extracted_request_files )
c$http$extracted_request_files = vector();
c$http$extracted_request_files[|c$http$extracted_request_files|] = fn;
}
else
{
if ( ! c$http?$extracted_response_files )
c$http$extracted_response_files = vector();
c$http$extracted_response_files[|c$http$extracted_response_files|] = fn;
}
}
event file_new(f: fa_file) &priority=5
{
if ( ! f?$source ) return;
@ -51,7 +67,7 @@ event file_new(f: fa_file) &priority=5
{
c = f$conns[cid];
if ( ! c?$http ) next;
c$http$extraction_file = fname;
add_extraction_file(c, f$is_orig, fname);
}
return;
@ -79,6 +95,6 @@ event file_new(f: fa_file) &priority=5
{
c = f$conns[cid];
if ( ! c?$http ) next;
c$http$extraction_file = fname;
add_extraction_file(c, f$is_orig, fname);
}
}

View file

@ -39,8 +39,6 @@ export {
global dcc_expected_transfers: table[addr, port] of Info &read_expire=5mins;
global extract_count: count = 0;
function set_dcc_mime(f: fa_file)
{
if ( ! f?$conns ) return;
@ -75,8 +73,7 @@ function set_dcc_extraction_file(f: fa_file, filename: string)
function get_extraction_name(f: fa_file): string
{
local r = fmt("%s-%s-%d.dat", extraction_prefix, f$id, extract_count);
++extract_count;
local r = fmt("%s-%s.dat", extraction_prefix, f$id);
return r;
}

View file

@ -18,6 +18,7 @@ function get_file_handle(c: connection, is_orig: bool): string
module GLOBAL;
event get_file_handle(tag: Analyzer::Tag, c: connection, is_orig: bool)
&priority=5
{
if ( tag != Analyzer::ANALYZER_IRC_DATA ) return;
set_file_handle(IRC::get_file_handle(c, is_orig));

View file

@ -66,8 +66,6 @@ export {
global log_mime: event(rec: EntityInfo);
}
global extract_count: count = 0;
event bro_init() &priority=5
{
Log::create_stream(SMTP::ENTITIES_LOG, [$columns=EntityInfo, $ev=log_mime]);
@ -90,8 +88,7 @@ function set_session(c: connection, new_entity: bool)
function get_extraction_name(f: fa_file): string
{
local r = fmt("%s-%s-%d.dat", extraction_prefix, f$id, extract_count);
++extract_count;
local r = fmt("%s-%s.dat", extraction_prefix, f$id);
return r;
}
@ -127,7 +124,6 @@ event file_new(f: fa_file) &priority=5
[$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]);
extracting = T;
++extract_count;
}
c$smtp$current_entity$extraction_file = fname;

View file

@ -20,6 +20,7 @@ function get_file_handle(c: connection, is_orig: bool): string
module GLOBAL;
event get_file_handle(tag: Analyzer::Tag, c: connection, is_orig: bool)
&priority=5
{
if ( tag != Analyzer::ANALYZER_SMTP ) return;
set_file_handle(SMTP::get_file_handle(c, is_orig));