Merge remote-tracking branch 'origin/topic/jsiwek/faf-cleanup'

Closes #1002.

* origin/topic/jsiwek/faf-cleanup:
  Move file analyzers to new plugin infrastructure.
  Add a general file analysis overview/how-to document.
  Improve file analysis doxygen comments.
  Improve tracking of HTTP file extraction (addresses #988).
  Fix HTTP multipart body file analysis.
  Remove logging of analyzers field of FileAnalysis::Info.
  Remove extraction counter in default file extraction scripts.
  Remove FileAnalysis::postpone_timeout.
  Make default get_file_handle handlers &priority=5.
  Add input interface to forward data for file analysis.
  File analysis framework interface simplifications.
This commit is contained in:
Robin Sommer 2013-07-03 16:22:43 -07:00
commit d8b05af7e5
127 changed files with 2458 additions and 1412 deletions

View file

@ -15,18 +15,20 @@ export {
## A structure which represents a desired type of file analysis.
type AnalyzerArgs: record {
## The type of analysis.
tag: Analyzer;
tag: FileAnalysis::Tag;
## The local filename to which to write an extracted file. Must be
## set when *tag* is :bro:see:`FileAnalysis::ANALYZER_EXTRACT`.
extract_filename: string &optional;
## An event which will be generated for all new file contents,
## chunk-wise.
## chunk-wise. Used when *tag* is
## :bro:see:`FileAnalysis::ANALYZER_DATA_EVENT`.
chunk_event: event(f: fa_file, data: string, off: count) &optional;
## An event which will be generated for all new file contents,
## stream-wise.
## stream-wise. Used when *tag* is
## :bro:see:`FileAnalysis::ANALYZER_DATA_EVENT`.
stream_event: event(f: fa_file, data: string) &optional;
} &redef;
@ -87,7 +89,7 @@ export {
conn_uids: set[string] &log;
## A set of analysis types done during the file analysis.
analyzers: set[Analyzer] &log;
analyzers: set[FileAnalysis::Tag];
## Local filenames of extracted files.
extracted_files: set[string] &log;
@ -120,7 +122,9 @@ export {
## Sets the *timeout_interval* field of :bro:see:`fa_file`, which is
## used to determine the length of inactivity that is allowed for a file
## before internal state related to it is cleaned up.
## before internal state related to it is cleaned up. When used within a
## :bro:see:`file_timeout` handler, the analysis will delay timing out
## again for the period specified by *t*.
##
## f: the file.
##
@ -130,18 +134,6 @@ export {
## for the *id* isn't currently active.
global set_timeout_interval: function(f: fa_file, t: interval): bool;
## Postpones the timeout of file analysis for a given file.
## When used within a :bro:see:`file_timeout` handler for, the analysis
## the analysis will delay timing out for the period of time indicated by
## the *timeout_interval* field of :bro:see:`fa_file`, which can be set
## with :bro:see:`FileAnalysis::set_timeout_interval`.
##
## f: the file.
##
## Returns: true if the timeout will be postponed, or false if analysis
## for the *id* isn't currently active.
global postpone_timeout: function(f: fa_file): bool;
## Adds an analyzer to the analysis of a given file.
##
## f: the file.
@ -171,58 +163,6 @@ export {
## rest of it's contents, or false if analysis for the *id*
## isn't currently active.
global stop: function(f: fa_file): bool;
## Sends a sequential stream of data in for file analysis.
## Meant for use when providing external file analysis input (e.g.
## from the input framework).
##
## source: a string that uniquely identifies the logical file that the
## data is a part of and describes its source.
##
## data: bytestring contents of the file to analyze.
global data_stream: function(source: string, data: string);
## Sends a non-sequential chunk of data in for file analysis.
## Meant for use when providing external file analysis input (e.g.
## from the input framework).
##
## source: a string that uniquely identifies the logical file that the
## data is a part of and describes its source.
##
## data: bytestring contents of the file to analyze.
##
## offset: the offset within the file that this chunk starts.
global data_chunk: function(source: string, data: string, offset: count);
## Signals a content gap in the file bytestream.
## Meant for use when providing external file analysis input (e.g.
## from the input framework).
##
## source: a string that uniquely identifies the logical file that the
## data is a part of and describes its source.
##
## offset: the offset within the file that this gap starts.
##
## len: the number of bytes that are missing.
global gap: function(source: string, offset: count, len: count);
## Signals the total size of a file.
## Meant for use when providing external file analysis input (e.g.
## from the input framework).
##
## source: a string that uniquely identifies the logical file that the
## data is a part of and describes its source.
##
## size: the number of bytes that comprise the full file.
global set_size: function(source: string, size: count);
## Signals the end of a file.
## Meant for use when providing external file analysis input (e.g.
## from the input framework).
##
## source: a string that uniquely identifies the logical file that the
## data is a part of and describes its source.
global eof: function(source: string);
}
redef record fa_file += {
@ -259,11 +199,6 @@ function set_timeout_interval(f: fa_file, t: interval): bool
return __set_timeout_interval(f$id, t);
}
function postpone_timeout(f: fa_file): bool
{
return __postpone_timeout(f$id);
}
function add_analyzer(f: fa_file, args: AnalyzerArgs): bool
{
if ( ! __add_analyzer(f$id, args) ) return F;
@ -287,31 +222,6 @@ function stop(f: fa_file): bool
return __stop(f$id);
}
function data_stream(source: string, data: string)
{
__data_stream(source, data);
}
function data_chunk(source: string, data: string, offset: count)
{
__data_chunk(source, data, offset);
}
function gap(source: string, offset: count, len: count)
{
__gap(source, offset, len);
}
function set_size(source: string, size: count)
{
__set_size(source, size);
}
function eof(source: string)
{
__eof(source);
}
event bro_init() &priority=5
{
Log::create_stream(FileAnalysis::LOG,

View file

@ -122,6 +122,34 @@ export {
config: table[string] of string &default=table();
};
## A file analyis input stream type used to forward input data to the
## file analysis framework.
type AnalysisDescription: record {
## String that allows the reader to find the source.
## For `READER_ASCII`, this is the filename.
source: string;
## Reader to use for this steam. Compatible readers must be
## able to accept a filter of a single string type (i.e.
## they read a byte stream).
reader: Reader &default=Input::READER_BINARY;
## Read mode to use for this stream
mode: Mode &default=default_mode;
## Descriptive name that uniquely identifies the input source.
## Can be used used to remove a stream at a later time.
## This will also be used for the unique *source* field of
## :bro:see:`fa_file`. Most of the time, the best choice for this
## field will be the same value as the *source* field.
name: string;
## A key/value table that will be passed on the reader.
## Interpretation of the values is left to the writer, but
## usually they will be used for configuration purposes.
config: table[string] of string &default=table();
};
## Create a new table input from a given source. Returns true on success.
##
## description: `TableDescription` record describing the source.
@ -132,6 +160,14 @@ export {
## description: `TableDescription` record describing the source.
global add_event: function(description: Input::EventDescription) : bool;
## Create a new file analysis input from a given source. Data read from
## the source is automatically forwarded to the file analysis framework.
##
## description: A record describing the source
##
## Returns: true on sucess.
global add_analysis: function(description: Input::AnalysisDescription) : bool;
## Remove a input stream. Returns true on success and false if the named stream was
## not found.
##
@ -164,6 +200,11 @@ function add_event(description: Input::EventDescription) : bool
return __create_event_stream(description);
}
function add_analysis(description: Input::AnalysisDescription) : bool
{
return __create_analysis_stream(description);
}
function remove(id: string) : bool
{
return __remove_stream(id);