mirror of
https://github.com/zeek/zeek.git
synced 2025-10-13 20:18:20 +00:00
Large overhaul in name and appearance for file analysis.
This commit is contained in:
parent
caf61f619b
commit
df2841458d
39 changed files with 420 additions and 875 deletions
|
@ -3,8 +3,9 @@
|
|||
|
||||
@load base/file_analysis.bif
|
||||
@load base/frameworks/logging
|
||||
@load base/utils/site
|
||||
|
||||
module FileAnalysis;
|
||||
module Files;
|
||||
|
||||
export {
|
||||
redef enum Log::ID += {
|
||||
|
@ -14,21 +15,14 @@ export {
|
|||
|
||||
## A structure which represents a desired type of file analysis.
|
||||
type AnalyzerArgs: record {
|
||||
## The type of analysis.
|
||||
tag: Analyzer;
|
||||
|
||||
## The local filename to which to write an extracted file. Must be
|
||||
## set when *tag* is :bro:see:`FileAnalysis::ANALYZER_EXTRACT`.
|
||||
extract_filename: string &optional;
|
||||
|
||||
## An event which will be generated for all new file contents,
|
||||
## chunk-wise. Used when *tag* is
|
||||
## :bro:see:`FileAnalysis::ANALYZER_DATA_EVENT`.
|
||||
## :bro:see:`Files::ANALYZER_DATA_EVENT`.
|
||||
chunk_event: event(f: fa_file, data: string, off: count) &optional;
|
||||
|
||||
## An event which will be generated for all new file contents,
|
||||
## stream-wise. Used when *tag* is
|
||||
## :bro:see:`FileAnalysis::ANALYZER_DATA_EVENT`.
|
||||
## :bro:see:`Files::ANALYZER_DATA_EVENT`.
|
||||
stream_event: event(f: fa_file, data: string) &optional;
|
||||
} &redef;
|
||||
|
||||
|
@ -40,23 +34,52 @@ export {
|
|||
ts: time &log;
|
||||
|
||||
## An identifier associated with a single file.
|
||||
id: string &log;
|
||||
fuid: string &log;
|
||||
|
||||
## Identifier associated with a container file from which this one was
|
||||
## extracted as part of the file analysis.
|
||||
parent_id: string &log &optional;
|
||||
## If this file was transferred over a network
|
||||
## connection this should show the host or hosts that
|
||||
## the data sourced from.
|
||||
tx_hosts: set[addr] &log;
|
||||
|
||||
## If this file was transferred over a network
|
||||
## connection this should show the host or hosts that
|
||||
## the data traveled to.
|
||||
rx_hosts: set[addr] &log;
|
||||
|
||||
## Connection UIDs over which the file was transferred.
|
||||
conn_uids: set[string] &log;
|
||||
|
||||
## An identification of the source of the file data. E.g. it may be
|
||||
## a network protocol over which it was transferred, or a local file
|
||||
## path which was read, or some other input source.
|
||||
source: string &log &optional;
|
||||
|
||||
## If the source of this file is a network connection, this field
|
||||
## may be set to indicate the directionality.
|
||||
is_orig: bool &log &optional;
|
||||
## A value to represent the depth of this file in relation
|
||||
## to its source. In SMTP, it is the depth of the MIME
|
||||
## attachment on the message. In HTTP, it is the depth of the
|
||||
## request within the TCP connection.
|
||||
depth: count &default=0 &log;
|
||||
|
||||
## The time at which the last activity for the file was seen.
|
||||
last_active: time &log;
|
||||
## A set of analysis types done during the file analysis.
|
||||
analyzers: set[Analyzer] &log;
|
||||
|
||||
## A mime type provided by libmagic against the *bof_buffer*, or
|
||||
## in the cases where no buffering of the beginning of file occurs,
|
||||
## an initial guess of the mime type based on the first data seen.
|
||||
mime_type: string &log &optional;
|
||||
|
||||
## A filename for the file if one is available from the source
|
||||
## for the file. These will frequently come from
|
||||
## "Content-Disposition" headers in network protocols.
|
||||
filename: string &log &optional;
|
||||
|
||||
## The duration the file was analyzed for.
|
||||
duration: interval &log &default=0secs;
|
||||
|
||||
## If the source of this file is a network connection, this field
|
||||
## indicates if the data originated from the local network or not as
|
||||
## determined by the configured :bro:see:`Site::local_nets`.
|
||||
local_orig: bool &log &optional;
|
||||
|
||||
## Number of bytes provided to the file analysis engine for the file.
|
||||
seen_bytes: count &log &default=0;
|
||||
|
@ -72,49 +95,18 @@ export {
|
|||
## were delivered to file analyzers due to reassembly buffer overflow.
|
||||
overflow_bytes: count &log &default=0;
|
||||
|
||||
## The amount of time between receiving new data for this file that
|
||||
## the analysis engine will wait before giving up on it.
|
||||
timeout_interval: interval &log &optional;
|
||||
|
||||
## The number of bytes at the beginning of a file to save for later
|
||||
## inspection in *bof_buffer* field.
|
||||
bof_buffer_size: count &log &optional;
|
||||
|
||||
## A mime type provided by libmagic against the *bof_buffer*, or
|
||||
## in the cases where no buffering of the beginning of file occurs,
|
||||
## an initial guess of the mime type based on the first data seen.
|
||||
mime_type: string &log &optional;
|
||||
|
||||
## Whether the file analysis timed out at least once for the file.
|
||||
timedout: bool &log &default=F;
|
||||
|
||||
## Connection UIDS over which the file was transferred.
|
||||
conn_uids: set[string] &log;
|
||||
|
||||
## A set of analysis types done during the file analysis.
|
||||
analyzers: set[Analyzer];
|
||||
|
||||
## Local filenames of extracted files.
|
||||
extracted_files: set[string] &log;
|
||||
|
||||
## An MD5 digest of the file contents.
|
||||
md5: string &log &optional;
|
||||
|
||||
## A SHA1 digest of the file contents.
|
||||
sha1: string &log &optional;
|
||||
|
||||
## A SHA256 digest of the file contents.
|
||||
sha256: string &log &optional;
|
||||
## Identifier associated with a container file from which this one was
|
||||
## extracted as part of the file analysis.
|
||||
parent_fuid: string &log &optional;
|
||||
} &redef;
|
||||
|
||||
## A table that can be used to disable file analysis completely for
|
||||
## any files transferred over given network protocol analyzers.
|
||||
const disable: table[AnalyzerTag] of bool = table() &redef;
|
||||
|
||||
## Event that can be handled to access the Info record as it is sent on
|
||||
## to the logging framework.
|
||||
global log_file_analysis: event(rec: Info);
|
||||
|
||||
## The salt concatenated to unique file handle strings generated by
|
||||
## :bro:see:`get_file_handle` before hashing them into a file id
|
||||
## (the *id* field of :bro:see:`fa_file`).
|
||||
|
@ -146,7 +138,9 @@ export {
|
|||
## Returns: true if the analyzer will be added, or false if analysis
|
||||
## for the *id* isn't currently active or the *args*
|
||||
## were invalid for the analyzer type.
|
||||
global add_analyzer: function(f: fa_file, args: AnalyzerArgs): bool;
|
||||
global add_analyzer: function(f: fa_file,
|
||||
tag: Files::Analyzer,
|
||||
args: AnalyzerArgs &default=AnalyzerArgs()): bool;
|
||||
|
||||
## Removes an analyzer from the analysis of a given file.
|
||||
##
|
||||
|
@ -156,7 +150,7 @@ export {
|
|||
##
|
||||
## Returns: true if the analyzer will be removed, or false if analysis
|
||||
## for the *id* isn't currently active.
|
||||
global remove_analyzer: function(f: fa_file, args: AnalyzerArgs): bool;
|
||||
global remove_analyzer: function(f: fa_file, tag: Files::Analyzer, args: AnalyzerArgs): bool;
|
||||
|
||||
## Stops/ignores any further analysis of a given file.
|
||||
##
|
||||
|
@ -166,45 +160,75 @@ export {
|
|||
## rest of its contents, or false if analysis for the *id*
|
||||
## isn't currently active.
|
||||
global stop: function(f: fa_file): bool;
|
||||
|
||||
## Register callbacks for protocols that work with the Files framework.
|
||||
## The callbacks must uniquely identify a file and each protocol can
|
||||
## only have a single callback registered for it.
|
||||
##
|
||||
## tag: Tag for the protocol analyzer having a callback being registered.
|
||||
##
|
||||
## callback: Function that can generate a file handle for the protocol analyzer
|
||||
## defined previously.
|
||||
##
|
||||
## Returns: true if the protocol being registered was not previously registered.
|
||||
global register_protocol: function(tag: AnalyzerTag, callback: function(c: connection, is_orig: bool): string): bool;
|
||||
|
||||
## Register a callback for file analyzers to use if they need to do some manipulation
|
||||
## when they are being added to a file before the core code takes over. This is
|
||||
## unlikely to be interesting for users and should only be called by file analyzer
|
||||
## authors but is *not required*.
|
||||
##
|
||||
## tag: Tag for the file analyzer.
|
||||
##
|
||||
## callback: Function to execute when the given file analyzer is being added.
|
||||
global register_analyzer_add_callback: function(tag: Files::Analyzer, callback: function(f: fa_file, args: AnalyzerArgs));
|
||||
|
||||
## Event that can be handled to access the Info record as it is sent on
|
||||
## to the logging framework.
|
||||
global log_files: event(rec: Info);
|
||||
}
|
||||
|
||||
redef record fa_file += {
|
||||
info: Info &optional;
|
||||
};
|
||||
|
||||
redef record AnalyzerArgs += {
|
||||
# This is used internally for the core file analyzer api.
|
||||
tag: Files::Analyzer &optional;
|
||||
};
|
||||
|
||||
# Store the callbacks for protocol analyzers that have files.
|
||||
global registered_protocols: table[AnalyzerTag] of function(c: connection, is_orig: bool): string = table()
|
||||
&default=function(c: connection, is_orig: bool): string { return cat(c$uid, is_orig); };
|
||||
|
||||
global analyzer_add_callbacks: table[Files::Analyzer] of function(f: fa_file, args: AnalyzerArgs) = table();
|
||||
|
||||
event bro_init() &priority=5
|
||||
{
|
||||
Log::create_stream(Files::LOG, [$columns=Info, $ev=log_files]);
|
||||
}
|
||||
|
||||
function set_info(f: fa_file)
|
||||
{
|
||||
if ( ! f?$info )
|
||||
{
|
||||
local tmp: Info = Info($ts=network_time());
|
||||
local tmp: Info = Info($ts=f$last_active,
|
||||
$fuid=f$id);
|
||||
f$info = tmp;
|
||||
}
|
||||
|
||||
f$info$ts = network_time();
|
||||
f$info$id = f$id;
|
||||
if ( f?$parent_id )
|
||||
f$info$parent_id = f$parent_id;
|
||||
f$info$parent_fuid = f$parent_id;
|
||||
if ( f?$source )
|
||||
f$info$source = f$source;
|
||||
if ( f?$is_orig )
|
||||
f$info$is_orig = f$is_orig;
|
||||
f$info$last_active = f$last_active;
|
||||
f$info$duration = f$last_active - f$info$ts;
|
||||
f$info$seen_bytes = f$seen_bytes;
|
||||
if ( f?$total_bytes )
|
||||
f$info$total_bytes = f$total_bytes;
|
||||
f$info$missing_bytes = f$missing_bytes;
|
||||
f$info$overflow_bytes = f$overflow_bytes;
|
||||
f$info$timeout_interval = f$timeout_interval;
|
||||
f$info$bof_buffer_size = f$bof_buffer_size;
|
||||
if ( f?$mime_type )
|
||||
f$info$mime_type = f$mime_type;
|
||||
if ( f?$conns )
|
||||
{
|
||||
for ( cid in f$conns )
|
||||
{
|
||||
add f$info$conn_uids[f$conns[cid]$uid];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function set_timeout_interval(f: fa_file, t: interval): bool
|
||||
|
@ -212,21 +236,31 @@ function set_timeout_interval(f: fa_file, t: interval): bool
|
|||
return __set_timeout_interval(f$id, t);
|
||||
}
|
||||
|
||||
function add_analyzer(f: fa_file, args: AnalyzerArgs): bool
|
||||
function add_analyzer(f: fa_file, tag: Analyzer, args: AnalyzerArgs): bool
|
||||
{
|
||||
if ( ! __add_analyzer(f$id, args) ) return F;
|
||||
# This is to construct the correct args for the core API.
|
||||
args$tag = tag;
|
||||
add f$info$analyzers[tag];
|
||||
|
||||
set_info(f);
|
||||
add f$info$analyzers[args$tag];
|
||||
|
||||
if ( args$tag == FileAnalysis::ANALYZER_EXTRACT )
|
||||
add f$info$extracted_files[args$extract_filename];
|
||||
if ( tag in analyzer_add_callbacks )
|
||||
analyzer_add_callbacks[tag](f, args);
|
||||
|
||||
if ( ! __add_analyzer(f$id, args) )
|
||||
{
|
||||
Reporter::warning(fmt("Analyzer %s not added successfully to file %s.", tag, f$id));
|
||||
return F;
|
||||
}
|
||||
return T;
|
||||
}
|
||||
|
||||
function remove_analyzer(f: fa_file, args: AnalyzerArgs): bool
|
||||
function register_analyzer_add_callback(tag: Files::Analyzer, callback: function(f: fa_file, args: AnalyzerArgs))
|
||||
{
|
||||
analyzer_add_callbacks[tag] = callback;
|
||||
}
|
||||
|
||||
function remove_analyzer(f: fa_file, tag: Files::Analyzer, args: AnalyzerArgs): bool
|
||||
{
|
||||
args$tag = tag;
|
||||
return __remove_analyzer(f$id, args);
|
||||
}
|
||||
|
||||
|
@ -235,25 +269,48 @@ function stop(f: fa_file): bool
|
|||
return __stop(f$id);
|
||||
}
|
||||
|
||||
event bro_init() &priority=5
|
||||
event file_new(f: fa_file) &priority=10
|
||||
{
|
||||
Log::create_stream(FileAnalysis::LOG,
|
||||
[$columns=Info, $ev=log_file_analysis]);
|
||||
set_info(f);
|
||||
}
|
||||
|
||||
event file_timeout(f: fa_file) &priority=5
|
||||
event file_over_new_connection(f: fa_file, c: connection) &priority=10
|
||||
{
|
||||
set_info(f);
|
||||
add f$info$conn_uids[c$uid];
|
||||
local cid = c$id;
|
||||
add f$info$tx_hosts[f$is_orig ? cid$orig_h : cid$resp_h];
|
||||
if( |Site::local_nets| > 0 )
|
||||
f$info$local_orig=Site::is_local_addr(f$is_orig ? cid$orig_h : cid$resp_h);
|
||||
|
||||
add f$info$rx_hosts[f$is_orig ? cid$resp_h : cid$orig_h];
|
||||
}
|
||||
|
||||
event file_timeout(f: fa_file) &priority=10
|
||||
{
|
||||
set_info(f);
|
||||
f$info$timedout = T;
|
||||
}
|
||||
|
||||
|
||||
event file_state_remove(f: fa_file) &priority=5
|
||||
event file_state_remove(f: fa_file) &priority=10
|
||||
{
|
||||
set_info(f);
|
||||
}
|
||||
|
||||
event file_state_remove(f: fa_file) &priority=-5
|
||||
event file_state_remove(f: fa_file) &priority=-10
|
||||
{
|
||||
Log::write(FileAnalysis::LOG, f$info);
|
||||
Log::write(Files::LOG, f$info);
|
||||
}
|
||||
|
||||
function register_protocol(tag: AnalyzerTag, callback: function(c: connection, is_orig: bool): string): bool
|
||||
{
|
||||
local result = (tag !in registered_protocols);
|
||||
registered_protocols[tag] = callback;
|
||||
return result;
|
||||
}
|
||||
|
||||
event get_file_handle(tag: AnalyzerTag, c: connection, is_orig: bool) &priority=5
|
||||
{
|
||||
local handler = registered_protocols[tag];
|
||||
set_file_handle(handler(c, is_orig));
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue