Mirror of https://github.com/zeek/zeek.git
Synced 2025-10-16 13:38:19 +00:00
Merge branch 'topic/robin/file-analysis-merge'
Closes #982.

* topic/robin/file-analysis-merge: (64 commits)
  A few more small tweaks.
  Various small tweaks in preparation for merging.
  FileAnalysis: load custom mime magic database just once.
  Improve a libmagic-related error message.
  FileAnalysis: add is_orig field to fa_file & Info.
  FileAnalysis: inlined doc fixes.
  FileAnalysis: optimize connection set updating.
  FileAnalysis: optimize file handle construction.
  FileAnalysis: workarounds for older libmagics.
  FileAnalysis: add custom libmagic database.
  FileAnalysis: change terminology s/action/analyzer
  FileAnalysis: libmagic tweaks.
  FileAnalysis: add bif for setting timeout interval
  FileAnalysis: add more params to some events.
  FileAnalysis: insert explicit event queue flush points.
  FileAnalysis: remove some file events.
  FileAnalysis: finish switching hooks to events.
  FileAnalysis: checkpoint in middle of big reorganization.
  FileAnalysis: fix file type canonification for file_analysis.log
  Revert "FileAnalysis: optimize get_file_handle event queueing."
  ...

Conflicts:
  NEWS
Commit e050648621
262 changed files with 88998 additions and 160161 deletions
scripts/base/frameworks/file-analysis/__load__.bro (new file, 1 line)
@@ -0,0 +1 @@
@load ./main.bro
scripts/base/frameworks/file-analysis/main.bro (new file, 351 lines)
@@ -0,0 +1,351 @@
##! An interface for driving the analysis of files, possibly independent of
##! any network protocol over which they're transported.

@load base/file_analysis.bif
@load base/frameworks/logging

module FileAnalysis;

export {
	redef enum Log::ID += {
		## Logging stream for file analysis.
		LOG
	};

	## A structure which represents a desired type of file analysis.
	type AnalyzerArgs: record {
		## The type of analysis.
		tag: Analyzer;

		## The local filename to which to write an extracted file. Must be
		## set when *tag* is :bro:see:`FileAnalysis::ANALYZER_EXTRACT`.
		extract_filename: string &optional;

		## An event which will be generated for all new file contents,
		## chunk-wise.
		chunk_event: event(f: fa_file, data: string, off: count) &optional;

		## An event which will be generated for all new file contents,
		## stream-wise.
		stream_event: event(f: fa_file, data: string) &optional;
	} &redef;

	## Contains all metadata related to the analysis of a given file.
	## For the most part, fields here are derived from ones of the same name
	## in :bro:see:`fa_file`.
	type Info: record {
		## An identifier associated with a single file.
		id: string &log;

		## Identifier associated with a container file from which this one was
		## extracted as part of the file analysis.
		parent_id: string &log &optional;

		## An identification of the source of the file data. E.g. it may be
		## a network protocol over which it was transferred, or a local file
		## path which was read, or some other input source.
		source: string &log &optional;

		## If the source of this file is a network connection, this field
		## may be set to indicate the directionality.
		is_orig: bool &log &optional;

		## The time at which the last activity for the file was seen.
		last_active: time &log;

		## Number of bytes provided to the file analysis engine for the file.
		seen_bytes: count &log &default=0;

		## Total number of bytes that are supposed to comprise the full file.
		total_bytes: count &log &optional;

		## The number of bytes in the file stream that were completely missed
		## during the process of analysis, e.g. due to dropped packets.
		missing_bytes: count &log &default=0;

		## The number of bytes in the file stream that were delivered to file
		## analyzers out of sequence due to reassembly buffer overflow.
		overflow_bytes: count &log &default=0;

		## The amount of time between receiving new data for this file that
		## the analysis engine will wait before giving up on it.
		timeout_interval: interval &log &optional;

		## The number of bytes at the beginning of a file to save for later
		## inspection in the *bof_buffer* field.
		bof_buffer_size: count &log &optional;

		## A mime type provided by libmagic against the *bof_buffer*, or,
		## in the cases where no buffering of the beginning of file occurs,
		## an initial guess of the mime type based on the first data seen.
		mime_type: string &log &optional;

		## Whether the file analysis timed out at least once for the file.
		timedout: bool &log &default=F;

		## Connection UIDs over which the file was transferred.
		conn_uids: set[string] &log;

		## A set of analysis types done during the file analysis.
		analyzers: set[Analyzer] &log;

		## Local filenames of extracted files.
		extracted_files: set[string] &log;

		## An MD5 digest of the file contents.
		md5: string &log &optional;

		## A SHA1 digest of the file contents.
		sha1: string &log &optional;

		## A SHA256 digest of the file contents.
		sha256: string &log &optional;
	} &redef;

	## A table that can be used to disable file analysis completely for
	## any files transferred over given network protocol analyzers.
	const disable: table[AnalyzerTag] of bool = table() &redef;

	## Event that can be handled to access the Info record as it is sent on
	## to the logging framework.
	global log_file_analysis: event(rec: Info);

	## The salt concatenated to unique file handle strings generated by
	## :bro:see:`get_file_handle` before hashing them in to a file id
	## (the *id* field of :bro:see:`fa_file`).
	## Provided to help mitigate the possibility of manipulating parts of
	## network connections that factor in to the file handle in order to
	## generate two handles that would hash to the same file id.
	const salt = "I recommend changing this." &redef;

	## Sets the *timeout_interval* field of :bro:see:`fa_file`, which is
	## used to determine the length of inactivity that is allowed for a file
	## before internal state related to it is cleaned up.
	##
	## f: the file.
	##
	## t: the amount of time the file can remain inactive before discarding.
	##
	## Returns: true if the timeout interval was set, or false if analysis
	##          for the file isn't currently active.
	global set_timeout_interval: function(f: fa_file, t: interval): bool;

	## Postpones the timeout of file analysis for a given file.
	## When used within a :bro:see:`file_timeout` handler, the analysis
	## will delay timing out for the period of time indicated by the
	## *timeout_interval* field of :bro:see:`fa_file`, which can be set
	## with :bro:see:`FileAnalysis::set_timeout_interval`.
	##
	## f: the file.
	##
	## Returns: true if the timeout will be postponed, or false if analysis
	##          for the file isn't currently active.
	global postpone_timeout: function(f: fa_file): bool;

	## Adds an analyzer to the analysis of a given file.
	##
	## f: the file.
	##
	## args: the analyzer type to add along with any arguments it takes.
	##
	## Returns: true if the analyzer will be added, or false if analysis
	##          for the file isn't currently active or the *args*
	##          were invalid for the analyzer type.
	global add_analyzer: function(f: fa_file, args: AnalyzerArgs): bool;

	## Removes an analyzer from the analysis of a given file.
	##
	## f: the file.
	##
	## args: the analyzer (type and args) to remove.
	##
	## Returns: true if the analyzer will be removed, or false if analysis
	##          for the file isn't currently active.
	global remove_analyzer: function(f: fa_file, args: AnalyzerArgs): bool;

	## Stops/ignores any further analysis of a given file.
	##
	## f: the file.
	##
	## Returns: true if analysis for the given file will be ignored for the
	##          rest of its contents, or false if analysis for the file
	##          isn't currently active.
	global stop: function(f: fa_file): bool;

	## Sends a sequential stream of data in for file analysis.
	## Meant for use when providing external file analysis input (e.g.
	## from the input framework).
	##
	## source: a string that uniquely identifies the logical file that the
	##         data is a part of and describes its source.
	##
	## data: bytestring contents of the file to analyze.
	global data_stream: function(source: string, data: string);

	## Sends a non-sequential chunk of data in for file analysis.
	## Meant for use when providing external file analysis input (e.g.
	## from the input framework).
	##
	## source: a string that uniquely identifies the logical file that the
	##         data is a part of and describes its source.
	##
	## data: bytestring contents of the file to analyze.
	##
	## offset: the offset within the file at which this chunk starts.
	global data_chunk: function(source: string, data: string, offset: count);

	## Signals a content gap in the file bytestream.
	## Meant for use when providing external file analysis input (e.g.
	## from the input framework).
	##
	## source: a string that uniquely identifies the logical file that the
	##         data is a part of and describes its source.
	##
	## offset: the offset within the file at which this gap starts.
	##
	## len: the number of bytes that are missing.
	global gap: function(source: string, offset: count, len: count);

	## Signals the total size of a file.
	## Meant for use when providing external file analysis input (e.g.
	## from the input framework).
	##
	## source: a string that uniquely identifies the logical file that the
	##         data is a part of and describes its source.
	##
	## size: the number of bytes that comprise the full file.
	global set_size: function(source: string, size: count);

	## Signals the end of a file.
	## Meant for use when providing external file analysis input (e.g.
	## from the input framework).
	##
	## source: a string that uniquely identifies the logical file that the
	##         data is a part of and describes its source.
	global eof: function(source: string);
}

redef record fa_file += {
	info: Info &optional;
};

function set_info(f: fa_file)
	{
	if ( ! f?$info )
		{
		local tmp: Info;
		f$info = tmp;
		}

	f$info$id = f$id;
	if ( f?$parent_id ) f$info$parent_id = f$parent_id;
	if ( f?$source ) f$info$source = f$source;
	if ( f?$is_orig ) f$info$is_orig = f$is_orig;
	f$info$last_active = f$last_active;
	f$info$seen_bytes = f$seen_bytes;
	if ( f?$total_bytes ) f$info$total_bytes = f$total_bytes;
	f$info$missing_bytes = f$missing_bytes;
	f$info$overflow_bytes = f$overflow_bytes;
	f$info$timeout_interval = f$timeout_interval;
	f$info$bof_buffer_size = f$bof_buffer_size;
	if ( f?$mime_type ) f$info$mime_type = f$mime_type;
	if ( f?$conns )
		for ( cid in f$conns )
			add f$info$conn_uids[f$conns[cid]$uid];
	}

function set_timeout_interval(f: fa_file, t: interval): bool
	{
	return __set_timeout_interval(f$id, t);
	}

function postpone_timeout(f: fa_file): bool
	{
	return __postpone_timeout(f$id);
	}

function add_analyzer(f: fa_file, args: AnalyzerArgs): bool
	{
	if ( ! __add_analyzer(f$id, args) ) return F;

	set_info(f);
	add f$info$analyzers[args$tag];

	if ( args$tag == FileAnalysis::ANALYZER_EXTRACT )
		add f$info$extracted_files[args$extract_filename];

	return T;
	}

function remove_analyzer(f: fa_file, args: AnalyzerArgs): bool
	{
	return __remove_analyzer(f$id, args);
	}

function stop(f: fa_file): bool
	{
	return __stop(f$id);
	}

function data_stream(source: string, data: string)
	{
	__data_stream(source, data);
	}

function data_chunk(source: string, data: string, offset: count)
	{
	__data_chunk(source, data, offset);
	}

function gap(source: string, offset: count, len: count)
	{
	__gap(source, offset, len);
	}

function set_size(source: string, size: count)
	{
	__set_size(source, size);
	}

function eof(source: string)
	{
	__eof(source);
	}

event bro_init() &priority=5
	{
	Log::create_stream(FileAnalysis::LOG,
	                   [$columns=Info, $ev=log_file_analysis]);
	}

event file_timeout(f: fa_file) &priority=5
	{
	set_info(f);
	f$info$timedout = T;
	}

event file_hash(f: fa_file, kind: string, hash: string) &priority=5
	{
	set_info(f);
	switch ( kind ) {
	case "md5":
		f$info$md5 = hash;
		break;

	case "sha1":
		f$info$sha1 = hash;
		break;

	case "sha256":
		f$info$sha256 = hash;
		break;
	}
	}

event file_state_remove(f: fa_file) &priority=5
	{
	set_info(f);
	}

event file_state_remove(f: fa_file) &priority=-5
	{
	Log::write(FileAnalysis::LOG, f$info);
	}
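For illustration, here is a minimal usage sketch of the exported API. It is not part of the commit: it assumes the engine raises a file_new event for each new fa_file, that the FileAnalysis::Analyzer enum includes an ANALYZER_MD5 tag alongside the ANALYZER_EXTRACT tag documented above, and the extract filename is a placeholder.

# Sketch only (see assumptions above): attach analyzers when a new file
# is first seen by the file analysis engine.
event file_new(f: fa_file)
	{
	# Compute an MD5 digest of the file contents (assumes ANALYZER_MD5 exists).
	FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]);

	# Extract the contents to a local file; extract_filename is required
	# when the tag is ANALYZER_EXTRACT (the name here is a placeholder).
	FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
	                               $extract_filename=fmt("extract-%s", f$id)]);
	}

# Sketch: give an inactive file one more timeout interval before giving up.
event file_timeout(f: fa_file)
	{
	FileAnalysis::postpone_timeout(f);
	}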
@@ -2,4 +2,5 @@
@load ./readers/ascii
@load ./readers/raw
@load ./readers/benchmark
@load ./readers/binary
scripts/base/frameworks/input/readers/binary.bro (new file, 8 lines)
@@ -0,0 +1,8 @@
##! Interface for the binary input reader.

module InputBinary;

export {
	## Size of data chunks to read from the input file at a time.
	const chunk_size = 1024 &redef;
}
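As a usage note, the reader's chunk size is redefinable, and externally obtained file data can be handed to the file analysis engine through the functions exported in main.bro above. The following sketch is illustrative only; the source label is a placeholder, and real data would normally arrive via the input framework rather than being hard-coded.

# Sketch: read larger chunks from the binary input reader (default is 1024).
redef InputBinary::chunk_size = 65536;

# Sketch: feed externally obtained file contents to the file analysis engine.
# "example-source" is a placeholder identifier for the logical file.
event bro_init()
	{
	local src = "example-source";
	FileAnalysis::set_size(src, 12);              # total size, if known
	FileAnalysis::data_stream(src, "Hello, ");    # sequential bytes (offsets 0-6)
	FileAnalysis::data_chunk(src, "world", 7);    # out-of-order chunk at offset 7
	FileAnalysis::eof(src);                       # signal end of file
	}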