Mirror of https://github.com/zeek/zeek.git, synced 2025-10-02 14:48:21 +00:00
Merge remote-tracking branch 'origin/master' into topic/seth/files-tracking
Conflicts:
	scripts/base/frameworks/files/main.bro
	src/file_analysis/File.cc
	testing/btest/Baseline/scripts.base.frameworks.file-analysis.actions.data_event/out
commit 42b2d56279
486 changed files with 106378 additions and 85985 deletions
@@ -56,7 +56,7 @@ export {
	## local file path which was read, or some other input source.
	source: string &log &optional;

	## A value to represent the depth of this file in relation
	## to its source. In SMTP, it is the depth of the MIME
	## attachment on the message. In HTTP, it is the depth of the
	## request within the TCP connection.
@@ -73,7 +73,7 @@ export {
	mime_type: string &log &optional;

	## A filename for the file if one is available from the source
	## for the file. These will frequently come from
	## "Content-Disposition" headers in network protocols.
	filename: string &log &optional;

@@ -177,10 +177,19 @@ export {
	## Returns: true if the analyzer will be added, or false if analysis
	## for the file isn't currently active or the *args*
	## were invalid for the analyzer type.
	global add_analyzer: function(f: fa_file,
	                              tag: Files::Tag,
	                              args: AnalyzerArgs &default=AnalyzerArgs()): bool;

	## Adds all analyzers associated with a given MIME type to the analysis of
	## a file. Note that analyzers added via MIME types cannot take further
	## arguments.
	##
	## f: the file.
	##
	## mtype: the MIME type; it will be compared case-insensitively.
	global add_analyzers_for_mime_type: function(f: fa_file, mtype: string);

	## Removes an analyzer from the analysis of a given file.
	##
	## f: the file.
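As a usage sketch (not part of the diff itself): a site script typically drives add_analyzer from the stock file_new event. Files::ANALYZER_MD5 is the standard MD5 analyzer tag of this era of the codebase; treat the exact wiring as illustrative.

	# Sketch: attach the MD5 hash analyzer to every file the core reports.
	event file_new(f: fa_file)
		{
		Files::add_analyzer(f, Files::ANALYZER_MD5);
		}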
@@ -224,7 +224,7 @@ export {
	## A callback to generate a file handle on demand when
	## one is needed by the core.
	get_file_handle: function(c: connection, is_orig: bool): string;

	## A callback to "describe" a file. In the case of an HTTP
	## transfer the most obvious description would be the URL.
	## It's like an extremely compressed version of the normal log.
@@ -235,7 +244,7 @@ export {
	## Register callbacks for protocols that work with the Files framework.
	## The callbacks must uniquely identify a file and each protocol can
	## only have a single callback registered for it.
	##
	## tag: Tag for the protocol analyzer having a callback being registered.
	##
	## reg: A :bro:see:`Files::ProtoRegistration` record.
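A protocol script hooks in roughly like the following sketch. Files::register_protocol and the ProtoRegistration field come from the exports above; my_get_file_handle is a hypothetical helper, and any string that uniquely identifies the file within the protocol will do.

	function my_get_file_handle(c: connection, is_orig: bool): string
		{
		# Combine stable connection properties into a unique handle.
		return cat(Analyzer::ANALYZER_FTP_DATA, c$start_time, c$id, is_orig);
		}

	event bro_init() &priority=5
		{
		Files::register_protocol(Analyzer::ANALYZER_FTP_DATA,
		                         [$get_file_handle = my_get_file_handle]);
		}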
@@ -253,6 +262,42 @@ export {
	## callback: Function to execute when the given file analyzer is being added.
	global register_analyzer_add_callback: function(tag: Files::Tag, callback: function(f: fa_file, args: AnalyzerArgs));

	## Registers a set of MIME types for an analyzer. If a future file with
	## one of these types is seen, the analyzer will be automatically assigned
	## to parsing it. The function *adds* to all MIME types already registered;
	## it doesn't replace them.
	##
	## tag: The tag of the analyzer.
	##
	## mts: The set of MIME types, each in the form "foo/bar" (case-insensitive).
	##
	## Returns: True if the MIME types were successfully registered.
	global register_for_mime_types: function(tag: Analyzer::Tag, mts: set[string]) : bool;

	## Registers a MIME type for an analyzer. If a future file with this type is seen,
	## the analyzer will be automatically assigned to parsing it. The function *adds*
	## to all MIME types already registered; it doesn't replace them.
	##
	## tag: The tag of the analyzer.
	##
	## mt: The MIME type in the form "foo/bar" (case-insensitive).
	##
	## Returns: True if the MIME type was successfully registered.
	global register_for_mime_type: function(tag: Analyzer::Tag, mt: string) : bool;

	## Returns a set of all MIME types currently registered for a specific analyzer.
	##
	## tag: The tag of the analyzer.
	##
	## Returns: The set of MIME types.
	global registered_mime_types: function(tag: Analyzer::Tag) : set[string];

	## Returns a table of all MIME-type-to-analyzer mappings currently registered.
	##
	## Returns: A table mapping each analyzer to the set of MIME types
	##          registered for it.
	global all_registered_mime_types: function() : table[Analyzer::Tag] of set[string];

	## Event that can be handled to access the Info record as it is sent on
	## to the logging framework.
	global log_files: event(rec: Info);
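Taken together, these exports let an analyzer be bound to MIME types once at startup, as in this sketch. The PE analyzer tag is hypothetical for this snapshot of the tree (note the declaration above types the parameter as Analyzer::Tag); only the call shape is taken from the exports.

	event bro_init()
		{
		# Auto-attach an executable parser to Windows binaries.
		Files::register_for_mime_types(Files::ANALYZER_PE,
		                               set("application/x-dosexec"));
		}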
@@ -265,6 +310,9 @@ redef record fa_file += {
# Store the callbacks for protocol analyzers that have files.
global registered_protocols: table[Analyzer::Tag] of ProtoRegistration = table();

# Store the MIME type to analyzer mappings.
global mime_types: table[Analyzer::Tag] of set[string];

global analyzer_add_callbacks: table[Files::Tag] of function(f: fa_file, args: AnalyzerArgs) = table();

event bro_init() &priority=5
@@ -287,13 +335,13 @@ function set_info(f: fa_file)
	f$info$source = f$source;
	f$info$duration = f$last_active - f$info$ts;
	f$info$seen_bytes = f$seen_bytes;
	if ( f?$total_bytes )
		f$info$total_bytes = f$total_bytes;
	f$info$missing_bytes = f$missing_bytes;
	f$info$overflow_bytes = f$overflow_bytes;
	if ( f?$is_orig )
		f$info$is_orig = f$is_orig;
	if ( f?$mime_type )
		f$info$mime_type = f$mime_type;
	}

@@ -332,6 +380,15 @@ function add_analyzer(f: fa_file, tag: Files::Tag, args: AnalyzerArgs): bool
	return T;
	}

function add_analyzers_for_mime_type(f: fa_file, mtype: string)
	{
	local dummy_args: AnalyzerArgs;
	local analyzers = __add_analyzers_for_mime_type(f$id, mtype, dummy_args);

	for ( tag in analyzers )
		add f$info$analyzers[Files::analyzer_name(tag)];
	}

function register_analyzer_add_callback(tag: Files::Tag, callback: function(f: fa_file, args: AnalyzerArgs))
	{
	analyzer_add_callbacks[tag] = callback;
@@ -356,6 +413,9 @@ event file_new(f: fa_file) &priority=10
	{
	set_info(f);

	if ( f?$mime_type )
		add_analyzers_for_mime_type(f, f$mime_type);

	if ( enable_reassembler )
		{
		Files::enable_reassembly(f);
@@ -405,6 +465,41 @@ function register_protocol(tag: Analyzer::Tag, reg: ProtoRegistration): bool
	return result;
	}

function register_for_mime_types(tag: Analyzer::Tag, mime_types: set[string]) : bool
	{
	local rc = T;

	for ( mt in mime_types )
		{
		if ( ! register_for_mime_type(tag, mt) )
			rc = F;
		}

	return rc;
	}

function register_for_mime_type(tag: Analyzer::Tag, mt: string) : bool
	{
	if ( ! __register_for_mime_type(tag, mt) )
		return F;

	if ( tag !in mime_types )
		mime_types[tag] = set();

	add mime_types[tag][mt];
	return T;
	}

function registered_mime_types(tag: Analyzer::Tag) : set[string]
	{
	return tag in mime_types ? mime_types[tag] : set();
	}

function all_registered_mime_types(): table[Analyzer::Tag] of set[string]
	{
	return mime_types;
	}

function describe(f: fa_file): string
	{
	local tag = Analyzer::get_tag(f$source);
@@ -4,6 +4,17 @@
module Input;

export {
	type Event: enum {
		EVENT_NEW = 0,
		EVENT_CHANGED = 1,
		EVENT_REMOVED = 2,
	};

	type Mode: enum {
		MANUAL = 0,
		REREAD = 1,
		STREAM = 2
	};

	## The default input reader used. Defaults to `READER_ASCII`.
	const default_reader = READER_ASCII &redef;
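As a usage sketch of the Input framework interface (the record types and file name are illustrative): reading a table in REREAD mode, so on-disk changes are picked up automatically and reported through the Event values above.

	type Idx: record {
		ip: addr;
	};

	type Val: record {
		timestamp: time;
		reason: string;
	};

	global blacklist: table[addr] of Val = table();

	event bro_init()
		{
		# Re-read the whole file whenever it changes; EVENT_NEW,
		# EVENT_CHANGED, and EVENT_REMOVED fire per entry.
		Input::add_table([$source="blacklist.file", $name="blacklist",
		                  $idx=Idx, $val=Val, $destination=blacklist,
		                  $mode=Input::REREAD]);
		}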
@@ -1,7 +1,5 @@
@load ./main
@load ./postprocessors
@load ./writers/ascii
@load ./writers/dataseries
@load ./writers/sqlite
@load ./writers/elasticsearch
@load ./writers/none
@@ -5,9 +5,15 @@
module Log;

# Log::ID and Log::Writer are defined in types.bif due to circular dependencies.

export {
	## Type that defines an ID unique to each log stream. Scripts creating new log
	## streams need to redef this enum to add their own specific log ID. The log ID
	## implicitly determines the default name of the generated log file.
	type Log::ID: enum {
		## Dummy place-holder.
		UNKNOWN
	};

	## If true, local logging is by default enabled for all filters.
	const enable_local_logging = T &redef;
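The standard pattern built on this type looks like the following sketch (module name, record fields, and the log_foo event are illustrative): a script extends Log::ID and registers its stream at startup.

	module Foo;

	export {
		redef enum Log::ID += { LOG };

		type Info: record {
			ts: time    &log;
			msg: string &log;
		};

		global log_foo: event(rec: Info);
	}

	event bro_init()
		{
		Log::create_stream(Foo::LOG, [$columns=Info, $ev=log_foo]);
		Log::write(Foo::LOG, [$ts=network_time(), $msg="example entry"]);
		}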
@@ -27,13 +33,13 @@ export {
	const set_separator = "," &redef;

	## String to use for empty fields. This should be different from
	## *unset_field* to make the output unambiguous.
	## Can be overwritten by individual writers.
	const empty_field = "(empty)" &redef;

	## String to use for an unset &optional field.
	## Can be overwritten by individual writers.
	const unset_field = "-" &redef;

	## Type defining the content of a logging stream.
	type Stream: record {
@@ -26,20 +26,20 @@ export {
	## This option is also available as a per-filter ``$config`` option.
	const use_json = F &redef;

	## Format of timestamps when writing out JSON. By default, the JSON
	## formatter will use double values for timestamps which represent the
	## number of seconds from the UNIX epoch.
	const json_timestamps: JSON::TimestampFormat = JSON::TS_EPOCH &redef;

	## If true, include lines with log meta information such as column names
	## with types, the values of ASCII logging options that are in use, and
	## the time when the file was opened and closed (the latter at the end).
	##
	## If writing in JSON format, this is implicitly disabled.
	const include_meta = T &redef;

	## Prefix for lines with meta information.
	##
	## This option is also available as a per-filter ``$config`` option.
	const meta_prefix = "#" &redef;
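Since these are all &redef constants, deployment tweaks are one-liners. A sketch using the options above, plus the standard JSON timestamp enum:

	# Emit JSON instead of tab-separated ASCII, with ISO 8601 timestamps.
	redef LogAscii::use_json = T;
	redef LogAscii::json_timestamps = JSON::TS_ISO8601;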
@@ -1,60 +0,0 @@
##! Interface for the DataSeries log writer.

module LogDataSeries;

export {
	## Compression to use with the DS output file. Options are:
	##
	## 'none' -- No compression.
	## 'lzf' -- LZF compression (very quick, but leads to larger output files).
	## 'lzo' -- LZO compression (very fast decompression times).
	## 'gz' -- GZIP compression (slower than LZF, but also produces smaller output).
	## 'bz2' -- BZIP2 compression (slower than GZIP, but also produces smaller output).
	const compression = "gz" &redef;

	## The extent buffer size.
	## Larger values here lead to better compression and more efficient writes,
	## but also increase the lag between the time events are received and
	## the time they are actually written to disk.
	const extent_size = 65536 &redef;

	## Should we dump the XML schema we use for this DS file to disk?
	## If yes, the XML schema shares the name of the logfile, but has
	## an XML ending.
	const dump_schema = F &redef;

	## How many threads should DataSeries spawn to perform compression?
	## Note that this dictates the number of threads per log stream. If
	## you're using a lot of streams, you may want to keep this number
	## relatively small.
	##
	## Default value is 1, which will spawn one thread / stream.
	##
	## Maximum is 128, minimum is 1.
	const num_threads = 1 &redef;

	## Should time be stored as an integer or a double?
	## Storing time as a double leads to possible precision issues and
	## can (significantly) increase the size of the resulting DS log.
	## That said, timestamps stored in double form are consistent
	## with the rest of Bro, including the standard ASCII log. Hence, we
	## use them by default.
	const use_integer_for_time = F &redef;
}

# Default function to postprocess a rotated DataSeries log file. It moves the
# rotated file to a new name that includes a timestamp with the opening time,
# and then runs the writer's default postprocessor command on it.
function default_rotation_postprocessor_func(info: Log::RotationInfo) : bool
	{
	# Move file to name including both opening and closing time.
	local dst = fmt("%s.%s.ds", info$path,
	                strftime(Log::default_rotation_date_format, info$open));

	system(fmt("/bin/mv %s %s", info$fname, dst));

	# Run default postprocessor.
	return Log::run_rotation_postprocessor_cmd(info, dst);
	}

redef Log::default_rotation_postprocessors += { [Log::WRITER_DATASERIES] = default_rotation_postprocessor_func };
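For reference, tuning the writer this hunk removes was done via redefs of the constants above; a sketch:

	# Trade file size for speed with LZF, and keep the schema on disk.
	redef LogDataSeries::compression = "lzf";
	redef LogDataSeries::extent_size = 131072;
	redef LogDataSeries::dump_schema = T;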
@@ -1,48 +0,0 @@
##! Log writer for sending logs to an ElasticSearch server.
##!
##! Note: This module is in testing and is not yet considered stable!
##!
##! There is one known memory issue. If your elasticsearch server is
##! running slowly and taking too long to return from bulk insert
##! requests, the message queue to the writer thread will continue
##! growing larger and larger giving the appearance of a memory leak.

module LogElasticSearch;

export {
	## Name of the ES cluster.
	const cluster_name = "elasticsearch" &redef;

	## ES server.
	const server_host = "127.0.0.1" &redef;

	## ES port.
	const server_port = 9200 &redef;

	## Name of the ES index.
	const index_prefix = "bro" &redef;

	## The ES type prefix comes before the name of the related log.
	## e.g. prefix = "bro\_" would create types of bro_dns, bro_software, etc.
	const type_prefix = "" &redef;

	## The time before an ElasticSearch transfer will timeout. Note that
	## the fractional part of the timeout will be ignored. In particular,
	## time specifications less than a second result in a timeout value of
	## 0, which means "no timeout."
	const transfer_timeout = 2secs;

	## The batch size is the number of messages that will be queued up before
	## they are sent to be bulk indexed.
	const max_batch_size = 1000 &redef;

	## The maximum amount of wall-clock time that is allowed to pass without
	## finishing a bulk log send. This represents the maximum delay you
	## would like to have with your logs before they are sent to ElasticSearch.
	const max_batch_interval = 1min &redef;

	## The maximum byte size for a buffered JSON string to send to the bulk
	## insert API.
	const max_byte_size = 1024 * 1024 &redef;
}
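Likewise for the removed ElasticSearch writer, deployments pointed it at their cluster through redefs of the options above (the host name is illustrative; transfer_timeout is not declared &redef, so it is left alone):

	redef LogElasticSearch::server_host = "es.example.com";
	redef LogElasticSearch::server_port = 9200;
	redef LogElasticSearch::max_batch_size = 5000;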
@@ -20,7 +20,8 @@ export {
	## category along with the specific notice separating words with
	## underscores and using leading capitals on each word except for
	## abbreviations which are kept in all capitals. For example,
	## SSH::Password_Guessing is for hosts that have crossed a threshold of
	## heuristically determined failed SSH logins.
	type Type: enum {
		## Notice reporting a count of how often a notice occurred.
		Tally,
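The naming convention reads concretely like this sketch (module and notice names are illustrative):

	module MyScan;

	export {
		redef enum Notice::Type += {
			## Indicates that a host crossed the scan-detection threshold.
			Address_Scan,
		};
	}

	# Raised from detection logic elsewhere, e.g.:
	# NOTICE([$note=MyScan::Address_Scan,
	#         $msg=fmt("%s scanned %d hosts", scanner, n)]);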
@@ -71,7 +71,7 @@ export {
	## to be logged has occurred.
	ts: time &log;
	## A unique identifier of the connection which triggered the
	## signature match event.
	uid: string &log &optional;
	## The host which triggered the signature match event.
	src_addr: addr &log &optional;