Merge remote-tracking branch 'origin/master' into topic/vladg/file-analysis-exe-analyzer

Conflicts:
	scripts/base/init-default.bro
	src/file_analysis/analyzer/CMakeLists.txt
Vlad Grigorescu 2014-06-21 13:15:14 -04:00
commit b91b0646b8
1719 changed files with 78600 additions and 198617 deletions

@ -0,0 +1 @@
Support for extracting files with the file analysis framework.

@ -0,0 +1 @@
@load ./main

@ -0,0 +1,66 @@
@load base/frameworks/files
@load base/utils/paths
module FileExtract;
export {
## The prefix where files are extracted to.
const prefix = "./extract_files/" &redef;
## The default max size for extracted files (they won't exceed this
## number of bytes). A value of zero means unlimited.
const default_limit = 0 &redef;
redef record Files::Info += {
## Local filename of extracted file.
extracted: string &optional &log;
};
redef record Files::AnalyzerArgs += {
## The local filename to which to write an extracted file.
## This field is used in the core by the extraction plugin
## to know where to write the file to. If not specified, then
## a filename in the format "extract-<source>-<id>" is
## automatically assigned (using the *source* and *id*
## fields of :bro:see:`fa_file`).
extract_filename: string &optional;
## The maximum allowed file size in bytes of *extract_filename*.
## Once reached, a :bro:see:`file_extraction_limit` event is
## raised and the analyzer will be removed unless
## :bro:see:`FileExtract::set_limit` is called to increase the
## limit. A value of zero means "no limit".
extract_limit: count &default=default_limit;
};
## Sets the maximum allowed extracted file size.
##
## f: A file that's being extracted.
##
## args: Arguments that identify a file extraction analyzer.
##
## n: Allowed number of bytes to be extracted.
##
## Returns: false if a file extraction analyzer wasn't active for
## the file, else true.
global set_limit: function(f: fa_file, args: Files::AnalyzerArgs, n: count): bool;
}
function set_limit(f: fa_file, args: Files::AnalyzerArgs, n: count): bool
{
return __set_limit(f$id, args, n);
}
function on_add(f: fa_file, args: Files::AnalyzerArgs)
{
if ( ! args?$extract_filename )
args$extract_filename = cat("extract-", f$source, "-", f$id);
f$info$extracted = args$extract_filename;
args$extract_filename = build_path_compressed(prefix, args$extract_filename);
mkdir(prefix);
}
event bro_init() &priority=10
{
Files::register_analyzer_add_callback(Files::ANALYZER_EXTRACT, on_add);
}
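# Editor's note: a minimal usage sketch, not part of this commit. It attaches
# the extract analyzer to every new file; the "sample-" filename prefix and
# the one-megabyte limit are arbitrary placeholders.
event file_new(f: fa_file)
    {
    Files::add_analyzer(f, Files::ANALYZER_EXTRACT,
                        [$extract_filename=cat("sample-", f$id),
                         $extract_limit=1048576]);
    }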

@ -0,0 +1 @@
Support for file hashes with the file analysis framework.

@ -0,0 +1 @@
@load ./main

@ -0,0 +1,32 @@
@load base/frameworks/files
module FileHash;
export {
redef record Files::Info += {
## An MD5 digest of the file contents.
md5: string &log &optional;
## A SHA1 digest of the file contents.
sha1: string &log &optional;
## A SHA256 digest of the file contents.
sha256: string &log &optional;
};
}
event file_hash(f: fa_file, kind: string, hash: string) &priority=5
{
switch ( kind ) {
case "md5":
f$info$md5 = hash;
break;
case "sha1":
f$info$sha1 = hash;
break;
case "sha256":
f$info$sha256 = hash;
break;
}
}
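# Editor's note: a hedged usage sketch, not part of this commit. Attaching the
# hash analyzers (tags assumed from the hash analyzer plugin) makes the
# file_hash event above fire and fill in the fields added here.
event file_new(f: fa_file)
    {
    Files::add_analyzer(f, Files::ANALYZER_MD5);
    Files::add_analyzer(f, Files::ANALYZER_SHA1);
    Files::add_analyzer(f, Files::ANALYZER_SHA256);
    }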

@ -0,0 +1 @@
Support for Unified2 files in the file analysis framework.

@ -0,0 +1 @@
@load ./main

@ -0,0 +1,244 @@
@load base/utils/dir
@load base/utils/paths
module Unified2;
export {
redef enum Log::ID += { LOG };
## A single Unified2 file to watch for new records.
const watch_file = "" &redef;
## Directory to watch for Unified2 records.
const watch_dir = "" &redef;
## The sid-msg.map file you would like to use for your alerts.
const sid_msg = "" &redef;
## The gen-msg.map file you would like to use for your alerts.
const gen_msg = "" &redef;
## The classification.config file you would like to use for your alerts.
const classification_config = "" &redef;
## Reconstructed "alert" which combines related events
## and packets.
global alert: event(f: fa_file, ev: Unified2::IDSEvent, pkt: Unified2::Packet);
type PacketID: record {
src_ip: addr;
src_p: port;
dst_ip: addr;
dst_p: port;
} &log;
type Info: record {
## Timestamp attached to the alert.
ts: time &log;
## Addresses and ports for the connection.
id: PacketID &log;
## Sensor that originated this event.
sensor_id: count &log;
## Sig id for this generator.
signature_id: count &log;
## A string representation of the *signature_id* field if a sid_msg.map file was loaded.
signature: string &log &optional;
## Which generator generated the alert?
generator_id: count &log;
## A string representation of the *generator_id* field if a gen_msg.map file was loaded.
generator: string &log &optional;
## Sig revision for this id.
signature_revision: count &log;
## Event classification.
classification_id: count &log;
## Descriptive classification string.
classification: string &log &optional;
## Event priority.
priority_id: count &log;
## Event ID.
event_id: count &log;
## Some of the packet data.
packet: string &log &optional;
} &log;
## The event for accessing logged records.
global log_unified2: event(rec: Info);
}
# Mappings for extended information from alerts.
global classification_map: table[count] of string;
global sid_map: table[count] of string;
global gen_map: table[count] of string;
# For reading in config files.
type OneLine: record {
line: string;
};
function create_info(ev: IDSEvent): Info
{
local info = Info($ts=ev$ts,
$id=PacketID($src_ip=ev$src_ip, $src_p=ev$src_p,
$dst_ip=ev$dst_ip, $dst_p=ev$dst_p),
$sensor_id=ev$sensor_id,
$signature_id=ev$signature_id,
$generator_id=ev$generator_id,
$signature_revision=ev$signature_revision,
$classification_id=ev$classification_id,
$priority_id=ev$priority_id,
$event_id=ev$event_id);
if ( ev$signature_id in sid_map )
info$signature=sid_map[ev$signature_id];
if ( ev$generator_id in gen_map )
info$generator=gen_map[ev$generator_id];
if ( ev$classification_id in classification_map )
info$classification=classification_map[ev$classification_id];
return info;
}
redef record fa_file += {
## Recently received IDS events. This is primarily used
## for tying together Unified2 events and packets.
u2_events: table[count] of Unified2::IDSEvent
&optional &create_expire=5sec
&expire_func=function(t: table[count] of Unified2::IDSEvent, event_id: count): interval
{
Log::write(LOG, create_info(t[event_id]));
return 0secs;
};
};
event Unified2::read_sid_msg_line(desc: Input::EventDescription, tpe: Input::Event, line: string)
{
local parts = split_n(line, / \|\| /, F, 100);
if ( |parts| >= 2 && /^[0-9]+$/ in parts[1] )
sid_map[to_count(parts[1])] = parts[2];
}
event Unified2::read_gen_msg_line(desc: Input::EventDescription, tpe: Input::Event, line: string)
{
local parts = split_n(line, / \|\| /, F, 3);
if ( |parts| >= 3 && /^[0-9]+$/ in parts[1] )
gen_map[to_count(parts[1])] = parts[3];
}
event Unified2::read_classification_line(desc: Input::EventDescription, tpe: Input::Event, line: string)
{
local parts = split_n(line, /: /, F, 2);
if ( |parts| == 2 )
{
local parts2 = split_n(parts[2], /,/, F, 4);
if ( |parts2| > 1 )
classification_map[|classification_map|+1] = parts2[1];
}
}
event bro_init() &priority=5
{
Log::create_stream(Unified2::LOG, [$columns=Info, $ev=log_unified2]);
if ( sid_msg != "" )
{
Input::add_event([$source=sid_msg,
$reader=Input::READER_RAW,
$mode=Input::REREAD,
$name=sid_msg,
$fields=Unified2::OneLine,
$want_record=F,
$ev=Unified2::read_sid_msg_line]);
}
if ( gen_msg != "" )
{
Input::add_event([$source=gen_msg,
$name=gen_msg,
$reader=Input::READER_RAW,
$mode=Input::REREAD,
$fields=Unified2::OneLine,
$want_record=F,
$ev=Unified2::read_gen_msg_line]);
}
if ( classification_config != "" )
{
Input::add_event([$source=classification_config,
$name=classification_config,
$reader=Input::READER_RAW,
$mode=Input::REREAD,
$fields=Unified2::OneLine,
$want_record=F,
$ev=Unified2::read_classification_line]);
}
if ( watch_dir != "" )
{
Dir::monitor(watch_dir, function(fname: string)
{
Input::add_analysis([$source=fname,
$reader=Input::READER_BINARY,
$mode=Input::STREAM,
$name=fname]);
}, 10secs);
}
if ( watch_file != "" )
{
Input::add_analysis([$source=watch_file,
$reader=Input::READER_BINARY,
$mode=Input::STREAM,
$name=watch_file]);
}
}
event file_new(f: fa_file)
{
local file_dir = "";
local parts = split_all(f$source, /\/[^\/]*$/);
if ( |parts| == 3 )
file_dir = parts[1];
if ( (watch_file != "" && f$source == watch_file) ||
(watch_dir != "" && compress_path(watch_dir) == file_dir) )
{
Files::add_analyzer(f, Files::ANALYZER_UNIFIED2);
f$u2_events = table();
}
}
event unified2_event(f: fa_file, ev: Unified2::IDSEvent)
{
f$u2_events[ev$event_id] = ev;
}
event unified2_packet(f: fa_file, pkt: Unified2::Packet)
{
if ( f?$u2_events && pkt$event_id in f$u2_events )
{
local ev = f$u2_events[pkt$event_id];
event Unified2::alert(f, ev, pkt);
delete f$u2_events[pkt$event_id];
}
}
event Unified2::alert(f: fa_file, ev: IDSEvent, pkt: Packet)
{
local info = create_info(ev);
info$packet=pkt$data;
Log::write(LOG, info);
}
event file_state_remove(f: fa_file)
{
if ( f?$u2_events )
{
# In case any events never had matching packets, flush
# the extras to the log.
for ( i in f$u2_events )
{
Log::write(LOG, create_info(f$u2_events[i]));
}
}
}
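# Editor's note: a hypothetical deployment sketch, not part of this commit.
# The Snort paths are placeholders; Unified2::alert is the reconstructed
# event declared above, and additional handlers for it are allowed.
redef Unified2::watch_dir = "/var/spool/snort";
redef Unified2::sid_msg = "/etc/snort/sid-msg.map";

event Unified2::alert(f: fa_file, ev: Unified2::IDSEvent, pkt: Unified2::Packet)
    {
    print fmt("IDS alert: sid=%d gid=%d rev=%d", ev$signature_id,
              ev$generator_id, ev$signature_revision);
    }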

@ -0,0 +1 @@
Support for X509 certificates with the file analysis framework.

@ -0,0 +1 @@
@load ./main

@ -0,0 +1,77 @@
@load base/frameworks/files
@load base/files/hash
module X509;
export {
redef enum Log::ID += { LOG };
type Info: record {
## Time when the certificate was seen.
ts: time &log;
## File id of this certificate.
id: string &log;
## Basic information about the certificate.
certificate: X509::Certificate &log;
## The opaque wrapping the certificate. Mainly used
## for the verify operations.
handle: opaque of x509;
## All extensions that were encountered in the certificate.
extensions: vector of X509::Extension &default=vector();
## Subject alternative name extension of the certificate.
san: X509::SubjectAlternativeName &optional &log;
## Basic constraints extension of the certificate.
basic_constraints: X509::BasicConstraints &optional &log;
};
## Event for accessing logged records.
global log_x509: event(rec: Info);
}
event bro_init() &priority=5
{
Log::create_stream(X509::LOG, [$columns=Info, $ev=log_x509]);
}
redef record Files::Info += {
## Information about X509 certificates. This is used to keep
## certificate information until all events have been received.
x509: X509::Info &optional;
};
event x509_certificate(f: fa_file, cert_ref: opaque of x509, cert: X509::Certificate) &priority=5
{
f$info$x509 = [$ts=f$info$ts, $id=f$id, $certificate=cert, $handle=cert_ref];
}
event x509_extension(f: fa_file, ext: X509::Extension) &priority=5
{
if ( f$info?$x509 )
f$info$x509$extensions[|f$info$x509$extensions|] = ext;
}
event x509_ext_basic_constraints(f: fa_file, ext: X509::BasicConstraints) &priority=5
{
if ( f$info?$x509 )
f$info$x509$basic_constraints = ext;
}
event x509_ext_subject_alternative_name(f: fa_file, ext: X509::SubjectAlternativeName) &priority=5
{
if ( f$info?$x509 )
f$info$x509$san = ext;
}
event file_state_remove(f: fa_file) &priority=5
{
if ( ! f$info?$x509 )
return;
Log::write(LOG, f$info$x509);
}
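# Editor's note: a hedged sketch, not part of this commit, showing a user
# script consuming the same x509_certificate event this script handles; the
# *subject* field is part of X509::Certificate.
event x509_certificate(f: fa_file, cert_ref: opaque of x509, cert: X509::Certificate)
    {
    print fmt("saw certificate for %s", cert$subject);
    }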

@ -0,0 +1,3 @@
The analyzer framework allows Bro's protocol analyzers to be enabled or
disabled dynamically, and manages the well-known ports which automatically
activate a particular analyzer for new connections.

@ -5,8 +5,8 @@
##! particular analyzer for new connections.
##!
##! Protocol analyzers are identified by unique tags of type
##! :bro:type:`Analyzer::Tag`, such as :bro:enum:`Analyzer::ANALYZER_HTTP` and
##! :bro:enum:`Analyzer::ANALYZER_HTTP`. These tags are defined internally by
##! :bro:type:`Analyzer::Tag`, such as :bro:enum:`Analyzer::ANALYZER_HTTP`.
##! These tags are defined internally by
##! the analyzers themselves, and documented in their analyzer-specific
##! description along with the events that they generate.
@ -15,8 +15,8 @@
module Analyzer;
export {
## If true, all available analyzers are initially disabled at startup. One
## can then selectively enable them with
## If true, all available analyzers are initially disabled at startup.
## One can then selectively enable them with
## :bro:id:`Analyzer::enable_analyzer`.
global disable_all = F &redef;
@ -45,7 +45,7 @@ export {
##
## ports: The set of well-known ports to associate with the analyzer.
##
## Returns: True if the ports were sucessfully registered.
## Returns: True if the ports were successfully registered.
global register_for_ports: function(tag: Analyzer::Tag, ports: set[port]) : bool;
## Registers an individual well-known port for an analyzer. If a future
@ -57,7 +57,7 @@ export {
##
## p: The well-known port to associate with the analyzer.
##
## Returns: True if the port was sucessfully registered.
## Returns: True if the port was successfully registered.
global register_for_port: function(tag: Analyzer::Tag, p: port) : bool;
## Returns a set of all well-known ports currently registered for a
@ -81,8 +81,15 @@ export {
## Returns: The analyzer name corresponding to the tag.
global name: function(tag: Analyzer::Tag) : string;
## Schedules an analyzer for a future connection originating from a given IP
## address and port.
## Translates an analyzer's name to a tag enum value.
##
## name: The analyzer name.
##
## Returns: The analyzer tag corresponding to the name.
global get_tag: function(name: string): Analyzer::Tag;
## Schedules an analyzer for a future connection originating from a
## given IP address and port.
##
## orig: The IP address originating a connection in the future.
## 0.0.0.0 can be used as a wildcard to match any originator address.
@ -96,7 +103,7 @@ export {
## tout: A timeout interval after which the scheduling request will be
## discarded if the connection has not yet been seen.
##
## Returns: True if succesful.
## Returns: True if successful.
global schedule_analyzer: function(orig: addr, resp: addr, resp_p: port,
analyzer: Analyzer::Tag, tout: interval) : bool;
@ -187,6 +194,11 @@ function name(atype: Analyzer::Tag) : string
return __name(atype);
}
function get_tag(name: string): Analyzer::Tag
{
return __tag(name);
}
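# Editor's note: the new get_tag function is the inverse of name() above;
# e.g., assuming the HTTP analyzer is compiled in,
# Analyzer::get_tag("HTTP") yields Analyzer::ANALYZER_HTTP, and
# Analyzer::name(Analyzer::ANALYZER_HTTP) yields "HTTP" again.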
function schedule_analyzer(orig: addr, resp: addr, resp_p: port,
analyzer: Analyzer::Tag, tout: interval) : bool
{

@ -0,0 +1,2 @@
The cluster framework provides for establishing and controlling a cluster
of Bro instances.

@ -39,7 +39,8 @@ export {
## The node type doing all the actual traffic analysis.
WORKER,
## A node acting as a traffic recorder using the
## `Time Machine <http://tracker.bro.org/time-machine>`_ software.
## `Time Machine <http://bro.org/community/time-machine.html>`_
## software.
TIME_MACHINE,
};
@ -58,7 +59,7 @@ export {
## Events raised by workers and handled by a manager.
const worker2manager_events = /(TimeMachine::command|Drop::.*)/ &redef;
## Events raised by workers and handled by proxies..
## Events raised by workers and handled by proxies.
const worker2proxy_events = /EMPTY/ &redef;
## Events raised by TimeMachine instances and handled by a manager.
@ -73,14 +74,14 @@ export {
## Record type to indicate a node in a cluster.
type Node: record {
## Identifies the type of cluster node in this node's configuration.
## Identifies the type of cluster node in this node's configuration.
node_type: NodeType;
## The IP address of the cluster node.
ip: addr;
## If the *ip* field is a non-global IPv6 address, this field
## can specify a particular :rfc:`4007` ``zone_id``.
zone_id: string &default="";
## The port to which the this local node can connect when
## The port to which this local node can connect when
## establishing communication.
p: port;
## Identifier for the interface a worker is sniffing.
@ -119,6 +120,7 @@ export {
## The cluster layout definition. This should be placed into a file
## named cluster-layout.bro somewhere in the BROPATH. It will be
## automatically loaded if the CLUSTER_NODE environment variable is set.
## Note that BroControl handles all of this automatically.
const nodes: table[string] of Node = {} &redef;
## This is usually supplied on the command line for each instance

@ -19,6 +19,6 @@ redef Log::default_rotation_postprocessor_cmd = "delete-log";
## Record all packets into trace file.
##
## Note that this only indicates that *if* we are recording packets, we want all
## of them (rather than just those the core deems sufficiently important). Setting
## this does not turn recording on. Use '-w <trace>' for that.
## of them (rather than just those the core deems sufficiently important).
## Setting this does not turn recording on. Use '-w <trace>' for that.
redef record_all_packets = T;

@ -0,0 +1,2 @@
The communication framework facilitates connecting to remote Bro or
Broccoli instances to share state and transfer events.

@ -15,13 +15,16 @@ export {
## are wildcards.
const listen_interface = 0.0.0.0 &redef;
## Which port to listen on.
## Which port to listen on. Note that BroControl sets this
## automatically.
const listen_port = 47757/tcp &redef;
## This defines if a listening socket should use SSL.
const listen_ssl = F &redef;
## Defines if a listening socket can bind to IPv6 addresses.
##
## Note that this is overridden by the BroControl IPv6Comm option.
const listen_ipv6 = F &redef;
## If :bro:id:`Communication::listen_interface` is a non-global
@ -42,10 +45,11 @@ export {
type Info: record {
## The network time at which a communication event occurred.
ts: time &log;
## The peer name (if any) with which a communication event is concerned.
## The peer name (if any) with which a communication event is
## concerned.
peer: string &log &optional;
## Where the communication event message originated from, that is,
## either from the scripting layer or inside the Bro process.
## Where the communication event message originated from, that
## is, either from the scripting layer or inside the Bro process.
src_name: string &log &optional;
## .. todo:: currently unused.
connected_peer_desc: string &log &optional;
@ -71,8 +75,8 @@ export {
## can specify a particular :rfc:`4007` ``zone_id``.
zone_id: string &optional;
## Port of the remote Bro communication endpoint if we are initiating
## the connection based on the :bro:id:`connect` field.
## Port of the remote Bro communication endpoint if we are
## initiating the connection (based on the *connect* field).
p: port &optional;
## When accepting a connection, the configuration only
@ -87,7 +91,7 @@ export {
events: pattern &optional;
## Whether we are going to connect (rather than waiting
## for the other sie to connect to us).
## for the other side to connect to us).
connect: bool &default = F;
## If disconnected, reconnect after this many seconds.
@ -103,13 +107,14 @@ export {
request_logs: bool &default = F;
## When performing state synchronization, whether we consider
## our state to be authoritative. If so, we will send the peer
## our current set when the connection is set up.
## (Only one side can be authoritative)
## our state to be authoritative (only one side can be
## authoritative). If so, we will send the peer our current
## set when the connection is set up.
auth: bool &default = F;
## If not set, no capture filter is sent.
## If set to "", the default capture filter is sent.
## If set to an empty string, then the default capture filter
## is sent.
capture_filter: string &optional;
## Whether to use SSL-based communication.
@ -126,7 +131,8 @@ export {
};
## The table of Bro or Broccoli nodes that Bro will initiate connections
## to or respond to connections from.
## to or respond to connections from. Note that BroControl sets this
## automatically.
global nodes: table[string] of Node &redef;
## A table of peer nodes for which this node issued a

@ -0,0 +1,3 @@
The control framework provides the foundation for "commands" that can be
issued remotely at runtime to modify a running Bro instance or to collect
information from it.

@ -57,7 +57,8 @@ export {
## Returns the current net_stats.
global net_stats_response: event(s: string);
## Inform the remote Bro instance that it's configuration may have been updated.
## Inform the remote Bro instance that its configuration may have been
## updated.
global configuration_update_request: event();
## This event is a wrapper and alias for the
## :bro:id:`Control::configuration_update_request` event.

@ -0,0 +1,2 @@
The DPD (dynamic protocol detection) framework activates port-independent
protocol detection and selectively disables analyzers if protocol violations occur.

@ -1,261 +0,0 @@
##! An interface for driving the analysis of files, possibly independent of
##! any network protocol over which they're transported.
@load base/bif/file_analysis.bif
@load base/frameworks/logging
module FileAnalysis;
export {
redef enum Log::ID += {
## Logging stream for file analysis.
LOG
};
## A structure which represents a desired type of file analysis.
type AnalyzerArgs: record {
## The type of analysis.
tag: FileAnalysis::Tag;
## The local filename to which to write an extracted file. Must be
## set when *tag* is :bro:see:`FileAnalysis::ANALYZER_EXTRACT`.
extract_filename: string &optional;
## An event which will be generated for all new file contents,
## chunk-wise. Used when *tag* is
## :bro:see:`FileAnalysis::ANALYZER_DATA_EVENT`.
chunk_event: event(f: fa_file, data: string, off: count) &optional;
## An event which will be generated for all new file contents,
## stream-wise. Used when *tag* is
## :bro:see:`FileAnalysis::ANALYZER_DATA_EVENT`.
stream_event: event(f: fa_file, data: string) &optional;
} &redef;
## Contains all metadata related to the analysis of a given file.
## For the most part, fields here are derived from ones of the same name
## in :bro:see:`fa_file`.
type Info: record {
## An identifier associated with a single file.
id: string &log;
## Identifier associated with a container file from which this one was
## extracted as part of the file analysis.
parent_id: string &log &optional;
## An identification of the source of the file data. E.g. it may be
## a network protocol over which it was transferred, or a local file
## path which was read, or some other input source.
source: string &log &optional;
## If the source of this file is is a network connection, this field
## may be set to indicate the directionality.
is_orig: bool &log &optional;
## The time at which the last activity for the file was seen.
last_active: time &log;
## Number of bytes provided to the file analysis engine for the file.
seen_bytes: count &log &default=0;
## Total number of bytes that are supposed to comprise the full file.
total_bytes: count &log &optional;
## The number of bytes in the file stream that were completely missed
## during the process of analysis e.g. due to dropped packets.
missing_bytes: count &log &default=0;
## The number of not all-in-sequence bytes in the file stream that
## were delivered to file analyzers due to reassembly buffer overflow.
overflow_bytes: count &log &default=0;
## The amount of time between receiving new data for this file that
## the analysis engine will wait before giving up on it.
timeout_interval: interval &log &optional;
## The number of bytes at the beginning of a file to save for later
## inspection in *bof_buffer* field.
bof_buffer_size: count &log &optional;
## A mime type provided by libmagic against the *bof_buffer*, or
## in the cases where no buffering of the beginning of file occurs,
## an initial guess of the mime type based on the first data seen.
mime_type: string &log &optional;
## Whether the file analysis timed out at least once for the file.
timedout: bool &log &default=F;
## Connection UIDS over which the file was transferred.
conn_uids: set[string] &log;
## A set of analysis types done during the file analysis.
analyzers: set[FileAnalysis::Tag];
## Local filenames of extracted files.
extracted_files: set[string] &log;
## An MD5 digest of the file contents.
md5: string &log &optional;
## A SHA1 digest of the file contents.
sha1: string &log &optional;
## A SHA256 digest of the file contents.
sha256: string &log &optional;
} &redef;
## A table that can be used to disable file analysis completely for
## any files transferred over given network protocol analyzers.
const disable: table[Analyzer::Tag] of bool = table() &redef;
## Event that can be handled to access the Info record as it is sent on
## to the logging framework.
global log_file_analysis: event(rec: Info);
## The salt concatenated to unique file handle strings generated by
## :bro:see:`get_file_handle` before hashing them in to a file id
## (the *id* field of :bro:see:`fa_file`).
## Provided to help mitigate the possiblility of manipulating parts of
## network connections that factor in to the file handle in order to
## generate two handles that would hash to the same file id.
const salt = "I recommend changing this." &redef;
## Sets the *timeout_interval* field of :bro:see:`fa_file`, which is
## used to determine the length of inactivity that is allowed for a file
## before internal state related to it is cleaned up. When used within a
## :bro:see:`file_timeout` handler, the analysis will delay timing out
## again for the period specified by *t*.
##
## f: the file.
##
## t: the amount of time the file can remain inactive before discarding.
##
## Returns: true if the timeout interval was set, or false if analysis
## for the *id* isn't currently active.
global set_timeout_interval: function(f: fa_file, t: interval): bool;
## Adds an analyzer to the analysis of a given file.
##
## f: the file.
##
## args: the analyzer type to add along with any arguments it takes.
##
## Returns: true if the analyzer will be added, or false if analysis
## for the *id* isn't currently active or the *args*
## were invalid for the analyzer type.
global add_analyzer: function(f: fa_file, args: AnalyzerArgs): bool;
## Removes an analyzer from the analysis of a given file.
##
## f: the file.
##
## args: the analyzer (type and args) to remove.
##
## Returns: true if the analyzer will be removed, or false if analysis
## for the *id* isn't currently active.
global remove_analyzer: function(f: fa_file, args: AnalyzerArgs): bool;
## Stops/ignores any further analysis of a given file.
##
## f: the file.
##
## Returns: true if analysis for the given file will be ignored for the
## rest of it's contents, or false if analysis for the *id*
## isn't currently active.
global stop: function(f: fa_file): bool;
}
redef record fa_file += {
info: Info &optional;
};
function set_info(f: fa_file)
{
if ( ! f?$info )
{
local tmp: Info;
f$info = tmp;
}
f$info$id = f$id;
if ( f?$parent_id ) f$info$parent_id = f$parent_id;
if ( f?$source ) f$info$source = f$source;
if ( f?$is_orig ) f$info$is_orig = f$is_orig;
f$info$last_active = f$last_active;
f$info$seen_bytes = f$seen_bytes;
if ( f?$total_bytes ) f$info$total_bytes = f$total_bytes;
f$info$missing_bytes = f$missing_bytes;
f$info$overflow_bytes = f$overflow_bytes;
f$info$timeout_interval = f$timeout_interval;
f$info$bof_buffer_size = f$bof_buffer_size;
if ( f?$mime_type ) f$info$mime_type = f$mime_type;
if ( f?$conns )
for ( cid in f$conns )
add f$info$conn_uids[f$conns[cid]$uid];
}
function set_timeout_interval(f: fa_file, t: interval): bool
{
return __set_timeout_interval(f$id, t);
}
function add_analyzer(f: fa_file, args: AnalyzerArgs): bool
{
if ( ! __add_analyzer(f$id, args) ) return F;
set_info(f);
add f$info$analyzers[args$tag];
if ( args$tag == FileAnalysis::ANALYZER_EXTRACT )
add f$info$extracted_files[args$extract_filename];
return T;
}
function remove_analyzer(f: fa_file, args: AnalyzerArgs): bool
{
return __remove_analyzer(f$id, args);
}
function stop(f: fa_file): bool
{
return __stop(f$id);
}
event bro_init() &priority=5
{
Log::create_stream(FileAnalysis::LOG,
[$columns=Info, $ev=log_file_analysis]);
}
event file_timeout(f: fa_file) &priority=5
{
set_info(f);
f$info$timedout = T;
}
event file_hash(f: fa_file, kind: string, hash: string) &priority=5
{
set_info(f);
switch ( kind ) {
case "md5":
f$info$md5 = hash;
break;
case "sha1":
f$info$sha1 = hash;
break;
case "sha256":
f$info$sha256 = hash;
break;
}
}
event file_state_remove(f: fa_file) &priority=5
{
set_info(f);
}
event file_state_remove(f: fa_file) &priority=-5
{
Log::write(FileAnalysis::LOG, f$info);
}

@ -0,0 +1,3 @@
The file analysis framework provides an interface for driving the analysis
of files, possibly independent of any network protocol over which they're
transported.

@ -0,0 +1,2 @@
@load-sigs ./general
@load-sigs ./libmagic

@ -0,0 +1,11 @@
# General purpose file magic signatures.
signature file-plaintext {
file-magic /([[:print:][:space:]]{10})/
file-mime "text/plain", -20
}
signature file-tar {
file-magic /([[:print:]\x00]){100}(([[:digit:]\x00\x20]){8}){3}/
file-mime "application/x-tar", 150
}
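# Editor's note: a hypothetical signature in the same style, not part of this
# commit, matching the magic bytes at the start of a PDF file:
signature file-pdf {
file-magic /^%PDF-/
file-mime "application/pdf", 100
}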

File diff suppressed because it is too large.

@ -0,0 +1,369 @@
##! An interface for driving the analysis of files, possibly independent of
##! any network protocol over which they're transported.
@load base/bif/file_analysis.bif
@load base/frameworks/analyzer
@load base/frameworks/logging
@load base/utils/site
module Files;
export {
redef enum Log::ID += {
## Logging stream for file analysis.
LOG
};
## A structure which parameterizes a type of file analysis.
type AnalyzerArgs: record {
## An event which will be generated for all new file contents,
## chunk-wise. Used when *tag* (in the
## :bro:see:`Files::add_analyzer` function) is
## :bro:see:`Files::ANALYZER_DATA_EVENT`.
chunk_event: event(f: fa_file, data: string, off: count) &optional;
## An event which will be generated for all new file contents,
## stream-wise. Used when *tag* is
## :bro:see:`Files::ANALYZER_DATA_EVENT`.
stream_event: event(f: fa_file, data: string) &optional;
} &redef;
## Contains all metadata related to the analysis of a given file.
## For the most part, fields here are derived from ones of the same name
## in :bro:see:`fa_file`.
type Info: record {
## The time when the file was first seen.
ts: time &log;
## An identifier associated with a single file.
fuid: string &log;
## If this file was transferred over a network
## connection, this should show the host or hosts that
## the data originated from.
tx_hosts: set[addr] &default=addr_set() &log;
## If this file was transferred over a network
## connection, this should show the host or hosts that
## the data traveled to.
rx_hosts: set[addr] &default=addr_set() &log;
## Connection UIDs over which the file was transferred.
conn_uids: set[string] &default=string_set() &log;
## An identification of the source of the file data. E.g. it
## may be a network protocol over which it was transferred, or a
## local file path which was read, or some other input source.
source: string &log &optional;
## A value to represent the depth of this file in relation
## to its source. In SMTP, it is the depth of the MIME
## attachment on the message. In HTTP, it is the depth of the
## request within the TCP connection.
depth: count &default=0 &log;
## A set of analysis types done during the file analysis.
analyzers: set[string] &default=string_set() &log;
## A mime type provided by the strongest file magic signature
## match against the *bof_buffer* field of :bro:see:`fa_file`,
## or in the cases where no buffering of the beginning of file
## occurs, an initial guess of the mime type based on the first
## data seen.
mime_type: string &log &optional;
## A filename for the file if one is available from the source
## for the file. These will frequently come from
## "Content-Disposition" headers in network protocols.
filename: string &log &optional;
## The duration the file was analyzed for.
duration: interval &log &default=0secs;
## If the source of this file is a network connection, this field
## indicates if the data originated from the local network or not as
## determined by the configured :bro:see:`Site::local_nets`.
local_orig: bool &log &optional;
## If the source of this file is a network connection, this field
## indicates if the file is being sent by the originator of the
## connection or the responder.
is_orig: bool &log &optional;
## Number of bytes provided to the file analysis engine for the file.
seen_bytes: count &log &default=0;
## Total number of bytes that are supposed to comprise the full file.
total_bytes: count &log &optional;
## The number of bytes in the file stream that were completely missed
## during the process of analysis e.g. due to dropped packets.
missing_bytes: count &log &default=0;
## The number of bytes in the file stream that were delivered to
## file analyzers out of sequence due to reassembly buffer overflow.
overflow_bytes: count &log &default=0;
## Whether the file analysis timed out at least once for the file.
timedout: bool &log &default=F;
## Identifier associated with a container file from which this one was
## extracted as part of the file analysis.
parent_fuid: string &log &optional;
} &redef;
## A table that can be used to disable file analysis completely for
## any files transferred over given network protocol analyzers.
const disable: table[Files::Tag] of bool = table() &redef;
## The salt concatenated to unique file handle strings generated by
## :bro:see:`get_file_handle` before hashing them in to a file id
## (the *id* field of :bro:see:`fa_file`).
## Provided to help mitigate the possibility of manipulating parts of
## network connections that factor in to the file handle in order to
## generate two handles that would hash to the same file id.
const salt = "I recommend changing this." &redef;
## Sets the *timeout_interval* field of :bro:see:`fa_file`, which is
## used to determine the length of inactivity that is allowed for a file
## before internal state related to it is cleaned up. When used within
## a :bro:see:`file_timeout` handler, the analysis will delay timing out
## again for the period specified by *t*.
##
## f: the file.
##
## t: the amount of time the file can remain inactive before discarding.
##
## Returns: true if the timeout interval was set, or false if analysis
## for the file isn't currently active.
global set_timeout_interval: function(f: fa_file, t: interval): bool;
## Adds an analyzer to the analysis of a given file.
##
## f: the file.
##
## tag: the analyzer type.
##
## args: any parameters the analyzer takes.
##
## Returns: true if the analyzer will be added, or false if analysis
## for the file isn't currently active or the *args*
## were invalid for the analyzer type.
global add_analyzer: function(f: fa_file,
tag: Files::Tag,
args: AnalyzerArgs &default=AnalyzerArgs()): bool;
## Removes an analyzer from the analysis of a given file.
##
## f: the file.
##
## tag: the analyzer type.
##
## args: the analyzer (type and args) to remove.
##
## Returns: true if the analyzer will be removed, or false if analysis
## for the file isn't currently active.
global remove_analyzer: function(f: fa_file,
tag: Files::Tag,
args: AnalyzerArgs &default=AnalyzerArgs()): bool;
## Stops/ignores any further analysis of a given file.
##
## f: the file.
##
## Returns: true if analysis for the given file will be ignored for the
## rest of its contents, or false if analysis for the file
## isn't currently active.
global stop: function(f: fa_file): bool;
## Translates a file analyzer enum value to a string with the
## analyzer's name.
##
## tag: The analyzer tag.
##
## Returns: The analyzer name corresponding to the tag.
global analyzer_name: function(tag: Files::Tag): string;
## Provides a text description regarding metadata of the file.
## For example, with HTTP it would return a URL.
##
## f: The file to be described.
##
## Returns: a text description regarding metadata of the file.
global describe: function(f: fa_file): string;
type ProtoRegistration: record {
## A callback to generate a file handle on demand when
## one is needed by the core.
get_file_handle: function(c: connection, is_orig: bool): string;
## A callback to "describe" a file. In the case of an HTTP
## transfer the most obvious description would be the URL.
## It's like an extremely compressed version of the normal log.
describe: function(f: fa_file): string
&default=function(f: fa_file): string { return ""; };
};
## Register callbacks for protocols that work with the Files framework.
## The callbacks must uniquely identify a file and each protocol can
## only have a single callback registered for it.
##
## tag: Tag for the protocol analyzer having a callback being registered.
##
## reg: A :bro:see:`Files::ProtoRegistration` record.
##
## Returns: true if the protocol being registered was not previously registered.
global register_protocol: function(tag: Analyzer::Tag, reg: ProtoRegistration): bool;
## Register a callback for file analyzers to use if they need to do some
## manipulation when they are being added to a file before the core code
## takes over. This is unlikely to be interesting for users and should
## only be called by file analyzer authors but is *not required*.
##
## tag: Tag for the file analyzer.
##
## callback: Function to execute when the given file analyzer is being added.
global register_analyzer_add_callback: function(tag: Files::Tag, callback: function(f: fa_file, args: AnalyzerArgs));
## Event that can be handled to access the Info record as it is sent on
## to the logging framework.
global log_files: event(rec: Info);
}
redef record fa_file += {
info: Info &optional;
};
# Store the callbacks for protocol analyzers that have files.
global registered_protocols: table[Analyzer::Tag] of ProtoRegistration = table();
global analyzer_add_callbacks: table[Files::Tag] of function(f: fa_file, args: AnalyzerArgs) = table();
event bro_init() &priority=5
{
Log::create_stream(Files::LOG, [$columns=Info, $ev=log_files]);
}
function set_info(f: fa_file)
{
if ( ! f?$info )
{
local tmp: Info = Info($ts=f$last_active,
$fuid=f$id);
f$info = tmp;
}
if ( f?$parent_id )
f$info$parent_fuid = f$parent_id;
if ( f?$source )
f$info$source = f$source;
f$info$duration = f$last_active - f$info$ts;
f$info$seen_bytes = f$seen_bytes;
if ( f?$total_bytes )
f$info$total_bytes = f$total_bytes;
f$info$missing_bytes = f$missing_bytes;
f$info$overflow_bytes = f$overflow_bytes;
if ( f?$is_orig )
f$info$is_orig = f$is_orig;
if ( f?$mime_type )
f$info$mime_type = f$mime_type;
}
function set_timeout_interval(f: fa_file, t: interval): bool
{
return __set_timeout_interval(f$id, t);
}
function add_analyzer(f: fa_file, tag: Files::Tag, args: AnalyzerArgs): bool
{
add f$info$analyzers[Files::analyzer_name(tag)];
if ( tag in analyzer_add_callbacks )
analyzer_add_callbacks[tag](f, args);
if ( ! __add_analyzer(f$id, tag, args) )
{
Reporter::warning(fmt("Analyzer %s not added successfully to file %s.", tag, f$id));
return F;
}
return T;
}
function register_analyzer_add_callback(tag: Files::Tag, callback: function(f: fa_file, args: AnalyzerArgs))
{
analyzer_add_callbacks[tag] = callback;
}
function remove_analyzer(f: fa_file, tag: Files::Tag, args: AnalyzerArgs): bool
{
return __remove_analyzer(f$id, tag, args);
}
function stop(f: fa_file): bool
{
return __stop(f$id);
}
function analyzer_name(tag: Files::Tag): string
{
return __analyzer_name(tag);
}
event file_new(f: fa_file) &priority=10
{
set_info(f);
}
event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=10
{
set_info(f);
add f$info$conn_uids[c$uid];
local cid = c$id;
add f$info$tx_hosts[f$is_orig ? cid$orig_h : cid$resp_h];
if ( |Site::local_nets| > 0 )
f$info$local_orig=Site::is_local_addr(f$is_orig ? cid$orig_h : cid$resp_h);
add f$info$rx_hosts[f$is_orig ? cid$resp_h : cid$orig_h];
}
event file_timeout(f: fa_file) &priority=10
{
set_info(f);
f$info$timedout = T;
}
event file_state_remove(f: fa_file) &priority=10
{
set_info(f);
}
event file_state_remove(f: fa_file) &priority=-10
{
Log::write(Files::LOG, f$info);
}
function register_protocol(tag: Analyzer::Tag, reg: ProtoRegistration): bool
{
local result = (tag !in registered_protocols);
registered_protocols[tag] = reg;
return result;
}
function describe(f: fa_file): string
{
local tag = Analyzer::get_tag(f$source);
if ( tag !in registered_protocols )
return "";
local handler = registered_protocols[tag];
return handler$describe(f);
}
event get_file_handle(tag: Analyzer::Tag, c: connection, is_orig: bool) &priority=5
{
if ( tag !in registered_protocols )
return;
local handler = registered_protocols[tag];
set_file_handle(handler$get_file_handle(c, is_orig));
}
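# Editor's note: a hedged sketch, not part of this commit, of how a protocol
# analyzer script registers with this framework; the FTP_DATA tag and the
# handle format are illustrative assumptions.
function ftp_data_file_handle(c: connection, is_orig: bool): string
    {
    return cat(Analyzer::ANALYZER_FTP_DATA, c$start_time, c$id, is_orig);
    }

event bro_init() &priority=5
    {
    Files::register_protocol(Analyzer::ANALYZER_FTP_DATA,
                             [$get_file_handle = ftp_data_file_handle]);
    }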

@ -0,0 +1,2 @@
The input framework provides a way to read previously stored data either as
an event stream or into a Bro table.

@ -33,45 +33,45 @@ export {
## that contain types that are not supported (at the moment
## file and function). If true, the input framework will
## warn in these cases, but continue. If false, it will
## abort. Defaults to false (abort)
## abort. Defaults to false (abort).
const accept_unsupported_types = F &redef;
## TableFilter description type used for the `table` method.
type TableDescription: record {
## Common definitions for tables and events
# Common definitions for tables and events
## String that allows the reader to find the source.
## For `READER_ASCII`, this is the filename.
source: string;
## Reader to use for this stream
## Reader to use for this stream.
reader: Reader &default=default_reader;
## Read mode to use for this stream
## Read mode to use for this stream.
mode: Mode &default=default_mode;
## Descriptive name. Used to remove a stream at a later time
## Descriptive name. Used to remove a stream at a later time.
name: string;
# Special definitions for tables
## Table which will receive the data read by the input framework
## Table which will receive the data read by the input framework.
destination: any;
## Record that defines the values used as the index of the table
## Record that defines the values used as the index of the table.
idx: any;
## Record that defines the values used as the elements of the table
## If val is undefined, destination has to be a set.
## Record that defines the values used as the elements of the table.
## If this is undefined, then *destination* has to be a set.
val: any &optional;
## Defines if the value of the table is a record (default), or a single value. Val
## can only contain one element when this is set to false.
## Defines if the value of the table is a record (default), or a single value.
## When this is set to false, then *val* can only contain one element.
want_record: bool &default=T;
## The event that is raised each time a value is added to, changed in or removed
## from the table. The event will receive an Input::Event enum as the first
## argument, the idx record as the second argument and the value (record) as the
## argument, the *idx* record as the second argument and the value (record) as the
## third argument.
ev: any &optional; # event containing idx, val as values.
@ -88,19 +88,19 @@ export {
## EventFilter description type used for the `event` method.
type EventDescription: record {
## Common definitions for tables and events
# Common definitions for tables and events
## String that allows the reader to find the source.
## For `READER_ASCII`, this is the filename.
source: string;
## Reader to use for this steam
## Reader to use for this stream.
reader: Reader &default=default_reader;
## Read mode to use for this stream
## Read mode to use for this stream.
mode: Mode &default=default_mode;
## Descriptive name. Used to remove a stream at a later time
## Descriptive name. Used to remove a stream at a later time.
name: string;
# Special definitions for events
@ -108,8 +108,8 @@ export {
## Record describing the fields to be retrieved from the source input.
fields: any;
## If want_record if false, the event receives each value in fields as a separate argument.
## If it is set to true (default), the event receives all fields in a single record value.
## If this is false, the event receives each value in fields as a separate argument.
## If this is set to true (default), the event receives all fields in a single record value.
want_record: bool &default=T;
## The event that is raised each time a new line is received from the reader.
@ -122,23 +122,23 @@ export {
config: table[string] of string &default=table();
};
## A file analyis input stream type used to forward input data to the
## A file analysis input stream type used to forward input data to the
## file analysis framework.
type AnalysisDescription: record {
## String that allows the reader to find the source.
## For `READER_ASCII`, this is the filename.
source: string;
## Reader to use for this steam. Compatible readers must be
## Reader to use for this stream. Compatible readers must be
## able to accept a filter of a single string type (i.e.
## they read a byte stream).
reader: Reader &default=Input::READER_BINARY;
## Read mode to use for this stream
## Read mode to use for this stream.
mode: Mode &default=default_mode;
## Descriptive name that uniquely identifies the input source.
## Can be used used to remove a stream at a later time.
## Can be used to remove a stream at a later time.
## This will also be used for the unique *source* field of
## :bro:see:`fa_file`. Most of the time, the best choice for this
## field will be the same value as the *source* field.
@ -150,38 +150,44 @@ export {
config: table[string] of string &default=table();
};
## Create a new table input from a given source. Returns true on success.
## Create a new table input from a given source.
##
## description: `TableDescription` record describing the source.
##
## Returns: true on success.
global add_table: function(description: Input::TableDescription) : bool;
## Create a new event input from a given source. Returns true on success.
## Create a new event input from a given source.
##
## description: `TableDescription` record describing the source.
## description: `EventDescription` record describing the source.
##
## Returns: true on success.
global add_event: function(description: Input::EventDescription) : bool;
## Create a new file analysis input from a given source. Data read from
## the source is automatically forwarded to the file analysis framework.
##
## description: A record describing the source
## description: A record describing the source.
##
## Returns: true on sucess.
## Returns: true on success.
global add_analysis: function(description: Input::AnalysisDescription) : bool;
## Remove a input stream. Returns true on success and false if the named stream was
## not found.
## Remove an input stream.
##
## id: string value identifying the stream to be removed
## id: string value identifying the stream to be removed.
##
## Returns: true on success and false if the named stream was not found.
global remove: function(id: string) : bool;
## Forces the current input to be checked for changes.
## Returns true on success and false if the named stream was not found
##
## id: string value identifying the stream
## id: string value identifying the stream.
##
## Returns: true on success and false if the named stream was not found.
global force_update: function(id: string) : bool;
## Event that is called, when the end of a data source has been reached, including
## after an update.
## Event that is called when the end of a data source has been reached,
## including after an update.
global end_of_data: event(name: string, source:string);
}
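# Editor's note: a hedged end-to-end sketch, not part of this diff. The
# source file "blacklist.file" and the record layouts are hypothetical.
type Idx: record {
    ip: addr;
};

type Val: record {
    timestamp: time;
    reason: string;
};

global blacklist: table[addr] of Val = table();

event bro_init()
    {
    Input::add_table([$source="blacklist.file", $name="blacklist",
                      $idx=Idx, $val=Val, $destination=blacklist]);
    }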

@ -6,11 +6,11 @@ module InputAscii;
export {
## Separator between fields.
## Please note that the separator has to be exactly one character long
## Please note that the separator has to be exactly one character long.
const separator = Input::separator &redef;
## Separator between set elements.
## Please note that the separator has to be exactly one character long
## Please note that the separator has to be exactly one character long.
const set_separator = Input::set_separator &redef;
## String to use for empty fields.

@ -1,23 +1,23 @@
##! Interface for the ascii input reader.
##! Interface for the benchmark input reader.
module InputBenchmark;
export {
## multiplication factor for each second
## Multiplication factor for each second.
const factor = 1.0 &redef;
## spread factor between lines
## Spread factor between lines.
const spread = 0 &redef;
## spreading where usleep = 1000000 / autospread * num_lines
## Spreading where usleep = 1000000 / autospread * num_lines.
const autospread = 0.0 &redef;
## addition factor for each heartbeat
## Addition factor for each heartbeat.
const addfactor = 0 &redef;
## stop spreading at x lines per heartbeat
## Stop spreading at x lines per heartbeat.
const stopspreadat = 0 &redef;
## 1 -> enable timed spreading
## 1 -> enable timed spreading.
const timedspread = 0.0 &redef;
}

@ -4,14 +4,14 @@ module InputRaw;
export {
## Separator between input records.
## Please note that the separator has to be exactly one character long
## Please note that the separator has to be exactly one character long.
const record_separator = "\n" &redef;
## Event that is called when a process created by the raw reader exits.
##
## name: name of the input stream
## source: source of the input stream
## exit_code: exit code of the program, or number of the signal that forced the program to exit
## signal_exit: false when program exitted normally, true when program was forced to exit by a signal
## name: name of the input stream.
## source: source of the input stream.
## exit_code: exit code of the program, or number of the signal that forced the program to exit.
## signal_exit: false when program exited normally, true when program was forced to exit by a signal.
global process_finished: event(name: string, source:string, exit_code:count, signal_exit:bool);
}
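# Editor's note: a hedged handler sketch for the event above, not part of
# this diff:
event InputRaw::process_finished(name: string, source: string, exit_code: count, signal_exit: bool)
    {
    if ( exit_code != 0 )
        print fmt("raw input %s (%s) exited abnormally (code %d, signal %s)",
                  name, source, exit_code, signal_exit);
    }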

@ -1,6 +1,12 @@
##! Interface for the SQLite input reader.
##! Interface for the SQLite input reader. Redefinable options are available
##! to tweak the input format of the SQLite reader.
##!
##! The defaults are set to match Bro's ASCII output.
##! See :doc:`/frameworks/logging-input-sqlite` for an introduction on how to
##! use the SQLite reader.
##!
##! When using the SQLite reader, you have to specify the SQL query that returns
##! the desired data by setting ``query`` in the ``config`` table. See the
##! introduction mentioned above for an example.
module InputSQLite;
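
# Editor's note: a hedged sketch of the ``query`` configuration described
# above, not part of this diff; the database path, table, and record layout
# are hypothetical, and source path handling (e.g. an implicit .sqlite
# extension) follows the reader's conventions.
type SqlIdx: record {
    uid: string;
};

type SqlVal: record {
    ts: time;
};

global results: table[string] of SqlVal = table();

event bro_init()
    {
    Input::add_table([$source="/var/db/example", $name="conn-db",
                      $idx=SqlIdx, $val=SqlVal, $destination=results,
                      $reader=Input::READER_SQLITE,
                      $config=table(["query"] = "select uid, ts from conn;")]);
    }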

@ -0,0 +1,3 @@
The intelligence framework provides a way to store and query intelligence
data (such as IP addresses or strings). Metadata can also be associated
with the intelligence.

@ -1,5 +1,5 @@
##! Cluster transparency support for the intelligence framework. This is mostly oriented
##! toward distributing intelligence information across clusters.
##! Cluster transparency support for the intelligence framework. This is mostly
##! oriented toward distributing intelligence information across clusters.
@load base/frameworks/cluster
@load ./input

@ -4,7 +4,7 @@ module Intel;
export {
## Intelligence files that will be read off disk. The files are
## reread everytime they are updated so updates much be atomic with
## reread every time they are updated so updates must be atomic with
## "mv" instead of writing the file in place.
const read_files: set[string] = {} &redef;
}

@ -1,6 +1,6 @@
##! The intelligence framework provides a way to store and query IP addresses,
##! and strings (with a str_type). Metadata can
##! also be associated with the intelligence like for making more informed
##! also be associated with the intelligence, like for making more informed
##! decisions about matching and handling of intelligence.
@load base/frameworks/notice
@ -10,31 +10,36 @@ module Intel;
export {
redef enum Log::ID += { LOG };
## String data needs to be further categoried since it could represent
## and number of types of data.
type StrType: enum {
## A complete URL without the prefix "http://".
## Enum type to represent various types of intelligence data.
type Type: enum {
## An IP address.
ADDR,
## A complete URL without the prefix ``"http://"``.
URL,
## User-Agent string, typically HTTP or mail message body.
USER_AGENT,
## Software name.
SOFTWARE,
## Email address.
EMAIL,
## DNS domain name.
DOMAIN,
## A user name.
USER_NAME,
## File hash which is non-hash type specific. It's up to the user to query
## for any relevant hash types.
## File hash which is non-hash type specific. It's up to the
## user to query for any relevant hash types.
FILE_HASH,
## File name. Typically with protocols with definite
## indications of a file name.
FILE_NAME,
## Certificate SHA-1 hash.
CERT_HASH,
};
## Data about an :bro:type:`Intel::Item`
## Data about an :bro:type:`Intel::Item`.
type MetaData: record {
## An arbitrary string value representing the data source. Typically,
## the convention for this field will be the source name and feed name
## separated by a hyphen. For example: "source1-c&c".
## An arbitrary string value representing the data source.
## Typically, the convention for this field will be the source
## name and feed name separated by a hyphen.
## For example: "source1-c&c".
source: string;
## A freeform description for the data.
desc: string &optional;
@ -44,18 +49,15 @@ export {
## Represents a piece of intelligence.
type Item: record {
## The IP address if the intelligence is about an IP address.
host: addr &optional;
## The network if the intelligence is about a CIDR block.
net: subnet &optional;
## The string if the intelligence is about a string.
str: string &optional;
## The type of data that is in the string if the $str field is set.
str_type: StrType &optional;
## The intelligence indicator.
indicator: string;
## The type of data that the indicator field represents.
indicator_type: Type;
## Metadata for the item. Typically represents more deeply \
## Metadata for the item. Typically represents more deeply
## descriptive data for a piece of intelligence.
meta: MetaData;
meta: MetaData;
};
## Enum to represent where data came from when it was discovered.
@ -65,23 +67,27 @@ export {
IN_ANYWHERE,
};
## The $host field and combination of $str and $str_type fields are mutually
## exclusive. These records *must* represent either an IP address being
## seen or a string being seen.
type Seen: record {
## The IP address if the data seen is an IP address.
host: addr &log &optional;
## The string if the data is about a string.
str: string &log &optional;
## The type of data that is in the string if the $str field is set.
str_type: StrType &log &optional;
indicator: string &log &optional;
## The type of data that the indicator represents.
indicator_type: Type &log &optional;
## If the indicator type was :bro:enum:`Intel::ADDR`, then this
## field will be present.
host: addr &optional;
## Where the data was discovered.
where: Where &log;
where: Where &log;
## If the data was discovered within a connection, the
## connection record should go into get to give context to the data.
conn: connection &optional;
## connection record should go here to give context to the data.
conn: connection &optional;
## If the data was discovered within a file, the file record
## should go here to provide context to the data.
f: fa_file &optional;
};
## Record used for the logging framework representing a positive
@ -97,10 +103,22 @@ export {
## this is the conn_id for the connection.
id: conn_id &log &optional;
## If a file was associated with this intelligence hit,
## this is the uid for the file.
fuid: string &log &optional;
## A mime type if the intelligence hit is related to a file.
## If the $f field is provided this will be automatically filled
## out.
file_mime_type: string &log &optional;
## Frequently files can be "described" to give a bit more context.
## If the $f field is provided this field will be automatically
## filled out.
file_desc: string &log &optional;
## Where the data was seen.
seen: Seen &log;
## Sources which supplied data that resulted in this match.
sources: set[string] &log;
sources: set[string] &log &default=string_set();
};
## Intelligence data manipulation functions.
@ -110,13 +128,13 @@ export {
## it against known intelligence for matches.
global seen: function(s: Seen);
## Event to represent a match in the intelligence data from data that was seen.
## On clusters there is no assurance as to where this event will be generated
## so do not assume that arbitrary global state beyond the given data
## will be available.
## Event to represent a match in the intelligence data from data that
## was seen. On clusters there is no assurance as to where this event
## will be generated so do not assume that arbitrary global state beyond
## the given data will be available.
##
## This is the primary mechanism where a user will take actions based on data
## within the intelligence framework.
## This is the primary mechanism where a user will take actions based on
## data within the intelligence framework.
global match: event(s: Seen, items: set[Item]);
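# A minimal handler might look like this; printing is only illustrative,
# and $indicator is filled in by the framework before the event is raised:
event Intel::match(s: Intel::Seen, items: set[Intel::Item])
    {
    for ( item in items )
        print fmt("intel match on %s (source: %s)", s$indicator, item$meta$source);
    }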
global log_intel: event(rec: Info);
@ -125,7 +143,7 @@ export {
# Internal handler for matches with no metadata available.
global match_no_items: event(s: Seen);
# Internal events for cluster data distribution
# Internal events for cluster data distribution.
global new_item: event(item: Item);
global updated_item: event(item: Item);
@ -135,8 +153,8 @@ const have_full_data = T &redef;
# The in memory data structure for holding intelligence.
type DataStore: record {
net_data: table[subnet] of set[MetaData];
string_data: table[string, StrType] of set[MetaData];
host_data: table[addr] of set[MetaData];
string_data: table[string, Type] of set[MetaData];
};
global data_store: DataStore &redef;
@ -144,8 +162,8 @@ global data_store: DataStore &redef;
# This is primarily for workers to do the initial quick matches and store
# a minimal amount of data for the full match to happen on the manager.
type MinDataStore: record {
net_data: set[subnet];
string_data: set[string, StrType];
host_data: set[addr];
string_data: set[string, Type];
};
global min_data_store: MinDataStore &redef;
@ -157,15 +175,13 @@ event bro_init() &priority=5
function find(s: Seen): bool
{
if ( s?$host &&
((have_full_data && s$host in data_store$net_data) ||
(s$host in min_data_store$net_data)))
if ( s?$host )
{
return T;
return ((s$host in min_data_store$host_data) ||
(have_full_data && s$host in data_store$host_data));
}
else if ( s?$str && s?$str_type &&
((have_full_data && [s$str, s$str_type] in data_store$string_data) ||
([s$str, s$str_type] in min_data_store$string_data)))
else if ( ([to_lower(s$indicator), s$indicator_type] in min_data_store$string_data) ||
(have_full_data && [to_lower(s$indicator), s$indicator_type] in data_store$string_data) )
{
return T;
}
@ -177,8 +193,7 @@ function find(s: Seen): bool
function get_items(s: Seen): set[Item]
{
local item: Item;
local return_data: set[Item] = set();
local return_data: set[Item];
if ( ! have_full_data )
{
@ -191,26 +206,23 @@ function get_items(s: Seen): set[Item]
if ( s?$host )
{
# See if the host is known about and it has meta values
if ( s$host in data_store$net_data )
if ( s$host in data_store$host_data )
{
for ( m in data_store$net_data[s$host] )
for ( m in data_store$host_data[s$host] )
{
# TODO: the lookup should be finding all and not just most specific
# and $host/$net should have the correct value.
item = [$host=s$host, $meta=m];
add return_data[item];
add return_data[Item($indicator=cat(s$host), $indicator_type=ADDR, $meta=m)];
}
}
}
else if ( s?$str && s?$str_type )
else
{
local lower_indicator = to_lower(s$indicator);
# See if the string is known about and it has meta values
if ( [s$str, s$str_type] in data_store$string_data )
if ( [lower_indicator, s$indicator_type] in data_store$string_data )
{
for ( m in data_store$string_data[s$str, s$str_type] )
for ( m in data_store$string_data[lower_indicator, s$indicator_type] )
{
item = [$str=s$str, $str_type=s$str_type, $meta=m];
add return_data[item];
add return_data[Item($indicator=s$indicator, $indicator_type=s$indicator_type, $meta=m)];
}
}
}
@ -222,6 +234,12 @@ function Intel::seen(s: Seen)
{
if ( find(s) )
{
if ( s?$host )
{
s$indicator = cat(s$host);
s$indicator_type = Intel::ADDR;
}
if ( have_full_data )
{
local items = get_items(s);
@ -250,8 +268,25 @@ function has_meta(check: MetaData, metas: set[MetaData]): bool
event Intel::match(s: Seen, items: set[Item]) &priority=5
{
local empty_set: set[string] = set();
local info: Info = [$ts=network_time(), $seen=s, $sources=empty_set];
local info = Info($ts=network_time(), $seen=s);
if ( s?$f )
{
if ( s$f?$conns && |s$f$conns| == 1 )
{
for ( cid in s$f$conns )
s$conn = s$f$conns[cid];
}
if ( ! info?$fuid )
info$fuid = s$f$id;
if ( ! info?$file_mime_type && s$f?$mime_type )
info$file_mime_type = s$f$mime_type;
if ( ! info?$file_desc )
info$file_desc = Files::describe(s$f);
}
if ( s?$conn )
{
@ -267,52 +302,37 @@ event Intel::match(s: Seen, items: set[Item]) &priority=5
function insert(item: Item)
{
if ( item?$str && !item?$str_type )
{
event reporter_warning(network_time(), fmt("You must provide a str_type for strings or this item doesn't make sense. Item: %s", item), "");
return;
}
# Create and fill out the meta data item.
local meta = item$meta;
local metas: set[MetaData];
if ( item?$host )
# All intelligence is case insensitive at the moment.
local lower_indicator = to_lower(item$indicator);
if ( item$indicator_type == ADDR )
{
local host = mask_addr(item$host, is_v4_addr(item$host) ? 32 : 128);
local host = to_addr(item$indicator);
if ( have_full_data )
{
if ( host !in data_store$net_data )
data_store$net_data[host] = set();
if ( host !in data_store$host_data )
data_store$host_data[host] = set();
metas = data_store$net_data[host];
metas = data_store$host_data[host];
}
add min_data_store$net_data[host];
add min_data_store$host_data[host];
}
else if ( item?$net )
else
{
if ( have_full_data )
{
if ( item$net !in data_store$net_data )
data_store$net_data[item$net] = set();
if ( [lower_indicator, item$indicator_type] !in data_store$string_data )
data_store$string_data[lower_indicator, item$indicator_type] = set();
metas = data_store$net_data[item$net];
metas = data_store$string_data[lower_indicator, item$indicator_type];
}
add min_data_store$net_data[item$net];
}
else if ( item?$str )
{
if ( have_full_data )
{
if ( [item$str, item$str_type] !in data_store$string_data )
data_store$string_data[item$str, item$str_type] = set();
metas = data_store$string_data[item$str, item$str_type];
}
add min_data_store$string_data[item$str, item$str_type];
add min_data_store$string_data[lower_indicator, item$indicator_type];
}
local updated = F;

View file

@ -0,0 +1 @@
The logging framework provides a flexible key-value based logging interface.

View file

@ -1,6 +1,7 @@
##! The Bro logging interface.
##!
##! See :doc:`/logging` for a introduction to Bro's logging framework.
##! See :doc:`/frameworks/logging` for an introduction to Bro's
##! logging framework.
module Log;
@ -26,7 +27,7 @@ export {
const set_separator = "," &redef;
## String to use for empty fields. This should be different from
## *unset_field* to make the output non-ambigious.
## *unset_field* to make the output unambiguous.
## Can be overwritten by individual writers.
const empty_field = "(empty)" &redef;
@ -40,8 +41,8 @@ export {
columns: any;
## Event that will be raised once for each log entry.
## The event receives a single same parameter, an instance of type
## ``columns``.
## The event receives a single parameter, an instance of
## type ``columns``.
ev: any &optional;
};
@ -75,9 +76,16 @@ export {
};
## Default rotation interval. Zero disables rotation.
##
## Note that this is overridden by the BroControl LogRotationInterval
## option.
const default_rotation_interval = 0secs &redef;
## Default alarm summary mail interval. Zero disables alarm summary mails.
## Default alarm summary mail interval. Zero disables alarm summary
## mails.
##
## Note that this is overridden by the BroControl MailAlarmsInterval
## option.
const default_mail_alarms_interval = 0secs &redef;
## Default naming format for timestamps embedded into filenames.
@ -113,7 +121,7 @@ export {
##
## The specific interpretation of the string is up to
## the used writer, and may for example be the destination
## file name. Generally, filenames are expected to given
## file name. Generally, filenames are expected to be given
## without any extensions; writers will add appropriate
## extensions automatically.
##
@ -125,34 +133,36 @@ export {
path: string &optional;
## A function returning the output path for recording entries
## matching this filter. This is similar to ``path`` yet allows
## matching this filter. This is similar to *path* yet allows
## the string to be computed dynamically. It is ok to return
## different strings for separate calls, but be careful: it's
## easy to flood the disk by returning a new string for each
## connection ...
## connection.
##
## id: The ID associated with the log stream.
##
## path: A suggested path value, which may be either the filter's
## ``path`` if defined, else a previous result from the function.
## If no ``path`` is defined for the filter, then the first call
## to the function will contain an empty string.
## ``path`` if defined, else a previous result from the
## function. If no ``path`` is defined for the filter,
## then the first call to the function will contain an
## empty string.
##
## rec: An instance of the stream's ``columns`` type with its
## fields set to the values to be logged.
##
## Returns: The path to be used for the filter, which will be subject
## to the same automatic correction rules as the *path*
## field of :bro:type:`Log::Filter` in the case of conflicts
## with other filters trying to use the same writer/path pair.
## Returns: The path to be used for the filter, which will be
## subject to the same automatic correction rules as
## the *path* field of :bro:type:`Log::Filter` in the
## case of conflicts with other filters trying to use
## the same writer/path pair.
path_func: function(id: ID, path: string, rec: any): string &optional;
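# A sketch of a path function in use, splitting the conn log by transport
# protocol (function and path names are illustrative):
#
#     function conn_path_by_proto(id: Log::ID, path: string,
#                                 rec: Conn::Info): string
#         {
#         # Yields e.g. "conn-tcp"; a small, fixed set of results
#         # avoids flooding the disk with new paths.
#         return fmt("%s-%s", path, rec$proto);
#         }
#
#     event bro_init()
#         {
#         local f = Log::get_filter(Conn::LOG, "default");
#         f$path_func = conn_path_by_proto;
#         Log::add_filter(Conn::LOG, f);
#         }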
## Subset of column names to record. If not given, all
## columns are recorded.
include: set[string] &optional;
## Subset of column names to exclude from recording. If not given,
## all columns are recorded.
## Subset of column names to exclude from recording. If not
## given, all columns are recorded.
exclude: set[string] &optional;
## If true, entries are recorded locally.
@ -228,7 +238,7 @@ export {
##
## filter: A record describing the desired logging parameters.
##
## Returns: True if the filter was sucessfully added, false if
## Returns: True if the filter was successfully added, false if
## the filter was not added or the *filter* argument was not
## the correct type.
##
@ -276,7 +286,7 @@ export {
##
## Returns: True if the stream was found and no error occurred in writing
## to it or if the stream was disabled and nothing was written.
## False if the stream was was not found, or the *columns*
## False if the stream was not found, or the *columns*
## argument did not match what the stream was initially defined
## to handle, or one of the stream's filters has an invalid
## ``path_func``.
@ -285,8 +295,8 @@ export {
global write: function(id: ID, columns: any) : bool;
## Sets the buffering status for all the writers of a given logging stream.
## A given writer implementation may or may not support buffering and if it
## doesn't then toggling buffering with this function has no effect.
## A given writer implementation may or may not support buffering and if
## it doesn't then toggling buffering with this function has no effect.
##
## id: The ID associated with a logging stream for which to
## enable/disable buffering.
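# This is exposed as Log::set_buf; e.g., trading some throughput for
# lower latency on a single stream:
#
#     event bro_init()
#         {
#         Log::set_buf(HTTP::LOG, F);
#         }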
@ -346,7 +356,7 @@ export {
##
## npath: The new path of the file (after already being rotated/processed
## by writer-specific postprocessor as defined in
## :bro:id:`Log::default_rotation_postprocessors`.
## :bro:id:`Log::default_rotation_postprocessors`).
##
## Returns: True when :bro:id:`Log::default_rotation_postprocessor_cmd`
## is empty or the system command given by it has been invoked

View file

@ -0,0 +1 @@
Support for postprocessors in the logging framework.

View file

@ -16,9 +16,9 @@
module Log;
export {
## Secure-copies the rotated-log to all the remote hosts
## Secure-copies the rotated log to all the remote hosts
## defined in :bro:id:`Log::scp_destinations` and then deletes
## the local copy of the rotated-log. It's not active when
## the local copy of the rotated log. It's not active when
## reading from trace files.
##
## info: A record holding meta-information about the log file to be
@ -42,9 +42,9 @@ export {
};
## A table indexed by a particular log writer and filter path, that yields
## a set remote destinations. The :bro:id:`Log::scp_postprocessor`
## a set of remote destinations. The :bro:id:`Log::scp_postprocessor`
## function queries this table upon log rotation and performs a secure
## copy of the rotated-log to each destination in the set. This
## copy of the rotated log to each destination in the set. This
## table can be modified at run-time.
global scp_destinations: table[Writer, string] of set[SCPDestination];
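# Assuming SCPDestination carries the usual $user/$host/$path fields,
# wiring up a destination for the conn log might look like this (host and
# paths are hypothetical):
event bro_init()
    {
    Log::scp_destinations[Log::WRITER_ASCII, "conn"] =
        set(Log::SCPDestination($user="bro", $host="archive.example.com",
                                $path="/data/bro-logs"));
    }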

View file

@ -16,9 +16,9 @@
module Log;
export {
## Securely transfers the rotated-log to all the remote hosts
## Securely transfers the rotated log to all the remote hosts
## defined in :bro:id:`Log::sftp_destinations` and then deletes
## the local copy of the rotated-log. It's not active when
## the local copy of the rotated log. It's not active when
## reading from trace files.
##
## info: A record holding meta-information about the log file to be
@ -42,9 +42,9 @@ export {
};
## A table indexed by a particular log writer and filter path, that yields
## a set remote destinations. The :bro:id:`Log::sftp_postprocessor`
## a set of remote destinations. The :bro:id:`Log::sftp_postprocessor`
## function queries this table upon log rotation and performs a secure
## transfer of the rotated-log to each destination in the set. This
## transfer of the rotated log to each destination in the set. This
## table can be modified at run-time.
global sftp_destinations: table[Writer, string] of set[SFTPDestination];

View file

@ -2,14 +2,14 @@
##! to tweak the output format of ASCII logs.
##!
##! The ASCII writer supports currently one writer-specific filter option via
##! ``config``: setting ``tsv`` to the string ``T`` turns the output into into
##! "tab-separated-value" mode where only a single header row with the column names
##! is printed out as meta information, with no "# fields" prepended; no other meta
##! data gets included in that mode.
##!
##! ``config``: setting ``tsv`` to the string ``T`` turns the output into
##! "tab-separated-value" mode where only a single header row with the column
##! names is printed out as meta information, with no "# fields" prepended; no
##! other meta data gets included in that mode.
##!
##! Example filter using this::
##!
##! local my_filter: Log::Filter = [$name = "my-filter", $writer = Log::WRITER_ASCII, $config = table(["tsv"] = "T")];
##!
module LogAscii;
@ -17,27 +17,51 @@ module LogAscii;
export {
## If true, output everything to stdout rather than
## into files. This is primarily for debugging purposes.
##
## This option is also available as a per-filter ``$config`` option.
const output_to_stdout = F &redef;
## If true, include lines with log meta information such as column names with
## types, the values of ASCII logging options that in use, and the time when the
## file was opened and closes (the latter at the end).
## If true, the default will be to write logs in a JSON format.
##
## This option is also available as a per-filter ``$config`` option.
const use_json = F &redef;
## Format of timestamps when writing out JSON. By default, the JSON formatter will
## use double values for timestamps which represent the number of seconds from the
## UNIX epoch.
const json_timestamps: JSON::TimestampFormat = JSON::TS_EPOCH &redef;
## If true, include lines with log meta information such as column names
## with types, the values of ASCII logging options that are in use, and
## the time when the file was opened and closed (the latter at the end).
##
## If writing in JSON format, this is implicitly disabled.
const include_meta = T &redef;
## Prefix for lines with meta information.
##
## This option is also available as a per-filter ``$config`` option.
const meta_prefix = "#" &redef;
## Separator between fields.
##
## This option is also available as a per-filter ``$config`` option.
const separator = Log::separator &redef;
## Separator between set elements.
##
## This option is also available as a per-filter ``$config`` option.
const set_separator = Log::set_separator &redef;
## String to use for empty fields. This should be different from
## *unset_field* to make the output non-ambigious.
## *unset_field* to make the output unambiguous.
##
## This option is also available as a per-filter ``$config`` option.
const empty_field = Log::empty_field &redef;
## String to use for an unset &optional field.
##
## This option is also available as a per-filter ``$config`` option.
const unset_field = Log::unset_field &redef;
}
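# For example, switching the ASCII writer to JSON output with ISO 8601
# timestamps (assuming the JSON::TS_ISO8601 enum value is available):
redef LogAscii::use_json = T;
redef LogAscii::json_timestamps = JSON::TS_ISO8601;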

View file

@ -6,16 +6,16 @@ export {
## Compression to use with the DS output file. Options are:
##
## 'none' -- No compression.
## 'lzf' -- LZF compression. Very quick, but leads to larger output files.
## 'lzo' -- LZO compression. Very fast decompression times.
## 'gz' -- GZIP compression. Slower than LZF, but also produces smaller output.
## 'bz2' -- BZIP2 compression. Slower than GZIP, but also produces smaller output.
## 'lzf' -- LZF compression (very quick, but leads to larger output files).
## 'lzo' -- LZO compression (very fast decompression times).
## 'gz' -- GZIP compression (slower than LZF, but also produces smaller output).
## 'bz2' -- BZIP2 compression (slower than GZIP, but also produces smaller output).
const compression = "gz" &redef;
## The extent buffer size.
## Larger values here lead to better compression and more efficient writes, but
## also increase the lag between the time events are received and the time they
## are actually written to disk.
## Larger values here lead to better compression and more efficient writes,
## but also increase the lag between the time events are received and
## the time they are actually written to disk.
const extent_size = 65536 &redef;
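# A sketch of tuning these trade-offs from site policy (values are
# illustrative):
#
#     redef LogDataSeries::compression = "bz2";   # smaller files, slower writes
#     redef LogDataSeries::extent_size = 131072;  # better compression, more lag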
## Should we dump the XML schema we use for this DS file to disk?
@ -43,8 +43,8 @@ export {
}
# Default function to postprocess a rotated DataSeries log file. It moves the
# rotated file to a new name that includes a timestamp with the opening time, and
# then runs the writer's default postprocessor command on it.
# rotated file to a new name that includes a timestamp with the opening time,
# and then runs the writer's default postprocessor command on it.
function default_rotation_postprocessor_func(info: Log::RotationInfo) : bool
{
# Move file to name including both opening and closing time.

View file

@ -10,16 +10,16 @@
module LogElasticSearch;
export {
## Name of the ES cluster
## Name of the ES cluster.
const cluster_name = "elasticsearch" &redef;
## ES Server
## ES server.
const server_host = "127.0.0.1" &redef;
## ES Port
## ES port.
const server_port = 9200 &redef;
## Name of the ES index
## Name of the ES index.
const index_prefix = "bro" &redef;
## The ES type prefix comes before the name of the related log.
@ -27,9 +27,9 @@ export {
const type_prefix = "" &redef;
## The time before an ElasticSearch transfer will timeout. Note that
## the fractional part of the timeout will be ignored. In particular, time
## specifications less than a second result in a timeout value of 0, which
## means "no timeout."
## the fractional part of the timeout will be ignored. In particular,
## time specifications less than a second result in a timeout value of
## 0, which means "no timeout."
const transfer_timeout = 2secs;
## The batch size is the number of messages that will be queued up before

View file

@ -1,4 +1,4 @@
##! Interface for the None log writer. Thiis writer is mainly for debugging.
##! Interface for the None log writer. This writer is mainly for debugging.
module LogNone;

View file

@ -1,5 +1,13 @@
##! Interface for the SQLite log writer. Redefinable options are available
##! to tweak the output format of the SQLite reader.
##!
##! See :doc:`/frameworks/logging-input-sqlite` for an introduction on how to
##! use the SQLite log writer.
##!
##! The SQL writer currently supports one writer-specific filter option via
##! ``config``: setting ``tablename`` sets the name of the table that is used
##! or created in the SQLite database. An example for this is given in the
##! introduction mentioned above.
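##! A sketch of such a filter (database path and table name are
##! illustrative)::
##!
##!     event bro_init()
##!         {
##!         local f: Log::Filter = [$name="sqlite", $path="/var/db/conn",
##!                                 $writer=Log::WRITER_SQLITE,
##!                                 $config=table(["tablename"] = "conn")];
##!         Log::add_filter(Conn::LOG, f);
##!         }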
module LogSQLite;
@ -11,7 +19,7 @@ export {
const unset_field = Log::unset_field &redef;
## String to use for empty fields. This should be different from
## *unset_field* to make the output non-ambigious.
## *unset_field* to make the output unambiguous.
const empty_field = Log::empty_field &redef;
}

View file

@ -0,0 +1,4 @@
The notice framework enables Bro to "notice" things which are odd or
potentially bad, leaving it to the local configuration to define which
of them are actionable. This decoupling of detection and reporting allows
Bro to be customized to the different needs that sites have.

View file

@ -7,12 +7,14 @@ module Notice;
export {
redef enum Action += {
## Drops the address via Drop::drop_address, and generates an alarm.
## Drops the address via Drop::drop_address, and generates an
## alarm.
ACTION_DROP
};
redef record Info += {
## Indicate if the $src IP address was dropped and denied network access.
## Indicate if the $src IP address was dropped and denied
## network access.
dropped: bool &log &default=F;
};
}

View file

@ -6,12 +6,14 @@ module Notice;
export {
redef enum Action += {
## Indicates that the notice should be sent to the pager email address
## configured in the :bro:id:`Notice::mail_page_dest` variable.
## Indicates that the notice should be sent to the pager email
## address configured in the :bro:id:`Notice::mail_page_dest`
## variable.
ACTION_PAGE
};
## Email address to send notices with the :bro:enum:`Notice::ACTION_PAGE` action.
## Email address to send notices with the :bro:enum:`Notice::ACTION_PAGE`
## action.
const mail_page_dest = "" &redef;
}

View file

@ -13,13 +13,15 @@ export {
## Address to send the pretty-printed reports to. Default if not set is
## :bro:id:`Notice::mail_dest`.
##
## Note that this is overridden by the BroControl MailAlarmsTo option.
const mail_dest_pretty_printed = "" &redef;
## If an address from one of these networks is reported, we mark
## the entry with an additional quote symbol (i.e., ">"). Many MUAs
## then highlight such lines differently.
global flag_nets: set[subnet] &redef;
## Function that renders a single alarm. Can be overidden.
## Function that renders a single alarm. Can be overridden.
global pretty_print_alarm: function(out: file, n: Info) &redef;
## Force generating mail file, even if reading from traces or no mail

View file

@ -17,13 +17,14 @@ export {
## Manager can communicate notice suppression to workers.
redef Cluster::manager2worker_events += /Notice::begin_suppression/;
## Workers needs need ability to forward notices to manager.
## Workers need the ability to forward notices to manager.
redef Cluster::worker2manager_events += /Notice::cluster_notice/;
@if ( Cluster::local_node_type() != Cluster::MANAGER )
event Notice::begin_suppression(n: Notice::Info)
{
suppressing[n$note, n$identifier] = n;
local suppress_until = n$ts + n$suppress_for;
suppressing[n$note, n$identifier] = suppress_until;
}
@endif

View file

@ -1,8 +1,8 @@
##! This is the notice framework which enables Bro to "notice" things which
##! are odd or potentially bad. Decisions of the meaning of various notices
##! need to be done per site because Bro does not ship with assumptions about
##! what is bad activity for sites. More extensive documetation about using
##! the notice framework can be found in :doc:`/notice`.
##! what is bad activity for sites. More extensive documentation about using
##! the notice framework can be found in :doc:`/frameworks/notice`.
module Notice;
@ -14,13 +14,13 @@ export {
ALARM_LOG,
};
## Scripts creating new notices need to redef this enum to add their own
## specific notice types which would then get used when they call the
## :bro:id:`NOTICE` function. The convention is to give a general category
## along with the specific notice separating words with underscores and
## using leading capitals on each word except for abbreviations which are
## kept in all capitals. For example, SSH::Login is for heuristically
## guessed successful SSH logins.
## Scripts creating new notices need to redef this enum to add their
## own specific notice types which would then get used when they call
## the :bro:id:`NOTICE` function. The convention is to give a general
## category along with the specific notice separating words with
## underscores and using leading capitals on each word except for
## abbreviations which are kept in all capitals. For example,
## SSH::Login is for heuristically guessed successful SSH logins.
type Type: enum {
## Notice reporting a count of how often a notice occurred.
Tally,
@ -30,46 +30,72 @@ export {
type Action: enum {
## Indicates that there is no action to be taken.
ACTION_NONE,
## Indicates that the notice should be sent to the notice logging stream.
## Indicates that the notice should be sent to the notice
## logging stream.
ACTION_LOG,
## Indicates that the notice should be sent to the email address(es)
## configured in the :bro:id:`Notice::mail_dest` variable.
## Indicates that the notice should be sent to the email
## address(es) configured in the :bro:id:`Notice::mail_dest`
## variable.
ACTION_EMAIL,
## Indicates that the notice should be alarmed. A readable ASCII
## version of the alarm log is emailed in bulk to the address(es)
## configured in :bro:id:`Notice::mail_dest`.
## Indicates that the notice should be alarmed. A readable
## ASCII version of the alarm log is emailed in bulk to the
## address(es) configured in :bro:id:`Notice::mail_dest`.
ACTION_ALARM,
};
## The notice framework is able to do automatic notice supression by
## utilizing the $identifier field in :bro:type:`Notice::Info` records.
## Set this to "0secs" to completely disable automated notice suppression.
type ActionSet: set[Notice::Action];
## The notice framework is able to do automatic notice suppression by
## utilizing the *identifier* field in :bro:type:`Notice::Info` records.
## Set this to "0secs" to completely disable automated notice
## suppression.
const default_suppression_interval = 1hrs &redef;
type Info: record {
## An absolute time indicating when the notice occurred, defaults
## to the current network time.
## An absolute time indicating when the notice occurred,
## defaults to the current network time.
ts: time &log &optional;
## A connection UID which uniquely identifies the endpoints
## concerned with the notice.
uid: string &log &optional;
## A connection 4-tuple identifying the endpoints concerned with the
## notice.
## A connection 4-tuple identifying the endpoints concerned
## with the notice.
id: conn_id &log &optional;
## A shorthand way of giving the uid and id to a notice. The
## reference to the actual connection will be deleted after applying
## the notice policy.
## reference to the actual connection will be deleted after
## applying the notice policy.
conn: connection &optional;
## A shorthand way of giving the uid and id to a notice. The
## reference to the actual connection will be deleted after applying
## the notice policy.
## reference to the actual connection will be deleted after
## applying the notice policy.
iconn: icmp_conn &optional;
## The transport protocol. Filled automatically when either conn, iconn
## or p is specified.
## A file record if the notice is related to a file. The
## reference to the actual fa_file record will be deleted after
## applying the notice policy.
f: fa_file &optional;
## A file unique ID if this notice is related to a file. If
## the *f* field is provided, this will be automatically filled
## out.
fuid: string &log &optional;
## A mime type if the notice is related to a file. If the *f*
## field is provided, this will be automatically filled out.
file_mime_type: string &log &optional;
## Frequently files can be "described" to give a bit more
## context. This field will typically be automatically filled
## out from an fa_file record. For example, if a notice was
## related to a file over HTTP, the URL of the request would
## be shown.
file_desc: string &log &optional;
## The transport protocol. Filled automatically when either
## *conn*, *iconn* or *p* is specified.
proto: transport_proto &log &optional;
## The :bro:type:`Notice::Type` of the notice.
@ -94,40 +120,44 @@ export {
peer_descr: string &log &optional;
## The actions which have been applied to this notice.
actions: set[Notice::Action] &log &optional;
actions: ActionSet &log &default=ActionSet();
## By adding chunks of text into this element, other scripts can
## expand on notices that are being emailed. The normal way to add text
## is to extend the vector by handling the :bro:id:`Notice::notice`
## event and modifying the notice in place.
## By adding chunks of text into this element, other scripts
## can expand on notices that are being emailed. The normal
## way to add text is to extend the vector by handling the
## :bro:id:`Notice::notice` event and modifying the notice in
## place.
email_body_sections: vector of string &optional;
## Adding a string "token" to this set will cause the notice framework's
## built-in emailing functionality to delay sending the email until
## either the token has been removed or the email has been delayed
## for :bro:id:`Notice::max_email_delay`.
## Adding a string "token" to this set will cause the notice
## framework's built-in emailing functionality to delay sending
## the email until either the token has been removed or the
## email has been delayed for :bro:id:`Notice::max_email_delay`.
email_delay_tokens: set[string] &optional;
## This field is to be provided when a notice is generated for the
## purpose of deduplicating notices. The identifier string should
## be unique for a single instance of the notice. This field should be
## filled out in almost all cases when generating notices to define
## when a notice is conceptually a duplicate of a previous notice.
## This field is to be provided when a notice is generated for
## the purpose of deduplicating notices. The identifier string
## should be unique for a single instance of the notice. This
## field should be filled out in almost all cases when
## generating notices to define when a notice is conceptually
## a duplicate of a previous notice.
##
## For example, an SSL certificate that is going to expire soon should
## always have the same identifier no matter the client IP address
## that connected and resulted in the certificate being exposed. In
## this case, the resp_h, resp_p, and hash of the certificate would be
## used to create this value. The hash of the cert is included
## because servers can return multiple certificates on the same port.
## For example, an SSL certificate that is going to expire soon
## should always have the same identifier no matter the client
## IP address that connected and resulted in the certificate
## being exposed. In this case, the resp_h, resp_p, and hash
## of the certificate would be used to create this value. The
## hash of the cert is included because servers can return
## multiple certificates on the same port.
##
## Another example might be a host downloading a file which triggered
## a notice because the MD5 sum of the file it downloaded was known
## by some set of intelligence. In that case, the orig_h (client)
## and MD5 sum would be used in this field to dedup because if the
## same file is downloaded over and over again you really only want to
## know about it a single time. This makes it possible to send those
## notices to email without worrying so much about sending thousands
## Another example might be a host downloading a file which
## triggered a notice because the MD5 sum of the file it
## downloaded was known by some set of intelligence. In that
## case, the orig_h (client) and MD5 sum would be used in this
## field to dedup because if the same file is downloaded over
## and over again you really only want to know about it a
## single time. This makes it possible to send those notices
## to email without worrying so much about sending thousands
## of emails.
identifier: string &optional;
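# A hedged sketch of constructing such an identifier; the notice type
# comes from the SSL policy scripts and the variables are placeholders:
#
#     NOTICE([$note=SSL::Certificate_Expires_Soon,
#             $msg="certificate will expire soon",
#             $conn=c,
#             $identifier=cat(c$id$resp_h, c$id$resp_p, cert_hash)]);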
@ -152,21 +182,62 @@ export {
global policy: hook(n: Notice::Info);
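# Sites typically attach actions via this hook; for example, emailing one
# notice type (the type shown is just an example):
hook Notice::policy(n: Notice::Info)
    {
    if ( n$note == Weird::Activity )
        add n$actions[Notice::ACTION_EMAIL];
    }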
## Local system sendmail program.
##
## Note that this is overridden by the BroControl SendMail option.
const sendmail = "/usr/sbin/sendmail" &redef;
## Email address to send notices with the :bro:enum:`Notice::ACTION_EMAIL`
## action or to send bulk alarm logs on rotation with
## :bro:enum:`Notice::ACTION_ALARM`.
## Email address to send notices with the
## :bro:enum:`Notice::ACTION_EMAIL` action or to send bulk alarm logs
## on rotation with :bro:enum:`Notice::ACTION_ALARM`.
##
## Note that this is overridden by the BroControl MailTo option.
const mail_dest = "" &redef;
## Address that emails will be from.
##
## Note that this is overridden by the BroControl MailFrom option.
const mail_from = "Big Brother <bro@localhost>" &redef;
## Reply-to address used in outbound email.
const reply_to = "" &redef;
## Text string prefixed to the subject of all emails sent out.
##
## Note that this is overridden by the BroControl MailSubjectPrefix
## option.
const mail_subject_prefix = "[Bro]" &redef;
## The maximum amount of time a plugin can delay email from being sent.
const max_email_delay = 15secs &redef;
## Contains a portion of :bro:see:`fa_file` that's also contained in
## :bro:see:`Notice::Info`.
type FileInfo: record {
fuid: string; ##< File UID.
desc: string; ##< File description from e.g.
##< :bro:see:`Files::describe`.
mime: string &optional; ##< Strongest mime type match for file.
cid: conn_id &optional; ##< Connection tuple over which file is sent.
cuid: string &optional; ##< Connection UID over which file is sent.
};
## Creates a record containing a subset of a full :bro:see:`fa_file` record.
##
## f: record containing metadata about a file.
##
## Returns: record containing a subset of fields copied from *f*.
global create_file_info: function(f: fa_file): Notice::FileInfo;
## Populates file-related fields in a notice info record.
##
## f: record containing metadata about a file.
##
## n: a notice record that needs file-related fields populated.
global populate_file_info: function(f: fa_file, n: Notice::Info);
## Populates file-related fields in a notice info record.
##
## fi: record containing metadata about a file.
##
## n: a notice record that needs file-related fields populated.
global populate_file_info2: function(fi: Notice::FileInfo, n: Notice::Info);
## A log postprocessing function that implements emailing the contents
## of a log upon rotation to any configured :bro:id:`Notice::mail_dest`.
## The rotated log is removed upon being sent.
@ -177,9 +248,9 @@ export {
global log_mailing_postprocessor: function(info: Log::RotationInfo): bool;
## This is the event that is called as the entry point to the
## notice framework by the global :bro:id:`NOTICE` function. By the time
## this event is generated, default values have already been filled out in
## the :bro:type:`Notice::Info` record and the notice
## notice framework by the global :bro:id:`NOTICE` function. By the
## time this event is generated, default values have already been
## filled out in the :bro:type:`Notice::Info` record and the notice
## policy has also been applied.
##
## n: The record containing notice data.
@ -196,18 +267,13 @@ export {
## n: The record containing the notice in question.
global is_being_suppressed: function(n: Notice::Info): bool;
## This event is generated on each occurence of an event being suppressed.
## This event is generated on each occurrence of an event being
## suppressed.
##
## n: The record containing notice data regarding the notice type
## being suppressed.
global suppressed: event(n: Notice::Info);
## This event is generated when a notice stops being suppressed.
##
## n: The record containing notice data regarding the notice type
## that was being suppressed.
global end_suppression: event(n: Notice::Info);
## Call this function to send a notice in an email. It is already used
## by default with the built in :bro:enum:`Notice::ACTION_EMAIL` and
## :bro:enum:`Notice::ACTION_PAGE` actions.
@ -216,18 +282,19 @@ export {
##
## dest: The intended recipient of the notice email.
##
## extend: Whether to extend the email using the ``email_body_sections``
## field of *n*.
## extend: Whether to extend the email using the
## ``email_body_sections`` field of *n*.
global email_notice_to: function(n: Info, dest: string, extend: bool);
## Constructs mail headers to which an email body can be appended for
## sending with sendmail.
##
## subject_desc: a subject string to use for the mail
## subject_desc: a subject string to use for the mail.
##
## dest: recipient string to use for the mail
## dest: recipient string to use for the mail.
##
## Returns: a string of mail headers to which an email body can be appended
## Returns: a string of mail headers to which an email body can be
## appended.
global email_headers: function(subject_desc: string, dest: string): string;
## This event can be handled to access the :bro:type:`Notice::Info`
@ -236,35 +303,30 @@ export {
## rec: The record containing notice data before it is logged.
global log_notice: event(rec: Info);
## This is an internal wrapper for the global :bro:id:`NOTICE` function;
## disregard.
## This is an internal wrapper for the global :bro:id:`NOTICE`
## function; disregard.
##
## n: The record of notice data.
global internal_NOTICE: function(n: Notice::Info);
}
# This is used as a hack to implement per-item expiration intervals.
function per_notice_suppression_interval(t: table[Notice::Type, string] of Notice::Info, idx: any): interval
function per_notice_suppression_interval(t: table[Notice::Type, string] of time, idx: any): interval
{
local n: Notice::Type;
local s: string;
[n,s] = idx;
local suppress_time = t[n,s]$suppress_for - (network_time() - t[n,s]$ts);
local suppress_time = t[n,s] - network_time();
if ( suppress_time < 0secs )
suppress_time = 0secs;
# If there is no more suppression time left, the notice needs to be sent
# to the end_suppression event.
if ( suppress_time == 0secs )
event Notice::end_suppression(t[n,s]);
return suppress_time;
}
# This is the internally maintained notice suppression table. It's
# indexed on the Notice::Type and the $identifier field from the notice.
global suppressing: table[Type, string] of Notice::Info = {}
global suppressing: table[Type, string] of time = {}
&create_expire=0secs
&expire_func=per_notice_suppression_interval;
@ -359,11 +421,22 @@ function email_notice_to(n: Notice::Info, dest: string, extend: bool)
# First off, finish the headers and include the human readable messages
# then leave a blank line after the message.
email_text = string_cat(email_text, "\nMessage: ", n$msg);
if ( n?$sub )
email_text = string_cat(email_text, "\nSub-message: ", n$sub);
email_text = string_cat(email_text, "\nMessage: ", n$msg, "\n");
email_text = string_cat(email_text, "\n\n");
if ( n?$sub )
email_text = string_cat(email_text, "Sub-message: ", n$sub, "\n");
email_text = string_cat(email_text, "\n");
# Add information about the file if it exists.
if ( n?$file_desc )
email_text = string_cat(email_text, "File Description: ", n$file_desc, "\n");
if ( n?$file_mime_type )
email_text = string_cat(email_text, "File MIME Type: ", n$file_mime_type, "\n");
if ( n?$file_desc || n?$file_mime_type )
email_text = string_cat(email_text, "\n");
# Next, add information about the connection if it exists.
if ( n?$id )
@ -426,7 +499,8 @@ hook Notice::notice(n: Notice::Info) &priority=-5
[n$note, n$identifier] !in suppressing &&
n$suppress_for != 0secs )
{
suppressing[n$note, n$identifier] = n;
local suppress_until = n$ts + n$suppress_for;
suppressing[n$note, n$identifier] = suppress_until;
event Notice::begin_suppression(n);
}
}
@ -451,6 +525,42 @@ function execute_with_notice(cmd: string, n: Notice::Info)
#system_env(cmd, tags);
}
function create_file_info(f: fa_file): Notice::FileInfo
{
local fi: Notice::FileInfo = Notice::FileInfo($fuid = f$id,
$desc = Files::describe(f));
if ( f?$mime_type )
fi$mime = f$mime_type;
if ( f?$conns && |f$conns| == 1 )
for ( id in f$conns )
{
fi$cid = id;
fi$cuid = f$conns[id]$uid;
}
return fi;
}
function populate_file_info(f: fa_file, n: Notice::Info)
{
populate_file_info2(create_file_info(f), n);
}
function populate_file_info2(fi: Notice::FileInfo, n: Notice::Info)
{
if ( ! n?$fuid )
n$fuid = fi$fuid;
if ( ! n?$file_mime_type && fi?$mime )
n$file_mime_type = fi$mime;
n$file_desc = fi$desc;
n$id = fi$cid;
n$uid = fi$cuid;
}
# This is run synchronously as a function before all of the other
# notice related functions and events. It also modifies the
# :bro:type:`Notice::Info` record in place.
@ -460,10 +570,14 @@ function apply_policy(n: Notice::Info)
if ( ! n?$ts )
n$ts = network_time();
if ( n?$f )
populate_file_info(n$f, n);
if ( n?$conn )
{
if ( ! n?$id )
n$id = n$conn$id;
if ( ! n?$uid )
n$uid = n$conn$uid;
}
@ -496,9 +610,6 @@ function apply_policy(n: Notice::Info)
n$peer_descr = n$src_peer?$descr ?
n$src_peer$descr : fmt("%s", n$src_peer$host);
if ( ! n?$actions )
n$actions = set();
if ( ! n?$email_body_sections )
n$email_body_sections = vector();
if ( ! n?$email_delay_tokens )
@ -513,13 +624,15 @@ function apply_policy(n: Notice::Info)
if ( ! n?$suppress_for )
n$suppress_for = default_suppression_interval;
# Delete the connection record if it's there so we aren't sending that
# to remote machines. It can cause problems due to the size of the
# connection record.
# Delete the connection and file records if they're there so we
# aren't sending that to remote machines. It can cause problems
# due to the size of those records.
if ( n?$conn )
delete n$conn;
if ( n?$iconn )
delete n$iconn;
if ( n?$f )
delete n$f;
}
function internal_NOTICE(n: Notice::Info)

View file

@ -3,7 +3,7 @@
module GLOBAL;
## This is the entry point in the global namespace for notice framework.
## This is the entry point in the global namespace for the notice framework.
function NOTICE(n: Notice::Info)
{
# Suppress this notice if necessary.

View file

@ -26,8 +26,8 @@ export {
type Info: record {
## The time when the weird occurred.
ts: time &log;
## If a connection is associated with this weird, this will be the
## connection's unique ID.
## If a connection is associated with this weird, this will be
## the connection's unique ID.
uid: string &log &optional;
## conn_id for the optional connection.
id: conn_id &log &optional;
@ -37,16 +37,16 @@ export {
addl: string &log &optional;
## Indicate if this weird was also turned into a notice.
notice: bool &log &default=F;
## The peer that originated this weird. This is helpful in cluster
## deployments if a particular cluster node is having trouble to help
## identify which node is having trouble.
## The peer that originated this weird. This is helpful in
## cluster deployments if a particular cluster node is having
## trouble to help identify which node is having trouble.
peer: string &log &optional;
};
## Types of actions that may be taken when handling weird activity events.
type Action: enum {
## A dummy action indicating the user does not care what internal
## decision is made regarding a given type of weird.
## A dummy action indicating the user does not care what
## internal decision is made regarding a given type of weird.
ACTION_UNSPECIFIED,
## No action is to be taken.
ACTION_IGNORE,
@ -185,6 +185,7 @@ export {
["RPC_underflow"] = ACTION_LOG,
["RST_storm"] = ACTION_LOG,
["RST_with_data"] = ACTION_LOG,
["SSL_many_server_names"] = ACTION_LOG,
["simultaneous_open"] = ACTION_LOG_PER_CONN,
["spontaneous_FIN"] = ACTION_IGNORE,
["spontaneous_RST"] = ACTION_IGNORE,
@ -252,16 +253,16 @@ export {
## a unique weird every ``create_expire`` interval.
global weird_ignore: set[string, string] &create_expire=10min &redef;
## A state set which tracks unique weirds solely by the name to reduce
## duplicate logging. This is not synchronized deliberately because it
## could cause overload during storms
## A state set which tracks unique weirds solely by name to reduce
## duplicate logging. This is deliberately not synchronized because it
## could cause overload during storms.
global did_log: set[string, string] &create_expire=1day &redef;
## A state set which tracks unique weirds solely by the name to reduce
## A state set which tracks unique weirds solely by name to reduce
## duplicate notices from being raised.
global did_notice: set[string, string] &create_expire=1day &redef;
## Handlers of this event are invoked one per write to the weird
## Handlers of this event are invoked once per write to the weird
## logging stream before the data is actually written.
##
## rec: The weird columns about to be logged to the weird stream.

View file

@ -0,0 +1 @@
The packet filter framework supports how Bro sets its BPF capture filter.

View file

@ -1,3 +1,8 @@
@load ./utils
@load ./main
@load ./netstats
@load base/frameworks/cluster
@if ( Cluster::is_enabled() )
@load ./cluster
@endif

View file

@ -0,0 +1,17 @@
@load base/frameworks/cluster
@load ./main
module PacketFilter;
event remote_connection_handshake_done(p: event_peer) &priority=3
{
if ( Cluster::local_node_type() == Cluster::WORKER &&
p$descr in Cluster::nodes &&
Cluster::nodes[p$descr]$node_type == Cluster::MANAGER )
{
# This ensures that a packet filter is installed and logged
# after the manager connects to us.
install();
}
}

View file

@ -1,4 +1,4 @@
##! This script supports how Bro sets it's BPF capture filter. By default
##! This script supports how Bro sets its BPF capture filter. By default
##! Bro sets a capture filter that allows all traffic. If a filter
##! is set on the command line, that filter takes precedence over the default
##! open filter and all filters defined in Bro scripts with the
@ -19,7 +19,7 @@ export {
## This notice is generated if a packet filter cannot be compiled.
Compile_Failure,
## Generated if a packet filter is fails to install.
## Generated if a packet filter fails to install.
Install_Failure,
## Generated when a filter takes too long to compile.
@ -33,8 +33,8 @@ export {
ts: time &log;
## This is a string representation of the node that applied this
## packet filter. It's mostly useful in the context of dynamically
## changing filters on clusters.
## packet filter. It's mostly useful in the context of
## dynamically changing filters on clusters.
node: string &log &optional;
## The packet filter that is being set.
@ -48,27 +48,28 @@ export {
};
## The BPF filter that is used by default to define what traffic should
## be captured. Filters defined in :bro:id:`restrict_filters` will still
## be applied to reduce the captured traffic.
## be captured. Filters defined in :bro:id:`restrict_filters` will
## still be applied to reduce the captured traffic.
const default_capture_filter = "ip or not ip" &redef;
## Filter string which is unconditionally or'ed to the beginning of every
## dynamically built filter.
## Filter string which is unconditionally or'ed to the beginning of
## every dynamically built filter.
const unrestricted_filter = "" &redef;
## Filter string which is unconditionally and'ed to the beginning of every
## dynamically built filter. This is mostly used when a custom filter is being
## used but MPLS or VLAN tags are on the traffic.
## Filter string which is unconditionally and'ed to the beginning of
## every dynamically built filter. This is mostly used when a custom
## filter is being used but MPLS or VLAN tags are on the traffic.
const restricted_filter = "" &redef;
## The maximum amount of time that you'd like to allow for BPF filters to compile.
## If this time is exceeded, compensation measures may be taken by the framework
## to reduce the filter size. This threshold being crossed also results in
## the :bro:see:`PacketFilter::Too_Long_To_Compile_Filter` notice.
## to reduce the filter size. This threshold being crossed also results
## in the :bro:see:`PacketFilter::Too_Long_To_Compile_Filter` notice.
const max_filter_compile_time = 100msec &redef;
## Install a BPF filter to exclude some traffic. The filter should positively
## match what is to be excluded, it will be wrapped in a "not".
## Install a BPF filter to exclude some traffic. The filter should
## positively match what is to be excluded, it will be wrapped in
## a "not".
##
## filter_id: An arbitrary string that can be used to identify
## the filter.
@ -79,9 +80,9 @@ export {
## installed or not.
global exclude: function(filter_id: string, filter: string): bool;
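# For instance, keeping DNS out of the capture entirely (the id string is
# arbitrary; the filter matches what should be *excluded*):
event bro_init()
    {
    PacketFilter::exclude("ignore-dns", "udp port 53");
    }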
## Install a temporary filter to traffic which should not be passed through
## the BPF filter. The filter should match the traffic you don't want
## to see (it will be wrapped in a "not" condition).
## Install a temporary filter to traffic which should not be passed
## through the BPF filter. The filter should match the traffic you
## don't want to see (it will be wrapped in a "not" condition).
##
## filter_id: An arbitrary string that can be used to identify
## the filter.
@ -109,7 +110,7 @@ export {
## Enables the old filtering approach of "only watch common ports for
## analyzed protocols".
##
## Unless you know what you are doing, leave this set to F.
const enable_auto_protocol_capture_filters = F &redef;
@ -125,7 +126,7 @@ global dynamic_restrict_filters: table[string] of string = {};
# install the filter.
global currently_building = F;
# Internal tracking for if the the filter being built has possibly been changed.
# Internal tracking for if the filter being built has possibly been changed.
global filter_changed = F;
global filter_plugins: set[FilterPlugin] = {};
@ -293,6 +294,7 @@ function install(): bool
# Do an audit log for the packet filter.
local info: Info;
info$ts = network_time();
info$node = peer_description;
# If network_time() is 0.0 we're at init time so use the wall clock.
if ( info$ts == 0.0 )
{

View file

@ -13,7 +13,7 @@ export {
##
## num_parts: The number of parts the traffic should be split into.
##
## this_part: The part of the traffic this filter will accept. 0-based.
## this_part: The part of the traffic this filter will accept (0-based).
global sampling_filter: function(num_parts: count, this_part: count): string;
## Combines two valid BPF filter strings with a string based operator

View file

@ -0,0 +1,2 @@
This framework is intended to create an output and filtering path for
internally generated messages/warnings/errors.

View file

@ -7,9 +7,9 @@
##! :bro:see:`Reporter::errors_to_stderr`.
##!
##! Note that this framework deals with the handling of internally generated
##! reporter messages, for the interface in to actually creating interface
##! reporter messages, for the interface
##! into actually creating reporter messages from the scripting layer, use
##! the built-in functions in :doc:`/scripts/base/bif/reporter.bif`.
##! the built-in functions in :doc:`/scripts/base/bif/reporter.bif.bro`.
module Reporter;

View file

@ -0,0 +1,4 @@
The signature framework provides for doing low-level pattern matching. While
signatures are not Bro's preferred detection tool, they sometimes come in
handy and are closer to what many people are familiar with from using
other NIDS.

View file

@ -1,6 +1,6 @@
##! Script level signature support. See the
##! :doc:`signature documentation </signatures>` for more information about
##! Bro's signature engine.
##! :doc:`signature documentation </frameworks/signatures>` for more
##! information about Bro's signature engine.
@load base/frameworks/notice
@ -11,21 +11,23 @@ export {
redef enum Notice::Type += {
## Generic notice type for notice-worthy signature matches.
Sensitive_Signature,
## Host has triggered many signatures on the same host. The number of
## signatures is defined by the
## Host has triggered many signatures on the same host. The
## number of signatures is defined by the
## :bro:id:`Signatures::vert_scan_thresholds` variable.
Multiple_Signatures,
## Host has triggered the same signature on multiple hosts as defined
## by the :bro:id:`Signatures::horiz_scan_thresholds` variable.
## Host has triggered the same signature on multiple hosts as
## defined by the :bro:id:`Signatures::horiz_scan_thresholds`
## variable.
Multiple_Sig_Responders,
## The same signature has triggered multiple times for a host. The
## number of times the signature has been triggered is defined by the
## :bro:id:`Signatures::count_thresholds` variable. To generate this
## notice, the :bro:enum:`Signatures::SIG_COUNT_PER_RESP` action must
## bet set for the signature.
## The same signature has triggered multiple times for a host.
## The number of times the signature has been triggered is
## defined by the :bro:id:`Signatures::count_thresholds`
## variable. To generate this notice, the
## :bro:enum:`Signatures::SIG_COUNT_PER_RESP` action must be
## set for the signature.
Count_Signature,
## Summarize the number of times a host triggered a signature. The
## interval between summaries is defined by the
## Summarize the number of times a host triggered a signature.
## The interval between summaries is defined by the
## :bro:id:`Signatures::summary_interval` variable.
Signature_Summary,
};
@ -37,11 +39,12 @@ export {
## All of them write the signature record to the logging stream unless
## declared otherwise.
type Action: enum {
## Ignore this signature completely (even for scan detection). Don't
## write to the signatures logging stream.
## Ignore this signature completely (even for scan detection).
## Don't write to the signatures logging stream.
SIG_IGNORE,
## Process through the various aggregate techniques, but don't report
## individually and don't write to the signatures logging stream.
## Process through the various aggregate techniques, but don't
## report individually and don't write to the signatures logging
## stream.
SIG_QUIET,
## Generate a notice.
SIG_LOG,
@ -64,20 +67,24 @@ export {
## The record type which contains the column fields of the signature log.
type Info: record {
## The network time at which a signature matching type of event to
## be logged has occurred.
## The network time at which a signature matching type of event
## to be logged has occurred.
ts: time &log;
## A unique identifier of the connection which triggered the
## signature match event.
uid: string &log &optional;
## The host which triggered the signature match event.
src_addr: addr &log &optional;
## The host port on which the signature-matching activity occurred.
## The host port on which the signature-matching activity
## occurred.
src_port: port &log &optional;
## The destination host which was sent the payload that triggered the
## signature match.
## The destination host which was sent the payload that
## triggered the signature match.
dst_addr: addr &log &optional;
## The destination host port which was sent the payload that triggered
## the signature match.
## The destination host port which was sent the payload that
## triggered the signature match.
dst_port: port &log &optional;
## Notice associated with signature event
## Notice associated with signature event.
note: Notice::Type &log;
## The name of the signature that matched.
sig_id: string &log &optional;
@ -103,8 +110,8 @@ export {
## different responders has reached one of the thresholds.
const horiz_scan_thresholds = { 5, 10, 50, 100, 500, 1000 } &redef;
## Generate a notice if, for a pair [orig, resp], the number of different
## signature matches has reached one of the thresholds.
## Generate a notice if, for a pair [orig, resp], the number of
## different signature matches has reached one of the thresholds.
const vert_scan_thresholds = { 5, 10, 50, 100, 500, 1000 } &redef;
## Generate a notice if a :bro:enum:`Signatures::SIG_COUNT_PER_RESP`
@ -112,7 +119,7 @@ export {
const count_thresholds = { 5, 10, 50, 100, 500, 1000, 10000, 1000000, } &redef;
## The interval between when :bro:enum:`Signatures::Signature_Summary`
## notice are generated.
## notices are generated.
const summary_interval = 1 day &redef;
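# All of these are redef-able from site policy; e.g., assuming the
# framework's actions table, a site might quiet one noisy signature and
# summarize less often:
redef Signatures::summary_interval = 6 hr;
redef Signatures::actions += { ["my-noisy-sig"] = Signatures::SIG_QUIET };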
## This event can be handled to access/alter data about to be logged
@ -163,7 +170,7 @@ event signature_match(state: signature_state, msg: string, data: string)
# Trim the matched data down to something reasonable
if ( |data| > 140 )
data = fmt("%s...", sub_bytes(data, 0, 140));
local src_addr: addr;
local src_port: port;
local dst_addr: addr;
@ -188,6 +195,7 @@ event signature_match(state: signature_state, msg: string, data: string)
{
local info: Info = [$ts=network_time(),
$note=Sensitive_Signature,
$uid=state$conn$uid,
$src_addr=src_addr,
$src_port=src_port,
$dst_addr=dst_addr,
@ -208,11 +216,11 @@ event signature_match(state: signature_state, msg: string, data: string)
if ( ++count_per_resp[dst,sig_id] in count_thresholds )
{
NOTICE([$note=Count_Signature, $conn=state$conn,
$msg=msg,
$n=count_per_resp[dst,sig_id],
$sub=fmt("%d matches of signature %s on host %s",
count_per_resp[dst,sig_id],
sig_id, dst)]);
}
}
@ -286,16 +294,16 @@ event signature_match(state: signature_state, msg: string, data: string)
orig, vcount, resp);
Log::write(Signatures::LOG,
[$ts=network_time(),
$note=Multiple_Signatures,
$src_addr=orig,
$dst_addr=resp, $sig_id=sig_id, $sig_count=vcount,
$event_msg=fmt("%s different signatures triggered", vcount),
$sub_msg=vert_scan_msg]);
[$ts=network_time(),
$note=Multiple_Signatures,
$src_addr=orig,
$dst_addr=resp, $sig_id=sig_id, $sig_count=vcount,
$event_msg=fmt("%s different signatures triggered", vcount),
$sub_msg=vert_scan_msg]);
NOTICE([$note=Multiple_Signatures, $src=orig, $dst=resp,
$msg=fmt("%s different signatures triggered", vcount),
$n=vcount, $sub=vert_scan_msg]);
$msg=fmt("%s different signatures triggered", vcount),
$n=vcount, $sub=vert_scan_msg]);
last_vthresh[orig] = vcount;
}

View file

@ -0,0 +1,4 @@
The software framework provides infrastructure for maintaining a table
of software versions seen on the network. The version parsing itself
is carried out by external protocol-specific scripts that feed into
this framework.

View file

@ -1,5 +1,5 @@
##! This script provides the framework for software version detection and
##! parsing but doesn't actually do any detection on it's own. It relys on
##! parsing but doesn't actually do any detection on its own. It relies on
##! other protocol specific scripts to parse out software from the protocols
##! that they analyze. The entry point for providing new software detections
##! to this framework is through the :bro:id:`Software::found` function.
@ -23,15 +23,15 @@ export {
## A structure to represent the numeric version of software.
type Version: record {
## Major version number
## Major version number.
major: count &optional;
## Minor version number
## Minor version number.
minor: count &optional;
## Minor subversion number
## Minor subversion number.
minor2: count &optional;
## Minor updates number
## Minor updates number.
minor3: count &optional;
## Additional version string (e.g. "beta42")
## Additional version string (e.g. "beta42").
addl: string &optional;
} &log;
@ -41,7 +41,8 @@ export {
ts: time &log &optional;
## The IP address detected running the software.
host: addr &log;
## The Port on which the software is running. Only sensible for server software.
## The port on which the software is running. Only sensible for
## server software.
host_p: port &log &optional;
## The type of software detected (e.g. :bro:enum:`HTTP::SERVER`).
software_type: Type &log &default=UNKNOWN;
@ -49,9 +50,9 @@ export {
name: string &log &optional;
## Version of the software.
version: Version &log &optional;
## The full unparsed version string found because the version parsing
## doesn't always work reliably in all cases and this acts as a
## fallback in the logs.
## The full unparsed version string found because the version
## parsing doesn't always work reliably in all cases and this
## acts as a fallback in the logs.
unparsed_version: string &log &optional;
## This can indicate that this software being detected should
@ -59,13 +60,13 @@ export {
## default, only software that is "interesting" due to a change
## in version or it being currently unknown is sent to the
## logging framework. This can be set to T to force the record
## to be sent to the logging framework if some amount of this tracking
## needs to happen in a specific way to the software.
## to be sent to the logging framework if some amount of this
## tracking needs to happen in a specific way to the software.
force_log: bool &default=F;
};
## Hosts whose software should be detected and tracked.
## Choices are: LOCAL_HOSTS, REMOTE_HOSTS, ALL_HOSTS, NO_HOSTS
## Choices are: LOCAL_HOSTS, REMOTE_HOSTS, ALL_HOSTS, NO_HOSTS.
const asset_tracking = LOCAL_HOSTS &redef;
## Other scripts should call this function when they detect software.
@ -79,14 +80,14 @@ export {
## Compare two version records.
##
## Returns: -1 for v1 < v2, 0 for v1 == v2, 1 for v1 > v2.
## If the numerical version numbers match, the addl string
## If the numerical version numbers match, the *addl* string
## is compared lexicographically.
global cmp_versions: function(v1: Version, v2: Version): int;
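For example, assuming the documented semantics (a missing sub-version compares as older, and *addl* only breaks numeric ties):

# Returns -1: version 1.2 is older than 1.2.1.
Software::cmp_versions([$major=1, $minor=2], [$major=1, $minor=2, $minor2=1]);
# Returns -1: numeric parts tie, so "beta1" < "beta2" decides lexicographically.
Software::cmp_versions([$major=1, $addl="beta1"], [$major=1, $addl="beta2"]);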
## Type to represent a collection of :bro:type:`Software::Info` records.
## It's indexed with the name of a piece of software such as "Firefox"
## and it yields a :bro:type:`Software::Info` record with more information
## about the software.
## and it yields a :bro:type:`Software::Info` record with more
## information about the software.
type SoftwareSet: table[string] of Info;
## The set of software associated with an address. Data expires from
@ -208,7 +209,7 @@ function parse_mozilla(unparsed_version: string): Description
if ( 2 in parts )
v = parse(parts[2])$version;
}
else if ( / MSIE / in unparsed_version )
else if ( / MSIE |Trident\// in unparsed_version )
{
software_name = "MSIE";
if ( /Trident\/4\.0/ in unparsed_version )
@ -217,6 +218,8 @@ function parse_mozilla(unparsed_version: string): Description
v = [$major=9,$minor=0];
else if ( /Trident\/6\.0/ in unparsed_version )
v = [$major=10,$minor=0];
else if ( /Trident\/7\.0/ in unparsed_version )
v = [$major=11,$minor=0];
else
{
parts = split_all(unparsed_version, /MSIE [0-9]{1,2}\.*[0-9]*b?[0-9]*/);
@ -284,6 +287,13 @@ function parse_mozilla(unparsed_version: string): Description
if ( 2 in parts )
v = parse(parts[2])$version;
}
else if ( / Java\/[0-9]\./ in unparsed_version )
{
software_name = "Java";
parts = split_all(unparsed_version, /Java\/[0-9\._]*/);
if ( 2 in parts )
v = parse(parts[2])$version;
}
return [$version=v, $unparsed_version=unparsed_version, $name=software_name];
}
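With the Trident branches above, an IE 11 user agent that no longer carries an "MSIE" token still parses; a sketch, assuming the parse entry point is visible to the caller:

local d = Software::parse("Mozilla/5.0 (Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko");
# d$name == "MSIE" and d$version == [$major=11, $minor=0]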
@ -436,7 +446,7 @@ function found(id: conn_id, info: Info): bool
{
Reporter::error("No unparsed version string present in Info record with version in Software::found");
return F;
}
}
local sw = parse(info$unparsed_version);
info$unparsed_version = sw$unparsed_version;
info$name = sw$name;
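Protocol scripts are the intended callers of this entry point; a hypothetical sketch (the event and header name are chosen for illustration, and HTTP::SERVER follows the software_type example in the docs above):

event http_header(c: connection, is_orig: bool, name: string, value: string)
	{
	# Server banners come from the responder side.
	if ( ! is_orig && name == "SERVER" )
		Software::found(c$id, [$unparsed_version=value,
		                       $host=c$id$resp_h,
		                       $host_p=c$id$resp_p,
		                       $software_type=HTTP::SERVER]);
	}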

View file

@ -0,0 +1,2 @@
The summary statistics framework provides a way to summarize large streams
of data into simple reduced measurements.

View file

@ -1,6 +1,6 @@
##! This implements transparent cluster support for the SumStats framework.
##! Do not load this file directly. It's only meant to be loaded automatically
##! and will be depending on if the cluster framework has been enabled.
##! and will be if the cluster framework has been enabled.
##! The goal of this script is to make sumstats calculation completely and
##! transparently automated when running on a cluster.
@ -10,62 +10,56 @@
module SumStats;
export {
## Allows a user to decide how large of result groups the workers should transmit
## values for cluster stats aggregation.
const cluster_send_in_groups_of = 50 &redef;
## The percent of the full threshold value that needs to be met on a single worker
## for that worker to send the value to its manager in order for it to request a
## global view for that value. There is no requirement that the manager requests
## a global view for the key since it may opt not to if it requested a global view
## for the key recently.
## The percent of the full threshold value that needs to be met on a
## single worker for that worker to send the value to its manager in
## order for it to request a global view for that value. There is no
## requirement that the manager requests a global view for the key since
## it may opt not to if it requested a global view for the key recently.
const cluster_request_global_view_percent = 0.2 &redef;
## This is to deal with intermediate update overload. A manager will only allow
## this many intermediate update requests to the workers to be inflight at any
## given time. Requested intermediate updates are currently thrown out and not
## performed. In practice this should hopefully have a minimal effect.
## This is to deal with intermediate update overload. A manager will
## only allow this many intermediate update requests to the workers to
## be inflight at any given time. Requested intermediate updates are
## currently thrown out and not performed. In practice this should
## hopefully have a minimal effect.
const max_outstanding_global_views = 10 &redef;
## Intermediate updates can cause overload situations on very large clusters. This
## option may help reduce load and correct intermittent problems. The goal for this
## option is also meant to be temporary.
const enable_intermediate_updates = T &redef;
## Event sent by the manager in a cluster to initiate the collection of
## values for a sumstat.
global cluster_ss_request: event(uid: string, ss_name: string, cleanup: bool);
## Event sent by the manager in a cluster to initiate the collection of values for
## a sumstat.
global cluster_ss_request: event(uid: string, ssid: string);
## Event sent by nodes that are collecting sumstats after receiving a request for
## the sumstat from the manager.
global cluster_ss_response: event(uid: string, ssid: string, data: ResultTable, done: bool);
## This event is sent by the manager in a cluster to initiate the collection of
## a single key value from a sumstat. It's typically used to get intermediate
## updates before the break interval triggers to speed detection of a value
## crossing a threshold.
global cluster_key_request: event(uid: string, ssid: string, key: Key);
## This event is sent by the manager in a cluster to initiate the
## collection of a single key value from a sumstat. It's typically used
## to get intermediate updates before the break interval triggers to
## speed detection of a value crossing a threshold.
global cluster_get_result: event(uid: string, ss_name: string, key: Key, cleanup: bool);
## This event is sent by nodes in response to a
## :bro:id:`SumStats::cluster_key_request` event.
global cluster_key_response: event(uid: string, ssid: string, key: Key, result: Result);
## :bro:id:`SumStats::cluster_get_result` event.
global cluster_send_result: event(uid: string, ss_name: string, key: Key, result: Result, cleanup: bool);
## This is sent by workers to indicate that they crossed the percent
## of the current threshold by the percentage defined globally in
## :bro:id:`SumStats::cluster_request_global_view_percent`
global cluster_key_intermediate_response: event(ssid: string, key: SumStats::Key);
## :bro:id:`SumStats::cluster_request_global_view_percent`.
global cluster_key_intermediate_response: event(ss_name: string, key: SumStats::Key);
## This event is scheduled internally on workers to send result chunks.
global send_data: event(uid: string, ssid: string, data: ResultTable);
global send_data: event(uid: string, ss_name: string, data: ResultTable, cleanup: bool);
global get_a_key: event(uid: string, ss_name: string, cleanup: bool &default=F);
global send_a_key: event(uid: string, ss_name: string, key: Key);
global send_no_key: event(uid: string, ss_name: string);
## This event is generated when a threshold is crossed.
global cluster_threshold_crossed: event(ssid: string, key: SumStats::Key, thold: Thresholding);
global cluster_threshold_crossed: event(ss_name: string, key: SumStats::Key, thold_index: count);
}
# Add events to the cluster framework to make this work.
redef Cluster::manager2worker_events += /SumStats::cluster_(ss_request|key_request|threshold_crossed)/;
redef Cluster::manager2worker_events += /SumStats::thresholds_reset/;
redef Cluster::worker2manager_events += /SumStats::cluster_(ss_response|key_response|key_intermediate_response)/;
redef Cluster::manager2worker_events += /SumStats::cluster_(ss_request|get_result|threshold_crossed)/;
redef Cluster::manager2worker_events += /SumStats::(get_a_key)/;
redef Cluster::worker2manager_events += /SumStats::cluster_(send_result|key_intermediate_response)/;
redef Cluster::worker2manager_events += /SumStats::(send_a_key|send_no_key)/;
@if ( Cluster::local_node_type() != Cluster::MANAGER )
# This variable is maintained to know what keys have recently sent as
@ -74,12 +68,9 @@ redef Cluster::worker2manager_events += /SumStats::cluster_(ss_response|key_resp
# an intermediate result has been received.
global recent_global_view_keys: table[string, Key] of count &create_expire=1min &default=0;
event bro_init() &priority=-100
{
# The manager is the only host allowed to track these.
stats_store = table();
reducer_store = table();
}
# Result tables indexed on a uid that are currently being sent to the
# manager.
global sending_results: table[string] of ResultTable = table() &read_expire=1min;
# This is done on all non-manager node types in the event that a sumstat is
# being collected somewhere other than a worker.
@ -87,95 +78,130 @@ function data_added(ss: SumStat, key: Key, result: Result)
{
# If an intermediate update for this value was sent recently, don't send
# it again.
if ( [ss$id, key] in recent_global_view_keys )
if ( [ss$name, key] in recent_global_view_keys )
return;
# If val is 5 and global view % is 0.1 (10%), pct_val will be 50. If that
# crosses the full threshold then it's a candidate to send as an
# intermediate update.
if ( enable_intermediate_updates &&
check_thresholds(ss, key, result, cluster_request_global_view_percent) )
if ( check_thresholds(ss, key, result, cluster_request_global_view_percent) )
{
# kick off intermediate update
event SumStats::cluster_key_intermediate_response(ss$id, key);
++recent_global_view_keys[ss$id, key];
event SumStats::cluster_key_intermediate_response(ss$name, key);
++recent_global_view_keys[ss$name, key];
}
}
event SumStats::send_data(uid: string, ssid: string, data: ResultTable)
event SumStats::get_a_key(uid: string, ss_name: string, cleanup: bool)
{
#print fmt("WORKER %s: sending data for uid %s...", Cluster::node, uid);
local local_data: ResultTable = table();
local num_added = 0;
for ( key in data )
if ( uid in sending_results )
{
local_data[key] = data[key];
delete data[key];
# Only send cluster_send_in_groups_of at a time. Queue another
# event to send the next group.
if ( cluster_send_in_groups_of == ++num_added )
break;
if ( |sending_results[uid]| == 0 )
{
event SumStats::send_no_key(uid, ss_name);
}
else
{
for ( key in sending_results[uid] )
{
event SumStats::send_a_key(uid, ss_name, key);
# break to only send one.
break;
}
}
}
else if ( !cleanup && ss_name in result_store && |result_store[ss_name]| > 0 )
{
if ( |result_store[ss_name]| == 0 )
{
event SumStats::send_no_key(uid, ss_name);
}
else
{
for ( key in result_store[ss_name] )
{
event SumStats::send_a_key(uid, ss_name, key);
# break to only send one.
break;
}
}
}
else
{
event SumStats::send_no_key(uid, ss_name);
}
local done = F;
# If data is empty, this sumstat is done.
if ( |data| == 0 )
done = T;
# Note: copy is needed to compensate for a serialization caching issue. This should be
# changed to something else later.
event SumStats::cluster_ss_response(uid, ssid, copy(local_data), done);
if ( ! done )
schedule 0.01 sec { SumStats::send_data(uid, ssid, data) };
}
event SumStats::cluster_ss_request(uid: string, ssid: string)
event SumStats::cluster_ss_request(uid: string, ss_name: string, cleanup: bool)
{
#print fmt("WORKER %s: received the cluster_ss_request event for %s.", Cluster::node, id);
# Initiate sending all of the data for the requested stats.
if ( ssid in result_store )
event SumStats::send_data(uid, ssid, result_store[ssid]);
else
event SumStats::send_data(uid, ssid, table());
# Create a back store for the result
sending_results[uid] = (ss_name in result_store) ? result_store[ss_name] : table();
# Lookup the actual sumstats and reset it, the reference to the data
# currently stored will be maintained internally by the send_data event.
if ( ssid in stats_store )
reset(stats_store[ssid]);
# currently stored will be maintained internally from the
# sending_results table.
if ( cleanup && ss_name in stats_store )
reset(stats_store[ss_name]);
}
event SumStats::cluster_key_request(uid: string, ssid: string, key: Key)
event SumStats::cluster_get_result(uid: string, ss_name: string, key: Key, cleanup: bool)
{
if ( ssid in result_store && key in result_store[ssid] )
{
#print fmt("WORKER %s: received the cluster_key_request event for %s=%s.", Cluster::node, key2str(key), data);
#print fmt("WORKER %s: received the cluster_get_result event for %s=%s.", Cluster::node, key2str(key), data);
# Note: copy is needed to compensate for a serialization caching issue. This should be
# changed to something else later.
event SumStats::cluster_key_response(uid, ssid, key, copy(result_store[ssid][key]));
if ( cleanup ) # data will implicitly be in sending_results (i know this isn't great)
{
if ( uid in sending_results && key in sending_results[uid] )
{
# Note: copy is needed to compensate for a serialization caching issue. This should be
# changed to something else later.
event SumStats::cluster_send_result(uid, ss_name, key, copy(sending_results[uid][key]), cleanup);
delete sending_results[uid][key];
}
else
{
# We need to send an empty response if we don't have the data so that the manager
# can know that it heard back from all of the workers.
event SumStats::cluster_send_result(uid, ss_name, key, table(), cleanup);
}
}
else
{
# We need to send an empty response if we don't have the data so that the manager
# can know that it heard back from all of the workers.
event SumStats::cluster_key_response(uid, ssid, key, table());
if ( ss_name in result_store && key in result_store[ss_name] )
{
# Note: copy is needed to compensate for a serialization caching issue. This should be
# changed to something else later.
event SumStats::cluster_send_result(uid, ss_name, key, copy(result_store[ss_name][key]), cleanup);
}
else
{
# We need to send an empty response if we don't have the data so that the manager
# can know that it heard back from all of the workers.
event SumStats::cluster_send_result(uid, ss_name, key, table(), cleanup);
}
}
}
event SumStats::cluster_threshold_crossed(ssid: string, key: SumStats::Key, thold: Thresholding)
event SumStats::cluster_threshold_crossed(ss_name: string, key: SumStats::Key, thold_index: count)
{
if ( ssid !in threshold_tracker )
threshold_tracker[ssid] = table();
if ( ss_name !in threshold_tracker )
threshold_tracker[ss_name] = table();
threshold_tracker[ssid][key] = thold;
threshold_tracker[ss_name][key] = thold_index;
}
event SumStats::thresholds_reset(ssid: string)
# request_key is a no-op on the workers.
# It should only be called by the manager. Since we usually run the same scripts on the
# workers and the manager, it may also end up being called on the workers, so we just ignore it here.
#
# There is a small chance that people will try calling it from events that are only raised on the workers.
# That does not work at the moment, and we cannot report an error, because we cannot distinguish it
# from the "script is running everywhere" case. But people should notice that they do not get results.
# Not entirely pretty, sorry :(
function request_key(ss_name: string, key: Key): Result
{
threshold_tracker[ssid] = table();
return Result();
}
@endif
@ -186,14 +212,18 @@ event SumStats::thresholds_reset(ssid: string)
# This variable is maintained by manager nodes as they collect and aggregate
# results.
# Index on a uid.
global stats_results: table[string] of ResultTable &read_expire=1min;
global stats_keys: table[string] of set[Key] &read_expire=1min
&expire_func=function(s: table[string] of set[Key], idx: string): interval
{
Reporter::warning(fmt("SumStat key request for the %s SumStat uid took longer than 1 minute and was automatically cancelled.", idx));
return 0secs;
};
# This variable is maintained by manager nodes to track how many "dones" they
# collected per collection unique id. Once the number of results for a uid
# matches the number of peer nodes that results should be coming from, the
# result is written out and deleted from here.
# Indexed on a uid.
# TODO: add an &expire_func in case not all results are received.
global done_with: table[string] of count &read_expire=1min &default=0;
# This variable is maintained by managers to track intermediate responses as
@ -201,11 +231,15 @@ global done_with: table[string] of count &read_expire=1min &default=0;
# Indexed on a uid.
global key_requests: table[string] of Result &read_expire=1min;
# Store uids for dynamic requests here to avoid cleanup on the uid.
# (This needs to be done differently!)
global dynamic_requests: set[string] &read_expire=1min;
# This variable is maintained by managers to prevent overwhelming communication due
# to too many intermediate updates. Each sumstat is tracked separately so that
# one won't overwhelm and degrade other quieter sumstats.
# Indexed on a sumstat id.
global outstanding_global_views: table[string] of count &default=0;
global outstanding_global_views: table[string] of count &read_expire=1min &default=0;
const zero_time = double_to_time(0.0);
# Managers handle logging.
@ -213,15 +247,20 @@ event SumStats::finish_epoch(ss: SumStat)
{
if ( network_time() > zero_time )
{
#print fmt("%.6f MANAGER: breaking %s sumstat for %s sumstat", network_time(), ss$name, ss$id);
#print fmt("%.6f MANAGER: breaking %s sumstat", network_time(), ss$name);
local uid = unique_id("");
if ( uid in stats_results )
delete stats_results[uid];
stats_results[uid] = table();
if ( uid in stats_keys )
delete stats_keys[uid];
stats_keys[uid] = set();
# Request data from peers.
event SumStats::cluster_ss_request(uid, ss$id);
event SumStats::cluster_ss_request(uid, ss$name, T);
done_with[uid] = 0;
#print fmt("get_key by uid: %s", uid);
event SumStats::get_a_key(uid, ss$name, T);
}
# Schedule the next finish_epoch event.
@ -235,51 +274,176 @@ function data_added(ss: SumStat, key: Key, result: Result)
if ( check_thresholds(ss, key, result, 1.0) )
{
threshold_crossed(ss, key, result);
event SumStats::cluster_threshold_crossed(ss$id, key, threshold_tracker[ss$id][key]);
event SumStats::cluster_threshold_crossed(ss$name, key, threshold_tracker[ss$name][key]);
}
}
event SumStats::cluster_key_response(uid: string, ssid: string, key: Key, result: Result)
function handle_end_of_result_collection(uid: string, ss_name: string, key: Key, cleanup: bool)
{
if ( uid !in key_requests )
{
Reporter::warning(fmt("Tried to handle end of result collection with missing uid in key_request sumstat:%s, key:%s.", ss_name, key));
return;
}
#print fmt("worker_count:%d :: done_with:%d", Cluster::worker_count, done_with[uid]);
local ss = stats_store[ss_name];
local ir = key_requests[uid];
if ( check_thresholds(ss, key, ir, 1.0) )
{
threshold_crossed(ss, key, ir);
event SumStats::cluster_threshold_crossed(ss_name, key, threshold_tracker[ss_name][key]);
}
if ( cleanup )
{
# This is done here because "cleanup" implicitly means
# it's the end of an epoch.
if ( ss?$epoch_result && |ir| > 0 )
{
local now = network_time();
ss$epoch_result(now, key, ir);
}
# Check that there is an outstanding view before subtracting.
# Global views only apply to non-dynamic requests. Dynamic
# requests must be serviced.
if ( outstanding_global_views[ss_name] > 0 )
--outstanding_global_views[ss_name];
}
delete key_requests[uid];
delete done_with[uid];
}
function request_all_current_keys(uid: string, ss_name: string, cleanup: bool)
{
#print "request_all_current_keys";
if ( uid in stats_keys && |stats_keys[uid]| > 0 )
{
#print fmt(" -- %d remaining keys here", |stats_keys[uid]|);
for ( key in stats_keys[uid] )
{
done_with[uid] = 0;
event SumStats::cluster_get_result(uid, ss_name, key, cleanup);
delete stats_keys[uid][key];
break; # only a single key
}
}
else
{
# Get more keys! And this breaks us out of the evented loop.
done_with[uid] = 0;
#print fmt("get_key by uid: %s", uid);
event SumStats::get_a_key(uid, ss_name, cleanup);
}
}
event SumStats::send_no_key(uid: string, ss_name: string)
{
#print "send_no_key";
if ( uid !in done_with )
done_with[uid] = 0;
++done_with[uid];
if ( Cluster::worker_count == done_with[uid] )
{
delete done_with[uid];
if ( uid in stats_keys && |stats_keys[uid]| > 0 )
{
#print "we need more keys!";
# Now that we have a key from each worker, lets
# grab all of the results.
request_all_current_keys(uid, ss_name, T);
}
else
{
#print "we're out of keys!";
local ss = stats_store[ss_name];
if ( ss?$epoch_finished )
ss$epoch_finished(network_time());
delete stats_keys[uid];
reset(ss);
}
}
}
event SumStats::send_a_key(uid: string, ss_name: string, key: Key)
{
#print fmt("send_a_key %s", key);
if ( uid !in stats_keys )
{
Reporter::warning(fmt("Manager received a uid for an unknown request. SumStat: %s, Key: %s", ss_name, key));
return;
}
if ( key !in stats_keys[uid] )
add stats_keys[uid][key];
++done_with[uid];
if ( Cluster::worker_count == done_with[uid] )
{
delete done_with[uid];
if ( |stats_keys[uid]| > 0 )
{
#print "we need more keys!";
# Now that we have a key from each worker, lets
# grab all of the results.
request_all_current_keys(uid, ss_name, T);
}
else
{
#print "we're out of keys!";
local ss = stats_store[ss_name];
if ( ss?$epoch_finished )
ss$epoch_finished(network_time());
reset(ss);
}
}
}
event SumStats::cluster_send_result(uid: string, ss_name: string, key: Key, result: Result, cleanup: bool)
{
#print "cluster_send_result";
#print fmt("%0.6f MANAGER: receiving key data from %s - %s=%s", network_time(), get_event_peer()$descr, key2str(key), result);
# We only want to try and do a value merge if there are actually measured datapoints
# in the Result.
if ( uid in key_requests )
key_requests[uid] = compose_results(key_requests[uid], result);
else
if ( uid !in key_requests || |key_requests[uid]| == 0 )
key_requests[uid] = result;
else
key_requests[uid] = compose_results(key_requests[uid], result);
# Mark that a worker is done.
if ( uid !in done_with )
done_with[uid] = 0;
#print fmt("MANAGER: got a result for %s %s from %s", uid, key, get_event_peer()$descr);
++done_with[uid];
#print fmt("worker_count:%d :: done_with:%d", Cluster::worker_count, done_with[uid]);
if ( Cluster::worker_count == done_with[uid] )
if ( uid !in dynamic_requests &&
uid in done_with && Cluster::worker_count == done_with[uid] )
{
local ss = stats_store[ssid];
local ir = key_requests[uid];
if ( check_thresholds(ss, key, ir, 1.0) )
{
threshold_crossed(ss, key, ir);
event SumStats::cluster_threshold_crossed(ss$id, key, threshold_tracker[ss$id][key]);
}
handle_end_of_result_collection(uid, ss_name, key, cleanup);
delete done_with[uid];
delete key_requests[uid];
# Check that there is an outstanding view before subtracting.
if ( outstanding_global_views[ssid] > 0 )
--outstanding_global_views[ssid];
if ( cleanup )
request_all_current_keys(uid, ss_name, cleanup);
}
}
# Managers handle intermediate updates here.
event SumStats::cluster_key_intermediate_response(ssid: string, key: Key)
event SumStats::cluster_key_intermediate_response(ss_name: string, key: Key)
{
#print fmt("MANAGER: receiving intermediate key data from %s", get_event_peer()$descr);
#print fmt("MANAGER: requesting key data for %s", key2str(key));
#print fmt("MANAGER: requesting key data for %s", key);
if ( ssid in outstanding_global_views &&
|outstanding_global_views[ssid]| > max_outstanding_global_views )
if ( ss_name in outstanding_global_views &&
|outstanding_global_views[ss_name]| > max_outstanding_global_views )
{
# Don't do this intermediate update. Perhaps at some point in the future
# we will queue and randomly select from these ignored intermediate
@ -287,60 +451,38 @@ event SumStats::cluster_key_intermediate_response(ssid: string, key: Key)
return;
}
++outstanding_global_views[ssid];
++outstanding_global_views[ss_name];
local uid = unique_id("");
event SumStats::cluster_key_request(uid, ssid, key);
done_with[uid] = 0;
#print fmt("requesting results for: %s", uid);
event SumStats::cluster_get_result(uid, ss_name, key, F);
}
event SumStats::cluster_ss_response(uid: string, ssid: string, data: ResultTable, done: bool)
function request_key(ss_name: string, key: Key): Result
{
#print fmt("MANAGER: receiving results from %s", get_event_peer()$descr);
local uid = unique_id("");
done_with[uid] = 0;
key_requests[uid] = table();
add dynamic_requests[uid];
# Mark another worker as being "done" for this uid.
if ( done )
++done_with[uid];
local local_data = stats_results[uid];
local ss = stats_store[ssid];
for ( key in data )
event SumStats::cluster_get_result(uid, ss_name, key, F);
return when ( uid in done_with && Cluster::worker_count == done_with[uid] )
{
if ( key in local_data )
local_data[key] = compose_results(local_data[key], data[key]);
else
local_data[key] = data[key];
# If a stat is done being collected, thresholds for each key
# need to be checked so we're doing it here to avoid doubly
# iterating over each key.
if ( Cluster::worker_count == done_with[uid] )
{
if ( check_thresholds(ss, key, local_data[key], 1.0) )
{
threshold_crossed(ss, key, local_data[key]);
event SumStats::cluster_threshold_crossed(ss$id, key, threshold_tracker[ss$id][key]);
}
}
}
# If the data has been collected from all peers, we are done and ready to finish.
if ( Cluster::worker_count == done_with[uid] )
{
if ( ss?$epoch_finished )
ss$epoch_finished(local_data);
#print "done with request_key";
local result = key_requests[uid];
# Clean up
delete stats_results[uid];
delete key_requests[uid];
delete done_with[uid];
# Not sure I need to reset the sumstat on the manager.
reset(ss);
delete dynamic_requests[uid];
return result;
}
timeout 1.1min
{
Reporter::warning(fmt("Dynamic SumStat key request for %s in SumStat %s took longer than 1 minute and was automatically cancelled.", key, ss_name));
return Result();
}
}
event remote_connection_handshake_done(p: event_peer) &priority=5
{
send_id(p, "SumStats::stats_store");
send_id(p, "SumStats::reducer_store");
}
@endif

View file

@ -51,8 +51,8 @@ export {
## would like to accept the data being inserted.
pred: function(key: SumStats::Key, obs: SumStats::Observation): bool &optional;
## A function to normalize the key. This can be used to aggregate or
## normalize the entire key.
## A function to normalize the key. This can be used to
## aggregate or normalize the entire key.
normalize_key: function(key: SumStats::Key): Key &optional;
};
@ -74,8 +74,7 @@ export {
## Type to store results for multiple reducers.
type Result: table[string] of ResultVal;
## Type to store a table of sumstats results indexed
## by keys.
## Type to store a table of sumstats results indexed by keys.
type ResultTable: table[Key] of Result;
## SumStats represent an aggregation of reducers along with
@ -87,37 +86,47 @@ export {
## is no assurance provided as to where the callbacks
## will be executed on clusters.
type SumStat: record {
## An arbitrary name for the sumstat so that it can
## be referred to later.
name: string;
## The interval at which this filter should be "broken"
## and the '$epoch_finished' callback called. The
## and the *epoch_result* callback called. The
## results are also reset at this time so any threshold
## based detection needs to be set to a
## value that should be expected to happen within
## this epoch.
epoch: interval;
## The reducers for the SumStat
## The reducers for the SumStat.
reducers: set[Reducer];
## Provide a function to calculate a value from the
## :bro:see:`SumStats::Result` structure which will be used
## for thresholding.
## This is required if a $threshold value is given.
threshold_val: function(key: SumStats::Key, result: SumStats::Result): count &optional;
## This is required if a *threshold* value is given.
threshold_val: function(key: SumStats::Key, result: SumStats::Result): double &optional;
## The threshold value for calling the
## $threshold_crossed callback.
threshold: count &optional;
## *threshold_crossed* callback.
threshold: double &optional;
## A series of thresholds for calling the
## $threshold_crossed callback.
threshold_series: vector of count &optional;
## *threshold_crossed* callback.
threshold_series: vector of double &optional;
## A callback that is called when a threshold is crossed.
threshold_crossed: function(key: SumStats::Key, result: SumStats::Result) &optional;
## A callback with the full collection of Results for
## this SumStat.
epoch_finished: function(rt: SumStats::ResultTable) &optional;
## A callback that receives each of the results at the
## end of the analysis epoch. The function will be
## called once for each key.
epoch_result: function(ts: time, key: SumStats::Key, result: SumStats::Result) &optional;
## A callback that will be called when a single collection
## interval is completed. The *ts* value will be the time of
## when the collection started.
epoch_finished: function(ts:time) &optional;
};
## Create a summary statistic.
@ -134,19 +143,18 @@ export {
## obs: The data point to send into the stream.
global observe: function(id: string, key: SumStats::Key, obs: SumStats::Observation);
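Pulling the pieces together, a minimal end-to-end sketch of the new callback API (the stream and sumstat names are hypothetical; the inline-callback style follows the framework's documented usage):

event bro_init()
	{
	local r1 = SumStats::Reducer($stream="conn.attempted", $apply=set(SumStats::SUM));
	SumStats::create([$name="conn-attempts",
	                  $epoch=5min,
	                  $reducers=set(r1),
	                  $threshold=10.0,
	                  $threshold_val(key: SumStats::Key, result: SumStats::Result) =
	                  	{ return result["conn.attempted"]$sum; },
	                  $threshold_crossed(key: SumStats::Key, result: SumStats::Result) =
	                  	{ print fmt("%s crossed the threshold", key$host); },
	                  $epoch_result(ts: time, key: SumStats::Key, result: SumStats::Result) =
	                  	{ print fmt("%s: %.0f attempts", key$host, result["conn.attempted"]$sum); }]);
	}

event connection_attempt(c: connection)
	{
	SumStats::observe("conn.attempted", [$host=c$id$orig_h], [$num=1]);
	}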
## This record is primarily used for internal threshold tracking.
type Thresholding: record {
# Internal use only. Indicates if a simple threshold was already crossed.
is_threshold_crossed: bool &default=F;
# Internal use only. Current key for threshold series.
threshold_series_index: count &default=0;
};
## This event is generated when thresholds are reset for a SumStat.
## Dynamically request a sumstat key. This function should be
## used sparingly and not as a replacement for the callbacks
## from the :bro:see:`SumStats::SumStat` record. The function is only
## available for use within "when" statements as an asynchronous
## function.
##
## ssid: SumStats ID that thresholds were reset for.
global thresholds_reset: event(ssid: string);
## ss_name: SumStat name.
##
## key: The SumStat key being requested.
##
## Returns: The result for the requested sumstat key.
global request_key: function(ss_name: string, key: Key): Result;
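Being asynchronous, it only works inside a when statement; a sketch against the hypothetical "conn-attempts" sumstat above:

when ( local res = SumStats::request_key("conn-attempts", [$host=1.2.3.4]) )
	{
	if ( "conn.attempted" in res )
		print fmt("current total for 1.2.3.4: %.0f", res["conn.attempted"]$sum);
	}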
## Helper function to represent a :bro:type:`SumStats::Key` value as
## a simple string.
@ -157,18 +165,46 @@ export {
global key2str: function(key: SumStats::Key): string;
}
# The function prototype for plugins to do calculations.
type ObserveFunc: function(r: Reducer, val: double, data: Observation, rv: ResultVal);
redef record Reducer += {
# Internal use only. Provides a reference back to the related SumStats by it's ID.
sid: string &optional;
# Internal use only. Provides a reference back to the related SumStats by its name.
ssname: string &optional;
calc_funcs: vector of Calculation &optional;
};
# Internal use only. For tracking thresholds per sumstat and key.
global threshold_tracker: table[string] of table[Key] of Thresholding &optional;
# In the case of a single threshold, 0 means the threshold isn't crossed.
# In the case of a threshold series, the number tracks the threshold offset.
global threshold_tracker: table[string] of table[Key] of count;
redef record SumStat += {
# Internal use only (mostly for cluster coherency).
id: string &optional;
};
function increment_threshold_tracker(ss_name: string, key: Key)
{
if ( ss_name !in threshold_tracker )
threshold_tracker[ss_name] = table();
if ( key !in threshold_tracker[ss_name] )
threshold_tracker[ss_name][key] = 0;
++threshold_tracker[ss_name][key];
}
function get_threshold_index(ss_name: string, key: Key): count
{
if ( ss_name !in threshold_tracker )
return 0;
if ( key !in threshold_tracker[ss_name] )
return 0;
return threshold_tracker[ss_name][key];
}
# Prototype the hook point for plugins to initialize any result values.
global init_resultval_hook: hook(r: Reducer, rv: ResultVal);
# Prototype the hook point for plugins to merge Results.
global compose_resultvals_hook: hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal);
# Store of sumstats indexed on the sumstat id.
global stats_store: table[string] of SumStat = table();
@ -182,20 +218,20 @@ global result_store: table[string] of ResultTable = table();
# Store of threshold information.
global thresholds_store: table[string, Key] of bool = table();
# Store the calculations.
global calc_store: table[Calculation] of ObserveFunc = table();
# Store the dependencies for Calculations.
global calc_deps: table[Calculation] of vector of Calculation = table();
# Hook for registering observation calculation plugins.
global register_observe_plugins: hook();
# This is called whenever key values are updated and the new val is given as the
# `val` argument. It's only prototyped here because cluster and non-cluster have
# separate implementations.
global data_added: function(ss: SumStat, key: Key, result: Result);
# Prototype the hook point for plugins to do calculations.
global observe_hook: hook(r: Reducer, val: double, data: Observation, rv: ResultVal);
# Prototype the hook point for plugins to initialize any result values.
global init_resultval_hook: hook(r: Reducer, rv: ResultVal);
# Prototype the hook point for plugins to merge Results.
global compose_resultvals_hook: hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal);
# Event that is used to "finish" measurements and adapt the measurement
# framework for clustered or non-clustered usage.
global finish_epoch: event(ss: SumStat);
@ -210,6 +246,24 @@ function key2str(key: Key): string
return fmt("sumstats_key(%s)", out);
}
function register_observe_plugin(calc: Calculation, func: ObserveFunc)
{
calc_store[calc] = func;
}
function add_observe_plugin_dependency(calc: Calculation, depends_on: Calculation)
{
if ( calc !in calc_deps )
calc_deps[calc] = vector();
calc_deps[calc][|calc_deps[calc]|] = depends_on;
}
event bro_init() &priority=100000
{
# Call all of the plugin registration hooks
hook register_observe_plugins();
}
function init_resultval(r: Reducer): ResultVal
{
local rv: ResultVal = [$begin=network_time(), $end=network_time()];
@ -234,25 +288,17 @@ function compose_results(r1: Result, r2: Result): Result
{
local result: Result = table();
if ( |r1| > |r2| )
for ( id in r1 )
{
for ( data_id in r1 )
{
if ( data_id in r2 )
result[data_id] = compose_resultvals(r1[data_id], r2[data_id]);
else
result[data_id] = r1[data_id];
}
result[id] = r1[id];
}
else
for ( id in r2 )
{
for ( data_id in r2 )
{
if ( data_id in r1 )
result[data_id] = compose_resultvals(r1[data_id], r2[data_id]);
else
result[data_id] = r2[data_id];
}
if ( id in r1 )
result[id] = compose_resultvals(r1[id], r2[id]);
else
result[id] = r2[id];
}
return result;
@ -261,18 +307,42 @@ function compose_results(r1: Result, r2: Result): Result
function reset(ss: SumStat)
{
if ( ss$id in result_store )
delete result_store[ss$id];
if ( ss$name in result_store )
delete result_store[ss$name];
result_store[ss$id] = table();
result_store[ss$name] = table();
if ( ss?$threshold || ss?$threshold_series )
if ( ss$name in threshold_tracker )
{
threshold_tracker[ss$id] = table();
event SumStats::thresholds_reset(ss$id);
delete threshold_tracker[ss$name];
threshold_tracker[ss$name] = table();
}
}
# This could potentially recurse forever, but plugin authors
# should be making sure they aren't causing reflexive dependencies.
function add_calc_deps(calcs: vector of Calculation, c: Calculation)
{
#print fmt("Checking for deps for %s", c);
for ( i in calc_deps[c] )
{
local skip_calc=F;
for ( j in calcs )
{
if ( calcs[j] == calc_deps[c][i] )
skip_calc=T;
}
if ( ! skip_calc )
{
if ( calc_deps[c][i] in calc_deps )
add_calc_deps(calcs, calc_deps[c][i]);
calcs[|c|] = calc_deps[c][i];
#print fmt("add dep for %s [%s] ", c, calc_deps[c][i]);
}
}
}
function create(ss: SumStat)
{
if ( (ss?$threshold || ss?$threshold_series) && ! ss?$threshold_val )
@ -280,14 +350,34 @@ function create(ss: SumStat)
Reporter::error("SumStats given a threshold with no $threshold_val function");
}
if ( ! ss?$id )
ss$id=unique_id("");
threshold_tracker[ss$id] = table();
stats_store[ss$id] = ss;
stats_store[ss$name] = ss;
if ( ss?$threshold || ss?$threshold_series )
threshold_tracker[ss$name] = table();
for ( reducer in ss$reducers )
{
reducer$sid = ss$id;
reducer$ssname = ss$name;
reducer$calc_funcs = vector();
for ( calc in reducer$apply )
{
# Add in dependencies recursively.
if ( calc in calc_deps )
add_calc_deps(reducer$calc_funcs, calc);
# Don't add this calculation to the vector if
# it was already added by something else as a
# dependency.
local skip_calc=F;
for ( j in reducer$calc_funcs )
{
if ( calc == reducer$calc_funcs[j] )
skip_calc=T;
}
if ( ! skip_calc )
reducer$calc_funcs[|reducer$calc_funcs|] = calc;
}
if ( reducer$stream !in reducer_store )
reducer_store[reducer$stream] = set();
add reducer_store[reducer$stream][reducer];
@ -313,9 +403,9 @@ function observe(id: string, key: Key, obs: Observation)
if ( r?$pred && ! r$pred(key, obs) )
next;
local ss = stats_store[r$sid];
local ss = stats_store[r$ssname];
# If there is a threshold and no epoch_finished callback
# If there is a threshold and no epoch_result callback
# we don't need to continue counting since the data will
# never be accessed. This was leading
# to some state management issues when measuring
@ -323,18 +413,21 @@ function observe(id: string, key: Key, obs: Observation)
# NOTE: this optimization may need to be removed in the
# future if on demand access is provided to the
# SumStats results.
if ( ! ss?$epoch_finished &&
r$sid in threshold_tracker &&
key in threshold_tracker[r$sid] &&
if ( ! ss?$epoch_result &&
r$ssname in threshold_tracker &&
( ss?$threshold &&
threshold_tracker[r$sid][key]$is_threshold_crossed ) ||
key in threshold_tracker[r$ssname] &&
threshold_tracker[r$ssname][key] != 0 ) ||
( ss?$threshold_series &&
threshold_tracker[r$sid][key]$threshold_series_index+1 == |ss$threshold_series| ) )
key in threshold_tracker[r$ssname] &&
threshold_tracker[r$ssname][key] == |ss$threshold_series| ) )
{
next;
}
if ( r$sid !in result_store )
result_store[ss$id] = table();
local results = result_store[r$sid];
if ( r$ssname !in result_store )
result_store[r$ssname] = table();
local results = result_store[r$ssname];
if ( key !in results )
results[key] = table();
@ -350,10 +443,13 @@ function observe(id: string, key: Key, obs: Observation)
# If a string was given, fall back to 1.0 as the value.
local val = 1.0;
if ( obs?$num || obs?$dbl )
val = obs?$dbl ? obs$dbl : obs$num;
if ( obs?$num )
val = obs$num;
else if ( obs?$dbl )
val = obs$dbl;
hook observe_hook(r, val, obs, result_val);
for ( i in r$calc_funcs )
calc_store[r$calc_funcs[i]](r, val, obs, result_val);
data_added(ss, key, result);
}
}
@ -362,10 +458,12 @@ function observe(id: string, key: Key, obs: Observation)
# mid-break-interval threshold crossing detection for cluster deployments.
function check_thresholds(ss: SumStat, key: Key, result: Result, modify_pct: double): bool
{
if ( ! (ss?$threshold || ss?$threshold_series) )
if ( ! (ss?$threshold || ss?$threshold_series || ss?$threshold_crossed) )
return F;
# Add in the extra ResultVals to make threshold_vals easier to write.
# This length comparison should work because we just need to make
# sure that we have the same number of reducers and results.
if ( |ss$reducers| != |result| )
{
for ( reducer in ss$reducers )
@ -378,28 +476,21 @@ function check_thresholds(ss: SumStat, key: Key, result: Result, modify_pct: dou
local watch = ss$threshold_val(key, result);
if ( modify_pct < 1.0 && modify_pct > 0.0 )
watch = double_to_count(floor(watch/modify_pct));
watch = watch/modify_pct;
if ( ss$id !in threshold_tracker )
threshold_tracker[ss$id] = table();
local t_tracker = threshold_tracker[ss$id];
local t_index = get_threshold_index(ss$name, key);
if ( key !in t_tracker )
{
local ttmp: Thresholding;
t_tracker[key] = ttmp;
}
local tt = t_tracker[key];
if ( ss?$threshold && ! tt$is_threshold_crossed && watch >= ss$threshold )
if ( ss?$threshold &&
t_index == 0 && # Check that the threshold hasn't already been crossed.
watch >= ss$threshold )
{
# Value crossed the threshold.
return T;
}
if ( ss?$threshold_series &&
|ss$threshold_series| >= tt$threshold_series_index &&
watch >= ss$threshold_series[tt$threshold_series_index] )
|ss$threshold_series| > t_index && # Check if there are more thresholds.
watch >= ss$threshold_series[t_index] )
{
# A threshold series was given and the value crossed the next
# value in the series.
@ -415,6 +506,8 @@ function threshold_crossed(ss: SumStat, key: Key, result: Result)
if ( ! ss?$threshold_crossed )
return;
increment_threshold_tracker(ss$name,key);
# Add in the extra ResultVals to make threshold_crossed callbacks easier to write.
if ( |ss$reducers| != |result| )
{
@ -426,11 +519,5 @@ function threshold_crossed(ss: SumStat, key: Key, result: Result)
}
ss$threshold_crossed(key, result);
local tt = threshold_tracker[ss$id][key];
tt$is_threshold_crossed = T;
# Bump up to the next threshold series index if a threshold series is being used.
if ( ss?$threshold_series )
++tt$threshold_series_index;
}

View file

@ -2,23 +2,91 @@
module SumStats;
event SumStats::process_epoch_result(ss: SumStat, now: time, data: ResultTable)
{
# TODO: is this the right processing group size?
local i = 50;
for ( key in data )
{
ss$epoch_result(now, key, data[key]);
delete data[key];
if ( |data| == 0 )
{
if ( ss?$epoch_finished )
ss$epoch_finished(now);
# Now that no data is left we can finish.
return;
}
i = i-1;
if ( i == 0 )
{
# TODO: is this the right interval?
schedule 0.01 secs { process_epoch_result(ss, now, data) };
break;
}
}
}
event SumStats::finish_epoch(ss: SumStat)
{
if ( ss$id in result_store )
if ( ss$name in result_store )
{
local data = result_store[ss$id];
if ( ss?$epoch_finished )
ss$epoch_finished(data);
if ( ss?$epoch_result )
{
local data = result_store[ss$name];
local now = network_time();
if ( bro_is_terminating() )
{
for ( key in data )
ss$epoch_result(now, key, data[key]);
if ( ss?$epoch_finished )
ss$epoch_finished(now);
}
else
{
event SumStats::process_epoch_result(ss, now, data);
}
}
# We can reset here because we know that the reference
# to the data will be maintained by the process_epoch_result
# event.
reset(ss);
}
schedule ss$epoch { SumStats::finish_epoch(ss) };
}
function data_added(ss: SumStat, key: Key, result: Result)
{
if ( check_thresholds(ss, key, result, 1.0) )
threshold_crossed(ss, key, result);
}
function request(ss_name: string): ResultTable
{
# This only needs to be implemented this way for cluster compatibility.
return when ( T )
{
if ( ss_name in result_store )
return result_store[ss_name];
else
return table();
}
}
function request_key(ss_name: string, key: Key): Result
{
# This only needs to be implemented this way for cluster compatibility.
return when ( T )
{
if ( ss_name in result_store && key in result_store[ss_name] )
return result_store[ss_name][key];
else
return table();
}
}

View file

@ -0,0 +1 @@
Plugins for the summary statistics framework.

View file

@ -1,9 +1,11 @@
@load ./average
@load ./hll_unique
@load ./last
@load ./max
@load ./min
@load ./sample
@load ./std-dev
@load ./sum
@load ./topk
@load ./unique
@load ./variance

View file

@ -1,4 +1,4 @@
@load base/frameworks/sumstats/main
@load ../main
module SumStats;
@ -14,17 +14,18 @@ export {
};
}
hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
hook register_observe_plugins()
{
if ( AVERAGE in r$apply )
register_observe_plugin(AVERAGE, function(r: Reducer, val: double, obs: Observation, rv: ResultVal)
{
if ( ! rv?$average )
rv$average = val;
else
rv$average += (val - rv$average) / rv$num;
}
});
}
hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
{
if ( rv1?$average && rv2?$average )

View file

@ -0,0 +1,76 @@
@load base/frameworks/sumstats
module SumStats;
export {
redef record Reducer += {
## The error margin for HLL.
hll_error_margin: double &default=0.01;
## The confidence for HLL.
hll_confidence: double &default=0.95;
};
redef enum Calculation += {
## Calculate the number of unique values.
HLL_UNIQUE
};
redef record ResultVal += {
## If cardinality is being tracked, the number of unique
## items is tracked here.
hll_unique: count &default=0;
};
}
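A reducer opts in per stream; a sketch with a hypothetical stream name and a tighter error margin than the default above:

local r1 = SumStats::Reducer($stream="dns.lookup",
                             $apply=set(SumStats::HLL_UNIQUE),
                             $hll_error_margin=0.02);
# After observations, result["dns.lookup"]$hll_unique approximates
# the number of distinct observations seen for a key.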
redef record ResultVal += {
# Internal use only. This is not meant to be publicly available
# because probabilistic data structures have to be examined using
# specialized bifs.
card: opaque of cardinality &optional;
# We need these in the compose hook.
hll_error_margin: double &optional;
hll_confidence: double &optional;
};
hook register_observe_plugins()
{
register_observe_plugin(HLL_UNIQUE, function(r: Reducer, val: double, obs: Observation, rv: ResultVal)
{
if ( ! rv?$card )
{
rv$card = hll_cardinality_init(r$hll_error_margin, r$hll_confidence);
rv$hll_error_margin = r$hll_error_margin;
rv$hll_confidence = r$hll_confidence;
}
hll_cardinality_add(rv$card, obs);
rv$hll_unique = double_to_count(hll_cardinality_estimate(rv$card));
});
}
hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
{
if ( ! (rv1?$card || rv2?$card) )
return;
# Now at least one of rv1?$card or rv2?$card will be set, and
# potentially both.
local rhll: opaque of cardinality;
if ( rv1?$card )
{
rhll = hll_cardinality_init(rv1$hll_error_margin, rv1$hll_confidence);
hll_cardinality_merge_into(rhll, rv1$card);
}
else # If we do not have rv1, we have to have rv2 ...
rhll = hll_cardinality_init(rv2$hll_error_margin, rv2$hll_confidence);
if ( rv2?$card )
hll_cardinality_merge_into(rhll, rv2$card);
result$card = rhll;
result$hll_unique = double_to_count(hll_cardinality_estimate(rhll));
}

View file

@ -5,12 +5,12 @@ module SumStats;
export {
redef enum Calculation += {
## Keep last X observations in a queue
## Keep last X observations in a queue.
LAST
};
redef record Reducer += {
## number of elements to keep.
## Number of elements to keep.
num_last_elements: count &default=0;
};
@ -33,16 +33,20 @@ function get_last(rv: ResultVal): vector of Observation
return s;
}
hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
hook register_observe_plugins()
{
if ( LAST in r$apply && r$num_last_elements > 0 )
register_observe_plugin(LAST, function(r: Reducer, val: double, obs: Observation, rv: ResultVal)
{
if ( ! rv?$last_elements )
rv$last_elements = Queue::init([$max_len=r$num_last_elements]);
Queue::put(rv$last_elements, obs);
}
if ( r$num_last_elements > 0 )
{
if ( ! rv?$last_elements )
rv$last_elements = Queue::init([$max_len=r$num_last_elements]);
Queue::put(rv$last_elements, obs);
}
});
}
hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
{
# Merge $samples

View file

@ -1,4 +1,4 @@
@load base/frameworks/sumstats/main
@load ../main
module SumStats;
@ -14,15 +14,15 @@ export {
};
}
hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
hook register_observe_plugins()
{
if ( MAX in r$apply )
register_observe_plugin(MAX, function(r: Reducer, val: double, obs: Observation, rv: ResultVal)
{
if ( ! rv?$max )
rv$max = val;
else if ( val > rv$max )
rv$max = val;
}
});
}
hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)

View file

@ -1,4 +1,4 @@
@load base/frameworks/sumstats/main
@load ../main
module SumStats;
@ -14,17 +14,18 @@ export {
};
}
hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
hook register_observe_plugins()
{
if ( MIN in r$apply )
register_observe_plugin(MIN, function(r: Reducer, val: double, obs: Observation, rv: ResultVal)
{
if ( ! rv?$min )
rv$min = val;
else if ( val < rv$min )
rv$min = val;
}
});
}
hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
{
if ( rv1?$min && rv2?$min )

View file

@ -4,7 +4,8 @@ module SumStats;
export {
redef enum Calculation += {
## Get uniquely distributed random samples from the observation stream.
## Get uniquely distributed random samples from the observation
## stream.
SAMPLE
};
@ -24,8 +25,8 @@ export {
redef record ResultVal += {
# Internal use only. This is not meant to be publicly available
# and just a copy of num_samples from the Reducer. Needed for availability
# in the compose hook.
# and just a copy of num_samples from the Reducer. Needed for
# availability in the compose hook.
num_samples: count &default=0;
};
@ -47,15 +48,14 @@ function sample_add_sample(obs:Observation, rv: ResultVal)
if ( ra < rv$num_samples )
rv$samples[ra] = obs;
}
}
hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
hook register_observe_plugins()
{
if ( SAMPLE in r$apply )
register_observe_plugin(SAMPLE, function(r: Reducer, val: double, obs: Observation, rv: ResultVal)
{
sample_add_sample(obs, rv);
}
});
}
hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
@ -75,7 +75,6 @@ hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
return;
}
if ( |rv1$samples| != num_samples && |rv2$samples| < num_samples )
{
if ( |rv1$samples| != rv1$sample_elements || |rv2$samples| < rv2$sample_elements )

View file

@ -1,5 +1,5 @@
@load base/frameworks/sumstats/main
@load ./variance
@load ../main
module SumStats;
@ -21,11 +21,18 @@ function calc_std_dev(rv: ResultVal)
rv$std_dev = sqrt(rv$variance);
}
# This depends on the variance plugin which uses priority -5
hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal) &priority=-10
hook std_dev_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
{
if ( STD_DEV in r$apply )
calc_std_dev(rv);
}
hook register_observe_plugins() &priority=-10
{
register_observe_plugin(STD_DEV, function(r: Reducer, val: double, obs: Observation, rv: ResultVal)
{
calc_std_dev(rv);
});
add_observe_plugin_dependency(STD_DEV, VARIANCE);
}
hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal) &priority=-10

View file

@ -1,4 +1,4 @@
@load base/frameworks/sumstats/main
@load ../main
module SumStats;
@ -14,19 +14,19 @@ export {
sum: double &default=0.0;
};
type threshold_function: function(key: SumStats::Key, result: SumStats::Result): count;
global sum_threshold: function(data_id: string): threshold_function;
#type threshold_function: function(key: SumStats::Key, result: SumStats::Result): count;
#global sum_threshold: function(data_id: string): threshold_function;
}
function sum_threshold(data_id: string): threshold_function
{
return function(key: SumStats::Key, result: SumStats::Result): count
{
print fmt("data_id: %s", data_id);
print result;
return double_to_count(result[data_id]$sum);
};
}
#function sum_threshold(data_id: string): threshold_function
# {
# return function(key: SumStats::Key, result: SumStats::Result): count
# {
# print fmt("data_id: %s", data_id);
# print result;
# return double_to_count(result[data_id]$sum);
# };
# }
hook init_resultval_hook(r: Reducer, rv: ResultVal)
{
@ -34,10 +34,12 @@ hook init_resultval_hook(r: Reducer, rv: ResultVal)
rv$sum = 0;
}
hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
hook register_observe_plugins()
{
if ( SUM in r$apply )
register_observe_plugin(SUM, function(r: Reducer, val: double, obs: Observation, rv: ResultVal)
{
rv$sum += val;
});
}
hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)

View file

@ -0,0 +1,52 @@
@load base/frameworks/sumstats
module SumStats;
export {
redef record Reducer += {
## Number of elements to keep in the top-k list.
topk_size: count &default=500;
};
redef enum Calculation += {
TOPK
};
redef record ResultVal += {
topk: opaque of topk &optional;
};
}
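Enabling it is a one-line change on the reducer, with topk_size bounding the per-key memory (stream name hypothetical):

local r1 = SumStats::Reducer($stream="http.request.host",
                             $apply=set(SumStats::TOPK),
                             $topk_size=100);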
hook register_observe_plugins()
{
register_observe_plugin(TOPK, function(r: Reducer, val: double, obs: Observation, rv: ResultVal)
{
topk_add(rv$topk, obs);
});
}
hook init_resultval_hook(r: Reducer, rv: ResultVal)
{
if ( TOPK in r$apply && ! rv?$topk )
rv$topk = topk_init(r$topk_size);
}
hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
{
if ( rv1?$topk )
{
result$topk = topk_init(topk_size(rv1$topk));
topk_merge(result$topk, rv1$topk);
if ( rv2?$topk )
topk_merge(result$topk, rv2$topk);
}
else if ( rv2?$topk )
{
result$topk = topk_init(topk_size(rv2$topk));
topk_merge(result$topk, rv2$topk);
}
}

View file

@ -1,8 +1,13 @@
@load base/frameworks/sumstats/main
@load ../main
module SumStats;
export {
redef record Reducer += {
## Maximum number of unique elements to store.
unique_max: count &optional;
};
redef enum Calculation += {
## Calculate the number of unique values.
UNIQUE
@ -16,37 +21,63 @@ export {
}
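With the new unique_max field, per-key state can now be bounded; a sketch with hypothetical values:

local r1 = SumStats::Reducer($stream="ssh.login.failure",
                             $apply=set(SumStats::UNIQUE),
                             $unique_max=100);
# rv$unique stops being updated once roughly 100 distinct
# observations have been stored for a key.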
redef record ResultVal += {
# Internal use only. This is used when multiple ResultVals
# are being merged and they need to abide by the unique limit
# set in the reducer.
unique_max: count &optional;
# Internal use only. This is not meant to be publicly available
# because we don't want to trust that we can inspect the values
# since we will like move to a probalistic data structure in the future.
# since we will likely move to a probabilistic data structure in the future.
# TODO: in the future this will optionally be a hyperloglog structure
unique_vals: set[Observation] &optional;
};
hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
hook register_observe_plugins()
{
if ( UNIQUE in r$apply )
register_observe_plugin(UNIQUE, function(r: Reducer, val: double, obs: Observation, rv: ResultVal)
{
if ( ! rv?$unique_vals )
rv$unique_vals=set();
add rv$unique_vals[obs];
if ( r?$unique_max )
rv$unique_max=r$unique_max;
if ( ! r?$unique_max || |rv$unique_vals| <= r$unique_max )
add rv$unique_vals[obs];
rv$unique = |rv$unique_vals|;
}
});
}
hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
{
if ( rv1?$unique_vals || rv2?$unique_vals )
{
if ( rv1?$unique_max )
result$unique_max = rv1$unique_max;
else if ( rv2?$unique_max )
result$unique_max = rv2$unique_max;
if ( rv1?$unique_vals )
result$unique_vals = copy(rv1$unique_vals);
if ( rv2?$unique_vals )
{
if ( ! result?$unique_vals )
{
result$unique_vals = copy(rv2$unique_vals);
}
else
{
for ( val2 in rv2$unique_vals )
{
if ( result?$unique_max && |result$unique_vals| >= result$unique_max )
break;
add result$unique_vals[copy(val2)];
}
}
}
result$unique = |result$unique_vals|;
}
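A usage sketch for UNIQUE with the new unique_max cap (stream name and the 1000-element cap are illustrative):

event bro_init()
    {
    local r1: SumStats::Reducer = [$stream="hosts.contacted",
                                   $apply=set(SumStats::UNIQUE),
                                   $unique_max=1000];
    SumStats::create([$name="unique-resp-hosts",
                      $epoch=5min,
                      $reducers=set(r1),
                      $epoch_result(ts: time, key: SumStats::Key, result: SumStats::Result) =
                          {
                          # With unique_max set, $unique saturates at the cap.
                          print fmt("%s contacted %d distinct hosts", key$host, result["hosts.contacted"]$unique);
                          }]);
    }

event connection_established(c: connection)
    {
    SumStats::observe("hosts.contacted", [$host=c$id$orig_h], [$str=cat(c$id$resp_h)]);
    }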

View file

@ -1,5 +1,5 @@
@load base/frameworks/sumstats/main
@load ./average
@load ../main
module SumStats;
@ -28,17 +28,17 @@ function calc_variance(rv: ResultVal)
rv$variance = (rv$num > 1) ? rv$var_s/(rv$num-1) : 0.0;
}
# Reduced priority since this depends on the average
hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal) &priority=-5
hook register_observe_plugins() &priority=-5
{
if ( VARIANCE in r$apply )
register_observe_plugin(VARIANCE, function(r: Reducer, val: double, obs: Observation, rv: ResultVal)
{
if ( rv$num > 1 )
rv$var_s += ((val - rv$prev_avg) * (val - rv$average));
calc_variance(rv);
rv$prev_avg = rv$average;
}
});
add_observe_plugin_dependency(VARIANCE, AVERAGE);
}
# Reduced priority since this depends on the average
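And a sketch for VARIANCE, which, as the dependency registration above encodes, is computed on top of AVERAGE (stream name and epoch are illustrative; AVERAGE is listed in $apply explicitly for clarity):

event bro_init()
    {
    local r1: SumStats::Reducer = [$stream="conn.durations",
                                   $apply=set(SumStats::AVERAGE, SumStats::VARIANCE)];
    SumStats::create([$name="duration-variance",
                      $epoch=5min,
                      $reducers=set(r1),
                      $epoch_result(ts: time, key: SumStats::Key, result: SumStats::Result) =
                          {
                          local rv = result["conn.durations"];
                          print fmt("avg=%f variance=%f", rv$average, rv$variance);
                          }]);
    }

event connection_state_remove(c: connection)
    {
    # Assumes base/protocols/conn is loaded so that c$conn is populated.
    if ( c?$conn && c$conn?$duration )
        SumStats::observe("conn.durations", [], [$dbl=interval_to_double(c$conn$duration)]);
    }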

View file

@ -0,0 +1,2 @@
The tunnels framework handles the tracking/logging of tunnels (e.g. Teredo,
AYIYA, or IP-in-IP such as 6to4 where "IP" is either IPv4 or IPv6).

View file

@ -29,8 +29,8 @@ export {
## The unique identifier for the tunnel, which may correspond
## to a :bro:type:`connection`'s *uid* field for non-IP-in-IP tunnels.
## This is optional because there could be numerous connections
## for payload proxies like SOCKS but we should treat it as a single
## tunnel.
## for payload proxies like SOCKS but we should treat it as a
## single tunnel.
uid: string &log &optional;
## The tunnel "connection" 4-tuple of endpoint addresses/ports.
## For an IP tunnel, the ports will be 0.
@ -76,8 +76,8 @@ export {
## connections before it is considered inactive/expired.
const expiration_interval = 1hrs &redef;
## Currently active tunnels. That is, tunnels for which new, encapsulated
## connections have been seen in the interval indicated by
## Currently active tunnels. That is, tunnels for which new,
## encapsulated connections have been seen in the interval indicated by
## :bro:see:`Tunnel::expiration_interval`.
global active: table[conn_id] of Info = table() &read_expire=expiration_interval &expire_func=expire;
}

File diff suppressed because it is too large.

View file

@ -1,13 +1,16 @@
##! This script loads everything in the base/ script directory. If you want
##! to run Bro without all of these scripts loaded by default, you can use
##! the -b (--bare-mode) command line argument. You can also copy the "@load"
##! lines from this script to your own script to load only the scripts that
##! you actually want.
##! the ``-b`` (``--bare-mode``) command line argument. You can also copy the
##! "@load" lines from this script to your own script to load only the scripts
##! that you actually want.
@load base/utils/site
@load base/utils/active-http
@load base/utils/addrs
@load base/utils/conn-ids
@load base/utils/dir
@load base/utils/directions-and-hosts
@load base/utils/exec
@load base/utils/files
@load base/utils/numbers
@load base/utils/paths
@ -36,12 +39,16 @@
@load base/frameworks/tunnels
@load base/protocols/conn
@load base/protocols/dhcp
@load base/protocols/dnp3
@load base/protocols/dns
@load base/protocols/ftp
@load base/protocols/http
@load base/protocols/irc
@load base/protocols/modbus
@load base/protocols/pop3
@load base/protocols/radius
@load base/protocols/snmp
@load base/protocols/smtp
@load base/protocols/socks
@load base/protocols/ssh
@ -50,5 +57,10 @@
@load base/protocols/tunnels
@load base/files/pe
@load base/files/hash
@load base/files/extract
@load base/files/unified2
@load base/files/x509
@load base/misc/find-checksum-offloading
@load base/misc/find-filtered-trace

View file

@ -16,6 +16,7 @@ export {
# Keep track of how many bad checksums have been seen.
global bad_ip_checksums = 0;
global bad_tcp_checksums = 0;
global bad_udp_checksums = 0;
# Track to see if this script is done so that messages aren't created multiple times.
global done = F;
@ -28,7 +29,11 @@ event ChecksumOffloading::check()
local pkts_recvd = net_stats()$pkts_recvd;
local bad_ip_checksum_pct = (pkts_recvd != 0) ? (bad_ip_checksums*1.0 / pkts_recvd*1.0) : 0;
local bad_tcp_checksum_pct = (pkts_recvd != 0) ? (bad_tcp_checksums*1.0 / pkts_recvd*1.0) : 0;
if ( bad_ip_checksum_pct > 0.05 || bad_tcp_checksum_pct > 0.05 )
local bad_udp_checksum_pct = (pkts_recvd != 0) ? (bad_udp_checksums*1.0 / pkts_recvd*1.0) : 0;
if ( bad_ip_checksum_pct > 0.05 ||
bad_tcp_checksum_pct > 0.05 ||
bad_udp_checksum_pct > 0.05 )
{
local packet_src = reading_traces() ? "trace file likely has" : "interface is likely receiving";
local bad_checksum_msg = (bad_ip_checksum_pct > 0.0) ? "IP" : "";
@ -38,6 +43,13 @@ event ChecksumOffloading::check()
bad_checksum_msg += " and ";
bad_checksum_msg += "TCP";
}
if ( bad_udp_checksum_pct > 0.0 )
{
if ( |bad_checksum_msg| > 0 )
bad_checksum_msg += " and ";
bad_checksum_msg += "UDP";
}
local message = fmt("Your %s invalid %s checksums, most likely from NIC checksum offloading.", packet_src, bad_checksum_msg);
Reporter::warning(message);
done = T;
@ -65,6 +77,8 @@ event conn_weird(name: string, c: connection, addl: string)
{
if ( name == "bad_TCP_checksum" )
++bad_tcp_checksums;
else if ( name == "bad_UDP_checksum" )
++bad_udp_checksums;
}
event bro_done()
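When this warning fires on live capture, the usual remedies are disabling NIC checksum offload on the capture interface (e.g. with ethtool) or telling Bro to accept bad checksums; a sketch of the latter, using the standard ignore_checksums option:

# local.bro: analyze packets even when their checksums are invalid.
redef ignore_checksums = T;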

View file

@ -0,0 +1,49 @@
##! Discovers trace files that contain TCP traffic consisting only of
##! control packets (e.g. it's been filtered to contain only SYN/FIN/RST
##! packets and no content). On finding such a trace, a warning is
##! emitted that suggests toggling the :bro:see:`detect_filtered_trace`
##! option may be desired if the user does not want Bro to report
##! missing TCP segments.
module FilteredTraceDetection;
export {
## Flag to enable filtered trace file detection and warning message.
global enable: bool = T &redef;
}
global saw_tcp_conn_with_data: bool = F;
global saw_a_tcp_conn: bool = F;
event connection_state_remove(c: connection)
{
if ( ! reading_traces() )
return;
if ( ! enable )
return;
if ( saw_tcp_conn_with_data )
return;
if ( ! is_tcp_port(c$id$orig_p) )
return;
saw_a_tcp_conn = T;
if ( /[Dd]/ in c$history )
saw_tcp_conn_with_data = T;
}
event bro_done()
{
if ( ! enable )
return;
if ( ! saw_a_tcp_conn )
return;
if ( ! saw_tcp_conn_with_data )
Reporter::warning("The analyzed trace file was determined to contain only TCP control packets, which may indicate it's been pre-filtered. By default, Bro reports the missing segments for this type of trace, but the 'detect_filtered_trace' option may be toggled if that's not desired.");
}
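Acting on that warning is a one-line redef either way; a sketch for a site's local.bro:

# Suppress missing-segment reporting for control-packet-only traces.
redef detect_filtered_trace = T;

# Or turn off this script's detection altogether.
redef FilteredTraceDetection::enable = F;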

View file

@ -0,0 +1 @@
Support for connection (TCP, UDP, or ICMP) analysis.

View file

@ -16,12 +16,12 @@
module Conn;
export {
## The prefix given to files containing extracted connections as they are
## opened on disk.
## The prefix given to files containing extracted connections as they
## are opened on disk.
const extraction_prefix = "contents" &redef;
## If this variable is set to ``T``, then all contents of all connections
## will be extracted.
## If this variable is set to ``T``, then all contents of all
## connections will be extracted.
const default_extract = F &redef;
}
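A usage sketch for these two options (the prefix value is illustrative):

# local.bro: extract the contents of every connection, writing files
# whose names begin with the given prefix.
redef Conn::extraction_prefix = "conn-contents";
redef Conn::default_extract = T;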

View file

@ -1,7 +1,7 @@
##! This script manages the tracking/logging of general information regarding
##! TCP, UDP, and ICMP traffic. For UDP and ICMP, "connections" are to
##! be interpreted using flow semantics (sequence of packets from a source
##! host/post to a destination host/port). Further, ICMP "ports" are to
##! host/port to a destination host/port). Further, ICMP "ports" are to
##! be interpreted as the source port meaning the ICMP message type and
##! the destination port being the ICMP message code.
@ -23,7 +23,7 @@ export {
id: conn_id &log;
## The transport layer protocol of the connection.
proto: transport_proto &log;
## An identification of an application protocol being sent over the
## An identification of an application protocol being sent over
## the connection.
service: string &log &optional;
## How long the connection lasted. For 3-way or 4-way connection
@ -31,9 +31,10 @@ export {
duration: interval &log &optional;
## The number of payload bytes the originator sent. For TCP
## this is taken from sequence numbers and might be inaccurate
## (e.g., due to large connections)
## (e.g., due to large connections).
orig_bytes: count &log &optional;
## The number of payload bytes the responder sent. See ``orig_bytes``.
## The number of payload bytes the responder sent. See
## *orig_bytes*.
resp_bytes: count &log &optional;
## ========== ===============================================
@ -55,20 +56,20 @@ export {
## ========== ===============================================
conn_state: string &log &optional;
## If the connection is originated locally, this value will be T. If
## it was originated remotely it will be F. In the case that the
## :bro:id:`Site::local_nets` variable is undefined, this field will
## be left empty at all times.
## If the connection is originated locally, this value will be T.
## If it was originated remotely it will be F. In the case that
## the :bro:id:`Site::local_nets` variable is undefined, this
## field will be left empty at all times.
local_orig: bool &log &optional;
## Indicates the number of bytes missed in content gaps, which is
## representative of packet loss. A value other than zero will
## normally cause protocol analysis to fail but some analysis may
## have been completed prior to the packet loss.
## Indicates the number of bytes missed in content gaps, which
## is representative of packet loss. A value other than zero
## will normally cause protocol analysis to fail but some
## analysis may have been completed prior to the packet loss.
missed_bytes: count &log &default=0;
## Records the state history of connections as a string of letters.
## The meaning of those letters is:
## Records the state history of connections as a string of
## letters. The meaning of those letters is:
##
## ====== ====================================================
## Letter Meaning
@ -83,24 +84,25 @@ export {
## i inconsistent packet (e.g. SYN+RST bits both set)
## ====== ====================================================
##
## If the event comes from the originator, the letter is in upper-case; if it comes
## from the responder, it's in lower-case. Multiple packets of the same type will
## only be noted once (e.g. we only record one "d" in each direction, regardless of
## how many data packets were seen.)
## If the event comes from the originator, the letter is in
## upper-case; if it comes from the responder, it's in
## lower-case. Multiple packets of the same type will only be
## noted once (e.g. we only record one "d" in each direction,
## regardless of how many data packets were seen.)
history: string &log &optional;
## Number of packets that the originator sent.
## Only set if :bro:id:`use_conn_size_analyzer` = T
## Only set if :bro:id:`use_conn_size_analyzer` = T.
orig_pkts: count &log &optional;
## Number of IP level bytes that the originator sent (as seen on the wire,
## taken from IP total_length header field).
## Only set if :bro:id:`use_conn_size_analyzer` = T
## Number of IP level bytes that the originator sent (as seen on
## the wire, taken from the IP total_length header field).
## Only set if :bro:id:`use_conn_size_analyzer` = T.
orig_ip_bytes: count &log &optional;
## Number of packets that the responder sent.
## Only set if :bro:id:`use_conn_size_analyzer` = T
## Only set if :bro:id:`use_conn_size_analyzer` = T.
resp_pkts: count &log &optional;
## Number og IP level bytes that the responder sent (as seen on the wire,
## taken from IP total_length header field).
## Only set if :bro:id:`use_conn_size_analyzer` = T
## Number of IP level bytes that the responder sent (as seen on
## the wire, taken from the IP total_length header field).
## Only set if :bro:id:`use_conn_size_analyzer` = T.
resp_ip_bytes: count &log &optional;
## If this connection was over a tunnel, indicate the
## *uid* values for any encapsulating parent connections

View file

@ -11,10 +11,11 @@ export {
## c: The connection to watch.
##
## callback: A callback function that takes as arguments the monitored
## *connection*, and counter *cnt* that increments each time the
## callback is called. It returns an interval indicating how long
## in the future to schedule an event which will call the
## callback. A negative return interval causes polling to stop.
## *connection*, and counter *cnt* that increments each time
## the callback is called. It returns an interval indicating
## how long in the future to schedule an event which will call
## the callback. A negative return interval causes polling
## to stop.
##
## cnt: The initial value of a counter which gets passed to *callback*.
##
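The enclosing function declaration falls outside this hunk; assuming it is this file's ConnPolling::watch(c, callback, cnt, i) entry point, a polling callback might look like this (size_callback and the 1 MB threshold are illustrative):

# A sketch: re-check a connection every 5 seconds and stop polling
# once it has carried more than 1 MB in either direction.
function size_callback(c: connection, cnt: count): interval
    {
    if ( c$orig$size + c$resp$size > 1048576 )
        return -1sec;    # a negative interval stops the polling
    return 5sec;
    }

event connection_established(c: connection)
    {
    ConnPolling::watch(c, size_callback, 0, 5sec);
    }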

View file

@ -0,0 +1 @@
Support for Dynamic Host Configuration Protocol (DHCP) analysis.

View file

@ -0,0 +1,4 @@
@load ./consts
@load ./main
@load-sigs ./dpd.sig

View file

@ -0,0 +1,20 @@
##! Types, errors, and fields for analyzing DHCP data. A helper file
##! for DHCP analysis scripts.
module DHCP;
export {
## Types of DHCP messages. See :rfc:`1533`.
const message_types = {
[1] = "DHCP_DISCOVER",
[2] = "DHCP_OFFER",
[3] = "DHCP_REQUEST",
[4] = "DHCP_DECLINE",
[5] = "DHCP_ACK",
[6] = "DHCP_NAK",
[7] = "DHCP_RELEASE",
[8] = "DHCP_INFORM",
} &default = function(n: count): string { return fmt("unknown-message-type-%d", n); };
}
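Because the table carries a &default function, lookups are total: unknown codes synthesize a name instead of raising an error. For example:

print DHCP::message_types[3];     # DHCP_REQUEST
print DHCP::message_types[42];    # unknown-message-type-42 (via &default)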

View file

@ -0,0 +1,5 @@
signature dhcp_cookie {
ip-proto == udp
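# \x63\x82\x53\x63 is the BOOTP/DHCP "magic cookie" (decimal 99.130.83.99)
# that begins the options field; matching on it lets DPD enable the
# analyzer regardless of port.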
payload /^.*\x63\x82\x53\x63/
enable "dhcp"
}

View file

@ -0,0 +1,76 @@
##! Analyzes DHCP traffic in order to log DHCP leases given to clients.
##! This script ignores large swaths of the protocol, since it is rather
##! noisy on most networks, and focuses on the end-result: assigned leases.
##!
##! If you'd like to track known DHCP devices and to log the hostname
##! supplied by the client, see
##! :doc:`/scripts/policy/protocols/dhcp/known-devices-and-hostnames.bro`.
@load ./utils.bro
module DHCP;
export {
redef enum Log::ID += { LOG };
## The record type which contains the column fields of the DHCP log.
type Info: record {
## The earliest time at which a DHCP message over the
## associated connection is observed.
ts: time &log;
## A unique identifier of the connection over which DHCP is
## occurring.
uid: string &log;
## The connection's 4-tuple of endpoint addresses/ports.
id: conn_id &log;
## Client's hardware address.
mac: string &log &optional;
## Client's actual assigned IP address.
assigned_ip: addr &log &optional;
## IP address lease interval.
lease_time: interval &log &optional;
## A random number chosen by the client for this transaction.
trans_id: count &log;
};
## Event that can be handled to access the DHCP
## record as it is sent on to the logging framework.
global log_dhcp: event(rec: Info);
}
# Add the dhcp info to the connection record.
redef record connection += {
dhcp: Info &optional;
};
# 67/udp is the server's port, 68/udp the client.
const ports = { 67/udp, 68/udp };
redef likely_server_ports += { 67/udp };
event bro_init()
{
Log::create_stream(DHCP::LOG, [$columns=Info, $ev=log_dhcp]);
Analyzer::register_for_ports(Analyzer::ANALYZER_DHCP, ports);
}
event dhcp_ack(c: connection, msg: dhcp_msg, mask: addr, router: dhcp_router_list, lease: interval, serv_addr: addr, host_name: string)
{
local info: Info;
info$ts = network_time();
info$id = c$id;
info$uid = c$uid;
info$lease_time = lease;
info$trans_id = msg$xid;
if ( msg$h_addr != "" )
info$mac = msg$h_addr;
if ( reverse_ip(msg$yiaddr) != 0.0.0.0 )
info$assigned_ip = reverse_ip(msg$yiaddr);
else
info$assigned_ip = c$id$orig_h;
c$dhcp = info;
Log::write(DHCP::LOG, c$dhcp);
}
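A sketch of consuming the stream through the log_dhcp event declared above; the ?$ checks matter because several columns are &optional:

event DHCP::log_dhcp(rec: DHCP::Info)
    {
    if ( rec?$assigned_ip && rec?$lease_time )
        print fmt("%s leased %s for %s",
                  rec?$mac ? rec$mac : "<unknown mac>",
                  rec$assigned_ip, rec$lease_time);
    }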

Some files were not shown because too many files have changed in this diff.