Merge branch 'master' into topic/jsiwek/exec-module

This commit is contained in:
Jon Siwek 2013-07-22 10:23:28 -05:00
commit ca5abbf13a
876 changed files with 260597 additions and 174691 deletions


@ -0,0 +1 @@
@load ./main


@ -0,0 +1,217 @@
##! Framework for managing Bro's protocol analyzers.
##!
##! The analyzer framework allows one to dynamically enable or disable analyzers, as
##! well as to manage the well-known ports which automatically activate a
##! particular analyzer for new connections.
##!
##! Protocol analyzers are identified by unique tags of type
##! :bro:type:`Analyzer::Tag`, such as :bro:enum:`Analyzer::ANALYZER_HTTP`.
##! These tags are defined internally by
##! the analyzers themselves, and documented in their analyzer-specific
##! description along with the events that they generate.
@load base/frameworks/packet-filter/utils
module Analyzer;
export {
## If true, all available analyzers are initially disabled at startup. One
## can then selectively enable them with
## :bro:id:`Analyzer::enable_analyzer`.
global disable_all = F &redef;
## Enables an analyzer. Once enabled, the analyzer may be used for analysis
## of future connections as decided by Bro's dynamic protocol detection.
##
## tag: The tag of the analyzer to enable.
##
## Returns: True if the analyzer was successfully enabled.
global enable_analyzer: function(tag: Analyzer::Tag) : bool;
## Disables an analyzer. Once disabled, the analyzer will not be used
## further for analysis of future connections.
##
## tag: The tag of the analyzer to disable.
##
## Returns: True if the analyzer was successfully disabled.
global disable_analyzer: function(tag: Analyzer::Tag) : bool;
## Registers a set of well-known ports for an analyzer. If a future
## connection on one of these ports is seen, the analyzer will be
## automatically assigned to parsing it. The function *adds* to all ports
## already registered; it doesn't replace them.
##
## tag: The tag of the analyzer.
##
## ports: The set of well-known ports to associate with the analyzer.
##
## Returns: True if the ports were successfully registered.
global register_for_ports: function(tag: Analyzer::Tag, ports: set[port]) : bool;
## Registers an individual well-known port for an analyzer. If a future
## connection on this port is seen, the analyzer will be automatically
## assigned to parsing it. The function *adds* to all ports already
## registered; it doesn't replace them.
##
## tag: The tag of the analyzer.
##
## p: The well-known port to associate with the analyzer.
##
## Returns: True if the port was successfully registered.
global register_for_port: function(tag: Analyzer::Tag, p: port) : bool;
## Returns a set of all well-known ports currently registered for a
## specific analyzer.
##
## tag: The tag of the analyzer.
##
## Returns: The set of ports.
global registered_ports: function(tag: Analyzer::Tag) : set[port];
## Returns a table of all ports-to-analyzer mappings currently registered.
##
## Returns: A table mapping each analyzer to the set of ports
## registered for it.
global all_registered_ports: function() : table[Analyzer::Tag] of set[port];
## Translates an analyzer type to a string with the analyzer's name.
##
## tag: The analyzer tag.
##
## Returns: The analyzer name corresponding to the tag.
global name: function(tag: Analyzer::Tag) : string;
## Schedules an analyzer for a future connection originating from a given IP
## address and port.
##
## orig: The IP address originating a connection in the future.
## 0.0.0.0 can be used as a wildcard to match any originator address.
##
## resp: The IP address responding to a connection from *orig*.
##
## resp_p: The destination port at *resp*.
##
## analyzer: The analyzer ID.
##
## tout: A timeout interval after which the scheduling request will be
## discarded if the connection has not yet been seen.
##
## Returns: True if successful.
global schedule_analyzer: function(orig: addr, resp: addr, resp_p: port,
analyzer: Analyzer::Tag, tout: interval) : bool;
## Automatically creates a BPF filter for the specified protocol based
## on the data supplied for the protocol through the
## :bro:see:`Analyzer::register_for_ports` function.
##
## tag: The analyzer tag.
##
## Returns: BPF filter string.
global analyzer_to_bpf: function(tag: Analyzer::Tag): string;
## Create a BPF filter which matches all of the ports defined
## by the various protocol analysis scripts as "registered ports"
## for the protocol.
global get_bpf: function(): string;
## A set of analyzers to disable by default at startup. The default set
## contains legacy analyzers that are no longer supported.
global disabled_analyzers: set[Analyzer::Tag] = {
ANALYZER_INTERCONN,
ANALYZER_STEPPINGSTONE,
ANALYZER_BACKDOOR,
ANALYZER_TCPSTATS,
} &redef;
}
@load base/bif/analyzer.bif
global ports: table[Analyzer::Tag] of set[port];
event bro_init() &priority=5
{
if ( disable_all )
__disable_all_analyzers();
for ( a in disabled_analyzers )
disable_analyzer(a);
}
function enable_analyzer(tag: Analyzer::Tag) : bool
{
return __enable_analyzer(tag);
}
function disable_analyzer(tag: Analyzer::Tag) : bool
{
return __disable_analyzer(tag);
}
function register_for_ports(tag: Analyzer::Tag, ports: set[port]) : bool
{
local rc = T;
for ( p in ports )
{
if ( ! register_for_port(tag, p) )
rc = F;
}
return rc;
}
function register_for_port(tag: Analyzer::Tag, p: port) : bool
{
if ( ! __register_for_port(tag, p) )
return F;
if ( tag !in ports )
ports[tag] = set();
add ports[tag][p];
return T;
}
function registered_ports(tag: Analyzer::Tag) : set[port]
{
return tag in ports ? ports[tag] : set();
}
function all_registered_ports(): table[Analyzer::Tag] of set[port]
{
return ports;
}
function name(atype: Analyzer::Tag) : string
{
return __name(atype);
}
function schedule_analyzer(orig: addr, resp: addr, resp_p: port,
analyzer: Analyzer::Tag, tout: interval) : bool
{
return __schedule_analyzer(orig, resp, resp_p, analyzer, tout);
}
function analyzer_to_bpf(tag: Analyzer::Tag): string
{
# Return an empty string if an undefined analyzer was given.
if ( tag !in ports )
return "";
local output = "";
for ( p in ports[tag] )
output = PacketFilter::combine_filters(output, "or", PacketFilter::port_to_bpf(p));
return output;
}
function get_bpf(): string
{
local output = "";
for ( tag in ports )
{
output = PacketFilter::combine_filters(output, "or", analyzer_to_bpf(tag));
}
return output;
}
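# Usage sketch (not part of this commit): how a script might drive this
# framework. The extra FTP port and the responder address/port below are
# illustrative assumptions; the analyzer tags are the ones shipped with Bro.
event bro_init()
    {
    # Associate an additional well-known port with the FTP analyzer.
    Analyzer::register_for_port(Analyzer::ANALYZER_FTP, 2121/tcp);

    # Expect an HTTP connection to a known responder within the next minute;
    # 0.0.0.0 acts as a wildcard for the originator.
    Analyzer::schedule_analyzer(0.0.0.0, 192.168.1.10, 8080/tcp,
                                Analyzer::ANALYZER_HTTP, 1min);
    }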


@ -216,12 +216,9 @@ function setup_peer(p: event_peer, node: Node)
request_remote_events(p, node$events);
}
if ( node?$capture_filter )
if ( node?$capture_filter && node$capture_filter != "" )
{
local filter = node$capture_filter;
if ( filter == "" )
filter = PacketFilter::default_filter;
do_script_log(p, fmt("sending capture_filter: %s", filter));
send_capture_filter(p, filter);
}


@ -1,212 +0,0 @@
# Signatures to initiate dynamic protocol detection.
signature dpd_ftp_client {
ip-proto == tcp
payload /(|.*[\n\r]) *[uU][sS][eE][rR] /
tcp-state originator
}
# Match for server greeting (220, 120) and for login or passwd
# required (230, 331).
signature dpd_ftp_server {
ip-proto == tcp
payload /[\n\r ]*(120|220)[^0-9].*[\n\r] *(230|331)[^0-9]/
tcp-state responder
requires-reverse-signature dpd_ftp_client
enable "ftp"
}
signature dpd_http_client {
ip-proto == tcp
payload /^[[:space:]]*(GET|HEAD|POST)[[:space:]]*/
tcp-state originator
}
signature dpd_http_server {
ip-proto == tcp
payload /^HTTP\/[0-9]/
tcp-state responder
requires-reverse-signature dpd_http_client
enable "http"
}
signature dpd_bittorrenttracker_client {
ip-proto == tcp
payload /^.*\/announce\?.*info_hash/
tcp-state originator
}
signature dpd_bittorrenttracker_server {
ip-proto == tcp
payload /^HTTP\/[0-9]/
tcp-state responder
requires-reverse-signature dpd_bittorrenttracker_client
enable "bittorrenttracker"
}
signature dpd_bittorrent_peer1 {
ip-proto == tcp
payload /^\x13BitTorrent protocol/
tcp-state originator
}
signature dpd_bittorrent_peer2 {
ip-proto == tcp
payload /^\x13BitTorrent protocol/
tcp-state responder
requires-reverse-signature dpd_bittorrent_peer1
enable "bittorrent"
}
signature irc_client1 {
ip-proto == tcp
payload /(|.*[\r\n]) *[Uu][Ss][Ee][Rr] +.+[\n\r]+ *[Nn][Ii][Cc][Kk] +.*[\r\n]/
requires-reverse-signature irc_server_reply
tcp-state originator
enable "irc"
}
signature irc_client2 {
ip-proto == tcp
payload /(|.*[\r\n]) *[Nn][Ii][Cc][Kk] +.+[\r\n]+ *[Uu][Ss][Ee][Rr] +.+[\r\n]/
requires-reverse-signature irc_server_reply
tcp-state originator
enable "irc"
}
signature irc_server_reply {
ip-proto == tcp
payload /^(|.*[\n\r])(:[^ \n\r]+ )?[0-9][0-9][0-9] /
tcp-state responder
}
signature irc_server_to_server1 {
ip-proto == tcp
payload /(|.*[\r\n]) *[Ss][Ee][Rr][Vv][Ee][Rr] +[^ ]+ +[0-9]+ +:.+[\r\n]/
}
signature irc_server_to_server2 {
ip-proto == tcp
payload /(|.*[\r\n]) *[Ss][Ee][Rr][Vv][Ee][Rr] +[^ ]+ +[0-9]+ +:.+[\r\n]/
requires-reverse-signature irc_server_to_server1
enable "irc"
}
signature dpd_smtp_client {
ip-proto == tcp
payload /(|.*[\n\r])[[:space:]]*([hH][eE][lL][oO]|[eE][hH][lL][oO])/
requires-reverse-signature dpd_smtp_server
enable "smtp"
tcp-state originator
}
signature dpd_smtp_server {
ip-proto == tcp
payload /^[[:space:]]*220[[:space:]-]/
tcp-state responder
}
signature dpd_ssh_client {
ip-proto == tcp
payload /^[sS][sS][hH]-/
requires-reverse-signature dpd_ssh_server
enable "ssh"
tcp-state originator
}
signature dpd_ssh_server {
ip-proto == tcp
payload /^[sS][sS][hH]-/
tcp-state responder
}
signature dpd_pop3_server {
ip-proto == tcp
payload /^\+OK/
requires-reverse-signature dpd_pop3_client
enable "pop3"
tcp-state responder
}
signature dpd_pop3_client {
ip-proto == tcp
payload /(|.*[\r\n])[[:space:]]*([uU][sS][eE][rR][[:space:]]|[aA][pP][oO][pP][[:space:]]|[cC][aA][pP][aA]|[aA][uU][tT][hH])/
tcp-state originator
}
signature dpd_ssl_server {
ip-proto == tcp
# Server hello.
payload /^(\x16\x03[\x00\x01\x02]..\x02...\x03[\x00\x01\x02]|...?\x04..\x00\x02).*/
requires-reverse-signature dpd_ssl_client
enable "ssl"
tcp-state responder
}
signature dpd_ssl_client {
ip-proto == tcp
# Client hello.
payload /^(\x16\x03[\x00\x01\x02]..\x01...\x03[\x00\x01\x02]|...?\x01[\x00\x01\x02][\x02\x03]).*/
tcp-state originator
}
signature dpd_ayiya {
ip-proto = udp
payload /^..\x11\x29/
enable "ayiya"
}
signature dpd_teredo {
ip-proto = udp
payload /^(\x00\x00)|(\x00\x01)|([\x60-\x6f])/
enable "teredo"
}
signature dpd_socks4_client {
ip-proto == tcp
# '32' is a rather arbitrary max length for the user name.
payload /^\x04[\x01\x02].{0,32}\x00/
tcp-state originator
}
signature dpd_socks4_server {
ip-proto == tcp
requires-reverse-signature dpd_socks4_client
payload /^\x00[\x5a\x5b\x5c\x5d]/
tcp-state responder
enable "socks"
}
signature dpd_socks4_reverse_client {
ip-proto == tcp
# '32' is a rather arbitrary max length for the user name.
payload /^\x04[\x01\x02].{0,32}\x00/
tcp-state responder
}
signature dpd_socks4_reverse_server {
ip-proto == tcp
requires-reverse-signature dpd_socks4_reverse_client
payload /^\x00[\x5a\x5b\x5c\x5d]/
tcp-state originator
enable "socks"
}
signature dpd_socks5_client {
ip-proto == tcp
# Watch for a few authentication methods to reduce false positives.
payload /^\x05.[\x00\x01\x02]/
tcp-state originator
}
signature dpd_socks5_server {
ip-proto == tcp
requires-reverse-signature dpd_socks5_client
# Watch for a single authentication method to be chosen by the server or
# the server to indicate the no authentication is required.
payload /^\x05(\x00|\x01[\x00\x01\x02])/
tcp-state responder
enable "socks"
}


@ -3,8 +3,6 @@
module DPD;
@load-sigs ./dpd.sig
export {
## Add the DPD logging stream identifier.
redef enum Log::ID += { LOG };
@ -23,12 +21,12 @@ export {
analyzer: string &log;
## The textual reason for the analysis failure.
failure_reason: string &log;
## Disabled analyzer IDs. This is only for internal tracking
## Disabled analyzer IDs. This is only for internal tracking
## so as to not attempt to disable analyzers multiple times.
disabled_aids: set[count];
};
## Ignore violations which go this many bytes into the connection.
## Set to 0 to never ignore protocol violations.
const ignore_violations_after = 10 * 1024 &redef;
@ -41,41 +39,30 @@ redef record connection += {
event bro_init() &priority=5
{
Log::create_stream(DPD::LOG, [$columns=Info]);
# Populate the internal DPD analysis variable.
for ( a in dpd_config )
{
for ( p in dpd_config[a]$ports )
{
if ( p !in dpd_analyzer_ports )
dpd_analyzer_ports[p] = set();
add dpd_analyzer_ports[p][a];
}
}
}
event protocol_confirmation(c: connection, atype: count, aid: count) &priority=10
event protocol_confirmation(c: connection, atype: Analyzer::Tag, aid: count) &priority=10
{
local analyzer = analyzer_name(atype);
local analyzer = Analyzer::name(atype);
if ( fmt("-%s",analyzer) in c$service )
delete c$service[fmt("-%s", analyzer)];
add c$service[analyzer];
}
event protocol_violation(c: connection, atype: count, aid: count,
event protocol_violation(c: connection, atype: Analyzer::Tag, aid: count,
reason: string) &priority=10
{
local analyzer = analyzer_name(atype);
local analyzer = Analyzer::name(atype);
# If the service hasn't been confirmed yet, don't generate a log message
# for the protocol violation.
if ( analyzer !in c$service )
return;
delete c$service[analyzer];
add c$service[fmt("-%s", analyzer)];
local info: Info;
info$ts=network_time();
info$uid=c$uid;
@ -86,7 +73,7 @@ event protocol_violation(c: connection, atype: count, aid: count,
c$dpd = info;
}
event protocol_violation(c: connection, atype: count, aid: count, reason: string) &priority=5
event protocol_violation(c: connection, atype: Analyzer::Tag, aid: count, reason: string) &priority=5
{
if ( !c?$dpd || aid in c$dpd$disabled_aids )
return;
@ -94,13 +81,13 @@ event protocol_violation(c: connection, atype: count, aid: count, reason: string
local size = c$orig$size + c$resp$size;
if ( ignore_violations_after > 0 && size > ignore_violations_after )
return;
# Disable the analyzer that raised the last core-generated event.
disable_analyzer(c$id, aid);
add c$dpd$disabled_aids[aid];
}
event protocol_violation(c: connection, atype: count, aid: count,
event protocol_violation(c: connection, atype: Analyzer::Tag, aid: count,
reason: string) &priority=-5
{
if ( c?$dpd )


@ -0,0 +1 @@
@load ./main.bro


@ -0,0 +1,261 @@
##! An interface for driving the analysis of files, possibly independent of
##! any network protocol over which they're transported.
@load base/bif/file_analysis.bif
@load base/frameworks/logging
module FileAnalysis;
export {
redef enum Log::ID += {
## Logging stream for file analysis.
LOG
};
## A structure which represents a desired type of file analysis.
type AnalyzerArgs: record {
## The type of analysis.
tag: FileAnalysis::Tag;
## The local filename to which to write an extracted file. Must be
## set when *tag* is :bro:see:`FileAnalysis::ANALYZER_EXTRACT`.
extract_filename: string &optional;
## An event which will be generated for all new file contents,
## chunk-wise. Used when *tag* is
## :bro:see:`FileAnalysis::ANALYZER_DATA_EVENT`.
chunk_event: event(f: fa_file, data: string, off: count) &optional;
## An event which will be generated for all new file contents,
## stream-wise. Used when *tag* is
## :bro:see:`FileAnalysis::ANALYZER_DATA_EVENT`.
stream_event: event(f: fa_file, data: string) &optional;
} &redef;
## Contains all metadata related to the analysis of a given file.
## For the most part, fields here are derived from ones of the same name
## in :bro:see:`fa_file`.
type Info: record {
## An identifier associated with a single file.
id: string &log;
## Identifier associated with a container file from which this one was
## extracted as part of the file analysis.
parent_id: string &log &optional;
## An identification of the source of the file data. E.g. it may be
## a network protocol over which it was transferred, or a local file
## path which was read, or some other input source.
source: string &log &optional;
## If the source of this file is a network connection, this field
## may be set to indicate the directionality.
is_orig: bool &log &optional;
## The time at which the last activity for the file was seen.
last_active: time &log;
## Number of bytes provided to the file analysis engine for the file.
seen_bytes: count &log &default=0;
## Total number of bytes that are supposed to comprise the full file.
total_bytes: count &log &optional;
## The number of bytes in the file stream that were completely missed
## during the process of analysis e.g. due to dropped packets.
missing_bytes: count &log &default=0;
## The number of not all-in-sequence bytes in the file stream that
## were delivered to file analyzers due to reassembly buffer overflow.
overflow_bytes: count &log &default=0;
## The amount of time the analysis engine will wait between receiving
## new data for this file before giving up on it.
timeout_interval: interval &log &optional;
## The number of bytes at the beginning of a file to save for later
## inspection in the *bof_buffer* field.
bof_buffer_size: count &log &optional;
## A mime type provided by libmagic against the *bof_buffer*, or
## in the cases where no buffering of the beginning of the file occurs,
## an initial guess of the mime type based on the first data seen.
mime_type: string &log &optional;
## Whether the file analysis timed out at least once for the file.
timedout: bool &log &default=F;
## Connection UIDs over which the file was transferred.
conn_uids: set[string] &log;
## A set of analysis types done during the file analysis.
analyzers: set[FileAnalysis::Tag];
## Local filenames of extracted files.
extracted_files: set[string] &log;
## An MD5 digest of the file contents.
md5: string &log &optional;
## A SHA1 digest of the file contents.
sha1: string &log &optional;
## A SHA256 digest of the file contents.
sha256: string &log &optional;
} &redef;
## A table that can be used to disable file analysis completely for
## any files transferred over given network protocol analyzers.
const disable: table[Analyzer::Tag] of bool = table() &redef;
## Event that can be handled to access the Info record as it is sent on
## to the logging framework.
global log_file_analysis: event(rec: Info);
## The salt concatenated to unique file handle strings generated by
## :bro:see:`get_file_handle` before hashing them in to a file id
## (the *id* field of :bro:see:`fa_file`).
## Provided to help mitigate the possibility of manipulating parts of
## network connections that factor in to the file handle in order to
## generate two handles that would hash to the same file id.
const salt = "I recommend changing this." &redef;
## Sets the *timeout_interval* field of :bro:see:`fa_file`, which is
## used to determine the length of inactivity that is allowed for a file
## before internal state related to it is cleaned up. When used within a
## :bro:see:`file_timeout` handler, the analysis will delay timing out
## again for the period specified by *t*.
##
## f: the file.
##
## t: the amount of time the file can remain inactive before discarding.
##
## Returns: true if the timeout interval was set, or false if analysis
## for the *id* isn't currently active.
global set_timeout_interval: function(f: fa_file, t: interval): bool;
## Adds an analyzer to the analysis of a given file.
##
## f: the file.
##
## args: the analyzer type to add along with any arguments it takes.
##
## Returns: true if the analyzer will be added, or false if analysis
## for the *id* isn't currently active or the *args*
## were invalid for the analyzer type.
global add_analyzer: function(f: fa_file, args: AnalyzerArgs): bool;
## Removes an analyzer from the analysis of a given file.
##
## f: the file.
##
## args: the analyzer (type and args) to remove.
##
## Returns: true if the analyzer will be removed, or false if analysis
## for the *id* isn't currently active.
global remove_analyzer: function(f: fa_file, args: AnalyzerArgs): bool;
## Stops/ignores any further analysis of a given file.
##
## f: the file.
##
## Returns: true if analysis for the given file will be ignored for the
## rest of its contents, or false if analysis for the *id*
## isn't currently active.
global stop: function(f: fa_file): bool;
}
redef record fa_file += {
info: Info &optional;
};
function set_info(f: fa_file)
{
if ( ! f?$info )
{
local tmp: Info;
f$info = tmp;
}
f$info$id = f$id;
if ( f?$parent_id ) f$info$parent_id = f$parent_id;
if ( f?$source ) f$info$source = f$source;
if ( f?$is_orig ) f$info$is_orig = f$is_orig;
f$info$last_active = f$last_active;
f$info$seen_bytes = f$seen_bytes;
if ( f?$total_bytes ) f$info$total_bytes = f$total_bytes;
f$info$missing_bytes = f$missing_bytes;
f$info$overflow_bytes = f$overflow_bytes;
f$info$timeout_interval = f$timeout_interval;
f$info$bof_buffer_size = f$bof_buffer_size;
if ( f?$mime_type ) f$info$mime_type = f$mime_type;
if ( f?$conns )
for ( cid in f$conns )
add f$info$conn_uids[f$conns[cid]$uid];
}
function set_timeout_interval(f: fa_file, t: interval): bool
{
return __set_timeout_interval(f$id, t);
}
function add_analyzer(f: fa_file, args: AnalyzerArgs): bool
{
if ( ! __add_analyzer(f$id, args) ) return F;
set_info(f);
add f$info$analyzers[args$tag];
if ( args$tag == FileAnalysis::ANALYZER_EXTRACT )
add f$info$extracted_files[args$extract_filename];
return T;
}
function remove_analyzer(f: fa_file, args: AnalyzerArgs): bool
{
return __remove_analyzer(f$id, args);
}
function stop(f: fa_file): bool
{
return __stop(f$id);
}
event bro_init() &priority=5
{
Log::create_stream(FileAnalysis::LOG,
[$columns=Info, $ev=log_file_analysis]);
}
event file_timeout(f: fa_file) &priority=5
{
set_info(f);
f$info$timedout = T;
}
event file_hash(f: fa_file, kind: string, hash: string) &priority=5
{
set_info(f);
switch ( kind ) {
case "md5":
f$info$md5 = hash;
break;
case "sha1":
f$info$sha1 = hash;
break;
case "sha256":
f$info$sha256 = hash;
break;
}
}
event file_state_remove(f: fa_file) &priority=5
{
set_info(f);
}
event file_state_remove(f: fa_file) &priority=-5
{
Log::write(FileAnalysis::LOG, f$info);
}
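# Usage sketch (not part of this commit): attaching analyzers to a new file
# from a handler. The output filename is an illustrative assumption; the
# MD5 and EXTRACT analyzer tags are assumed to be available as named here.
event file_new(f: fa_file)
    {
    FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]);
    FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
                                   $extract_filename="extracted-example.dat"]);
    }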


@ -2,4 +2,5 @@
@load ./readers/ascii
@load ./readers/raw
@load ./readers/benchmark
@load ./readers/binary
@load ./readers/sqlite


@ -122,6 +122,34 @@ export {
config: table[string] of string &default=table();
};
## A file analysis input stream type used to forward input data to the
## file analysis framework.
type AnalysisDescription: record {
## String that allows the reader to find the source.
## For `READER_ASCII`, this is the filename.
source: string;
## Reader to use for this stream. Compatible readers must be
## able to accept a filter of a single string type (i.e.
## they read a byte stream).
reader: Reader &default=Input::READER_BINARY;
## Read mode to use for this stream
mode: Mode &default=default_mode;
## Descriptive name that uniquely identifies the input source.
## Can be used to remove a stream at a later time.
## This will also be used for the unique *source* field of
## :bro:see:`fa_file`. Most of the time, the best choice for this
## field will be the same value as the *source* field.
name: string;
## A key/value table that will be passed on to the reader.
## Interpretation of the values is left to the reader, but
## usually they will be used for configuration purposes.
config: table[string] of string &default=table();
};
## Create a new table input from a given source. Returns true on success.
##
## description: `TableDescription` record describing the source.
@ -132,6 +160,14 @@ export {
## description: `TableDescription` record describing the source.
global add_event: function(description: Input::EventDescription) : bool;
## Create a new file analysis input from a given source. Data read from
## the source is automatically forwarded to the file analysis framework.
##
## description: A record describing the source
##
## Returns: true on success.
global add_analysis: function(description: Input::AnalysisDescription) : bool;
## Remove an input stream. Returns true on success and false if the named stream was
## not found.
##
@ -149,7 +185,7 @@ export {
global end_of_data: event(name: string, source:string);
}
@load base/input.bif
@load base/bif/input.bif
module Input;
@ -164,6 +200,11 @@ function add_event(description: Input::EventDescription) : bool
return __create_event_stream(description);
}
function add_analysis(description: Input::AnalysisDescription) : bool
{
return __create_analysis_stream(description);
}
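# Usage sketch (not part of this commit): pushing a local file through the
# file analysis framework via the new analysis stream type. The path and
# stream name are illustrative assumptions; reader and mode fall back to
# their defaults (READER_BINARY and the default read mode).
event bro_init()
    {
    Input::add_analysis([$source="/tmp/example.exe", $name="example.exe"]);
    }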
function remove(id: string) : bool
{
return __remove_stream(id);


@ -0,0 +1,8 @@
##! Interface for the binary input reader.
module InputBinary;
export {
## Size of data chunks to read from the input file at a time.
const chunk_size = 1024 &redef;
}
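# Example (not part of this commit): a site could tune the chunk size from
# its own scripts, e.g. to read larger blocks per iteration.
redef InputBinary::chunk_size = 4096;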


@ -7,11 +7,11 @@ export {
## Please note that the separator has to be exactly one character long
const record_separator = "\n" &redef;
## Event that is called, when a process created by the raw reader exits.
## Event that is called when a process created by the raw reader exits.
##
## name: name of the input stream
## source: source of the input stream
## exit_code: exit code of the program, or number of the signal that forced the program to exit
## signal_exit: false when the program exited normally, true when the program was forced to exit by a signal
global process_finished: event(name: string, source:string, exit_code:count, signal_exit:bool);
global process_finished: event(name: string, source:string, exit_code:count, signal_exit:bool);
}


@ -0,0 +1,17 @@
##! Interface for the SQLite input reader.
##!
##! The defaults are set to match Bro's ASCII output.
module InputSQLite;
export {
## Separator between set elements.
## Please note that the separator has to be exactly one character long.
const set_separator = Input::set_separator &redef;
## String to use for an unset &optional field.
const unset_field = Input::unset_field &redef;
## String to use for empty fields.
const empty_field = Input::empty_field &redef;
}


@ -2,5 +2,6 @@
@load ./postprocessors
@load ./writers/ascii
@load ./writers/dataseries
@load ./writers/sqlite
@load ./writers/elasticsearch
@load ./writers/none


@ -195,7 +195,7 @@ export {
##
## Returns: True if a new stream was successfully removed.
##
## .. bro:see:: Log:create_stream
## .. bro:see:: Log::create_stream
global remove_stream: function(id: ID) : bool;
## Enables a previously disabled logging stream. Disabled streams
@ -366,7 +366,7 @@ export {
# We keep a script-level copy of all filters so that we can manipulate them.
global filters: table[ID, string] of Filter;
@load base/logging.bif # Needs Filter and Stream defined.
@load base/bif/logging.bif # Needs Filter and Stream defined.
module Log;
@ -454,7 +454,6 @@ function create_stream(id: ID, stream: Stream) : bool
function remove_stream(id: ID) : bool
{
delete active_streams[id];
return __remove_stream(id);
}


@ -0,0 +1,17 @@
##! Interface for the SQLite log writer. Redefinable options are available
##! to tweak the output format of the SQLite writer.
module LogSQLite;
export {
## Separator between set elements.
const set_separator = Log::set_separator &redef;
## String to use for an unset &optional field.
const unset_field = Log::unset_field &redef;
## String to use for empty fields. This should be different from
## *unset_field* to make the output unambiguous.
const empty_field = Log::empty_field &redef;
}


@ -1,264 +0,0 @@
##! This implements transparent cluster support for the metrics framework.
##! Do not load this file directly. It's only meant to be loaded automatically,
##! which happens only when the cluster framework has been enabled.
##! The goal of this script is to make metric calculation completely and
##! transparently automated when running on a cluster.
##!
##! Events defined here are not exported deliberately because they are meant
##! to be an internal implementation detail.
@load base/frameworks/cluster
@load ./main
module Metrics;
export {
## Allows a user to decide how many results workers should send per
## group when transmitting values for cluster metric aggregation.
const cluster_send_in_groups_of = 50 &redef;
## The percent of the full threshold value that needs to be met
## on a single worker for that worker to send the value to its manager so
## that the manager can request a global view for that value. There is no
## requirement that the manager requests a global view for the index
## since it may opt not to if it requested a global view for the index
## recently.
const cluster_request_global_view_percent = 0.1 &redef;
## Event sent by the manager in a cluster to initiate the
## collection of metrics values for a filter.
global cluster_filter_request: event(uid: string, id: ID, filter_name: string);
## Event sent by nodes that are collecting metrics after receiving
## a request for the metric filter from the manager.
global cluster_filter_response: event(uid: string, id: ID, filter_name: string, data: MetricTable, done: bool);
## This event is sent by the manager in a cluster to initiate the
## collection of a single index value from a filter. It's typically
## used to get intermediate updates before the break interval triggers
## to speed detection of a value crossing a threshold.
global cluster_index_request: event(uid: string, id: ID, filter_name: string, index: Index);
## This event is sent by nodes in response to a
## :bro:id:`Metrics::cluster_index_request` event.
global cluster_index_response: event(uid: string, id: ID, filter_name: string, index: Index, val: count);
## This event is sent by workers to indicate that they have crossed the
## percentage of the current threshold defined globally in
## :bro:id:`Metrics::cluster_request_global_view_percent`.
global cluster_index_intermediate_response: event(id: Metrics::ID, filter_name: string, index: Metrics::Index, val: count);
## This event is scheduled internally on workers to send result chunks.
global send_data: event(uid: string, id: ID, filter_name: string, data: MetricTable);
}
# This is maintained by managers so they can know what data they requested and
# when they requested it.
global requested_results: table[string] of time = table() &create_expire=5mins;
# TODO: The next 4 variables make the assumption that a value never
# takes longer than 5 minutes to transmit from workers to manager. This needs to
# be tunable or self-tuning. These should also be restructured to be
# maintained within a single variable.
# This variable is maintained by manager nodes as they collect and aggregate
# results.
global filter_results: table[string, ID, string] of MetricTable &create_expire=5mins;
# This variable is maintained by manager nodes to track how many "dones" they
# collected per collection unique id. Once the number of results for a uid
# matches the number of peer nodes that results should be coming from, the
# result is written out and deleted from here.
# TODO: add an &expire_func in case not all results are received.
global done_with: table[string] of count &create_expire=5mins &default=0;
# This variable is maintained by managers to track intermediate responses as
# they are getting a global view for a certain index.
global index_requests: table[string, ID, string, Index] of count &create_expire=5mins &default=0;
# This variable is maintained by all hosts for different purposes. Non-managers
# maintain it to know what indexes they have recently sent as intermediate
# updates so they don't overwhelm their manager. Managers maintain it so they
# don't overwhelm workers with intermediate index requests. The count that is
# yielded is the number of times the percentage threshold has been crossed and
# an intermediate result has been received. The manager may optionally request
# the index again before data expires from here if too many workers are crossing
# the percentage threshold (not implemented yet!).
global recent_global_view_indexes: table[ID, string, Index] of count &create_expire=5mins &default=0;
# Add events to the cluster framework to make this work.
redef Cluster::manager2worker_events += /Metrics::cluster_(filter_request|index_request)/;
redef Cluster::worker2manager_events += /Metrics::cluster_(filter_response|index_response|index_intermediate_response)/;
@if ( Cluster::local_node_type() != Cluster::MANAGER )
# This is done on all non-manager node types in the event that a metric is
# being collected somewhere other than a worker.
function data_added(filter: Filter, index: Index, val: count)
{
# If an intermediate update for this value was sent recently, don't send
# it again.
if ( [filter$id, filter$name, index] in recent_global_view_indexes )
return;
# If val is 5 and global view % is 0.1 (10%), pct_val will be 50. If that
# crosses the full threshold then it's a candidate to send as an
# intermediate update.
local pct_val = double_to_count(val / cluster_request_global_view_percent);
if ( check_notice(filter, index, pct_val) )
{
# kick off intermediate update
event Metrics::cluster_index_intermediate_response(filter$id, filter$name, index, val);
++recent_global_view_indexes[filter$id, filter$name, index];
}
}
event Metrics::send_data(uid: string, id: ID, filter_name: string, data: MetricTable)
{
#print fmt("WORKER %s: sending data for uid %s...", Cluster::node, uid);
local local_data: MetricTable;
local num_added = 0;
for ( index in data )
{
local_data[index] = data[index];
delete data[index];
# Only send cluster_send_in_groups_of at a time. Queue another
# event to send the next group.
if ( cluster_send_in_groups_of == ++num_added )
break;
}
local done = F;
# If data is empty, this metric is done.
if ( |data| == 0 )
done = T;
event Metrics::cluster_filter_response(uid, id, filter_name, local_data, done);
if ( ! done )
event Metrics::send_data(uid, id, filter_name, data);
}
event Metrics::cluster_filter_request(uid: string, id: ID, filter_name: string)
{
#print fmt("WORKER %s: received the cluster_filter_request event.", Cluster::node);
# Initiate sending all of the data for the requested filter.
event Metrics::send_data(uid, id, filter_name, store[id, filter_name]);
# Lookup the actual filter and reset it, the reference to the data
# currently stored will be maintained internally by the send_data event.
reset(filter_store[id, filter_name]);
}
event Metrics::cluster_index_request(uid: string, id: ID, filter_name: string, index: Index)
{
local val=0;
if ( index in store[id, filter_name] )
val = store[id, filter_name][index];
# fmt("WORKER %s: received the cluster_index_request event for %s=%d.", Cluster::node, index2str(index), val);
event Metrics::cluster_index_response(uid, id, filter_name, index, val);
}
@endif
@if ( Cluster::local_node_type() == Cluster::MANAGER )
# Managers handle logging.
event Metrics::log_it(filter: Filter)
{
#print fmt("%.6f MANAGER: breaking %s filter for %s metric", network_time(), filter$name, filter$id);
local uid = unique_id("");
# Set some tracking variables.
requested_results[uid] = network_time();
filter_results[uid, filter$id, filter$name] = table();
# Request data from peers.
event Metrics::cluster_filter_request(uid, filter$id, filter$name);
# Schedule the log_it event for the next break period.
schedule filter$break_interval { Metrics::log_it(filter) };
}
# This is unlikely to be called often, but it's here in case there are metrics
# being collected by managers.
function data_added(filter: Filter, index: Index, val: count)
{
if ( check_notice(filter, index, val) )
do_notice(filter, index, val);
}
event Metrics::cluster_index_response(uid: string, id: ID, filter_name: string, index: Index, val: count)
{
#print fmt("%0.6f MANAGER: receiving index data from %s", network_time(), get_event_peer()$descr);
if ( [uid, id, filter_name, index] !in index_requests )
index_requests[uid, id, filter_name, index] = 0;
index_requests[uid, id, filter_name, index] += val;
local ir = index_requests[uid, id, filter_name, index];
++done_with[uid];
if ( Cluster::worker_count == done_with[uid] )
{
if ( check_notice(filter_store[id, filter_name], index, ir) )
do_notice(filter_store[id, filter_name], index, ir);
delete done_with[uid];
delete index_requests[uid, id, filter_name, index];
}
}
# Managers handle intermediate updates here.
event Metrics::cluster_index_intermediate_response(id: ID, filter_name: string, index: Index, val: count)
{
#print fmt("MANAGER: receiving intermediate index data from %s", get_event_peer()$descr);
#print fmt("MANAGER: requesting index data for %s", index2str(index));
local uid = unique_id("");
event Metrics::cluster_index_request(uid, id, filter_name, index);
++recent_global_view_indexes[id, filter_name, index];
}
event Metrics::cluster_filter_response(uid: string, id: ID, filter_name: string, data: MetricTable, done: bool)
{
#print fmt("MANAGER: receiving results from %s", get_event_peer()$descr);
local local_data = filter_results[uid, id, filter_name];
for ( index in data )
{
if ( index !in local_data )
local_data[index] = 0;
local_data[index] += data[index];
}
# Mark another worker as being "done" for this uid.
if ( done )
++done_with[uid];
# If the data has been collected from all peers, we are done and ready to log.
if ( Cluster::worker_count == done_with[uid] )
{
local ts = network_time();
# Log the time this was initially requested if it's available.
if ( uid in requested_results )
{
ts = requested_results[uid];
delete requested_results[uid];
}
write_log(ts, filter_store[id, filter_name], local_data);
# Clean up
delete filter_results[uid, id, filter_name];
delete done_with[uid];
}
}
@endif


@ -1,320 +0,0 @@
##! The metrics framework provides a way to count and measure data.
@load base/frameworks/notice
module Metrics;
export {
## The metrics logging stream identifier.
redef enum Log::ID += { LOG };
## Identifiers for metrics to collect.
type ID: enum {
## Blank placeholder value.
NOTHING,
};
## The default interval used for "breaking" metrics and writing the
## current value to the logging stream.
const default_break_interval = 15mins &redef;
## This is the interval for how often threshold based notices will happen
## after they have already fired.
const renotice_interval = 1hr &redef;
## Represents a thing which is having metrics collected for it. An instance
## of this record type and a :bro:type:`Metrics::ID` together represent a
## single measurement.
type Index: record {
## Host is the value to which this metric applies.
host: addr &optional;
## A non-address related metric or a sub-key for an address based metric.
## An example might be successful SSH connections by client IP address
## where the client string would be the index value.
## Another example might be number of HTTP requests to a particular
## value in a Host header. This is an example of a non-host based
## metric since multiple IP addresses could respond for the same Host
## header value.
str: string &optional;
## The CIDR block that this metric applies to. This is typically
## only used internally for host based aggregation.
network: subnet &optional;
} &log;
## The record type that is used for logging metrics.
type Info: record {
## Timestamp at which the metric was "broken".
ts: time &log;
## What measurement the metric represents.
metric_id: ID &log;
## The name of the filter being logged. :bro:type:`Metrics::ID` values
## can have multiple filters which represent different perspectives on
## the data so this is necessary to understand the value.
filter_name: string &log;
## What the metric value applies to.
index: Index &log;
## The simple numeric value of the metric.
value: count &log;
};
# TODO: configure a metrics filter logging stream to log the current
# metrics configuration in case someone is looking through
# old logs and the configuration has changed since then.
## Filters define how the data from a metric is aggregated and handled.
## Filters can be used to set how often the measurements are cut or "broken"
## and logged or how the data within them is aggregated. It's also
## possible to disable logging and use filters for thresholding.
type Filter: record {
## The :bro:type:`Metrics::ID` that this filter applies to.
id: ID &optional;
## The name for this filter so that multiple filters can be
## applied to a single metric to get a different view of the same
## metric data being collected (different aggregation, break, etc).
name: string &default="default";
## A predicate so that you can decide per index if you would like
## to accept the data being inserted.
pred: function(index: Index): bool &optional;
## Global mask by which you'd like to aggregate traffic.
aggregation_mask: count &optional;
## This is essentially a mapping table between addresses and subnets.
aggregation_table: table[subnet] of subnet &optional;
## The interval at which this filter should be "broken" and written
## to the logging stream. The counters are also reset to zero at
## this time so any threshold based detection needs to be set to a
## number that should be expected to happen within this period.
break_interval: interval &default=default_break_interval;
## This determines if the result of this filter is sent to the metrics
## logging stream. One use for the metrics framework is as an internal
## thresholding and statistics gathering utility that is meant to
## never log but rather to generate notices and derive data.
log: bool &default=T;
## If this and a $notice_threshold value are set, this notice type
## will be generated by the metrics framework.
note: Notice::Type &optional;
## A straight threshold for generating a notice.
notice_threshold: count &optional;
## A series of thresholds at which to generate notices.
notice_thresholds: vector of count &optional;
## How often this notice should be raised for this filter. It
## will be generated every time it crosses a threshold, but if the
## $break_interval is set to 5mins and this is set to 1hr the notice
## will only be generated once per hour even if something crosses the
## threshold in every break interval.
notice_freq: interval &optional;
};
## Function to associate a metric filter with a metric ID.
##
## id: The metric ID that the filter should be associated with.
##
## filter: The record representing the filter configuration.
global add_filter: function(id: ID, filter: Filter);
## Add data into a :bro:type:`Metrics::ID`. This should be called when
## a script has measured some point value and is ready to increment the
## counters.
##
## id: The metric ID that the data represents.
##
## index: The metric index that the value is to be added to.
##
## increment: How much to increment the counter by.
global add_data: function(id: ID, index: Index, increment: count);
## Helper function to represent a :bro:type:`Metrics::Index` value as
## a simple string.
##
## index: The metric index that is to be converted into a string.
##
## Returns: A string representation of the metric index.
global index2str: function(index: Index): string;
## Event that is used to "finish" metrics and adapt the metrics
## framework for clustered or non-clustered usage.
##
## .. note:: This is primarily intended for internal use.
global log_it: event(filter: Filter);
## Event to access metrics records as they are passed to the logging framework.
global log_metrics: event(rec: Info);
## Type to store a table of metrics values. Internal use only!
type MetricTable: table[Index] of count &default=0;
}
redef record Notice::Info += {
metric_index: Index &log &optional;
};
global metric_filters: table[ID] of vector of Filter = table();
global filter_store: table[ID, string] of Filter = table();
# This is indexed by metric ID and stream filter name.
global store: table[ID, string] of MetricTable = table() &default=table();
# This function checks if a threshold has been crossed and generates a
# notice if it has. It is also used as a method to implement
# mid-break-interval threshold crossing detection for cluster deployments.
global check_notice: function(filter: Filter, index: Index, val: count): bool;
# This is a hook for watching thresholds being crossed. It is called whenever
# index values are updated and the new val is given as the `val` argument.
global data_added: function(filter: Filter, index: Index, val: count);
# This stores the current threshold index for filters using the
# $notice_threshold and $notice_thresholds elements.
global thresholds: table[ID, string, Index] of count = {} &create_expire=renotice_interval &default=0;
event bro_init() &priority=5
{
Log::create_stream(Metrics::LOG, [$columns=Info, $ev=log_metrics]);
}
function index2str(index: Index): string
{
local out = "";
if ( index?$host )
out = fmt("%shost=%s", out, index$host);
if ( index?$network )
out = fmt("%s%snetwork=%s", out, |out|==0 ? "" : ", ", index$network);
if ( index?$str )
out = fmt("%s%sstr=%s", out, |out|==0 ? "" : ", ", index$str);
return fmt("metric_index(%s)", out);
}
function write_log(ts: time, filter: Filter, data: MetricTable)
{
for ( index in data )
{
local val = data[index];
local m: Info = [$ts=ts,
$metric_id=filter$id,
$filter_name=filter$name,
$index=index,
$value=val];
if ( filter$log )
Log::write(Metrics::LOG, m);
}
}
function reset(filter: Filter)
{
store[filter$id, filter$name] = table();
}
function add_filter(id: ID, filter: Filter)
{
if ( filter?$aggregation_table && filter?$aggregation_mask )
{
print "INVALID Metric filter: Defined $aggregation_table and $aggregation_mask.";
return;
}
if ( [id, filter$name] in store )
{
print fmt("INVALID Metric filter: Filter with name \"%s\" already exists.", filter$name);
return;
}
if ( filter?$notice_threshold && filter?$notice_thresholds )
{
print "INVALID Metric filter: Defined both $notice_threshold and $notice_thresholds";
return;
}
if ( ! filter?$id )
filter$id = id;
if ( id !in metric_filters )
metric_filters[id] = vector();
metric_filters[id][|metric_filters[id]|] = filter;
filter_store[id, filter$name] = filter;
store[id, filter$name] = table();
schedule filter$break_interval { Metrics::log_it(filter) };
}
function add_data(id: ID, index: Index, increment: count)
{
if ( id !in metric_filters )
return;
local filters = metric_filters[id];
# Try to add the data to all of the defined filters for the metric.
for ( filter_id in filters )
{
local filter = filters[filter_id];
# If this filter has a predicate, run the predicate and skip this
# index if the predicate returns false.
if ( filter?$pred && ! filter$pred(index) )
next;
if ( index?$host )
{
if ( filter?$aggregation_mask )
{
index$network = mask_addr(index$host, filter$aggregation_mask);
delete index$host;
}
else if ( filter?$aggregation_table )
{
# Don't add the data if the aggregation table doesn't include
# the given host address.
if ( index$host !in filter$aggregation_table )
return;
index$network = filter$aggregation_table[index$host];
delete index$host;
}
}
local metric_tbl = store[id, filter$name];
if ( index !in metric_tbl )
metric_tbl[index] = 0;
metric_tbl[index] += increment;
data_added(filter, index, metric_tbl[index]);
}
}
function check_notice(filter: Filter, index: Index, val: count): bool
{
if ( (filter?$notice_threshold &&
[filter$id, filter$name, index] !in thresholds &&
val >= filter$notice_threshold) ||
(filter?$notice_thresholds &&
|filter$notice_thresholds| <= thresholds[filter$id, filter$name, index] &&
val >= filter$notice_thresholds[thresholds[filter$id, filter$name, index]]) )
return T;
else
return F;
}
function do_notice(filter: Filter, index: Index, val: count)
{
# We include $peer_descr here because a manager could have actually
# generated the notice even though the current remote peer for the event
# calling this could be a worker if this is running as a cluster.
local n: Notice::Info = [$note=filter$note,
$n=val,
$metric_index=index,
$peer_descr=peer_description];
n$msg = fmt("Threshold crossed by %s %d/%d", index2str(index), val, filter$notice_threshold);
if ( index?$str )
n$sub = index$str;
if ( index?$host )
n$src = index$host;
# TODO: not sure where to put the network yet.
NOTICE(n);
# This just needs to be set to some value so that it doesn't refire the
# notice until it expires from the table or it crosses the next
# threshold in the case of vectors of thresholds.
++thresholds[filter$id, filter$name, index];
}
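# Usage sketch (not part of this commit, illustrating the API defined by this
# now-removed script): count HTTP requests per originating host. The
# HTTP_REQUESTS identifier, filter name, and 5min break interval are
# illustrative assumptions.
redef enum Metrics::ID += { HTTP_REQUESTS };

event bro_init()
    {
    Metrics::add_filter(HTTP_REQUESTS, [$name="all-requests", $break_interval=5mins]);
    }

event http_request(c: connection, method: string, original_URI: string,
                   unescaped_URI: string, version: string)
    {
    Metrics::add_data(HTTP_REQUESTS, [$host=c$id$orig_h], 1);
    }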


@ -1,21 +0,0 @@
@load ./main
module Metrics;
event Metrics::log_it(filter: Filter)
{
local id = filter$id;
local name = filter$name;
write_log(network_time(), filter, store[id, name]);
reset(filter);
schedule filter$break_interval { Metrics::log_it(filter) };
}
function data_added(filter: Filter, index: Index, val: count)
{
if ( check_notice(filter, index, val) )
do_notice(filter, index, val);
}


@ -431,9 +431,6 @@ hook Notice::notice(n: Notice::Info) &priority=-5
}
}
## This determines if a notice is being suppressed. It is only used
## internally as part of the mechanics for the global :bro:id:`NOTICE`
## function.
function is_being_suppressed(n: Notice::Info): bool
{
if ( n?$identifier && [n$note, n$identifier] in suppressing )


@ -1,2 +1,3 @@
@load ./utils
@load ./main
@load ./netstats


@ -1,10 +1,12 @@
##! This script supports how Bro sets its BPF capture filter. By default
##! Bro sets an unrestricted filter that allows all traffic. If a filter
##! Bro sets a capture filter that allows all traffic. If a filter
##! is set on the command line, that filter takes precedence over the default
##! open filter and all filters defined in Bro scripts with the
##! :bro:id:`capture_filters` and :bro:id:`restrict_filters` variables.
@load base/frameworks/notice
@load base/frameworks/analyzer
@load ./utils
module PacketFilter;
@ -14,11 +16,14 @@ export {
## Add notice types related to packet filter errors.
redef enum Notice::Type += {
## This notice is generated if a packet filter is unable to be compiled.
## This notice is generated if a packet filter cannot be compiled.
Compile_Failure,
## This notice is generated if a packet filter is fails to install.
## Generated if a packet filter fails to install.
Install_Failure,
## Generated when a filter takes too long to compile.
Too_Long_To_Compile_Filter
};
## The record type defining columns to be logged in the packet filter
@ -42,83 +47,248 @@ export {
success: bool &log &default=T;
};
## By default, Bro will examine all packets. If this is set to false,
## it will dynamically build a BPF filter that only selects protocols
## for which the user has loaded a corresponding analysis script.
## The latter used to be the default for Bro versions < 2.0. That has now
## changed however to enable port-independent protocol analysis.
const all_packets = T &redef;
## The BPF filter that is used by default to define what traffic should
## be captured. Filters defined in :bro:id:`restrict_filters` will still
## be applied to reduce the captured traffic.
const default_capture_filter = "ip or not ip" &redef;
## Filter string which is unconditionally or'ed to the beginning of every
## dynamically built filter.
const unrestricted_filter = "" &redef;
## Filter string which is unconditionally and'ed to the beginning of every
## dynamically built filter. This is mostly used when a custom filter is being
## used but MPLS or VLAN tags are on the traffic.
const restricted_filter = "" &redef;
## The maximum amount of time that you'd like to allow for BPF filters to compile.
## If this time is exceeded, compensation measures may be taken by the framework
## to reduce the filter size. This threshold being crossed also results in
## the :bro:see:`PacketFilter::Too_Long_To_Compile_Filter` notice.
const max_filter_compile_time = 100msec &redef;
## Install a BPF filter to exclude some traffic. The filter should positively
## match what is to be excluded; it will be wrapped in a "not".
##
## filter_id: An arbitrary string that can be used to identify
## the filter.
##
## filter: A BPF expression of traffic that should be excluded.
##
## Returns: A boolean value to indicate if the filter was successfully
## installed or not.
global exclude: function(filter_id: string, filter: string): bool;
## Install a temporary filter to exclude traffic that should not be passed through
## the BPF filter. The filter should match the traffic you don't want
## to see (it will be wrapped in a "not" condition).
##
## filter_id: An arbitrary string that can be used to identify
## the filter.
##
## filter: A BPF expression of traffic that should be excluded.
##
## span: The duration for which this filter should be put in place.
##
## Returns: A boolean value to indicate if the filter was successfully
## installed or not.
global exclude_for: function(filter_id: string, filter: string, span: interval): bool;
## Call this function to build and install a new dynamically built
## packet filter.
global install: function();
global install: function(): bool;
## A data structure to represent filter generating plugins.
type FilterPlugin: record {
## A function that is directly called when generating the complete filter.
func : function();
};
## API function to register a new plugin for dynamic restriction filters.
global register_filter_plugin: function(fp: FilterPlugin);
## Enables the old filtering approach of "only watch common ports for
## analyzed protocols".
##
## Unless you know what you are doing, leave this set to F.
const enable_auto_protocol_capture_filters = F &redef;
## This is where the default packet filter is stored and it should not
## normally be modified by users.
global default_filter = "<not set yet>";
global current_filter = "<not set yet>";
}
global dynamic_restrict_filters: table[string] of string = {};
# Track if a filter is currently building so functions that would ultimately
# install a filter immediately can still be used but they won't try to build or
# install the filter.
global currently_building = F;
# Internal tracking for whether the filter being built has possibly been changed.
global filter_changed = F;
global filter_plugins: set[FilterPlugin] = {};
redef enum PcapFilterID += {
DefaultPcapFilter,
FilterTester,
};
function combine_filters(lfilter: string, rfilter: string, op: string): string
function test_filter(filter: string): bool
{
if ( lfilter == "" && rfilter == "" )
return "";
else if ( lfilter == "" )
return rfilter;
else if ( rfilter == "" )
return lfilter;
else
return fmt("(%s) %s (%s)", lfilter, op, rfilter);
if ( ! precompile_pcap_filter(FilterTester, filter) )
{
# The given filter was invalid
# TODO: generate a notice.
return F;
}
return T;
}
function build_default_filter(): string
# This tracks any changes for filtering mechanisms that play along nicely
# and set filter_changed to T.
event filter_change_tracking()
{
if ( filter_changed )
install();
schedule 5min { filter_change_tracking() };
}
event bro_init() &priority=5
{
Log::create_stream(PacketFilter::LOG, [$columns=Info]);
# Preverify the capture and restrict filters to give more granular failure messages.
for ( id in capture_filters )
{
if ( ! test_filter(capture_filters[id]) )
Reporter::fatal(fmt("Invalid capture_filter named '%s' - '%s'", id, capture_filters[id]));
}
for ( id in restrict_filters )
{
if ( ! test_filter(restrict_filters[id]) )
Reporter::fatal(fmt("Invalid restrict filter named '%s' - '%s'", id, restrict_filters[id]));
}
}
event bro_init() &priority=-5
{
install();
event filter_change_tracking();
}
function register_filter_plugin(fp: FilterPlugin)
{
add filter_plugins[fp];
}
event remove_dynamic_filter(filter_id: string)
{
if ( filter_id in dynamic_restrict_filters )
{
delete dynamic_restrict_filters[filter_id];
install();
}
}
function exclude(filter_id: string, filter: string): bool
{
if ( ! test_filter(filter) )
return F;
dynamic_restrict_filters[filter_id] = filter;
install();
return T;
}
function exclude_for(filter_id: string, filter: string, span: interval): bool
{
if ( exclude(filter_id, filter) )
{
schedule span { remove_dynamic_filter(filter_id) };
return T;
}
return F;
}
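# Usage sketch (not part of this commit): temporarily shunt a noisy host out
# of the capture filter for an hour. The filter id and the host address are
# illustrative assumptions.
event bro_init()
    {
    PacketFilter::exclude_for("shunt-noisy-host", "host 192.168.1.100", 1hr);
    }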
function build(): string
{
if ( cmd_line_bpf_filter != "" )
# Return what the user specified on the command line;
return cmd_line_bpf_filter;
if ( all_packets )
# Return an "always true" filter.
return "ip or not ip";
currently_building = T;
# Build filter dynamically.
# Generate all of the plugin based filters.
for ( plugin in filter_plugins )
{
plugin$func();
}
# First the capture_filter.
local cfilter = "";
for ( id in capture_filters )
cfilter = combine_filters(cfilter, capture_filters[id], "or");
if ( |capture_filters| == 0 && ! enable_auto_protocol_capture_filters )
cfilter = default_capture_filter;
# Then the restrict_filter.
for ( id in capture_filters )
cfilter = combine_filters(cfilter, "or", capture_filters[id]);
if ( enable_auto_protocol_capture_filters )
cfilter = combine_filters(cfilter, "or", Analyzer::get_bpf());
# Apply the restriction filters.
local rfilter = "";
for ( id in restrict_filters )
rfilter = combine_filters(rfilter, "and", restrict_filters[id]);
# Apply the dynamic restriction filters.
for ( filt in dynamic_restrict_filters )
rfilter = combine_filters(rfilter, "and", string_cat("not (", dynamic_restrict_filters[filt], ")"));
# Finally, join them into one filter.
local filter = combine_filters(cfilter, "and", rfilter);
if ( unrestricted_filter != "" )
filter = combine_filters(unrestricted_filter, "or", filter);
if ( restricted_filter != "" )
filter = combine_filters(restricted_filter, "and", filter);
currently_building = F;
return filter;
}
function install(): bool
{
if ( currently_building )
return F;
local tmp_filter = build();
# No need to proceed if the filter hasn't changed.
if ( tmp_filter == current_filter )
return F;
local ts = current_time();
if ( ! precompile_pcap_filter(DefaultPcapFilter, tmp_filter) )
{
NOTICE([$note=Compile_Failure,
$msg=fmt("Compiling packet filter failed"),
$sub=tmp_filter]);
if ( network_time() == 0.0 )
Reporter::fatal(fmt("Bad pcap filter '%s'", tmp_filter));
else
Reporter::warning(fmt("Bad pcap filter '%s'", tmp_filter));
}
local diff = current_time()-ts;
if ( diff > max_filter_compile_time )
NOTICE([$note=Too_Long_To_Compile_Filter,
$msg=fmt("A BPF filter is taking longer than %0.1f seconds to compile", diff)]);
# Set it to the current filter if it passed precompiling
current_filter = tmp_filter;
# Do an audit log for the packet filter.
local info: Info;
@ -129,7 +299,7 @@ function install()
info$ts = current_time();
info$init = T;
}
info$filter = default_filter;
info$filter = current_filter;
if ( ! install_pcap_filter(DefaultPcapFilter) )
{
@ -137,15 +307,13 @@ function install()
info$success = F;
NOTICE([$note=Install_Failure,
$msg=fmt("Installing packet filter failed"),
$sub=default_filter]);
$sub=current_filter]);
}
if ( reading_live_traffic() || reading_traces() )
Log::write(PacketFilter::LOG, info);
# Update the filter change tracking
filter_changed = F;
return T;
}

View file

@ -13,7 +13,7 @@ export {
};
## This is the interval between individual statistics collection.
const stats_collection_interval = 10secs;
const stats_collection_interval = 5min;
}
event net_stats_update(last_stat: NetStats)

View file

@ -0,0 +1,58 @@
module PacketFilter;
export {
## Takes a :bro:type:`port` and returns a BPF expression which will
## match the port.
##
## p: The port.
##
## Returns: A valid BPF filter string for matching the port.
global port_to_bpf: function(p: port): string;
## Create a BPF filter to sample IPv4 and IPv6 traffic.
##
## num_parts: The number of parts the traffic should be split into.
##
## this_part: The part of the traffic this filter will accept. 0-based.
global sampling_filter: function(num_parts: count, this_part: count): string;
## Combines two valid BPF filter strings with a string based operator
## to form a new filter.
##
## lfilter: Filter which will go on the left side.
##
## op: Operation being applied (typically "or" or "and").
##
## rfilter: Filter which will go on the right side.
##
## Returns: A new string representing the two filters combined with
## the operator. Either filter being an empty string will
## still result in a valid filter.
global combine_filters: function(lfilter: string, op: string, rfilter: string): string;
}
function port_to_bpf(p: port): string
{
local tp = get_port_transport_proto(p);
return cat(tp, " and ", fmt("port %d", p));
}
function combine_filters(lfilter: string, op: string, rfilter: string): string
{
if ( lfilter == "" && rfilter == "" )
return "";
else if ( lfilter == "" )
return rfilter;
else if ( rfilter == "" )
return lfilter;
else
return fmt("(%s) %s (%s)", lfilter, op, rfilter);
}
function sampling_filter(num_parts: count, this_part: count): string
{
local v4_filter = fmt("ip and ((ip[14:2]+ip[18:2]) - (%d*((ip[14:2]+ip[18:2])/%d)) == %d)", num_parts, num_parts, this_part);
# TODO: this is probably a fairly suboptimal filter, but it should work for now.
local v6_filter = fmt("ip6 and ((ip6[22:2]+ip6[38:2]) - (%d*((ip6[22:2]+ip6[38:2])/%d)) == %d)", num_parts, num_parts, this_part);
return combine_filters(v4_filter, "or", v6_filter);
}
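# A brief usage sketch (illustrative only, not part of this script). Inside
# some function or event handler, these helpers compose BPF expressions; the
# port and the 1-of-4 sampling split below are made-up example values.
#
#     local dns_bpf = PacketFilter::port_to_bpf(53/udp);      # "udp and port 53"
#     local sampled = PacketFilter::sampling_filter(4, 0);    # accept part 0 of 4
#     local both = PacketFilter::combine_filters(dns_bpf, "and", sampled);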

View file

@ -9,7 +9,7 @@
##! Note that this framework deals with the handling of internally generated
##! reporter messages; for the interface into actually creating reporter
##! messages from the scripting layer, use
##! the built-in functions in :doc:`/scripts/base/reporter.bif`.
##! the built-in functions in :doc:`/scripts/base/bif/reporter.bif`.
module Reporter;

View file

@ -1,4 +1,5 @@
@load ./main
@load ./plugins
# The cluster framework must be loaded first.
@load base/frameworks/cluster

View file

@ -0,0 +1,346 @@
##! This implements transparent cluster support for the SumStats framework.
##! Do not load this file directly. It's only meant to be loaded automatically,
##! which will happen if the cluster framework has been enabled.
##! The goal of this script is to make sumstats calculation completely and
##! transparently automated when running on a cluster.
@load base/frameworks/cluster
@load ./main
module SumStats;
export {
## Allows a user to decide the size of the result groups in which workers
## transmit values for cluster stats aggregation.
const cluster_send_in_groups_of = 50 &redef;
## The percent of the full threshold value that needs to be met on a single worker
## for that worker to send the value to its manager in order for it to request a
## global view for that value. There is no requirement that the manager requests
## a global view for the key since it may opt not to if it requested a global view
## for the key recently.
const cluster_request_global_view_percent = 0.2 &redef;
## This is to deal with intermediate update overload. A manager will only allow
## this many intermediate update requests to the workers to be in flight at any
## given time. Intermediate updates requested beyond that are currently thrown out
## and not performed. In practice this should hopefully have a minimal effect.
const max_outstanding_global_views = 10 &redef;
## Intermediate updates can cause overload situations on very large clusters. This
## option may help reduce load and correct intermittent problems. It is also only
## intended as a temporary option.
const enable_intermediate_updates = T &redef;
## Event sent by the manager in a cluster to initiate the collection of values for
## a sumstat.
global cluster_ss_request: event(uid: string, ssid: string);
## Event sent by nodes that are collecting sumstats after receiving a request for
## the sumstat from the manager.
global cluster_ss_response: event(uid: string, ssid: string, data: ResultTable, done: bool);
## This event is sent by the manager in a cluster to initiate the collection of
## a single key value from a sumstat. It's typically used to get intermediate
## updates before the break interval triggers to speed detection of a value
## crossing a threshold.
global cluster_key_request: event(uid: string, ssid: string, key: Key);
## This event is sent by nodes in response to a
## :bro:id:`SumStats::cluster_key_request` event.
global cluster_key_response: event(uid: string, ssid: string, key: Key, result: Result);
## This is sent by workers to indicate that they crossed the fraction
## of the current threshold defined globally in
## :bro:id:`SumStats::cluster_request_global_view_percent`.
global cluster_key_intermediate_response: event(ssid: string, key: SumStats::Key);
## This event is scheduled internally on workers to send result chunks.
global send_data: event(uid: string, ssid: string, data: ResultTable);
## This event is generated when a threshold is crossed.
global cluster_threshold_crossed: event(ssid: string, key: SumStats::Key, thold: Thresholding);
}
# Add events to the cluster framework to make this work.
redef Cluster::manager2worker_events += /SumStats::cluster_(ss_request|key_request|threshold_crossed)/;
redef Cluster::manager2worker_events += /SumStats::thresholds_reset/;
redef Cluster::worker2manager_events += /SumStats::cluster_(ss_response|key_response|key_intermediate_response)/;
@if ( Cluster::local_node_type() != Cluster::MANAGER )
# This variable is maintained to know which keys have recently been sent as
# intermediate updates so they don't overwhelm their manager. The count that is
# yielded is the number of times the percentage threshold has been crossed and
# an intermediate result has been received.
global recent_global_view_keys: table[string, Key] of count &create_expire=1min &default=0;
event bro_init() &priority=-100
{
# The manager is the only host allowed to track these.
stats_store = table();
reducer_store = table();
}
# This is done on all non-manager node types in the event that a sumstat is
# being collected somewhere other than a worker.
function data_added(ss: SumStat, key: Key, result: Result)
{
# If an intermediate update for this value was sent recently, don't send
# it again.
if ( [ss$id, key] in recent_global_view_keys )
return;
# If val is 5 and global view % is 0.1 (10%), pct_val will be 50. If that
# crosses the full threshold then it's a candidate to send as an
# intermediate update.
if ( enable_intermediate_updates &&
check_thresholds(ss, key, result, cluster_request_global_view_percent) )
{
# kick off intermediate update
event SumStats::cluster_key_intermediate_response(ss$id, key);
++recent_global_view_keys[ss$id, key];
}
}
event SumStats::send_data(uid: string, ssid: string, data: ResultTable)
{
#print fmt("WORKER %s: sending data for uid %s...", Cluster::node, uid);
local local_data: ResultTable = table();
local num_added = 0;
for ( key in data )
{
local_data[key] = data[key];
delete data[key];
# Only send cluster_send_in_groups_of at a time. Queue another
# event to send the next group.
if ( cluster_send_in_groups_of == ++num_added )
break;
}
local done = F;
# If data is empty, this sumstat is done.
if ( |data| == 0 )
done = T;
# Note: copy is needed to compensate for a serialization caching issue. This should be
# changed to something else later.
event SumStats::cluster_ss_response(uid, ssid, copy(local_data), done);
if ( ! done )
schedule 0.01 sec { SumStats::send_data(uid, ssid, data) };
}
event SumStats::cluster_ss_request(uid: string, ssid: string)
{
#print fmt("WORKER %s: received the cluster_ss_request event for %s.", Cluster::node, id);
# Initiate sending all of the data for the requested stats.
if ( ssid in result_store )
event SumStats::send_data(uid, ssid, result_store[ssid]);
else
event SumStats::send_data(uid, ssid, table());
# Look up the actual sumstat and reset it; the reference to the data
# currently stored will be maintained internally by the send_data event.
if ( ssid in stats_store )
reset(stats_store[ssid]);
}
event SumStats::cluster_key_request(uid: string, ssid: string, key: Key)
{
if ( ssid in result_store && key in result_store[ssid] )
{
#print fmt("WORKER %s: received the cluster_key_request event for %s=%s.", Cluster::node, key2str(key), data);
# Note: copy is needed to compensate for a serialization caching issue. This should be
# changed to something else later.
event SumStats::cluster_key_response(uid, ssid, key, copy(result_store[ssid][key]));
}
else
{
# We need to send an empty response if we don't have the data so that the manager
# can know that it heard back from all of the workers.
event SumStats::cluster_key_response(uid, ssid, key, table());
}
}
event SumStats::cluster_threshold_crossed(ssid: string, key: SumStats::Key, thold: Thresholding)
{
if ( ssid !in threshold_tracker )
threshold_tracker[ssid] = table();
threshold_tracker[ssid][key] = thold;
}
event SumStats::thresholds_reset(ssid: string)
{
threshold_tracker[ssid] = table();
}
@endif
@if ( Cluster::local_node_type() == Cluster::MANAGER )
# This variable is maintained by manager nodes as they collect and aggregate
# results.
# Index on a uid.
global stats_results: table[string] of ResultTable &read_expire=1min;
# This variable is maintained by manager nodes to track how many "dones" they
# collected per collection unique id. Once the number of results for a uid
# matches the number of peer nodes that results should be coming from, the
# result is written out and deleted from here.
# Indexed on a uid.
# TODO: add an &expire_func in case not all results are received.
global done_with: table[string] of count &read_expire=1min &default=0;
# This variable is maintained by managers to track intermediate responses as
# they are getting a global view for a certain key.
# Indexed on a uid.
global key_requests: table[string] of Result &read_expire=1min;
# This variable is maintained by managers to prevent overwhelming communication due
# to too many intermediate updates. Each sumstat is tracked separately so that
# one won't overwhelm and degrade other quieter sumstats.
# Indexed on a sumstat id.
global outstanding_global_views: table[string] of count &default=0;
const zero_time = double_to_time(0.0);
# Managers handle logging.
event SumStats::finish_epoch(ss: SumStat)
{
if ( network_time() > zero_time )
{
#print fmt("%.6f MANAGER: breaking %s sumstat for %s sumstat", network_time(), ss$name, ss$id);
local uid = unique_id("");
if ( uid in stats_results )
delete stats_results[uid];
stats_results[uid] = table();
# Request data from peers.
event SumStats::cluster_ss_request(uid, ss$id);
}
# Schedule the next finish_epoch event.
schedule ss$epoch { SumStats::finish_epoch(ss) };
}
# This is unlikely to be called often, but it's here in
# case there are sumstats being collected by managers.
function data_added(ss: SumStat, key: Key, result: Result)
{
if ( check_thresholds(ss, key, result, 1.0) )
{
threshold_crossed(ss, key, result);
event SumStats::cluster_threshold_crossed(ss$id, key, threshold_tracker[ss$id][key]);
}
}
event SumStats::cluster_key_response(uid: string, ssid: string, key: Key, result: Result)
{
#print fmt("%0.6f MANAGER: receiving key data from %s - %s=%s", network_time(), get_event_peer()$descr, key2str(key), result);
# We only want to try and do a value merge if there are actually measured datapoints
# in the Result.
if ( uid in key_requests )
key_requests[uid] = compose_results(key_requests[uid], result);
else
key_requests[uid] = result;
# Mark that a worker is done.
++done_with[uid];
#print fmt("worker_count:%d :: done_with:%d", Cluster::worker_count, done_with[uid]);
if ( Cluster::worker_count == done_with[uid] )
{
local ss = stats_store[ssid];
local ir = key_requests[uid];
if ( check_thresholds(ss, key, ir, 1.0) )
{
threshold_crossed(ss, key, ir);
event SumStats::cluster_threshold_crossed(ss$id, key, threshold_tracker[ss$id][key]);
}
delete done_with[uid];
delete key_requests[uid];
# Check that there is an outstanding view before subtracting.
if ( outstanding_global_views[ssid] > 0 )
--outstanding_global_views[ssid];
}
}
# Managers handle intermediate updates here.
event SumStats::cluster_key_intermediate_response(ssid: string, key: Key)
{
#print fmt("MANAGER: receiving intermediate key data from %s", get_event_peer()$descr);
#print fmt("MANAGER: requesting key data for %s", key2str(key));
if ( ssid in outstanding_global_views &&
|outstanding_global_views[ssid]| > max_outstanding_global_views )
{
# Don't do this intermediate update. Perhaps at some point in the future
# we will queue and randomly select from these ignored intermediate
# update requests.
return;
}
++outstanding_global_views[ssid];
local uid = unique_id("");
event SumStats::cluster_key_request(uid, ssid, key);
}
event SumStats::cluster_ss_response(uid: string, ssid: string, data: ResultTable, done: bool)
{
#print fmt("MANAGER: receiving results from %s", get_event_peer()$descr);
# Mark another worker as being "done" for this uid.
if ( done )
++done_with[uid];
local local_data = stats_results[uid];
local ss = stats_store[ssid];
for ( key in data )
{
if ( key in local_data )
local_data[key] = compose_results(local_data[key], data[key]);
else
local_data[key] = data[key];
# If a stat is done being collected, thresholds for each key
# need to be checked so we're doing it here to avoid doubly
# iterating over each key.
if ( Cluster::worker_count == done_with[uid] )
{
if ( check_thresholds(ss, key, local_data[key], 1.0) )
{
threshold_crossed(ss, key, local_data[key]);
event SumStats::cluster_threshold_crossed(ss$id, key, threshold_tracker[ss$id][key]);
}
}
}
# If the data has been collected from all peers, we are done and ready to finish.
if ( Cluster::worker_count == done_with[uid] )
{
if ( ss?$epoch_finished )
ss$epoch_finished(local_data);
# Clean up
delete stats_results[uid];
delete done_with[uid];
# Not sure I need to reset the sumstat on the manager.
reset(ss);
}
}
event remote_connection_handshake_done(p: event_peer) &priority=5
{
send_id(p, "SumStats::stats_store");
send_id(p, "SumStats::reducer_store");
}
@endif

View file

@ -0,0 +1,436 @@
##! The summary statistics framework provides a way to
##! summarize large streams of data into simple reduced
##! measurements.
module SumStats;
export {
## The various calculations are all defined as plugins.
type Calculation: enum {
PLACEHOLDER
};
## Represents a thing which is having summarization
## results collected for it.
type Key: record {
## A non-address related summarization or a sub-key for
## an address based summarization. An example might be
## successful SSH connections by client IP address
## where the client string would be the key value.
## Another example might be number of HTTP requests to
## a particular value in a Host header. This is an
## example of a non-host based metric since multiple
## IP addresses could respond for the same Host
## header value.
str: string &optional;
## Host is the value to which this metric applies.
host: addr &optional;
};
## Represents data being added for a single observation.
## Only supply a single field at a time!
type Observation: record {
## Count value.
num: count &optional;
## Double value.
dbl: double &optional;
## String value.
str: string &optional;
};
type Reducer: record {
## Observation stream identifier for the reducer
## to attach to.
stream: string;
## The calculations to perform on the data points.
apply: set[Calculation];
## A predicate so that you can decide per key if you
## would like to accept the data being inserted.
pred: function(key: SumStats::Key, obs: SumStats::Observation): bool &optional;
## A function to normalize the key. This can be used to aggregate or
## normalize the entire key.
normalize_key: function(key: SumStats::Key): Key &optional;
};
## Value calculated for an observation stream fed into a reducer.
## Most of the fields are added by plugins.
type ResultVal: record {
## The time when the first observation was added to
## this result value.
begin: time;
## The time when the last observation was added to
## this result value.
end: time;
## The number of observations received.
num: count &default=0;
};
## Type to store results for multiple reducers.
type Result: table[string] of ResultVal;
## Type to store a table of sumstats results indexed
## by keys.
type ResultTable: table[Key] of Result;
## SumStats represent an aggregation of reducers along with
## mechanisms to handle various situations like the epoch ending
## or thresholds being crossed.
##
## It's best to not access any global state outside
## of the variables given to the callbacks because there
## is no assurance provided as to where the callbacks
## will be executed on clusters.
type SumStat: record {
## The interval at which this filter should be "broken"
## and the '$epoch_finished' callback called. The
## results are also reset at this time so any threshold
## based detection needs to be set to a
## value that should be expected to happen within
## this epoch.
epoch: interval;
## The reducers for the SumStat
reducers: set[Reducer];
## Provide a function to calculate a value from the
## :bro:see:`SumStats::Result` structure which will be used
## for thresholding.
## This is required if a $threshold value is given.
threshold_val: function(key: SumStats::Key, result: SumStats::Result): count &optional;
## The threshold value for calling the
## $threshold_crossed callback.
threshold: count &optional;
## A series of thresholds for calling the
## $threshold_crossed callback.
threshold_series: vector of count &optional;
## A callback that is called when a threshold is crossed.
threshold_crossed: function(key: SumStats::Key, result: SumStats::Result) &optional;
## A callback with the full collection of Results for
## this SumStat.
epoch_finished: function(rt: SumStats::ResultTable) &optional;
};
## Create a summary statistic.
global create: function(ss: SumStats::SumStat);
## Add data into an observation stream. This should be
## called when a script has measured some point value.
##
## id: The observation stream identifier that the data
## point represents.
##
## key: The key that the value is related to.
##
## obs: The data point to send into the stream.
global observe: function(id: string, key: SumStats::Key, obs: SumStats::Observation);
## This record is primarily used for internal threshold tracking.
type Thresholding: record {
# Internal use only. Indicates if a simple threshold was already crossed.
is_threshold_crossed: bool &default=F;
# Internal use only. Current key for threshold series.
threshold_series_index: count &default=0;
};
## This event is generated when thresholds are reset for a SumStat.
##
## ssid: SumStats ID that thresholds were reset for.
global thresholds_reset: event(ssid: string);
## Helper function to represent a :bro:type:`SumStats::Key` value as
## a simple string.
##
## key: The metric key that is to be converted into a string.
##
## Returns: A string representation of the metric key.
global key2str: function(key: SumStats::Key): string;
}
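# A minimal end-to-end sketch (illustrative only, not part of this script).
# The stream name "conn.attempts", the SUM plugin, and the threshold of 100
# are made-up example choices; the SUM plugin must be loaded separately.
#
#     function attempts_threshold(key: SumStats::Key, result: SumStats::Result): count
#         {
#         return double_to_count(result["conn.attempts"]$sum);
#         }
#
#     function attempts_crossed(key: SumStats::Key, result: SumStats::Result)
#         {
#         print fmt("%s exceeded 100 connection attempts", key$host);
#         }
#
#     event bro_init()
#         {
#         local r1: SumStats::Reducer = [$stream="conn.attempts",
#                                        $apply=set(SumStats::SUM)];
#         SumStats::create([$epoch=1min,
#                           $reducers=set(r1),
#                           $threshold=100,
#                           $threshold_val=attempts_threshold,
#                           $threshold_crossed=attempts_crossed]);
#         }
#
#     event connection_attempt(c: connection)
#         {
#         SumStats::observe("conn.attempts", [$host=c$id$orig_h], [$num=1]);
#         }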
redef record Reducer += {
# Internal use only. Provides a reference back to the related SumStats by its ID.
sid: string &optional;
};
# Internal use only. For tracking thresholds per sumstat and key.
global threshold_tracker: table[string] of table[Key] of Thresholding &optional;
redef record SumStat += {
# Internal use only (mostly for cluster coherency).
id: string &optional;
};
# Store of sumstats indexed on the sumstat id.
global stats_store: table[string] of SumStat = table();
# Store of reducers indexed on the data point stream id.
global reducer_store: table[string] of set[Reducer] = table();
# Store of results indexed on the measurement id.
global result_store: table[string] of ResultTable = table();
# Store of threshold information.
global thresholds_store: table[string, Key] of bool = table();
# This is called whenever key values are updated and the new val is given as the
# `val` argument. It's only prototyped here because cluster and non-cluster have
# separate implementations.
global data_added: function(ss: SumStat, key: Key, result: Result);
# Prototype the hook point for plugins to do calculations.
global observe_hook: hook(r: Reducer, val: double, data: Observation, rv: ResultVal);
# Prototype the hook point for plugins to initialize any result values.
global init_resultval_hook: hook(r: Reducer, rv: ResultVal);
# Prototype the hook point for plugins to merge Results.
global compose_resultvals_hook: hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal);
# Event that is used to "finish" measurements and adapt the measurement
# framework for clustered or non-clustered usage.
global finish_epoch: event(ss: SumStat);
function key2str(key: Key): string
{
local out = "";
if ( key?$host )
out = fmt("%shost=%s", out, key$host);
if ( key?$str )
out = fmt("%s%sstr=%s", out, |out|==0 ? "" : ", ", key$str);
return fmt("sumstats_key(%s)", out);
}
function init_resultval(r: Reducer): ResultVal
{
local rv: ResultVal = [$begin=network_time(), $end=network_time()];
hook init_resultval_hook(r, rv);
return rv;
}
function compose_resultvals(rv1: ResultVal, rv2: ResultVal): ResultVal
{
local result: ResultVal;
result$begin = (rv1$begin < rv2$begin) ? rv1$begin : rv2$begin;
result$end = (rv1$end > rv2$end) ? rv1$end : rv2$end;
result$num = rv1$num + rv2$num;
# Run the plugin composition hooks.
hook compose_resultvals_hook(result, rv1, rv2);
return result;
}
function compose_results(r1: Result, r2: Result): Result
{
local result: Result = table();
if ( |r1| > |r2| )
{
for ( data_id in r1 )
{
if ( data_id in r2 )
result[data_id] = compose_resultvals(r1[data_id], r2[data_id]);
else
result[data_id] = r1[data_id];
}
}
else
{
for ( data_id in r2 )
{
if ( data_id in r1 )
result[data_id] = compose_resultvals(r1[data_id], r2[data_id]);
else
result[data_id] = r2[data_id];
}
}
return result;
}
function reset(ss: SumStat)
{
if ( ss$id in result_store )
delete result_store[ss$id];
result_store[ss$id] = table();
if ( ss?$threshold || ss?$threshold_series )
{
threshold_tracker[ss$id] = table();
event SumStats::thresholds_reset(ss$id);
}
}
function create(ss: SumStat)
{
if ( (ss?$threshold || ss?$threshold_series) && ! ss?$threshold_val )
{
Reporter::error("SumStats given a threshold with no $threshold_val function");
}
if ( ! ss?$id )
ss$id=unique_id("");
threshold_tracker[ss$id] = table();
stats_store[ss$id] = ss;
for ( reducer in ss$reducers )
{
reducer$sid = ss$id;
if ( reducer$stream !in reducer_store )
reducer_store[reducer$stream] = set();
add reducer_store[reducer$stream][reducer];
}
reset(ss);
schedule ss$epoch { SumStats::finish_epoch(ss) };
}
function observe(id: string, key: Key, obs: Observation)
{
if ( id !in reducer_store )
return;
# Try to add the data to all of the defined reducers.
for ( r in reducer_store[id] )
{
if ( r?$normalize_key )
key = r$normalize_key(copy(key));
# If this reducer has a predicate, run the predicate
# and skip this key if the predicate returns false.
if ( r?$pred && ! r$pred(key, obs) )
next;
local ss = stats_store[r$sid];
# If there is a threshold and no epoch_finished callback
# we don't need to continue counting since the data will
# never be accessed. This was leading
# to some state management issues when measuring
# uniqueness.
# NOTE: this optimization may need to be removed in the
# future if on demand access is provided to the
# SumStats results.
if ( ! ss?$epoch_finished &&
r$sid in threshold_tracker &&
key in threshold_tracker[r$sid] &&
( ss?$threshold &&
threshold_tracker[r$sid][key]$is_threshold_crossed ) ||
( ss?$threshold_series &&
threshold_tracker[r$sid][key]$threshold_series_index+1 == |ss$threshold_series| ) )
next;
if ( r$sid !in result_store )
result_store[ss$id] = table();
local results = result_store[r$sid];
if ( key !in results )
results[key] = table();
local result = results[key];
if ( id !in result )
result[id] = init_resultval(r);
local result_val = result[id];
++result_val$num;
# Continually update the $end field.
result_val$end=network_time();
# If a string was given, fall back to 1.0 as the value.
local val = 1.0;
if ( obs?$num || obs?$dbl )
val = obs?$dbl ? obs$dbl : obs$num;
hook observe_hook(r, val, obs, result_val);
data_added(ss, key, result);
}
}
# This function checks if a threshold has been crossed. It is also used as a method to implement
# mid-break-interval threshold crossing detection for cluster deployments.
function check_thresholds(ss: SumStat, key: Key, result: Result, modify_pct: double): bool
{
if ( ! (ss?$threshold || ss?$threshold_series) )
return F;
# Add in the extra ResultVals to make threshold_vals easier to write.
if ( |ss$reducers| != |result| )
{
for ( reducer in ss$reducers )
{
if ( reducer$stream !in result )
result[reducer$stream] = init_resultval(reducer);
}
}
local watch = ss$threshold_val(key, result);
if ( modify_pct < 1.0 && modify_pct > 0.0 )
watch = double_to_count(floor(watch/modify_pct));
if ( ss$id !in threshold_tracker )
threshold_tracker[ss$id] = table();
local t_tracker = threshold_tracker[ss$id];
if ( key !in t_tracker )
{
local ttmp: Thresholding;
t_tracker[key] = ttmp;
}
local tt = t_tracker[key];
if ( ss?$threshold && ! tt$is_threshold_crossed && watch >= ss$threshold )
{
# Value crossed the threshold.
return T;
}
if ( ss?$threshold_series &&
|ss$threshold_series| >= tt$threshold_series_index &&
watch >= ss$threshold_series[tt$threshold_series_index] )
{
# A threshold series was given and the value crossed the next
# value in the series.
return T;
}
return F;
}
function threshold_crossed(ss: SumStat, key: Key, result: Result)
{
# If there is no callback, there is no point in any of this.
if ( ! ss?$threshold_crossed )
return;
# Add in the extra ResultVals to make threshold_crossed callbacks easier to write.
if ( |ss$reducers| != |result| )
{
for ( reducer in ss$reducers )
{
if ( reducer$stream !in result )
result[reducer$stream] = init_resultval(reducer);
}
}
ss$threshold_crossed(key, result);
local tt = threshold_tracker[ss$id][key];
tt$is_threshold_crossed = T;
# Bump up to the next threshold series index if a threshold series is being used.
if ( ss?$threshold_series )
++tt$threshold_series_index;
}

View file

@ -0,0 +1,24 @@
@load ./main
module SumStats;
event SumStats::finish_epoch(ss: SumStat)
{
if ( ss$id in result_store )
{
local data = result_store[ss$id];
if ( ss?$epoch_finished )
ss$epoch_finished(data);
reset(ss);
}
schedule ss$epoch { SumStats::finish_epoch(ss) };
}
function data_added(ss: SumStat, key: Key, result: Result)
{
if ( check_thresholds(ss, key, result, 1.0) )
threshold_crossed(ss, key, result);
}

View file

@ -0,0 +1,9 @@
@load ./average
@load ./last
@load ./max
@load ./min
@load ./sample
@load ./std-dev
@load ./sum
@load ./unique
@load ./variance

View file

@ -0,0 +1,36 @@
@load base/frameworks/sumstats/main
module SumStats;
export {
redef enum Calculation += {
## Calculate the average of the values.
AVERAGE
};
redef record ResultVal += {
## For numeric data, this calculates the average of all values.
average: double &optional;
};
}
hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
{
if ( AVERAGE in r$apply )
{
if ( ! rv?$average )
rv$average = val;
else
rv$average += (val - rv$average) / rv$num;
}
}
hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
{
if ( rv1?$average && rv2?$average )
result$average = ((rv1$average*rv1$num) + (rv2$average*rv2$num))/(rv1$num+rv2$num);
else if ( rv1?$average )
result$average = rv1$average;
else if ( rv2?$average )
result$average = rv2$average;
}

View file

@ -0,0 +1,55 @@
@load base/frameworks/sumstats
@load base/utils/queue
module SumStats;
export {
redef enum Calculation += {
## Keep last X observations in a queue
LAST
};
redef record Reducer += {
## number of elements to keep.
num_last_elements: count &default=0;
};
redef record ResultVal += {
## This is the queue where elements are maintained. Use the
## :bro:see:`SumStats::get_last` function to get a vector of
## the current element values.
last_elements: Queue::Queue &optional;
};
## Get a vector of element values from a ResultVal.
global get_last: function(rv: ResultVal): vector of Observation;
}
function get_last(rv: ResultVal): vector of Observation
{
local s: vector of Observation = vector();
if ( rv?$last_elements )
Queue::get_vector(rv$last_elements, s);
return s;
}
hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
{
if ( LAST in r$apply && r$num_last_elements > 0 )
{
if ( ! rv?$last_elements )
rv$last_elements = Queue::init([$max_len=r$num_last_elements]);
Queue::put(rv$last_elements, obs);
}
}
hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
{
# Merge $samples
if ( rv1?$last_elements && rv2?$last_elements )
result$last_elements = Queue::merge(rv1$last_elements, rv2$last_elements);
else if ( rv1?$last_elements )
result$last_elements = rv1$last_elements;
else if ( rv2?$last_elements )
result$last_elements = rv2$last_elements;
}
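# An illustrative usage sketch (not part of this script): a reducer that keeps
# the last five observations per key. The stream name "ssh.logins" and the
# count of five are made-up example values.
#
#     local r: SumStats::Reducer = [$stream="ssh.logins",
#                                   $apply=set(SumStats::LAST),
#                                   $num_last_elements=5];
#
#     # Later, e.g. inside an $epoch_finished callback with a ResultTable rt:
#     local recent = SumStats::get_last(rt[key]["ssh.logins"]);
#     for ( i in recent )
#         print recent[i];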

View file

@ -0,0 +1,38 @@
@load base/frameworks/sumstats/main
module SumStats;
export {
redef enum Calculation += {
## Find the maximum value.
MAX
};
redef record ResultVal += {
## For numeric data, this tracks the maximum value given.
max: double &optional;
};
}
hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
{
if ( MAX in r$apply )
{
if ( ! rv?$max )
rv$max = val;
else if ( val > rv$max )
rv$max = val;
}
}
hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
{
if ( rv1?$max && rv2?$max )
result$max = (rv1$max > rv2$max) ? rv1$max : rv2$max;
else if ( rv1?$max )
result$max = rv1$max;
else if ( rv2?$max )
result$max = rv2$max;
}

View file

@ -0,0 +1,36 @@
@load base/frameworks/sumstats/main
module SumStats;
export {
redef enum Calculation += {
## Find the minimum value.
MIN
};
redef record ResultVal += {
## For numeric data, this tracks the minimum value given.
min: double &optional;
};
}
hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
{
if ( MIN in r$apply )
{
if ( ! rv?$min )
rv$min = val;
else if ( val < rv$min )
rv$min = val;
}
}
hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
{
if ( rv1?$min && rv2?$min )
result$min = (rv1$min < rv2$min) ? rv1$min : rv2$min;
else if ( rv1?$min )
result$min = rv1$min;
else if ( rv2?$min )
result$min = rv2$min;
}

View file

@ -0,0 +1,120 @@
@load base/frameworks/sumstats/main
module SumStats;
export {
redef enum Calculation += {
## Get uniquely distributed random samples from the observation stream.
SAMPLE
};
redef record Reducer += {
## A number of sample Observations to collect.
num_samples: count &default=0;
};
redef record ResultVal += {
## This is the vector in which the samples are maintained.
samples: vector of Observation &default=vector();
## Number of total observed elements.
sample_elements: count &default=0;
};
}
redef record ResultVal += {
# Internal use only. This is not meant to be publicly available
# and is just a copy of num_samples from the Reducer. Needed for availability
# in the compose hook.
num_samples: count &default=0;
};
hook init_resultval_hook(r: Reducer, rv: ResultVal)
{
if ( SAMPLE in r$apply )
rv$num_samples = r$num_samples;
}
function sample_add_sample(obs:Observation, rv: ResultVal)
{
++rv$sample_elements;
if ( |rv$samples| < rv$num_samples )
rv$samples[|rv$samples|] = obs;
else
{
local ra = rand(rv$sample_elements);
if ( ra < rv$num_samples )
rv$samples[ra] = obs;
}
}
hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
{
if ( SAMPLE in r$apply )
{
sample_add_sample(obs, rv);
}
}
hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
{
if ( rv1$num_samples != rv2$num_samples )
{
Reporter::error("Merging sample sets with differing sizes is not supported");
return;
}
local num_samples = rv1$num_samples;
result$num_samples = num_samples;
if ( |rv1$samples| > num_samples || |rv2$samples| > num_samples )
{
Reporter::error("Sample vector with too many elements. Aborting.");
return;
}
if ( |rv1$samples| != num_samples && |rv2$samples| < num_samples )
{
if ( |rv1$samples| != rv1$sample_elements || |rv2$samples| < rv2$sample_elements )
{
Reporter::error("Mismatch in sample element size and tracking. Aborting merge");
return;
}
for ( i in rv1$samples )
sample_add_sample(rv1$samples[i], result);
for ( i in rv2$samples)
sample_add_sample(rv2$samples[i], result);
}
else
{
local other_vector: vector of Observation;
local othercount: count;
if ( rv1$sample_elements > rv2$sample_elements )
{
result$samples = copy(rv1$samples);
other_vector = rv2$samples;
othercount = rv2$sample_elements;
}
else
{
result$samples = copy(rv2$samples);
other_vector = rv1$samples;
othercount = rv1$sample_elements;
}
local totalcount = rv1$sample_elements + rv2$sample_elements;
result$sample_elements = totalcount;
for ( i in other_vector )
{
if ( rand(totalcount) <= othercount )
result$samples[i] = other_vector[i];
}
}
}

View file

@ -0,0 +1,34 @@
@load base/frameworks/sumstats/main
@load ./variance
module SumStats;
export {
redef enum Calculation += {
## Find the standard deviation of the values.
STD_DEV
};
redef record ResultVal += {
## For numeric data, this calculates the standard deviation.
std_dev: double &default=0.0;
};
}
function calc_std_dev(rv: ResultVal)
{
if ( rv?$variance )
rv$std_dev = sqrt(rv$variance);
}
# This depends on the variance plugin which uses priority -5
hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal) &priority=-10
{
if ( STD_DEV in r$apply )
calc_std_dev(rv);
}
hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal) &priority=-10
{
calc_std_dev(result);
}

View file

@ -0,0 +1,51 @@
@load base/frameworks/sumstats/main
module SumStats;
export {
redef enum Calculation += {
## Sums the values given. For string values,
## this will be the number of strings given.
SUM
};
redef record ResultVal += {
## For numeric data, this tracks the sum of all values.
sum: double &default=0.0;
};
type threshold_function: function(key: SumStats::Key, result: SumStats::Result): count;
global sum_threshold: function(data_id: string): threshold_function;
}
function sum_threshold(data_id: string): threshold_function
{
return function(key: SumStats::Key, result: SumStats::Result): count
{
print fmt("data_id: %s", data_id);
print result;
return double_to_count(result[data_id]$sum);
};
}
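# An illustrative usage sketch (not part of this script): sum_threshold()
# builds a ready-made $threshold_val callback for SUM-based sumstats. The
# stream name "smtp.mails" and the threshold of 1000 are made-up example
# values; this would typically run from a bro_init handler.
#
#     local r: SumStats::Reducer = [$stream="smtp.mails", $apply=set(SumStats::SUM)];
#     SumStats::create([$epoch=5min,
#                       $reducers=set(r),
#                       $threshold=1000,
#                       $threshold_val=SumStats::sum_threshold("smtp.mails")]);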
hook init_resultval_hook(r: Reducer, rv: ResultVal)
{
if ( SUM in r$apply && ! rv?$sum )
rv$sum = 0;
}
hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
{
if ( SUM in r$apply )
rv$sum += val;
}
hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
{
if ( rv1?$sum || rv2?$sum )
{
result$sum = rv1?$sum ? rv1$sum : 0;
if ( rv2?$sum )
result$sum += rv2$sum;
}
}

View file

@ -0,0 +1,53 @@
@load base/frameworks/sumstats/main
module SumStats;
export {
redef enum Calculation += {
## Calculate the number of unique values.
UNIQUE
};
redef record ResultVal += {
## If cardinality is being tracked, the number of unique
## items is tracked here.
unique: count &default=0;
};
}
redef record ResultVal += {
# Internal use only. This is not meant to be publicly available
# because we don't want to trust that we can inspect the values,
# since we will likely move to a probabilistic data structure in the future.
# TODO: in the future this will optionally be a hyperloglog structure
unique_vals: set[Observation] &optional;
};
hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
{
if ( UNIQUE in r$apply )
{
if ( ! rv?$unique_vals )
rv$unique_vals=set();
add rv$unique_vals[obs];
rv$unique = |rv$unique_vals|;
}
}
hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
{
if ( rv1?$unique_vals || rv2?$unique_vals )
{
if ( rv1?$unique_vals )
result$unique_vals = copy(rv1$unique_vals);
if ( rv2?$unique_vals )
if ( ! result?$unique_vals )
result$unique_vals = copy(rv2$unique_vals);
else
for ( val2 in rv2$unique_vals )
add result$unique_vals[copy(val2)];
result$unique = |result$unique_vals|;
}
}
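# An illustrative usage sketch (not part of this script): counting distinct
# observation values per key. The stream name "http.user_agents" is a made-up
# example value.
#
#     local r: SumStats::Reducer = [$stream="http.user_agents",
#                                   $apply=set(SumStats::UNIQUE)];
#
#     # Later, in an $epoch_finished callback, rt[key]["http.user_agents"]$unique
#     # holds the number of distinct observations seen for that key.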

View file

@ -0,0 +1,69 @@
@load base/frameworks/sumstats/main
@load ./average
module SumStats;
export {
redef enum Calculation += {
## Find the variance of the values.
VARIANCE
};
redef record ResultVal += {
## For numeric data, this calculates the variance.
variance: double &optional;
};
}
redef record ResultVal += {
# Internal use only. Used for incrementally calculating variance.
prev_avg: double &optional;
# Internal use only. For calculating incremental variance.
var_s: double &default=0.0;
};
function calc_variance(rv: ResultVal)
{
rv$variance = (rv$num > 1) ? rv$var_s/(rv$num-1) : 0.0;
}
# Reduced priority since this depends on the average
hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal) &priority=-5
{
if ( VARIANCE in r$apply )
{
if ( rv$num > 1 )
rv$var_s += ((val - rv$prev_avg) * (val - rv$average));
calc_variance(rv);
rv$prev_avg = rv$average;
}
}
# Reduced priority since this depends on the average
hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal) &priority=-5
{
if ( rv1?$var_s && rv1?$average &&
rv2?$var_s && rv2?$average )
{
local rv1_avg_sq = (rv1$average - result$average);
rv1_avg_sq = rv1_avg_sq*rv1_avg_sq;
local rv2_avg_sq = (rv2$average - result$average);
rv2_avg_sq = rv2_avg_sq*rv2_avg_sq;
result$var_s = rv1$num*(rv1$var_s/rv1$num + rv1_avg_sq) + rv2$num*(rv2$var_s/rv2$num + rv2_avg_sq);
}
else if ( rv1?$var_s )
result$var_s = rv1$var_s;
else if ( rv2?$var_s )
result$var_s = rv2$var_s;
if ( rv1?$prev_avg && rv2?$prev_avg )
result$prev_avg = ((rv1$prev_avg*rv1$num) + (rv2$prev_avg*rv2$num))/(rv1$num+rv2$num);
else if ( rv1?$prev_avg )
result$prev_avg = rv1$prev_avg;
else if ( rv2?$prev_avg )
result$prev_avg = rv2$prev_avg;
calc_variance(result);
}

View file

@ -83,19 +83,17 @@ export {
}
const ayiya_ports = { 5072/udp };
redef dpd_config += { [ANALYZER_AYIYA] = [$ports = ayiya_ports] };
const teredo_ports = { 3544/udp };
redef dpd_config += { [ANALYZER_TEREDO] = [$ports = teredo_ports] };
const gtpv1_ports = { 2152/udp, 2123/udp };
redef dpd_config += { [ANALYZER_GTPV1] = [$ports = gtpv1_ports] };
redef likely_server_ports += { ayiya_ports, teredo_ports, gtpv1_ports };
event bro_init() &priority=5
{
Log::create_stream(Tunnel::LOG, [$columns=Info]);
Analyzer::register_for_ports(Analyzer::ANALYZER_AYIYA, ayiya_ports);
Analyzer::register_for_ports(Analyzer::ANALYZER_TEREDO, teredo_ports);
Analyzer::register_for_ports(Analyzer::ANALYZER_GTPV1, gtpv1_ports);
}
function register_all(ecv: EncapsulatingConnVector)

View file

@ -1,5 +1,5 @@
@load base/const.bif
@load base/types.bif
@load base/bif/const.bif.bro
@load base/bif/types.bif
# Type declarations
@ -222,17 +222,6 @@ type endpoint_stats: record {
endian_type: count;
};
## A unique analyzer instance ID. Each time Bro instantiates a protocol analyzer
## for a connection, it assigns it a unique ID that can be used to reference
## that instance.
##
## .. bro:see:: analyzer_name disable_analyzer protocol_confirmation
## protocol_violation
##
## .. todo::While we declare an alias for the type here, the events/functions still
## use ``count``. That should be changed.
type AnalyzerID: count;
module Tunnel;
export {
## Records the identity of an encapsulating parent of a tunneled connection.
@ -300,7 +289,7 @@ type connection: record {
## one protocol analyzer is able to parse the same data. If so, all will
## be recorded. Also note that the recorded services are independent of any
## transport-level protocols.
service: set[string];
addl: string; ##< Deprecated.
hot: count; ##< Deprecated.
history: string; ##< State history of connections. See *history* in :bro:see:`Conn::Info`.
@ -316,6 +305,73 @@ type connection: record {
tunnel: EncapsulatingConnVector &optional;
};
## Default amount of time a file can be inactive before the file analysis
## gives up and discards any internal state related to the file.
const default_file_timeout_interval: interval = 2 mins &redef;
## Default amount of bytes that file analysis will buffer before raising
## :bro:see:`file_new`.
const default_file_bof_buffer_size: count = 1024 &redef;
## A file that Bro is analyzing. This is Bro's type for describing the basic
## internal metadata collected about a "file", which is essentially just a
## byte stream that is e.g. pulled from a network connection or possibly
## some other input source.
type fa_file: record {
## An identifier associated with a single file.
id: string;
## Identifier associated with a container file from which this one was
## extracted as part of the file analysis.
parent_id: string &optional;
## An identification of the source of the file data. E.g. it may be
## a network protocol over which it was transferred, or a local file
## path which was read, or some other input source.
source: string &optional;
## If the source of this file is a network connection, this field
## may be set to indicate the directionality.
is_orig: bool &optional;
## The set of connections over which the file was transferred.
conns: table[conn_id] of connection &optional;
## The time at which the last activity for the file was seen.
last_active: time;
## Number of bytes provided to the file analysis engine for the file.
seen_bytes: count &default=0;
## Total number of bytes that are supposed to comprise the full file.
total_bytes: count &optional;
## The number of bytes in the file stream that were completely missed
## during the process of analysis e.g. due to dropped packets.
missing_bytes: count &default=0;
## The number of not all-in-sequence bytes in the file stream that
## were delivered to file analyzers due to reassembly buffer overflow.
overflow_bytes: count &default=0;
## The amount of time between receiving new data for this file that
## the analysis engine will wait before giving up on it.
timeout_interval: interval &default=default_file_timeout_interval;
## The number of bytes at the beginning of a file to save for later
## inspection in *bof_buffer* field.
bof_buffer_size: count &default=default_file_bof_buffer_size;
## The content of the beginning of a file up to *bof_buffer_size* bytes.
## This is also the buffer that's used for file/mime type detection.
bof_buffer: string &optional;
## A mime type provided by libmagic against the *bof_buffer*, or
## in the cases where no buffering of the beginning of file occurs,
## an initial guess of the mime type based on the first data seen.
mime_type: string &optional;
} &redef;
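# A brief illustrative sketch (not part of these declarations): a script can
# inspect these fields from file analysis events. The size cutoff below is a
# made-up example value, and this assumes the file_new event raised by the
# file analysis engine.
#
#     event file_new(f: fa_file)
#         {
#         if ( f?$mime_type && f?$total_bytes && f$total_bytes > 1048576 )
#             print fmt("large %s file %s", f$mime_type, f$id);
#         }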
## Fields of a SYN packet.
##
## .. bro:see:: connection_SYN_packet
@ -646,9 +702,9 @@ type entropy_test_result: record {
};
# Prototypes of Bro built-in functions.
@load base/strings.bif
@load base/bro.bif
@load base/reporter.bif
@load base/bif/strings.bif
@load base/bif/bro.bif
@load base/bif/reporter.bif
## Deprecated. This is superseded by the new logging framework.
global log_file_name: function(tag: string): string &redef;
@ -710,19 +766,6 @@ global signature_files = "" &add_func = add_signature_file;
## ``p0f`` fingerprint file to use. Will be searched relative to ``BROPATH``.
const passive_fingerprint_file = "base/misc/p0f.fp" &redef;
# todo::testing to see if I can remove these without causing problems.
#const ftp = 21/tcp;
#const ssh = 22/tcp;
#const telnet = 23/tcp;
#const smtp = 25/tcp;
#const domain = 53/tcp; # note, doesn't include UDP version
#const gopher = 70/tcp;
#const finger = 79/tcp;
#const http = 80/tcp;
#const ident = 113/tcp;
#const bgp = 179/tcp;
#const rlogin = 513/tcp;
# TCP values for :bro:see:`endpoint` *state* field.
# todo::these should go into an enum to make them autodoc'able.
const TCP_INACTIVE = 0; ##< Endpoint is still inactive.
@ -2656,7 +2699,7 @@ export {
}
module GLOBAL;
@load base/event.bif
@load base/bif/event.bif
## BPF filter the user has set via the -f command line options. Empty if none.
const cmd_line_bpf_filter = "" &redef;
@ -2846,34 +2889,11 @@ const remote_trace_sync_peers = 0 &redef;
## consistency check.
const remote_check_sync_consistency = F &redef;
## Analyzer tags. The core automatically defines constants
## ``ANALYZER_<analyzer-name>*``, e.g., ``ANALYZER_HTTP``.
##
## .. bro:see:: dpd_config
##
## .. todo::We should autodoc these automatically generated constants.
type AnalyzerTag: count;
## Set of ports activating a particular protocol analysis.
##
## .. bro:see:: dpd_config
type dpd_protocol_config: record {
ports: set[port] &optional; ##< Set of ports.
};
## Port configuration for Bro's "dynamic protocol detection". Protocol
## analyzers can be activated via either well-known ports or content analysis.
## This table defines the ports.
##
## .. bro:see:: dpd_reassemble_first_packets dpd_buffer_size
## dpd_match_only_beginning dpd_ignore_ports
const dpd_config: table[AnalyzerTag] of dpd_protocol_config = {} &redef;
## Reassemble the beginning of all TCP connections before doing
## signature-matching. Enabling this provides more accurate matching at the
## expense of CPU cycles.
##
## .. bro:see:: dpd_config dpd_buffer_size
## .. bro:see:: dpd_buffer_size
## dpd_match_only_beginning dpd_ignore_ports
##
## .. note:: Despite the name, this option affects *all* signature matching, not
@ -2888,24 +2908,24 @@ const dpd_reassemble_first_packets = T &redef;
## activated afterwards. Then only analyzers that can deal with partial
## connections will be able to analyze the session.
##
## .. bro:see:: dpd_reassemble_first_packets dpd_config dpd_match_only_beginning
## .. bro:see:: dpd_reassemble_first_packets dpd_match_only_beginning
## dpd_ignore_ports
const dpd_buffer_size = 1024 &redef;
## If true, stops signature matching if dpd_buffer_size has been reached.
##
## .. bro:see:: dpd_reassemble_first_packets dpd_buffer_size
## dpd_config dpd_ignore_ports
## dpd_ignore_ports
##
## .. note:: Despite the name, this option affects *all* signature matching, not
## only signatures used for dynamic protocol detection.
const dpd_match_only_beginning = T &redef;
## If true, don't consider any ports for deciding which protocol analyzer to
## use. If so, the value of :bro:see:`dpd_config` is ignored.
## use.
##
## .. bro:see:: dpd_reassemble_first_packets dpd_buffer_size
## dpd_match_only_beginning dpd_config
## dpd_match_only_beginning
const dpd_ignore_ports = F &redef;
## Ports which the core considers being likely used by servers. For ports in
@ -2913,13 +2933,6 @@ const dpd_ignore_ports = F &redef;
## connection if it misses the initial handshake.
const likely_server_ports: set[port] &redef;
## Deprecated. Set of all ports for which we know an analyzer, built by
## :doc:`/scripts/base/frameworks/dpd/main`.
##
## .. todo::This should be defined by :doc:`/scripts/base/frameworks/dpd/main`
## itself if we still need it.
global dpd_analyzer_ports: table[port] of set[AnalyzerTag];
## Per-incident timer managers are drained after this amount of inactivity.
const timer_mgr_inactivity_timeout = 1 min &redef;
@ -3028,9 +3041,12 @@ module GLOBAL;
## Number of bytes per packet to capture from live interfaces.
const snaplen = 8192 &redef;
# Load the logging framework here because it uses fairly deep integration with
# Load BiFs defined by plugins.
@load base/bif/plugins
# Load these frameworks here because they use fairly deep integration with
# BiFs and script-land defined types.
@load base/frameworks/logging
@load base/frameworks/input
@load base/frameworks/analyzer
@load base/frameworks/file-analysis

View file

@ -15,14 +15,17 @@
@load base/utils/numbers
@load base/utils/paths
@load base/utils/patterns
@load base/utils/queue
@load base/utils/strings
@load base/utils/thresholds
@load base/utils/time
@load base/utils/urls
# This has some deep interplay between types and BiFs so it's
# loaded in base/init-bare.bro
#@load base/frameworks/logging
@load base/frameworks/notice
@load base/frameworks/analyzer
@load base/frameworks/dpd
@load base/frameworks/signatures
@load base/frameworks/packet-filter
@ -30,9 +33,9 @@
@load base/frameworks/communication
@load base/frameworks/control
@load base/frameworks/cluster
@load base/frameworks/metrics
@load base/frameworks/intel
@load base/frameworks/reporter
@load base/frameworks/sumstats
@load base/frameworks/tunnels
@load base/protocols/conn
@ -41,10 +44,12 @@
@load base/protocols/http
@load base/protocols/irc
@load base/protocols/modbus
@load base/protocols/pop3
@load base/protocols/smtp
@load base/protocols/socks
@load base/protocols/ssh
@load base/protocols/ssl
@load base/protocols/syslog
@load base/protocols/tunnels
@load base/misc/find-checksum-offloading

View file

@ -6,9 +6,9 @@ module Conn;
export {
## Define inactivity timeouts by the service detected being used over
## the connection.
const analyzer_inactivity_timeouts: table[AnalyzerTag] of interval = {
const analyzer_inactivity_timeouts: table[Analyzer::Tag] of interval = {
# For interactive services, allow longer periods of inactivity.
[[ANALYZER_SSH, ANALYZER_FTP]] = 1 hrs,
[[Analyzer::ANALYZER_SSH, Analyzer::ANALYZER_FTP]] = 1 hrs,
} &redef;
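# An illustrative sketch (not part of this script): a site could extend this
# table for additional analyzers; the SSL tag and the 30 min value are made-up
# example choices.
#
#     redef Conn::analyzer_inactivity_timeouts += {
#         [Analyzer::ANALYZER_SSL] = 30 mins,
#     };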
## Define inactivity timeouts based on common protocol ports.
@ -18,7 +18,7 @@ export {
}
event protocol_confirmation(c: connection, atype: count, aid: count)
event protocol_confirmation(c: connection, atype: Analyzer::Tag, aid: count)
{
if ( atype in analyzer_inactivity_timeouts )
set_inactivity_timeout(c$id, analyzer_inactivity_timeouts[atype]);

View file

@ -1,6 +1,7 @@
##! Base DNS analysis script which tracks and logs DNS queries along with
##! their responses.
@load base/utils/queue
@load ./consts
module DNS;
@ -73,19 +74,6 @@ export {
total_replies: count &optional;
};
## A record type which tracks the status of DNS queries for a given
## :bro:type:`connection`.
type State: record {
## Indexed by query id, returns Info record corresponding to
## query/response which haven't completed yet.
pending: table[count] of Info &optional;
## This is the list of DNS responses that have completed based on the
## number of responses declared and the number received. The contents
## of the set are transaction IDs.
finished_answers: set[count] &optional;
};
## An event that can be handled to access the :bro:type:`DNS::Info`
## record as it is sent to the logging framework.
global log_dns: event(rec: Info);
@ -102,46 +90,49 @@ export {
##
## reply: The specific response information according to RR type/class.
global do_reply: event(c: connection, msg: dns_msg, ans: dns_answer, reply: string);
## A hook that is called whenever a session is being set.
## This can be used if additional initialization logic needs to happen
## when creating a new session value.
##
## c: The connection involved in the new session
##
## msg: The DNS message header information.
##
## is_query: Indicator for if this is being called for a query or a response.
global set_session: hook(c: connection, msg: dns_msg, is_query: bool);
## A record type which tracks the status of DNS queries for a given
## :bro:type:`connection`.
type State: record {
## Indexed by query id, returns Info record corresponding to
## query/response which haven't completed yet.
pending: table[count] of Queue::Queue;
## This is the list of DNS responses that have completed based on the
## number of responses declared and the number received. The contents
## of the set are transaction IDs.
finished_answers: set[count];
};
}
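# An illustrative sketch (not part of this script): another script could hook
# set_session to maintain its own per-query state. The added field name
# "my_tag" is a made-up example, and this relies on the base handler at
# priority=5 having already assigned c$dns.
#
#     redef record DNS::Info += { my_tag: string &optional; };
#
#     hook DNS::set_session(c: connection, msg: dns_msg, is_query: bool)
#         {
#         if ( is_query )
#             c$dns$my_tag = "query-seen";
#         }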
redef record connection += {
dns: Info &optional;
dns_state: State &optional;
};
# DPD configuration.
redef capture_filters += {
["dns"] = "port 53",
["mdns"] = "udp and port 5353",
["llmns"] = "udp and port 5355",
["netbios-ns"] = "udp port 137",
};
const dns_ports = { 53/udp, 53/tcp, 137/udp, 5353/udp, 5355/udp };
redef dpd_config += { [ANALYZER_DNS] = [$ports = dns_ports] };
const dns_udp_ports = { 53/udp, 137/udp, 5353/udp, 5355/udp };
const dns_tcp_ports = { 53/tcp };
redef dpd_config += { [ANALYZER_DNS_UDP_BINPAC] = [$ports = dns_udp_ports] };
redef dpd_config += { [ANALYZER_DNS_TCP_BINPAC] = [$ports = dns_tcp_ports] };
redef likely_server_ports += { 53/udp, 53/tcp, 137/udp, 5353/udp, 5355/udp };
const ports = { 53/udp, 53/tcp, 137/udp, 5353/udp, 5355/udp };
redef likely_server_ports += { ports };
event bro_init() &priority=5
{
Log::create_stream(DNS::LOG, [$columns=Info, $ev=log_dns]);
Analyzer::register_for_ports(Analyzer::ANALYZER_DNS, ports);
}
function new_session(c: connection, trans_id: count): Info
{
local info: Info;
info$ts = network_time();
info$id = c$id;
@ -151,18 +142,37 @@ function new_session(c: connection, trans_id: count): Info
return info;
}
hook set_session(c: connection, msg: dns_msg, is_query: bool) &priority=5
{
if ( ! c?$dns_state )
{
local state: State;
c$dns_state = state;
}
if ( msg$id !in c$dns_state$pending )
c$dns_state$pending[msg$id] = Queue::init();
local info: Info;
# If this is either a query or this is the reply but
# no Info records are in the queue (we missed the query?)
# we need to create an Info record and put it in the queue.
if ( is_query ||
Queue::len(c$dns_state$pending[msg$id]) == 0 )
{
info = new_session(c, msg$id);
Queue::put(c$dns_state$pending[msg$id], info);
}
if ( is_query )
# If this is a query, assign the newly created info variable
# so that the world looks correct to anything else handling
# this query.
c$dns = info;
else
# Peek at the next item in the queue for this trans_id and
# assign it to c$dns since this is a response.
c$dns = Queue::peek(c$dns_state$pending[msg$id]);
if ( ! is_query )
{
@ -190,19 +200,21 @@ function set_session(c: connection, msg: dns_msg, is_query: bool)
event dns_message(c: connection, is_orig: bool, msg: dns_msg, len: count) &priority=5
{
set_session(c, msg, is_orig);
hook set_session(c, msg, is_orig);
}
event DNS::do_reply(c: connection, msg: dns_msg, ans: dns_answer, reply: string) &priority=5
{
if ( ans$answer_type == DNS_ANS )
{
if ( ! c?$dns )
{
event conn_weird("dns_unmatched_reply", c, "");
hook set_session(c, msg, F);
}
c$dns$AA = msg$AA;
c$dns$RA = msg$RA;
if ( msg$id in c$dns_state$finished_answers )
event conn_weird("dns_reply_seen_after_done", c, "");
if ( reply != "" )
{
if ( ! c$dns?$answers )
@ -217,7 +229,6 @@ event DNS::do_reply(c: connection, msg: dns_msg, ans: dns_answer, reply: string)
if ( c$dns?$answers && c$dns?$total_answers &&
|c$dns$answers| == c$dns$total_answers )
{
add c$dns_state$finished_answers[c$dns$trans_id];
# Indicate this request/reply pair is ready to be logged.
c$dns$ready = T;
}
@ -230,7 +241,7 @@ event DNS::do_reply(c: connection, msg: dns_msg, ans: dns_answer, reply: string)
{
Log::write(DNS::LOG, c$dns);
# This record is logged and no longer pending.
delete c$dns_state$pending[c$dns$trans_id];
Queue::get(c$dns_state$pending[c$dns$trans_id]);
delete c$dns;
}
}
@ -243,15 +254,14 @@ event dns_request(c: connection, msg: dns_msg, query: string, qtype: count, qcla
c$dns$qclass_name = classes[qclass];
c$dns$qtype = qtype;
c$dns$qtype_name = query_types[qtype];
c$dns$Z = msg$Z;
# Decode netbios name queries
# Note: I'm ignoring the name type for now. Not sure if this should be
# worked into the query/response in some fashion.
if ( c$id$resp_p == 137/udp )
query = decode_netbios_name(query);
c$dns$query = query;
c$dns$Z = msg$Z;
c$dns$query = query;
}
event dns_A_reply(c: connection, msg: dns_msg, ans: dns_answer, a: addr) &priority=5
@ -339,6 +349,13 @@ event connection_state_remove(c: connection) &priority=-5
# If Bro is expiring state, we should go ahead and log all unlogged
# request/response pairs now.
for ( trans_id in c$dns_state$pending )
Log::write(DNS::LOG, c$dns_state$pending[trans_id]);
{
local infos: vector of Info;
Queue::get_vector(c$dns_state$pending[trans_id], infos);
for ( i in infos )
{
Log::write(DNS::LOG, infos[i]);
}
}
}

View file

@ -1,4 +1,7 @@
@load ./utils-commands
@load ./main
@load ./file-analysis
@load ./file-extract
@load ./gridftp
@load-sigs ./dpd.sig

View file

@ -0,0 +1,15 @@
signature dpd_ftp_client {
ip-proto == tcp
payload /(|.*[\n\r]) *[uU][sS][eE][rR] /
tcp-state originator
}
# Match for server greeting (220, 120) and for login or passwd
# required (230, 331).
signature dpd_ftp_server {
ip-proto == tcp
payload /[\n\r ]*(120|220)[^0-9].*[\n\r] *(230|331)[^0-9]/
tcp-state responder
requires-reverse-signature dpd_ftp_client
enable "ftp"
}

View file

@ -0,0 +1,48 @@
@load ./main
@load base/utils/conn-ids
@load base/frameworks/file-analysis/main
module FTP;
export {
## Default file handle provider for FTP.
global get_file_handle: function(c: connection, is_orig: bool): string;
}
function get_handle_string(c: connection): string
{
return cat(Analyzer::ANALYZER_FTP_DATA, " ", c$start_time, " ", id_string(c$id));
}
function get_file_handle(c: connection, is_orig: bool): string
{
if ( [c$id$resp_h, c$id$resp_p] !in ftp_data_expected ) return "";
local info: FTP::Info = ftp_data_expected[c$id$resp_h, c$id$resp_p];
if ( info$passive )
# FTP client initiates data channel.
if ( is_orig )
# Don't care about FTP client data.
return "";
else
# Do care about FTP server data.
return get_handle_string(c);
else
# FTP server initiates data channel.
if ( is_orig )
# Do care about FTP server data.
return get_handle_string(c);
else
# Don't care about FTP client data.
return "";
}
module GLOBAL;
event get_file_handle(tag: Analyzer::Tag, c: connection, is_orig: bool)
&priority=5
{
if ( tag != Analyzer::ANALYZER_FTP_DATA ) return;
set_file_handle(FTP::get_file_handle(c, is_orig));
}

View file

@ -15,51 +15,71 @@ export {
redef record Info += {
## On disk file where it was extracted to.
extraction_file: file &log &optional;
extraction_file: string &log &optional;
## Indicates if the current command/response pair should attempt to
## extract the file if a file was transferred.
extract_file: bool &default=F;
## Internal tracking of the total number of files extracted during this
## session.
num_extracted_files: count &default=0;
};
event file_transferred(c: connection, prefix: string, descr: string,
mime_type: string) &priority=3
function get_extraction_name(f: fa_file): string
{
local id = c$id;
if ( [id$resp_h, id$resp_p] !in ftp_data_expected )
return;
local s = ftp_data_expected[id$resp_h, id$resp_p];
local r = fmt("%s-%s.dat", extraction_prefix, f$id);
return r;
}
if ( extract_file_types in s$mime_type )
event file_new(f: fa_file) &priority=5
{
if ( ! f?$source ) return;
if ( f$source != "FTP_DATA" ) return;
if ( f?$mime_type && extract_file_types in f$mime_type )
{
s$extract_file = T;
++s$num_extracted_files;
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=get_extraction_name(f)]);
return;
}
if ( ! f?$conns ) return;
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( [cid$resp_h, cid$resp_p] !in ftp_data_expected ) next;
local s = ftp_data_expected[cid$resp_h, cid$resp_p];
if ( ! s$extract_file ) next;
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=get_extraction_name(f)]);
return;
}
}
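# An illustrative site-policy sketch, assuming FTP::extract_file_types is the
# &redef'able pattern matched against f$mime_type above: extraction of Windows
# executables transferred over FTP could be enabled like this.
redef FTP::extract_file_types = /application\/x-dosexec/;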
event file_transferred(c: connection, prefix: string, descr: string,
mime_type: string) &priority=-4
event file_state_remove(f: fa_file) &priority=4
{
local id = c$id;
if ( [id$resp_h, id$resp_p] !in ftp_data_expected )
return;
local s = ftp_data_expected[id$resp_h, id$resp_p];
if ( s$extract_file )
if ( ! f?$source ) return;
if ( f$source != "FTP_DATA" ) return;
if ( ! f?$info ) return;
for ( filename in f$info$extracted_files )
{
local suffix = fmt("%d.dat", s$num_extracted_files);
local fname = generate_extraction_filename(extraction_prefix, c, suffix);
s$extraction_file = open(fname);
if ( s$passive )
set_contents_file(id, CONTENTS_RESP, s$extraction_file);
else
set_contents_file(id, CONTENTS_ORIG, s$extraction_file);
local s: FTP::Info;
s$ts = network_time();
s$tags = set();
s$user = "<ftp-data>";
s$extraction_file = filename;
if ( f?$conns )
for ( cid in f$conns )
{
s$uid = f$conns[cid]$uid;
s$id = cid;
}
Log::write(FTP::LOG, s);
}
}

View file

@ -1,6 +1,6 @@
##! The logging this script does is primarily focused on logging FTP commands
##! along with metadata. For example, if files are transferred, the argument
##! will take on the full path that the client is at along with the requested
##! file name.
@load ./utils-commands
@ -13,18 +13,31 @@ module FTP;
export {
## The FTP protocol logging stream identifier.
redef enum Log::ID += { LOG };
## List of commands that should have their command/response pairs logged.
const logged_commands = {
"APPE", "DELE", "RETR", "STOR", "STOU", "ACCT"
"APPE", "DELE", "RETR", "STOR", "STOU", "ACCT", "PORT", "PASV", "EPRT",
"EPSV"
} &redef;
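# An illustrative site-policy sketch: since logged_commands is &redef, directory
# listing commands could additionally be logged like this.
redef FTP::logged_commands += { "LIST", "NLST" };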
## This setting determines whether passwords used in FTP sessions are captured.
const default_capture_password = F &redef;
## User IDs that can be considered "anonymous".
const guest_ids = { "anonymous", "ftp", "ftpuser", "guest" } &redef;
## The expected endpoints of an FTP data channel.
type ExpectedDataChannel: record {
## Whether PASV mode is toggled for the control channel.
passive: bool &log;
## The host that will be initiating the data connection.
orig_h: addr &log;
## The host that will be accepting the data connection.
resp_h: addr &log;
## The port at which the acceptor is listening for the data connection.
resp_p: port &log;
};
type Info: record {
## Time when the command was sent.
ts: time &log;
@ -40,51 +53,52 @@ export {
command: string &log &optional;
## Argument for the command if one is given.
arg: string &log &optional;
## Libmagic "sniffed" file type if the command indicates a file transfer.
mime_type: string &log &optional;
## Libmagic "sniffed" file description if the command indicates a file transfer.
mime_desc: string &log &optional;
## Size of the file if the command indicates a file transfer.
file_size: count &log &optional;
## Reply code from the server in response to the command.
reply_code: count &log &optional;
## Reply message from the server in response to the command.
reply_msg: string &log &optional;
## Arbitrary tags that may indicate a particular attribute of this command.
tags: set[string] &log &default=set();
tags: set[string] &log;
## Expected FTP data channel.
data_channel: ExpectedDataChannel &log &optional;
## Current working directory that this session is in. By making
## the default value '/.', we can indicate that unless something
## the default value '.', we can indicate that unless something
## more concrete is discovered, the existing but unknown
## directory is ok to use.
cwd: string &default="/.";
cwd: string &default=".";
## Command that is currently waiting for a response.
cmdarg: CmdArg &optional;
## Queue for commands that have been sent but not yet responded to
## are tracked here.
pending_commands: PendingCmds;
## Indicates if the session is in active or passive mode.
passive: bool &default=F;
## Determines if the password will be captured for this request.
capture_password: bool &default=default_capture_password;
};
## This record is to hold a parsed FTP reply code. For example, for the
## 201 status code, the digits would be parsed as: x->2, y->0, z->1.
type ReplyCode: record {
x: count;
y: count;
z: count;
};
## Parse FTP reply codes into the three constituent single digit values.
global parse_ftp_reply_code: function(code: count): ReplyCode;
## Event that can be handled to access the :bro:type:`FTP::Info`
## record as it is sent on to the logging framework.
global log_ftp: event(rec: Info);
@ -93,23 +107,21 @@ export {
# Add the state tracking information variable to the connection record
redef record connection += {
ftp: Info &optional;
ftp_data_reuse: bool &default=F;
};
# Configure DPD
const ports = { 21/tcp, 2811/tcp } &redef; # 2811/tcp is GridFTP.
redef capture_filters += { ["ftp"] = "port 21 and port 2811" };
redef dpd_config += { [ANALYZER_FTP] = [$ports = ports] };
redef likely_server_ports += { 21/tcp, 2811/tcp };
# Establish the variable for tracking expected connections.
global ftp_data_expected: table[addr, port] of Info &create_expire=5mins;
const ports = { 21/tcp, 2811/tcp };
redef likely_server_ports += { ports };
event bro_init() &priority=5
{
Log::create_stream(FTP::LOG, [$columns=Info, $ev=log_ftp]);
Analyzer::register_for_ports(Analyzer::ANALYZER_FTP, ports);
}
# Establish the variable for tracking expected connections.
global ftp_data_expected: table[addr, port] of Info &read_expire=5mins;
## A set of commands where the argument can be expected to refer
## to a file or directory.
const file_cmds = {
@ -151,7 +163,7 @@ function set_ftp_session(c: connection)
s$uid=c$uid;
s$id=c$id;
c$ftp=s;
# Add a shim command so the server can respond with some init response.
add_pending_cmd(c$ftp$pending_commands, "<init>", "");
}
@ -163,35 +175,51 @@ function ftp_message(s: Info)
# or it's a deliberately logged command.
if ( |s$tags| > 0 || (s?$cmdarg && s$cmdarg$cmd in logged_commands) )
{
if ( s?$password &&
! s$capture_password &&
to_lower(s$user) !in guest_ids )
{
s$password = "<hidden>";
}
local arg = s$cmdarg$arg;
if ( s$cmdarg$cmd in file_cmds )
arg = fmt("ftp://%s%s", addr_to_uri(s$id$resp_h), build_path_compressed(s$cwd, arg));
{
local comp_path = build_path_compressed(s$cwd, arg);
if ( comp_path[0] != "/" )
comp_path = cat("/", comp_path);
arg = fmt("ftp://%s%s", addr_to_uri(s$id$resp_h), comp_path);
}
s$ts=s$cmdarg$ts;
s$command=s$cmdarg$cmd;
if ( arg == "" )
delete s$arg;
else
s$arg=arg;
Log::write(FTP::LOG, s);
}
# The MIME and file_size fields are specific to file transfer commands
# and may not be used in all commands so they need to be reset to "blank"
# values after logging.
delete s$mime_type;
delete s$mime_desc;
delete s$file_size;
# Same with data channel.
delete s$data_channel;
# Tags are cleared every time too.
delete s$tags;
s$tags = set();
}
function add_expected_data_channel(s: Info, chan: ExpectedDataChannel)
{
s$passive = chan$passive;
s$data_channel = chan;
ftp_data_expected[chan$resp_h, chan$resp_p] = s;
Analyzer::schedule_analyzer(chan$orig_h, chan$resp_h, chan$resp_p, Analyzer::ANALYZER_FTP_DATA,
5mins);
}
event ftp_request(c: connection, command: string, arg: string) &priority=5
@ -206,19 +234,19 @@ event ftp_request(c: connection, command: string, arg: string) &priority=5
remove_pending_cmd(c$ftp$pending_commands, c$ftp$cmdarg);
ftp_message(c$ftp);
}
local id = c$id;
set_ftp_session(c);
# Queue up the new command and argument
add_pending_cmd(c$ftp$pending_commands, command, arg);
if ( command == "USER" )
c$ftp$user = arg;
else if ( command == "PASS" )
c$ftp$password = arg;
else if ( command == "PORT" || command == "EPRT" )
{
local data = (command == "PORT") ?
@ -226,9 +254,8 @@ event ftp_request(c: connection, command: string, arg: string) &priority=5
if ( data$valid )
{
c$ftp$passive=F;
ftp_data_expected[data$h, data$p] = c$ftp;
expect_connection(id$resp_h, data$h, data$p, ANALYZER_FILE, 5mins);
add_expected_data_channel(c$ftp, [$passive=F, $orig_h=id$resp_h,
$resp_h=data$h, $resp_p=data$p]);
}
else
{
@ -240,17 +267,14 @@ event ftp_request(c: connection, command: string, arg: string) &priority=5
event ftp_reply(c: connection, code: count, msg: string, cont_resp: bool) &priority=5
{
# TODO: figure out what to do with continued FTP response (not used much)
#if ( cont_resp ) return;
local id = c$id;
set_ftp_session(c);
c$ftp$cmdarg = get_pending_cmd(c$ftp$pending_commands, code, msg);
c$ftp$reply_code = code;
c$ftp$reply_msg = msg;
# TODO: figure out what to do with continued FTP response (not used much)
if ( cont_resp ) return;
# TODO: do some sort of generic clear text login processing here.
local response_xyz = parse_ftp_reply_code(code);
#if ( response_xyz$x == 2 && # successful
@ -266,22 +290,22 @@ event ftp_reply(c: connection, code: count, msg: string, cont_resp: bool) &prior
# if that's given as well which would be more correct.
c$ftp$file_size = extract_count(msg);
}
# PASV and EPSV processing
else if ( (code == 227 || code == 229) &&
(c$ftp$cmdarg$cmd == "PASV" || c$ftp$cmdarg$cmd == "EPSV") )
{
local data = (code == 227) ? parse_ftp_pasv(msg) : parse_ftp_epsv(msg);
if ( data$valid )
{
c$ftp$passive=T;
if ( code == 229 && data$h == [::] )
data$h = id$resp_h;
ftp_data_expected[data$h, data$p] = c$ftp;
expect_connection(id$orig_h, data$h, data$p, ANALYZER_FILE, 5mins);
data$h = c$id$resp_h;
add_expected_data_channel(c$ftp, [$passive=T, $orig_h=c$id$orig_h,
$resp_h=data$h, $resp_p=data$p]);
}
else
{
@ -300,9 +324,9 @@ event ftp_reply(c: connection, code: count, msg: string, cont_resp: bool) &prior
else if ( c$ftp$cmdarg$cmd == "PWD" || c$ftp$cmdarg$cmd == "XPWD" )
c$ftp$cwd = extract_path(msg);
}
# In case there are multiple commands queued, go ahead and remove the
# command here and log because we can't do the normal processing pipeline
# to wait for a new command before logging the command/response pair.
if ( |c$ftp$pending_commands| > 1 )
{
@ -311,8 +335,7 @@ event ftp_reply(c: connection, code: count, msg: string, cont_resp: bool) &prior
}
}
event expected_connection_seen(c: connection, a: count) &priority=10
event scheduled_analyzer_applied(c: connection, a: Analyzer::Tag) &priority=10
{
local id = c$id;
if ( [id$resp_h, id$resp_p] in ftp_data_expected )
@ -327,18 +350,21 @@ event file_transferred(c: connection, prefix: string, descr: string,
{
local s = ftp_data_expected[id$resp_h, id$resp_p];
s$mime_type = split1(mime_type, /;/)[1];
s$mime_desc = descr;
}
}
event file_transferred(c: connection, prefix: string, descr: string,
mime_type: string) &priority=-5
event connection_reused(c: connection) &priority=5
{
local id = c$id;
if ( [id$resp_h, id$resp_p] in ftp_data_expected )
delete ftp_data_expected[id$resp_h, id$resp_p];
if ( "ftp-data" in c$service )
c$ftp_data_reuse = T;
}
event connection_state_remove(c: connection) &priority=-5
{
if ( c$ftp_data_reuse ) return;
delete ftp_data_expected[c$id$resp_h, c$id$resp_p];
}
# Use state remove event to cover connections terminated by RST.
event connection_state_remove(c: connection) &priority=-5
{

View file

@ -1,5 +1,8 @@
@load ./main
@load ./utils
@load ./file-analysis
@load ./file-ident
@load ./file-hash
@load ./file-extract
@load-sigs ./dpd.sig

View file

@ -0,0 +1,13 @@
signature dpd_http_client {
ip-proto == tcp
payload /^[[:space:]]*(GET|HEAD|POST)[[:space:]]*/
tcp-state originator
}
signature dpd_http_server {
ip-proto == tcp
payload /^HTTP\/[0-9]/
tcp-state responder
requires-reverse-signature dpd_http_client
enable "http"
}

View file

@ -0,0 +1,54 @@
@load ./main
@load ./utils
@load base/utils/conn-ids
@load base/frameworks/file-analysis/main
module HTTP;
export {
redef record HTTP::Info += {
## Number of MIME entities in the HTTP request message body so far.
request_mime_level: count &default=0;
## Number of MIME entities in the HTTP response message body so far.
response_mime_level: count &default=0;
};
## Default file handle provider for HTTP.
global get_file_handle: function(c: connection, is_orig: bool): string;
}
event http_begin_entity(c: connection, is_orig: bool) &priority=5
{
if ( ! c?$http )
return;
if ( is_orig )
++c$http$request_mime_level;
else
++c$http$response_mime_level;
}
function get_file_handle(c: connection, is_orig: bool): string
{
if ( ! c?$http ) return "";
local mime_level: count =
is_orig ? c$http$request_mime_level : c$http$response_mime_level;
local mime_level_str: string = mime_level > 1 ? cat(mime_level) : "";
if ( c$http$range_request )
return cat(Analyzer::ANALYZER_HTTP, " ", is_orig, " ", c$id$orig_h, " ",
build_url(c$http));
return cat(Analyzer::ANALYZER_HTTP, " ", c$start_time, " ", is_orig, " ",
c$http$trans_depth, mime_level_str, " ", id_string(c$id));
}
module GLOBAL;
event get_file_handle(tag: Analyzer::Tag, c: connection, is_orig: bool)
&priority=5
{
if ( tag != Analyzer::ANALYZER_HTTP ) return;
set_file_handle(HTTP::get_file_handle(c, is_orig));
}

View file

@ -2,8 +2,7 @@
##! the message body from the server can be extracted with this script.
@load ./main
@load ./file-ident
@load base/utils/files
@load ./file-analysis
module HTTP;
@ -15,46 +14,87 @@ export {
const extraction_prefix = "http-item" &redef;
redef record Info += {
## On-disk file where the response body was extracted to.
extraction_file: file &log &optional;
## On-disk location where files in request body were extracted.
extracted_request_files: vector of string &log &optional;
## On-disk location where files in response body were extracted.
extracted_response_files: vector of string &log &optional;
## Indicates if the response body is to be extracted or not. Must be
## set before or by the first :bro:id:`http_entity_data` event for the
## content.
## set before or by the first :bro:see:`file_new` event for the file content.
extract_file: bool &default=F;
};
}
event http_entity_data(c: connection, is_orig: bool, length: count, data: string) &priority=-5
function get_extraction_name(f: fa_file): string
{
local r = fmt("%s-%s.dat", extraction_prefix, f$id);
return r;
}
function add_extraction_file(c: connection, is_orig: bool, fn: string)
{
# Client body extraction is not currently supported in this script.
if ( is_orig )
return;
if ( c$http$first_chunk )
{
if ( c$http?$mime_type &&
extract_file_types in c$http$mime_type )
if ( ! c$http?$extracted_request_files )
c$http$extracted_request_files = vector();
c$http$extracted_request_files[|c$http$extracted_request_files|] = fn;
}
else
{
if ( ! c$http?$extracted_response_files )
c$http$extracted_response_files = vector();
c$http$extracted_response_files[|c$http$extracted_response_files|] = fn;
}
}
event file_new(f: fa_file) &priority=5
{
if ( ! f?$source ) return;
if ( f$source != "HTTP" ) return;
if ( ! f?$conns ) return;
local fname: string;
local c: connection;
if ( f?$mime_type && extract_file_types in f$mime_type )
{
fname = get_extraction_name(f);
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]);
for ( cid in f$conns )
{
c$http$extract_file = T;
}
if ( c$http$extract_file )
{
local suffix = fmt("%s_%d.dat", is_orig ? "orig" : "resp", c$http_state$current_response);
local fname = generate_extraction_filename(extraction_prefix, c, suffix);
c$http$extraction_file = open(fname);
enable_raw_output(c$http$extraction_file);
c = f$conns[cid];
if ( ! c?$http ) next;
add_extraction_file(c, f$is_orig, fname);
}
return;
}
if ( c$http?$extraction_file )
print c$http$extraction_file, data;
}
local extracting: bool = F;
event http_end_entity(c: connection, is_orig: bool)
{
if ( c$http?$extraction_file )
close(c$http$extraction_file);
for ( cid in f$conns )
{
c = f$conns[cid];
if ( ! c?$http ) next;
if ( ! c$http$extract_file ) next;
fname = get_extraction_name(f);
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]);
extracting = T;
break;
}
if ( extracting )
for ( cid in f$conns )
{
c = f$conns[cid];
if ( ! c?$http ) next;
add_extraction_file(c, f$is_orig, fname);
}
}

View file

@ -1,15 +1,11 @@
##! Calculate hashes for HTTP body transfers.
@load ./file-ident
@load ./main
@load ./file-analysis
module HTTP;
export {
redef enum Notice::Type += {
## Indicates that an MD5 sum was calculated for an HTTP response body.
MD5,
};
redef record Info += {
## MD5 sum for a file transferred over HTTP calculated from the
## response body.
@ -19,10 +15,6 @@ export {
## if a file should have an MD5 sum generated. It must be
## set to T at the time of or before the first chunk of body data.
calc_md5: bool &default=F;
## Indicates if an MD5 sum is being calculated for the current
## request/response pair.
md5_handle: opaque of md5 &optional;
};
## Generate MD5 sums for these filetypes.
@ -31,62 +23,46 @@ export {
&redef;
}
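# An illustrative site-policy sketch: generate_md5 is a &redef'able pattern of
# mime types, so PDFs could be hashed alongside executables; the pattern below
# is an example, not the shipped default.
redef HTTP::generate_md5 = /application\/x-dosexec/ | /application\/pdf/;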
## Initialize and calculate the hash.
event http_entity_data(c: connection, is_orig: bool, length: count, data: string) &priority=5
event file_new(f: fa_file) &priority=5
{
if ( is_orig || ! c?$http ) return;
if ( c$http$first_chunk )
{
if ( c$http$calc_md5 ||
(c$http?$mime_type && generate_md5 in c$http$mime_type) )
{
c$http$md5_handle = md5_hash_init();
}
}
if ( c$http?$md5_handle )
md5_hash_update(c$http$md5_handle, data);
}
## In the event of a content gap during a file transfer, detect the state for
## the MD5 sum calculation and stop calculating the MD5 since it would be
## incorrect anyway.
event content_gap(c: connection, is_orig: bool, seq: count, length: count) &priority=5
{
if ( is_orig || ! c?$http || ! c$http?$md5_handle ) return;
set_state(c, F, is_orig);
md5_hash_finish(c$http$md5_handle); # Ignore return value.
delete c$http$md5_handle;
}
if ( ! f?$source ) return;
if ( f$source != "HTTP" ) return;
## When the file finishes downloading, finish the hash and generate a notice.
event http_message_done(c: connection, is_orig: bool, stat: http_message_stat) &priority=-3
{
if ( is_orig || ! c?$http ) return;
if ( c$http?$md5_handle )
if ( f?$mime_type && generate_md5 in f$mime_type )
{
local url = build_url_http(c$http);
c$http$md5 = md5_hash_finish(c$http$md5_handle);
delete c$http$md5_handle;
NOTICE([$note=MD5, $msg=fmt("%s %s %s", c$id$orig_h, c$http$md5, url),
$sub=c$http$md5, $conn=c]);
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]);
return;
}
if ( ! f?$conns ) return;
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( ! c?$http ) next;
if ( ! c$http$calc_md5 ) next;
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]);
return;
}
}
event connection_state_remove(c: connection) &priority=-5
event file_state_remove(f: fa_file) &priority=4
{
if ( c?$http_state &&
c$http_state$current_response in c$http_state$pending &&
c$http_state$pending[c$http_state$current_response]?$md5_handle )
if ( ! f?$source ) return;
if ( f$source != "HTTP" ) return;
if ( ! f?$conns ) return;
if ( ! f?$info ) return;
if ( ! f$info?$md5 ) return;
for ( cid in f$conns )
{
# The MD5 sum isn't going to be saved anywhere since the entire
# body wouldn't have been seen anyway and we'd just be giving an
# incorrect MD5 sum.
md5_hash_finish(c$http$md5_handle);
delete c$http$md5_handle;
local c: connection = f$conns[cid];
if ( ! c?$http ) next;
c$http$md5 = f$info$md5;
}
}

View file

@ -1,37 +1,28 @@
##! Identification of file types in HTTP response bodies with file content sniffing.
@load base/frameworks/signatures
@load base/frameworks/notice
@load ./main
@load ./utils
# Add the magic number signatures to the core signature set.
@load-sigs ./file-ident.sig
# Ignore the signatures used to match files
redef Signatures::ignored_ids += /^matchfile-/;
@load ./file-analysis
module HTTP;
export {
redef enum Notice::Type += {
## Indicates when the file extension doesn't seem to match the file contents.
## Indicates when the file extension doesn't seem to match the file
## contents.
Incorrect_File_Type,
};
redef record Info += {
## Mime type of response body identified by content sniffing.
mime_type: string &log &optional;
## Indicates that no data of the current file transfer has been
## seen yet. After the first :bro:id:`http_entity_data` event, it
## will be set to F.
first_chunk: bool &default=T;
};
## Mapping between mime types and regular expressions for URLs
## The :bro:enum:`HTTP::Incorrect_File_Type` notice is generated if the pattern
## doesn't match the mime type that was discovered.
## Mapping between mime type strings (without character set) and
## regular expressions for URLs.
## The :bro:enum:`HTTP::Incorrect_File_Type` notice is generated if the
## pattern doesn't match the mime type that was discovered.
const mime_types_extensions: table[string] of pattern = {
["application/x-dosexec"] = /\.([eE][xX][eE]|[dD][lL][lL])/,
} &redef;
@ -43,43 +34,72 @@ export {
const ignored_incorrect_file_type_urls = /^$/ &redef;
}
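# An illustrative site-policy sketch: mime_types_extensions above is &redef, so
# additional mime-type/extension pairs can be added, for example:
redef HTTP::mime_types_extensions += {
	["application/zip"] = /\.([zZ][iI][pP])/,
};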
event signature_match(state: signature_state, msg: string, data: string) &priority=5
event file_new(f: fa_file) &priority=5
{
# Only signatures matching file types are dealt with here.
if ( /^matchfile-/ !in state$sig_id ) return;
if ( ! f?$source ) return;
if ( f$source != "HTTP" ) return;
if ( ! f?$mime_type ) return;
if ( ! f?$conns ) return;
local c = state$conn;
set_state(c, F, F);
# Not much point in any of this if we don't know about the HTTP session.
if ( ! c?$http ) return;
# Set the mime type that was detected.
c$http$mime_type = msg;
if ( msg in mime_types_extensions &&
c$http?$uri && mime_types_extensions[msg] !in c$http$uri )
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( ! c?$http ) next;
c$http$mime_type = f$mime_type;
local mime_str: string = c$http$mime_type;
if ( mime_str !in mime_types_extensions ) next;
if ( ! c$http?$uri ) next;
if ( mime_types_extensions[mime_str] in c$http$uri ) next;
local url = build_url_http(c$http);
if ( url == ignored_incorrect_file_type_urls )
return;
local message = fmt("%s %s %s", msg, c$http$method, url);
if ( url == ignored_incorrect_file_type_urls ) next;
local message = fmt("%s %s %s", mime_str, c$http$method, url);
NOTICE([$note=Incorrect_File_Type,
$msg=message,
$conn=c]);
}
}
event http_entity_data(c: connection, is_orig: bool, length: count, data: string) &priority=5
event file_over_new_connection(f: fa_file, c: connection) &priority=5
{
if ( c$http$first_chunk && ! c$http?$mime_type )
c$http$mime_type = split1(identify_data(data, T), /;/)[1];
if ( ! f?$source ) return;
if ( f$source != "HTTP" ) return;
if ( ! f?$mime_type ) return;
if ( ! c?$http ) return;
# Spread the mime around (e.g. for partial content, file_type event only
# happens once for the first connection, but if there are subsequent
# connections to transfer the same file, they'll be lacking the mime_type
# field if we don't do this).
c$http$mime_type = f$mime_type;
}
event http_entity_data(c: connection, is_orig: bool, length: count, data: string) &priority=-10
# Tracks byte-range request / partial content response mime types, indexed
# by [connection, uri] pairs. This is needed because a person can pipeline
# byte-range requests over multiple connections to the same uri. Without
# the tracking, only the first request in the pipeline for each connection
# would get a mime_type field assigned to it (by the FileAnalysis policy hooks).
global partial_types: table[conn_id, string] of string &read_expire=5mins;
# Priority 4 so that it runs before the handler that will write to http.log.
event http_message_done(c: connection, is_orig: bool, stat: http_message_stat)
&priority=4
{
if ( c$http$first_chunk )
c$http$first_chunk=F;
if ( ! c$http$range_request ) return;
if ( ! c$http?$uri ) return;
if ( c$http?$mime_type )
{
partial_types[c$id, c$http$uri] = c$http$mime_type;
return;
}
if ( [c$id, c$http$uri] in partial_types )
c$http$mime_type = partial_types[c$id, c$http$uri];
}

View file

@ -1,144 +0,0 @@
# These signatures are used as a replacement for libmagic. The signature
# name needs to start with "matchfile" and the "event" directive takes
# the mime type of the file matched by the http-reply-body pattern.
#
# Signatures from: http://www.garykessler.net/library/file_sigs.html
signature matchfile-exe {
http-reply-body /\x4D\x5A/
event "application/x-dosexec"
}
signature matchfile-elf {
http-reply-body /\x7F\x45\x4C\x46/
event "application/x-executable"
}
signature matchfile-script {
# This is meant to match the interpreter declaration at the top of many
# interpreted scripts.
http-reply-body /\#\![[:blank:]]?\//
event "application/x-script"
}
signature matchfile-wmv {
http-reply-body /\x30\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C/
event "video/x-ms-wmv"
}
signature matchfile-flv {
http-reply-body /\x46\x4C\x56\x01/
event "video/x-flv"
}
signature matchfile-swf {
http-reply-body /[\x46\x43]\x57\x53/
event "application/x-shockwave-flash"
}
signature matchfile-jar {
http-reply-body /\x5F\x27\xA8\x89/
event "application/java-archive"
}
signature matchfile-class {
http-reply-body /\xCA\xFE\xBA\xBE/
event "application/java-byte-code"
}
signature matchfile-msoffice-2007 {
# MS Office 2007 XML documents
http-reply-body /\x50\x4B\x03\x04\x14\x00\x06\x00/
event "application/msoffice"
}
signature matchfile-msoffice {
# Older MS Office files
http-reply-body /\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1/
event "application/msoffice"
}
signature matchfile-rtf {
http-reply-body /\x7B\x5C\x72\x74\x66\x31/
event "application/rtf"
}
signature matchfile-lnk {
http-reply-body /\x4C\x00\x00\x00\x01\x14\x02\x00\x00\x00\x00\x00\xC0\x00\x00\x00\x00\x00\x00\x46/
event "application/x-ms-shortcut"
}
signature matchfile-torrent {
http-reply-body /\x64\x38\x3A\x61\x6E\x6E\x6F\x75\x6E\x63\x65/
event "application/x-bittorrent"
}
signature matchfile-pdf {
http-reply-body /\x25\x50\x44\x46/
event "application/pdf"
}
signature matchfile-html {
http-reply-body /<[hH][tT][mM][lL]/
event "text/html"
}
signature matchfile-html2 {
http-reply-body /<![dD][oO][cC][tT][yY][pP][eE][[:blank:]][hH][tT][mM][lL]/
event "text/html"
}
signature matchfile-xml {
http-reply-body /<\??[xX][mM][lL]/
event "text/xml"
}
signature matchfile-gif {
http-reply-body /\x47\x49\x46\x38[\x37\x39]\x61/
event "image/gif"
}
signature matchfile-jpg {
http-reply-body /\xFF\xD8\xFF[\xDB\xE0\xE1\xE2\xE3\xE8]..[\x4A\x45\x53][\x46\x78\x50][\x49\x69][\x46\x66]/
event "image/jpeg"
}
signature matchfile-tiff {
http-reply-body /\x4D\x4D\x00[\x2A\x2B]/
event "image/tiff"
}
signature matchfile-png {
http-reply-body /\x89\x50\x4e\x47/
event "image/png"
}
signature matchfile-zip {
http-reply-body /\x50\x4B\x03\x04/
event "application/zip"
}
signature matchfile-bzip {
http-reply-body /\x42\x5A\x68/
event "application/bzip2"
}
signature matchfile-gzip {
http-reply-body /\x1F\x8B\x08/
event "application/x-gzip"
}
signature matchfile-cab {
http-reply-body /\x4D\x53\x43\x46/
event "application/vnd.ms-cab-compressed"
}
signature matchfile-rar {
http-reply-body /\x52\x61\x72\x21\x1A\x07\x00/
event "application/x-rar-compressed"
}
signature matchfile-7z {
http-reply-body /\x37\x7A\xBC\xAF\x27\x1C/
event "application/x-7z-compressed"
}

View file

@ -71,6 +71,10 @@ export {
## All of the headers that may indicate if the request was proxied.
proxied: set[string] &log &optional;
## Indicates if this request can assume 206 partial content in
## response.
range_request: bool &default=F;
};
## Structure to maintain state for an HTTP connection with multiple
@ -119,28 +123,18 @@ redef record connection += {
http_state: State &optional;
};
# Initialize the HTTP logging stream.
event bro_init() &priority=5
{
Log::create_stream(HTTP::LOG, [$columns=Info, $ev=log_http]);
}
# DPD configuration.
const ports = {
80/tcp, 81/tcp, 631/tcp, 1080/tcp, 3128/tcp,
8000/tcp, 8080/tcp, 8888/tcp,
};
redef dpd_config += {
[[ANALYZER_HTTP, ANALYZER_HTTP_BINPAC]] = [$ports = ports],
};
redef capture_filters += {
["http"] = "tcp and port (80 or 81 or 631 or 1080 or 3138 or 8000 or 8080 or 8888)"
};
redef likely_server_ports += { ports };
redef likely_server_ports += {
80/tcp, 81/tcp, 631/tcp, 1080/tcp, 3138/tcp,
8000/tcp, 8080/tcp, 8888/tcp,
};
# Initialize the HTTP logging stream and ports.
event bro_init() &priority=5
{
Log::create_stream(HTTP::LOG, [$columns=Info, $ev=log_http]);
Analyzer::register_for_ports(Analyzer::ANALYZER_HTTP, ports);
}
function code_in_range(c: count, min: count, max: count) : bool
{
@ -235,6 +229,9 @@ event http_header(c: connection, is_orig: bool, name: string, value: string) &pr
else if ( name == "HOST" )
# The split is done to remove the occasional port value that shows up here.
c$http$host = split1(value, /:/)[1];
else if ( name == "RANGE" )
c$http$range_request = T;
else if ( name == "USER-AGENT" )
c$http$user_agent = value;

View file

@ -1,2 +1,5 @@
@load ./main
@load ./dcc-send
@load ./file-analysis
@load-sigs ./dpd.sig

View file

@ -28,69 +28,137 @@ export {
dcc_file_size: count &log &optional;
## Sniffed mime type of the file.
dcc_mime_type: string &log &optional;
## The file handle for the file to be extracted
extraction_file: file &log &optional;
extraction_file: string &log &optional;
## A boolean to indicate if the current file transfer should be extracted.
extract_file: bool &default=F;
## The count of the number of files that have been extracted during the session.
num_extracted_files: count &default=0;
};
}
global dcc_expected_transfers: table[addr, port] of Info = table();
global dcc_expected_transfers: table[addr, port] of Info &read_expire=5mins;
event file_transferred(c: connection, prefix: string, descr: string,
mime_type: string) &priority=3
function set_dcc_mime(f: fa_file)
{
local id = c$id;
if ( [id$resp_h, id$resp_p] !in dcc_expected_transfers )
return;
local irc = dcc_expected_transfers[id$resp_h, id$resp_p];
irc$dcc_mime_type = split1(mime_type, /;/)[1];
if ( ! f?$conns ) return;
if ( extract_file_types == irc$dcc_mime_type )
for ( cid in f$conns )
{
irc$extract_file = T;
}
if ( irc$extract_file )
{
local suffix = fmt("%d.dat", ++irc$num_extracted_files);
local fname = generate_extraction_filename(extraction_prefix, c, suffix);
irc$extraction_file = open(fname);
local c: connection = f$conns[cid];
if ( [cid$resp_h, cid$resp_p] !in dcc_expected_transfers ) next;
local s = dcc_expected_transfers[cid$resp_h, cid$resp_p];
s$dcc_mime_type = f$mime_type;
}
}
event file_transferred(c: connection, prefix: string, descr: string,
mime_type: string) &priority=-4
function set_dcc_extraction_file(f: fa_file, filename: string)
{
local id = c$id;
if ( [id$resp_h, id$resp_p] !in dcc_expected_transfers )
if ( ! f?$conns ) return;
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( [cid$resp_h, cid$resp_p] !in dcc_expected_transfers ) next;
local s = dcc_expected_transfers[cid$resp_h, cid$resp_p];
s$extraction_file = filename;
}
}
function get_extraction_name(f: fa_file): string
{
local r = fmt("%s-%s.dat", extraction_prefix, f$id);
return r;
}
# This handler sets the IRC::Info mime type.
event file_new(f: fa_file) &priority=5
{
if ( ! f?$source ) return;
if ( f$source != "IRC_DATA" ) return;
if ( ! f?$mime_type ) return;
set_dcc_mime(f);
}
# This handler checks if file extraction is desired.
event file_new(f: fa_file) &priority=5
{
if ( ! f?$source ) return;
if ( f$source != "IRC_DATA" ) return;
local fname: string;
if ( f?$mime_type && extract_file_types in f$mime_type )
{
fname = get_extraction_name(f);
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]);
set_dcc_extraction_file(f, fname);
return;
}
local irc = dcc_expected_transfers[id$resp_h, id$resp_p];
if ( ! f?$conns ) return;
local tmp = irc$command;
irc$command = "DCC";
Log::write(IRC::LOG, irc);
irc$command = tmp;
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( irc?$extraction_file )
set_contents_file(id, CONTENTS_RESP, irc$extraction_file);
if ( [cid$resp_h, cid$resp_p] !in dcc_expected_transfers ) next;
# Delete these values in case another DCC transfer
# happens during the IRC session.
delete irc$extract_file;
delete irc$extraction_file;
delete irc$dcc_file_name;
delete irc$dcc_file_size;
delete irc$dcc_mime_type;
delete dcc_expected_transfers[id$resp_h, id$resp_p];
local s = dcc_expected_transfers[cid$resp_h, cid$resp_p];
if ( ! s$extract_file ) next;
fname = get_extraction_name(f);
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]);
s$extraction_file = fname;
return;
}
}
function log_dcc(f: fa_file)
{
if ( ! f?$conns ) return;
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( [cid$resp_h, cid$resp_p] !in dcc_expected_transfers ) next;
local irc = dcc_expected_transfers[cid$resp_h, cid$resp_p];
local tmp = irc$command;
irc$command = "DCC";
Log::write(IRC::LOG, irc);
irc$command = tmp;
# Delete these values in case another DCC transfer
# happens during the IRC session.
delete irc$extract_file;
delete irc$extraction_file;
delete irc$dcc_file_name;
delete irc$dcc_file_size;
delete irc$dcc_mime_type;
return;
}
}
event file_new(f: fa_file) &priority=-5
{
if ( ! f?$source ) return;
if ( f$source != "IRC_DATA" ) return;
log_dcc(f);
}
event irc_dcc_message(c: connection, is_orig: bool,
@ -100,17 +168,22 @@ event irc_dcc_message(c: connection, is_orig: bool,
{
set_session(c);
if ( dcc_type != "SEND" )
return;
c$irc$dcc_file_name = argument;
c$irc$dcc_file_size = size;
local p = count_to_port(dest_port, tcp);
expect_connection(to_addr("0.0.0.0"), address, p, ANALYZER_FILE, 5 min);
Analyzer::schedule_analyzer(0.0.0.0, address, p, Analyzer::ANALYZER_IRC_DATA, 5 min);
dcc_expected_transfers[address, p] = c$irc;
}
event expected_connection_seen(c: connection, a: count) &priority=10
event expected_connection_seen(c: connection, a: Analyzer::Tag) &priority=10
{
local id = c$id;
if ( [id$resp_h, id$resp_p] in dcc_expected_transfers )
add c$service["irc-dcc-data"];
}
event connection_state_remove(c: connection) &priority=-5
{
delete dcc_expected_transfers[c$id$resp_h, c$id$resp_p];
}

View file

@ -0,0 +1,33 @@
signature irc_client1 {
ip-proto == tcp
payload /(|.*[\r\n]) *[Uu][Ss][Ee][Rr] +.+[\n\r]+ *[Nn][Ii][Cc][Kk] +.*[\r\n]/
requires-reverse-signature irc_server_reply
tcp-state originator
enable "irc"
}
signature irc_client2 {
ip-proto == tcp
payload /(|.*[\r\n]) *[Nn][Ii][Cc][Kk] +.+[\r\n]+ *[Uu][Ss][Ee][Rr] +.+[\r\n]/
requires-reverse-signature irc_server_reply
tcp-state originator
enable "irc"
}
signature irc_server_reply {
ip-proto == tcp
payload /^(|.*[\n\r])(:[^ \n\r]+ )?[0-9][0-9][0-9] /
tcp-state responder
}
signature irc_server_to_server1 {
ip-proto == tcp
payload /(|.*[\r\n]) *[Ss][Ee][Rr][Vv][Ee][Rr] +[^ ]+ +[0-9]+ +:.+[\r\n]/
}
signature irc_server_to_server2 {
ip-proto == tcp
payload /(|.*[\r\n]) *[Ss][Ee][Rr][Vv][Ee][Rr] +[^ ]+ +[0-9]+ +:.+[\r\n]/
requires-reverse-signature irc_server_to_server1
enable "irc"
}

View file

@ -0,0 +1,25 @@
@load ./dcc-send.bro
@load base/utils/conn-ids
@load base/frameworks/file-analysis/main
module IRC;
export {
## Default file handle provider for IRC.
global get_file_handle: function(c: connection, is_orig: bool): string;
}
function get_file_handle(c: connection, is_orig: bool): string
{
if ( is_orig ) return "";
return cat(Analyzer::ANALYZER_IRC_DATA, " ", c$start_time, " ", id_string(c$id));
}
module GLOBAL;
event get_file_handle(tag: Analyzer::Tag, c: connection, is_orig: bool)
&priority=5
{
if ( tag != Analyzer::ANALYZER_IRC_DATA ) return;
set_file_handle(IRC::get_file_handle(c, is_orig));
}

View file

@ -38,21 +38,13 @@ redef record connection += {
irc: Info &optional;
};
# Some common IRC ports.
redef capture_filters += { ["irc-6666"] = "port 6666" };
redef capture_filters += { ["irc-6667"] = "port 6667" };
redef capture_filters += { ["irc-6668"] = "port 6668" };
redef capture_filters += { ["irc-6669"] = "port 6669" };
# DPD configuration.
const irc_ports = { 6666/tcp, 6667/tcp, 6668/tcp, 6669/tcp };
redef dpd_config += { [ANALYZER_IRC] = [$ports = irc_ports] };
redef likely_server_ports += { 6666/tcp, 6667/tcp, 6668/tcp, 6669/tcp };
const ports = { 6666/tcp, 6667/tcp, 6668/tcp, 6669/tcp };
redef likely_server_ports += { ports };
event bro_init() &priority=5
{
Log::create_stream(IRC::LOG, [$columns=Info, $ev=irc_log]);
Analyzer::register_for_ports(Analyzer::ANALYZER_IRC, ports);
}
function new_session(c: connection): Info

View file

@ -29,14 +29,13 @@ redef record connection += {
modbus: Info &optional;
};
# Configure DPD and the packet filter.
redef capture_filters += { ["modbus"] = "tcp port 502" };
redef dpd_config += { [ANALYZER_MODBUS] = [$ports = set(502/tcp)] };
redef likely_server_ports += { 502/tcp };
const ports = { 502/tcp };
redef likely_server_ports += { ports };
event bro_init() &priority=5
{
Log::create_stream(Modbus::LOG, [$columns=Info, $ev=log_modbus]);
Analyzer::register_for_ports(Analyzer::ANALYZER_MODBUS, ports);
}
event modbus_message(c: connection, headers: ModbusHeaders, is_orig: bool) &priority=5

View file

@ -0,0 +1,2 @@
@load-sigs ./dpd.sig

View file

@ -0,0 +1,13 @@
signature dpd_pop3_server {
ip-proto == tcp
payload /^\+OK/
requires-reverse-signature dpd_pop3_client
enable "pop3"
tcp-state responder
}
signature dpd_pop3_client {
ip-proto == tcp
payload /(|.*[\r\n])[[:space:]]*([uU][sS][eE][rR][[:space:]]|[aA][pP][oO][pP][[:space:]]|[cC][aA][pP][aA]|[aA][uU][tT][hH])/
tcp-state originator
}

View file

@ -1,3 +1,6 @@
@load ./main
@load ./entities
@load ./entities-excerpt
@load ./entities-excerpt
@load-sigs ./dpd.sig

View file

@ -0,0 +1,13 @@
signature dpd_smtp_client {
ip-proto == tcp
payload /(|.*[\n\r])[[:space:]]*([hH][eE][lL][oO]|[eE][hH][lL][oO])/
requires-reverse-signature dpd_smtp_server
enable "smtp"
tcp-state originator
}
signature dpd_smtp_server {
ip-proto == tcp
payload /^[[:space:]]*220[[:space:]-]/
tcp-state responder
}

View file

@ -9,44 +9,29 @@ export {
redef record SMTP::EntityInfo += {
## The entity body excerpt.
excerpt: string &log &default="";
## Internal tracking to know how much of the body should be included
## in the excerpt.
excerpt_len: count &optional;
};
## This is the default value for how much of the entity body should be
## included for all MIME entities.
## included for all MIME entities. The lesser of this value and
## :bro:see:`default_file_bof_buffer_size` will be used.
const default_entity_excerpt_len = 0 &redef;
## This table defines how much of various entity bodies should be
## included in excerpts.
const entity_excerpt_len: table[string] of count = {}
&redef
&default = default_entity_excerpt_len;
}
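# An illustrative site-policy sketch: excerpts are enabled by bumping the
# &redef'able default length; 512 is an arbitrary example value.
redef SMTP::default_entity_excerpt_len = 512;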
event mime_segment_data(c: connection, length: count, data: string) &priority=-1
event file_new(f: fa_file) &priority=5
{
if ( ! c?$smtp ) return;
if ( c$smtp$current_entity$content_len == 0 )
c$smtp$current_entity$excerpt_len = entity_excerpt_len[c$smtp$current_entity$mime_type];
}
if ( ! f?$source ) return;
if ( f$source != "SMTP" ) return;
if ( ! f?$bof_buffer ) return;
if ( ! f?$conns ) return;
event mime_segment_data(c: connection, length: count, data: string) &priority=-2
{
if ( ! c?$smtp ) return;
local ent = c$smtp$current_entity;
if ( ent$content_len < ent$excerpt_len )
for ( cid in f$conns )
{
if ( ent$content_len + length < ent$excerpt_len )
ent$excerpt = cat(ent$excerpt, data);
else
{
local x_bytes = ent$excerpt_len - ent$content_len;
ent$excerpt = cat(ent$excerpt, sub_bytes(data, 1, x_bytes));
}
local c: connection = f$conns[cid];
if ( ! c?$smtp ) next;
if ( default_entity_excerpt_len > 0 )
c$smtp$current_entity$excerpt =
f$bof_buffer[0:default_entity_excerpt_len];
}
}

View file

@ -7,11 +7,6 @@
module SMTP;
export {
redef enum Notice::Type += {
## Indicates that an MD5 sum was calculated for a MIME message.
MD5,
};
redef enum Log::ID += { ENTITIES_LOG };
type EntityInfo: record {
@ -34,15 +29,12 @@ export {
## Optionally calculate the file's MD5 sum. Must be set prior to the
## first data chunk being seen in an event.
calc_md5: bool &default=F;
## This boolean value indicates if an MD5 sum is being calculated
## for the current file transfer.
md5_handle: opaque of md5 &optional;
## Optionally write the file to disk. Must be set prior to first
## data chunk being seen in an event.
extract_file: bool &default=F;
## Store the file handle here for the file currently being extracted.
extraction_file: file &log &optional;
extraction_file: string &log &optional;
};
redef record Info += {
@ -51,9 +43,6 @@ export {
};
redef record State += {
## Store a count of the number of files that have been transferred in
## a conversation to create unique file names on disk.
num_extracted_files: count &default=0;
## Track the number of MIME encoded files transferred during a session.
mime_level: count &default=0;
};
@ -97,77 +86,126 @@ function set_session(c: connection, new_entity: bool)
}
}
function get_extraction_name(f: fa_file): string
{
local r = fmt("%s-%s.dat", extraction_prefix, f$id);
return r;
}
event mime_begin_entity(c: connection) &priority=10
{
if ( ! c?$smtp ) return;
set_session(c, T);
}
# This has priority -10 because other handlers need to know the current
# content_len before it's updated by this handler.
event mime_segment_data(c: connection, length: count, data: string) &priority=-10
event file_new(f: fa_file) &priority=5
{
if ( ! c?$smtp ) return;
c$smtp$current_entity$content_len = c$smtp$current_entity$content_len + length;
}
if ( ! f?$source ) return;
if ( f$source != "SMTP" ) return;
if ( ! f?$conns ) return;
event mime_segment_data(c: connection, length: count, data: string) &priority=7
{
if ( ! c?$smtp ) return;
if ( c$smtp$current_entity$content_len == 0 )
c$smtp$current_entity$mime_type = split1(identify_data(data, T), /;/)[1];
}
local fname: string;
local extracting: bool = F;
event mime_segment_data(c: connection, length: count, data: string) &priority=-5
{
if ( ! c?$smtp ) return;
if ( c$smtp$current_entity$content_len == 0 )
for ( cid in f$conns )
{
local entity = c$smtp$current_entity;
if ( generate_md5 in entity$mime_type && ! never_calc_md5 )
entity$calc_md5 = T;
local c: connection = f$conns[cid];
if ( entity$calc_md5 )
entity$md5_handle = md5_hash_init();
}
if ( ! c?$smtp ) next;
if ( ! c$smtp?$current_entity ) next;
if ( c$smtp$current_entity?$md5_handle )
md5_hash_update(entity$md5_handle, data);
}
if ( c$smtp$current_entity$extract_file )
{
if ( ! extracting )
{
fname = get_extraction_name(f);
FileAnalysis::add_analyzer(f,
[$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]);
extracting = T;
}
## In the event of a content gap during the MIME transfer, detect the state for
## the MD5 sum calculation and stop calculating the MD5 since it would be
## incorrect anyway.
event content_gap(c: connection, is_orig: bool, seq: count, length: count) &priority=5
{
if ( is_orig || ! c?$smtp || ! c$smtp?$current_entity ) return;
c$smtp$current_entity$extraction_file = fname;
}
local entity = c$smtp$current_entity;
if ( entity?$md5_handle )
{
md5_hash_finish(entity$md5_handle);
delete entity$md5_handle;
if ( c$smtp$current_entity$calc_md5 )
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]);
}
}
event mime_end_entity(c: connection) &priority=-3
{
# TODO: this check is only due to a bug in mime_end_entity that
# causes the event to be generated twice for the same real event.
if ( ! c?$smtp || ! c$smtp?$current_entity )
function check_extract_by_type(f: fa_file)
{
if ( extract_file_types !in f$mime_type ) return;
if ( f?$info && FileAnalysis::ANALYZER_EXTRACT in f$info$analyzers )
return;
local entity = c$smtp$current_entity;
if ( entity?$md5_handle )
{
entity$md5 = md5_hash_finish(entity$md5_handle);
delete entity$md5_handle;
local fname: string = get_extraction_name(f);
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT,
$extract_filename=fname]);
NOTICE([$note=MD5, $msg=fmt("Calculated a hash for a MIME entity from %s", c$id$orig_h),
$sub=entity$md5, $conn=c]);
if ( ! f?$conns ) return;
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( ! c?$smtp ) next;
c$smtp$current_entity$extraction_file = fname;
}
}
function check_md5_by_type(f: fa_file)
{
if ( never_calc_md5 ) return;
if ( generate_md5 !in f$mime_type ) return;
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]);
}
event file_new(f: fa_file) &priority=5
{
if ( ! f?$source ) return;
if ( f$source != "SMTP" ) return;
if ( ! f?$mime_type ) return;
if ( f?$conns )
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( ! c?$smtp ) next;
if ( ! c$smtp?$current_entity ) next;
c$smtp$current_entity$mime_type = f$mime_type;
}
check_extract_by_type(f);
check_md5_by_type(f);
}
event file_state_remove(f: fa_file) &priority=4
{
if ( ! f?$source ) return;
if ( f$source != "SMTP" ) return;
if ( ! f?$conns ) return;
for ( cid in f$conns )
{
local c: connection = f$conns[cid];
if ( ! c?$smtp ) next;
if ( ! c$smtp?$current_entity ) next;
# Only log if there was some content.
if ( f$seen_bytes == 0 ) next;
if ( f?$info && f$info?$md5 )
c$smtp$current_entity$md5 = f$info$md5;
c$smtp$current_entity$content_len = f$seen_bytes;
Log::write(SMTP::ENTITIES_LOG, c$smtp$current_entity);
delete c$smtp$current_entity;
return;
}
}
@ -179,66 +217,7 @@ event mime_one_header(c: connection, h: mime_header_rec)
/[fF][iI][lL][eE][nN][aA][mM][eE]/ in h$value )
c$smtp$current_entity$filename = extract_filename_from_content_disposition(h$value);
if ( h$name == "CONTENT-TYPE" &&
/[nN][aA][mM][eE][:blank:]*=/ in h$value )
c$smtp$current_entity$filename = extract_filename_from_content_disposition(h$value);
}
event mime_end_entity(c: connection) &priority=-5
{
if ( ! c?$smtp ) return;
# This check and the delete below are just to cope with a bug where
# mime_end_entity can be generated multiple times for the same event.
if ( ! c$smtp?$current_entity )
return;
# Only log if there was some content.
if ( c$smtp$current_entity$content_len > 0 )
Log::write(SMTP::ENTITIES_LOG, c$smtp$current_entity);
delete c$smtp$current_entity;
}
event mime_segment_data(c: connection, length: count, data: string) &priority=5
{
if ( ! c?$smtp ) return;
if ( extract_file_types in c$smtp$current_entity$mime_type )
c$smtp$current_entity$extract_file = T;
}
event mime_segment_data(c: connection, length: count, data: string) &priority=3
{
if ( ! c?$smtp ) return;
if ( c$smtp$current_entity$extract_file &&
c$smtp$current_entity$content_len == 0 )
{
local suffix = fmt("%d.dat", ++c$smtp_state$num_extracted_files);
local fname = generate_extraction_filename(extraction_prefix, c, suffix);
c$smtp$current_entity$extraction_file = open(fname);
enable_raw_output(c$smtp$current_entity$extraction_file);
}
}
event mime_segment_data(c: connection, length: count, data: string) &priority=-5
{
if ( ! c?$smtp ) return;
if ( c$smtp$current_entity$extract_file && c$smtp$current_entity?$extraction_file )
print c$smtp$current_entity$extraction_file, data;
}
event mime_end_entity(c: connection) &priority=-3
{
if ( ! c?$smtp ) return;
# TODO: this check is only due to a bug in mime_end_entity that
# causes the event to be generated twice for the same real event.
if ( ! c$smtp?$current_entity )
return;
if ( c$smtp$current_entity?$extraction_file )
close(c$smtp$current_entity$extraction_file);
}

View file

@ -0,0 +1,27 @@
@load ./main
@load ./entities
@load base/utils/conn-ids
@load base/frameworks/file-analysis/main
module SMTP;
export {
## Default file handle provider for SMTP.
global get_file_handle: function(c: connection, is_orig: bool): string;
}
function get_file_handle(c: connection, is_orig: bool): string
{
if ( ! c?$smtp ) return "";
return cat(Analyzer::ANALYZER_SMTP, " ", c$start_time, " ", c$smtp$trans_depth, " ",
c$smtp_state$mime_level);
}
module GLOBAL;
event get_file_handle(tag: Analyzer::Tag, c: connection, is_orig: bool)
&priority=5
{
if ( tag != Analyzer::ANALYZER_SMTP ) return;
set_file_handle(SMTP::get_file_handle(c, is_orig));
}

View file

@ -74,9 +74,6 @@ export {
const mail_path_capture = ALL_HOSTS &redef;
global log_smtp: event(rec: Info);
## Configure the default ports for SMTP analysis.
const ports = { 25/tcp, 587/tcp } &redef;
}
redef record connection += {
@ -84,15 +81,13 @@ redef record connection += {
smtp_state: State &optional;
};
# Configure DPD
redef capture_filters += { ["smtp"] = "tcp port 25 or tcp port 587" };
redef dpd_config += { [ANALYZER_SMTP] = [$ports = ports] };
redef likely_server_ports += { 25/tcp, 587/tcp };
const ports = { 25/tcp, 587/tcp };
redef likely_server_ports += { ports };
event bro_init() &priority=5
{
Log::create_stream(SMTP::LOG, [$columns=SMTP::Info, $ev=log_smtp]);
Analyzer::register_for_ports(Analyzer::ANALYZER_SMTP, ports);
}
function find_address_in_smtp_header(header: string): string

View file

@ -1,2 +1,4 @@
@load ./consts
@load ./main
@load-sigs ./dpd.sig

View file

@ -0,0 +1,48 @@
signature dpd_socks4_client {
ip-proto == tcp
# '32' is a rather arbitrary max length for the user name.
payload /^\x04[\x01\x02].{0,32}\x00/
tcp-state originator
}
signature dpd_socks4_server {
ip-proto == tcp
requires-reverse-signature dpd_socks4_client
payload /^\x00[\x5a\x5b\x5c\x5d]/
tcp-state responder
enable "socks"
}
signature dpd_socks4_reverse_client {
ip-proto == tcp
# '32' is a rather arbitrary max length for the user name.
payload /^\x04[\x01\x02].{0,32}\x00/
tcp-state responder
}
signature dpd_socks4_reverse_server {
ip-proto == tcp
requires-reverse-signature dpd_socks4_reverse_client
payload /^\x00[\x5a\x5b\x5c\x5d]/
tcp-state originator
enable "socks"
}
signature dpd_socks5_client {
ip-proto == tcp
# Watch for a few authentication methods to reduce false positives.
payload /^\x05.[\x00\x01\x02]/
tcp-state originator
}
signature dpd_socks5_server {
ip-proto == tcp
requires-reverse-signature dpd_socks5_client
# Watch for a single authentication method to be chosen by the server or
# the server to indicate that no authentication is required.
payload /^\x05(\x00|\x01[\x00\x01\x02])/
tcp-state responder
enable "socks"
}

View file

@ -34,20 +34,19 @@ export {
global log_socks: event(rec: Info);
}
const ports = { 1080/tcp };
redef likely_server_ports += { ports };
event bro_init() &priority=5
{
Log::create_stream(SOCKS::LOG, [$columns=Info, $ev=log_socks]);
Analyzer::register_for_ports(Analyzer::ANALYZER_SOCKS, ports);
}
redef record connection += {
socks: SOCKS::Info &optional;
};
# Configure DPD
redef capture_filters += { ["socks"] = "tcp port 1080" };
redef dpd_config += { [ANALYZER_SOCKS] = [$ports = set(1080/tcp)] };
redef likely_server_ports += { 1080/tcp };
function set_session(c: connection, version: count)
{
if ( ! c?$socks )

View file

@ -1 +1,3 @@
@load ./main
@load-sigs ./dpd.sig

View file

@ -0,0 +1,13 @@
signature dpd_ssh_client {
ip-proto == tcp
payload /^[sS][sS][hH]-/
requires-reverse-signature dpd_ssh_server
enable "ssh"
tcp-state originator
}
signature dpd_ssh_server {
ip-proto == tcp
payload /^[sS][sS][hH]-/
tcp-state responder
}

View file

@ -1,10 +1,11 @@
##! Base SSH analysis script. The heuristic to blindly determine success or
##! Base SSH analysis script. The heuristic to blindly determine success or
##! failure for SSH connections is implemented here. At this time, it only
##! uses the size of the data being returned from the server to make the
##! heuristic determination about success of the connection.
##! heuristic determination about success of the connection.
##! Requires that :bro:id:`use_conn_size_analyzer` is set to T! The heuristic
##! is not attempted if the connection size analyzer isn't enabled.
@load base/protocols/conn
@load base/frameworks/notice
@load base/utils/site
@load base/utils/thresholds
@ -16,12 +17,6 @@ module SSH;
export {
## The SSH protocol logging stream identifier.
redef enum Log::ID += { LOG };
redef enum Notice::Type += {
## Indicates that a heuristically detected "successful" SSH
## authentication occurred.
Login
};
type Info: record {
## Time when the SSH connection began.
@ -30,10 +25,10 @@ export {
uid: string &log;
## The connection's 4-tuple of endpoint addresses/ports.
id: conn_id &log;
## Indicates if the login was heuristically guessed to be "success"
## or "failure".
status: string &log &optional;
## Direction of the connection. If the client was a local host
## Indicates if the login was heuristically guessed to be "success",
## "failure", or "undetermined".
status: string &log &default="undetermined";
## Direction of the connection. If the client was a local host
## logging into an external host, this would be OUTBOUND. INBOUND
## would be set for the opposite situation.
# TODO: handle local-local and remote-remote better.
@ -43,51 +38,49 @@ export {
## Software string from the server.
server: string &log &optional;
## Amount of data returned from the server. This is currently
## the only measure of the success heuristic and it is logged to
## the only measure of the success heuristic and it is logged to
## assist analysts looking at the logs to make their own determination
## about the success on a case-by-case basis.
resp_size: count &log &default=0;
## Indicate if the SSH session is done being watched.
done: bool &default=F;
};
## The size in bytes of data sent by the server at which the SSH
## The size in bytes of data sent by the server at which the SSH
## connection is presumed to be successful.
const authentication_data_size = 5500 &redef;
const authentication_data_size = 4000 &redef;
## If true, we tell the event engine to not look at further data
## packets after the initial SSH handshake. Helps with performance
## (especially with large file transfers) but precludes some
## kinds of analyses (e.g., tracking connection size).
## kinds of analyses.
const skip_processing_after_detection = F &redef;
## Event that is generated when the heuristic thinks that a login
## was successful.
global heuristic_successful_login: event(c: connection);
## Event that is generated when the heuristic thinks that a login
## failed.
global heuristic_failed_login: event(c: connection);
## Event that can be handled to access the :bro:type:`SSH::Info`
## record as it is sent on to the logging framework.
global log_ssh: event(rec: Info);
}
# Configure DPD and the packet filter
redef capture_filters += { ["ssh"] = "tcp port 22" };
redef dpd_config += { [ANALYZER_SSH] = [$ports = set(22/tcp)] };
redef likely_server_ports += { 22/tcp };
redef record connection += {
ssh: Info &optional;
};
const ports = { 22/tcp };
redef likely_server_ports += { ports };
event bro_init() &priority=5
{
Log::create_stream(SSH::LOG, [$columns=Info, $ev=log_ssh]);
Analyzer::register_for_ports(Analyzer::ANALYZER_SSH, ports);
}
function set_session(c: connection)
@ -104,55 +97,61 @@ function set_session(c: connection)
function check_ssh_connection(c: connection, done: bool)
{
# If done watching this connection, just return.
# If already done watching this connection, just return.
if ( c$ssh$done )
return;
# Make sure conn_size_analyzer is active by checking
# resp$num_bytes_ip. In general it should always be active though.
if ( ! c$resp?$num_bytes_ip )
return;
# Remove the IP and TCP header length from the total size.
# TODO: Fix for IPv6. This whole approach also seems to break in some
# cases where there are more header bytes than num_bytes_ip.
local header_bytes = c$resp$num_pkts*32 + c$resp$num_pkts*20;
local server_bytes = c$resp$num_bytes_ip;
if ( server_bytes >= header_bytes )
server_bytes = server_bytes - header_bytes;
else
server_bytes = c$resp$size;
# If this is still a live connection and the byte count has not crossed
# the threshold, just return and let the rescheduled check happen later.
if ( ! done && server_bytes < authentication_data_size )
return;
# Make sure the server has sent back more than 50 bytes to filter out
# hosts that are just port scanning. Nothing is ever logged if the server
# doesn't send back at least 50 bytes.
if ( server_bytes < 50 )
return;
c$ssh$direction = Site::is_local_addr(c$id$orig_h) ? OUTBOUND : INBOUND;
c$ssh$resp_size = server_bytes;
if ( server_bytes < authentication_data_size )
if ( done )
{
c$ssh$status = "failure";
event SSH::heuristic_failed_login(c);
# If this connection is done, then we can look to see if
# this matches the conditions for a failed login. Failed
# logins are only detected at connection state removal.
if ( # Require originators to have sent at least 50 bytes.
c$orig$size > 50 &&
# Responders must be below 4000 bytes.
c$resp$size < 4000 &&
# Responder must have sent fewer than 40 packets.
c$resp$num_pkts < 40 &&
# If there was a content gap we can't reliably do this heuristic.
c?$conn && c$conn$missed_bytes == 0 )# &&
# Only "normal" connections can count.
#c$conn?$conn_state && c$conn$conn_state in valid_states )
{
c$ssh$status = "failure";
event SSH::heuristic_failed_login(c);
}
if ( c$resp$size > authentication_data_size )
{
c$ssh$status = "success";
event SSH::heuristic_successful_login(c);
}
}
else
{
# presumed successful login
c$ssh$status = "success";
event SSH::heuristic_successful_login(c);
{
# If this connection is still being tracked, then it's possible
# to watch for it to be a successful connection.
if ( c$resp$size > authentication_data_size )
{
c$ssh$status = "success";
event SSH::heuristic_successful_login(c);
}
else
# This connection must be tracked longer. Let the scheduled
# check happen again.
return;
}
# Set the direction for the log.
c$ssh$direction = Site::is_local_addr(c$id$orig_h) ? OUTBOUND : INBOUND;
# Set the "done" flag to prevent the watching event from rescheduling
# after detection is done.
c$ssh$done=T;
Log::write(SSH::LOG, c$ssh);
if ( skip_processing_after_detection )
{
# Stop watching this connection, we don't care about it anymore.
@ -161,18 +160,6 @@ function check_ssh_connection(c: connection, done: bool)
}
}
event SSH::heuristic_successful_login(c: connection) &priority=-5
{
NOTICE([$note=Login,
$msg="Heuristically detected successful SSH login.",
$conn=c]);
Log::write(SSH::LOG, c$ssh);
}
event SSH::heuristic_failed_login(c: connection) &priority=-5
{
Log::write(SSH::LOG, c$ssh);
}
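With the stock Login notice and these handlers removed, a site that still wants a notice can raise its own from the exported event. A sketch in a local script (the module name and notice type below are ours, not part of this change):
@load base/frameworks/notice
@load base/protocols/ssh
module SSH_Site;
export {
	redef enum Notice::Type += {
		## Raised when the SSH success heuristic fires.
		Successful_Login
	};
}
event SSH::heuristic_successful_login(c: connection)
	{
	NOTICE([$note=Successful_Login,
	        $msg="Heuristically detected successful SSH login.",
	        $conn=c]);
	}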
event connection_state_remove(c: connection) &priority=-5
{
@ -187,6 +174,7 @@ event ssh_watcher(c: connection)
if ( ! connection_exists(id) )
return;
lookup_connection(c$id);
check_ssh_connection(c, F);
if ( ! c$ssh$done )
schedule +15secs { ssh_watcher(c) };
@ -197,12 +185,12 @@ event ssh_server_version(c: connection, version: string) &priority=5
set_session(c);
c$ssh$server = version;
}
event ssh_client_version(c: connection, version: string) &priority=5
{
set_session(c);
c$ssh$client = version;
# The heuristic detection for SSH relies on the ConnSize analyzer.
# Don't do the heuristics if it's disabled.
if ( use_conn_size_analyzer )

View file

@ -1,3 +1,5 @@
@load ./consts
@load ./main
@load ./mozilla-ca-list
@load-sigs ./dpd.sig

View file

@ -0,0 +1,15 @@
signature dpd_ssl_server {
ip-proto == tcp
# Server hello.
payload /^(\x16\x03[\x00\x01\x02]..\x02...\x03[\x00\x01\x02]|...?\x04..\x00\x02).*/
requires-reverse-signature dpd_ssl_client
enable "ssl"
tcp-state responder
}
signature dpd_ssl_client {
ip-proto == tcp
# Client hello.
payload /^(\x16\x03[\x00\x01\x02]..\x01...\x03[\x00\x01\x02]|...?\x01[\x00\x01\x02][\x02\x03]).*/
tcp-state originator
}

View file

@ -94,46 +94,17 @@ redef record Info += {
delay_tokens: set[string] &optional;
};
event bro_init() &priority=5
{
Log::create_stream(SSL::LOG, [$columns=Info, $ev=log_ssl]);
}
redef capture_filters += {
["ssl"] = "tcp port 443",
["nntps"] = "tcp port 563",
["imap4-ssl"] = "tcp port 585",
["sshell"] = "tcp port 614",
["ldaps"] = "tcp port 636",
["ftps-data"] = "tcp port 989",
["ftps"] = "tcp port 990",
["telnets"] = "tcp port 992",
["imaps"] = "tcp port 993",
["ircs"] = "tcp port 994",
["pop3s"] = "tcp port 995",
["xmpps"] = "tcp port 5223",
};
const ports = {
443/tcp, 563/tcp, 585/tcp, 614/tcp, 636/tcp,
989/tcp, 990/tcp, 992/tcp, 993/tcp, 995/tcp, 5223/tcp
};
redef likely_server_ports += { ports };
redef dpd_config += {
[[ANALYZER_SSL]] = [$ports = ports]
};
redef likely_server_ports += {
443/tcp, 563/tcp, 585/tcp, 614/tcp, 636/tcp,
989/tcp, 990/tcp, 992/tcp, 993/tcp, 995/tcp, 5223/tcp
};
# A queue that buffers log records.
global log_delay_queue: table[count] of Info;
# The top queue index where records are added.
global log_delay_queue_head = 0;
# The bottom queue index that points to the next record to be flushed.
global log_delay_queue_tail = 0;
event bro_init() &priority=5
{
Log::create_stream(SSL::LOG, [$columns=Info, $ev=log_ssl]);
Analyzer::register_for_ports(Analyzer::ANALYZER_SSL, ports);
}
function set_session(c: connection)
{
@ -144,26 +115,17 @@ function set_session(c: connection)
function delay_log(info: Info, token: string)
{
info$delay_tokens = set();
if ( ! info?$delay_tokens )
info$delay_tokens = set();
add info$delay_tokens[token];
log_delay_queue[log_delay_queue_head] = info;
++log_delay_queue_head;
}
function undelay_log(info: Info, token: string)
{
if ( token in info$delay_tokens )
if ( info?$delay_tokens && token in info$delay_tokens )
delete info$delay_tokens[token];
}
global log_record: function(info: Info);
event delay_logging(info: Info)
{
log_record(info);
}
function log_record(info: Info)
{
if ( ! info?$delay_tokens || |info$delay_tokens| == 0 )
@ -172,26 +134,14 @@ function log_record(info: Info)
}
else
{
for ( unused_index in log_delay_queue )
when ( |info$delay_tokens| == 0 )
{
if ( log_delay_queue_head == log_delay_queue_tail )
return;
if ( |log_delay_queue[log_delay_queue_tail]$delay_tokens| > 0 )
{
if ( info$ts + max_log_delay > network_time() )
{
schedule 1sec { delay_logging(info) };
return;
}
else
{
Reporter::info(fmt("SSL delay tokens not released in time (%s)",
info$delay_tokens));
}
}
Log::write(SSL::LOG, log_delay_queue[log_delay_queue_tail]);
delete log_delay_queue[log_delay_queue_tail];
++log_delay_queue_tail;
log_record(info);
}
timeout SSL::max_log_delay
{
Reporter::info(fmt("SSL delay tokens not released in time (%s tokens remaining)",
|info$delay_tokens|));
}
}
}
@ -288,28 +238,16 @@ event ssl_established(c: connection) &priority=-5
finish(c);
}
event protocol_confirmation(c: connection, atype: count, aid: count) &priority=5
event protocol_confirmation(c: connection, atype: Analyzer::Tag, aid: count) &priority=5
{
# Check for the existence of the c$ssl record.
if ( c?$ssl && analyzer_name(atype) == "SSL" )
if ( c?$ssl && atype == Analyzer::ANALYZER_SSL )
c$ssl$analyzer_id = aid;
}
event protocol_violation(c: connection, atype: count, aid: count,
event protocol_violation(c: connection, atype: Analyzer::Tag, aid: count,
reason: string) &priority=5
{
if ( c?$ssl )
finish(c);
}
event bro_done()
{
if ( |log_delay_queue| == 0 )
return;
for ( unused_index in log_delay_queue )
{
Log::write(SSL::LOG, log_delay_queue[log_delay_queue_tail]);
delete log_delay_queue[log_delay_queue_tail];
++log_delay_queue_tail;
}
}
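For context, the delay-token API that the rewritten log_record() serves is driven from other scripts roughly as follows. A sketch, assuming delay_log and undelay_log are exported by this script as in this release; the token string and the DNS lookup are placeholders:
event ssl_established(c: connection)
	{
	# Hold the record back until some asynchronous work completes.
	SSL::delay_log(c$ssl, "example-lookup");
	when ( local name = lookup_addr(c$id$resp_h) )
		{
		# Work finished: release the token so log_record() can flush the record.
		SSL::undelay_log(c$ssl, "example-lookup");
		}
	timeout 5sec
		{
		# Give up and release the token anyway.
		SSL::undelay_log(c$ssl, "example-lookup");
		}
	}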

View file

@ -26,19 +26,17 @@ export {
};
}
redef capture_filters += { ["syslog"] = "port 514" };
const ports = { 514/udp } &redef;
redef dpd_config += { [ANALYZER_SYSLOG_BINPAC] = [$ports = ports] };
redef likely_server_ports += { 514/udp };
redef record connection += {
syslog: Info &optional;
};
const ports = { 514/udp };
redef likely_server_ports += { ports };
event bro_init() &priority=5
{
Log::create_stream(Syslog::LOG, [$columns=Info]);
Analyzer::register_for_ports(Analyzer::ANALYZER_SYSLOG, ports);
}
event syslog_message(c: connection, facility: count, severity: count, msg: string) &priority=5

View file

@ -0,0 +1 @@
@load-sigs ./dpd.sig

View file

@ -0,0 +1,14 @@
# Provide DPD signatures for tunneling protocols that otherwise
# wouldn't be detected at all.
signature dpd_ayiya {
ip-proto = udp
payload /^..\x11\x29/
enable "ayiya"
}
signature dpd_teredo {
ip-proto = udp
payload /^(\x00\x00)|(\x00\x01)|([\x60-\x6f])/
enable "teredo"
}

View file

@ -19,7 +19,7 @@ function extract_path(input: string): string
}
## Compresses a given path by removing '..'s and the parent directory it
## references and also removing '/'s.
## references and also removing dual '/'s and extraneous '/./'s.
## dir: a path string, either relative or absolute
## Returns: a compressed version of the input path
function compress_path(dir: string): string
@ -41,7 +41,7 @@ function compress_path(dir: string): string
return compress_path(dir);
}
const multislash_sep = /(\/){2,}/;
const multislash_sep = /(\/\.?){2,}/;
parts = split_all(dir, multislash_sep);
for ( i in parts )
if ( i % 2 == 0 )
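The effect of widening the separator pattern can be checked in isolation with embedded pattern matching (a sketch; "p in s" tests whether pattern p occurs anywhere in string s):
event bro_init()
	{
	print /(\/){2,}/ in "/a//b";        # T: the old pattern matches runs of slashes
	print /(\/){2,}/ in "/a/./b";       # F: but it does not catch "/./"
	print /(\/\.?){2,}/ in "/a/./b";    # T: the updated pattern matches "/./" runs too
	}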

View file

@ -0,0 +1,155 @@
##! A FIFO queue.
module Queue;
export {
## Settings for initializing the queue.
type Settings: record {
## If a maximum length is set for the queue
## it will maintain itself at that
## maximum length automatically.
max_len: count &optional;
};
## The internal data structure for the queue.
type Queue: record {};
## Initialize a queue record structure.
##
## s: A record which configures the queue.
##
## Returns: An opaque queue record.
global init: function(s: Settings &default=[]): Queue;
## Put a value onto the beginning of a queue.
##
## q: The queue to put the value into.
##
## val: The value to insert into the queue.
global put: function(q: Queue, val: any);
## Get a value from the end of a queue.
##
## q: The queue to get the value from.
##
## Returns: The value retrieved and removed from the queue.
global get: function(q: Queue): any;
## Peek at the value at the end of the queue without removing it.
##
## q: The queue to get the value from.
##
## Returns: The value at the end of the queue.
global peek: function(q: Queue): any;
## Merge two queues together. If any settings are applied
## to the queues, the settings from q1 are used for the new
## merged queue.
##
## q1: The first queue. Settings are taken from here.
##
## q2: The second queue.
##
## Returns: A new queue from merging the other two together.
global merge: function(q1: Queue, q2: Queue): Queue;
## Get the number of items in a queue.
##
## q: The queue.
##
## Returns: The length of the queue.
global len: function(q: Queue): count;
## Get the contents of the queue as a vector.
##
## q: The queue.
##
## ret: A vector into which the current contents of q are copied,
## in FIFO order, using ret's element type.
global get_vector: function(q: Queue, ret: vector of any);
}
redef record Queue += {
# Indicates whether the queue has been properly initialized.
initialized: bool &default=F;
# The values are stored here.
vals: table[count] of any &optional;
# Settings for the queue.
settings: Settings &optional;
# The top value in the vals table.
top: count &default=0;
# The bottom value in the vals table.
bottom: count &default=0;
# The number of bytes in the queue.
size: count &default=0;
};
function init(s: Settings): Queue
{
local q: Queue;
q$vals=table();
q$settings = copy(s);
q$initialized=T;
return q;
}
function put(q: Queue, val: any)
{
if ( q$settings?$max_len && len(q) >= q$settings$max_len )
get(q);
q$vals[q$top] = val;
++q$top;
}
function get(q: Queue): any
{
local ret = q$vals[q$bottom];
delete q$vals[q$bottom];
++q$bottom;
return ret;
}
function peek(q: Queue): any
{
return q$vals[q$bottom];
}
function merge(q1: Queue, q2: Queue): Queue
{
local ret = init(q1$settings);
local i = q1$bottom;
local j = q2$bottom;
for ( ignored_val in q1$vals )
{
if ( i in q1$vals )
put(ret, q1$vals[i]);
if ( j in q2$vals )
put(ret, q2$vals[j]);
++i;
++j;
}
return ret;
}
function len(q: Queue): count
{
return |q$vals|;
}
function get_vector(q: Queue, ret: vector of any)
{
local i = q$bottom;
local j = 0;
# Iterating over q$vals here only bounds the loop to the correct
# number of values; i and j track the actual read and write positions.
for ( ignored_val in q$vals )
{
if ( i >= q$top )
break;
ret[j] = q$vals[i];
++j; ++i;
}
}
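A short usage sketch of the API defined above (the values are arbitrary):
event bro_init()
	{
	local q = Queue::init([$max_len=3]);
	Queue::put(q, "a");
	Queue::put(q, "b");
	Queue::put(q, "c");
	Queue::put(q, "d");       # exceeds max_len, so "a" is silently dropped
	print Queue::len(q);      # 3
	print Queue::peek(q);     # b (oldest remaining value, not removed)
	print Queue::get(q);      # b (removed this time)
	local v: vector of any = vector();
	Queue::get_vector(q, v);  # v now holds c, d in FIFO order
	print v;
	}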

View file

@ -0,0 +1,9 @@
## Given an interval, returns a minimal human-readable string of the
## form "3m34s" representing the minutes and seconds spanned by the
## interval.
function duration_to_mins_secs(dur: interval): string
{
local dur_count = double_to_count(interval_to_double(dur));
return fmt("%dm%ds", dur_count/60, dur_count%60);
}