Merge branch 'master', remote-tracking branch 'origin' into topic/gregor/tunnel

Gregor Maier 2011-08-15 13:33:14 -07:00
commit ae1eb5379b
130 changed files with 2363 additions and 801 deletions

View file

@@ -9,10 +9,6 @@ redef peer_description = Cluster::node;
# Add a cluster prefix.
@prefixes += cluster
# Make this a controllable node since all cluster nodes are inherently
# controllable.
@load frameworks/control/controllee
## If this script isn't found anywhere, the cluster bombs out.
## Loading the cluster framework requires that a script by this name exists
## somewhere in the BROPATH. The only thing in the file should be the
@@ -23,7 +19,7 @@ redef peer_description = Cluster::node;
@load ./setup-connections
# Don't start the listening process until we're a bit more sure that the
# Don't load the listening script until we're a bit more sure that the
# cluster framework is actually being enabled.
@load frameworks/communication/listen-clear

View file

@@ -47,6 +47,25 @@ export {
time_machine: string &optional;
};
## This function can be called at any time to determine if the cluster
## framework is being enabled for this run.
global is_enabled: function(): bool;
## This function can be called at any time to determine what type of
## cluster node the current Bro instance is going to be acting as.
## :bro:id:`is_enabled` should be called first to find out if this is
## actually going to be a cluster node.
global local_node_type: function(): NodeType;
## This gives the number of workers currently connected, and it's
## maintained internally by the cluster framework. It's
## primarily intended for use by managers to find out how many workers
## should be responding to requests.
global worker_count: count = 0;
## The cluster layout definition. This should be placed into a file
## named cluster-layout.bro somewhere in the BROPATH. It will be
## automatically loaded if the CLUSTER_NODE environment variable is set.
const nodes: table[string] of Node = {} &redef;
## This is usually supplied on the command line for each instance
@@ -54,7 +73,29 @@ export {
const node = getenv("CLUSTER_NODE") &redef;
}
event bro_init()
function is_enabled(): bool
{
return (node != "");
}
function local_node_type(): NodeType
{
return nodes[node]$node_type;
}
event remote_connection_handshake_done(p: event_peer)
{
if ( nodes[p$descr]$node_type == WORKER )
++worker_count;
}
event remote_connection_closed(p: event_peer)
{
if ( nodes[p$descr]$node_type == WORKER )
--worker_count;
}
event bro_init() &priority=5
{
# If a node is given but its name is unknown, we need to fail.
if ( node != "" && node !in nodes )

View file

@@ -10,11 +10,14 @@
@prefixes += cluster-manager
# Load the script for local site configuration for the manager node.
@load site/local-manager
## Turn off remote logging since this is the manager and should only log here.
redef Log::enable_remote_logging = F;
## Use the cluster's archive logging script.
redef Log::default_rotation_postprocessor = "archive-log";
redef Log::default_rotation_postprocessor_cmd = "archive-log";
## We're processing essentially *only* remote events.
redef max_remote_events_processed = 10000;

View file

@@ -1,6 +1,9 @@
@prefixes += cluster-proxy
# Load the script for local site configuration for proxy nodes.
@load site/local-proxy
## The proxy only syncs state; does not forward events.
redef forward_remote_events = F;
redef forward_remote_state_changes = T;
@@ -12,5 +15,5 @@ redef Log::enable_local_logging = F;
redef Log::enable_remote_logging = T;
## Use the cluster's delete-log script.
redef Log::default_rotation_postprocessor = "delete-log";
redef Log::default_rotation_postprocessor_cmd = "delete-log";

View file

@@ -1,6 +1,9 @@
@prefixes += cluster-worker
# Load the script for local site configuration for the worker nodes.
@load site/local-worker
## Don't do any local logging.
redef Log::enable_local_logging = F;
@@ -8,7 +11,7 @@ redef Log::enable_local_logging = F;
redef Log::enable_remote_logging = T;
## Use the cluster's delete-log script.
redef Log::default_rotation_postprocessor = "delete-log";
redef Log::default_rotation_postprocessor_cmd = "delete-log";
## Record all packets into trace file.
# TODO: should we really be setting this to T?

View file

@@ -1,4 +1,5 @@
@load base/frameworks/communication
@load ./main
@load base/frameworks/communication/main
module Cluster;
@@ -59,13 +60,12 @@ event bro_init() &priority=9
$connect=T, $retry=1mins,
$class=node];
}
else if ( me$node_type == WORKER )
{
if ( n$node_type == MANAGER && me$manager == i )
Communication::nodes["manager"] = [$host=nodes[i]$ip, $p=nodes[i]$p,
$connect=T, $retry=1mins,
$class=node];
$class=node, $events=manager_events];
if ( n$node_type == PROXY && me$proxy == i )
Communication::nodes["proxy"] = [$host=nodes[i]$ip, $p=nodes[i]$p,

View file

@@ -1,5 +1 @@
# TODO: get rid of this as soon as the Expr.cc hack is changed.
@if ( getenv("ENABLE_COMMUNICATION") != "" )
@load ./main
@endif

View file

@@ -108,6 +108,9 @@ const src_names = {
event bro_init()
{
Log::create_stream(COMMUNICATION, [$columns=Info]);
if ( |nodes| > 0 )
enable_communication();
}
function do_script_log_common(level: count, src: count, msg: string)

View file

@@ -27,6 +27,19 @@ export {
ev: any &optional;
};
## Default function for building the path values for log filters if not
## specified otherwise by a filter. The default implementation uses ``id``
## to derive a name.
##
## id: The log stream.
## path: A suggested path value, which may be either the filter's ``path``
## if defined or a fall-back generated internally.
## rec: An instance of the stream's ``columns`` type with its
## fields set to the values to be logged.
##
## Returns: The path to be used for the filter.
global default_path_func: function(id: ID, path: string, rec: any) : string &redef;
## Filter customizing logging.
type Filter: record {
## Descriptive name to reference this filter.
@@ -50,7 +63,7 @@ export {
## The specific interpretation of the string is up to
## the used writer, and may for example be the destination
## file name. Generally, filenames are expected to be given
## without any extensions; writers will add appropriate
## extensions automatically.
path: string &optional;
@@ -60,7 +73,15 @@ export {
## different strings for separate calls, but be careful: it's
## easy to flood the disk by returning a new string for each
## connection ...
path_func: function(id: ID, path: string): string &optional;
##
## id: The log stream.
## path: A suggested path value, which may be either the filter's ``path``
## if defined or a fall-back generated internally.
## rec: An instance of the stream's ``columns`` type with its
## fields set to the values to be logged.
##
## Returns: The path to be used for the filter.
path_func: function(id: ID, path: string, rec: any): string &optional;
## Subset of column names to record. If not given, all
## columns are recorded.
@@ -81,36 +102,34 @@ export {
## Information passed into rotation callback functions.
type RotationInfo: record {
writer: Writer; ##< Writer.
path: string; ##< Original path value.
open: time; ##< Time when opened.
close: time; ##< Time when closed.
writer: Writer; ##< Writer.
fname: string; ##< Full name of the rotated file.
path: string; ##< Original path value.
open: time; ##< Time when opened.
close: time; ##< Time when closed.
terminating: bool; ##< True if rotation occurred due to Bro shutting down.
};
## Default rotation interval. Zero disables rotation.
const default_rotation_interval = 0secs &redef;
## Default naming suffix format. Uses a strftime() style.
const default_rotation_date_format = "%y-%m-%d_%H.%M.%S" &redef;
## Default naming format for timestamps embedded into filenames. Uses a strftime() style.
const default_rotation_date_format = "%Y-%m-%d-%H-%M-%S" &redef;
## Default postprocessor for writers outputting into files.
const default_rotation_postprocessor = "" &redef;
## Default shell command to run on rotated files. Empty for none.
const default_rotation_postprocessor_cmd = "" &redef;
## Default function to construct the name of a rotated output file.
## The default implementation appends info$date_fmt to the original
## file name.
##
## info: Meta-data about the file to be rotated.
global default_rotation_path_func: function(info: RotationInfo) : string &redef;
## Specifies the default postprocessor function per writer type. Entries in this
## table are initialized by each writer type.
const default_rotation_postprocessors: table[Writer] of function(info: RotationInfo) : bool &redef;
## Type for controlling file rotation.
type RotationControl: record {
## Rotation interval.
interv: interval &default=default_rotation_interval;
## Format for timestamps embedded into rotated file names.
date_fmt: string &default=default_rotation_date_format;
## Postprocessor process to run on the rotated file.
postprocessor: string &default=default_rotation_postprocessor;
## Callback function to trigger for rotated files. If not set, the default
## comes out of default_rotation_postprocessors.
postprocessor: function(info: RotationInfo) : bool &optional;
};
## Specifies rotation parameters per ``(id, path)`` tuple.
@@ -133,6 +152,8 @@ export {
global flush: function(id: ID): bool;
global add_default_filter: function(id: ID) : bool;
global remove_default_filter: function(id: ID) : bool;
global run_rotation_postprocessor_cmd: function(info: RotationInfo, npath: string) : bool;
}
# We keep a script-level copy of all filters so that we can manipulate them.
@@ -140,10 +161,39 @@ global filters: table[ID, string] of Filter;
@load logging.bif.bro # Needs Filter and Stream defined.
function default_rotation_path_func(info: RotationInfo) : string
module Log;
# Used internally by the log manager.
function __default_rotation_postprocessor(info: RotationInfo) : bool
{
local date_fmt = rotation_control[info$writer, info$path]$date_fmt;
return fmt("%s-%s", info$path, strftime(date_fmt, info$open));
if ( info$writer in default_rotation_postprocessors )
return default_rotation_postprocessors[info$writer](info);
}
function default_path_func(id: ID, path: string, rec: any) : string
{
# TODO for Seth: Do what you want. :)
return path;
}
# Run post-processor on file. If there isn't any postprocessor defined,
# we move the file to a nicer name.
function run_rotation_postprocessor_cmd(info: RotationInfo, npath: string) : bool
{
local pp_cmd = default_rotation_postprocessor_cmd;
if ( pp_cmd == "" )
return T;
# The date format is hard-coded here to provide a standardized
# script interface.
system(fmt("%s %s %s %s %s %d",
pp_cmd, npath, info$path,
strftime("%y-%m-%d_%H.%M.%S", info$open),
strftime("%y-%m-%d_%H.%M.%S", info$close),
info$terminating));
return T;
}
function create_stream(id: ID, stream: Stream) : bool
@@ -159,9 +209,15 @@ function disable_stream(id: ID) : bool
if ( ! __disable_stream(id) )
return F;
}
function add_filter(id: ID, filter: Filter) : bool
{
# This is a work-around for the fact that we can't forward-declare
# the default_path_func and then use it as &default in the record
# definition.
if ( ! filter?$path_func )
filter$path_func = default_path_func;
filters[id, filter$name] = filter;
return __add_filter(id, filter);
}
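
As a usage sketch for the extended path_func interface above (the filter name and return value are illustrative; METRICS is one of the Log::ID values appearing in this commit):

function example_path_func(id: Log::ID, path: string, rec: any) : string
    {
    # Start from the suggested path; a real implementation could inspect
    # rec (the stream's columns record) to pick a per-entry file name.
    return fmt("%s-example", path);
    }

event bro_init()
    {
    Log::add_filter(METRICS, [$name="example", $path_func=example_path_func]);
    }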

View file

@@ -26,4 +26,19 @@ export {
const unset_field = "-" &redef;
}
# Default function to postprocess a rotated ASCII log file. It moves the rotated
# file to a new name that includes a timestamp with the opening time, and then
# runs the writer's default postprocessor command on it.
function default_rotation_postprocessor_func(info: Log::RotationInfo) : bool
{
# Move file to name including both opening and closing time.
local dst = fmt("%s.%s.log", info$path,
strftime(Log::default_rotation_date_format, info$open));
system(fmt("/bin/mv %s %s", info$fname, dst));
# Run default postprocessor.
return Log::run_rotation_postprocessor_cmd(info, dst);
}
redef Log::default_rotation_postprocessors += { [Log::WRITER_ASCII] = default_rotation_postprocessor_func };
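
Hooking a site script into rotation now only needs the command name; per the system() call in run_rotation_postprocessor_cmd above, the command receives the rotated file, the original path, the open and close timestamps, and the terminating flag as positional arguments. A sketch, with a hypothetical script path:

# The named script is hypothetical. It is invoked as:
#   <cmd> <rotated file> <original path> <open time> <close time> <terminating 0/1>
redef Log::default_rotation_postprocessor_cmd = "/usr/local/share/bro/archive-log";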

View file

@@ -1 +1,11 @@
@load ./main
# The cluster framework must be loaded first.
@load base/frameworks/cluster
# Load either the cluster support script or the non-cluster support script.
@if ( Cluster::is_enabled() )
@load ./cluster
@else
@load ./non-cluster
@endif

View file

@@ -0,0 +1,146 @@
##! This implements transparent cluster support for the metrics framework.
##! Do not load this file directly. It's only meant to be loaded automatically
##! and will be, depending on whether the cluster framework has been enabled.
##! The goal of this script is to make metric calculation completely and
##! transparently automated when running on a cluster.
@load base/frameworks/cluster
module Metrics;
export {
## This event is sent by the manager in a cluster to initiate the
## collection of metrics values.
global cluster_collect: event(uid: string, id: ID, filter_name: string);
## This event is sent by nodes that are collecting metrics after receiving
## a request for the metric filter from the manager.
global cluster_results: event(uid: string, id: ID, filter_name: string, data: MetricTable, done: bool);
## This event is used internally by workers to send result chunks.
global send_data: event(uid: string, id: ID, filter_name: string, data: MetricTable);
## This value allows a user to decide how many metric results the
## workers should transmit in each group.
const cluster_send_in_groups_of = 50 &redef;
}
# This is maintained by managers so they can know what data they requested and
# when they requested it.
global requested_results: table[string] of time = table() &create_expire=5mins;
# TODO: Both of the next variables make the assumption that a value never
# takes longer than 5 minutes to transmit from workers to manager. This needs to
# be tunable or self-tuning. These should also be restructured to be
# maintained within a single variable.
# This variable is maintained by manager nodes as they collect and aggregate
# results.
global collecting_results: table[string, ID, string] of MetricTable &create_expire=5mins;
# This variable is maintained by manager nodes to track how many "dones" they
# collected per collection unique id. Once the number of results for a uid
# matches the number of peer nodes that results should be coming from, the
# result is written out and deleted from here.
# TODO: add an &expire_func in case not all results are received.
global done_with: table[string] of count &create_expire=5mins &default=0;
# Add events to the cluster framework to make this work.
redef Cluster::manager_events += /Metrics::cluster_collect/;
redef Cluster::worker_events += /Metrics::cluster_results/;
# The metrics collection process can only be done by a manager.
@if ( Cluster::local_node_type() == Cluster::MANAGER )
event Metrics::log_it(filter: Filter)
{
local uid = unique_id("");
# Set some tracking variables.
requested_results[uid] = network_time();
collecting_results[uid, filter$id, filter$name] = table();
# Request data from peers.
event Metrics::cluster_collect(uid, filter$id, filter$name);
# Schedule the log_it event for the next break period.
schedule filter$break_interval { Metrics::log_it(filter) };
}
@endif
@if ( Cluster::local_node_type() == Cluster::WORKER )
event Metrics::send_data(uid: string, id: ID, filter_name: string, data: MetricTable)
{
#print fmt("WORKER %s: sending data for uid %s...", Cluster::node, uid);
local local_data: MetricTable;
local num_added = 0;
for ( index in data )
{
local_data[index] = data[index];
delete data[index];
# Only send cluster_send_in_groups_of at a time. Queue another
# event to send the next group.
if ( cluster_send_in_groups_of == ++num_added )
break;
}
local done = F;
# If data is empty, this metric is done.
if ( |data| == 0 )
done = T;
event Metrics::cluster_results(uid, id, filter_name, local_data, done);
if ( ! done )
event Metrics::send_data(uid, id, filter_name, data);
}
event Metrics::cluster_collect(uid: string, id: ID, filter_name: string)
{
#print fmt("WORKER %s: received the cluster_collect event.", Cluster::node);
event Metrics::send_data(uid, id, filter_name, store[id, filter_name]);
# Look up the actual filter and reset it; the reference to the data
# currently stored will be maintained internally by the send_data event.
reset(filter_store[id, filter_name]);
}
@endif
@if ( Cluster::local_node_type() == Cluster::MANAGER )
event Metrics::cluster_results(uid: string, id: ID, filter_name: string, data: MetricTable, done: bool)
{
#print fmt("MANAGER: receiving results from %s", get_event_peer()$descr);
local local_data = collecting_results[uid, id, filter_name];
for ( index in data )
{
if ( index !in local_data )
local_data[index] = 0;
local_data[index] += data[index];
}
# Mark another worker as being "done" for this uid.
if ( done )
++done_with[uid];
# If the data has been collected from all peers, we are done and ready to log.
if ( Cluster::worker_count == done_with[uid] )
{
local ts = network_time();
# Log the time this was initially requested if it's available.
if ( uid in requested_results )
ts = requested_results[uid];
write_log(ts, filter_store[id, filter_name], local_data);
if ( [uid, id, filter_name] in collecting_results )
delete collecting_results[uid, id, filter_name];
if ( uid in done_with )
delete done_with[uid];
if ( uid in requested_results )
delete requested_results[uid];
}
}
@endif

View file

@@ -1,19 +0,0 @@
@load base/frameworks/metrics
redef enum Metrics::ID += {
CONNS_ORIGINATED,
CONNS_RESPONDED
};
event bro_init()
{
Metrics::configure(CONNS_ORIGINATED, [$aggregation_mask=24, $break_interval=5mins]);
Metrics::configure(CONNS_RESPONDED, [$aggregation_mask=24, $break_interval=5mins]);
}
event connection_established(c: connection)
{
Metrics::add_data(CONNS_ORIGINATED, [$host=c$id$orig_h], 1);
Metrics::add_data(CONNS_RESPONDED, [$host=c$id$resp_h], 1);
}

View file

@@ -1,20 +0,0 @@
@load base/frameworks/metrics
redef enum Metrics::ID += {
HTTP_REQUESTS_BY_STATUS_CODE,
HTTP_REQUESTS_BY_HOST,
};
event bro_init()
{
Metrics::configure(HTTP_REQUESTS_BY_STATUS_CODE, [$aggregation_mask=24, $break_interval=10secs]);
Metrics::configure(HTTP_REQUESTS_BY_HOST, [$break_interval=10secs]);
}
event HTTP::log_http(rec: HTTP::Info)
{
if ( rec?$host )
Metrics::add_data(HTTP_REQUESTS_BY_HOST, [$index=rec$host], 1);
if ( rec?$status_code )
Metrics::add_data(HTTP_REQUESTS_BY_STATUS_CODE, [$host=rec$id$orig_h, $index=fmt("%d", rec$status_code)], 1);
}

View file

@@ -1,28 +1,19 @@
##! This is the implementation of the metrics framework
##! This is the implementation of the metrics framework.
@load base/frameworks/notice
module Metrics;
export {
redef enum Log::ID += { METRICS };
type ID: enum {
ALL,
NOTHING,
};
const default_aggregation_mask = 24 &redef;
const default_break_interval = 5mins &redef;
# TODO: configure a metrics filter logging stream to log the current
# metrics configuration in case someone is looking through
# old logs and the configuration has changed since then.
type Filter: record {
name: ID &optional;
## Global mask by which you'd like to aggregate traffic.
aggregation_mask: count &optional;
## This is essentially applying names to various subnets.
aggregation_table: table[subnet] of string &optional;
break_interval: interval &default=default_break_interval;
};
## The default interval used for "breaking" metrics and writing the
## current value to the logging stream.
const default_break_interval = 15mins &redef;
type Index: record {
## Host is the value to which this metric applies.
@@ -35,108 +26,190 @@ export {
## value in a Host header. This is an example of a non-host based
## metric since multiple IP addresses could respond for the same Host
## header value.
index: string &default="";
};
str: string &optional;
## The CIDR block that this metric applies to. This is typically
## only used internally for host based aggregation.
network: subnet &optional;
} &log;
type Info: record {
ts: time &log;
name: ID &log;
index: string &log &optional;
agg_subnet: string &log &optional;
metric_id: ID &log;
filter_name: string &log;
index: Index &log;
value: count &log;
};
global add_filter: function(name: ID, filter: Filter);
global add_data: function(name: ID, index: Index, increment: count);
# TODO: configure a metrics filter logging stream to log the current
# metrics configuration in case someone is looking through
# old logs and the configuration has changed since then.
type Filter: record {
## The :bro:type:`Metrics::ID` that this filter applies to.
id: ID &optional;
## The name for this filter so that multiple filters can be
## applied to a single metric to get a different view of the same
## metric data being collected (different aggregation, break, etc).
name: string &default="default";
## A predicate so that you can decide per index if you would like
## to accept the data being inserted.
pred: function(index: Index): bool &optional;
## Global mask by which you'd like to aggregate traffic.
aggregation_mask: count &optional;
## This is essentially applying names to various subnets.
aggregation_table: table[subnet] of subnet &optional;
## The interval at which the metric should be "broken" and written
## to the logging stream.
break_interval: interval &default=default_break_interval;
## This determines if the result of this filter is sent to the metrics
## logging stream. One use for the metrics framework is as an internal
## thresholding and statistics gathering utility that is meant to
## never log but rather to generate notices and derive data.
log: bool &default=T;
## A straight threshold for generating a notice.
notice_threshold: count &optional;
## A series of thresholds at which to generate notices.
## TODO: This is not implemented yet!
notice_thresholds: vector of count &optional;
## If this and a $notice_threshold value are set, this notice type
## will be generated by the metrics framework.
note: Notice::Type &optional;
};
global add_filter: function(id: ID, filter: Filter);
global add_data: function(id: ID, index: Index, increment: count);
# This is the event that is used to "finish" metrics and adapt the metrics
# framework for clustered or non-clustered usage.
global log_it: event(filter: Filter);
global log_metrics: event(rec: Info);
}
global metric_filters: table[ID] of Filter = table();
redef record Notice::Info += {
metric_index: Index &log &optional;
};
type MetricIndex: table[string] of count &default=0;
type MetricTable: table[string] of MetricIndex;
global store: table[ID] of MetricTable = table();
global metric_filters: table[ID] of vector of Filter = table();
global filter_store: table[ID, string] of Filter = table();
event bro_init()
type MetricTable: table[Index] of count &default=0;
# This is indexed by metric ID and stream filter name.
global store: table[ID, string] of MetricTable = table();
# This stores the current threshold index for filters using the
# $notice_thresholds element.
global thresholds: table[string] of count = {} &default=0;
event bro_init() &priority=5
{
Log::create_stream(METRICS, [$columns=Info, $ev=log_metrics]);
}
function reset(name: ID)
{
store[name] = table();
}
event log_it(filter: Filter)
function write_log(ts: time, filter: Filter, data: MetricTable)
{
# If this node is the manager in a cluster, this needs to request values
# for this metric from all of the workers.
local name = filter$name;
for ( agg_subnet in store[name] )
for ( index in data )
{
local metric_values = store[name][agg_subnet];
for ( index in metric_values )
local val = data[index];
local m: Info = [$ts=ts,
$metric_id=filter$id,
$filter_name=filter$name,
$index=index,
$value=val];
if ( m$index?$host &&
filter?$notice_threshold &&
m$value >= filter$notice_threshold )
{
local val = metric_values[index];
local m: Info = [$ts=network_time(),
$name=name,
$agg_subnet=fmt("%s", agg_subnet),
$index=index,
$value=val];
if ( index == "" )
delete m$index;
if ( agg_subnet == "" )
delete m$agg_subnet;
Log::write(METRICS, m);
NOTICE([$note=filter$note,
$msg=fmt("Metrics threshold crossed by %s %d/%d", index$host, m$value, filter$notice_threshold),
$src=m$index$host, $n=m$value,
$metric_index=index]);
}
else if ( filter?$notice_thresholds &&
m$value >= filter$notice_thresholds[thresholds[cat(filter$id,filter$name)]] )
{
# TODO: implement this
}
if ( filter$log )
Log::write(METRICS, m);
}
reset(name);
schedule filter$break_interval { log_it(filter) };
}
function add_filter(name: ID, filter: Filter)
function reset(filter: Filter)
{
store[filter$id, filter$name] = table();
}
function add_filter(id: ID, filter: Filter)
{
if ( filter?$aggregation_table && filter?$aggregation_mask )
{
print "INVALID Metric filter: Defined $aggregation_table and $aggregation_mask.";
return;
}
filter$name = name;
metric_filters[name] = filter;
store[name] = table();
# Only do this on the manager if in a cluster.
schedule filter$break_interval { log_it(filter) };
}
function add_data(name: ID, index: Index, increment: count)
{
local conf = metric_filters[name];
local agg_subnet = "";
if ( index?$host )
if ( [id, filter$name] in store )
{
if ( conf?$aggregation_mask )
{
local agg_mask = conf$aggregation_mask;
agg_subnet = fmt("%s", mask_addr(index$host, agg_mask));
}
else if ( conf?$aggregation_table )
agg_subnet = fmt("%s", conf$aggregation_table[index$host]);
else
agg_subnet = fmt("%s", index$host);
print fmt("INVALID Metric filter: Filter with name \"%s\" already exists.", filter$name);
return;
}
if ( filter?$notice_threshold && filter?$notice_thresholds )
{
print "INVALID Metric filter: Defined both $notice_threshold and $notice_thresholds";
return;
}
if ( agg_subnet !in store[name] )
store[name][agg_subnet] = table();
if ( ! filter?$id )
filter$id = id;
if ( index$index !in store[name][agg_subnet] )
store[name][agg_subnet][index$index] = 0;
store[name][agg_subnet][index$index] = store[name][agg_subnet][index$index] + increment;
if ( id !in metric_filters )
metric_filters[id] = vector();
metric_filters[id][|metric_filters[id]|] = filter;
filter_store[id, filter$name] = filter;
store[id, filter$name] = table();
schedule filter$break_interval { Metrics::log_it(filter) };
}
function add_data(id: ID, index: Index, increment: count)
{
if ( id !in metric_filters )
return;
local filters = metric_filters[id];
# Add the data to any of the defined filters.
for ( filter_id in filters )
{
local filter = filters[filter_id];
# If this filter has a predicate, run the predicate and skip this
# index if the predicate returns false.
if ( filter?$pred &&
! filter$pred(index) )
next;
local filt_store = store[id, filter$name];
if ( index?$host )
{
if ( filter?$aggregation_mask )
{
index$network = mask_addr(index$host, filter$aggregation_mask);
delete index$host;
}
else if ( filter?$aggregation_table )
{
index$network = filter$aggregation_table[index$host];
delete index$host;
}
}
if ( index !in filt_store )
filt_store[index] = 0;
filt_store[index] += increment;
}
}
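
For comparison with the two example scripts deleted above, the same connection metric rewritten against the reworked filter API would look roughly like this (the enum value is re-declared here purely for illustration):

@load base/frameworks/metrics

redef enum Metrics::ID += { CONNS_ORIGINATED };

event bro_init()
    {
    # One named filter per view of the metric; /24 aggregation as before.
    Metrics::add_filter(CONNS_ORIGINATED,
                        [$aggregation_mask=24, $break_interval=5mins]);
    }

event connection_established(c: connection)
    {
    Metrics::add_data(CONNS_ORIGINATED, [$host=c$id$orig_h], 1);
    }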

View file

@@ -0,0 +1,17 @@
module Metrics;
export {
}
event Metrics::log_it(filter: Filter)
{
local id = filter$id;
local name = filter$name;
write_log(network_time(), filter, store[id, name]);
reset(filter);
schedule filter$break_interval { Metrics::log_it(filter) };
}

View file

@@ -6,7 +6,8 @@
@load ./actions/drop
@load ./actions/email_admin
@load ./actions/page
@load ./actions/add-geodata
# Load the script to add hostnames to emails by default.
# NOTE: this exposes a memleak in async DNS lookups.
#@load ./extend-email/hostnames
# There shouldn't be any default overhead from loading these since they
# *should* only do anything when notices have the ACTION_EMAIL action applied.
@load ./extend-email/hostnames

View file

@@ -0,0 +1,47 @@
##! This script adds geographic location data to notices for the "remote"
##! host in a connection. It does make the assumption that one of the
##! addresses in a connection is "local" and one is "remote" which is
##! probably a safe assumption to make in most cases. If both addresses
##! are remote, it will use the $src address.
module Notice;
export {
redef enum Action += {
## Indicates that the notice should have geodata added for the
## "remote" host. :bro:id:`Site::local_nets` must be defined
## in order for this to work.
ACTION_ADD_GEODATA
};
redef record Info += {
## If libGeoIP support is built in, notices can have geographic
## information attached to them.
remote_location: geo_location &log &optional;
};
## Notice types which should have the "remote" location looked up.
## If GeoIP support is not built in, this does nothing.
const lookup_location_types: set[Notice::Type] = {} &redef;
## Add a helper to the notice policy for looking up GeoIP data.
redef Notice::policy += {
[$pred(n: Notice::Info) = { return (n$note in Notice::lookup_location_types); },
$priority = 10],
};
}
# This is handled at a high priority in case other notice handlers
# want to use the data.
event notice(n: Notice::Info) &priority=10
{
if ( ACTION_ADD_GEODATA in n$actions &&
|Site::local_nets| > 0 &&
! n?$remote_location )
{
if ( n?$src && ! Site::is_local_addr(n$src) )
n$remote_location = lookup_location(n$src);
else if ( n?$dst && ! Site::is_local_addr(n$dst) )
n$remote_location = lookup_location(n$dst);
}
}
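
A usage sketch for the export above; SSH::Login is used here as an illustrative notice type and is an assumption:

# Opt a notice type into remote-location lookups.
redef Notice::lookup_location_types += { SSH::Login };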

View file

@@ -519,7 +519,7 @@ const frag_timeout = 0.0 sec &redef;
# packets and IP-level bytes transferred by each endpoint. If
# true, these values are returned in the connection's endpoint
# record val.
const use_conn_size_analyzer = F &redef;
const use_conn_size_analyzer = T &redef;
const UDP_INACTIVE = 0;
const UDP_ACTIVE = 1; # means we've seen something from this endpoint

View file

@@ -23,11 +23,11 @@
@load base/frameworks/signatures
@load base/frameworks/packet-filter
@load base/frameworks/software
@load base/frameworks/intel
@load base/frameworks/metrics
@load base/frameworks/communication
@load base/frameworks/control
@load base/frameworks/cluster
@load base/frameworks/metrics
@load base/frameworks/intel
@load base/frameworks/reporter
@load base/protocols/conn

View file

@@ -214,7 +214,7 @@ event http_header(c: connection, is_orig: bool, name: string, value: string) &pr
c$http$response_content_length = extract_count(value);
else if ( name == "CONTENT-DISPOSITION" &&
/[fF][iI][lL][eE][nN][aA][mM][eE]/ in value )
c$http$filename = sub(value, /^.*[fF][iI][lL][eE][nN][aA][mM][eE]=/, "");
c$http$filename = extract_filename_from_content_disposition(value);
}
}

View file

@@ -16,11 +16,13 @@ export {
extract_file: bool &default=F;
## Store the file handle here for the file currently being extracted.
extraction_file: file &optional;
extraction_file: file &log &optional;
};
redef record State += {
## Store a count of the number of files that have been transferred in
## this conversation to create unique file names on disk.
num_extracted_files: count &optional;
num_extracted_files: count &default=0;
};
}
@@ -34,7 +36,7 @@ event mime_segment_data(c: connection, length: count, data: string) &priority=3
{
if ( c$mime$extract_file && c$mime$content_len == 0 )
{
local suffix = fmt("%d.dat", ++c$mime$num_extracted_files);
local suffix = fmt("%d.dat", ++c$mime_state$num_extracted_files);
local fname = generate_extraction_filename(extraction_prefix, c, suffix);
c$mime$extraction_file = open(fname);
enable_raw_output(c$mime$extraction_file);

View file

@@ -1 +1,3 @@
@load ./main
@load ./entities
@load ./entities-excerpt

View file

@@ -0,0 +1,52 @@
##! This script is for optionally adding a body excerpt to the SMTP
##! entities log.
@load ./entities
module SMTP;
export {
redef record SMTP::EntityInfo += {
## The entity body excerpt.
excerpt: string &log &default="";
## Internal tracking to know how much of the body should be included
## in the excerpt.
excerpt_len: count &optional;
};
## This is the default value for how much of the entity body should be
## included for all MIME entities.
const default_entity_excerpt_len = 0 &redef;
## This table defines how much of various entity bodies should be
## included in excerpts.
const entity_excerpt_len: table[string] of count = {}
&redef
&default = default_entity_excerpt_len;
}
event mime_segment_data(c: connection, length: count, data: string) &priority=-1
{
if ( ! c?$smtp ) return;
if ( c$smtp$current_entity$content_len == 0 )
c$smtp$current_entity$excerpt_len = entity_excerpt_len[c$smtp$current_entity$mime_type];
}
event mime_segment_data(c: connection, length: count, data: string) &priority=-2
{
if ( ! c?$smtp ) return;
local ent = c$smtp$current_entity;
if ( ent$content_len < ent$excerpt_len )
{
if ( ent$content_len + length < ent$excerpt_len )
ent$excerpt = cat(ent$excerpt, data);
else
{
local x_bytes = ent$excerpt_len - ent$content_len;
ent$excerpt = cat(ent$excerpt, sub_bytes(data, 1, x_bytes));
}
}
}
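
A configuration sketch for the table exported above (the mime type and length are illustrative):

# Keep up to 512 bytes of plain-text entity bodies in the excerpt column.
redef SMTP::entity_excerpt_len += { ["text/plain"] = 512 };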

View file

@@ -0,0 +1,234 @@
##! Analysis and logging for MIME entities found in SMTP sessions.
@load base/utils/strings
@load base/utils/files
@load ./main
module SMTP;
export {
redef enum Notice::Type += {
## Indicates that an MD5 sum was calculated for a MIME message.
MD5,
};
redef enum Log::ID += { SMTP_ENTITIES };
type EntityInfo: record {
## This is the timestamp of when the MIME content transfer began.
ts: time &log;
uid: string &log;
id: conn_id &log;
## Internally generated "message id" that ties back to the particular
## message in the SMTP log where this entity was seen.
mid: string &log;
## The filename seen in the Content-Disposition header.
filename: string &log &optional;
## Track how many bytes of the MIME encoded file have been seen.
content_len: count &log &default=0;
## The mime type of the entity discovered through magic bytes identification.
mime_type: string &log &optional;
## The calculated MD5 sum for the MIME entity.
md5: string &log &optional;
## Optionally calculate the file's MD5 sum. Must be set prior to the
## first data chunk being seen in an event.
calc_md5: bool &default=F;
## This boolean value indicates if an MD5 sum is being calculated
## for the current file transfer.
calculating_md5: bool &default=F;
## Optionally write the file to disk. Must be set prior to first
## data chunk being seen in an event.
extract_file: bool &default=F;
## Store the file handle here for the file currently being extracted.
extraction_file: file &log &optional;
};
redef record Info += {
## The in-progress entity information.
current_entity: EntityInfo &optional;
};
redef record State += {
## Store a count of the number of files that have been transferred in
## a conversation to create unique file names on disk.
num_extracted_files: count &default=0;
## Track the number of MIME encoded files transferred during a session.
mime_level: count &default=0;
};
## Generate MD5 sums for these filetypes.
const generate_md5 = /application\/x-dosexec/ # Windows and DOS executables
| /application\/x-executable/ # *NIX executable binary
&redef;
## Pattern of file mime types to extract from MIME bodies.
const extract_file_types = /NO_DEFAULT/ &redef;
## The on-disk prefix for files to be extracted from MIME entity bodies.
const extraction_prefix = "smtp-entity" &redef;
global log_mime: event(rec: EntityInfo);
}
event bro_init() &priority=5
{
Log::create_stream(SMTP_ENTITIES, [$columns=EntityInfo, $ev=log_mime]);
}
function set_session(c: connection, new_entity: bool)
{
if ( ! c$smtp?$current_entity || new_entity )
{
local info: EntityInfo;
info$ts=network_time();
info$uid=c$uid;
info$id=c$id;
info$mid=c$smtp$mid;
c$smtp$current_entity = info;
++c$smtp_state$mime_level;
}
}
event mime_begin_entity(c: connection) &priority=10
{
if ( ! c?$smtp ) return;
set_session(c, T);
}
# This has priority -10 because other handlers need to know the current
# content_len before it's updated by this handler.
event mime_segment_data(c: connection, length: count, data: string) &priority=-10
{
if ( ! c?$smtp ) return;
c$smtp$current_entity$content_len = c$smtp$current_entity$content_len + length;
}
event mime_segment_data(c: connection, length: count, data: string) &priority=7
{
if ( ! c?$smtp ) return;
if ( c$smtp$current_entity$content_len == 0 )
c$smtp$current_entity$mime_type = split1(identify_data(data, T), /;/)[1];
}
event mime_segment_data(c: connection, length: count, data: string) &priority=-5
{
if ( ! c?$smtp ) return;
if ( c$smtp$current_entity$content_len == 0 )
{
if ( generate_md5 in c$smtp$current_entity$mime_type )
c$smtp$current_entity$calc_md5 = T;
if ( c$smtp$current_entity$calc_md5 )
{
c$smtp$current_entity$calculating_md5 = T;
md5_hash_init(c$id);
}
}
if ( c$smtp$current_entity$calculating_md5 )
md5_hash_update(c$id, data);
}
## In the event of a content gap during the MIME transfer, check the state of
## the MD5 sum calculation and stop calculating the MD5 since it would be
## incorrect anyway.
event content_gap(c: connection, is_orig: bool, seq: count, length: count) &priority=5
{
if ( is_orig || ! c?$smtp || ! c$smtp?$current_entity ) return;
if ( c$smtp$current_entity$calculating_md5 )
{
c$smtp$current_entity$calculating_md5 = F;
md5_hash_finish(c$id);
}
}
event mime_end_entity(c: connection) &priority=-3
{
# TODO: this check is only due to a bug in mime_end_entity that
# causes the event to be generated twice for the same real event.
if ( ! c?$smtp || ! c$smtp?$current_entity )
return;
if ( c$smtp$current_entity$calculating_md5 )
{
c$smtp$current_entity$md5 = md5_hash_finish(c$id);
NOTICE([$note=MD5, $msg=fmt("Calculated a hash for a MIME entity from %s", c$id$orig_h),
$sub=c$smtp$current_entity$md5, $conn=c]);
}
}
event mime_one_header(c: connection, h: mime_header_rec)
{
if ( ! c?$smtp ) return;
if ( h$name == "CONTENT-DISPOSITION" &&
/[fF][iI][lL][eE][nN][aA][mM][eE]/ in h$value )
c$smtp$current_entity$filename = extract_filename_from_content_disposition(h$value);
}
event mime_end_entity(c: connection) &priority=-5
{
if ( ! c?$smtp ) return;
# This check and the delete below are just to cope with a bug where
# mime_end_entity can be generated multiple times for the same event.
if ( ! c$smtp?$current_entity )
return;
# Only log if there was some content.
if ( c$smtp$current_entity$content_len > 0 )
Log::write(SMTP_ENTITIES, c$smtp$current_entity);
delete c$smtp$current_entity;
}
event mime_segment_data(c: connection, length: count, data: string) &priority=5
{
if ( ! c?$smtp ) return;
if ( extract_file_types in c$smtp$current_entity$mime_type )
c$smtp$current_entity$extract_file = T;
}
event mime_segment_data(c: connection, length: count, data: string) &priority=3
{
if ( ! c?$smtp ) return;
if ( c$smtp$current_entity$extract_file &&
c$smtp$current_entity$content_len == 0 )
{
local suffix = fmt("%d.dat", ++c$smtp_state$num_extracted_files);
local fname = generate_extraction_filename(extraction_prefix, c, suffix);
c$smtp$current_entity$extraction_file = open(fname);
enable_raw_output(c$smtp$current_entity$extraction_file);
}
}
event mime_segment_data(c: connection, length: count, data: string) &priority=-5
{
if ( ! c?$smtp ) return;
if ( c$smtp$current_entity$extract_file && c$smtp$current_entity?$extraction_file )
print c$smtp$current_entity$extraction_file, data;
}
event mime_end_entity(c: connection) &priority=-3
{
if ( ! c?$smtp ) return;
# TODO: this check is only due to a bug in mime_end_entity that
# causes the event to be generated twice for the same real event.
if ( ! c$smtp?$current_entity )
return;
if ( c$smtp$current_entity?$extraction_file )
close(c$smtp$current_entity$extraction_file);
}
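
A configuration sketch for the export block above (the pattern is illustrative; generate_md5 already matches executables by default):

# Also extract Windows/DOS executables seen in SMTP entity bodies.
redef SMTP::extract_file_types = /application\/x-dosexec/;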

View file

@@ -4,17 +4,14 @@ module SMTP;
export {
redef enum Log::ID += { SMTP };
redef enum Notice::Type += {
## Indicates that the server sent a reply mentioning an SMTP block list.
BL_Error_Message,
## Indicates the client's address is seen in the block list error message.
BL_Blocked_Host,
};
type Info: record {
ts: time &log;
uid: string &log;
id: conn_id &log;
## This is an internally generated "message id" that can be used to
## map between SMTP messages and MIME entities in the SMTP entities
## log.
mid: string &log;
helo: string &log &optional;
mailfrom: string &log &optional;
rcptto: set[string] &log &optional;
@@ -30,19 +27,13 @@ export {
second_received: string &log &optional;
## The last message the server sent to the client.
last_reply: string &log &optional;
files: set[string] &log &optional;
path: vector of addr &log &optional;
user_agent: string &log &optional;
## Indicate if this session is currently transmitting SMTP message
## envelope headers.
in_headers: bool &default=F;
## Indicate if the "Received: from" headers should still be processed.
process_received_from: bool &default=T;
## Maintain the current header for cases where there is header wrapping.
current_header: string &default="";
## Indicate when the message is logged and no longer applicable.
done: bool &default=F;
## Indicates if client activity has been seen, but not yet logged
has_client_activity: bool &default=F;
};
type State: record {
@@ -61,26 +52,7 @@ export {
## ALL_HOSTS - always capture the entire path.
## NO_HOSTS - never capture the path.
const mail_path_capture = ALL_HOSTS &redef;
# This matches content in SMTP error messages that indicate some
# block list doesn't like the connection/mail.
const bl_error_messages =
/spamhaus\.org\//
| /sophos\.com\/security\//
| /spamcop\.net\/bl/
| /cbl\.abuseat\.org\//
| /sorbs\.net\//
| /bsn\.borderware\.com\//
| /mail-abuse\.com\//
| /b\.barracudacentral\.com\//
| /psbl\.surriel\.com\//
| /antispam\.imp\.ch\//
| /dyndns\.com\/.*spam/
| /rbl\.knology\.net\//
| /intercept\.datapacket\.net\//
| /uceprotect\.net\//
| /hostkarma\.junkemailfilter\.com\// &redef;
global log_smtp: event(rec: Info);
## Configure the default ports for SMTP analysis.
@@ -121,6 +93,7 @@ function new_smtp_log(c: connection): Info
l$ts=network_time();
l$uid=c$uid;
l$id=c$id;
l$mid=unique_id("@");
if ( c?$smtp_state && c$smtp_state?$helo )
l$helo = c$smtp_state$helo;
@@ -136,26 +109,23 @@ function set_smtp_session(c: connection)
if ( ! c?$smtp_state )
c$smtp_state = [];
if ( ! c?$smtp || c$smtp$done )
{
if ( ! c?$smtp )
c$smtp = new_smtp_log(c);
}
}
function smtp_message(c: connection)
{
Log::write(SMTP, c$smtp);
c$smtp$done = T;
# Track the number of messages seen in this session.
++c$smtp_state$messages_transferred;
if ( c$smtp$has_client_activity )
Log::write(SMTP, c$smtp);
}
event smtp_request(c: connection, is_orig: bool, command: string, arg: string) &priority=5
{
set_smtp_session(c);
local upper_command = to_upper(command);
if ( upper_command != "QUIT" )
c$smtp$has_client_activity = T;
if ( upper_command == "HELO" || upper_command == "EHLO" )
{
@@ -172,26 +142,11 @@ event smtp_request(c: connection, is_orig: bool, command: string, arg: string) &
else if ( upper_command == "MAIL" && /^[fF][rR][oO][mM]:/ in arg )
{
# In case this is not the first message in a session we want to
# essentially write out a log, clear the session tracking, and begin
# new session tracking.
if ( c$smtp_state$messages_transferred > 0 )
{
smtp_message(c);
set_smtp_session(c);
}
local partially_done = split1(arg, /:[[:blank:]]*/)[2];
c$smtp$mailfrom = split1(partially_done, /[[:blank:]]?/)[1];
}
else if ( upper_command == "DATA" )
{
c$smtp$in_headers = T;
}
}
event smtp_reply(c: connection, is_orig: bool, code: count, cmd: string,
msg: string, cont_resp: bool) &priority=5
{
@@ -199,169 +154,98 @@ event smtp_reply(c: connection, is_orig: bool, code: count, cmd: string,
# This continually overwrites, but we want the last reply,
# so this actually works fine.
if ( code != 421 && code >= 400 )
c$smtp$last_reply = fmt("%d %s", code, msg);
}
event smtp_reply(c: connection, is_orig: bool, code: count, cmd: string,
msg: string, cont_resp: bool) &priority=-5
{
set_smtp_session(c);
if ( cmd == "." )
{
c$smtp$last_reply = fmt("%d %s", code, msg);
# Raise a notice when an SMTP error about a block list is discovered.
if ( bl_error_messages in msg )
{
local note = BL_Error_Message;
local message = fmt("%s received an error message mentioning an SMTP block list", c$id$orig_h);
# Determine if the originator's IP address is in the message.
local ips = find_ip_addresses(msg);
local text_ip = "";
if ( |ips| > 0 && to_addr(ips[0]) == c$id$orig_h )
{
note = BL_Blocked_Host;
message = fmt("%s is on an SMTP block list", c$id$orig_h);
}
NOTICE([$note=note, $conn=c, $msg=message, $sub=msg]);
}
# Track the number of messages seen in this session.
++c$smtp_state$messages_transferred;
smtp_message(c);
c$smtp = new_smtp_log(c);
}
}
event smtp_data(c: connection, is_orig: bool, data: string) &priority=5
event mime_one_header(c: connection, h: mime_header_rec) &priority=5
{
# Is there something we should be handling from the server?
if ( ! is_orig ) return;
set_smtp_session(c);
if ( ! c$smtp$in_headers )
{
if ( /^[cC][oO][nN][tT][eE][nN][tT]-[dD][iI][sS].*[fF][iI][lL][eE][nN][aA][mM][eE]/ in data )
{
if ( ! c$smtp?$files )
c$smtp$files = set();
data = sub(data, /^.*[fF][iI][lL][eE][nN][aA][mM][eE]=/, "");
add c$smtp$files[data];
}
return;
}
if ( ! c?$smtp ) return;
c$smtp$has_client_activity = T;
if ( /^[[:blank:]]*$/ in data )
c$smtp$in_headers = F;
if ( h$name == "MESSAGE-ID" )
c$smtp$msg_id = h$value;
# This is to reconstruct headers that tend to wrap around.
if ( /^[[:blank:]]/ in data )
{
# Remove all but a single space at the beginning (this seems to follow
# the most common behavior).
data = sub(data, /^[[:blank:]]*/, " ");
if ( c$smtp$current_header == "MESSAGE-ID" )
c$smtp$msg_id += data;
else if ( c$smtp$current_header == "RECEIVED" )
c$smtp$first_received += data;
else if ( c$smtp$current_header == "IN-REPLY-TO" )
c$smtp$in_reply_to += data;
else if ( c$smtp$current_header == "SUBJECCT" )
c$smtp$subject += data;
else if ( c$smtp$current_header == "FROM" )
c$smtp$from += data;
else if ( c$smtp$current_header == "REPLY-TO" )
c$smtp$reply_to += data;
else if ( c$smtp$current_header == "USER-AGENT" )
c$smtp$user_agent += data;
return;
}
# Once there isn't a line starting with a blank, we're not continuing a
# header anymore.
c$smtp$current_header = "";
local header_parts = split1(data, /:[[:blank:]]*/);
# TODO: do something in this case? This would definitely be odd.
# Header wrapping needs to be handled more elegantly. This will happen
# if the header value is wrapped immediately after the header key.
if ( |header_parts| != 2 )
return;
local header_key = to_upper(header_parts[1]);
c$smtp$current_header = header_key;
local header_val = header_parts[2];
if ( header_key == "MESSAGE-ID" )
c$smtp$msg_id = header_val;
else if ( header_key == "RECEIVED" )
else if ( h$name == "RECEIVED" )
{
if ( c$smtp?$first_received )
c$smtp$second_received = c$smtp$first_received;
c$smtp$first_received = header_val;
c$smtp$first_received = h$value;
}
else if ( header_key == "IN-REPLY-TO" )
c$smtp$in_reply_to = header_val;
else if ( header_key == "DATE" )
c$smtp$date = header_val;
else if ( header_key == "FROM" )
c$smtp$from = header_val;
else if ( header_key == "TO" )
else if ( h$name == "IN-REPLY-TO" )
c$smtp$in_reply_to = h$value;
else if ( h$name == "SUBJECT" )
c$smtp$subject = h$value;
else if ( h$name == "FROM" )
c$smtp$from = h$value;
else if ( h$name == "REPLY-TO" )
c$smtp$reply_to = h$value;
else if ( h$name == "DATE" )
c$smtp$date = h$value;
else if ( h$name == "TO" )
{
if ( ! c$smtp?$to )
c$smtp$to = set();
add c$smtp$to[header_val];
add c$smtp$to[h$value];
}
else if ( header_key == "REPLY-TO" )
c$smtp$reply_to = header_val;
else if ( header_key == "SUBJECT" )
c$smtp$subject = header_val;
else if ( header_key == "X-ORIGINATING-IP" )
else if ( h$name == "X-ORIGINATING-IP" )
{
local addresses = find_ip_addresses(header_val);
local addresses = find_ip_addresses(h$value);
if ( 1 in addresses )
c$smtp$x_originating_ip = to_addr(addresses[1]);
}
else if ( header_key == "X-MAILER" ||
header_key == "USER-AGENT" ||
header_key == "X-USER-AGENT" )
{
c$smtp$user_agent = header_val;
# Explicitly set the current header here because there are several
# headers bulked under this same key.
c$smtp$current_header = "USER-AGENT";
}
else if ( h$name == "X-MAILER" ||
h$name == "USER-AGENT" ||
h$name == "X-USER-AGENT" )
c$smtp$user_agent = h$value;
}
# This event handler builds the "Received From" path by reading the
# headers in the mail
event smtp_data(c: connection, is_orig: bool, data: string) &priority=3
event mime_one_header(c: connection, h: mime_header_rec) &priority=3
{
# If we've decided that we're done watching the received headers for
# whatever reason, we're done. Could be due to only watching until
# local addresses are seen in the received from headers.
if ( c$smtp$current_header != "RECEIVED" ||
! c$smtp$process_received_from )
if ( ! c?$smtp || h$name != "RECEIVED" || ! c$smtp$process_received_from )
return;
local text_ip = find_address_in_smtp_header(data);
local text_ip = find_address_in_smtp_header(h$value);
if ( text_ip == "" )
return;
local ip = to_addr(text_ip);
if ( ! addr_matches_host(ip, mail_path_capture) &&
! Site::is_private_addr(ip) )
{
c$smtp$process_received_from = F;
}
if ( c$smtp$path[|c$smtp$path|-1] != ip )
c$smtp$path[|c$smtp$path|] = ip;
}
event connection_state_remove(c: connection) &priority=-5
{
if ( c?$smtp && ! c$smtp$done )
if ( c?$smtp )
smtp_message(c);
}

View file

@@ -1,74 +1,58 @@
##! Base SSH analysis script. The heuristic to blindly determine success or
##! failure for SSH connections is implemented here. At this time, it only
##! uses the size of the data being returned from the server to make the
##! heuristic determination about success of the connection.
##! Requires that :bro:id:`use_conn_size_analyzer` is set to T! The heuristic
##! is not attempted if the connection size analyzer isn't enabled.
module SSH;
export {
redef enum Log::ID += { SSH };
redef enum Notice::Type += {
Login,
Password_Guessing,
Login_By_Password_Guesser,
Login_From_Interesting_Hostname,
Bytecount_Inconsistency,
};
type Info: record {
ts: time &log;
uid: string &log;
id: conn_id &log;
## Indicates if the login was heuristically guessed to be "success"
## or "failure".
status: string &log &optional;
direction: string &log &optional;
remote_location: geo_location &log &optional;
## Direction of the connection. If the client was a local host
## logging into an external host, this would be OUTBOUND. INBOUND
## would be set for the opposite situation.
# TODO: handle local-local and remote-remote better.
direction: Direction &log &optional;
## The software string given by the client.
client: string &log &optional;
## The software string given by the server.
server: string &log &optional;
## The amount of data returned from the server. This is currently
## the only measure of the success heuristic and it is logged to
## assist analysts looking at the logs to make their own determination
## about the success on a case-by-case basis.
resp_size: count &log &default=0;
## Indicate if the SSH session is done being watched.
done: bool &default=F;
};
const password_guesses_limit = 30 &redef;
# The size in bytes at which the SSH connection is presumed to be
# successful.
## The size in bytes at which the SSH connection is presumed to be
## successful.
const authentication_data_size = 5500 &redef;
# The amount of time to remember presumed non-successful logins to build
# model of a password guesser.
const guessing_timeout = 30 mins &redef;
# The set of countries for which you'd like to throw notices upon successful login
# requires Bro compiled with libGeoIP support
const watched_countries: set[string] = {"RO"} &redef;
# Strange/bad host names to originate successful SSH logins
const interesting_hostnames =
/^d?ns[0-9]*\./ |
/^smtp[0-9]*\./ |
/^mail[0-9]*\./ |
/^pop[0-9]*\./ |
/^imap[0-9]*\./ |
/^www[0-9]*\./ |
/^ftp[0-9]*\./ &redef;
# This is a table with orig subnet as the key, and subnet as the value.
const ignore_guessers: table[subnet] of subnet &redef;
# If true, we tell the event engine to not look at further data
# packets after the initial SSH handshake. Helps with performance
# (especially with large file transfers) but precludes some
# kinds of analyses (e.g., tracking connection size).
## If true, we tell the event engine to not look at further data
## packets after the initial SSH handshake. Helps with performance
## (especially with large file transfers) but precludes some
## kinds of analyses (e.g., tracking connection size).
const skip_processing_after_detection = F &redef;
# Keeps count of how many rejections a host has had
global password_rejections: table[addr] of TrackCount
&write_expire=guessing_timeout
&synchronized;
# Keeps track of hosts identified as guessing passwords
# TODO: guessing_timeout doesn't work correctly here. If a user redefs
# the variable, it won't take effect.
global password_guessers: set[addr] &read_expire=guessing_timeout+1hr &synchronized;
## This event is generated when the heuristic thinks that a login
## was successful.
global heuristic_successful_login: event(c: connection);
## This event is generated when the heuristic thinks that a login
## failed.
global heuristic_failed_login: event(c: connection);
global log_ssh: event(rec: Info);
}
@@ -106,116 +90,51 @@ function check_ssh_connection(c: connection, done: bool)
# If this is still a live connection and the byte count has not
# crossed the threshold, just return and let the rescheduled check happen later.
if ( !done && c$resp$size < authentication_data_size )
if ( !done && c$resp$num_bytes_ip < authentication_data_size )
return;
# Make sure the server has sent back more than 50 bytes to filter out
# hosts that are just port scanning. Nothing is ever logged if the server
# doesn't send back at least 50 bytes.
if ( c$resp$size < 50 )
if ( c$resp$num_bytes_ip < 50 )
return;
local status = "failure";
local direction = Site::is_local_addr(c$id$orig_h) ? "to" : "from";
local location: geo_location;
location = (direction == "to") ? lookup_location(c$id$resp_h) : lookup_location(c$id$orig_h);
c$ssh$direction = Site::is_local_addr(c$id$orig_h) ? OUTBOUND : INBOUND;
c$ssh$resp_size = c$resp$num_bytes_ip;
if ( done && c$resp$size < authentication_data_size )
if ( c$resp$num_bytes_ip < authentication_data_size )
{
# presumed failure
if ( c$id$orig_h !in password_rejections )
password_rejections[c$id$orig_h] = new_track_count();
# Track the number of rejections
if ( !(c$id$orig_h in ignore_guessers &&
c$id$resp_h in ignore_guessers[c$id$orig_h]) )
++password_rejections[c$id$orig_h]$n;
if ( default_check_threshold(password_rejections[c$id$orig_h]) )
{
add password_guessers[c$id$orig_h];
NOTICE([$note=Password_Guessing,
$conn=c,
$msg=fmt("SSH password guessing by %s", c$id$orig_h),
$sub=fmt("%d failed logins", password_rejections[c$id$orig_h]$n),
$n=password_rejections[c$id$orig_h]$n]);
}
}
# TODO: This is to work around a quasi-bug in Bro which occasionally
# causes the byte count to be oversized.
# Watch for Gregor's work that adds an actual counter of bytes transferred.
else if ( c$resp$size < 20000000 )
c$ssh$status = "failure";
event SSH::heuristic_failed_login(c);
}
else
{
# presumed successful login
status = "success";
c$ssh$done = T;
if ( c$id$orig_h in password_rejections &&
password_rejections[c$id$orig_h]$n > password_guesses_limit &&
c$id$orig_h !in password_guessers )
{
add password_guessers[c$id$orig_h];
NOTICE([$note=Login_By_Password_Guesser,
$conn=c,
$n=password_rejections[c$id$orig_h]$n,
$msg=fmt("Successful SSH login by password guesser %s", c$id$orig_h),
$sub=fmt("%d failed logins", password_rejections[c$id$orig_h]$n)]);
}
local message = fmt("SSH login %s %s \"%s\" \"%s\" %f %f %s (triggered with %d bytes)",
direction, location$country_code, location$region, location$city,
location$latitude, location$longitude,
id_string(c$id), c$resp$size);
NOTICE([$note=Login,
$conn=c,
$msg=message,
$sub=location$country_code]);
# Check to see if this login came from an interesting hostname
when ( local hostname = lookup_addr(c$id$orig_h) )
{
if ( interesting_hostnames in hostname )
{
NOTICE([$note=Login_From_Interesting_Hostname,
$conn=c,
$msg=fmt("Strange login from %s", hostname),
$sub=hostname]);
}
}
if ( location$country_code in watched_countries )
{
}
c$ssh$status = "success";
event SSH::heuristic_successful_login(c);
}
else if ( c$resp$size >= 200000000 )
{
NOTICE([$note=Bytecount_Inconsistency,
$conn=c,
$msg="During byte counting in SSH analysis, an overly large value was seen.",
$sub=fmt("%d",c$resp$size)]);
}
c$ssh$remote_location = location;
c$ssh$status = status;
c$ssh$direction = direction;
c$ssh$resp_size = c$resp$size;
Log::write(SSH, c$ssh);
# Set the "done" flag to prevent the watching event from rescheduling
# after detection is done.
c$ssh$done;
c$ssh$done=T;
# Stop watching this connection, we don't care about it anymore.
if ( skip_processing_after_detection )
{
# Stop watching this connection, we don't care about it anymore.
skip_further_processing(c$id);
set_record_packets(c$id, F);
}
}
event SSH::heuristic_successful_login(c: connection) &priority=-5
{
Log::write(SSH, c$ssh);
}
event SSH::heuristic_failed_login(c: connection) &priority=-5
{
Log::write(SSH, c$ssh);
}
event connection_state_remove(c: connection) &priority=-5
{
if ( c?$ssh )
@@ -226,7 +145,7 @@ event ssh_watcher(c: connection)
{
local id = c$id;
# don't go any further if this connection is gone already!
if ( !connection_exists(id) )
if ( ! connection_exists(id) )
return;
check_ssh_connection(c, F);
@@ -244,5 +163,9 @@ event ssh_client_version(c: connection, version: string) &priority=5
{
set_session(c);
c$ssh$client = version;
schedule +15secs { ssh_watcher(c) };
# The heuristic detection for SSH relies on the ConnSize analyzer.
# Don't do the heuristics if it's disabled.
if ( use_conn_size_analyzer )
schedule +15secs { ssh_watcher(c) };
}
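
The two new heuristic events give other scripts a clean hook at the moment a determination is made; a minimal sketch (the message body is illustrative):

event SSH::heuristic_successful_login(c: connection)
    {
    # c$ssh$resp_size carries the byte count the heuristic keyed on.
    print fmt("SSH login inferred for %s (%d bytes from server)",
              id_string(c$id), c$ssh$resp_size);
    }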

View file

@@ -12,4 +12,15 @@ function generate_extraction_filename(prefix: string, c: connection, suffix: str
conn_info = fmt("%s_%s", conn_info, suffix);
return conn_info;
}
}
## For CONTENT-DISPOSITION headers, this function can be used to extract
## the filename.
function extract_filename_from_content_disposition(data: string): string
{
local filename = sub(data, /^.*[fF][iI][lL][eE][nN][aA][mM][eE]=/, "");
# Remove quotes around the filename if they are there.
if ( /^\"/ in filename )
filename = split_n(filename, /\"/, F, 2)[2];
return filename;
}
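
An illustrative call against the new helper (the header value is made up):

event bro_init()
    {
    local f = extract_filename_from_content_disposition(
        "attachment; filename=\"report.pdf\"");
    print f; # prints: report.pdf
    }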

View file

@@ -16,6 +16,12 @@ export {
## Networks that are considered "local".
const local_nets: set[subnet] &redef;
## This is used for retrieving the subnet when you have multiple
## :bro:id:`local_nets`. A membership query can be done with an
## :bro:type:`addr` and the table will yield the subnet it was found
## within.
global local_nets_table: table[subnet] of subnet = {};
## Networks that are considered "neighbors".
const neighbor_nets: set[subnet] &redef;
@@ -138,4 +144,9 @@ event bro_init() &priority=10
# Double backslashes are needed due to string parsing.
local_dns_suffix_regex = set_to_regex(local_zones, "(^\\.?|\\.)(~~)$");
local_dns_neighbor_suffix_regex = set_to_regex(neighbor_zones, "(^\\.?|\\.)(~~)$");
# Create the local_nets mapping table.
for ( cidr in Site::local_nets )
local_nets_table[cidr] = cidr;
}
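
A lookup sketch against the new table (the address and priority are illustrative; a handler at a lower priority runs after the table is populated at priority 10 above):

event bro_init() &priority=-10
    {
    if ( 10.0.0.5 in Site::local_nets_table )
        # The membership query yields the enclosing subnet.
        print Site::local_nets_table[10.0.0.5];
    }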