zeek/policy/large-conns.bro

336 lines
12 KiB
Text

# $Id: large-conns.bro 1332 2005-09-07 17:39:17Z vern $
# Written by Chema Gonzalez.
# Estimates the size of large "flows" (i.e., each direction of a TCP
# connection) by noting when their sequence numbers cross a set of regions
# in the sequence space. This can be done using a static packet filter,
# so is very efficient. It works for (TCP) traffic that Bro otherwise doesn't
# see.
# Usage
#
# 1) Set the appropriate number_of_regions and region_size:
#
# Modify the number_of_regions and (perhaps) region_size global
# variables. You do this *prior* to loading this script, so
# for example:
#
# const number_of_regions = 32;
# @load large-conns
#
# You do *not* redef them like you would with other script variables
# (this is because they need to be used directly in the initializations
# of other variables used by this script).
#
# Note that number_of_regions affects the granularity
# and definition of the script (see below).
#
# 2) To get an estimate of the true size of a flow, call:
#
# function estimate_flow_size_and_remove(cid: conn_id, orig: bool):
# flow_size_est
#
# If orig=T, then an estimate of the size of the forward (originator)
# direction is returned. If orig=F, then the reverse (responder)
# direction is returned. In both cases, what's returned is a
# flow_size_est, which includes a flag indicating whether there was
# any estimate formed, and, if the flag is T, a lower bound, an upper bound,
# and an inconsistency-count (which, if > 0, means that the estimates
# came from sequence numbers that were inconsistent, and thus something
# is wrong - perhaps packet drops by the secondary filter). Finally,
# calling this function causes the flow's record to be deleted. Perhaps
# at some point we'll need to add a version that just retrieves the
# estimate.
type flow_size_est: record {
have_est: bool;
lower: double &optional;
upper: double &optional;
num_inconsistent: count &optional;
};
global estimate_flow_size_and_remove:
function(cid: conn_id, orig: bool): flow_size_est;
module LargeConn;
# Rationale
#
# One of the mechanisms that Bro uses to detect large TCP flows is
# to calculate the difference in the sequence number (seq) field contents
# between the last packet (FIN or RST) and the first packet (SYN). This
# method may be wrong if a) the seq number is busted (which can happen
# frequently with RST termination), or b) the seq number wraps around
# the 4GB sequence number space (note that this is OK for TCP while
# there is no ambiguity on what a packet's sequence number means,
# due to its use of a window <= 2 GB in size).
#
# The purpose of this script is to resolve these ambiguities. In other
# words, help with differentiating truly large flows from flows with
# a busted seq, and detecting very large flows that wrap around the
# 4GB seq space.
#
# To do so, large-flow listens to a small group of thin regions in
# the sequence space, located at equal distances from each other. The idea
# is that a truly large flow will pass through the regions in
# an orderly fashion, maybe several times. This script keeps track of
# all packets that pass through any of the regions, counting the number
# of times a packet from a given flow passes through consecutive regions.
#
# Note that the exact number of regions, and the size of each region, can
# be controlled by redefining the global variables number_of_regions
# and region_size, respectively. Both should be powers of two (if not,
# they are rounded to be such), and default to 4 and 16KB, respectively.
# The effect of varying these parameters is the following:
#
# - Increasing number_of_regions will increase the granularity of the
# script, at the cost of elevating its cost in both processing (more
# packets will be seen) and memory (more flows will be seen).
# The granularity of the script is defined as the minimum variation
# in size the script can see. Its value is:
#
# granularity = (4GB / number_of_regions)
#
# For example, if we're using 4 regions, the minimum flow size difference
# that the script can see is 1GB.
#
# number_of_regions also affects the script definition, defined as the
# smallest size of a flow which ensures that the flow will be seen by
# the script. The script definition is:
#
# definition = (2 * granularity)
#
# The script sees no flow smaller than the granularity, some flows with
# size between granularity and definition, and all flows larger than
# definition. In our example, the script definition is 2GB (it will see
# for sure only flows bigger than 2GB).
#
# - Increasing region_size will only increase the resilience of the script
# to lost packets, at the cost of augmenting the cost in both processing
# and memory (see above). The default value of 16 KB is chosen to work
# in the presence of largish packets without too much additional work.
# Set up defaults, unless the user has already specified these. Note that
# these variables are *not* redef'able, since they are used in initializations
# later in this script (so a redef wouldn't be "seen" in time).
@ifndef ( number_of_regions )
const number_of_regions = 4;
@endif
@ifndef ( region_size )
const region_size = 16 * 1024; # 16 KB
@endif
# Track the regions visited for each flow.
type t_info: record {
last_region: count; # last region visited
num_regions: count; # number of regions visited
num_inconsistent: count; # num. inconsistent region crossings
};
# The state expiration for this table needs to be generous, as it's
# for tracking very large flows, which could be quite long-lived.
global flow_region_info: table[conn_id] of t_info &write_expire = 6 hr;
# Returns the integer logarithm in base b.
function logarithm(base: count, x: count): count
{
if ( x < base )
return 0;
else
return 1 + logarithm(base, x / base);
}
# Function used to get around Bro's lack of real ordered loop.
function do_while(i: count, max: count, total: count,
f: function(i: count, total: count): count): count
{
if ( i >= max )
return total;
else
return do_while(++i, max, f(--i, total), f);
}
function fn_mask_location(i: count, total: count): count
{
return total * 2 + 1;
}
function fn_filter_location(i: count, total: count): count
{
# The location pattern is 1010101010...
return total * 2 + (i % 2 == 0 ? 1 : 0);
}
function fn_common_region_size(i: count, total: count): count
{
return total * 2;
}
function get_interregion_distance(number_of_regions: count,
region_size: count): count
{
local bits_number_of_regions = logarithm(2, number_of_regions);
local bits_other = int_to_count(32 - bits_number_of_regions);
return do_while(0, bits_other, 1, fn_common_region_size);
}
global interregion_distance =
get_interregion_distance(number_of_regions, region_size);
# Returns an estiamte of size of the flow (one direction of a TCP connection)
# that this script has seen. This is based on the number of consecutive
# regions a flow has visited, weighted with the distance between regions.
#
# We know that the full sequence number space accounts for 4GB. This
# space comprises number_of_regions regions, separated from each other
# a (4GB / number_of_regions) distance. If a flow has been seen
# in X consecutive regions, it means that the size of the flow is
# greater than ((X - 1) * distance_between_regions) GB.
#
# Note that seeing a flow in just one region is no different from
# not seeing it at all.
function estimate_flow_size_and_remove(cid: conn_id, orig: bool): flow_size_est
{
local id = orig ? cid :
[$orig_h = cid$resp_h, $orig_p = cid$resp_p,
$resp_h = cid$orig_h, $resp_p = cid$orig_p];
if ( id !in flow_region_info )
return [$have_est = F];
local regions_crossed =
int_to_count(flow_region_info[id]$num_regions - 1);
local lower = regions_crossed * interregion_distance * 1.0;
local upper = lower + interregion_distance * 2.0;
local num_inconsis = flow_region_info[id]$num_inconsistent;
delete flow_region_info[id];
return [$have_est = T, $lower = lower, $upper = upper,
$num_inconsistent = num_inconsis];
}
# Returns a tcpdump filter corresponding to the number of regions and
# region size requested by the user.
#
# How to calculate the tcpdump filter used to hook packet_event to the
# secondary filter system? We are interested only in TCP packets whose
# seq number belongs to any of the test slices. Let's focus on the case
# of 4 regions, 16KB per region.
#
# The mask should be: [ x x L L L ... L L L x x ... x ]
# <---><---------------><--------->
# | | |
# | | +-> suffix: region size
# | +-> location: remaining bits
# +-> prefix: number of equidistant regions
#
# The 32-bit seq number is masked as follows:
#
# - suffix: defines size of the regions (16KB implies log_2(16KB) = 14 bits)
#
# - location: defines the exact location of the 4 regions. Note that, to
# minimize the amount of data we keep, the location will be distinct from
# zero, so segments with seq == 0 are not in a valid region
#
# - prefix: defines number of regions (4 implies log_2(4) = 2 bits)
#
# E.g., the mask will be seq_number & 0011...1100..00_2 = 00LL..LL00..00_2,
# which, by setting the location to 1010101010101010, will finally be
# seq_number & 0011...1100..00_2 = 00101010101010101000..00_2, i.e.,
# seq_number & 0x3fffc000 = 0x2aaa8000.
#
# For that particular parameterization, we'd like to wind up with a
# packet event filter of "(tcp[4:4] & 0x3fffc000) == 0x2aaa8000".
function get_event_filter(number_of_regions: count, region_size: count): string
{
local bits_number_of_regions = logarithm(2, number_of_regions);
local bits_region_size = logarithm(2, region_size);
local bits_remaining =
int_to_count(32 - bits_number_of_regions - bits_region_size);
# Set the bits corresponding to the location:
# i = 0;
# while ( i < bits_remaining )
# {
# mask = (mask * 2) + 1;
# filter = (filter * 2) + (((i % 2) == 0) ? 1 : 0);
# ++i;
# }
local mask = do_while(0, bits_remaining, 0, fn_mask_location);
local filter = do_while(0, bits_remaining, 0, fn_filter_location);
# Set the bits corrsponding to the region size
# i = 0;
# while ( i < bits_region_size )
# {
# mask = mask * 2;
# filter = filter * 2;
# ++i;
# }
mask = do_while(0, bits_region_size, mask, fn_common_region_size);
filter = do_while(0, bits_region_size, filter, fn_common_region_size);
return fmt("(tcp[4:4] & 0x%x) == 0x%x", mask, filter);
}
# packet_event --
#
# This event is raised once per (TCP) packet falling into any of the regions.
# It updates the flow_region_info table.
event packet_event(filter: string, pkt: pkt_hdr)
{
# Distill the region from the seq number.
local region = pkt$tcp$seq / interregion_distance;
# Get packet info and update global counters.
local cid = [$orig_h = pkt$ip$src, $orig_p = pkt$tcp$sport,
$resp_h = pkt$ip$dst, $resp_p = pkt$tcp$dport];
if ( cid !in flow_region_info )
{
flow_region_info[cid] =
[$last_region = region, $num_regions = 1,
$num_inconsistent = 0];
return;
}
local info = flow_region_info[cid];
local next_region = (info$last_region + 1) % number_of_regions;
if ( region == next_region )
{ # flow seen in the next region
info$last_region = region;
++info$num_regions;
}
else if ( region == info$last_region )
{ # flow seen in the same region, ignore
}
else
{
# Flow seen in another region (not the next one).
info$last_region = region;
info$num_regions = 1; # restart accounting
++info$num_inconsistent;
}
}
# Glue the filter into the secondary filter hookup.
global packet_event_filter = get_event_filter(number_of_regions, region_size);
redef secondary_filters += { [packet_event_filter] = packet_event };