# $Id: large-conns.bro 1332 2005-09-07 17:39:17Z vern $

# Written by Chema Gonzalez.


# Estimates the size of large "flows" (i.e., each direction of a TCP
# connection) by noting when their sequence numbers cross a set of regions
# in the sequence space.  This can be done using a static packet filter,
# so is very efficient.  It works for (TCP) traffic that Bro otherwise doesn't
# see.

# Usage
#
# 1) Set the appropriate number_of_regions and region_size:
#
#    Modify the number_of_regions and (perhaps) region_size global
#    variables.  You do this *prior* to loading this script, so
#    for example:
#
#	const number_of_regions = 32;
#	@load large-conns
#
#    You do *not* redef them like you would with other script variables
#    (this is because they need to be used directly in the initializations
#    of other variables used by this script).
#
#    Note that number_of_regions affects the granularity
#    and definition of the script (see below).
#
# 2) To get an estimate of the true size of a flow, call:
#
#	function estimate_flow_size_and_remove(cid: conn_id, orig: bool):
#								flow_size_est
#
#    If orig=T, then an estimate of the size of the forward (originator)
#    direction is returned.  If orig=F, then the reverse (responder)
#    direction is returned.  In both cases, what's returned is a
#    flow_size_est, which includes a flag indicating whether there was
#    any estimate formed, and, if the flag is T, a lower bound, an upper bound,
#    and an inconsistency-count (which, if > 0, means that the estimates
#    came from sequence numbers that were inconsistent, and thus something
#    is wrong - perhaps packet drops by the secondary filter).  Finally,
#    calling this function causes the flow's record to be deleted.  Perhaps
#    at some point we'll need to add a version that just retrieves the
#    estimate.

type flow_size_est: record {
	have_est: bool;
	lower: double &optional;
	upper: double &optional;
	num_inconsistent: count &optional;
};

global estimate_flow_size_and_remove:
	function(cid: conn_id, orig: bool): flow_size_est;

module LargeConn;


# Rationale
#
# One of the mechanisms that Bro uses to detect large TCP flows is 
# to calculate the difference in the sequence number (seq) field contents 
# between the last packet (FIN or RST) and the first packet (SYN). This 
# method may be wrong if a) the seq number is busted (which can happen
# frequently with RST termination), or b) the seq number wraps around
# the 4GB sequence number space (note that this is OK for TCP while
# there is no ambiguity on what a packet's sequence number means,
# due to its use of a window <= 2 GB in size).
#
# The purpose of this script is to resolve these ambiguities. In other 
# words, help with differentiating truly large flows from flows with
# a busted seq, and detecting very large flows that wrap around the
# 4GB seq space. 
#
# To do so, large-flow listens to a small group of thin regions in
# the sequence space, located at equal distances from each other. The idea 
# is that a truly large flow will pass through the regions in 
# an orderly fashion, maybe several times. This script keeps track of 
# all packets that pass through any of the regions, counting the number 
# of times a packet from a given flow passes through consecutive regions. 
#
# Note that the exact number of regions, and the size of each region, can 
# be controlled by redefining the global variables number_of_regions 
# and region_size, respectively.  Both should be powers of two (if not,
# they are rounded to be such), and default to 4 and 16KB, respectively. 
# The effect of varying these parameters is the following:
#
# - Increasing number_of_regions will increase the granularity of the 
#   script, at the cost of elevating its cost in both processing (more 
#   packets will be seen) and memory (more flows will be seen). 
#   The granularity of the script is defined as the minimum variation 
#   in size the script can see. Its value is: 
#
#     granularity = (4GB / number_of_regions)
#
#   For example, if we're using 4 regions, the minimum flow size difference
#   that the script can see is 1GB.
#
#   number_of_regions also affects the script definition, defined as the 
#   smallest size of a flow which ensures that the flow will be seen by
#   the script. The script definition is:
#
#     definition = (2 * granularity)
#
#   The script sees no flow smaller than the granularity, some flows with
#   size between granularity and definition, and all flows larger than
#   definition. In our example, the script definition is 2GB (it will see
#   for sure only flows bigger than 2GB). 
#
# - Increasing region_size will only increase the resilience of the script 
#   to lost packets, at the cost of augmenting the cost in both processing 
#   and memory (see above). The default value of 16 KB is chosen to work
#   in the presence of largish packets without too much additional work.

# Set up defaults, unless the user has already specified these.  Note that
# these variables are *not* redef'able, since they are used in initializations
# later in this script (so a redef wouldn't be "seen" in time).
@ifndef ( number_of_regions )
	const number_of_regions = 4;
@endif
@ifndef ( region_size )
	const region_size = 16 * 1024;	# 16 KB
@endif


# Track the regions visited for each flow.
type t_info: record {
	last_region: count;	# last region visited
	num_regions: count;	# number of regions visited
	num_inconsistent: count;	# num. inconsistent region crossings
};

# The state expiration for this table needs to be generous, as it's
# for tracking very large flows, which could be quite long-lived.
global flow_region_info: table[conn_id] of t_info &write_expire = 6 hr;


# Returns the integer logarithm in base b.
function logarithm(base: count, x: count): count
	{
	if ( x < base )
		return 0;
	else
		return 1 + logarithm(base, x / base);
	}


# Function used to get around Bro's lack of real ordered loop.
function do_while(i: count, max: count, total: count,
			f: function(i: count, total: count): count): count
	{
	if ( i >= max )
		return total;
	else
		return do_while(++i, max, f(--i, total), f);
	}

function fn_mask_location(i: count, total: count): count
	{
	return total * 2 + 1;
	}

function fn_filter_location(i: count, total: count): count
	{
	# The location pattern is 1010101010...
	return total * 2 + (i % 2 == 0 ? 1 : 0);
	}

function fn_common_region_size(i: count, total: count): count
	{
	return total * 2;
	}


function get_interregion_distance(number_of_regions: count,
					region_size: count): count
	{
	local bits_number_of_regions = logarithm(2, number_of_regions);
	local bits_other = int_to_count(32 - bits_number_of_regions);

	return do_while(0, bits_other, 1, fn_common_region_size);
	}


global interregion_distance =
	get_interregion_distance(number_of_regions, region_size);


# Returns an estiamte of size of the flow (one direction of a TCP connection)
# that this script has seen. This is based on the number of consecutive
# regions a flow has visited, weighted with the distance between regions.  
#
# We know that the full sequence number space accounts for 4GB. This 
# space comprises number_of_regions regions, separated from each other 
# a (4GB / number_of_regions) distance. If a flow has been seen 
# in X consecutive regions, it means that the size of the flow is 
# greater than ((X - 1) * distance_between_regions) GB. 
#
# Note that seeing a flow in just one region is no different from 
# not seeing it at all. 
function estimate_flow_size_and_remove(cid: conn_id, orig: bool): flow_size_est
	{
	local id = orig ? cid :
			  [$orig_h = cid$resp_h, $orig_p = cid$resp_p,
			   $resp_h = cid$orig_h, $resp_p = cid$orig_p];

	if ( id !in flow_region_info )
		return [$have_est = F];

	local regions_crossed =
		int_to_count(flow_region_info[id]$num_regions - 1);

	local lower = regions_crossed * interregion_distance * 1.0;
	local upper = lower + interregion_distance * 2.0;
	local num_inconsis = flow_region_info[id]$num_inconsistent;

	delete flow_region_info[id];

	return [$have_est = T, $lower = lower, $upper = upper,
		$num_inconsistent = num_inconsis];
	}


# Returns a tcpdump filter corresponding to the number of regions and 
# region size requested by the user.
#
# How to calculate the tcpdump filter used to hook packet_event to the 
# secondary filter system?  We are interested only in TCP packets whose
# seq number belongs to any of the test slices. Let's focus on the case
# of 4 regions, 16KB per region.
#
# The mask should be: [ x x  L L L ... L L L  x x ... x ]
#                      <---><---------------><--------->
#                       |          |            |
#                       |          |            +-> suffix: region size
#                       |          +-> location: remaining bits
#                       +-> prefix: number of equidistant regions
#
# The 32-bit seq number is masked as follows: 
#
#   - suffix: defines size of the regions (16KB implies log_2(16KB) = 14 bits)
#
#   - location: defines the exact location of the 4 regions. Note that, to 
#     minimize the amount of data we keep, the location will be distinct from
#     zero, so segments with seq == 0 are not in a valid region
#
#   - prefix: defines number of regions (4 implies log_2(4) = 2 bits)
#
# E.g., the mask will be seq_number & 0011...1100..00_2 = 00LL..LL00..00_2,
# which, by setting the location to 1010101010101010, will finally be
# seq_number & 0011...1100..00_2 = 00101010101010101000..00_2, i.e., 
# seq_number & 0x3fffc000 = 0x2aaa8000. 
#
# For that particular parameterization, we'd like to wind up with a
# packet event filter of "(tcp[4:4] & 0x3fffc000) == 0x2aaa8000".

function get_event_filter(number_of_regions: count, region_size: count): string
	{
	local bits_number_of_regions = logarithm(2, number_of_regions);
	local bits_region_size = logarithm(2, region_size);
	local bits_remaining = 
		int_to_count(32 - bits_number_of_regions - bits_region_size);

	# Set the bits corresponding to the location:
	#	i = 0;
	#	while ( i < bits_remaining )
	#		{
	#		mask = (mask * 2) + 1;
	#		filter = (filter * 2) + (((i % 2) == 0) ? 1 : 0);
	#		++i;
	#		}
	local mask = do_while(0, bits_remaining, 0, fn_mask_location);
	local filter = do_while(0, bits_remaining, 0, fn_filter_location);

	# Set the bits corrsponding to the region size
	#	i = 0;
	#	while ( i < bits_region_size )
	#		{
	#		mask = mask * 2;
	#		filter = filter * 2;
	#		++i;
	#		}
	mask = do_while(0, bits_region_size, mask, fn_common_region_size);
	filter = do_while(0, bits_region_size, filter, fn_common_region_size);

	return fmt("(tcp[4:4] & 0x%x) == 0x%x", mask, filter);
	}


# packet_event --
#
# This event is raised once per (TCP) packet falling into any of the regions. 
# It updates the flow_region_info table. 
event packet_event(filter: string, pkt: pkt_hdr)
	{
	# Distill the region from the seq number.
	local region = pkt$tcp$seq / interregion_distance;

	# Get packet info and update global counters.
	local cid = [$orig_h = pkt$ip$src, $orig_p = pkt$tcp$sport,
			$resp_h = pkt$ip$dst, $resp_p = pkt$tcp$dport];

	if ( cid !in flow_region_info )
		{
		flow_region_info[cid] =
			[$last_region = region, $num_regions = 1,
			 $num_inconsistent = 0];
		return;
		}

	local info = flow_region_info[cid];
	local next_region = (info$last_region + 1) % number_of_regions;

	if ( region == next_region )
		{ # flow seen in the next region
		info$last_region = region;
		++info$num_regions;
		}

	else if ( region == info$last_region )
		{ # flow seen in the same region, ignore
		}
	else 
		{
		# Flow seen in another region (not the next one).
		info$last_region = region;
		info$num_regions = 1;	# restart accounting
		++info$num_inconsistent;
		}
	}


# Glue the filter into the secondary filter hookup.
global packet_event_filter = get_event_filter(number_of_regions, region_size);
redef secondary_filters += { [packet_event_filter] = packet_event };