From 6db77dc28531d0a2bc10bf2dcc8afe19ed31483e Mon Sep 17 00:00:00 2001
From: Seth Hall
Date: Wed, 28 Sep 2011 00:08:53 -0400
Subject: [PATCH] Ported the capture-loss script back into Bro.

- I removed the byte loss estimate since everything indicated that it
  could frequently be inaccurate.
- It has its own logging stream now. Notices are only generated when
  too much loss has been detected (CaptureLoss::Too_Much_Loss).
- The gap_report event isn't used anymore. I'm scheduling events to
  get the same effect, using the get_gap_summary BiF to collect the
  gap summary on demand.
---
 scripts/policy/misc/capture-loss.bro | 86 ++++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)
 create mode 100644 scripts/policy/misc/capture-loss.bro

diff --git a/scripts/policy/misc/capture-loss.bro b/scripts/policy/misc/capture-loss.bro
new file mode 100644
index 0000000000..d2c17833d6
--- /dev/null
+++ b/scripts/policy/misc/capture-loss.bro
@@ -0,0 +1,86 @@
+##! This script logs evidence regarding the degree to which the packet
+##! capture process suffers from measurement loss.
+##!
+##! The loss could be due to overload on the host or NIC performing
+##! the packet capture, or it could even be beyond the host. If you are
+##! capturing from a switch with a SPAN port, it's very possible that
+##! the switch itself could be overloaded and dropping packets.
+##! Reported loss is computed in terms of the number of "gap events"
+##! (ACKs for a sequence number that's above a gap).
+
+@load base/frameworks/notice
+@load base/frameworks/metrics
+
+module CaptureLoss;
+
+export {
+	redef enum Log::ID += { LOG };
+
+	redef enum Notice::Type += {
+		## Report if the detected capture loss exceeds the percentage
+		## threshold.
+		Too_Much_Loss
+	};
+
+	type Info: record {
+		## Timestamp for when the measurement occurred.
+		ts: time &log;
+		## The time delay between this measurement and the last.
+		ts_delta: interval &log;
+		## In the event that there are multiple Bro instances logging
+		## to the same host, this distinguishes each peer by its
+		## individual name.
+		peer: string &log;
+		## Number of ACKs seen in the previous measurement interval.
+		acks: count &log;
+		## Number of missed ACKs from the previous measurement interval.
+		gaps: count &log;
+		## Percentage of ACKs seen where the data being ACKed wasn't seen.
+		percent_lost: double &log;
+	};
+
+	## The interval at which capture loss reports are created.
+	const watch_interval = 15mins &redef;
+
+	## The percentage of missed data that is considered "too much"
+	## before the :bro:enum:`CaptureLoss::Too_Much_Loss` notice is
+	## generated. The value is expressed as a double between 0 and 1,
+	## with 1 being 100%.
+	const too_much_loss: double = 0.1 &redef;
+}
+
+event CaptureLoss::take_measurement(last_ts: time, last_acks: count, last_gaps: count)
+	{
+	if ( last_ts == 0 )
+		{
+		schedule watch_interval { CaptureLoss::take_measurement(network_time(), 0, 0) };
+		return;
+		}
+
+	local now = network_time();
+	local g = get_gap_summary();
+	local acks = g$ack_events - last_acks;
+	local gaps = g$gap_events - last_gaps;
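+	# For example (illustrative counts, not taken from any real run):
+	# 5 new gap events against 1000 new ACK events in an interval would
+	# yield 100 * 5 / 1000 = 0.5 below, i.e. an estimated 0.5% loss.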
+	local pct_lost = (acks == 0) ? 0.0 : (100 * (1.0 * gaps) / (1.0 * acks));
+	local info: Info = [$ts=now,
+	                    $ts_delta=now-last_ts,
+	                    $peer=peer_description,
+	                    $acks=acks, $gaps=gaps,
+	                    $percent_lost=pct_lost];
+
+	if ( pct_lost >= too_much_loss*100 )
+		NOTICE([$note=Too_Much_Loss,
+		        $msg=fmt("The capture loss script detected an estimated loss rate above %.1f%%", pct_lost)]);
+
+	Log::write(LOG, info);
+	schedule watch_interval { CaptureLoss::take_measurement(now, g$ack_events, g$gap_events) };
+	}
+
+event bro_init() &priority=5
+	{
+	Log::create_stream(LOG, [$columns=Info]);
+
+	# Only schedule the event if we are capturing packets or reading a trace.
+	if ( reading_live_traffic() || reading_traces() )
+		schedule watch_interval { CaptureLoss::take_measurement(network_time(), 0, 0) };
+	}
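
A note on deployment (a minimal sketch, not part of the patch itself):
once the script is installed, a site can load it and tune its redef'able
constants from local.bro. The values below are illustrative, and the load
path assumes the default BROPATH, which puts scripts/policy/ on the
search path:

    @load misc/capture-loss

    # Take a measurement every 5 minutes instead of the default 15.
    redef CaptureLoss::watch_interval = 5mins;

    # Raise CaptureLoss::Too_Much_Loss at an estimated 5% loss.
    redef CaptureLoss::too_much_loss = 0.05;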