From 6db77dc28531d0a2bc10bf2dcc8afe19ed31483e Mon Sep 17 00:00:00 2001
From: Seth Hall
Date: Wed, 28 Sep 2011 00:08:53 -0400
Subject: [PATCH] Ported the capture-loss script back into Bro.

- I removed the byte loss estimate since everything indicated that it
  could frequently be inaccurate.
- It has its own logging stream now. Notices are only generated when
  too much loss has been detected (CaptureLoss::Too_Much_Loss).
- The gap_report event isn't used anymore. I'm scheduling events to
  get the same effect, using the get_gap_summary BiF to collect the
  gap summary on demand.
---
 scripts/policy/misc/capture-loss.bro | 86 ++++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)
 create mode 100644 scripts/policy/misc/capture-loss.bro

diff --git a/scripts/policy/misc/capture-loss.bro b/scripts/policy/misc/capture-loss.bro
new file mode 100644
index 0000000000..d2c17833d6
--- /dev/null
+++ b/scripts/policy/misc/capture-loss.bro
@@ -0,0 +1,86 @@
+##! This script logs evidence regarding the degree to which the packet
+##! capture process suffers from measurement loss.
+##!
+##! The loss could be due to overload on the host or NIC performing
+##! the packet capture, or it could even be beyond the host. If you are
+##! capturing from a switch with a SPAN port, it's very possible that
+##! the switch itself could be overloaded and dropping packets.
+##! Reported loss is computed in terms of the number of "gap events"
+##! (ACKs for a sequence number that's above a gap).
+
+@load base/frameworks/notice
+@load base/frameworks/metrics
+
+module CaptureLoss;
+
+export {
+	redef enum Log::ID += { LOG };
+
+	redef enum Notice::Type += {
+		## Report if the detected capture loss exceeds the percentage
+		## threshold.
+		Too_Much_Loss
+	};
+
+	type Info: record {
+		## Timestamp for when the measurement occurred.
+		ts: time &log;
+		## The time delay between this measurement and the last.
+		ts_delta: interval &log;
+		## In the event that there are multiple Bro instances logging
+		## to the same host, this distinguishes each peer by its
+		## individual name.
+		peer: string &log;
+		## Number of ACKs seen in the previous measurement interval.
+		acks: count &log;
+		## Number of missed ACKs from the previous measurement interval.
+		gaps: count &log;
+		## Percentage of ACKs seen where the data being ACKed wasn't seen.
+		percent_lost: double &log;
+	};
+
+	## The interval at which capture loss reports are created.
+	const watch_interval = 15mins &redef;
+
+	## The percentage of missed data that is considered "too much"
+	## before the :bro:enum:`CaptureLoss::Too_Much_Loss` notice is
+	## generated. The value is expressed as a double between 0 and 1,
+	## with 1 being 100%.
+	const too_much_loss: double = 0.1 &redef;
+}
+
+event CaptureLoss::take_measurement(last_ts: time, last_acks: count, last_gaps: count)
+	{
+	if ( last_ts == 0 )
+		{
+		schedule watch_interval { CaptureLoss::take_measurement(network_time(), 0, 0) };
+		return;
+		}
+
+	local now = network_time();
+	local g = get_gap_summary();
+	local acks = g$ack_events - last_acks;
+	local gaps = g$gap_events - last_gaps;
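+	# For example (illustrative counts, not taken from any real run):
+	# 5 new gap events against 1000 new ACK events in an interval would
+	# yield 100 * 5 / 1000 = 0.5 below, i.e. an estimated 0.5% loss.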
+	local pct_lost = (acks == 0) ? 0.0 : (100 * (1.0 * gaps) / (1.0 * acks));
+	local info: Info = [$ts=now,
+	                    $ts_delta=now-last_ts,
+	                    $peer=peer_description,
+	                    $acks=acks, $gaps=gaps,
+	                    $percent_lost=pct_lost];
+
+	if ( pct_lost >= too_much_loss*100 )
+		NOTICE([$note=Too_Much_Loss,
+		        $msg=fmt("The capture loss script detected an estimated loss rate above %.1f%%", pct_lost)]);
+
+	Log::write(LOG, info);
+	schedule watch_interval { CaptureLoss::take_measurement(now, g$ack_events, g$gap_events) };
+	}
+
+event bro_init() &priority=5
+	{
+	Log::create_stream(LOG, [$columns=Info]);
+
+	# Only schedule the event if we are capturing packets or reading a trace.
+	if ( reading_live_traffic() || reading_traces() )
+		schedule watch_interval { CaptureLoss::take_measurement(network_time(), 0, 0) };
+	}
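
A note on deployment (a minimal sketch, not part of the patch itself):
once the script is installed, a site can load it and tune its redef'able
constants from local.bro. The values below are illustrative, and the load
path assumes the default BROPATH, which puts scripts/policy/ on the
search path:

    @load misc/capture-loss

    # Take a measurement every 5 minutes instead of the default 15.
    redef CaptureLoss::watch_interval = 5mins;

    # Raise CaptureLoss::Too_Much_Loss at an estimated 5% loss.
    redef CaptureLoss::too_much_loss = 0.05;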