Moved netstats (packet loss) handling into script-land.

- Removed the net_stats_update event.
- Created a net_stats function for building and retrieving the
  current network statistics.
- Removed the internal timer for firing the net_stats_update event
  along with the global heartbeat_interval variable.
- Updated the netstats script to use the new BiF.
- Updated the stats script to use the new BiF.
This commit is contained in:
Seth Hall 2011-06-17 09:02:50 -04:00
parent 871eff9f90
commit c60015af22
10 changed files with 75 additions and 174 deletions

View file

@ -105,6 +105,14 @@ type SYN_packet: record {
SACK_OK: bool; SACK_OK: bool;
}; };
## Packet-capture statistics obtained from the core through the
## :bro:id:`net_stats` BiF. All counts are cumulative, and every field
## defaults to 0 when it is not explicitly set.
type NetStats: record {
pkts_recvd: count &default=0; ##< Packets received by Bro.
pkts_dropped: count &default=0; ##< Packets dropped.
pkts_link: count &default=0; ##< Packets seen on the link (not always available).
};
type bro_resources: record { type bro_resources: record {
version: string; # Bro version string version: string; # Bro version string
debug: bool; # true if compiled with --enable-debug debug: bool; # true if compiled with --enable-debug
@ -606,17 +614,7 @@ global discarder_check_udp: function(i: ip_hdr, u: udp_hdr, d: string): bool;
global discarder_check_icmp: function(i: ip_hdr, ih: icmp_hdr): bool; global discarder_check_icmp: function(i: ip_hdr, ih: icmp_hdr): bool;
# End of definition of access to packet headers, discarders. # End of definition of access to packet headers, discarders.
type net_stats: record {
# All counts are cumulative.
pkts_recvd: count; # pkts received by Bro
pkts_dropped: count; # pkts dropped
pkts_link: count; # pkts seen on link (not always available)
};
const watchdog_interval = 10 sec &redef; const watchdog_interval = 10 sec &redef;
const heartbeat_interval = 10 sec &redef;
# The maximum number of timers to expire after processing each new # The maximum number of timers to expire after processing each new
# packet. The value trades off spreading out the timer expiration load # packet. The value trades off spreading out the timer expiration load

View file

@ -6,29 +6,27 @@ redef enum Notice += {
DroppedPackets, # Bro reported packets dropped by the packet filter DroppedPackets, # Bro reported packets dropped by the packet filter
}; };
global last_stat: net_stats; const stats_collection_interval = 10secs;
global last_stat_time: time;
global have_stats = F;
event net_stats_update(t: time, ns: net_stats) event net_stats_update(last_stat: NetStats)
{ {
if ( have_stats ) local ns = net_stats();
local new_dropped = ns$pkts_dropped - last_stat$pkts_dropped;
if ( new_dropped > 0 )
{ {
local new_dropped = ns$pkts_dropped - last_stat$pkts_dropped; local new_recvd = ns$pkts_recvd - last_stat$pkts_recvd;
if ( new_dropped > 0 ) local new_link = ns$pkts_link - last_stat$pkts_link;
{ NOTICE([$note=DroppedPackets,
local new_recvd = ns$pkts_recvd - last_stat$pkts_recvd; $msg=fmt("%d packets dropped after filtering, %d received%s",
local new_link = ns$pkts_link - last_stat$pkts_link; new_dropped, new_recvd + new_dropped,
NOTICE([$note=DroppedPackets, new_link != 0 ?
$msg=fmt("%d packets dropped after filtering, %d received%s", fmt(", %d on link", new_link) : "")]);
new_dropped, new_recvd + new_dropped,
new_link != 0 ?
fmt(", %d on link", new_link) : "")]);
}
} }
else
have_stats = T;
last_stat = ns; schedule stats_collection_interval { net_stats_update(ns) };
last_stat_time = t; }
event bro_init()
{
schedule stats_collection_interval { net_stats_update(net_stats()) };
} }

View file

@ -15,58 +15,47 @@ redef notice_action_filters += {
[[ResourceStats, OfflineResourceStats]] = file_notice [[ResourceStats, OfflineResourceStats]] = file_notice
}; };
global last_stats_time = current_time();
global last_stats_CPU_time =
resource_usage()$user_time + resource_usage()$system_time;
# Global to store the last net_stats object received.
global last_packet_stat: net_stats;
# Globals to store the results between reporting intervals
global stat_packets_received = 0;
global stat_packets_dropped = 0;
global stat_packets_link = 0;
global last_packets_processed = 0;
global last_events_dispatched = 0;
global last_events_queued = 0;
# Interval in which the results are sent as a notice. If this is less # Interval in which the results are sent as a notice. If this is less
# than heartbeat_interval, then it is set to heartbeat_interval, since # than heartbeat_interval, then it is set to heartbeat_interval, since
# some of the reported statistics are only gathered via the heartbeat. # some of the reported statistics are only gathered via the heartbeat.
global stats_report_interval = 10 sec &redef; global stats_report_interval = 10 sec &redef;
event check_stats() event check_stats(last_time: time, last_ns: NetStats, last_res: bro_resources)
{ {
local now = current_time(); local now = current_time();
local lag = now - network_time(); local lag = now - network_time();
local report_delta = now - last_stats_time; local report_delta = now - last_time;
local res = resource_usage(); local res = resource_usage();
local mem = res$mem; local ns = net_stats();
local total_CPU_time = res$user_time + res$system_time; local total_CPU_time = res$user_time + res$system_time;
local CPU_util = (total_CPU_time - last_stats_CPU_time) / report_delta; local last_CPU_time = last_res$user_time + last_res$system_time;
local CPU_util = ((total_CPU_time - last_CPU_time) / report_delta) * 100.0;
local pkts_recvd = ns$pkts_recvd - last_ns$pkts_recvd;
local pkts_dropped = ns$pkts_dropped - last_ns$pkts_dropped;
local pkts_link = ns$pkts_link - last_ns$pkts_link;
if ( bro_is_terminating() ) if ( bro_is_terminating() )
# No more stats will be written or scheduled when Bro is # No more stats will be written or scheduled when Bro is
# shutting down. # shutting down.
return; return;
local delta_pkts_processed = res$num_packets - last_packets_processed; local delta_pkts_processed = res$num_packets - last_res$num_packets;
local delta_events = res$num_events_dispatched - last_events_dispatched; local delta_events = res$num_events_dispatched - last_res$num_events_dispatched;
local delta_queued = res$num_events_queued - last_events_queued; local delta_queued = res$num_events_queued - last_res$num_events_queued;
local stat_msg = local stat_msg =
fmt("mem=%dMB pkts_proc=%d events_proc=%d events_queued=%d", fmt("mem=%dMB pkts_proc=%d events_proc=%d events_queued=%d",
mem / 1000000, delta_pkts_processed, res$mem / 1000000, delta_pkts_processed,
delta_events, delta_queued); delta_events, delta_queued);
if ( reading_live_traffic() ) if ( reading_live_traffic() )
{ {
stat_msg = fmt("%s et=%.2f lag=%fsec util=%.01f%% pkts_rcv=%d pkts_drp=%d pkts_link=%d", stat_msg = fmt("%s et=%.2f lag=%fsec util=%.01f%% pkts_rcv=%d pkts_drp=%d pkts_link=%d",
stat_msg, report_delta, lag, CPU_util * 100.0, stat_msg, report_delta, lag, CPU_util,
stat_packets_received, stat_packets_dropped, pkts_recvd, pkts_dropped, pkts_link);
stat_packets_link);
NOTICE([$note=ResourceStats, $msg=stat_msg]); NOTICE([$note=ResourceStats, $msg=stat_msg]);
} }
@ -77,57 +66,16 @@ event check_stats()
{ {
# Remote communication only. # Remote communication only.
stat_msg = fmt("mem=%dMB events_proc=%d events_queued=%d lag=%fsec util=%.01f%%", stat_msg = fmt("mem=%dMB events_proc=%d events_queued=%d lag=%fsec util=%.01f%%",
mem / 1000000, delta_events, delta_queued, res$mem / 1000000, delta_events, delta_queued,
lag, CPU_util * 100.0 ); lag, CPU_util);
NOTICE([$note=ResourceStats, $msg=stat_msg]); NOTICE([$note=ResourceStats, $msg=stat_msg]);
} }
last_stats_time = now; print "did stats!";
last_stats_CPU_time = total_CPU_time; schedule stats_report_interval { check_stats(now, ns, res) };
last_packets_processed = res$num_packets;
last_events_dispatched = res$num_events_dispatched;
last_events_queued = res$num_events_queued;
stat_packets_received = 0;
stat_packets_dropped = 0;
schedule stats_report_interval { check_stats() };
}
event net_stats_update(t: time, ns: net_stats)
{
if ( ns$pkts_recvd > last_packet_stat$pkts_recvd )
stat_packets_received +=
ns$pkts_recvd - last_packet_stat$pkts_recvd;
if ( ns$pkts_dropped > last_packet_stat$pkts_dropped )
stat_packets_dropped +=
ns$pkts_dropped - last_packet_stat$pkts_dropped;
if ( ns$pkts_link > last_packet_stat$pkts_link )
stat_packets_link += ns$pkts_link - last_packet_stat$pkts_link;
last_packet_stat = ns;
}
event start_check_stats()
{
# Can't start reporting data until network_time() is up.
local zero_time: time = 0;
if ( network_time() > zero_time )
schedule stats_report_interval { check_stats() };
else
schedule stats_report_interval { start_check_stats() };
} }
event bro_init() event bro_init()
{ {
last_packet_stat$pkts_recvd = last_packet_stat$pkts_dropped = schedule stats_report_interval { check_stats(current_time(), net_stats(), resource_usage()) };
last_packet_stat$pkts_link = 0;
if ( stats_report_interval < heartbeat_interval )
stats_report_interval = heartbeat_interval;
schedule stats_report_interval { start_check_stats() };
} }

View file

@ -514,6 +514,7 @@ void init_builtin_funcs()
{ {
ftp_port = internal_type("ftp_port")->AsRecordType(); ftp_port = internal_type("ftp_port")->AsRecordType();
bro_resources = internal_type("bro_resources")->AsRecordType(); bro_resources = internal_type("bro_resources")->AsRecordType();
NetStats = internal_type("NetStats")->AsRecordType();
matcher_stats = internal_type("matcher_stats")->AsRecordType(); matcher_stats = internal_type("matcher_stats")->AsRecordType();
var_sizes = internal_type("var_sizes")->AsTableType(); var_sizes = internal_type("var_sizes")->AsTableType();
gap_info = internal_type("gap_info")->AsRecordType(); gap_info = internal_type("gap_info")->AsRecordType();

View file

@ -19,10 +19,7 @@ RecordType* signature_state;
EnumType* transport_proto; EnumType* transport_proto;
TableType* string_set; TableType* string_set;
RecordType* net_stats;
int watchdog_interval; int watchdog_interval;
double heartbeat_interval;
int max_timer_expires; int max_timer_expires;
int max_remote_events_processed; int max_remote_events_processed;
@ -407,10 +404,7 @@ void init_net_var()
ntp_session_timeout = opt_internal_double("ntp_session_timeout"); ntp_session_timeout = opt_internal_double("ntp_session_timeout");
rpc_timeout = opt_internal_double("rpc_timeout"); rpc_timeout = opt_internal_double("rpc_timeout");
net_stats = internal_type("net_stats")->AsRecordType();
watchdog_interval = int(opt_internal_double("watchdog_interval")); watchdog_interval = int(opt_internal_double("watchdog_interval"));
heartbeat_interval = opt_internal_double("heartbeat_interval");
max_timer_expires = opt_internal_int("max_timer_expires"); max_timer_expires = opt_internal_int("max_timer_expires");
max_remote_events_processed = max_remote_events_processed =

View file

@ -22,10 +22,7 @@ extern RecordType* pcap_packet;
extern EnumType* transport_proto; extern EnumType* transport_proto;
extern TableType* string_set; extern TableType* string_set;
extern RecordType* net_stats;
extern int watchdog_interval; extern int watchdog_interval;
extern double heartbeat_interval;
extern int max_timer_expires; extern int max_timer_expires;
extern int max_remote_events_processed; extern int max_remote_events_processed;

View file

@ -44,27 +44,6 @@ enum NetBIOS_Service {
NetSessions* sessions; NetSessions* sessions;
class NetworkTimer : public Timer {
public:
NetworkTimer(NetSessions* arg_sess, double arg_t)
: Timer(arg_t, TIMER_NETWORK)
{ sess = arg_sess; }
void Dispatch(double t, int is_expire);
protected:
NetSessions* sess;
};
void NetworkTimer::Dispatch(double t, int is_expire)
{
if ( is_expire )
return;
sess->HeartBeat(t);
}
void TimerMgrExpireTimer::Dispatch(double t, int is_expire) void TimerMgrExpireTimer::Dispatch(double t, int is_expire)
{ {
if ( mgr->LastAdvance() + timer_mgr_inactivity_timeout < timer_mgr->Time() ) if ( mgr->LastAdvance() + timer_mgr_inactivity_timeout < timer_mgr->Time() )
@ -106,9 +85,6 @@ NetSessions::NetSessions()
udp_conns.SetDeleteFunc(bro_obj_delete_func); udp_conns.SetDeleteFunc(bro_obj_delete_func);
fragments.SetDeleteFunc(bro_obj_delete_func); fragments.SetDeleteFunc(bro_obj_delete_func);
if ( (reading_live || pseudo_realtime) && net_stats_update )
timer_mgr->Add(new NetworkTimer(this, 1.0));
if ( stp_correlate_pair ) if ( stp_correlate_pair )
stp_manager = new SteppingStoneManager(); stp_manager = new SteppingStoneManager();
else else
@ -1085,39 +1061,6 @@ void NetSessions::Drain()
ExpireTimerMgrs(); ExpireTimerMgrs();
} }
void NetSessions::HeartBeat(double t)
{
unsigned int recv = 0;
unsigned int drop = 0;
unsigned int link = 0;
loop_over_list(pkt_srcs, i)
{
PktSrc* ps = pkt_srcs[i];
struct PktSrc::Stats stat;
ps->Statistics(&stat);
recv += stat.received;
drop += stat.dropped;
link += stat.link;
}
val_list* vl = new val_list;
vl->append(new Val(t, TYPE_TIME));
RecordVal* ns = new RecordVal(net_stats);
ns->Assign(0, new Val(recv, TYPE_COUNT));
ns->Assign(1, new Val(drop, TYPE_COUNT));
ns->Assign(2, new Val(link, TYPE_COUNT));
vl->append(ns);
mgr.QueueEvent(net_stats_update, vl);
timer_mgr->Add(new NetworkTimer(this, t + heartbeat_interval));
}
void NetSessions::GetStats(SessionStats& s) const void NetSessions::GetStats(SessionStats& s) const
{ {
s.num_TCP_conns = tcp_conns.Length(); s.num_TCP_conns = tcp_conns.Length();

View file

@ -105,9 +105,6 @@ public:
// that are still active. // that are still active.
void Drain(); void Drain();
// Called periodically to generate statistics reports.
void HeartBeat(double t);
void GetStats(SessionStats& s) const; void GetStats(SessionStats& s) const;
void Weird(const char* name, void Weird(const char* name,

View file

@ -14,6 +14,7 @@
using namespace std; using namespace std;
RecordType* ftp_port; RecordType* ftp_port;
RecordType* NetStats;
RecordType* bro_resources; RecordType* bro_resources;
RecordType* matcher_stats; RecordType* matcher_stats;
TableType* var_sizes; TableType* var_sizes;
@ -1431,6 +1432,31 @@ function bytestring_to_hexstr%(bytestring: string%): string
extern const char* bro_version(); extern const char* bro_version();
%%} %%}
function net_stats%(%): NetStats
%{
// Builds a NetStats record holding the received / dropped / link packet
// counts summed over every entry in pkt_srcs, and returns it to the caller.
// Takes no arguments.
//
// NOTE(review): the accumulators are unsigned int; confirm that this width
// is sufficient for the totals on long-running instances.
unsigned int recv = 0;
unsigned int drop = 0;
unsigned int link = 0;
// Visit each packet source and add its counters to the running totals.
loop_over_list(pkt_srcs, i)
{
PktSrc* ps = pkt_srcs[i];
struct PktSrc::Stats stat;
// Statistics() is handed a pointer to stat; its received, dropped and
// link members are read right after the call.
ps->Statistics(&stat);
recv += stat.received;
drop += stat.dropped;
link += stat.link;
}
// Fields are assigned by position: 0 = received, 1 = dropped, 2 = link.
// NOTE(review): this must stay in step with the field order of the
// script-level NetStats record (pkts_recvd, pkts_dropped, pkts_link).
RecordVal* ns = new RecordVal(NetStats);
ns->Assign(0, new Val(recv, TYPE_COUNT));
ns->Assign(1, new Val(drop, TYPE_COUNT));
ns->Assign(2, new Val(link, TYPE_COUNT));
return ns;
%}
function resource_usage%(%): bro_resources function resource_usage%(%): bro_resources
%{ %{
struct rusage r; struct rusage r;

View file

@ -52,7 +52,6 @@ event icmp_echo_request%(c: connection, icmp: icmp_conn, id: count, seq: count,
event icmp_echo_reply%(c: connection, icmp: icmp_conn, id: count, seq: count, payload: string%); event icmp_echo_reply%(c: connection, icmp: icmp_conn, id: count, seq: count, payload: string%);
event icmp_unreachable%(c: connection, icmp: icmp_conn, code: count, context: icmp_context%); event icmp_unreachable%(c: connection, icmp: icmp_conn, code: count, context: icmp_context%);
event icmp_time_exceeded%(c: connection, icmp: icmp_conn, code: count, context: icmp_context%); event icmp_time_exceeded%(c: connection, icmp: icmp_conn, code: count, context: icmp_context%);
event net_stats_update%(t: time, ns: net_stats%);
event conn_stats%(c: connection, os: endpoint_stats, rs: endpoint_stats%); event conn_stats%(c: connection, os: endpoint_stats, rs: endpoint_stats%);
event conn_weird%(name: string, c: connection%); event conn_weird%(name: string, c: connection%);
event conn_weird_addl%(name: string, c: connection, addl: string%); event conn_weird_addl%(name: string, c: connection, addl: string%);