mirror of
https://github.com/zeek/zeek.git
synced 2025-10-05 08:08:19 +00:00
Merge remote-tracking branch 'origin/topic/seth/stats-improvement'
(Cleaned up some code a little bit.) * origin/topic/seth/stats-improvement: Fixing tests for stats improvements Rename the reporting interval variable for stats. Removing more broken functionality due to changed stats apis. Removing some references to resource_usage() Removing Broker stats, it was broken and incomplete. Fixing default stats collection interval to every 5 minutes. Add DNS stats to the stats.log Small stats script tweaks and beginning broker stats. Continued stats cleanup and extension. More stats collection extensions. More stats improvements Slight change to Mach API for collecting memory usage. Fixing some small mistakes. Updating the cmake submodule for the stats updates. Fix memory usage collection on Mac OS X. Cleaned up stats collection. BIT-1581 #merged
This commit is contained in:
commit
00d94f1bbc
53 changed files with 887 additions and 498 deletions
|
@ -18,7 +18,7 @@ export {
|
|||
|
||||
event net_stats_update(last_stat: NetStats)
|
||||
{
|
||||
local ns = net_stats();
|
||||
local ns = get_net_stats();
|
||||
local new_dropped = ns$pkts_dropped - last_stat$pkts_dropped;
|
||||
if ( new_dropped > 0 )
|
||||
{
|
||||
|
@ -38,5 +38,5 @@ event bro_init()
|
|||
# Since this currently only calculates packet drops, let's skip the stats
|
||||
# collection if reading traces.
|
||||
if ( ! reading_traces() )
|
||||
schedule stats_collection_interval { net_stats_update(net_stats()) };
|
||||
schedule stats_collection_interval { net_stats_update(get_net_stats()) };
|
||||
}
|
||||
|
|
|
@ -474,64 +474,127 @@ type NetStats: record {
|
|||
bytes_recvd: count &default=0; ##< Bytes received by Bro.
|
||||
};
|
||||
|
||||
## Statistics about Bro's connection, packet, and fragment tracking.
|
||||
type ConnStats: record {
|
||||
total_conns: count; ##<
|
||||
current_conns: count; ##<
|
||||
current_conns_extern: count; ##<
|
||||
sess_current_conns: count; ##<
|
||||
|
||||
num_packets: count;
|
||||
num_fragments: count;
|
||||
max_fragments: count;
|
||||
|
||||
num_tcp_conns: count; ##< Current number of TCP connections in memory.
|
||||
max_tcp_conns: count; ##< Maximum number of concurrent TCP connections so far.
|
||||
cumulative_tcp_conns: count; ##< Total number of TCP connections so far.
|
||||
|
||||
num_udp_conns: count; ##< Current number of UDP flows in memory.
|
||||
max_udp_conns: count; ##< Maximum number of concurrent UDP flows so far.
|
||||
cumulative_udp_conns: count; ##< Total number of UDP flows so far.
|
||||
|
||||
num_icmp_conns: count; ##< Current number of ICMP flows in memory.
|
||||
max_icmp_conns: count; ##< Maximum number of concurrent ICMP flows so far.
|
||||
cumulative_icmp_conns: count; ##< Total number of ICMP flows so far.
|
||||
|
||||
killed_by_inactivity: count;
|
||||
};
|
||||
|
||||
## Statistics about Bro's process.
|
||||
##
|
||||
## .. bro:see:: resource_usage
|
||||
## .. bro:see:: get_proc_stats
|
||||
##
|
||||
## .. note:: All process-level values refer to Bro's main process only, not to
|
||||
## the child process it spawns for doing communication.
|
||||
type bro_resources: record {
|
||||
version: string; ##< Bro version string.
|
||||
debug: bool; ##< True if compiled with --enable-debug.
|
||||
start_time: time; ##< Start time of process.
|
||||
real_time: interval; ##< Elapsed real time since Bro started running.
|
||||
user_time: interval; ##< User CPU seconds.
|
||||
system_time: interval; ##< System CPU seconds.
|
||||
mem: count; ##< Maximum memory consumed, in KB.
|
||||
minor_faults: count; ##< Page faults not requiring actual I/O.
|
||||
major_faults: count; ##< Page faults requiring actual I/O.
|
||||
num_swap: count; ##< Times swapped out.
|
||||
blocking_input: count; ##< Blocking input operations.
|
||||
blocking_output: count; ##< Blocking output operations.
|
||||
num_context: count; ##< Number of involuntary context switches.
|
||||
type ProcStats: record {
|
||||
debug: bool; ##< True if compiled with --enable-debug.
|
||||
start_time: time; ##< Start time of process.
|
||||
real_time: interval; ##< Elapsed real time since Bro started running.
|
||||
user_time: interval; ##< User CPU seconds.
|
||||
system_time: interval; ##< System CPU seconds.
|
||||
mem: count; ##< Maximum memory consumed, in KB.
|
||||
minor_faults: count; ##< Page faults not requiring actual I/O.
|
||||
major_faults: count; ##< Page faults requiring actual I/O.
|
||||
num_swap: count; ##< Times swapped out.
|
||||
blocking_input: count; ##< Blocking input operations.
|
||||
blocking_output: count; ##< Blocking output operations.
|
||||
num_context: count; ##< Number of involuntary context switches.
|
||||
};
|
||||
|
||||
num_TCP_conns: count; ##< Current number of TCP connections in memory.
|
||||
num_UDP_conns: count; ##< Current number of UDP flows in memory.
|
||||
num_ICMP_conns: count; ##< Current number of ICMP flows in memory.
|
||||
num_fragments: count; ##< Current number of fragments pending reassembly.
|
||||
num_packets: count; ##< Total number of packets processed to date.
|
||||
num_timers: count; ##< Current number of pending timers.
|
||||
num_events_queued: count; ##< Total number of events queued so far.
|
||||
num_events_dispatched: count; ##< Total number of events dispatched so far.
|
||||
|
||||
max_TCP_conns: count; ##< Maximum number of concurrent TCP connections so far.
|
||||
max_UDP_conns: count; ##< Maximum number of concurrent UDP connections so far.
|
||||
max_ICMP_conns: count; ##< Maximum number of concurrent ICMP connections so far.
|
||||
max_fragments: count; ##< Maximum number of concurrently buffered fragments so far.
|
||||
max_timers: count; ##< Maximum number of concurrent timers pending so far.
|
||||
type EventStats: record {
|
||||
queued: count; ##< Total number of events queued so far.
|
||||
dispatched: count; ##< Total number of events dispatched so far.
|
||||
};
|
||||
|
||||
## Byte sizes of Bro's reassembly tracking, broken down by reassembler type.
|
||||
##
|
||||
## .. bro:see:: get_reassembler_stats
|
||||
type ReassemblerStats: record {
|
||||
file_size: count; ##< Byte size of File reassembly tracking.
|
||||
frag_size: count; ##< Byte size of Fragment reassembly tracking.
|
||||
tcp_size: count; ##< Byte size of TCP reassembly tracking.
|
||||
unknown_size: count; ##< Byte size of reassembly tracking for unknown purposes.
|
||||
};
|
||||
|
||||
## Statistics of all regular expression matchers.
|
||||
##
|
||||
## .. bro:see:: get_matcher_stats
|
||||
type matcher_stats: record {
|
||||
matchers: count; ##< Number of distinct RE matchers.
|
||||
dfa_states: count; ##< Number of DFA states across all matchers.
|
||||
computed: count; ##< Number of computed DFA state transitions.
|
||||
mem: count; ##< Number of bytes used by DFA states.
|
||||
hits: count; ##< Number of cache hits.
|
||||
misses: count; ##< Number of cache misses.
|
||||
avg_nfa_states: count; ##< Average number of NFA states across all matchers.
|
||||
type MatcherStats: record {
|
||||
matchers: count; ##< Number of distinct RE matchers.
|
||||
nfa_states: count; ##< Number of NFA states across all matchers.
|
||||
dfa_states: count; ##< Number of DFA states across all matchers.
|
||||
computed: count; ##< Number of computed DFA state transitions.
|
||||
mem: count; ##< Number of bytes used by DFA states.
|
||||
hits: count; ##< Number of cache hits.
|
||||
misses: count; ##< Number of cache misses.
|
||||
};
|
||||
|
||||
## Statistics of timers.
|
||||
##
|
||||
## .. bro:see:: get_timer_stats
|
||||
type TimerStats: record {
|
||||
current: count; ##< Current number of pending timers.
|
||||
max: count; ##< Maximum number of concurrent timers pending so far.
|
||||
cumulative: count; ##< Cumulative number of timers scheduled.
|
||||
};
|
||||
|
||||
## Statistics of file analysis.
|
||||
##
|
||||
## .. bro:see:: get_file_analysis_stats
|
||||
type FileAnalysisStats: record {
|
||||
current: count; ##< Current number of files being analyzed.
|
||||
max: count; ##< Maximum number of concurrent files so far.
|
||||
cumulative: count; ##< Cumulative number of files analyzed.
|
||||
};
|
||||
|
||||
## Statistics related to Bro's active use of DNS. These numbers are
|
||||
## about Bro performing DNS queries on its own, not traffic
|
||||
## being seen.
|
||||
##
|
||||
## .. bro:see:: get_dns_stats
|
||||
type DNSStats: record {
|
||||
requests: count; ##< Number of DNS requests made.
|
||||
successful: count; ##< Number of successful DNS replies.
|
||||
failed: count; ##< Number of DNS reply failures.
|
||||
pending: count; ##< Current pending queries.
|
||||
cached_hosts: count; ##< Number of cached hosts.
|
||||
cached_addresses: count; ##< Number of cached addresses.
|
||||
};
|
||||
|
||||
## Statistics about number of gaps in TCP connections.
|
||||
##
|
||||
## .. bro:see:: gap_report get_gap_summary
|
||||
type gap_info: record {
|
||||
ack_events: count; ##< How many ack events *could* have had gaps.
|
||||
ack_bytes: count; ##< How many bytes those covered.
|
||||
gap_events: count; ##< How many *did* have gaps.
|
||||
gap_bytes: count; ##< How many bytes were missing in the gaps.
|
||||
## .. bro:see:: get_gap_stats
|
||||
type GapStats: record {
|
||||
ack_events: count; ##< How many ack events *could* have had gaps.
|
||||
ack_bytes: count; ##< How many bytes those covered.
|
||||
gap_events: count; ##< How many *did* have gaps.
|
||||
gap_bytes: count; ##< How many bytes were missing in the gaps.
|
||||
};
|
||||
|
||||
## Statistics about threads.
|
||||
##
|
||||
## .. bro:see:: get_thread_stats
|
||||
type ThreadStats: record {
|
||||
num_threads: count;
|
||||
};
|
||||
|
||||
## Deprecated.
|
||||
|
@ -3435,23 +3498,17 @@ global pkt_profile_file: file &redef;
|
|||
## .. bro:see:: load_sample
|
||||
global load_sample_freq = 20 &redef;
|
||||
|
||||
## Rate at which to generate :bro:see:`gap_report` events assessing to what
|
||||
## degree the measurement process appears to exhibit loss.
|
||||
##
|
||||
## .. bro:see:: gap_report
|
||||
const gap_report_freq = 1.0 sec &redef;
|
||||
|
||||
## Whether to attempt to automatically detect SYN/FIN/RST-filtered trace
|
||||
## and not report missing segments for such connections.
|
||||
## If this is enabled, then missing data at the end of connections may not
|
||||
## be reported via :bro:see:`content_gap`.
|
||||
const detect_filtered_trace = F &redef;
|
||||
|
||||
## Whether we want :bro:see:`content_gap` and :bro:see:`gap_report` for partial
|
||||
## Whether we want :bro:see:`content_gap` and :bro:see:`get_gap_summary` for partial
|
||||
## connections. A connection is partial if it is missing a full handshake. Note
|
||||
## that gap reports for partial connections might not be reliable.
|
||||
##
|
||||
## .. bro:see:: content_gap gap_report partial_connection
|
||||
## .. bro:see:: content_gap get_gap_summary partial_connection
|
||||
const report_gaps_for_partial = F &redef;
|
||||
|
||||
## Flag to prevent Bro from exiting automatically when input is exhausted.
|
||||
|
|
|
@ -26,7 +26,7 @@ event ChecksumOffloading::check()
|
|||
if ( done )
|
||||
return;
|
||||
|
||||
local pkts_recvd = net_stats()$pkts_recvd;
|
||||
local pkts_recvd = get_net_stats()$pkts_recvd;
|
||||
local bad_ip_checksum_pct = (pkts_recvd != 0) ? (bad_ip_checksums*1.0 / pkts_recvd*1.0) : 0;
|
||||
local bad_tcp_checksum_pct = (pkts_recvd != 0) ? (bad_tcp_checksums*1.0 / pkts_recvd*1.0) : 0;
|
||||
local bad_udp_checksum_pct = (pkts_recvd != 0) ? (bad_udp_checksums*1.0 / pkts_recvd*1.0) : 0;
|
||||
|
|
|
@ -22,44 +22,24 @@ event Control::id_value_request(id: string)
|
|||
|
||||
event Control::peer_status_request()
|
||||
{
|
||||
local status = "";
|
||||
for ( p in Communication::nodes )
|
||||
{
|
||||
local peer = Communication::nodes[p];
|
||||
if ( ! peer$connected )
|
||||
next;
|
||||
|
||||
local res = resource_usage();
|
||||
status += fmt("%.6f peer=%s host=%s events_in=%s events_out=%s ops_in=%s ops_out=%s bytes_in=? bytes_out=?\n",
|
||||
network_time(),
|
||||
peer$peer$descr, peer$host,
|
||||
res$num_events_queued, res$num_events_dispatched,
|
||||
res$blocking_input, res$blocking_output);
|
||||
}
|
||||
|
||||
event Control::peer_status_response(status);
|
||||
}
|
||||
|
||||
event Control::net_stats_request()
|
||||
{
|
||||
local ns = net_stats();
|
||||
local reply = fmt("%.6f recvd=%d dropped=%d link=%d\n", network_time(),
|
||||
ns$pkts_recvd, ns$pkts_dropped, ns$pkts_link);
|
||||
event Control::net_stats_response(reply);
|
||||
}
|
||||
|
||||
|
||||
event Control::configuration_update_request()
|
||||
{
|
||||
# Generate the alias event.
|
||||
# Generate the alias event.
|
||||
event Control::configuration_update();
|
||||
|
||||
|
||||
# Don't need to do anything in particular here, it's just indicating that
|
||||
# the configuration is going to be updated. This event could be handled
|
||||
# by other scripts if they need to do some ancillary processing if
|
||||
# by other scripts if they need to do some ancillary processing if
|
||||
# redef-able consts are modified at runtime.
|
||||
event Control::configuration_update_response();
|
||||
}
|
||||
|
||||
|
||||
event Control::shutdown_request()
|
||||
{
|
||||
# Send the acknowledgement event.
|
||||
|
|
|
@ -56,7 +56,7 @@ event CaptureLoss::take_measurement(last_ts: time, last_acks: count, last_gaps:
|
|||
}
|
||||
|
||||
local now = network_time();
|
||||
local g = get_gap_summary();
|
||||
local g = get_gap_stats();
|
||||
local acks = g$ack_events - last_acks;
|
||||
local gaps = g$gap_events - last_gaps;
|
||||
local pct_lost = (acks == 0) ? 0.0 : (100 * (1.0 * gaps) / (1.0 * acks));
|
||||
|
|
|
@ -1,6 +1,4 @@
|
|||
##! Log memory/packet/lag statistics. Differs from
|
||||
##! :doc:`/scripts/policy/misc/profiling.bro` in that this
|
||||
##! is lighter-weight (much less info, and less load to generate).
|
||||
##! Log memory/packet/lag statistics.
|
||||
|
||||
@load base/frameworks/notice
|
||||
|
||||
|
@ -10,7 +8,7 @@ export {
|
|||
redef enum Log::ID += { LOG };
|
||||
|
||||
## How often stats are reported.
|
||||
const stats_report_interval = 1min &redef;
|
||||
const report_interval = 5min &redef;
|
||||
|
||||
type Info: record {
|
||||
## Timestamp for the measurement.
|
||||
|
@ -21,27 +19,63 @@ export {
|
|||
mem: count &log;
|
||||
## Number of packets processed since the last stats interval.
|
||||
pkts_proc: count &log;
|
||||
## Number of events processed since the last stats interval.
|
||||
events_proc: count &log;
|
||||
## Number of events that have been queued since the last stats
|
||||
## interval.
|
||||
events_queued: count &log;
|
||||
|
||||
## Lag between the wall clock and packet timestamps if reading
|
||||
## live traffic.
|
||||
lag: interval &log &optional;
|
||||
## Number of packets received since the last stats interval if
|
||||
## Number of bytes received since the last stats interval if
|
||||
## reading live traffic.
|
||||
pkts_recv: count &log &optional;
|
||||
bytes_recv: count &log;
|
||||
|
||||
## Number of packets dropped since the last stats interval if
|
||||
## reading live traffic.
|
||||
pkts_dropped: count &log &optional;
|
||||
## Number of packets seen on the link since the last stats
|
||||
## interval if reading live traffic.
|
||||
pkts_link: count &log &optional;
|
||||
## Number of bytes received since the last stats interval if
|
||||
## reading live traffic.
|
||||
bytes_recv: count &log &optional;
|
||||
## Lag between the wall clock and packet timestamps if reading
|
||||
## live traffic.
|
||||
pkt_lag: interval &log &optional;
|
||||
|
||||
## Number of events processed since the last stats interval.
|
||||
events_proc: count &log;
|
||||
## Number of events that have been queued since the last stats
|
||||
## interval.
|
||||
events_queued: count &log;
|
||||
|
||||
## TCP connections currently in memory.
|
||||
active_tcp_conns: count &log;
|
||||
## UDP connections currently in memory.
|
||||
active_udp_conns: count &log;
|
||||
## ICMP connections currently in memory.
|
||||
active_icmp_conns: count &log;
|
||||
|
||||
## TCP connections seen since last stats interval.
|
||||
tcp_conns: count &log;
|
||||
## UDP connections seen since last stats interval.
|
||||
udp_conns: count &log;
|
||||
## ICMP connections seen since last stats interval.
|
||||
icmp_conns: count &log;
|
||||
|
||||
## Number of timers scheduled since last stats interval.
|
||||
timers: count &log;
|
||||
## Current number of scheduled timers.
|
||||
active_timers: count &log;
|
||||
|
||||
## Number of files seen since last stats interval.
|
||||
files: count &log;
|
||||
## Current number of files actively being seen.
|
||||
active_files: count &log;
|
||||
|
||||
## Number of DNS requests seen since last stats interval.
|
||||
dns_requests: count &log;
|
||||
## Current number of DNS requests awaiting a reply.
|
||||
active_dns_requests: count &log;
|
||||
|
||||
## Current size of TCP data in reassembly.
|
||||
reassem_tcp_size: count &log;
|
||||
## Current size of File data in reassembly.
|
||||
reassem_file_size: count &log;
|
||||
## Current size of packet fragment data in reassembly.
|
||||
reassem_frag_size: count &log;
|
||||
## Current size of unknown data in reassembly (this is only PIA buffer right now).
|
||||
reassem_unknown_size: count &log;
|
||||
};
|
||||
|
||||
## Event to catch stats as they are written to the logging stream.
|
||||
|
@ -53,38 +87,69 @@ event bro_init() &priority=5
|
|||
Log::create_stream(Stats::LOG, [$columns=Info, $ev=log_stats, $path="stats"]);
|
||||
}
|
||||
|
||||
event check_stats(last_ts: time, last_ns: NetStats, last_res: bro_resources)
|
||||
event check_stats(then: time, last_ns: NetStats, last_cs: ConnStats, last_ps: ProcStats, last_es: EventStats, last_rs: ReassemblerStats, last_ts: TimerStats, last_fs: FileAnalysisStats, last_ds: DNSStats)
|
||||
{
|
||||
local now = current_time();
|
||||
local ns = net_stats();
|
||||
local res = resource_usage();
|
||||
local nettime = network_time();
|
||||
local ns = get_net_stats();
|
||||
local cs = get_conn_stats();
|
||||
local ps = get_proc_stats();
|
||||
local es = get_event_stats();
|
||||
local rs = get_reassembler_stats();
|
||||
local ts = get_timer_stats();
|
||||
local fs = get_file_analysis_stats();
|
||||
local ds = get_dns_stats();
|
||||
|
||||
if ( bro_is_terminating() )
|
||||
# No more stats will be written or scheduled when Bro is
|
||||
# shutting down.
|
||||
return;
|
||||
|
||||
local info: Info = [$ts=now, $peer=peer_description, $mem=res$mem/1000000,
|
||||
$pkts_proc=res$num_packets - last_res$num_packets,
|
||||
$events_proc=res$num_events_dispatched - last_res$num_events_dispatched,
|
||||
$events_queued=res$num_events_queued - last_res$num_events_queued];
|
||||
local info: Info = [$ts=nettime,
|
||||
$peer=peer_description,
|
||||
$mem=ps$mem/1048576,
|
||||
$pkts_proc=ns$pkts_recvd - last_ns$pkts_recvd,
|
||||
$bytes_recv = ns$bytes_recvd - last_ns$bytes_recvd,
|
||||
|
||||
$active_tcp_conns=cs$num_tcp_conns,
|
||||
$tcp_conns=cs$cumulative_tcp_conns - last_cs$cumulative_tcp_conns,
|
||||
$active_udp_conns=cs$num_udp_conns,
|
||||
$udp_conns=cs$cumulative_udp_conns - last_cs$cumulative_udp_conns,
|
||||
$active_icmp_conns=cs$num_icmp_conns,
|
||||
$icmp_conns=cs$cumulative_icmp_conns - last_cs$cumulative_icmp_conns,
|
||||
|
||||
$reassem_tcp_size=rs$tcp_size,
|
||||
$reassem_file_size=rs$file_size,
|
||||
$reassem_frag_size=rs$frag_size,
|
||||
$reassem_unknown_size=rs$unknown_size,
|
||||
|
||||
$events_proc=es$dispatched - last_es$dispatched,
|
||||
$events_queued=es$queued - last_es$queued,
|
||||
|
||||
$timers=ts$cumulative - last_ts$cumulative,
|
||||
$active_timers=ts$current,
|
||||
|
||||
$files=fs$cumulative - last_fs$cumulative,
|
||||
$active_files=fs$current,
|
||||
|
||||
$dns_requests=ds$requests - last_ds$requests,
|
||||
$active_dns_requests=ds$pending
|
||||
];
|
||||
|
||||
# Someone's going to have to explain what this is and add a field to the Info record.
|
||||
# info$util = 100.0*((ps$user_time + ps$system_time) - (last_ps$user_time + last_ps$system_time))/(now-then);
|
||||
|
||||
if ( reading_live_traffic() )
|
||||
{
|
||||
info$lag = now - network_time();
|
||||
# Someone's going to have to explain what this is and add a field to the Info record.
|
||||
# info$util = 100.0*((res$user_time + res$system_time) - (last_res$user_time + last_res$system_time))/(now-last_ts);
|
||||
info$pkts_recv = ns$pkts_recvd - last_ns$pkts_recvd;
|
||||
info$pkt_lag = current_time() - nettime;
|
||||
info$pkts_dropped = ns$pkts_dropped - last_ns$pkts_dropped;
|
||||
info$pkts_link = ns$pkts_link - last_ns$pkts_link;
|
||||
info$bytes_recv = ns$bytes_recvd - last_ns$bytes_recvd;
|
||||
}
|
||||
|
||||
Log::write(Stats::LOG, info);
|
||||
schedule stats_report_interval { check_stats(now, ns, res) };
|
||||
schedule report_interval { check_stats(nettime, ns, cs, ps, es, rs, ts, fs, ds) };
|
||||
}
|
||||
|
||||
event bro_init()
|
||||
{
|
||||
schedule stats_report_interval { check_stats(current_time(), net_stats(), resource_usage()) };
|
||||
schedule report_interval { check_stats(network_time(), get_net_stats(), get_conn_stats(), get_proc_stats(), get_event_stats(), get_reassembler_stats(), get_timer_stats(), get_file_analysis_stats(), get_dns_stats()) };
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue