diff --git a/scripts/base/frameworks/sumstats/plugins/__load__.bro b/scripts/base/frameworks/sumstats/plugins/__load__.bro index 3b2bb553e6..c0ee3a6767 100644 --- a/scripts/base/frameworks/sumstats/plugins/__load__.bro +++ b/scripts/base/frameworks/sumstats/plugins/__load__.bro @@ -1,4 +1,5 @@ @load ./average +@load ./last @load ./max @load ./min @load ./sample diff --git a/scripts/base/frameworks/sumstats/plugins/last.bro b/scripts/base/frameworks/sumstats/plugins/last.bro new file mode 100644 index 0000000000..d0587bde08 --- /dev/null +++ b/scripts/base/frameworks/sumstats/plugins/last.bro @@ -0,0 +1,49 @@ +@load base/frameworks/sumstats +@load base/utils/queue + +module SumStats; + +export { + redef record Reducer += { + ## A number of sample Observations to collect. + samples: count &default=0; + }; + + redef record ResultVal += { + ## This is the queue where samples are maintained. Use the + ## :bro:see:`SumStats::get_samples` function to get a vector of the samples. + samples: Queue::Queue &optional; + }; + + ## Get a vector of sample Observation values from a ResultVal. + global get_samples: function(rv: ResultVal): vector of Observation; +} + +function get_samples(rv: ResultVal): vector of Observation + { + local s: vector of Observation = vector(); + if ( rv?$samples ) + Queue::get_vector(rv$samples, s); + return s; + } + +hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal) + { + if ( r$samples > 0 ) + { + if ( ! rv?$samples ) + rv$samples = Queue::init([$max_len=r$samples]); + Queue::put(rv$samples, obs); + } + } + +hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal) + { + # Merge $samples + if ( rv1?$samples && rv2?$samples ) + result$samples = Queue::merge(rv1$samples, rv2$samples); + else if ( rv1?$samples ) + result$samples = rv1$samples; + else if ( rv2?$samples ) + result$samples = rv2$samples; + } diff --git a/scripts/base/frameworks/sumstats/plugins/sample.bro b/scripts/base/frameworks/sumstats/plugins/sample.bro index dc2f438c79..b04d2cf57c 100644 --- a/scripts/base/frameworks/sumstats/plugins/sample.bro +++ b/scripts/base/frameworks/sumstats/plugins/sample.bro @@ -1,49 +1,117 @@ @load base/frameworks/sumstats/main -@load base/utils/queue module SumStats; export { + redef enum Calculation += { + ## Get uniquely distributed random samples from the observation stream + SAMPLE + }; + redef record Reducer += { ## A number of sample Observations to collect. - samples: count &default=0; + num_samples: count &default=0; }; redef record ResultVal += { - ## This is the queue where samples are maintained. Use the - ## :bro:see:`SumStats::get_samples` function to get a vector of the samples. - samples: Queue::Queue &optional; - }; + ## This is the vector in which the samples are maintained. + sample_vector: vector of Observation &default=vector(); - ## Get a vector of sample Observation values from a ResultVal. - global get_samples: function(rv: ResultVal): vector of Observation; + ## Number of total observed elements. + sample_elements: count &default=0; + }; } -function get_samples(rv: ResultVal): vector of Observation +redef record ResultVal += { + # Internal use only. This is not meant to be publically available + # and just a copy of num_samples from the Reducer. Needed for availability + # in the compose hook. + num_samples: count &default=0; +}; + +hook init_resultval_hook(r: Reducer, rv: ResultVal) { - local s: vector of Observation = vector(); - if ( rv?$samples ) - Queue::get_vector(rv$samples, s); - return s; + if ( SAMPLE in r$apply ) + rv$num_samples = r$num_samples; } +function sample_add_sample(obs:Observation, rv: ResultVal) + { + ++rv$sample_elements; + + if ( |rv$sample_vector| < rv$num_samples ) + rv$sample_vector[|rv$sample_vector|] = obs; + else + { + local ra = rand(rv$sample_elements); + if ( ra < rv$num_samples ) + rv$sample_vector[ra] = obs; + } + + } + hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal) { - if ( r$samples > 0 ) + if ( SAMPLE in r$apply ) { - if ( ! rv?$samples ) - rv$samples = Queue::init([$max_len=r$samples]); - Queue::put(rv$samples, obs); + sample_add_sample(obs, rv); } } hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal) { - # Merge $samples - if ( rv1?$samples && rv2?$samples ) - result$samples = Queue::merge(rv1$samples, rv2$samples); - else if ( rv1?$samples ) - result$samples = rv1$samples; - else if ( rv2?$samples ) - result$samples = rv2$samples; + if ( rv1$num_samples != rv2$num_samples ) + { + Reporter::error("Merging sample sets with differing sizes is not supported"); + return; + } + + local num_samples = rv1$num_samples; + + if ( |rv1$sample_vector| > num_samples || |rv2$sample_vector| > num_samples ) + { + Reporter::error("Sample vector with too many elements. Aborting."); + return; + } + + + if ( |rv1$sample_vector| != num_samples && |rv2$sample_vector| < num_samples ) + { + if ( |rv1$sample_vector| != rv1$sample_elements || |rv2$sample_vector| < rv2$sample_elements ) + { + Reporter::error("Mismatch in sample element size and tracking. Aborting merge"); + return; + } + + for ( i in rv1$sample_vector ) + sample_add_sample(rv1$sample_vector[i], result); + + for ( i in rv2$sample_vector) + sample_add_sample(rv2$sample_vector[i], result); + } + else + { + local other_vector: vector of Observation; + local othercount: count; + if ( rv1$sample_elements > rv2$sample_elements ) + { + result$sample_vector = copy(rv1$sample_vector); + other_vector = rv2$sample_vector; + othercount = rv2$sample_elements; + } + else + { + result$sample_vector = copy(rv2$sample_vector); + other_vector = rv1$sample_vector; + othercount = rv1$sample_elements; + } + + local totalcount = rv1$sample_elements + rv2$sample_elements; + + for ( i in other_vector ) + { + if ( rand(totalcount) <= othercount ) + result$sample_vector[i] = other_vector[i]; + } + } } diff --git a/testing/btest/Baseline/scripts.base.frameworks.sumstats.last/.stdout b/testing/btest/Baseline/scripts.base.frameworks.sumstats.last/.stdout new file mode 100644 index 0000000000..35219765af --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.sumstats.last/.stdout @@ -0,0 +1,9 @@ +6.5.4.3 +[[num=2, dbl=, str=]] +1 +1.2.3.4 +[[num=5, dbl=, str=], [num=51, dbl=, str=]] +20 +7.2.1.5 +[[num=1, dbl=, str=]] +1 diff --git a/testing/btest/scripts/base/frameworks/sumstats/last.bro b/testing/btest/scripts/base/frameworks/sumstats/last.bro new file mode 100644 index 0000000000..e0cef0ec10 --- /dev/null +++ b/testing/btest/scripts/base/frameworks/sumstats/last.bro @@ -0,0 +1,47 @@ +# @TEST-EXEC: bro %INPUT +# @TEST-EXEC: btest-diff .stdout + +event bro_init() &priority=5 + { + local r1: SumStats::Reducer = [$stream="test.metric", + $apply=set(SumStats::SAMPLE), $num_samples=2]; + SumStats::create([$epoch=3secs, + $reducers=set(r1), + $epoch_finished(data: SumStats::ResultTable) = + { + for ( key in data ) + { + print key$host; + local r = data[key]["test.metric"]; + print r$sample_vector; + print r$sample_elements; + } + } + ]); + + SumStats::observe("test.metric", [$host=1.2.3.4], [$num=5]); + SumStats::observe("test.metric", [$host=1.2.3.4], [$num=22]); + SumStats::observe("test.metric", [$host=1.2.3.4], [$num=94]); + SumStats::observe("test.metric", [$host=1.2.3.4], [$num=50]); + # I checked the random numbers. seems legit. + SumStats::observe("test.metric", [$host=1.2.3.4], [$num=51]); + SumStats::observe("test.metric", [$host=1.2.3.4], [$num=51]); + SumStats::observe("test.metric", [$host=1.2.3.4], [$num=51]); + SumStats::observe("test.metric", [$host=1.2.3.4], [$num=51]); + SumStats::observe("test.metric", [$host=1.2.3.4], [$num=51]); + SumStats::observe("test.metric", [$host=1.2.3.4], [$num=51]); + SumStats::observe("test.metric", [$host=1.2.3.4], [$num=51]); + SumStats::observe("test.metric", [$host=1.2.3.4], [$num=51]); + SumStats::observe("test.metric", [$host=1.2.3.4], [$num=51]); + SumStats::observe("test.metric", [$host=1.2.3.4], [$num=51]); + SumStats::observe("test.metric", [$host=1.2.3.4], [$num=51]); + SumStats::observe("test.metric", [$host=1.2.3.4], [$num=51]); + SumStats::observe("test.metric", [$host=1.2.3.4], [$num=51]); + SumStats::observe("test.metric", [$host=1.2.3.4], [$num=51]); + SumStats::observe("test.metric", [$host=1.2.3.4], [$num=51]); + SumStats::observe("test.metric", [$host=1.2.3.4], [$num=51]); + + SumStats::observe("test.metric", [$host=6.5.4.3], [$num=2]); + SumStats::observe("test.metric", [$host=7.2.1.5], [$num=1]); + } +