mirror of
https://github.com/zeek/zeek.git
synced 2025-10-04 15:48:19 +00:00
reservoir sampler. untested.
This commit is contained in:
parent
69c7363147
commit
663082e2d5
5 changed files with 198 additions and 24 deletions
|
@ -1,4 +1,5 @@
|
|||
@load ./average
|
||||
@load ./last
|
||||
@load ./max
|
||||
@load ./min
|
||||
@load ./sample
|
||||
|
|
49
scripts/base/frameworks/sumstats/plugins/last.bro
Normal file
49
scripts/base/frameworks/sumstats/plugins/last.bro
Normal file
|
@ -0,0 +1,49 @@
|
|||
@load base/frameworks/sumstats
|
||||
@load base/utils/queue
|
||||
|
||||
module SumStats;
|
||||
|
||||
export {
|
||||
redef record Reducer += {
|
||||
## A number of sample Observations to collect.
|
||||
samples: count &default=0;
|
||||
};
|
||||
|
||||
redef record ResultVal += {
|
||||
## This is the queue where samples are maintained. Use the
|
||||
## :bro:see:`SumStats::get_samples` function to get a vector of the samples.
|
||||
samples: Queue::Queue &optional;
|
||||
};
|
||||
|
||||
## Get a vector of sample Observation values from a ResultVal.
|
||||
global get_samples: function(rv: ResultVal): vector of Observation;
|
||||
}
|
||||
|
||||
function get_samples(rv: ResultVal): vector of Observation
|
||||
{
|
||||
local s: vector of Observation = vector();
|
||||
if ( rv?$samples )
|
||||
Queue::get_vector(rv$samples, s);
|
||||
return s;
|
||||
}
|
||||
|
||||
hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
|
||||
{
|
||||
if ( r$samples > 0 )
|
||||
{
|
||||
if ( ! rv?$samples )
|
||||
rv$samples = Queue::init([$max_len=r$samples]);
|
||||
Queue::put(rv$samples, obs);
|
||||
}
|
||||
}
|
||||
|
||||
hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
|
||||
{
|
||||
# Merge $samples
|
||||
if ( rv1?$samples && rv2?$samples )
|
||||
result$samples = Queue::merge(rv1$samples, rv2$samples);
|
||||
else if ( rv1?$samples )
|
||||
result$samples = rv1$samples;
|
||||
else if ( rv2?$samples )
|
||||
result$samples = rv2$samples;
|
||||
}
|
|
@ -1,49 +1,117 @@
|
|||
@load base/frameworks/sumstats/main
|
||||
@load base/utils/queue
|
||||
|
||||
module SumStats;
|
||||
|
||||
export {
|
||||
redef enum Calculation += {
|
||||
## Get uniquely distributed random samples from the observation stream
|
||||
SAMPLE
|
||||
};
|
||||
|
||||
redef record Reducer += {
|
||||
## A number of sample Observations to collect.
|
||||
samples: count &default=0;
|
||||
num_samples: count &default=0;
|
||||
};
|
||||
|
||||
redef record ResultVal += {
|
||||
## This is the queue where samples are maintained. Use the
|
||||
## :bro:see:`SumStats::get_samples` function to get a vector of the samples.
|
||||
samples: Queue::Queue &optional;
|
||||
};
|
||||
## This is the vector in which the samples are maintained.
|
||||
sample_vector: vector of Observation &default=vector();
|
||||
|
||||
## Get a vector of sample Observation values from a ResultVal.
|
||||
global get_samples: function(rv: ResultVal): vector of Observation;
|
||||
## Number of total observed elements.
|
||||
sample_elements: count &default=0;
|
||||
};
|
||||
}
|
||||
|
||||
function get_samples(rv: ResultVal): vector of Observation
|
||||
redef record ResultVal += {
|
||||
# Internal use only. This is not meant to be publically available
|
||||
# and just a copy of num_samples from the Reducer. Needed for availability
|
||||
# in the compose hook.
|
||||
num_samples: count &default=0;
|
||||
};
|
||||
|
||||
hook init_resultval_hook(r: Reducer, rv: ResultVal)
|
||||
{
|
||||
local s: vector of Observation = vector();
|
||||
if ( rv?$samples )
|
||||
Queue::get_vector(rv$samples, s);
|
||||
return s;
|
||||
if ( SAMPLE in r$apply )
|
||||
rv$num_samples = r$num_samples;
|
||||
}
|
||||
|
||||
function sample_add_sample(obs:Observation, rv: ResultVal)
|
||||
{
|
||||
++rv$sample_elements;
|
||||
|
||||
if ( |rv$sample_vector| < rv$num_samples )
|
||||
rv$sample_vector[|rv$sample_vector|] = obs;
|
||||
else
|
||||
{
|
||||
local ra = rand(rv$sample_elements);
|
||||
if ( ra < rv$num_samples )
|
||||
rv$sample_vector[ra] = obs;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
|
||||
{
|
||||
if ( r$samples > 0 )
|
||||
if ( SAMPLE in r$apply )
|
||||
{
|
||||
if ( ! rv?$samples )
|
||||
rv$samples = Queue::init([$max_len=r$samples]);
|
||||
Queue::put(rv$samples, obs);
|
||||
sample_add_sample(obs, rv);
|
||||
}
|
||||
}
|
||||
|
||||
hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
|
||||
{
|
||||
# Merge $samples
|
||||
if ( rv1?$samples && rv2?$samples )
|
||||
result$samples = Queue::merge(rv1$samples, rv2$samples);
|
||||
else if ( rv1?$samples )
|
||||
result$samples = rv1$samples;
|
||||
else if ( rv2?$samples )
|
||||
result$samples = rv2$samples;
|
||||
if ( rv1$num_samples != rv2$num_samples )
|
||||
{
|
||||
Reporter::error("Merging sample sets with differing sizes is not supported");
|
||||
return;
|
||||
}
|
||||
|
||||
local num_samples = rv1$num_samples;
|
||||
|
||||
if ( |rv1$sample_vector| > num_samples || |rv2$sample_vector| > num_samples )
|
||||
{
|
||||
Reporter::error("Sample vector with too many elements. Aborting.");
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
if ( |rv1$sample_vector| != num_samples && |rv2$sample_vector| < num_samples )
|
||||
{
|
||||
if ( |rv1$sample_vector| != rv1$sample_elements || |rv2$sample_vector| < rv2$sample_elements )
|
||||
{
|
||||
Reporter::error("Mismatch in sample element size and tracking. Aborting merge");
|
||||
return;
|
||||
}
|
||||
|
||||
for ( i in rv1$sample_vector )
|
||||
sample_add_sample(rv1$sample_vector[i], result);
|
||||
|
||||
for ( i in rv2$sample_vector)
|
||||
sample_add_sample(rv2$sample_vector[i], result);
|
||||
}
|
||||
else
|
||||
{
|
||||
local other_vector: vector of Observation;
|
||||
local othercount: count;
|
||||
if ( rv1$sample_elements > rv2$sample_elements )
|
||||
{
|
||||
result$sample_vector = copy(rv1$sample_vector);
|
||||
other_vector = rv2$sample_vector;
|
||||
othercount = rv2$sample_elements;
|
||||
}
|
||||
else
|
||||
{
|
||||
result$sample_vector = copy(rv2$sample_vector);
|
||||
other_vector = rv1$sample_vector;
|
||||
othercount = rv1$sample_elements;
|
||||
}
|
||||
|
||||
local totalcount = rv1$sample_elements + rv2$sample_elements;
|
||||
|
||||
for ( i in other_vector )
|
||||
{
|
||||
if ( rand(totalcount) <= othercount )
|
||||
result$sample_vector[i] = other_vector[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,9 @@
|
|||
6.5.4.3
|
||||
[[num=2, dbl=<uninitialized>, str=<uninitialized>]]
|
||||
1
|
||||
1.2.3.4
|
||||
[[num=5, dbl=<uninitialized>, str=<uninitialized>], [num=51, dbl=<uninitialized>, str=<uninitialized>]]
|
||||
20
|
||||
7.2.1.5
|
||||
[[num=1, dbl=<uninitialized>, str=<uninitialized>]]
|
||||
1
|
47
testing/btest/scripts/base/frameworks/sumstats/last.bro
Normal file
47
testing/btest/scripts/base/frameworks/sumstats/last.bro
Normal file
|
@ -0,0 +1,47 @@
|
|||
# @TEST-EXEC: bro %INPUT
|
||||
# @TEST-EXEC: btest-diff .stdout
|
||||
|
||||
event bro_init() &priority=5
|
||||
{
|
||||
local r1: SumStats::Reducer = [$stream="test.metric",
|
||||
$apply=set(SumStats::SAMPLE), $num_samples=2];
|
||||
SumStats::create([$epoch=3secs,
|
||||
$reducers=set(r1),
|
||||
$epoch_finished(data: SumStats::ResultTable) =
|
||||
{
|
||||
for ( key in data )
|
||||
{
|
||||
print key$host;
|
||||
local r = data[key]["test.metric"];
|
||||
print r$sample_vector;
|
||||
print r$sample_elements;
|
||||
}
|
||||
}
|
||||
]);
|
||||
|
||||
SumStats::observe("test.metric", [$host=1.2.3.4], [$num=5]);
|
||||
SumStats::observe("test.metric", [$host=1.2.3.4], [$num=22]);
|
||||
SumStats::observe("test.metric", [$host=1.2.3.4], [$num=94]);
|
||||
SumStats::observe("test.metric", [$host=1.2.3.4], [$num=50]);
|
||||
# I checked the random numbers. seems legit.
|
||||
SumStats::observe("test.metric", [$host=1.2.3.4], [$num=51]);
|
||||
SumStats::observe("test.metric", [$host=1.2.3.4], [$num=51]);
|
||||
SumStats::observe("test.metric", [$host=1.2.3.4], [$num=51]);
|
||||
SumStats::observe("test.metric", [$host=1.2.3.4], [$num=51]);
|
||||
SumStats::observe("test.metric", [$host=1.2.3.4], [$num=51]);
|
||||
SumStats::observe("test.metric", [$host=1.2.3.4], [$num=51]);
|
||||
SumStats::observe("test.metric", [$host=1.2.3.4], [$num=51]);
|
||||
SumStats::observe("test.metric", [$host=1.2.3.4], [$num=51]);
|
||||
SumStats::observe("test.metric", [$host=1.2.3.4], [$num=51]);
|
||||
SumStats::observe("test.metric", [$host=1.2.3.4], [$num=51]);
|
||||
SumStats::observe("test.metric", [$host=1.2.3.4], [$num=51]);
|
||||
SumStats::observe("test.metric", [$host=1.2.3.4], [$num=51]);
|
||||
SumStats::observe("test.metric", [$host=1.2.3.4], [$num=51]);
|
||||
SumStats::observe("test.metric", [$host=1.2.3.4], [$num=51]);
|
||||
SumStats::observe("test.metric", [$host=1.2.3.4], [$num=51]);
|
||||
SumStats::observe("test.metric", [$host=1.2.3.4], [$num=51]);
|
||||
|
||||
SumStats::observe("test.metric", [$host=6.5.4.3], [$num=2]);
|
||||
SumStats::observe("test.metric", [$host=7.2.1.5], [$num=1]);
|
||||
}
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue