Merge remote-tracking branch 'origin/topic/bernhard/metrics-samples'

Closes #1003.

* origin/topic/bernhard/metrics-samples:
  finishing touches, make test more robust, rename function in last again
  change names of data structures after talking with seth
  make last plugin nicer and samplify sqli detector
  add tests for sampler
  reservoir sampler. untested.
This commit is contained in:
Robin Sommer 2013-05-15 16:09:31 -07:00
commit f76446fb4e
10 changed files with 347 additions and 29 deletions

View file

@ -1,49 +1,120 @@
@load base/frameworks/sumstats/main
@load base/utils/queue
module SumStats;
export {
redef enum Calculation += {
## Get uniquely distributed random samples from the observation stream.
SAMPLE
};
redef record Reducer += {
## A number of sample Observations to collect.
samples: count &default=0;
num_samples: count &default=0;
};
redef record ResultVal += {
## This is the queue where samples are maintained. Use the
## :bro:see:`SumStats::get_samples` function to get a vector of the samples.
samples: Queue::Queue &optional;
};
## This is the vector in which the samples are maintained.
samples: vector of Observation &default=vector();
## Get a vector of sample Observation values from a ResultVal.
global get_samples: function(rv: ResultVal): vector of Observation;
## Number of total observed elements.
sample_elements: count &default=0;
};
}
function get_samples(rv: ResultVal): vector of Observation
redef record ResultVal += {
# Internal use only. This is not meant to be publically available
# and just a copy of num_samples from the Reducer. Needed for availability
# in the compose hook.
num_samples: count &default=0;
};
hook init_resultval_hook(r: Reducer, rv: ResultVal)
{
local s: vector of Observation = vector();
if ( rv?$samples )
Queue::get_vector(rv$samples, s);
return s;
if ( SAMPLE in r$apply )
rv$num_samples = r$num_samples;
}
function sample_add_sample(obs:Observation, rv: ResultVal)
{
++rv$sample_elements;
if ( |rv$samples| < rv$num_samples )
rv$samples[|rv$samples|] = obs;
else
{
local ra = rand(rv$sample_elements);
if ( ra < rv$num_samples )
rv$samples[ra] = obs;
}
}
hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
{
if ( r$samples > 0 )
if ( SAMPLE in r$apply )
{
if ( ! rv?$samples )
rv$samples = Queue::init([$max_len=r$samples]);
Queue::put(rv$samples, obs);
sample_add_sample(obs, rv);
}
}
hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
{
# Merge $samples
if ( rv1?$samples && rv2?$samples )
result$samples = Queue::merge(rv1$samples, rv2$samples);
else if ( rv1?$samples )
result$samples = rv1$samples;
else if ( rv2?$samples )
result$samples = rv2$samples;
if ( rv1$num_samples != rv2$num_samples )
{
Reporter::error("Merging sample sets with differing sizes is not supported");
return;
}
local num_samples = rv1$num_samples;
result$num_samples = num_samples;
if ( |rv1$samples| > num_samples || |rv2$samples| > num_samples )
{
Reporter::error("Sample vector with too many elements. Aborting.");
return;
}
if ( |rv1$samples| != num_samples && |rv2$samples| < num_samples )
{
if ( |rv1$samples| != rv1$sample_elements || |rv2$samples| < rv2$sample_elements )
{
Reporter::error("Mismatch in sample element size and tracking. Aborting merge");
return;
}
for ( i in rv1$samples )
sample_add_sample(rv1$samples[i], result);
for ( i in rv2$samples)
sample_add_sample(rv2$samples[i], result);
}
else
{
local other_vector: vector of Observation;
local othercount: count;
if ( rv1$sample_elements > rv2$sample_elements )
{
result$samples = copy(rv1$samples);
other_vector = rv2$samples;
othercount = rv2$sample_elements;
}
else
{
result$samples = copy(rv2$samples);
other_vector = rv1$samples;
othercount = rv1$sample_elements;
}
local totalcount = rv1$sample_elements + rv2$sample_elements;
result$sample_elements = totalcount;
for ( i in other_vector )
{
if ( rand(totalcount) <= othercount )
result$samples[i] = other_vector[i];
}
}
}