zeek/scripts/base/frameworks/sumstats/plugins/variance.bro
Seth Hall bec965b66f Large update for the SumStats framework.
- On-demand access to sumstats results through "return from"
   functions named SumStats::request and Sumstats::request_key.
   Both functions are tested in standalone and clustered modes.

 - $name field has returned to SumStats which simplifies cluster
   code and makes the on-demand access stuff possible.

 - Clustered results can only be collected for 1 minute from their
   time of creation now instead of time of last read.

 - Thresholds use doubles instead of counts everywhere now.

 - Calculation dependency resolution occurs at start up time now
   instead of doing it at observation time which provide a minor
   cpu performance improvement.  A new plugin registration mechanism
   was created to support this change.

 - AppStats now has a minimal doc string and is broken into hook-based
   plugins.

 - AppStats and traceroute detection added to local.bro
2013-05-21 15:52:59 -04:00

69 lines
1.8 KiB
Text

@load ./average
@load ../main
module SumStats;
export {
redef enum Calculation += {
## Find the variance of the values.
VARIANCE
};
redef record ResultVal += {
## For numeric data, this calculates the variance.
variance: double &optional;
};
}
redef record ResultVal += {
# Internal use only. Used for incrementally calculating variance.
prev_avg: double &optional;
# Internal use only. For calculating incremental variance.
var_s: double &default=0.0;
};
function calc_variance(rv: ResultVal)
{
rv$variance = (rv$num > 1) ? rv$var_s/(rv$num-1) : 0.0;
}
hook register_observe_plugins() &priority=-5
{
register_observe_plugin(VARIANCE, function(r: Reducer, val: double, obs: Observation, rv: ResultVal)
{
if ( rv$num > 1 )
rv$var_s += ((val - rv$prev_avg) * (val - rv$average));
calc_variance(rv);
rv$prev_avg = rv$average;
});
add_observe_plugin_dependency(VARIANCE, AVERAGE);
}
# Reduced priority since this depends on the average
hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal) &priority=-5
{
if ( rv1?$var_s && rv1?$average &&
rv2?$var_s && rv2?$average )
{
local rv1_avg_sq = (rv1$average - result$average);
rv1_avg_sq = rv1_avg_sq*rv1_avg_sq;
local rv2_avg_sq = (rv2$average - result$average);
rv2_avg_sq = rv2_avg_sq*rv2_avg_sq;
result$var_s = rv1$num*(rv1$var_s/rv1$num + rv1_avg_sq) + rv2$num*(rv2$var_s/rv2$num + rv2_avg_sq);
}
else if ( rv1?$var_s )
result$var_s = rv1$var_s;
else if ( rv2?$var_s )
result$var_s = rv2$var_s;
if ( rv1?$prev_avg && rv2?$prev_avg )
result$prev_avg = ((rv1$prev_avg*rv1$num) + (rv2$prev_avg*rv2$num))/(rv1$num+rv2$num);
else if ( rv1?$prev_avg )
result$prev_avg = rv1$prev_avg;
else if ( rv2?$prev_avg )
result$prev_avg = rv2$prev_avg;
calc_variance(result);
}