Merge remote branch 'origin/master' into topic/bernhard/hyperloglog

This commit is contained in:
Bernhard Amann 2013-08-28 17:48:59 -07:00
commit dc9fd36497
117 changed files with 21121 additions and 20865 deletions

View file

@ -3,6 +3,11 @@
module SumStats;
export {
redef record Reducer += {
## Maximum number of unique elements to store; if unset, no limit is applied.
unique_max: count &optional;
};
redef enum Calculation += {
## Calculate the number of unique values.
UNIQUE
@ -16,6 +21,11 @@ export {
}
redef record ResultVal += {
# Internal use only. This is used when multiple ResultVals
# are being merged and they need to abide by the unique limit
# set in the reducer.
unique_max: count &optional;
# Internal use only. This is not meant to be publicly available
# because we don't want to trust that we can inspect the values
# since we will likely move to a probabilistic data structure in the future.
@ -29,7 +39,12 @@ hook register_observe_plugins()
{
if ( ! rv?$unique_vals )
rv$unique_vals=set();
add rv$unique_vals[obs];
if ( r?$unique_max )
rv$unique_max=r$unique_max;
if ( ! r?$unique_max || |rv$unique_vals| <= r$unique_max )
add rv$unique_vals[obs];
rv$unique = |rv$unique_vals|;
});
}
@ -38,15 +53,31 @@ hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
{
if ( rv1?$unique_vals || rv2?$unique_vals )
{
if ( rv1?$unique_max )
result$unique_max = rv1$unique_max;
else if ( rv2?$unique_max )
result$unique_max = rv2$unique_max;
if ( rv1?$unique_vals )
result$unique_vals = copy(rv1$unique_vals);
if ( rv2?$unique_vals )
{
if ( ! result?$unique_vals )
{
result$unique_vals = copy(rv2$unique_vals);
}
else
{
for ( val2 in rv2$unique_vals )
{
if ( result?$unique_max && |result$unique_vals| >= result$unique_max )
break;
add result$unique_vals[copy(val2)];
}
}
}
result$unique = |result$unique_vals|;
}