mirror of
https://github.com/zeek/zeek.git
synced 2025-10-05 16:18:19 +00:00

* origin/topic/bernhard/hyperloglog: (32 commits) add clustered leak test for hll. No issues. make gcc happy (hopefully) fix refcounting problem in hll/bloom-filter opaque vals. Thanks Robin. re-use same hash class for all add operations get hll ready for merging and forgot a file... adapt to new structure fix opaqueval-related memleak. make it compile on case-sensitive file systems and fix warnings make error rate configureable add persistence test not using predetermined random seeds. update cluster test to also use hll persistence really works. well, with this commit synchronizing the data structure should work.. ...if we had consistent hashing. and also serialize the other things we need ok, this bug was hard to find. serialization compiles. change plugin after feedback of seth Forgot a file. Again. Like always. Basically. do away with old file. ...
57 lines
1.4 KiB
Text
57 lines
1.4 KiB
Text
@load base/frameworks/sumstats
|
|
|
|
module SumStats;
|
|
|
|
export {
|
|
redef record Reducer += {
|
|
## The error margin handed to hll_cardinality_init when the
## HyperLogLog sketch for an HLL_UNIQUE calculation is created.
|
|
hll_error_margin: double &default=0.01;
|
|
};
|
|
|
|
redef enum Calculation += {
|
|
## Estimate the number of unique values with a HyperLogLog
## cardinality sketch. The estimate is published in the
## hll_unique field of the result value.
|
|
HLL_UNIQUE
|
|
};
|
|
|
|
redef record ResultVal += {
|
|
## If HLL_UNIQUE is being calculated, the current HyperLogLog
|
|
## estimate of the number of unique items is stored here.
|
|
hll_unique: count &default=0;
|
|
};
|
|
}
|
|
|
|
redef record ResultVal += {
|
|
# Internal use only. This is not meant to be publicly available
|
|
# because probabilistic data structures have to be examined using
|
|
# specialized bifs.
|
|
card: opaque of cardinality &optional;
|
|
|
|
# Copy of the reducer's error margin, stored next to the sketch because
# the compose hook needs it to initialize the merged sketch.
|
|
hll_error_margin: double &optional;
|
|
};
|
|
|
|
hook register_observe_plugins()
	{
	# Registers the HLL_UNIQUE observer: every observation is added to a
	# per-result HyperLogLog sketch and the published estimate is refreshed.
	register_observe_plugin(HLL_UNIQUE,
		function(r: Reducer, val: double, obs: Observation, rv: ResultVal)
			{
			# First observation for this result value: create the sketch and
			# remember the margin it was built with for the compose hook.
			if ( ! rv?$card )
				{
				rv$hll_unique = 0;
				rv$hll_error_margin = r$hll_error_margin;
				rv$card = hll_cardinality_init(r$hll_error_margin);
				}

			hll_cardinality_add(rv$card, obs);

			# Keep the exported counter in sync with the sketch.
			local estimate = hll_cardinality_estimate(rv$card);
			rv$hll_unique = double_to_count(estimate);
			});
	}
|
|
|
|
hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
	{
	# Merges the HyperLogLog sketches of rv1 and rv2 into result.
	#
	# card and hll_error_margin are optional fields, so a result value
	# that never received an HLL_UNIQUE observation carries neither.
	# Nothing to merge in that case; touching the unset fields would
	# raise a runtime error.
	if ( ! rv1?$card && ! rv2?$card )
		return;

	# At least one side holds a sketch. The observe plugin and this hook
	# always store the error margin together with the sketch, so take the
	# margin from whichever side has one.
	local margin: double;

	if ( rv1?$hll_error_margin )
		margin = rv1$hll_error_margin;
	else
		margin = rv2$hll_error_margin;

	local rhll = hll_cardinality_init(margin);

	# Only merge the sides that actually have a sketch.
	if ( rv1?$card )
		hll_cardinality_merge_into(rhll, rv1$card);

	if ( rv2?$card )
		hll_cardinality_merge_into(rhll, rv2$card);

	result$card = rhll;
	# Carry the margin along so the composed value can be composed again.
	result$hll_error_margin = margin;
	result$hll_unique = double_to_count(hll_cardinality_estimate(rhll));
	}
|