mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 14:48:21 +00:00
Merge branch 'topic/robin/hyperloglog-merge'
* topic/robin/hyperloglog-merge: (35 commits) Making the confidence configurable. Renaming HyperLogLog->CardinalityCounter. Fixing bug introduced during merging. add clustered leak test for hll. No issues. make gcc happy (hopefully) fix refcounting problem in hll/bloom-filter opaque vals. Thanks Robin. re-use same hash class for all add operations get hll ready for merging and forgot a file... adapt to new structure fix opaqueval-related memleak. make it compile on case-sensitive file systems and fix warnings make error rate configureable add persistence test not using predetermined random seeds. update cluster test to also use hll persistence really works. well, with this commit synchronizing the data structure should work.. ...if we had consistent hashing. and also serialize the other things we need ok, this bug was hard to find. serialization compiles. ...
This commit is contained in:
commit
6f9d28cc18
31 changed files with 1018 additions and 19 deletions
|
@ -1,4 +1,5 @@
|
|||
@load ./average
|
||||
@load ./hll_unique
|
||||
@load ./last
|
||||
@load ./max
|
||||
@load ./min
|
||||
|
|
62
scripts/base/frameworks/sumstats/plugins/hll_unique.bro
Normal file
62
scripts/base/frameworks/sumstats/plugins/hll_unique.bro
Normal file
|
@ -0,0 +1,62 @@
|
|||
@load base/frameworks/sumstats
|
||||
|
||||
module SumStats;
|
||||
|
||||
export {
	redef record Reducer += {
		## Error margin handed to hll_cardinality_init when the
		## HLL_UNIQUE calculation creates its cardinality counter.
		## Defaults to 0.01.
		hll_error_margin: double &default=0.01;

		## Confidence handed to hll_cardinality_init together with
		## the error margin. Defaults to 0.95.
		hll_confidence: double &default=0.95;
	};

	redef enum Calculation += {
		## Estimate the number of unique observations using a
		## HyperLogLog-based cardinality counter.
		HLL_UNIQUE
	};

	redef record ResultVal += {
		## Current cardinality estimate, i.e. the approximate number
		## of unique items observed. Stays at 0 unless HLL_UNIQUE
		## is being calculated.
		hll_unique: count &default=0;
	};
}
|
||||
|
||||
redef record ResultVal += {
	# Internal state, deliberately kept out of the export section:
	# the probabilistic counter is opaque and can only be inspected
	# through the dedicated hll_cardinality_* bifs.
	card: opaque of cardinality &optional;

	# Copies of the reducer's HLL parameters. The compose hook has no
	# access to the Reducer, so it reads them from here to create the
	# merged counter.
	hll_error_margin: double &optional;
	hll_confidence: double &optional;
};
|
||||
|
||||
# Registers the observation handler for the HLL_UNIQUE calculation.
#
# The handler lazily creates the cardinality counter on the first
# observation, feeds every observation into it, and refreshes the
# estimate stored in rv$hll_unique. The numeric "val" argument is
# not used by this plugin.
hook register_observe_plugins()
	{
	register_observe_plugin(HLL_UNIQUE, function(r: Reducer, val: double, obs: Observation, rv: ResultVal)
		{
		if ( ! rv?$card )
			{
			# First observation for this result: remember the reducer's
			# parameters (needed later by the compose hook), then build
			# the counter from them.
			rv$hll_error_margin = r$hll_error_margin;
			rv$hll_confidence = r$hll_confidence;
			rv$hll_unique = 0;
			rv$card = hll_cardinality_init(rv$hll_error_margin, rv$hll_confidence);
			}

		hll_cardinality_add(rv$card, obs);

		local estimate = hll_cardinality_estimate(rv$card);
		rv$hll_unique = double_to_count(estimate);
		});
	}
|
||||
|
||||
hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
|
||||
{
|
||||
local rhll = hll_cardinality_init(rv1$hll_error_margin, rv1$hll_confidence);
|
||||
hll_cardinality_merge_into(rhll, rv1$card);
|
||||
hll_cardinality_merge_into(rhll, rv2$card);
|
||||
|
||||
result$card = rhll;
|
||||
result$hll_unique = double_to_count(hll_cardinality_estimate(rhll));
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue