diff --git a/scripts/base/frameworks/sumstats/plugins/__load__.bro b/scripts/base/frameworks/sumstats/plugins/__load__.bro
index 0d4c2ed302..20b836d417 100644
--- a/scripts/base/frameworks/sumstats/plugins/__load__.bro
+++ b/scripts/base/frameworks/sumstats/plugins/__load__.bro
@@ -1,8 +1,9 @@
 @load ./average
+@load ./hll_unique
 @load ./max
 @load ./min
 @load ./sample
 @load ./std-dev
 @load ./sum
 @load ./unique
-@load ./variance
\ No newline at end of file
+@load ./variance
diff --git a/scripts/base/frameworks/sumstats/plugins/hll_unique.bro b/scripts/base/frameworks/sumstats/plugins/hll_unique.bro
new file mode 100644
index 0000000000..47ded2ca60
--- /dev/null
+++ b/scripts/base/frameworks/sumstats/plugins/hll_unique.bro
@@ -0,0 +1,60 @@
+@load base/frameworks/sumstats
+
+module SumStats;
+
+export {
+	redef enum Calculation += {
+		## Estimate the number of unique values using the
+		## probabilistic hll_cardinality_* bifs.
+		HLLUNIQUE
+	};
+
+	redef record ResultVal += {
+		## If cardinality is being tracked, the estimated number
+		## of unique items is tracked here.
+		hllunique: count &default=0;
+	};
+}
+
+redef record ResultVal += {
+	# Internal use only. This is not meant to be publicly available
+	# because probabilistic data structures have to be examined using
+	# specialized bifs.
+	card: opaque of cardinality &default=hll_cardinality_init(0.01);
+};
+
+# Set up HLLUNIQUE state on a ResultVal that does not have it yet.
+# NOTE(review): card has a &default, so "! rv?$card" may never be
+# true -- confirm against the record default semantics.
+hook init_resultval_hook(r: Reducer, rv: ResultVal)
+	{
+	if ( HLLUNIQUE in r$apply && ! rv?$card )
+		{
+		# Braces keep the counter reset under the same condition.
+		rv$card = hll_cardinality_init(0.01);
+		rv$hllunique = 0;
+		}
+	}
+
+# Feed the observation into the cardinality structure and refresh the
+# cached estimate.
+hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
+	{
+	if ( HLLUNIQUE in r$apply )
+		{
+		hll_cardinality_add(rv$card, obs);
+		rv$hllunique = double_to_count(hll_cardinality_estimate(rv$card));
+		}
+	}
+
+# Merge the structures of rv1 and rv2 into a new one and store it, along
+# with its estimate, in result.
+hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
+	{
+	local rhll = hll_cardinality_init(0.01);
+	hll_cardinality_merge_into(rhll, rv1$card);
+	hll_cardinality_merge_into(rhll, rv2$card);
+
+	result$card = rhll;
+	result$hllunique = double_to_count(hll_cardinality_estimate(rhll));
+	}
diff --git a/testing/btest/Baseline/scripts.base.frameworks.sumstats.basic-cluster/manager-1..stdout b/testing/btest/Baseline/scripts.base.frameworks.sumstats.basic-cluster/manager-1..stdout
index ea8904d2e6..ab25d52947 100644
--- a/testing/btest/Baseline/scripts.base.frameworks.sumstats.basic-cluster/manager-1..stdout
+++ b/testing/btest/Baseline/scripts.base.frameworks.sumstats.basic-cluster/manager-1..stdout
@@ -1,4 +1,4 @@
-Host: 6.5.4.3 - num:2 - sum:6.0 - avg:3.0 - max:5.0 - min:1.0 - var:8.0 - std_dev:2.8 - unique:2
-Host: 10.10.10.10 - num:1 - sum:5.0 - avg:5.0 - max:5.0 - min:5.0 - var:0.0 - std_dev:0.0 - unique:1
-Host: 1.2.3.4 - num:9 - sum:437.0 - avg:48.6 - max:95.0 - min:3.0 - var:758.8 - std_dev:27.5 - unique:8
-Host: 7.2.1.5 - num:2 - sum:145.0 - avg:72.5 - max:91.0 - min:54.0 - var:684.5 - std_dev:26.2 - unique:2
+Host: 6.5.4.3 - num:2 - sum:6.0 - avg:3.0 - max:5.0 - min:1.0 - var:8.0 - std_dev:2.8 - unique:2 - hllunique:2
+Host: 10.10.10.10 - num:1 - sum:5.0 - avg:5.0 - max:5.0 - min:5.0 - var:0.0 - std_dev:0.0 - unique:1 - hllunique:1
+Host: 1.2.3.4 - num:9 - sum:437.0 - avg:48.6 - max:95.0 - min:3.0 - var:758.8 - std_dev:27.5 - unique:8 - hllunique:8
+Host: 7.2.1.5 - num:2 - sum:145.0 - avg:72.5 - max:91.0 - min:54.0 - var:684.5 - std_dev:26.2 - unique:2 - hllunique:2
diff --git a/testing/btest/Baseline/scripts.base.frameworks.sumstats.basic/.stdout b/testing/btest/Baseline/scripts.base.frameworks.sumstats.basic/.stdout
index 208b6103b7..0ada495cfc 100644
--- a/testing/btest/Baseline/scripts.base.frameworks.sumstats.basic/.stdout
+++ b/testing/btest/Baseline/scripts.base.frameworks.sumstats.basic/.stdout
@@ -1,3 +1,3 @@
-Host: 6.5.4.3 - num:1 - sum:2.0 - var:0.0 - avg:2.0 - max:2.0 - min:2.0 - std_dev:0.0 - unique:1
-Host: 1.2.3.4 - num:5 - sum:221.0 - var:1144.2 - avg:44.2 - max:94.0 - min:5.0 - std_dev:33.8 - unique:4
-Host: 7.2.1.5 - num:1 - sum:1.0 - var:0.0 - avg:1.0 - max:1.0 - min:1.0 - std_dev:0.0 - unique:1
+Host: 6.5.4.3 - num:1 - sum:2.0 - var:0.0 - avg:2.0 - max:2.0 - min:2.0 - std_dev:0.0 - unique:1 - hllunique:1
+Host: 1.2.3.4 - num:5 - sum:221.0 - var:1144.2 - avg:44.2 - max:94.0 - min:5.0 - std_dev:33.8 - unique:4 - hllunique:4
+Host: 7.2.1.5 - num:1 - sum:1.0 - var:0.0 - avg:1.0 - max:1.0 - min:1.0 - std_dev:0.0 - unique:1 - hllunique:1
diff --git a/testing/btest/scripts/base/frameworks/sumstats/basic-cluster.bro b/testing/btest/scripts/base/frameworks/sumstats/basic-cluster.bro
index 1b7903ca1a..080697a824 100644
--- a/testing/btest/scripts/base/frameworks/sumstats/basic-cluster.bro
+++ b/testing/btest/scripts/base/frameworks/sumstats/basic-cluster.bro
@@ -22,7 +22,7 @@ global n = 0;
 
 event bro_init() &priority=5
 	{
-	local r1: SumStats::Reducer = [$stream="test", $apply=set(SumStats::SUM, SumStats::MIN, SumStats::MAX, SumStats::AVERAGE, SumStats::STD_DEV, SumStats::VARIANCE, SumStats::UNIQUE)];
+	local r1: SumStats::Reducer = [$stream="test", $apply=set(SumStats::SUM, SumStats::MIN, SumStats::MAX, SumStats::AVERAGE, SumStats::STD_DEV, SumStats::VARIANCE, SumStats::UNIQUE, SumStats::HLLUNIQUE)];
 	SumStats::create([$epoch=5secs,
 	                  $reducers=set(r1),
 	                  $epoch_finished(rt: SumStats::ResultTable) =
@@ -30,7 +30,7 @@ event bro_init() &priority=5
 	                  	for ( key in rt )
 	                  		{
 	                  		local r = rt[key]["test"];
-	                  		print fmt("Host: %s - num:%d - sum:%.1f - avg:%.1f - max:%.1f - min:%.1f - var:%.1f - std_dev:%.1f - unique:%d", key$host, r$num, r$sum, r$average, r$max, r$min, r$variance, r$std_dev, r$unique);
+	                  		print fmt("Host: %s - num:%d - sum:%.1f - avg:%.1f - max:%.1f - min:%.1f - var:%.1f - std_dev:%.1f - unique:%d - hllunique:%d", key$host, r$num, r$sum, r$average, r$max, r$min, r$variance, r$std_dev, r$unique, r$hllunique);
 	                  		}
 
 	                  	terminate();
diff --git a/testing/btest/scripts/base/frameworks/sumstats/basic.bro b/testing/btest/scripts/base/frameworks/sumstats/basic.bro
index 0b2851bf10..cafd834a42 100644
--- a/testing/btest/scripts/base/frameworks/sumstats/basic.bro
+++ b/testing/btest/scripts/base/frameworks/sumstats/basic.bro
@@ -10,7 +10,8 @@ event bro_init() &priority=5
 	                                          SumStats::MAX,
 	                                          SumStats::MIN,
 	                                          SumStats::STD_DEV,
-	                                          SumStats::UNIQUE)];
+	                                          SumStats::UNIQUE,
+	                                          SumStats::HLLUNIQUE)];
 	SumStats::create([$epoch=3secs,
 	                  $reducers=set(r1),
 	                  $epoch_finished(data: SumStats::ResultTable) =
@@ -18,7 +19,7 @@ event bro_init() &priority=5
 	                  	for ( key in data )
 	                  		{
 	                  		local r = data[key]["test.metric"];
-	                  		print fmt("Host: %s - num:%d - sum:%.1f - var:%.1f - avg:%.1f - max:%.1f - min:%.1f - std_dev:%.1f - unique:%d", key$host, r$num, r$sum, r$variance, r$average, r$max, r$min, r$std_dev, r$unique);
+	                  		print fmt("Host: %s - num:%d - sum:%.1f - var:%.1f - avg:%.1f - max:%.1f - min:%.1f - std_dev:%.1f - unique:%d - hllunique:%d", key$host, r$num, r$sum, r$variance, r$average, r$max, r$min, r$std_dev, r$unique, r$hllunique);
 	                  		}
 	                  	}
 	                  ]);