From 7eee2f0d17d7b51dcd893a0cacb87474112a4fb3 Mon Sep 17 00:00:00 2001 From: Bernhard Amann Date: Mon, 8 Apr 2013 10:00:34 +0200 Subject: [PATCH 1/4] measurement framework with hll unique --- scripts/base/frameworks/measurement/plugins/__load__.bro | 3 ++- .../scripts.base.frameworks.measurement.basic/.stdout | 6 +++--- testing/btest/scripts/base/frameworks/measurement/basic.bro | 5 +++-- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/scripts/base/frameworks/measurement/plugins/__load__.bro b/scripts/base/frameworks/measurement/plugins/__load__.bro index 0d4c2ed302..0a51a081a9 100644 --- a/scripts/base/frameworks/measurement/plugins/__load__.bro +++ b/scripts/base/frameworks/measurement/plugins/__load__.bro @@ -5,4 +5,5 @@ @load ./std-dev @load ./sum @load ./unique -@load ./variance \ No newline at end of file +@load ./variance +@load ./hll_unique diff --git a/testing/btest/Baseline/scripts.base.frameworks.measurement.basic/.stdout b/testing/btest/Baseline/scripts.base.frameworks.measurement.basic/.stdout index 208b6103b7..5f1c5ab5e4 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.measurement.basic/.stdout +++ b/testing/btest/Baseline/scripts.base.frameworks.measurement.basic/.stdout @@ -1,3 +1,3 @@ -Host: 6.5.4.3 - num:1 - sum:2.0 - var:0.0 - avg:2.0 - max:2.0 - min:2.0 - std_dev:0.0 - unique:1 -Host: 1.2.3.4 - num:5 - sum:221.0 - var:1144.2 - avg:44.2 - max:94.0 - min:5.0 - std_dev:33.8 - unique:4 -Host: 7.2.1.5 - num:1 - sum:1.0 - var:0.0 - avg:1.0 - max:1.0 - min:1.0 - std_dev:0.0 - unique:1 +Host: 6.5.4.3 - num:1 - sum:2.0 - var:0.0 - avg:2.0 - max:2.0 - min:2.0 - std_dev:0.0 - unique:1 - hllunique:1.0 +Host: 1.2.3.4 - num:5 - sum:221.0 - var:1144.2 - avg:44.2 - max:94.0 - min:5.0 - std_dev:33.8 - unique:4 - hllunique:4.0 +Host: 7.2.1.5 - num:1 - sum:1.0 - var:0.0 - avg:1.0 - max:1.0 - min:1.0 - std_dev:0.0 - unique:1 - hllunique:1.0 diff --git a/testing/btest/scripts/base/frameworks/measurement/basic.bro b/testing/btest/scripts/base/frameworks/measurement/basic.bro index e9dd21e0ef..4706a7c9b1 100644 --- a/testing/btest/scripts/base/frameworks/measurement/basic.bro +++ b/testing/btest/scripts/base/frameworks/measurement/basic.bro @@ -10,7 +10,8 @@ event bro_init() &priority=5 Measurement::MAX, Measurement::MIN, Measurement::STD_DEV, - Measurement::UNIQUE)]; + Measurement::UNIQUE, + Measurement::HLLUNIQUE)]; Measurement::create([$epoch=3secs, $reducers=set(r1), $epoch_finished(data: Measurement::ResultTable) = @@ -18,7 +19,7 @@ event bro_init() &priority=5 for ( key in data ) { local r = data[key]["test.metric"]; - print fmt("Host: %s - num:%d - sum:%.1f - var:%.1f - avg:%.1f - max:%.1f - min:%.1f - std_dev:%.1f - unique:%d", key$host, r$num, r$sum, r$variance, r$average, r$max, r$min, r$std_dev, r$unique); + print fmt("Host: %s - num:%d - sum:%.1f - var:%.1f - avg:%.1f - max:%.1f - min:%.1f - std_dev:%.1f - unique:%d - hllunique:%.1f", key$host, r$num, r$sum, r$variance, r$average, r$max, r$min, r$std_dev, r$unique, hll_cardinality_estimate(r$hllunique)); } } ]); From bcd610fd50252b2095639e0d5821af1088e36325 Mon Sep 17 00:00:00 2001 From: Bernhard Amann Date: Mon, 8 Apr 2013 10:55:00 +0200 Subject: [PATCH 2/4] Forgot a file. Again. Like always. Basically. --- .../measurement/plugins/hll_unique.bro | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 scripts/base/frameworks/measurement/plugins/hll_unique.bro diff --git a/scripts/base/frameworks/measurement/plugins/hll_unique.bro b/scripts/base/frameworks/measurement/plugins/hll_unique.bro new file mode 100644 index 0000000000..0e95e6fcdd --- /dev/null +++ b/scripts/base/frameworks/measurement/plugins/hll_unique.bro @@ -0,0 +1,39 @@ + +module Measurement; + +export { + redef enum Calculation += { + ## Calculate the number of unique values. + HLLUNIQUE + }; + + redef record ResultVal += { + ## If cardinality is being tracked, the number of unique + ## items is tracked here. + hllunique: opaque of cardinality &default=hll_cardinality_init(0.01); + }; +} + +hook init_resultval_hook(r: Reducer, rv: ResultVal) + { + if ( HLLUNIQUE in r$apply && ! rv?$hllunique ) + rv$hllunique = hll_cardinality_init(0.01); + } + + +hook add_to_reducer_hook(r: Reducer, val: double, data: DataPoint, rv: ResultVal) + { + if ( HLLUNIQUE in r$apply ) + { + hll_cardinality_add(rv$hllunique, data); + } + } + +hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal) + { + local rhll = hll_cardinality_init(0.01); + hll_cardinality_merge_into(rhll, rv1$hllunique); + hll_cardinality_merge_into(rhll, rv2$hllunique); + + result$hllunique = rhll; + } From f10ed9e29a6abf56e8110e2cc400f8e796e4c07a Mon Sep 17 00:00:00 2001 From: Bernhard Amann Date: Wed, 10 Apr 2013 10:45:45 -0400 Subject: [PATCH 3/4] change plugin after feedback of seth --- .../measurement/plugins/hll_unique.bro | 25 +++++++++++++------ .../base/frameworks/measurement/basic.bro | 2 +- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/scripts/base/frameworks/measurement/plugins/hll_unique.bro b/scripts/base/frameworks/measurement/plugins/hll_unique.bro index 0e95e6fcdd..ccdb872606 100644 --- a/scripts/base/frameworks/measurement/plugins/hll_unique.bro +++ b/scripts/base/frameworks/measurement/plugins/hll_unique.bro @@ -10,14 +10,23 @@ export { redef record ResultVal += { ## If cardinality is being tracked, the number of unique ## items is tracked here. - hllunique: opaque of cardinality &default=hll_cardinality_init(0.01); + hllunique: count &default=0; }; } +redef record ResultVal += { + # Internal use only. This is not meant to be publically available + # because probabilistic data structures have to be examined using + # specialized bifs. + card: opaque of cardinality &default=hll_cardinality_init(0.01); +}; + + hook init_resultval_hook(r: Reducer, rv: ResultVal) { - if ( HLLUNIQUE in r$apply && ! rv?$hllunique ) - rv$hllunique = hll_cardinality_init(0.01); + if ( HLLUNIQUE in r$apply && ! rv?$card ) + rv$card = hll_cardinality_init(0.01); + rv$hllunique = 0; } @@ -25,15 +34,17 @@ hook add_to_reducer_hook(r: Reducer, val: double, data: DataPoint, rv: ResultVal { if ( HLLUNIQUE in r$apply ) { - hll_cardinality_add(rv$hllunique, data); + hll_cardinality_add(rv$card, data); + rv$hllunique = double_to_count(hll_cardinality_estimate(rv$card)); } } hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal) { local rhll = hll_cardinality_init(0.01); - hll_cardinality_merge_into(rhll, rv1$hllunique); - hll_cardinality_merge_into(rhll, rv2$hllunique); + hll_cardinality_merge_into(rhll, rv1$card); + hll_cardinality_merge_into(rhll, rv2$card); - result$hllunique = rhll; + result$card = rhll; + result$hllunique = double_to_count(hll_cardinality_estimate(rhll)); } diff --git a/testing/btest/scripts/base/frameworks/measurement/basic.bro b/testing/btest/scripts/base/frameworks/measurement/basic.bro index 4706a7c9b1..701b79fbb3 100644 --- a/testing/btest/scripts/base/frameworks/measurement/basic.bro +++ b/testing/btest/scripts/base/frameworks/measurement/basic.bro @@ -19,7 +19,7 @@ event bro_init() &priority=5 for ( key in data ) { local r = data[key]["test.metric"]; - print fmt("Host: %s - num:%d - sum:%.1f - var:%.1f - avg:%.1f - max:%.1f - min:%.1f - std_dev:%.1f - unique:%d - hllunique:%.1f", key$host, r$num, r$sum, r$variance, r$average, r$max, r$min, r$std_dev, r$unique, hll_cardinality_estimate(r$hllunique)); + print fmt("Host: %s - num:%d - sum:%.1f - var:%.1f - avg:%.1f - max:%.1f - min:%.1f - std_dev:%.1f - unique:%d - hllunique:%d", key$host, r$num, r$sum, r$variance, r$average, r$max, r$min, r$std_dev, r$unique, r$hllunique); } } ]); From 6e532e89608fc188e00d266a41a90ba6fdc9b668 Mon Sep 17 00:00:00 2001 From: Bernhard Amann Date: Fri, 19 Apr 2013 09:58:57 -0700 Subject: [PATCH 4/4] update cluster test to also use hll --- .../manager-1..stdout | 8 ++++---- .../scripts/base/frameworks/sumstats/basic-cluster.bro | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/testing/btest/Baseline/scripts.base.frameworks.sumstats.basic-cluster/manager-1..stdout b/testing/btest/Baseline/scripts.base.frameworks.sumstats.basic-cluster/manager-1..stdout index ea8904d2e6..ab25d52947 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.sumstats.basic-cluster/manager-1..stdout +++ b/testing/btest/Baseline/scripts.base.frameworks.sumstats.basic-cluster/manager-1..stdout @@ -1,4 +1,4 @@ -Host: 6.5.4.3 - num:2 - sum:6.0 - avg:3.0 - max:5.0 - min:1.0 - var:8.0 - std_dev:2.8 - unique:2 -Host: 10.10.10.10 - num:1 - sum:5.0 - avg:5.0 - max:5.0 - min:5.0 - var:0.0 - std_dev:0.0 - unique:1 -Host: 1.2.3.4 - num:9 - sum:437.0 - avg:48.6 - max:95.0 - min:3.0 - var:758.8 - std_dev:27.5 - unique:8 -Host: 7.2.1.5 - num:2 - sum:145.0 - avg:72.5 - max:91.0 - min:54.0 - var:684.5 - std_dev:26.2 - unique:2 +Host: 6.5.4.3 - num:2 - sum:6.0 - avg:3.0 - max:5.0 - min:1.0 - var:8.0 - std_dev:2.8 - unique:2 - hllunique:2 +Host: 10.10.10.10 - num:1 - sum:5.0 - avg:5.0 - max:5.0 - min:5.0 - var:0.0 - std_dev:0.0 - unique:1 - hllunique:1 +Host: 1.2.3.4 - num:9 - sum:437.0 - avg:48.6 - max:95.0 - min:3.0 - var:758.8 - std_dev:27.5 - unique:8 - hllunique:8 +Host: 7.2.1.5 - num:2 - sum:145.0 - avg:72.5 - max:91.0 - min:54.0 - var:684.5 - std_dev:26.2 - unique:2 - hllunique:2 diff --git a/testing/btest/scripts/base/frameworks/sumstats/basic-cluster.bro b/testing/btest/scripts/base/frameworks/sumstats/basic-cluster.bro index 1b7903ca1a..080697a824 100644 --- a/testing/btest/scripts/base/frameworks/sumstats/basic-cluster.bro +++ b/testing/btest/scripts/base/frameworks/sumstats/basic-cluster.bro @@ -22,7 +22,7 @@ global n = 0; event bro_init() &priority=5 { - local r1: SumStats::Reducer = [$stream="test", $apply=set(SumStats::SUM, SumStats::MIN, SumStats::MAX, SumStats::AVERAGE, SumStats::STD_DEV, SumStats::VARIANCE, SumStats::UNIQUE)]; + local r1: SumStats::Reducer = [$stream="test", $apply=set(SumStats::SUM, SumStats::MIN, SumStats::MAX, SumStats::AVERAGE, SumStats::STD_DEV, SumStats::VARIANCE, SumStats::UNIQUE, SumStats::HLLUNIQUE)]; SumStats::create([$epoch=5secs, $reducers=set(r1), $epoch_finished(rt: SumStats::ResultTable) = @@ -30,7 +30,7 @@ event bro_init() &priority=5 for ( key in rt ) { local r = rt[key]["test"]; - print fmt("Host: %s - num:%d - sum:%.1f - avg:%.1f - max:%.1f - min:%.1f - var:%.1f - std_dev:%.1f - unique:%d", key$host, r$num, r$sum, r$average, r$max, r$min, r$variance, r$std_dev, r$unique); + print fmt("Host: %s - num:%d - sum:%.1f - avg:%.1f - max:%.1f - min:%.1f - var:%.1f - std_dev:%.1f - unique:%d - hllunique:%d", key$host, r$num, r$sum, r$average, r$max, r$min, r$variance, r$std_dev, r$unique, r$hllunique); } terminate();