From de5769a88fd123cf6f34f978cf67ec5ee494de15 Mon Sep 17 00:00:00 2001 From: Bernhard Amann Date: Tue, 23 Apr 2013 15:19:01 -0700 Subject: [PATCH] topk for sumstats --- .../frameworks/sumstats/plugins/__load__.bro | 3 +- .../base/frameworks/sumstats/plugins/topk.bro | 24 ++++++++++ src/Topk.cc | 33 ++++++++++++- src/Topk.h | 4 +- src/bro.bif | 15 ++++++ testing/btest/Baseline/bifs.topk/.stderr | 6 +++ testing/btest/Baseline/bifs.topk/out | 30 ++++++++++++ .../.stdout | 8 ++++ testing/btest/bifs/topk.bro | 31 ++++++++++++ .../scripts/base/frameworks/sumstats/topk.bro | 48 +++++++++++++++++++ 10 files changed, 198 insertions(+), 4 deletions(-) create mode 100644 scripts/base/frameworks/sumstats/plugins/topk.bro create mode 100644 testing/btest/Baseline/bifs.topk/.stderr create mode 100644 testing/btest/Baseline/scripts.base.frameworks.sumstats.topk/.stdout create mode 100644 testing/btest/scripts/base/frameworks/sumstats/topk.bro diff --git a/scripts/base/frameworks/sumstats/plugins/__load__.bro b/scripts/base/frameworks/sumstats/plugins/__load__.bro index 0d4c2ed302..35191a4776 100644 --- a/scripts/base/frameworks/sumstats/plugins/__load__.bro +++ b/scripts/base/frameworks/sumstats/plugins/__load__.bro @@ -4,5 +4,6 @@ @load ./sample @load ./std-dev @load ./sum +@load ./topk @load ./unique -@load ./variance \ No newline at end of file +@load ./variance diff --git a/scripts/base/frameworks/sumstats/plugins/topk.bro b/scripts/base/frameworks/sumstats/plugins/topk.bro new file mode 100644 index 0000000000..f64e9fb18d --- /dev/null +++ b/scripts/base/frameworks/sumstats/plugins/topk.bro @@ -0,0 +1,24 @@ +@load base/frameworks/sumstats + +module SumStats; + +export { + redef enum Calculation += { + TOPK + }; + + redef record ResultVal += { + topk: opaque of topk &default=topk_init(500); + }; + +} + +hook observe_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal) + { + if ( TOPK in r$apply ) + { + topk_add(rv$topk, obs); + } + } + + diff --git a/src/Topk.cc b/src/Topk.cc index 8f4d63ed78..d5866b4f41 100644 --- a/src/Topk.cc +++ b/src/Topk.cc @@ -19,7 +19,7 @@ Element::~Element() value=0; } -HashKey* TopkVal::GetHash(Val* v) +HashKey* TopkVal::GetHash(Val* v) const { TypeList* tl = new TypeList(v->Type()); tl->Append(v->Type()); @@ -58,7 +58,8 @@ TopkVal::~TopkVal() type = 0; } -VectorVal* TopkVal::getTopK(int k) // returns vector + +VectorVal* TopkVal::getTopK(int k) // returns vector { if ( numElements == 0 ) { @@ -100,6 +101,34 @@ VectorVal* TopkVal::getTopK(int k) // returns vector return t; } +uint64_t TopkVal::getCount(Val* value) const + { + HashKey* key = GetHash(value); + Element* e = (Element*) elementDict->Lookup(key); + + if ( e == 0 ) + { + reporter->Error("getCount for element that is not in top-k"); + return 0; + } + + return e->parent->count; + } + +uint64_t TopkVal::getEpsilon(Val* value) const + { + HashKey* key = GetHash(value); + Element* e = (Element*) elementDict->Lookup(key); + + if ( e == 0 ) + { + reporter->Error("getEpsilon for element that is not in top-k"); + return 0; + } + + return e->epsilon; + } + void TopkVal::Encountered(Val* encountered) { // ok, let's see if we already know this one. diff --git a/src/Topk.h b/src/Topk.h index 7c983ebdfc..e4c6aa5aea 100644 --- a/src/Topk.h +++ b/src/Topk.h @@ -38,10 +38,12 @@ public: ~TopkVal(); void Encountered(Val* value); // we saw something VectorVal* getTopK(int k); // returns vector + uint64_t getCount(Val* value) const; + uint64_t getEpsilon(Val* value) const; private: void IncrementCounter(Element* e); - HashKey* GetHash(Val*); // this probably should go somewhere else. + HashKey* GetHash(Val*) const; // this probably should go somewhere else. BroType* type; std::list buckets; diff --git a/src/bro.bif b/src/bro.bif index 695337bcf1..e8e78c7872 100644 --- a/src/bro.bif +++ b/src/bro.bif @@ -5669,3 +5669,18 @@ function topk_get_top%(handle: opaque of topk, k: count%): any Topk::TopkVal* h = (Topk::TopkVal*) handle; return h->getTopK(k); %} + +function topk_count%(handle: opaque of topk, value: any%): count + %{ + assert(handle); + Topk::TopkVal* h = (Topk::TopkVal*) handle; + return new Val(h->getCount(value), TYPE_COUNT); + %} + +function topk_epsilon%(handle: opaque of topk, value: any%): count + %{ + assert(handle); + Topk::TopkVal* h = (Topk::TopkVal*) handle; + return new Val(h->getEpsilon(value), TYPE_COUNT); + %} + diff --git a/testing/btest/Baseline/bifs.topk/.stderr b/testing/btest/Baseline/bifs.topk/.stderr new file mode 100644 index 0000000000..f57e35ca51 --- /dev/null +++ b/testing/btest/Baseline/bifs.topk/.stderr @@ -0,0 +1,6 @@ +error: getCount for element that is not in top-k +error: getEpsilon for element that is not in top-k +error: getCount for element that is not in top-k +error: getEpsilon for element that is not in top-k +error: getCount for element that is not in top-k +error: getEpsilon for element that is not in top-k diff --git a/testing/btest/Baseline/bifs.topk/out b/testing/btest/Baseline/bifs.topk/out index 94aa5bd572..2116a30a12 100644 --- a/testing/btest/Baseline/bifs.topk/out +++ b/testing/btest/Baseline/bifs.topk/out @@ -1,7 +1,37 @@ [b, c] +0 +0 +2 +0 +2 +1 [d, c] +0 +0 +2 +1 +3 +2 [d, e] +3 +2 +3 +2 [f, e] +4 +3 +3 +2 [f, e] +4 +3 +4 +2 [g, e] +0 +0 +4 +2 +5 +4 [c, e, d] diff --git a/testing/btest/Baseline/scripts.base.frameworks.sumstats.topk/.stdout b/testing/btest/Baseline/scripts.base.frameworks.sumstats.topk/.stdout new file mode 100644 index 0000000000..c85316eecc --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.sumstats.topk/.stdout @@ -0,0 +1,8 @@ +Top entries for key counter +Num: 1, count: 99, epsilon: 0 +Num: 2, count: 98, epsilon: 0 +Num: 3, count: 97, epsilon: 0 +Num: 4, count: 96, epsilon: 0 +Num: 5, count: 95, epsilon: 0 +Top entries for key two +Num: 1, count: 2, epsilon: 0 diff --git a/testing/btest/bifs/topk.bro b/testing/btest/bifs/topk.bro index af1f38c773..9d936ce2f4 100644 --- a/testing/btest/bifs/topk.bro +++ b/testing/btest/bifs/topk.bro @@ -1,5 +1,6 @@ # @TEST-EXEC: bro -b %INPUT > out # @TEST-EXEC: btest-diff out +# @TEST-EXEC: btest-diff .stderr event bro_init() { @@ -13,26 +14,56 @@ event bro_init() local s = topk_get_top(k1, 5); print s; + print topk_count(k1, "a"); + print topk_epsilon(k1, "a"); + print topk_count(k1, "b"); + print topk_epsilon(k1, "b"); + print topk_count(k1, "c"); + print topk_epsilon(k1, "c"); topk_add(k1, "d"); s = topk_get_top(k1, 5); print s; + print topk_count(k1, "b"); + print topk_epsilon(k1, "b"); + print topk_count(k1, "c"); + print topk_epsilon(k1, "c"); + print topk_count(k1, "d"); + print topk_epsilon(k1, "d"); topk_add(k1, "e"); s = topk_get_top(k1, 5); print s; + print topk_count(k1, "d"); + print topk_epsilon(k1, "d"); + print topk_count(k1, "e"); + print topk_epsilon(k1, "e"); topk_add(k1, "f"); s = topk_get_top(k1, 5); print s; + print topk_count(k1, "f"); + print topk_epsilon(k1, "f"); + print topk_count(k1, "e"); + print topk_epsilon(k1, "e"); topk_add(k1, "e"); s = topk_get_top(k1, 5); print s; + print topk_count(k1, "f"); + print topk_epsilon(k1, "f"); + print topk_count(k1, "e"); + print topk_epsilon(k1, "e"); topk_add(k1, "g"); s = topk_get_top(k1, 5); print s; + print topk_count(k1, "f"); + print topk_epsilon(k1, "f"); + print topk_count(k1, "e"); + print topk_epsilon(k1, "e"); + print topk_count(k1, "g"); + print topk_epsilon(k1, "g"); k1 = topk_init(100); topk_add(k1, "a"); diff --git a/testing/btest/scripts/base/frameworks/sumstats/topk.bro b/testing/btest/scripts/base/frameworks/sumstats/topk.bro new file mode 100644 index 0000000000..22a5af1bc7 --- /dev/null +++ b/testing/btest/scripts/base/frameworks/sumstats/topk.bro @@ -0,0 +1,48 @@ +# @TEST-EXEC: bro %INPUT +# @TEST-EXEC: btest-diff .stdout + +event bro_init() &priority=5 + { + local r1: SumStats::Reducer = [$stream="test.metric", + $apply=set(SumStats::TOPK)]; + SumStats::create([$epoch=3secs, + $reducers=set(r1), + $epoch_finished(data: SumStats::ResultTable) = + { + for ( key in data ) + { + local r = data[key]["test.metric"]; + + local s: vector of SumStats::Observation; + s = topk_get_top(r$topk, 5); + + print fmt("Top entries for key %s", key$str); + for ( element in s ) + { + print fmt("Num: %d, count: %d, epsilon: %d", s[element]$num, topk_count(r$topk, s[element]), topk_epsilon(r$topk, s[element])); + } + + } + } + ]); + + + const loop_v: vector of count = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100}; + + local a: count; + a = 0; + + for ( i in loop_v ) + { + a = a + 1; + for ( j in loop_v ) + { + if ( i < j ) + SumStats::observe("test.metric", [$str="counter"], [$num=a]); + } + } + + + SumStats::observe("test.metric", [$str="two"], [$num=1]); + SumStats::observe("test.metric", [$str="two"], [$num=1]); + }