From ce7ad003f251e8c76be3d07190907157cd9a87c1 Mon Sep 17 00:00:00 2001 From: Bernhard Amann Date: Mon, 22 Apr 2013 02:40:42 -0700 Subject: [PATCH] well, a test that works.. Note: merging top-k data structures is not yet possible (and is actually quite awkward/expensive). I will have to think about how to do that for a bit... --- src/Topk.cc | 23 +++++++---- src/Topk.h | 9 ++-- src/bro.bif | 27 ++++++++++++ testing/btest/Baseline/bifs.topk/out | 7 ++++ testing/btest/bifs/topk.bro | 61 ++++++++++++++++++++++++++++ 5 files changed, 115 insertions(+), 12 deletions(-) create mode 100644 testing/btest/Baseline/bifs.topk/out create mode 100644 testing/btest/bifs/topk.bro diff --git a/src/Topk.cc b/src/Topk.cc index ef7d7bfbd8..8f4d63ed78 100644 --- a/src/Topk.cc +++ b/src/Topk.cc @@ -19,7 +19,7 @@ Element::~Element() value=0; } -HashKey* Topk::GetHash(Val* v) +HashKey* TopkVal::GetHash(Val* v) { TypeList* tl = new TypeList(v->Type()); tl->Append(v->Type()); @@ -31,15 +31,16 @@ HashKey* Topk::GetHash(Val* v) return key; } -Topk::Topk(uint64 arg_size) +TopkVal::TopkVal(uint64 arg_size) : OpaqueVal(new OpaqueType("topk")) { elementDict = new PDict(Element); elementDict->SetDeleteFunc(topk_element_hash_delete_func); size = arg_size; type = 0; + numElements = 0; } -Topk::~Topk() +TopkVal::~TopkVal() { elementDict->Clear(); delete elementDict; @@ -57,7 +58,7 @@ Topk::~Topk() type = 0; } -VectorVal* Topk::getTopK(int k) // returns vector +VectorVal* TopkVal::getTopK(int k) // returns vector { if ( numElements == 0 ) { @@ -75,17 +76,23 @@ VectorVal* Topk::getTopK(int k) // returns vector int read = 0; std::list::iterator it = buckets.end(); + it--; while (read < k ) { + //printf("Bucket %llu\n", (*it)->count); std::list::iterator eit = (*it)->elements.begin(); while (eit != (*it)->elements.end() ) { + //printf("Size: %ld\n", (*it)->elements.size()); t->Assign(read, (*eit)->value->Ref()); read++; + eit++; } if ( it == buckets.begin() ) break; + + it--; } @@ -93,13 +100,14 @@ VectorVal* Topk::getTopK(int k) // returns vector return t; } -void Topk::Encountered(Val* encountered) +void TopkVal::Encountered(Val* encountered) { // ok, let's see if we already know this one. + //printf("NumElements: %d\n", numElements); // check type compatibility if ( numElements == 0 ) - type = encountered->Type()->Ref(); + type = encountered->Type()->Ref()->Ref(); else if ( !same_type(type, encountered->Type()) ) { @@ -161,6 +169,7 @@ void Topk::Encountered(Val* encountered) e->epsilon = b->count; b->elements.insert(b->elements.end(), e); elementDict->Insert(key, e); + e->parent = b; // fallthrough, increment operation has to run! } @@ -172,7 +181,7 @@ void Topk::Encountered(Val* encountered) } -void Topk::IncrementCounter(Element* e) +void TopkVal::IncrementCounter(Element* e) { Bucket* currBucket = e->parent; uint64 currcount = currBucket->count; diff --git a/src/Topk.h b/src/Topk.h index b38e1e8ab3..7c983ebdfc 100644 --- a/src/Topk.h +++ b/src/Topk.h @@ -6,6 +6,7 @@ #include #include "Val.h" #include "CompHash.h" +#include "OpaqueVal.h" // This class implements the top-k algorithm. Or - to be more precise - my interpretation of it. @@ -30,11 +31,11 @@ struct Element { declare(PDict, Element); -class Topk { +class TopkVal : public OpaqueVal { public: - Topk(uint64 size); - ~Topk(); + TopkVal(uint64 size); + ~TopkVal(); void Encountered(Val* value); // we saw something VectorVal* getTopK(int k); // returns vector @@ -47,8 +48,6 @@ private: PDict(Element)* elementDict; uint64 size; // how many elements are we tracking? uint64 numElements; // how many elements do we have at the moment - - }; }; diff --git a/src/bro.bif b/src/bro.bif index ac54da0e75..695337bcf1 100644 --- a/src/bro.bif +++ b/src/bro.bif @@ -5642,3 +5642,30 @@ function anonymize_addr%(a: addr, cl: IPAddrAnonymizationClass%): addr } %} + + +%%{ +#include "Topk.h" +%%} + +function topk_init%(size: count%): opaque of topk + %{ + Topk::TopkVal* v = new Topk::TopkVal(size); + return v; + %} + +function topk_add%(handle: opaque of topk, value: any%): any + %{ + assert(handle); + Topk::TopkVal* h = (Topk::TopkVal*) handle; + h->Encountered(value); + + return 0; + %} + +function topk_get_top%(handle: opaque of topk, k: count%): any + %{ + assert(handle); + Topk::TopkVal* h = (Topk::TopkVal*) handle; + return h->getTopK(k); + %} diff --git a/testing/btest/Baseline/bifs.topk/out b/testing/btest/Baseline/bifs.topk/out new file mode 100644 index 0000000000..94aa5bd572 --- /dev/null +++ b/testing/btest/Baseline/bifs.topk/out @@ -0,0 +1,7 @@ +[b, c] +[d, c] +[d, e] +[f, e] +[f, e] +[g, e] +[c, e, d] diff --git a/testing/btest/bifs/topk.bro b/testing/btest/bifs/topk.bro new file mode 100644 index 0000000000..af1f38c773 --- /dev/null +++ b/testing/btest/bifs/topk.bro @@ -0,0 +1,61 @@ +# @TEST-EXEC: bro -b %INPUT > out +# @TEST-EXEC: btest-diff out + +event bro_init() + { + local k1 = topk_init(2); + + # first - peculiarity check... + topk_add(k1, "a"); + topk_add(k1, "b"); + topk_add(k1, "b"); + topk_add(k1, "c"); + + local s = topk_get_top(k1, 5); + print s; + + topk_add(k1, "d"); + s = topk_get_top(k1, 5); + print s; + + topk_add(k1, "e"); + s = topk_get_top(k1, 5); + print s; + + topk_add(k1, "f"); + s = topk_get_top(k1, 5); + print s; + + topk_add(k1, "e"); + s = topk_get_top(k1, 5); + print s; + + topk_add(k1, "g"); + s = topk_get_top(k1, 5); + print s; + + k1 = topk_init(100); + topk_add(k1, "a"); + topk_add(k1, "b"); + topk_add(k1, "b"); + topk_add(k1, "c"); + topk_add(k1, "c"); + topk_add(k1, "c"); + topk_add(k1, "c"); + topk_add(k1, "c"); + topk_add(k1, "c"); + topk_add(k1, "d"); + topk_add(k1, "d"); + topk_add(k1, "d"); + topk_add(k1, "d"); + topk_add(k1, "e"); + topk_add(k1, "e"); + topk_add(k1, "e"); + topk_add(k1, "e"); + topk_add(k1, "e"); + topk_add(k1, "f"); + s = topk_get_top(k1, 3); + print s; + + +}