mirror of
https://github.com/zeek/zeek.git
synced 2025-10-16 21:48:21 +00:00
Well, a test that works.
Note: merging top-k data structures is not yet possible (and is actually quite awkward and expensive). I will have to think for a bit about how to do that...
This commit is contained in:
parent
c21c18ea45
commit
ce7ad003f2
5 changed files with 115 additions and 12 deletions
23
src/Topk.cc
23
src/Topk.cc
|
@ -19,7 +19,7 @@ Element::~Element()
|
|||
value=0;
|
||||
}
|
||||
|
||||
HashKey* Topk::GetHash(Val* v)
|
||||
HashKey* TopkVal::GetHash(Val* v)
|
||||
{
|
||||
TypeList* tl = new TypeList(v->Type());
|
||||
tl->Append(v->Type());
|
||||
|
@ -31,15 +31,16 @@ HashKey* Topk::GetHash(Val* v)
|
|||
return key;
|
||||
}
|
||||
|
||||
Topk::Topk(uint64 arg_size)
|
||||
TopkVal::TopkVal(uint64 arg_size) : OpaqueVal(new OpaqueType("topk"))
|
||||
{
|
||||
elementDict = new PDict(Element);
|
||||
elementDict->SetDeleteFunc(topk_element_hash_delete_func);
|
||||
size = arg_size;
|
||||
type = 0;
|
||||
numElements = 0;
|
||||
}
|
||||
|
||||
Topk::~Topk()
|
||||
TopkVal::~TopkVal()
|
||||
{
|
||||
elementDict->Clear();
|
||||
delete elementDict;
|
||||
|
@ -57,7 +58,7 @@ Topk::~Topk()
|
|||
type = 0;
|
||||
}
|
||||
|
||||
VectorVal* Topk::getTopK(int k) // returns vector
|
||||
VectorVal* TopkVal::getTopK(int k) // returns vector
|
||||
{
|
||||
if ( numElements == 0 )
|
||||
{
|
||||
|
@ -75,17 +76,23 @@ VectorVal* Topk::getTopK(int k) // returns vector
|
|||
|
||||
int read = 0;
|
||||
std::list<Bucket*>::iterator it = buckets.end();
|
||||
it--;
|
||||
while (read < k )
|
||||
{
|
||||
//printf("Bucket %llu\n", (*it)->count);
|
||||
std::list<Element*>::iterator eit = (*it)->elements.begin();
|
||||
while (eit != (*it)->elements.end() )
|
||||
{
|
||||
//printf("Size: %ld\n", (*it)->elements.size());
|
||||
t->Assign(read, (*eit)->value->Ref());
|
||||
read++;
|
||||
eit++;
|
||||
}
|
||||
|
||||
if ( it == buckets.begin() )
|
||||
break;
|
||||
|
||||
it--;
|
||||
}
|
||||
|
||||
|
||||
|
@ -93,13 +100,14 @@ VectorVal* Topk::getTopK(int k) // returns vector
|
|||
return t;
|
||||
}
|
||||
|
||||
void Topk::Encountered(Val* encountered)
|
||||
void TopkVal::Encountered(Val* encountered)
|
||||
{
|
||||
// ok, let's see if we already know this one.
|
||||
|
||||
//printf("NumElements: %d\n", numElements);
|
||||
// check type compatibility
|
||||
if ( numElements == 0 )
|
||||
type = encountered->Type()->Ref();
|
||||
type = encountered->Type()->Ref()->Ref();
|
||||
else
|
||||
if ( !same_type(type, encountered->Type()) )
|
||||
{
|
||||
|
@ -161,6 +169,7 @@ void Topk::Encountered(Val* encountered)
|
|||
e->epsilon = b->count;
|
||||
b->elements.insert(b->elements.end(), e);
|
||||
elementDict->Insert(key, e);
|
||||
e->parent = b;
|
||||
// fallthrough, increment operation has to run!
|
||||
}
|
||||
|
||||
|
@ -172,7 +181,7 @@ void Topk::Encountered(Val* encountered)
|
|||
|
||||
}
|
||||
|
||||
void Topk::IncrementCounter(Element* e)
|
||||
void TopkVal::IncrementCounter(Element* e)
|
||||
{
|
||||
Bucket* currBucket = e->parent;
|
||||
uint64 currcount = currBucket->count;
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
#include <list>
|
||||
#include "Val.h"
|
||||
#include "CompHash.h"
|
||||
#include "OpaqueVal.h"
|
||||
|
||||
// This class implements the top-k algorithm. Or - to be more precise - my interpretation of it.
|
||||
|
||||
|
@ -30,11 +31,11 @@ struct Element {
|
|||
|
||||
declare(PDict, Element);
|
||||
|
||||
class Topk {
|
||||
class TopkVal : public OpaqueVal {
|
||||
|
||||
public:
|
||||
Topk(uint64 size);
|
||||
~Topk();
|
||||
TopkVal(uint64 size);
|
||||
~TopkVal();
|
||||
void Encountered(Val* value); // we saw something
|
||||
VectorVal* getTopK(int k); // returns vector
|
||||
|
||||
|
@ -47,8 +48,6 @@ private:
|
|||
PDict(Element)* elementDict;
|
||||
uint64 size; // how many elements are we tracking?
|
||||
uint64 numElements; // how many elements do we have at the moment
|
||||
|
||||
|
||||
};
|
||||
|
||||
};
|
||||
|
|
27
src/bro.bif
27
src/bro.bif
|
@ -5642,3 +5642,30 @@ function anonymize_addr%(a: addr, cl: IPAddrAnonymizationClass%): addr
|
|||
}
|
||||
%}
|
||||
|
||||
|
||||
|
||||
%%{
|
||||
#include "Topk.h"
|
||||
%%}
|
||||
|
||||
function topk_init%(size: count%): opaque of topk
|
||||
%{
|
||||
// Construct a new TopkVal from the script-supplied size and return it as the
// result of the call (declared as "opaque of topk" in the signature above).
Topk::TopkVal* v = new Topk::TopkVal(size);
|
||||
return v;
|
||||
%}
|
||||
|
||||
function topk_add%(handle: opaque of topk, value: any%): any
|
||||
%{
|
||||
// Record one observation of `value` in the top-k structure behind `handle`.
assert(handle);
|
||||
// C-style cast with no runtime type check.
// NOTE(review): presumably the "opaque of topk" parameter type guarantees
// that handle really is a TopkVal - confirm.
Topk::TopkVal* h = (Topk::TopkVal*) handle;
|
||||
h->Encountered(value);
|
||||
|
||||
// NOTE(review): returns 0 although the declared result type is `any`;
// confirm that callers never use the result of this function.
return 0;
|
||||
%}
|
||||
|
||||
function topk_get_top%(handle: opaque of topk, k: count%): any
|
||||
%{
|
||||
// Return whatever TopkVal::getTopK yields for the requested k.
assert(handle);
|
||||
// C-style cast with no runtime type check.
// NOTE(review): presumably safe because of the "opaque of topk" parameter
// type - confirm.
Topk::TopkVal* h = (Topk::TopkVal*) handle;
|
||||
// NOTE(review): k is declared `count` here; verify how getTopK's parameter
// type handles very large values.
return h->getTopK(k);
|
||||
%}
|
||||
|
|
7
testing/btest/Baseline/bifs.topk/out
Normal file
7
testing/btest/Baseline/bifs.topk/out
Normal file
|
@ -0,0 +1,7 @@
|
|||
[b, c]
|
||||
[d, c]
|
||||
[d, e]
|
||||
[f, e]
|
||||
[f, e]
|
||||
[g, e]
|
||||
[c, e, d]
|
61
testing/btest/bifs/topk.bro
Normal file
61
testing/btest/bifs/topk.bro
Normal file
|
@ -0,0 +1,61 @@
|
|||
# @TEST-EXEC: bro -b %INPUT > out
|
||||
# @TEST-EXEC: btest-diff out
|
||||
|
||||
# Exercises topk_init / topk_add / topk_get_top and prints every query result.
event bro_init()
|
||||
{
|
||||
# Phase 1: a tracker created with size 2.
local k1 = topk_init(2);
|
||||
|
||||
# first - peculiarity check...
# Four adds over three distinct strings ("b" twice), i.e. more distinct
# values than the size passed to topk_init.
|
||||
topk_add(k1, "a");
|
||||
topk_add(k1, "b");
|
||||
topk_add(k1, "b");
|
||||
topk_add(k1, "c");
|
||||
|
||||
# Ask for 5 entries and print the result.
local s = topk_get_top(k1, 5);
|
||||
print s;
|
||||
|
||||
# Each step below adds one more value, then queries for 5 and prints again.
topk_add(k1, "d");
|
||||
s = topk_get_top(k1, 5);
|
||||
print s;
|
||||
|
||||
topk_add(k1, "e");
|
||||
s = topk_get_top(k1, 5);
|
||||
print s;
|
||||
|
||||
topk_add(k1, "f");
|
||||
s = topk_get_top(k1, 5);
|
||||
print s;
|
||||
|
||||
# "e" is added a second time here.
topk_add(k1, "e");
|
||||
s = topk_get_top(k1, 5);
|
||||
print s;
|
||||
|
||||
topk_add(k1, "g");
|
||||
s = topk_get_top(k1, 5);
|
||||
print s;
|
||||
|
||||
# Phase 2: a fresh tracker created with size 100.
# Values added below: a x1, b x2, c x6, d x4, e x5, f x1.
k1 = topk_init(100);
|
||||
topk_add(k1, "a");
|
||||
topk_add(k1, "b");
|
||||
topk_add(k1, "b");
|
||||
topk_add(k1, "c");
|
||||
topk_add(k1, "c");
|
||||
topk_add(k1, "c");
|
||||
topk_add(k1, "c");
|
||||
topk_add(k1, "c");
|
||||
topk_add(k1, "c");
|
||||
topk_add(k1, "d");
|
||||
topk_add(k1, "d");
|
||||
topk_add(k1, "d");
|
||||
topk_add(k1, "d");
|
||||
topk_add(k1, "e");
|
||||
topk_add(k1, "e");
|
||||
topk_add(k1, "e");
|
||||
topk_add(k1, "e");
|
||||
topk_add(k1, "e");
|
||||
topk_add(k1, "f");
|
||||
# Query for 3 entries this time and print the result.
s = topk_get_top(k1, 3);
|
||||
print s;
|
||||
|
||||
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue