mirror of
https://github.com/zeek/zeek.git
synced 2025-10-16 21:48:21 +00:00
Well, a test that works...
Note: merging top-k data structures is not yet possible (and is actually quite awkward and expensive to do). I will have to think for a bit about how to do that...
This commit is contained in:
parent
c21c18ea45
commit
ce7ad003f2
5 changed files with 115 additions and 12 deletions
23
src/Topk.cc
23
src/Topk.cc
|
@ -19,7 +19,7 @@ Element::~Element()
|
||||||
value=0;
|
value=0;
|
||||||
}
|
}
|
||||||
|
|
||||||
HashKey* Topk::GetHash(Val* v)
|
HashKey* TopkVal::GetHash(Val* v)
|
||||||
{
|
{
|
||||||
TypeList* tl = new TypeList(v->Type());
|
TypeList* tl = new TypeList(v->Type());
|
||||||
tl->Append(v->Type());
|
tl->Append(v->Type());
|
||||||
|
@ -31,15 +31,16 @@ HashKey* Topk::GetHash(Val* v)
|
||||||
return key;
|
return key;
|
||||||
}
|
}
|
||||||
|
|
||||||
Topk::Topk(uint64 arg_size)
|
TopkVal::TopkVal(uint64 arg_size) : OpaqueVal(new OpaqueType("topk"))
|
||||||
{
|
{
|
||||||
elementDict = new PDict(Element);
|
elementDict = new PDict(Element);
|
||||||
elementDict->SetDeleteFunc(topk_element_hash_delete_func);
|
elementDict->SetDeleteFunc(topk_element_hash_delete_func);
|
||||||
size = arg_size;
|
size = arg_size;
|
||||||
type = 0;
|
type = 0;
|
||||||
|
numElements = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
Topk::~Topk()
|
TopkVal::~TopkVal()
|
||||||
{
|
{
|
||||||
elementDict->Clear();
|
elementDict->Clear();
|
||||||
delete elementDict;
|
delete elementDict;
|
||||||
|
@ -57,7 +58,7 @@ Topk::~Topk()
|
||||||
type = 0;
|
type = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
VectorVal* Topk::getTopK(int k) // returns vector
|
VectorVal* TopkVal::getTopK(int k) // returns vector
|
||||||
{
|
{
|
||||||
if ( numElements == 0 )
|
if ( numElements == 0 )
|
||||||
{
|
{
|
||||||
|
@ -75,17 +76,23 @@ VectorVal* Topk::getTopK(int k) // returns vector
|
||||||
|
|
||||||
int read = 0;
|
int read = 0;
|
||||||
std::list<Bucket*>::iterator it = buckets.end();
|
std::list<Bucket*>::iterator it = buckets.end();
|
||||||
|
it--;
|
||||||
while (read < k )
|
while (read < k )
|
||||||
{
|
{
|
||||||
|
//printf("Bucket %llu\n", (*it)->count);
|
||||||
std::list<Element*>::iterator eit = (*it)->elements.begin();
|
std::list<Element*>::iterator eit = (*it)->elements.begin();
|
||||||
while (eit != (*it)->elements.end() )
|
while (eit != (*it)->elements.end() )
|
||||||
{
|
{
|
||||||
|
//printf("Size: %ld\n", (*it)->elements.size());
|
||||||
t->Assign(read, (*eit)->value->Ref());
|
t->Assign(read, (*eit)->value->Ref());
|
||||||
read++;
|
read++;
|
||||||
|
eit++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( it == buckets.begin() )
|
if ( it == buckets.begin() )
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
it--;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -93,13 +100,14 @@ VectorVal* Topk::getTopK(int k) // returns vector
|
||||||
return t;
|
return t;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Topk::Encountered(Val* encountered)
|
void TopkVal::Encountered(Val* encountered)
|
||||||
{
|
{
|
||||||
// ok, let's see if we already know this one.
|
// ok, let's see if we already know this one.
|
||||||
|
|
||||||
|
//printf("NumElements: %d\n", numElements);
|
||||||
// check type compatibility
|
// check type compatibility
|
||||||
if ( numElements == 0 )
|
if ( numElements == 0 )
|
||||||
type = encountered->Type()->Ref();
|
type = encountered->Type()->Ref()->Ref();
|
||||||
else
|
else
|
||||||
if ( !same_type(type, encountered->Type()) )
|
if ( !same_type(type, encountered->Type()) )
|
||||||
{
|
{
|
||||||
|
@ -161,6 +169,7 @@ void Topk::Encountered(Val* encountered)
|
||||||
e->epsilon = b->count;
|
e->epsilon = b->count;
|
||||||
b->elements.insert(b->elements.end(), e);
|
b->elements.insert(b->elements.end(), e);
|
||||||
elementDict->Insert(key, e);
|
elementDict->Insert(key, e);
|
||||||
|
e->parent = b;
|
||||||
// fallthrough, increment operation has to run!
|
// fallthrough, increment operation has to run!
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -172,7 +181,7 @@ void Topk::Encountered(Val* encountered)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Topk::IncrementCounter(Element* e)
|
void TopkVal::IncrementCounter(Element* e)
|
||||||
{
|
{
|
||||||
Bucket* currBucket = e->parent;
|
Bucket* currBucket = e->parent;
|
||||||
uint64 currcount = currBucket->count;
|
uint64 currcount = currBucket->count;
|
||||||
|
|
|
@ -6,6 +6,7 @@
|
||||||
#include <list>
|
#include <list>
|
||||||
#include "Val.h"
|
#include "Val.h"
|
||||||
#include "CompHash.h"
|
#include "CompHash.h"
|
||||||
|
#include "OpaqueVal.h"
|
||||||
|
|
||||||
// This class implements the top-k algorithm. Or - to be more precise - my interpretation of it.
|
// This class implements the top-k algorithm. Or - to be more precise - my interpretation of it.
|
||||||
|
|
||||||
|
@ -30,11 +31,11 @@ struct Element {
|
||||||
|
|
||||||
declare(PDict, Element);
|
declare(PDict, Element);
|
||||||
|
|
||||||
class Topk {
|
class TopkVal : public OpaqueVal {
|
||||||
|
|
||||||
public:
|
public:
|
||||||
Topk(uint64 size);
|
TopkVal(uint64 size);
|
||||||
~Topk();
|
~TopkVal();
|
||||||
void Encountered(Val* value); // we saw something
|
void Encountered(Val* value); // we saw something
|
||||||
VectorVal* getTopK(int k); // returns vector
|
VectorVal* getTopK(int k); // returns vector
|
||||||
|
|
||||||
|
@ -47,8 +48,6 @@ private:
|
||||||
PDict(Element)* elementDict;
|
PDict(Element)* elementDict;
|
||||||
uint64 size; // how many elements are we tracking?
|
uint64 size; // how many elements are we tracking?
|
||||||
uint64 numElements; // how many elements do we have at the moment
|
uint64 numElements; // how many elements do we have at the moment
|
||||||
|
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
27
src/bro.bif
27
src/bro.bif
|
@ -5642,3 +5642,30 @@ function anonymize_addr%(a: addr, cl: IPAddrAnonymizationClass%): addr
|
||||||
}
|
}
|
||||||
%}
|
%}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
%%{
|
||||||
|
#include "Topk.h"
|
||||||
|
%%}
|
||||||
|
|
||||||
|
function topk_init%(size: count%): opaque of topk
|
||||||
|
%{
|
||||||
|
Topk::TopkVal* v = new Topk::TopkVal(size);
|
||||||
|
return v;
|
||||||
|
%}
|
||||||
|
|
||||||
|
function topk_add%(handle: opaque of topk, value: any%): any
|
||||||
|
%{
|
||||||
|
assert(handle);
|
||||||
|
Topk::TopkVal* h = (Topk::TopkVal*) handle;
|
||||||
|
h->Encountered(value);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
%}
|
||||||
|
|
||||||
|
function topk_get_top%(handle: opaque of topk, k: count%): any
|
||||||
|
%{
|
||||||
|
assert(handle);
|
||||||
|
Topk::TopkVal* h = (Topk::TopkVal*) handle;
|
||||||
|
return h->getTopK(k);
|
||||||
|
%}
|
||||||
|
|
7
testing/btest/Baseline/bifs.topk/out
Normal file
7
testing/btest/Baseline/bifs.topk/out
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
[b, c]
|
||||||
|
[d, c]
|
||||||
|
[d, e]
|
||||||
|
[f, e]
|
||||||
|
[f, e]
|
||||||
|
[g, e]
|
||||||
|
[c, e, d]
|
61
testing/btest/bifs/topk.bro
Normal file
61
testing/btest/bifs/topk.bro
Normal file
|
@ -0,0 +1,61 @@
|
||||||
|
# @TEST-EXEC: bro -b %INPUT > out
|
||||||
|
# @TEST-EXEC: btest-diff out
|
||||||
|
|
||||||
|
event bro_init()
|
||||||
|
{
|
||||||
|
local k1 = topk_init(2);
|
||||||
|
|
||||||
|
# first - peculiarity check...
|
||||||
|
topk_add(k1, "a");
|
||||||
|
topk_add(k1, "b");
|
||||||
|
topk_add(k1, "b");
|
||||||
|
topk_add(k1, "c");
|
||||||
|
|
||||||
|
local s = topk_get_top(k1, 5);
|
||||||
|
print s;
|
||||||
|
|
||||||
|
topk_add(k1, "d");
|
||||||
|
s = topk_get_top(k1, 5);
|
||||||
|
print s;
|
||||||
|
|
||||||
|
topk_add(k1, "e");
|
||||||
|
s = topk_get_top(k1, 5);
|
||||||
|
print s;
|
||||||
|
|
||||||
|
topk_add(k1, "f");
|
||||||
|
s = topk_get_top(k1, 5);
|
||||||
|
print s;
|
||||||
|
|
||||||
|
topk_add(k1, "e");
|
||||||
|
s = topk_get_top(k1, 5);
|
||||||
|
print s;
|
||||||
|
|
||||||
|
topk_add(k1, "g");
|
||||||
|
s = topk_get_top(k1, 5);
|
||||||
|
print s;
|
||||||
|
|
||||||
|
k1 = topk_init(100);
|
||||||
|
topk_add(k1, "a");
|
||||||
|
topk_add(k1, "b");
|
||||||
|
topk_add(k1, "b");
|
||||||
|
topk_add(k1, "c");
|
||||||
|
topk_add(k1, "c");
|
||||||
|
topk_add(k1, "c");
|
||||||
|
topk_add(k1, "c");
|
||||||
|
topk_add(k1, "c");
|
||||||
|
topk_add(k1, "c");
|
||||||
|
topk_add(k1, "d");
|
||||||
|
topk_add(k1, "d");
|
||||||
|
topk_add(k1, "d");
|
||||||
|
topk_add(k1, "d");
|
||||||
|
topk_add(k1, "e");
|
||||||
|
topk_add(k1, "e");
|
||||||
|
topk_add(k1, "e");
|
||||||
|
topk_add(k1, "e");
|
||||||
|
topk_add(k1, "e");
|
||||||
|
topk_add(k1, "f");
|
||||||
|
s = topk_get_top(k1, 3);
|
||||||
|
print s;
|
||||||
|
|
||||||
|
|
||||||
|
}
|
Loading…
Add table
Add a link
Reference in a new issue