add serialization for topk

This commit is contained in:
Bernhard Amann 2013-04-23 23:24:02 -07:00
parent a426c76122
commit 6f863d2259
5 changed files with 209 additions and 0 deletions

View file

@ -104,6 +104,7 @@ SERIAL_VAL(MD5_VAL, 16)
SERIAL_VAL(SHA1_VAL, 17) SERIAL_VAL(SHA1_VAL, 17)
SERIAL_VAL(SHA256_VAL, 18) SERIAL_VAL(SHA256_VAL, 18)
SERIAL_VAL(ENTROPY_VAL, 19) SERIAL_VAL(ENTROPY_VAL, 19)
SERIAL_VAL(TOPK_VAL, 20)
#define SERIAL_EXPR(name, val) SERIAL_CONST(name, val, EXPR) #define SERIAL_EXPR(name, val) SERIAL_CONST(name, val, EXPR)
SERIAL_EXPR(EXPR, 1) SERIAL_EXPR(EXPR, 1)

View file

@ -3,9 +3,13 @@
#include "Topk.h" #include "Topk.h"
#include "CompHash.h" #include "CompHash.h"
#include "Reporter.h" #include "Reporter.h"
#include "Serializer.h"
namespace Topk { namespace Topk {
IMPLEMENT_SERIAL(TopkVal, SER_TOPK_VAL);
static void topk_element_hash_delete_func(void* val) static void topk_element_hash_delete_func(void* val)
{ {
Element* e = (Element*) val; Element* e = (Element*) val;
@ -40,6 +44,15 @@ TopkVal::TopkVal(uint64 arg_size) : OpaqueVal(new OpaqueType("topk"))
numElements = 0; numElements = 0;
} }
// Default constructor, declared in the header as being "for deserialize".
// It produces an empty value: no element type, zero capacity and zero
// tracked elements. DoUnserialize() overwrites these fields with the
// values read from the stream.
TopkVal::TopkVal() : OpaqueVal(new OpaqueType("topk"))
	{
	// Scalar state first; none of it is known yet.
	type = 0;
	numElements = 0;
	size = 0;

	// The lookup dictionary is handed a delete callback for the Element
	// objects stored in it.
	elementDict = new PDict(Element);
	elementDict->SetDeleteFunc(topk_element_hash_delete_func);
	}
TopkVal::~TopkVal() TopkVal::~TopkVal()
{ {
elementDict->Clear(); elementDict->Clear();
@ -59,6 +72,101 @@ TopkVal::~TopkVal()
} }
// Writes this TopkVal to the serializer referenced by `info`.
//
// Stream layout (DoUnserialize() reads it back in the same order):
//   size, numElements,
//   type_present flag, followed by the element type if the flag is set,
//   then for every bucket:
//     number of elements in the bucket (32 bit), the bucket's count,
//     and for every element of the bucket: its epsilon and its value.
//
// Returns true only if every individual write reported success.
bool TopkVal::DoSerialize(SerialInfo* info) const
	{
	DO_SERIALIZE(SER_TOPK_VAL, OpaqueVal);

	bool v = true;

	v &= SERIALIZE(size);
	v &= SERIALIZE(numElements);

	// The element type is only known once a value has been added, so
	// its presence is recorded explicitly.
	bool type_present = (type != 0);
	v &= SERIALIZE(type_present);

	if ( type_present )
		v &= type->Serialize(info);
	else
		assert(numElements == 0);

	// Tally of elements written. It has the same unsigned 64-bit width
	// as numElements so the final consistency check compares like with
	// like (the previous `int` counter mixed signedness and could
	// overflow).
	uint64_t written = 0;

	for ( std::list<Bucket*>::const_iterator bit = buckets.begin();
	      bit != buckets.end(); ++bit )
		{
		Bucket* b = *bit;

		// The per-bucket element count goes on the wire as 32 bit.
		uint32_t elements_count = b->elements.size();
		v &= SERIALIZE(elements_count);
		v &= SERIALIZE(b->count);

		for ( std::list<Element*>::const_iterator eit = b->elements.begin();
		      eit != b->elements.end(); ++eit )
			{
			Element* element = *eit;

			v &= SERIALIZE(element->epsilon);
			v &= element->value->Serialize(info);

			++written;
			}
		}

	// The buckets together must hold exactly numElements elements.
	assert(written == numElements);

	return v;
	}
// Rebuilds this TopkVal from the unserializer referenced by `info`.
//
// Expects the layout DoSerialize() writes: size, numElements, a
// type_present flag (followed by the element type if set), then for each
// bucket its element count and count, followed by epsilon and value of
// each of its elements.
//
// Returns false as soon as the stream turns out to be truncated or
// inconsistent. These conditions are checked at run time rather than
// with assert() because the input is external data and assert() is
// compiled out in NDEBUG builds. After a false return the object may be
// partially populated and should be discarded by the caller.
bool TopkVal::DoUnserialize(UnserialInfo* info)
	{
	DO_UNSERIALIZE(OpaqueVal);

	bool v = true;

	v &= UNSERIALIZE(&size);
	v &= UNSERIALIZE(&numElements);

	bool type_present = false;
	v &= UNSERIALIZE(&type_present);

	// Do not interpret header fields that were never read successfully.
	if ( ! v )
		return false;

	if ( type_present )
		{
		type = BroType::Unserialize(info);

		if ( ! type )
			return false;
		}
	else if ( numElements != 0 )
		// Elements cannot be restored without knowing their type.
		return false;

	// Number of elements restored so far; same width as numElements.
	uint64_t restored = 0;

	while ( restored < numElements )
		{
		Bucket* b = new Bucket();

		// Initialised so that a failed read can never leave the loop
		// bound below indeterminate.
		uint32_t elements_count = 0;

		v &= UNSERIALIZE(&elements_count);
		v &= UNSERIALIZE(&b->count);

		// Reject a failed read, and a bucket that claims more elements
		// than the header says are still outstanding.
		if ( ! v || elements_count > numElements - restored )
			{
			delete b; // not linked into `buckets` yet
			return false;
			}

		b->bucketPos = buckets.insert(buckets.end(), b);

		for ( uint32_t j = 0; j < elements_count; ++j )
			{
			Element* e = new Element();

			// Give the element a defined state before anything can
			// fail, so deleting it on an error path is safe.
			// NOTE(review): assumes Element's destructor tolerates a
			// null value -- confirm against the Element definition.
			e->value = 0;
			e->parent = b;

			if ( ! UNSERIALIZE(&e->epsilon) )
				{
				delete e;
				return false;
				}

			e->value = Val::Unserialize(info, type);

			if ( ! e->value )
				{
				delete e;
				return false;
				}

			HashKey* key = GetHash(e->value);

			// Every tracked value must be unique. A duplicate means
			// the stream is corrupt, and inserting it would displace
			// the entry already stored under that key.
			if ( elementDict->Lookup(key) != 0 )
				{
				delete key;
				delete e;
				return false;
				}

			b->elements.insert(b->elements.end(), e);
			elementDict->Insert(key, e);
			delete key;

			++restored;
			}
		}

	// The per-bucket bound above guarantees restored == numElements here.
	return v;
	}
VectorVal* TopkVal::getTopK(int k) const // returns vector VectorVal* TopkVal::getTopK(int k) const // returns vector
{ {
if ( numElements == 0 ) if ( numElements == 0 )

View file

@ -41,6 +41,9 @@ public:
uint64_t getCount(Val* value) const; uint64_t getCount(Val* value) const;
uint64_t getEpsilon(Val* value) const; uint64_t getEpsilon(Val* value) const;
protected:
TopkVal(); // for deserialize
private: private:
void IncrementCounter(Element* e); void IncrementCounter(Element* e);
HashKey* GetHash(Val*) const; // this probably should go somewhere else. HashKey* GetHash(Val*) const; // this probably should go somewhere else.
@ -50,6 +53,8 @@ private:
PDict(Element)* elementDict; PDict(Element)* elementDict;
uint64 size; // how many elements are we tracking? uint64 size; // how many elements are we tracking?
uint64 numElements; // how many elements do we have at the moment uint64 numElements; // how many elements do we have at the moment
DECLARE_SERIAL(TopkVal);
}; };
}; };

View file

@ -0,0 +1,21 @@
1
2
6
4
5
1
[c, e, d]
1
2
6
4
5
1
[c, e, d]
2
4
12
8
10
2
[c, e, d]

View file

@ -0,0 +1,74 @@
# @TEST-EXEC: bro -b %INPUT runnumber=1 >out
# @TEST-EXEC: bro -b %INPUT runnumber=2 >>out
# @TEST-EXEC: bro -b %INPUT runnumber=3 >>out
# @TEST-EXEC: btest-diff out
# Set on the command line of each @TEST-EXEC invocation (runnumber=1/2/3);
# &redef is what allows that override.
global runnumber: count &redef; # differentiate runs
# The two topk values under test. Both are marked &persistent; the test
# relies on k1 keeping its contents from one bro invocation to the next.
global k1: opaque of topk &persistent;
global k2: opaque of topk &persistent;
# Test driver, executed once per bro invocation.
#   Run 1 creates k1 and fills it.
#   Run 2 prints the state it starts with, then adds the same values again.
#   Run 3 only prints.
# Runs 2 and 3 never initialize k1 themselves, so whatever they print
# has to come from the &persistent state left by the previous run.
event bro_init()
{
# k2 is (re)initialized on every run and not used otherwise in this script.
k2 = topk_init(20);
if ( runnumber == 1 )
{
k1 = topk_init(100);
# Adds: a x1, b x2, c x6, d x4, e x5, f x1.
topk_add(k1, "a");
topk_add(k1, "b");
topk_add(k1, "b");
topk_add(k1, "c");
topk_add(k1, "c");
topk_add(k1, "c");
topk_add(k1, "c");
topk_add(k1, "c");
topk_add(k1, "c");
topk_add(k1, "d");
topk_add(k1, "d");
topk_add(k1, "d");
topk_add(k1, "d");
topk_add(k1, "e");
topk_add(k1, "e");
topk_add(k1, "e");
topk_add(k1, "e");
topk_add(k1, "e");
topk_add(k1, "f");
}
# The top-3 result is taken here, before the run-2 additions below, and
# only printed at the very end.
local s = topk_get_top(k1, 3);
print topk_count(k1, "a");
print topk_count(k1, "b");
print topk_count(k1, "c");
print topk_count(k1, "d");
print topk_count(k1, "e");
print topk_count(k1, "f");
if ( runnumber == 2 )
{
# Same additions as in run 1, made after the counts above were printed;
# they become visible in the output of run 3.
topk_add(k1, "a");
topk_add(k1, "b");
topk_add(k1, "b");
topk_add(k1, "c");
topk_add(k1, "c");
topk_add(k1, "c");
topk_add(k1, "c");
topk_add(k1, "c");
topk_add(k1, "c");
topk_add(k1, "d");
topk_add(k1, "d");
topk_add(k1, "d");
topk_add(k1, "d");
topk_add(k1, "e");
topk_add(k1, "e");
topk_add(k1, "e");
topk_add(k1, "e");
topk_add(k1, "e");
topk_add(k1, "f");
}
print s;
}