diff --git a/src/SerialTypes.h b/src/SerialTypes.h
index 723badab1e..f07392eff4 100644
--- a/src/SerialTypes.h
+++ b/src/SerialTypes.h
@@ -104,6 +104,7 @@ SERIAL_VAL(MD5_VAL, 16)
 SERIAL_VAL(SHA1_VAL, 17)
 SERIAL_VAL(SHA256_VAL, 18)
 SERIAL_VAL(ENTROPY_VAL, 19)
+SERIAL_VAL(TOPK_VAL, 20)
 
 #define SERIAL_EXPR(name, val) SERIAL_CONST(name, val, EXPR)
 SERIAL_EXPR(EXPR, 1)
diff --git a/src/Topk.cc b/src/Topk.cc
index b89fa2e96f..a31f49adf4 100644
--- a/src/Topk.cc
+++ b/src/Topk.cc
@@ -3,9 +3,13 @@
 #include "Topk.h"
 #include "CompHash.h"
 #include "Reporter.h"
+#include "Serializer.h"
+
 
 namespace Topk {
 
+IMPLEMENT_SERIAL(TopkVal, SER_TOPK_VAL);
+
 static void topk_element_hash_delete_func(void* val)
 	{
 	Element* e = (Element*) val;
@@ -40,6 +44,15 @@ TopkVal::TopkVal(uint64 arg_size) : OpaqueVal(new OpaqueType("topk"))
 	numElements = 0;
 	}
 
+TopkVal::TopkVal() : OpaqueVal(new OpaqueType("topk"))
+	{
+	elementDict = new PDict(Element);
+	elementDict->SetDeleteFunc(topk_element_hash_delete_func);
+	size = 0;
+	type = 0;
+	numElements = 0;
+	}
+
 TopkVal::~TopkVal()
 	{
 	elementDict->Clear();
@@ -59,6 +72,141 @@ TopkVal::~TopkVal()
 	}
 
 
+// Writes the sketch in this order: size, numElements, a flag telling whether
+// an element type follows (and the type itself when set), then one record
+// per bucket -- its element count and counter -- followed by each of that
+// bucket's elements as (epsilon, value).
+bool TopkVal::DoSerialize(SerialInfo* info) const
+	{
+	DO_SERIALIZE(SER_TOPK_VAL, OpaqueVal);
+
+	bool v = true;
+
+	v &= SERIALIZE(size);
+	v &= SERIALIZE(numElements);
+	bool type_present = (type != 0);
+	v &= SERIALIZE(type_present);
+	if ( type_present )
+		v &= type->Serialize(info);
+	else
+		assert(numElements == 0);
+
+	// Counted in the same unsigned width as numElements so the consistency
+	// check below involves no sign or width conversion.
+	uint64 i = 0;
+	std::list<Bucket*>::const_iterator it = buckets.begin();
+	while ( it != buckets.end() )
+		{
+		Bucket* b = *it;
+		uint32_t elements_count = b->elements.size();
+		v &= SERIALIZE(elements_count);
+		v &= SERIALIZE(b->count);
+		std::list<Element*>::const_iterator eit = b->elements.begin();
+		while ( eit != b->elements.end() )
+			{
+			Element* element = *eit;
+			v &= SERIALIZE(element->epsilon);
+			v &= element->value->Serialize(info);
+
+			++eit;
+			++i;
+			}
+
+		++it;
+		}
+
+	assert(i == numElements);
+
+	return v;
+	}
+
+// Rebuilds the sketch from the layout written by DoSerialize(). Returns false
+// as soon as a read fails or the data is inconsistent, rather than carrying on
+// with values that were never filled in.
+bool TopkVal::DoUnserialize(UnserialInfo* info)
+	{
+	DO_UNSERIALIZE(OpaqueVal);
+
+	bool type_present = false;
+
+	if ( ! UNSERIALIZE(&size) )
+		return false;
+
+	if ( ! UNSERIALIZE(&numElements) )
+		return false;
+
+	if ( ! UNSERIALIZE(&type_present) )
+		return false;
+
+	if ( type_present )
+		{
+		type = BroType::Unserialize(info);
+		if ( ! type )
+			return false;
+		}
+	else if ( numElements != 0 )
+		{
+		// Elements cannot be decoded without knowing their type.
+		return false;
+		}
+
+	uint64 i = 0;
+	while ( i < numElements )
+		{
+		// Linked into the bucket list before anything else so the bucket is
+		// reachable from this object even if a later read fails.
+		Bucket* b = new Bucket();
+		b->bucketPos = buckets.insert(buckets.end(), b);
+
+		uint32_t elements_count = 0;
+		if ( ! UNSERIALIZE(&elements_count) )
+			return false;
+
+		if ( ! UNSERIALIZE(&b->count) )
+			return false;
+
+		// A bucket may not claim more elements than are still outstanding.
+		if ( elements_count > numElements - i )
+			return false;
+
+		for ( uint32_t j = 0; j < elements_count; ++j )
+			{
+			Element* e = new Element();
+			e->value = 0;
+			e->parent = b;
+
+			if ( ! UNSERIALIZE(&e->epsilon) )
+				{
+				delete e;
+				return false;
+				}
+
+			e->value = Val::Unserialize(info, type);
+			if ( ! e->value )
+				{
+				// Without a value there is nothing to hash or count.
+				delete e;
+				return false;
+				}
+
+			b->elements.insert(b->elements.end(), e);
+
+			HashKey* key = GetHash(e->value);
+			assert ( elementDict->Lookup(key) == 0 );
+
+			elementDict->Insert(key, e);
+			delete key;
+
+			++i;
+			}
+		}
+
+	assert(i == numElements);
+
+	return true;
+	}
+
+
 VectorVal* TopkVal::getTopK(int k) const // returns vector
 	{
 	if ( numElements == 0 )
diff --git a/src/Topk.h b/src/Topk.h
index f486948c5c..0e38319380 100644
--- a/src/Topk.h
+++ b/src/Topk.h
@@ -41,6 +41,9 @@ public:
 	uint64_t getCount(Val* value) const;
 	uint64_t getEpsilon(Val* value) const;
 
+protected:
+	TopkVal(); // for deserialize
+
 private:
 	void IncrementCounter(Element* e);
 	HashKey* GetHash(Val*) const; // this probably should go somewhere else.
@@ -50,6 +53,8 @@ private:
 	PDict(Element)* elementDict;
 	uint64 size; // how many elements are we tracking?
 	uint64 numElements; // how many elements do we have at the moment
+
+	DECLARE_SERIAL(TopkVal);
 };
 
 };
diff --git a/testing/btest/Baseline/bifs.topk_persistence/out b/testing/btest/Baseline/bifs.topk_persistence/out
new file mode 100644
index 0000000000..ef3d0cef30
--- /dev/null
+++ b/testing/btest/Baseline/bifs.topk_persistence/out
@@ -0,0 +1,21 @@
+1
+2
+6
+4
+5
+1
+[c, e, d]
+1
+2
+6
+4
+5
+1
+[c, e, d]
+2
+4
+12
+8
+10
+2
+[c, e, d]
diff --git a/testing/btest/bifs/topk_persistence.bro b/testing/btest/bifs/topk_persistence.bro
new file mode 100644
index 0000000000..4d599c2780
--- /dev/null
+++ b/testing/btest/bifs/topk_persistence.bro
@@ -0,0 +1,74 @@
+# @TEST-EXEC: bro -b %INPUT runnumber=1 >out
+# @TEST-EXEC: bro -b %INPUT runnumber=2 >>out
+# @TEST-EXEC: bro -b %INPUT runnumber=3 >>out
+# @TEST-EXEC: btest-diff out
+
+global runnumber: count &redef; # differentiate runs
+
+global k1: opaque of topk &persistent;
+global k2: opaque of topk &persistent;
+
+event bro_init()
+	{
+
+	k2 = topk_init(20);
+
+	if ( runnumber == 1 )
+		{
+		k1 = topk_init(100);
+
+		topk_add(k1, "a");
+		topk_add(k1, "b");
+		topk_add(k1, "b");
+		topk_add(k1, "c");
+		topk_add(k1, "c");
+		topk_add(k1, "c");
+		topk_add(k1, "c");
+		topk_add(k1, "c");
+		topk_add(k1, "c");
+		topk_add(k1, "d");
+		topk_add(k1, "d");
+		topk_add(k1, "d");
+		topk_add(k1, "d");
+		topk_add(k1, "e");
+		topk_add(k1, "e");
+		topk_add(k1, "e");
+		topk_add(k1, "e");
+		topk_add(k1, "e");
+		topk_add(k1, "f");
+		}
+
+	local s = topk_get_top(k1, 3);
+	print topk_count(k1, "a");
+	print topk_count(k1, "b");
+	print topk_count(k1, "c");
+	print topk_count(k1, "d");
+	print topk_count(k1, "e");
+	print topk_count(k1, "f");
+
+	if ( runnumber == 2 )
+		{
+		topk_add(k1, "a");
+		topk_add(k1, "b");
+		topk_add(k1, "b");
+		topk_add(k1, "c");
+		topk_add(k1, "c");
+		topk_add(k1, "c");
+		topk_add(k1, "c");
+		topk_add(k1, "c");
+		topk_add(k1, "c");
+		topk_add(k1, "d");
+		topk_add(k1, "d");
+		topk_add(k1, "d");
+		topk_add(k1, "d");
+		topk_add(k1, "e");
+		topk_add(k1, "e");
+		topk_add(k1, "e");
+		topk_add(k1, "e");
+		topk_add(k1, "e");
+		topk_add(k1, "f");
+		}
+
+	print s;
+
+	}