diff --git a/scripts/base/init-bare.bro b/scripts/base/init-bare.bro index c368b9d610..33978091f1 100644 --- a/scripts/base/init-bare.bro +++ b/scripts/base/init-bare.bro @@ -706,6 +706,7 @@ type entropy_test_result: record { @load base/bif/bro.bif @load base/bif/reporter.bif @load base/bif/bloom-filter.bif +@load base/bif/hyper-loglog.bif ## Deprecated. This is superseded by the new logging framework. global log_file_name: function(tag: string): string &redef; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 3d23f7a2b4..0c979df19f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -277,7 +277,6 @@ set(bro_SRCS Frame.cc Func.cc Hash.cc - HyperLogLog.cc ID.cc IntSet.cc IOSource.cc diff --git a/src/Func.cc b/src/Func.cc index 483699668f..4d8d7f3193 100644 --- a/src/Func.cc +++ b/src/Func.cc @@ -562,6 +562,7 @@ void builtin_error(const char* msg, BroObj* arg) // TODO: Add a nicer mechanism to pull in subdirectory bifs automatically. #include "probabilistic/bloom-filter.bif.h" +#include "probabilistic/hyper-loglog.bif.h" void init_builtin_funcs() { @@ -579,6 +580,7 @@ void init_builtin_funcs() // TODO: Add a nicer mechanism to pull in subdirectory bifs automatically. #include "probabilistic/bloom-filter.bif.init.cc" +#include "probabilistic/hyper-loglog.bif.init.cc" did_builtin_init = true; } diff --git a/src/OpaqueVal.cc b/src/OpaqueVal.cc index 1d8214fd85..67e39aa2cc 100644 --- a/src/OpaqueVal.cc +++ b/src/OpaqueVal.cc @@ -4,7 +4,7 @@ #include "NetVar.h" #include "Reporter.h" #include "Serializer.h" -#include "HyperLogLog.h" +#include "probabilistic/HyperLogLog.h" CardinalityVal::CardinalityVal() : OpaqueVal(cardinality_type) @@ -54,7 +54,7 @@ bool CardinalityVal::DoUnserialize(UnserialInfo* info) uint64_t m; serialvalid &= UNSERIALIZE(&m); - c = new CardinalityCounter(m); + c = new probabilistic::CardinalityCounter(m); serialvalid &= UNSERIALIZE(&c->V); serialvalid &= UNSERIALIZE(&c->alpha_m); @@ -67,7 +67,7 @@ bool CardinalityVal::DoUnserialize(UnserialInfo* info) return valid; } -bool CardinalityVal::Init(CardinalityCounter* arg_c) +bool CardinalityVal::Init(probabilistic::CardinalityCounter* arg_c) { if ( valid ) return false; diff --git a/src/OpaqueVal.h b/src/OpaqueVal.h index 23df0d50d7..aeae4d9d51 100644 --- a/src/OpaqueVal.h +++ b/src/OpaqueVal.h @@ -10,28 +10,27 @@ #include "digest.h" #include "probabilistic/BloomFilter.h" -class CardinalityCounter; +namespace probabilistic { + class BloomFilter; + class CardinalityCounter; +} class CardinalityVal: public OpaqueVal { public: CardinalityVal(); ~CardinalityVal(); - bool Init(CardinalityCounter*); + bool Init(probabilistic::CardinalityCounter*); bool IsValid() const { return valid; }; - CardinalityCounter* Get() { return c; }; + probabilistic::CardinalityCounter* Get() { return c; }; private: bool valid; - CardinalityCounter* c; + probabilistic::CardinalityCounter* c; DECLARE_SERIAL(CardinalityVal); }; -namespace probabilistic { - class BloomFilter; -} - class HashVal : public OpaqueVal { public: virtual bool IsValid() const; diff --git a/src/bro.bif b/src/bro.bif index c3e46b501d..a01d68c585 100644 --- a/src/bro.bif +++ b/src/bro.bif @@ -4974,130 +4974,5 @@ function anonymize_addr%(a: addr, cl: IPAddrAnonymizationClass%): addr } %} -## This is where my code starts... -##Just a note about notation. I'm specifying everything with the prefix hll just -## in case in the future, there's a better way to count cardinalities or something. -## That way, code written that depends on the HyperLogLog algorithm will still be -## working. Though, I'm fairly certain that anything that might be better won't -## be significantly better. -%%{ -#include "HyperLogLog.h" -%%} - -## Initializes the hash for the HyperLogLog cardinality counting algorithm. -## It returns true if it was successful in creating a structure and false -## if it wasn't. - -function hll_cardinality_init%(err: double%): opaque of cardinality - %{ - CardinalityCounter* c = new CardinalityCounter(err); - CardinalityVal* cv = new CardinalityVal(); - - if ( !c ) - reporter->Error("Failed initialize Cardinality counter"); - else - cv->Init(c); - - return cv; - %} - -## Adds an element to the HyperLogLog data structure located at index. - -##elem->Type() to get the type of elem. - -function hll_cardinality_add%(handle: opaque of cardinality, elem: any%): bool - %{ - if ( !((CardinalityVal*) handle)->IsValid() ) { - reporter->Error("Need valid handle"); - return new Val(0, TYPE_BOOL); - } - - int status = 0; - uint64_t a = 123456; - - TypeList* tl = new TypeList(elem->Type()); - tl->Append(elem->Type()); - CompositeHash* hll_hash = new CompositeHash(tl); - Unref(tl); - - CardinalityCounter* h = ((CardinalityVal*) handle)->Get(); - HashKey* key = hll_hash->ComputeHash(elem, 1); - a = key->Hash(); - h->addElement(a); - - delete hll_hash; - return new Val(1, TYPE_BOOL); - %} - -## The data structure at index1 will contain the combined count for the -## elements measured by index1 and index2. -## It returns true if it either cloned the value at index2 into index1 -## or if it merged the two data structures together. - -function hll_cardinality_merge_into%(handle1: opaque of cardinality, handle2: opaque of cardinality%): bool - %{ - CardinalityVal* v1 = (CardinalityVal*) handle1; - CardinalityVal* v2 = (CardinalityVal*) handle2; - - if ( !v1->IsValid() || !v2->IsValid() ) { - reporter->Error("need valid handles"); - return new Val(0, TYPE_BOOL); - } - - CardinalityCounter* h1 = v1->Get(); - CardinalityCounter* h2 = v2->Get(); - - h1->merge(h2); - - return new Val(1, TYPE_BOOL); - %} - -## Returns true if it destroyed something. False if it didn't. -#function hll_cardinality_destroy%(handle: opaque of cardinality%): bool -# %{ -# if ( !((CardinalityVal*) handle)->IsValid() ) { -# reporter->Error("Need valid handle"); -# return new Val(0, TYPE_BOOL); -# } -# CardinalityCounter* h = ((CardinalityVal*) handle)->Get(); -# delete h; -# h = 0; -# return new Val(1, TYPE_BOOL); -# %} - -## Returns the cardinality estimate. Returns -1.0 if there is nothing in that index. -function hll_cardinality_estimate%(handle: opaque of cardinality%): double - %{ - if ( !((CardinalityVal*) handle)->IsValid() ) { - reporter->Error("Need valid handle"); - return new Val(0, TYPE_BOOL); - } - CardinalityCounter* h = ((CardinalityVal*) handle)->Get(); - - double estimate = h->size(); - - return new Val(estimate, TYPE_DOUBLE); - %} - -## Stores the data structure at index2 into index1. Deletes the data structure at index1 -## if there was any. Returns True if the data structure at index1 was changed in any way. - -function hll_cardinality_clone%(handle: opaque of cardinality%): opaque of cardinality - %{ - if ( !((CardinalityVal*) handle)->IsValid() ) { - reporter->Error("Need valid handle"); - return new Val(0, TYPE_BOOL); - } - CardinalityCounter* h = ((CardinalityVal*) handle)->Get(); - - - uint64_t m = h->getM(); - CardinalityCounter* h2 = new CardinalityCounter(m); - int i = 0; - h2->merge(h); - CardinalityVal* cv = new CardinalityVal(); - cv->Init(h2); - return cv; - %} diff --git a/src/probabilistic/CMakeLists.txt b/src/probabilistic/CMakeLists.txt index 961c07fb33..6f3c64f67e 100644 --- a/src/probabilistic/CMakeLists.txt +++ b/src/probabilistic/CMakeLists.txt @@ -10,9 +10,12 @@ set(probabilistic_SRCS BitVector.cc BloomFilter.cc CounterVector.cc - Hasher.cc) + Hasher.cc + HyperLogLog.cc) bif_target(bloom-filter.bif) +set(BIF_OUTPUT_CC_SAVE ${BIF_OUTPUT_CC}) +bif_target(hyper-loglog.bif) -bro_add_subdir_library(probabilistic ${probabilistic_SRCS} ${BIF_OUTPUT_CC}) +bro_add_subdir_library(probabilistic ${probabilistic_SRCS} ${BIF_OUTPUT_CC_SAVE} ${BIF_OUTPUT_CC}) add_dependencies(bro_probabilistic generate_outputs) diff --git a/src/HyperLogLog.cc b/src/probabilistic/HyperLogLog.cc similarity index 98% rename from src/HyperLogLog.cc rename to src/probabilistic/HyperLogLog.cc index 6dacab33a2..b1deb39552 100644 --- a/src/HyperLogLog.cc +++ b/src/probabilistic/HyperLogLog.cc @@ -5,7 +5,7 @@ #include "HyperLogLog.h" #include -using namespace std; +using namespace probabilistic; int CardinalityCounter::optimalB(double error) { diff --git a/src/HyperLogLog.h b/src/probabilistic/HyperLogLog.h similarity index 99% rename from src/HyperLogLog.h rename to src/probabilistic/HyperLogLog.h index f07167502a..0a7ea6ac2f 100644 --- a/src/HyperLogLog.h +++ b/src/probabilistic/HyperLogLog.h @@ -6,6 +6,8 @@ #include #include +namespace probabilistic { + /* * "conf" is how confident the estimate given by the counter is. * @@ -118,4 +120,6 @@ class CardinalityCounter { uint64_t getM(); }; +} + #endif