adapt to new structure

This commit is contained in:
Bernhard Amann 2013-07-24 12:50:01 -07:00
parent 9e0fd963e0
commit b7cdfc0e6e
9 changed files with 23 additions and 140 deletions

View file

@ -706,6 +706,7 @@ type entropy_test_result: record {
@load base/bif/bro.bif @load base/bif/bro.bif
@load base/bif/reporter.bif @load base/bif/reporter.bif
@load base/bif/bloom-filter.bif @load base/bif/bloom-filter.bif
@load base/bif/hyper-loglog.bif
## Deprecated. This is superseded by the new logging framework. ## Deprecated. This is superseded by the new logging framework.
global log_file_name: function(tag: string): string &redef; global log_file_name: function(tag: string): string &redef;

View file

@ -277,7 +277,6 @@ set(bro_SRCS
Frame.cc Frame.cc
Func.cc Func.cc
Hash.cc Hash.cc
HyperLogLog.cc
ID.cc ID.cc
IntSet.cc IntSet.cc
IOSource.cc IOSource.cc

View file

@ -562,6 +562,7 @@ void builtin_error(const char* msg, BroObj* arg)
// TODO: Add a nicer mechanism to pull in subdirectory bifs automatically. // TODO: Add a nicer mechanism to pull in subdirectory bifs automatically.
#include "probabilistic/bloom-filter.bif.h" #include "probabilistic/bloom-filter.bif.h"
#include "probabilistic/hyper-loglog.bif.h"
void init_builtin_funcs() void init_builtin_funcs()
{ {
@ -579,6 +580,7 @@ void init_builtin_funcs()
// TODO: Add a nicer mechanism to pull in subdirectory bifs automatically. // TODO: Add a nicer mechanism to pull in subdirectory bifs automatically.
#include "probabilistic/bloom-filter.bif.init.cc" #include "probabilistic/bloom-filter.bif.init.cc"
#include "probabilistic/hyper-loglog.bif.init.cc"
did_builtin_init = true; did_builtin_init = true;
} }

View file

@ -4,7 +4,7 @@
#include "NetVar.h" #include "NetVar.h"
#include "Reporter.h" #include "Reporter.h"
#include "Serializer.h" #include "Serializer.h"
#include "HyperLogLog.h" #include "probabilistic/HyperLogLog.h"
CardinalityVal::CardinalityVal() : OpaqueVal(cardinality_type) CardinalityVal::CardinalityVal() : OpaqueVal(cardinality_type)
@ -54,7 +54,7 @@ bool CardinalityVal::DoUnserialize(UnserialInfo* info)
uint64_t m; uint64_t m;
serialvalid &= UNSERIALIZE(&m); serialvalid &= UNSERIALIZE(&m);
c = new CardinalityCounter(m); c = new probabilistic::CardinalityCounter(m);
serialvalid &= UNSERIALIZE(&c->V); serialvalid &= UNSERIALIZE(&c->V);
serialvalid &= UNSERIALIZE(&c->alpha_m); serialvalid &= UNSERIALIZE(&c->alpha_m);
@ -67,7 +67,7 @@ bool CardinalityVal::DoUnserialize(UnserialInfo* info)
return valid; return valid;
} }
bool CardinalityVal::Init(CardinalityCounter* arg_c) bool CardinalityVal::Init(probabilistic::CardinalityCounter* arg_c)
{ {
if ( valid ) if ( valid )
return false; return false;

View file

@ -10,28 +10,27 @@
#include "digest.h" #include "digest.h"
#include "probabilistic/BloomFilter.h" #include "probabilistic/BloomFilter.h"
class CardinalityCounter; namespace probabilistic {
class BloomFilter;
class CardinalityCounter;
}
class CardinalityVal: public OpaqueVal { class CardinalityVal: public OpaqueVal {
public: public:
CardinalityVal(); CardinalityVal();
~CardinalityVal(); ~CardinalityVal();
bool Init(CardinalityCounter*); bool Init(probabilistic::CardinalityCounter*);
bool IsValid() const { return valid; }; bool IsValid() const { return valid; };
CardinalityCounter* Get() { return c; }; probabilistic::CardinalityCounter* Get() { return c; };
private: private:
bool valid; bool valid;
CardinalityCounter* c; probabilistic::CardinalityCounter* c;
DECLARE_SERIAL(CardinalityVal); DECLARE_SERIAL(CardinalityVal);
}; };
namespace probabilistic {
class BloomFilter;
}
class HashVal : public OpaqueVal { class HashVal : public OpaqueVal {
public: public:
virtual bool IsValid() const; virtual bool IsValid() const;

View file

@ -4974,130 +4974,5 @@ function anonymize_addr%(a: addr, cl: IPAddrAnonymizationClass%): addr
} }
%} %}
## The HyperLogLog cardinality-counting functions start here.
## A note about naming: every function carries the prefix "hll" so that, should
## a better way to count cardinalities come along in the future, code written
## against the HyperLogLog algorithm will keep working. That said, anything
## that might be better is unlikely to be significantly better.
%%{
#include "HyperLogLog.h"
%%}
## Creates a HyperLogLog cardinality counter and wraps it in a new opaque
## handle.
##
## err: The error value passed to the CardinalityCounter constructor.
##
## Returns: An opaque cardinality handle initialized with the new counter.
function hll_cardinality_init%(err: double%): opaque of cardinality
	%{
	// A plain new-expression never evaluates to a null pointer, so the
	// counter can be handed to the handle unconditionally.
	CardinalityCounter* c = new CardinalityCounter(err);

	CardinalityVal* cv = new CardinalityVal();
	cv->Init(c);

	return cv;
	%}
## Adds an element to the HyperLogLog counter behind a handle. The element is
## hashed with a CompositeHash built from its own type, and the resulting hash
## value is fed to the counter.
##
## handle: The opaque cardinality handle to add to.
##
## elem: The element to count.
##
## Returns: True if the element was added, false if the handle is not valid
##          or the element could not be hashed.
function hll_cardinality_add%(handle: opaque of cardinality, elem: any%): bool
	%{
	CardinalityVal* cv = static_cast<CardinalityVal*>(handle);

	if ( ! cv->IsValid() )
		{
		reporter->Error("Need valid handle");
		return new Val(0, TYPE_BOOL);
		}

	// NOTE(review): Append() is given the element's type without an extra
	// reference, exactly as before - confirm TypeList's ownership rules.
	TypeList* tl = new TypeList(elem->Type());
	tl->Append(elem->Type());
	CompositeHash* hll_hash = new CompositeHash(tl);
	Unref(tl);

	HashKey* key = hll_hash->ComputeHash(elem, 1);

	if ( ! key )
		{
		delete hll_hash;
		reporter->Error("Failed to hash element");
		return new Val(0, TYPE_BOOL);
		}

	cv->Get()->addElement(key->Hash());

	// The key is only needed for its hash value; release it so that
	// repeated calls do not leak one key per element.
	delete key;
	delete hll_hash;

	return new Val(1, TYPE_BOOL);
	%}
## Merges the counter behind the second handle into the counter behind the
## first one, so that the first afterwards holds the combined count of both.
##
## handle1: The handle whose counter receives the merge.
##
## handle2: The handle whose counter is merged in.
##
## Returns: True if the merge was performed, false if either handle is not
##          valid.
function hll_cardinality_merge_into%(handle1: opaque of cardinality, handle2: opaque of cardinality%): bool
	%{
	CardinalityVal* dst = static_cast<CardinalityVal*>(handle1);
	CardinalityVal* src = static_cast<CardinalityVal*>(handle2);

	if ( ! (dst->IsValid() && src->IsValid()) )
		{
		reporter->Error("need valid handles");
		return new Val(0, TYPE_BOOL);
		}

	CardinalityCounter* target = dst->Get();
	CardinalityCounter* other = src->Get();
	target->merge(other);

	return new Val(1, TYPE_BOOL);
	%}
## Returns true if it destroyed something. False if it didn't.
#function hll_cardinality_destroy%(handle: opaque of cardinality%): bool
# %{
# if ( !((CardinalityVal*) handle)->IsValid() ) {
# reporter->Error("Need valid handle");
# return new Val(0, TYPE_BOOL);
# }
# CardinalityCounter* h = ((CardinalityVal*) handle)->Get();
# delete h;
# h = 0;
# return new Val(1, TYPE_BOOL);
# %}
## Returns the cardinality estimate of the counter behind a handle.
##
## handle: The opaque cardinality handle to query.
##
## Returns: The counter's size estimate, or -1.0 if the handle is not valid.
function hll_cardinality_estimate%(handle: opaque of cardinality%): double
	%{
	CardinalityVal* cv = static_cast<CardinalityVal*>(handle);

	if ( ! cv->IsValid() )
		{
		reporter->Error("Need valid handle");
		// The declared return type is double, so the error value must be a
		// double as well (previously a bool Val was returned here).
		return new Val(-1.0, TYPE_DOUBLE);
		}

	double estimate = cv->Get()->size();
	return new Val(estimate, TYPE_DOUBLE);
	%}
## Creates a copy of the counter behind a handle. A fresh counter is created
## with the same size value (getM()) as the original, and the original is then
## merged into it.
##
## handle: The opaque cardinality handle to copy.
##
## Returns: A new opaque cardinality handle holding the copy. If *handle* is
##          not valid, an error is reported and a handle that was never
##          initialized is returned.
function hll_cardinality_clone%(handle: opaque of cardinality%): opaque of cardinality
	%{
	CardinalityVal* cv = static_cast<CardinalityVal*>(handle);

	if ( ! cv->IsValid() )
		{
		reporter->Error("Need valid handle");
		// Keep the result type consistent with the declared return type
		// instead of handing back a bool Val.
		// NOTE(review): assumes a CardinalityVal without Init() reports
		// IsValid() == false - confirm against the constructor.
		return new CardinalityVal();
		}

	CardinalityCounter* h = cv->Get();

	// Keep m typed as uint64_t so the size-based constructor is selected.
	uint64_t m = h->getM();
	CardinalityCounter* h2 = new CardinalityCounter(m);
	h2->merge(h);

	CardinalityVal* out = new CardinalityVal();
	out->Init(h2);

	return out;
	%}

View file

@ -10,9 +10,12 @@ set(probabilistic_SRCS
BitVector.cc BitVector.cc
BloomFilter.cc BloomFilter.cc
CounterVector.cc CounterVector.cc
Hasher.cc) Hasher.cc
HyperLogLog.cc)
bif_target(bloom-filter.bif) bif_target(bloom-filter.bif)
set(BIF_OUTPUT_CC_SAVE ${BIF_OUTPUT_CC})
bif_target(hyper-loglog.bif)
bro_add_subdir_library(probabilistic ${probabilistic_SRCS} ${BIF_OUTPUT_CC}) bro_add_subdir_library(probabilistic ${probabilistic_SRCS} ${BIF_OUTPUT_CC_SAVE} ${BIF_OUTPUT_CC})
add_dependencies(bro_probabilistic generate_outputs) add_dependencies(bro_probabilistic generate_outputs)

View file

@ -5,7 +5,7 @@
#include "HyperLogLog.h" #include "HyperLogLog.h"
#include <iostream> #include <iostream>
using namespace std; using namespace probabilistic;
int CardinalityCounter::optimalB(double error) int CardinalityCounter::optimalB(double error)
{ {

View file

@ -6,6 +6,8 @@
#include <stdint.h> #include <stdint.h>
#include <OpaqueVal.h> #include <OpaqueVal.h>
namespace probabilistic {
/* /*
* "conf" is how confident the estimate given by the counter is. * "conf" is how confident the estimate given by the counter is.
* *
@ -118,4 +120,6 @@ class CardinalityCounter {
uint64_t getM(); uint64_t getM();
}; };
}
#endif #endif