From 3ffb4cab64d7940001761be738ed83238151f47a Mon Sep 17 00:00:00 2001 From: Soumya Basu Date: Thu, 30 Aug 2012 01:09:44 -0700 Subject: [PATCH] Everything works, and I just need to put the tests in the proper folder to get this functionality working. --- mytests.bro | 144 ++++++++++++--------------------------------- src/HyperLogLog.cc | 22 ++++++- src/HyperLogLog.h | 6 ++ src/bro.bif | 34 +++-------- 4 files changed, 70 insertions(+), 136 deletions(-) diff --git a/mytests.bro b/mytests.bro index 4de6f29dab..9392b205b0 100644 --- a/mytests.bro +++ b/mytests.bro @@ -83,65 +83,44 @@ event bro_init() print "This value should be around 13:"; print hll_cardinality_estimate("measurement1"); + + print "This value should be true:"; + print hll_cardinality_merge_into("measurement3", "measurement2"); + + print "This value should be false:"; + print hll_cardinality_merge_into("measurement4", "measurement6"); + + print "This value should be about 12:"; + print hll_cardinality_estimate("measurement3"); + + print "This value should be false:"; + print hll_cardinality_merge_into("measurement3", "measurement15"); + + print "This value should be about 12:"; + print hll_cardinality_estimate("measurement3"); + + print "This value should be true:"; + print hll_cardinality_merge_into("measurement2", "measurement1"); + + print "This value should be about 21:"; + print hll_cardinality_estimate("measurement2"); + + print "This value should be about 13:"; + print hll_cardinality_estimate("measurement1"); + + print "This value should be about 12:"; + print hll_cardinality_estimate("measurement3"); + + local keys = hll_cardinality_keys(); + for(key in keys) + { + print "The key is:"; + print key; + print "The value is:"; + print hll_cardinality_estimate(key); + } } -### The data structure at index1 will contain the combined count for the -## elements measured by index1 and index2. -## It returns true if it either cloned the value at index2 into index1 -## or if it merged the two data structures together. - -#function hll_cardinality_merge_into%(index1: any, index2: any%): bool -# %{ -# BroString* s1 = convert_index_to_string(index1); -# BroString* s2 = convert_index_to_string(index2); -# int status = 0; -# -# if(hll_counters.count(*s1) < 1) -# { -# if(hll_counters.count(*s2) < 1) -# { -# status = 0; -# } -# else -# { -# uint64_t m = (*hll_counters[*s2]).getM(); -# double error = 1.04/sqrt(m); -# CardinalityCounter* newInst = new CardinalityCounter(error); -# int i = 0; -# while((*newInst).getM() != m) -# { -# i += 1; -# newInst = new CardinalityCounter(error/i); -# } -# hll_counters[*s1] = newInst; -# (*newInst).merge(hll_counters[*s2]); -# status = 1; -# } -# } -# else -# { -# if(hll_counters.count(*s2) < 1) -# { -# status = 0; -# } -# else -# { -# if((*hll_counters[*s2]).getM() == (*hll_counters[*s1]).getM()) -# { -# status = 1; -## (*hll_counters[*s1]).merge(hll_counters[*s2]); -## } -# } -# } -# -# delete s1; -# delete s2; -# return new Val(status, TYPE_BOOL); -# -# %} - -##I'm really not sure about the notation of this function... -# #function hll_cardinality_keys%(%): bool # %{ #// TableVal* a = new TableVal(string_set); @@ -153,54 +132,3 @@ event bro_init() #// } #// return a; # return new Val(1, TYPE_BOOL); -# %} - -## Stores the data structure at index2 into index1. Deletes the data structure at index1 -## if there was any. Returns True if the data structure at index1 was changed in any way. - -#function hll_cardinality_clone%(index1: any, index2: any%): bool -# %{ -# BroString* s1 = convert_index_to_string(index1); -# BroString* s2 = convert_index_to_string(index2); -# int status = 0; -# -# if(hll_counters.count(*s2) < 1) -# { -# if(hll_counters.count(*s1) < 1) -## { -# status = 0; -# } -# else -# { -# delete hll_counters[*s1]; -# status = 1; -# } -# } -# else -# { -# uint64_t m = (*hll_counters[*s2]).getM(); -# double error = 1.04/sqrt(m); -# CardinalityCounter* newInst = new CardinalityCounter(error); -# int i = 0; -# while((*newInst).getM() != m) -# { -# i += 1; -# newInst = new CardinalityCounter(error/i); -# } -# (*newInst).merge(hll_counters[*s2]); -# if(hll_counters.count(*s1) < 1) -# { -# #hll_counters[*s1] = newInst; -# } -# else -# { -# delete hll_counters[*s1]; -# hll_counters[*s1] = newInst; -# } -# status = 1; -# } -# delete s1; -# delete s2; -# return new Val(status, TYPE_BOOL); -# %}} - diff --git a/src/HyperLogLog.cc b/src/HyperLogLog.cc index 22f522d1ab..22a06ee6c7 100644 --- a/src/HyperLogLog.cc +++ b/src/HyperLogLog.cc @@ -18,12 +18,32 @@ using namespace std; return answer; } + CardinalityCounter :: CardinalityCounter(uint64_t size){ + m = size; + buckets = new uint8_t[m]; + + if(m == 16) + alpha_m = 0.673; + else if(m == 32) + alpha_m = 0.697; + else if(m == 64) + alpha_m = 0.709; + else + alpha_m = 0.7213/(1+1.079/m); + + for(uint64_t i = 0; i < m; i++){ + buckets[i] = 0; + } + + V = m; + + } CardinalityCounter :: CardinalityCounter(double error_margin){ int b = optimalB(error_margin); m = (uint64_t) pow(2, b); buckets = new uint8_t[m]; - + if(m == 16) alpha_m = 0.673; else if(m == 32) diff --git a/src/HyperLogLog.h b/src/HyperLogLog.h index 3cbe4cfb03..ba9a46f1bd 100644 --- a/src/HyperLogLog.h +++ b/src/HyperLogLog.h @@ -53,6 +53,12 @@ class CardinalityCounter { uint8_t rank(uint64_t hash_modified); public: + /* + * This will be used when cloning. The error margin will be 1.04/sqrt(m) with approximately 68% + * probability. + */ + CardinalityCounter(uint64_t size); + /* * This will initialize the Cardinality counter.Based on the error_margin, the number of buckets * that need to be kept will be determined. Based on the max_size, the number of bits that will diff --git a/src/bro.bif b/src/bro.bif index e8fbbfc169..e75acfa653 100644 --- a/src/bro.bif +++ b/src/bro.bif @@ -5942,18 +5942,7 @@ function hll_cardinality_merge_into%(index1: any, index2: any%): bool else { uint64_t m = (*hll_counters[*s2]).getM(); - double error = 1.04/sqrt(m); - CardinalityCounter* newInst = new CardinalityCounter(error); - int i = 0; - while((*newInst).getM() != m) - { - i += 1; - newInst = new CardinalityCounter(error/i); - if(i >= 5) - { - break; - } - } + CardinalityCounter* newInst = new CardinalityCounter(m); hll_counters[*s1] = newInst; (*newInst).merge(hll_counters[*s2]); status = 1; @@ -6022,7 +6011,8 @@ function hll_cardinality_keys%(%): string_set for(it = hll_counters.begin() ; it != hll_counters.end(); it++) { - a->Assign(new Val(i++, TYPE_INT),new Val(&(*it).first, TYPE_STRING)); + BroString* s = (BroString*) &(it->first); + a->Assign(new StringVal(s), 0); } return a; %} @@ -6050,20 +6040,10 @@ function hll_cardinality_clone%(index1: any, index2: any%): bool } else { - uint64_t m = (*hll_counters[*s2]).getM(); - double error = 1.04/sqrt(m); - CardinalityCounter* newInst = new CardinalityCounter(error); - int i = 0; - while((*newInst).getM() != m) - { - i += 1; - newInst = new CardinalityCounter(error/i); - if(i >=5 ) - { - break; - } - } - (*newInst).merge(hll_counters[*s2]); + uint64_t m = (*hll_counters[*s2]).getM(); + CardinalityCounter* newInst = new CardinalityCounter(m); + int i = 0; + (*newInst).merge(hll_counters[*s2]); if(hll_counters.count(*s1) < 1) { hll_counters[*s1] = newInst;