Everything works; the only remaining step is to move the tests into the proper folder so that they run as part of the test suite.

This commit is contained in:
Soumya Basu 2012-08-30 01:09:44 -07:00
parent 7e07ce3cb1
commit 3ffb4cab64
4 changed files with 70 additions and 136 deletions

View file

@ -83,65 +83,44 @@ event bro_init()
print "This value should be around 13:"; print "This value should be around 13:";
print hll_cardinality_estimate("measurement1"); print hll_cardinality_estimate("measurement1");
print "This value should be true:";
print hll_cardinality_merge_into("measurement3", "measurement2");
print "This value should be false:";
print hll_cardinality_merge_into("measurement4", "measurement6");
print "This value should be about 12:";
print hll_cardinality_estimate("measurement3");
print "This value should be false:";
print hll_cardinality_merge_into("measurement3", "measurement15");
print "This value should be about 12:";
print hll_cardinality_estimate("measurement3");
print "This value should be true:";
print hll_cardinality_merge_into("measurement2", "measurement1");
print "This value should be about 21:";
print hll_cardinality_estimate("measurement2");
print "This value should be about 13:";
print hll_cardinality_estimate("measurement1");
print "This value should be about 12:";
print hll_cardinality_estimate("measurement3");
local keys = hll_cardinality_keys();
for(key in keys)
{
print "The key is:";
print key;
print "The value is:";
print hll_cardinality_estimate(key);
}
} }
### The data structure at index1 will contain the combined count for the
## elements measured by index1 and index2.
## It returns true if it either cloned the value at index2 into index1
## or if it merged the two data structures together.
#function hll_cardinality_merge_into%(index1: any, index2: any%): bool
# %{
# BroString* s1 = convert_index_to_string(index1);
# BroString* s2 = convert_index_to_string(index2);
# int status = 0;
#
# if(hll_counters.count(*s1) < 1)
# {
# if(hll_counters.count(*s2) < 1)
# {
# status = 0;
# }
# else
# {
# uint64_t m = (*hll_counters[*s2]).getM();
# double error = 1.04/sqrt(m);
# CardinalityCounter* newInst = new CardinalityCounter(error);
# int i = 0;
# while((*newInst).getM() != m)
# {
# i += 1;
# newInst = new CardinalityCounter(error/i);
# }
# hll_counters[*s1] = newInst;
# (*newInst).merge(hll_counters[*s2]);
# status = 1;
# }
# }
# else
# {
# if(hll_counters.count(*s2) < 1)
# {
# status = 0;
# }
# else
# {
# if((*hll_counters[*s2]).getM() == (*hll_counters[*s1]).getM())
# {
# status = 1;
## (*hll_counters[*s1]).merge(hll_counters[*s2]);
## }
# }
# }
#
# delete s1;
# delete s2;
# return new Val(status, TYPE_BOOL);
#
# %}
##I'm really not sure about the notation of this function...
#
#function hll_cardinality_keys%(%): bool #function hll_cardinality_keys%(%): bool
# %{ # %{
#// TableVal* a = new TableVal(string_set); #// TableVal* a = new TableVal(string_set);
@ -153,54 +132,3 @@ event bro_init()
#// } #// }
#// return a; #// return a;
# return new Val(1, TYPE_BOOL); # return new Val(1, TYPE_BOOL);
# %}
## Stores the data structure at index2 into index1. Deletes the data structure at index1
## if there was any. Returns True if the data structure at index1 was changed in any way.
#function hll_cardinality_clone%(index1: any, index2: any%): bool
# %{
# BroString* s1 = convert_index_to_string(index1);
# BroString* s2 = convert_index_to_string(index2);
# int status = 0;
#
# if(hll_counters.count(*s2) < 1)
# {
# if(hll_counters.count(*s1) < 1)
## {
# status = 0;
# }
# else
# {
# delete hll_counters[*s1];
# status = 1;
# }
# }
# else
# {
# uint64_t m = (*hll_counters[*s2]).getM();
# double error = 1.04/sqrt(m);
# CardinalityCounter* newInst = new CardinalityCounter(error);
# int i = 0;
# while((*newInst).getM() != m)
# {
# i += 1;
# newInst = new CardinalityCounter(error/i);
# }
# (*newInst).merge(hll_counters[*s2]);
# if(hll_counters.count(*s1) < 1)
# {
# #hll_counters[*s1] = newInst;
# }
# else
# {
# delete hll_counters[*s1];
# hll_counters[*s1] = newInst;
# }
# status = 1;
# }
# delete s1;
# delete s2;
# return new Val(status, TYPE_BOOL);
# %}}

View file

@ -18,12 +18,32 @@ using namespace std;
return answer; return answer;
} }
// Builds a counter from an explicit bucket count rather than from an error margin.
CardinalityCounter :: CardinalityCounter(uint64_t size){
    // The requested size is taken verbatim as the number of buckets.
    m = size;

    // alpha_m has fixed values for m of 16, 32 and 64; every other m uses the
    // closed-form expression.
    switch(m){
        case 16:
            alpha_m = 0.673;
            break;
        case 32:
            alpha_m = 0.697;
            break;
        case 64:
            alpha_m = 0.709;
            break;
        default:
            alpha_m = 0.7213/(1+1.079/m);
            break;
    }

    // Allocate one byte per bucket and clear each of them.
    buckets = new uint8_t[m];
    uint8_t* const stop = buckets + m;
    for(uint8_t* reg = buckets; reg != stop; ++reg)
        *reg = 0;

    // V starts out equal to m (presumably the number of still-empty buckets;
    // confirm against the class header).
    V = m;
}
CardinalityCounter :: CardinalityCounter(double error_margin){ CardinalityCounter :: CardinalityCounter(double error_margin){
int b = optimalB(error_margin); int b = optimalB(error_margin);
m = (uint64_t) pow(2, b); m = (uint64_t) pow(2, b);
buckets = new uint8_t[m]; buckets = new uint8_t[m];
if(m == 16) if(m == 16)
alpha_m = 0.673; alpha_m = 0.673;
else if(m == 32) else if(m == 32)

View file

@ -53,6 +53,12 @@ class CardinalityCounter {
uint8_t rank(uint64_t hash_modified); uint8_t rank(uint64_t hash_modified);
public: public:
/*
* Constructs a counter with exactly `size` buckets (m = size), every bucket initialised to zero.
* This will be used when cloning. The error margin will be 1.04/sqrt(m) with approximately 68%
* probability.
*/
CardinalityCounter(uint64_t size);
/* /*
* This will initialize the Cardinality counter.Based on the error_margin, the number of buckets * This will initialize the Cardinality counter.Based on the error_margin, the number of buckets
* that need to be kept will be determined. Based on the max_size, the number of bits that will * that need to be kept will be determined. Based on the max_size, the number of bits that will

View file

@ -5942,18 +5942,7 @@ function hll_cardinality_merge_into%(index1: any, index2: any%): bool
else else
{ {
uint64_t m = (*hll_counters[*s2]).getM(); uint64_t m = (*hll_counters[*s2]).getM();
double error = 1.04/sqrt(m); CardinalityCounter* newInst = new CardinalityCounter(m);
CardinalityCounter* newInst = new CardinalityCounter(error);
int i = 0;
while((*newInst).getM() != m)
{
i += 1;
newInst = new CardinalityCounter(error/i);
if(i >= 5)
{
break;
}
}
hll_counters[*s1] = newInst; hll_counters[*s1] = newInst;
(*newInst).merge(hll_counters[*s2]); (*newInst).merge(hll_counters[*s2]);
status = 1; status = 1;
@ -6022,7 +6011,8 @@ function hll_cardinality_keys%(%): string_set
for(it = hll_counters.begin() ; it != hll_counters.end(); it++) for(it = hll_counters.begin() ; it != hll_counters.end(); it++)
{ {
a->Assign(new Val(i++, TYPE_INT),new Val(&(*it).first, TYPE_STRING)); BroString* s = (BroString*) &(it->first);
a->Assign(new StringVal(s), 0);
} }
return a; return a;
%} %}
@ -6050,20 +6040,10 @@ function hll_cardinality_clone%(index1: any, index2: any%): bool
} }
else else
{ {
uint64_t m = (*hll_counters[*s2]).getM(); uint64_t m = (*hll_counters[*s2]).getM();
double error = 1.04/sqrt(m); CardinalityCounter* newInst = new CardinalityCounter(m);
CardinalityCounter* newInst = new CardinalityCounter(error); int i = 0;
int i = 0; (*newInst).merge(hll_counters[*s2]);
while((*newInst).getM() != m)
{
i += 1;
newInst = new CardinalityCounter(error/i);
if(i >=5 )
{
break;
}
}
(*newInst).merge(hll_counters[*s2]);
if(hll_counters.count(*s1) < 1) if(hll_counters.count(*s1) < 1)
{ {
hll_counters[*s1] = newInst; hll_counters[*s1] = newInst;