Merge branch 'topic/bernhard/hyperloglog' into topic/bernhard/hyperloglog-with-measurement

This commit is contained in:
Bernhard Amann 2013-04-19 09:53:35 -07:00
commit 75f709ec6b
5 changed files with 65 additions and 23 deletions

View file

@ -43,7 +43,7 @@ CardinalityCounter::CardinalityCounter(uint64_t size)
V = m;
}
CardinalityCounter :: CardinalityCounter(double error_margin)
CardinalityCounter::CardinalityCounter(double error_margin)
{
int b = optimalB(error_margin);
m = (uint64_t) pow(2, b);
@ -101,9 +101,9 @@ void CardinalityCounter::addElement(uint64_t hash)
double CardinalityCounter::size()
{
double answer = 0;
for (int i = 0; i < m; i++)
for (int i = 0; i < m; i++)
answer += pow(2, -(int)buckets[i]);
answer = 1/answer;
answer = alpha_m*m*m*answer;

View file

@ -20,49 +20,47 @@ IMPLEMENT_SERIAL(CardinalityVal, SER_CARDINALITY_VAL);
// Serializes this CardinalityVal. After the base-class step (DO_SERIALIZE)
// it writes the counter fields c->m, c->V and c->alpha_m, followed by each of
// the c->m entries of c->buckets.
//
// NOTE(review): this span looks like a rendered merge diff in which removed
// and added lines appear next to each other (two accumulators, `valid` and
// `serialvalid`; two consecutive returns after the IsValid() check; the
// field writes and the final return duplicated). Confirm which lines belong
// to the final version before relying on the statement order shown here.
bool CardinalityVal::DoSerialize(SerialInfo* info) const
{
// NOTE(review): looks like leftover debug output -- confirm it is intended.
printf("Serializing\n");
DO_SERIALIZE(SER_CARDINALITY_VAL, OpaqueVal);
bool serialvalid = true;
serialvalid &= SERIALIZE(&valid);
// An invalid value returns here, before any counter field is written.
if ( ! IsValid() )
return true;
return serialvalid;
assert(c);
bool valid = true;
valid &= SERIALIZE(c->m);
valid &= SERIALIZE(c->V);
valid &= SERIALIZE(c->alpha_m);
serialvalid &= SERIALIZE(c->m);
serialvalid &= SERIALIZE(c->V);
serialvalid &= SERIALIZE(c->alpha_m);
// One write per bucket, c->m in total.
for ( int i = 0; i < c->m; i++ )
{
valid &= SERIALIZE(c->buckets[i]);
}
serialvalid &= SERIALIZE( c->buckets[i] );
return valid;
return serialvalid;
}
// Restores a CardinalityVal. After the base-class step (DO_UNSERIALIZE) it
// reads m, allocates a new CardinalityCounter(m) into c, then reads V,
// alpha_m and m bucket bytes into that counter.
//
// NOTE(review): this span looks like a rendered merge diff in which removed
// and added lines appear next to each other (`valid` vs. `serialvalid`
// accumulators, two returns after the IsValid() check). Confirm which lines
// belong to the final version.
// NOTE(review): the only return at the end is `return valid;`, while the
// newer-looking lines accumulate read results in `serialvalid` -- verify that
// the intended status is what gets returned.
bool CardinalityVal::DoUnserialize(UnserialInfo* info)
{
// NOTE(review): looks like leftover debug output -- confirm it is intended.
printf("Unserializing\n");
DO_UNSERIALIZE(OpaqueVal);
bool serialvalid = UNSERIALIZE(&valid);
// An invalid value returns here, before any counter is allocated.
if ( ! IsValid() )
return true;
return serialvalid;
uint64_t m;
bool valid = UNSERIALIZE(&m);
serialvalid &= UNSERIALIZE(&m);
// The counter is allocated with the m just read, before the other fields.
c = new CardinalityCounter(m);
valid &= UNSERIALIZE(&c->V);
valid &= UNSERIALIZE(&c->alpha_m);
serialvalid &= UNSERIALIZE(&c->V);
serialvalid &= UNSERIALIZE(&c->alpha_m);
uint8_t* buckets = c->buckets;
// Read each bucket in place through a pointer to its slot.
for ( int i = 0; i < m; i++ )
{
uint8_t* currbucket = buckets + i;
valid &= UNSERIALIZE( currbucket );
serialvalid &= UNSERIALIZE( currbucket );
}
return valid;
}

View file

@ -112,7 +112,6 @@ bool BinarySerializationFormat::Read(uint8* v, const char* tag)
if ( ! ReadData(v, sizeof(*v)) )
return false;
*v = ntohs(*v);
DBG_LOG(DBG_SERIAL, "Read uint8 %hu [%s]", *v, tag);
return true;
}
@ -314,7 +313,6 @@ bool BinarySerializationFormat::Write(char v, const char* tag)
// Writes a single uint8 value to the output.
//
// v:   the value to write.
// tag: label that appears only in the debug log line.
//
// Returns the result of WriteData() for the sizeof(v)-byte write.
bool BinarySerializationFormat::Write(uint8 v, const char* tag)
{
DBG_LOG(DBG_SERIAL, "Write uint8 %hu [%s]", v, tag);
// No byte-order conversion here: htons() swaps the two bytes of a 16-bit
// value, so storing its result back into a uint8 would discard the data
// on little-endian hosts. The value is written exactly as given.
return WriteData(&v, sizeof(v));
}

View file

@ -0,0 +1,6 @@
1
10.000763
2
10.000763
3
11.000923

View file

@ -0,0 +1,40 @@
# @TEST-EXEC: bro -b %INPUT runnumber=1 >out
# @TEST-EXEC: bro -b %INPUT runnumber=2 >>out
# @TEST-EXEC: bro -b %INPUT runnumber=3 >>out
# @TEST-EXEC: btest-diff out
# Index of the current invocation; the @TEST-EXEC lines set it on the
# command line as runnumber=1, 2 and 3.
global runnumber: count &redef; # differentiate first and second run
# Cardinality counter under test. It is declared &persistent; presumably its
# state is meant to carry over between the separate bro invocations, which is
# what this test appears to exercise -- confirm against the baseline.
global card: opaque of cardinality &persistent;
# Prints the run number and the current estimate on every run; what else
# happens depends on the run number.
event bro_init()
{
print runnumber;
# Run 1 only: create the counter, passing 0.01 to hll_cardinality_init
# (presumably the error margin), and add ten distinct strings.
if ( runnumber == 1 )
{
card = hll_cardinality_init(0.01);
hll_cardinality_add(card, "a");
hll_cardinality_add(card, "b");
hll_cardinality_add(card, "c");
hll_cardinality_add(card, "d");
hll_cardinality_add(card, "e");
hll_cardinality_add(card, "f");
hll_cardinality_add(card, "g");
hll_cardinality_add(card, "h");
hll_cardinality_add(card, "i");
hll_cardinality_add(card, "j");
}
# Printed on every run. In run 2 this happens before the additions below,
# so their effect can only show up in the estimate printed by run 3.
print hll_cardinality_estimate(card);
# Run 2 only: add three strings that run 1 already added plus one new
# string ("aa").
if ( runnumber == 2 )
{
hll_cardinality_add(card, "a");
hll_cardinality_add(card, "b");
hll_cardinality_add(card, "c");
hll_cardinality_add(card, "aa");
}
}