Implement and test Bloom filter merging.

This commit is contained in:
Matthias Vallentin 2013-07-22 18:11:12 +02:00
parent eb64f5f961
commit a39f980cd4
9 changed files with 81 additions and 13 deletions

View file

@ -70,8 +70,13 @@ size_t BasicBloomFilter::K(size_t cells, size_t capacity)
/**
 * Merges two basic Bloom filters into a newly allocated filter.
 *
 * This listing is a side-by-side diff rendering: each line carries the
 * pre-commit text followed by the post-commit text. The notes below
 * describe the post-commit (right-hand) version.
 *
 * The result's bit vector is constructed from `*x->bits_ | *y->bits_` and
 * its hasher is a clone of x's hasher.
 *
 * @param x The first filter; dereferenced without a null check.
 * @param y The second filter; dereferenced without a null check.
 *
 * @return A filter allocated with `new`, or NULL when x's hasher reports
 * that it is not equal to y's hasher.
 */
BasicBloomFilter* BasicBloomFilter::Merge(const BasicBloomFilter* x, BasicBloomFilter* BasicBloomFilter::Merge(const BasicBloomFilter* x,
const BasicBloomFilter* y) const BasicBloomFilter* y)
{ {
// The post-commit side replaces the TODO below with an actual hasher comparison.
// TODO: Ensure that x and y use the same Hasher before proceeding. if ( ! x->hasher_->Equals(y->hasher_) )
{
// NOTE(review): presumably InternalError() does not return, which would
// make the following return unreachable -- confirm against Reporter.
reporter->InternalError("incompatible hashers during Bloom filter merge");
return NULL;
}
// NOTE(review): nothing visible here compares the sizes of the two bit
// vectors; what operator| does for operands of different length is not
// shown -- confirm.
BasicBloomFilter* result = new BasicBloomFilter(); BasicBloomFilter* result = new BasicBloomFilter();
// The result gets its own hasher instance rather than sharing x's pointer.
result->hasher_ = x->hasher_->Clone();
result->bits_ = new BitVector(*x->bits_ | *y->bits_); result->bits_ = new BitVector(*x->bits_ | *y->bits_);
return result; return result;
} }
@ -119,10 +124,17 @@ size_t BasicBloomFilter::CountImpl(const Hasher::digest_vector& h) const
/**
 * Merges two counting Bloom filters into a newly allocated filter.
 *
 * This listing is a side-by-side diff rendering: each line carries the
 * pre-commit text followed by the post-commit text. Before the commit the
 * function only asserted "not yet implemented" and returned NULL; the
 * notes below describe the post-commit (right-hand) version.
 *
 * The result's counter vector is constructed from
 * `*x->cells_ | *y->cells_` and its hasher is a clone of x's hasher.
 *
 * @param x The first filter; dereferenced without a null check.
 * @param y The second filter; dereferenced without a null check.
 *
 * @return A filter allocated with `new`, or NULL when x's hasher reports
 * that it is not equal to y's hasher.
 */
CountingBloomFilter* CountingBloomFilter::Merge(const CountingBloomFilter* x, CountingBloomFilter* CountingBloomFilter::Merge(const CountingBloomFilter* x,
const CountingBloomFilter* y) const CountingBloomFilter* y)
{ {
assert(! "not yet implemented"); if ( ! x->hasher_->Equals(y->hasher_) )
return NULL; {
// NOTE(review): presumably InternalError() does not return, which would
// make the following return unreachable -- confirm against Reporter.
} reporter->InternalError("incompatible hashers during Bloom filter merge");
return NULL;
}
CountingBloomFilter* result = new CountingBloomFilter();
// The result gets its own hasher instance rather than sharing x's pointer.
result->hasher_ = x->hasher_->Clone();
// NOTE(review): the counters are combined with operator| on CounterVector,
// whose definition is not visible here. If it is a bitwise OR of the
// underlying bits, the merged counts are not sums in general (1 | 1 == 1,
// not 2) -- confirm that this is the intended merge semantics.
result->cells_ = new CounterVector(*x->cells_ | *y->cells_);
return result;
}
CountingBloomFilter::CountingBloomFilter() CountingBloomFilter::CountingBloomFilter()
: cells_(NULL) : cells_(NULL)

View file

@ -57,7 +57,6 @@ protected:
virtual void AddImpl(const Hasher::digest_vector& hashes) = 0; virtual void AddImpl(const Hasher::digest_vector& hashes) = 0;
virtual size_t CountImpl(const Hasher::digest_vector& hashes) const = 0; virtual size_t CountImpl(const Hasher::digest_vector& hashes) const = 0;
private:
const Hasher* hasher_; const Hasher* hasher_;
}; };

View file

@ -10,6 +10,12 @@ CounterVector::CounterVector(size_t width, size_t cells)
{ {
} }
/**
 * Copy constructor: allocates a new BitVector copy-constructed from
 * other's bit vector and copies other's counter width, so the new object
 * does not share other's BitVector pointer.
 *
 * other.bits_ is dereferenced without a null check.
 *
 * NOTE(review): no base-class initializer is listed, so any base
 * sub-object is default-constructed rather than copied from other --
 * confirm that this is intended.
 */
CounterVector::CounterVector(const CounterVector& other)
: bits_(new BitVector(*other.bits_)),
width_(other.width_)
{
}
CounterVector::~CounterVector() CounterVector::~CounterVector()
{ {
delete bits_; delete bits_;

View file

@ -9,6 +9,7 @@ class BitVector;
* A vector of counters, each of which have a fixed number of bits. * A vector of counters, each of which have a fixed number of bits.
*/ */
class CounterVector : public SerialObj { class CounterVector : public SerialObj {
CounterVector& operator=(const CounterVector&);
public: public:
typedef size_t size_type; typedef size_t size_type;
typedef uint64 count_type; typedef uint64 count_type;
@ -24,6 +25,13 @@ public:
*/ */
CounterVector(size_t width, size_t cells = 1024); CounterVector(size_t width, size_t cells = 1024);
/**
* Copy-constructs a counter vector.
*
* @param other The counter vector to copy.
*/
CounterVector(const CounterVector& other);
~CounterVector(); ~CounterVector();
/** /**

View file

@ -64,7 +64,7 @@ DefaultHasher* DefaultHasher::Clone() const
return new DefaultHasher(*this); return new DefaultHasher(*this);
} }
bool DefaultHasher::Equals(const Hasher* other) const /* final */ bool DefaultHasher::Equals(const Hasher* other) const
{ {
if ( typeid(*this) != typeid(*other) ) if ( typeid(*this) != typeid(*other) )
return false; return false;
@ -94,7 +94,7 @@ DoubleHasher* DoubleHasher::Clone() const
return new DoubleHasher(*this); return new DoubleHasher(*this);
} }
bool DoubleHasher::Equals(const Hasher* other) const /* final */ bool DoubleHasher::Equals(const Hasher* other) const
{ {
if ( typeid(*this) != typeid(*other) ) if ( typeid(*this) != typeid(*other) )
return false; return false;

View file

@ -1,6 +1,5 @@
#include "OpaqueVal.h" #include "OpaqueVal.h"
#include "BloomFilter.h"
#include "NetVar.h" #include "NetVar.h"
#include "Reporter.h" #include "Reporter.h"
#include "Serializer.h" #include "Serializer.h"
@ -587,6 +586,7 @@ BloomFilterVal* BloomFilterVal::Merge(const BloomFilterVal* x,
else if ( (result = DoMerge<CountingBloomFilter>(x, y)) ) else if ( (result = DoMerge<CountingBloomFilter>(x, y)) )
return result; return result;
reporter->InternalError("failed to merge Bloom filters");
return NULL; return NULL;
} }

View file

@ -3,6 +3,7 @@
#ifndef OPAQUEVAL_H #ifndef OPAQUEVAL_H
#define OPAQUEVAL_H #define OPAQUEVAL_H
#include "BloomFilter.h"
#include "RandTest.h" #include "RandTest.h"
#include "Val.h" #include "Val.h"
#include "digest.h" #include "digest.h"
@ -137,9 +138,23 @@ private:
// Attempts to merge the Bloom filters wrapped by x and y as filters of
// concrete type T.
//
// This listing is a side-by-side diff rendering: each line carries the
// pre-commit text followed by the post-commit text. Before the commit the
// function used dynamic_cast on both filters and merged only if both casts
// succeeded. The notes below describe the post-commit (right-hand) version.
//
// Returns NULL when the two wrapped filters have different dynamic types
// (after reporting an internal error), when T is not the dynamic type of
// x's filter, or when Typify() fails on the merged value (after reporting
// an internal error). Otherwise returns a new BloomFilterVal wrapping
// T::Merge(a, b), typified with x's type.
static BloomFilterVal* DoMerge(const BloomFilterVal* x, static BloomFilterVal* DoMerge(const BloomFilterVal* x,
const BloomFilterVal* y) const BloomFilterVal* y)
{ {
const T* a = dynamic_cast<const T*>(x->bloom_filter_); if ( typeid(*x->bloom_filter_) != typeid(*y->bloom_filter_) )
const T* b = dynamic_cast<const T*>(y->bloom_filter_); {
return a && b ? new BloomFilterVal(T::Merge(a, b)) : NULL; reporter->InternalError("cannot merge different Bloom filter types");
return NULL;
}
// An exact typeid match is required here, so a class derived from T is
// rejected; this is what makes the static_casts below valid.
if ( typeid(T) != typeid(*x->bloom_filter_) )
return NULL;
const T* a = static_cast<const T*>(x->bloom_filter_);
const T* b = static_cast<const T*>(y->bloom_filter_);
// NOTE(review): the Merge functions shown in this commit can return NULL
// on a hasher mismatch; that result is handed to the BloomFilterVal
// constructor unchecked -- confirm the constructor tolerates NULL.
BloomFilterVal* merged = new BloomFilterVal(T::Merge(a, b));
// NOTE(review): a plain (throwing) new-expression never yields a null
// pointer, so this assert cannot fire.
assert(merged);
if ( ! merged->Typify(x->Type()) )
{
reporter->InternalError("failed to set type on merged Bloom filter");
// NOTE(review): merged is not released on this path; that only matters
// if InternalError() returns -- confirm.
return NULL;
}
return merged;
} }
BroType* type_; BroType* type_;

View file

@ -7,8 +7,15 @@
1 1
1 1
1 1
1
1
1
1
2 2
3 3
3 3
2 2
3 3
3
3
2

View file

@ -35,11 +35,21 @@ function test_basic_bloom_filter()
# Invalid parameters. # Invalid parameters.
local bf_bug0 = bloomfilter_basic_init(-0.5, 42); local bf_bug0 = bloomfilter_basic_init(-0.5, 42);
local bf_bug1 = bloomfilter_basic_init(1.1, 42); local bf_bug1 = bloomfilter_basic_init(1.1, 42);
# Merging
local bf_cnt2 = bloomfilter_basic_init(0.1, 1000);
bloomfilter_add(bf_cnt2, 42);
bloomfilter_add(bf_cnt, 100);
local bf_merged = bloomfilter_merge(bf_cnt, bf_cnt2);
print bloomfilter_lookup(bf_merged, 42);
print bloomfilter_lookup(bf_merged, 84);
print bloomfilter_lookup(bf_merged, 100);
print bloomfilter_lookup(bf_merged, 168);
} }
function test_counting_bloom_filter() function test_counting_bloom_filter()
{ {
local bf = bloomfilter_counting_init(3, 16, 3); local bf = bloomfilter_counting_init(3, 32, 3);
bloomfilter_add(bf, "foo"); bloomfilter_add(bf, "foo");
print bloomfilter_lookup(bf, "foo"); # 1 print bloomfilter_lookup(bf, "foo"); # 1
bloomfilter_add(bf, "foo"); bloomfilter_add(bf, "foo");
@ -49,10 +59,21 @@ function test_counting_bloom_filter()
bloomfilter_add(bf, "foo"); bloomfilter_add(bf, "foo");
print bloomfilter_lookup(bf, "foo"); # still 3 print bloomfilter_lookup(bf, "foo"); # still 3
bloomfilter_add(bf, "bar"); bloomfilter_add(bf, "bar");
bloomfilter_add(bf, "bar"); bloomfilter_add(bf, "bar");
print bloomfilter_lookup(bf, "bar"); # 2 print bloomfilter_lookup(bf, "bar"); # 2
print bloomfilter_lookup(bf, "foo"); # still 3 print bloomfilter_lookup(bf, "foo"); # still 3
# Merging
local bf2 = bloomfilter_counting_init(3, 32, 3);
bloomfilter_add(bf2, "baz");
bloomfilter_add(bf2, "baz");
bloomfilter_add(bf2, "bar");
local bf_merged = bloomfilter_merge(bf, bf2);
print bloomfilter_lookup(bf_merged, "foo");
print bloomfilter_lookup(bf_merged, "bar");
print bloomfilter_lookup(bf_merged, "baz");
} }
event bro_init() event bro_init()