Implement value merging.

The actual BloomFilter merging is still missing; this is just a first step in
the right direction on the user-interface side.
This commit is contained in:
Matthias Vallentin 2013-06-10 22:24:23 -07:00
parent 22afbe42dd
commit 14a701a237
4 changed files with 54 additions and 25 deletions

View file

@ -124,9 +124,7 @@ BloomFilter* BloomFilter::Unserialize(UnserialInfo* info)
bool BloomFilter::DoSerialize(SerialInfo* info) const bool BloomFilter::DoSerialize(SerialInfo* info) const
{ {
DO_SERIALIZE(SER_BLOOMFILTER, SerialObj); DO_SERIALIZE(SER_BLOOMFILTER, SerialObj);
if ( ! SERIALIZE(static_cast<uint16>(hash_->K())) ) return SERIALIZE(static_cast<uint16>(hash_->K()));
return false;
return SERIALIZE(static_cast<uint64>(elements_));
} }
bool BloomFilter::DoUnserialize(UnserialInfo* info) bool BloomFilter::DoUnserialize(UnserialInfo* info)
@ -136,10 +134,6 @@ bool BloomFilter::DoUnserialize(UnserialInfo* info)
if ( ! UNSERIALIZE(&k) ) if ( ! UNSERIALIZE(&k) )
return false; return false;
hash_ = new hash_policy(static_cast<size_t>(k)); hash_ = new hash_policy(static_cast<size_t>(k));
uint64 elements;
if ( ! UNSERIALIZE(&elements) )
return false;
elements_ = static_cast<size_t>(elements);
return true; return true;
} }
@ -155,6 +149,17 @@ size_t BasicBloomFilter::K(size_t cells, size_t capacity)
return std::ceil(frac * std::log(2)); return std::ceil(frac * std::log(2));
} }
/**
 * Creates a new basic Bloom filter from two existing ones by combining
 * their bit vectors with operator| (presumably a bitwise OR, i.e. the
 * union of the set bits -- confirm against BitVector).
 *
 * Both arguments and their bit vectors are dereferenced without a check,
 * so callers must pass valid filters that own a bit vector.
 *
 * NOTE: this function does not configure a hash policy for the returned
 * filter and does not verify that x and y use equal hash functions; see
 * the TODO below.
 *
 * @param x The first filter to merge.
 * @param y The second filter to merge.
 *
 * @return A newly allocated filter holding the combined bit vector.
 */
BasicBloomFilter* BasicBloomFilter::Merge(const BasicBloomFilter* x,
                                          const BasicBloomFilter* y)
{
	// The default constructor leaves bits_ at NULL, so nothing is replaced
	// when the combined vector is attached below.
	BasicBloomFilter* merged = new BasicBloomFilter();

	BitVector* combined = new BitVector(*x->bits_ | *y->bits_);
	merged->bits_ = combined;

	// TODO: implement the hasher pool and make sure the merged filter gets
	// the same number of (equal) hash functions as its inputs.
	//assert(x->hash_ == y->hash_);

	return merged;
}
BasicBloomFilter::BasicBloomFilter() BasicBloomFilter::BasicBloomFilter()
: bits_(NULL) : bits_(NULL)
{ {
@ -201,6 +206,14 @@ size_t BasicBloomFilter::CountImpl(const HashPolicy::HashVector& h) const
return 1; return 1;
} }
/**
 * Placeholder for merging two counting Bloom filters; the merge itself is
 * not implemented yet. Neither argument is read.
 *
 * @param x The first filter to merge (currently unused).
 * @param y The second filter to merge (currently unused).
 *
 * @return Always NULL, and only when the assertion below is compiled out.
 */
CountingBloomFilter* CountingBloomFilter::Merge(const CountingBloomFilter* x,
const CountingBloomFilter* y)
{
// Negating a string literal always yields false, so this assertion fails
// unconditionally whenever assertions are enabled; the literal only serves
// to put a readable message into the failure output.
assert(! "not yet implemented");
// Reached only when assert() expands to nothing (NDEBUG builds).
return NULL;
}
CountingBloomFilter::CountingBloomFilter() CountingBloomFilter::CountingBloomFilter()
: cells_(NULL) : cells_(NULL)
{ {

View file

@ -155,7 +155,6 @@ public:
template <typename T> template <typename T>
void Add(const T& x) void Add(const T& x)
{ {
++elements_;
AddImpl(hash_->Hash(&x, sizeof(x))); AddImpl(hash_->Hash(&x, sizeof(x)));
} }
@ -172,16 +171,6 @@ public:
return CountImpl(hash_->Hash(&x, sizeof(x))); return CountImpl(hash_->Hash(&x, sizeof(x)));
} }
/**
* Retrieves the number of elements added to the Bloom filter.
*
* @return The number of elements in this Bloom filter.
*/
size_t Size() const
{
return elements_;
}
bool Serialize(SerialInfo* info) const; bool Serialize(SerialInfo* info) const;
static BloomFilter* Unserialize(UnserialInfo* info); static BloomFilter* Unserialize(UnserialInfo* info);
@ -196,7 +185,6 @@ protected:
private: private:
HashPolicy* hash_; HashPolicy* hash_;
size_t elements_;
}; };
/** /**
@ -230,6 +218,9 @@ public:
*/ */
static size_t K(size_t cells, size_t capacity); static size_t K(size_t cells, size_t capacity);
static BasicBloomFilter* Merge(const BasicBloomFilter* x,
const BasicBloomFilter* y);
/** /**
* Constructs a basic Bloom filter with a given false-positive rate and * Constructs a basic Bloom filter with a given false-positive rate and
* capacity. * capacity.
@ -258,6 +249,9 @@ private:
*/ */
class CountingBloomFilter : public BloomFilter { class CountingBloomFilter : public BloomFilter {
public: public:
static CountingBloomFilter* Merge(const CountingBloomFilter* x,
const CountingBloomFilter* y);
CountingBloomFilter(double fp, size_t capacity, size_t width); CountingBloomFilter(double fp, size_t capacity, size_t width);
CountingBloomFilter(size_t cells, size_t capacity, size_t width); CountingBloomFilter(size_t cells, size_t capacity, size_t width);

View file

@ -572,10 +572,21 @@ size_t BloomFilterVal::Count(const Val* val) const
return bloom_filter_->Count(key->Hash()); return bloom_filter_->Count(key->Hash());
} }
BloomFilterVal* BloomFilterVal::Merge(const BloomFilterVal* first, BloomFilterVal* BloomFilterVal::Merge(const BloomFilterVal* x,
const BloomFilterVal* second) const BloomFilterVal* y)
{ {
assert(! "not yet implemented"); if ( x->Type() != y->Type() )
{
reporter->InternalError("cannot merge Bloom filters with different types");
return NULL;
}
BloomFilterVal* result;
if ( (result = DoMerge<BasicBloomFilter>(x, y)) )
return result;
else if ( (result = DoMerge<CountingBloomFilter>(x, y)) )
return result;
return NULL; return NULL;
} }

View file

@ -113,10 +113,10 @@ class BloomFilterVal : public OpaqueVal {
BloomFilterVal(const BloomFilterVal&); BloomFilterVal(const BloomFilterVal&);
BloomFilterVal& operator=(const BloomFilterVal&); BloomFilterVal& operator=(const BloomFilterVal&);
public: public:
static BloomFilterVal* Merge(const BloomFilterVal* first, static BloomFilterVal* Merge(const BloomFilterVal* x,
const BloomFilterVal* second); const BloomFilterVal* y);
BloomFilterVal(BloomFilter* bf); explicit BloomFilterVal(BloomFilter* bf);
~BloomFilterVal(); ~BloomFilterVal();
bool Typify(BroType* type); bool Typify(BroType* type);
@ -133,6 +133,17 @@ protected:
DECLARE_SERIAL(BloomFilterVal); DECLARE_SERIAL(BloomFilterVal);
private: private:
/**
 * Attempts to merge the Bloom filters held by two values, provided both
 * filters are of the concrete type T.
 *
 * @tparam T The concrete Bloom filter type to try. It must provide a
 * static Merge(const T*, const T*) that returns a T*.
 *
 * @param x The value holding the first filter.
 * @param y The value holding the second filter.
 *
 * @return A newly allocated value wrapping the merged filter, or NULL if
 * either filter is not a T or if T::Merge did not produce a filter.
 */
template <typename T>
static BloomFilterVal* DoMerge(const BloomFilterVal* x,
                               const BloomFilterVal* y)
{
	const T* a = dynamic_cast<const T*>(x->bloom_filter_);
	const T* b = dynamic_cast<const T*>(y->bloom_filter_);

	// dynamic_cast yields NULL on a type mismatch; report "not applicable"
	// so the caller can try the next filter type.
	if ( ! a || ! b )
		return NULL;

	// T::Merge may return NULL (CountingBloomFilter::Merge does so once its
	// assertion is compiled out). Wrapping NULL would hand the caller a
	// non-NULL value whose filter pointer is NULL, which it would treat as
	// a successful merge; propagate the failure instead.
	T* merged = T::Merge(a, b);
	if ( ! merged )
		return NULL;

	return new BloomFilterVal(merged);
}
BroType* type_; BroType* type_;
CompositeHash* hash_; CompositeHash* hash_;
BloomFilter* bloom_filter_; BloomFilter* bloom_filter_;