diff --git a/src/BloomFilter.cc b/src/BloomFilter.cc index 1d73734236..e55db71e46 100644 --- a/src/BloomFilter.cc +++ b/src/BloomFilter.cc @@ -124,9 +124,7 @@ BloomFilter* BloomFilter::Unserialize(UnserialInfo* info) bool BloomFilter::DoSerialize(SerialInfo* info) const { DO_SERIALIZE(SER_BLOOMFILTER, SerialObj); - if ( ! SERIALIZE(static_cast(hash_->K())) ) - return false; - return SERIALIZE(static_cast(elements_)); + return SERIALIZE(static_cast(hash_->K())); } bool BloomFilter::DoUnserialize(UnserialInfo* info) @@ -136,10 +134,6 @@ bool BloomFilter::DoUnserialize(UnserialInfo* info) if ( ! UNSERIALIZE(&k) ) return false; hash_ = new hash_policy(static_cast(k)); - uint64 elements; - if ( ! UNSERIALIZE(&elements) ) - return false; - elements_ = static_cast(elements); return true; } @@ -155,6 +149,17 @@ size_t BasicBloomFilter::K(size_t cells, size_t capacity) return std::ceil(frac * std::log(2)); } +BasicBloomFilter* BasicBloomFilter::Merge(const BasicBloomFilter* x, + const BasicBloomFilter* y) + { + BasicBloomFilter* result = new BasicBloomFilter(); + result->bits_ = new BitVector(*x->bits_ | *y->bits_); + // TODO: implement the hasher pool and make sure the new result gets the same + // number of (equal) hash functions. + //assert(x->hash_ == y->hash_); + return result; + } + BasicBloomFilter::BasicBloomFilter() : bits_(NULL) { @@ -201,6 +206,14 @@ size_t BasicBloomFilter::CountImpl(const HashPolicy::HashVector& h) const return 1; } + +CountingBloomFilter* CountingBloomFilter::Merge(const CountingBloomFilter* x, + const CountingBloomFilter* y) +{ + assert(! "not yet implemented"); + return NULL; +} + CountingBloomFilter::CountingBloomFilter() : cells_(NULL) { diff --git a/src/BloomFilter.h b/src/BloomFilter.h index 4a83ba904b..3b5d9efa71 100644 --- a/src/BloomFilter.h +++ b/src/BloomFilter.h @@ -155,7 +155,6 @@ public: template void Add(const T& x) { - ++elements_; AddImpl(hash_->Hash(&x, sizeof(x))); } @@ -172,16 +171,6 @@ public: return CountImpl(hash_->Hash(&x, sizeof(x))); } - /** - * Retrieves the number of elements added to the Bloom filter. - * - * @return The number of elements in this Bloom filter. - */ - size_t Size() const - { - return elements_; - } - bool Serialize(SerialInfo* info) const; static BloomFilter* Unserialize(UnserialInfo* info); @@ -196,7 +185,6 @@ protected: private: HashPolicy* hash_; - size_t elements_; }; /** @@ -230,6 +218,9 @@ public: */ static size_t K(size_t cells, size_t capacity); + static BasicBloomFilter* Merge(const BasicBloomFilter* x, + const BasicBloomFilter* y); + /** * Constructs a basic Bloom filter with a given false-positive rate and * capacity. @@ -258,6 +249,9 @@ private: */ class CountingBloomFilter : public BloomFilter { public: + static CountingBloomFilter* Merge(const CountingBloomFilter* x, + const CountingBloomFilter* y); + CountingBloomFilter(double fp, size_t capacity, size_t width); CountingBloomFilter(size_t cells, size_t capacity, size_t width); diff --git a/src/OpaqueVal.cc b/src/OpaqueVal.cc index 76936dfb78..9dd5c7f980 100644 --- a/src/OpaqueVal.cc +++ b/src/OpaqueVal.cc @@ -572,10 +572,21 @@ size_t BloomFilterVal::Count(const Val* val) const return bloom_filter_->Count(key->Hash()); } -BloomFilterVal* BloomFilterVal::Merge(const BloomFilterVal* first, - const BloomFilterVal* second) +BloomFilterVal* BloomFilterVal::Merge(const BloomFilterVal* x, + const BloomFilterVal* y) { - assert(! "not yet implemented"); + if ( x->Type() != y->Type() ) + { + reporter->InternalError("cannot merge Bloom filters with different types"); + return NULL; + } + + BloomFilterVal* result; + if ( (result = DoMerge(x, y)) ) + return result; + else if ( (result = DoMerge(x, y)) ) + return result; + return NULL; } diff --git a/src/OpaqueVal.h b/src/OpaqueVal.h index e97a530f3a..4b45cad519 100644 --- a/src/OpaqueVal.h +++ b/src/OpaqueVal.h @@ -113,10 +113,10 @@ class BloomFilterVal : public OpaqueVal { BloomFilterVal(const BloomFilterVal&); BloomFilterVal& operator=(const BloomFilterVal&); public: - static BloomFilterVal* Merge(const BloomFilterVal* first, - const BloomFilterVal* second); + static BloomFilterVal* Merge(const BloomFilterVal* x, + const BloomFilterVal* y); - BloomFilterVal(BloomFilter* bf); + explicit BloomFilterVal(BloomFilter* bf); ~BloomFilterVal(); bool Typify(BroType* type); @@ -133,6 +133,17 @@ protected: DECLARE_SERIAL(BloomFilterVal); private: + template + static BloomFilterVal* DoMerge(const BloomFilterVal* x, + const BloomFilterVal* y) + { + const T* a = dynamic_cast(x->bloom_filter_); + const T* b = dynamic_cast(y->bloom_filter_); + if ( a && b ) + return new BloomFilterVal(T::Merge(a, b)); + return NULL; + } + BroType* type_; CompositeHash* hash_; BloomFilter* bloom_filter_;