Implement value merging.

The actual BloomFilter merging is still missing; this is just a first step in
the right direction on the user-interface side.
This commit is contained in:
Matthias Vallentin 2013-06-10 22:24:23 -07:00
parent 22afbe42dd
commit 14a701a237
4 changed files with 54 additions and 25 deletions

View file

@ -124,9 +124,7 @@ BloomFilter* BloomFilter::Unserialize(UnserialInfo* info)
// Serializes this Bloom filter. After the DO_SERIALIZE preamble for
// SER_BLOOMFILTER / SerialObj, the number of hash functions reported by
// hash_->K() is written as a 16-bit unsigned value.
//
// Returns false as soon as a SERIALIZE step fails; otherwise returns the
// result of the last SERIALIZE call.
//
// NOTE(review): two variants of the function tail appear below (K followed by
// elements_ as a 64-bit value, and a single return that writes only K).
// Presumably only the latter is meant to remain, since the element counter is
// being dropped - confirm against the matching DoUnserialize.
bool BloomFilter::DoSerialize(SerialInfo* info) const
{
DO_SERIALIZE(SER_BLOOMFILTER, SerialObj);
if ( ! SERIALIZE(static_cast<uint16>(hash_->K())) )
return false;
return SERIALIZE(static_cast<uint64>(elements_));
return SERIALIZE(static_cast<uint16>(hash_->K()));
}
bool BloomFilter::DoUnserialize(UnserialInfo* info)
@ -136,10 +134,6 @@ bool BloomFilter::DoUnserialize(UnserialInfo* info)
if ( ! UNSERIALIZE(&k) )
return false;
hash_ = new hash_policy(static_cast<size_t>(k));
uint64 elements;
if ( ! UNSERIALIZE(&elements) )
return false;
elements_ = static_cast<size_t>(elements);
return true;
}
@ -155,6 +149,17 @@ size_t BasicBloomFilter::K(size_t cells, size_t capacity)
return std::ceil(frac * std::log(2));
}
// Builds a new, heap-allocated basic Bloom filter from two existing ones.
// The bit vector of the returned filter is whatever BitVector's operator|
// yields for the bit vectors of x and y. Both x and y (and their bit
// vectors) are dereferenced without a NULL check.
//
// TODO: implement the hasher pool and make sure the merged filter ends up
// with the same number of (equal) hash functions as its inputs. Until then
// the hash policy of the returned filter is not set up here, and the
// intended precondition stays disabled:
//   assert(x->hash_ == y->hash_);
BasicBloomFilter* BasicBloomFilter::Merge(const BasicBloomFilter* x,
                                          const BasicBloomFilter* y)
{
	BasicBloomFilter* merged = new BasicBloomFilter();
	BitVector* combined = new BitVector(*x->bits_ | *y->bits_);
	merged->bits_ = combined;
	return merged;
}
BasicBloomFilter::BasicBloomFilter()
: bits_(NULL)
{
@ -201,6 +206,14 @@ size_t BasicBloomFilter::CountImpl(const HashPolicy::HashVector& h) const
return 1;
}
// Merges two counting Bloom filters. Not implemented yet: the operand of the
// assertion below is always false, so the assertion fails whenever this
// function is reached. If assertions are compiled out, the function falls
// through and returns NULL, so callers must be prepared for a NULL result.
CountingBloomFilter* CountingBloomFilter::Merge(const CountingBloomFilter* x,
const CountingBloomFilter* y)
{
assert(! "not yet implemented");
return NULL;
}
CountingBloomFilter::CountingBloomFilter()
: cells_(NULL)
{

View file

@ -155,7 +155,6 @@ public:
template <typename T>
void Add(const T& x)
{
++elements_;
AddImpl(hash_->Hash(&x, sizeof(x)));
}
@ -172,16 +171,6 @@ public:
return CountImpl(hash_->Hash(&x, sizeof(x)));
}
/**
* Retrieves the number of elements added to the Bloom filter.
*
* @return The number of elements in this Bloom filter.
*/
size_t Size() const
{
return elements_;
}
bool Serialize(SerialInfo* info) const;
static BloomFilter* Unserialize(UnserialInfo* info);
@ -196,7 +185,6 @@ protected:
private:
HashPolicy* hash_;
size_t elements_;
};
/**
@ -230,6 +218,9 @@ public:
*/
static size_t K(size_t cells, size_t capacity);
/**
 * Merges two basic Bloom filters into a newly allocated one. The bit
 * vector of the result is obtained by applying BitVector's operator| to
 * the bit vectors of the two inputs.
 *
 * @param x The first Bloom filter.
 *
 * @param y The second Bloom filter.
 *
 * @return A newly allocated Bloom filter. Presumably the caller takes
 * ownership of it (TODO confirm).
 */
static BasicBloomFilter* Merge(const BasicBloomFilter* x,
const BasicBloomFilter* y);
/**
* Constructs a basic Bloom filter with a given false-positive rate and
* capacity.
@ -258,6 +249,9 @@ private:
*/
class CountingBloomFilter : public BloomFilter {
public:
/**
 * Merges two counting Bloom filters.
 *
 * NOTE(review): the definition is currently a stub that trips an
 * assertion and otherwise returns NULL.
 *
 * @param x The first Bloom filter.
 *
 * @param y The second Bloom filter.
 *
 * @return The merged Bloom filter, or NULL while merging is not
 * implemented.
 */
static CountingBloomFilter* Merge(const CountingBloomFilter* x,
const CountingBloomFilter* y);
CountingBloomFilter(double fp, size_t capacity, size_t width);
CountingBloomFilter(size_t cells, size_t capacity, size_t width);

View file

@ -572,10 +572,21 @@ size_t BloomFilterVal::Count(const Val* val) const
return bloom_filter_->Count(key->Hash());
}
// Merges two Bloom filter values.
//
// If the two values report different types via Type(), an internal error is
// raised through the reporter. Otherwise a merge is attempted for each
// concrete filter class in turn (BasicBloomFilter first, then
// CountingBloomFilter) and the first non-NULL result is returned.
//
// Returns NULL if none of the attempts produces a value.
BloomFilterVal* BloomFilterVal::Merge(const BloomFilterVal* first,
const BloomFilterVal* second)
BloomFilterVal* BloomFilterVal::Merge(const BloomFilterVal* x,
const BloomFilterVal* y)
{
assert(! "not yet implemented");
if ( x->Type() != y->Type() )
{
reporter->InternalError("cannot merge Bloom filters with different types");
return NULL;
}
// Try each known filter implementation; DoMerge yields NULL when the
// underlying filters are not both of the requested class.
BloomFilterVal* result;
if ( (result = DoMerge<BasicBloomFilter>(x, y)) )
return result;
else if ( (result = DoMerge<CountingBloomFilter>(x, y)) )
return result;
return NULL;
}

View file

@ -113,10 +113,10 @@ class BloomFilterVal : public OpaqueVal {
BloomFilterVal(const BloomFilterVal&);
BloomFilterVal& operator=(const BloomFilterVal&);
public:
// Merges two Bloom filter values. Returns a new BloomFilterVal holding the
// merged filter, or NULL if the two values cannot be merged (differing
// types, or no matching filter implementation).
static BloomFilterVal* Merge(const BloomFilterVal* first,
const BloomFilterVal* second);
static BloomFilterVal* Merge(const BloomFilterVal* x,
const BloomFilterVal* y);
BloomFilterVal(BloomFilter* bf);
explicit BloomFilterVal(BloomFilter* bf);
~BloomFilterVal();
bool Typify(BroType* type);
@ -133,6 +133,17 @@ protected:
DECLARE_SERIAL(BloomFilterVal);
private:
// Attempts to merge the filters wrapped by x and y as filters of concrete
// class T.
//
// T must provide a static Merge(const T*, const T*) returning a pointer to
// a merged filter.
//
// Returns a new BloomFilterVal wrapping the merged filter, or NULL if
// either wrapped filter is not a T or if T::Merge itself yields NULL. The
// latter matters because a Merge implementation may be a stub that
// returns NULL once assertions are compiled out; wrapping that NULL would
// produce a value whose filter pointer is dereferenced later on.
template <typename T>
static BloomFilterVal* DoMerge(const BloomFilterVal* x,
                               const BloomFilterVal* y)
{
	const T* a = dynamic_cast<const T*>(x->bloom_filter_);
	const T* b = dynamic_cast<const T*>(y->bloom_filter_);

	// Both operands must hold the requested filter class.
	if ( ! a || ! b )
		return NULL;

	T* merged = T::Merge(a, b);

	// Propagate a failed merge instead of wrapping a NULL filter.
	if ( ! merged )
		return NULL;

	return new BloomFilterVal(merged);
}
BroType* type_;
CompositeHash* hash_;
BloomFilter* bloom_filter_;