mirror of
https://github.com/zeek/zeek.git
synced 2025-10-10 10:38:20 +00:00
Implement value merging.
The actual BloomFilter merging is still missing; this is just a first step in the right direction on the user-interface side.
This commit is contained in:
parent
22afbe42dd
commit
14a701a237
4 changed files with 54 additions and 25 deletions
|
@ -124,9 +124,7 @@ BloomFilter* BloomFilter::Unserialize(UnserialInfo* info)
|
|||
bool BloomFilter::DoSerialize(SerialInfo* info) const
|
||||
{
|
||||
DO_SERIALIZE(SER_BLOOMFILTER, SerialObj);
|
||||
if ( ! SERIALIZE(static_cast<uint16>(hash_->K())) )
|
||||
return false;
|
||||
return SERIALIZE(static_cast<uint64>(elements_));
|
||||
return SERIALIZE(static_cast<uint16>(hash_->K()));
|
||||
}
|
||||
|
||||
bool BloomFilter::DoUnserialize(UnserialInfo* info)
|
||||
|
@ -136,10 +134,6 @@ bool BloomFilter::DoUnserialize(UnserialInfo* info)
|
|||
if ( ! UNSERIALIZE(&k) )
|
||||
return false;
|
||||
hash_ = new hash_policy(static_cast<size_t>(k));
|
||||
uint64 elements;
|
||||
if ( ! UNSERIALIZE(&elements) )
|
||||
return false;
|
||||
elements_ = static_cast<size_t>(elements);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -155,6 +149,17 @@ size_t BasicBloomFilter::K(size_t cells, size_t capacity)
|
|||
return std::ceil(frac * std::log(2));
|
||||
}
|
||||
|
||||
/**
 * Builds a new basic Bloom filter whose bit vector is the bitwise OR of the
 * bit vectors of two existing filters.
 *
 * @param x The first filter to merge.
 * @param y The second filter to merge.
 *
 * @return A newly allocated filter holding the combined bit vector.
 *
 * TODO: implement the hasher pool and make sure the merged filter gets the
 * same number of (equal) hash functions as its inputs. Until then nothing
 * here verifies that x and y hash identically (i.e., x->hash_ == y->hash_),
 * and the merged filter's hasher is not set up by this function.
 */
BasicBloomFilter* BasicBloomFilter::Merge(const BasicBloomFilter* x,
                                          const BasicBloomFilter* y)
	{
	BasicBloomFilter* merged = new BasicBloomFilter();

	// The default constructor leaves bits_ as NULL, so install the union of
	// both inputs' bit vectors here.
	BitVector* combined = new BitVector(*x->bits_ | *y->bits_);
	merged->bits_ = combined;

	return merged;
	}
|
||||
|
||||
BasicBloomFilter::BasicBloomFilter()
|
||||
: bits_(NULL)
|
||||
{
|
||||
|
@ -201,6 +206,14 @@ size_t BasicBloomFilter::CountImpl(const HashPolicy::HashVector& h) const
|
|||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Placeholder for merging two counting Bloom filters.
 *
 * Merging is not implemented for counting filters: when assertions are
 * enabled this trips the assert below; otherwise it reports failure by
 * returning NULL.
 *
 * @param x The first filter to merge (currently unused).
 * @param y The second filter to merge (currently unused).
 *
 * @return Always NULL.
 */
CountingBloomFilter* CountingBloomFilter::Merge(const CountingBloomFilter* x,
                                                const CountingBloomFilter* y)
	{
	// A string literal is never null, so the negation is always false and
	// the assertion fires unconditionally while naming the reason.
	assert(! "not yet implemented");

	return NULL;
	}
|
||||
|
||||
CountingBloomFilter::CountingBloomFilter()
|
||||
: cells_(NULL)
|
||||
{
|
||||
|
|
|
@ -155,7 +155,6 @@ public:
|
|||
template <typename T>
|
||||
void Add(const T& x)
|
||||
{
|
||||
++elements_;
|
||||
AddImpl(hash_->Hash(&x, sizeof(x)));
|
||||
}
|
||||
|
||||
|
@ -172,16 +171,6 @@ public:
|
|||
return CountImpl(hash_->Hash(&x, sizeof(x)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the number of elements added to the Bloom filter.
|
||||
*
|
||||
* @return The number of elements in this Bloom filter.
|
||||
*/
|
||||
size_t Size() const
|
||||
{
|
||||
return elements_;
|
||||
}
|
||||
|
||||
bool Serialize(SerialInfo* info) const;
|
||||
static BloomFilter* Unserialize(UnserialInfo* info);
|
||||
|
||||
|
@ -196,7 +185,6 @@ protected:
|
|||
|
||||
private:
|
||||
HashPolicy* hash_;
|
||||
size_t elements_;
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -230,6 +218,9 @@ public:
|
|||
*/
|
||||
static size_t K(size_t cells, size_t capacity);
|
||||
|
||||
static BasicBloomFilter* Merge(const BasicBloomFilter* x,
|
||||
const BasicBloomFilter* y);
|
||||
|
||||
/**
|
||||
* Constructs a basic Bloom filter with a given false-positive rate and
|
||||
* capacity.
|
||||
|
@ -258,6 +249,9 @@ private:
|
|||
*/
|
||||
class CountingBloomFilter : public BloomFilter {
|
||||
public:
|
||||
static CountingBloomFilter* Merge(const CountingBloomFilter* x,
|
||||
const CountingBloomFilter* y);
|
||||
|
||||
CountingBloomFilter(double fp, size_t capacity, size_t width);
|
||||
CountingBloomFilter(size_t cells, size_t capacity, size_t width);
|
||||
|
||||
|
|
|
@ -572,10 +572,21 @@ size_t BloomFilterVal::Count(const Val* val) const
|
|||
return bloom_filter_->Count(key->Hash());
|
||||
}
|
||||
|
||||
BloomFilterVal* BloomFilterVal::Merge(const BloomFilterVal* first,
|
||||
const BloomFilterVal* second)
|
||||
BloomFilterVal* BloomFilterVal::Merge(const BloomFilterVal* x,
|
||||
const BloomFilterVal* y)
|
||||
{
|
||||
assert(! "not yet implemented");
|
||||
if ( x->Type() != y->Type() )
|
||||
{
|
||||
reporter->InternalError("cannot merge Bloom filters with different types");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
BloomFilterVal* result;
|
||||
if ( (result = DoMerge<BasicBloomFilter>(x, y)) )
|
||||
return result;
|
||||
else if ( (result = DoMerge<CountingBloomFilter>(x, y)) )
|
||||
return result;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
|
|
@ -113,10 +113,10 @@ class BloomFilterVal : public OpaqueVal {
|
|||
BloomFilterVal(const BloomFilterVal&);
|
||||
BloomFilterVal& operator=(const BloomFilterVal&);
|
||||
public:
|
||||
static BloomFilterVal* Merge(const BloomFilterVal* first,
|
||||
const BloomFilterVal* second);
|
||||
static BloomFilterVal* Merge(const BloomFilterVal* x,
|
||||
const BloomFilterVal* y);
|
||||
|
||||
BloomFilterVal(BloomFilter* bf);
|
||||
explicit BloomFilterVal(BloomFilter* bf);
|
||||
~BloomFilterVal();
|
||||
|
||||
bool Typify(BroType* type);
|
||||
|
@ -133,6 +133,17 @@ protected:
|
|||
DECLARE_SERIAL(BloomFilterVal);
|
||||
|
||||
private:
|
||||
template <typename T>
|
||||
static BloomFilterVal* DoMerge(const BloomFilterVal* x,
|
||||
const BloomFilterVal* y)
|
||||
{
|
||||
const T* a = dynamic_cast<const T*>(x->bloom_filter_);
|
||||
const T* b = dynamic_cast<const T*>(y->bloom_filter_);
|
||||
if ( a && b )
|
||||
return new BloomFilterVal(T::Merge(a, b));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
BroType* type_;
|
||||
CompositeHash* hash_;
|
||||
BloomFilter* bloom_filter_;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue