mirror of
https://github.com/zeek/zeek.git
synced 2025-10-10 10:38:20 +00:00
Implement value merging.
The actual BloomFilter merging is still missing; this is just the first step in the right direction from the user-interface side.
This commit is contained in:
parent
22afbe42dd
commit
14a701a237
4 changed files with 54 additions and 25 deletions
|
@ -124,9 +124,7 @@ BloomFilter* BloomFilter::Unserialize(UnserialInfo* info)
|
||||||
bool BloomFilter::DoSerialize(SerialInfo* info) const
|
bool BloomFilter::DoSerialize(SerialInfo* info) const
|
||||||
{
|
{
|
||||||
DO_SERIALIZE(SER_BLOOMFILTER, SerialObj);
|
DO_SERIALIZE(SER_BLOOMFILTER, SerialObj);
|
||||||
if ( ! SERIALIZE(static_cast<uint16>(hash_->K())) )
|
return SERIALIZE(static_cast<uint16>(hash_->K()));
|
||||||
return false;
|
|
||||||
return SERIALIZE(static_cast<uint64>(elements_));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool BloomFilter::DoUnserialize(UnserialInfo* info)
|
bool BloomFilter::DoUnserialize(UnserialInfo* info)
|
||||||
|
@ -136,10 +134,6 @@ bool BloomFilter::DoUnserialize(UnserialInfo* info)
|
||||||
if ( ! UNSERIALIZE(&k) )
|
if ( ! UNSERIALIZE(&k) )
|
||||||
return false;
|
return false;
|
||||||
hash_ = new hash_policy(static_cast<size_t>(k));
|
hash_ = new hash_policy(static_cast<size_t>(k));
|
||||||
uint64 elements;
|
|
||||||
if ( ! UNSERIALIZE(&elements) )
|
|
||||||
return false;
|
|
||||||
elements_ = static_cast<size_t>(elements);
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -155,6 +149,17 @@ size_t BasicBloomFilter::K(size_t cells, size_t capacity)
|
||||||
return std::ceil(frac * std::log(2));
|
return std::ceil(frac * std::log(2));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
BasicBloomFilter* BasicBloomFilter::Merge(const BasicBloomFilter* x,
|
||||||
|
const BasicBloomFilter* y)
|
||||||
|
{
|
||||||
|
BasicBloomFilter* result = new BasicBloomFilter();
|
||||||
|
result->bits_ = new BitVector(*x->bits_ | *y->bits_);
|
||||||
|
// TODO: implement the hasher pool and make sure the new result gets the same
|
||||||
|
// number of (equal) hash functions.
|
||||||
|
//assert(x->hash_ == y->hash_);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
BasicBloomFilter::BasicBloomFilter()
|
BasicBloomFilter::BasicBloomFilter()
|
||||||
: bits_(NULL)
|
: bits_(NULL)
|
||||||
{
|
{
|
||||||
|
@ -201,6 +206,14 @@ size_t BasicBloomFilter::CountImpl(const HashPolicy::HashVector& h) const
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
CountingBloomFilter* CountingBloomFilter::Merge(const CountingBloomFilter* x,
|
||||||
|
const CountingBloomFilter* y)
|
||||||
|
{
|
||||||
|
assert(! "not yet implemented");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
CountingBloomFilter::CountingBloomFilter()
|
CountingBloomFilter::CountingBloomFilter()
|
||||||
: cells_(NULL)
|
: cells_(NULL)
|
||||||
{
|
{
|
||||||
|
|
|
@ -155,7 +155,6 @@ public:
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void Add(const T& x)
|
void Add(const T& x)
|
||||||
{
|
{
|
||||||
++elements_;
|
|
||||||
AddImpl(hash_->Hash(&x, sizeof(x)));
|
AddImpl(hash_->Hash(&x, sizeof(x)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -172,16 +171,6 @@ public:
|
||||||
return CountImpl(hash_->Hash(&x, sizeof(x)));
|
return CountImpl(hash_->Hash(&x, sizeof(x)));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Retrieves the number of elements added to the Bloom filter.
|
|
||||||
*
|
|
||||||
* @return The number of elements in this Bloom filter.
|
|
||||||
*/
|
|
||||||
size_t Size() const
|
|
||||||
{
|
|
||||||
return elements_;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool Serialize(SerialInfo* info) const;
|
bool Serialize(SerialInfo* info) const;
|
||||||
static BloomFilter* Unserialize(UnserialInfo* info);
|
static BloomFilter* Unserialize(UnserialInfo* info);
|
||||||
|
|
||||||
|
@ -196,7 +185,6 @@ protected:
|
||||||
|
|
||||||
private:
|
private:
|
||||||
HashPolicy* hash_;
|
HashPolicy* hash_;
|
||||||
size_t elements_;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -230,6 +218,9 @@ public:
|
||||||
*/
|
*/
|
||||||
static size_t K(size_t cells, size_t capacity);
|
static size_t K(size_t cells, size_t capacity);
|
||||||
|
|
||||||
|
static BasicBloomFilter* Merge(const BasicBloomFilter* x,
|
||||||
|
const BasicBloomFilter* y);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructs a basic Bloom filter with a given false-positive rate and
|
* Constructs a basic Bloom filter with a given false-positive rate and
|
||||||
* capacity.
|
* capacity.
|
||||||
|
@ -258,6 +249,9 @@ private:
|
||||||
*/
|
*/
|
||||||
class CountingBloomFilter : public BloomFilter {
|
class CountingBloomFilter : public BloomFilter {
|
||||||
public:
|
public:
|
||||||
|
static CountingBloomFilter* Merge(const CountingBloomFilter* x,
|
||||||
|
const CountingBloomFilter* y);
|
||||||
|
|
||||||
CountingBloomFilter(double fp, size_t capacity, size_t width);
|
CountingBloomFilter(double fp, size_t capacity, size_t width);
|
||||||
CountingBloomFilter(size_t cells, size_t capacity, size_t width);
|
CountingBloomFilter(size_t cells, size_t capacity, size_t width);
|
||||||
|
|
||||||
|
|
|
@ -572,10 +572,21 @@ size_t BloomFilterVal::Count(const Val* val) const
|
||||||
return bloom_filter_->Count(key->Hash());
|
return bloom_filter_->Count(key->Hash());
|
||||||
}
|
}
|
||||||
|
|
||||||
BloomFilterVal* BloomFilterVal::Merge(const BloomFilterVal* first,
|
BloomFilterVal* BloomFilterVal::Merge(const BloomFilterVal* x,
|
||||||
const BloomFilterVal* second)
|
const BloomFilterVal* y)
|
||||||
{
|
{
|
||||||
assert(! "not yet implemented");
|
if ( x->Type() != y->Type() )
|
||||||
|
{
|
||||||
|
reporter->InternalError("cannot merge Bloom filters with different types");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
BloomFilterVal* result;
|
||||||
|
if ( (result = DoMerge<BasicBloomFilter>(x, y)) )
|
||||||
|
return result;
|
||||||
|
else if ( (result = DoMerge<CountingBloomFilter>(x, y)) )
|
||||||
|
return result;
|
||||||
|
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -113,10 +113,10 @@ class BloomFilterVal : public OpaqueVal {
|
||||||
BloomFilterVal(const BloomFilterVal&);
|
BloomFilterVal(const BloomFilterVal&);
|
||||||
BloomFilterVal& operator=(const BloomFilterVal&);
|
BloomFilterVal& operator=(const BloomFilterVal&);
|
||||||
public:
|
public:
|
||||||
static BloomFilterVal* Merge(const BloomFilterVal* first,
|
static BloomFilterVal* Merge(const BloomFilterVal* x,
|
||||||
const BloomFilterVal* second);
|
const BloomFilterVal* y);
|
||||||
|
|
||||||
BloomFilterVal(BloomFilter* bf);
|
explicit BloomFilterVal(BloomFilter* bf);
|
||||||
~BloomFilterVal();
|
~BloomFilterVal();
|
||||||
|
|
||||||
bool Typify(BroType* type);
|
bool Typify(BroType* type);
|
||||||
|
@ -133,6 +133,17 @@ protected:
|
||||||
DECLARE_SERIAL(BloomFilterVal);
|
DECLARE_SERIAL(BloomFilterVal);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
template <typename T>
|
||||||
|
static BloomFilterVal* DoMerge(const BloomFilterVal* x,
|
||||||
|
const BloomFilterVal* y)
|
||||||
|
{
|
||||||
|
const T* a = dynamic_cast<const T*>(x->bloom_filter_);
|
||||||
|
const T* b = dynamic_cast<const T*>(y->bloom_filter_);
|
||||||
|
if ( a && b )
|
||||||
|
return new BloomFilterVal(T::Merge(a, b));
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
BroType* type_;
|
BroType* type_;
|
||||||
CompositeHash* hash_;
|
CompositeHash* hash_;
|
||||||
BloomFilter* bloom_filter_;
|
BloomFilter* bloom_filter_;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue