mirror of
https://github.com/zeek/zeek.git
synced 2025-10-10 18:48:20 +00:00
Stabilize Bloom filter interface.
This commit is contained in:
parent
d873db03ce
commit
f529df33e0
2 changed files with 65 additions and 53 deletions
33
src/BloomFilter.cc
Normal file
33
src/BloomFilter.cc
Normal file
|
@ -0,0 +1,33 @@
|
||||||
|
#include "BloomFilter.h"
|
||||||
|
|
||||||
|
// Computes one hash value per configured hasher over the n bytes at x.
// The result has k() entries; entry j is produced by the j-th hasher.
HashPolicy::HashVector DefaultHashing::Hash(const void* x, size_t n) const
	{
	HashVector digests(k(), 0);

	size_t slot = 0;
	while ( slot < digests.size() )
		{
		digests[slot] = hashers_[slot](x, n);
		++slot;
		}

	return digests;
	}
|
||||||
|
|
||||||
|
// Derives k() hash values from two base hashes of the n bytes at x:
// entry i of the result is h1 + i * h2.
HashPolicy::HashVector DoubleHashing::Hash(const void* x, size_t n) const
	{
	// Hand the length to both hashers, exactly as DefaultHashing::Hash does.
	// Calling them with only the pointer would leave n unused, so the base
	// hashes could not depend on the element's n bytes.
	const HashType h1 = hasher1_(x, n);
	const HashType h2 = hasher2_(x, n);

	HashVector h(k(), 0);
	for ( size_t i = 0; i < h.size(); ++i )
		h[i] = h1 + i * h2;

	return h;
	}
|
||||||
|
|
||||||
|
// Records an element by setting, for every hash value in h, the bit at
// position (hash mod number-of-bits).
//
// The modulus must be the size of the bit vector, not h.size(): h.size() is
// the number of hash functions, and reducing by it would confine every
// element to the first few bit positions.
//
// NOTE(review): assumes BitVector exposes size() returning the number of
// bits it holds -- confirm against BitVector.h.
void BasicBloomFilter::AddImpl(const HashPolicy::HashVector& h)
	{
	const size_t nbits = bits_.size();

	// No bit positions to map into; also avoids a modulo by zero.
	if ( nbits == 0 )
		return;

	for ( size_t i = 0; i < h.size(); ++i )
		bits_.set(h[i] % nbits);
	}

// Tests membership: returns 1 if the bit selected by every hash value in h
// is set, and 0 as soon as one of them is clear.
//
// Uses the same (hash mod number-of-bits) mapping as AddImpl; the two must
// stay in sync or lookups will miss elements that were added.
size_t BasicBloomFilter::CountImpl(const HashPolicy::HashVector& h) const
	{
	const size_t nbits = bits_.size();

	// An empty bit vector cannot contain anything; also avoids a modulo by
	// zero.
	if ( nbits == 0 )
		return 0;

	for ( size_t i = 0; i < h.size(); ++i )
		if ( ! bits_[h[i] % nbits] )
			return 0;

	return 1;
	}
|
|
@ -11,6 +11,9 @@
|
||||||
*/
|
*/
|
||||||
class CounterVector : SerialObj {
|
class CounterVector : SerialObj {
|
||||||
public:
|
public:
|
||||||
|
typedef size_t size_type;
|
||||||
|
typedef uint64 count_type;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructs a counter vector having cells of a given width.
|
* Constructs a counter vector having cells of a given width.
|
||||||
*
|
*
|
||||||
|
@ -70,21 +73,24 @@ private:
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The abstract base class for hash policies.
|
* The abstract base class for hash policies that hash elements *k* times.
|
||||||
* @tparam Codomain An integral type.
|
* @tparam Codomain An integral type.
|
||||||
*/
|
*/
|
||||||
class HashPolicy {
|
class HashPolicy {
|
||||||
public:
|
public:
|
||||||
typedef hash_t hash_type;
|
typedef hash_t HashType;
|
||||||
|
typedef std::vector<HashType> HashVector;
|
||||||
|
|
||||||
virtual ~HashPolicy() { }
|
virtual ~HashPolicy() { }
|
||||||
size_t k() const { return k; }
|
size_t k() const { return k_; }
|
||||||
virtual std::vector<hash_type> Hash(const void* x, size_t n) const = 0;
|
virtual HashVector Hash(const void* x, size_t n) const = 0;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
/**
|
/**
|
||||||
* A functor that computes a universal hash function.
|
* A functor that computes a universal hash function.
|
||||||
* @tparam Codomain An integral type.
|
* @tparam Codomain An integral type.
|
||||||
*/
|
*/
|
||||||
template <typename Codomain = hash_type>
|
template <typename Codomain = HashType>
|
||||||
class Hasher {
|
class Hasher {
|
||||||
public:
|
public:
|
||||||
template <typename Domain>
|
template <typename Domain>
|
||||||
|
@ -104,8 +110,9 @@ protected:
|
||||||
};
|
};
|
||||||
|
|
||||||
HashPolicy(size_t k) : k_(k) { }
|
HashPolicy(size_t k) : k_(k) { }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
size_t k_;
|
const size_t k_;
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -114,18 +121,12 @@ private:
|
||||||
class DefaultHashing : public HashPolicy {
|
class DefaultHashing : public HashPolicy {
|
||||||
public:
|
public:
|
||||||
DefaultHashing(size_t k) : HashPolicy(k), hashers_(k) { }
|
DefaultHashing(size_t k) : HashPolicy(k), hashers_(k) { }
|
||||||
virtual ~DoubleHashing() { }
|
virtual ~DefaultHashing() { }
|
||||||
|
|
||||||
virtual std::vector<hash_type> Hash(const void* x, size_t n) const
|
virtual HashVector Hash(const void* x, size_t n) const;
|
||||||
{
|
|
||||||
std::vector<hash_type> h(k(), 0);
|
|
||||||
for (size_t i = 0; i < h.size(); ++i)
|
|
||||||
h[i] = hashers_[i](x, n);
|
|
||||||
return h;
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::vector< Hasher<hash_type> > hashers_;
|
std::vector< Hasher<HashType> > hashers_;
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -133,22 +134,14 @@ private:
|
||||||
*/
|
*/
|
||||||
class DoubleHashing : public HashPolicy {
|
class DoubleHashing : public HashPolicy {
|
||||||
public:
|
public:
|
||||||
DoubleHashing(size_t k) : HashPolicy(k), hashers_(k) { }
|
DoubleHashing(size_t k) : HashPolicy(k) { }
|
||||||
virtual ~DoubleHashing() { }
|
virtual ~DoubleHashing() { }
|
||||||
|
|
||||||
virtual std::vector<hash_type> Hash(const void* x, size_t n) const
|
virtual HashVector Hash(const void* x, size_t n) const;
|
||||||
{
|
|
||||||
Codomain h1 = hasher1_(x);
|
|
||||||
Codomain h2 = hasher2_(x);
|
|
||||||
std::vector<hash_type> h(k(), 0);
|
|
||||||
for (size_t i = 0; i < h.size(); ++i)
|
|
||||||
h[i] = h1 + i * h2;
|
|
||||||
return h;
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Hasher<hash_type> hasher1_;
|
Hasher<HashType> hasher1_;
|
||||||
Hasher<hash_type> hasher2_;
|
Hasher<HashType> hasher2_;
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -166,7 +159,7 @@ public:
|
||||||
void Add(const T& x)
|
void Add(const T& x)
|
||||||
{
|
{
|
||||||
++elements_;
|
++elements_;
|
||||||
AddImpl(hash_->Hash(x));
|
AddImpl(hash_->Hash(&x, sizeof(x)));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -179,7 +172,7 @@ public:
|
||||||
template <typename T>
|
template <typename T>
|
||||||
size_t Count(const T& x) const
|
size_t Count(const T& x) const
|
||||||
{
|
{
|
||||||
return CountImpl(hash_->Hash(x));
|
return CountImpl(hash_->Hash(&x, sizeof(x)));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -193,8 +186,6 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
typedef std::vector<HashPolicy::hash_value> HashVector;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Default-constructs a Bloom filter.
|
* Default-constructs a Bloom filter.
|
||||||
*/
|
*/
|
||||||
|
@ -206,17 +197,12 @@ protected:
|
||||||
*/
|
*/
|
||||||
BloomFilter(HashPolicy* hash);
|
BloomFilter(HashPolicy* hash);
|
||||||
|
|
||||||
virtual void AddImpl(const HashVector& hashes) = 0;
|
virtual void AddImpl(const HashPolicy::HashVector& hashes) = 0;
|
||||||
|
|
||||||
virtual size_t CountImpl(const HashVector& hashes) const = 0;
|
virtual size_t CountImpl(const HashPolicy::HashVector& hashes) const = 0;
|
||||||
|
|
||||||
std::vector<HashPolicy::hash_value> Hash(const T& x) const
|
|
||||||
{
|
|
||||||
return hash_->Hash(&x, sizeof(x));
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
HashPolicy<T>* hash_; // Owned by *this.
|
HashPolicy* hash_; // Owned by *this.
|
||||||
|
|
||||||
size_t elements_;
|
size_t elements_;
|
||||||
};
|
};
|
||||||
|
@ -230,19 +216,9 @@ public:
|
||||||
BasicBloomFilter(HashPolicy* hash);
|
BasicBloomFilter(HashPolicy* hash);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
virtual void AddImpl(const HashVector& h)
|
virtual void AddImpl(const HashPolicy::HashVector& h);
|
||||||
{
|
|
||||||
for ( size_t i = 0; i < h.size(); ++i )
|
|
||||||
bits_.set(h[i] % h.size());
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual size_t CountImpl(const HashVector& h) const
|
virtual size_t CountImpl(const HashPolicy::HashVector& h) const;
|
||||||
{
|
|
||||||
for ( size_t i = 0; i < h.size(); ++i )
|
|
||||||
if ( ! bits_[h[i] % h.size()] )
|
|
||||||
return 0;
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
BitVector bits_;
|
BitVector bits_;
|
||||||
|
@ -253,12 +229,15 @@ private:
|
||||||
*/
|
*/
|
||||||
class CountingBloomFilter : public BloomFilter {
|
class CountingBloomFilter : public BloomFilter {
|
||||||
public:
|
public:
|
||||||
CountingBloomFilter(unsigned width);
|
CountingBloomFilter(unsigned width, HashPolicy* hash);
|
||||||
CountingBloomFilter(HashPolicy* hash);
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
CountingBloomFilter();
|
CountingBloomFilter();
|
||||||
|
|
||||||
|
virtual void AddImpl(const HashPolicy::HashVector& h);
|
||||||
|
|
||||||
|
virtual size_t CountImpl(const HashPolicy::HashVector& h) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
CounterVector cells_;
|
CounterVector cells_;
|
||||||
};
|
};
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue