Stabilize Bloom filter interface.

This commit is contained in:
Matthias Vallentin 2013-06-03 14:00:28 -07:00
parent d873db03ce
commit f529df33e0
2 changed files with 65 additions and 53 deletions

33
src/BloomFilter.cc Normal file
View file

@ -0,0 +1,33 @@
#include "BloomFilter.h"
/**
 * Hashes an element once with each of the policy's hashers.
 *
 * @param x Pointer to the first byte of the element.
 * @param n Size of the element in bytes.
 * @return A vector of k() values; entry i is produced by hasher i.
 */
HashPolicy::HashVector DefaultHashing::Hash(const void* x, size_t n) const
	{
	const size_t count = k();

	HashVector digests;
	digests.reserve(count);

	// One independent hasher per slot, applied in index order.
	for ( size_t slot = 0; slot != count; ++slot )
		digests.push_back(hashers_[slot](x, n));

	return digests;
	}
/**
 * Derives k() hash values from two base hashes via double hashing.
 *
 * @param x Pointer to the first byte of the element.
 * @param n Size of the element in bytes.
 * @return A vector of k() values where entry i equals h1 + i * h2.
 */
HashPolicy::HashVector DoubleHashing::Hash(const void* x, size_t n) const
	{
	// Hash the n bytes starting at x. Passing only x would drop the length,
	// so the element's bytes would never reach the hashers. This is the same
	// two-argument invocation that DefaultHashing::Hash uses.
	const HashType h1 = hasher1_(x, n);
	const HashType h2 = hasher2_(x, n);

	HashVector h(k(), 0);
	for ( size_t i = 0; i < h.size(); ++i )
		h[i] = h1 + i * h2;

	return h;
	}
/**
 * Records an element by setting one bit per hash value.
 *
 * @param h The hash values of the element, one per hash function.
 */
void BasicBloomFilter::AddImpl(const HashPolicy::HashVector& h)
	{
	// Each hash must be reduced modulo the number of bits in the filter.
	// Reducing modulo h.size() (the number of hash functions) would confine
	// every element to the first k bit positions.
	// NOTE(review): assumes BitVector exposes size() alongside set() - confirm.
	const size_t num_bits = bits_.size();

	// An empty bit vector has no position to set; avoid a modulo by zero.
	if ( num_bits == 0 )
		return;

	for ( size_t i = 0; i < h.size(); ++i )
		bits_.set(h[i] % num_bits);
	}
/**
 * Checks whether every bit addressed by the given hash values is set.
 *
 * @param h The hash values of the element, one per hash function.
 * @return 1 if all addressed bits are set, 0 as soon as one is clear.
 */
size_t BasicBloomFilter::CountImpl(const HashPolicy::HashVector& h) const
	{
	// Positions are taken modulo the number of bits in the filter, not modulo
	// h.size() (the number of hash functions), which would only ever probe
	// the first k bit positions.
	// NOTE(review): assumes BitVector exposes size() alongside operator[] - confirm.
	const size_t num_bits = bits_.size();

	// An empty bit vector cannot contain anything; also avoids modulo by zero.
	if ( num_bits == 0 )
		return 0;

	for ( size_t i = 0; i < h.size(); ++i )
		if ( ! bits_[h[i] % num_bits] )
			return 0;

	return 1;
	}

View file

@ -11,6 +11,9 @@
*/ */
class CounterVector : SerialObj { class CounterVector : SerialObj {
public: public:
typedef size_t size_type;
typedef uint64 count_type;
/** /**
* Constructs a counter vector having cells of a given width. * Constructs a counter vector having cells of a given width.
* *
@ -70,21 +73,24 @@ private:
}; };
/** /**
* The abstract base class for hash policies. * The abstract base class for hash policies that hash elements *k* times.
* @tparam Codomain An integral type. * @tparam Codomain An integral type.
*/ */
class HashPolicy { class HashPolicy {
public: public:
typedef hash_t hash_type; typedef hash_t HashType;
typedef std::vector<HashType> HashVector;
virtual ~HashPolicy() { } virtual ~HashPolicy() { }
size_t k() const { return k; } size_t k() const { return k_; }
virtual std::vector<hash_type> Hash(const void* x, size_t n) const = 0; virtual HashVector Hash(const void* x, size_t n) const = 0;
protected: protected:
/** /**
* A functor that computes a universal hash function. * A functor that computes a universal hash function.
* @tparam Codomain An integral type. * @tparam Codomain An integral type.
*/ */
template <typename Codomain = hash_type> template <typename Codomain = HashType>
class Hasher { class Hasher {
public: public:
template <typename Domain> template <typename Domain>
@ -104,8 +110,9 @@ protected:
}; };
HashPolicy(size_t k) : k_(k) { } HashPolicy(size_t k) : k_(k) { }
private: private:
size_t k_; const size_t k_;
}; };
/** /**
@ -114,18 +121,12 @@ private:
class DefaultHashing : public HashPolicy { class DefaultHashing : public HashPolicy {
public: public:
DefaultHashing(size_t k) : HashPolicy(k), hashers_(k) { } DefaultHashing(size_t k) : HashPolicy(k), hashers_(k) { }
virtual ~DoubleHashing() { } virtual ~DefaultHashing() { }
virtual std::vector<hash_type> Hash(const void* x, size_t n) const virtual HashVector Hash(const void* x, size_t n) const;
{
std::vector<hash_type> h(k(), 0);
for (size_t i = 0; i < h.size(); ++i)
h[i] = hashers_[i](x, n);
return h;
}
private: private:
std::vector< Hasher<hash_type> > hashers_; std::vector< Hasher<HashType> > hashers_;
}; };
/** /**
@ -133,22 +134,14 @@ private:
*/ */
class DoubleHashing : public HashPolicy { class DoubleHashing : public HashPolicy {
public: public:
DoubleHashing(size_t k) : HashPolicy(k), hashers_(k) { } DoubleHashing(size_t k) : HashPolicy(k) { }
virtual ~DoubleHashing() { } virtual ~DoubleHashing() { }
virtual std::vector<hash_type> Hash(const void* x, size_t n) const virtual HashVector Hash(const void* x, size_t n) const;
{
Codomain h1 = hasher1_(x);
Codomain h2 = hasher2_(x);
std::vector<hash_type> h(k(), 0);
for (size_t i = 0; i < h.size(); ++i)
h[i] = h1 + i * h2;
return h;
}
private: private:
Hasher<hash_type> hasher1_; Hasher<HashType> hasher1_;
Hasher<hash_type> hasher2_; Hasher<HashType> hasher2_;
}; };
/** /**
@ -166,7 +159,7 @@ public:
void Add(const T& x) void Add(const T& x)
{ {
++elements_; ++elements_;
AddImpl(hash_->Hash(x)); AddImpl(hash_->Hash(&x, sizeof(x)));
} }
/** /**
@ -179,7 +172,7 @@ public:
template <typename T> template <typename T>
size_t Count(const T& x) const size_t Count(const T& x) const
{ {
return CountImpl(hash_->Hash(x)); return CountImpl(hash_->Hash(&x, sizeof(x)));
} }
/** /**
@ -193,8 +186,6 @@ public:
} }
protected: protected:
typedef std::vector<HashPolicy::hash_value> HashVector;
/** /**
* Default-constructs a Bloom filter. * Default-constructs a Bloom filter.
*/ */
@ -206,17 +197,12 @@ protected:
*/ */
BloomFilter(HashPolicy* hash); BloomFilter(HashPolicy* hash);
virtual void AddImpl(const HashVector& hashes) = 0; virtual void AddImpl(const HashPolicy::HashVector& hashes) = 0;
virtual size_t CountImpl(const HashVector& hashes) const = 0; virtual size_t CountImpl(const HashPolicy::HashVector& hashes) const = 0;
std::vector<HashPolicy::hash_value> Hash(const T& x) const
{
return hash_->Hash(&x, sizeof(x));
}
private: private:
HashPolicy<T>* hash_; // Owned by *this. HashPolicy* hash_; // Owned by *this.
size_t elements_; size_t elements_;
}; };
@ -230,19 +216,9 @@ public:
BasicBloomFilter(HashPolicy* hash); BasicBloomFilter(HashPolicy* hash);
protected: protected:
virtual void AddImpl(const HashVector& h) virtual void AddImpl(const HashPolicy::HashVector& h);
{
for ( size_t i = 0; i < h.size(); ++i )
bits_.set(h[i] % h.size());
}
virtual size_t CountImpl(const HashVector& h) const virtual size_t CountImpl(const HashPolicy::HashVector& h) const;
{
for ( size_t i = 0; i < h.size(); ++i )
if ( ! bits_[h[i] % h.size()] )
return 0;
return 1;
}
private: private:
BitVector bits_; BitVector bits_;
@ -253,12 +229,15 @@ private:
*/ */
class CountingBloomFilter : public BloomFilter { class CountingBloomFilter : public BloomFilter {
public: public:
CountingBloomFilter(unsigned width); CountingBloomFilter(unsigned width, HashPolicy* hash);
CountingBloomFilter(HashPolicy* hash);
protected: protected:
CountingBloomFilter(); CountingBloomFilter();
virtual void AddImpl(const HashPolicy::HashVector& h);
virtual size_t CountImpl(const HashPolicy::HashVector& h) const;
private: private:
CounterVector cells_; CounterVector cells_;
}; };