mirror of
https://github.com/zeek/zeek.git
synced 2025-10-10 02:28:21 +00:00
Tweak hasher interface.
This commit is contained in:
parent
446344ae99
commit
fd2e155d1a
8 changed files with 225 additions and 212 deletions
|
@ -6,19 +6,19 @@
|
||||||
#include "Serializer.h"
|
#include "Serializer.h"
|
||||||
|
|
||||||
BloomFilter::BloomFilter()
|
BloomFilter::BloomFilter()
|
||||||
: hash_(NULL)
|
: hasher_(NULL)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
BloomFilter::BloomFilter(const HashPolicy* hash_policy)
|
BloomFilter::BloomFilter(const Hasher* hasher)
|
||||||
: hash_(hash_policy)
|
: hasher_(hasher)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
BloomFilter::~BloomFilter()
|
BloomFilter::~BloomFilter()
|
||||||
{
|
{
|
||||||
if ( hash_ )
|
if ( hasher_ )
|
||||||
delete hash_;
|
delete hasher_;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool BloomFilter::Serialize(SerialInfo* info) const
|
bool BloomFilter::Serialize(SerialInfo* info) const
|
||||||
|
@ -35,9 +35,9 @@ BloomFilter* BloomFilter::Unserialize(UnserialInfo* info)
|
||||||
bool BloomFilter::DoSerialize(SerialInfo* info) const
|
bool BloomFilter::DoSerialize(SerialInfo* info) const
|
||||||
{
|
{
|
||||||
DO_SERIALIZE(SER_BLOOMFILTER, SerialObj);
|
DO_SERIALIZE(SER_BLOOMFILTER, SerialObj);
|
||||||
if ( ! SERIALIZE(static_cast<uint16>(hash_->K())) )
|
if ( ! SERIALIZE(static_cast<uint16>(hasher_->K())) )
|
||||||
return false;
|
return false;
|
||||||
return SERIALIZE_STR(hash_->Name().c_str(), hash_->Name().size());
|
return SERIALIZE_STR(hasher_->Name().c_str(), hasher_->Name().size());
|
||||||
}
|
}
|
||||||
|
|
||||||
bool BloomFilter::DoUnserialize(UnserialInfo* info)
|
bool BloomFilter::DoUnserialize(UnserialInfo* info)
|
||||||
|
@ -49,7 +49,7 @@ bool BloomFilter::DoUnserialize(UnserialInfo* info)
|
||||||
const char* name;
|
const char* name;
|
||||||
if ( ! UNSERIALIZE_STR(&name, 0) )
|
if ( ! UNSERIALIZE_STR(&name, 0) )
|
||||||
return false;
|
return false;
|
||||||
hash_ = HashPolicy::Create(k, name);
|
hasher_ = Hasher::Create(k, name);
|
||||||
delete [] name;
|
delete [] name;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -70,7 +70,7 @@ size_t BasicBloomFilter::K(size_t cells, size_t capacity)
|
||||||
BasicBloomFilter* BasicBloomFilter::Merge(const BasicBloomFilter* x,
|
BasicBloomFilter* BasicBloomFilter::Merge(const BasicBloomFilter* x,
|
||||||
const BasicBloomFilter* y)
|
const BasicBloomFilter* y)
|
||||||
{
|
{
|
||||||
// TODO: Ensure that x and y use the same HashPolicy before proceeding.
|
// TODO: Ensure that x and y use the same Hasher before proceeding.
|
||||||
BasicBloomFilter* result = new BasicBloomFilter();
|
BasicBloomFilter* result = new BasicBloomFilter();
|
||||||
result->bits_ = new BitVector(*x->bits_ | *y->bits_);
|
result->bits_ = new BitVector(*x->bits_ | *y->bits_);
|
||||||
return result;
|
return result;
|
||||||
|
@ -81,8 +81,8 @@ BasicBloomFilter::BasicBloomFilter()
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
BasicBloomFilter::BasicBloomFilter(const HashPolicy* hash_policy, size_t cells)
|
BasicBloomFilter::BasicBloomFilter(const Hasher* hasher, size_t cells)
|
||||||
: BloomFilter(hash_policy),
|
: BloomFilter(hasher),
|
||||||
bits_(new BitVector(cells))
|
bits_(new BitVector(cells))
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
@ -102,13 +102,13 @@ bool BasicBloomFilter::DoUnserialize(UnserialInfo* info)
|
||||||
return bits_ != NULL;
|
return bits_ != NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
void BasicBloomFilter::AddImpl(const HashPolicy::hash_vector& h)
|
void BasicBloomFilter::AddImpl(const Hasher::digest_vector& h)
|
||||||
{
|
{
|
||||||
for ( size_t i = 0; i < h.size(); ++i )
|
for ( size_t i = 0; i < h.size(); ++i )
|
||||||
bits_->Set(h[i] % bits_->Size());
|
bits_->Set(h[i] % bits_->Size());
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t BasicBloomFilter::CountImpl(const HashPolicy::hash_vector& h) const
|
size_t BasicBloomFilter::CountImpl(const Hasher::digest_vector& h) const
|
||||||
{
|
{
|
||||||
for ( size_t i = 0; i < h.size(); ++i )
|
for ( size_t i = 0; i < h.size(); ++i )
|
||||||
if ( ! (*bits_)[h[i] % bits_->Size()] )
|
if ( ! (*bits_)[h[i] % bits_->Size()] )
|
||||||
|
@ -129,9 +129,9 @@ CountingBloomFilter::CountingBloomFilter()
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
CountingBloomFilter::CountingBloomFilter(const HashPolicy* hash_policy,
|
CountingBloomFilter::CountingBloomFilter(const Hasher* hasher,
|
||||||
size_t cells, size_t width)
|
size_t cells, size_t width)
|
||||||
: BloomFilter(hash_policy)
|
: BloomFilter(hasher)
|
||||||
{
|
{
|
||||||
cells_ = new CounterVector(width, cells);
|
cells_ = new CounterVector(width, cells);
|
||||||
}
|
}
|
||||||
|
@ -152,13 +152,13 @@ bool CountingBloomFilter::DoUnserialize(UnserialInfo* info)
|
||||||
return cells_ != NULL;
|
return cells_ != NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
void CountingBloomFilter::AddImpl(const HashPolicy::hash_vector& h)
|
void CountingBloomFilter::AddImpl(const Hasher::digest_vector& h)
|
||||||
{
|
{
|
||||||
for ( size_t i = 0; i < h.size(); ++i )
|
for ( size_t i = 0; i < h.size(); ++i )
|
||||||
cells_->Increment(h[i] % cells_->Size(), 1);
|
cells_->Increment(h[i] % cells_->Size(), 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t CountingBloomFilter::CountImpl(const HashPolicy::hash_vector& h) const
|
size_t CountingBloomFilter::CountImpl(const Hasher::digest_vector& h) const
|
||||||
{
|
{
|
||||||
CounterVector::size_type min =
|
CounterVector::size_type min =
|
||||||
std::numeric_limits<CounterVector::size_type>::max();
|
std::numeric_limits<CounterVector::size_type>::max();
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include "BitVector.h"
|
#include "BitVector.h"
|
||||||
#include "HashPolicy.h"
|
#include "Hasher.h"
|
||||||
|
|
||||||
class CounterVector;
|
class CounterVector;
|
||||||
|
|
||||||
|
@ -12,7 +12,7 @@ class CounterVector;
|
||||||
*/
|
*/
|
||||||
class BloomFilter : public SerialObj {
|
class BloomFilter : public SerialObj {
|
||||||
public:
|
public:
|
||||||
// At this point we won't let the user choose the hash policy, but we might
|
// At this point we won't let the user choose the hasher, but we might
|
||||||
// open up the interface in the future.
|
// open up the interface in the future.
|
||||||
virtual ~BloomFilter();
|
virtual ~BloomFilter();
|
||||||
|
|
||||||
|
@ -23,7 +23,7 @@ public:
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void Add(const T& x)
|
void Add(const T& x)
|
||||||
{
|
{
|
||||||
AddImpl(hash_->Hash(&x, sizeof(x)));
|
AddImpl((*hasher_)(x));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -36,7 +36,7 @@ public:
|
||||||
template <typename T>
|
template <typename T>
|
||||||
size_t Count(const T& x) const
|
size_t Count(const T& x) const
|
||||||
{
|
{
|
||||||
return CountImpl(hash_->Hash(&x, sizeof(x)));
|
return CountImpl((*hasher_)(x));
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Serialize(SerialInfo* info) const;
|
bool Serialize(SerialInfo* info) const;
|
||||||
|
@ -50,15 +50,15 @@ protected:
|
||||||
/**
|
/**
|
||||||
* Constructs a Bloom filter.
|
* Constructs a Bloom filter.
|
||||||
*
|
*
|
||||||
* @param hash_policy The hash policy to use for this Bloom filter.
|
* @param hasher The hasher to use for this Bloom filter.
|
||||||
*/
|
*/
|
||||||
BloomFilter(const HashPolicy* hash_policy);
|
BloomFilter(const Hasher* hasher);
|
||||||
|
|
||||||
virtual void AddImpl(const HashPolicy::hash_vector& hashes) = 0;
|
virtual void AddImpl(const Hasher::digest_vector& hashes) = 0;
|
||||||
virtual size_t CountImpl(const HashPolicy::hash_vector& hashes) const = 0;
|
virtual size_t CountImpl(const Hasher::digest_vector& hashes) const = 0;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
const HashPolicy* hash_;
|
const Hasher* hasher_;
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -98,15 +98,15 @@ public:
|
||||||
/**
|
/**
|
||||||
* Constructs a basic Bloom filter with a given number of cells.
|
* Constructs a basic Bloom filter with a given number of cells.
|
||||||
*/
|
*/
|
||||||
BasicBloomFilter(const HashPolicy* hash_policy, size_t cells);
|
BasicBloomFilter(const Hasher* hasher, size_t cells);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
DECLARE_SERIAL(BasicBloomFilter);
|
DECLARE_SERIAL(BasicBloomFilter);
|
||||||
|
|
||||||
BasicBloomFilter();
|
BasicBloomFilter();
|
||||||
|
|
||||||
virtual void AddImpl(const HashPolicy::hash_vector& h);
|
virtual void AddImpl(const Hasher::digest_vector& h);
|
||||||
virtual size_t CountImpl(const HashPolicy::hash_vector& h) const;
|
virtual size_t CountImpl(const Hasher::digest_vector& h) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
BitVector* bits_;
|
BitVector* bits_;
|
||||||
|
@ -120,16 +120,15 @@ public:
|
||||||
static CountingBloomFilter* Merge(const CountingBloomFilter* x,
|
static CountingBloomFilter* Merge(const CountingBloomFilter* x,
|
||||||
const CountingBloomFilter* y);
|
const CountingBloomFilter* y);
|
||||||
|
|
||||||
CountingBloomFilter(const HashPolicy* hash_policy, size_t cells,
|
CountingBloomFilter(const Hasher* hasher, size_t cells, size_t width);
|
||||||
size_t width);
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
DECLARE_SERIAL(CountingBloomFilter);
|
DECLARE_SERIAL(CountingBloomFilter);
|
||||||
|
|
||||||
CountingBloomFilter();
|
CountingBloomFilter();
|
||||||
|
|
||||||
virtual void AddImpl(const HashPolicy::hash_vector& h);
|
virtual void AddImpl(const Hasher::digest_vector& h);
|
||||||
virtual size_t CountImpl(const HashPolicy::hash_vector& h) const;
|
virtual size_t CountImpl(const Hasher::digest_vector& h) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
CounterVector* cells_;
|
CounterVector* cells_;
|
||||||
|
|
|
@ -279,7 +279,7 @@ set(bro_SRCS
|
||||||
Frame.cc
|
Frame.cc
|
||||||
Func.cc
|
Func.cc
|
||||||
Hash.cc
|
Hash.cc
|
||||||
HashPolicy.cc
|
Hasher.cc
|
||||||
ID.cc
|
ID.cc
|
||||||
IntSet.cc
|
IntSet.cc
|
||||||
IOSource.cc
|
IOSource.cc
|
||||||
|
|
|
@ -1,77 +0,0 @@
|
||||||
#include "HashPolicy.h"
|
|
||||||
|
|
||||||
#include "digest.h"
|
|
||||||
|
|
||||||
Hasher::Hasher(size_t seed, const std::string& extra)
|
|
||||||
: h_(compute_seed(seed, extra))
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
Hasher::hash_type Hasher::operator()(const void* x, size_t n) const
|
|
||||||
{
|
|
||||||
return n == 0 ? 0 : h_(x, n);
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t Hasher::compute_seed(size_t seed, const std::string& extra)
|
|
||||||
{
|
|
||||||
u_char digest[SHA256_DIGEST_LENGTH];
|
|
||||||
SHA256_CTX ctx;
|
|
||||||
sha256_init(&ctx);
|
|
||||||
if ( extra.empty() )
|
|
||||||
{
|
|
||||||
unsigned int first_seed = initial_seed();
|
|
||||||
sha256_update(&ctx, &first_seed, sizeof(first_seed));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
sha256_update(&ctx, extra.c_str(), extra.size());
|
|
||||||
}
|
|
||||||
sha256_update(&ctx, &seed, sizeof(seed));
|
|
||||||
sha256_final(&ctx, digest);
|
|
||||||
return *reinterpret_cast<size_t*>(digest);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
HashPolicy* HashPolicy::Create(size_t k, const std::string& name)
|
|
||||||
{
|
|
||||||
return new DefaultHashing(k, name);
|
|
||||||
}
|
|
||||||
|
|
||||||
HashPolicy::HashPolicy(size_t k, const std::string& name)
|
|
||||||
: k_(k), name_(name)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
DefaultHashing::DefaultHashing(size_t k, const std::string& name)
|
|
||||||
: HashPolicy(k, name)
|
|
||||||
{
|
|
||||||
for ( size_t i = 0; i < k; ++i )
|
|
||||||
hashers_.push_back(Hasher(i, name));
|
|
||||||
}
|
|
||||||
|
|
||||||
HashPolicy::hash_vector DefaultHashing::Hash(const void* x, size_t n) const
|
|
||||||
{
|
|
||||||
hash_vector h(K(), 0);
|
|
||||||
for ( size_t i = 0; i < h.size(); ++i )
|
|
||||||
h[i] = hashers_[i](x, n);
|
|
||||||
return h;
|
|
||||||
}
|
|
||||||
|
|
||||||
DoubleHashing::DoubleHashing(size_t k, const std::string& name)
|
|
||||||
: HashPolicy(k, name),
|
|
||||||
hasher1_(1, name),
|
|
||||||
hasher2_(2, name)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
HashPolicy::hash_vector DoubleHashing::Hash(const void* x, size_t n) const
|
|
||||||
{
|
|
||||||
hash_type h1 = hasher1_(x, n);
|
|
||||||
hash_type h2 = hasher2_(x, n);
|
|
||||||
hash_vector h(K(), 0);
|
|
||||||
for ( size_t i = 0; i < h.size(); ++i )
|
|
||||||
h[i] = h1 + i * h2;
|
|
||||||
return h;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
|
@ -1,97 +0,0 @@
|
||||||
#ifndef HashPolicy_h
|
|
||||||
#define HashPolicy_h
|
|
||||||
|
|
||||||
#include "Hash.h"
|
|
||||||
#include "H3.h"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A functor that computes a universal hash function.
|
|
||||||
*/
|
|
||||||
class Hasher {
|
|
||||||
public:
|
|
||||||
typedef hash_t hash_type;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Constructs a hasher seeded by a given seed and optionally an extra
|
|
||||||
* descriptor.
|
|
||||||
*
|
|
||||||
* @param seed The seed to use.
|
|
||||||
*
|
|
||||||
* @param extra If not empty, the hasher will not mix in the initial seed
|
|
||||||
* but instead use this string as additional seed.
|
|
||||||
*/
|
|
||||||
Hasher(size_t seed, const std::string& extra = "");
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Computes the hash digest of contiguous data.
|
|
||||||
*
|
|
||||||
* @param x A pointer to the beginning of the byte sequence to hash.
|
|
||||||
*
|
|
||||||
* @param n The length of the sequence pointed to by *x*.
|
|
||||||
*/
|
|
||||||
hash_type operator()(const void* x, size_t n) const;
|
|
||||||
|
|
||||||
private:
|
|
||||||
static size_t compute_seed(size_t seed, const std::string& extra);
|
|
||||||
|
|
||||||
H3<hash_type, UHASH_KEY_SIZE> h_;
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The abstract base class for hash policies that hash elements *k* times.
|
|
||||||
*/
|
|
||||||
class HashPolicy {
|
|
||||||
public:
|
|
||||||
/**
|
|
||||||
* Constructs the hashing policy used by the implementation. This factory
|
|
||||||
* function exists because the HashPolicy class hierarchy is not yet
|
|
||||||
* serializable.
|
|
||||||
*/
|
|
||||||
static HashPolicy* Create(size_t k, const std::string& name);
|
|
||||||
|
|
||||||
typedef Hasher::hash_type hash_type;
|
|
||||||
typedef std::vector<hash_type> hash_vector;
|
|
||||||
|
|
||||||
virtual ~HashPolicy() { }
|
|
||||||
|
|
||||||
virtual hash_vector Hash(const void* x, size_t n) const = 0;
|
|
||||||
|
|
||||||
size_t K() const { return k_; }
|
|
||||||
const std::string& Name() const { return name_; }
|
|
||||||
|
|
||||||
protected:
|
|
||||||
HashPolicy(size_t k, const std::string& name);
|
|
||||||
|
|
||||||
private:
|
|
||||||
const size_t k_;
|
|
||||||
std::string name_;
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The default hashing policy. Performs *k* hash function computations.
|
|
||||||
*/
|
|
||||||
class DefaultHashing : public HashPolicy {
|
|
||||||
public:
|
|
||||||
DefaultHashing(size_t k, const std::string& name);
|
|
||||||
|
|
||||||
virtual hash_vector Hash(const void* x, size_t n) const /* override */;
|
|
||||||
|
|
||||||
private:
|
|
||||||
std::vector<Hasher> hashers_;
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The *double-hashing* policy. Uses a linear combination of two hash functions.
|
|
||||||
*/
|
|
||||||
class DoubleHashing : public HashPolicy {
|
|
||||||
public:
|
|
||||||
DoubleHashing(size_t k, const std::string& name);
|
|
||||||
|
|
||||||
virtual hash_vector Hash(const void* x, size_t n) const;
|
|
||||||
|
|
||||||
private:
|
|
||||||
Hasher hasher1_;
|
|
||||||
Hasher hasher2_;
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif
|
|
79
src/Hasher.cc
Normal file
79
src/Hasher.cc
Normal file
|
@ -0,0 +1,79 @@
|
||||||
|
#include "Hasher.h"
|
||||||
|
|
||||||
|
#include "digest.h"
|
||||||
|
|
||||||
|
Hasher::UHF::UHF(size_t seed, const std::string& extra)
|
||||||
|
: h_(compute_seed(seed, extra))
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
Hasher::digest Hasher::UHF::hash(const void* x, size_t n) const
|
||||||
|
{
|
||||||
|
assert(n <= UHASH_KEY_SIZE);
|
||||||
|
return n == 0 ? 0 : h_(x, n);
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t Hasher::UHF::compute_seed(size_t seed, const std::string& extra)
|
||||||
|
{
|
||||||
|
u_char buf[SHA256_DIGEST_LENGTH];
|
||||||
|
SHA256_CTX ctx;
|
||||||
|
sha256_init(&ctx);
|
||||||
|
if ( extra.empty() )
|
||||||
|
{
|
||||||
|
unsigned int first_seed = initial_seed();
|
||||||
|
sha256_update(&ctx, &first_seed, sizeof(first_seed));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
sha256_update(&ctx, extra.c_str(), extra.size());
|
||||||
|
}
|
||||||
|
sha256_update(&ctx, &seed, sizeof(seed));
|
||||||
|
sha256_final(&ctx, buf);
|
||||||
|
// Take the first sizeof(size_t) bytes as seed.
|
||||||
|
return *reinterpret_cast<size_t*>(buf);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Hasher* Hasher::Create(size_t k, const std::string& name)
|
||||||
|
{
|
||||||
|
return new DefaultHasher(k, name);
|
||||||
|
}
|
||||||
|
|
||||||
|
Hasher::Hasher(size_t k, const std::string& name)
|
||||||
|
: k_(k), name_(name)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
DefaultHasher::DefaultHasher(size_t k, const std::string& name)
|
||||||
|
: Hasher(k, name)
|
||||||
|
{
|
||||||
|
for ( size_t i = 0; i < k; ++i )
|
||||||
|
hash_functions_.push_back(UHF(i, name));
|
||||||
|
}
|
||||||
|
|
||||||
|
Hasher::digest_vector DefaultHasher::Hash(const void* x, size_t n) const
|
||||||
|
{
|
||||||
|
digest_vector h(K(), 0);
|
||||||
|
for ( size_t i = 0; i < h.size(); ++i )
|
||||||
|
h[i] = hash_functions_[i](x, n);
|
||||||
|
return h;
|
||||||
|
}
|
||||||
|
|
||||||
|
DoubleHasher::DoubleHasher(size_t k, const std::string& name)
|
||||||
|
: Hasher(k, name),
|
||||||
|
h1_(1, name),
|
||||||
|
h2_(2, name)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
Hasher::digest_vector DoubleHasher::Hash(const void* x, size_t n) const
|
||||||
|
{
|
||||||
|
digest h1 = h1_(x, n);
|
||||||
|
digest h2 = h2_(x, n);
|
||||||
|
digest_vector h(K(), 0);
|
||||||
|
for ( size_t i = 0; i < h.size(); ++i )
|
||||||
|
h[i] = h1 + i * h2;
|
||||||
|
return h;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
109
src/Hasher.h
Normal file
109
src/Hasher.h
Normal file
|
@ -0,0 +1,109 @@
|
||||||
|
#ifndef Hasher_h
|
||||||
|
#define Hasher_h
|
||||||
|
|
||||||
|
#include "Hash.h"
|
||||||
|
#include "H3.h"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The abstract base class for hashers, i.e., constructs which hash elements
|
||||||
|
* *k* times.
|
||||||
|
*/
|
||||||
|
class Hasher {
|
||||||
|
public:
|
||||||
|
typedef hash_t digest;
|
||||||
|
typedef std::vector<digest> digest_vector;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructs the hashing policy used by the implementation.
|
||||||
|
*
|
||||||
|
* @todo This factory function exists because the Hasher class
|
||||||
|
* hierarchy is not yet serializable.
|
||||||
|
*/
|
||||||
|
static Hasher* Create(size_t k, const std::string& name);
|
||||||
|
|
||||||
|
virtual ~Hasher() { }
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
digest_vector operator()(const T& x) const
|
||||||
|
{
|
||||||
|
return Hash(&x, sizeof(T));
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual digest_vector Hash(const void* x, size_t n) const = 0;
|
||||||
|
|
||||||
|
size_t K() const { return k_; }
|
||||||
|
const std::string& Name() const { return name_; }
|
||||||
|
|
||||||
|
protected:
|
||||||
|
/**
|
||||||
|
* A universal hash function family.
|
||||||
|
*/
|
||||||
|
class UHF {
|
||||||
|
public:
|
||||||
|
/**
|
||||||
|
* Constructs an H3 hash function seeded with a given seed and an optional
|
||||||
|
* extra seed to replace the initial Bro seed.
|
||||||
|
*
|
||||||
|
* @param seed The seed to use for this instance.
|
||||||
|
*
|
||||||
|
* @param extra If not empty, this string replaces the initial Bro seed
|
||||||
|
* when computing the seed for this hash function; if empty, the initial
|
||||||
|
* Bro seed is used.
|
||||||
|
*/
|
||||||
|
UHF(size_t seed, const std::string& extra = "");
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
digest operator()(const T& x) const
|
||||||
|
{
|
||||||
|
return hash(&x, sizeof(T));
|
||||||
|
}
|
||||||
|
|
||||||
|
digest operator()(const void* x, size_t n) const
|
||||||
|
{
|
||||||
|
return hash(x, n);
|
||||||
|
}
|
||||||
|
|
||||||
|
digest hash(const void* x, size_t n) const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
static size_t compute_seed(size_t seed, const std::string& extra);
|
||||||
|
|
||||||
|
H3<digest, UHASH_KEY_SIZE> h_;
|
||||||
|
};
|
||||||
|
|
||||||
|
Hasher(size_t k, const std::string& name);
|
||||||
|
|
||||||
|
private:
|
||||||
|
const size_t k_;
|
||||||
|
std::string name_;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The default hashing policy. Performs *k* hash function computations.
|
||||||
|
*/
|
||||||
|
class DefaultHasher : public Hasher {
|
||||||
|
public:
|
||||||
|
DefaultHasher(size_t k, const std::string& name);
|
||||||
|
|
||||||
|
virtual digest_vector Hash(const void* x, size_t n) const /* final */;
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::vector<UHF> hash_functions_;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The *double-hashing* policy. Uses a linear combination of two hash functions.
|
||||||
|
*/
|
||||||
|
class DoubleHasher : public Hasher {
|
||||||
|
public:
|
||||||
|
DoubleHasher(size_t k, const std::string& name);
|
||||||
|
|
||||||
|
virtual digest_vector Hash(const void* x, size_t n) const /* final */;
|
||||||
|
|
||||||
|
private:
|
||||||
|
UHF h1_;
|
||||||
|
UHF h2_;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
|
@ -5008,8 +5008,8 @@ function bloomfilter_basic_init%(fp: double, capacity: count,
|
||||||
|
|
||||||
size_t cells = BasicBloomFilter::M(fp, capacity);
|
size_t cells = BasicBloomFilter::M(fp, capacity);
|
||||||
size_t optimal_k = BasicBloomFilter::K(cells, capacity);
|
size_t optimal_k = BasicBloomFilter::K(cells, capacity);
|
||||||
const HashPolicy* hp = HashPolicy::Create(optimal_k, name->CheckString());
|
const Hasher* h = Hasher::Create(optimal_k, name->CheckString());
|
||||||
return new BloomFilterVal(new BasicBloomFilter(hp, cells));
|
return new BloomFilterVal(new BasicBloomFilter(h, cells));
|
||||||
%}
|
%}
|
||||||
|
|
||||||
## Creates a counting Bloom filter.
|
## Creates a counting Bloom filter.
|
||||||
|
@ -5029,11 +5029,11 @@ function bloomfilter_basic_init%(fp: double, capacity: count,
|
||||||
function bloomfilter_counting_init%(k: count, cells: count, max: count,
|
function bloomfilter_counting_init%(k: count, cells: count, max: count,
|
||||||
name: string &default=""%): opaque of bloomfilter
|
name: string &default=""%): opaque of bloomfilter
|
||||||
%{
|
%{
|
||||||
const HashPolicy* hp = HashPolicy::Create(k, name->CheckString());
|
const Hasher* h = Hasher::Create(k, name->CheckString());
|
||||||
uint16 width = 0;
|
uint16 width = 0;
|
||||||
while ( max >>= 1 )
|
while ( max >>= 1 )
|
||||||
++width;
|
++width;
|
||||||
return new BloomFilterVal(new CountingBloomFilter(hp, cells, width));
|
return new BloomFilterVal(new CountingBloomFilter(h, cells, width));
|
||||||
%}
|
%}
|
||||||
|
|
||||||
## Adds an element to a Bloom filter.
|
## Adds an element to a Bloom filter.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue