mirror of
https://github.com/zeek/zeek.git
synced 2025-10-09 18:18:19 +00:00
Factor implementation and change interface.
When constructing a Bloom filter, one now has to pass a HashPolicy instance to it. This separates the concerns of hashing and Bloom filter management more clearly. This commit also changes the interface for initializing Bloom filters: there are now two initialization functions, one for each type: (1) `bloomfilter_basic_init(fp: double, capacity: count, name: string &default=""): opaque of bloomfilter` and (2) `bloomfilter_counting_init(k: count, cells: count, max: count, name: string &default=""): opaque of bloomfilter`. The BiFs for adding elements and performing lookups remain the same. This essentially gives us "BiF polymorphism" in script land, where the initialization BiF constructs the most derived type while subsequent BiFs adhere to the same interface. The reason we split up the constructor in this case is that we have not yet derived the math that computes the optimal number of hash functions for counting Bloom filters, so users have to parameterize them explicitly for now.
This commit is contained in:
parent
9f74064289
commit
532fbfb4d2
11 changed files with 409 additions and 319 deletions
90
src/HashPolicy.h
Normal file
90
src/HashPolicy.h
Normal file
|
@ -0,0 +1,90 @@
|
|||
#ifndef HashPolicy_h
|
||||
#define HashPolicy_h
|
||||
|
||||
#include "Hash.h"
|
||||
#include "H3.h"
|
||||
|
||||
/**
|
||||
* A functor that computes a universal hash function.
|
||||
*/
|
||||
class Hasher {
|
||||
public:
|
||||
typedef hash_t hash_type;
|
||||
|
||||
/**
|
||||
* Constructs a hasher seeded by a given seed and optionally an extra
|
||||
* descriptor.
|
||||
*
|
||||
* @param seed The seed to use.
|
||||
*
|
||||
* @param extra If not `NULL`, the hasher will not mix in the initial seed
|
||||
* but instead use this NUL-terminated string as additional seed.
|
||||
*/
|
||||
Hasher(size_t seed, const std::string& extra = "");
|
||||
|
||||
/**
|
||||
* Computes the hash digest of contiguous data.
|
||||
*
|
||||
* @param x A pointer to the beginning of the byte sequence to hash.
|
||||
*
|
||||
* @param n The length of the sequence pointed to by *x*.
|
||||
*/
|
||||
hash_type operator()(const void* x, size_t n) const;
|
||||
|
||||
private:
|
||||
static size_t compute_seed(size_t seed, const std::string& extra);
|
||||
|
||||
H3<hash_type, UHASH_KEY_SIZE> h_;
|
||||
};
|
||||
|
||||
/**
|
||||
* The abstract base class for hash policies that hash elements *k* times.
|
||||
*/
|
||||
class HashPolicy {
|
||||
public:
|
||||
typedef Hasher::hash_type hash_type;
|
||||
typedef std::vector<hash_type> hash_vector;
|
||||
|
||||
virtual ~HashPolicy() { }
|
||||
|
||||
virtual hash_vector Hash(const void* x, size_t n) const = 0;
|
||||
|
||||
size_t K() const { return k_; }
|
||||
const std::string& Name() const { return name_; }
|
||||
|
||||
protected:
|
||||
HashPolicy(size_t k, const std::string& name);
|
||||
|
||||
private:
|
||||
const size_t k_;
|
||||
std::string name_;
|
||||
};
|
||||
|
||||
/**
|
||||
* The default hashing policy. Performs *k* hash function computations.
|
||||
*/
|
||||
class DefaultHashing : public HashPolicy {
|
||||
public:
|
||||
DefaultHashing(size_t k, const std::string& name);
|
||||
|
||||
virtual hash_vector Hash(const void* x, size_t n) const /* override */;
|
||||
|
||||
private:
|
||||
std::vector<Hasher> hashers_;
|
||||
};
|
||||
|
||||
/**
|
||||
* The *double-hashing* policy. Uses a linear combination of two hash functions.
|
||||
*/
|
||||
class DoubleHashing : public HashPolicy {
|
||||
public:
|
||||
DoubleHashing(size_t k, const std::string& name);
|
||||
|
||||
virtual hash_vector Hash(const void* x, size_t n) const;
|
||||
|
||||
private:
|
||||
Hasher hasher1_;
|
||||
Hasher hasher2_;
|
||||
};
|
||||
|
||||
#endif
|
Loading…
Add table
Add a link
Reference in a new issue