// See the file "COPYING" in the main distribution directory for copyright. #ifndef PROBABILISTIC_HASHER_H #define PROBABILISTIC_HASHER_H #include "Hash.h" #include "H3.h" namespace probabilistic { /** * Abstract base class for hashers. A hasher creates a family of hash * functions to hash an element *k* times. */ class Hasher { public: typedef hash_t digest; typedef std::vector digest_vector; /** * Destructor. */ virtual ~Hasher() { } /** * Computes hash values for an element. * * @param x The element to hash. * * @return Vector of *k* hash values. */ template digest_vector operator()(const T& x) const { return Hash(&x, sizeof(T)); } /** * Computes the hashes for a set of bytes. * * @param x Pointer to first byte to hash. * * @param n Number of bytes to hash. * * @return Vector of *k* hash values. * */ virtual digest_vector Hash(const void* x, size_t n) const = 0; /** * Returns a deep copy of the hasher. */ virtual Hasher* Clone() const = 0; /** * Returns true if two hashers are identical. */ virtual bool Equals(const Hasher* other) const = 0; /** * Returns the number *k* of hash functions the hashers applies. */ size_t K() const { return k; } /** * Returns the hasher's name. If not empty, the hasher uses this descriptor * to seed its *k* hash functions. Otherwise the hasher mixes in the initial * seed derived from the environment variable `$BRO_SEED`. */ const std::string& Name() const { return name; } /** * Constructs the hasher used by the implementation. This hardcodes a * specific hashing policy. It exists only because the HashingPolicy * class hierachy is not yet serializable. * * @param k The number of hash functions to apply. * * @param name The hasher's name. * * @return Returns a new hasher instance. */ static Hasher* Create(size_t k, const std::string& name); protected: Hasher(size_t k, const std::string& name); private: const size_t k; std::string name; }; /** * A universal hash function family. This is a helper class that Hasher * implementations can use in their implementation. */ class UHF { public: /** * Constructs an H3 hash function seeded with a given seed and an * optional extra seed to replace the initial Bro seed. * * @param seed The seed to use for this instance. * * @param extra If not empty, this parameter replaces the initial * seed to compute the seed for t to compute the seed NUL-terminated * string as additional seed. */ UHF(size_t seed, const std::string& extra = ""); template Hasher::digest operator()(const T& x) const { return hash(&x, sizeof(T)); } /** * Computes hash values for an element. * * @param x The element to hash. * * @return Vector of *k* hash values. */ Hasher::digest operator()(const void* x, size_t n) const { return hash(x, n); } /** * Computes the hashes for a set of bytes. * * @param x Pointer to first byte to hash. * * @param n Number of bytes to hash. * * @return Vector of *k* hash values. * */ Hasher::digest hash(const void* x, size_t n) const; friend bool operator==(const UHF& x, const UHF& y) { return x.h == y.h; } friend bool operator!=(const UHF& x, const UHF& y) { return ! (x == y); } private: static size_t compute_seed(size_t seed, const std::string& extra); H3 h; }; /** * A hasher implementing the default hashing policy. Uses *k* separate hash * functions internally. */ class DefaultHasher : public Hasher { public: /** * Constructor for a hasher with *k* hash functions. * * @param k The number of hash functions to use. * * @param name The name of the hasher. */ DefaultHasher(size_t k, const std::string& name = ""); // Overridden from Hasher. virtual digest_vector Hash(const void* x, size_t n) const /* final */; virtual DefaultHasher* Clone() const /* final */; virtual bool Equals(const Hasher* other) const /* final */; private: std::vector hash_functions; }; /** * The *double-hashing* policy. Uses a linear combination of two hash * functions. */ class DoubleHasher : public Hasher { public: /** * Constructor for a double hasher with *k* hash functions. * * @param k The number of hash functions to use. * * @param name The name of the hasher. */ DoubleHasher(size_t k, const std::string& name = ""); // Overridden from Hasher. virtual digest_vector Hash(const void* x, size_t n) const /* final */; virtual DoubleHasher* Clone() const /* final */; virtual bool Equals(const Hasher* other) const /* final */; private: UHF h1; UHF h2; }; } #endif