mirror of
https://github.com/zeek/zeek.git
synced 2025-10-15 04:58:21 +00:00
Broifying the code.
Also extending API documentation a bit more and fixing a memory leak.
This commit is contained in:
parent
21685d2529
commit
474107fe40
18 changed files with 1651 additions and 1329 deletions
|
@ -1,5 +1,7 @@
|
|||
#ifndef Hasher_h
|
||||
#define Hasher_h
|
||||
// See the file "COPYING" in the main distribution directory for copyright.
|
||||
|
||||
#ifndef PROBABILISTIC_HASHER_H
|
||||
#define PROBABILISTIC_HASHER_H
|
||||
|
||||
#include "Hash.h"
|
||||
#include "H3.h"
|
||||
|
@ -7,123 +9,197 @@
|
|||
namespace probabilistic {
|
||||
|
||||
/**
|
||||
* The abstract base class for hashers, i.e., constructs which hash elements
|
||||
* *k* times.
|
||||
* Abstract base class for hashers. A hasher creates a family of hash
|
||||
* functions to hash an element *k* times.
|
||||
*/
|
||||
class Hasher {
|
||||
public:
|
||||
typedef hash_t digest;
|
||||
typedef std::vector<digest> digest_vector;
|
||||
typedef hash_t digest;
|
||||
typedef std::vector<digest> digest_vector;
|
||||
|
||||
/**
|
||||
* Constructs the hashing policy used by the implementation.
|
||||
*
|
||||
* @todo This factory function exists because the HashingPolicy class
|
||||
* hierarchy is not yet serializable.
|
||||
*/
|
||||
/**
|
||||
* Destructor.
|
||||
*/
|
||||
virtual ~Hasher() { }
|
||||
|
||||
/**
|
||||
* Computes hash values for an element.
|
||||
*
|
||||
* @param x The element to hash.
|
||||
*
|
||||
* @return Vector of *k* hash values.
|
||||
*/
|
||||
template <typename T>
|
||||
digest_vector operator()(const T& x) const
|
||||
{
|
||||
return Hash(&x, sizeof(T));
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes the hashes for a set of bytes.
|
||||
*
|
||||
* @param x Pointer to first byte to hash.
|
||||
*
|
||||
* @param n Number of bytes to hash.
|
||||
*
|
||||
* @return Vector of *k* hash values.
|
||||
*
|
||||
*/
|
||||
virtual digest_vector Hash(const void* x, size_t n) const = 0;
|
||||
|
||||
/**
|
||||
* Returns a deep copy of the hasher.
|
||||
*/
|
||||
virtual Hasher* Clone() const = 0;
|
||||
|
||||
/**
|
||||
* Returns true if two hashers are identical.
|
||||
*/
|
||||
virtual bool Equals(const Hasher* other) const = 0;
|
||||
|
||||
/**
|
||||
* Returns the number *k* of hash functions the hasher applies.
|
||||
*/
|
||||
size_t K() const { return k; }
|
||||
|
||||
/**
|
||||
* Returns the hasher's name. TODO: What's this?
|
||||
*/
|
||||
const std::string& Name() const { return name; }
|
||||
|
||||
/**
|
||||
* Constructs the hasher used by the implementation. This hardcodes a
|
||||
* specific hashing policy. It exists only because the HashingPolicy
|
||||
* class hierarchy is not yet serializable.
|
||||
*
|
||||
* @param k The number of hash functions to apply.
|
||||
*
|
||||
* @param name The hasher's name.
|
||||
*
|
||||
* @return Returns a new hasher instance.
|
||||
*/
|
||||
static Hasher* Create(size_t k, const std::string& name);
|
||||
|
||||
virtual ~Hasher() { }
|
||||
|
||||
template <typename T>
|
||||
digest_vector operator()(const T& x) const
|
||||
{
|
||||
return Hash(&x, sizeof(T));
|
||||
}
|
||||
|
||||
virtual digest_vector Hash(const void* x, size_t n) const = 0;
|
||||
|
||||
virtual Hasher* Clone() const = 0;
|
||||
|
||||
virtual bool Equals(const Hasher* other) const = 0;
|
||||
|
||||
size_t K() const { return k_; }
|
||||
const std::string& Name() const { return name_; }
|
||||
|
||||
protected:
|
||||
/**
|
||||
* A universal hash function family.
|
||||
*/
|
||||
class UHF {
|
||||
public:
|
||||
/**
|
||||
* Constructs an H3 hash function seeded with a given seed and an optional
|
||||
* extra seed to replace the initial Bro seed.
|
||||
*
|
||||
* @param seed The seed to use for this instance.
|
||||
*
|
||||
* @param extra If not empty, this string serves as an extra seed
|
||||
* that replaces the initial Bro seed when computing the
|
||||
* seed of
|
||||
|
||||
* the hash function.
|
||||
*/
|
||||
UHF(size_t seed, const std::string& extra = "");
|
||||
Hasher(size_t k, const std::string& name);
|
||||
|
||||
template <typename T>
|
||||
digest operator()(const T& x) const
|
||||
{
|
||||
return hash(&x, sizeof(T));
|
||||
}
|
||||
|
||||
digest operator()(const void* x, size_t n) const
|
||||
{
|
||||
return hash(x, n);
|
||||
}
|
||||
|
||||
friend bool operator==(const UHF& x, const UHF& y)
|
||||
{
|
||||
return x.h_ == y.h_;
|
||||
}
|
||||
|
||||
friend bool operator!=(const UHF& x, const UHF& y)
|
||||
{
|
||||
return ! (x == y);
|
||||
}
|
||||
|
||||
digest hash(const void* x, size_t n) const;
|
||||
|
||||
private:
|
||||
static size_t compute_seed(size_t seed, const std::string& extra);
|
||||
|
||||
H3<digest, UHASH_KEY_SIZE> h_;
|
||||
};
|
||||
|
||||
Hasher(size_t k, const std::string& name);
|
||||
|
||||
private:
|
||||
const size_t k_;
|
||||
std::string name_;
|
||||
private:
|
||||
const size_t k;
|
||||
std::string name;
|
||||
};
|
||||
|
||||
/**
|
||||
* The default hashing policy. Performs *k* hash function computations.
|
||||
* A universal hash function family. This is a helper class that Hasher
|
||||
* implementations can use in their implementation.
|
||||
*/
|
||||
class UHF {
|
||||
public:
|
||||
/**
|
||||
* Constructs an H3 hash function seeded with a given seed and an
|
||||
* optional extra seed to replace the initial Bro seed.
|
||||
*
|
||||
* @param seed The seed to use for this instance.
|
||||
*
|
||||
* @param extra If not empty, this string serves as an extra seed
|
||||
* that replaces the initial Bro seed when computing the seed of the
|
||||
* hash function.
|
||||
*/
|
||||
UHF(size_t seed, const std::string& extra = "");
|
||||
|
||||
template <typename T>
|
||||
Hasher::digest operator()(const T& x) const
|
||||
{
|
||||
return hash(&x, sizeof(T));
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes the hash value for a set of bytes.
|
||||
*
|
||||
* @param x Pointer to first byte to hash.
|
||||
*
|
||||
* @param n Number of bytes to hash.
|
||||
*
|
||||
* @return The hash value.
|
||||
*/
|
||||
Hasher::digest operator()(const void* x, size_t n) const
|
||||
{
|
||||
return hash(x, n);
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes the hash value for a set of bytes.
|
||||
*
|
||||
* @param x Pointer to first byte to hash.
|
||||
*
|
||||
* @param n Number of bytes to hash.
|
||||
*
|
||||
* @return The hash value.
|
||||
*
|
||||
*/
|
||||
Hasher::digest hash(const void* x, size_t n) const;
|
||||
|
||||
friend bool operator==(const UHF& x, const UHF& y)
|
||||
{
|
||||
return x.h == y.h;
|
||||
}
|
||||
|
||||
friend bool operator!=(const UHF& x, const UHF& y)
|
||||
{
|
||||
return ! (x == y);
|
||||
}
|
||||
|
||||
private:
|
||||
static size_t compute_seed(size_t seed, const std::string& extra);
|
||||
|
||||
H3<Hasher::digest, UHASH_KEY_SIZE> h;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* A hasher implementing the default hashing policy. Uses *k* separate hash
|
||||
* functions internally.
|
||||
*/
|
||||
class DefaultHasher : public Hasher {
|
||||
public:
|
||||
DefaultHasher(size_t k, const std::string& name);
|
||||
/**
|
||||
* Constructor for a hasher with *k* hash functions.
|
||||
*
|
||||
* @param k The number of hash functions to use.
|
||||
*
|
||||
* @param name The name of the hasher.
|
||||
*/
|
||||
DefaultHasher(size_t k, const std::string& name);
|
||||
|
||||
virtual digest_vector Hash(const void* x, size_t n) const /* final */;
|
||||
virtual DefaultHasher* Clone() const /* final */;
|
||||
virtual bool Equals(const Hasher* other) const /* final */;
|
||||
// Overridden from Hasher.
|
||||
virtual digest_vector Hash(const void* x, size_t n) const /* final */;
|
||||
virtual DefaultHasher* Clone() const /* final */;
|
||||
virtual bool Equals(const Hasher* other) const /* final */;
|
||||
|
||||
private:
|
||||
std::vector<UHF> hash_functions_;
|
||||
std::vector<UHF> hash_functions;
|
||||
};
|
||||
|
||||
/**
|
||||
* The *double-hashing* policy. Uses a linear combination of two hash functions.
|
||||
* The *double-hashing* policy. Uses a linear combination of two hash
|
||||
* functions.
|
||||
*/
|
||||
class DoubleHasher : public Hasher {
|
||||
public:
|
||||
DoubleHasher(size_t k, const std::string& name);
|
||||
/**
|
||||
* Constructor for a double hasher with *k* hash functions.
|
||||
*
|
||||
* @param k The number of hash functions to use.
|
||||
*
|
||||
* @param name The name of the hasher.
|
||||
*/
|
||||
DoubleHasher(size_t k, const std::string& name);
|
||||
|
||||
virtual digest_vector Hash(const void* x, size_t n) const /* final */;
|
||||
virtual DoubleHasher* Clone() const /* final */;
|
||||
virtual bool Equals(const Hasher* other) const /* final */;
|
||||
// Overridden from Hasher.
|
||||
virtual digest_vector Hash(const void* x, size_t n) const /* final */;
|
||||
virtual DoubleHasher* Clone() const /* final */;
|
||||
virtual bool Equals(const Hasher* other) const /* final */;
|
||||
|
||||
private:
|
||||
UHF h1_;
|
||||
UHF h2_;
|
||||
UHF h1;
|
||||
UHF h2;
|
||||
};
|
||||
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue