mirror of
https://github.com/zeek/zeek.git
synced 2025-10-04 15:48:19 +00:00
Make hashers serializable.
There is still a small bug that I could not track down: the unit test istate/opaque.bro fails. If someone sees why, please chime in.
This commit is contained in:
parent
e482897f88
commit
2fc5ca53ff
6 changed files with 117 additions and 47 deletions
|
@ -52,6 +52,7 @@ SERIAL_IS(RE_MATCHER, 0x1400)
|
||||||
SERIAL_IS(BITVECTOR, 0x1500)
|
SERIAL_IS(BITVECTOR, 0x1500)
|
||||||
SERIAL_IS(COUNTERVECTOR, 0x1600)
|
SERIAL_IS(COUNTERVECTOR, 0x1600)
|
||||||
SERIAL_IS(BLOOMFILTER, 0x1700)
|
SERIAL_IS(BLOOMFILTER, 0x1700)
|
||||||
|
SERIAL_IS(HASHER, 0x1800)
|
||||||
|
|
||||||
// These are the externally visible types.
|
// These are the externally visible types.
|
||||||
const SerialType SER_NONE = 0;
|
const SerialType SER_NONE = 0;
|
||||||
|
@ -206,6 +207,11 @@ SERIAL_BLOOMFILTER(BLOOMFILTER, 1)
|
||||||
SERIAL_BLOOMFILTER(BASICBLOOMFILTER, 2)
|
SERIAL_BLOOMFILTER(BASICBLOOMFILTER, 2)
|
||||||
SERIAL_BLOOMFILTER(COUNTINGBLOOMFILTER, 3)
|
SERIAL_BLOOMFILTER(COUNTINGBLOOMFILTER, 3)
|
||||||
|
|
||||||
|
#define SERIAL_HASHER(name, val) SERIAL_CONST(name, val, HASHER)
|
||||||
|
SERIAL_HASHER(HASHER, 1)
|
||||||
|
SERIAL_HASHER(DEFAULTHASHER, 2)
|
||||||
|
SERIAL_HASHER(DOUBLEHASHER, 3)
|
||||||
|
|
||||||
SERIAL_CONST2(ID)
|
SERIAL_CONST2(ID)
|
||||||
SERIAL_CONST2(STATE_ACCESS)
|
SERIAL_CONST2(STATE_ACCESS)
|
||||||
SERIAL_CONST2(CASE)
|
SERIAL_CONST2(CASE)
|
||||||
|
|
|
@ -38,28 +38,15 @@ bool BloomFilter::DoSerialize(SerialInfo* info) const
|
||||||
{
|
{
|
||||||
DO_SERIALIZE(SER_BLOOMFILTER, SerialObj);
|
DO_SERIALIZE(SER_BLOOMFILTER, SerialObj);
|
||||||
|
|
||||||
if ( ! SERIALIZE(static_cast<uint16>(hasher->K())) )
|
return hasher->Serialize(info);
|
||||||
return false;
|
|
||||||
|
|
||||||
return SERIALIZE_STR(hasher->Name().c_str(), hasher->Name().size());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool BloomFilter::DoUnserialize(UnserialInfo* info)
|
bool BloomFilter::DoUnserialize(UnserialInfo* info)
|
||||||
{
|
{
|
||||||
DO_UNSERIALIZE(SerialObj);
|
DO_UNSERIALIZE(SerialObj);
|
||||||
|
|
||||||
uint16 k;
|
hasher = Hasher::Unserialize(info);
|
||||||
if ( ! UNSERIALIZE(&k) )
|
return hasher != 0;
|
||||||
return false;
|
|
||||||
|
|
||||||
const char* name;
|
|
||||||
if ( ! UNSERIALIZE_STR(&name, 0) )
|
|
||||||
return false;
|
|
||||||
|
|
||||||
hasher = Hasher::Create(k, name);
|
|
||||||
|
|
||||||
delete [] name;
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t BasicBloomFilter::M(double fp, size_t capacity)
|
size_t BasicBloomFilter::M(double fp, size_t capacity)
|
||||||
|
|
|
@ -13,9 +13,6 @@ class CounterVector;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The abstract base class for Bloom filters.
|
* The abstract base class for Bloom filters.
|
||||||
*
|
|
||||||
* At this point we won't let the user choose the hasher, but we might open
|
|
||||||
* up the interface in the future.
|
|
||||||
*/
|
*/
|
||||||
class BloomFilter : public SerialObj {
|
class BloomFilter : public SerialObj {
|
||||||
public:
|
public:
|
||||||
|
|
|
@ -4,9 +4,56 @@
|
||||||
|
|
||||||
#include "Hasher.h"
|
#include "Hasher.h"
|
||||||
#include "digest.h"
|
#include "digest.h"
|
||||||
|
#include "Serializer.h"
|
||||||
|
|
||||||
using namespace probabilistic;
|
using namespace probabilistic;
|
||||||
|
|
||||||
|
// Public serialization entry point for hashers. Simply forwards to the
// SerialObj implementation and reports its result to the caller.
bool Hasher::Serialize(SerialInfo* info) const
	{
	const bool ok = SerialObj::Serialize(info);
	return ok;
	}
|
||||||
|
|
||||||
|
// Reads a hasher from the given unserialization context by requesting an
// object of type SER_HASHER from SerialObj::Unserialize().
//
// Returns the unserialized object as a Hasher pointer. Callers in this file
// (BloomFilter::DoUnserialize) treat a null result as failure.
Hasher* Hasher::Unserialize(UnserialInfo* info)
	{
	// Hasher derives publicly from SerialObj, so static_cast is the proper
	// base-to-derived conversion; reinterpret_cast would merely reinterpret
	// the pointer bits without any regard to the class relationship.
	return static_cast<Hasher*>(SerialObj::Unserialize(info, SER_HASHER));
	}
|
||||||
|
|
||||||
|
// Writes the hasher's own state after the SerialObj part: first k, narrowed
// to uint16, then the name's character data together with its size.
// Returns false as soon as one of the writes fails.
bool Hasher::DoSerialize(SerialInfo* info) const
	{
	DO_SERIALIZE(SER_HASHER, SerialObj);

	// Short-circuit evaluation keeps the original order: the name is only
	// written if k was written successfully.
	return SERIALIZE(static_cast<uint16>(k)) &&
	       SERIALIZE_STR(name.c_str(), name.size());
	}
|
||||||
|
|
||||||
|
// Restores the state written by Hasher::DoSerialize(): a uint16 holding k,
// followed by the hasher's name.
//
// Returns false if either value cannot be read or if the stored k is zero;
// returns true once both members have been restored.
bool Hasher::DoUnserialize(UnserialInfo* info)
	{
	DO_UNSERIALIZE(SerialObj);

	uint16 serial_k;
	if ( ! UNSERIALIZE(&serial_k) )
		return false;

	// The value comes from serialized input, so a zero must be reported as
	// a failed unserialization rather than checked with assert(): an assert
	// aborts the process in debug builds and vanishes under NDEBUG.
	if ( serial_k == 0 )
		return false;

	k = serial_k;

	const char* serial_name;
	if ( ! UNSERIALIZE_STR(&serial_name, 0) )
		return false;

	// Copy into the std::string member, then release the buffer handed
	// back by UNSERIALIZE_STR.
	name = serial_name;
	delete [] serial_name;

	return true;
	}
|
||||||
|
|
||||||
|
// Constructs a hasher.
//
// k: the number of hash functions to apply.
// arg_name: the hasher's name, stored as-is.
//
// Both members are set in the initializer list (in declaration order: k,
// then name) so that name is constructed directly from arg_name instead of
// being default-constructed and then assigned.
Hasher::Hasher(size_t k, const std::string& arg_name)
	: k(k), name(arg_name)
	{
	}
|
||||||
|
|
||||||
|
|
||||||
UHF::UHF(size_t seed, const std::string& extra)
|
UHF::UHF(size_t seed, const std::string& extra)
|
||||||
: h(compute_seed(seed, extra))
|
: h(compute_seed(seed, extra))
|
||||||
{
|
{
|
||||||
|
@ -40,17 +87,6 @@ size_t UHF::compute_seed(size_t seed, const std::string& extra)
|
||||||
return *reinterpret_cast<size_t*>(buf);
|
return *reinterpret_cast<size_t*>(buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
Hasher* Hasher::Create(size_t k, const std::string& name)
|
|
||||||
{
|
|
||||||
return new DefaultHasher(k, name);
|
|
||||||
}
|
|
||||||
|
|
||||||
Hasher::Hasher(size_t k, const std::string& arg_name)
|
|
||||||
: k(k)
|
|
||||||
{
|
|
||||||
name = arg_name;
|
|
||||||
}
|
|
||||||
|
|
||||||
DefaultHasher::DefaultHasher(size_t k, const std::string& name)
|
DefaultHasher::DefaultHasher(size_t k, const std::string& name)
|
||||||
: Hasher(k, name)
|
: Hasher(k, name)
|
||||||
{
|
{
|
||||||
|
@ -82,6 +118,27 @@ bool DefaultHasher::Equals(const Hasher* other) const
|
||||||
return hash_functions == o->hash_functions;
|
return hash_functions == o->hash_functions;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
IMPLEMENT_SERIAL(DefaultHasher, SER_DEFAULTHASHER)
|
||||||
|
|
||||||
|
// Serializes a DefaultHasher. The Hasher base part is written through
// DO_SERIALIZE; this class adds no data of its own.
bool DefaultHasher::DoSerialize(SerialInfo* info) const
	{
	DO_SERIALIZE(SER_DEFAULTHASHER, Hasher);

	// No extra fields: the hash functions are rebuilt from K() and Name()
	// in DoUnserialize(), and those are covered by the base class.
	return true;
	}
|
||||||
|
|
||||||
|
// Restores a DefaultHasher. After the Hasher base part has been read, the
// hash function vector is rebuilt from scratch with one UHF per index in
// [0, K()), each seeded with its index and the hasher's name.
bool DefaultHasher::DoUnserialize(UnserialInfo* info)
	{
	DO_UNSERIALIZE(Hasher);

	const std::string& hasher_name = Name();

	// Drop whatever the vector held before repopulating it.
	hash_functions.clear();

	for ( size_t seed = 0; seed < K(); ++seed )
		hash_functions.push_back(UHF(seed, hasher_name));

	return true;
	}
|
||||||
|
|
||||||
DoubleHasher::DoubleHasher(size_t k, const std::string& name)
|
DoubleHasher::DoubleHasher(size_t k, const std::string& name)
|
||||||
: Hasher(k, name), h1(1, name), h2(2, name)
|
: Hasher(k, name), h1(1, name), h2(2, name)
|
||||||
{
|
{
|
||||||
|
@ -112,3 +169,23 @@ bool DoubleHasher::Equals(const Hasher* other) const
|
||||||
const DoubleHasher* o = static_cast<const DoubleHasher*>(other);
|
const DoubleHasher* o = static_cast<const DoubleHasher*>(other);
|
||||||
return h1 == o->h1 && h2 == o->h2;
|
return h1 == o->h1 && h2 == o->h2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
IMPLEMENT_SERIAL(DoubleHasher, SER_DOUBLEHASHER)
|
||||||
|
|
||||||
|
// Serializes a DoubleHasher. The Hasher base part is written through
// DO_SERIALIZE; this class adds no data of its own.
bool DoubleHasher::DoSerialize(SerialInfo* info) const
	{
	DO_SERIALIZE(SER_DOUBLEHASHER, Hasher);

	// No extra fields: h1 and h2 are rebuilt from Name() in
	// DoUnserialize(), and the name is covered by the base class.
	return true;
	}
|
||||||
|
|
||||||
|
// Restores a DoubleHasher. After the Hasher base part has been read, the two
// hash functions are recreated with seeds 1 and 2 and the hasher's name, the
// same arguments the DoubleHasher(k, name) constructor passes to h1 and h2.
bool DoubleHasher::DoUnserialize(UnserialInfo* info)
	{
	DO_UNSERIALIZE(Hasher);

	const std::string& hasher_name = Name();

	h1 = UHF(1, hasher_name);
	h2 = UHF(2, hasher_name);

	return true;
	}
|
||||||
|
|
|
@ -5,6 +5,7 @@
|
||||||
|
|
||||||
#include "Hash.h"
|
#include "Hash.h"
|
||||||
#include "H3.h"
|
#include "H3.h"
|
||||||
|
#include "SerialObj.h"
|
||||||
|
|
||||||
namespace probabilistic {
|
namespace probabilistic {
|
||||||
|
|
||||||
|
@ -12,7 +13,7 @@ namespace probabilistic {
|
||||||
* Abstract base class for hashers. A hasher creates a family of hash
|
* Abstract base class for hashers. A hasher creates a family of hash
|
||||||
* functions to hash an element *k* times.
|
* functions to hash an element *k* times.
|
||||||
*/
|
*/
|
||||||
class Hasher {
|
class Hasher : public SerialObj {
|
||||||
public:
|
public:
|
||||||
typedef hash_t digest;
|
typedef hash_t digest;
|
||||||
typedef std::vector<digest> digest_vector;
|
typedef std::vector<digest> digest_vector;
|
||||||
|
@ -69,24 +70,18 @@ public:
|
||||||
*/
|
*/
|
||||||
const std::string& Name() const { return name; }
|
const std::string& Name() const { return name; }
|
||||||
|
|
||||||
/**
|
bool Serialize(SerialInfo* info) const;
|
||||||
* Constructs the hasher used by the implementation. This hardcodes a
|
static Hasher* Unserialize(UnserialInfo* info);
|
||||||
* specific hashing policy. It exists only because the HashingPolicy
|
|
||||||
* class hierarchy is not yet serializable.
|
|
||||||
*
|
|
||||||
* @param k The number of hash functions to apply.
|
|
||||||
*
|
|
||||||
* @param name The hasher's name.
|
|
||||||
*
|
|
||||||
* @return Returns a new hasher instance.
|
|
||||||
*/
|
|
||||||
static Hasher* Create(size_t k, const std::string& name);
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
DECLARE_ABSTRACT_SERIAL(Hasher);
|
||||||
|
|
||||||
|
Hasher() { }
|
||||||
|
|
||||||
Hasher(size_t k, const std::string& name);
|
Hasher(size_t k, const std::string& name);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
const size_t k;
|
size_t k;
|
||||||
std::string name;
|
std::string name;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -106,7 +101,7 @@ public:
|
||||||
* seed to compute the seed for t to compute the seed NUL-terminated
|
* seed to compute the seed for t to compute the seed NUL-terminated
|
||||||
* string as additional seed.
|
* string as additional seed.
|
||||||
*/
|
*/
|
||||||
UHF(size_t seed, const std::string& extra = "");
|
UHF(size_t seed = 0, const std::string& extra = "");
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
Hasher::digest operator()(const T& x) const
|
Hasher::digest operator()(const T& x) const
|
||||||
|
@ -175,7 +170,11 @@ public:
|
||||||
virtual DefaultHasher* Clone() const /* final */;
|
virtual DefaultHasher* Clone() const /* final */;
|
||||||
virtual bool Equals(const Hasher* other) const /* final */;
|
virtual bool Equals(const Hasher* other) const /* final */;
|
||||||
|
|
||||||
|
DECLARE_SERIAL(DefaultHasher);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
DefaultHasher() { }
|
||||||
|
|
||||||
std::vector<UHF> hash_functions;
|
std::vector<UHF> hash_functions;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -199,7 +198,11 @@ public:
|
||||||
virtual DoubleHasher* Clone() const /* final */;
|
virtual DoubleHasher* Clone() const /* final */;
|
||||||
virtual bool Equals(const Hasher* other) const /* final */;
|
virtual bool Equals(const Hasher* other) const /* final */;
|
||||||
|
|
||||||
|
DECLARE_SERIAL(DoubleHasher);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
DoubleHasher() { }
|
||||||
|
|
||||||
UHF h1;
|
UHF h1;
|
||||||
UHF h2;
|
UHF h2;
|
||||||
};
|
};
|
||||||
|
|
|
@ -40,7 +40,7 @@ function bloomfilter_basic_init%(fp: double, capacity: count,
|
||||||
|
|
||||||
size_t cells = BasicBloomFilter::M(fp, capacity);
|
size_t cells = BasicBloomFilter::M(fp, capacity);
|
||||||
size_t optimal_k = BasicBloomFilter::K(cells, capacity);
|
size_t optimal_k = BasicBloomFilter::K(cells, capacity);
|
||||||
const Hasher* h = Hasher::Create(optimal_k, name->CheckString());
|
const Hasher* h = new DefaultHasher(optimal_k, name->CheckString());
|
||||||
|
|
||||||
return new BloomFilterVal(new BasicBloomFilter(h, cells));
|
return new BloomFilterVal(new BasicBloomFilter(h, cells));
|
||||||
%}
|
%}
|
||||||
|
@ -68,7 +68,7 @@ function bloomfilter_counting_init%(k: count, cells: count, max: count,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
const Hasher* h = Hasher::Create(k, name->CheckString());
|
const Hasher* h = new DefaultHasher(k, name->CheckString());
|
||||||
|
|
||||||
uint16 width = 1;
|
uint16 width = 1;
|
||||||
while ( max >>= 1 )
|
while ( max >>= 1 )
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue