mirror of
https://github.com/zeek/zeek.git
synced 2025-10-04 15:48:19 +00:00
Make hashers serializable.
There is still a small bug that I could not track down: the unit test istate/opaque.bro fails. If someone sees why, please chime in.
This commit is contained in:
parent
e482897f88
commit
2fc5ca53ff
6 changed files with 117 additions and 47 deletions
|
@ -52,6 +52,7 @@ SERIAL_IS(RE_MATCHER, 0x1400)
|
|||
SERIAL_IS(BITVECTOR, 0x1500)
|
||||
SERIAL_IS(COUNTERVECTOR, 0x1600)
|
||||
SERIAL_IS(BLOOMFILTER, 0x1700)
|
||||
SERIAL_IS(HASHER, 0x1800)
|
||||
|
||||
// These are the externally visible types.
|
||||
const SerialType SER_NONE = 0;
|
||||
|
@ -206,6 +207,11 @@ SERIAL_BLOOMFILTER(BLOOMFILTER, 1)
|
|||
SERIAL_BLOOMFILTER(BASICBLOOMFILTER, 2)
|
||||
SERIAL_BLOOMFILTER(COUNTINGBLOOMFILTER, 3)
|
||||
|
||||
#define SERIAL_HASHER(name, val) SERIAL_CONST(name, val, HASHER)
|
||||
SERIAL_HASHER(HASHER, 1)
|
||||
SERIAL_HASHER(DEFAULTHASHER, 2)
|
||||
SERIAL_HASHER(DOUBLEHASHER, 3)
|
||||
|
||||
SERIAL_CONST2(ID)
|
||||
SERIAL_CONST2(STATE_ACCESS)
|
||||
SERIAL_CONST2(CASE)
|
||||
|
|
|
@ -38,28 +38,15 @@ bool BloomFilter::DoSerialize(SerialInfo* info) const
|
|||
{
|
||||
DO_SERIALIZE(SER_BLOOMFILTER, SerialObj);
|
||||
|
||||
if ( ! SERIALIZE(static_cast<uint16>(hasher->K())) )
|
||||
return false;
|
||||
|
||||
return SERIALIZE_STR(hasher->Name().c_str(), hasher->Name().size());
|
||||
return hasher->Serialize(info);
|
||||
}
|
||||
|
||||
bool BloomFilter::DoUnserialize(UnserialInfo* info)
|
||||
{
|
||||
DO_UNSERIALIZE(SerialObj);
|
||||
|
||||
uint16 k;
|
||||
if ( ! UNSERIALIZE(&k) )
|
||||
return false;
|
||||
|
||||
const char* name;
|
||||
if ( ! UNSERIALIZE_STR(&name, 0) )
|
||||
return false;
|
||||
|
||||
hasher = Hasher::Create(k, name);
|
||||
|
||||
delete [] name;
|
||||
return true;
|
||||
hasher = Hasher::Unserialize(info);
|
||||
return hasher != 0;
|
||||
}
|
||||
|
||||
size_t BasicBloomFilter::M(double fp, size_t capacity)
|
||||
|
|
|
@ -13,9 +13,6 @@ class CounterVector;
|
|||
|
||||
/**
|
||||
* The abstract base class for Bloom filters.
|
||||
*
|
||||
* At this point we won't let the user choose the hasher, but we might open
|
||||
* up the interface in the future.
|
||||
*/
|
||||
class BloomFilter : public SerialObj {
|
||||
public:
|
||||
|
|
|
@ -4,9 +4,56 @@
|
|||
|
||||
#include "Hasher.h"
|
||||
#include "digest.h"
|
||||
#include "Serializer.h"
|
||||
|
||||
using namespace probabilistic;
|
||||
|
||||
// Serializes this hasher by handing the request to the SerialObj base
// implementation.
//
// info: The serialization context, passed through unchanged.
//
// Returns whatever SerialObj::Serialize() reports.
bool Hasher::Serialize(SerialInfo* info) const
{
	const bool ok = SerialObj::Serialize(info);
	return ok;
}
|
||||
|
||||
// Reads an object of serial type SER_HASHER from the given context and
// returns it as a Hasher.
//
// info: The unserialization context, passed through to SerialObj.
//
// Returns the pointer produced by SerialObj::Unserialize(), downcast to
// Hasher*. NOTE(review): presumably null on failure -- callers compare the
// result against 0; confirm against SerialObj::Unserialize().
Hasher* Hasher::Unserialize(UnserialInfo* info)
{
	// Hasher derives publicly from SerialObj, so static_cast is the proper
	// base-to-derived conversion; reinterpret_cast performs no pointer
	// adjustment and is not guaranteed to yield a usable pointer here.
	return static_cast<Hasher*>(SerialObj::Unserialize(info, SER_HASHER));
}
|
||||
|
||||
// Writes the hasher's own state: the number of hash functions (narrowed to
// uint16) followed by the hasher's name.
//
// info: The serialization context used by the SERIALIZE* macros.
//
// Returns true only if both writes succeed; the name is not written when
// writing k fails.
bool Hasher::DoSerialize(SerialInfo* info) const
{
	// Project macro; presumably emits the SerialObj part first -- see its
	// definition for the exact behavior.
	DO_SERIALIZE(SER_HASHER, SerialObj);

	// Short-circuit evaluation keeps the original order and early exit.
	return SERIALIZE(static_cast<uint16>(k))
		&& SERIALIZE_STR(name.c_str(), name.size());
}
|
||||
|
||||
// Restores the hasher's own state in the order DoSerialize() wrote it: the
// number of hash functions (stored as uint16), then the hasher's name.
//
// info: The unserialization context used by the UNSERIALIZE* macros.
//
// Returns false if either read fails or if the stored number of hash
// functions is zero; true otherwise.
bool Hasher::DoUnserialize(UnserialInfo* info)
{
	// Project macro; presumably restores the SerialObj part first -- see its
	// definition for the exact behavior.
	DO_UNSERIALIZE(SerialObj);

	uint16 serial_k;
	if ( ! UNSERIALIZE(&serial_k) )
		return false;

	// The value comes from serialized input, so reject a bad one explicitly
	// instead of asserting: an assert aborts debug builds and is compiled
	// out under NDEBUG.
	if ( serial_k == 0 )
		return false;

	k = serial_k;

	const char* serial_name = 0;
	if ( ! UNSERIALIZE_STR(&serial_name, 0) )
		return false;

	// Copy into the member, then release the temporary buffer.
	name = serial_name;
	delete [] serial_name;

	return true;
}
|
||||
|
||||
// Constructs a hasher.
//
// k: The number of hash functions to apply.
//
// arg_name: The hasher's name, copied into the object.
Hasher::Hasher(size_t k, const std::string& arg_name)
	: k(k), name(arg_name)
{
}
|
||||
|
||||
|
||||
UHF::UHF(size_t seed, const std::string& extra)
|
||||
: h(compute_seed(seed, extra))
|
||||
{
|
||||
|
@ -40,17 +87,6 @@ size_t UHF::compute_seed(size_t seed, const std::string& extra)
|
|||
return *reinterpret_cast<size_t*>(buf);
|
||||
}
|
||||
|
||||
Hasher* Hasher::Create(size_t k, const std::string& name)
|
||||
{
|
||||
return new DefaultHasher(k, name);
|
||||
}
|
||||
|
||||
Hasher::Hasher(size_t k, const std::string& arg_name)
|
||||
: k(k)
|
||||
{
|
||||
name = arg_name;
|
||||
}
|
||||
|
||||
DefaultHasher::DefaultHasher(size_t k, const std::string& name)
|
||||
: Hasher(k, name)
|
||||
{
|
||||
|
@ -82,6 +118,27 @@ bool DefaultHasher::Equals(const Hasher* other) const
|
|||
return hash_functions == o->hash_functions;
|
||||
}
|
||||
|
||||
IMPLEMENT_SERIAL(DefaultHasher, SER_DEFAULTHASHER)
|
||||
|
||||
// Serializes a DefaultHasher. This class writes no fields of its own.
//
// info: The serialization context used by the DO_SERIALIZE macro.
//
// Returns true once the macro has run without bailing out.
bool DefaultHasher::DoSerialize(SerialInfo* info) const
{
	DO_SERIALIZE(SER_DEFAULTHASHER, Hasher);

	// The Hasher base class already stores everything needed; the hash
	// functions are rebuilt from that state in DoUnserialize().
	return true;
}
|
||||
|
||||
// Restores a DefaultHasher and rebuilds its hash functions from the state
// held by the Hasher base class.
//
// info: The unserialization context used by the DO_UNSERIALIZE macro.
//
// Returns true once the hash functions have been rebuilt.
bool DefaultHasher::DoUnserialize(UnserialInfo* info)
{
	DO_UNSERIALIZE(Hasher);

	// Drop any existing functions, then create K() of them, each seeded
	// with its index and the hasher's name.
	hash_functions.clear();

	size_t seed = 0;
	while ( seed < K() )
		{
		hash_functions.push_back(UHF(seed, Name()));
		++seed;
		}

	return true;
}
|
||||
|
||||
DoubleHasher::DoubleHasher(size_t k, const std::string& name)
|
||||
: Hasher(k, name), h1(1, name), h2(2, name)
|
||||
{
|
||||
|
@ -112,3 +169,23 @@ bool DoubleHasher::Equals(const Hasher* other) const
|
|||
const DoubleHasher* o = static_cast<const DoubleHasher*>(other);
|
||||
return h1 == o->h1 && h2 == o->h2;
|
||||
}
|
||||
|
||||
IMPLEMENT_SERIAL(DoubleHasher, SER_DOUBLEHASHER)
|
||||
|
||||
// Serializes a DoubleHasher. This class writes no fields of its own.
//
// info: The serialization context used by the DO_SERIALIZE macro.
//
// Returns true once the macro has run without bailing out.
bool DoubleHasher::DoSerialize(SerialInfo* info) const
{
	DO_SERIALIZE(SER_DOUBLEHASHER, Hasher);

	// The Hasher base class already stores everything needed; h1 and h2
	// are rebuilt from that state in DoUnserialize().
	return true;
}
|
||||
|
||||
// Restores a DoubleHasher and rebuilds its two hash functions from the
// name held by the Hasher base class.
//
// info: The unserialization context used by the DO_UNSERIALIZE macro.
//
// Returns true once both hash functions have been rebuilt.
bool DoubleHasher::DoUnserialize(UnserialInfo* info)
{
	DO_UNSERIALIZE(Hasher);

	// Same seeds (1 and 2) as the public constructor uses.
	const std::string& extra = Name();
	h1 = UHF(1, extra);
	h2 = UHF(2, extra);

	return true;
}
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
|
||||
#include "Hash.h"
|
||||
#include "H3.h"
|
||||
#include "SerialObj.h"
|
||||
|
||||
namespace probabilistic {
|
||||
|
||||
|
@ -12,7 +13,7 @@ namespace probabilistic {
|
|||
* Abstract base class for hashers. A hasher creates a family of hash
|
||||
* functions to hash an element *k* times.
|
||||
*/
|
||||
class Hasher {
|
||||
class Hasher : public SerialObj {
|
||||
public:
|
||||
typedef hash_t digest;
|
||||
typedef std::vector<digest> digest_vector;
|
||||
|
@ -69,24 +70,18 @@ public:
|
|||
*/
|
||||
const std::string& Name() const { return name; }
|
||||
|
||||
/**
|
||||
* Constructs the hasher used by the implementation. This hardcodes a
|
||||
* specific hashing policy. It exists only because the HashingPolicy
|
||||
* class hierarchy is not yet serializable.
|
||||
*
|
||||
* @param k The number of hash functions to apply.
|
||||
*
|
||||
* @param name The hasher's name.
|
||||
*
|
||||
* @return Returns a new hasher instance.
|
||||
*/
|
||||
static Hasher* Create(size_t k, const std::string& name);
|
||||
bool Serialize(SerialInfo* info) const;
|
||||
static Hasher* Unserialize(UnserialInfo* info);
|
||||
|
||||
protected:
|
||||
DECLARE_ABSTRACT_SERIAL(Hasher);
|
||||
|
||||
Hasher() { }
|
||||
|
||||
Hasher(size_t k, const std::string& name);
|
||||
|
||||
private:
|
||||
const size_t k;
|
||||
size_t k;
|
||||
std::string name;
|
||||
};
|
||||
|
||||
|
@ -106,7 +101,7 @@ public:
|
|||
* seed to compute the seed for t to compute the seed NUL-terminated
|
||||
* string as additional seed.
|
||||
*/
|
||||
UHF(size_t seed, const std::string& extra = "");
|
||||
UHF(size_t seed = 0, const std::string& extra = "");
|
||||
|
||||
template <typename T>
|
||||
Hasher::digest operator()(const T& x) const
|
||||
|
@ -175,7 +170,11 @@ public:
|
|||
virtual DefaultHasher* Clone() const /* final */;
|
||||
virtual bool Equals(const Hasher* other) const /* final */;
|
||||
|
||||
DECLARE_SERIAL(DefaultHasher);
|
||||
|
||||
private:
|
||||
DefaultHasher() { }
|
||||
|
||||
std::vector<UHF> hash_functions;
|
||||
};
|
||||
|
||||
|
@ -199,7 +198,11 @@ public:
|
|||
virtual DoubleHasher* Clone() const /* final */;
|
||||
virtual bool Equals(const Hasher* other) const /* final */;
|
||||
|
||||
DECLARE_SERIAL(DoubleHasher);
|
||||
|
||||
private:
|
||||
DoubleHasher() { }
|
||||
|
||||
UHF h1;
|
||||
UHF h2;
|
||||
};
|
||||
|
|
|
@ -40,7 +40,7 @@ function bloomfilter_basic_init%(fp: double, capacity: count,
|
|||
|
||||
size_t cells = BasicBloomFilter::M(fp, capacity);
|
||||
size_t optimal_k = BasicBloomFilter::K(cells, capacity);
|
||||
const Hasher* h = Hasher::Create(optimal_k, name->CheckString());
|
||||
const Hasher* h = new DefaultHasher(optimal_k, name->CheckString());
|
||||
|
||||
return new BloomFilterVal(new BasicBloomFilter(h, cells));
|
||||
%}
|
||||
|
@ -68,7 +68,7 @@ function bloomfilter_counting_init%(k: count, cells: count, max: count,
|
|||
return 0;
|
||||
}
|
||||
|
||||
const Hasher* h = Hasher::Create(k, name->CheckString());
|
||||
const Hasher* h = new DefaultHasher(k, name->CheckString());
|
||||
|
||||
uint16 width = 1;
|
||||
while ( max >>= 1 )
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue