Make hashers serializable.

There is still a small bug that I could not track down: the unit test
istate/opaque.bro fails. If anyone sees why, please chime in.
This commit is contained in:
Matthias Vallentin 2013-07-25 17:35:35 +02:00
parent e482897f88
commit 2fc5ca53ff
6 changed files with 117 additions and 47 deletions

View file

@ -52,6 +52,7 @@ SERIAL_IS(RE_MATCHER, 0x1400)
SERIAL_IS(BITVECTOR, 0x1500) SERIAL_IS(BITVECTOR, 0x1500)
SERIAL_IS(COUNTERVECTOR, 0x1600) SERIAL_IS(COUNTERVECTOR, 0x1600)
SERIAL_IS(BLOOMFILTER, 0x1700) SERIAL_IS(BLOOMFILTER, 0x1700)
SERIAL_IS(HASHER, 0x1800)
// These are the externally visible types. // These are the externally visible types.
const SerialType SER_NONE = 0; const SerialType SER_NONE = 0;
@ -206,6 +207,11 @@ SERIAL_BLOOMFILTER(BLOOMFILTER, 1)
SERIAL_BLOOMFILTER(BASICBLOOMFILTER, 2) SERIAL_BLOOMFILTER(BASICBLOOMFILTER, 2)
SERIAL_BLOOMFILTER(COUNTINGBLOOMFILTER, 3) SERIAL_BLOOMFILTER(COUNTINGBLOOMFILTER, 3)
// Serial type constants for the hasher class hierarchy. SERIAL_HASHER
// forwards to SERIAL_CONST with the HASHER category; the three uses below
// define the constants for the Hasher base class and for the DefaultHasher
// and DoubleHasher subclasses.
#define SERIAL_HASHER(name, val) SERIAL_CONST(name, val, HASHER)
SERIAL_HASHER(HASHER, 1)
SERIAL_HASHER(DEFAULTHASHER, 2)
SERIAL_HASHER(DOUBLEHASHER, 3)
SERIAL_CONST2(ID) SERIAL_CONST2(ID)
SERIAL_CONST2(STATE_ACCESS) SERIAL_CONST2(STATE_ACCESS)
SERIAL_CONST2(CASE) SERIAL_CONST2(CASE)

View file

@ -38,28 +38,15 @@ bool BloomFilter::DoSerialize(SerialInfo* info) const
{ {
DO_SERIALIZE(SER_BLOOMFILTER, SerialObj); DO_SERIALIZE(SER_BLOOMFILTER, SerialObj);
if ( ! SERIALIZE(static_cast<uint16>(hasher->K())) ) return hasher->Serialize(info);
return false;
return SERIALIZE_STR(hasher->Name().c_str(), hasher->Name().size());
} }
bool BloomFilter::DoUnserialize(UnserialInfo* info) bool BloomFilter::DoUnserialize(UnserialInfo* info)
{ {
DO_UNSERIALIZE(SerialObj); DO_UNSERIALIZE(SerialObj);
uint16 k; hasher = Hasher::Unserialize(info);
if ( ! UNSERIALIZE(&k) ) return hasher != 0;
return false;
const char* name;
if ( ! UNSERIALIZE_STR(&name, 0) )
return false;
hasher = Hasher::Create(k, name);
delete [] name;
return true;
} }
size_t BasicBloomFilter::M(double fp, size_t capacity) size_t BasicBloomFilter::M(double fp, size_t capacity)

View file

@ -13,9 +13,6 @@ class CounterVector;
/** /**
* The abstract base class for Bloom filters. * The abstract base class for Bloom filters.
*
* At this point we won't let the user choose the hasher, but we might open
* up the interface in the future.
*/ */
class BloomFilter : public SerialObj { class BloomFilter : public SerialObj {
public: public:

View file

@ -4,9 +4,56 @@
#include "Hasher.h" #include "Hasher.h"
#include "digest.h" #include "digest.h"
#include "Serializer.h"
using namespace probabilistic; using namespace probabilistic;
/**
 * Serializes this hasher by delegating to SerialObj::Serialize().
 *
 * @param info The serialization context handed through to the base class.
 *
 * @return The result reported by SerialObj::Serialize().
 */
bool Hasher::Serialize(SerialInfo* info) const
	{
	const bool ok = SerialObj::Serialize(info);
	return ok;
	}
/**
 * Reconstructs a hasher from a serialized stream.
 *
 * @param info The unserialization context to read from.
 *
 * @return The object returned by SerialObj::Unserialize() for the type
 * SER_HASHER, converted to a Hasher pointer. A null result from that call
 * is passed through unchanged.
 */
Hasher* Hasher::Unserialize(UnserialInfo* info)
	{
	// Hasher derives publicly from SerialObj, so static_cast performs a
	// proper, compiler-checked base-to-derived conversion. reinterpret_cast
	// only reinterprets the pointer value and would silently yield a wrong
	// address if the SerialObj subobject ever stopped living at offset zero.
	return static_cast<Hasher*>(SerialObj::Unserialize(info, SER_HASHER));
	}
/**
 * Writes the state shared by all hashers: the number of hash functions,
 * followed by the hasher's name.
 *
 * Note that k (a size_t) is narrowed to uint16 before it is written.
 *
 * @param info The serialization context used by the serialization macros.
 *
 * @return False if writing k fails; otherwise the result of writing the
 * name.
 */
bool Hasher::DoSerialize(SerialInfo* info) const
	{
	DO_SERIALIZE(SER_HASHER, SerialObj);

	// Only attempt the name once k has been written successfully.
	if ( SERIALIZE(static_cast<uint16>(k)) )
		return SERIALIZE_STR(name.c_str(), name.size());

	return false;
	}
/**
 * Restores the state shared by all hashers: the number of hash functions
 * (read as a uint16) followed by the hasher's name.
 *
 * @param info The unserialization context used by the unserialization
 * macros.
 *
 * @return True if both values were read; false as soon as one read fails.
 */
bool Hasher::DoUnserialize(UnserialInfo* info)
	{
	DO_UNSERIALIZE(SerialObj);

	uint16 serial_k;
	const char* serial_name;

	if ( UNSERIALIZE(&serial_k) )
		{
		k = static_cast<size_t>(serial_k);
		// NOTE(review): this asserts on a value that comes from the
		// stream; with assertions compiled out a zero k is accepted.
		assert(k > 0);
		}
	else
		return false;

	if ( ! UNSERIALIZE_STR(&serial_name, 0) )
		return false;

	// Copy the characters into our own string before releasing the
	// buffer, which is freed here with delete [].
	name.assign(serial_name);
	delete [] serial_name;

	return true;
	}
/**
 * Initializes the state shared by all hashers.
 *
 * @param k The number of hash functions to apply.
 *
 * @param arg_name The hasher's name.
 */
Hasher::Hasher(size_t k, const std::string& arg_name)
	: k(k), name(arg_name)
	{
	}
UHF::UHF(size_t seed, const std::string& extra) UHF::UHF(size_t seed, const std::string& extra)
: h(compute_seed(seed, extra)) : h(compute_seed(seed, extra))
{ {
@ -40,17 +87,6 @@ size_t UHF::compute_seed(size_t seed, const std::string& extra)
return *reinterpret_cast<size_t*>(buf); return *reinterpret_cast<size_t*>(buf);
} }
// Factory for the hasher used by the implementation. It hardcodes the
// hashing policy: the result is always a DefaultHasher allocated with
// `new` and constructed from k (the number of hash functions to apply)
// and name (the hasher's name).
Hasher* Hasher::Create(size_t k, const std::string& name)
{
return new DefaultHasher(k, name);
}
/**
 * Initializes the state shared by all hashers.
 *
 * @param k The number of hash functions to apply.
 *
 * @param arg_name The hasher's name.
 */
Hasher::Hasher(size_t k, const std::string& arg_name)
	: k(k), name(arg_name)
	{
	}
DefaultHasher::DefaultHasher(size_t k, const std::string& name) DefaultHasher::DefaultHasher(size_t k, const std::string& name)
: Hasher(k, name) : Hasher(k, name)
{ {
@ -82,6 +118,27 @@ bool DefaultHasher::Equals(const Hasher* other) const
return hash_functions == o->hash_functions; return hash_functions == o->hash_functions;
} }
// NOTE(review): IMPLEMENT_SERIAL presumably ties DefaultHasher to the
// SER_DEFAULTHASHER type constant for the serialization framework --
// confirm against the macro's definition.
IMPLEMENT_SERIAL(DefaultHasher, SER_DEFAULTHASHER)
// Serializes a DefaultHasher. Apart from the DO_SERIALIZE step, which names
// Hasher as the parent class, no DefaultHasher-specific data is written;
// DoUnserialize() rebuilds hash_functions from K() and Name().
bool DefaultHasher::DoSerialize(SerialInfo* info) const
{
DO_SERIALIZE(SER_DEFAULTHASHER, Hasher);
// Nothing to do here, the base class has all we need serialized already.
return true;
}
/**
 * Restores a DefaultHasher. The hash functions are not read from the
 * stream; they are rebuilt, one per index in [0, K()), each seeded with its
 * index and the hasher's name.
 *
 * @param info The unserialization context used by DO_UNSERIALIZE.
 *
 * @return True once the hash functions have been rebuilt.
 */
bool DefaultHasher::DoUnserialize(UnserialInfo* info)
	{
	DO_UNSERIALIZE(Hasher);

	// Start from scratch so that no previously held functions survive.
	hash_functions.clear();

	size_t seed = 0;
	while ( seed < K() )
		{
		hash_functions.push_back(UHF(seed, Name()));
		++seed;
		}

	return true;
	}
DoubleHasher::DoubleHasher(size_t k, const std::string& name) DoubleHasher::DoubleHasher(size_t k, const std::string& name)
: Hasher(k, name), h1(1, name), h2(2, name) : Hasher(k, name), h1(1, name), h2(2, name)
{ {
@ -112,3 +169,23 @@ bool DoubleHasher::Equals(const Hasher* other) const
const DoubleHasher* o = static_cast<const DoubleHasher*>(other); const DoubleHasher* o = static_cast<const DoubleHasher*>(other);
return h1 == o->h1 && h2 == o->h2; return h1 == o->h1 && h2 == o->h2;
} }
// NOTE(review): IMPLEMENT_SERIAL presumably ties DoubleHasher to the
// SER_DOUBLEHASHER type constant for the serialization framework --
// confirm against the macro's definition.
IMPLEMENT_SERIAL(DoubleHasher, SER_DOUBLEHASHER)
// Serializes a DoubleHasher. Apart from the DO_SERIALIZE step, which names
// Hasher as the parent class, no DoubleHasher-specific data is written;
// DoUnserialize() rebuilds h1 and h2 from Name().
bool DoubleHasher::DoSerialize(SerialInfo* info) const
{
DO_SERIALIZE(SER_DOUBLEHASHER, Hasher);
// Nothing to do here, the base class has all we need serialized already.
return true;
}
/**
 * Restores a DoubleHasher. The two hash functions are not read from the
 * stream; they are rebuilt with the seeds 1 and 2 and the hasher's name,
 * matching what the (k, name) constructor does.
 *
 * @param info The unserialization context used by DO_UNSERIALIZE.
 *
 * @return True once both hash functions have been rebuilt.
 */
bool DoubleHasher::DoUnserialize(UnserialInfo* info)
	{
	DO_UNSERIALIZE(Hasher);

	const std::string& hasher_name = Name();

	h1 = UHF(1, hasher_name);
	h2 = UHF(2, hasher_name);

	return true;
	}

View file

@ -5,6 +5,7 @@
#include "Hash.h" #include "Hash.h"
#include "H3.h" #include "H3.h"
#include "SerialObj.h"
namespace probabilistic { namespace probabilistic {
@ -12,7 +13,7 @@ namespace probabilistic {
* Abstract base class for hashers. A hasher creates a family of hash * Abstract base class for hashers. A hasher creates a family of hash
* functions to hash an element *k* times. * functions to hash an element *k* times.
*/ */
class Hasher { class Hasher : public SerialObj {
public: public:
typedef hash_t digest; typedef hash_t digest;
typedef std::vector<digest> digest_vector; typedef std::vector<digest> digest_vector;
@ -69,24 +70,18 @@ public:
*/ */
const std::string& Name() const { return name; } const std::string& Name() const { return name; }
/** bool Serialize(SerialInfo* info) const;
* Constructs the hasher used by the implementation. This hardcodes a static Hasher* Unserialize(UnserialInfo* info);
* specific hashing policy. It exists only because the HashingPolicy
* class hierachy is not yet serializable.
*
* @param k The number of hash functions to apply.
*
* @param name The hasher's name.
*
* @return Returns a new hasher instance.
*/
static Hasher* Create(size_t k, const std::string& name);
protected: protected:
DECLARE_ABSTRACT_SERIAL(Hasher);
Hasher() { }
Hasher(size_t k, const std::string& name); Hasher(size_t k, const std::string& name);
private: private:
const size_t k; size_t k;
std::string name; std::string name;
}; };
@ -106,7 +101,7 @@ public:
* seed to compute the seed for t to compute the seed NUL-terminated * seed to compute the seed for t to compute the seed NUL-terminated
* string as additional seed. * string as additional seed.
*/ */
UHF(size_t seed, const std::string& extra = ""); UHF(size_t seed = 0, const std::string& extra = "");
template <typename T> template <typename T>
Hasher::digest operator()(const T& x) const Hasher::digest operator()(const T& x) const
@ -175,7 +170,11 @@ public:
virtual DefaultHasher* Clone() const /* final */; virtual DefaultHasher* Clone() const /* final */;
virtual bool Equals(const Hasher* other) const /* final */; virtual bool Equals(const Hasher* other) const /* final */;
DECLARE_SERIAL(DefaultHasher);
private: private:
DefaultHasher() { }
std::vector<UHF> hash_functions; std::vector<UHF> hash_functions;
}; };
@ -199,7 +198,11 @@ public:
virtual DoubleHasher* Clone() const /* final */; virtual DoubleHasher* Clone() const /* final */;
virtual bool Equals(const Hasher* other) const /* final */; virtual bool Equals(const Hasher* other) const /* final */;
DECLARE_SERIAL(DoubleHasher);
private: private:
DoubleHasher() { }
UHF h1; UHF h1;
UHF h2; UHF h2;
}; };

View file

@ -40,7 +40,7 @@ function bloomfilter_basic_init%(fp: double, capacity: count,
size_t cells = BasicBloomFilter::M(fp, capacity); size_t cells = BasicBloomFilter::M(fp, capacity);
size_t optimal_k = BasicBloomFilter::K(cells, capacity); size_t optimal_k = BasicBloomFilter::K(cells, capacity);
const Hasher* h = Hasher::Create(optimal_k, name->CheckString()); const Hasher* h = new DefaultHasher(optimal_k, name->CheckString());
return new BloomFilterVal(new BasicBloomFilter(h, cells)); return new BloomFilterVal(new BasicBloomFilter(h, cells));
%} %}
@ -68,7 +68,7 @@ function bloomfilter_counting_init%(k: count, cells: count, max: count,
return 0; return 0;
} }
const Hasher* h = Hasher::Create(k, name->CheckString()); const Hasher* h = new DefaultHasher(k, name->CheckString());
uint16 width = 1; uint16 width = 1;
while ( max >>= 1 ) while ( max >>= 1 )