From 2fc5ca53ff8f90aa959b2bc65626b319a1dee529 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Thu, 25 Jul 2013 17:35:35 +0200 Subject: [PATCH] Make hashers serializable. There exists still a small bug that I could not find; the unit test istate/opaque.bro fails. If someone sees why, please chime in. --- src/SerialTypes.h | 6 ++ src/probabilistic/BloomFilter.cc | 19 +----- src/probabilistic/BloomFilter.h | 3 - src/probabilistic/Hasher.cc | 99 ++++++++++++++++++++++++++---- src/probabilistic/Hasher.h | 33 +++++----- src/probabilistic/bloom-filter.bif | 4 +- 6 files changed, 117 insertions(+), 47 deletions(-) diff --git a/src/SerialTypes.h b/src/SerialTypes.h index 85aed10bda..9933d005f0 100644 --- a/src/SerialTypes.h +++ b/src/SerialTypes.h @@ -52,6 +52,7 @@ SERIAL_IS(RE_MATCHER, 0x1400) SERIAL_IS(BITVECTOR, 0x1500) SERIAL_IS(COUNTERVECTOR, 0x1600) SERIAL_IS(BLOOMFILTER, 0x1700) +SERIAL_IS(HASHER, 0x1800) // These are the externally visible types. const SerialType SER_NONE = 0; @@ -206,6 +207,11 @@ SERIAL_BLOOMFILTER(BLOOMFILTER, 1) SERIAL_BLOOMFILTER(BASICBLOOMFILTER, 2) SERIAL_BLOOMFILTER(COUNTINGBLOOMFILTER, 3) +#define SERIAL_HASHER(name, val) SERIAL_CONST(name, val, HASHER) +SERIAL_HASHER(HASHER, 1) +SERIAL_HASHER(DEFAULTHASHER, 2) +SERIAL_HASHER(DOUBLEHASHER, 3) + SERIAL_CONST2(ID) SERIAL_CONST2(STATE_ACCESS) SERIAL_CONST2(CASE) diff --git a/src/probabilistic/BloomFilter.cc b/src/probabilistic/BloomFilter.cc index 7f769cbf7c..d446643ed3 100644 --- a/src/probabilistic/BloomFilter.cc +++ b/src/probabilistic/BloomFilter.cc @@ -38,28 +38,15 @@ bool BloomFilter::DoSerialize(SerialInfo* info) const { DO_SERIALIZE(SER_BLOOMFILTER, SerialObj); - if ( ! SERIALIZE(static_cast(hasher->K())) ) - return false; - - return SERIALIZE_STR(hasher->Name().c_str(), hasher->Name().size()); + return hasher->Serialize(info); } bool BloomFilter::DoUnserialize(UnserialInfo* info) { DO_UNSERIALIZE(SerialObj); - uint16 k; - if ( ! UNSERIALIZE(&k) ) - return false; - - const char* name; - if ( ! UNSERIALIZE_STR(&name, 0) ) - return false; - - hasher = Hasher::Create(k, name); - - delete [] name; - return true; + hasher = Hasher::Unserialize(info); + return hasher != 0; } size_t BasicBloomFilter::M(double fp, size_t capacity) diff --git a/src/probabilistic/BloomFilter.h b/src/probabilistic/BloomFilter.h index b6cf18672f..4865ae145c 100644 --- a/src/probabilistic/BloomFilter.h +++ b/src/probabilistic/BloomFilter.h @@ -13,9 +13,6 @@ class CounterVector; /** * The abstract base class for Bloom filters. - * - * At this point we won't let the user choose the hasher, but we might open - * up the interface in the future. */ class BloomFilter : public SerialObj { public: diff --git a/src/probabilistic/Hasher.cc b/src/probabilistic/Hasher.cc index f9ce7bdd6b..7db363142d 100644 --- a/src/probabilistic/Hasher.cc +++ b/src/probabilistic/Hasher.cc @@ -4,9 +4,56 @@ #include "Hasher.h" #include "digest.h" +#include "Serializer.h" using namespace probabilistic; +bool Hasher::Serialize(SerialInfo* info) const + { + return SerialObj::Serialize(info); + } + +Hasher* Hasher::Unserialize(UnserialInfo* info) + { + return reinterpret_cast(SerialObj::Unserialize(info, SER_HASHER)); + } + +bool Hasher::DoSerialize(SerialInfo* info) const + { + DO_SERIALIZE(SER_HASHER, SerialObj); + + if ( ! SERIALIZE(static_cast(k)) ) + return false; + + return SERIALIZE_STR(name.c_str(), name.size()); + } + +bool Hasher::DoUnserialize(UnserialInfo* info) + { + DO_UNSERIALIZE(SerialObj); + + uint16 serial_k; + if ( ! UNSERIALIZE(&serial_k) ) + return false; + k = serial_k; + assert(k > 0); + + const char* serial_name; + if ( ! UNSERIALIZE_STR(&serial_name, 0) ) + return false; + name = serial_name; + delete [] serial_name; + + return true; + } + +Hasher::Hasher(size_t k, const std::string& arg_name) + : k(k) + { + name = arg_name; + } + + UHF::UHF(size_t seed, const std::string& extra) : h(compute_seed(seed, extra)) { @@ -40,17 +87,6 @@ size_t UHF::compute_seed(size_t seed, const std::string& extra) return *reinterpret_cast(buf); } -Hasher* Hasher::Create(size_t k, const std::string& name) - { - return new DefaultHasher(k, name); - } - -Hasher::Hasher(size_t k, const std::string& arg_name) - : k(k) - { - name = arg_name; - } - DefaultHasher::DefaultHasher(size_t k, const std::string& name) : Hasher(k, name) { @@ -82,6 +118,27 @@ bool DefaultHasher::Equals(const Hasher* other) const return hash_functions == o->hash_functions; } +IMPLEMENT_SERIAL(DefaultHasher, SER_DEFAULTHASHER) + +bool DefaultHasher::DoSerialize(SerialInfo* info) const + { + DO_SERIALIZE(SER_DEFAULTHASHER, Hasher); + + // Nothing to do here, the base class has all we need serialized already. + return true; + } + +bool DefaultHasher::DoUnserialize(UnserialInfo* info) + { + DO_UNSERIALIZE(Hasher); + + hash_functions.clear(); + for ( size_t i = 0; i < K(); ++i ) + hash_functions.push_back(UHF(i, Name())); + + return true; + } + DoubleHasher::DoubleHasher(size_t k, const std::string& name) : Hasher(k, name), h1(1, name), h2(2, name) { @@ -112,3 +169,23 @@ bool DoubleHasher::Equals(const Hasher* other) const const DoubleHasher* o = static_cast(other); return h1 == o->h1 && h2 == o->h2; } + +IMPLEMENT_SERIAL(DoubleHasher, SER_DOUBLEHASHER) + +bool DoubleHasher::DoSerialize(SerialInfo* info) const + { + DO_SERIALIZE(SER_DOUBLEHASHER, Hasher); + + // Nothing to do here, the base class has all we need serialized already. + return true; + } + +bool DoubleHasher::DoUnserialize(UnserialInfo* info) + { + DO_UNSERIALIZE(Hasher); + + h1 = UHF(1, Name()); + h2 = UHF(2, Name()); + + return true; + } diff --git a/src/probabilistic/Hasher.h b/src/probabilistic/Hasher.h index d266565284..7e6a8ba134 100644 --- a/src/probabilistic/Hasher.h +++ b/src/probabilistic/Hasher.h @@ -5,6 +5,7 @@ #include "Hash.h" #include "H3.h" +#include "SerialObj.h" namespace probabilistic { @@ -12,7 +13,7 @@ namespace probabilistic { * Abstract base class for hashers. A hasher creates a family of hash * functions to hash an element *k* times. */ -class Hasher { +class Hasher : public SerialObj { public: typedef hash_t digest; typedef std::vector digest_vector; @@ -69,24 +70,18 @@ public: */ const std::string& Name() const { return name; } - /** - * Constructs the hasher used by the implementation. This hardcodes a - * specific hashing policy. It exists only because the HashingPolicy - * class hierachy is not yet serializable. - * - * @param k The number of hash functions to apply. - * - * @param name The hasher's name. - * - * @return Returns a new hasher instance. - */ - static Hasher* Create(size_t k, const std::string& name); + bool Serialize(SerialInfo* info) const; + static Hasher* Unserialize(UnserialInfo* info); protected: + DECLARE_ABSTRACT_SERIAL(Hasher); + + Hasher() { } + Hasher(size_t k, const std::string& name); private: - const size_t k; + size_t k; std::string name; }; @@ -106,7 +101,7 @@ public: * seed to compute the seed for t to compute the seed NUL-terminated * string as additional seed. */ - UHF(size_t seed, const std::string& extra = ""); + UHF(size_t seed = 0, const std::string& extra = ""); template Hasher::digest operator()(const T& x) const @@ -175,7 +170,11 @@ public: virtual DefaultHasher* Clone() const /* final */; virtual bool Equals(const Hasher* other) const /* final */; + DECLARE_SERIAL(DefaultHasher); + private: + DefaultHasher() { } + std::vector hash_functions; }; @@ -199,7 +198,11 @@ public: virtual DoubleHasher* Clone() const /* final */; virtual bool Equals(const Hasher* other) const /* final */; + DECLARE_SERIAL(DoubleHasher); + private: + DoubleHasher() { } + UHF h1; UHF h2; }; diff --git a/src/probabilistic/bloom-filter.bif b/src/probabilistic/bloom-filter.bif index dd21688fdd..f03e3d149b 100644 --- a/src/probabilistic/bloom-filter.bif +++ b/src/probabilistic/bloom-filter.bif @@ -40,7 +40,7 @@ function bloomfilter_basic_init%(fp: double, capacity: count, size_t cells = BasicBloomFilter::M(fp, capacity); size_t optimal_k = BasicBloomFilter::K(cells, capacity); - const Hasher* h = Hasher::Create(optimal_k, name->CheckString()); + const Hasher* h = new DefaultHasher(optimal_k, name->CheckString()); return new BloomFilterVal(new BasicBloomFilter(h, cells)); %} @@ -68,7 +68,7 @@ function bloomfilter_counting_init%(k: count, cells: count, max: count, return 0; } - const Hasher* h = Hasher::Create(k, name->CheckString()); + const Hasher* h = new DefaultHasher(k, name->CheckString()); uint16 width = 1; while ( max >>= 1 )