diff --git a/CHANGES b/CHANGES index 3d30b3d195..693dc36dac 100644 --- a/CHANGES +++ b/CHANGES @@ -1,4 +1,11 @@ +2.1-945 | 2013-07-30 10:05:10 -0700 + + * Make hashers serializable. (Matthias Vallentin) + + * Add docs and use default value for hasher names. (Matthias + Vallentin) + 2.1-939 | 2013-07-29 15:42:38 -0700 * Added Exec, Dir, and ActiveHTTP modules. (Seth Hall) diff --git a/VERSION b/VERSION index 4ebbf81a9f..6b605113b8 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.1-939 +2.1-945 diff --git a/magic b/magic deleted file mode 160000 index e87fe13a7b..0000000000 --- a/magic +++ /dev/null @@ -1 +0,0 @@ -Subproject commit e87fe13a7b776182ffc8c75076d42702f5c28fed diff --git a/src/SerialTypes.h b/src/SerialTypes.h index 85aed10bda..9933d005f0 100644 --- a/src/SerialTypes.h +++ b/src/SerialTypes.h @@ -52,6 +52,7 @@ SERIAL_IS(RE_MATCHER, 0x1400) SERIAL_IS(BITVECTOR, 0x1500) SERIAL_IS(COUNTERVECTOR, 0x1600) SERIAL_IS(BLOOMFILTER, 0x1700) +SERIAL_IS(HASHER, 0x1800) // These are the externally visible types. const SerialType SER_NONE = 0; @@ -206,6 +207,11 @@ SERIAL_BLOOMFILTER(BLOOMFILTER, 1) SERIAL_BLOOMFILTER(BASICBLOOMFILTER, 2) SERIAL_BLOOMFILTER(COUNTINGBLOOMFILTER, 3) +#define SERIAL_HASHER(name, val) SERIAL_CONST(name, val, HASHER) +SERIAL_HASHER(HASHER, 1) +SERIAL_HASHER(DEFAULTHASHER, 2) +SERIAL_HASHER(DOUBLEHASHER, 3) + SERIAL_CONST2(ID) SERIAL_CONST2(STATE_ACCESS) SERIAL_CONST2(CASE) diff --git a/src/probabilistic/BloomFilter.cc b/src/probabilistic/BloomFilter.cc index db768ed934..23b812269c 100644 --- a/src/probabilistic/BloomFilter.cc +++ b/src/probabilistic/BloomFilter.cc @@ -40,28 +40,15 @@ bool BloomFilter::DoSerialize(SerialInfo* info) const { DO_SERIALIZE(SER_BLOOMFILTER, SerialObj); - if ( ! SERIALIZE(static_cast(hasher->K())) ) - return false; - - return SERIALIZE_STR(hasher->Name().c_str(), hasher->Name().size()); + return hasher->Serialize(info); } bool BloomFilter::DoUnserialize(UnserialInfo* info) { DO_UNSERIALIZE(SerialObj); - uint16 k; - if ( ! UNSERIALIZE(&k) ) - return false; - - const char* name; - if ( ! UNSERIALIZE_STR(&name, 0) ) - return false; - - hasher = Hasher::Create(k, name); - - delete [] name; - return true; + hasher = Hasher::Unserialize(info); + return hasher != 0; } size_t BasicBloomFilter::M(double fp, size_t capacity) diff --git a/src/probabilistic/BloomFilter.h b/src/probabilistic/BloomFilter.h index b6cf18672f..4865ae145c 100644 --- a/src/probabilistic/BloomFilter.h +++ b/src/probabilistic/BloomFilter.h @@ -13,9 +13,6 @@ class CounterVector; /** * The abstract base class for Bloom filters. - * - * At this point we won't let the user choose the hasher, but we might open - * up the interface in the future. */ class BloomFilter : public SerialObj { public: diff --git a/src/probabilistic/Hasher.cc b/src/probabilistic/Hasher.cc index f9ce7bdd6b..17597b9a82 100644 --- a/src/probabilistic/Hasher.cc +++ b/src/probabilistic/Hasher.cc @@ -4,9 +4,59 @@ #include "Hasher.h" #include "digest.h" +#include "Serializer.h" using namespace probabilistic; +bool Hasher::Serialize(SerialInfo* info) const + { + return SerialObj::Serialize(info); + } + +Hasher* Hasher::Unserialize(UnserialInfo* info) + { + return reinterpret_cast(SerialObj::Unserialize(info, SER_HASHER)); + } + +bool Hasher::DoSerialize(SerialInfo* info) const + { + DO_SERIALIZE(SER_HASHER, SerialObj); + + if ( ! SERIALIZE(static_cast(k)) ) + return false; + + return SERIALIZE_STR(name.c_str(), name.size()); + } + +bool Hasher::DoUnserialize(UnserialInfo* info) + { + DO_UNSERIALIZE(SerialObj); + + uint16 serial_k; + if ( ! UNSERIALIZE(&serial_k) ) + return false; + + k = serial_k; + assert(k > 0); + + const char* serial_name; + if ( ! UNSERIALIZE_STR(&serial_name, 0) ) + return false; + + name = serial_name; + delete [] serial_name; + + return true; + } + +Hasher::Hasher(size_t k, const std::string& arg_name) + : k(k) + { + k = k; + name = arg_name; + } + + UHF::UHF(size_t seed, const std::string& extra) : h(compute_seed(seed, extra)) { @@ -40,17 +90,6 @@ size_t UHF::compute_seed(size_t seed, const std::string& extra) return *reinterpret_cast(buf); } -Hasher* Hasher::Create(size_t k, const std::string& name) - { - return new DefaultHasher(k, name); - } - -Hasher::Hasher(size_t k, const std::string& arg_name) - : k(k) - { - name = arg_name; - } - DefaultHasher::DefaultHasher(size_t k, const std::string& name) : Hasher(k, name) { @@ -82,6 +121,27 @@ bool DefaultHasher::Equals(const Hasher* other) const return hash_functions == o->hash_functions; } +IMPLEMENT_SERIAL(DefaultHasher, SER_DEFAULTHASHER) + +bool DefaultHasher::DoSerialize(SerialInfo* info) const + { + DO_SERIALIZE(SER_DEFAULTHASHER, Hasher); + + // Nothing to do here, the base class has all we need serialized already. + return true; + } + +bool DefaultHasher::DoUnserialize(UnserialInfo* info) + { + DO_UNSERIALIZE(Hasher); + + hash_functions.clear(); + for ( size_t i = 0; i < K(); ++i ) + hash_functions.push_back(UHF(i, Name())); + + return true; + } + DoubleHasher::DoubleHasher(size_t k, const std::string& name) : Hasher(k, name), h1(1, name), h2(2, name) { @@ -112,3 +172,23 @@ bool DoubleHasher::Equals(const Hasher* other) const const DoubleHasher* o = static_cast(other); return h1 == o->h1 && h2 == o->h2; } + +IMPLEMENT_SERIAL(DoubleHasher, SER_DOUBLEHASHER) + +bool DoubleHasher::DoSerialize(SerialInfo* info) const + { + DO_SERIALIZE(SER_DOUBLEHASHER, Hasher); + + // Nothing to do here, the base class has all we need serialized already. + return true; + } + +bool DoubleHasher::DoUnserialize(UnserialInfo* info) + { + DO_UNSERIALIZE(Hasher); + + h1 = UHF(1, Name()); + h2 = UHF(2, Name()); + + return true; + } diff --git a/src/probabilistic/Hasher.h b/src/probabilistic/Hasher.h index 9f7d4ae32d..3acd5c5867 100644 --- a/src/probabilistic/Hasher.h +++ b/src/probabilistic/Hasher.h @@ -5,6 +5,7 @@ #include "Hash.h" #include "H3.h" +#include "SerialObj.h" namespace probabilistic { @@ -12,7 +13,7 @@ namespace probabilistic { * Abstract base class for hashers. A hasher creates a family of hash * functions to hash an element *k* times. */ -class Hasher { +class Hasher : public SerialObj { public: typedef hash_t digest; typedef std::vector digest_vector; @@ -63,25 +64,20 @@ public: size_t K() const { return k; } /** - * Returns the hasher's name. TODO: What's this? + * Returns the hasher's name. If not empty, the hasher uses this descriptor + * to seed its *k* hash functions. Otherwise the hasher mixes in the initial + * seed derived from the environment variable `$BRO_SEED`. */ const std::string& Name() const { return name; } - /** - * Constructs the hasher used by the implementation. This hardcodes a - * specific hashing policy. It exists only because the HashingPolicy - * class hierachy is not yet serializable. - * - * @param k The number of hash functions to apply. - * - * @param name The hasher's name. Hashers with the same name should - * provide consistent results. - * - * @return Returns a new hasher instance. - */ - static Hasher* Create(size_t k, const std::string& name); + bool Serialize(SerialInfo* info) const; + static Hasher* Unserialize(UnserialInfo* info); protected: + DECLARE_ABSTRACT_SERIAL(Hasher); + + Hasher() { } + /** * Constructor. * @@ -93,7 +89,7 @@ protected: Hasher(size_t k, const std::string& name); private: - const size_t k; + size_t k; std::string name; }; @@ -113,7 +109,7 @@ public: * seed to compute the seed for t to compute the seed NUL-terminated * string as additional seed. */ - UHF(size_t seed, const std::string& extra = ""); + UHF(size_t seed = 0, const std::string& extra = ""); template Hasher::digest operator()(const T& x) const @@ -175,14 +171,18 @@ public: * * @param name The name of the hasher. */ - DefaultHasher(size_t k, const std::string& name); + DefaultHasher(size_t k, const std::string& name = ""); // Overridden from Hasher. virtual digest_vector Hash(const void* x, size_t n) const /* final */; virtual DefaultHasher* Clone() const /* final */; virtual bool Equals(const Hasher* other) const /* final */; + DECLARE_SERIAL(DefaultHasher); + private: + DefaultHasher() { } + std::vector hash_functions; }; @@ -199,14 +199,18 @@ public: * * @param name The name of the hasher. */ - DoubleHasher(size_t k, const std::string& name); + DoubleHasher(size_t k, const std::string& name = ""); // Overridden from Hasher. virtual digest_vector Hash(const void* x, size_t n) const /* final */; virtual DoubleHasher* Clone() const /* final */; virtual bool Equals(const Hasher* other) const /* final */; + DECLARE_SERIAL(DoubleHasher); + private: + DoubleHasher() { } + UHF h1; UHF h2; }; diff --git a/src/probabilistic/bloom-filter.bif b/src/probabilistic/bloom-filter.bif index c6760f6adf..a3567ad6f7 100644 --- a/src/probabilistic/bloom-filter.bif +++ b/src/probabilistic/bloom-filter.bif @@ -48,7 +48,7 @@ function bloomfilter_basic_init%(fp: double, capacity: count, size_t cells = BasicBloomFilter::M(fp, capacity); size_t optimal_k = BasicBloomFilter::K(cells, capacity); - const Hasher* h = Hasher::Create(optimal_k, name->CheckString()); + const Hasher* h = new DefaultHasher(optimal_k, name->CheckString()); return new BloomFilterVal(new BasicBloomFilter(h, cells)); %} @@ -86,7 +86,7 @@ function bloomfilter_counting_init%(k: count, cells: count, max: count, return 0; } - const Hasher* h = Hasher::Create(k, name->CheckString()); + const Hasher* h = new DefaultHasher(k, name->CheckString()); uint16 width = 1; while ( max >>= 1 )