From e482897f885e2f1039b96782d5e4bc080d74a535 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Thu, 25 Jul 2013 15:16:53 +0200 Subject: [PATCH 1/3] Add docs and use default value for hasher names. --- src/probabilistic/Hasher.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/probabilistic/Hasher.h b/src/probabilistic/Hasher.h index 62c5d58d1f..d266565284 100644 --- a/src/probabilistic/Hasher.h +++ b/src/probabilistic/Hasher.h @@ -63,7 +63,9 @@ public: size_t K() const { return k; } /** - * Returns the hasher's name. TODO: What's this? + * Returns the hasher's name. If not empty, the hasher uses this descriptor + * to seed its *k* hash functions. Otherwise the hasher mixes in the initial + * seed derived from the environment variable `$BRO_SEED`. */ const std::string& Name() const { return name; } @@ -83,7 +85,7 @@ public: protected: Hasher(size_t k, const std::string& name); - private: +private: const size_t k; std::string name; }; @@ -166,7 +168,7 @@ public: * * @param name The name of the hasher. */ - DefaultHasher(size_t k, const std::string& name); + DefaultHasher(size_t k, const std::string& name = ""); // Overridden from Hasher. virtual digest_vector Hash(const void* x, size_t n) const /* final */; @@ -190,7 +192,7 @@ public: * * @param name The name of the hasher. */ - DoubleHasher(size_t k, const std::string& name); + DoubleHasher(size_t k, const std::string& name = ""); // Overridden from Hasher. virtual digest_vector Hash(const void* x, size_t n) const /* final */; From 2fc5ca53ff8f90aa959b2bc65626b319a1dee529 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Thu, 25 Jul 2013 17:35:35 +0200 Subject: [PATCH 2/3] Make hashers serializable. There exists still a small bug that I could not find; the unit test istate/opaque.bro fails. If someone sees why, please chime in. 
--- src/SerialTypes.h | 6 ++ src/probabilistic/BloomFilter.cc | 19 +----- src/probabilistic/BloomFilter.h | 3 - src/probabilistic/Hasher.cc | 99 ++++++++++++++++++++++++++---- src/probabilistic/Hasher.h | 33 +++++----- src/probabilistic/bloom-filter.bif | 4 +- 6 files changed, 117 insertions(+), 47 deletions(-) diff --git a/src/SerialTypes.h b/src/SerialTypes.h index 85aed10bda..9933d005f0 100644 --- a/src/SerialTypes.h +++ b/src/SerialTypes.h @@ -52,6 +52,7 @@ SERIAL_IS(RE_MATCHER, 0x1400) SERIAL_IS(BITVECTOR, 0x1500) SERIAL_IS(COUNTERVECTOR, 0x1600) SERIAL_IS(BLOOMFILTER, 0x1700) +SERIAL_IS(HASHER, 0x1800) // These are the externally visible types. const SerialType SER_NONE = 0; @@ -206,6 +207,11 @@ SERIAL_BLOOMFILTER(BLOOMFILTER, 1) SERIAL_BLOOMFILTER(BASICBLOOMFILTER, 2) SERIAL_BLOOMFILTER(COUNTINGBLOOMFILTER, 3) +#define SERIAL_HASHER(name, val) SERIAL_CONST(name, val, HASHER) +SERIAL_HASHER(HASHER, 1) +SERIAL_HASHER(DEFAULTHASHER, 2) +SERIAL_HASHER(DOUBLEHASHER, 3) + SERIAL_CONST2(ID) SERIAL_CONST2(STATE_ACCESS) SERIAL_CONST2(CASE) diff --git a/src/probabilistic/BloomFilter.cc b/src/probabilistic/BloomFilter.cc index 7f769cbf7c..d446643ed3 100644 --- a/src/probabilistic/BloomFilter.cc +++ b/src/probabilistic/BloomFilter.cc @@ -38,28 +38,15 @@ bool BloomFilter::DoSerialize(SerialInfo* info) const { DO_SERIALIZE(SER_BLOOMFILTER, SerialObj); - if ( ! SERIALIZE(static_cast<uint16>(hasher->K())) ) - return false; - - return SERIALIZE_STR(hasher->Name().c_str(), hasher->Name().size()); + return hasher->Serialize(info); } bool BloomFilter::DoUnserialize(UnserialInfo* info) { DO_UNSERIALIZE(SerialObj); - uint16 k; - if ( ! UNSERIALIZE(&k) ) - return false; - - const char* name; - if ( ! 
UNSERIALIZE_STR(&name, 0) ) - return false; - - hasher = Hasher::Create(k, name); - - delete [] name; - return true; + hasher = Hasher::Unserialize(info); + return hasher != 0; } size_t BasicBloomFilter::M(double fp, size_t capacity) diff --git a/src/probabilistic/BloomFilter.h b/src/probabilistic/BloomFilter.h index b6cf18672f..4865ae145c 100644 --- a/src/probabilistic/BloomFilter.h +++ b/src/probabilistic/BloomFilter.h @@ -13,9 +13,6 @@ class CounterVector; /** * The abstract base class for Bloom filters. - * - * At this point we won't let the user choose the hasher, but we might open - * up the interface in the future. */ class BloomFilter : public SerialObj { public: diff --git a/src/probabilistic/Hasher.cc b/src/probabilistic/Hasher.cc index f9ce7bdd6b..7db363142d 100644 --- a/src/probabilistic/Hasher.cc +++ b/src/probabilistic/Hasher.cc @@ -4,9 +4,56 @@ #include "Hasher.h" #include "digest.h" +#include "Serializer.h" using namespace probabilistic; +bool Hasher::Serialize(SerialInfo* info) const + { + return SerialObj::Serialize(info); + } + +Hasher* Hasher::Unserialize(UnserialInfo* info) + { + return reinterpret_cast<Hasher*>(SerialObj::Unserialize(info, SER_HASHER)); + } + +bool Hasher::DoSerialize(SerialInfo* info) const + { + DO_SERIALIZE(SER_HASHER, SerialObj); + + if ( ! SERIALIZE(static_cast<uint16>(k)) ) + return false; + + return SERIALIZE_STR(name.c_str(), name.size()); + } + +bool Hasher::DoUnserialize(UnserialInfo* info) + { + DO_UNSERIALIZE(SerialObj); + + uint16 serial_k; + if ( ! UNSERIALIZE(&serial_k) ) + return false; + k = serial_k; + assert(k > 0); + + const char* serial_name; + if ( ! 
UNSERIALIZE_STR(&serial_name, 0) ) + return false; + name = serial_name; + delete [] serial_name; + + return true; + } + +Hasher::Hasher(size_t k, const std::string& arg_name) + : k(k) + { + name = arg_name; + } + + UHF::UHF(size_t seed, const std::string& extra) : h(compute_seed(seed, extra)) { @@ -40,17 +87,6 @@ size_t UHF::compute_seed(size_t seed, const std::string& extra) return *reinterpret_cast<size_t*>(buf); } -Hasher* Hasher::Create(size_t k, const std::string& name) - { - return new DefaultHasher(k, name); - } - -Hasher::Hasher(size_t k, const std::string& arg_name) - : k(k) - { - name = arg_name; - } - DefaultHasher::DefaultHasher(size_t k, const std::string& name) : Hasher(k, name) { @@ -82,6 +118,27 @@ bool DefaultHasher::Equals(const Hasher* other) const return hash_functions == o->hash_functions; } +IMPLEMENT_SERIAL(DefaultHasher, SER_DEFAULTHASHER) + +bool DefaultHasher::DoSerialize(SerialInfo* info) const + { + DO_SERIALIZE(SER_DEFAULTHASHER, Hasher); + + // Nothing to do here, the base class has all we need serialized already. + return true; + } + +bool DefaultHasher::DoUnserialize(UnserialInfo* info) + { + DO_UNSERIALIZE(Hasher); + + hash_functions.clear(); + for ( size_t i = 0; i < K(); ++i ) + hash_functions.push_back(UHF(i, Name())); + + return true; + } + DoubleHasher::DoubleHasher(size_t k, const std::string& name) : Hasher(k, name), h1(1, name), h2(2, name) { @@ -112,3 +169,23 @@ bool DoubleHasher::Equals(const Hasher* other) const const DoubleHasher* o = static_cast<const DoubleHasher*>(other); return h1 == o->h1 && h2 == o->h2; } + +IMPLEMENT_SERIAL(DoubleHasher, SER_DOUBLEHASHER) + +bool DoubleHasher::DoSerialize(SerialInfo* info) const + { + DO_SERIALIZE(SER_DOUBLEHASHER, Hasher); + + // Nothing to do here, the base class has all we need serialized already. 
+ return true; + } + +bool DoubleHasher::DoUnserialize(UnserialInfo* info) + { + DO_UNSERIALIZE(Hasher); + + h1 = UHF(1, Name()); + h2 = UHF(2, Name()); + + return true; + } diff --git a/src/probabilistic/Hasher.h b/src/probabilistic/Hasher.h index d266565284..7e6a8ba134 100644 --- a/src/probabilistic/Hasher.h +++ b/src/probabilistic/Hasher.h @@ -5,6 +5,7 @@ #include "Hash.h" #include "H3.h" +#include "SerialObj.h" namespace probabilistic { @@ -12,7 +13,7 @@ namespace probabilistic { * Abstract base class for hashers. A hasher creates a family of hash * functions to hash an element *k* times. */ -class Hasher { +class Hasher : public SerialObj { public: typedef hash_t digest; typedef std::vector<digest> digest_vector; @@ -69,24 +70,18 @@ public: */ const std::string& Name() const { return name; } - /** - * Constructs the hasher used by the implementation. This hardcodes a - * specific hashing policy. It exists only because the HashingPolicy - * class hierachy is not yet serializable. - * - * @param k The number of hash functions to apply. - * - * @param name The hasher's name. - * - * @return Returns a new hasher instance. - */ - static Hasher* Create(size_t k, const std::string& name); + bool Serialize(SerialInfo* info) const; + static Hasher* Unserialize(UnserialInfo* info); protected: + DECLARE_ABSTRACT_SERIAL(Hasher); + + Hasher() { } + Hasher(size_t k, const std::string& name); private: - const size_t k; + size_t k; std::string name; }; @@ -106,7 +101,7 @@ public: * seed to compute the seed for t to compute the seed NUL-terminated * string as additional seed. 
*/ - UHF(size_t seed, const std::string& extra = ""); + UHF(size_t seed = 0, const std::string& extra = ""); template <typename T> Hasher::digest operator()(const T& x) const @@ -175,7 +170,11 @@ public: virtual DefaultHasher* Clone() const /* final */; virtual bool Equals(const Hasher* other) const /* final */; + DECLARE_SERIAL(DefaultHasher); + private: + DefaultHasher() { } + std::vector<UHF> hash_functions; }; @@ -199,7 +198,11 @@ public: virtual DoubleHasher* Clone() const /* final */; virtual bool Equals(const Hasher* other) const /* final */; + DECLARE_SERIAL(DoubleHasher); + private: + DoubleHasher() { } + UHF h1; UHF h2; }; diff --git a/src/probabilistic/bloom-filter.bif b/src/probabilistic/bloom-filter.bif index dd21688fdd..f03e3d149b 100644 --- a/src/probabilistic/bloom-filter.bif +++ b/src/probabilistic/bloom-filter.bif @@ -40,7 +40,7 @@ function bloomfilter_basic_init%(fp: double, capacity: count, size_t cells = BasicBloomFilter::M(fp, capacity); size_t optimal_k = BasicBloomFilter::K(cells, capacity); - const Hasher* h = Hasher::Create(optimal_k, name->CheckString()); + const Hasher* h = new DefaultHasher(optimal_k, name->CheckString()); return new BloomFilterVal(new BasicBloomFilter(h, cells)); %} @@ -68,7 +68,7 @@ function bloomfilter_counting_init%(k: count, cells: count, max: count, return 0; } - const Hasher* h = Hasher::Create(k, name->CheckString()); + const Hasher* h = new DefaultHasher(k, name->CheckString()); uint16 width = 1; while ( max >>= 1 ) From 43825212db25ce540c6a12905844d246f8784c05 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Tue, 30 Jul 2013 12:17:53 +0200 Subject: [PATCH 3/3] Update submodules. 
--- aux/binpac | 2 +- aux/bro-aux | 2 +- aux/broccoli | 2 +- aux/broctl | 2 +- cmake | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/aux/binpac b/aux/binpac index c39bd478b9..314fa8f65f 160000 --- a/aux/binpac +++ b/aux/binpac @@ -1 +1 @@ -Subproject commit c39bd478b9d0ecd05b1b83aa9d09a7887893977c +Subproject commit 314fa8f65fc240e960c23c3bba98623436a72b98 diff --git a/aux/bro-aux b/aux/bro-aux index a9942558c7..91d258cc8b 160000 --- a/aux/bro-aux +++ b/aux/bro-aux @@ -1 +1 @@ -Subproject commit a9942558c7d3dfd80148b8aaded64c82ade3d117 +Subproject commit 91d258cc8b2f74cd02fc93dfe61f73ec9f0dd489 diff --git a/aux/broccoli b/aux/broccoli index 889f9c6594..d59c73b6e0 160000 --- a/aux/broccoli +++ b/aux/broccoli @@ -1 +1 @@ -Subproject commit 889f9c65944ceac20ad9230efc39d33e6e1221c3 +Subproject commit d59c73b6e0966ad63bbc63a35741b5f68263e7b1 diff --git a/aux/broctl b/aux/broctl index 0cd102805e..52fd91261f 160000 --- a/aux/broctl +++ b/aux/broctl @@ -1 +1 @@ -Subproject commit 0cd102805e73343cab3f9fd4a76552e13940dad9 +Subproject commit 52fd91261f41fa1528f7b964837a364d7991889e diff --git a/cmake b/cmake index 0187b33a29..026639f836 160000 --- a/cmake +++ b/cmake @@ -1 +1 @@ -Subproject commit 0187b33a29d5ec824f940feff60dc5d8c2fe314f +Subproject commit 026639f8368e56742c0cb5d9fb390ea64e60ec50