From 34965b4e77b3091dd0d959873b21239f3da02ac4 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Thu, 1 Aug 2013 19:15:28 +0200 Subject: [PATCH] Support UHF hashing for >= UHASH_KEY_SIZE bytes. --- src/probabilistic/Hasher.cc | 23 +++++++++++++++++++---- src/probabilistic/Hasher.h | 5 +++-- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/src/probabilistic/Hasher.cc b/src/probabilistic/Hasher.cc index b59274df7d..fe8eb66ad9 100644 --- a/src/probabilistic/Hasher.cc +++ b/src/probabilistic/Hasher.cc @@ -1,6 +1,7 @@ // See the file "COPYING" in the main distribution directory for copyright. #include +#include <openssl/md5.h> #include "Hasher.h" #include "NetVar.h" @@ -82,15 +83,29 @@ Hasher::Hasher(size_t arg_k, size_t arg_seed) seed = arg_seed; } -UHF::UHF(size_t seed) - : h(seed) +UHF::UHF(size_t arg_seed) + : h(arg_seed) { + seed = arg_seed; } +// This function is almost equivalent to HashKey::HashBytes except that it does +// not depend on global state and that we mix in the seed multiple times. Hasher::digest UHF::hash(const void* x, size_t n) const { - assert(n <= UHASH_KEY_SIZE); - return n == 0 ? 0 : h(x, n); + if ( n <= UHASH_KEY_SIZE ) + return n == 0 ? 0 : h(x, n); + + unsigned char d[16]; + MD5(reinterpret_cast<const unsigned char*>(x), n, d); + + const unsigned char* s = reinterpret_cast<const unsigned char*>(&seed); + for ( size_t i = 0; i < 16; ++i ) + d[i] ^= s[i % sizeof(seed)]; + + MD5(d, 16, d); + + return d[0]; } DefaultHasher::DefaultHasher(size_t k, size_t seed) diff --git a/src/probabilistic/Hasher.h b/src/probabilistic/Hasher.h index 6b75fa1bea..a3322f5e37 100644 --- a/src/probabilistic/Hasher.h +++ b/src/probabilistic/Hasher.h @@ -123,9 +123,9 @@ public: * Constructs an H3 hash function seeded with a given seed and an * optional extra seed to replace the initial Bro seed. * - * @param seed The seed to use for this instance. + * @param arg_seed The seed to use for this instance. 
*/ - UHF(size_t seed = 0); + UHF(size_t arg_seed = 0); template <typename T> Hasher::digest operator()(const T& x) const @@ -171,6 +171,7 @@ private: static size_t compute_seed(size_t seed); H3 h; + size_t seed; };