From 1d508742560281ccd44a52b285fe5672f71afeb7 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Thu, 5 Jun 2014 15:42:31 +0200 Subject: [PATCH 1/2] Use full digest length instead of just one byte. When our universal hash function fell back to MD5 for inputs larger than supported by H3, the computation only returned the first byte of the MD5 result instead of as many bytes as needed to cover sizeof(Hasher::digest). --- src/probabilistic/Hasher.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/probabilistic/Hasher.cc b/src/probabilistic/Hasher.cc index 1f5f0910ba..0f209bfb5b 100644 --- a/src/probabilistic/Hasher.cc +++ b/src/probabilistic/Hasher.cc @@ -108,7 +108,7 @@ Hasher::digest UHF::hash(const void* x, size_t n) const MD5(d, 16, d); - return d[0]; + return *reinterpret_cast<const Hasher::digest*>(d); } DefaultHasher::DefaultHasher(size_t k, size_t seed) From 673607f9a7167f4265c43b22ad34f22c2bd3f577 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Thu, 5 Jun 2014 16:02:25 +0200 Subject: [PATCH 2/2] Switch to double hashing. For large k, standard hashing imposes an unnecessary overhead. By switching to double hashing, we invoke the hash function code at most two times. --- src/probabilistic/bloom-filter.bif | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/probabilistic/bloom-filter.bif b/src/probabilistic/bloom-filter.bif index 26865f160d..3e6b89fa4f 100644 --- a/src/probabilistic/bloom-filter.bif +++ b/src/probabilistic/bloom-filter.bif @@ -44,7 +44,7 @@ function bloomfilter_basic_init%(fp: double, capacity: count, size_t optimal_k = BasicBloomFilter::K(cells, capacity); size_t seed = Hasher::MakeSeed(name->Len() > 0 ? 
name->Bytes() : 0, name->Len()); - const Hasher* h = new DefaultHasher(optimal_k, seed); + const Hasher* h = new DoubleHasher(optimal_k, seed); return new BloomFilterVal(new BasicBloomFilter(h, cells)); %} @@ -84,7 +84,7 @@ function bloomfilter_basic_init2%(k: count, cells: count, size_t seed = Hasher::MakeSeed(name->Len() > 0 ? name->Bytes() : 0, name->Len()); - const Hasher* h = new DefaultHasher(k, seed); + const Hasher* h = new DoubleHasher(k, seed); return new BloomFilterVal(new BasicBloomFilter(h, cells)); %}