mirror of
https://github.com/zeek/zeek.git
synced 2025-10-13 20:18:20 +00:00
Remove the siphash->hmac-md5 switch after 36 bytes.
Currently, siphash is used for strings up to 36 bytes, and hmac-md5 is used for longer strings. This switch-over is a remnant of the previously used hash function, which apparently was slower on longer input strings. The switch-over no longer serves any purpose.

I ran a few performance tests on strings of varying sizes, with 10 million iterations each:
- 40-byte string: siphash 0.31 seconds, hmac-md5 3.8 seconds
- 1080-byte string: siphash 4.2 seconds, hmac-md5 17 seconds
- 18360-byte string: siphash 69 seconds, hmac-md5 240 seconds

Hence, this commit removes the use of hmac-md5. This change causes lines to be reordered in a few logs. This commit also changes the data structure for the seed in probabilistic/Hasher to get rid of a type-punning warning.
This commit is contained in:
parent
bb050910bb
commit
5e7915ae7a
13 changed files with 269 additions and 297 deletions
|
@ -54,7 +54,7 @@ broker::expected<broker::data> Hasher::Serialize() const
|
|||
{
|
||||
return {broker::vector{
|
||||
static_cast<uint64_t>(Type()), static_cast<uint64_t>(k),
|
||||
seed.h1, seed.h2 }};
|
||||
seed.h[0], seed.h[1] }};
|
||||
}
|
||||
|
||||
std::unique_ptr<Hasher> Hasher::Unserialize(const broker::data& data)
|
||||
|
@ -107,23 +107,7 @@ UHF::UHF(Hasher::seed_t arg_seed)
|
|||
Hasher::digest UHF::hash(const void* x, size_t n) const
|
||||
{
|
||||
assert(sizeof(Hasher::seed_t) == 16); // siphash always needs a 128 bit seed
|
||||
|
||||
if ( n <= UHASH_KEY_SIZE )
|
||||
return highwayhash::SipHash(*(reinterpret_cast<const highwayhash::SipHashState::Key*>(&seed)), reinterpret_cast<const char*>(x), n);
|
||||
|
||||
union {
|
||||
unsigned char d[16];
|
||||
Hasher::digest rval;
|
||||
} u;
|
||||
|
||||
internal_md5(reinterpret_cast<const unsigned char*>(x), n, u.d);
|
||||
|
||||
const unsigned char* s = reinterpret_cast<const unsigned char*>(&seed);
|
||||
for ( size_t i = 0; i < 16; ++i )
|
||||
u.d[i] ^= s[i % sizeof(seed)];
|
||||
|
||||
internal_md5(u.d, 16, u.d);
|
||||
return u.rval;
|
||||
return highwayhash::SipHash(seed.h, reinterpret_cast<const char*>(x), n);
|
||||
}
|
||||
|
||||
DefaultHasher::DefaultHasher(size_t k, Hasher::seed_t seed)
|
||||
|
@ -132,7 +116,7 @@ DefaultHasher::DefaultHasher(size_t k, Hasher::seed_t seed)
|
|||
for ( size_t i = 1; i <= k; ++i )
|
||||
{
|
||||
seed_t s = Seed();
|
||||
s.h1 += bro_prng(i);
|
||||
s.h[0] += bro_prng(i);
|
||||
hash_functions.push_back(UHF(s));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,11 +24,10 @@ public:
|
|||
typedef hash_t digest;
|
||||
typedef std::vector<digest> digest_vector;
|
||||
struct seed_t {
|
||||
uint64_t h1;
|
||||
uint64_t h2;
|
||||
alignas(16) highwayhash::HH_U64 h[2];
|
||||
|
||||
friend seed_t operator+(seed_t lhs, const uint64_t rhs) {
|
||||
lhs.h1 += rhs;
|
||||
lhs.h[0] += rhs;
|
||||
return lhs;
|
||||
}
|
||||
};
|
||||
|
@ -179,8 +178,8 @@ public:
|
|||
|
||||
friend bool operator==(const UHF& x, const UHF& y)
|
||||
{
|
||||
return (x.seed.h1 == y.seed.h1) &&
|
||||
(x.seed.h2 == y.seed.h2);
|
||||
return (x.seed.h[0] == y.seed.h[0]) &&
|
||||
(x.seed.h[1] == y.seed.h[1]);
|
||||
}
|
||||
|
||||
friend bool operator!=(const UHF& x, const UHF& y)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue