mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 14:48:21 +00:00
Remove the siphash->hmac-md5 switch after 36 bytes.
Currently, siphash is used for strings up to 36 bytes. hmac-md5 is used for longer strings. This switch-over is a remnant of the previous hash-function that was used, which apparently was slower with longer input strings. This change serves no purpose anymore. I performed a few performance tests on strings of varying sizes: For a 40 byte string with 10 million iterations: siphash: 0.31 seconds hmac-md5: 3.8 seconds For a 1080 byte string with 10 million iterations: siphash: 4.2 seconds hmac-md5: 17 seconds For a 18360 byte string with 10 million iterations: siphash: 69 seconds hmac-md5: 240 seconds Hence, this commit removes the use of hmac-md5. This change causes reordering of lines in a few logs. This commit also changes the datastructure for the seed in probabilistic/Hasher to get rid of a type-punning warning.
This commit is contained in:
parent
bb050910bb
commit
5e7915ae7a
13 changed files with 269 additions and 297 deletions
13
src/Hash.cc
13
src/Hash.cc
|
@ -156,15 +156,6 @@ void* HashKey::CopyKey(const void* k, int s) const
|
|||
|
||||
hash_t HashKey::HashBytes(const void* bytes, int size)
|
||||
{
|
||||
if ( size <= UHASH_KEY_SIZE )
|
||||
{
|
||||
hash_t digest = highwayhash::SipHash(shared_siphash_key, reinterpret_cast<const char *>(bytes), size);
|
||||
return digest;
|
||||
}
|
||||
|
||||
// Fall back to HMAC/MD5 for longer data (which is usually rare).
|
||||
assert(sizeof(hash_t) == 8);
|
||||
hash_t digest[2]; // 2x hash_t (uint64_t) = 128 bits = 32 hex chars = sizeof md5
|
||||
hmac_md5(size, (const unsigned char*) bytes, (unsigned char*) digest);
|
||||
return digest[0];
|
||||
hash_t digest = highwayhash::SipHash(shared_siphash_key, reinterpret_cast<const char *>(bytes), size);
|
||||
return digest;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue