mirror of
https://github.com/zeek/zeek.git
synced 2025-10-09 10:08:20 +00:00
Start refactoring hashing.
This commit moves some of the hash data structures and code from util.cc into Hash.cc, where they seem more appropriate. It also starts to make more keyed hash functions available: SipHash remains the default 64-bit keyed hash, and 128-bit and 256-bit HighwayHash variants are now available as well. A few other functions are already declared but not yet implemented; these will be "static" keyed hashes, which use an installation-specific key. They will be used to, e.g., get rid of MD5 hashing for the generation of file UIDs.
This commit is contained in:
parent
d34532f847
commit
360c06a3f8
7 changed files with 123 additions and 67 deletions
61
src/Hash.cc
61
src/Hash.cc
|
@ -1,32 +1,58 @@
|
|||
// See the file "COPYING" in the main distribution directory for copyright.
|
||||
|
||||
// The hash function works as follows:
|
||||
//
|
||||
// 1) For short data we have a number of universal hash functions:
|
||||
// UHASH_CW (ax + b (mod p)), H3, Dietzfelbinger and UMAC_NH (UMAC_NH is
|
||||
// not as strongly universal as the others, but probably enough). All
|
||||
// these functions require a number of random bits linear in the data
|
||||
// length. And we use them for data no longer than UHASH_KEY_SIZE.
|
||||
// They are faster than HMAC/MD5 used for longer data, and most hash
|
||||
// operations are on short data.
|
||||
//
|
||||
// 2) As a fall-back, we use HMAC/MD5 (keyed MD5) for data of arbitrary
|
||||
// length. MD5 is used as a scrambling scheme so that it is difficult
|
||||
// for the adversary to construct conflicts, though I do not know if
|
||||
// HMAC/MD5 is provably universal.
|
||||
|
||||
#include "zeek-config.h"
|
||||
|
||||
#include "Hash.h"
|
||||
#include "digest.h"
|
||||
#include "Reporter.h"
|
||||
#include "BroString.h"
|
||||
|
||||
#include "highwayhash/sip_hash.h"
|
||||
#include "highwayhash/highwayhash_target.h"
|
||||
#include "highwayhash/instruction_sets.h"
|
||||
|
||||
// Hash.h deliberately does not include the highwayhash headers. The
// static_asserts below verify, at compile time, that the hash64_t / hash128_t /
// hash256_t types used by this file are still the same types as the
// corresponding highwayhash result types, so a change in the library is caught
// here instead of going unnoticed.
|
||||
static_assert(std::is_same<hash64_t, highwayhash::HHResult64>::value, "Highwayhash return values must match hash_x_t");
|
||||
static_assert(std::is_same<hash128_t, highwayhash::HHResult128>::value, "Highwayhash return values must match hash_x_t");
|
||||
static_assert(std::is_same<hash256_t, highwayhash::HHResult256>::value, "Highwayhash return values must match hash_x_t");
|
||||
|
||||
// Derives the shared hash keys from seed_data and marks the seeds as
// initialized. If seeds_initialized is already set, the function returns
// without touching any key.
//
// Three keys are filled in:
//   - shared_hmac_md5_key:    MD5 of seed_data minus its last 16 bytes
//   - shared_highwayhash_key: SHA-256 of the same byte range
//   - shared_siphash_key:     16 bytes copied verbatim from byte offset 64
//
// @param seed_data  seed material to derive the keys from.
void KeyedHash::InitializeSeeds(const std::array<uint32_t, SEED_INIT_SIZE>& seed_data)
|
||||
{
|
||||
// Compile-time checks that the key members declared for this class have
// exactly the key types highwayhash expects.
static_assert(std::is_same<decltype(KeyedHash::shared_siphash_key), highwayhash::SipHashState::Key>::value, "Highwayhash Key is not unsigned long long[2]");
|
||||
static_assert(std::is_same<decltype(KeyedHash::shared_highwayhash_key), highwayhash::HHKey>::value, "Highwayhash HHKey is not uint64_t[4]");
|
||||
// Only the first call has any effect.
if ( seeds_initialized )
|
||||
return;
|
||||
|
||||
internal_md5((const u_char*) seed_data.data(), sizeof(seed_data) - 16, shared_hmac_md5_key); // The last 128 bits of seed_data are used for the siphash key (see memcpy below)
|
||||
// Yes, we use the same buffer twice to initialize two different keys. This should not really be a
|
||||
// security problem of any kind: hmac-md5 is not really used anymore - and even if it was, the hashes
|
||||
// should not reveal any information about their initialization vector.
|
||||
// The SHA-256 digest is written directly into the highwayhash key, so the
// two must have the same size.
static_assert(sizeof(shared_highwayhash_key) == SHA256_DIGEST_LENGTH);
|
||||
calculate_digest(Hash_SHA256, (const u_char*) seed_data.data(), sizeof(seed_data) - 16, reinterpret_cast<unsigned char*>(shared_highwayhash_key));
|
||||
// NOTE(review): the hard-coded offset 64 equals sizeof(seed_data) - 16 only
// if seed_data is 80 bytes (SEED_INIT_SIZE == 20) - confirm against Hash.h.
memcpy(shared_siphash_key, reinterpret_cast<const char*>(seed_data.data()) + 64, 16);
|
||||
|
||||
seeds_initialized = true;
|
||||
}
|
||||
|
||||
// Returns the 64-bit SipHash of the `size` bytes starting at `bytes`, keyed
// with shared_siphash_key.
//
// @param bytes  start of the data to hash.
// @param size   number of bytes to hash.
// @return the resulting hash value.
hash64_t KeyedHash::Hash64(const void* bytes, uint64_t size)
|
||||
{
|
||||
return highwayhash::SipHash(shared_siphash_key, reinterpret_cast<const char *>(bytes), size);
|
||||
}
|
||||
|
||||
// Computes a HighwayHash of the `size` bytes starting at `bytes`, keyed with
// shared_highwayhash_key, and stores it in *result. The call is the same as
// in Hash256; only the type of `result` (hash128_t here) differs.
//
// @param bytes   start of the data to hash.
// @param size    number of bytes to hash.
// @param result  output location for the hash value.
void KeyedHash::Hash128(const void* bytes, uint64_t size, hash128_t* result)
|
||||
{
|
||||
// presumably InstructionSets::Run dispatches to an implementation suited to
// the running CPU - see the highwayhash documentation to confirm.
highwayhash::InstructionSets::Run<highwayhash::HighwayHash>(shared_highwayhash_key, reinterpret_cast<const char *>(bytes), size, result);
|
||||
}
|
||||
|
||||
// Computes a HighwayHash of the `size` bytes starting at `bytes`, keyed with
// shared_highwayhash_key, and stores it in *result. The call is the same as
// in Hash128; only the type of `result` (hash256_t here) differs.
//
// @param bytes   start of the data to hash.
// @param size    number of bytes to hash.
// @param result  output location for the hash value.
void KeyedHash::Hash256(const void* bytes, uint64_t size, hash256_t* result)
|
||||
{
|
||||
// presumably InstructionSets::Run dispatches to an implementation suited to
// the running CPU - see the highwayhash documentation to confirm.
highwayhash::InstructionSets::Run<highwayhash::HighwayHash>(shared_highwayhash_key, reinterpret_cast<const char *>(bytes), size, result);
|
||||
}
|
||||
|
||||
// Sanity check for hash initialization: reports an internal error through
// the reporter when the hash keys have not been set up.
//
// NOTE(review): this listing appears to be a rendered diff. The first
// condition below (hmac_key_set / siphash_key_set) looks like the removed
// pre-refactoring check and the KeyedHash::IsInitialized() condition like its
// replacement - confirm against the actual file before relying on both.
void init_hash_function()
|
||||
{
|
||||
// Make sure we have already called init_random_seed().
|
||||
if ( ! (hmac_key_set && siphash_key_set) )
|
||||
if ( ! KeyedHash::IsInitialized() )
|
||||
reporter->InternalError("Zeek's hash functions aren't fully initialized");
|
||||
}
|
||||
|
||||
|
@ -156,6 +182,5 @@ void* HashKey::CopyKey(const void* k, int s) const
|
|||
|
||||
// Hashes the `size` bytes starting at `bytes` and returns the result as a
// hash_t.
//
// NOTE(review): this listing appears to be a rendered diff. The direct
// highwayhash::SipHash call followed by `return digest;` looks like the
// removed implementation, and the delegation to KeyedHash::Hash64 like its
// replacement - confirm against the actual file.
//
// @param bytes  start of the data to hash.
// @param size   number of bytes to hash.
// @return the hash of the given bytes.
hash_t HashKey::HashBytes(const void* bytes, int size)
|
||||
{
|
||||
hash_t digest = highwayhash::SipHash(shared_siphash_key, reinterpret_cast<const char *>(bytes), size);
|
||||
return digest;
|
||||
return KeyedHash::Hash64(bytes, size);
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue