diff --git a/src/Hash.cc b/src/Hash.cc index 29a3b3cc01..106e6692bc 100644 --- a/src/Hash.cc +++ b/src/Hash.cc @@ -1,32 +1,58 @@ // See the file "COPYING" in the main distribution directory for copyright. -// The hash function works as follows: -// -// 1) For short data we have a number of universal hash functions: -// UHASH_CW (ax + b (mod p)), H3, Dietzfelbinger and UMAC_NH (UMAC_NH is -// not as strongly universal as the others, but probably enough). All -// these functions require number of random bits linear to the data -// length. And we use them for data no longer than UHASH_KEY_SIZE. -// They are faster than HMAC/MD5 used for longer data, and most hash -// operations are on short data. -// -// 2) As a fall-back, we use HMAC/MD5 (keyed MD5) for data of arbitrary -// length. MD5 is used as a scrambling scheme so that it is difficult -// for the adversary to construct conflicts, though I do not know if -// HMAC/MD5 is provably universal. - #include "zeek-config.h" #include "Hash.h" +#include "digest.h" #include "Reporter.h" #include "BroString.h" #include "highwayhash/sip_hash.h" +#include "highwayhash/highwayhash_target.h" +#include "highwayhash/instruction_sets.h" + +// we use the following lines to not pull in the highwayhash headers in Hash.h - but to check the types did not change underneath us. 
+static_assert(std::is_same::value, "Highwayhash return values must match hash_x_t"); +static_assert(std::is_same::value, "Highwayhash return values must match hash_x_t"); +static_assert(std::is_same::value, "Highwayhash return values must match hash_x_t"); + +void KeyedHash::InitializeSeeds(const std::array& seed_data) + { + static_assert(std::is_same::value, "Highwayhash Key is not unsigned long long[2]"); + static_assert(std::is_same::value, "Highwayhash HHKey is not uint64_t[4]"); + if ( seeds_initialized ) + return; + + internal_md5((const u_char*) seed_data.data(), sizeof(seed_data) - 16, shared_hmac_md5_key); // The last 128 bits of buf are for siphash + // yes, we use the same buffer twice to initialize two different keys. This should not really be a + // security problem of any kind: hmac-md5 is not really used anymore - and even if it was, the hashes + // should not reveal any information about their initialization vector. + static_assert(sizeof(shared_highwayhash_key) == SHA256_DIGEST_LENGTH); + calculate_digest(Hash_SHA256, (const u_char*) seed_data.data(), sizeof(seed_data) - 16, reinterpret_cast(shared_highwayhash_key)); + memcpy(shared_siphash_key, reinterpret_cast(seed_data.data()) + 64, 16); + + seeds_initialized = true; + } + +hash64_t KeyedHash::Hash64(const void* bytes, uint64_t size) + { + return highwayhash::SipHash(shared_siphash_key, reinterpret_cast(bytes), size); + } + +void KeyedHash::Hash128(const void* bytes, uint64_t size, hash128_t* result) + { + highwayhash::InstructionSets::Run(shared_highwayhash_key, reinterpret_cast(bytes), size, result); + } + +void KeyedHash::Hash256(const void* bytes, uint64_t size, hash256_t* result) + { + highwayhash::InstructionSets::Run(shared_highwayhash_key, reinterpret_cast(bytes), size, result); + } void init_hash_function() { // Make sure we have already called init_random_seed(). - if ( ! (hmac_key_set && siphash_key_set) ) + if ( ! 
KeyedHash::IsInitialized() ) reporter->InternalError("Zeek's hash functions aren't fully initialized"); } @@ -156,6 +182,5 @@ void* HashKey::CopyKey(const void* k, int s) const hash_t HashKey::HashBytes(const void* bytes, int size) { - hash_t digest = highwayhash::SipHash(shared_siphash_key, reinterpret_cast(bytes), size); - return digest; + return KeyedHash::Hash64(bytes, size); } diff --git a/src/Hash.h b/src/Hash.h index db3c571641..ab0dcaa96e 100644 --- a/src/Hash.h +++ b/src/Hash.h @@ -1,5 +1,22 @@ // See the file "COPYING" in the main distribution directory for copyright. +/*** + * This file contains functions to generate hashes using keyed hash functions. + * Keyed hash functions make it difficult/impossible to find information about the + * output of a hash when the key is unknown to the attacker. This fact holds, even + * when the input value is known. + * + * We use these kinds of hashes heavily internally - e.g. for scriptland hash generation. + * It is important that these hashes are not easily guessable to prevent complexity attacks. + * + * The HashKey class is the actual class that is used to generate Hash keys that are used internally, + * e.g. for lookups in hash-tables; the Hashes are also used for connection ID generation. + * + * This means that the hashes created by most functions in this file will be different each run, unless + * a seed file is used. There are a few functions that create hashes that are static over runs + * and use an installation-wide seed value; these are specifically called out.
+ */ + #pragma once #include "util.h" // for bro_int_t @@ -8,7 +25,43 @@ class BroString; +// to allow bro_md5_hmac access to the hmac seed +#include "ZeekArgs.h" +class Val; +class Frame; +namespace BifFunc { + extern Val* bro_md5_hmac(Frame* frame, const zeek::Args*); +} + typedef uint64_t hash_t; +typedef uint64_t hash64_t; +typedef uint64_t hash128_t[2]; +typedef uint64_t hash256_t[4]; + +class KeyedHash { +public: + constexpr static int SEED_INIT_SIZE = 20; + static void InitializeSeeds(const std::array& seed_data); + static bool IsInitialized() { return seeds_initialized; } + + static hash64_t Hash64(const void* bytes, uint64_t size); + static void Hash128(const void* bytes, uint64_t size, hash128_t* result); + static void Hash256(const void* bytes, uint64_t size, hash256_t* result); + + static hash64_t StaticHash64(const void* bytes, uint64_t size); + static void StaticHash128(const void* bytes, uint64_t size, hash128_t* result); + static void StaticHash256(const void* bytes, uint64_t size, hash256_t* result); +private: + // actually HHKey + alignas(32) inline static uint64_t shared_highwayhash_key[4]; + // actually HH_U64, which has the same type + alignas(16) inline static unsigned long long shared_siphash_key[2]; + inline static uint8_t shared_hmac_md5_key[16]; + inline static bool seeds_initialized = false; + + friend void hmac_md5(size_t size, const unsigned char* bytes, unsigned char digest[16]); + friend Val* BifFunc::bro_md5_hmac(Frame* frame, const zeek::Args*); +}; typedef enum { HASH_KEY_INT, diff --git a/src/probabilistic/Hasher.cc b/src/probabilistic/Hasher.cc index 992a7e0d7a..78f7bb3c72 100644 --- a/src/probabilistic/Hasher.cc +++ b/src/probabilistic/Hasher.cc @@ -106,7 +106,7 @@ UHF::UHF(Hasher::seed_t arg_seed) // times. 
Hasher::digest UHF::hash(const void* x, size_t n) const { - assert(sizeof(Hasher::seed_t) == 16); // siphash always needs a 128 bit seed + static_assert(std::is_same::value, "Seed value is not the same type as highwayhash key"); return highwayhash::SipHash(seed.h, reinterpret_cast(x), n); } diff --git a/src/probabilistic/Hasher.h b/src/probabilistic/Hasher.h index d8f79a7322..5211373024 100644 --- a/src/probabilistic/Hasher.h +++ b/src/probabilistic/Hasher.h @@ -3,6 +3,7 @@ #pragma once #include "Hash.h" +#include "highwayhash/sip_hash.h" #include @@ -24,7 +25,8 @@ public: typedef hash_t digest; typedef std::vector digest_vector; struct seed_t { - alignas(16) highwayhash::HH_U64 h[2]; + // actually HH_U64, which has the same type + alignas(16) unsigned long long h[2]; friend seed_t operator+(seed_t lhs, const uint64_t rhs) { lhs.h[0] += rhs; diff --git a/src/util.cc b/src/util.cc index 19aef286c5..cd3d687c0c 100644 --- a/src/util.cc +++ b/src/util.cc @@ -55,6 +55,7 @@ #include "iosource/Manager.h" #include "iosource/PktSrc.h" #include "ConvertUTF.h" +#include "Hash.h" #include "3rdparty/doctest.h" @@ -997,27 +998,21 @@ std::string strstrip(std::string s) return s; } -bool hmac_key_set = false; -uint8_t shared_hmac_md5_key[16]; - -bool siphash_key_set = false; -alignas(16) highwayhash::HH_U64 shared_siphash_key[2]; - void hmac_md5(size_t size, const unsigned char* bytes, unsigned char digest[16]) { - if ( ! hmac_key_set ) + if ( ! 
KeyedHash::seeds_initialized ) reporter->InternalError("HMAC-MD5 invoked before the HMAC key is set"); internal_md5(bytes, size, digest); for ( int i = 0; i < 16; ++i ) - digest[i] ^= shared_hmac_md5_key[i]; + digest[i] ^= KeyedHash::shared_hmac_md5_key[i]; internal_md5(digest, 16, digest); } static bool read_random_seeds(const char* read_file, uint32_t* seed, - uint32_t* buf, int bufsiz) + std::array& buf) { FILE* f = nullptr; @@ -1035,8 +1030,8 @@ static bool read_random_seeds(const char* read_file, uint32_t* seed, return false; } - // Read seeds for MD5. - for ( int i = 0; i < bufsiz; ++i ) + // Read seeds for hmac-md5/siphash/highwayhash. + for ( int i = 0; i < KeyedHash::SEED_INIT_SIZE; ++i ) { int tmp; if ( fscanf(f, "%u", &tmp) != 1 ) @@ -1053,7 +1048,7 @@ static bool read_random_seeds(const char* read_file, uint32_t* seed, } static bool write_random_seeds(const char* write_file, uint32_t seed, - uint32_t* buf, int bufsiz) + std::array& buf) { FILE* f = nullptr; @@ -1066,7 +1061,7 @@ static bool write_random_seeds(const char* write_file, uint32_t seed, fprintf(f, "%u\n", seed); - for ( int i = 0; i < bufsiz; ++i ) + for ( int i = 0; i < KeyedHash::SEED_INIT_SIZE; ++i ) fprintf(f, "%u\n", buf[i]); fclose(f); @@ -1096,16 +1091,14 @@ void bro_srandom(unsigned int seed) void init_random_seed(const char* read_file, const char* write_file) { - static const int bufsiz = 20; - uint32_t buf[bufsiz]; - memset(buf, 0, sizeof(buf)); - int pos = 0; // accumulates entropy + std::array buf = {}; + size_t pos = 0; // accumulates entropy bool seeds_done = false; uint32_t seed = 0; if ( read_file ) { - if ( ! read_random_seeds(read_file, &seed, buf, bufsiz) ) + if ( ! read_random_seeds(read_file, &seed, buf) ) reporter->FatalError("Could not load seeds from file '%s'.\n", read_file); else @@ -1115,7 +1108,7 @@ void init_random_seed(const char* read_file, const char* write_file) #ifdef HAVE_GETRANDOM if ( ! 
seeds_done ) { - ssize_t nbytes = getrandom(buf, sizeof(buf), 0); + ssize_t nbytes = getrandom(buf.data(), sizeof(buf), 0); seeds_done = nbytes == ssize_t(sizeof(buf)); } #endif @@ -1123,7 +1116,7 @@ void init_random_seed(const char* read_file, const char* write_file) if ( ! seeds_done ) { // Gather up some entropy. - gettimeofday((struct timeval *)(buf + pos), 0); + gettimeofday((struct timeval *)(buf.data() + pos), 0); pos += sizeof(struct timeval) / sizeof(uint32_t); // use urandom. For reasons see e.g. http://www.2uo.de/myths-about-urandom/ @@ -1137,8 +1130,8 @@ void init_random_seed(const char* read_file, const char* write_file) if ( fd >= 0 ) { - int amt = read(fd, buf + pos, - sizeof(uint32_t) * (bufsiz - pos)); + int amt = read(fd, buf.data() + pos, + sizeof(uint32_t) * (KeyedHash::SEED_INIT_SIZE - pos)); safe_close(fd); if ( amt > 0 ) @@ -1149,12 +1142,12 @@ void init_random_seed(const char* read_file, const char* write_file) errno = 0; } - if ( pos < bufsiz ) - reporter->FatalError("Could not read enough random data from /dev/urandom. Wanted %d, got %d", bufsiz, pos); + if ( pos < KeyedHash::SEED_INIT_SIZE ) + reporter->FatalError("Could not read enough random data from /dev/urandom. Wanted %d, got %lu", KeyedHash::SEED_INIT_SIZE, pos); if ( ! seed ) { - for ( int i = 0; i < pos; ++i ) + for ( size_t i = 0; i < pos; ++i ) { seed ^= buf[i]; seed = (seed << 1) | (seed >> 31); @@ -1172,22 +1165,10 @@ void init_random_seed(const char* read_file, const char* write_file) first_seed_saved = true; } - if ( ! hmac_key_set ) - { - assert(sizeof(buf) - 16 == 64); - internal_md5((const u_char*) buf, sizeof(buf) - 16, shared_hmac_md5_key); // The last 128 bits of buf are for siphash - hmac_key_set = true; - } + if ( ! KeyedHash::IsInitialized() ) + KeyedHash::InitializeSeeds(buf); - if ( ! 
siphash_key_set ) - { - assert(sizeof(buf) - 64 == 16); // siphash key length is always 128 bytes, independent of implementation - assert(sizeof(shared_siphash_key) == 16); - memcpy(shared_siphash_key, reinterpret_cast(buf) + 64, 16); - siphash_key_set = true; - } - - if ( write_file && ! write_random_seeds(write_file, seed, buf, bufsiz) ) + if ( write_file && ! write_random_seeds(write_file, seed, buf) ) reporter->Error("Could not write seeds to file '%s'.\n", write_file); } diff --git a/src/util.h b/src/util.h index 97ae3c19bd..21051b0a66 100644 --- a/src/util.h +++ b/src/util.h @@ -25,7 +25,6 @@ #include #include #include // std::unique_ptr -#include "highwayhash/sip_hash.h" #include "zeek-config.h" @@ -200,11 +199,6 @@ extern std::string strreplace(const std::string& s, const std::string& o, const // Remove all leading and trailing white space from string. extern std::string strstrip(std::string s); -extern bool hmac_key_set; -extern uint8_t shared_hmac_md5_key[16]; -extern bool siphash_key_set; -extern highwayhash::HH_U64 shared_siphash_key[2]; - extern void hmac_md5(size_t size, const unsigned char* bytes, unsigned char digest[16]); diff --git a/src/zeek.bif b/src/zeek.bif index 539c87e5d2..864ad84efc 100644 --- a/src/zeek.bif +++ b/src/zeek.bif @@ -27,6 +27,7 @@ #include "iosource/PktDumper.h" #include "IntrusivePtr.h" #include "input.h" +#include "Hash.h" using namespace std; @@ -615,7 +616,7 @@ function sha256_hash%(...%): string function md5_hmac%(...%): string %{ unsigned char hmac[MD5_DIGEST_LENGTH]; - MD5Val::hmac(@ARG@, shared_hmac_md5_key, hmac); + MD5Val::hmac(@ARG@, KeyedHash::shared_hmac_md5_key, hmac); return new StringVal(md5_digest_print(hmac)); %}