mirror of
https://github.com/zeek/zeek.git
synced 2025-10-07 17:18:20 +00:00
Start refactoring hashing.
This commit moves some of the hash data structures and code from util.cc into Hash.cc, where they seem more appropriate. It also starts to make more keyed hash functions available: SipHash remains the default 64-bit keyed hash, and 128- and 256-bit HighwayHash variants are now available as well. A few further functions are already declared but not yet implemented; these will be "static" keyed hashes that use an installation-specific key. They will be used to, e.g., get rid of MD5 hashing for the generation of file UIDs.
This commit is contained in:
parent
d34532f847
commit
360c06a3f8
7 changed files with 123 additions and 67 deletions
61
src/Hash.cc
61
src/Hash.cc
|
@ -1,32 +1,58 @@
|
|||
// See the file "COPYING" in the main distribution directory for copyright.
|
||||
|
||||
// The hash function works as follows:
|
||||
//
|
||||
// 1) For short data we have a number of universal hash functions:
|
||||
// UHASH_CW (ax + b (mod p)), H3, Dietzfelbinger and UMAC_NH (UMAC_NH is
|
||||
// not as strongly universal as the others, but probably enough). All
|
||||
// these functions require number of random bits linear to the data
|
||||
// length. And we use them for data no longer than UHASH_KEY_SIZE.
|
||||
// They are faster than HMAC/MD5 used for longer data, and most hash
|
||||
// operations are on short data.
|
||||
//
|
||||
// 2) As a fall-back, we use HMAC/MD5 (keyed MD5) for data of arbitrary
|
||||
// length. MD5 is used as a scrambling scheme so that it is difficult
|
||||
// for the adversary to construct conflicts, though I do not know if
|
||||
// HMAC/MD5 is provably universal.
|
||||
|
||||
#include "zeek-config.h"
|
||||
|
||||
#include "Hash.h"
|
||||
#include "digest.h"
|
||||
#include "Reporter.h"
|
||||
#include "BroString.h"
|
||||
|
||||
#include "highwayhash/sip_hash.h"
|
||||
#include "highwayhash/highwayhash_target.h"
|
||||
#include "highwayhash/instruction_sets.h"
|
||||
|
||||
// we use the following lines to not pull in the highwayhash headers in Hash.h - but to check the types did not change underneath us.
|
||||
static_assert(std::is_same<hash64_t, highwayhash::HHResult64>::value, "Highwayhash return values must match hash_x_t");
|
||||
static_assert(std::is_same<hash128_t, highwayhash::HHResult128>::value, "Highwayhash return values must match hash_x_t");
|
||||
static_assert(std::is_same<hash256_t, highwayhash::HHResult256>::value, "Highwayhash return values must match hash_x_t");
|
||||
|
||||
void KeyedHash::InitializeSeeds(const std::array<uint32_t, SEED_INIT_SIZE>& seed_data)
|
||||
{
|
||||
static_assert(std::is_same<decltype(KeyedHash::shared_siphash_key), highwayhash::SipHashState::Key>::value, "Highwayhash Key is not unsigned long long[2]");
|
||||
static_assert(std::is_same<decltype(KeyedHash::shared_highwayhash_key), highwayhash::HHKey>::value, "Highwayhash HHKey is not uint64_t[4]");
|
||||
if ( seeds_initialized )
|
||||
return;
|
||||
|
||||
internal_md5((const u_char*) seed_data.data(), sizeof(seed_data) - 16, shared_hmac_md5_key); // The last 128 bits of buf are for siphash
|
||||
// yes, we use the same buffer twice to initialize two different keys. This should not really be a
|
||||
// security problem of any kind: hmac-md5 is not really used anymore - and even if it was, the hashes
|
||||
// should not reveal any information about their initialization vector.
|
||||
static_assert(sizeof(shared_highwayhash_key) == SHA256_DIGEST_LENGTH);
|
||||
calculate_digest(Hash_SHA256, (const u_char*) seed_data.data(), sizeof(seed_data) - 16, reinterpret_cast<unsigned char*>(shared_highwayhash_key));
|
||||
memcpy(shared_siphash_key, reinterpret_cast<const char*>(seed_data.data()) + 64, 16);
|
||||
|
||||
seeds_initialized = true;
|
||||
}
|
||||
|
||||
// Returns the 64-bit SipHash of the `size` bytes starting at `bytes`, keyed
// with the shared SipHash key.
hash64_t KeyedHash::Hash64(const void* bytes, uint64_t size)
{
	const char* input = reinterpret_cast<const char *>(bytes);
	const hash64_t digest = highwayhash::SipHash(shared_siphash_key, input, size);
	return digest;
}
|
||||
|
||||
void KeyedHash::Hash128(const void* bytes, uint64_t size, hash128_t* result)
|
||||
{
|
||||
highwayhash::InstructionSets::Run<highwayhash::HighwayHash>(shared_highwayhash_key, reinterpret_cast<const char *>(bytes), size, result);
|
||||
}
|
||||
|
||||
void KeyedHash::Hash256(const void* bytes, uint64_t size, hash256_t* result)
|
||||
{
|
||||
highwayhash::InstructionSets::Run<highwayhash::HighwayHash>(shared_highwayhash_key, reinterpret_cast<const char *>(bytes), size, result);
|
||||
}
|
||||
|
||||
void init_hash_function()
|
||||
{
|
||||
// Make sure we have already called init_random_seed().
|
||||
if ( ! (hmac_key_set && siphash_key_set) )
|
||||
if ( ! KeyedHash::IsInitialized() )
|
||||
reporter->InternalError("Zeek's hash functions aren't fully initialized");
|
||||
}
|
||||
|
||||
|
@ -156,6 +182,5 @@ void* HashKey::CopyKey(const void* k, int s) const
|
|||
|
||||
hash_t HashKey::HashBytes(const void* bytes, int size)
|
||||
{
|
||||
hash_t digest = highwayhash::SipHash(shared_siphash_key, reinterpret_cast<const char *>(bytes), size);
|
||||
return digest;
|
||||
return KeyedHash::Hash64(bytes, size);
|
||||
}
|
||||
|
|
53
src/Hash.h
53
src/Hash.h
|
@ -1,5 +1,22 @@
|
|||
// See the file "COPYING" in the main distribution directory for copyright.
|
||||
|
||||
/***
|
||||
* This file contains functions to generate hashes using keyed hash functions.
|
||||
* Keyed hash functions make it difficult/impossible to find information about the
|
||||
* output of a hash when the key is unknown to the attacker. This fact holds, even
|
||||
* when the input value is known.
|
||||
*
|
||||
* We use these kinds of hashes heavily internally - e.g. for scriptland hash generation.
|
||||
* It is important that these hashes are not easily guessable to prevent complexity attacks.
|
||||
*
|
||||
* The HashKey class is the actual class that is used to generate Hash keys that are used internally,
|
||||
* e.g. for lookups in hash-tables; the Hashes are also used for connection ID generation.
|
||||
*
|
||||
* This means that the hashes created by most functions in this file will be different each run, unless
|
||||
* a seed file is used. There are a few functions that create hashes that are static over runs
|
||||
* and use an installation-wide seed value; these are specifically called out.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "util.h" // for bro_int_t
|
||||
|
@ -8,7 +25,43 @@
|
|||
|
||||
class BroString;
|
||||
|
||||
// to allow bro_md5_hmac access to the hmac seed
|
||||
#include "ZeekArgs.h"
|
||||
class Val;
|
||||
class Frame;
|
||||
namespace BifFunc {
|
||||
extern Val* bro_md5_hmac(Frame* frame, const zeek::Args*);
|
||||
}
|
||||
|
||||
typedef uint64_t hash_t;
|
||||
typedef uint64_t hash64_t;
|
||||
typedef uint64_t hash128_t[2];
|
||||
typedef uint64_t hash256_t[4];
|
||||
|
||||
class KeyedHash {
|
||||
public:
|
||||
constexpr static int SEED_INIT_SIZE = 20;
|
||||
static void InitializeSeeds(const std::array<uint32_t, SEED_INIT_SIZE>& seed_data);
|
||||
static bool IsInitialized() { return seeds_initialized; }
|
||||
|
||||
static hash64_t Hash64(const void* bytes, uint64_t size);
|
||||
static void Hash128(const void* bytes, uint64_t size, hash128_t* result);
|
||||
static void Hash256(const void* bytes, uint64_t size, hash256_t* result);
|
||||
|
||||
static hash64_t StaticHash64(const void* bytes, uint64_t size);
|
||||
static void StaticHash128(const void* bytes, uint64_t size, hash128_t* result);
|
||||
static void StaticHash256(const void* bytes, uint64_t size, hash256_t* result);
|
||||
private:
|
||||
// actually HHKey
|
||||
alignas(32) inline static uint64_t shared_highwayhash_key[4];
|
||||
// actually HH_U64, which has the same type
|
||||
alignas(16) inline static unsigned long long shared_siphash_key[2];
|
||||
inline static uint8_t shared_hmac_md5_key[16];
|
||||
inline static bool seeds_initialized = false;
|
||||
|
||||
friend void hmac_md5(size_t size, const unsigned char* bytes, unsigned char digest[16]);
|
||||
friend Val* BifFunc::bro_md5_hmac(Frame* frame, const zeek::Args*);
|
||||
};
|
||||
|
||||
typedef enum {
|
||||
HASH_KEY_INT,
|
||||
|
|
|
@ -106,7 +106,7 @@ UHF::UHF(Hasher::seed_t arg_seed)
|
|||
// times.
|
||||
Hasher::digest UHF::hash(const void* x, size_t n) const
|
||||
{
|
||||
assert(sizeof(Hasher::seed_t) == 16); // siphash always needs a 128 bit seed
|
||||
static_assert(std::is_same<highwayhash::SipHashState::Key, decltype(seed.h)>::value, "Seed value is not the same type as highwayhash key");
|
||||
return highwayhash::SipHash(seed.h, reinterpret_cast<const char*>(x), n);
|
||||
}
|
||||
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
#pragma once
|
||||
|
||||
#include "Hash.h"
|
||||
#include "highwayhash/sip_hash.h"
|
||||
|
||||
#include <broker/expected.hh>
|
||||
|
||||
|
@ -24,7 +25,8 @@ public:
|
|||
typedef hash_t digest;
|
||||
typedef std::vector<digest> digest_vector;
|
||||
struct seed_t {
|
||||
alignas(16) highwayhash::HH_U64 h[2];
|
||||
// actually HH_U64, which has the same type
|
||||
alignas(16) unsigned long long h[2];
|
||||
|
||||
friend seed_t operator+(seed_t lhs, const uint64_t rhs) {
|
||||
lhs.h[0] += rhs;
|
||||
|
|
61
src/util.cc
61
src/util.cc
|
@ -55,6 +55,7 @@
|
|||
#include "iosource/Manager.h"
|
||||
#include "iosource/PktSrc.h"
|
||||
#include "ConvertUTF.h"
|
||||
#include "Hash.h"
|
||||
|
||||
#include "3rdparty/doctest.h"
|
||||
|
||||
|
@ -997,27 +998,21 @@ std::string strstrip(std::string s)
|
|||
return s;
|
||||
}
|
||||
|
||||
bool hmac_key_set = false;
|
||||
uint8_t shared_hmac_md5_key[16];
|
||||
|
||||
bool siphash_key_set = false;
|
||||
alignas(16) highwayhash::HH_U64 shared_siphash_key[2];
|
||||
|
||||
void hmac_md5(size_t size, const unsigned char* bytes, unsigned char digest[16])
|
||||
{
|
||||
if ( ! hmac_key_set )
|
||||
if ( ! KeyedHash::seeds_initialized )
|
||||
reporter->InternalError("HMAC-MD5 invoked before the HMAC key is set");
|
||||
|
||||
internal_md5(bytes, size, digest);
|
||||
|
||||
for ( int i = 0; i < 16; ++i )
|
||||
digest[i] ^= shared_hmac_md5_key[i];
|
||||
digest[i] ^= KeyedHash::shared_hmac_md5_key[i];
|
||||
|
||||
internal_md5(digest, 16, digest);
|
||||
}
|
||||
|
||||
static bool read_random_seeds(const char* read_file, uint32_t* seed,
|
||||
uint32_t* buf, int bufsiz)
|
||||
std::array<uint32_t, KeyedHash::SEED_INIT_SIZE>& buf)
|
||||
{
|
||||
FILE* f = nullptr;
|
||||
|
||||
|
@ -1035,8 +1030,8 @@ static bool read_random_seeds(const char* read_file, uint32_t* seed,
|
|||
return false;
|
||||
}
|
||||
|
||||
// Read seeds for MD5.
|
||||
for ( int i = 0; i < bufsiz; ++i )
|
||||
// Read seeds for hmac-md5/siphash/highwayhash.
|
||||
for ( int i = 0; i < KeyedHash::SEED_INIT_SIZE; ++i )
|
||||
{
|
||||
int tmp;
|
||||
if ( fscanf(f, "%u", &tmp) != 1 )
|
||||
|
@ -1053,7 +1048,7 @@ static bool read_random_seeds(const char* read_file, uint32_t* seed,
|
|||
}
|
||||
|
||||
static bool write_random_seeds(const char* write_file, uint32_t seed,
|
||||
uint32_t* buf, int bufsiz)
|
||||
std::array<uint32_t, KeyedHash::SEED_INIT_SIZE>& buf)
|
||||
{
|
||||
FILE* f = nullptr;
|
||||
|
||||
|
@ -1066,7 +1061,7 @@ static bool write_random_seeds(const char* write_file, uint32_t seed,
|
|||
|
||||
fprintf(f, "%u\n", seed);
|
||||
|
||||
for ( int i = 0; i < bufsiz; ++i )
|
||||
for ( int i = 0; i < KeyedHash::SEED_INIT_SIZE; ++i )
|
||||
fprintf(f, "%u\n", buf[i]);
|
||||
|
||||
fclose(f);
|
||||
|
@ -1096,16 +1091,14 @@ void bro_srandom(unsigned int seed)
|
|||
|
||||
void init_random_seed(const char* read_file, const char* write_file)
|
||||
{
|
||||
static const int bufsiz = 20;
|
||||
uint32_t buf[bufsiz];
|
||||
memset(buf, 0, sizeof(buf));
|
||||
int pos = 0; // accumulates entropy
|
||||
std::array<uint32_t, KeyedHash::SEED_INIT_SIZE> buf = {};
|
||||
size_t pos = 0; // accumulates entropy
|
||||
bool seeds_done = false;
|
||||
uint32_t seed = 0;
|
||||
|
||||
if ( read_file )
|
||||
{
|
||||
if ( ! read_random_seeds(read_file, &seed, buf, bufsiz) )
|
||||
if ( ! read_random_seeds(read_file, &seed, buf) )
|
||||
reporter->FatalError("Could not load seeds from file '%s'.\n",
|
||||
read_file);
|
||||
else
|
||||
|
@ -1115,7 +1108,7 @@ void init_random_seed(const char* read_file, const char* write_file)
|
|||
#ifdef HAVE_GETRANDOM
|
||||
if ( ! seeds_done )
|
||||
{
|
||||
ssize_t nbytes = getrandom(buf, sizeof(buf), 0);
|
||||
ssize_t nbytes = getrandom(buf.data(), sizeof(buf), 0);
|
||||
seeds_done = nbytes == ssize_t(sizeof(buf));
|
||||
}
|
||||
#endif
|
||||
|
@ -1123,7 +1116,7 @@ void init_random_seed(const char* read_file, const char* write_file)
|
|||
if ( ! seeds_done )
|
||||
{
|
||||
// Gather up some entropy.
|
||||
gettimeofday((struct timeval *)(buf + pos), 0);
|
||||
gettimeofday((struct timeval *)(buf.data() + pos), 0);
|
||||
pos += sizeof(struct timeval) / sizeof(uint32_t);
|
||||
|
||||
// use urandom. For reasons see e.g. http://www.2uo.de/myths-about-urandom/
|
||||
|
@ -1137,8 +1130,8 @@ void init_random_seed(const char* read_file, const char* write_file)
|
|||
|
||||
if ( fd >= 0 )
|
||||
{
|
||||
int amt = read(fd, buf + pos,
|
||||
sizeof(uint32_t) * (bufsiz - pos));
|
||||
int amt = read(fd, buf.data() + pos,
|
||||
sizeof(uint32_t) * (KeyedHash::SEED_INIT_SIZE - pos));
|
||||
safe_close(fd);
|
||||
|
||||
if ( amt > 0 )
|
||||
|
@ -1149,12 +1142,12 @@ void init_random_seed(const char* read_file, const char* write_file)
|
|||
errno = 0;
|
||||
}
|
||||
|
||||
if ( pos < bufsiz )
|
||||
reporter->FatalError("Could not read enough random data from /dev/urandom. Wanted %d, got %d", bufsiz, pos);
|
||||
if ( pos < KeyedHash::SEED_INIT_SIZE )
|
||||
reporter->FatalError("Could not read enough random data from /dev/urandom. Wanted %d, got %lu", KeyedHash::SEED_INIT_SIZE, pos);
|
||||
|
||||
if ( ! seed )
|
||||
{
|
||||
for ( int i = 0; i < pos; ++i )
|
||||
for ( size_t i = 0; i < pos; ++i )
|
||||
{
|
||||
seed ^= buf[i];
|
||||
seed = (seed << 1) | (seed >> 31);
|
||||
|
@ -1172,22 +1165,10 @@ void init_random_seed(const char* read_file, const char* write_file)
|
|||
first_seed_saved = true;
|
||||
}
|
||||
|
||||
if ( ! hmac_key_set )
|
||||
{
|
||||
assert(sizeof(buf) - 16 == 64);
|
||||
internal_md5((const u_char*) buf, sizeof(buf) - 16, shared_hmac_md5_key); // The last 128 bits of buf are for siphash
|
||||
hmac_key_set = true;
|
||||
}
|
||||
if ( ! KeyedHash::IsInitialized() )
|
||||
KeyedHash::InitializeSeeds(buf);
|
||||
|
||||
if ( ! siphash_key_set )
|
||||
{
|
||||
assert(sizeof(buf) - 64 == 16); // siphash key length is always 128 bytes, independent of implementation
|
||||
assert(sizeof(shared_siphash_key) == 16);
|
||||
memcpy(shared_siphash_key, reinterpret_cast<const char*>(buf) + 64, 16);
|
||||
siphash_key_set = true;
|
||||
}
|
||||
|
||||
if ( write_file && ! write_random_seeds(write_file, seed, buf, bufsiz) )
|
||||
if ( write_file && ! write_random_seeds(write_file, seed, buf) )
|
||||
reporter->Error("Could not write seeds to file '%s'.\n",
|
||||
write_file);
|
||||
}
|
||||
|
|
|
@ -25,7 +25,6 @@
|
|||
#include <stdarg.h>
|
||||
#include <libgen.h>
|
||||
#include <memory> // std::unique_ptr
|
||||
#include "highwayhash/sip_hash.h"
|
||||
|
||||
#include "zeek-config.h"
|
||||
|
||||
|
@ -200,11 +199,6 @@ extern std::string strreplace(const std::string& s, const std::string& o, const
|
|||
// Remove all leading and trailing white space from string.
|
||||
extern std::string strstrip(std::string s);
|
||||
|
||||
extern bool hmac_key_set;
|
||||
extern uint8_t shared_hmac_md5_key[16];
|
||||
extern bool siphash_key_set;
|
||||
extern highwayhash::HH_U64 shared_siphash_key[2];
|
||||
|
||||
extern void hmac_md5(size_t size, const unsigned char* bytes,
|
||||
unsigned char digest[16]);
|
||||
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
#include "iosource/PktDumper.h"
|
||||
#include "IntrusivePtr.h"
|
||||
#include "input.h"
|
||||
#include "Hash.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
|
@ -615,7 +616,7 @@ function sha256_hash%(...%): string
|
|||
function md5_hmac%(...%): string
|
||||
%{
|
||||
unsigned char hmac[MD5_DIGEST_LENGTH];
|
||||
MD5Val::hmac(@ARG@, shared_hmac_md5_key, hmac);
|
||||
MD5Val::hmac(@ARG@, KeyedHash::shared_hmac_md5_key, hmac);
|
||||
return new StringVal(md5_digest_print(hmac));
|
||||
%}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue