Refactor HashKey class to support read/write operations

This preserves the optimization of storing values directly in the key_u member
union when feasible, and using a variable size buffer otherwise. It also adds
bounds-checking for that buffer, moves size arguments to size_t, decouples
construction from hash computation, emulates the tagging feature found in
SerializationFormat to assist troubleshooting, and switches feasible
reinterpret_casts to static_casts.
This commit is contained in:
Christian Kreibich 2021-09-16 17:17:13 -07:00
parent 2585ccd873
commit 82822b1e07
2 changed files with 427 additions and 83 deletions

View file

@ -64,39 +64,39 @@ void KeyedHash::InitOptions()
hash64_t KeyedHash::Hash64(const void* bytes, uint64_t size) hash64_t KeyedHash::Hash64(const void* bytes, uint64_t size)
{ {
return highwayhash::SipHash(shared_siphash_key, reinterpret_cast<const char*>(bytes), size); return highwayhash::SipHash(shared_siphash_key, static_cast<const char*>(bytes), size);
} }
void KeyedHash::Hash128(const void* bytes, uint64_t size, hash128_t* result) void KeyedHash::Hash128(const void* bytes, uint64_t size, hash128_t* result)
{ {
highwayhash::InstructionSets::Run<highwayhash::HighwayHash>( highwayhash::InstructionSets::Run<highwayhash::HighwayHash>(
shared_highwayhash_key, reinterpret_cast<const char*>(bytes), size, result); shared_highwayhash_key, static_cast<const char*>(bytes), size, result);
} }
void KeyedHash::Hash256(const void* bytes, uint64_t size, hash256_t* result) void KeyedHash::Hash256(const void* bytes, uint64_t size, hash256_t* result)
{ {
highwayhash::InstructionSets::Run<highwayhash::HighwayHash>( highwayhash::InstructionSets::Run<highwayhash::HighwayHash>(
shared_highwayhash_key, reinterpret_cast<const char*>(bytes), size, result); shared_highwayhash_key, static_cast<const char*>(bytes), size, result);
} }
hash64_t KeyedHash::StaticHash64(const void* bytes, uint64_t size) hash64_t KeyedHash::StaticHash64(const void* bytes, uint64_t size)
{ {
hash64_t result = 0; hash64_t result = 0;
highwayhash::InstructionSets::Run<highwayhash::HighwayHash>( highwayhash::InstructionSets::Run<highwayhash::HighwayHash>(
cluster_highwayhash_key, reinterpret_cast<const char*>(bytes), size, &result); cluster_highwayhash_key, static_cast<const char*>(bytes), size, &result);
return result; return result;
} }
void KeyedHash::StaticHash128(const void* bytes, uint64_t size, hash128_t* result) void KeyedHash::StaticHash128(const void* bytes, uint64_t size, hash128_t* result)
{ {
highwayhash::InstructionSets::Run<highwayhash::HighwayHash>( highwayhash::InstructionSets::Run<highwayhash::HighwayHash>(
cluster_highwayhash_key, reinterpret_cast<const char*>(bytes), size, result); cluster_highwayhash_key, static_cast<const char*>(bytes), size, result);
} }
void KeyedHash::StaticHash256(const void* bytes, uint64_t size, hash256_t* result) void KeyedHash::StaticHash256(const void* bytes, uint64_t size, hash256_t* result)
{ {
highwayhash::InstructionSets::Run<highwayhash::HighwayHash>( highwayhash::InstructionSets::Run<highwayhash::HighwayHash>(
cluster_highwayhash_key, reinterpret_cast<const char*>(bytes), size, result); cluster_highwayhash_key, static_cast<const char*>(bytes), size, result);
} }
void init_hash_function() void init_hash_function()
@ -106,111 +106,101 @@ void init_hash_function()
reporter->InternalError("Zeek's hash functions aren't fully initialized"); reporter->InternalError("Zeek's hash functions aren't fully initialized");
} }
HashKey::HashKey(bro_int_t i) HashKey::HashKey(bool b)
{ {
key_u.i = i; Set(b);
key = (void*)&key_u;
size = sizeof(i);
hash = HashBytes(key, size);
} }
HashKey::HashKey(bro_uint_t u) HashKey::HashKey(int i)
{ {
key_u.i = bro_int_t(u); Set(i);
key = (void*)&key_u; }
size = sizeof(u);
hash = HashBytes(key, size); HashKey::HashKey(bro_int_t bi)
{
Set(bi);
}
HashKey::HashKey(bro_uint_t bu)
{
Set(bu);
} }
HashKey::HashKey(uint32_t u) HashKey::HashKey(uint32_t u)
{ {
key_u.u32 = u; Set(u);
key = (void*)&key_u;
size = sizeof(u);
hash = HashBytes(key, size);
} }
HashKey::HashKey(const uint32_t u[], int n) HashKey::HashKey(const uint32_t u[], size_t n)
{ {
size = n * sizeof(u[0]); size = write_size = n * sizeof(u[0]);
key = (void*)u; key = (char*)u;
hash = HashBytes(key, size);
} }
HashKey::HashKey(double d) HashKey::HashKey(double d)
{ {
union { Set(d);
double d;
int i[2];
} u;
key_u.d = u.d = d;
key = (void*)&key_u;
size = sizeof(d);
hash = HashBytes(key, size);
} }
HashKey::HashKey(const void* p) HashKey::HashKey(const void* p)
{ {
key_u.p = p; Set(p);
key = (void*)&key_u;
size = sizeof(p);
hash = HashBytes(key, size);
} }
HashKey::HashKey(const char* s) HashKey::HashKey(const char* s)
{ {
size = strlen(s); // note - skip final \0 size = write_size = strlen(s); // note - skip final \0
key = (void*)s; key = (char*)s;
hash = HashBytes(key, size);
} }
HashKey::HashKey(const String* s) HashKey::HashKey(const String* s)
{ {
size = s->Len(); size = write_size = s->Len();
key = (void*)s->Bytes(); key = (char*)s->Bytes();
hash = HashBytes(key, size);
} }
HashKey::HashKey(int copy_key, void* arg_key, int arg_size) HashKey::HashKey(int copy_key, void* arg_key, size_t arg_size)
{ {
size = arg_size; size = write_size = arg_size;
is_our_dynamic = true;
if ( copy_key ) if ( copy_key )
{ {
key = (void*)new char[size]; key = new char[size]; // s == 0 is okay, returns non-nil
memcpy(key, arg_key, size); memcpy(key, arg_key, size);
} }
else else
key = arg_key; key = (char*)arg_key;
hash = HashBytes(key, size);
} }
HashKey::HashKey(const void* arg_key, int arg_size, hash_t arg_hash) HashKey::HashKey(const void* arg_key, size_t arg_size, hash_t arg_hash)
{ {
size = arg_size; size = write_size = arg_size;
hash = arg_hash; hash = arg_hash;
key = CopyKey(arg_key, size); key = CopyKey((char*)arg_key, size);
is_our_dynamic = true; is_our_dynamic = true;
} }
HashKey::HashKey(const void* arg_key, int arg_size, hash_t arg_hash, bool /* dont_copy */) HashKey::HashKey(const void* arg_key, size_t arg_size, hash_t arg_hash, bool /* dont_copy */)
{ {
size = arg_size; size = write_size = arg_size;
hash = arg_hash; hash = arg_hash;
key = const_cast<void*>(arg_key); key = (char*)arg_key;
} }
HashKey::HashKey(const void* bytes, int arg_size) HashKey::HashKey(const void* bytes, size_t arg_size)
{ {
size = arg_size; size = write_size = arg_size;
key = CopyKey(bytes, size); key = CopyKey((char*)bytes, size);
hash = HashBytes(key, size);
is_our_dynamic = true; is_our_dynamic = true;
} }
// Returns the hash of the key bytes. The value is computed on demand:
// as long as the cached hash is 0, it gets (re)computed from the current
// key and size via HashBytes() and stored for subsequent calls.
hash_t HashKey::Hash() const
	{
	if ( hash != 0 )
		return hash;

	hash = HashBytes(key, size);
	return hash;
	}
void* HashKey::TakeKey() void* HashKey::TakeKey()
{ {
if ( is_our_dynamic ) if ( is_our_dynamic )
@ -222,16 +212,284 @@ void* HashKey::TakeKey()
return CopyKey(key, size); return CopyKey(key, size);
} }
void* HashKey::CopyKey(const void* k, int s) const char* HashKey::CopyKey(const char* k, size_t s) const
{ {
void* k_copy = (void*)new char[s]; char* k_copy = new char[s]; // s == 0 is okay, returns non-nil
memcpy(k_copy, k, s); memcpy(k_copy, k, s);
return k_copy; return k_copy;
} }
hash_t HashKey::HashBytes(const void* bytes, int size) hash_t HashKey::HashBytes(const void* bytes, size_t size)
{ {
return KeyedHash::Hash64(bytes, size); return KeyedHash::Hash64(bytes, size);
} }
// The Set() overloads store a single value directly in the key_u union
// instead of a separately allocated buffer. Each one points key at the
// union and sets both size and write_size to the size of the value's type,
// i.e. the (inline) buffer is considered completely written.

// Stores a bool in the inline union.
void HashKey::Set(bool b)
	{
	key = reinterpret_cast<char*>(&key_u);
	key_u.b = b;
	size = sizeof(b);
	write_size = size;
	}

// Stores an int in the inline union.
void HashKey::Set(int i)
	{
	key = reinterpret_cast<char*>(&key_u);
	key_u.i = i;
	size = sizeof(i);
	write_size = size;
	}

// Stores a bro_int_t in the inline union.
void HashKey::Set(bro_int_t bi)
	{
	key = reinterpret_cast<char*>(&key_u);
	key_u.bi = bi;
	size = sizeof(bi);
	write_size = size;
	}

// Stores a bro_uint_t in the inline union. The union has no unsigned
// member for this type, so the value is converted to bro_int_t and kept in
// the bi member; the recorded size is that of bro_uint_t.
void HashKey::Set(bro_uint_t bu)
	{
	key = reinterpret_cast<char*>(&key_u);
	key_u.bi = static_cast<bro_int_t>(bu);
	size = sizeof(bu);
	write_size = size;
	}

// Stores a uint32_t in the inline union.
void HashKey::Set(uint32_t u)
	{
	key = reinterpret_cast<char*>(&key_u);
	key_u.u32 = u;
	size = sizeof(u);
	write_size = size;
	}

// Stores a double in the inline union.
void HashKey::Set(double d)
	{
	key = reinterpret_cast<char*>(&key_u);
	key_u.d = d;
	size = sizeof(d);
	write_size = size;
	}

// Stores a pointer value (the address itself, not what it points to) in
// the inline union.
void HashKey::Set(const void* p)
	{
	key = reinterpret_cast<char*>(&key_u);
	key_u.p = p;
	size = sizeof(p);
	write_size = size;
	}
// Adds to the size of the buffer that a later Allocate() call will create.
// The size accumulated so far is first passed through
// util::memory_size_align() with the given alignment, then addl_size bytes
// are added on top. Must only be used while no buffer is allocated yet
// (asserted).
//
// tag:       context label; not used by this function.
// addl_size: number of bytes to reserve for the upcoming item.
// alignment: alignment handed to util::memory_size_align().
void HashKey::Reserve(const char* tag, size_t addl_size, size_t alignment)
	{
	ASSERT(! IsAllocated());

	// Pad what has been reserved so far, then make room for the new item.
	const size_t aligned_size = util::memory_size_align(size, alignment);
	size = aligned_size + addl_size;
	}
// Creates the key buffer for the currently recorded size and resets the
// read and write markers to the start of that buffer. If key already
// points somewhere other than the inline key_u union, this is reported as
// a usage error via an internal warning and nothing is changed.
void HashKey::Allocate()
	{
	char* const inline_storage = reinterpret_cast<char*>(&key_u);
	const bool has_external_buffer = (key != nullptr) && (key != inline_storage);

	if ( has_external_buffer )
		{
		reporter->InternalWarning("usage error in HashKey::Allocate(): already allocated");
		return;
		}

	is_our_dynamic = true;

	// The buffer is obtained as an array of doubles, rounded up by one
	// element so it always covers at least size bytes.
	// NOTE(review): presumably allocated as double[] to get alignment
	// suitable for the value types written into it -- confirm.
	const size_t num_doubles = size / sizeof(double) + 1;
	key = reinterpret_cast<char*>(new double[num_doubles]);

	write_size = 0;
	read_size = 0;
	}
// Typed Write() overloads. Each forwards to the raw-bytes Write() with the
// size of the value's type. Except for the bool variant, they take an
// "align" flag: when true, the write marker is first aligned to the size
// of the type; when false, no alignment is requested. Also except for the
// bool variant, a key without an allocated buffer does not get written
// incrementally: the value is stored via Set() instead.

// Writes a bool at the current write marker, without alignment.
void HashKey::Write(const char* tag, bool b)
	{
	const size_t no_alignment = 0;
	Write(tag, &b, sizeof(b), no_alignment);
	}

// Writes an int, or stores it via Set() if no buffer is allocated.
void HashKey::Write(const char* tag, int i, bool align)
	{
	if ( IsAllocated() )
		Write(tag, &i, sizeof(i), align ? sizeof(i) : 0);
	else
		Set(i);
	}

// Writes a bro_int_t, or stores it via Set() if no buffer is allocated.
void HashKey::Write(const char* tag, bro_int_t bi, bool align)
	{
	if ( IsAllocated() )
		Write(tag, &bi, sizeof(bi), align ? sizeof(bi) : 0);
	else
		Set(bi);
	}

// Writes a bro_uint_t, or stores it via Set() if no buffer is allocated.
void HashKey::Write(const char* tag, bro_uint_t bu, bool align)
	{
	if ( IsAllocated() )
		Write(tag, &bu, sizeof(bu), align ? sizeof(bu) : 0);
	else
		Set(bu);
	}

// Writes a uint32_t, or stores it via Set() if no buffer is allocated.
void HashKey::Write(const char* tag, uint32_t u, bool align)
	{
	if ( IsAllocated() )
		Write(tag, &u, sizeof(u), align ? sizeof(u) : 0);
	else
		Set(u);
	}

// Writes a double, or stores it via Set() if no buffer is allocated.
void HashKey::Write(const char* tag, double d, bool align)
	{
	if ( IsAllocated() )
		Write(tag, &d, sizeof(d), align ? sizeof(d) : 0);
	else
		Set(d);
	}
// Copies n bytes from "bytes" into the key buffer at the write marker and
// advances the marker by n. Before copying, the marker is aligned via
// AlignWrite() and the remaining space is verified via EnsureWriteSpace().
//
// tag:       context label, included in the debug log line.
// bytes:     source of the data; not dereferenced when n is 0.
// n:         number of bytes to copy.
// alignment: alignment for the write marker; 0 requests none.
void HashKey::Write(const char* tag, const void* bytes, size_t n, size_t alignment)
	{
	// Snapshots of the write marker for the debug log below: before
	// alignment (s0) and after alignment but before the copy (s1).
	const size_t s0 = write_size;
	AlignWrite(alignment);
	const size_t s1 = write_size;

	EnsureWriteSpace(n);

	// Only call memcpy when there is something to copy, mirroring Read():
	// passing a null pointer to memcpy is not valid even for zero bytes.
	if ( n > 0 )
		{
		memcpy(key + write_size, bytes, n);
		write_size += n;
		}

	// %zu is the printf conversion for size_t.
	DBG_LOG(DBG_HASHKEY, "HashKey %p writing %zu/%zu: %zu -> %zu -> %zu [%s]", this, n, alignment,
	        s0, s1, write_size, tag);
	}
// Moves the write marker forward by n bytes without touching the buffer
// content, after verifying via EnsureWriteSpace() that n bytes are
// available. The tag is not used by this function.
void HashKey::SkipWrite(const char* tag, size_t n)
	{
	EnsureWriteSpace(n);
	write_size = write_size + n;
	}
// Aligns the write marker via util::memory_size_align() and zero-fills the
// bytes that get skipped over. An alignment of 0 leaves the marker
// unchanged. Requires an allocated buffer (asserted); if the aligned
// marker lies beyond the buffer size, an InternalError is triggered.
void HashKey::AlignWrite(size_t alignment)
	{
	ASSERT(IsAllocated());

	if ( alignment == 0 )
		return;

	size_t old_size = write_size;

	write_size = util::memory_size_align(write_size, alignment);

	// %zu is the printf conversion for size_t.
	if ( write_size > size )
		reporter->InternalError("buffer overflow in HashKey::AlignWrite(): "
		                        "after alignment, %zu bytes used of %zu allocated",
		                        write_size, size);

	// Zero the padding so the skipped bytes have a defined value.
	while ( old_size < write_size )
		key[old_size++] = '\0';
	}
void HashKey::AlignRead(size_t alignment) const
{
ASSERT(IsAllocated());
if ( alignment == 0 )
return;
int old_size = read_size;
read_size = util::memory_size_align(read_size, alignment);
if ( read_size > size )
reporter->InternalError("buffer overflow in HashKey::AlignRead(): "
"after alignment, %lu bytes used of %lu allocated",
read_size, size);
}
void HashKey::Read(const char* tag, bool& b) const
{
Read(tag, &b, sizeof(b), 0);
}
void HashKey::Read(const char* tag, int& i, bool align) const
{
Read(tag, &i, sizeof(i), align ? sizeof(i) : 0);
}
void HashKey::Read(const char* tag, bro_int_t& i, bool align) const
{
Read(tag, &i, sizeof(i), align ? sizeof(i) : 0);
}
void HashKey::Read(const char* tag, bro_uint_t& u, bool align) const
{
Read(tag, &u, sizeof(u), align ? sizeof(u) : 0);
}
void HashKey::Read(const char* tag, uint32_t& u, bool align) const
{
Read(tag, &u, sizeof(u), align ? sizeof(u) : 0);
}
void HashKey::Read(const char* tag, double& d, bool align) const
{
Read(tag, &d, sizeof(d), align ? sizeof(d) : 0);
}
// Copies n bytes from the key buffer at the read marker into "out" and
// advances the marker by n. Before copying, the marker is aligned via
// AlignRead() and the available data is verified via EnsureReadSpace().
// The tag is not used by this function.
void HashKey::Read(const char* tag, void* out, size_t n, size_t alignment) const
	{
	AlignRead(alignment);
	EnsureReadSpace(n);

	// A nil destination is only acceptable when nothing is to be read.
	ASSERT(out != nullptr || n == 0);

	// Skip memcpy entirely for zero-length reads: memory checkers do not
	// like null pointers passed to memcpy, even if the size is 0.
	if ( n == 0 )
		return;

	memcpy(out, key + read_size, n);
	read_size += n;
	}
// Moves the read marker forward by n bytes without copying anything,
// after verifying via EnsureReadSpace() that n bytes are available. The
// tag is not used by this function.
void HashKey::SkipRead(const char* tag, size_t n) const
	{
	EnsureReadSpace(n);
	read_size = read_size + n;
	}
// Bounds check for writes: triggers an InternalError unless the buffer has
// at least n bytes left between the write marker and its end. A request
// for 0 bytes always passes. Checking a key that has no allocated buffer
// is reported as a usage error.
void HashKey::EnsureWriteSpace(size_t n) const
	{
	if ( n == 0 )
		return;

	if ( ! IsAllocated() )
		reporter->InternalError("usage error in HashKey::EnsureWriteSpace(): "
		                        "size-checking unreserved buffer");

	// Compare n against the remaining space rather than computing
	// write_size + n: that sum can wrap around for a very large n, which
	// would let an oversized write pass the check. %zu is the printf
	// conversion for size_t.
	if ( write_size > size || n > size - write_size )
		reporter->InternalError("buffer overflow in HashKey::Write(): writing %zu "
		                        "bytes with %zu remaining",
		                        n, size - write_size);
	}
// Bounds check for reads: triggers an InternalError unless the buffer has
// at least n bytes left between the read marker and its end. A request
// for 0 bytes always passes. Checking a key that has no allocated buffer
// is reported as a usage error.
void HashKey::EnsureReadSpace(size_t n) const
	{
	if ( n == 0 )
		return;

	if ( ! IsAllocated() )
		reporter->InternalError("usage error in HashKey::EnsureReadSpace(): "
		                        "size-checking unreserved buffer");

	// Compare n against the remaining data rather than computing
	// read_size + n: that sum can wrap around for a very large n, which
	// would let an out-of-bounds read pass the check. %zu is the printf
	// conversion for size_t.
	if ( read_size > size || n > size - read_size )
		reporter->InternalError("buffer overflow in HashKey::EnsureReadSpace(): reading %zu "
		                        "bytes with %zu remaining",
		                        n, size - read_size);
	}
} // namespace zeek::detail } // namespace zeek::detail

View file

@ -1,7 +1,7 @@
// See the file "COPYING" in the main distribution directory for copyright. // See the file "COPYING" in the main distribution directory for copyright.
/*** /***
* This file contains functions to generate hashes used keyed hash functions. * This file contains functions to generate hashes using keyed hash functions.
* Keyed hash functions make it difficult/impossible to find information about the * Keyed hash functions make it difficult/impossible to find information about the
* output of a hash when the key is unknown to the attacker. This fact holds, even * output of a hash when the key is unknown to the attacker. This fact holds, even
* when the input value is known. * when the input value is known.
@ -10,8 +10,7 @@
* It is important that these hashes are not easily guessable to prevent complexity attacks. * It is important that these hashes are not easily guessable to prevent complexity attacks.
* *
* The HashKey class is the actual class that is used to generate Hash keys that are used * The HashKey class is the actual class that is used to generate Hash keys that are used
* internally, e.g. for lookups in hash-tables; the Hashes are also used for connection ID * internally, e.g. for lookups in hash-tables and connection ID generation.
* generation.
* *
* This means that the hashes created by most functions in this file will be different each run, * This means that the hashes created by most functions in this file will be different each run,
* unless a seed file is used. There are a few functions that create hashes that are static over * unless a seed file is used. There are a few functions that create hashes that are static over
@ -228,14 +227,18 @@ constexpr int NUM_HASH_KEYS = HASH_KEY_STRING + 1;
class HashKey class HashKey
{ {
public: public:
explicit HashKey(bro_int_t i); explicit HashKey() { }
explicit HashKey(bro_uint_t u); explicit HashKey(bool b);
explicit HashKey(int i);
explicit HashKey(bro_int_t bi);
explicit HashKey(bro_uint_t bu);
explicit HashKey(uint32_t u); explicit HashKey(uint32_t u);
HashKey(const uint32_t u[], int n); HashKey(const uint32_t u[], size_t n);
explicit HashKey(double d); explicit HashKey(double d);
explicit HashKey(const void* p); explicit HashKey(const void* p);
explicit HashKey(const char* s); explicit HashKey(const char* s); // No copying, no ownership
explicit HashKey(const String* s); explicit HashKey(const String* s); // No copying, no ownership
~HashKey() ~HashKey()
{ {
if ( is_our_dynamic ) if ( is_our_dynamic )
@ -250,19 +253,19 @@ public:
// The calling sequence here is unusual (normally key would be // The calling sequence here is unusual (normally key would be
// first) to avoid possible ambiguities with the next constructor, // first) to avoid possible ambiguities with the next constructor,
// which is the more commonly used one. // which is the more commonly used one.
HashKey(int copy_key, void* key, int size); HashKey(int copy_key, void* key, size_t size);
// Same, but automatically copies the key. // Same, but automatically copies the key.
HashKey(const void* key, int size, hash_t hash); HashKey(const void* key, size_t size, hash_t hash);
// Builds a key from the given chunk of bytes. // Builds a key from the given chunk of bytes.
HashKey(const void* bytes, int size); HashKey(const void* bytes, size_t size);
// Create a Hashkey given all of its components *without* // Create a Hashkey given all of its components *without*
// copying the key and *without* taking ownership. Note that // copying the key and *without* taking ownership. Note that
// "dont_copy" is a type placeholder to differentiate this member // "dont_copy" is a type placeholder to differentiate this member
// function from the one above; its value is not used. // function from the one above; its value is not used.
HashKey(const void* key, int size, hash_t hash, bool dont_copy); HashKey(const void* key, size_t size, hash_t hash, bool dont_copy);
// Hands over the key to the caller. This means that if the // Hands over the key to the caller. This means that if the
// key is our dynamic, we give it to the caller and mark it // key is our dynamic, we give it to the caller and mark it
@ -271,8 +274,8 @@ public:
void* TakeKey(); void* TakeKey();
const void* Key() const { return key; } const void* Key() const { return key; }
int Size() const { return size; } size_t Size() const { return size; }
hash_t Hash() const { return hash; } hash_t Hash() const;
[[deprecated("Remove in v5.1. MemoryAllocation() is deprecated and will be removed. See " [[deprecated("Remove in v5.1. MemoryAllocation() is deprecated and will be removed. See "
"GHI-572.")]] unsigned int "GHI-572.")]] unsigned int
@ -281,22 +284,105 @@ public:
return padded_sizeof(*this) + util::pad_size(size); return padded_sizeof(*this) + util::pad_size(size);
} }
static hash_t HashBytes(const void* bytes, int size); static hash_t HashBytes(const void* bytes, size_t size);
// A HashKey is "allocated" when the underlying key points somewhere
// other than our internal key_u union. This is almost like
// is_our_dynamic, but remains true also after TakeKey().
bool IsAllocated() const
{
return (key != nullptr && key != reinterpret_cast<const char*>(&key_u));
}
// Buffer size reservation. Repeated calls to these methods
// incrementally build up the eventual buffer size to be allocated via
// Allocate().
template <typename T> void ReserveType(const char* tag) { Reserve(tag, sizeof(T), sizeof(T)); }
void Reserve(const char* tag, size_t addl_size, size_t alignment = 0);
// Allocates the reserved amount of memory
void Allocate();
// Incremental writes into an allocated HashKey. The tags give context
// to what's being written and are only used in debug-build log streams.
// When true, the alignment boolean will cause write-marker alignment to
// the size of the item being written, otherwise writes happen directly
// at the current marker.
void Write(const char* tag, bool b);
void Write(const char* tag, int i, bool align = true);
void Write(const char* tag, bro_int_t bi, bool align = true);
void Write(const char* tag, bro_uint_t bu, bool align = true);
void Write(const char* tag, uint32_t u, bool align = true);
void Write(const char* tag, double d, bool align = true);
void Write(const char* tag, const void* bytes, size_t n, size_t alignment = 0);
// For writes that copy directly into the allocated buffer, this method
// advances the write marker without modifying content.
void SkipWrite(const char* tag, size_t n);
// Aligns the write marker to the next multiple of the given alignment size.
void AlignWrite(size_t alignment);
// Bounds check: if the buffer does not have at least n bytes available
// to write into, triggers an InternalError.
void EnsureWriteSpace(size_t n) const;
// Reads don't modify our internal state except for the read offset
// pointer. To blend in more seamlessly with the rest of Zeek we keep
// reads a const operation.
void ResetRead() const { read_size = 0; }
// Incremental reads from an allocated HashKey. As with writes, the
// tags are only used for debug-build logging, and alignment prior
// to the read of the item is controlled by the align boolean.
void Read(const char* tag, bool& b) const;
void Read(const char* tag, int& i, bool align = true) const;
void Read(const char* tag, bro_int_t& bi, bool align = true) const;
void Read(const char* tag, bro_uint_t& bu, bool align = true) const;
void Read(const char* tag, uint32_t& u, bool align = true) const;
void Read(const char* tag, double& d, bool align = true) const;
void Read(const char* tag, void* out, size_t n, size_t alignment = 0) const;
// These mirror the corresponding write methods above.
void SkipRead(const char* tag, size_t n) const;
void AlignRead(size_t alignment) const;
void EnsureReadSpace(size_t n) const;
void* KeyAtWrite() { return static_cast<void*>(key + write_size); }
const void* KeyAtRead() const { return static_cast<void*>(key + read_size); }
const void* KeyEnd() const { return static_cast<void*>(key + size); }
protected: protected:
void* CopyKey(const void* key, int size) const; char* CopyKey(const char* key, size_t size) const;
// Payload setters for types stored directly in the key_u union. These
// adjust the size and write_size markers to indicate a full buffer, and
// use the key_u union for storage.
void Set(bool b);
void Set(int i);
void Set(bro_int_t bi);
void Set(bro_uint_t bu);
void Set(uint32_t u);
void Set(double d);
void Set(const void* p);
union { union {
bro_int_t i; bool b;
int i;
bro_int_t bi;
uint32_t u32; uint32_t u32;
double d; double d;
const void* p; const void* p;
} key_u; } key_u;
void* key; char* key = nullptr;
hash_t hash; mutable hash_t hash = 0;
int size; size_t size = 0;
bool is_our_dynamic = false; bool is_our_dynamic = false;
size_t write_size = 0;
mutable size_t read_size = 0;
}; };
extern void init_hash_function(); extern void init_hash_function();