mirror of
https://github.com/zeek/zeek.git
synced 2025-10-04 15:48:19 +00:00
Switch file UID hashing from md5 to highwayhash.
This commit switches UID hashing from md5 to a highway hash. It also moves the salt value out of the file plugin and into the global namespace, making it installation-specific instead. There are now digest hash functions that make "static" installation-specific hashes, which are stable across workers, available to everyone; hashes can be 64, 128 or 256 bits in size. Because the file hashing algorithm changes, all file hashes change. The underlying algorithm that is used for hashing is highwayhash-128, which is significantly faster than md5.
This commit is contained in:
parent
bc546634d1
commit
3bce313b12
153 changed files with 953 additions and 799 deletions
147
src/Hash.h
147
src/Hash.h
|
@ -40,22 +40,157 @@ typedef uint64_t hash256_t[4];
|
|||
|
||||
class KeyedHash {
|
||||
public:
|
||||
constexpr static int SEED_INIT_SIZE = 20;
|
||||
static void InitializeSeeds(const std::array<uint32_t, SEED_INIT_SIZE>& seed_data);
|
||||
static bool IsInitialized() { return seeds_initialized; }
|
||||
|
||||
/**
|
||||
* Generate a 64 bit digest hash.
|
||||
*
|
||||
* This hash is seeded with random data, unless the ZEEK_SEED_FILE environment
|
||||
* variable is set. Thus, typically every node will return a different hash
|
||||
* after every restart.
|
||||
*
|
||||
* This should be used for internal hashes that do not have to be stable over
|
||||
* the cluster/runs - like, e.g. connection ID generation.
|
||||
*
|
||||
* @param bytes Bytes to hash
|
||||
*
|
||||
* @param size Size of bytes
|
||||
*
|
||||
* @returns 64 bit digest hash
|
||||
*/
|
||||
static hash64_t Hash64(const void* bytes, uint64_t size);
|
||||
|
||||
/**
|
||||
* Generate a 128 bit digest hash.
|
||||
*
|
||||
* This hash is seeded with random data, unless the ZEEK_SEED_FILE environment
|
||||
* variable is set. Thus, typically every node will return a different hash
|
||||
* after every restart.
|
||||
*
|
||||
* This should be used for internal hashes that do not have to be stable over
|
||||
* the cluster/runs - like, e.g. connection ID generation.
|
||||
*
|
||||
* @param bytes Bytes to hash
|
||||
*
|
||||
* @param size Size of bytes
|
||||
*
|
||||
* @param result Result of the hashing operation.
|
||||
*/
|
||||
static void Hash128(const void* bytes, uint64_t size, hash128_t* result);
|
||||
|
||||
/**
|
||||
* Generate a 256 bit digest hash.
|
||||
*
|
||||
* This hash is seeded with random data, unless the ZEEK_SEED_FILE environment
|
||||
* variable is set. Thus, typically every node will return a different hash
|
||||
* after every restart.
|
||||
*
|
||||
* This should be used for internal hashes that do not have to be stable over
|
||||
* the cluster/runs - like, e.g. connection ID generation.
|
||||
*
|
||||
* @param bytes Bytes to hash
|
||||
*
|
||||
* @param size Size of bytes
|
||||
*
|
||||
* @param result Result of the hashing operation.
|
||||
*/
|
||||
static void Hash256(const void* bytes, uint64_t size, hash256_t* result);
|
||||
|
||||
/**
|
||||
* Generates an installation-specific 64 bit hash.
|
||||
*
|
||||
* This function generates a 64 bit digest hash, which is stable over a cluster
|
||||
* or a restart.
|
||||
*
|
||||
* To be more exact - the seed value for this hash is generated from the script-level
|
||||
* :zeek:see:`digest_salt` constant. The seeds are stable as long as this value
|
||||
* is not changed.
|
||||
*
|
||||
* This should be used for hashes that have to remain stable over the entire
|
||||
* cluster. For example, file IDs have to be stable over several workers.
|
||||
*
|
||||
* @param bytes Bytes to hash
|
||||
*
|
||||
* @param size Size of bytes
|
||||
*
|
||||
* @returns 64 bit digest hash
|
||||
*/
|
||||
static hash64_t StaticHash64(const void* bytes, uint64_t size);
|
||||
|
||||
/**
|
||||
* Generates an installation-specific 128 bit hash.
|
||||
*
|
||||
* This function generates a 128 bit digest hash, which is stable over a cluster
|
||||
* or a restart.
|
||||
*
|
||||
* To be more exact - the seed value for this hash is generated from the script-level
|
||||
* :zeek:see:`digest_salt` constant. The seeds are stable as long as this value
|
||||
* is not changed.
|
||||
*
|
||||
* This should be used for hashes that have to remain stable over the entire
|
||||
* cluster. For example, file IDs have to be stable over several workers.
|
||||
*
|
||||
* @param bytes Bytes to hash
|
||||
*
|
||||
* @param size Size of bytes
|
||||
*
|
||||
* @param result Result of the hashing operation.
|
||||
*/
|
||||
static void StaticHash128(const void* bytes, uint64_t size, hash128_t* result);
|
||||
|
||||
/**
|
||||
* Generates an installation-specific 256 bit hash.
|
||||
*
|
||||
* This function generates a 256 bit digest hash, which is stable over a cluster
|
||||
* or a restart.
|
||||
*
|
||||
* To be more exact - the seed value for this hash is generated from the script-level
|
||||
* :zeek:see:`digest_salt` constant. The seeds are stable as long as this value
|
||||
* is not changed.
|
||||
*
|
||||
* This should be used for hashes that have to remain stable over the entire
|
||||
* cluster. For example, file IDs have to be stable over several workers.
|
||||
*
|
||||
* @param bytes Bytes to hash
|
||||
*
|
||||
* @param size Size of bytes
|
||||
*
|
||||
* @param result Result of the hashing operation.
|
||||
*/
|
||||
static void StaticHash256(const void* bytes, uint64_t size, hash256_t* result);
|
||||
|
||||
/**
|
||||
* Size of the initial seed
|
||||
*/
|
||||
constexpr static int SEED_INIT_SIZE = 20;
|
||||
|
||||
/**
|
||||
* Initialize the (typically process-specific) seeds. This function is indirectly
|
||||
* called from main, during early initialization.
|
||||
*
|
||||
* @param seed_data random data used as an initial seed
|
||||
*/
|
||||
static void InitializeSeeds(const std::array<uint32_t, SEED_INIT_SIZE>& seed_data);
|
||||
|
||||
/**
|
||||
* Returns true if the process-specific seeds have been initialized
|
||||
*
|
||||
* @return True if the seeds are initialized
|
||||
*/
|
||||
static bool IsInitialized() { return seeds_initialized; }
|
||||
|
||||
/**
|
||||
* Initializes the static hash seeds using the script-level
|
||||
* :zeek:see:`digest_salt` constant.
|
||||
*/
|
||||
static void InitOptions();
|
||||
|
||||
private:
|
||||
// actually HHKey
|
||||
// actually HHKey. This key changes each start (unless a seed is specified)
|
||||
alignas(32) inline static uint64_t shared_highwayhash_key[4];
|
||||
// actually HH_U64, which has the same type
|
||||
// actually HHKey. This key is installation specific and sourced from the digest_salt script-level const.
|
||||
alignas(32) inline static uint64_t cluster_highwayhash_key[4];
|
||||
// actually HH_U64, which has the same type. This key changes each start (unless a seed is specified)
|
||||
alignas(16) inline static unsigned long long shared_siphash_key[2];
|
||||
// This key changes each start (unless a seed is specified)
|
||||
inline static uint8_t shared_hmac_md5_key[16];
|
||||
inline static bool seeds_initialized = false;
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue