mirror of
https://github.com/zeek/zeek.git
synced 2025-10-14 04:28:20 +00:00
Introduce global_hash_seed script variable.
This commit adds support for script-level specification of a seed to be used by hashers. For example, if the given name of a Bloom filter is not empty, then the seed used by the underlying hasher only depends on the Bloom filter name. If the name is empty, we check whether the user defined a non-empty global_hash_seed string variable at script and use it instead. If that script variable does not exist, then we fall back to the initial seed computed a Bro startup (which is affected ultimately by $BRO_SEED). See Hasher::MakeSeed for details.
This commit is contained in:
parent
af9e181731
commit
8ca76dd4ee
6 changed files with 82 additions and 67 deletions
|
@ -3,11 +3,34 @@
|
|||
#include <typeinfo>
|
||||
|
||||
#include "Hasher.h"
|
||||
#include "NetVar.h"
|
||||
#include "digest.h"
|
||||
#include "Serializer.h"
|
||||
|
||||
using namespace probabilistic;
|
||||
|
||||
size_t Hasher::MakeSeed(const void* data, size_t size)
|
||||
{
|
||||
u_char buf[SHA256_DIGEST_LENGTH];
|
||||
SHA256_CTX ctx;
|
||||
sha256_init(&ctx);
|
||||
|
||||
if ( data )
|
||||
sha256_update(&ctx, data, size);
|
||||
|
||||
else if ( global_hash_seed && global_hash_seed->Len() > 0 )
|
||||
sha256_update(&ctx, global_hash_seed->Bytes(), global_hash_seed->Len());
|
||||
|
||||
else
|
||||
{
|
||||
unsigned int first_seed = initial_seed();
|
||||
sha256_update(&ctx, &first_seed, sizeof(first_seed));
|
||||
}
|
||||
|
||||
sha256_final(&ctx, buf);
|
||||
return *reinterpret_cast<size_t*>(buf); // Use the first bytes as seed.
|
||||
}
|
||||
|
||||
bool Hasher::Serialize(SerialInfo* info) const
|
||||
{
|
||||
return SerialObj::Serialize(info);
|
||||
|
@ -25,7 +48,7 @@ bool Hasher::DoSerialize(SerialInfo* info) const
|
|||
if ( ! SERIALIZE(static_cast<uint16>(k)) )
|
||||
return false;
|
||||
|
||||
return SERIALIZE_STR(name.c_str(), name.size());
|
||||
return SERIALIZE(static_cast<uint64>(seed));
|
||||
}
|
||||
|
||||
bool Hasher::DoUnserialize(UnserialInfo* info)
|
||||
|
@ -35,30 +58,26 @@ bool Hasher::DoUnserialize(UnserialInfo* info)
|
|||
uint16 serial_k;
|
||||
if ( ! UNSERIALIZE(&serial_k) )
|
||||
return false;
|
||||
|
||||
k = serial_k;
|
||||
assert(k > 0);
|
||||
|
||||
const char* serial_name;
|
||||
if ( ! UNSERIALIZE_STR(&serial_name, 0) )
|
||||
uint64 serial_seed;
|
||||
if ( ! UNSERIALIZE(&serial_seed) )
|
||||
return false;
|
||||
|
||||
name = serial_name;
|
||||
delete [] serial_name;
|
||||
seed = serial_seed;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
Hasher::Hasher(size_t k, const std::string& arg_name)
|
||||
: k(k)
|
||||
Hasher::Hasher(size_t arg_k, size_t arg_seed)
|
||||
{
|
||||
k = k;
|
||||
name = arg_name;
|
||||
k = arg_k;
|
||||
seed = arg_seed;
|
||||
}
|
||||
|
||||
|
||||
UHF::UHF(size_t seed, const std::string& extra)
|
||||
: h(compute_seed(seed, extra))
|
||||
UHF::UHF(size_t seed)
|
||||
: h(seed)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -68,33 +87,11 @@ Hasher::digest UHF::hash(const void* x, size_t n) const
|
|||
return n == 0 ? 0 : h(x, n);
|
||||
}
|
||||
|
||||
size_t UHF::compute_seed(size_t seed, const std::string& extra)
|
||||
DefaultHasher::DefaultHasher(size_t k, size_t seed)
|
||||
: Hasher(k, seed)
|
||||
{
|
||||
u_char buf[SHA256_DIGEST_LENGTH];
|
||||
SHA256_CTX ctx;
|
||||
sha256_init(&ctx);
|
||||
|
||||
if ( extra.empty() )
|
||||
{
|
||||
unsigned int first_seed = initial_seed();
|
||||
sha256_update(&ctx, &first_seed, sizeof(first_seed));
|
||||
}
|
||||
|
||||
else
|
||||
sha256_update(&ctx, extra.c_str(), extra.size());
|
||||
|
||||
sha256_update(&ctx, &seed, sizeof(seed));
|
||||
sha256_final(&ctx, buf);
|
||||
|
||||
// Take the first sizeof(size_t) bytes as seed.
|
||||
return *reinterpret_cast<size_t*>(buf);
|
||||
}
|
||||
|
||||
DefaultHasher::DefaultHasher(size_t k, const std::string& name)
|
||||
: Hasher(k, name)
|
||||
{
|
||||
for ( size_t i = 0; i < k; ++i )
|
||||
hash_functions.push_back(UHF(i, name));
|
||||
for ( size_t i = 1; i <= k; ++i )
|
||||
hash_functions.push_back(UHF(Seed() + bro_prng(i)));
|
||||
}
|
||||
|
||||
Hasher::digest_vector DefaultHasher::Hash(const void* x, size_t n) const
|
||||
|
@ -137,13 +134,13 @@ bool DefaultHasher::DoUnserialize(UnserialInfo* info)
|
|||
|
||||
hash_functions.clear();
|
||||
for ( size_t i = 0; i < K(); ++i )
|
||||
hash_functions.push_back(UHF(i, Name()));
|
||||
hash_functions.push_back(UHF(Seed() + bro_prng(i)));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
DoubleHasher::DoubleHasher(size_t k, const std::string& name)
|
||||
: Hasher(k, name), h1(1, name), h2(2, name)
|
||||
DoubleHasher::DoubleHasher(size_t k, size_t seed)
|
||||
: Hasher(k, seed), h1(seed + bro_prng(1)), h2(seed + bro_prng(2))
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -187,8 +184,8 @@ bool DoubleHasher::DoUnserialize(UnserialInfo* info)
|
|||
{
|
||||
DO_UNSERIALIZE(Hasher);
|
||||
|
||||
h1 = UHF(1, Name());
|
||||
h2 = UHF(2, Name());
|
||||
h1 = UHF(Seed() + bro_prng(1));
|
||||
h2 = UHF(Seed() + bro_prng(2));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue