Introduce global_hash_seed script variable.

This commit adds support for script-level specification of a seed to be used by
hashers. For example, if the given name of a Bloom filter is not empty, then
the seed used by the underlying hasher only depends on the Bloom filter name.
If the name is empty, we check whether the user defined a non-empty
global_hash_seed string variable at script and use it instead. If that script
variable does not exist, then we fall back to the initial seed computed a
Bro startup (which is affected ultimately by $BRO_SEED).

See Hasher::MakeSeed for details.
This commit is contained in:
Matthias Vallentin 2013-07-31 17:59:08 +02:00
parent af9e181731
commit 8ca76dd4ee
6 changed files with 82 additions and 67 deletions

View file

@ -3,11 +3,34 @@
#include <typeinfo>
#include "Hasher.h"
#include "NetVar.h"
#include "digest.h"
#include "Serializer.h"
using namespace probabilistic;
size_t Hasher::MakeSeed(const void* data, size_t size)
{
u_char buf[SHA256_DIGEST_LENGTH];
SHA256_CTX ctx;
sha256_init(&ctx);
if ( data )
sha256_update(&ctx, data, size);
else if ( global_hash_seed && global_hash_seed->Len() > 0 )
sha256_update(&ctx, global_hash_seed->Bytes(), global_hash_seed->Len());
else
{
unsigned int first_seed = initial_seed();
sha256_update(&ctx, &first_seed, sizeof(first_seed));
}
sha256_final(&ctx, buf);
return *reinterpret_cast<size_t*>(buf); // Use the first bytes as seed.
}
bool Hasher::Serialize(SerialInfo* info) const
{
return SerialObj::Serialize(info);
@ -25,7 +48,7 @@ bool Hasher::DoSerialize(SerialInfo* info) const
if ( ! SERIALIZE(static_cast<uint16>(k)) )
return false;
return SERIALIZE_STR(name.c_str(), name.size());
return SERIALIZE(static_cast<uint64>(seed));
}
bool Hasher::DoUnserialize(UnserialInfo* info)
@ -35,30 +58,26 @@ bool Hasher::DoUnserialize(UnserialInfo* info)
uint16 serial_k;
if ( ! UNSERIALIZE(&serial_k) )
return false;
k = serial_k;
assert(k > 0);
const char* serial_name;
if ( ! UNSERIALIZE_STR(&serial_name, 0) )
uint64 serial_seed;
if ( ! UNSERIALIZE(&serial_seed) )
return false;
name = serial_name;
delete [] serial_name;
seed = serial_seed;
return true;
}
Hasher::Hasher(size_t k, const std::string& arg_name)
: k(k)
Hasher::Hasher(size_t arg_k, size_t arg_seed)
{
k = k;
name = arg_name;
k = arg_k;
seed = arg_seed;
}
UHF::UHF(size_t seed, const std::string& extra)
: h(compute_seed(seed, extra))
UHF::UHF(size_t seed)
: h(seed)
{
}
@ -68,33 +87,11 @@ Hasher::digest UHF::hash(const void* x, size_t n) const
return n == 0 ? 0 : h(x, n);
}
size_t UHF::compute_seed(size_t seed, const std::string& extra)
DefaultHasher::DefaultHasher(size_t k, size_t seed)
: Hasher(k, seed)
{
u_char buf[SHA256_DIGEST_LENGTH];
SHA256_CTX ctx;
sha256_init(&ctx);
if ( extra.empty() )
{
unsigned int first_seed = initial_seed();
sha256_update(&ctx, &first_seed, sizeof(first_seed));
}
else
sha256_update(&ctx, extra.c_str(), extra.size());
sha256_update(&ctx, &seed, sizeof(seed));
sha256_final(&ctx, buf);
// Take the first sizeof(size_t) bytes as seed.
return *reinterpret_cast<size_t*>(buf);
}
DefaultHasher::DefaultHasher(size_t k, const std::string& name)
: Hasher(k, name)
{
for ( size_t i = 0; i < k; ++i )
hash_functions.push_back(UHF(i, name));
for ( size_t i = 1; i <= k; ++i )
hash_functions.push_back(UHF(Seed() + bro_prng(i)));
}
Hasher::digest_vector DefaultHasher::Hash(const void* x, size_t n) const
@ -137,13 +134,13 @@ bool DefaultHasher::DoUnserialize(UnserialInfo* info)
hash_functions.clear();
for ( size_t i = 0; i < K(); ++i )
hash_functions.push_back(UHF(i, Name()));
hash_functions.push_back(UHF(Seed() + bro_prng(i)));
return true;
}
DoubleHasher::DoubleHasher(size_t k, const std::string& name)
: Hasher(k, name), h1(1, name), h2(2, name)
DoubleHasher::DoubleHasher(size_t k, size_t seed)
: Hasher(k, seed), h1(seed + bro_prng(1)), h2(seed + bro_prng(2))
{
}
@ -187,8 +184,8 @@ bool DoubleHasher::DoUnserialize(UnserialInfo* info)
{
DO_UNSERIALIZE(Hasher);
h1 = UHF(1, Name());
h2 = UHF(2, Name());
h1 = UHF(Seed() + bro_prng(1));
h2 = UHF(Seed() + bro_prng(2));
return true;
}