Introduce ZEEK_SEED_VALUES environment variable

For "individually different but deterministic" runs, specifying Zeek's
seeds via an environment variable eases setup, as one can avoid creating
extra seed files for each of the individual processes.

It is an error to specify the new ZEEK_SEED_VALUES variable together
with the existing ZEEK_SEED_FILE or -G. ZEEK_SEED_VALUES takes precedence
over deterministic mode (-D), like ZEEK_SEED_FILE already does today.
This commit is contained in:
Arne Welzel 2023-05-12 19:45:37 +02:00
parent b98ae9bdb2
commit b2d934226f
9 changed files with 134 additions and 8 deletions

View file

@ -166,6 +166,8 @@ void usage(const char* prog, int code)
util::zeek_prefixes().c_str());
fprintf(stderr, " $ZEEK_DNS_FAKE | disable DNS lookups (%s)\n",
fake_dns() ? "on" : "off");
fprintf(stderr, " $ZEEK_SEED_VALUES | list of space separated seeds (%s)\n",
getenv("ZEEK_SEED_VALUES") ? "set" : "not set");
fprintf(stderr, " $ZEEK_SEED_FILE | file to load seeds from (not set)\n");
fprintf(stderr, " $ZEEK_LOG_SUFFIX | ASCII log file extension (.%s)\n",
logging::writer::detail::Ascii::LogExt().c_str());

View file

@ -25,6 +25,7 @@
#include <cstdarg>
#include <cstdio>
#include <cstdlib>
#include <sstream>
#if defined(HAVE_MALLINFO) || defined(HAVE_MALLINFO2)
#include <malloc.h>
@ -383,6 +384,23 @@ static bool write_random_seeds(const char* write_file, uint32_t seed,
return true;
}
// Same as read_random_seeds() but takes seeds from a space separated string instead.
static bool fill_random_seeds(const std::string& seed_string, uint32_t* seed,
std::array<uint32_t, zeek::detail::KeyedHash::SEED_INIT_SIZE>& buf)
{
stringstream ss{seed_string};
if ( ! (ss >> *seed) )
return false;
for ( auto& v : buf )
{
if ( ! (ss >> v) )
return false;
}
return true;
}
static bool zeek_rand_deterministic = false;
static long int zeek_rand_state = 0;
static bool first_seed_saved = false;
@ -404,7 +422,8 @@ void seed_random(unsigned int seed)
srandom(seed);
}
void init_random_seed(const char* read_file, const char* write_file, bool use_empty_seeds)
void init_random_seed(const char* read_file, const char* write_file, bool use_empty_seeds,
const std::string& seed_string)
{
std::array<uint32_t, zeek::detail::KeyedHash::SEED_INIT_SIZE> buf = {};
size_t pos = 0; // accumulates entropy
@ -414,7 +433,14 @@ void init_random_seed(const char* read_file, const char* write_file, bool use_em
if ( read_file )
{
if ( ! read_random_seeds(read_file, &seed, buf) )
reporter->FatalError("Could not load seeds from file '%s'.\n", read_file);
reporter->FatalError("Could not load seeds from file '%s'.", read_file);
else
seeds_done = true;
}
else if ( ! seed_string.empty() )
{
if ( ! fill_random_seeds(seed_string, &seed, buf) )
reporter->FatalError("Could not load seeds from string");
else
seeds_done = true;
}

View file

@ -138,12 +138,15 @@ extern bool ensure_dir(const char* dirname);
extern void hmac_md5(size_t size, const unsigned char* bytes, unsigned char digest[16]);
// Initializes RNGs for zeek::random_number() and MD5 usage. If load_file is given,
// the seeds (both random & MD5) are loaded from that file. This takes
// precedence over the "use_empty_seeds" argument, which just
// zero-initializes all seed values. If write_file is given, the seeds are
// Initializes RNGs for zeek::random_number() and hmac-md5/siphash/highwayhash usage.
// If load_file is given, the seeds (both random & hashes) are loaded from that file. This
// takes precedence over the "seed_string" and "use_empty_seeds" arguments. Otherwise, if
// "seed_string" is not empty, its content is used as the seeds. Otherwise, when
// "use_empty_seeds" is set, all seed values are zero-initialized. If none of these provides
// initial seed values, platform specific random data is used as seeds. If write_file is
// given, the seeds are written to that file.
extern void init_random_seed(const char* load_file, const char* write_file, bool use_empty_seeds);
extern void init_random_seed(const char* load_file, const char* write_file, bool use_empty_seeds,
const std::string& seed_string = {});
// Retrieves the initial seed computed after the very first call to
// init_random_seed(). Repeated calls to init_random_seed() will not affect

View file

@ -629,15 +629,22 @@ SetupResult setup(int argc, char** argv, Options* zopts)
supervisor_mgr = new Supervisor(std::move(cfg), std::move(*stem));
}
std::string seed_string;
if ( const auto* seed_env = getenv("ZEEK_SEED_VALUES") )
seed_string = seed_env;
const char* seed_load_file = getenv("ZEEK_SEED_FILE");
if ( options.random_seed_input_file )
seed_load_file = options.random_seed_input_file->data();
if ( seed_load_file && *seed_load_file && ! seed_string.empty() )
reporter->FatalError("can't use ZEEK_SEED_VALUES together with ZEEK_SEED_FILE or -G");
util::detail::init_random_seed(
(seed_load_file && *seed_load_file ? seed_load_file : nullptr),
options.random_seed_output_file ? options.random_seed_output_file->data() : nullptr,
options.deterministic_mode);
options.deterministic_mode, seed_string);
// DEBUG_MSG("HMAC key: %s\n", md5_digest_print(shared_hmac_md5_key));
init_hash_function();

View file

@ -0,0 +1,12 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
file does not exist
warning: Could not open seed file './does/not-exist': No such file or directory
fatal error: Could not load seeds from file './does/not-exist'.
cannot use ZEEK_SEED_FILE and ZEEK_SEED
fatal error: can't use ZEEK_SEED_VALUES together with ZEEK_SEED_FILE or -G
cannot use -G and ZEEK_SEED
fatal error: can't use ZEEK_SEED_VALUES together with ZEEK_SEED_FILE or -G
wrong format (1)
fatal error: Could not load seeds from string
wrong format (2)
fatal error: Could not load seeds from string

View file

@ -0,0 +1,17 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
test seed
10307, CrFj3eGxkRR5
10307, CrFj3eGxkRR5
1 to 21
3, CS6rj43joLxl
3, CS6rj43joLxl
21 x 0, deterministic
3, CwwXWmUIYdSg
3, CwwXWmUIYdSg
different
39, CBZDI3Gc0Cl7
78, CU9o4PABWw9k
117, CpaF4wTs0sq8
writing seeds (twice)
3, CdYMcd1tcP2
3, CZKuY3Fww4V7

View file

@ -0,0 +1,22 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
0
1
2
3
4
5
6
7
8
9
10
9
8
7
6
5
4
3
2
1
0

View file

@ -0,0 +1,16 @@
# @TEST-DOC: Test error cases when specifying seeds via ZEEK_SEED_VALUES
# @TEST-EXEC: seq 21 > random.seed
# @TEST-EXEC: echo "file does not exist" >> output
# @TEST-EXEC-FAIL: ZEEK_SEED_FILE=./does/not-exist zeek -b %INPUT >> output 2>&1
# @TEST-EXEC: echo "cannot use ZEEK_SEED_FILE and ZEEK_SEED" >> output
# @TEST-EXEC-FAIL: ZEEK_SEED_FILE=./does/not-exist ZEEK_SEED_VALUES="0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0" zeek -b %INPUT >> output 2>&1
# @TEST-EXEC: echo "cannot use -G and ZEEK_SEED" >> output
# @TEST-EXEC-FAIL: ZEEK_SEED_FILE= ZEEK_SEED_VALUES="0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0" zeek -G ./random.seed -b %INPUT >> output 2>&1
# @TEST-EXEC: echo "wrong format (1)" >> output
# @TEST-EXEC-FAIL: ZEEK_SEED_FILE= ZEEK_SEED_VALUES="a b c 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0" zeek -b %INPUT >> output 2>&1
# @TEST-EXEC: echo "wrong format (2)" >> output
# @TEST-EXEC-FAIL: ZEEK_SEED_FILE= ZEEK_SEED_VALUES="0 0 0" zeek -b %INPUT >> output 2>&1
# @TEST-EXEC: btest-diff output
print rand(500000), unique_id("C");

View file

@ -0,0 +1,21 @@
# @TEST-DOC: Test specifying seeds via ZEEK_SEED_VALUES
# @TEST-EXEC: echo "test seed" >> output
# @TEST-EXEC: zeek -b %INPUT >> output
# @TEST-EXEC: bash -c 'ZEEK_SEED_VALUES=$(paste -d " " $ZEEK_SEED_FILE) ZEEK_SEED_FILE= zeek -b %INPUT' >> output
# @TEST-EXEC: echo "1 to 21" >> output
# @TEST-EXEC: bash -c 'ZEEK_SEED_FILE= ZEEK_SEED_VALUES=$(echo {1..21}) zeek -b %INPUT ' >> output
# @TEST-EXEC: ZEEK_SEED_FILE= ZEEK_SEED_VALUES="1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21" zeek -b %INPUT >> output
# @TEST-EXEC: echo "21 x 0, deterministic" >> output
# @TEST-EXEC: ZEEK_SEED_FILE= ZEEK_SEED_VALUES="0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0" zeek -b %INPUT >> output
# @TEST-EXEC: ZEEK_SEED_FILE= zeek -D -b %INPUT >> output
# @TEST-EXEC: echo "different" >> output
# @TEST-EXEC: ZEEK_SEED_FILE= ZEEK_SEED_VALUES="10 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1" zeek -b %INPUT >> output
# @TEST-EXEC: ZEEK_SEED_FILE= ZEEK_SEED_VALUES="20 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2" zeek -b %INPUT >> output
# @TEST-EXEC: ZEEK_SEED_FILE= ZEEK_SEED_VALUES="30 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3" zeek -b %INPUT >> output
# @TEST-EXEC: echo "writing seeds (twice)" >> output
# @TEST-EXEC: ZEEK_SEED_FILE= ZEEK_SEED_VALUES="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21" zeek -b %INPUT -H seeds.out >> output
# @TEST-EXEC: ZEEK_SEED_FILE= ZEEK_SEED_VALUES="0 1 2 3 4 5 6 7 8 9 10 9 8 7 6 5 4 3 2 1 0" zeek -b %INPUT -H seeds.out >> output
# @TEST-EXEC: btest-diff output
# @TEST-EXEC: btest-diff seeds.out
print rand(500000), unique_id("C");