diff --git a/CHANGES b/CHANGES index 6178a200ad..5347a360c0 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,15 @@ +6.0.0-dev.570 | 2023-05-12 23:29:09 +0200 + + * Introduce ZEEK_SEED_VALUES environment variable (Arne Welzel, Corelight) + + For "individually different but deterministic" runs, specifying Zeek's + seed as an environment variable eases setups as one can avoid creating + extra seed files for each of the individual processes. + + It is an error to specify the new ZEEK_SEED_VALUES variable together + with the existing ZEEK_SEED_FILE or -G. ZEEK_SEED_VALUES takes precedence over + deterministic mode (-D) like ZEEK_SEED_FILE does today already. + 6.0.0-dev.568 | 2023-05-12 13:41:55 -0700 * enhancements for event-tracing: (Vern Paxson, Corelight) diff --git a/NEWS b/NEWS index 1a24050c84..39e8c95985 100644 --- a/NEWS +++ b/NEWS @@ -253,6 +253,10 @@ New Functionality - New bifs for ``ceil()`` and ``log2()`` have been added. +- Seeds for deterministic processing can now also be set through a new environment + variable called ``ZEEK_SEED_VALUES``. The value is expected to contain 21 + non-negative integers separated by spaces. + Changed Functionality --------------------- diff --git a/VERSION b/VERSION index 8724cac6da..fcb6d0bb1c 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -6.0.0-dev.568 +6.0.0-dev.570 diff --git a/src/Options.cc b/src/Options.cc index d937957f90..ac4da88099 100644 --- a/src/Options.cc +++ b/src/Options.cc @@ -166,6 +166,8 @@ void usage(const char* prog, int code) util::zeek_prefixes().c_str()); fprintf(stderr, " $ZEEK_DNS_FAKE | disable DNS lookups (%s)\n", fake_dns() ? "on" : "off"); + fprintf(stderr, " $ZEEK_SEED_VALUES | list of space separated seeds (%s)\n", + getenv("ZEEK_SEED_VALUES") ? 
"set" : "not set"); fprintf(stderr, " $ZEEK_SEED_FILE | file to load seeds from (not set)\n"); fprintf(stderr, " $ZEEK_LOG_SUFFIX | ASCII log file extension (.%s)\n", logging::writer::detail::Ascii::LogExt().c_str()); diff --git a/src/util.cc b/src/util.cc index f13bd3048a..1e4abcf222 100644 --- a/src/util.cc +++ b/src/util.cc @@ -25,6 +25,7 @@ #include #include #include +#include #if defined(HAVE_MALLINFO) || defined(HAVE_MALLINFO2) #include @@ -383,6 +384,23 @@ static bool write_random_seeds(const char* write_file, uint32_t seed, return true; } +// Same as read_random_seeds() but takes seeds from a space separated string instead. +static bool fill_random_seeds(const std::string& seed_string, uint32_t* seed, + std::array& buf) + { + stringstream ss{seed_string}; + if ( ! (ss >> *seed) ) + return false; + + for ( auto& v : buf ) + { + if ( ! (ss >> v) ) + return false; + } + + return true; + } + static bool zeek_rand_deterministic = false; static long int zeek_rand_state = 0; static bool first_seed_saved = false; @@ -404,7 +422,8 @@ void seed_random(unsigned int seed) srandom(seed); } -void init_random_seed(const char* read_file, const char* write_file, bool use_empty_seeds) +void init_random_seed(const char* read_file, const char* write_file, bool use_empty_seeds, + const std::string& seed_string) { std::array buf = {}; size_t pos = 0; // accumulates entropy @@ -414,7 +433,14 @@ void init_random_seed(const char* read_file, const char* write_file, bool use_em if ( read_file ) { if ( ! read_random_seeds(read_file, &seed, buf) ) - reporter->FatalError("Could not load seeds from file '%s'.\n", read_file); + reporter->FatalError("Could not load seeds from file '%s'.", read_file); + else + seeds_done = true; + } + else if ( ! seed_string.empty() ) + { + if ( ! 
fill_random_seeds(seed_string, &seed, buf) ) + reporter->FatalError("Could not load seeds from string"); else seeds_done = true; } diff --git a/src/util.h b/src/util.h index 09ebb0eca7..d08a819488 100644 --- a/src/util.h +++ b/src/util.h @@ -138,12 +138,15 @@ extern bool ensure_dir(const char* dirname); extern void hmac_md5(size_t size, const unsigned char* bytes, unsigned char digest[16]); -// Initializes RNGs for zeek::random_number() and MD5 usage. If load_file is given, -// the seeds (both random & MD5) are loaded from that file. This takes -// precedence over the "use_empty_seeds" argument, which just -// zero-initializes all seed values. If write_file is given, the seeds are +// Initializes RNGs for zeek::random_number() and hmac-md5/siphash/highwayhash usage. +// If load_file is given, the seeds (both random & hashes) are loaded from that file. This +// takes precedence over the "seed_string" and "use_empty_seeds" arguments. Next, the content of +// "seed_string" is used as seeds if it is not empty. Otherwise, when "use_empty_seeds" is +// set, all seed values are zero-initialized. If neither of these provides initial seed values, +// platform-specific random data is used as seeds. If write_file is given, the seeds are // written to that file. -extern void init_random_seed(const char* load_file, const char* write_file, bool use_empty_seeds); +extern void init_random_seed(const char* load_file, const char* write_file, bool use_empty_seeds, + const std::string& seed_string = {}); // Retrieves the initial seed computed after the very first call to // init_random_seed(). 
Repeated calls to init_random_seed() will not affect diff --git a/src/zeek-setup.cc b/src/zeek-setup.cc index 768d4b7e7e..5eb981a2a1 100644 --- a/src/zeek-setup.cc +++ b/src/zeek-setup.cc @@ -629,15 +629,22 @@ SetupResult setup(int argc, char** argv, Options* zopts) supervisor_mgr = new Supervisor(std::move(cfg), std::move(*stem)); } + std::string seed_string; + if ( const auto* seed_env = getenv("ZEEK_SEED_VALUES") ) + seed_string = seed_env; + const char* seed_load_file = getenv("ZEEK_SEED_FILE"); if ( options.random_seed_input_file ) seed_load_file = options.random_seed_input_file->data(); + if ( seed_load_file && *seed_load_file && ! seed_string.empty() ) + reporter->FatalError("can't use ZEEK_SEED_VALUES together with ZEEK_SEED_FILE or -G"); + util::detail::init_random_seed( (seed_load_file && *seed_load_file ? seed_load_file : nullptr), options.random_seed_output_file ? options.random_seed_output_file->data() : nullptr, - options.deterministic_mode); + options.deterministic_mode, seed_string); // DEBUG_MSG("HMAC key: %s\n", md5_digest_print(shared_hmac_md5_key)); init_hash_function(); diff --git a/testing/btest/Baseline/core.seed-errors/output b/testing/btest/Baseline/core.seed-errors/output new file mode 100644 index 0000000000..6792ee320f --- /dev/null +++ b/testing/btest/Baseline/core.seed-errors/output @@ -0,0 +1,12 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +file does not exist +warning: Could not open seed file './does/not-exist': No such file or directory +fatal error: Could not load seeds from file './does/not-exist'. 
+cannot use ZEEK_SEED_FILE and ZEEK_SEED +fatal error: can't use ZEEK_SEED_VALUES together with ZEEK_SEED_FILE or -G +cannot use -G and ZEEK_SEED +fatal error: can't use ZEEK_SEED_VALUES together with ZEEK_SEED_FILE or -G +wrong format (1) +fatal error: Could not load seeds from string +wrong format (2) +fatal error: Could not load seeds from string diff --git a/testing/btest/Baseline/core.seed/output b/testing/btest/Baseline/core.seed/output new file mode 100644 index 0000000000..3c314654c4 --- /dev/null +++ b/testing/btest/Baseline/core.seed/output @@ -0,0 +1,17 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +test seed +10307, CrFj3eGxkRR5 +10307, CrFj3eGxkRR5 +1 to 21 +3, CS6rj43joLxl +3, CS6rj43joLxl +21 x 0, deterministic +3, CwwXWmUIYdSg +3, CwwXWmUIYdSg +different +39, CBZDI3Gc0Cl7 +78, CU9o4PABWw9k +117, CpaF4wTs0sq8 +writing seeds (twice) +3, CdYMcd1tcP2 +3, CZKuY3Fww4V7 diff --git a/testing/btest/Baseline/core.seed/seeds.out b/testing/btest/Baseline/core.seed/seeds.out new file mode 100644 index 0000000000..6eac81bd48 --- /dev/null +++ b/testing/btest/Baseline/core.seed/seeds.out @@ -0,0 +1,22 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. 
+0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +9 +8 +7 +6 +5 +4 +3 +2 +1 +0 diff --git a/testing/btest/core/seed-errors.zeek b/testing/btest/core/seed-errors.zeek new file mode 100644 index 0000000000..da7538584b --- /dev/null +++ b/testing/btest/core/seed-errors.zeek @@ -0,0 +1,16 @@ +# @TEST-DOC: Test error handling for invalid seed settings (missing seed file, ZEEK_SEED_VALUES combined with ZEEK_SEED_FILE or -G, malformed ZEEK_SEED_VALUES) +# @TEST-EXEC: seq 21 > random.seed +# @TEST-EXEC: echo "file does not exist" >> output +# @TEST-EXEC-FAIL: ZEEK_SEED_FILE=./does/not-exist zeek -b %INPUT >> output 2>&1 +# @TEST-EXEC: echo "cannot use ZEEK_SEED_FILE and ZEEK_SEED" >> output +# @TEST-EXEC-FAIL: ZEEK_SEED_FILE=./does/not-exist ZEEK_SEED_VALUES="0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0" zeek -b %INPUT >> output 2>&1 +# @TEST-EXEC: echo "cannot use -G and ZEEK_SEED" >> output +# @TEST-EXEC-FAIL: ZEEK_SEED_FILE= ZEEK_SEED_VALUES="0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0" zeek -G ./random.seed -b %INPUT >> output 2>&1 +# @TEST-EXEC: echo "wrong format (1)" >> output +# @TEST-EXEC-FAIL: ZEEK_SEED_FILE= ZEEK_SEED_VALUES="a b c 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0" zeek -b %INPUT >> output 2>&1 +# @TEST-EXEC: echo "wrong format (2)" >> output +# @TEST-EXEC-FAIL: ZEEK_SEED_FILE= ZEEK_SEED_VALUES="0 0 0" zeek -b %INPUT >> output 2>&1 + +# @TEST-EXEC: btest-diff output + +print rand(500000), unique_id("C"); diff --git a/testing/btest/core/seed.zeek b/testing/btest/core/seed.zeek new file mode 100644 index 0000000000..57804927fc --- /dev/null +++ b/testing/btest/core/seed.zeek @@ -0,0 +1,21 @@ +# @TEST-DOC: Test specifying seeds via ZEEK_SEED_VALUES +# @TEST-EXEC: echo "test seed" >> output +# @TEST-EXEC: zeek -b %INPUT >> output +# @TEST-EXEC: bash -c 'ZEEK_SEED_VALUES=$(paste -d " " $ZEEK_SEED_FILE) ZEEK_SEED_FILE= zeek -b %INPUT' >> output +# @TEST-EXEC: echo "1 to 21" >> output +# @TEST-EXEC: bash -c 'ZEEK_SEED_FILE= ZEEK_SEED_VALUES=$(echo {1..21}) zeek -b %INPUT ' >> output +# @TEST-EXEC: ZEEK_SEED_FILE= ZEEK_SEED_VALUES="1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21" 
zeek -b %INPUT >> output +# @TEST-EXEC: echo "21 x 0, deterministic" >> output +# @TEST-EXEC: ZEEK_SEED_FILE= ZEEK_SEED_VALUES="0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0" zeek -b %INPUT >> output +# @TEST-EXEC: ZEEK_SEED_FILE= zeek -D -b %INPUT >> output +# @TEST-EXEC: echo "different" >> output +# @TEST-EXEC: ZEEK_SEED_FILE= ZEEK_SEED_VALUES="10 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1" zeek -b %INPUT >> output +# @TEST-EXEC: ZEEK_SEED_FILE= ZEEK_SEED_VALUES="20 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2" zeek -b %INPUT >> output +# @TEST-EXEC: ZEEK_SEED_FILE= ZEEK_SEED_VALUES="30 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3" zeek -b %INPUT >> output +# @TEST-EXEC: echo "writing seeds (twice)" >> output +# @TEST-EXEC: ZEEK_SEED_FILE= ZEEK_SEED_VALUES="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21" zeek -b %INPUT -H seeds.out >> output +# @TEST-EXEC: ZEEK_SEED_FILE= ZEEK_SEED_VALUES="0 1 2 3 4 5 6 7 8 9 10 9 8 7 6 5 4 3 2 1 0" zeek -b %INPUT -H seeds.out >> output +# @TEST-EXEC: btest-diff output +# @TEST-EXEC: btest-diff seeds.out + +print rand(500000), unique_id("C");