diff --git a/CHANGES b/CHANGES index e1e1562c70..f99b1b4065 100644 --- a/CHANGES +++ b/CHANGES @@ -1,4 +1,65 @@ +3.2.0-dev.974 | 2020-07-23 13:02:13 -0700 + + * origin/topic/jsiwek/gh-1076-fix-random: + Deprecate bro_srandom(), replace with zeek::seed_random(). + Add zeek::max_random() & fix misuse of RAND_MAX w/ zeek::random_number() + Deprecate bro_random(), replace with zeek::random_number() + Deprecate bro_prng(), replace with zeek::prng() + GH-1076: Fix bro_srandom() to replace 0 seeds with 1 + GH-1076: Fix bro_prng() implementation + GH-1076: Fix use of getrandom() (Tim Wojtulewicz, Corelight) + + * Deprecate bro_srandom(), replace with zeek::seed_random(). + + Avoiding zeek::srandom() to avoid potential for confusion with srandom() (Jon Siwek, Corelight) + + * Add zeek::max_random() & fix misuse of RAND_MAX w/ zeek::random_number() + + In deterministic mode, RAND_MAX is not related to the result of + zeek::random_number() (formerly bro_random()), but some logic was + using RAND_MAX as indication of the possible range of values. The + new zeek::max_random() will give the correct upper-bound regardless + of whether deterministic-mode is used. (Jon Siwek, Corelight) + + * Deprecate bro_random(), replace with zeek::random_number() + + Avoiding the use of zeek::random() due to potential for confusion + with random(). (Jon Siwek, Corelight) + + * Deprecate bro_prng(), replace with zeek::prng() + + The type used for storing the state of the RNG is changed from + `unsigned int` to `long int` since the former has a minimal range + of [0, 65,535] while the RNG function itself has a range of + [1, 2147483646]. A `long int` must be capable of + [−2147483647, +2147483647] and is also the return type of `random()`, + which is what zeek::prng() aims to roughly parity. (Jon Siwek, Corelight) + + * GH-1076: Fix bro_srandom() to replace 0 seeds with 1 + + The bro_prng() implementation cannot generate 0 as a result since it + causes every subsequent number from the PRNG to also be 0, so use the + number 1 instead of 0. (Jon Siwek, Corelight) + + * GH-1076: Fix bro_prng() implementation + + The intermediate result of the PRNG used unsigned storage, preventing + the ( result < 0 ) branch from ever being evaluated. This could cause + return values to exceed the modulus as well as RAND_MAX. + + One interesting effect of this is potential for the rand() BIF to + return values outside the requested maximum limit. + + Another interesting effect of this is that a PacketFilter may start + randomly dropping packets even if it was not configured for + random-packet-drops. (Jon Siwek, Corelight) + + * GH-1076: Fix use of getrandom() + + The availability and use of getrandom() actually caused unrandom and + deterministic results in terms of Zeek's random number generation. (Jon Siwek, Corelight) + 3.2.0-dev.965 | 2020-07-23 08:31:41 -0700 * GH-1068: Add zeek symlink to allow "zeek/Foo.h" include style diff --git a/VERSION b/VERSION index 4c70a0ee04..3f24308dd6 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.2.0-dev.965 +3.2.0-dev.974 diff --git a/src/Anon.cc b/src/Anon.cc index 98d98a9f99..f8f580c1db 100644 --- a/src/Anon.cc +++ b/src/Anon.cc @@ -20,7 +20,7 @@ AnonymizeIPAddr* zeek::detail::ip_anonymizer[NUM_ADDR_ANONYMIZATION_METHODS] = { static uint32_t rand32() { - return ((bro_random() & 0xffff) << 16) | (bro_random() & 0xffff); + return ((zeek::random_number() & 0xffff) << 16) | (zeek::random_number() & 0xffff); } // From tcpdpriv. diff --git a/src/Net.cc b/src/Net.cc index 97940a792d..eaffe0b749 100644 --- a/src/Net.cc +++ b/src/Net.cc @@ -248,7 +248,7 @@ void net_packet_dispatch(double t, const Packet* pkt, iosource::PktSrc* src_ps) if ( load_freq == 0 ) load_freq = uint32_t(0xffffffff) / uint32_t(load_sample_freq); - if ( uint32_t(bro_random() & 0xffffffff) < load_freq ) + if ( uint32_t(zeek::random_number() & 0xffffffff) < load_freq ) { // Drain the queued timer events so they're not // charged against this sample. diff --git a/src/PacketFilter.cc b/src/PacketFilter.cc index 61afbe0afd..c1c496235d 100644 --- a/src/PacketFilter.cc +++ b/src/PacketFilter.cc @@ -18,7 +18,7 @@ void PacketFilter::AddSrc(const IPAddr& src, uint32_t tcp_flags, double probabil { Filter* f = new Filter; f->tcp_flags = tcp_flags; - f->probability = uint32_t(probability * RAND_MAX); + f->probability = probability * static_cast(zeek::max_random()); auto prev = static_cast(src_filter.Insert(src, 128, f)); delete prev; } @@ -27,7 +27,7 @@ void PacketFilter::AddSrc(zeek::Val* src, uint32_t tcp_flags, double probability { Filter* f = new Filter; f->tcp_flags = tcp_flags; - f->probability = uint32_t(probability * RAND_MAX); + f->probability = probability * static_cast(zeek::max_random()); auto prev = static_cast(src_filter.Insert(src, f)); delete prev; } @@ -36,7 +36,7 @@ void PacketFilter::AddDst(const IPAddr& dst, uint32_t tcp_flags, double probabil { Filter* f = new Filter; f->tcp_flags = tcp_flags; - f->probability = uint32_t(probability * RAND_MAX); + f->probability = probability * static_cast(zeek::max_random()); auto prev = static_cast(dst_filter.Insert(dst, 128, f)); delete prev; } @@ -45,7 +45,7 @@ void PacketFilter::AddDst(zeek::Val* dst, uint32_t tcp_flags, double probability { Filter* f = new Filter; f->tcp_flags = tcp_flags; - f->probability = uint32_t(probability * RAND_MAX); + f->probability = probability * static_cast(zeek::max_random()); auto prev = static_cast(dst_filter.Insert(dst, f)); delete prev; } @@ -113,5 +113,5 @@ bool PacketFilter::MatchFilter(const Filter& f, const IP_Hdr& ip, return false; } - return uint32_t(bro_random()) < f.probability; + return zeek::random_number() < f.probability; } diff --git a/src/PacketFilter.h b/src/PacketFilter.h index a485b604e8..c450120ee4 100644 --- a/src/PacketFilter.h +++ b/src/PacketFilter.h @@ -34,7 +34,7 @@ public: private: struct Filter { uint32_t tcp_flags; - uint32_t probability; + double probability; }; static void DeleteFilter(void* data); diff --git a/src/probabilistic/Hasher.cc b/src/probabilistic/Hasher.cc index 835cb7e5d7..cfe5f3244b 100644 --- a/src/probabilistic/Hasher.cc +++ b/src/probabilistic/Hasher.cc @@ -119,7 +119,7 @@ DefaultHasher::DefaultHasher(size_t k, Hasher::seed_t seed) for ( size_t i = 1; i <= k; ++i ) { seed_t s = Seed(); - s.h[0] += bro_prng(i); + s.h[0] += zeek::prng(i); hash_functions.push_back(UHF(s)); } } @@ -149,7 +149,7 @@ bool DefaultHasher::Equals(const Hasher* other) const } DoubleHasher::DoubleHasher(size_t k, seed_t seed) - : Hasher(k, seed), h1(seed + bro_prng(1)), h2(seed + bro_prng(2)) + : Hasher(k, seed), h1(seed + zeek::prng(1)), h2(seed + zeek::prng(2)) { } diff --git a/src/util.cc b/src/util.cc index 80af723c72..61fe2df55d 100644 --- a/src/util.cc +++ b/src/util.cc @@ -1066,26 +1066,31 @@ static bool write_random_seeds(const char* write_file, uint32_t seed, } static bool bro_rand_determistic = false; -static unsigned int bro_rand_state = 0; +static long int bro_rand_state = 0; static bool first_seed_saved = false; static unsigned int first_seed = 0; static void bro_srandom(unsigned int seed, bool deterministic) { - bro_rand_state = seed; + bro_rand_state = seed == 0 ? 1 : seed; bro_rand_determistic = deterministic; srandom(seed); } -void bro_srandom(unsigned int seed) +void zeek::seed_random(unsigned int seed) { if ( bro_rand_determistic ) - bro_rand_state = seed; + bro_rand_state = seed == 0 ? 1 : seed; else srandom(seed); } +void bro_srandom(unsigned int seed) + { + zeek::seed_random(seed); + } + void init_random_seed(const char* read_file, const char* write_file, bool use_empty_seeds) { @@ -1105,16 +1110,15 @@ void init_random_seed(const char* read_file, const char* write_file, else if ( use_empty_seeds ) seeds_done = true; + if ( ! seeds_done ) + { #ifdef HAVE_GETRANDOM - if ( ! seeds_done ) - { - ssize_t nbytes = getrandom(buf.data(), sizeof(buf), 0); - seeds_done = nbytes == ssize_t(sizeof(buf)); - } -#endif - - if ( ! seeds_done ) - { + // getrandom() guarantees reads up to 256 bytes are always successful, + assert(sizeof(buf) < 256); + auto nbytes = getrandom(buf.data(), sizeof(buf), 0); + assert(nbytes == sizeof(buf)); + pos += nbytes / sizeof(uint32_t); +#else // Gather up some entropy. gettimeofday((struct timeval *)(buf.data() + pos), 0); pos += sizeof(struct timeval) / sizeof(uint32_t); @@ -1141,9 +1145,10 @@ void init_random_seed(const char* read_file, const char* write_file, // systems due to a lack of entropy. errno = 0; } +#endif if ( pos < KeyedHash::SEED_INIT_SIZE ) - reporter->FatalError("Could not read enough random data from /dev/urandom. Wanted %d, got %lu", KeyedHash::SEED_INIT_SIZE, pos); + reporter->FatalError("Could not read enough random data. Wanted %d, got %lu", KeyedHash::SEED_INIT_SIZE, pos); if ( ! seed ) { @@ -1183,33 +1188,57 @@ bool have_random_seed() return bro_rand_determistic; } -unsigned int bro_prng(unsigned int state) +constexpr uint32_t zeek_prng_mod = 2147483647; +constexpr uint32_t zeek_prng_max = zeek_prng_mod - 1; + +long int zeek::max_random() { - // Use our own simple linear congruence PRNG to make sure we are - // predictable across platforms. - static const long int m = 2147483647; - static const long int a = 16807; - const long int q = m / a; - const long int r = m % a; - - state = a * ( state % q ) - r * ( state / q ); - - if ( state <= 0 ) - state += m; - - return state; + return bro_rand_determistic ? zeek_prng_max : RAND_MAX; } -long int bro_random() +long int zeek::prng(long int state) + { + // Use our own simple linear congruence PRNG to make sure we are + // predictable across platforms. (Lehmer RNG, Schrage's method) + // Note: the choice of "long int" storage type for the state is mostly + // for parity with the possible return values of random(). + constexpr uint32_t m = zeek_prng_mod; + constexpr uint32_t a = 16807; + constexpr uint32_t q = m / a; + constexpr uint32_t r = m % a; + + uint32_t rem = state % q; + uint32_t div = state / q; + int32_t s = a * rem; + int32_t t = r * div; + int32_t res = s - t; + + if ( res < 0 ) + res += m; + + return res; + } + +unsigned int bro_prng(unsigned int state) + { + return zeek::prng(state); + } + +long int zeek::random_number() { if ( ! bro_rand_determistic ) return random(); // Use system PRNG. - bro_rand_state = bro_prng(bro_rand_state); + bro_rand_state = zeek::prng(bro_rand_state); return bro_rand_state; } +long int bro_random() + { + return zeek::random_number(); + } + // Returns a 64-bit random string. uint64_t rand64bit() { @@ -1217,7 +1246,7 @@ uint64_t rand64bit() int i; for ( i = 1; i <= 4; ++i ) - base = (base<<16) | bro_random(); + base = (base<<16) | zeek::random_number(); return base; } @@ -2087,7 +2116,7 @@ uint64_t calculate_unique_id(size_t pool) gettimeofday(&unique.time, 0); unique.pool = (uint64_t) pool; unique.pid = getpid(); - unique.rnd = bro_random(); + unique.rnd = static_cast(zeek::random_number()); uid_instance = HashKey::HashBytes(&unique, sizeof(unique)); ++uid_instance; // Now it's larger than zero. diff --git a/src/util.h b/src/util.h index 3bfee23e6e..d848a43078 100644 --- a/src/util.h +++ b/src/util.h @@ -202,7 +202,7 @@ extern std::string strstrip(std::string s); extern void hmac_md5(size_t size, const unsigned char* bytes, unsigned char digest[16]); -// Initializes RNGs for bro_random() and MD5 usage. If load_file is given, +// Initializes RNGs for zeek::random_number() and MD5 usage. If load_file is given, // the seeds (both random & MD5) are loaded from that file. This takes // precedence over the "use_empty_seeds" argument, which just // zero-initializes all seed values. If write_file is given, the seeds are @@ -220,14 +220,17 @@ extern bool have_random_seed(); // A simple linear congruence PRNG. It takes its state as argument and // returns a new random value, which can serve as state for subsequent calls. +[[deprecated("Remove in v4.1. Use zeek::prng()")]] unsigned int bro_prng(unsigned int state); // Replacement for the system random(), to which is normally falls back // except when a seed has been given. In that case, the function bro_prng. +[[deprecated("Remove in v4.1. Use zeek::random_number()")]] long int bro_random(); // Calls the system srandom() function with the given seed if not running // in deterministic mode, else it updates the state of the deterministic PRNG. +[[deprecated("Remove in v4.1. Use zeek::seed_random()")]] void bro_srandom(unsigned int seed); extern uint64_t rand64bit(); @@ -588,4 +591,36 @@ namespace zeek { */ void set_thread_name(const char* name, pthread_t tid = pthread_self()); +/** + * A platform-independent PRNG implementation. Note that this is not + * necessarily a "statistically sound" implementation as the main purpose is + * not for production use, but rather for regression testing. + * @param state The value used to generate the next random number. + * @return A new random value generated from *state* and that can passed + * back into subsequent calls to generate further random numbers. + */ +long int prng(long int state); + +/** + * Wrapper for system random() in the default case, but when running in + * deterministic mode, uses the platform-independent zeek::prng() + * to obtain consistent results since implementations of rand() may vary. + * @return A value in the range [0, zeek::max_random()]. + */ +long int random_number(); + +/** + * @return The maximum value that can be returned from zeek::random_number(). + * When not using deterministic-mode, this is always equivalent to RAND_MAX. + */ +long int max_random(); + +/** + * Wrapper for system srandom() in the default case, but when running in + * deterministic mode, updates the state used for calling zeek::prng() + * inside of zeek::random_number(). + * @param seed Value to use for initializing the PRNG. + */ +void seed_random(unsigned int seed); + } // namespace zeek diff --git a/src/zeek.bif b/src/zeek.bif index f1af07ea36..0de446fbbc 100644 --- a/src/zeek.bif +++ b/src/zeek.bif @@ -930,7 +930,7 @@ function hrw_weight%(key_digest: count, site_id: count%): count ## provided by the OS. function rand%(max: count%): count %{ - auto result = bro_uint_t(double(max) * double(bro_random()) / (RAND_MAX + 1.0)); + auto result = bro_uint_t(double(max) * double(zeek::random_number()) / (zeek::max_random() + 1.0)); return zeek::val_mgr->Count(result); %} @@ -946,7 +946,7 @@ function rand%(max: count%): count ## provided by the OS. function srand%(seed: count%): any %{ - bro_srandom(seed); + zeek::seed_random(seed); return nullptr; %} diff --git a/testing/btest/Baseline/bifs.rand/out.3 b/testing/btest/Baseline/bifs.rand/out.3 new file mode 100644 index 0000000000..cf84443e49 --- /dev/null +++ b/testing/btest/Baseline/bifs.rand/out.3 @@ -0,0 +1 @@ +F diff --git a/testing/btest/Baseline/bifs.rand/out.4 b/testing/btest/Baseline/bifs.rand/out.4 new file mode 100644 index 0000000000..a9a4ef9ac4 --- /dev/null +++ b/testing/btest/Baseline/bifs.rand/out.4 @@ -0,0 +1,6 @@ +0 +131 +755 +4 +634 +473 diff --git a/testing/btest/Baseline/scripts.base.frameworks.netcontrol.packetfilter/conn.log b/testing/btest/Baseline/scripts.base.frameworks.netcontrol.packetfilter/conn.log index 336c8ab55f..e6a6ef559d 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.netcontrol.packetfilter/conn.log +++ b/testing/btest/Baseline/scripts.base.frameworks.netcontrol.packetfilter/conn.log @@ -3,13 +3,12 @@ #empty_field (empty) #unset_field - #path conn -#open 2019-07-31-22-25-32 +#open 2020-07-22-05-02-04 #fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p proto service duration orig_bytes resp_bytes conn_state local_orig local_resp missed_bytes history orig_pkts orig_ip_bytes resp_pkts resp_ip_bytes tunnel_parents #types time string addr port addr port enum string interval count count string bool bool count string count count count count set[string] 1254722767.492060 CHhAvVGS1DHFjwGM9 10.10.1.4 56166 10.10.1.1 53 udp dns 0.034025 34 100 SF - - 0 Dd 1 62 1 128 - 1254722776.690444 C4J4Th3PJpwUYZZ6gc 10.10.1.20 138 10.10.1.255 138 udp - - - - S0 - - 0 D 1 229 0 0 - 1254722767.529046 ClEkJM2Vm5giqnMf4h 10.10.1.4 1470 74.53.140.153 25 tcp - 0.346950 0 0 S1 - - 0 Sh 1 48 1 48 - 1437831776.764391 CtPZjS20MLrsMUOJi2 192.168.133.100 49285 66.196.121.26 5050 tcp - 0.343008 41 0 OTH - - 0 Da 1 93 1 52 - -1437831798.533765 CmES5u32sYpV7JYN 192.168.133.100 49336 74.125.71.189 443 tcp - - - - OTH - - 0 A 1 52 0 0 - -1437831787.856895 CUM0KZ3MLUfNB0cl11 192.168.133.100 49648 192.168.133.102 25 tcp - 0.048043 162 154 S1 - - 154 ShDgA 3 192 1 60 - -#close 2019-07-31-22-25-32 +1437831787.856895 CUM0KZ3MLUfNB0cl11 192.168.133.100 49648 192.168.133.102 25 tcp - 0.004707 0 0 S1 - - 0 Sh 1 64 1 60 - +#close 2020-07-22-05-02-04 diff --git a/testing/btest/bifs/rand.zeek b/testing/btest/bifs/rand.zeek index b4b0facabc..c182ef6a47 100644 --- a/testing/btest/bifs/rand.zeek +++ b/testing/btest/bifs/rand.zeek @@ -1,13 +1,22 @@ # # @TEST-EXEC: zeek -b %INPUT >out # @TEST-EXEC: zeek -b %INPUT do_seed=F >out.2 +# @TEST-EXEC: unset ZEEK_SEED_FILE && zeek -b %INPUT real_random=T >out.3 +# @TEST-EXEC: for i in $(seq 21); do echo 0 >>random-zero.seed; done +# @TEST-EXEC: ZEEK_SEED_FILE=random-zero.seed zeek -b %INPUT >out.4 # @TEST-EXEC: btest-diff out # @TEST-EXEC: btest-diff out.2 +# @TEST-EXEC: btest-diff out.3 +# @TEST-EXEC: btest-diff out.4 const do_seed = T &redef; +const real_random = F &redef; event zeek_init() { + if ( real_random ) + return; + local a = rand(1000); local b = rand(1000); local c = rand(1000); @@ -26,4 +35,53 @@ event zeek_init() print d; print e; print f; + + local i = 0; + local max = 3; + + while ( i < 100 ) + { + local rn = rand(max); + + if ( rn >= max ) + print fmt("ERROR: rand returned value greater than %s: %s", + max, rn); + + i += 1; + } + } + +event zeek_init() &priority=-10 + { + if ( ! real_random ) + return; + + local v1: vector of count = vector(); + local v2: vector of count = vector(); + local i = 0; + + while ( i < 20 ) + { + v1 += rand(65535); + i += 1; + } + + i = 0; + + while ( i < 20 ) + { + v2 += rand(65535); + i += 1; + } + + # Note: this is expected to be F with high probability, but + # technically could all be the same because, well, that's a + # valid "random" sequence, too + print all_set(v1 == v2); + + if ( all_set(v1 == v2) ) + { + print v1; + print v2; + } } diff --git a/testing/btest/plugins/reader-plugin/src/Foo.cc b/testing/btest/plugins/reader-plugin/src/Foo.cc index 4a05420bc3..a7e5021932 100644 --- a/testing/btest/plugins/reader-plugin/src/Foo.cc +++ b/testing/btest/plugins/reader-plugin/src/Foo.cc @@ -45,10 +45,10 @@ std::string Foo::RandomString(const int len) "abcdefghijklmnopqrstuvwxyz"; for (int i = 0; i < len; ++i) - // bro_random is not thread-safe; as we are only using one simultaneous thread + // zeek::random_number() is not thread-safe; as we are only using one simultaneous thread // here, this should not matter in this case. If this test ever starts showing // random errors, this might be the culprit. - s[i] = values[bro_random() / (RAND_MAX / sizeof(values))]; + s[i] = values[zeek::random_number() / (zeek::max_random() / sizeof(values))]; return s; }