Merge remote-tracking branch 'origin/topic/jsiwek/gh-1076-fix-random'

* origin/topic/jsiwek/gh-1076-fix-random:
  Deprecate bro_srandom(), replace with zeek::seed_random().
  Add zeek::max_random() & fix misuse of RAND_MAX w/ zeek::random_number()
  Deprecate bro_random(), replace with zeek::random_number()
  Deprecate bro_prng(), replace with zeek::prng()
  GH-1076: Fix bro_srandom() to replace 0 seeds with 1
  GH-1076: Fix bro_prng() implementation
  GH-1076: Fix use of getrandom()
This commit is contained in:
Tim Wojtulewicz 2020-07-23 13:02:13 -07:00
commit ebc073ba92
15 changed files with 241 additions and 52 deletions

61
CHANGES
View file

@ -1,4 +1,65 @@
3.2.0-dev.974 | 2020-07-23 13:02:13 -0700
* origin/topic/jsiwek/gh-1076-fix-random:
Deprecate bro_srandom(), replace with zeek::seed_random().
Add zeek::max_random() & fix misuse of RAND_MAX w/ zeek::random_number()
Deprecate bro_random(), replace with zeek::random_number()
Deprecate bro_prng(), replace with zeek::prng()
GH-1076: Fix bro_srandom() to replace 0 seeds with 1
GH-1076: Fix bro_prng() implementation
GH-1076: Fix use of getrandom() (Tim Wojtulewicz, Corelight)
* Deprecate bro_srandom(), replace with zeek::seed_random().
Avoiding zeek::srandom() to avoid potential for confusion with srandom() (Jon Siwek, Corelight)
* Add zeek::max_random() & fix misuse of RAND_MAX w/ zeek::random_number()
In deterministic mode, RAND_MAX is not related to the result of
zeek::random_number() (formerly bro_random()), but some logic was
using RAND_MAX as indication of the possible range of values. The
new zeek::max_random() will give the correct upper-bound regardless
of whether deterministic-mode is used. (Jon Siwek, Corelight)
* Deprecate bro_random(), replace with zeek::random_number()
Avoiding the use of zeek::random() due to potential for confusion
with random(). (Jon Siwek, Corelight)
* Deprecate bro_prng(), replace with zeek::prng()
The type used for storing the state of the RNG is changed from
`unsigned int` to `long int` since the former has a minimal range
of [0, 65,535] while the RNG function itself has a range of
[1, 2147483646]. A `long int` must be capable of
[2147483647, +2147483647] and is also the return type of `random()`,
which is what zeek::prng() aims to roughly parity. (Jon Siwek, Corelight)
* GH-1076: Fix bro_srandom() to replace 0 seeds with 1
The bro_prng() implementation cannot generate 0 as a result since it
causes every subsequent number from the PRNG to also be 0, so use the
number 1 instead of 0. (Jon Siwek, Corelight)
* GH-1076: Fix bro_prng() implementation
The intermediate result of the PRNG used unsigned storage, preventing
the ( result < 0 ) branch from ever being evaluated. This could cause
return values to exceed the modulus as well as RAND_MAX.
One interesting effect of this is potential for the rand() BIF to
return values outside the requested maximum limit.
Another interesting effect of this is that a PacketFilter may start
randomly dropping packets even if it was not configured for
random-packet-drops. (Jon Siwek, Corelight)
* GH-1076: Fix use of getrandom()
The availability and use of getrandom() actually caused unrandom and
deterministic results in terms of Zeek's random number generation. (Jon Siwek, Corelight)
3.2.0-dev.965 | 2020-07-23 08:31:41 -0700
* GH-1068: Add zeek symlink to allow "zeek/Foo.h" include style

View file

@ -1 +1 @@
3.2.0-dev.965
3.2.0-dev.974

View file

@ -20,7 +20,7 @@ AnonymizeIPAddr* zeek::detail::ip_anonymizer[NUM_ADDR_ANONYMIZATION_METHODS] = {
static uint32_t rand32()
{
return ((bro_random() & 0xffff) << 16) | (bro_random() & 0xffff);
return ((zeek::random_number() & 0xffff) << 16) | (zeek::random_number() & 0xffff);
}
// From tcpdpriv.

View file

@ -248,7 +248,7 @@ void net_packet_dispatch(double t, const Packet* pkt, iosource::PktSrc* src_ps)
if ( load_freq == 0 )
load_freq = uint32_t(0xffffffff) / uint32_t(load_sample_freq);
if ( uint32_t(bro_random() & 0xffffffff) < load_freq )
if ( uint32_t(zeek::random_number() & 0xffffffff) < load_freq )
{
// Drain the queued timer events so they're not
// charged against this sample.

View file

@ -18,7 +18,7 @@ void PacketFilter::AddSrc(const IPAddr& src, uint32_t tcp_flags, double probabil
{
Filter* f = new Filter;
f->tcp_flags = tcp_flags;
f->probability = uint32_t(probability * RAND_MAX);
f->probability = probability * static_cast<double>(zeek::max_random());
auto prev = static_cast<Filter*>(src_filter.Insert(src, 128, f));
delete prev;
}
@ -27,7 +27,7 @@ void PacketFilter::AddSrc(zeek::Val* src, uint32_t tcp_flags, double probability
{
Filter* f = new Filter;
f->tcp_flags = tcp_flags;
f->probability = uint32_t(probability * RAND_MAX);
f->probability = probability * static_cast<double>(zeek::max_random());
auto prev = static_cast<Filter*>(src_filter.Insert(src, f));
delete prev;
}
@ -36,7 +36,7 @@ void PacketFilter::AddDst(const IPAddr& dst, uint32_t tcp_flags, double probabil
{
Filter* f = new Filter;
f->tcp_flags = tcp_flags;
f->probability = uint32_t(probability * RAND_MAX);
f->probability = probability * static_cast<double>(zeek::max_random());
auto prev = static_cast<Filter*>(dst_filter.Insert(dst, 128, f));
delete prev;
}
@ -45,7 +45,7 @@ void PacketFilter::AddDst(zeek::Val* dst, uint32_t tcp_flags, double probability
{
Filter* f = new Filter;
f->tcp_flags = tcp_flags;
f->probability = uint32_t(probability * RAND_MAX);
f->probability = probability * static_cast<double>(zeek::max_random());
auto prev = static_cast<Filter*>(dst_filter.Insert(dst, f));
delete prev;
}
@ -113,5 +113,5 @@ bool PacketFilter::MatchFilter(const Filter& f, const IP_Hdr& ip,
return false;
}
return uint32_t(bro_random()) < f.probability;
return zeek::random_number() < f.probability;
}

View file

@ -34,7 +34,7 @@ public:
private:
struct Filter {
uint32_t tcp_flags;
uint32_t probability;
double probability;
};
static void DeleteFilter(void* data);

View file

@ -119,7 +119,7 @@ DefaultHasher::DefaultHasher(size_t k, Hasher::seed_t seed)
for ( size_t i = 1; i <= k; ++i )
{
seed_t s = Seed();
s.h[0] += bro_prng(i);
s.h[0] += zeek::prng(i);
hash_functions.push_back(UHF(s));
}
}
@ -149,7 +149,7 @@ bool DefaultHasher::Equals(const Hasher* other) const
}
DoubleHasher::DoubleHasher(size_t k, seed_t seed)
: Hasher(k, seed), h1(seed + bro_prng(1)), h2(seed + bro_prng(2))
: Hasher(k, seed), h1(seed + zeek::prng(1)), h2(seed + zeek::prng(2))
{
}

View file

@ -1066,26 +1066,31 @@ static bool write_random_seeds(const char* write_file, uint32_t seed,
}
static bool bro_rand_determistic = false;
static unsigned int bro_rand_state = 0;
static long int bro_rand_state = 0;
static bool first_seed_saved = false;
static unsigned int first_seed = 0;
static void bro_srandom(unsigned int seed, bool deterministic)
{
bro_rand_state = seed;
bro_rand_state = seed == 0 ? 1 : seed;
bro_rand_determistic = deterministic;
srandom(seed);
}
void bro_srandom(unsigned int seed)
void zeek::seed_random(unsigned int seed)
{
if ( bro_rand_determistic )
bro_rand_state = seed;
bro_rand_state = seed == 0 ? 1 : seed;
else
srandom(seed);
}
void bro_srandom(unsigned int seed)
{
zeek::seed_random(seed);
}
void init_random_seed(const char* read_file, const char* write_file,
bool use_empty_seeds)
{
@ -1105,16 +1110,15 @@ void init_random_seed(const char* read_file, const char* write_file,
else if ( use_empty_seeds )
seeds_done = true;
if ( ! seeds_done )
{
#ifdef HAVE_GETRANDOM
if ( ! seeds_done )
{
ssize_t nbytes = getrandom(buf.data(), sizeof(buf), 0);
seeds_done = nbytes == ssize_t(sizeof(buf));
}
#endif
if ( ! seeds_done )
{
// getrandom() guarantees reads up to 256 bytes are always successful,
assert(sizeof(buf) < 256);
auto nbytes = getrandom(buf.data(), sizeof(buf), 0);
assert(nbytes == sizeof(buf));
pos += nbytes / sizeof(uint32_t);
#else
// Gather up some entropy.
gettimeofday((struct timeval *)(buf.data() + pos), 0);
pos += sizeof(struct timeval) / sizeof(uint32_t);
@ -1141,9 +1145,10 @@ void init_random_seed(const char* read_file, const char* write_file,
// systems due to a lack of entropy.
errno = 0;
}
#endif
if ( pos < KeyedHash::SEED_INIT_SIZE )
reporter->FatalError("Could not read enough random data from /dev/urandom. Wanted %d, got %lu", KeyedHash::SEED_INIT_SIZE, pos);
reporter->FatalError("Could not read enough random data. Wanted %d, got %lu", KeyedHash::SEED_INIT_SIZE, pos);
if ( ! seed )
{
@ -1183,33 +1188,57 @@ bool have_random_seed()
return bro_rand_determistic;
}
unsigned int bro_prng(unsigned int state)
constexpr uint32_t zeek_prng_mod = 2147483647;
constexpr uint32_t zeek_prng_max = zeek_prng_mod - 1;
long int zeek::max_random()
{
// Use our own simple linear congruence PRNG to make sure we are
// predictable across platforms.
static const long int m = 2147483647;
static const long int a = 16807;
const long int q = m / a;
const long int r = m % a;
state = a * ( state % q ) - r * ( state / q );
if ( state <= 0 )
state += m;
return state;
return bro_rand_determistic ? zeek_prng_max : RAND_MAX;
}
long int bro_random()
long int zeek::prng(long int state)
{
// Use our own simple linear congruence PRNG to make sure we are
// predictable across platforms. (Lehmer RNG, Schrage's method)
// Note: the choice of "long int" storage type for the state is mostly
// for parity with the possible return values of random().
constexpr uint32_t m = zeek_prng_mod;
constexpr uint32_t a = 16807;
constexpr uint32_t q = m / a;
constexpr uint32_t r = m % a;
uint32_t rem = state % q;
uint32_t div = state / q;
int32_t s = a * rem;
int32_t t = r * div;
int32_t res = s - t;
if ( res < 0 )
res += m;
return res;
}
unsigned int bro_prng(unsigned int state)
{
return zeek::prng(state);
}
long int zeek::random_number()
{
if ( ! bro_rand_determistic )
return random(); // Use system PRNG.
bro_rand_state = bro_prng(bro_rand_state);
bro_rand_state = zeek::prng(bro_rand_state);
return bro_rand_state;
}
long int bro_random()
{
return zeek::random_number();
}
// Returns a 64-bit random string.
uint64_t rand64bit()
{
@ -1217,7 +1246,7 @@ uint64_t rand64bit()
int i;
for ( i = 1; i <= 4; ++i )
base = (base<<16) | bro_random();
base = (base<<16) | zeek::random_number();
return base;
}
@ -2087,7 +2116,7 @@ uint64_t calculate_unique_id(size_t pool)
gettimeofday(&unique.time, 0);
unique.pool = (uint64_t) pool;
unique.pid = getpid();
unique.rnd = bro_random();
unique.rnd = static_cast<int>(zeek::random_number());
uid_instance = HashKey::HashBytes(&unique, sizeof(unique));
++uid_instance; // Now it's larger than zero.

View file

@ -202,7 +202,7 @@ extern std::string strstrip(std::string s);
extern void hmac_md5(size_t size, const unsigned char* bytes,
unsigned char digest[16]);
// Initializes RNGs for bro_random() and MD5 usage. If load_file is given,
// Initializes RNGs for zeek::random_number() and MD5 usage. If load_file is given,
// the seeds (both random & MD5) are loaded from that file. This takes
// precedence over the "use_empty_seeds" argument, which just
// zero-initializes all seed values. If write_file is given, the seeds are
@ -220,14 +220,17 @@ extern bool have_random_seed();
// A simple linear congruence PRNG. It takes its state as argument and
// returns a new random value, which can serve as state for subsequent calls.
[[deprecated("Remove in v4.1. Use zeek::prng()")]]
unsigned int bro_prng(unsigned int state);
// Replacement for the system random(), to which is normally falls back
// except when a seed has been given. In that case, the function bro_prng.
[[deprecated("Remove in v4.1. Use zeek::random_number()")]]
long int bro_random();
// Calls the system srandom() function with the given seed if not running
// in deterministic mode, else it updates the state of the deterministic PRNG.
[[deprecated("Remove in v4.1. Use zeek::seed_random()")]]
void bro_srandom(unsigned int seed);
extern uint64_t rand64bit();
@ -588,4 +591,36 @@ namespace zeek {
*/
void set_thread_name(const char* name, pthread_t tid = pthread_self());
/**
* A platform-independent PRNG implementation. Note that this is not
* necessarily a "statistically sound" implementation as the main purpose is
* not for production use, but rather for regression testing.
* @param state The value used to generate the next random number.
* @return A new random value generated from *state* and that can passed
* back into subsequent calls to generate further random numbers.
*/
long int prng(long int state);
/**
* Wrapper for system random() in the default case, but when running in
* deterministic mode, uses the platform-independent zeek::prng()
* to obtain consistent results since implementations of rand() may vary.
* @return A value in the range [0, zeek::max_random()].
*/
long int random_number();
/**
* @return The maximum value that can be returned from zeek::random_number().
* When not using deterministic-mode, this is always equivalent to RAND_MAX.
*/
long int max_random();
/**
* Wrapper for system srandom() in the default case, but when running in
* deterministic mode, updates the state used for calling zeek::prng()
* inside of zeek::random_number().
* @param seed Value to use for initializing the PRNG.
*/
void seed_random(unsigned int seed);
} // namespace zeek

View file

@ -930,7 +930,7 @@ function hrw_weight%(key_digest: count, site_id: count%): count
## provided by the OS.
function rand%(max: count%): count
%{
auto result = bro_uint_t(double(max) * double(bro_random()) / (RAND_MAX + 1.0));
auto result = bro_uint_t(double(max) * double(zeek::random_number()) / (zeek::max_random() + 1.0));
return zeek::val_mgr->Count(result);
%}
@ -946,7 +946,7 @@ function rand%(max: count%): count
## provided by the OS.
function srand%(seed: count%): any
%{
bro_srandom(seed);
zeek::seed_random(seed);
return nullptr;
%}

View file

@ -0,0 +1 @@
F

View file

@ -0,0 +1,6 @@
0
131
755
4
634
473

View file

@ -3,13 +3,12 @@
#empty_field (empty)
#unset_field -
#path conn
#open 2019-07-31-22-25-32
#open 2020-07-22-05-02-04
#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p proto service duration orig_bytes resp_bytes conn_state local_orig local_resp missed_bytes history orig_pkts orig_ip_bytes resp_pkts resp_ip_bytes tunnel_parents
#types time string addr port addr port enum string interval count count string bool bool count string count count count count set[string]
1254722767.492060 CHhAvVGS1DHFjwGM9 10.10.1.4 56166 10.10.1.1 53 udp dns 0.034025 34 100 SF - - 0 Dd 1 62 1 128 -
1254722776.690444 C4J4Th3PJpwUYZZ6gc 10.10.1.20 138 10.10.1.255 138 udp - - - - S0 - - 0 D 1 229 0 0 -
1254722767.529046 ClEkJM2Vm5giqnMf4h 10.10.1.4 1470 74.53.140.153 25 tcp - 0.346950 0 0 S1 - - 0 Sh 1 48 1 48 -
1437831776.764391 CtPZjS20MLrsMUOJi2 192.168.133.100 49285 66.196.121.26 5050 tcp - 0.343008 41 0 OTH - - 0 Da 1 93 1 52 -
1437831798.533765 CmES5u32sYpV7JYN 192.168.133.100 49336 74.125.71.189 443 tcp - - - - OTH - - 0 A 1 52 0 0 -
1437831787.856895 CUM0KZ3MLUfNB0cl11 192.168.133.100 49648 192.168.133.102 25 tcp - 0.048043 162 154 S1 - - 154 ShDgA 3 192 1 60 -
#close 2019-07-31-22-25-32
1437831787.856895 CUM0KZ3MLUfNB0cl11 192.168.133.100 49648 192.168.133.102 25 tcp - 0.004707 0 0 S1 - - 0 Sh 1 64 1 60 -
#close 2020-07-22-05-02-04

View file

@ -1,13 +1,22 @@
#
# @TEST-EXEC: zeek -b %INPUT >out
# @TEST-EXEC: zeek -b %INPUT do_seed=F >out.2
# @TEST-EXEC: unset ZEEK_SEED_FILE && zeek -b %INPUT real_random=T >out.3
# @TEST-EXEC: for i in $(seq 21); do echo 0 >>random-zero.seed; done
# @TEST-EXEC: ZEEK_SEED_FILE=random-zero.seed zeek -b %INPUT >out.4
# @TEST-EXEC: btest-diff out
# @TEST-EXEC: btest-diff out.2
# @TEST-EXEC: btest-diff out.3
# @TEST-EXEC: btest-diff out.4
const do_seed = T &redef;
const real_random = F &redef;
event zeek_init()
{
if ( real_random )
return;
local a = rand(1000);
local b = rand(1000);
local c = rand(1000);
@ -26,4 +35,53 @@ event zeek_init()
print d;
print e;
print f;
local i = 0;
local max = 3;
while ( i < 100 )
{
local rn = rand(max);
if ( rn >= max )
print fmt("ERROR: rand returned value greater than %s: %s",
max, rn);
i += 1;
}
}
event zeek_init() &priority=-10
{
if ( ! real_random )
return;
local v1: vector of count = vector();
local v2: vector of count = vector();
local i = 0;
while ( i < 20 )
{
v1 += rand(65535);
i += 1;
}
i = 0;
while ( i < 20 )
{
v2 += rand(65535);
i += 1;
}
# Note: this is expected to be F with high probability, but
# technically could all be the same because, well, that's a
# valid "random" sequence, too
print all_set(v1 == v2);
if ( all_set(v1 == v2) )
{
print v1;
print v2;
}
}

View file

@ -45,10 +45,10 @@ std::string Foo::RandomString(const int len)
"abcdefghijklmnopqrstuvwxyz";
for (int i = 0; i < len; ++i)
// bro_random is not thread-safe; as we are only using one simultaneous thread
// zeek::random_number() is not thread-safe; as we are only using one simultaneous thread
// here, this should not matter in this case. If this test ever starts showing
// random errors, this might be the culprit.
s[i] = values[bro_random() / (RAND_MAX / sizeof(values))];
s[i] = values[zeek::random_number() / (zeek::max_random() / sizeof(values))];
return s;
}