Bloom filters: hash HashKey contents directly; add a global hash seed option.

Summary of what this patch changes:

  * scripts/base/init-bare.bro: adds the redef-able constant
    global_hash_seed (empty string by default).
  * BloomFilter::Add()/Count() become pure virtual methods taking a
    HashKey*; the templated wrappers and the AddImpl()/CountImpl() hooks
    are removed. BloomFilterVal now passes the HashKey itself instead of
    key->Hash().
  * Hasher gains Hash(const HashKey*), which hashes key->Key() over
    key->Size() bytes.
  * BitVector::Hash(), CounterVector::Hash(), the InternalState() methods
    and the bloomfilter_internal_state BiF expose a digest of a filter's
    internal state for debugging/testing.
  * Adds the btest bifs/bloomfilter-seed together with its baseline.

diff --git a/scripts/base/init-bare.bro b/scripts/base/init-bare.bro
index 9876ad03f7..e5300cdc9f 100644
--- a/scripts/base/init-bare.bro
+++ b/scripts/base/init-bare.bro
@@ -3042,6 +3042,11 @@ module GLOBAL;
 ## Number of bytes per packet to capture from live interfaces.
 const snaplen = 8192 &redef;
 
+## Seed for hashes computed internally for probabilistic data structures. Using
+## the same value here will make the hashes compatible between independent Bro
+## instances. If left unset, Bro will use a temporary local seed.
+const global_hash_seed: string = "" &redef;
+
 # Load BiFs defined by plugins.
 @load base/bif/plugins
 
diff --git a/src/OpaqueVal.cc b/src/OpaqueVal.cc
index 66b3c081e7..e79b4435b3 100644
--- a/src/OpaqueVal.cc
+++ b/src/OpaqueVal.cc
@@ -566,14 +566,14 @@ BroType* BloomFilterVal::Type() const
 void BloomFilterVal::Add(const Val* val)
 	{
 	HashKey* key = hash->ComputeHash(val, 1);
-	bloom_filter->Add(key->Hash());
+	bloom_filter->Add(key);
 	delete key;
 	}
 
 size_t BloomFilterVal::Count(const Val* val) const
 	{
 	HashKey* key = hash->ComputeHash(val, 1);
-	size_t cnt = bloom_filter->Count(key->Hash());
+	size_t cnt = bloom_filter->Count(key);
 	delete key;
 	return cnt;
 	}
@@ -588,6 +588,11 @@ bool BloomFilterVal::Empty() const
 	return bloom_filter->Empty();
 	}
 
+string BloomFilterVal::InternalState() const
+	{
+	return bloom_filter->InternalState();
+	}
+
 BloomFilterVal* BloomFilterVal::Merge(const BloomFilterVal* x,
 				      const BloomFilterVal* y)
 	{
diff --git a/src/OpaqueVal.h b/src/OpaqueVal.h
index 52c9583fc7..08a20b1a31 100644
--- a/src/OpaqueVal.h
+++ b/src/OpaqueVal.h
@@ -127,6 +127,7 @@ public:
 	size_t Count(const Val* val) const;
 	void Clear();
 	bool Empty() const;
+	string InternalState() const;
 
 	static BloomFilterVal* Merge(const BloomFilterVal* x,
 				     const BloomFilterVal* y);
diff --git a/src/probabilistic/BitVector.cc b/src/probabilistic/BitVector.cc
index 6e642e62c1..f820e6df27 100644
--- a/src/probabilistic/BitVector.cc
+++ b/src/probabilistic/BitVector.cc
@@ -490,6 +490,16 @@ BitVector::size_type BitVector::FindNext(size_type i) const
 	return block ? bi * bits_per_block + lowest_bit(block) : find_from(bi + 1);
 	}
 
+size_t BitVector::Hash() const
+	{
+	size_t hash = 0;
+
+	for ( size_type i = 0; i < Blocks(); ++i )
+		hash += bits[i];
+
+	return hash;
+	}
+
 BitVector::size_type BitVector::lowest_bit(block_type block)
 	{
 	block_type x = block - (block & (block - 1));
diff --git a/src/probabilistic/BitVector.h b/src/probabilistic/BitVector.h
index d9c55d53c6..8e24336345 100644
--- a/src/probabilistic/BitVector.h
+++ b/src/probabilistic/BitVector.h
@@ -276,6 +276,13 @@ public:
 	 */
 	size_type FindNext(size_type i) const;
 
+	/** Computes a hash value of the internal representation.
+	 * This is mainly for debugging/testing purposes.
+	 *
+	 * @return The hash.
+	 */
+	size_t Hash() const;
+
 	/**
 	 * Serializes the bit vector.
 	 *
diff --git a/src/probabilistic/BloomFilter.cc b/src/probabilistic/BloomFilter.cc
index 23b812269c..bcab6c9b54 100644
--- a/src/probabilistic/BloomFilter.cc
+++ b/src/probabilistic/BloomFilter.cc
@@ -9,6 +9,8 @@
 #include "CounterVector.h"
 #include "Serializer.h"
 
+#include "../util.h"
+
 using namespace probabilistic;
 
 BloomFilter::BloomFilter()
@@ -107,6 +109,11 @@ BasicBloomFilter* BasicBloomFilter::Clone() const
 	return copy;
 	}
 
+std::string BasicBloomFilter::InternalState() const
+	{
+	return fmt("%" PRIu64, (uint64_t)bits->Hash());
+	}
+
 BasicBloomFilter::BasicBloomFilter()
 	{
 	bits = 0;
@@ -133,14 +140,18 @@ bool BasicBloomFilter::DoUnserialize(UnserialInfo* info)
 	return (bits != 0);
 	}
 
-void BasicBloomFilter::AddImpl(const Hasher::digest_vector& h)
+void BasicBloomFilter::Add(const HashKey* key)
 	{
+	Hasher::digest_vector h = hasher->Hash(key);
+
 	for ( size_t i = 0; i < h.size(); ++i )
 		bits->Set(h[i] % bits->Size());
 	}
 
-size_t BasicBloomFilter::CountImpl(const Hasher::digest_vector& h) const
+size_t BasicBloomFilter::Count(const HashKey* key) const
 	{
+	Hasher::digest_vector h = hasher->Hash(key);
+
 	for ( size_t i = 0; i < h.size(); ++i )
 		{
 		if ( ! (*bits)[h[i] % bits->Size()] )
@@ -206,6 +217,11 @@ CountingBloomFilter* CountingBloomFilter::Clone() const
 	return copy;
 	}
 
+string CountingBloomFilter::InternalState() const
+	{
+	return fmt("%" PRIu64, (uint64_t)cells->Hash());
+	}
+
 IMPLEMENT_SERIAL(CountingBloomFilter, SER_COUNTINGBLOOMFILTER)
 
 bool CountingBloomFilter::DoSerialize(SerialInfo* info) const
@@ -222,14 +238,18 @@ bool CountingBloomFilter::DoUnserialize(UnserialInfo* info)
 	}
 
 // TODO: Use partitioning in add/count to allow for reusing CMS bounds.
-void CountingBloomFilter::AddImpl(const Hasher::digest_vector& h)
+void CountingBloomFilter::Add(const HashKey* key)
 	{
+	Hasher::digest_vector h = hasher->Hash(key);
+
 	for ( size_t i = 0; i < h.size(); ++i )
 		cells->Increment(h[i] % cells->Size());
 	}
 
-size_t CountingBloomFilter::CountImpl(const Hasher::digest_vector& h) const
+size_t CountingBloomFilter::Count(const HashKey* key) const
 	{
+	Hasher::digest_vector h = hasher->Hash(key);
+
 	CounterVector::size_type min =
 		std::numeric_limits<CounterVector::size_type>::max();
 
diff --git a/src/probabilistic/BloomFilter.h b/src/probabilistic/BloomFilter.h
index 4865ae145c..65dda2396d 100644
--- a/src/probabilistic/BloomFilter.h
+++ b/src/probabilistic/BloomFilter.h
@@ -22,27 +22,20 @@ public:
 	virtual ~BloomFilter();
 
 	/**
-	 * Adds an element of type T to the Bloom filter.
-	 * @param x The element to add
+	 * Adds an element to the Bloom filter.
+	 *
+	 * @param key The key associated with the element to add.
 	 */
-	template <typename T>
-	void Add(const T& x)
-		{
-		AddImpl((*hasher)(x));
-		}
+	virtual void Add(const HashKey* key) = 0;
 
 	/**
 	 * Retrieves the associated count of a given value.
 	 *
-	 * @param x The value of type `T` to check.
+	 * @param key The key associated with the element to check.
 	 *
-	 * @return The counter associated with *x*.
+	 * @return The counter associated with *key*.
 	 */
-	template <typename T>
-	size_t Count(const T& x) const
-		{
-		return CountImpl((*hasher)(x));
-		}
+	virtual size_t Count(const HashKey* key) const = 0;
 
 	/**
 	 * Checks whether the Bloom filter is empty.
@@ -72,6 +65,12 @@ public:
 	 */
 	virtual BloomFilter* Clone() const = 0;
 
+	/**
+	 * Returns a string with a representation of the Bloom filter's
+	 * internal state. This is for debugging/testing purposes only.
+	 */
+	virtual string InternalState() const = 0;
+
 	/**
 	 * Serializes the Bloom filter.
 	 *
@@ -106,25 +105,6 @@ protected:
 	 */
 	BloomFilter(const Hasher* hasher);
 
-	/**
-	 * Abstract method for implementinng the *Add* operation.
-	 *
-	 * @param hashes A set of *k* hashes for the item to add, computed by
-	 * the internal hasher object.
-	 *
-	 */
-	virtual void AddImpl(const Hasher::digest_vector& hashes) = 0;
-
-	/**
-	 * Abstract method for implementing the *Count* operation.
-	 *
-	 * @param hashes A set of *k* hashes for the item to add, computed by
-	 * the internal hasher object.
-	 *
-	 * @return Returns the counter associated with the hashed element.
-	 */
-	virtual size_t CountImpl(const Hasher::digest_vector& hashes) const = 0;
-
 	const Hasher* hasher;
 	};
 
@@ -177,6 +157,7 @@ public:
 	virtual void Clear();
 	virtual bool Merge(const BloomFilter* other);
 	virtual BasicBloomFilter* Clone() const;
+	virtual string InternalState() const;
 
 protected:
 	DECLARE_SERIAL(BasicBloomFilter);
@@ -187,8 +168,8 @@ protected:
 	BasicBloomFilter();
 
 	// Overridden from BloomFilter.
-	virtual void AddImpl(const Hasher::digest_vector& h);
-	virtual size_t CountImpl(const Hasher::digest_vector& h) const;
+	virtual void Add(const HashKey* key);
+	virtual size_t Count(const HashKey* key) const;
 
 private:
 	BitVector* bits;
@@ -216,6 +197,7 @@ public:
 	virtual void Clear();
 	virtual bool Merge(const BloomFilter* other);
 	virtual CountingBloomFilter* Clone() const;
+	virtual string InternalState() const;
 
 protected:
 	DECLARE_SERIAL(CountingBloomFilter);
@@ -226,8 +208,8 @@ protected:
 	CountingBloomFilter();
 
 	// Overridden from BloomFilter.
-	virtual void AddImpl(const Hasher::digest_vector& h);
-	virtual size_t CountImpl(const Hasher::digest_vector& h) const;
+	virtual void Add(const HashKey* key);
+	virtual size_t Count(const HashKey* key) const;
 
 private:
 	CounterVector* cells;
diff --git a/src/probabilistic/CounterVector.cc b/src/probabilistic/CounterVector.cc
index d5635fc0f2..8a6feae5fd 100644
--- a/src/probabilistic/CounterVector.cc
+++ b/src/probabilistic/CounterVector.cc
@@ -153,6 +153,11 @@ CounterVector operator|(const CounterVector& x, const CounterVector& y)
 
 }
 
+size_t CounterVector::Hash() const
+	{
+	return bits->Hash();
+	}
+
 bool CounterVector::Serialize(SerialInfo* info) const
 	{
 	return SerialObj::Serialize(info);
diff --git a/src/probabilistic/CounterVector.h b/src/probabilistic/CounterVector.h
index df6fc57ac2..9ce522d61c 100644
--- a/src/probabilistic/CounterVector.h
+++ b/src/probabilistic/CounterVector.h
@@ -126,6 +126,13 @@ public:
 	 */
 	CounterVector& operator|=(const CounterVector& other);
 
+	/** Computes a hash value of the internal representation.
+	 * This is mainly for debugging/testing purposes.
+	 *
+	 * @return The hash.
+	 */
+	size_t Hash() const;
+
 	/**
 	 * Serializes the bit vector.
 	 *
diff --git a/src/probabilistic/Hasher.cc b/src/probabilistic/Hasher.cc
index 8b34aa5c77..b59274df7d 100644
--- a/src/probabilistic/Hasher.cc
+++ b/src/probabilistic/Hasher.cc
@@ -31,6 +31,11 @@ size_t Hasher::MakeSeed(const void* data, size_t size)
 	return *reinterpret_cast<size_t*>(buf); // Use the first bytes as seed.
 	}
 
+Hasher::digest_vector Hasher::Hash(const HashKey* key) const
+	{
+	return Hash(key->Key(), key->Size());
+	}
+
 bool Hasher::Serialize(SerialInfo* info) const
 	{
 	return SerialObj::Serialize(info);
@@ -77,7 +82,6 @@ Hasher::Hasher(size_t arg_k, size_t arg_seed)
 	seed = arg_seed;
 	}
 
-
 UHF::UHF(size_t seed)
 	: h(seed)
 	{
diff --git a/src/probabilistic/Hasher.h b/src/probabilistic/Hasher.h
index bd8f5ce5ff..6b75fa1bea 100644
--- a/src/probabilistic/Hasher.h
+++ b/src/probabilistic/Hasher.h
@@ -50,6 +50,15 @@ public:
 		return Hash(&x, sizeof(T));
 		}
 
+	/**
+	 * Computes hash values for an element.
+	 *
+	 * @param key The key of the value to hash.
+	 *
+	 * @return Vector of *k* hash values.
+	 */
+	digest_vector Hash(const HashKey* key) const;
+
 	/**
 	 * Computes the hashes for a set of bytes.
 	 *
diff --git a/src/probabilistic/bloom-filter.bif b/src/probabilistic/bloom-filter.bif
index c288171e5d..98c8dd59a8 100644
--- a/src/probabilistic/bloom-filter.bif
+++ b/src/probabilistic/bloom-filter.bif
@@ -20,11 +20,6 @@ module GLOBAL;
 
 ## Creates a basic Bloom filter.
 ##
-## .. note:: A Bloom filter can have a name associated with it. In the future,
-## Bloom filters with the same name will be compatible across indepedent Bro
-## instances, i.e., it will be possible to merge them. Currently, however, that is
-## not yet supported.
-##
 ## fp: The desired false-positive rate.
 ##
 ## capacity: the maximum number of elements that guarantees a false-positive
@@ -61,11 +56,6 @@ function bloomfilter_basic_init%(fp: double, capacity: count,
 ## alternative to bloomfilter_basic_init where the user has full control over
 ## the number of hash functions and cells in the underlying bit vector.
 ##
-## .. note:: A Bloom filter can have a name associated with it. In the future,
-## Bloom filters with the same name will be compatible across indepedent Bro
-## instances, i.e., it will be possible to merge them. Currently, however, that is
-## not yet supported.
-##
 ## k: The number of hash functions to use.
 ##
 ## cells: The number of cells of the underlying bit vector.
@@ -102,11 +92,6 @@ function bloomfilter_basic_init2%(k: count, cells: count,
 
 ## Creates a counting Bloom filter.
 ##
-## .. note:: A Bloom filter can have a name associated with it. In the future,
-## Bloom filters with the same name will be compatible across indepedent Bro
-## instances, i.e., it will be possible to merge them. Currently, however, that is
-## not yet supported.
-##
 ## k: The number of hash functions to use.
 ##
 ## cells: The number of cells of the underlying counter vector. As there's no
@@ -250,3 +235,13 @@ function bloomfilter_merge%(bf1: opaque of bloomfilter,
 
 	return BloomFilterVal::Merge(bfv1, bfv2);
 	%}
+
+## Returns a string with a representation of a Bloom filter's internal
+## state. This is for debugging/testing purposes only.
+##
+## bf: The Bloom filter handle.
+function bloomfilter_internal_state%(bf: opaque of bloomfilter%): string
+	%{
+	BloomFilterVal* bfv = static_cast<BloomFilterVal*>(bf);
+	return new StringVal(bfv->InternalState());
+	%}
diff --git a/testing/btest/Baseline/bifs.bloomfilter-seed/output b/testing/btest/Baseline/bifs.bloomfilter-seed/output
new file mode 100644
index 0000000000..53e0f583f2
--- /dev/null
+++ b/testing/btest/Baseline/bifs.bloomfilter-seed/output
@@ -0,0 +1,8 @@
+bf1, global_seed, 1
+bf2, global_seed, 5
+bf3, my_seed, 5
+bf4, my_seed, 6
+bf1, global_seed, 5
+bf2, global_seed, 6
+bf3, my_seed, 5
+bf4, my_seed, 6
diff --git a/testing/btest/bifs/bloomfilter-seed.bro b/testing/btest/bifs/bloomfilter-seed.bro
new file mode 100644
index 0000000000..436638e2af
--- /dev/null
+++ b/testing/btest/bifs/bloomfilter-seed.bro
@@ -0,0 +1,40 @@
+# @TEST-EXEC: bro -b %INPUT global_hash_seed="foo" >>output
+# @TEST-EXEC: bro -b %INPUT global_hash_seed="my_seed" >>output
+# @TEST-EXEC: btest-diff output
+
+type Foo: record
+	{
+	a: count;
+	b: string;
+	};
+
+function test_bloom_filter()
+	{
+	local bf1 = bloomfilter_basic_init(0.9, 10);
+	bloomfilter_add(bf1, "foo");
+	bloomfilter_add(bf1, "bar");
+
+	local bf2 = bloomfilter_basic_init(0.9, 10);
+	bloomfilter_add(bf2, Foo($a=1, $b="xx"));
+	bloomfilter_add(bf2, Foo($a=2, $b="yy"));
+
+	local bf3 = bloomfilter_basic_init(0.9, 10, "my_seed");
+	bloomfilter_add(bf3, "foo");
+	bloomfilter_add(bf3, "bar");
+
+	local bf4 = bloomfilter_basic_init(0.9, 10, "my_seed");
+	bloomfilter_add(bf4, Foo($a=1, $b="xx"));
+	bloomfilter_add(bf4, Foo($a=2, $b="yy"));
+
+	print "bf1, global_seed", bloomfilter_internal_state(bf1);
+	print "bf2, global_seed", bloomfilter_internal_state(bf2);
+	print "bf3, my_seed", bloomfilter_internal_state(bf3);
+	print "bf4, my_seed", bloomfilter_internal_state(bf4);
+
+
+	}
+
+event bro_init()
+	{
+	test_bloom_filter();
+	}