Merge branch 'topic/robin/bloom-filter-merge'

* topic/robin/bloom-filter-merge:
  Using a real hash function for hashing a BitVector's internal state.
  Support UHF hashing for >= UHASH_KEY_SIZE bytes.
  Changing the Bloom filter hashing so that it's independent of CompositeHash.
  Add new BiF for low-level Bloom filter initialization.
  Introduce global_hash_seed script variable.

Conflicts:
  testing/btest/Baseline/bifs.bloomfilter/output
2025-10-04 15:48:19 +00:00 · 2013-08-01 10:52:08 -07:00 · 2013-08-01 10:52:08 -07:00 · 32a403cdaf
commit 32a403cdaf
parent 2ccc963e22 7ab2170641
19 changed files with 337 additions and 138 deletions
--- a/src/probabilistic/BitVector.cc
+++ b/src/probabilistic/BitVector.cc
@ -1,10 +1,12 @@
 // See the file "COPYING" in the main distribution directory for copyright.

-#include "BitVector.h"
-
+#include <openssl/sha.h>
 #include <cassert>
 #include <limits>
+
+#include "BitVector.h"
 #include "Serializer.h"
+#include "digest.h"

 using namespace probabilistic;

@ -490,6 +492,21 @@ BitVector::size_type BitVector::FindNext(size_type i) const
 	return block ? bi * bits_per_block + lowest_bit(block) : find_from(bi + 1);
 	}

+// Computes a hash over the raw blocks of the bit vector: every block is fed
+// to SHA-256 and the leading sizeof(size_t) digest bytes form the result.
+// Intended for debugging/testing only.
+size_t BitVector::Hash() const
+	{
+	u_char buf[SHA256_DIGEST_LENGTH];
+	SHA256_CTX ctx;
+	sha256_init(&ctx);
+
+	for ( size_type i = 0; i < Blocks(); ++i )
+		sha256_update(&ctx, &bits[i], sizeof(bits[i]));
+
+	sha256_final(&ctx, buf);
+
+	// Assemble the value byte by byte instead of dereferencing buf through
+	// a size_t*: buf is only byte-aligned, so that cast is undefined
+	// behavior (alignment and aliasing). On little-endian hosts this yields
+	// the same value as the former cast.
+	size_t digest = 0;
+	for ( size_t i = 0; i < sizeof(digest); ++i )
+		digest |= static_cast<size_t>(buf[i]) << (8 * i);
+
+	return digest;
+	}
+
 BitVector::size_type BitVector::lowest_bit(block_type block)
 	{
 	block_type x = block - (block & (block - 1));
--- a/src/probabilistic/BitVector.h
+++ b/src/probabilistic/BitVector.h
@ -276,6 +276,13 @@ public:
 	 */
 	size_type FindNext(size_type i) const;

+	/** Computes a hash value of the internal representation.
+	  * This is mainly for debugging/testing purposes.
+	  *
+	  * @return The hash.
+	  */
+	size_t Hash() const;
+
 	/**
 	 * Serializes the bit vector.
 	 *
--- a/src/probabilistic/BloomFilter.cc
+++ b/src/probabilistic/BloomFilter.cc
@ -9,6 +9,8 @@
 #include "CounterVector.h"
 #include "Serializer.h"

+#include "../util.h"
+
 using namespace probabilistic;

 BloomFilter::BloomFilter()
@ -107,6 +109,11 @@ BasicBloomFilter* BasicBloomFilter::Clone() const
 	return copy;
 	}

+// Renders the hash of the underlying bit vector as an unsigned decimal
+// string. For debugging/testing only.
+std::string BasicBloomFilter::InternalState() const
+	{
+	const uint64_t digest = static_cast<uint64_t>(bits->Hash());
+	return fmt("%" PRIu64, digest);
+	}
+
 BasicBloomFilter::BasicBloomFilter()
 	{
 	bits = 0;
@ -133,14 +140,18 @@ bool BasicBloomFilter::DoUnserialize(UnserialInfo* info)
 	return (bits != 0);
 	}

-void BasicBloomFilter::AddImpl(const Hasher::digest_vector& h)
+// Hashes *key* with the filter's hasher and sets the bit selected by each
+// resulting digest (digest modulo the size of the bit vector).
+void BasicBloomFilter::Add(const HashKey* key)
 	{
+	Hasher::digest_vector h = hasher->Hash(key);
+
 	for ( size_t i = 0; i < h.size(); ++i )
 		bits->Set(h[i] % bits->Size());
 	}

-size_t BasicBloomFilter::CountImpl(const Hasher::digest_vector& h) const
+size_t BasicBloomFilter::Count(const HashKey* key) const
 	{
+	Hasher::digest_vector h = hasher->Hash(key);
+
 	for ( size_t i = 0; i < h.size(); ++i )
 		{
 		if ( ! (*bits)[h[i] % bits->Size()] )
@ -206,6 +217,11 @@ CountingBloomFilter* CountingBloomFilter::Clone() const
 	return copy;
 	}

+// Renders the hash of the underlying counter vector as an unsigned decimal
+// string. For debugging/testing only.
+string CountingBloomFilter::InternalState() const
+	{
+	const uint64_t digest = static_cast<uint64_t>(cells->Hash());
+	return fmt("%" PRIu64, digest);
+	}
+
 IMPLEMENT_SERIAL(CountingBloomFilter, SER_COUNTINGBLOOMFILTER)

 bool CountingBloomFilter::DoSerialize(SerialInfo* info) const
@ -222,14 +238,18 @@ bool CountingBloomFilter::DoUnserialize(UnserialInfo* info)
 	}

 // TODO: Use partitioning in add/count to allow for reusing CMS bounds.
-void CountingBloomFilter::AddImpl(const Hasher::digest_vector& h)
+// Hashes *key* with the filter's hasher and increments the cell selected by
+// each resulting digest (digest modulo the size of the counter vector).
+void CountingBloomFilter::Add(const HashKey* key)
 	{
+	Hasher::digest_vector h = hasher->Hash(key);
+
 	for ( size_t i = 0; i < h.size(); ++i )
 		cells->Increment(h[i] % cells->Size());
 	}

-size_t CountingBloomFilter::CountImpl(const Hasher::digest_vector& h) const
+size_t CountingBloomFilter::Count(const HashKey* key) const
 	{
+	Hasher::digest_vector h = hasher->Hash(key);
+
 	CounterVector::size_type min =
 		std::numeric_limits<CounterVector::size_type>::max();

--- a/src/probabilistic/BloomFilter.h
+++ b/src/probabilistic/BloomFilter.h
@ -22,27 +22,20 @@ public:
 	virtual ~BloomFilter();

 	/**
-	 * Adds an element of type T to the Bloom filter.
-	 * @param x The element to add
+	 * Adds an element to the Bloom filter.
+	 *
+	 * @param key The key associated with the element to add.
 	 */
-	template <typename T>
-	void Add(const T& x)
-		{
-		AddImpl((*hasher)(x));
-		}
+	virtual void Add(const HashKey* key) = 0;

 	/**
 	 * Retrieves the associated count of a given value.
 	 *
-	 * @param x The value of type `T` to check.
+	 * @param key The key associated with the element to check.
 	 *
-	 * @return The counter associated with *x*.
+	 * @return The counter associated with *key*.
 	 */
-	template <typename T>
-	size_t Count(const T& x) const
-		{
-		return CountImpl((*hasher)(x));
-		}
+	virtual size_t Count(const HashKey* key) const = 0;

 	/**
 	 * Checks whether the Bloom filter is empty.
@ -72,6 +65,12 @@ public:
 	 */
 	virtual BloomFilter* Clone() const = 0;

+	/**
+	 * Returns a string with a representation of the Bloom filter's
+	 * internal state. This is for debugging/testing purposes only.
+	 */
+	virtual string InternalState() const = 0;
+
 	/**
 	 * Serializes the Bloom filter.
 	 *
@ -106,25 +105,6 @@ protected:
 	 */
 	BloomFilter(const Hasher* hasher);

-	/**
-	 * Abstract method for implementinng the *Add* operation.
-	 *
-	 * @param hashes A set of *k* hashes for the item to add, computed by
-	 * the internal hasher object.
-	 *
-	 */
-	virtual void AddImpl(const Hasher::digest_vector& hashes) = 0;
-
-	/**
-	 * Abstract method for implementing the *Count* operation.
-	 *
-	 * @param hashes A set of *k* hashes for the item to add, computed by
-	 * the internal hasher object.
-	 *
-	 * @return Returns the counter associated with the hashed element.
-	 */
-	virtual size_t CountImpl(const Hasher::digest_vector& hashes) const = 0;
-
 	const Hasher* hasher;
 };

@ -177,6 +157,7 @@ public:
 	virtual void Clear();
 	virtual bool Merge(const BloomFilter* other);
 	virtual BasicBloomFilter* Clone() const;
+	virtual string InternalState() const;

 protected:
 	DECLARE_SERIAL(BasicBloomFilter);
@ -187,8 +168,8 @@ protected:
 	BasicBloomFilter();

 	// Overridden from BloomFilter.
-	virtual void AddImpl(const Hasher::digest_vector& h);
-	virtual size_t CountImpl(const Hasher::digest_vector& h) const;
+	virtual void Add(const HashKey* key);
+	virtual size_t Count(const HashKey* key) const;

 private:
 	BitVector* bits;
@ -216,6 +197,7 @@ public:
 	virtual void Clear();
 	virtual bool Merge(const BloomFilter* other);
 	virtual CountingBloomFilter* Clone() const;
+	virtual string InternalState() const;

 protected:
 	DECLARE_SERIAL(CountingBloomFilter);
@ -226,8 +208,8 @@ protected:
 	CountingBloomFilter();

 	// Overridden from BloomFilter.
-	virtual void AddImpl(const Hasher::digest_vector& h);
-	virtual size_t CountImpl(const Hasher::digest_vector& h) const;
+	virtual void Add(const HashKey* key);
+	virtual size_t Count(const HashKey* key) const;

 private:
 	CounterVector* cells;
--- a/src/probabilistic/CounterVector.cc
+++ b/src/probabilistic/CounterVector.cc
@ -153,6 +153,11 @@ CounterVector operator|(const CounterVector& x, const CounterVector& y)

 }

+// Forwards to Hash() of the `bits` member that backs this counter vector.
+size_t CounterVector::Hash() const
+	{
+	const size_t digest = bits->Hash();
+	return digest;
+	}
+
 bool CounterVector::Serialize(SerialInfo* info) const
 	{
 	return SerialObj::Serialize(info);
--- a/src/probabilistic/CounterVector.h
+++ b/src/probabilistic/CounterVector.h
@ -126,6 +126,13 @@ public:
 	 */
 	CounterVector& operator|=(const CounterVector& other);

+	/** Computes a hash value of the internal representation.
+	  * This is mainly for debugging/testing purposes.
+	  *
+	  * @return The hash.
+	  */
+	size_t Hash() const;
+
 	/**
 	 * Serializes the bit vector.
 	 *
--- a/src/probabilistic/Hasher.cc
+++ b/src/probabilistic/Hasher.cc
@ -1,13 +1,42 @@
 // See the file "COPYING" in the main distribution directory for copyright.

 #include <typeinfo>
+#include <openssl/md5.h>

 #include "Hasher.h"
+#include "NetVar.h"
 #include "digest.h"
 #include "Serializer.h"

 using namespace probabilistic;

+// Derives a hasher seed by running SHA-256 over exactly one input, chosen
+// in this order: the caller's *data* if non-null, else the global_hash_seed
+// script variable if it is set and non-empty, else the value returned by
+// initial_seed(). The leading sizeof(size_t) digest bytes form the seed.
+size_t Hasher::MakeSeed(const void* data, size_t size)
+	{
+	u_char buf[SHA256_DIGEST_LENGTH];
+	SHA256_CTX ctx;
+	sha256_init(&ctx);
+
+	if ( data )
+		sha256_update(&ctx, data, size);
+
+	else if ( global_hash_seed && global_hash_seed->Len() > 0 )
+		sha256_update(&ctx, global_hash_seed->Bytes(), global_hash_seed->Len());
+
+	else
+		{
+		unsigned int first_seed = initial_seed();
+		sha256_update(&ctx, &first_seed, sizeof(first_seed));
+		}
+
+	sha256_final(&ctx, buf);
+
+	// Assemble the value byte by byte instead of dereferencing buf through
+	// a size_t*: buf is only byte-aligned, so that cast is undefined
+	// behavior (alignment and aliasing). On little-endian hosts this yields
+	// the same value as the former cast.
+	size_t result = 0;
+	for ( size_t i = 0; i < sizeof(result); ++i )
+		result |= static_cast<size_t>(buf[i]) << (8 * i);
+
+	return result;
+	}
+
+// Convenience overload: hashes the bytes exposed by *key* via the
+// (pointer, length) overload of Hash().
+Hasher::digest_vector Hasher::Hash(const HashKey* key) const
+	{
+	const void* bytes = key->Key();
+	const size_t len = key->Size();
+	return Hash(bytes, len);
+	}
+
 bool Hasher::Serialize(SerialInfo* info) const
 	{
 	return SerialObj::Serialize(info);
@ -25,7 +54,7 @@ bool Hasher::DoSerialize(SerialInfo* info) const
 	if ( ! SERIALIZE(static_cast<uint16>(k)) )
 		return false;

-	return SERIALIZE_STR(name.c_str(), name.size());
+	return SERIALIZE(static_cast<uint64>(seed));
 	}

 bool Hasher::DoUnserialize(UnserialInfo* info)
@ -39,62 +68,52 @@ bool Hasher::DoUnserialize(UnserialInfo* info)
 	k = serial_k;
 	assert(k > 0);

-	const char* serial_name;
-	if ( ! UNSERIALIZE_STR(&serial_name, 0) )
+	uint64 serial_seed;
+	if ( ! UNSERIALIZE(&serial_seed) )
 		return false;

-	name = serial_name;
-	delete [] serial_name;
+	seed = serial_seed;

 	return true;
 	}

-Hasher::Hasher(size_t k, const std::string& arg_name)
-	: k(k)
+// Records the number of hash functions and the seed of this hasher.
+Hasher::Hasher(size_t arg_k, size_t arg_seed)
+	: k(arg_k), seed(arg_seed)
 	{
-	k = k;
-	name = arg_name;
 	}

-
-UHF::UHF(size_t seed, const std::string& extra)
-	: h(compute_seed(seed, extra))
+// Seeds the H3 function with *arg_seed* and keeps a copy of the seed, which
+// hash() mixes into the digest of inputs longer than UHASH_KEY_SIZE.
+UHF::UHF(size_t arg_seed)
+	: h(arg_seed), seed(arg_seed)
 	{
 	}

+// This function is almost equivalent to HashKey::HashBytes except that it
+// does not depend on global state and that we mix in the seed multiple
+// times.
+//
+// Inputs of at most UHASH_KEY_SIZE bytes go through the H3 function (an
+// empty input hashes to 0). Longer inputs are digested with MD5, XORed with
+// the seed bytes, and digested with MD5 once more.
 Hasher::digest UHF::hash(const void* x, size_t n) const
 	{
-	assert(n <= UHASH_KEY_SIZE);
-	return n == 0 ? 0 : h(x, n);
+	if ( n <= UHASH_KEY_SIZE )
+		return n == 0 ? 0 : h(x, n);
+
+	unsigned char d[16];
+	MD5(reinterpret_cast<const unsigned char*>(x), n, d);
+
+	const unsigned char* s = reinterpret_cast<const unsigned char*>(&seed);
+	for ( size_t i = 0; i < 16; ++i )
+		d[i] ^= s[i % sizeof(seed)];
+
+	MD5(d, 16, d);
+
+	// Fold as many digest bytes as fit into the result. Returning d[0]
+	// alone would confine every long-key hash to the range 0..255.
+	Hasher::digest result = 0;
+	for ( size_t i = 0; i < sizeof(result) && i < sizeof(d); ++i )
+		result |= static_cast<Hasher::digest>(d[i]) << (8 * i);
+
+	return result;
 	}

-size_t UHF::compute_seed(size_t seed, const std::string& extra)
+// Builds *k* UHF instances; the i-th one (i = 1..k) is seeded with
+// Seed() + bro_prng(i).
+//
+// NOTE(review): DefaultHasher::DoUnserialize rebuilds the functions with i
+// running over 0..K()-1, which does not match the 1..k range used here, so
+// an unserialized hasher would not reproduce the same hash functions.
+// Confirm which range is intended and align the two.
+DefaultHasher::DefaultHasher(size_t k, size_t seed)
+	: Hasher(k, seed)
 	{
-	u_char buf[SHA256_DIGEST_LENGTH];
-	SHA256_CTX ctx;
-	sha256_init(&ctx);
-
-	if ( extra.empty() )
-		{
-		unsigned int first_seed = initial_seed();
-		sha256_update(&ctx, &first_seed, sizeof(first_seed));
-		}
-
-	else
-		sha256_update(&ctx, extra.c_str(), extra.size());
-
-	sha256_update(&ctx, &seed, sizeof(seed));
-	sha256_final(&ctx, buf);
-
-	// Take the first sizeof(size_t) bytes as seed.
-	return *reinterpret_cast<size_t*>(buf);
-	}
-
-DefaultHasher::DefaultHasher(size_t k, const std::string& name)
-	: Hasher(k, name)
-	{
-	for ( size_t i = 0; i < k; ++i )
-		hash_functions.push_back(UHF(i, name));
+	for ( size_t i = 1; i <= k; ++i )
+		hash_functions.push_back(UHF(Seed() + bro_prng(i)));
 	}

 Hasher::digest_vector DefaultHasher::Hash(const void* x, size_t n) const
@ -137,13 +156,13 @@ bool DefaultHasher::DoUnserialize(UnserialInfo* info)

 	hash_functions.clear();
 	for ( size_t i = 0; i < K(); ++i )
-		hash_functions.push_back(UHF(i, Name()));
+		hash_functions.push_back(UHF(Seed() + bro_prng(i)));

 	return true;
 	}

-DoubleHasher::DoubleHasher(size_t k, const std::string& name)
-	: Hasher(k, name), h1(1, name), h2(2, name)
+// Seeds the two underlying hash functions with seed + bro_prng(1) and
+// seed + bro_prng(2), the same derivation DoubleHasher::DoUnserialize uses.
+DoubleHasher::DoubleHasher(size_t k, size_t seed)
+	: Hasher(k, seed), h1(seed + bro_prng(1)), h2(seed + bro_prng(2))
 	{
 	}

@ -187,8 +206,8 @@ bool DoubleHasher::DoUnserialize(UnserialInfo* info)
 	{
 	DO_UNSERIALIZE(Hasher);

-	h1 = UHF(1, Name());
-	h2 = UHF(2, Name());
+	h1 = UHF(Seed() + bro_prng(1));
+	h2 = UHF(Seed() + bro_prng(2));

 	return true;
 	}
--- a/src/probabilistic/Hasher.h
+++ b/src/probabilistic/Hasher.h
@ -18,6 +18,20 @@ public:
 	typedef hash_t digest;
 	typedef std::vector<digest> digest_vector;

+	/**
+	 * Creates a valid hasher seed from an arbitrary string.
+	 *
+	 * @param data A pointer to contiguous data that should be crunched into a
+	 * seed. If 0, the function tries to find a global_hash_seed script variable
+	 * to derive a seed from. If this variable does not exist, the function uses
+	 * the initial seed generated at Bro startup.
+	 *
+	 * @param size The number of bytes of *data*.
+	 *
+	 * @return A seed suitable for hashers.
+	 */
+	static size_t MakeSeed(const void* data, size_t size);
+
 	/**
 	 * Destructor.
 	 */
@ -36,6 +50,15 @@ public:
 		return Hash(&x, sizeof(T));
 		}

+	/**
+	 * Computes hash values for an element.
+	 *
+	 * @param key The key of the value to hash.
+	 *
+	 * @return Vector of *k* hash values.
+	 */
+	digest_vector Hash(const HashKey* key) const;
+
 	/**
 	 * Computes the hashes for a set of bytes.
 	 *
@ -64,11 +87,9 @@ public:
 	size_t K() const	{ return k; }

 	/**
-	 * Returns the hasher's name. If not empty, the hasher uses this descriptor
-	 * to seed its *k* hash functions. Otherwise the hasher mixes in the initial
-	 * seed derived from the environment variable `$BRO_SEED`.
+	 * Returns the seed used to construct the hasher.
 	 */
-	const std::string& Name() const { return name; }
+	size_t Seed() const	{ return seed; }

 	bool Serialize(SerialInfo* info) const;
 	static Hasher* Unserialize(UnserialInfo* info);
@ -81,16 +102,15 @@ protected:
 	/**
 	 * Constructor.
 	 *
-	 * @param k the number of hash functions.
+	 * @param arg_k the number of hash functions.
 	 *
-	 * @param name A name for the hasher. Hashers with the same name
-	 * should provide consistent results.
+	 * @param arg_seed The seed for the hasher.
 	 */
-	Hasher(size_t k, const std::string& name);
+	Hasher(size_t arg_k, size_t arg_seed);

 private:
 	size_t k;
-	std::string name;
+	size_t seed;
 };

 /**
@ -103,13 +123,9 @@ public:
 	 * Constructs an H3 hash function seeded with a given seed and an
 	 * optional extra seed to replace the initial Bro seed.
 	 *
-	 * @param seed The seed to use for this instance.
-	 *
-	 * @param extra If not empty, this parameter replaces the initial
-	 * seed to compute the seed for t to compute the seed NUL-terminated
-	 * string as additional seed.
+	 * @param arg_seed The seed to use for this instance.
 	 */
-	UHF(size_t seed = 0, const std::string& extra = "");
+	UHF(size_t arg_seed = 0);

 	template <typename T>
 	Hasher::digest operator()(const T& x) const
@ -152,9 +168,10 @@ public:
 		}

 private:
-	static size_t compute_seed(size_t seed, const std::string& extra);
+	static size_t compute_seed(size_t seed);

 	H3<Hasher::digest, UHASH_KEY_SIZE> h;
+	size_t seed;
 };


@ -169,9 +186,9 @@ public:
 	 *
 	 * @param k The number of hash functions to use.
 	 *
-	 * @param name The name of the hasher.
+	 * @param seed The seed for the hasher.
 	 */
-	DefaultHasher(size_t k, const std::string& name = "");
+	DefaultHasher(size_t k, size_t seed);

 	// Overridden from Hasher.
 	virtual digest_vector Hash(const void* x, size_t n) const /* final */;
@ -197,9 +214,9 @@ public:
 	 *
 	 * @param k The number of hash functions to use.
 	 *
-	 * @param name The name of the hasher.
+	 * @param seed The seed for the hasher.
 	 */
-	DoubleHasher(size_t k, const std::string& name = "");
+	DoubleHasher(size_t k, size_t seed);

 	// Overridden from Hasher.
 	virtual digest_vector Hash(const void* x, size_t n) const /* final */;
--- a/src/probabilistic/bloom-filter.bif
+++ b/src/probabilistic/bloom-filter.bif
@ -20,23 +20,20 @@ module GLOBAL;

 ## Creates a basic Bloom filter.
 ##
-## .. note:: A Bloom filter can have a name associated with it. In the future,
-##    Bloom filters with the same name will be compatible across indepedent Bro
-##    instances, i.e., it will be possible to merge them. Currently, however, that is
-##    not yet supported.
-##
 ## fp: The desired false-positive rate.
 ##
 ## capacity: the maximum number of elements that guarantees a false-positive
 ## rate of *fp*.
 ##
 ## name: A name that uniquely identifies and seeds the Bloom filter. If empty,
-## the filter will remain tied to the current Bro process.
+## the filter will use :bro:id:`global_hash_seed` if that's set, and otherwise use
+## a local seed tied to the current Bro process. Only filters with the same seed
+## can be merged with :bro:id:`bloomfilter_merge` .
 ##
 ## Returns: A Bloom filter handle.
 ##
-## .. bro:see:: bloomfilter_counting_init  bloomfilter_add bloomfilter_lookup
-##    bloomfilter_clear bloomfilter_merge
+## .. bro:see:: bloomfilter_basic_init2 bloomfilter_counting_init bloomfilter_add
+##    bloomfilter_lookup bloomfilter_clear bloomfilter_merge global_hash_seed
 function bloomfilter_basic_init%(fp: double, capacity: count,
                                 name: string &default=""%): opaque of bloomfilter
 	%{
@ -48,18 +45,53 @@ function bloomfilter_basic_init%(fp: double, capacity: count,

 	size_t cells = BasicBloomFilter::M(fp, capacity);
 	size_t optimal_k = BasicBloomFilter::K(cells, capacity);
-	const Hasher* h = new DefaultHasher(optimal_k, name->CheckString());
+	size_t seed = Hasher::MakeSeed(name->Len() > 0 ? name->Bytes() : 0,
+                                 name->Len());
+	const Hasher* h = new DefaultHasher(optimal_k, seed);
+
+	return new BloomFilterVal(new BasicBloomFilter(h, cells));
+	%}
+
+## Creates a basic Bloom filter. This function serves as a low-level
+## alternative to :bro:id:`bloomfilter_basic_init` where the user has full
+## control over the number of hash functions and cells in the underlying bit
+## vector.
+##
+## k: The number of hash functions to use. Must be greater than zero.
+##
+## cells: The number of cells of the underlying bit vector. Must be greater
+## than zero.
+##
+## name: A name that uniquely identifies and seeds the Bloom filter. If empty,
+## the filter will use :bro:id:`global_hash_seed` if that's set, and otherwise use
+## a local seed tied to the current Bro process. Only filters with the same seed
+## can be merged with :bro:id:`bloomfilter_merge` .
+##
+## Returns: A Bloom filter handle.
+##
+## .. bro:see:: bloomfilter_basic_init bloomfilter_counting_init bloomfilter_add
+##    bloomfilter_lookup bloomfilter_clear bloomfilter_merge global_hash_seed
+function bloomfilter_basic_init2%(k: count, cells: count,
+                                  name: string &default=""%): opaque of bloomfilter
+	%{
+	// Both arguments are unsigned counts, so zero is the only invalid value.
+	if ( k == 0 )
+		{
+		reporter->Error("number of hash functions must be positive");
+		return 0;
+		}
+
+	if ( cells == 0 )
+		{
+		reporter->Error("number of cells must be positive");
+		return 0;
+		}
+
+	// An empty name passes a null pointer so that MakeSeed falls back to
+	// its default seed sources.
+	size_t seed = Hasher::MakeSeed(name->Len() > 0 ? name->Bytes() : 0,
+				       name->Len());
+	const Hasher* h = new DefaultHasher(k, seed);

 	return new BloomFilterVal(new BasicBloomFilter(h, cells));
 	%}

 ## Creates a counting Bloom filter.
 ##
-## .. note:: A Bloom filter can have a name associated with it. In the future,
-##    Bloom filters with the same name will be compatible across indepedent Bro
-##    instances, i.e., it will be possible to merge them. Currently, however, that is
-##    not yet supported.
-##
 ## k: The number of hash functions to use.
 ##
 ## cells: The number of cells of the underlying counter vector. As there's no
@ -71,12 +103,14 @@ function bloomfilter_basic_init%(fp: double, capacity: count,
 ## becomes a cell of size *w* bits.
 ##
 ## name: A name that uniquely identifies and seeds the Bloom filter. If empty,
-## the filter will remain tied to the current Bro process.
+## the filter will use :bro:id:`global_hash_seed` if that's set, and otherwise use
+## a local seed tied to the current Bro process. Only filters with the same seed
+## can be merged with :bro:id:`bloomfilter_merge` .
 ##
 ## Returns: A Bloom filter handle.
 ##
-## .. bro:see:: bloomfilter_basic_init bloomfilter_add bloomfilter_lookup
-##    bloomfilter_clear bloomfilter_merge
+## .. bro:see:: bloomfilter_basic_init bloomfilter_basic_init2 bloomfilter_add
+##    bloomfilter_lookup bloomfilter_clear bloomfilter_merge global_hash_seed
 function bloomfilter_counting_init%(k: count, cells: count, max: count,
 				    name: string &default=""%): opaque of bloomfilter
 	%{
@ -86,7 +120,10 @@ function bloomfilter_counting_init%(k: count, cells: count, max: count,
 		return 0;
 		}

-	const Hasher* h = new DefaultHasher(k, name->CheckString());
+	size_t seed = Hasher::MakeSeed(name->Len() > 0 ? name->Bytes() : 0,
+				       name->Len());
+
+	const Hasher* h = new DefaultHasher(k, seed);

 	uint16 width = 1;
 	while ( max >>= 1 )
@ -101,8 +138,9 @@ function bloomfilter_counting_init%(k: count, cells: count, max: count,
 ##
 ## x: The element to add.
 ##
-## .. bro:see:: bloomfilter_counting_init bloomfilter_basic_init loomfilter_lookup
-##    bloomfilter_clear bloomfilter_merge
+## .. bro:see:: bloomfilter_basic_init bloomfilter_basic_init2 
+##    bloomfilter_counting_init bloomfilter_lookup bloomfilter_clear 
+##    bloomfilter_merge
 function bloomfilter_add%(bf: opaque of bloomfilter, x: any%): any
 	%{
 	BloomFilterVal* bfv = static_cast<BloomFilterVal*>(bf);
@ -127,8 +165,9 @@ function bloomfilter_add%(bf: opaque of bloomfilter, x: any%): any
 ##
 ## Returns: the counter associated with *x* in *bf*.
 ##
-## .. bro:see:: bloomfilter_counting_init bloomfilter_basic_init
-##    bloomfilter_add bloomfilter_clear bloomfilter_merge
+## .. bro:see:: bloomfilter_basic_init bloomfilter_basic_init2
+##    bloomfilter_counting_init bloomfilter_add bloomfilter_clear
+##    bloomfilter_merge
 function bloomfilter_lookup%(bf: opaque of bloomfilter, x: any%): count
 	%{
 	const BloomFilterVal* bfv = static_cast<const BloomFilterVal*>(bf);
@ -154,8 +193,9 @@ function bloomfilter_lookup%(bf: opaque of bloomfilter, x: any%): count
 ##
 ## bf: The Bloom filter handle.
 ##
-## .. bro:see:: bloomfilter_counting_init bloomfilter_basic_init
-##    bloomfilter_add bloomfilter_lookup bloomfilter_merge
+## .. bro:see:: bloomfilter_basic_init bloomfilter_basic_init2
+##    bloomfilter_counting_init bloomfilter_add bloomfilter_lookup
+##    bloomfilter_merge
 function bloomfilter_clear%(bf: opaque of bloomfilter%): any
 	%{
 	BloomFilterVal* bfv = static_cast<BloomFilterVal*>(bf);
@ -178,8 +218,9 @@ function bloomfilter_clear%(bf: opaque of bloomfilter%): any
 ##
 ## Returns: The union of *bf1* and *bf2*.
 ##
-## .. bro:see:: bloomfilter_counting_init bloomfilter_basic_init
-##    bloomfilter_add bloomfilter_lookup bloomfilter_clear
+## .. bro:see:: bloomfilter_basic_init bloomfilter_basic_init2
+##    bloomfilter_counting_init bloomfilter_add bloomfilter_lookup
+##    bloomfilter_clear
 function bloomfilter_merge%(bf1: opaque of bloomfilter,
 			    bf2: opaque of bloomfilter%): opaque of bloomfilter
 	%{
@ -196,3 +237,13 @@ function bloomfilter_merge%(bf1: opaque of bloomfilter,

 	return BloomFilterVal::Merge(bfv1, bfv2);
 	%}
+
+## Returns a string with a representation of a Bloom filter's internal
+## state. This is for debugging/testing purposes only.
+##
+## bf: The Bloom filter handle.
+##
+## Returns: A string describing the filter's internal state.
+function bloomfilter_internal_state%(bf: opaque of bloomfilter%): string
+	%{
+	return new StringVal(static_cast<BloomFilterVal*>(bf)->InternalState());
+	%}