mirror of
https://github.com/zeek/zeek.git
synced 2025-10-03 23:28:20 +00:00
Merge branch 'topic/robin/bloom-filter-merge'
* topic/robin/bloom-filter-merge: Using a real hash function for hashing a BitVector's internal state. Support UHF hashing for >= UHASH_KEY_SIZE bytes. Changing the Bloom filter hashing so that it's independent of CompositeHash. Add new BiF for low-level Bloom filter initialization. Introduce global_hash_seed script variable. Conflicts: testing/btest/Baseline/bifs.bloomfilter/output
This commit is contained in:
commit
32a403cdaf
19 changed files with 337 additions and 138 deletions
1
NEWS
1
NEWS
|
@ -113,6 +113,7 @@ New Functionality
|
||||||
the frequency of elements. The corresponding functions are:
|
the frequency of elements. The corresponding functions are:
|
||||||
|
|
||||||
bloomfilter_basic_init(fp: double, capacity: count, name: string &default=""): opaque of bloomfilter
|
bloomfilter_basic_init(fp: double, capacity: count, name: string &default=""): opaque of bloomfilter
|
||||||
|
bloomfilter_basic_init2(k: count, cells: count, name: string &default=""): opaque of bloomfilter
|
||||||
bloomfilter_counting_init(k: count, cells: count, max: count, name: string &default=""): opaque of bloomfilter
|
bloomfilter_counting_init(k: count, cells: count, max: count, name: string &default=""): opaque of bloomfilter
|
||||||
bloomfilter_add(bf: opaque of bloomfilter, x: any)
|
bloomfilter_add(bf: opaque of bloomfilter, x: any)
|
||||||
bloomfilter_lookup(bf: opaque of bloomfilter, x: any): count
|
bloomfilter_lookup(bf: opaque of bloomfilter, x: any): count
|
||||||
|
|
|
@ -3042,6 +3042,11 @@ module GLOBAL;
|
||||||
## Number of bytes per packet to capture from live interfaces.
|
## Number of bytes per packet to capture from live interfaces.
|
||||||
const snaplen = 8192 &redef;
|
const snaplen = 8192 &redef;
|
||||||
|
|
||||||
|
## Seed for hashes computed internally for probabilistic data structures. Using
|
||||||
|
## the same value here will make the hashes compatible between independent Bro
|
||||||
|
## instances. If left unset, Bro will use a temporary local seed.
|
||||||
|
const global_hash_seed: string = "" &redef;
|
||||||
|
|
||||||
# Load BiFs defined by plugins.
|
# Load BiFs defined by plugins.
|
||||||
@load base/bif/plugins
|
@load base/bif/plugins
|
||||||
|
|
||||||
|
|
|
@ -238,6 +238,8 @@ TableType* record_field_table;
|
||||||
|
|
||||||
StringVal* cmd_line_bpf_filter;
|
StringVal* cmd_line_bpf_filter;
|
||||||
|
|
||||||
|
StringVal* global_hash_seed;
|
||||||
|
|
||||||
OpaqueType* md5_type;
|
OpaqueType* md5_type;
|
||||||
OpaqueType* sha1_type;
|
OpaqueType* sha1_type;
|
||||||
OpaqueType* sha256_type;
|
OpaqueType* sha256_type;
|
||||||
|
@ -304,6 +306,8 @@ void init_general_global_var()
|
||||||
cmd_line_bpf_filter =
|
cmd_line_bpf_filter =
|
||||||
internal_val("cmd_line_bpf_filter")->AsStringVal();
|
internal_val("cmd_line_bpf_filter")->AsStringVal();
|
||||||
|
|
||||||
|
global_hash_seed = opt_internal_string("global_hash_seed");
|
||||||
|
|
||||||
md5_type = new OpaqueType("md5");
|
md5_type = new OpaqueType("md5");
|
||||||
sha1_type = new OpaqueType("sha1");
|
sha1_type = new OpaqueType("sha1");
|
||||||
sha256_type = new OpaqueType("sha256");
|
sha256_type = new OpaqueType("sha256");
|
||||||
|
|
|
@ -242,6 +242,8 @@ extern TableType* record_field_table;
|
||||||
|
|
||||||
extern StringVal* cmd_line_bpf_filter;
|
extern StringVal* cmd_line_bpf_filter;
|
||||||
|
|
||||||
|
extern StringVal* global_hash_seed;
|
||||||
|
|
||||||
class OpaqueType;
|
class OpaqueType;
|
||||||
extern OpaqueType* md5_type;
|
extern OpaqueType* md5_type;
|
||||||
extern OpaqueType* sha1_type;
|
extern OpaqueType* sha1_type;
|
||||||
|
|
|
@ -566,14 +566,14 @@ BroType* BloomFilterVal::Type() const
|
||||||
void BloomFilterVal::Add(const Val* val)
|
void BloomFilterVal::Add(const Val* val)
|
||||||
{
|
{
|
||||||
HashKey* key = hash->ComputeHash(val, 1);
|
HashKey* key = hash->ComputeHash(val, 1);
|
||||||
bloom_filter->Add(key->Hash());
|
bloom_filter->Add(key);
|
||||||
delete key;
|
delete key;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t BloomFilterVal::Count(const Val* val) const
|
size_t BloomFilterVal::Count(const Val* val) const
|
||||||
{
|
{
|
||||||
HashKey* key = hash->ComputeHash(val, 1);
|
HashKey* key = hash->ComputeHash(val, 1);
|
||||||
size_t cnt = bloom_filter->Count(key->Hash());
|
size_t cnt = bloom_filter->Count(key);
|
||||||
delete key;
|
delete key;
|
||||||
return cnt;
|
return cnt;
|
||||||
}
|
}
|
||||||
|
@ -588,6 +588,11 @@ bool BloomFilterVal::Empty() const
|
||||||
return bloom_filter->Empty();
|
return bloom_filter->Empty();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
string BloomFilterVal::InternalState() const
|
||||||
|
{
|
||||||
|
return bloom_filter->InternalState();
|
||||||
|
}
|
||||||
|
|
||||||
BloomFilterVal* BloomFilterVal::Merge(const BloomFilterVal* x,
|
BloomFilterVal* BloomFilterVal::Merge(const BloomFilterVal* x,
|
||||||
const BloomFilterVal* y)
|
const BloomFilterVal* y)
|
||||||
{
|
{
|
||||||
|
|
|
@ -127,6 +127,7 @@ public:
|
||||||
size_t Count(const Val* val) const;
|
size_t Count(const Val* val) const;
|
||||||
void Clear();
|
void Clear();
|
||||||
bool Empty() const;
|
bool Empty() const;
|
||||||
|
string InternalState() const;
|
||||||
|
|
||||||
static BloomFilterVal* Merge(const BloomFilterVal* x,
|
static BloomFilterVal* Merge(const BloomFilterVal* x,
|
||||||
const BloomFilterVal* y);
|
const BloomFilterVal* y);
|
||||||
|
|
|
@ -1,10 +1,12 @@
|
||||||
// See the file "COPYING" in the main distribution directory for copyright.
|
// See the file "COPYING" in the main distribution directory for copyright.
|
||||||
|
|
||||||
#include "BitVector.h"
|
#include <openssl/sha.h>
|
||||||
|
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <limits>
|
#include <limits>
|
||||||
|
|
||||||
|
#include "BitVector.h"
|
||||||
#include "Serializer.h"
|
#include "Serializer.h"
|
||||||
|
#include "digest.h"
|
||||||
|
|
||||||
using namespace probabilistic;
|
using namespace probabilistic;
|
||||||
|
|
||||||
|
@ -490,6 +492,21 @@ BitVector::size_type BitVector::FindNext(size_type i) const
|
||||||
return block ? bi * bits_per_block + lowest_bit(block) : find_from(bi + 1);
|
return block ? bi * bits_per_block + lowest_bit(block) : find_from(bi + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Computes a hash value of the bit vector's internal representation by
// feeding every block into SHA-256 and returning the leading
// sizeof(size_t) bytes of the digest. Mainly for debugging/testing purposes.
size_t BitVector::Hash() const
	{
	u_char buf[SHA256_DIGEST_LENGTH];
	SHA256_CTX ctx;
	sha256_init(&ctx);

	for ( size_type i = 0; i < Blocks(); ++i )
		sha256_update(&ctx, &bits[i], sizeof(bits[i]));

	sha256_final(&ctx, buf);

	// Use the first bytes of the digest as the hash. They are copied
	// byte-wise rather than read through a size_t* cast: buf is a plain
	// u_char array, so dereferencing it as size_t would violate the aliasing
	// rules and may be misaligned.
	size_t hash = 0;
	u_char* out = reinterpret_cast<u_char*>(&hash);

	for ( size_t i = 0; i < sizeof(hash); ++i )
		out[i] = buf[i];

	return hash;
	}
|
||||||
|
|
||||||
BitVector::size_type BitVector::lowest_bit(block_type block)
|
BitVector::size_type BitVector::lowest_bit(block_type block)
|
||||||
{
|
{
|
||||||
block_type x = block - (block & (block - 1));
|
block_type x = block - (block & (block - 1));
|
||||||
|
|
|
@ -276,6 +276,13 @@ public:
|
||||||
*/
|
*/
|
||||||
size_type FindNext(size_type i) const;
|
size_type FindNext(size_type i) const;
|
||||||
|
|
||||||
|
/** Computes a hash value of the internal representation.
|
||||||
|
* This is mainly for debugging/testing purposes.
|
||||||
|
*
|
||||||
|
* @return The hash.
|
||||||
|
*/
|
||||||
|
size_t Hash() const;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Serializes the bit vector.
|
* Serializes the bit vector.
|
||||||
*
|
*
|
||||||
|
|
|
@ -9,6 +9,8 @@
|
||||||
#include "CounterVector.h"
|
#include "CounterVector.h"
|
||||||
#include "Serializer.h"
|
#include "Serializer.h"
|
||||||
|
|
||||||
|
#include "../util.h"
|
||||||
|
|
||||||
using namespace probabilistic;
|
using namespace probabilistic;
|
||||||
|
|
||||||
BloomFilter::BloomFilter()
|
BloomFilter::BloomFilter()
|
||||||
|
@ -107,6 +109,11 @@ BasicBloomFilter* BasicBloomFilter::Clone() const
|
||||||
return copy;
|
return copy;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string BasicBloomFilter::InternalState() const
|
||||||
|
{
|
||||||
|
return fmt("%" PRIu64, (uint64_t)bits->Hash());
|
||||||
|
}
|
||||||
|
|
||||||
BasicBloomFilter::BasicBloomFilter()
|
BasicBloomFilter::BasicBloomFilter()
|
||||||
{
|
{
|
||||||
bits = 0;
|
bits = 0;
|
||||||
|
@ -133,14 +140,18 @@ bool BasicBloomFilter::DoUnserialize(UnserialInfo* info)
|
||||||
return (bits != 0);
|
return (bits != 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void BasicBloomFilter::AddImpl(const Hasher::digest_vector& h)
|
void BasicBloomFilter::Add(const HashKey* key)
|
||||||
{
|
{
|
||||||
|
Hasher::digest_vector h = hasher->Hash(key);
|
||||||
|
|
||||||
for ( size_t i = 0; i < h.size(); ++i )
|
for ( size_t i = 0; i < h.size(); ++i )
|
||||||
bits->Set(h[i] % bits->Size());
|
bits->Set(h[i] % bits->Size());
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t BasicBloomFilter::CountImpl(const Hasher::digest_vector& h) const
|
size_t BasicBloomFilter::Count(const HashKey* key) const
|
||||||
{
|
{
|
||||||
|
Hasher::digest_vector h = hasher->Hash(key);
|
||||||
|
|
||||||
for ( size_t i = 0; i < h.size(); ++i )
|
for ( size_t i = 0; i < h.size(); ++i )
|
||||||
{
|
{
|
||||||
if ( ! (*bits)[h[i] % bits->Size()] )
|
if ( ! (*bits)[h[i] % bits->Size()] )
|
||||||
|
@ -206,6 +217,11 @@ CountingBloomFilter* CountingBloomFilter::Clone() const
|
||||||
return copy;
|
return copy;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
string CountingBloomFilter::InternalState() const
|
||||||
|
{
|
||||||
|
return fmt("%" PRIu64, (uint64_t)cells->Hash());
|
||||||
|
}
|
||||||
|
|
||||||
IMPLEMENT_SERIAL(CountingBloomFilter, SER_COUNTINGBLOOMFILTER)
|
IMPLEMENT_SERIAL(CountingBloomFilter, SER_COUNTINGBLOOMFILTER)
|
||||||
|
|
||||||
bool CountingBloomFilter::DoSerialize(SerialInfo* info) const
|
bool CountingBloomFilter::DoSerialize(SerialInfo* info) const
|
||||||
|
@ -222,14 +238,18 @@ bool CountingBloomFilter::DoUnserialize(UnserialInfo* info)
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Use partitioning in add/count to allow for reusing CMS bounds.
|
// TODO: Use partitioning in add/count to allow for reusing CMS bounds.
|
||||||
void CountingBloomFilter::AddImpl(const Hasher::digest_vector& h)
|
void CountingBloomFilter::Add(const HashKey* key)
|
||||||
{
|
{
|
||||||
|
Hasher::digest_vector h = hasher->Hash(key);
|
||||||
|
|
||||||
for ( size_t i = 0; i < h.size(); ++i )
|
for ( size_t i = 0; i < h.size(); ++i )
|
||||||
cells->Increment(h[i] % cells->Size());
|
cells->Increment(h[i] % cells->Size());
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t CountingBloomFilter::CountImpl(const Hasher::digest_vector& h) const
|
size_t CountingBloomFilter::Count(const HashKey* key) const
|
||||||
{
|
{
|
||||||
|
Hasher::digest_vector h = hasher->Hash(key);
|
||||||
|
|
||||||
CounterVector::size_type min =
|
CounterVector::size_type min =
|
||||||
std::numeric_limits<CounterVector::size_type>::max();
|
std::numeric_limits<CounterVector::size_type>::max();
|
||||||
|
|
||||||
|
|
|
@ -22,27 +22,20 @@ public:
|
||||||
virtual ~BloomFilter();
|
virtual ~BloomFilter();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Adds an element of type T to the Bloom filter.
|
* Adds an element to the Bloom filter.
|
||||||
* @param x The element to add
|
*
|
||||||
|
* @param key The key associated with the element to add.
|
||||||
*/
|
*/
|
||||||
template <typename T>
|
virtual void Add(const HashKey* key) = 0;
|
||||||
void Add(const T& x)
|
|
||||||
{
|
|
||||||
AddImpl((*hasher)(x));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Retrieves the associated count of a given value.
|
* Retrieves the associated count of a given value.
|
||||||
*
|
*
|
||||||
* @param x The value of type `T` to check.
|
* @param key The key associated with the element to check.
|
||||||
*
|
*
|
||||||
* @return The counter associated with *x*.
|
* @return The counter associated with *key*.
|
||||||
*/
|
*/
|
||||||
template <typename T>
|
virtual size_t Count(const HashKey* key) const = 0;
|
||||||
size_t Count(const T& x) const
|
|
||||||
{
|
|
||||||
return CountImpl((*hasher)(x));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Checks whether the Bloom filter is empty.
|
* Checks whether the Bloom filter is empty.
|
||||||
|
@ -72,6 +65,12 @@ public:
|
||||||
*/
|
*/
|
||||||
virtual BloomFilter* Clone() const = 0;
|
virtual BloomFilter* Clone() const = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a string with a representation of the Bloom filter's
|
||||||
|
* internal state. This is for debugging/testing purposes only.
|
||||||
|
*/
|
||||||
|
virtual string InternalState() const = 0;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Serializes the Bloom filter.
|
* Serializes the Bloom filter.
|
||||||
*
|
*
|
||||||
|
@ -106,25 +105,6 @@ protected:
|
||||||
*/
|
*/
|
||||||
BloomFilter(const Hasher* hasher);
|
BloomFilter(const Hasher* hasher);
|
||||||
|
|
||||||
/**
|
|
||||||
* Abstract method for implementinng the *Add* operation.
|
|
||||||
*
|
|
||||||
* @param hashes A set of *k* hashes for the item to add, computed by
|
|
||||||
* the internal hasher object.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
virtual void AddImpl(const Hasher::digest_vector& hashes) = 0;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Abstract method for implementing the *Count* operation.
|
|
||||||
*
|
|
||||||
* @param hashes A set of *k* hashes for the item to add, computed by
|
|
||||||
* the internal hasher object.
|
|
||||||
*
|
|
||||||
* @return Returns the counter associated with the hashed element.
|
|
||||||
*/
|
|
||||||
virtual size_t CountImpl(const Hasher::digest_vector& hashes) const = 0;
|
|
||||||
|
|
||||||
const Hasher* hasher;
|
const Hasher* hasher;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -177,6 +157,7 @@ public:
|
||||||
virtual void Clear();
|
virtual void Clear();
|
||||||
virtual bool Merge(const BloomFilter* other);
|
virtual bool Merge(const BloomFilter* other);
|
||||||
virtual BasicBloomFilter* Clone() const;
|
virtual BasicBloomFilter* Clone() const;
|
||||||
|
virtual string InternalState() const;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
DECLARE_SERIAL(BasicBloomFilter);
|
DECLARE_SERIAL(BasicBloomFilter);
|
||||||
|
@ -187,8 +168,8 @@ protected:
|
||||||
BasicBloomFilter();
|
BasicBloomFilter();
|
||||||
|
|
||||||
// Overridden from BloomFilter.
|
// Overridden from BloomFilter.
|
||||||
virtual void AddImpl(const Hasher::digest_vector& h);
|
virtual void Add(const HashKey* key);
|
||||||
virtual size_t CountImpl(const Hasher::digest_vector& h) const;
|
virtual size_t Count(const HashKey* key) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
BitVector* bits;
|
BitVector* bits;
|
||||||
|
@ -216,6 +197,7 @@ public:
|
||||||
virtual void Clear();
|
virtual void Clear();
|
||||||
virtual bool Merge(const BloomFilter* other);
|
virtual bool Merge(const BloomFilter* other);
|
||||||
virtual CountingBloomFilter* Clone() const;
|
virtual CountingBloomFilter* Clone() const;
|
||||||
|
virtual string InternalState() const;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
DECLARE_SERIAL(CountingBloomFilter);
|
DECLARE_SERIAL(CountingBloomFilter);
|
||||||
|
@ -226,8 +208,8 @@ protected:
|
||||||
CountingBloomFilter();
|
CountingBloomFilter();
|
||||||
|
|
||||||
// Overridden from BloomFilter.
|
// Overridden from BloomFilter.
|
||||||
virtual void AddImpl(const Hasher::digest_vector& h);
|
virtual void Add(const HashKey* key);
|
||||||
virtual size_t CountImpl(const Hasher::digest_vector& h) const;
|
virtual size_t Count(const HashKey* key) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
CounterVector* cells;
|
CounterVector* cells;
|
||||||
|
|
|
@ -153,6 +153,11 @@ CounterVector operator|(const CounterVector& x, const CounterVector& y)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
size_t CounterVector::Hash() const
|
||||||
|
{
|
||||||
|
return bits->Hash();
|
||||||
|
}
|
||||||
|
|
||||||
bool CounterVector::Serialize(SerialInfo* info) const
|
bool CounterVector::Serialize(SerialInfo* info) const
|
||||||
{
|
{
|
||||||
return SerialObj::Serialize(info);
|
return SerialObj::Serialize(info);
|
||||||
|
|
|
@ -126,6 +126,13 @@ public:
|
||||||
*/
|
*/
|
||||||
CounterVector& operator|=(const CounterVector& other);
|
CounterVector& operator|=(const CounterVector& other);
|
||||||
|
|
||||||
|
/** Computes a hash value of the internal representation.
|
||||||
|
* This is mainly for debugging/testing purposes.
|
||||||
|
*
|
||||||
|
* @return The hash.
|
||||||
|
*/
|
||||||
|
size_t Hash() const;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Serializes the bit vector.
|
* Serializes the bit vector.
|
||||||
*
|
*
|
||||||
|
|
|
@ -1,13 +1,42 @@
|
||||||
// See the file "COPYING" in the main distribution directory for copyright.
|
// See the file "COPYING" in the main distribution directory for copyright.
|
||||||
|
|
||||||
#include <typeinfo>
|
#include <typeinfo>
|
||||||
|
#include <openssl/md5.h>
|
||||||
|
|
||||||
#include "Hasher.h"
|
#include "Hasher.h"
|
||||||
|
#include "NetVar.h"
|
||||||
#include "digest.h"
|
#include "digest.h"
|
||||||
#include "Serializer.h"
|
#include "Serializer.h"
|
||||||
|
|
||||||
using namespace probabilistic;
|
using namespace probabilistic;
|
||||||
|
|
||||||
|
// Derives a hasher seed by running SHA-256 over exactly one input source and
// returning the leading sizeof(size_t) bytes of the digest.
//
// data, size: The bytes to hash when *data* is non-null. If *data* is null,
// the global_hash_seed script value is used when it is set and non-empty;
// otherwise the value returned by initial_seed() is used.
size_t Hasher::MakeSeed(const void* data, size_t size)
	{
	u_char buf[SHA256_DIGEST_LENGTH];
	SHA256_CTX ctx;
	sha256_init(&ctx);

	if ( data )
		sha256_update(&ctx, data, size);

	else if ( global_hash_seed && global_hash_seed->Len() > 0 )
		sha256_update(&ctx, global_hash_seed->Bytes(), global_hash_seed->Len());

	else
		{
		unsigned int first_seed = initial_seed();
		sha256_update(&ctx, &first_seed, sizeof(first_seed));
		}

	sha256_final(&ctx, buf);

	// Use the first bytes of the digest as seed. They are copied byte-wise
	// rather than read through a size_t* cast: buf is a plain u_char array,
	// so dereferencing it as size_t would violate the aliasing rules and may
	// be misaligned.
	size_t result = 0;
	u_char* out = reinterpret_cast<u_char*>(&result);

	for ( size_t i = 0; i < sizeof(result); ++i )
		out[i] = buf[i];

	return result;
	}
|
||||||
|
|
||||||
|
Hasher::digest_vector Hasher::Hash(const HashKey* key) const
|
||||||
|
{
|
||||||
|
return Hash(key->Key(), key->Size());
|
||||||
|
}
|
||||||
|
|
||||||
bool Hasher::Serialize(SerialInfo* info) const
|
bool Hasher::Serialize(SerialInfo* info) const
|
||||||
{
|
{
|
||||||
return SerialObj::Serialize(info);
|
return SerialObj::Serialize(info);
|
||||||
|
@ -25,7 +54,7 @@ bool Hasher::DoSerialize(SerialInfo* info) const
|
||||||
if ( ! SERIALIZE(static_cast<uint16>(k)) )
|
if ( ! SERIALIZE(static_cast<uint16>(k)) )
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
return SERIALIZE_STR(name.c_str(), name.size());
|
return SERIALIZE(static_cast<uint64>(seed));
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Hasher::DoUnserialize(UnserialInfo* info)
|
bool Hasher::DoUnserialize(UnserialInfo* info)
|
||||||
|
@ -39,62 +68,52 @@ bool Hasher::DoUnserialize(UnserialInfo* info)
|
||||||
k = serial_k;
|
k = serial_k;
|
||||||
assert(k > 0);
|
assert(k > 0);
|
||||||
|
|
||||||
const char* serial_name;
|
uint64 serial_seed;
|
||||||
if ( ! UNSERIALIZE_STR(&serial_name, 0) )
|
if ( ! UNSERIALIZE(&serial_seed) )
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
name = serial_name;
|
seed = serial_seed;
|
||||||
delete [] serial_name;
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
Hasher::Hasher(size_t k, const std::string& arg_name)
|
Hasher::Hasher(size_t arg_k, size_t arg_seed)
|
||||||
: k(k)
|
|
||||||
{
|
{
|
||||||
k = k;
|
k = arg_k;
|
||||||
name = arg_name;
|
seed = arg_seed;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
UHF::UHF(size_t arg_seed)
|
||||||
UHF::UHF(size_t seed, const std::string& extra)
|
: h(arg_seed)
|
||||||
: h(compute_seed(seed, extra))
|
|
||||||
{
|
{
|
||||||
|
seed = arg_seed;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This function is almost equivalent to HashKey::HashBytes except that it
|
||||||
|
// does not depend on global state and that we mix in the seed multiple
|
||||||
|
// times.
|
||||||
Hasher::digest UHF::hash(const void* x, size_t n) const
|
Hasher::digest UHF::hash(const void* x, size_t n) const
|
||||||
{
|
{
|
||||||
assert(n <= UHASH_KEY_SIZE);
|
if ( n <= UHASH_KEY_SIZE )
|
||||||
return n == 0 ? 0 : h(x, n);
|
return n == 0 ? 0 : h(x, n);
|
||||||
|
|
||||||
|
unsigned char d[16];
|
||||||
|
MD5(reinterpret_cast<const unsigned char*>(x), n, d);
|
||||||
|
|
||||||
|
const unsigned char* s = reinterpret_cast<const unsigned char*>(&seed);
|
||||||
|
for ( size_t i = 0; i < 16; ++i )
|
||||||
|
d[i] ^= s[i % sizeof(seed)];
|
||||||
|
|
||||||
|
MD5(d, 16, d);
|
||||||
|
|
||||||
|
return d[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t UHF::compute_seed(size_t seed, const std::string& extra)
|
DefaultHasher::DefaultHasher(size_t k, size_t seed)
|
||||||
|
: Hasher(k, seed)
|
||||||
{
|
{
|
||||||
u_char buf[SHA256_DIGEST_LENGTH];
|
for ( size_t i = 1; i <= k; ++i )
|
||||||
SHA256_CTX ctx;
|
hash_functions.push_back(UHF(Seed() + bro_prng(i)));
|
||||||
sha256_init(&ctx);
|
|
||||||
|
|
||||||
if ( extra.empty() )
|
|
||||||
{
|
|
||||||
unsigned int first_seed = initial_seed();
|
|
||||||
sha256_update(&ctx, &first_seed, sizeof(first_seed));
|
|
||||||
}
|
|
||||||
|
|
||||||
else
|
|
||||||
sha256_update(&ctx, extra.c_str(), extra.size());
|
|
||||||
|
|
||||||
sha256_update(&ctx, &seed, sizeof(seed));
|
|
||||||
sha256_final(&ctx, buf);
|
|
||||||
|
|
||||||
// Take the first sizeof(size_t) bytes as seed.
|
|
||||||
return *reinterpret_cast<size_t*>(buf);
|
|
||||||
}
|
|
||||||
|
|
||||||
DefaultHasher::DefaultHasher(size_t k, const std::string& name)
|
|
||||||
: Hasher(k, name)
|
|
||||||
{
|
|
||||||
for ( size_t i = 0; i < k; ++i )
|
|
||||||
hash_functions.push_back(UHF(i, name));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Hasher::digest_vector DefaultHasher::Hash(const void* x, size_t n) const
|
Hasher::digest_vector DefaultHasher::Hash(const void* x, size_t n) const
|
||||||
|
@ -137,13 +156,13 @@ bool DefaultHasher::DoUnserialize(UnserialInfo* info)
|
||||||
|
|
||||||
hash_functions.clear();
|
hash_functions.clear();
|
||||||
for ( size_t i = 0; i < K(); ++i )
|
for ( size_t i = 0; i < K(); ++i )
|
||||||
hash_functions.push_back(UHF(i, Name()));
|
hash_functions.push_back(UHF(Seed() + bro_prng(i)));
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
DoubleHasher::DoubleHasher(size_t k, const std::string& name)
|
DoubleHasher::DoubleHasher(size_t k, size_t seed)
|
||||||
: Hasher(k, name), h1(1, name), h2(2, name)
|
: Hasher(k, seed), h1(seed + bro_prng(1)), h2(seed + bro_prng(2))
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -187,8 +206,8 @@ bool DoubleHasher::DoUnserialize(UnserialInfo* info)
|
||||||
{
|
{
|
||||||
DO_UNSERIALIZE(Hasher);
|
DO_UNSERIALIZE(Hasher);
|
||||||
|
|
||||||
h1 = UHF(1, Name());
|
h1 = UHF(Seed() + bro_prng(1));
|
||||||
h2 = UHF(2, Name());
|
h2 = UHF(Seed() + bro_prng(2));
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,6 +18,20 @@ public:
|
||||||
typedef hash_t digest;
|
typedef hash_t digest;
|
||||||
typedef std::vector<digest> digest_vector;
|
typedef std::vector<digest> digest_vector;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a valid hasher seed from an arbitrary string.
|
||||||
|
*
|
||||||
|
* @param data A pointer to contiguous data that should be crunched into a
|
||||||
|
* seed. If 0, the function tries to find a global_hash_seed script variable
|
||||||
|
* to derive a seed from. If this variable does not exist, the function uses
|
||||||
|
* the initial seed generated at Bro startup.
|
||||||
|
*
|
||||||
|
* @param size The number of bytes of *data*.
|
||||||
|
*
|
||||||
|
* @return A seed suitable for hashers.
|
||||||
|
*/
|
||||||
|
static size_t MakeSeed(const void* data, size_t size);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Destructor.
|
* Destructor.
|
||||||
*/
|
*/
|
||||||
|
@ -36,6 +50,15 @@ public:
|
||||||
return Hash(&x, sizeof(T));
|
return Hash(&x, sizeof(T));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Computes hash values for an element.
|
||||||
|
*
|
||||||
|
* @param key The key of the value to hash.
|
||||||
|
*
|
||||||
|
* @return Vector of *k* hash values.
|
||||||
|
*/
|
||||||
|
digest_vector Hash(const HashKey* key) const;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Computes the hashes for a set of bytes.
|
* Computes the hashes for a set of bytes.
|
||||||
*
|
*
|
||||||
|
@ -64,11 +87,9 @@ public:
|
||||||
size_t K() const { return k; }
|
size_t K() const { return k; }
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the hasher's name. If not empty, the hasher uses this descriptor
|
* Returns the seed used to construct the hasher.
|
||||||
* to seed its *k* hash functions. Otherwise the hasher mixes in the initial
|
|
||||||
* seed derived from the environment variable `$BRO_SEED`.
|
|
||||||
*/
|
*/
|
||||||
const std::string& Name() const { return name; }
|
size_t Seed() const { return seed; }
|
||||||
|
|
||||||
bool Serialize(SerialInfo* info) const;
|
bool Serialize(SerialInfo* info) const;
|
||||||
static Hasher* Unserialize(UnserialInfo* info);
|
static Hasher* Unserialize(UnserialInfo* info);
|
||||||
|
@ -81,16 +102,15 @@ protected:
|
||||||
/**
|
/**
|
||||||
* Constructor.
|
* Constructor.
|
||||||
*
|
*
|
||||||
* @param k the number of hash functions.
|
* @param arg_k the number of hash functions.
|
||||||
*
|
*
|
||||||
* @param name A name for the hasher. Hashers with the same name
|
* @param arg_seed The seed for the hasher.
|
||||||
* should provide consistent results.
|
|
||||||
*/
|
*/
|
||||||
Hasher(size_t k, const std::string& name);
|
Hasher(size_t arg_k, size_t arg_seed);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
size_t k;
|
size_t k;
|
||||||
std::string name;
|
size_t seed;
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -103,13 +123,9 @@ public:
|
||||||
* Constructs an H3 hash function seeded with a given seed and an
|
* Constructs an H3 hash function seeded with a given seed and an
|
||||||
* optional extra seed to replace the initial Bro seed.
|
* optional extra seed to replace the initial Bro seed.
|
||||||
*
|
*
|
||||||
* @param seed The seed to use for this instance.
|
* @param arg_seed The seed to use for this instance.
|
||||||
*
|
|
||||||
* @param extra If not empty, this parameter replaces the initial
|
|
||||||
* seed to compute the seed for t to compute the seed NUL-terminated
|
|
||||||
* string as additional seed.
|
|
||||||
*/
|
*/
|
||||||
UHF(size_t seed = 0, const std::string& extra = "");
|
UHF(size_t arg_seed = 0);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
Hasher::digest operator()(const T& x) const
|
Hasher::digest operator()(const T& x) const
|
||||||
|
@ -152,9 +168,10 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
static size_t compute_seed(size_t seed, const std::string& extra);
|
static size_t compute_seed(size_t seed);
|
||||||
|
|
||||||
H3<Hasher::digest, UHASH_KEY_SIZE> h;
|
H3<Hasher::digest, UHASH_KEY_SIZE> h;
|
||||||
|
size_t seed;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@ -169,9 +186,9 @@ public:
|
||||||
*
|
*
|
||||||
* @param k The number of hash functions to use.
|
* @param k The number of hash functions to use.
|
||||||
*
|
*
|
||||||
* @param name The name of the hasher.
|
* @param seed The seed for the hasher.
|
||||||
*/
|
*/
|
||||||
DefaultHasher(size_t k, const std::string& name = "");
|
DefaultHasher(size_t k, size_t seed);
|
||||||
|
|
||||||
// Overridden from Hasher.
|
// Overridden from Hasher.
|
||||||
virtual digest_vector Hash(const void* x, size_t n) const /* final */;
|
virtual digest_vector Hash(const void* x, size_t n) const /* final */;
|
||||||
|
@ -197,9 +214,9 @@ public:
|
||||||
*
|
*
|
||||||
* @param k The number of hash functions to use.
|
* @param k The number of hash functions to use.
|
||||||
*
|
*
|
||||||
* @param name The name of the hasher.
|
* @param seed The seed for the hasher.
|
||||||
*/
|
*/
|
||||||
DoubleHasher(size_t k, const std::string& name = "");
|
DoubleHasher(size_t k, size_t seed);
|
||||||
|
|
||||||
// Overridden from Hasher.
|
// Overridden from Hasher.
|
||||||
virtual digest_vector Hash(const void* x, size_t n) const /* final */;
|
virtual digest_vector Hash(const void* x, size_t n) const /* final */;
|
||||||
|
|
|
@ -20,23 +20,20 @@ module GLOBAL;
|
||||||
|
|
||||||
## Creates a basic Bloom filter.
|
## Creates a basic Bloom filter.
|
||||||
##
|
##
|
||||||
## .. note:: A Bloom filter can have a name associated with it. In the future,
|
|
||||||
## Bloom filters with the same name will be compatible across indepedent Bro
|
|
||||||
## instances, i.e., it will be possible to merge them. Currently, however, that is
|
|
||||||
## not yet supported.
|
|
||||||
##
|
|
||||||
## fp: The desired false-positive rate.
|
## fp: The desired false-positive rate.
|
||||||
##
|
##
|
||||||
## capacity: the maximum number of elements that guarantees a false-positive
|
## capacity: the maximum number of elements that guarantees a false-positive
|
||||||
## rate of *fp*.
|
## rate of *fp*.
|
||||||
##
|
##
|
||||||
## name: A name that uniquely identifies and seeds the Bloom filter. If empty,
|
## name: A name that uniquely identifies and seeds the Bloom filter. If empty,
|
||||||
## the filter will remain tied to the current Bro process.
|
## the filter will use :bro:id:`global_hash_seed` if that's set, and otherwise use
|
||||||
|
## a local seed tied to the current Bro process. Only filters with the same seed
|
||||||
|
## can be merged with :bro:id:`bloomfilter_merge` .
|
||||||
##
|
##
|
||||||
## Returns: A Bloom filter handle.
|
## Returns: A Bloom filter handle.
|
||||||
##
|
##
|
||||||
## .. bro:see:: bloomfilter_counting_init bloomfilter_add bloomfilter_lookup
|
## .. bro:see:: bloomfilter_basic_init2 bloomfilter_counting_init bloomfilter_add
|
||||||
## bloomfilter_clear bloomfilter_merge
|
## bloomfilter_lookup bloomfilter_clear bloomfilter_merge global_hash_seed
|
||||||
function bloomfilter_basic_init%(fp: double, capacity: count,
|
function bloomfilter_basic_init%(fp: double, capacity: count,
|
||||||
name: string &default=""%): opaque of bloomfilter
|
name: string &default=""%): opaque of bloomfilter
|
||||||
%{
|
%{
|
||||||
|
@ -48,18 +45,53 @@ function bloomfilter_basic_init%(fp: double, capacity: count,
|
||||||
|
|
||||||
size_t cells = BasicBloomFilter::M(fp, capacity);
|
size_t cells = BasicBloomFilter::M(fp, capacity);
|
||||||
size_t optimal_k = BasicBloomFilter::K(cells, capacity);
|
size_t optimal_k = BasicBloomFilter::K(cells, capacity);
|
||||||
const Hasher* h = new DefaultHasher(optimal_k, name->CheckString());
|
size_t seed = Hasher::MakeSeed(name->Len() > 0 ? name->Bytes() : 0,
|
||||||
|
name->Len());
|
||||||
|
const Hasher* h = new DefaultHasher(optimal_k, seed);
|
||||||
|
|
||||||
|
return new BloomFilterVal(new BasicBloomFilter(h, cells));
|
||||||
|
%}
|
||||||
|
|
||||||
|
## Creates a basic Bloom filter. This function serves as a low-level
|
||||||
|
## alternative to bloomfilter_basic_init where the user has full control over
|
||||||
|
## the number of hash functions and cells in the underlying bit vector.
|
||||||
|
##
|
||||||
|
## k: The number of hash functions to use.
|
||||||
|
##
|
||||||
|
## cells: The number of cells of the underlying bit vector.
|
||||||
|
##
|
||||||
|
## name: A name that uniquely identifies and seeds the Bloom filter. If empty,
|
||||||
|
## the filter will use :bro:id:`global_hash_seed` if that's set, and otherwise use
|
||||||
|
## a local seed tied to the current Bro process. Only filters with the same seed
|
||||||
|
## can be merged with :bro:id:`bloomfilter_merge` .
|
||||||
|
##
|
||||||
|
## Returns: A Bloom filter handle.
|
||||||
|
##
|
||||||
|
## .. bro:see:: bloomfilter_basic_init bloomfilter_counting_init bloomfilter_add
|
||||||
|
## bloomfilter_lookup bloomfilter_clear bloomfilter_merge global_hash_seed
|
||||||
|
function bloomfilter_basic_init2%(k: count, cells: count,
				  name: string &default=""%): opaque of bloomfilter
	%{
	// Zero is the only value rejected here, so the messages ask for a
	// positive value rather than a "non-negative" one.
	if ( k == 0 )
		{
		reporter->Error("number of hash functions must be positive");
		return 0;
		}

	if ( cells == 0 )
		{
		reporter->Error("number of cells must be positive");
		return 0;
		}

	// An empty name hands MakeSeed a null data pointer and zero length;
	// a non-empty name is passed through as the seed material.
	size_t seed = Hasher::MakeSeed(name->Len() > 0 ? name->Bytes() : 0,
				       name->Len());

	// The hasher uses exactly the caller-supplied k; no optimal value is
	// derived in this low-level constructor.
	const Hasher* h = new DefaultHasher(k, seed);

	return new BloomFilterVal(new BasicBloomFilter(h, cells));
	%}
|
||||||
|
|
||||||
## Creates a counting Bloom filter.
|
## Creates a counting Bloom filter.
|
||||||
##
|
##
|
||||||
## .. note:: A Bloom filter can have a name associated with it. In the future,
|
|
||||||
## Bloom filters with the same name will be compatible across indepedent Bro
|
|
||||||
## instances, i.e., it will be possible to merge them. Currently, however, that is
|
|
||||||
## not yet supported.
|
|
||||||
##
|
|
||||||
## k: The number of hash functions to use.
|
## k: The number of hash functions to use.
|
||||||
##
|
##
|
||||||
## cells: The number of cells of the underlying counter vector. As there's no
|
## cells: The number of cells of the underlying counter vector. As there's no
|
||||||
|
@ -71,12 +103,14 @@ function bloomfilter_basic_init%(fp: double, capacity: count,
|
||||||
## becomes a cell of size *w* bits.
|
## becomes a cell of size *w* bits.
|
||||||
##
|
##
|
||||||
## name: A name that uniquely identifies and seeds the Bloom filter. If empty,
|
## name: A name that uniquely identifies and seeds the Bloom filter. If empty,
|
||||||
## the filter will remain tied to the current Bro process.
|
## the filter will use :bro:id:`global_hash_seed` if that's set, and otherwise use
|
||||||
|
## a local seed tied to the current Bro process. Only filters with the same seed
|
||||||
|
## can be merged with :bro:id:`bloomfilter_merge` .
|
||||||
##
|
##
|
||||||
## Returns: A Bloom filter handle.
|
## Returns: A Bloom filter handle.
|
||||||
##
|
##
|
||||||
## .. bro:see:: bloomfilter_basic_init bloomfilter_add bloomfilter_lookup
|
## .. bro:see:: bloomfilter_basic_init bloomfilter_basic_init2 bloomfilter_add
|
||||||
## bloomfilter_clear bloomfilter_merge
|
## bloomfilter_lookup bloomfilter_clear bloomfilter_merge global_hash_seed
|
||||||
function bloomfilter_counting_init%(k: count, cells: count, max: count,
|
function bloomfilter_counting_init%(k: count, cells: count, max: count,
|
||||||
name: string &default=""%): opaque of bloomfilter
|
name: string &default=""%): opaque of bloomfilter
|
||||||
%{
|
%{
|
||||||
|
@ -86,7 +120,10 @@ function bloomfilter_counting_init%(k: count, cells: count, max: count,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
const Hasher* h = new DefaultHasher(k, name->CheckString());
|
size_t seed = Hasher::MakeSeed(name->Len() > 0 ? name->Bytes() : 0,
|
||||||
|
name->Len());
|
||||||
|
|
||||||
|
const Hasher* h = new DefaultHasher(k, seed);
|
||||||
|
|
||||||
uint16 width = 1;
|
uint16 width = 1;
|
||||||
while ( max >>= 1 )
|
while ( max >>= 1 )
|
||||||
|
@ -101,8 +138,9 @@ function bloomfilter_counting_init%(k: count, cells: count, max: count,
|
||||||
##
|
##
|
||||||
## x: The element to add.
|
## x: The element to add.
|
||||||
##
|
##
|
||||||
## .. bro:see:: bloomfilter_counting_init bloomfilter_basic_init loomfilter_lookup
|
## .. bro:see:: bloomfilter_basic_init bloomfilter_basic_init2
|
||||||
## bloomfilter_clear bloomfilter_merge
|
## bloomfilter_counting_init bloomfilter_lookup bloomfilter_clear
|
||||||
|
## bloomfilter_merge
|
||||||
function bloomfilter_add%(bf: opaque of bloomfilter, x: any%): any
|
function bloomfilter_add%(bf: opaque of bloomfilter, x: any%): any
|
||||||
%{
|
%{
|
||||||
BloomFilterVal* bfv = static_cast<BloomFilterVal*>(bf);
|
BloomFilterVal* bfv = static_cast<BloomFilterVal*>(bf);
|
||||||
|
@ -127,8 +165,9 @@ function bloomfilter_add%(bf: opaque of bloomfilter, x: any%): any
|
||||||
##
|
##
|
||||||
## Returns: the counter associated with *x* in *bf*.
|
## Returns: the counter associated with *x* in *bf*.
|
||||||
##
|
##
|
||||||
## .. bro:see:: bloomfilter_counting_init bloomfilter_basic_init
|
## .. bro:see:: bloomfilter_basic_init bloomfilter_basic_init2
|
||||||
## bloomfilter_add bloomfilter_clear bloomfilter_merge
|
## bloomfilter_counting_init bloomfilter_add bloomfilter_clear
|
||||||
|
## bloomfilter_merge
|
||||||
function bloomfilter_lookup%(bf: opaque of bloomfilter, x: any%): count
|
function bloomfilter_lookup%(bf: opaque of bloomfilter, x: any%): count
|
||||||
%{
|
%{
|
||||||
const BloomFilterVal* bfv = static_cast<const BloomFilterVal*>(bf);
|
const BloomFilterVal* bfv = static_cast<const BloomFilterVal*>(bf);
|
||||||
|
@ -154,8 +193,9 @@ function bloomfilter_lookup%(bf: opaque of bloomfilter, x: any%): count
|
||||||
##
|
##
|
||||||
## bf: The Bloom filter handle.
|
## bf: The Bloom filter handle.
|
||||||
##
|
##
|
||||||
## .. bro:see:: bloomfilter_counting_init bloomfilter_basic_init
|
## .. bro:see:: bloomfilter_basic_init bloomfilter_basic_init2
|
||||||
## bloomfilter_add bloomfilter_lookup bloomfilter_merge
|
## bloomfilter_counting_init bloomfilter_add bloomfilter_lookup
|
||||||
|
## bloomfilter_merge
|
||||||
function bloomfilter_clear%(bf: opaque of bloomfilter%): any
|
function bloomfilter_clear%(bf: opaque of bloomfilter%): any
|
||||||
%{
|
%{
|
||||||
BloomFilterVal* bfv = static_cast<BloomFilterVal*>(bf);
|
BloomFilterVal* bfv = static_cast<BloomFilterVal*>(bf);
|
||||||
|
@ -178,8 +218,9 @@ function bloomfilter_clear%(bf: opaque of bloomfilter%): any
|
||||||
##
|
##
|
||||||
## Returns: The union of *bf1* and *bf2*.
|
## Returns: The union of *bf1* and *bf2*.
|
||||||
##
|
##
|
||||||
## .. bro:see:: bloomfilter_counting_init bloomfilter_basic_init
|
## .. bro:see:: bloomfilter_basic_init bloomfilter_basic_init2
|
||||||
## bloomfilter_add bloomfilter_lookup bloomfilter_clear
|
## bloomfilter_counting_init bloomfilter_add bloomfilter_lookup
|
||||||
|
## bloomfilter_clear
|
||||||
function bloomfilter_merge%(bf1: opaque of bloomfilter,
|
function bloomfilter_merge%(bf1: opaque of bloomfilter,
|
||||||
bf2: opaque of bloomfilter%): opaque of bloomfilter
|
bf2: opaque of bloomfilter%): opaque of bloomfilter
|
||||||
%{
|
%{
|
||||||
|
@ -196,3 +237,13 @@ function bloomfilter_merge%(bf1: opaque of bloomfilter,
|
||||||
|
|
||||||
return BloomFilterVal::Merge(bfv1, bfv2);
|
return BloomFilterVal::Merge(bfv1, bfv2);
|
||||||
%}
|
%}
|
||||||
|
|
||||||
|
## Returns a string with a representation of a Bloom filter's internal
|
||||||
|
## state. This is for debugging/testing purposes only.
|
||||||
|
##
|
||||||
|
## bf: The Bloom filter handle.
##
## Returns: A string holding the value produced by the filter's
##          ``InternalState()`` method.
|
||||||
|
function bloomfilter_internal_state%(bf: opaque of bloomfilter%): string
|
||||||
|
%{
|
||||||
|
// The handle is downcast with static_cast, i.e. without a runtime type check.
BloomFilterVal* bfv = static_cast<BloomFilterVal*>(bf);
|
||||||
|
// Wrap the state representation in a newly allocated StringVal.
return new StringVal(bfv->InternalState());
|
||||||
|
%}
|
||||||
|
|
8
testing/btest/Baseline/bifs.bloomfilter-seed/output
Normal file
8
testing/btest/Baseline/bifs.bloomfilter-seed/output
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
bf1, global_seed, 11979365913534242684
|
||||||
|
bf2, global_seed, 12550100962110750449
|
||||||
|
bf3, my_seed, 12550100962110750449
|
||||||
|
bf4, my_seed, 945716460325754659
|
||||||
|
bf1, global_seed, 12550100962110750449
|
||||||
|
bf2, global_seed, 945716460325754659
|
||||||
|
bf3, my_seed, 12550100962110750449
|
||||||
|
bf4, my_seed, 945716460325754659
|
|
@ -18,6 +18,7 @@ error: false-positive rate must take value between 0 and 1
|
||||||
1
|
1
|
||||||
1
|
1
|
||||||
1
|
1
|
||||||
|
1
|
||||||
2
|
2
|
||||||
3
|
3
|
||||||
3
|
3
|
||||||
|
|
40
testing/btest/bifs/bloomfilter-seed.bro
Normal file
40
testing/btest/bifs/bloomfilter-seed.bro
Normal file
|
@ -0,0 +1,40 @@
|
||||||
|
# @TEST-EXEC: bro -b %INPUT global_hash_seed="foo" >>output
|
||||||
|
# @TEST-EXEC: bro -b %INPUT global_hash_seed="my_seed" >>output
|
||||||
|
# @TEST-EXEC: btest-diff output
|
||||||
|
|
||||||
|
type Foo: record
|
||||||
|
{
|
||||||
|
a: count;
|
||||||
|
b: string;
|
||||||
|
};
|
||||||
|
|
||||||
|
function test_bloom_filter()
|
||||||
|
{
|
||||||
|
local bf1 = bloomfilter_basic_init(0.9, 10);
|
||||||
|
bloomfilter_add(bf1, "foo");
|
||||||
|
bloomfilter_add(bf1, "bar");
|
||||||
|
|
||||||
|
local bf2 = bloomfilter_basic_init(0.9, 10);
|
||||||
|
bloomfilter_add(bf2, Foo($a=1, $b="xx"));
|
||||||
|
bloomfilter_add(bf2, Foo($a=2, $b="yy"));
|
||||||
|
|
||||||
|
local bf3 = bloomfilter_basic_init(0.9, 10, "my_seed");
|
||||||
|
bloomfilter_add(bf3, "foo");
|
||||||
|
bloomfilter_add(bf3, "bar");
|
||||||
|
|
||||||
|
local bf4 = bloomfilter_basic_init(0.9, 10, "my_seed");
|
||||||
|
bloomfilter_add(bf4, Foo($a=1, $b="xx"));
|
||||||
|
bloomfilter_add(bf4, Foo($a=2, $b="yy"));
|
||||||
|
|
||||||
|
print "bf1, global_seed", bloomfilter_internal_state(bf1);
|
||||||
|
print "bf2, global_seed", bloomfilter_internal_state(bf2);
|
||||||
|
print "bf3, my_seed", bloomfilter_internal_state(bf3);
|
||||||
|
print "bf4, my_seed", bloomfilter_internal_state(bf4);
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
event bro_init()
|
||||||
|
{
|
||||||
|
test_bloom_filter();
|
||||||
|
}
|
|
@ -15,14 +15,21 @@ function test_basic_bloom_filter()
|
||||||
bloomfilter_add(bf_cnt, 0.5); # Type mismatch
|
bloomfilter_add(bf_cnt, 0.5); # Type mismatch
|
||||||
bloomfilter_add(bf_cnt, "foo"); # Type mismatch
|
bloomfilter_add(bf_cnt, "foo"); # Type mismatch
|
||||||
|
|
||||||
|
# Alternative constructor.
|
||||||
|
local bf_dbl = bloomfilter_basic_init2(4, 10);
|
||||||
|
bloomfilter_add(bf_dbl, 4.2);
|
||||||
|
bloomfilter_add(bf_dbl, 3.14);
|
||||||
|
print bloomfilter_lookup(bf_dbl, 4.2);
|
||||||
|
print bloomfilter_lookup(bf_dbl, 3.14);
|
||||||
|
|
||||||
# Basic usage with strings.
|
# Basic usage with strings.
|
||||||
local bf_str = bloomfilter_basic_init(0.9, 10);
|
local bf_str = bloomfilter_basic_init(0.9, 10);
|
||||||
bloomfilter_add(bf_str, "foo");
|
bloomfilter_add(bf_str, "foo");
|
||||||
bloomfilter_add(bf_str, "bar");
|
bloomfilter_add(bf_str, "bar");
|
||||||
print bloomfilter_lookup(bf_str, "foo");
|
print bloomfilter_lookup(bf_str, "foo");
|
||||||
print bloomfilter_lookup(bf_str, "bar");
|
print bloomfilter_lookup(bf_str, "bar");
|
||||||
print bloomfilter_lookup(bf_str, "b4z"); # FP
|
print bloomfilter_lookup(bf_str, "b4zzz"); # FP
|
||||||
print bloomfilter_lookup(bf_str, "quux"); # FP
|
print bloomfilter_lookup(bf_str, "quuux"); # FP
|
||||||
bloomfilter_add(bf_str, 0.5); # Type mismatch
|
bloomfilter_add(bf_str, 0.5); # Type mismatch
|
||||||
bloomfilter_add(bf_str, 100); # Type mismatch
|
bloomfilter_add(bf_str, 100); # Type mismatch
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue