mirror of
https://github.com/zeek/zeek.git
synced 2025-10-04 15:48:19 +00:00
Merge remote-tracking branch 'origin/topic/matthias/bloom-filter'
I'm moving the new files into a subdirectory probabilistic, and into a corresponding namespace. We can later put code for the other probabilistic data structures there as well. * origin/topic/matthias/bloom-filter: (45 commits) Implement and test Bloom filter merging. Make hash functions equality comparable. Make counter vectors mergeable. Use half adder for bitwise addition and subtraction. Fix and test counting Bloom filter. Implement missing CounterVector functions. Tweak hasher interface. Add missing include for GCC. Fixing for unserializion error. Small fixes and style tweaks. Only serialize Bloom filter type if available. Create hash policies through factory. Remove lingering debug code. Factor implementation and change interface. Expose Bro's linear congruence PRNG as utility function. H3 does not check for zero length input. Support seeding for hashers. Add utility function to access first random seed. Update H3 documentation (and minor style nits.) Make H3 seed configurable. ...
This commit is contained in:
commit
21685d2529
26 changed files with 2279 additions and 67 deletions
|
@ -705,6 +705,7 @@ type entropy_test_result: record {
|
|||
@load base/bif/strings.bif
|
||||
@load base/bif/bro.bif
|
||||
@load base/bif/reporter.bif
|
||||
@load base/bif/bloom-filter.bif
|
||||
|
||||
## Deprecated. This is superseded by the new logging framework.
|
||||
global log_file_name: function(tag: string): string &redef;
|
||||
|
|
|
@ -150,6 +150,7 @@ set(bro_PLUGIN_LIBS CACHE INTERNAL "plugin libraries" FORCE)
|
|||
|
||||
add_subdirectory(analyzer)
|
||||
add_subdirectory(file_analysis)
|
||||
add_subdirectory(probabilistic)
|
||||
|
||||
set(bro_SUBDIRS
|
||||
${bro_SUBDIR_LIBS}
|
||||
|
|
|
@ -560,6 +560,9 @@ void builtin_error(const char* msg, BroObj* arg)
|
|||
#include "reporter.bif.func_def"
|
||||
#include "strings.bif.func_def"
|
||||
|
||||
// TODO: Add a nicer mechanism to pull subdirectory bifs automatically.
|
||||
#include "probabilistic/bloom-filter.bif.h"
|
||||
|
||||
void init_builtin_funcs()
|
||||
{
|
||||
bro_resources = internal_type("bro_resources")->AsRecordType();
|
||||
|
@ -574,6 +577,9 @@ void init_builtin_funcs()
|
|||
#include "reporter.bif.func_init"
|
||||
#include "strings.bif.func_init"
|
||||
|
||||
// TODO: Add a nicer mechanism to pull subdirectory bifs automatically.
|
||||
#include "probabilistic/bloom-filter.bif.init.cc"
|
||||
|
||||
did_builtin_init = true;
|
||||
}
|
||||
|
||||
|
|
114
src/H3.h
114
src/H3.h
|
@ -49,69 +49,83 @@
|
|||
// hash a substring of the data. Hashes of substrings can be bitwise-XOR'ed
|
||||
// together to get the same result as hashing the full string.
|
||||
// Any number of hash functions can be created by creating new instances of H3,
|
||||
// with the same or different template parameters. The hash function is
|
||||
// randomly generated using bro_random(); you must call init_random_seed()
|
||||
// before the H3 constructor if you wish to seed it.
|
||||
// with the same or different template parameters. The hash function
|
||||
// constructor takes a seed as argument which defaults to a call to
|
||||
// bro_random().
|
||||
|
||||
|
||||
#ifndef H3_H
|
||||
#define H3_H
|
||||
|
||||
#include <climits>
|
||||
#include <cstring>
|
||||
|
||||
// The number of values representable by a byte.
|
||||
#define H3_BYTE_RANGE (UCHAR_MAX+1)
|
||||
|
||||
template<class T, int N> class H3 {
|
||||
T byte_lookup[N][H3_BYTE_RANGE];
|
||||
template <typename T, int N>
|
||||
class H3 {
|
||||
public:
|
||||
H3();
|
||||
T operator()(const void* data, size_t size, size_t offset = 0) const
|
||||
{
|
||||
const unsigned char *p = static_cast<const unsigned char*>(data);
|
||||
T result = 0;
|
||||
H3(T seed = bro_random())
|
||||
{
|
||||
T bit_lookup[N * CHAR_BIT];
|
||||
|
||||
// loop optmized with Duff's Device
|
||||
register unsigned n = (size + 7) / 8;
|
||||
switch (size % 8) {
|
||||
case 0: do { result ^= byte_lookup[offset++][*p++];
|
||||
case 7: result ^= byte_lookup[offset++][*p++];
|
||||
case 6: result ^= byte_lookup[offset++][*p++];
|
||||
case 5: result ^= byte_lookup[offset++][*p++];
|
||||
case 4: result ^= byte_lookup[offset++][*p++];
|
||||
case 3: result ^= byte_lookup[offset++][*p++];
|
||||
case 2: result ^= byte_lookup[offset++][*p++];
|
||||
case 1: result ^= byte_lookup[offset++][*p++];
|
||||
} while (--n > 0);
|
||||
}
|
||||
// Build one random word of type T per input bit position. Each word is
// assembled from 16-bit chunks of successive PRNG outputs.
for ( size_t bit = 0; bit < N * CHAR_BIT; bit++ )
	{
	bit_lookup[bit] = 0;

	for ( size_t i = 0; i < sizeof(T)/2; i++ )
		{
		// Advance the generator for every chunk. Stepping it only once
		// per bit would replicate the same 16 bits across the whole
		// word and leave most of T's bits fully correlated.
		seed = bro_prng(seed);

		// assume the PRNG returns at least 16 random bits
		bit_lookup[bit] = (bit_lookup[bit] << 16) | (seed & 0xFFFF);
		}
	}
|
||||
|
||||
return result;
|
||||
}
|
||||
for ( size_t byte = 0; byte < N; byte++ )
|
||||
{
|
||||
for ( unsigned val = 0; val < H3_BYTE_RANGE; val++ )
|
||||
{
|
||||
byte_lookup[byte][val] = 0;
|
||||
for ( size_t bit = 0; bit < CHAR_BIT; bit++ )
|
||||
// Does this mean byte_lookup[*][0] == 0? -RP
|
||||
if (val & (1 << bit))
|
||||
byte_lookup[byte][val] ^= bit_lookup[byte*CHAR_BIT+bit];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Hashes `size` bytes starting at `data`.
//
// `offset` selects the first row of byte_lookup to use, i.e. the position
// of data[0] within the (conceptual) full input. Hashes of substrings
// computed with the matching offsets can be XOR'ed together to obtain the
// hash of the whole string. The caller must ensure offset + size <= N,
// since byte_lookup has exactly N rows.
//
// Returns 0 for empty input.
T operator()(const void* data, size_t size, size_t offset = 0) const
	{
	const unsigned char *p = static_cast<const unsigned char*>(data);
	T result = 0;

	// Nothing to hash. Without this guard the unrolled loop below would
	// be entered at "case 0", consume eight bytes that do not exist, and
	// then decrement its counter from zero.
	if ( size == 0 )
		return result;

	// loop optimized with Duff's Device
	size_t n = (size + 7) / 8;

	switch ( size % 8 ) {
	case 0: do { result ^= byte_lookup[offset++][*p++];
	case 7:      result ^= byte_lookup[offset++][*p++];
	case 6:      result ^= byte_lookup[offset++][*p++];
	case 5:      result ^= byte_lookup[offset++][*p++];
	case 4:      result ^= byte_lookup[offset++][*p++];
	case 3:      result ^= byte_lookup[offset++][*p++];
	case 2:      result ^= byte_lookup[offset++][*p++];
	case 1:      result ^= byte_lookup[offset++][*p++];
		} while ( --n > 0 );
	}

	return result;
	}
|
||||
|
||||
friend bool operator==(const H3& x, const H3& y)
|
||||
{
|
||||
return ! std::memcmp(x.byte_lookup, y.byte_lookup, N * H3_BYTE_RANGE);
|
||||
}
|
||||
|
||||
friend bool operator!=(const H3& x, const H3& y)
|
||||
{
|
||||
return ! (x == y);
|
||||
}
|
||||
|
||||
private:
|
||||
T byte_lookup[N][H3_BYTE_RANGE];
|
||||
};
|
||||
|
||||
template<class T, int N>
|
||||
H3<T,N>::H3()
|
||||
{
|
||||
T bit_lookup[N * CHAR_BIT];
|
||||
|
||||
for (size_t bit = 0; bit < N * CHAR_BIT; bit++) {
|
||||
bit_lookup[bit] = 0;
|
||||
for (size_t i = 0; i < sizeof(T)/2; i++) {
|
||||
// assume random() returns at least 16 random bits
|
||||
bit_lookup[bit] = (bit_lookup[bit] << 16) | (bro_random() & 0xFFFF);
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t byte = 0; byte < N; byte++) {
|
||||
for (unsigned val = 0; val < H3_BYTE_RANGE; val++) {
|
||||
byte_lookup[byte][val] = 0;
|
||||
for (size_t bit = 0; bit < CHAR_BIT; bit++) {
|
||||
// Does this mean byte_lookup[*][0] == 0? -RP
|
||||
if (val & (1 << bit))
|
||||
byte_lookup[byte][val] ^= bit_lookup[byte*CHAR_BIT+bit];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif //H3_H
|
||||
|
|
|
@ -242,6 +242,7 @@ OpaqueType* md5_type;
|
|||
OpaqueType* sha1_type;
|
||||
OpaqueType* sha256_type;
|
||||
OpaqueType* entropy_type;
|
||||
OpaqueType* bloomfilter_type;
|
||||
|
||||
#include "const.bif.netvar_def"
|
||||
#include "types.bif.netvar_def"
|
||||
|
@ -307,6 +308,7 @@ void init_general_global_var()
|
|||
sha1_type = new OpaqueType("sha1");
|
||||
sha256_type = new OpaqueType("sha256");
|
||||
entropy_type = new OpaqueType("entropy");
|
||||
bloomfilter_type = new OpaqueType("bloomfilter");
|
||||
}
|
||||
|
||||
void init_net_var()
|
||||
|
|
|
@ -247,6 +247,7 @@ extern OpaqueType* md5_type;
|
|||
extern OpaqueType* sha1_type;
|
||||
extern OpaqueType* sha256_type;
|
||||
extern OpaqueType* entropy_type;
|
||||
extern OpaqueType* bloomfilter_type;
|
||||
|
||||
// Initializes globals that don't pertain to network/event analysis.
|
||||
extern void init_general_global_var();
|
||||
|
|
118
src/OpaqueVal.cc
118
src/OpaqueVal.cc
|
@ -1,4 +1,5 @@
|
|||
#include "OpaqueVal.h"
|
||||
|
||||
#include "NetVar.h"
|
||||
#include "Reporter.h"
|
||||
#include "Serializer.h"
|
||||
|
@ -515,3 +516,120 @@ bool EntropyVal::DoUnserialize(UnserialInfo* info)
|
|||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Creates an untyped value of type bloomfilter_type without a filter.
BloomFilterVal::BloomFilterVal()
	: OpaqueVal(bloomfilter_type), type_(NULL), hash_(NULL),
	  bloom_filter_(NULL)
	{
	}

// Creates an untyped value of the given opaque type without a filter.
BloomFilterVal::BloomFilterVal(OpaqueType* t)
	: OpaqueVal(t), type_(NULL), hash_(NULL), bloom_filter_(NULL)
	{
	}

// Wraps an existing Bloom filter. The destructor deletes bf, so the new
// value owns it. The value starts out untyped; see Typify().
BloomFilterVal::BloomFilterVal(probabilistic::BloomFilter* bf)
	: OpaqueVal(bloomfilter_type), type_(NULL), hash_(NULL),
	  bloom_filter_(bf)
	{
	}
|
||||
|
||||
// Fixes the element type of this Bloom filter and creates the matching
// hasher. A filter can be typed only once.
//
// Returns false, leaving the value untouched, if a type has already been
// set or if `type` is NULL; true otherwise. On success the value holds its
// own reference to `type`.
bool BloomFilterVal::Typify(BroType* type)
	{
	if ( type_ )
		return false;

	// A NULL type cannot be referenced or hashed.
	if ( ! type )
		return false;

	type_ = type;
	type_->Ref();

	// NOTE(review): confirm whether TypeList::Append() expects the caller
	// to pass in an additional reference to type_.
	TypeList* tl = new TypeList(type_);
	tl->Append(type_);
	hash_ = new CompositeHash(tl);
	Unref(tl);

	return true;
	}
|
||||
|
||||
// Returns the element type set via Typify(), or NULL if the filter has not
// been typed yet. No reference is added for the caller.
BroType* BloomFilterVal::Type() const
	{
	BroType* element_type = type_;
	return element_type;
	}
|
||||
|
||||
void BloomFilterVal::Add(const Val* val)
|
||||
{
|
||||
HashKey* key = hash_->ComputeHash(val, 1);
|
||||
bloom_filter_->Add(key->Hash());
|
||||
}
|
||||
|
||||
size_t BloomFilterVal::Count(const Val* val) const
|
||||
{
|
||||
HashKey* key = hash_->ComputeHash(val, 1);
|
||||
return bloom_filter_->Count(key->Hash());
|
||||
}
|
||||
|
||||
// Merges two Bloom filter values into a newly allocated one.
//
// Both inputs must carry the same element type (compared by pointer).
// Each known concrete filter implementation is tried in turn. A type
// mismatch, or no implementation matching, is reported as an internal
// error.
BloomFilterVal* BloomFilterVal::Merge(const BloomFilterVal* x,
                                      const BloomFilterVal* y)
	{
	if ( x->Type() != y->Type() )
		{
		reporter->InternalError("cannot merge Bloom filters with different types");
		return NULL;
		}

	BloomFilterVal* merged = DoMerge<probabilistic::BasicBloomFilter>(x, y);

	if ( ! merged )
		merged = DoMerge<probabilistic::CountingBloomFilter>(x, y);

	if ( merged )
		return merged;

	reporter->InternalError("failed to merge Bloom filters");
	return NULL;
	}
|
||||
|
||||
// Drops the reference taken in Typify() and destroys the hasher and the
// wrapped filter.
BloomFilterVal::~BloomFilterVal()
	{
	if ( type_ )
		Unref(type_);

	// Deleting a null pointer is a no-op, so no guards are needed here.
	delete hash_;
	delete bloom_filter_;
	}
|
||||
|
||||
IMPLEMENT_SERIAL(BloomFilterVal, SER_BLOOMFILTER_VAL);
|
||||
|
||||
// Writes, in order: a flag telling whether the filter is typed, the
// element type (only if typed), and the Bloom filter itself.
bool BloomFilterVal::DoSerialize(SerialInfo* info) const
	{
	DO_SERIALIZE(SER_BLOOMFILTER_VAL, OpaqueVal);

	bool is_typed = type_ != NULL;

	if ( ! SERIALIZE(is_typed) )
		return false;

	if ( is_typed )
		{
		if ( ! type_->Serialize(info) )
			return false;
		}

	return bloom_filter_->Serialize(info);
	}
|
||||
|
||||
// Reads back what DoSerialize() wrote: the "typed" flag, the element type
// (only if the flag is set), and the Bloom filter. Returns false as soon
// as any part fails to unserialize.
bool BloomFilterVal::DoUnserialize(UnserialInfo* info)
	{
	DO_UNSERIALIZE(OpaqueVal);

	bool is_typed;
	if ( ! UNSERIALIZE(&is_typed) )
		return false;

	if ( is_typed )
		{
		BroType* type = BroType::Unserialize(info);

		// Typify() would dereference the type, so fail here if it
		// could not be read.
		if ( ! type )
			return false;

		// Typify() takes its own reference on success, so release ours
		// regardless of the outcome to avoid leaking it on failure.
		const bool typed = Typify(type);
		Unref(type);

		if ( ! typed )
			return false;
		}

	bloom_filter_ = probabilistic::BloomFilter::Unserialize(info);
	return bloom_filter_ != NULL;
	}
|
||||
|
|
|
@ -3,10 +3,18 @@
|
|||
#ifndef OPAQUEVAL_H
|
||||
#define OPAQUEVAL_H
|
||||
|
||||
#include <typeinfo>
|
||||
|
||||
#include "RandTest.h"
|
||||
#include "Val.h"
|
||||
#include "digest.h"
|
||||
|
||||
#include "probabilistic/BloomFilter.h"
|
||||
|
||||
namespace probabilistic {
|
||||
class BloomFilter;
|
||||
}
|
||||
|
||||
class HashVal : public OpaqueVal {
|
||||
public:
|
||||
virtual bool IsValid() const;
|
||||
|
@ -107,4 +115,56 @@ private:
|
|||
RandTest state;
|
||||
};
|
||||
|
||||
class BloomFilterVal : public OpaqueVal {
|
||||
BloomFilterVal(const BloomFilterVal&);
|
||||
BloomFilterVal& operator=(const BloomFilterVal&);
|
||||
public:
|
||||
static BloomFilterVal* Merge(const BloomFilterVal* x,
|
||||
const BloomFilterVal* y);
|
||||
|
||||
explicit BloomFilterVal(probabilistic::BloomFilter* bf);
|
||||
~BloomFilterVal();
|
||||
|
||||
bool Typify(BroType* type);
|
||||
BroType* Type() const;
|
||||
|
||||
void Add(const Val* val);
|
||||
size_t Count(const Val* val) const;
|
||||
|
||||
protected:
|
||||
friend class Val;
|
||||
BloomFilterVal();
|
||||
BloomFilterVal(OpaqueType* t);
|
||||
|
||||
DECLARE_SERIAL(BloomFilterVal);
|
||||
|
||||
private:
|
||||
template <typename T>
|
||||
static BloomFilterVal* DoMerge(const BloomFilterVal* x,
|
||||
const BloomFilterVal* y)
|
||||
{
|
||||
if ( typeid(*x->bloom_filter_) != typeid(*y->bloom_filter_) )
|
||||
{
|
||||
reporter->InternalError("cannot merge different Bloom filter types");
|
||||
return NULL;
|
||||
}
|
||||
if ( typeid(T) != typeid(*x->bloom_filter_) )
|
||||
return NULL;
|
||||
const T* a = static_cast<const T*>(x->bloom_filter_);
|
||||
const T* b = static_cast<const T*>(y->bloom_filter_);
|
||||
BloomFilterVal* merged = new BloomFilterVal(T::Merge(a, b));
|
||||
assert(merged);
|
||||
if ( ! merged->Typify(x->Type()) )
|
||||
{
|
||||
reporter->InternalError("failed to set type on merged Bloom filter");
|
||||
return NULL;
|
||||
}
|
||||
return merged;
|
||||
}
|
||||
|
||||
BroType* type_;
|
||||
CompositeHash* hash_;
|
||||
probabilistic::BloomFilter* bloom_filter_;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
|
@ -49,6 +49,9 @@ SERIAL_IS(STATE_ACCESS, 0x1100)
|
|||
SERIAL_IS_BO(CASE, 0x1200)
|
||||
SERIAL_IS(LOCATION, 0x1300)
|
||||
SERIAL_IS(RE_MATCHER, 0x1400)
|
||||
SERIAL_IS(BITVECTOR, 0x1500)
|
||||
SERIAL_IS(COUNTERVECTOR, 0x1600)
|
||||
SERIAL_IS(BLOOMFILTER, 0x1700)
|
||||
|
||||
// These are the externally visible types.
|
||||
const SerialType SER_NONE = 0;
|
||||
|
@ -104,6 +107,7 @@ SERIAL_VAL(MD5_VAL, 16)
|
|||
SERIAL_VAL(SHA1_VAL, 17)
|
||||
SERIAL_VAL(SHA256_VAL, 18)
|
||||
SERIAL_VAL(ENTROPY_VAL, 19)
|
||||
SERIAL_VAL(BLOOMFILTER_VAL, 20)
|
||||
|
||||
#define SERIAL_EXPR(name, val) SERIAL_CONST(name, val, EXPR)
|
||||
SERIAL_EXPR(EXPR, 1)
|
||||
|
@ -197,10 +201,17 @@ SERIAL_FUNC(BRO_FUNC, 2)
|
|||
SERIAL_FUNC(DEBUG_FUNC, 3)
|
||||
SERIAL_FUNC(BUILTIN_FUNC, 4)
|
||||
|
||||
#define SERIAL_BLOOMFILTER(name, val) SERIAL_CONST(name, val, BLOOMFILTER)
|
||||
SERIAL_BLOOMFILTER(BLOOMFILTER, 1)
|
||||
SERIAL_BLOOMFILTER(BASICBLOOMFILTER, 2)
|
||||
SERIAL_BLOOMFILTER(COUNTINGBLOOMFILTER, 3)
|
||||
|
||||
SERIAL_CONST2(ID)
|
||||
SERIAL_CONST2(STATE_ACCESS)
|
||||
SERIAL_CONST2(CASE)
|
||||
SERIAL_CONST2(LOCATION)
|
||||
SERIAL_CONST2(RE_MATCHER)
|
||||
SERIAL_CONST2(BITVECTOR)
|
||||
SERIAL_CONST2(COUNTERVECTOR)
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1311,19 +1311,19 @@ IMPLEMENT_SERIAL(OpaqueType, SER_OPAQUE_TYPE);
|
|||
bool OpaqueType::DoSerialize(SerialInfo* info) const
|
||||
{
|
||||
DO_SERIALIZE(SER_OPAQUE_TYPE, BroType);
|
||||
return SERIALIZE(name);
|
||||
return SERIALIZE_STR(name.c_str(), name.size());
|
||||
}
|
||||
|
||||
bool OpaqueType::DoUnserialize(UnserialInfo* info)
|
||||
{
|
||||
DO_UNSERIALIZE(BroType);
|
||||
|
||||
char const* n;
|
||||
const char* n;
|
||||
if ( ! UNSERIALIZE_STR(&n, 0) )
|
||||
return false;
|
||||
|
||||
name = n;
|
||||
delete [] n;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -4975,4 +4975,3 @@ function anonymize_addr%(a: addr, cl: IPAddrAnonymizationClass%): addr
|
|||
(enum ip_addr_anonymization_class_t) anon_class));
|
||||
}
|
||||
%}
|
||||
|
||||
|
|
512
src/probabilistic/BitVector.cc
Normal file
512
src/probabilistic/BitVector.cc
Normal file
|
@ -0,0 +1,512 @@
|
|||
#include "BitVector.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <limits>
|
||||
#include "Serializer.h"
|
||||
|
||||
using namespace probabilistic;
|
||||
|
||||
BitVector::size_type BitVector::npos = static_cast<BitVector::size_type>(-1);
|
||||
BitVector::block_type BitVector::bits_per_block =
|
||||
std::numeric_limits<BitVector::block_type>::digits;
|
||||
|
||||
namespace {
|
||||
|
||||
uint8_t count_table[] = {
|
||||
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2,
|
||||
3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3,
|
||||
3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3,
|
||||
4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4,
|
||||
3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5,
|
||||
6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4,
|
||||
4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5,
|
||||
6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5,
|
||||
3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3,
|
||||
4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6,
|
||||
6, 7, 6, 7, 7, 8
|
||||
};
|
||||
|
||||
} // namespace <anonymous>
|
||||
|
||||
BitVector::Reference::Reference(block_type& block, block_type i)
|
||||
: block_(block),
|
||||
mask_(block_type(1) << i)
|
||||
{
|
||||
assert(i < bits_per_block);
|
||||
}
|
||||
|
||||
BitVector::Reference& BitVector::Reference::Flip()
|
||||
{
|
||||
block_ ^= mask_;
|
||||
return *this;
|
||||
}
|
||||
|
||||
BitVector::Reference::operator bool() const
|
||||
{
|
||||
return (block_ & mask_) != 0;
|
||||
}
|
||||
|
||||
bool BitVector::Reference::operator~() const
|
||||
{
|
||||
return (block_ & mask_) == 0;
|
||||
}
|
||||
|
||||
BitVector::Reference& BitVector::Reference::operator=(bool x)
|
||||
{
|
||||
x ? block_ |= mask_ : block_ &= ~mask_;
|
||||
return *this;
|
||||
}
|
||||
|
||||
BitVector::Reference& BitVector::Reference::operator=(Reference const& other)
|
||||
{
|
||||
other ? block_ |= mask_ : block_ &= ~mask_;
|
||||
return *this;
|
||||
}
|
||||
|
||||
BitVector::Reference& BitVector::Reference::operator|=(bool x)
|
||||
{
|
||||
if (x)
|
||||
block_ |= mask_;
|
||||
return *this;
|
||||
}
|
||||
|
||||
BitVector::Reference& BitVector::Reference::operator&=(bool x)
|
||||
{
|
||||
if (! x)
|
||||
block_ &= ~mask_;
|
||||
return *this;
|
||||
}
|
||||
|
||||
BitVector::Reference& BitVector::Reference::operator^=(bool x)
|
||||
{
|
||||
if (x)
|
||||
block_ ^= mask_;
|
||||
return *this;
|
||||
}
|
||||
|
||||
BitVector::Reference& BitVector::Reference::operator-=(bool x)
|
||||
{
|
||||
if (x)
|
||||
block_ &= ~mask_;
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
// Creates an empty bit vector.
BitVector::BitVector() : num_bits_(0) { }

// Creates a bit vector of `size` bits, all initialized to `value`.
BitVector::BitVector(size_type size, bool value)
	: bits_(bits_to_blocks(size), value ? ~block_type(0) : 0),
	  num_bits_(size)
	{
	// When filling with 1s, the last block may contain set bits beyond
	// `size`. Clear them, as Set(), Flip() and Resize() do, so that
	// Count() and operator== only ever see valid bits.
	zero_unused_bits();
	}

// Creates a copy of another bit vector.
BitVector::BitVector(BitVector const& other)
	: bits_(other.bits_),
	  num_bits_(other.num_bits_)
	{ }
|
||||
|
||||
// Returns a copy of this vector with every bit inverted.
BitVector BitVector::operator~() const
	{
	BitVector inverted(*this);
	return inverted.Flip();
	}
|
||||
|
||||
// Assigns the contents of another bit vector to this one.
BitVector& BitVector::operator=(BitVector const& other)
	{
	bits_ = other.bits_;

	// The bit count must travel with the blocks; otherwise Size() and
	// every bounds check would still reflect the previous contents.
	num_bits_ = other.num_bits_;

	return *this;
	}
|
||||
|
||||
// Returns a copy of this vector shifted by n bits towards higher indices.
BitVector BitVector::operator<<(size_type n) const
	{
	BitVector shifted(*this);
	shifted <<= n;
	return shifted;
	}

// Returns a copy of this vector shifted by n bits towards lower indices.
BitVector BitVector::operator>>(size_type n) const
	{
	BitVector shifted(*this);
	shifted >>= n;
	return shifted;
	}
|
||||
|
||||
// Shifts all bits by n positions towards higher indices, in place.
// Vacated low bits become 0; shifting by Size() or more clears the vector.
BitVector& BitVector::operator<<=(size_type n)
	{
	if ( n >= num_bits_ )
		return Reset();

	if ( n == 0 )
		return *this;

	assert(Blocks() >= 1);

	const size_type top = Blocks() - 1;           // index of last block
	const size_type whole = n / bits_per_block;   // whole-block part of n
	const block_type part = bit_index(n);         // remaining bit part of n
	block_type* blocks = &bits_[0];

	assert(whole <= top);

	// Work from the top down so that every source block is read before
	// it gets overwritten.
	for ( size_type i = top - whole; i > 0; --i )
		{
		block_type moved = blocks[i];

		if ( part != 0 )
			moved = (moved << part) | (blocks[i - 1] >> (bits_per_block - part));

		blocks[i + whole] = moved;
		}

	// The lowest source block has no lower neighbour to borrow bits from.
	blocks[whole] = (part != 0) ? (blocks[0] << part) : blocks[0];

	std::fill_n(blocks, whole, block_type(0));
	zero_unused_bits();

	return *this;
	}
|
||||
|
||||
// Shifts all bits by n positions towards lower indices, in place.
// Vacated high bits become 0; shifting by Size() or more clears the vector.
BitVector& BitVector::operator>>=(size_type n)
	{
	if ( n >= num_bits_ )
		return Reset();

	if ( n > 0 )
		{
		size_type last = Blocks() - 1;
		size_type div = n / bits_per_block;   // whole-block part of n
		block_type r = bit_index(n);          // remaining bit part of n
		block_type* b = &bits_[0];
		assert(Blocks() >= 1);
		assert(div <= last);

		if ( r != 0 )
			{
			// Walk upwards from the first surviving block: every
			// destination index (i - div) is <= the indices read
			// (i and i + 1), so no block is overwritten before it
			// has been consumed, and i + 1 never exceeds `last`.
			for ( size_type i = div; i < last; ++i )
				b[i - div] = (b[i] >> r) | (b[i + 1] << (bits_per_block - r));

			// The topmost block has no higher neighbour to borrow from.
			b[last - div] = b[last] >> r;
			}
		else
			{
			for ( size_type i = div; i <= last; ++i )
				b[i - div] = b[i];
			}

		std::fill_n(b + (Blocks() - div), div, block_type(0));
		}

	return *this;
	}
|
||||
|
||||
// NOTE(review): the four operators below assert only Size() >= other.Size()
// but index other.bits_ with every block index of *this; confirm callers
// always pass vectors with the same number of blocks.

// Block-wise AND with other.
BitVector& BitVector::operator&=(BitVector const& other)
	{
	assert(Size() >= other.Size());

	const size_type count = Blocks();
	for ( size_type blk = 0; blk != count; ++blk )
		bits_[blk] = bits_[blk] & other.bits_[blk];

	return *this;
	}

// Block-wise OR with other.
BitVector& BitVector::operator|=(BitVector const& other)
	{
	assert(Size() >= other.Size());

	const size_type count = Blocks();
	for ( size_type blk = 0; blk != count; ++blk )
		bits_[blk] = bits_[blk] | other.bits_[blk];

	return *this;
	}

// Block-wise XOR with other.
BitVector& BitVector::operator^=(BitVector const& other)
	{
	assert(Size() >= other.Size());

	const size_type count = Blocks();
	for ( size_type blk = 0; blk != count; ++blk )
		bits_[blk] = bits_[blk] ^ other.bits_[blk];

	return *this;
	}

// Set difference: clears every bit that is set in other.
BitVector& BitVector::operator-=(BitVector const& other)
	{
	assert(Size() >= other.Size());

	const size_type count = Blocks();
	for ( size_type blk = 0; blk != count; ++blk )
		bits_[blk] = bits_[blk] & ~other.bits_[blk];

	return *this;
	}
|
||||
|
||||
namespace probabilistic {
|
||||
|
||||
BitVector operator&(BitVector const& x, BitVector const& y)
|
||||
{
|
||||
BitVector b(x);
|
||||
return b &= y;
|
||||
}
|
||||
|
||||
BitVector operator|(BitVector const& x, BitVector const& y)
|
||||
{
|
||||
BitVector b(x);
|
||||
return b |= y;
|
||||
}
|
||||
|
||||
BitVector operator^(BitVector const& x, BitVector const& y)
|
||||
{
|
||||
BitVector b(x);
|
||||
return b ^= y;
|
||||
}
|
||||
|
||||
BitVector operator-(BitVector const& x, BitVector const& y)
|
||||
{
|
||||
BitVector b(x);
|
||||
return b -= y;
|
||||
}
|
||||
|
||||
bool operator==(BitVector const& x, BitVector const& y)
|
||||
{
|
||||
return x.num_bits_ == y.num_bits_ && x.bits_ == y.bits_;
|
||||
}
|
||||
|
||||
bool operator!=(BitVector const& x, BitVector const& y)
|
||||
{
|
||||
return ! (x == y);
|
||||
}
|
||||
|
||||
bool operator<(BitVector const& x, BitVector const& y)
|
||||
{
|
||||
assert(x.Size() == y.Size());
|
||||
for (BitVector::size_type r = x.Blocks(); r > 0; --r)
|
||||
{
|
||||
BitVector::size_type i = r - 1;
|
||||
if (x.bits_[i] < y.bits_[i])
|
||||
return true;
|
||||
else if (x.bits_[i] > y.bits_[i])
|
||||
return false;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Changes the vector to hold n bits. Bits added by growing are set to
// `value`.
void BitVector::Resize(size_type n, bool value)
	{
	const size_type old_blocks = Blocks();
	const size_type new_blocks = bits_to_blocks(n);
	const block_type fill = value ? ~block_type(0) : block_type(0);

	if ( new_blocks != old_blocks )
		bits_.resize(new_blocks, fill);

	// When growing with 1s, the previously unused high bits of the old
	// last block become valid and have to be set explicitly.
	if ( value && n > num_bits_ )
		{
		const block_type used = extra_bits();

		if ( used )
			bits_[old_blocks - 1] |= (fill << used);
		}

	num_bits_ = n;
	zero_unused_bits();
	}
|
||||
|
||||
void BitVector::Clear()
|
||||
{
|
||||
bits_.clear();
|
||||
num_bits_ = 0;
|
||||
}
|
||||
|
||||
void BitVector::PushBack(bool bit)
|
||||
{
|
||||
size_type s = Size();
|
||||
Resize(s + 1);
|
||||
Set(s, bit);
|
||||
}
|
||||
|
||||
// Appends all bits of one block to the end of the vector.
void BitVector::Append(block_type block)
	{
	const size_type excess = extra_bits();

	if ( ! excess )
		{
		// The vector ends on a block boundary: store the block as is.
		bits_.push_back(block);
		}
	else
		{
		assert(! Empty());

		// The high part of the new block goes into a fresh block, the
		// low part fills the free bits of what used to be the last one.
		bits_.push_back(block >> (bits_per_block - excess));
		bits_[Blocks() - 2] |= (block << excess);
		}

	num_bits_ += bits_per_block;
	}
|
||||
|
||||
// Sets bit i to the given value.
BitVector& BitVector::Set(size_type i, bool bit)
	{
	assert(i < num_bits_);

	if ( ! bit )
		return Reset(i);

	bits_[block_index(i)] |= bit_mask(i);
	return *this;
	}

// Sets all bits to 1.
BitVector& BitVector::Set()
	{
	for ( size_type blk = 0; blk < Blocks(); ++blk )
		bits_[blk] = ~block_type(0);

	zero_unused_bits();
	return *this;
	}

// Sets bit i to 0.
BitVector& BitVector::Reset(size_type i)
	{
	assert(i < num_bits_);

	const block_type keep = ~bit_mask(i);
	bits_[block_index(i)] &= keep;
	return *this;
	}

// Sets all bits to 0.
BitVector& BitVector::Reset()
	{
	for ( size_type blk = 0; blk < Blocks(); ++blk )
		bits_[blk] = block_type(0);

	return *this;
	}

// Inverts bit i.
BitVector& BitVector::Flip(size_type i)
	{
	assert(i < num_bits_);

	const block_type toggle = bit_mask(i);
	bits_[block_index(i)] ^= toggle;
	return *this;
	}

// Inverts all bits.
BitVector& BitVector::Flip()
	{
	for ( size_type blk = 0; blk < Blocks(); ++blk )
		bits_[blk] ^= ~block_type(0);

	zero_unused_bits();
	return *this;
	}
|
||||
|
||||
bool BitVector::operator[](size_type i) const
|
||||
{
|
||||
assert(i < num_bits_);
|
||||
return (bits_[block_index(i)] & bit_mask(i)) != 0;
|
||||
}
|
||||
|
||||
BitVector::Reference BitVector::operator[](size_type i)
|
||||
{
|
||||
assert(i < num_bits_);
|
||||
return Reference(bits_[block_index(i)], bit_index(i));
|
||||
}
|
||||
|
||||
// Returns the number of bits set to 1, using a per-byte lookup table.
BitVector::size_type BitVector::Count() const
	{
	size_t total = 0;

	for ( size_type blk = 0; blk < Blocks(); ++blk )
		{
		// TODO: use __popcnt if available.
		// Consume the block one byte at a time until no set bits remain.
		for ( block_type rest = bits_[blk]; rest; rest >>= 8 )
			total += count_table[rest & ((1u << 8) - 1)];
		}

	return total;
	}
|
||||
|
||||
BitVector::size_type BitVector::Blocks() const
|
||||
{
|
||||
return bits_.size();
|
||||
}
|
||||
|
||||
BitVector::size_type BitVector::Size() const
|
||||
{
|
||||
return num_bits_;
|
||||
}
|
||||
|
||||
bool BitVector::Empty() const
|
||||
{
|
||||
return bits_.empty();
|
||||
}
|
||||
|
||||
// Returns the index of the first 1-bit, or npos if there is none.
BitVector::size_type BitVector::FindFirst() const
	{
	const size_type first_block = 0;
	return find_from(first_block);
	}

// Returns the index of the first 1-bit strictly after position i, or npos
// if there is none.
BitVector::size_type BitVector::FindNext(size_type i) const
	{
	if ( Size() == 0 || i >= Size() - 1 )
		return npos;

	const size_type next = i + 1;
	const size_type blk = block_index(next);

	// Mask away the bits below `next` within its block.
	const block_type masked = bits_[blk] & (~block_type(0) << bit_index(next));

	if ( masked )
		return blk * bits_per_block + lowest_bit(masked);

	return find_from(blk + 1);
	}
|
||||
|
||||
// Returns the position of the least significant 1-bit in block
// (0 if block is 0).
BitVector::size_type BitVector::lowest_bit(block_type block)
	{
	// Isolate the lowest set bit: x & -x in unsigned arithmetic.
	block_type isolated = block & (~block + 1);

	// Its base-2 logarithm is the bit position.
	size_type position = 0;
	for ( isolated >>= 1; isolated; isolated >>= 1 )
		++position;

	return position;
	}
|
||||
|
||||
BitVector::block_type BitVector::extra_bits() const
|
||||
{
|
||||
return bit_index(Size());
|
||||
}
|
||||
|
||||
void BitVector::zero_unused_bits()
|
||||
{
|
||||
if (extra_bits())
|
||||
bits_.back() &= ~(~block_type(0) << extra_bits());
|
||||
}
|
||||
|
||||
// Returns the index of the first 1-bit in block i or any later block, or
// npos if all of those blocks are zero.
BitVector::size_type BitVector::find_from(size_type i) const
	{
	for ( ; i < Blocks(); ++i )
		{
		if ( bits_[i] != 0 )
			return i * bits_per_block + lowest_bit(bits_[i]);
		}

	return npos;
	}
|
||||
|
||||
bool BitVector::Serialize(SerialInfo* info) const
|
||||
{
|
||||
return SerialObj::Serialize(info);
|
||||
}
|
||||
|
||||
BitVector* BitVector::Unserialize(UnserialInfo* info)
|
||||
{
|
||||
return reinterpret_cast<BitVector*>(
|
||||
SerialObj::Unserialize(info, SER_BITVECTOR));
|
||||
}
|
||||
|
||||
IMPLEMENT_SERIAL(BitVector, SER_BITVECTOR);
|
||||
|
||||
// Writes, in order: the number of blocks, each block, and the number of
// bits. Every quantity is written as a uint64.
bool BitVector::DoSerialize(SerialInfo* info) const
	{
	DO_SERIALIZE(SER_BITVECTOR, SerialObj);

	if ( ! SERIALIZE(static_cast<uint64>(bits_.size())) )
		return false;

	size_t i = 0;
	while ( i < bits_.size() )
		{
		if ( ! SERIALIZE(static_cast<uint64>(bits_[i])) )
			return false;

		++i;
		}

	return SERIALIZE(static_cast<uint64>(num_bits_));
	}
|
||||
|
||||
// Reads back what DoSerialize() wrote: the number of blocks, each block,
// and the number of bits. Returns false as soon as a read fails.
bool BitVector::DoUnserialize(UnserialInfo* info)
	{
	DO_UNSERIALIZE(SerialObj);

	uint64 size;
	if ( ! UNSERIALIZE(&size) )
		return false;

	bits_.resize(static_cast<size_t>(size));

	for ( size_t i = 0; i < bits_.size(); ++i )
		{
		uint64 block;
		if ( ! UNSERIALIZE(&block) )
			return false;

		bits_[i] = static_cast<block_type>(block);
		}

	uint64 num_bits;
	if ( ! UNSERIALIZE(&num_bits) )
		return false;

	num_bits_ = static_cast<size_type>(num_bits);
	return true;
	}
|
335
src/probabilistic/BitVector.h
Normal file
335
src/probabilistic/BitVector.h
Normal file
|
@ -0,0 +1,335 @@
|
|||
#ifndef BitVector_h
|
||||
#define BitVector_h
|
||||
|
||||
#include <iterator>
|
||||
#include <vector>
|
||||
#include "SerialObj.h"
|
||||
|
||||
namespace probabilistic {
|
||||
|
||||
/**
|
||||
* A vector of bits.
|
||||
*/
|
||||
class BitVector : public SerialObj {
|
||||
public:
|
||||
typedef size_t block_type;
|
||||
typedef size_t size_type;
|
||||
static size_type npos;
|
||||
static block_type bits_per_block;
|
||||
|
||||
public:
|
||||
/**
|
||||
* An lvalue proxy for single bits.
|
||||
*/
|
||||
class Reference {
|
||||
friend class BitVector;
|
||||
Reference(block_type& block, block_type i);
|
||||
|
||||
public:
|
||||
Reference& Flip();
|
||||
operator bool() const;
|
||||
bool operator~() const;
|
||||
Reference& operator=(bool x);
|
||||
Reference& operator=(Reference const& other);
|
||||
Reference& operator|=(bool x);
|
||||
Reference& operator&=(bool x);
|
||||
Reference& operator^=(bool x);
|
||||
Reference& operator-=(bool x);
|
||||
|
||||
private:
|
||||
void operator&();
|
||||
block_type& block_;
|
||||
block_type const mask_;
|
||||
};
|
||||
|
||||
typedef bool const_reference;
|
||||
|
||||
/**
|
||||
* Default-constructs an empty bit vector.
|
||||
*/
|
||||
BitVector();
|
||||
|
||||
/**
|
||||
* Constructs a bit vector of a given size.
|
||||
* @param size The number of bits.
|
||||
* @param value The value for each bit.
|
||||
*/
|
||||
explicit BitVector(size_type size, bool value = false);
|
||||
|
||||
/**
|
||||
* Constructs a bit vector from a sequence of blocks.
|
||||
*/
|
||||
template <typename InputIterator>
|
||||
BitVector(InputIterator first, InputIterator last)
|
||||
{
|
||||
bits_.insert(bits_.end(), first, last);
|
||||
num_bits_ = bits_.size() * bits_per_block;
|
||||
}
|
||||
|
||||
/**
|
||||
* Copy-constructs a bit vector.
|
||||
* @param other The bit vector to copy.
|
||||
*/
|
||||
BitVector(const BitVector& other);
|
||||
|
||||
/**
|
||||
* Assigns another bit vector to this instance.
|
||||
* @param other The RHS of the assignment.
|
||||
*/
|
||||
BitVector& operator=(const BitVector& other);
|
||||
|
||||
//
|
||||
// Bitwise operations
|
||||
//
|
||||
BitVector operator~() const;
|
||||
BitVector operator<<(size_type n) const;
|
||||
BitVector operator>>(size_type n) const;
|
||||
BitVector& operator<<=(size_type n);
|
||||
BitVector& operator>>=(size_type n);
|
||||
BitVector& operator&=(BitVector const& other);
|
||||
BitVector& operator|=(BitVector const& other);
|
||||
BitVector& operator^=(BitVector const& other);
|
||||
BitVector& operator-=(BitVector const& other);
|
||||
friend BitVector operator&(BitVector const& x, BitVector const& y);
|
||||
friend BitVector operator|(BitVector const& x, BitVector const& y);
|
||||
friend BitVector operator^(BitVector const& x, BitVector const& y);
|
||||
friend BitVector operator-(BitVector const& x, BitVector const& y);
|
||||
|
||||
//
|
||||
// Relational operators
|
||||
//
|
||||
friend bool operator==(BitVector const& x, BitVector const& y);
|
||||
friend bool operator!=(BitVector const& x, BitVector const& y);
|
||||
friend bool operator<(BitVector const& x, BitVector const& y);
|
||||
|
||||
//
|
||||
// Basic operations
|
||||
//
|
||||
/** Appends the bits in a sequence of values.
|
||||
* @tparam Iterator A forward iterator.
|
||||
* @param first An iterator pointing to the first element of the sequence.
|
||||
* @param last An iterator pointing to one past the last element of the
|
||||
* sequence.
|
||||
*/
|
||||
template <typename ForwardIterator>
|
||||
void Append(ForwardIterator first, ForwardIterator last)
|
||||
{
|
||||
if (first == last)
|
||||
return;
|
||||
|
||||
block_type excess = extra_bits();
|
||||
typename std::iterator_traits<ForwardIterator>::difference_type delta =
|
||||
std::distance(first, last);
|
||||
|
||||
bits_.reserve(Blocks() + delta);
|
||||
if (excess == 0)
|
||||
{
|
||||
bits_.back() |= (*first << excess);
|
||||
do
|
||||
{
|
||||
block_type b = *first++ >> (bits_per_block - excess);
|
||||
bits_.push_back(b | (first == last ? 0 : *first << excess));
|
||||
} while (first != last);
|
||||
}
|
||||
else
|
||||
{
|
||||
bits_.insert(bits_.end(), first, last);
|
||||
}
|
||||
num_bits_ += bits_per_block * delta;
|
||||
}
|
||||
|
||||
/**
|
||||
* Appends the bits in a given block.
|
||||
* @param block The block containing bits to append.
|
||||
*/
|
||||
void Append(block_type block);
|
||||
|
||||
/** Appends a single bit to the end of the bit vector.
|
||||
* @param bit The value of the bit.
|
||||
*/
|
||||
void PushBack(bool bit);
|
||||
|
||||
/**
|
||||
* Clears all bits in the bitvector.
|
||||
*/
|
||||
void Clear();
|
||||
|
||||
/**
|
||||
* Resizes the bit vector to a new number of bits.
|
||||
* @param n The new number of bits of the bit vector.
|
||||
* @param value The bit value of new values, if the vector expands.
|
||||
*/
|
||||
void Resize(size_type n, bool value = false);
|
||||
|
||||
/**
|
||||
* Sets a bit at a specific position to a given value.
|
||||
* @param i The bit position.
|
||||
* @param bit The value assigned to position *i*.
|
||||
* @return A reference to the bit vector instance.
|
||||
*/
|
||||
BitVector& Set(size_type i, bool bit = true);
|
||||
|
||||
/**
|
||||
* Sets all bits to 1.
|
||||
* @return A reference to the bit vector instance.
|
||||
*/
|
||||
BitVector& Set();
|
||||
|
||||
/**
|
||||
* Resets a bit at a specific position, i.e., sets it to 0.
|
||||
* @param i The bit position.
|
||||
* @return A reference to the bit vector instance.
|
||||
*/
|
||||
BitVector& Reset(size_type i);
|
||||
|
||||
/**
|
||||
* Sets all bits to 0.
|
||||
* @return A reference to the bit vector instance.
|
||||
*/
|
||||
BitVector& Reset();
|
||||
|
||||
/**
|
||||
* Toggles/flips a bit at a specific position.
|
||||
* @param i The bit position.
|
||||
* @return A reference to the bit vector instance.
|
||||
*/
|
||||
BitVector& Flip(size_type i);
|
||||
|
||||
/**
|
||||
* Computes the complement.
|
||||
* @return A reference to the bit vector instance.
|
||||
*/
|
||||
BitVector& Flip();
|
||||
|
||||
/** Retrieves a single bit.
|
||||
* @param i The bit position.
|
||||
* @return A mutable reference to the bit at position *i*.
|
||||
*/
|
||||
Reference operator[](size_type i);
|
||||
|
||||
/**
|
||||
* Retrieves a single bit.
|
||||
* @param i The bit position.
|
||||
* @return A const-reference to the bit at position *i*.
|
||||
*/
|
||||
const_reference operator[](size_type i) const;
|
||||
|
||||
/**
|
||||
* Counts the number of 1-bits in the bit vector. Also known as *population
|
||||
* count* or *Hamming weight*.
|
||||
* @return The number of bits set to 1.
|
||||
*/
|
||||
size_type Count() const;
|
||||
|
||||
/**
|
||||
* Retrieves the number of blocks of the underlying storage.
|
||||
* @return The number of blocks that represent `Size()` bits.
|
||||
*/
|
||||
size_type Blocks() const;
|
||||
|
||||
/**
|
||||
* Retrieves the number of bits the bit vector consists of.
|
||||
* @return The length of the bit vector in bits.
|
||||
*/
|
||||
size_type Size() const;
|
||||
|
||||
/**
|
||||
* Checks whether the bit vector is empty.
|
||||
* @return `true` iff the bitvector has zero length.
|
||||
*/
|
||||
bool Empty() const;
|
||||
|
||||
/**
|
||||
* Finds the bit position of the first 1-bit.
|
||||
* @return The position of the first bit that equals to one or `npos` if no
|
||||
* such bit exists.
|
||||
*/
|
||||
size_type FindFirst() const;
|
||||
|
||||
/**
|
||||
* Finds the next 1-bit from a given starting position.
|
||||
*
|
||||
* @param i The index where to start looking.
|
||||
*
|
||||
* @return The position of the first bit that equals to 1 after position
|
||||
* *i* or `npos` if no such bit exists.
|
||||
*/
|
||||
size_type FindNext(size_type i) const;
|
||||
|
||||
bool Serialize(SerialInfo* info) const;
|
||||
static BitVector* Unserialize(UnserialInfo* info);
|
||||
|
||||
protected:
|
||||
DECLARE_SERIAL(BitVector);
|
||||
|
||||
private:
|
||||
/**
|
||||
* Computes the block index for a given bit position.
|
||||
*/
|
||||
static size_type block_index(size_type i)
|
||||
{
|
||||
return i / bits_per_block;
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes the bit index within a given block for a given bit position.
|
||||
*/
|
||||
static block_type bit_index(size_type i)
|
||||
{
|
||||
return i % bits_per_block;
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes the bitmask block to extract a bit at a given bit position.
|
||||
*/
|
||||
static block_type bit_mask(size_type i)
|
||||
{
|
||||
return block_type(1) << bit_index(i);
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes the number of blocks needed to represent a given number of
|
||||
* bits.
|
||||
* @param bits the number of bits.
|
||||
* @return The number of blocks to represent *bits* number of bits.
|
||||
*/
|
||||
static size_type bits_to_blocks(size_type bits)
|
||||
{
|
||||
return bits / bits_per_block
|
||||
+ static_cast<size_type>(bits % bits_per_block != 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes the bit position of the first 1-bit in a given block.
|
||||
* @param block The block to inspect.
|
||||
* @return The bit position where *block* has its first bit set to 1.
|
||||
*/
|
||||
static size_type lowest_bit(block_type block);
|
||||
|
||||
/**
|
||||
* Computes the number of excess/unused bits in the bit vector.
|
||||
*/
|
||||
block_type extra_bits() const;
|
||||
|
||||
/**
|
||||
* If the number of bits in the vector is not a multiple of
|
||||
* bitvector::bits_per_block, then the last block exhibits unused bits which
|
||||
* this function resets.
|
||||
*/
|
||||
void zero_unused_bits();
|
||||
|
||||
/**
|
||||
* Looks for the first 1-bit starting at a given position.
|
||||
* @param i The block index to start looking.
|
||||
* @return The block index of the first 1-bit starting from *i* or
|
||||
* `bitvector::npos` if no 1-bit exists.
|
||||
*/
|
||||
size_type find_from(size_type i) const;
|
||||
|
||||
std::vector<block_type> bits_;
|
||||
size_type num_bits_;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
188
src/probabilistic/BloomFilter.cc
Normal file
188
src/probabilistic/BloomFilter.cc
Normal file
|
@ -0,0 +1,188 @@
|
|||
#include "BloomFilter.h"
|
||||
|
||||
#include <cmath>
|
||||
#include <limits>
|
||||
#include "CounterVector.h"
|
||||
#include "Serializer.h"
|
||||
|
||||
using namespace probabilistic;
|
||||
|
||||
BloomFilter::BloomFilter()
|
||||
: hasher_(NULL)
|
||||
{
|
||||
}
|
||||
|
||||
BloomFilter::BloomFilter(const Hasher* hasher)
|
||||
: hasher_(hasher)
|
||||
{
|
||||
}
|
||||
|
||||
BloomFilter::~BloomFilter()
	{
	// The filter owns its hasher. Deleting a null pointer is a no-op, so
	// the default-constructed case needs no explicit guard.
	delete hasher_;
	}
|
||||
|
||||
bool BloomFilter::Serialize(SerialInfo* info) const
|
||||
{
|
||||
return SerialObj::Serialize(info);
|
||||
}
|
||||
|
||||
/**
 * Reads a Bloom filter back from a serialization stream.
 *
 * @param info The unserialization context to read from.
 *
 * @return Whatever SerialObj::Unserialize() yields for SER_BLOOMFILTER,
 * viewed as a BloomFilter.
 */
BloomFilter* BloomFilter::Unserialize(UnserialInfo* info)
	{
	// BloomFilter derives from SerialObj, so a base-to-derived downcast
	// is a static_cast. reinterpret_cast only works by accident while
	// the base sub-object sits at offset zero.
	return static_cast<BloomFilter*>(
		SerialObj::Unserialize(info, SER_BLOOMFILTER));
	}
|
||||
|
||||
bool BloomFilter::DoSerialize(SerialInfo* info) const
|
||||
{
|
||||
DO_SERIALIZE(SER_BLOOMFILTER, SerialObj);
|
||||
if ( ! SERIALIZE(static_cast<uint16>(hasher_->K())) )
|
||||
return false;
|
||||
return SERIALIZE_STR(hasher_->Name().c_str(), hasher_->Name().size());
|
||||
}
|
||||
|
||||
bool BloomFilter::DoUnserialize(UnserialInfo* info)
|
||||
{
|
||||
DO_UNSERIALIZE(SerialObj);
|
||||
uint16 k;
|
||||
if ( ! UNSERIALIZE(&k) )
|
||||
return false;
|
||||
const char* name;
|
||||
if ( ! UNSERIALIZE_STR(&name, 0) )
|
||||
return false;
|
||||
hasher_ = Hasher::Create(k, name);
|
||||
delete [] name;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
size_t BasicBloomFilter::M(double fp, size_t capacity)
|
||||
{
|
||||
double ln2 = std::log(2);
|
||||
return std::ceil(-(capacity * std::log(fp) / ln2 / ln2));
|
||||
}
|
||||
|
||||
size_t BasicBloomFilter::K(size_t cells, size_t capacity)
|
||||
{
|
||||
double frac = static_cast<double>(cells) / static_cast<double>(capacity);
|
||||
return std::ceil(frac * std::log(2));
|
||||
}
|
||||
|
||||
BasicBloomFilter* BasicBloomFilter::Merge(const BasicBloomFilter* x,
|
||||
const BasicBloomFilter* y)
|
||||
{
|
||||
if ( ! x->hasher_->Equals(y->hasher_) )
|
||||
{
|
||||
reporter->InternalError("incompatible hashers during Bloom filter merge");
|
||||
return NULL;
|
||||
}
|
||||
BasicBloomFilter* result = new BasicBloomFilter();
|
||||
result->hasher_ = x->hasher_->Clone();
|
||||
result->bits_ = new BitVector(*x->bits_ | *y->bits_);
|
||||
return result;
|
||||
}
|
||||
|
||||
BasicBloomFilter::BasicBloomFilter()
|
||||
: bits_(NULL)
|
||||
{
|
||||
}
|
||||
|
||||
BasicBloomFilter::BasicBloomFilter(const Hasher* hasher, size_t cells)
|
||||
: BloomFilter(hasher),
|
||||
bits_(new BitVector(cells))
|
||||
{
|
||||
}
|
||||
|
||||
IMPLEMENT_SERIAL(BasicBloomFilter, SER_BASICBLOOMFILTER)
|
||||
|
||||
bool BasicBloomFilter::DoSerialize(SerialInfo* info) const
|
||||
{
|
||||
DO_SERIALIZE(SER_BASICBLOOMFILTER, BloomFilter);
|
||||
return bits_->Serialize(info);
|
||||
}
|
||||
|
||||
bool BasicBloomFilter::DoUnserialize(UnserialInfo* info)
|
||||
{
|
||||
DO_UNSERIALIZE(BloomFilter);
|
||||
bits_ = BitVector::Unserialize(info);
|
||||
return bits_ != NULL;
|
||||
}
|
||||
|
||||
void BasicBloomFilter::AddImpl(const Hasher::digest_vector& h)
|
||||
{
|
||||
for ( size_t i = 0; i < h.size(); ++i )
|
||||
bits_->Set(h[i] % bits_->Size());
|
||||
}
|
||||
|
||||
size_t BasicBloomFilter::CountImpl(const Hasher::digest_vector& h) const
	{
	// A basic filter can only answer membership: report 0 as soon as one
	// probed bit is clear, and 1 if every probed bit is set.
	Hasher::digest_vector::const_iterator it = h.begin();

	while ( it != h.end() )
		{
		bool present = (*bits_)[*it % bits_->Size()];

		if ( ! present )
			return 0;

		++it;
		}

	return 1;
	}
|
||||
|
||||
|
||||
CountingBloomFilter* CountingBloomFilter::Merge(const CountingBloomFilter* x,
|
||||
const CountingBloomFilter* y)
|
||||
{
|
||||
if ( ! x->hasher_->Equals(y->hasher_) )
|
||||
{
|
||||
reporter->InternalError("incompatible hashers during Bloom filter merge");
|
||||
return NULL;
|
||||
}
|
||||
CountingBloomFilter* result = new CountingBloomFilter();
|
||||
result->hasher_ = x->hasher_->Clone();
|
||||
result->cells_ = new CounterVector(*x->cells_ | *y->cells_);
|
||||
return result;
|
||||
}
|
||||
|
||||
CountingBloomFilter::CountingBloomFilter()
|
||||
: cells_(NULL)
|
||||
{
|
||||
}
|
||||
|
||||
CountingBloomFilter::CountingBloomFilter(const Hasher* hasher,
|
||||
size_t cells, size_t width)
|
||||
: BloomFilter(hasher),
|
||||
cells_(new CounterVector(width, cells))
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
IMPLEMENT_SERIAL(CountingBloomFilter, SER_COUNTINGBLOOMFILTER)
|
||||
|
||||
bool CountingBloomFilter::DoSerialize(SerialInfo* info) const
|
||||
{
|
||||
DO_SERIALIZE(SER_COUNTINGBLOOMFILTER, BloomFilter);
|
||||
return cells_->Serialize(info);
|
||||
}
|
||||
|
||||
bool CountingBloomFilter::DoUnserialize(UnserialInfo* info)
|
||||
{
|
||||
DO_UNSERIALIZE(BloomFilter);
|
||||
cells_ = CounterVector::Unserialize(info);
|
||||
return cells_ != NULL;
|
||||
}
|
||||
|
||||
// TODO: Use partitioning in add/count to allow for reusing CMS bounds.
|
||||
|
||||
void CountingBloomFilter::AddImpl(const Hasher::digest_vector& h)
|
||||
{
|
||||
for ( size_t i = 0; i < h.size(); ++i )
|
||||
cells_->Increment(h[i] % cells_->Size());
|
||||
}
|
||||
|
||||
size_t CountingBloomFilter::CountImpl(const Hasher::digest_vector& h) const
	{
	typedef CounterVector::size_type size_type;

	// The reported count is the smallest counter among all probed cells.
	// Start from the largest representable value so that any real
	// counter replaces it.
	size_type smallest = std::numeric_limits<size_type>::max();

	for ( Hasher::digest_vector::const_iterator it = h.begin();
	      it != h.end(); ++it )
		{
		size_type current = cells_->Count(*it % cells_->Size());
		smallest = current < smallest ? current : smallest;
		}

	return smallest;
	}
|
140
src/probabilistic/BloomFilter.h
Normal file
140
src/probabilistic/BloomFilter.h
Normal file
|
@ -0,0 +1,140 @@
|
|||
#ifndef BloomFilter_h
|
||||
#define BloomFilter_h
|
||||
|
||||
#include <vector>
|
||||
#include "BitVector.h"
|
||||
#include "Hasher.h"
|
||||
|
||||
namespace probabilistic {
|
||||
|
||||
class CounterVector;
|
||||
|
||||
/**
|
||||
* The abstract base class for Bloom filters.
|
||||
*/
|
||||
class BloomFilter : public SerialObj {
|
||||
public:
|
||||
// At this point we won't let the user choose the hasher, but we might
|
||||
// open up the interface in the future.
|
||||
virtual ~BloomFilter();
|
||||
|
||||
/**
|
||||
* Adds an element of type T to the Bloom filter.
|
||||
* @param x The element to add
|
||||
*/
|
||||
template <typename T>
|
||||
void Add(const T& x)
|
||||
{
|
||||
AddImpl((*hasher_)(x));
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the associated count of a given value.
|
||||
*
|
||||
* @param x The value of type `T` to check.
|
||||
*
|
||||
* @return The counter associated with *x*.
|
||||
*/
|
||||
template <typename T>
|
||||
size_t Count(const T& x) const
|
||||
{
|
||||
return CountImpl((*hasher_)(x));
|
||||
}
|
||||
|
||||
bool Serialize(SerialInfo* info) const;
|
||||
static BloomFilter* Unserialize(UnserialInfo* info);
|
||||
|
||||
protected:
|
||||
DECLARE_ABSTRACT_SERIAL(BloomFilter);
|
||||
|
||||
BloomFilter();
|
||||
|
||||
/**
|
||||
* Constructs a Bloom filter.
|
||||
*
|
||||
* @param hasher The hasher to use for this Bloom filter.
|
||||
*/
|
||||
BloomFilter(const Hasher* hasher);
|
||||
|
||||
virtual void AddImpl(const Hasher::digest_vector& hashes) = 0;
|
||||
virtual size_t CountImpl(const Hasher::digest_vector& hashes) const = 0;
|
||||
|
||||
const Hasher* hasher_;
|
||||
};
|
||||
|
||||
/**
|
||||
* A basic Bloom filter.
|
||||
*/
|
||||
class BasicBloomFilter : public BloomFilter {
|
||||
public:
|
||||
/**
|
||||
* Computes the number of cells based a given false-positive rate and
|
||||
* capacity. In the literature, this parameter often has the name *M*.
|
||||
*
|
||||
* @param fp The false-positive rate.
|
||||
*
|
||||
* @param capacity The number of exepected elements.
|
||||
*
|
||||
* Returns: The number cells needed to support a false-positive rate of *fp*
|
||||
* with at most *capacity* elements.
|
||||
*/
|
||||
static size_t M(double fp, size_t capacity);
|
||||
|
||||
/**
|
||||
* Computes the optimal number of hash functions based on the number cells
|
||||
* and expected number of elements.
|
||||
*
|
||||
* @param cells The number of cells (*m*).
|
||||
*
|
||||
* @param capacity The maximum number of elements.
|
||||
*
|
||||
* Returns: the optimal number of hash functions for a false-positive rate of
|
||||
* *fp* for at most *capacity* elements.
|
||||
*/
|
||||
static size_t K(size_t cells, size_t capacity);
|
||||
|
||||
static BasicBloomFilter* Merge(const BasicBloomFilter* x,
|
||||
const BasicBloomFilter* y);
|
||||
|
||||
/**
|
||||
* Constructs a basic Bloom filter with a given number of cells and capacity.
|
||||
*/
|
||||
BasicBloomFilter(const Hasher* hasher, size_t cells);
|
||||
|
||||
protected:
|
||||
DECLARE_SERIAL(BasicBloomFilter);
|
||||
|
||||
BasicBloomFilter();
|
||||
|
||||
virtual void AddImpl(const Hasher::digest_vector& h);
|
||||
virtual size_t CountImpl(const Hasher::digest_vector& h) const;
|
||||
|
||||
private:
|
||||
BitVector* bits_;
|
||||
};
|
||||
|
||||
/**
|
||||
* A counting Bloom filter.
|
||||
*/
|
||||
class CountingBloomFilter : public BloomFilter {
|
||||
public:
|
||||
static CountingBloomFilter* Merge(const CountingBloomFilter* x,
|
||||
const CountingBloomFilter* y);
|
||||
|
||||
CountingBloomFilter(const Hasher* hasher, size_t cells, size_t width);
|
||||
|
||||
protected:
|
||||
DECLARE_SERIAL(CountingBloomFilter);
|
||||
|
||||
CountingBloomFilter();
|
||||
|
||||
virtual void AddImpl(const Hasher::digest_vector& h);
|
||||
virtual size_t CountImpl(const Hasher::digest_vector& h) const;
|
||||
|
||||
private:
|
||||
CounterVector* cells_;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
18
src/probabilistic/CMakeLists.txt
Normal file
18
src/probabilistic/CMakeLists.txt
Normal file
|
@ -0,0 +1,18 @@
|
|||
|
||||
include(BroSubdir)
|
||||
|
||||
include_directories(BEFORE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}
|
||||
${CMAKE_CURRENT_BINARY_DIR}
|
||||
)
|
||||
|
||||
set(probabilistic_SRCS
|
||||
BitVector.cc
|
||||
BloomFilter.cc
|
||||
CounterVector.cc
|
||||
Hasher.cc)
|
||||
|
||||
bif_target(bloom-filter.bif)
|
||||
|
||||
bro_add_subdir_library(probabilistic ${probabilistic_SRCS} ${BIF_OUTPUT_CC})
|
||||
add_dependencies(bro_probabilistic generate_outputs)
|
159
src/probabilistic/CounterVector.cc
Normal file
159
src/probabilistic/CounterVector.cc
Normal file
|
@ -0,0 +1,159 @@
|
|||
#include "CounterVector.h"
|
||||
|
||||
#include <limits>
|
||||
#include "BitVector.h"
|
||||
#include "Serializer.h"
|
||||
|
||||
using namespace probabilistic;
|
||||
|
||||
CounterVector::CounterVector(size_t width, size_t cells)
|
||||
: bits_(new BitVector(width * cells)),
|
||||
width_(width)
|
||||
{
|
||||
}
|
||||
|
||||
CounterVector::CounterVector(const CounterVector& other)
|
||||
: bits_(new BitVector(*other.bits_)),
|
||||
width_(other.width_)
|
||||
{
|
||||
}
|
||||
|
||||
CounterVector::~CounterVector()
|
||||
{
|
||||
delete bits_;
|
||||
}
|
||||
|
||||
// Adds *value* to the counter in *cell* with a bitwise ripple-carry adder.
// If the addition overflows the cell, the cell is saturated (all bits
// set) and false is returned; otherwise true.
bool CounterVector::Increment(size_type cell, count_type value)
	{
	assert(cell < Size());
	assert(value != 0);

	size_t lsb = cell * width_;
	bool carry = false;

	for ( size_t i = 0; i < width_; ++i )
		{
		bool b1 = (*bits_)[lsb + i];

		// Shift the 64-bit operand itself. `1 << i` is an int shift
		// and breaks for i >= 31.
		// NOTE(review): assumes width_ does not exceed the bit width
		// of count_type.
		bool b2 = (value >> i) & 1;

		(*bits_)[lsb + i] = b1 ^ b2 ^ carry;
		carry = ( b1 && b2 ) || ( carry && ( b1 != b2 ) );
		}

	// A carry out of the most significant bit means overflow: saturate.
	if ( carry )
		for ( size_t i = 0; i < width_; ++i )
			bits_->Set(lsb + i);

	return ! carry;
	}
|
||||
|
||||
// Subtracts *value* from the counter in *cell* by adding its two's
// complement with a bitwise ripple-carry adder. Returns the final carry:
// for 0 < value < 2^width_, a carry out of the top bit means the counter
// was at least *value*, i.e., no underflow occurred.
bool CounterVector::Decrement(size_type cell, count_type value)
	{
	assert(cell < Size());
	assert(value != 0);

	value = ~value + 1; // A - B := A + ~B + 1
	bool carry = false;
	size_t lsb = cell * width_;

	for ( size_t i = 0; i < width_; ++i )
		{
		bool b1 = (*bits_)[lsb + i];

		// Shift the 64-bit operand itself. `1 << i` is an int shift
		// and breaks for i >= 31.
		// NOTE(review): assumes width_ does not exceed the bit width
		// of count_type.
		bool b2 = (value >> i) & 1;

		(*bits_)[lsb + i] = b1 ^ b2 ^ carry;
		carry = ( b1 && b2 ) || ( carry && ( b1 != b2 ) );
		}

	return carry;
	}
|
||||
|
||||
// Reassembles the counter stored in *cell* from its width_ bits, least
// significant bit first.
CounterVector::count_type CounterVector::Count(size_type cell) const
	{
	assert(cell < Size());

	// Accumulate in the return type. With size_t, counters wider than
	// size_t would be truncated on platforms where size_t is narrower
	// than count_type.
	count_type cnt = 0;
	count_type order = 1;
	size_t lsb = cell * width_;

	for ( size_t i = lsb; i < lsb + width_; ++i, order <<= 1 )
		if ( (*bits_)[i] )
			cnt |= order;

	return cnt;
	}
|
||||
|
||||
CounterVector::size_type CounterVector::Size() const
|
||||
{
|
||||
return bits_->Size() / width_;
|
||||
}
|
||||
|
||||
size_t CounterVector::Width() const
|
||||
{
|
||||
return width_;
|
||||
}
|
||||
|
||||
size_t CounterVector::Max() const
|
||||
{
|
||||
return std::numeric_limits<size_t>::max()
|
||||
>> (std::numeric_limits<size_t>::digits - width_);
|
||||
}
|
||||
|
||||
CounterVector& CounterVector::Merge(const CounterVector& other)
|
||||
{
|
||||
assert(Size() == other.Size());
|
||||
assert(Width() == other.Width());
|
||||
for ( size_t cell = 0; cell < Size(); ++cell )
|
||||
{
|
||||
size_t lsb = cell * width_;
|
||||
bool carry = false;
|
||||
for ( size_t i = 0; i < width_; ++i )
|
||||
{
|
||||
bool b1 = (*bits_)[lsb + i];
|
||||
bool b2 = (*other.bits_)[lsb + i];
|
||||
(*bits_)[lsb + i] = b1 ^ b2 ^ carry;
|
||||
carry = ( b1 && b2 ) || ( carry && ( b1 != b2 ) );
|
||||
}
|
||||
if ( carry )
|
||||
for ( size_t i = 0; i < width_; ++i )
|
||||
bits_->Set(lsb + i);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
namespace probabilistic {
|
||||
|
||||
CounterVector& CounterVector::operator|=(const CounterVector& other)
|
||||
{
|
||||
return Merge(other);
|
||||
}
|
||||
|
||||
CounterVector operator|(const CounterVector& x, const CounterVector& y)
|
||||
{
|
||||
CounterVector cv(x);
|
||||
return cv |= y;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
bool CounterVector::Serialize(SerialInfo* info) const
|
||||
{
|
||||
return SerialObj::Serialize(info);
|
||||
}
|
||||
|
||||
/**
 * Reads a counter vector back from a serialization stream.
 *
 * @param info The unserialization context to read from.
 *
 * @return Whatever SerialObj::Unserialize() yields for SER_COUNTERVECTOR,
 * viewed as a CounterVector.
 */
CounterVector* CounterVector::Unserialize(UnserialInfo* info)
	{
	// CounterVector derives from SerialObj, so a base-to-derived downcast
	// is a static_cast. reinterpret_cast only works by accident while
	// the base sub-object sits at offset zero.
	return static_cast<CounterVector*>(
		SerialObj::Unserialize(info, SER_COUNTERVECTOR));
	}
|
||||
|
||||
IMPLEMENT_SERIAL(CounterVector, SER_COUNTERVECTOR)
|
||||
|
||||
bool CounterVector::DoSerialize(SerialInfo* info) const
|
||||
{
|
||||
DO_SERIALIZE(SER_COUNTERVECTOR, SerialObj);
|
||||
if ( ! bits_->Serialize(info) )
|
||||
return false;
|
||||
return SERIALIZE(static_cast<uint64>(width_));
|
||||
}
|
||||
|
||||
bool CounterVector::DoUnserialize(UnserialInfo* info)
|
||||
{
|
||||
DO_UNSERIALIZE(SerialObj);
|
||||
bits_ = BitVector::Unserialize(info);
|
||||
if ( ! bits_ )
|
||||
return false;
|
||||
uint64 width;
|
||||
if ( ! UNSERIALIZE(&width) )
|
||||
return false;
|
||||
width_ = static_cast<size_t>(width);
|
||||
return true;
|
||||
}
|
||||
|
132
src/probabilistic/CounterVector.h
Normal file
132
src/probabilistic/CounterVector.h
Normal file
|
@ -0,0 +1,132 @@
|
|||
#ifndef CounterVector_h
|
||||
#define CounterVector_h
|
||||
|
||||
#include "SerialObj.h"
|
||||
|
||||
namespace probabilistic {
|
||||
|
||||
class BitVector;
|
||||
|
||||
/**
|
||||
* A vector of counters, each of which have a fixed number of bits.
|
||||
*/
|
||||
class CounterVector : public SerialObj {
|
||||
CounterVector& operator=(const CounterVector&);
|
||||
public:
|
||||
typedef size_t size_type;
|
||||
typedef uint64 count_type;
|
||||
|
||||
/**
|
||||
* Constructs a counter vector having cells of a given width.
|
||||
*
|
||||
* @param width The number of bits that each cell occupies.
|
||||
*
|
||||
* @param cells The number of cells in the bitvector.
|
||||
*
|
||||
* @pre `cells > 0 && width > 0`
|
||||
*/
|
||||
CounterVector(size_t width, size_t cells = 1024);
|
||||
|
||||
/**
|
||||
* Copy-constructs a counter vector.
|
||||
*
|
||||
* @param other The counter vector to copy.
|
||||
*/
|
||||
CounterVector(const CounterVector& other);
|
||||
|
||||
~CounterVector();
|
||||
|
||||
/**
|
||||
* Increments a given cell.
|
||||
*
|
||||
* @param cell The cell to increment.
|
||||
*
|
||||
* @param value The value to add to the current counter in *cell*.
|
||||
*
|
||||
* @return `true` if adding *value* to the counter in *cell* succeeded.
|
||||
*
|
||||
* @pre `cell < Size()`
|
||||
*/
|
||||
bool Increment(size_type cell, count_type value = 1);
|
||||
|
||||
/**
|
||||
* Decrements a given cell.
|
||||
*
|
||||
* @param cell The cell to decrement.
|
||||
*
|
||||
* @param value The value to subtract from the current counter in *cell*.
|
||||
*
|
||||
* @return `true` if subtracting *value* from the counter in *cell* succeeded.
|
||||
*
|
||||
* @pre `cell < Size()`
|
||||
*/
|
||||
bool Decrement(size_type cell, count_type value = 1);
|
||||
|
||||
/**
|
||||
* Retrieves the counter of a given cell.
|
||||
*
|
||||
* @param cell The cell index to retrieve the count for.
|
||||
*
|
||||
* @return The counter associated with *cell*.
|
||||
*
|
||||
* @pre `cell < Size()`
|
||||
*/
|
||||
count_type Count(size_type cell) const;
|
||||
|
||||
/**
|
||||
* Retrieves the number of cells in the storage.
|
||||
*
|
||||
* @return The number of cells.
|
||||
*/
|
||||
size_type Size() const;
|
||||
|
||||
/**
|
||||
* Retrieves the counter width.
|
||||
*
|
||||
* @return The number of bits per counter.
|
||||
*/
|
||||
size_t Width() const;
|
||||
|
||||
/**
|
||||
* Computes the maximum counter value.
|
||||
*
|
||||
* @return The maximum counter value based on the width.
|
||||
*/
|
||||
size_t Max() const;
|
||||
|
||||
/**
|
||||
* Merges another counter vector into this instance by *adding* the counters
|
||||
* of each cells.
|
||||
*
|
||||
* @param other The counter vector to merge into this instance.
|
||||
*
|
||||
* @return A reference to `*this`.
|
||||
*
|
||||
* @pre `Size() == other.Size() && Width() == other.Width()`
|
||||
*/
|
||||
CounterVector& Merge(const CounterVector& other);
|
||||
|
||||
/**
|
||||
* An alias for ::Merge.
|
||||
*/
|
||||
CounterVector& operator|=(const CounterVector& other);
|
||||
|
||||
friend CounterVector operator|(const CounterVector& x,
|
||||
const CounterVector& y);
|
||||
|
||||
bool Serialize(SerialInfo* info) const;
|
||||
static CounterVector* Unserialize(UnserialInfo* info);
|
||||
|
||||
protected:
|
||||
DECLARE_SERIAL(CounterVector);
|
||||
|
||||
CounterVector() { }
|
||||
|
||||
private:
|
||||
BitVector* bits_;
|
||||
size_t width_;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
109
src/probabilistic/Hasher.cc
Normal file
109
src/probabilistic/Hasher.cc
Normal file
|
@ -0,0 +1,109 @@
|
|||
|
||||
#include <typeinfo>
|
||||
|
||||
#include "Hasher.h"
|
||||
|
||||
#include "digest.h"
|
||||
|
||||
using namespace probabilistic;
|
||||
|
||||
Hasher::UHF::UHF(size_t seed, const std::string& extra)
|
||||
: h_(compute_seed(seed, extra))
|
||||
{
|
||||
}
|
||||
|
||||
Hasher::digest Hasher::UHF::hash(const void* x, size_t n) const
|
||||
{
|
||||
assert(n <= UHASH_KEY_SIZE);
|
||||
return n == 0 ? 0 : h_(x, n);
|
||||
}
|
||||
|
||||
size_t Hasher::UHF::compute_seed(size_t seed, const std::string& extra)
|
||||
{
|
||||
u_char buf[SHA256_DIGEST_LENGTH];
|
||||
SHA256_CTX ctx;
|
||||
sha256_init(&ctx);
|
||||
if ( extra.empty() )
|
||||
{
|
||||
unsigned int first_seed = initial_seed();
|
||||
sha256_update(&ctx, &first_seed, sizeof(first_seed));
|
||||
}
|
||||
else
|
||||
{
|
||||
sha256_update(&ctx, extra.c_str(), extra.size());
|
||||
}
|
||||
sha256_update(&ctx, &seed, sizeof(seed));
|
||||
sha256_final(&ctx, buf);
|
||||
// Take the first sizeof(size_t) bytes as seed.
|
||||
return *reinterpret_cast<size_t*>(buf);
|
||||
}
|
||||
|
||||
|
||||
Hasher* Hasher::Create(size_t k, const std::string& name)
|
||||
{
|
||||
return new DefaultHasher(k, name);
|
||||
}
|
||||
|
||||
Hasher::Hasher(size_t k, const std::string& name)
|
||||
: k_(k), name_(name)
|
||||
{
|
||||
}
|
||||
|
||||
DefaultHasher::DefaultHasher(size_t k, const std::string& name)
|
||||
: Hasher(k, name)
|
||||
{
|
||||
for ( size_t i = 0; i < k; ++i )
|
||||
hash_functions_.push_back(UHF(i, name));
|
||||
}
|
||||
|
||||
Hasher::digest_vector DefaultHasher::Hash(const void* x, size_t n) const
|
||||
{
|
||||
digest_vector h(K(), 0);
|
||||
for ( size_t i = 0; i < h.size(); ++i )
|
||||
h[i] = hash_functions_[i](x, n);
|
||||
return h;
|
||||
}
|
||||
|
||||
DefaultHasher* DefaultHasher::Clone() const
|
||||
{
|
||||
return new DefaultHasher(*this);
|
||||
}
|
||||
|
||||
bool DefaultHasher::Equals(const Hasher* other) const
|
||||
{
|
||||
if ( typeid(*this) != typeid(*other) )
|
||||
return false;
|
||||
const DefaultHasher* o = static_cast<const DefaultHasher*>(other);
|
||||
return hash_functions_ == o->hash_functions_;
|
||||
}
|
||||
|
||||
DoubleHasher::DoubleHasher(size_t k, const std::string& name)
|
||||
: Hasher(k, name),
|
||||
h1_(1, name),
|
||||
h2_(2, name)
|
||||
{
|
||||
}
|
||||
|
||||
Hasher::digest_vector DoubleHasher::Hash(const void* x, size_t n) const
	{
	// Double hashing: only two real hash computations are performed; the
	// i-th digest is the linear combination base + i * step.
	const digest base = h1_(x, n);
	const digest step = h2_(x, n);

	digest_vector result(K(), 0);
	size_t idx = 0;

	for ( digest_vector::iterator it = result.begin();
	      it != result.end(); ++it, ++idx )
		*it = base + idx * step;

	return result;
	}
|
||||
|
||||
DoubleHasher* DoubleHasher::Clone() const
|
||||
{
|
||||
return new DoubleHasher(*this);
|
||||
}
|
||||
|
||||
bool DoubleHasher::Equals(const Hasher* other) const
|
||||
{
|
||||
if ( typeid(*this) != typeid(*other) )
|
||||
return false;
|
||||
const DoubleHasher* o = static_cast<const DoubleHasher*>(other);
|
||||
return h1_ == o->h1_ && h2_ == o->h2_;
|
||||
}
|
||||
|
131
src/probabilistic/Hasher.h
Normal file
131
src/probabilistic/Hasher.h
Normal file
|
@ -0,0 +1,131 @@
|
|||
#ifndef Hasher_h
|
||||
#define Hasher_h
|
||||
|
||||
#include "Hash.h"
|
||||
#include "H3.h"
|
||||
|
||||
namespace probabilistic {
|
||||
|
||||
/**
|
||||
* The abstract base class for hashers, i.e., constructs which hash elements
|
||||
* *k* times.
|
||||
*/
|
||||
class Hasher {
|
||||
public:
|
||||
typedef hash_t digest;
|
||||
typedef std::vector<digest> digest_vector;
|
||||
|
||||
/**
|
||||
* Constructs the hashing policy used by the implementation.
|
||||
*
|
||||
* @todo This factory function exists because the HashingPolicy class
|
||||
* hierarchy is not yet serializable.
|
||||
*/
|
||||
static Hasher* Create(size_t k, const std::string& name);
|
||||
|
||||
virtual ~Hasher() { }
|
||||
|
||||
template <typename T>
|
||||
digest_vector operator()(const T& x) const
|
||||
{
|
||||
return Hash(&x, sizeof(T));
|
||||
}
|
||||
|
||||
virtual digest_vector Hash(const void* x, size_t n) const = 0;
|
||||
|
||||
virtual Hasher* Clone() const = 0;
|
||||
|
||||
virtual bool Equals(const Hasher* other) const = 0;
|
||||
|
||||
size_t K() const { return k_; }
|
||||
const std::string& Name() const { return name_; }
|
||||
|
||||
protected:
|
||||
/**
|
||||
* A universal hash function family.
|
||||
*/
|
||||
class UHF {
|
||||
public:
|
||||
/**
|
||||
* Constructs an H3 hash function seeded with a given seed and an optional
|
||||
* extra seed to replace the initial Bro seed.
|
||||
*
|
||||
* @param seed The seed to use for this instance.
|
||||
*
|
||||
* @param extra If not empty, this string replaces the initial Bro seed
|
||||
* when computing the seed for this instance.
|
||||
*/
|
||||
UHF(size_t seed, const std::string& extra = "");
|
||||
|
||||
template <typename T>
|
||||
digest operator()(const T& x) const
|
||||
{
|
||||
return hash(&x, sizeof(T));
|
||||
}
|
||||
|
||||
digest operator()(const void* x, size_t n) const
|
||||
{
|
||||
return hash(x, n);
|
||||
}
|
||||
|
||||
friend bool operator==(const UHF& x, const UHF& y)
|
||||
{
|
||||
return x.h_ == y.h_;
|
||||
}
|
||||
|
||||
friend bool operator!=(const UHF& x, const UHF& y)
|
||||
{
|
||||
return ! (x == y);
|
||||
}
|
||||
|
||||
digest hash(const void* x, size_t n) const;
|
||||
|
||||
private:
|
||||
static size_t compute_seed(size_t seed, const std::string& extra);
|
||||
|
||||
H3<digest, UHASH_KEY_SIZE> h_;
|
||||
};
|
||||
|
||||
Hasher(size_t k, const std::string& name);
|
||||
|
||||
private:
|
||||
const size_t k_;
|
||||
std::string name_;
|
||||
};
|
||||
|
||||
/**
|
||||
* The default hashing policy. Performs *k* hash function computations.
|
||||
*/
|
||||
class DefaultHasher : public Hasher {
|
||||
public:
|
||||
DefaultHasher(size_t k, const std::string& name);
|
||||
|
||||
virtual digest_vector Hash(const void* x, size_t n) const /* final */;
|
||||
virtual DefaultHasher* Clone() const /* final */;
|
||||
virtual bool Equals(const Hasher* other) const /* final */;
|
||||
|
||||
private:
|
||||
std::vector<UHF> hash_functions_;
|
||||
};
|
||||
|
||||
/**
|
||||
* The *double-hashing* policy. Uses a linear combination of two hash functions.
|
||||
*/
|
||||
class DoubleHasher : public Hasher {
|
||||
public:
|
||||
DoubleHasher(size_t k, const std::string& name);
|
||||
|
||||
virtual digest_vector Hash(const void* x, size_t n) const /* final */;
|
||||
virtual DoubleHasher* Clone() const /* final */;
|
||||
virtual bool Equals(const Hasher* other) const /* final */;
|
||||
|
||||
private:
|
||||
UHF h1_;
|
||||
UHF h2_;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
130
src/probabilistic/bloom-filter.bif
Normal file
130
src/probabilistic/bloom-filter.bif
Normal file
|
@ -0,0 +1,130 @@
|
|||
# ===========================================================================
|
||||
#
|
||||
# Bloom Filter Functions
|
||||
#
|
||||
# ===========================================================================
|
||||
|
||||
%%{
|
||||
|
||||
// TODO: This is currently included from the top-level src directory, hence
|
||||
// paths are relative to there. We need a better mechanism to pull in
|
||||
// BiFs defined in sub directories.
|
||||
#include "probabilistic/BloomFilter.h"
|
||||
#include "OpaqueVal.h"
|
||||
|
||||
using namespace probabilistic;
|
||||
|
||||
%%}
|
||||
|
||||
module GLOBAL;
|
||||
|
||||
## Creates a basic Bloom filter.
|
||||
##
|
||||
## fp: The desired false-positive rate.
|
||||
##
|
||||
## capacity: the maximum number of elements that guarantees a false-positive
|
||||
## rate of *fp*.
|
||||
##
|
||||
## name: A name that uniquely identifies and seeds the Bloom filter. If empty,
|
||||
## the initialization will become dependent on the initial seed.
|
||||
##
|
||||
## Returns: A Bloom filter handle.
|
||||
function bloomfilter_basic_init%(fp: double, capacity: count,
				 name: string &default=""%): opaque of bloomfilter
	%{
	// Reject rates outside the closed interval [0, 1] before sizing anything.
	if ( fp < 0.0 || fp > 1.0 )
		{
		reporter->Error("false-positive rate must take value between 0 and 1");
		return NULL;
		}

	// Derive the cell count from (fp, capacity), then the number of hash
	// functions from (cells, capacity).
	const size_t num_cells = BasicBloomFilter::M(fp, capacity);
	const size_t num_hashes = BasicBloomFilter::K(num_cells, capacity);

	// The name is handed to the hasher factory together with the hash count.
	const Hasher* hasher = Hasher::Create(num_hashes, name->CheckString());

	return new BloomFilterVal(new BasicBloomFilter(hasher, num_cells));
	%}
|
||||
|
||||
## Creates a counting Bloom filter.
|
||||
##
|
||||
## k: The number of hash functions to use.
|
||||
##
|
||||
## cells: The number of cells of the underlying counter vector.
|
||||
##
|
||||
## max: The maximum counter value associated with each element described
|
||||
## by *w = floor(log_2(max)) + 1* bits. Each bit in the underlying counter vector
|
||||
## becomes a cell of size *w* bits.
|
||||
##
|
||||
## name: A name that uniquely identifies and seeds the Bloom filter. If empty,
|
||||
## the initialization will become dependent on the initial seed.
|
||||
##
|
||||
## Returns: A Bloom filter handle.
|
||||
function bloomfilter_counting_init%(k: count, cells: count, max: count,
				    name: string &default=""%): opaque of bloomfilter
	%{
	// A zero maximum leaves no room for any counter value.
	if ( max == 0 )
		{
		reporter->Error("max counter value must be greater than 0");
		return NULL;
		}

	const Hasher* hasher = Hasher::Create(k, name->CheckString());

	// Count the bits needed to represent max: start at one bit and add one
	// for every further right shift that still leaves a non-zero value.
	uint16 bits_per_cell = 1;

	for ( max >>= 1; max; max >>= 1 )
		++bits_per_cell;

	return new BloomFilterVal(new CountingBloomFilter(hasher, cells, bits_per_cell));
	%}
|
||||
|
||||
## Adds an element to a Bloom filter.
|
||||
##
|
||||
## bf: The Bloom filter handle.
|
||||
##
|
||||
## x: The element to add.
|
||||
function bloomfilter_add%(bf: opaque of bloomfilter, x: any%): any
	%{
	BloomFilterVal* filter = static_cast<BloomFilterVal*>(bf);

	// An untyped filter adopts the type of the element being added.
	if ( ! filter->Type() && ! filter->Typify(x->Type()) )
		{
		reporter->Error("failed to set Bloom filter type");
		return NULL;
		}

	// Once typed, only elements of exactly that type object are accepted.
	if ( filter->Type() != x->Type() )
		{
		reporter->Error("incompatible Bloom filter types");
		return NULL;
		}

	filter->Add(x);
	return NULL;
	%}
|
||||
|
||||
## Retrieves the counter for a given element in a Bloom filter.
|
||||
##
|
||||
## bf: The Bloom filter handle.
|
||||
##
|
||||
## x: The element to count.
|
||||
##
|
||||
## Returns: the counter associated with *x* in *bf*.
|
||||
function bloomfilter_lookup%(bf: opaque of bloomfilter, x: any%): count
	%{
	const BloomFilterVal* filter = static_cast<const BloomFilterVal*>(bf);

	// Every error path reports the problem and yields a count of zero.
	if ( ! filter->Type() )
		{
		reporter->Error("cannot perform lookup on untyped Bloom filter");
		return new Val(0, TYPE_COUNT);
		}

	if ( filter->Type() != x->Type() )
		{
		reporter->Error("incompatible Bloom filter types");
		return new Val(0, TYPE_COUNT);
		}

	return new Val(static_cast<uint64>(filter->Count(x)), TYPE_COUNT);
	%}
|
||||
|
||||
## Merges two Bloom filters.
|
||||
##
|
||||
## bf1: The first Bloom filter handle.
|
||||
##
|
||||
## bf2: The second Bloom filter handle.
|
||||
##
|
||||
## Returns: The union of *bf1* and *bf2*.
|
||||
function bloomfilter_merge%(bf1: opaque of bloomfilter,
			    bf2: opaque of bloomfilter%): opaque of bloomfilter
	%{
	const BloomFilterVal* lhs = static_cast<const BloomFilterVal*>(bf1);
	const BloomFilterVal* rhs = static_cast<const BloomFilterVal*>(bf2);

	// Both handles must carry the same element type object to be merged.
	if ( lhs->Type() != rhs->Type() )
		{
		reporter->Error("incompatible Bloom filter types");
		return NULL;
		}

	return BloomFilterVal::Merge(lhs, rhs);
	%}
|
42
src/util.cc
42
src/util.cc
|
@ -716,6 +716,8 @@ static bool write_random_seeds(const char* write_file, uint32 seed,
|
|||
|
||||
static bool bro_rand_determistic = false;
|
||||
static unsigned int bro_rand_state = 0;
|
||||
static bool first_seed_saved = false;
|
||||
static unsigned int first_seed = 0;
|
||||
|
||||
static void bro_srandom(unsigned int seed, bool deterministic)
|
||||
{
|
||||
|
@ -800,6 +802,12 @@ void init_random_seed(uint32 seed, const char* read_file, const char* write_file
|
|||
|
||||
bro_srandom(seed, seeds_done);
|
||||
|
||||
if ( ! first_seed_saved )
|
||||
{
|
||||
first_seed = seed;
|
||||
first_seed_saved = true;
|
||||
}
|
||||
|
||||
if ( ! hmac_key_set )
|
||||
{
|
||||
MD5((const u_char*) buf, sizeof(buf), shared_hmac_md5_key);
|
||||
|
@ -811,27 +819,39 @@ void init_random_seed(uint32 seed, const char* read_file, const char* write_file
|
|||
write_file);
|
||||
}
|
||||
|
||||
// Returns the value stored in first_seed, which init_random_seed() records
// once (guarded by first_seed_saved) and which is 0 until then.
unsigned int initial_seed()
|
||||
{
|
||||
return first_seed;
|
||||
}
|
||||
|
||||
// Reports the bro_rand_determistic flag, i.e. whether bro_random() uses the
// built-in generator instead of the system random().
bool have_random_seed()
|
||||
{
|
||||
return bro_rand_determistic;
|
||||
}
|
||||
|
||||
// One step of a simple linear congruential generator that behaves the same
// on every platform. For a state in 1 .. 2^31-2 the result is
// (16807 * state) mod (2^31 - 1), evaluated by splitting the state into
// quotient and remainder parts so the intermediate products stay small.
// The caller owns the state: pass the returned value back in to get the
// next number. Note that a state of 0 yields 2^31-1, and 2^31-1 maps to
// itself.
long int bro_prng(long int state)
	{
	static const long int modulus = 2147483647;
	static const long int multiplier = 16807;
	const long int quot = modulus / multiplier;
	const long int rem = modulus % multiplier;

	const long int high_part = state / quot;
	const long int low_part = state % quot;
	const long int next = multiplier * low_part - rem * high_part;

	// A non-positive intermediate is moved back into range by adding the
	// modulus once.
	return next <= 0 ? next + modulus : next;
	}
|
||||
|
||||
long int bro_random()
|
||||
{
|
||||
if ( ! bro_rand_determistic )
|
||||
return random(); // Use system PRNG.
|
||||
|
||||
// Use our own simple linear congruence PRNG to make sure we are
|
||||
// predictable across platforms.
|
||||
const long int m = 2147483647;
|
||||
const long int a = 16807;
|
||||
const long int q = m / a;
|
||||
const long int r = m % a;
|
||||
|
||||
bro_rand_state = a * ( bro_rand_state % q ) - r * ( bro_rand_state / q );
|
||||
|
||||
if ( bro_rand_state <= 0 )
|
||||
bro_rand_state += m;
|
||||
bro_rand_state = bro_prng(bro_rand_state);
|
||||
|
||||
return bro_rand_state;
|
||||
}
|
||||
|
|
12
src/util.h
12
src/util.h
|
@ -165,12 +165,20 @@ extern void hmac_md5(size_t size, const unsigned char* bytes,
|
|||
extern void init_random_seed(uint32 seed, const char* load_file,
|
||||
const char* write_file);
|
||||
|
||||
// Retrieves the initial seed computed after the very first call to
|
||||
// init_random_seed(). Repeated calls to init_random_seed() will not affect the
|
||||
// return value of this function.
|
||||
unsigned int initial_seed();
|
||||
|
||||
// Returns true if the user explicitly set a seed via init_random_seed();
|
||||
extern bool have_random_seed();
|
||||
|
||||
// A simple linear congruence PRNG. It takes its state as argument and returns
|
||||
// a new random value, which can serve as state for subsequent calls.
|
||||
long int bro_prng(long int state);
|
||||
|
||||
// Replacement for the system random(), to which it normally falls back
|
||||
// except when a seed has been given. In that case, we use our own
|
||||
// predictable PRNG.
|
||||
// except when a seed has been given. In that case, it uses the function bro_prng().
|
||||
long int bro_random();
|
||||
|
||||
// Calls the system srandom() function with the given seed if not running
|
||||
|
|
21
testing/btest/Baseline/bifs.bloomfilter/output
Normal file
21
testing/btest/Baseline/bifs.bloomfilter/output
Normal file
|
@ -0,0 +1,21 @@
|
|||
0
|
||||
1
|
||||
1
|
||||
0
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
2
|
||||
3
|
||||
3
|
||||
2
|
||||
3
|
||||
3
|
||||
3
|
||||
2
|
83
testing/btest/bifs/bloomfilter.bro
Normal file
83
testing/btest/bifs/bloomfilter.bro
Normal file
|
@ -0,0 +1,83 @@
|
|||
# @TEST-EXEC: bro -b %INPUT >output
|
||||
# @TEST-EXEC: btest-diff output
|
||||
|
||||
function test_basic_bloom_filter()
|
||||
{
|
||||
# Basic usage with counts.
|
||||
local bf_cnt = bloomfilter_basic_init(0.1, 1000);
|
||||
bloomfilter_add(bf_cnt, 42);
|
||||
bloomfilter_add(bf_cnt, 84);
|
||||
bloomfilter_add(bf_cnt, 168);
|
||||
print bloomfilter_lookup(bf_cnt, 0);
|
||||
print bloomfilter_lookup(bf_cnt, 42);
|
||||
print bloomfilter_lookup(bf_cnt, 168);
|
||||
print bloomfilter_lookup(bf_cnt, 336);
|
||||
bloomfilter_add(bf_cnt, 0.5); # Type mismatch
|
||||
bloomfilter_add(bf_cnt, "foo"); # Type mismatch
|
||||
|
||||
# Basic usage with strings.
|
||||
local bf_str = bloomfilter_basic_init(0.9, 10);
|
||||
bloomfilter_add(bf_str, "foo");
|
||||
bloomfilter_add(bf_str, "bar");
|
||||
print bloomfilter_lookup(bf_str, "foo");
|
||||
print bloomfilter_lookup(bf_str, "bar");
|
||||
print bloomfilter_lookup(bf_str, "b4z"); # FP
|
||||
print bloomfilter_lookup(bf_str, "quux"); # FP
|
||||
bloomfilter_add(bf_str, 0.5); # Type mismatch
|
||||
bloomfilter_add(bf_str, 100); # Type mismatch
|
||||
|
||||
# Edge cases.
|
||||
local bf_edge0 = bloomfilter_basic_init(0.000000000001, 1);
|
||||
local bf_edge1 = bloomfilter_basic_init(0.00000001, 100000000);
|
||||
local bf_edge2 = bloomfilter_basic_init(0.9999999, 1);
|
||||
local bf_edge3 = bloomfilter_basic_init(0.9999999, 100000000000);
|
||||
|
||||
# Invalid parameters.
|
||||
local bf_bug0 = bloomfilter_basic_init(-0.5, 42);
|
||||
local bf_bug1 = bloomfilter_basic_init(1.1, 42);
|
||||
|
||||
# Merging
|
||||
local bf_cnt2 = bloomfilter_basic_init(0.1, 1000);
|
||||
bloomfilter_add(bf_cnt2, 42);
|
||||
bloomfilter_add(bf_cnt, 100);
|
||||
local bf_merged = bloomfilter_merge(bf_cnt, bf_cnt2);
|
||||
print bloomfilter_lookup(bf_merged, 42);
|
||||
print bloomfilter_lookup(bf_merged, 84);
|
||||
print bloomfilter_lookup(bf_merged, 100);
|
||||
print bloomfilter_lookup(bf_merged, 168);
|
||||
}
|
||||
|
||||
function test_counting_bloom_filter()
|
||||
{
|
||||
local bf = bloomfilter_counting_init(3, 32, 3);
|
||||
bloomfilter_add(bf, "foo");
|
||||
print bloomfilter_lookup(bf, "foo"); # 1
|
||||
bloomfilter_add(bf, "foo");
|
||||
print bloomfilter_lookup(bf, "foo"); # 2
|
||||
bloomfilter_add(bf, "foo");
|
||||
print bloomfilter_lookup(bf, "foo"); # 3
|
||||
bloomfilter_add(bf, "foo");
|
||||
print bloomfilter_lookup(bf, "foo"); # still 3
|
||||
|
||||
|
||||
bloomfilter_add(bf, "bar");
|
||||
bloomfilter_add(bf, "bar");
|
||||
print bloomfilter_lookup(bf, "bar"); # 2
|
||||
print bloomfilter_lookup(bf, "foo"); # still 3
|
||||
|
||||
# Merging
|
||||
local bf2 = bloomfilter_counting_init(3, 32, 3);
|
||||
bloomfilter_add(bf2, "baz");
|
||||
bloomfilter_add(bf2, "baz");
|
||||
bloomfilter_add(bf2, "bar");
|
||||
local bf_merged = bloomfilter_merge(bf, bf2);
|
||||
print bloomfilter_lookup(bf_merged, "foo");
|
||||
print bloomfilter_lookup(bf_merged, "bar");
|
||||
print bloomfilter_lookup(bf_merged, "baz");
|
||||
}
|
||||
|
||||
event bro_init()
|
||||
{
|
||||
test_basic_bloom_filter();
|
||||
test_counting_bloom_filter();
|
||||
}
|
|
@ -12,6 +12,9 @@ global sha1_handle: opaque of sha1 &persistent &synchronized;
|
|||
global sha256_handle: opaque of sha256 &persistent &synchronized;
|
||||
global entropy_handle: opaque of entropy &persistent &synchronized;
|
||||
|
||||
global bloomfilter_elements: set[string] &persistent &synchronized;
|
||||
global bloomfilter_handle: opaque of bloomfilter &persistent &synchronized;
|
||||
|
||||
event bro_done()
|
||||
{
|
||||
local out = open("output.log");
|
||||
|
@ -36,6 +39,9 @@ event bro_done()
|
|||
print out, entropy_test_finish(entropy_handle);
|
||||
else
|
||||
print out, "entropy_test_add() failed";
|
||||
|
||||
for ( e in bloomfilter_elements )
|
||||
print bloomfilter_lookup(bloomfilter_handle, e);
|
||||
}
|
||||
|
||||
@TEST-END-FILE
|
||||
|
@ -47,6 +53,9 @@ global sha1_handle: opaque of sha1 &persistent &synchronized;
|
|||
global sha256_handle: opaque of sha256 &persistent &synchronized;
|
||||
global entropy_handle: opaque of entropy &persistent &synchronized;
|
||||
|
||||
global bloomfilter_elements = { "foo", "bar", "baz" } &persistent &synchronized;
|
||||
global bloomfilter_handle: opaque of bloomfilter &persistent &synchronized;
|
||||
|
||||
event bro_init()
|
||||
{
|
||||
local out = open("expected.log");
|
||||
|
@ -72,6 +81,10 @@ event bro_init()
|
|||
entropy_handle = entropy_test_init();
|
||||
if ( ! entropy_test_add(entropy_handle, "f") )
|
||||
print out, "entropy_test_add() failed";
|
||||
|
||||
bloomfilter_handle = bloomfilter_basic_init(0.1, 100);
|
||||
for ( e in bloomfilter_elements )
|
||||
bloomfilter_add(bloomfilter_handle, e);
|
||||
}
|
||||
|
||||
@TEST-END-FILE
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue