Broifying the code.

Also extending API documentation a bit more and fixing a memory leak.
This commit is contained in:
Robin Sommer 2013-07-23 17:16:57 -07:00
parent 21685d2529
commit 474107fe40
18 changed files with 1651 additions and 1329 deletions

View file

@ -560,7 +560,7 @@ void builtin_error(const char* msg, BroObj* arg)
#include "reporter.bif.func_def" #include "reporter.bif.func_def"
#include "strings.bif.func_def" #include "strings.bif.func_def"
// TODO: Add a nicer mechanism to pull subdirectory bifs automatically. // TODO: Add a nicer mechanism to pull in subdirectory bifs automatically.
#include "probabilistic/bloom-filter.bif.h" #include "probabilistic/bloom-filter.bif.h"
void init_builtin_funcs() void init_builtin_funcs()
@ -577,7 +577,7 @@ void init_builtin_funcs()
#include "reporter.bif.func_init" #include "reporter.bif.func_init"
#include "strings.bif.func_init" #include "strings.bif.func_init"
// TODO: Add a nicer mechanism to pull subdirectory bifs automatically. // TODO: Add a nicer mechanism to pull in subdirectory bifs automatically.
#include "probabilistic/bloom-filter.bif.init.cc" #include "probabilistic/bloom-filter.bif.init.cc"
did_builtin_init = true; did_builtin_init = true;

View file

@ -1,5 +1,6 @@
#include "OpaqueVal.h" // See the file "COPYING" in the main distribution directory for copyright.
#include "OpaqueVal.h"
#include "NetVar.h" #include "NetVar.h"
#include "Reporter.h" #include "Reporter.h"
#include "Serializer.h" #include "Serializer.h"
@ -518,86 +519,88 @@ bool EntropyVal::DoUnserialize(UnserialInfo* info)
} }
BloomFilterVal::BloomFilterVal() BloomFilterVal::BloomFilterVal()
: OpaqueVal(bloomfilter_type), : OpaqueVal(bloomfilter_type)
type_(NULL),
hash_(NULL),
bloom_filter_(NULL)
{ {
type = 0;
hash = 0;
bloom_filter = 0;
} }
BloomFilterVal::BloomFilterVal(OpaqueType* t) BloomFilterVal::BloomFilterVal(OpaqueType* t)
: OpaqueVal(t), : OpaqueVal(t)
type_(NULL),
hash_(NULL),
bloom_filter_(NULL)
{ {
type = 0;
hash = 0;
bloom_filter = 0;
} }
BloomFilterVal::BloomFilterVal(probabilistic::BloomFilter* bf) BloomFilterVal::BloomFilterVal(probabilistic::BloomFilter* bf)
: OpaqueVal(bloomfilter_type), : OpaqueVal(bloomfilter_type)
type_(NULL),
hash_(NULL),
bloom_filter_(bf)
{ {
type = 0;
hash = 0;
bloom_filter = bf;
} }
bool BloomFilterVal::Typify(BroType* type) bool BloomFilterVal::Typify(BroType* arg_type)
{ {
if ( type_ ) if ( type )
return false; return false;
type_ = type;
type_->Ref(); type = arg_type;
TypeList* tl = new TypeList(type_); type->Ref();
tl->Append(type_);
hash_ = new CompositeHash(tl); TypeList* tl = new TypeList(type);
tl->Append(type);
hash = new CompositeHash(tl);
Unref(tl); Unref(tl);
return true; return true;
} }
BroType* BloomFilterVal::Type() const BroType* BloomFilterVal::Type() const
{ {
return type_; return type;
} }
void BloomFilterVal::Add(const Val* val) void BloomFilterVal::Add(const Val* val)
{ {
HashKey* key = hash_->ComputeHash(val, 1); HashKey* key = hash->ComputeHash(val, 1);
bloom_filter_->Add(key->Hash()); bloom_filter->Add(key->Hash());
delete key;
} }
size_t BloomFilterVal::Count(const Val* val) const size_t BloomFilterVal::Count(const Val* val) const
{ {
HashKey* key = hash_->ComputeHash(val, 1); HashKey* key = hash->ComputeHash(val, 1);
return bloom_filter_->Count(key->Hash()); size_t cnt = bloom_filter->Count(key->Hash());
delete key;
return cnt;
} }
BloomFilterVal* BloomFilterVal::Merge(const BloomFilterVal* x, BloomFilterVal* BloomFilterVal::Merge(const BloomFilterVal* x,
const BloomFilterVal* y) const BloomFilterVal* y)
{ {
if ( x->Type() != y->Type() ) if ( ! same_type(x->Type(), y->Type()) )
{
reporter->InternalError("cannot merge Bloom filters with different types"); reporter->InternalError("cannot merge Bloom filters with different types");
return NULL;
}
BloomFilterVal* result; BloomFilterVal* result;
if ( (result = DoMerge<probabilistic::BasicBloomFilter>(x, y)) ) if ( (result = DoMerge<probabilistic::BasicBloomFilter>(x, y)) )
return result; return result;
else if ( (result = DoMerge<probabilistic::CountingBloomFilter>(x, y)) ) else if ( (result = DoMerge<probabilistic::CountingBloomFilter>(x, y)) )
return result; return result;
reporter->InternalError("failed to merge Bloom filters"); reporter->InternalError("failed to merge Bloom filters");
return NULL; return 0;
} }
BloomFilterVal::~BloomFilterVal() BloomFilterVal::~BloomFilterVal()
{ {
if ( type_ ) Unref(type);
Unref(type_); delete hash;
if ( hash_ ) delete bloom_filter;
delete hash_;
if ( bloom_filter_ )
delete bloom_filter_;
} }
IMPLEMENT_SERIAL(BloomFilterVal, SER_BLOOMFILTER_VAL); IMPLEMENT_SERIAL(BloomFilterVal, SER_BLOOMFILTER_VAL);
@ -606,13 +609,15 @@ bool BloomFilterVal::DoSerialize(SerialInfo* info) const
{ {
DO_SERIALIZE(SER_BLOOMFILTER_VAL, OpaqueVal); DO_SERIALIZE(SER_BLOOMFILTER_VAL, OpaqueVal);
bool is_typed = type_ != NULL; bool is_typed = (type != 0);
if ( ! SERIALIZE(is_typed) ) if ( ! SERIALIZE(is_typed) )
return false; return false;
if ( is_typed && ! type_->Serialize(info) )
if ( is_typed && ! type->Serialize(info) )
return false; return false;
return bloom_filter_->Serialize(info); return bloom_filter->Serialize(info);
} }
bool BloomFilterVal::DoUnserialize(UnserialInfo* info) bool BloomFilterVal::DoUnserialize(UnserialInfo* info)
@ -622,14 +627,16 @@ bool BloomFilterVal::DoUnserialize(UnserialInfo* info)
bool is_typed; bool is_typed;
if ( ! UNSERIALIZE(&is_typed) ) if ( ! UNSERIALIZE(&is_typed) )
return false; return false;
if ( is_typed ) if ( is_typed )
{ {
BroType* type = BroType::Unserialize(info); BroType* type = BroType::Unserialize(info);
if ( ! Typify(type) ) if ( ! Typify(type) )
return false; return false;
Unref(type); Unref(type);
} }
bloom_filter_ = probabilistic::BloomFilter::Unserialize(info); bloom_filter = probabilistic::BloomFilter::Unserialize(info);
return bloom_filter_ != NULL; return bloom_filter != 0;
} }

View file

@ -116,21 +116,19 @@ private:
}; };
class BloomFilterVal : public OpaqueVal { class BloomFilterVal : public OpaqueVal {
BloomFilterVal(const BloomFilterVal&);
BloomFilterVal& operator=(const BloomFilterVal&);
public: public:
static BloomFilterVal* Merge(const BloomFilterVal* x,
const BloomFilterVal* y);
explicit BloomFilterVal(probabilistic::BloomFilter* bf); explicit BloomFilterVal(probabilistic::BloomFilter* bf);
~BloomFilterVal(); virtual ~BloomFilterVal();
bool Typify(BroType* type);
BroType* Type() const; BroType* Type() const;
bool Typify(BroType* type);
void Add(const Val* val); void Add(const Val* val);
size_t Count(const Val* val) const; size_t Count(const Val* val) const;
static BloomFilterVal* Merge(const BloomFilterVal* x,
const BloomFilterVal* y);
protected: protected:
friend class Val; friend class Val;
BloomFilterVal(); BloomFilterVal();
@ -139,32 +137,35 @@ protected:
DECLARE_SERIAL(BloomFilterVal); DECLARE_SERIAL(BloomFilterVal);
private: private:
// Disable.
BloomFilterVal(const BloomFilterVal&);
BloomFilterVal& operator=(const BloomFilterVal&);
template <typename T> template <typename T>
static BloomFilterVal* DoMerge(const BloomFilterVal* x, static BloomFilterVal* DoMerge(const BloomFilterVal* x,
const BloomFilterVal* y) const BloomFilterVal* y)
{ {
if ( typeid(*x->bloom_filter_) != typeid(*y->bloom_filter_) ) if ( typeid(*x->bloom_filter) != typeid(*y->bloom_filter) )
{
reporter->InternalError("cannot merge different Bloom filter types"); reporter->InternalError("cannot merge different Bloom filter types");
return NULL;
} if ( typeid(T) != typeid(*x->bloom_filter) )
if ( typeid(T) != typeid(*x->bloom_filter_) ) return 0;
return NULL;
const T* a = static_cast<const T*>(x->bloom_filter_); const T* a = static_cast<const T*>(x->bloom_filter);
const T* b = static_cast<const T*>(y->bloom_filter_); const T* b = static_cast<const T*>(y->bloom_filter);
BloomFilterVal* merged = new BloomFilterVal(T::Merge(a, b)); BloomFilterVal* merged = new BloomFilterVal(T::Merge(a, b));
assert(merged); assert(merged);
if ( ! merged->Typify(x->Type()) ) if ( ! merged->Typify(x->Type()) )
{
reporter->InternalError("failed to set type on merged Bloom filter"); reporter->InternalError("failed to set type on merged Bloom filter");
return NULL;
}
return merged; return merged;
} }
BroType* type_; BroType* type;
CompositeHash* hash_; CompositeHash* hash;
probabilistic::BloomFilter* bloom_filter_; probabilistic::BloomFilter* bloom_filter;
}; };
#endif #endif

View file

@ -1321,6 +1321,7 @@ bool OpaqueType::DoUnserialize(UnserialInfo* info)
const char* n; const char* n;
if ( ! UNSERIALIZE_STR(&n, 0) ) if ( ! UNSERIALIZE_STR(&n, 0) )
return false; return false;
name = n; name = n;
delete [] n; delete [] n;

View file

@ -1,3 +1,5 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include "BitVector.h" #include "BitVector.h"
#include <cassert> #include <cassert>
@ -29,80 +31,95 @@ uint8_t count_table[] = {
} // namespace <anonymous> } // namespace <anonymous>
BitVector::Reference::Reference(block_type& block, block_type i) BitVector::Reference::Reference(block_type& block, block_type i)
: block_(block), : block(block), mask((block_type(1) << i))
mask_(block_type(1) << i)
{ {
assert(i < bits_per_block); assert(i < bits_per_block);
} }
BitVector::Reference& BitVector::Reference::Flip() BitVector::Reference& BitVector::Reference::Flip()
{ {
block_ ^= mask_; block ^= mask;
return *this; return *this;
} }
BitVector::Reference::operator bool() const BitVector::Reference::operator bool() const
{ {
return (block_ & mask_) != 0; return (block & mask) != 0;
} }
bool BitVector::Reference::operator~() const bool BitVector::Reference::operator~() const
{ {
return (block_ & mask_) == 0; return (block & mask) == 0;
} }
BitVector::Reference& BitVector::Reference::operator=(bool x) BitVector::Reference& BitVector::Reference::operator=(bool x)
{ {
x ? block_ |= mask_ : block_ &= ~mask_; if ( x )
block |= mask;
else
block &= ~mask;
return *this; return *this;
} }
BitVector::Reference& BitVector::Reference::operator=(Reference const& other) BitVector::Reference& BitVector::Reference::operator=(const Reference& other)
{ {
other ? block_ |= mask_ : block_ &= ~mask_; if ( other )
block |= mask;
else
block &= ~mask;
return *this; return *this;
} }
BitVector::Reference& BitVector::Reference::operator|=(bool x) BitVector::Reference& BitVector::Reference::operator|=(bool x)
{ {
if ( x ) if ( x )
block_ |= mask_; block |= mask;
return *this; return *this;
} }
BitVector::Reference& BitVector::Reference::operator&=(bool x) BitVector::Reference& BitVector::Reference::operator&=(bool x)
{ {
if ( ! x ) if ( ! x )
block_ &= ~mask_; block &= ~mask;
return *this; return *this;
} }
BitVector::Reference& BitVector::Reference::operator^=(bool x) BitVector::Reference& BitVector::Reference::operator^=(bool x)
{ {
if ( x ) if ( x )
block_ ^= mask_; block ^= mask;
return *this; return *this;
} }
BitVector::Reference& BitVector::Reference::operator-=(bool x) BitVector::Reference& BitVector::Reference::operator-=(bool x)
{ {
if ( x ) if ( x )
block_ &= ~mask_; block &= ~mask;
return *this; return *this;
} }
BitVector::BitVector()
BitVector::BitVector() : num_bits_(0) { } {
num_bits = 0;
}
BitVector::BitVector(size_type size, bool value) BitVector::BitVector(size_type size, bool value)
: bits_(bits_to_blocks(size), value ? ~block_type(0) : 0), : bits(bits_to_blocks(size), value ? ~block_type(0) : 0)
num_bits_(size) {
{ } num_bits = size;
}
BitVector::BitVector(BitVector const& other) BitVector::BitVector(BitVector const& other)
: bits_(other.bits_), : bits(other.bits)
num_bits_(other.num_bits_) {
{ } num_bits = other.num_bits;
}
BitVector BitVector::operator~() const BitVector BitVector::operator~() const
{ {
@ -113,7 +130,7 @@ BitVector BitVector::operator~() const
BitVector& BitVector::operator=(BitVector const& other) BitVector& BitVector::operator=(BitVector const& other)
{ {
bits_ = other.bits_; bits = other.bits;
return *this; return *this;
} }
@ -131,7 +148,7 @@ BitVector BitVector::operator>>(size_type n) const
BitVector& BitVector::operator<<=(size_type n) BitVector& BitVector::operator<<=(size_type n)
{ {
if (n >= num_bits_) if ( n >= num_bits )
return Reset(); return Reset();
if ( n > 0 ) if ( n > 0 )
@ -139,7 +156,8 @@ BitVector& BitVector::operator<<=(size_type n)
size_type last = Blocks() - 1; size_type last = Blocks() - 1;
size_type div = n / bits_per_block; size_type div = n / bits_per_block;
block_type r = bit_index(n); block_type r = bit_index(n);
block_type* b = &bits_[0]; block_type* b = &bits[0];
assert(Blocks() >= 1); assert(Blocks() >= 1);
assert(div <= last); assert(div <= last);
@ -147,12 +165,15 @@ BitVector& BitVector::operator<<=(size_type n)
{ {
for ( size_type i = last - div; i > 0; --i ) for ( size_type i = last - div; i > 0; --i )
b[i + div] = (b[i] << r) | (b[i - 1] >> (bits_per_block - r)); b[i + div] = (b[i] << r) | (b[i - 1] >> (bits_per_block - r));
b[div] = b[0] << r; b[div] = b[0] << r;
} }
else else
{ {
for (size_type i = last-div; i > 0; --i) for (size_type i = last-div; i > 0; --i)
b[i + div] = b[i]; b[i + div] = b[i];
b[div] = b[0]; b[div] = b[0];
} }
@ -165,7 +186,7 @@ BitVector& BitVector::operator<<=(size_type n)
BitVector& BitVector::operator>>=(size_type n) BitVector& BitVector::operator>>=(size_type n)
{ {
if (n >= num_bits_) if ( n >= num_bits )
return Reset(); return Reset();
if ( n > 0 ) if ( n > 0 )
@ -173,7 +194,8 @@ BitVector& BitVector::operator>>=(size_type n)
size_type last = Blocks() - 1; size_type last = Blocks() - 1;
size_type div = n / bits_per_block; size_type div = n / bits_per_block;
block_type r = bit_index(n); block_type r = bit_index(n);
block_type* b = &bits_[0]; block_type* b = &bits[0];
assert(Blocks() >= 1); assert(Blocks() >= 1);
assert(div <= last); assert(div <= last);
@ -181,8 +203,10 @@ BitVector& BitVector::operator>>=(size_type n)
{ {
for (size_type i = last - div; i > 0; --i) for (size_type i = last - div; i > 0; --i)
b[i - div] = (b[i] >> r) | (b[i + 1] << (bits_per_block - r)); b[i - div] = (b[i] >> r) | (b[i + 1] << (bits_per_block - r));
b[last - div] = b[last] >> r; b[last - div] = b[last] >> r;
} }
else else
{ {
for (size_type i = div; i <= last; ++i) for (size_type i = div; i <= last; ++i)
@ -191,38 +215,47 @@ BitVector& BitVector::operator>>=(size_type n)
std::fill_n(b + (Blocks() - div), div, block_type(0)); std::fill_n(b + (Blocks() - div), div, block_type(0));
} }
return *this; return *this;
} }
BitVector& BitVector::operator&=(BitVector const& other) BitVector& BitVector::operator&=(BitVector const& other)
{ {
assert(Size() >= other.Size()); assert(Size() >= other.Size());
for ( size_type i = 0; i < Blocks(); ++i ) for ( size_type i = 0; i < Blocks(); ++i )
bits_[i] &= other.bits_[i]; bits[i] &= other.bits[i];
return *this; return *this;
} }
BitVector& BitVector::operator|=(BitVector const& other) BitVector& BitVector::operator|=(BitVector const& other)
{ {
assert(Size() >= other.Size()); assert(Size() >= other.Size());
for ( size_type i = 0; i < Blocks(); ++i ) for ( size_type i = 0; i < Blocks(); ++i )
bits_[i] |= other.bits_[i]; bits[i] |= other.bits[i];
return *this; return *this;
} }
BitVector& BitVector::operator^=(BitVector const& other) BitVector& BitVector::operator^=(BitVector const& other)
{ {
assert(Size() >= other.Size()); assert(Size() >= other.Size());
for ( size_type i = 0; i < Blocks(); ++i ) for ( size_type i = 0; i < Blocks(); ++i )
bits_[i] ^= other.bits_[i]; bits[i] ^= other.bits[i];
return *this; return *this;
} }
BitVector& BitVector::operator-=(BitVector const& other) BitVector& BitVector::operator-=(BitVector const& other)
{ {
assert(Size() >= other.Size()); assert(Size() >= other.Size());
for ( size_type i = 0; i < Blocks(); ++i ) for ( size_type i = 0; i < Blocks(); ++i )
bits_[i] &= ~other.bits_[i]; bits[i] &= ~other.bits[i];
return *this; return *this;
} }
@ -254,7 +287,7 @@ BitVector operator-(BitVector const& x, BitVector const& y)
bool operator==(BitVector const& x, BitVector const& y) bool operator==(BitVector const& x, BitVector const& y)
{ {
return x.num_bits_ == y.num_bits_ && x.bits_ == y.bits_; return x.num_bits == y.num_bits && x.bits == y.bits;
} }
bool operator!=(BitVector const& x, BitVector const& y) bool operator!=(BitVector const& x, BitVector const& y)
@ -265,14 +298,19 @@ bool operator!=(BitVector const& x, BitVector const& y)
bool operator<(BitVector const& x, BitVector const& y) bool operator<(BitVector const& x, BitVector const& y)
{ {
assert(x.Size() == y.Size()); assert(x.Size() == y.Size());
for ( BitVector::size_type r = x.Blocks(); r > 0; --r ) for ( BitVector::size_type r = x.Blocks(); r > 0; --r )
{ {
BitVector::size_type i = r - 1; BitVector::size_type i = r - 1;
if (x.bits_[i] < y.bits_[i])
if ( x.bits[i] < y.bits[i] )
return true; return true;
else if (x.bits_[i] > y.bits_[i])
else if ( x.bits[i] > y.bits[i] )
return false; return false;
} }
return false; return false;
} }
@ -285,19 +323,19 @@ void BitVector::Resize(size_type n, bool value)
block_type block_value = value ? ~block_type(0) : block_type(0); block_type block_value = value ? ~block_type(0) : block_type(0);
if ( required != old ) if ( required != old )
bits_.resize(required, block_value); bits.resize(required, block_value);
if (value && (n > num_bits_) && extra_bits()) if ( value && (n > num_bits) && extra_bits() )
bits_[old - 1] |= (block_value << extra_bits()); bits[old - 1] |= (block_value << extra_bits());
num_bits_ = n; num_bits = n;
zero_unused_bits(); zero_unused_bits();
} }
void BitVector::Clear() void BitVector::Clear()
{ {
bits_.clear(); bits.clear();
num_bits_ = 0; num_bits = 0;
} }
void BitVector::PushBack(bool bit) void BitVector::PushBack(bool bit)
@ -310,109 +348,119 @@ void BitVector::PushBack(bool bit)
void BitVector::Append(block_type block) void BitVector::Append(block_type block)
{ {
size_type excess = extra_bits(); size_type excess = extra_bits();
if ( excess ) if ( excess )
{ {
assert(! Empty()); assert(! Empty());
bits_.push_back(block >> (bits_per_block - excess)); bits.push_back(block >> (bits_per_block - excess));
bits_[Blocks() - 2] |= (block << excess); bits[Blocks() - 2] |= (block << excess);
} }
else else
{ {
bits_.push_back(block); bits.push_back(block);
} }
num_bits_ += bits_per_block;
num_bits += bits_per_block;
} }
BitVector& BitVector::Set(size_type i, bool bit) BitVector& BitVector::Set(size_type i, bool bit)
{ {
assert(i < num_bits_); assert(i < num_bits);
if ( bit ) if ( bit )
bits_[block_index(i)] |= bit_mask(i); bits[block_index(i)] |= bit_mask(i);
else else
Reset(i); Reset(i);
return *this; return *this;
} }
BitVector& BitVector::Set() BitVector& BitVector::Set()
{ {
std::fill(bits_.begin(), bits_.end(), ~block_type(0)); std::fill(bits.begin(), bits.end(), ~block_type(0));
zero_unused_bits(); zero_unused_bits();
return *this; return *this;
} }
BitVector& BitVector::Reset(size_type i) BitVector& BitVector::Reset(size_type i)
{ {
assert(i < num_bits_); assert(i < num_bits);
bits_[block_index(i)] &= ~bit_mask(i); bits[block_index(i)] &= ~bit_mask(i);
return *this; return *this;
} }
BitVector& BitVector::Reset() BitVector& BitVector::Reset()
{ {
std::fill(bits_.begin(), bits_.end(), block_type(0)); std::fill(bits.begin(), bits.end(), block_type(0));
return *this; return *this;
} }
BitVector& BitVector::Flip(size_type i) BitVector& BitVector::Flip(size_type i)
{ {
assert(i < num_bits_); assert(i < num_bits);
bits_[block_index(i)] ^= bit_mask(i); bits[block_index(i)] ^= bit_mask(i);
return *this; return *this;
} }
BitVector& BitVector::Flip() BitVector& BitVector::Flip()
{ {
for (size_type i = 0; i < Blocks(); ++i) for (size_type i = 0; i < Blocks(); ++i)
bits_[i] = ~bits_[i]; bits[i] = ~bits[i];
zero_unused_bits(); zero_unused_bits();
return *this; return *this;
} }
bool BitVector::operator[](size_type i) const bool BitVector::operator[](size_type i) const
{ {
assert(i < num_bits_); assert(i < num_bits);
return (bits_[block_index(i)] & bit_mask(i)) != 0; return (bits[block_index(i)] & bit_mask(i)) != 0;
} }
BitVector::Reference BitVector::operator[](size_type i) BitVector::Reference BitVector::operator[](size_type i)
{ {
assert(i < num_bits_); assert(i < num_bits);
return Reference(bits_[block_index(i)], bit_index(i)); return Reference(bits[block_index(i)], bit_index(i));
} }
BitVector::size_type BitVector::Count() const BitVector::size_type BitVector::Count() const
{ {
std::vector<block_type>::const_iterator first = bits_.begin(); std::vector<block_type>::const_iterator first = bits.begin();
size_t n = 0; size_t n = 0;
size_type length = Blocks(); size_type length = Blocks();
while ( length ) while ( length )
{ {
block_type block = *first; block_type block = *first;
while ( block ) while ( block )
{ {
// TODO: use __popcnt if available. // TODO: use _popcnt if available.
n += count_table[block & ((1u << 8) - 1)]; n += count_table[block & ((1u << 8) - 1)];
block >>= 8; block >>= 8;
} }
++first; ++first;
--length; --length;
} }
return n; return n;
} }
BitVector::size_type BitVector::Blocks() const BitVector::size_type BitVector::Blocks() const
{ {
return bits_.size(); return bits.size();
} }
BitVector::size_type BitVector::Size() const BitVector::size_type BitVector::Size() const
{ {
return num_bits_; return num_bits;
} }
bool BitVector::Empty() const bool BitVector::Empty() const
{ {
return bits_.empty(); return bits.empty();
} }
BitVector::size_type BitVector::FindFirst() const BitVector::size_type BitVector::FindFirst() const
@ -424,9 +472,10 @@ BitVector::size_type BitVector::FindNext(size_type i) const
{ {
if ( i >= (Size() - 1) || Size() == 0 ) if ( i >= (Size() - 1) || Size() == 0 )
return npos; return npos;
++i; ++i;
size_type bi = block_index(i); size_type bi = block_index(i);
block_type block = bits_[bi] & (~block_type(0) << bit_index(i)); block_type block = bits[bi] & (~block_type(0) << bit_index(i));
return block ? bi * bits_per_block + lowest_bit(block) : find_from(bi + 1); return block ? bi * bits_per_block + lowest_bit(block) : find_from(bi + 1);
} }
@ -434,8 +483,10 @@ BitVector::size_type BitVector::lowest_bit(block_type block)
{ {
block_type x = block - (block & (block - 1)); block_type x = block - (block & (block - 1));
size_type log = 0; size_type log = 0;
while (x >>= 1) while (x >>= 1)
++log; ++log;
return log; return log;
} }
@ -447,16 +498,18 @@ BitVector::block_type BitVector::extra_bits() const
void BitVector::zero_unused_bits() void BitVector::zero_unused_bits()
{ {
if ( extra_bits() ) if ( extra_bits() )
bits_.back() &= ~(~block_type(0) << extra_bits()); bits.back() &= ~(~block_type(0) << extra_bits());
} }
BitVector::size_type BitVector::find_from(size_type i) const BitVector::size_type BitVector::find_from(size_type i) const
{ {
while (i < Blocks() && bits_[i] == 0) while (i < Blocks() && bits[i] == 0)
++i; ++i;
if ( i >= Blocks() ) if ( i >= Blocks() )
return npos; return npos;
return i * bits_per_block + lowest_bit(bits_[i]);
return i * bits_per_block + lowest_bit(bits[i]);
} }
bool BitVector::Serialize(SerialInfo* info) const bool BitVector::Serialize(SerialInfo* info) const
@ -466,8 +519,7 @@ bool BitVector::Serialize(SerialInfo* info) const
BitVector* BitVector::Unserialize(UnserialInfo* info) BitVector* BitVector::Unserialize(UnserialInfo* info)
{ {
return reinterpret_cast<BitVector*>( return reinterpret_cast<BitVector*>(SerialObj::Unserialize(info, SER_BITVECTOR));
SerialObj::Unserialize(info, SER_BITVECTOR));
} }
IMPLEMENT_SERIAL(BitVector, SER_BITVECTOR); IMPLEMENT_SERIAL(BitVector, SER_BITVECTOR);
@ -476,14 +528,14 @@ bool BitVector::DoSerialize(SerialInfo* info) const
{ {
DO_SERIALIZE(SER_BITVECTOR, SerialObj); DO_SERIALIZE(SER_BITVECTOR, SerialObj);
if ( ! SERIALIZE(static_cast<uint64>(bits_.size())) ) if ( ! SERIALIZE(static_cast<uint64>(bits.size())) )
return false; return false;
for ( size_t i = 0; i < bits_.size(); ++i ) for ( size_t i = 0; i < bits.size(); ++i )
if ( ! SERIALIZE(static_cast<uint64>(bits_[i])) ) if ( ! SERIALIZE(static_cast<uint64>(bits[i])) )
return false; return false;
return SERIALIZE(static_cast<uint64>(num_bits_)); return SERIALIZE(static_cast<uint64>(num_bits));
} }
bool BitVector::DoUnserialize(UnserialInfo* info) bool BitVector::DoUnserialize(UnserialInfo* info)
@ -494,19 +546,22 @@ bool BitVector::DoUnserialize(UnserialInfo* info)
if ( ! UNSERIALIZE(&size) ) if ( ! UNSERIALIZE(&size) )
return false; return false;
bits_.resize(static_cast<size_t>(size)); bits.resize(static_cast<size_t>(size));
uint64 block;
for ( size_t i = 0; i < bits_.size(); ++i ) for ( size_t i = 0; i < bits.size(); ++i )
{ {
uint64 block;
if ( ! UNSERIALIZE(&block) ) if ( ! UNSERIALIZE(&block) )
return false; return false;
bits_[i] = static_cast<block_type>(block);
bits[i] = static_cast<block_type>(block);
} }
uint64 num_bits; uint64 num_bits;
if ( ! UNSERIALIZE(&num_bits) ) if ( ! UNSERIALIZE(&num_bits) )
return false; return false;
num_bits_ = static_cast<size_type>(num_bits);
num_bits = static_cast<size_type>(num_bits);
return true; return true;
} }

View file

@ -1,8 +1,11 @@
#ifndef BitVector_h // See the file "COPYING" in the main distribution directory for copyright.
#define BitVector_h
#ifndef PROBABILISTIC_BITVECTOR_H
#define PROBABILISTIC_BITVECTOR_H
#include <iterator> #include <iterator>
#include <vector> #include <vector>
#include "SerialObj.h" #include "SerialObj.h"
namespace probabilistic { namespace probabilistic {
@ -14,35 +17,39 @@ class BitVector : public SerialObj {
public: public:
typedef size_t block_type; typedef size_t block_type;
typedef size_t size_type; typedef size_t size_type;
typedef bool const_reference;
static size_type npos; static size_type npos;
static block_type bits_per_block; static block_type bits_per_block;
public:
/** /**
* An lvalue proxy for single bits. * An lvalue proxy for individual bits.
*/ */
class Reference { class Reference {
friend class BitVector;
Reference(block_type& block, block_type i);
public: public:
/**
* Inverts the bit's value.
*/
Reference& Flip(); Reference& Flip();
operator bool() const; operator bool() const;
bool operator~() const; bool operator~() const;
Reference& operator=(bool x); Reference& operator=(bool x);
Reference& operator=(Reference const& other); Reference& operator=(const Reference& other);
Reference& operator|=(bool x); Reference& operator|=(bool x);
Reference& operator&=(bool x); Reference& operator&=(bool x);
Reference& operator^=(bool x); Reference& operator^=(bool x);
Reference& operator-=(bool x); Reference& operator-=(bool x);
private: private:
void operator&(); friend class BitVector;
block_type& block_;
block_type const mask_;
};
typedef bool const_reference; Reference(block_type& block, block_type i);
void operator&();
block_type& block;
const block_type mask;
};
/** /**
* Default-constructs an empty bit vector. * Default-constructs an empty bit vector.
@ -58,12 +65,16 @@ public:
/** /**
* Constructs a bit vector from a sequence of blocks. * Constructs a bit vector from a sequence of blocks.
*
* @param first Start of range
* @param last End of range.
*
*/ */
template <typename InputIterator> template <typename InputIterator>
BitVector(InputIterator first, InputIterator last) BitVector(InputIterator first, InputIterator last)
{ {
bits_.insert(bits_.end(), first, last); bits.insert(bits.end(), first, last);
num_bits_ = bits_.size() * bits_per_block; num_bits = bits.size() * bits_per_block;
} }
/** /**
@ -79,7 +90,7 @@ public:
BitVector& operator=(const BitVector& other); BitVector& operator=(const BitVector& other);
// //
// Bitwise operations // Bitwise operations.
// //
BitVector operator~() const; BitVector operator~() const;
BitVector operator<<(size_type n) const; BitVector operator<<(size_type n) const;
@ -105,6 +116,7 @@ public:
// //
// Basic operations // Basic operations
// //
/** Appends the bits in a sequence of values. /** Appends the bits in a sequence of values.
* @tparam Iterator A forward iterator. * @tparam Iterator A forward iterator.
* @param first An iterator pointing to the first element of the sequence. * @param first An iterator pointing to the first element of the sequence.
@ -121,21 +133,23 @@ public:
typename std::iterator_traits<ForwardIterator>::difference_type delta = typename std::iterator_traits<ForwardIterator>::difference_type delta =
std::distance(first, last); std::distance(first, last);
bits_.reserve(Blocks() + delta); bits.reserve(Blocks() + delta);
if ( excess == 0 ) if ( excess == 0 )
{ {
bits_.back() |= (*first << excess); bits.back() |= (*first << excess);
do
{ do {
block_type b = *first++ >> (bits_per_block - excess); block_type b = *first++ >> (bits_per_block - excess);
bits_.push_back(b | (first == last ? 0 : *first << excess)); bits.push_back(b | (first == last ? 0 : *first << excess));
} while (first != last); } while (first != last);
} }
else else
{ bits.insert(bits.end(), first, last);
bits_.insert(bits_.end(), first, last);
} num_bits += bits_per_block * delta;
num_bits_ += bits_per_block * delta;
} }
/** /**
@ -256,13 +270,48 @@ public:
*/ */
size_type FindNext(size_type i) const; size_type FindNext(size_type i) const;
/**
* Serializes the bit vector.
*
* @param info The serialization information to use.
*
* @return True if successful.
*/
bool Serialize(SerialInfo* info) const; bool Serialize(SerialInfo* info) const;
/**
* Unserialize the bit vector.
*
* @param info The serialization information to use.
*
* @return The unserialized bit vector, or null if an error occurred.
*/
static BitVector* Unserialize(UnserialInfo* info); static BitVector* Unserialize(UnserialInfo* info);
protected: protected:
DECLARE_SERIAL(BitVector); DECLARE_SERIAL(BitVector);
private: private:
/**
* Computes the number of excess/unused bits in the bit vector.
*/
block_type extra_bits() const;
/**
* If the number of bits in the vector is not a multiple of
* bitvector::bits_per_block, then the last block exhibits unused bits which
* this function resets.
*/
void zero_unused_bits();
/**
* Looks for the first 1-bit starting at a given position.
* @param i The block index to start looking.
* @return The bit position of the first 1-bit found at or after block *i*, or
* `bitvector::npos` if no 1-bit exists.
*/
size_type find_from(size_type i) const;
/** /**
* Computes the block index for a given bit position. * Computes the block index for a given bit position.
*/ */
@ -306,28 +355,8 @@ private:
*/ */
static size_type lowest_bit(block_type block); static size_type lowest_bit(block_type block);
/** std::vector<block_type> bits;
* Computes the number of excess/unused bits in the bit vector. size_type num_bits;
*/
block_type extra_bits() const;
/**
* If the number of bits in the vector is not a multiple of
* bitvector::bits_per_block, then the last block exhibits unused bits which
* this function resets.
*/
void zero_unused_bits();
/**
* Looks for the first 1-bit starting at a given position.
* @param i The block index to start looking.
* @return The block index of the first 1-bit starting from *i* or
* `bitvector::npos` if no 1-bit exists.
*/
size_type find_from(size_type i) const;
std::vector<block_type> bits_;
size_type num_bits_;
}; };
} }

View file

@ -1,3 +1,5 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include "BloomFilter.h" #include "BloomFilter.h"
#include <cmath> #include <cmath>
@ -8,19 +10,18 @@
using namespace probabilistic; using namespace probabilistic;
BloomFilter::BloomFilter() BloomFilter::BloomFilter()
: hasher_(NULL)
{ {
hasher = 0;
} }
BloomFilter::BloomFilter(const Hasher* hasher) BloomFilter::BloomFilter(const Hasher* arg_hasher)
: hasher_(hasher)
{ {
hasher = arg_hasher;
} }
BloomFilter::~BloomFilter() BloomFilter::~BloomFilter()
{ {
if ( hasher_ ) delete hasher;
delete hasher_;
} }
bool BloomFilter::Serialize(SerialInfo* info) const bool BloomFilter::Serialize(SerialInfo* info) const
@ -30,33 +31,37 @@ bool BloomFilter::Serialize(SerialInfo* info) const
BloomFilter* BloomFilter::Unserialize(UnserialInfo* info) BloomFilter* BloomFilter::Unserialize(UnserialInfo* info)
{ {
return reinterpret_cast<BloomFilter*>( return reinterpret_cast<BloomFilter*>(SerialObj::Unserialize(info, SER_BLOOMFILTER));
SerialObj::Unserialize(info, SER_BLOOMFILTER));
} }
bool BloomFilter::DoSerialize(SerialInfo* info) const bool BloomFilter::DoSerialize(SerialInfo* info) const
{ {
DO_SERIALIZE(SER_BLOOMFILTER, SerialObj); DO_SERIALIZE(SER_BLOOMFILTER, SerialObj);
if ( ! SERIALIZE(static_cast<uint16>(hasher_->K())) )
if ( ! SERIALIZE(static_cast<uint16>(hasher->K())) )
return false; return false;
return SERIALIZE_STR(hasher_->Name().c_str(), hasher_->Name().size());
return SERIALIZE_STR(hasher->Name().c_str(), hasher->Name().size());
} }
bool BloomFilter::DoUnserialize(UnserialInfo* info) bool BloomFilter::DoUnserialize(UnserialInfo* info)
{ {
DO_UNSERIALIZE(SerialObj); DO_UNSERIALIZE(SerialObj);
uint16 k; uint16 k;
if ( ! UNSERIALIZE(&k) ) if ( ! UNSERIALIZE(&k) )
return false; return false;
const char* name; const char* name;
if ( ! UNSERIALIZE_STR(&name, 0) ) if ( ! UNSERIALIZE_STR(&name, 0) )
return false; return false;
hasher_ = Hasher::Create(k, name);
hasher = Hasher::Create(k, name);
delete [] name; delete [] name;
return true; return true;
} }
size_t BasicBloomFilter::M(double fp, size_t capacity) size_t BasicBloomFilter::M(double fp, size_t capacity)
{ {
double ln2 = std::log(2); double ln2 = std::log(2);
@ -72,26 +77,25 @@ size_t BasicBloomFilter::K(size_t cells, size_t capacity)
BasicBloomFilter* BasicBloomFilter::Merge(const BasicBloomFilter* x, BasicBloomFilter* BasicBloomFilter::Merge(const BasicBloomFilter* x,
const BasicBloomFilter* y) const BasicBloomFilter* y)
{ {
if ( ! x->hasher_->Equals(y->hasher_) ) if ( ! x->hasher->Equals(y->hasher) )
{ reporter->InternalError("incompatible hashers during BasicBloomFilter merge");
reporter->InternalError("incompatible hashers during Bloom filter merge");
return NULL;
}
BasicBloomFilter* result = new BasicBloomFilter(); BasicBloomFilter* result = new BasicBloomFilter();
result->hasher_ = x->hasher_->Clone(); result->hasher = x->hasher->Clone();
result->bits_ = new BitVector(*x->bits_ | *y->bits_); result->bits = new BitVector(*x->bits | *y->bits);
return result; return result;
} }
BasicBloomFilter::BasicBloomFilter() BasicBloomFilter::BasicBloomFilter()
: bits_(NULL)
{ {
bits = 0;
} }
BasicBloomFilter::BasicBloomFilter(const Hasher* hasher, size_t cells) BasicBloomFilter::BasicBloomFilter(const Hasher* hasher, size_t cells)
: BloomFilter(hasher), : BloomFilter(hasher)
bits_(new BitVector(cells))
{ {
bits = new BitVector(cells);
} }
IMPLEMENT_SERIAL(BasicBloomFilter, SER_BASICBLOOMFILTER) IMPLEMENT_SERIAL(BasicBloomFilter, SER_BASICBLOOMFILTER)
@ -99,90 +103,91 @@ IMPLEMENT_SERIAL(BasicBloomFilter, SER_BASICBLOOMFILTER)
bool BasicBloomFilter::DoSerialize(SerialInfo* info) const bool BasicBloomFilter::DoSerialize(SerialInfo* info) const
{ {
DO_SERIALIZE(SER_BASICBLOOMFILTER, BloomFilter); DO_SERIALIZE(SER_BASICBLOOMFILTER, BloomFilter);
return bits_->Serialize(info); return bits->Serialize(info);
} }
bool BasicBloomFilter::DoUnserialize(UnserialInfo* info) bool BasicBloomFilter::DoUnserialize(UnserialInfo* info)
{ {
DO_UNSERIALIZE(BloomFilter); DO_UNSERIALIZE(BloomFilter);
bits_ = BitVector::Unserialize(info); bits = BitVector::Unserialize(info);
return bits_ != NULL; return (bits != 0);
} }
void BasicBloomFilter::AddImpl(const Hasher::digest_vector& h) void BasicBloomFilter::AddImpl(const Hasher::digest_vector& h)
{ {
for ( size_t i = 0; i < h.size(); ++i ) for ( size_t i = 0; i < h.size(); ++i )
bits_->Set(h[i] % bits_->Size()); bits->Set(h[i] % bits->Size());
} }
size_t BasicBloomFilter::CountImpl(const Hasher::digest_vector& h) const size_t BasicBloomFilter::CountImpl(const Hasher::digest_vector& h) const
{ {
for ( size_t i = 0; i < h.size(); ++i ) for ( size_t i = 0; i < h.size(); ++i )
if ( ! (*bits_)[h[i] % bits_->Size()] ) {
if ( ! (*bits)[h[i] % bits->Size()] )
return 0; return 0;
return 1;
} }
return 1;
}
CountingBloomFilter* CountingBloomFilter::Merge(const CountingBloomFilter* x, CountingBloomFilter* CountingBloomFilter::Merge(const CountingBloomFilter* x,
const CountingBloomFilter* y) const CountingBloomFilter* y)
{ {
if ( ! x->hasher_->Equals(y->hasher_) ) if ( ! x->hasher->Equals(y->hasher) )
{ reporter->InternalError("incompatible hashers during CountingBloomFilter merge");
reporter->InternalError("incompatible hashers during Bloom filter merge");
return NULL;
}
CountingBloomFilter* result = new CountingBloomFilter(); CountingBloomFilter* result = new CountingBloomFilter();
result->hasher_ = x->hasher_->Clone(); result->hasher = x->hasher->Clone();
result->cells_ = new CounterVector(*x->cells_ | *y->cells_); result->cells = new CounterVector(*x->cells | *y->cells);
return result; return result;
} }
CountingBloomFilter::CountingBloomFilter() CountingBloomFilter::CountingBloomFilter()
: cells_(NULL)
{ {
cells = 0;
} }
CountingBloomFilter::CountingBloomFilter(const Hasher* hasher, CountingBloomFilter::CountingBloomFilter(const Hasher* hasher,
size_t cells, size_t width) size_t arg_cells, size_t width)
: BloomFilter(hasher), : BloomFilter(hasher)
cells_(new CounterVector(width, cells))
{ {
cells = new CounterVector(width, arg_cells);
} }
IMPLEMENT_SERIAL(CountingBloomFilter, SER_COUNTINGBLOOMFILTER) IMPLEMENT_SERIAL(CountingBloomFilter, SER_COUNTINGBLOOMFILTER)
bool CountingBloomFilter::DoSerialize(SerialInfo* info) const bool CountingBloomFilter::DoSerialize(SerialInfo* info) const
{ {
DO_SERIALIZE(SER_COUNTINGBLOOMFILTER, BloomFilter); DO_SERIALIZE(SER_COUNTINGBLOOMFILTER, BloomFilter);
return cells_->Serialize(info); return cells->Serialize(info);
} }
bool CountingBloomFilter::DoUnserialize(UnserialInfo* info) bool CountingBloomFilter::DoUnserialize(UnserialInfo* info)
{ {
DO_UNSERIALIZE(BloomFilter); DO_UNSERIALIZE(BloomFilter);
cells_ = CounterVector::Unserialize(info); cells = CounterVector::Unserialize(info);
return cells_ != NULL; return (cells != 0);
} }
// TODO: Use partitioning in add/count to allow for reusing CMS bounds. // TODO: Use partitioning in add/count to allow for reusing CMS bounds.
void CountingBloomFilter::AddImpl(const Hasher::digest_vector& h) void CountingBloomFilter::AddImpl(const Hasher::digest_vector& h)
{ {
for ( size_t i = 0; i < h.size(); ++i ) for ( size_t i = 0; i < h.size(); ++i )
cells_->Increment(h[i] % cells_->Size()); cells->Increment(h[i] % cells->Size());
} }
size_t CountingBloomFilter::CountImpl(const Hasher::digest_vector& h) const size_t CountingBloomFilter::CountImpl(const Hasher::digest_vector& h) const
{ {
CounterVector::size_type min = CounterVector::size_type min =
std::numeric_limits<CounterVector::size_type>::max(); std::numeric_limits<CounterVector::size_type>::max();
for ( size_t i = 0; i < h.size(); ++i ) for ( size_t i = 0; i < h.size(); ++i )
{ {
CounterVector::size_type cnt = cells_->Count(h[i] % cells_->Size()); CounterVector::size_type cnt = cells->Count(h[i] % cells->Size());
if ( cnt < min ) if ( cnt < min )
min = cnt; min = cnt;
} }
return min; return min;
} }

View file

@ -1,5 +1,7 @@
#ifndef BloomFilter_h // See the file "COPYING" in the main distribution directory for copyright.
#define BloomFilter_h
#ifndef PROBABILISTIC_BLOOMFILTER_H
#define PROBABILISTIC_BLOOMFILTER_H
#include <vector> #include <vector>
#include "BitVector.h" #include "BitVector.h"
@ -11,11 +13,15 @@ class CounterVector;
/** /**
* The abstract base class for Bloom filters. * The abstract base class for Bloom filters.
*
* At this point we won't let the user choose the hasher, but we might open
* up the interface in the future.
*/ */
class BloomFilter : public SerialObj { class BloomFilter : public SerialObj {
public: public:
// At this point we won't let the user choose the hasher, but we might /**
// open up the interface in the future. * Destructor.
*/
virtual ~BloomFilter(); virtual ~BloomFilter();
/** /**
@ -25,7 +31,7 @@ public:
template <typename T> template <typename T>
void Add(const T& x) void Add(const T& x)
{ {
AddImpl((*hasher_)(x)); AddImpl((*hasher)(x));
} }
/** /**
@ -38,15 +44,34 @@ public:
template <typename T> template <typename T>
size_t Count(const T& x) const size_t Count(const T& x) const
{ {
return CountImpl((*hasher_)(x)); return CountImpl((*hasher)(x));
} }
/**
* Serializes the Bloom filter.
*
* @param info The serialization information to use.
*
* @return True if successful.
*/
bool Serialize(SerialInfo* info) const; bool Serialize(SerialInfo* info) const;
/**
* Unserializes a Bloom filter.
*
* @param info The serialization information to use.
*
* @return The unserialized Bloom filter, or null if an error
* occurred.
*/
static BloomFilter* Unserialize(UnserialInfo* info); static BloomFilter* Unserialize(UnserialInfo* info);
protected: protected:
DECLARE_ABSTRACT_SERIAL(BloomFilter); DECLARE_ABSTRACT_SERIAL(BloomFilter);
/**
* Default constructor.
*/
BloomFilter(); BloomFilter();
/** /**
@ -56,10 +81,26 @@ protected:
*/ */
BloomFilter(const Hasher* hasher); BloomFilter(const Hasher* hasher);
/**
* Abstract method for implementing the *Add* operation.
*
* @param hashes A set of *k* hashes for the item to add, computed by
* the internal hasher object.
*
*/
virtual void AddImpl(const Hasher::digest_vector& hashes) = 0; virtual void AddImpl(const Hasher::digest_vector& hashes) = 0;
/**
* Abstract method for implementing the *Count* operation.
*
* @param hashes A set of *k* hashes for the item to count, computed by
* the internal hasher object.
*
* @return Returns the counter associated with the hashed element.
*/
virtual size_t CountImpl(const Hasher::digest_vector& hashes) const = 0; virtual size_t CountImpl(const Hasher::digest_vector& hashes) const = 0;
const Hasher* hasher_; const Hasher* hasher;
}; };
/** /**
@ -68,15 +109,28 @@ protected:
class BasicBloomFilter : public BloomFilter { class BasicBloomFilter : public BloomFilter {
public: public:
/** /**
* Computes the number of cells based a given false-positive rate and * Constructs a basic Bloom filter with a given number of cells. The
* capacity. In the literature, this parameter often has the name *M*. * ideal number of cells can be computed with *M*.
* *
* @param fp The false-positive rate. * @param hasher The hasher to use. The ideal number of hash
* functions can be computed with *K*.
* *
* @param capacity The number of exepected elements. * @param cells The number of cells.
*/
BasicBloomFilter(const Hasher* hasher, size_t cells);
/**
* Computes the number of cells based on a given false positive rate
* and capacity. In the literature, this parameter often has the name
* *M*.
* *
* Returns: The number cells needed to support a false-positive rate of *fp* * @param fp The false positive rate.
* with at most *capacity* elements. *
* @param capacity The expected number of elements that will be
* stored.
*
* Returns: The number of cells needed to support a false positive rate
* of *fp* with at most *capacity* elements.
*/ */
static size_t M(double fp, size_t capacity); static size_t M(double fp, size_t capacity);
@ -88,29 +142,33 @@ public:
* *
* @param capacity The maximum number of elements. * @param capacity The maximum number of elements.
* *
* Returns: the optimal number of hash functions for a false-positive rate of * Returns: the optimal number of hash functions for a false-positive
* *fp* for at most *capacity* elements. * rate of *fp* for at most *capacity* elements.
*/ */
static size_t K(size_t cells, size_t capacity); static size_t K(size_t cells, size_t capacity);
/**
* Merges two basic Bloom filters.
*
* @return The merged Bloom filter.
*/
static BasicBloomFilter* Merge(const BasicBloomFilter* x, static BasicBloomFilter* Merge(const BasicBloomFilter* x,
const BasicBloomFilter* y); const BasicBloomFilter* y);
/**
* Constructs a basic Bloom filter with a given number of cells and capacity.
*/
BasicBloomFilter(const Hasher* hasher, size_t cells);
protected: protected:
DECLARE_SERIAL(BasicBloomFilter); DECLARE_SERIAL(BasicBloomFilter);
/**
* Default constructor.
*/
BasicBloomFilter(); BasicBloomFilter();
// Overridden from BloomFilter.
virtual void AddImpl(const Hasher::digest_vector& h); virtual void AddImpl(const Hasher::digest_vector& h);
virtual size_t CountImpl(const Hasher::digest_vector& h) const; virtual size_t CountImpl(const Hasher::digest_vector& h) const;
private: private:
BitVector* bits_; BitVector* bits;
}; };
/** /**
@ -118,21 +176,40 @@ private:
*/ */
class CountingBloomFilter : public BloomFilter { class CountingBloomFilter : public BloomFilter {
public: public:
/**
* Constructs a counting Bloom filter.
*
* @param hasher The hasher to use. The ideal number of hash
* functions can be computed with *K*.
*
* @param cells The number of cells to use.
*
* @param width The maximal bit-width of counter values.
*/
CountingBloomFilter(const Hasher* hasher, size_t cells, size_t width);
/**
* Merges two counting Bloom filters.
*
* @return The merged Bloom filter.
*/
static CountingBloomFilter* Merge(const CountingBloomFilter* x, static CountingBloomFilter* Merge(const CountingBloomFilter* x,
const CountingBloomFilter* y); const CountingBloomFilter* y);
CountingBloomFilter(const Hasher* hasher, size_t cells, size_t width);
protected: protected:
DECLARE_SERIAL(CountingBloomFilter); DECLARE_SERIAL(CountingBloomFilter);
/**
* Default constructor.
*/
CountingBloomFilter(); CountingBloomFilter();
// Overridden from BloomFilter.
virtual void AddImpl(const Hasher::digest_vector& h); virtual void AddImpl(const Hasher::digest_vector& h);
virtual size_t CountImpl(const Hasher::digest_vector& h) const; virtual size_t CountImpl(const Hasher::digest_vector& h) const;
private: private:
CounterVector* cells_; CounterVector* cells;
}; };
} }

View file

@ -1,3 +1,5 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include "CounterVector.h" #include "CounterVector.h"
#include <limits> #include <limits>
@ -6,39 +8,45 @@
using namespace probabilistic; using namespace probabilistic;
CounterVector::CounterVector(size_t width, size_t cells) CounterVector::CounterVector(size_t arg_width, size_t cells)
: bits_(new BitVector(width * cells)),
width_(width)
{ {
bits = new BitVector(arg_width * cells);
width = arg_width;
} }
CounterVector::CounterVector(const CounterVector& other) CounterVector::CounterVector(const CounterVector& other)
: bits_(new BitVector(*other.bits_)),
width_(other.width_)
{ {
bits = new BitVector(*other.bits);
width = other.width;
} }
CounterVector::~CounterVector() CounterVector::~CounterVector()
{ {
delete bits_; delete bits;
} }
bool CounterVector::Increment(size_type cell, count_type value) bool CounterVector::Increment(size_type cell, count_type value)
{ {
assert(cell < Size()); assert(cell < Size());
assert(value != 0); assert(value != 0);
size_t lsb = cell * width_;
size_t lsb = cell * width;
bool carry = false; bool carry = false;
for ( size_t i = 0; i < width_; ++i )
for ( size_t i = 0; i < width; ++i )
{ {
bool b1 = (*bits_)[lsb + i]; bool b1 = (*bits)[lsb + i];
bool b2 = value & (1 << i); bool b2 = value & (1 << i);
(*bits_)[lsb + i] = b1 ^ b2 ^ carry; (*bits)[lsb + i] = b1 ^ b2 ^ carry;
carry = ( b1 && b2 ) || ( carry && ( b1 != b2 ) ); carry = ( b1 && b2 ) || ( carry && ( b1 != b2 ) );
} }
if ( carry ) if ( carry )
for ( size_t i = 0; i < width_; ++i ) {
bits_->Set(lsb + i); for ( size_t i = 0; i < width; ++i )
bits->Set(lsb + i);
}
return ! carry; return ! carry;
} }
@ -46,65 +54,77 @@ bool CounterVector::Decrement(size_type cell, count_type value)
{ {
assert(cell < Size()); assert(cell < Size());
assert(value != 0); assert(value != 0);
value = ~value + 1; // A - B := A + ~B + 1 value = ~value + 1; // A - B := A + ~B + 1
bool carry = false; bool carry = false;
size_t lsb = cell * width_; size_t lsb = cell * width;
for ( size_t i = 0; i < width_; ++i )
for ( size_t i = 0; i < width; ++i )
{ {
bool b1 = (*bits_)[lsb + i]; bool b1 = (*bits)[lsb + i];
bool b2 = value & (1 << i); bool b2 = value & (1 << i);
(*bits_)[lsb + i] = b1 ^ b2 ^ carry; (*bits)[lsb + i] = b1 ^ b2 ^ carry;
carry = ( b1 && b2 ) || ( carry && ( b1 != b2 ) ); carry = ( b1 && b2 ) || ( carry && ( b1 != b2 ) );
} }
return carry; return carry;
} }
CounterVector::count_type CounterVector::Count(size_type cell) const CounterVector::count_type CounterVector::Count(size_type cell) const
{ {
assert(cell < Size()); assert(cell < Size());
size_t cnt = 0, order = 1; size_t cnt = 0, order = 1;
size_t lsb = cell * width_; size_t lsb = cell * width;
for (size_t i = lsb; i < lsb + width_; ++i, order <<= 1)
if ((*bits_)[i]) for ( size_t i = lsb; i < lsb + width; ++i, order <<= 1 )
if ( (*bits)[i] )
cnt |= order; cnt |= order;
return cnt; return cnt;
} }
CounterVector::size_type CounterVector::Size() const CounterVector::size_type CounterVector::Size() const
{ {
return bits_->Size() / width_; return bits->Size() / width;
} }
size_t CounterVector::Width() const size_t CounterVector::Width() const
{ {
return width_; return width;
} }
size_t CounterVector::Max() const size_t CounterVector::Max() const
{ {
return std::numeric_limits<size_t>::max() return std::numeric_limits<size_t>::max()
>> (std::numeric_limits<size_t>::digits - width_); >> (std::numeric_limits<size_t>::digits - width);
} }
CounterVector& CounterVector::Merge(const CounterVector& other) CounterVector& CounterVector::Merge(const CounterVector& other)
{ {
assert(Size() == other.Size()); assert(Size() == other.Size());
assert(Width() == other.Width()); assert(Width() == other.Width());
for ( size_t cell = 0; cell < Size(); ++cell ) for ( size_t cell = 0; cell < Size(); ++cell )
{ {
size_t lsb = cell * width_; size_t lsb = cell * width;
bool carry = false; bool carry = false;
for ( size_t i = 0; i < width_; ++i )
for ( size_t i = 0; i < width; ++i )
{ {
bool b1 = (*bits_)[lsb + i]; bool b1 = (*bits)[lsb + i];
bool b2 = (*other.bits_)[lsb + i]; bool b2 = (*other.bits)[lsb + i];
(*bits_)[lsb + i] = b1 ^ b2 ^ carry; (*bits)[lsb + i] = b1 ^ b2 ^ carry;
carry = ( b1 && b2 ) || ( carry && ( b1 != b2 ) ); carry = ( b1 && b2 ) || ( carry && ( b1 != b2 ) );
} }
if ( carry ) if ( carry )
for ( size_t i = 0; i < width_; ++i ) {
bits_->Set(lsb + i); for ( size_t i = 0; i < width; ++i )
bits->Set(lsb + i);
} }
}
return *this; return *this;
} }
@ -130,8 +150,7 @@ bool CounterVector::Serialize(SerialInfo* info) const
CounterVector* CounterVector::Unserialize(UnserialInfo* info) CounterVector* CounterVector::Unserialize(UnserialInfo* info)
{ {
return reinterpret_cast<CounterVector*>( return reinterpret_cast<CounterVector*>(SerialObj::Unserialize(info, SER_COUNTERVECTOR));
SerialObj::Unserialize(info, SER_COUNTERVECTOR));
} }
IMPLEMENT_SERIAL(CounterVector, SER_COUNTERVECTOR) IMPLEMENT_SERIAL(CounterVector, SER_COUNTERVECTOR)
@ -139,21 +158,26 @@ IMPLEMENT_SERIAL(CounterVector, SER_COUNTERVECTOR)
bool CounterVector::DoSerialize(SerialInfo* info) const bool CounterVector::DoSerialize(SerialInfo* info) const
{ {
DO_SERIALIZE(SER_COUNTERVECTOR, SerialObj); DO_SERIALIZE(SER_COUNTERVECTOR, SerialObj);
if ( ! bits_->Serialize(info) )
if ( ! bits->Serialize(info) )
return false; return false;
return SERIALIZE(static_cast<uint64>(width_));
return SERIALIZE(static_cast<uint64>(width));
} }
bool CounterVector::DoUnserialize(UnserialInfo* info) bool CounterVector::DoUnserialize(UnserialInfo* info)
{ {
DO_UNSERIALIZE(SerialObj); DO_UNSERIALIZE(SerialObj);
bits_ = BitVector::Unserialize(info);
if ( ! bits_ ) bits = BitVector::Unserialize(info);
if ( ! bits )
return false; return false;
uint64 width; uint64 width;
if ( ! UNSERIALIZE(&width) ) if ( ! UNSERIALIZE(&width) )
return false; return false;
width_ = static_cast<size_t>(width);
width = static_cast<size_t>(width);
return true; return true;
} }

View file

@ -1,5 +1,7 @@
#ifndef CounterVector_h // See the file "COPYING" in the main distribution directory for copyright.
#define CounterVector_h
#ifndef PROBABILISTIC_COUNTERVECTOR_H
#define PROBABILISTIC_COUNTERVECTOR_H
#include "SerialObj.h" #include "SerialObj.h"
@ -8,10 +10,9 @@ namespace probabilistic {
class BitVector; class BitVector;
/** /**
* A vector of counters, each of which have a fixed number of bits. * A vector of counters, each of which has a fixed number of bits.
*/ */
class CounterVector : public SerialObj { class CounterVector : public SerialObj {
CounterVector& operator=(const CounterVector&);
public: public:
typedef size_t size_type; typedef size_t size_type;
typedef uint64 count_type; typedef uint64 count_type;
@ -34,6 +35,9 @@ public:
*/ */
CounterVector(const CounterVector& other); CounterVector(const CounterVector& other);
/**
* Destructor.
*/
~CounterVector(); ~CounterVector();
/** /**
@ -95,8 +99,8 @@ public:
size_t Max() const; size_t Max() const;
/** /**
* Merges another counter vector into this instance by *adding* the counters * Merges another counter vector into this instance by *adding* the
* of each cells. * counters of each cells.
* *
* @param other The counter vector to merge into this instance. * @param other The counter vector to merge into this instance.
* *
@ -111,20 +115,38 @@ public:
*/ */
CounterVector& operator|=(const CounterVector& other); CounterVector& operator|=(const CounterVector& other);
friend CounterVector operator|(const CounterVector& x, /**
const CounterVector& y); * Serializes the counter vector.
*
* @param info The serialization information to use.
*
* @return True if successful.
*/
bool Serialize(SerialInfo* info) const; bool Serialize(SerialInfo* info) const;
/**
* Unserialize the counter vector.
*
* @param info The serialization information to use.
*
* @return The unserialized counter vector, or null if an error
* occurred.
*/
static CounterVector* Unserialize(UnserialInfo* info); static CounterVector* Unserialize(UnserialInfo* info);
protected: protected:
DECLARE_SERIAL(CounterVector); friend CounterVector operator|(const CounterVector& x,
const CounterVector& y);
CounterVector() { } CounterVector() { }
DECLARE_SERIAL(CounterVector);
private: private:
BitVector* bits_; CounterVector& operator=(const CounterVector&); // Disable.
size_t width_;
BitVector* bits;
size_t width;
}; };
} }

View file

@ -1,66 +1,70 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include <typeinfo> #include <typeinfo>
#include "Hasher.h" #include "Hasher.h"
#include "digest.h" #include "digest.h"
using namespace probabilistic; using namespace probabilistic;
Hasher::UHF::UHF(size_t seed, const std::string& extra) UHF::UHF(size_t seed, const std::string& extra)
: h_(compute_seed(seed, extra)) : h(compute_seed(seed, extra))
{ {
} }
Hasher::digest Hasher::UHF::hash(const void* x, size_t n) const Hasher::digest UHF::hash(const void* x, size_t n) const
{ {
assert(n <= UHASH_KEY_SIZE); assert(n <= UHASH_KEY_SIZE);
return n == 0 ? 0 : h_(x, n); return n == 0 ? 0 : h(x, n);
} }
size_t Hasher::UHF::compute_seed(size_t seed, const std::string& extra) size_t UHF::compute_seed(size_t seed, const std::string& extra)
{ {
u_char buf[SHA256_DIGEST_LENGTH]; u_char buf[SHA256_DIGEST_LENGTH];
SHA256_CTX ctx; SHA256_CTX ctx;
sha256_init(&ctx); sha256_init(&ctx);
if ( extra.empty() ) if ( extra.empty() )
{ {
unsigned int first_seed = initial_seed(); unsigned int first_seed = initial_seed();
sha256_update(&ctx, &first_seed, sizeof(first_seed)); sha256_update(&ctx, &first_seed, sizeof(first_seed));
} }
else else
{
sha256_update(&ctx, extra.c_str(), extra.size()); sha256_update(&ctx, extra.c_str(), extra.size());
}
sha256_update(&ctx, &seed, sizeof(seed)); sha256_update(&ctx, &seed, sizeof(seed));
sha256_final(&ctx, buf); sha256_final(&ctx, buf);
// Take the first sizeof(size_t) bytes as seed. // Take the first sizeof(size_t) bytes as seed.
return *reinterpret_cast<size_t*>(buf); return *reinterpret_cast<size_t*>(buf);
} }
Hasher* Hasher::Create(size_t k, const std::string& name) Hasher* Hasher::Create(size_t k, const std::string& name)
{ {
return new DefaultHasher(k, name); return new DefaultHasher(k, name);
} }
Hasher::Hasher(size_t k, const std::string& name) Hasher::Hasher(size_t k, const std::string& arg_name)
: k_(k), name_(name) : k(k)
{ {
name = arg_name;
} }
DefaultHasher::DefaultHasher(size_t k, const std::string& name) DefaultHasher::DefaultHasher(size_t k, const std::string& name)
: Hasher(k, name) : Hasher(k, name)
{ {
for ( size_t i = 0; i < k; ++i ) for ( size_t i = 0; i < k; ++i )
hash_functions_.push_back(UHF(i, name)); hash_functions.push_back(UHF(i, name));
} }
Hasher::digest_vector DefaultHasher::Hash(const void* x, size_t n) const Hasher::digest_vector DefaultHasher::Hash(const void* x, size_t n) const
{ {
digest_vector h(K(), 0); digest_vector h(K(), 0);
for ( size_t i = 0; i < h.size(); ++i ) for ( size_t i = 0; i < h.size(); ++i )
h[i] = hash_functions_[i](x, n); h[i] = hash_functions[i](x, n);
return h; return h;
} }
@ -73,24 +77,25 @@ bool DefaultHasher::Equals(const Hasher* other) const
{ {
if ( typeid(*this) != typeid(*other) ) if ( typeid(*this) != typeid(*other) )
return false; return false;
const DefaultHasher* o = static_cast<const DefaultHasher*>(other); const DefaultHasher* o = static_cast<const DefaultHasher*>(other);
return hash_functions_ == o->hash_functions_; return hash_functions == o->hash_functions;
} }
DoubleHasher::DoubleHasher(size_t k, const std::string& name) DoubleHasher::DoubleHasher(size_t k, const std::string& name)
: Hasher(k, name), : Hasher(k, name), h1(1, name), h2(2, name)
h1_(1, name),
h2_(2, name)
{ {
} }
Hasher::digest_vector DoubleHasher::Hash(const void* x, size_t n) const Hasher::digest_vector DoubleHasher::Hash(const void* x, size_t n) const
{ {
digest h1 = h1_(x, n); digest d1 = h1(x, n);
digest h2 = h2_(x, n); digest d2 = h2(x, n);
digest_vector h(K(), 0); digest_vector h(K(), 0);
for ( size_t i = 0; i < h.size(); ++i ) for ( size_t i = 0; i < h.size(); ++i )
h[i] = h1 + i * h2; h[i] = d1 + i * d2;
return h; return h;
} }
@ -103,7 +108,7 @@ bool DoubleHasher::Equals(const Hasher* other) const
{ {
if ( typeid(*this) != typeid(*other) ) if ( typeid(*this) != typeid(*other) )
return false; return false;
const DoubleHasher* o = static_cast<const DoubleHasher*>(other);
return h1_ == o->h1_ && h2_ == o->h2_;
}
const DoubleHasher* o = static_cast<const DoubleHasher*>(other);
return h1 == o->h1 && h2 == o->h2;
}

View file

@ -1,5 +1,7 @@
#ifndef Hasher_h // See the file "COPYING" in the main distribution directory for copyright.
#define Hasher_h
#ifndef PROBABILISTIC_HASHER_H
#define PROBABILISTIC_HASHER_H
#include "Hash.h" #include "Hash.h"
#include "H3.h" #include "H3.h"
@ -7,8 +9,8 @@
namespace probabilistic { namespace probabilistic {
/** /**
* The abstract base class for hashers, i.e., constructs which hash elements * Abstract base class for hashers. A hasher creates a family of hash
* *k* times. * functions to hash an element *k* times.
*/ */
class Hasher { class Hasher {
public: public:
@ -16,63 +18,127 @@ public:
typedef std::vector<digest> digest_vector; typedef std::vector<digest> digest_vector;
/** /**
* Constructs the hashing policy used by the implementation. * Destructor.
*
* @todo This factory function exists because the HashingPolicy class
* hierarchy is not yet serializable.
*/ */
static Hasher* Create(size_t k, const std::string& name);
virtual ~Hasher() { } virtual ~Hasher() { }
/**
* Computes hash values for an element.
*
* @param x The element to hash.
*
* @return Vector of *k* hash values.
*/
template <typename T> template <typename T>
digest_vector operator()(const T& x) const digest_vector operator()(const T& x) const
{ {
return Hash(&x, sizeof(T)); return Hash(&x, sizeof(T));
} }
/**
* Computes the hashes for a set of bytes.
*
* @param x Pointer to first byte to hash.
*
* @param n Number of bytes to hash.
*
* @return Vector of *k* hash values.
*
*/
virtual digest_vector Hash(const void* x, size_t n) const = 0; virtual digest_vector Hash(const void* x, size_t n) const = 0;
/**
* Returns a deep copy of the hasher.
*/
virtual Hasher* Clone() const = 0; virtual Hasher* Clone() const = 0;
/**
* Returns true if two hashers are identical.
*/
virtual bool Equals(const Hasher* other) const = 0; virtual bool Equals(const Hasher* other) const = 0;
size_t K() const { return k_; } /**
const std::string& Name() const { return name_; } * Returns the number *k* of hash functions the hasher applies.
*/
size_t K() const { return k; }
/**
* Returns the hasher's name. TODO: What's this?
*/
const std::string& Name() const { return name; }
/**
* Constructs the hasher used by the implementation. This hardcodes a
* specific hashing policy. It exists only because the HashingPolicy
* class hierarchy is not yet serializable.
*
* @param k The number of hash functions to apply.
*
* @param name The hasher's name.
*
* @return Returns a new hasher instance.
*/
static Hasher* Create(size_t k, const std::string& name);
protected: protected:
Hasher(size_t k, const std::string& name);
private:
const size_t k;
std::string name;
};
/** /**
* A universal hash function family. * A universal hash function family. This is a helper class that Hasher
* implementations can use in their implementation.
*/ */
class UHF { class UHF {
public: public:
/** /**
* Constructs an H3 hash function seeded with a given seed and an optional * Constructs an H3 hash function seeded with a given seed and an
* extra seed to replace the initial Bro seed. * optional extra seed to replace the initial Bro seed.
* *
* @param seed The seed to use for this instance. * @param seed The seed to use for this instance.
* *
* @param extra If not empty, this parameter replaces the initial seed to * @param extra If not empty, this parameter replaces the initial
* compute the seed for t to compute the * seed to compute the seed for t to compute the seed NUL-terminated
* seed * string as additional seed.
* NUL-terminated string as additional seed.
*/ */
UHF(size_t seed, const std::string& extra = ""); UHF(size_t seed, const std::string& extra = "");
template <typename T> template <typename T>
digest operator()(const T& x) const Hasher::digest operator()(const T& x) const
{ {
return hash(&x, sizeof(T)); return hash(&x, sizeof(T));
} }
digest operator()(const void* x, size_t n) const /**
* Computes hash values for an element.
*
* @param x The element to hash.
*
* @return The hash value (a single digest) for the element.
*/
Hasher::digest operator()(const void* x, size_t n) const
{ {
return hash(x, n); return hash(x, n);
} }
/**
* Computes the hashes for a set of bytes.
*
* @param x Pointer to first byte to hash.
*
* @param n Number of bytes to hash.
*
* @return The hash value (a single digest) for the given bytes.
*
*/
Hasher::digest hash(const void* x, size_t n) const;
friend bool operator==(const UHF& x, const UHF& y) friend bool operator==(const UHF& x, const UHF& y)
{ {
return x.h_ == y.h_; return x.h == y.h;
} }
friend bool operator!=(const UHF& x, const UHF& y) friend bool operator!=(const UHF& x, const UHF& y)
@ -80,50 +146,60 @@ protected:
return ! (x == y); return ! (x == y);
} }
digest hash(const void* x, size_t n) const;
private: private:
static size_t compute_seed(size_t seed, const std::string& extra); static size_t compute_seed(size_t seed, const std::string& extra);
H3<digest, UHASH_KEY_SIZE> h_; H3<Hasher::digest, UHASH_KEY_SIZE> h;
}; };
Hasher(size_t k, const std::string& name);
private:
const size_t k_;
std::string name_;
};
/** /**
* The default hashing policy. Performs *k* hash function computations. * A hasher implementing the default hashing policy. Uses *k* separate hash
* functions internally.
*/ */
class DefaultHasher : public Hasher { class DefaultHasher : public Hasher {
public: public:
/**
* Constructor for a hasher with *k* hash functions.
*
* @param k The number of hash functions to use.
*
* @param name The name of the hasher.
*/
DefaultHasher(size_t k, const std::string& name); DefaultHasher(size_t k, const std::string& name);
// Overridden from Hasher.
virtual digest_vector Hash(const void* x, size_t n) const /* final */; virtual digest_vector Hash(const void* x, size_t n) const /* final */;
virtual DefaultHasher* Clone() const /* final */; virtual DefaultHasher* Clone() const /* final */;
virtual bool Equals(const Hasher* other) const /* final */; virtual bool Equals(const Hasher* other) const /* final */;
private: private:
std::vector<UHF> hash_functions_; std::vector<UHF> hash_functions;
}; };
/** /**
* The *double-hashing* policy. Uses a linear combination of two hash functions. * The *double-hashing* policy. Uses a linear combination of two hash
* functions.
*/ */
class DoubleHasher : public Hasher { class DoubleHasher : public Hasher {
public: public:
/**
* Constructor for a double hasher with *k* hash functions.
*
* @param k The number of hash functions to use.
*
* @param name The name of the hasher.
*/
DoubleHasher(size_t k, const std::string& name); DoubleHasher(size_t k, const std::string& name);
// Overridden from Hasher.
virtual digest_vector Hash(const void* x, size_t n) const /* final */; virtual digest_vector Hash(const void* x, size_t n) const /* final */;
virtual DoubleHasher* Clone() const /* final */; virtual DoubleHasher* Clone() const /* final */;
virtual bool Equals(const Hasher* other) const /* final */; virtual bool Equals(const Hasher* other) const /* final */;
private: private:
UHF h1_; UHF h1;
UHF h2_; UHF h2;
}; };
} }

View file

@ -35,12 +35,13 @@ function bloomfilter_basic_init%(fp: double, capacity: count,
if ( fp < 0.0 || fp > 1.0 ) if ( fp < 0.0 || fp > 1.0 )
{ {
reporter->Error("false-positive rate must take value between 0 and 1"); reporter->Error("false-positive rate must take value between 0 and 1");
return NULL; return 0;
} }
size_t cells = BasicBloomFilter::M(fp, capacity); size_t cells = BasicBloomFilter::M(fp, capacity);
size_t optimal_k = BasicBloomFilter::K(cells, capacity); size_t optimal_k = BasicBloomFilter::K(cells, capacity);
const Hasher* h = Hasher::Create(optimal_k, name->CheckString()); const Hasher* h = Hasher::Create(optimal_k, name->CheckString());
return new BloomFilterVal(new BasicBloomFilter(h, cells)); return new BloomFilterVal(new BasicBloomFilter(h, cells));
%} %}
@ -64,13 +65,15 @@ function bloomfilter_counting_init%(k: count, cells: count, max: count,
if ( max == 0 ) if ( max == 0 )
{ {
reporter->Error("max counter value must be greater than 0"); reporter->Error("max counter value must be greater than 0");
return NULL; return 0;
} }
const Hasher* h = Hasher::Create(k, name->CheckString()); const Hasher* h = Hasher::Create(k, name->CheckString());
uint16 width = 1; uint16 width = 1;
while ( max >>= 1 ) while ( max >>= 1 )
++width; ++width;
return new BloomFilterVal(new CountingBloomFilter(h, cells, width)); return new BloomFilterVal(new CountingBloomFilter(h, cells, width));
%} %}
@ -82,13 +85,17 @@ function bloomfilter_counting_init%(k: count, cells: count, max: count,
function bloomfilter_add%(bf: opaque of bloomfilter, x: any%): any function bloomfilter_add%(bf: opaque of bloomfilter, x: any%): any
%{ %{
BloomFilterVal* bfv = static_cast<BloomFilterVal*>(bf); BloomFilterVal* bfv = static_cast<BloomFilterVal*>(bf);
if ( ! bfv->Type() && ! bfv->Typify(x->Type()) ) if ( ! bfv->Type() && ! bfv->Typify(x->Type()) )
reporter->Error("failed to set Bloom filter type"); reporter->Error("failed to set Bloom filter type");
else if ( bfv->Type() != x->Type() )
else if ( ! same_type(bfv->Type(), x->Type()) )
reporter->Error("incompatible Bloom filter types"); reporter->Error("incompatible Bloom filter types");
else else
bfv->Add(x); bfv->Add(x);
return NULL;
return 0;
%} %}
## Retrieves the counter for a given element in a Bloom filter. ## Retrieves the counter for a given element in a Bloom filter.
@ -101,12 +108,16 @@ function bloomfilter_add%(bf: opaque of bloomfilter, x: any%): any
function bloomfilter_lookup%(bf: opaque of bloomfilter, x: any%): count function bloomfilter_lookup%(bf: opaque of bloomfilter, x: any%): count
%{ %{
const BloomFilterVal* bfv = static_cast<const BloomFilterVal*>(bf); const BloomFilterVal* bfv = static_cast<const BloomFilterVal*>(bf);
if ( ! bfv->Type() ) if ( ! bfv->Type() )
reporter->Error("cannot perform lookup on untyped Bloom filter"); reporter->Error("cannot perform lookup on untyped Bloom filter");
else if ( bfv->Type() != x->Type() )
else if ( ! same_type(bfv->Type(), x->Type()) )
reporter->Error("incompatible Bloom filter types"); reporter->Error("incompatible Bloom filter types");
else else
return new Val(static_cast<uint64>(bfv->Count(x)), TYPE_COUNT); return new Val(static_cast<uint64>(bfv->Count(x)), TYPE_COUNT);
return new Val(0, TYPE_COUNT); return new Val(0, TYPE_COUNT);
%} %}
@ -122,9 +133,12 @@ function bloomfilter_merge%(bf1: opaque of bloomfilter,
%{ %{
const BloomFilterVal* bfv1 = static_cast<const BloomFilterVal*>(bf1); const BloomFilterVal* bfv1 = static_cast<const BloomFilterVal*>(bf1);
const BloomFilterVal* bfv2 = static_cast<const BloomFilterVal*>(bf2); const BloomFilterVal* bfv2 = static_cast<const BloomFilterVal*>(bf2);
if ( bfv1->Type() != bfv2->Type() )
if ( ! same_type(bfv1->Type(), bfv2->Type()) )
{
reporter->Error("incompatible Bloom filter types"); reporter->Error("incompatible Bloom filter types");
else return 0;
}
return BloomFilterVal::Merge(bfv1, bfv2); return BloomFilterVal::Merge(bfv1, bfv2);
return NULL;
%} %}

View file

@ -166,15 +166,15 @@ extern void init_random_seed(uint32 seed, const char* load_file,
const char* write_file); const char* write_file);
// Retrieves the initial seed computed after the very first call to // Retrieves the initial seed computed after the very first call to
// init_random_seed(). Repeated calls to init_random_seed() will not affect the // init_random_seed(). Repeated calls to init_random_seed() will not affect
// return value of this function. // the return value of this function.
unsigned int initial_seed(); unsigned int initial_seed();
// Returns true if the user explicitly set a seed via init_random_seed(); // Returns true if the user explicitly set a seed via init_random_seed();
extern bool have_random_seed(); extern bool have_random_seed();
// A simple linear congruence PRNG. It takes its state as argument and returns // A simple linear congruence PRNG. It takes its state as argument and
// a new random value, which can serve as state for subsequent calls. // returns a new random value, which can serve as state for subsequent calls.
long int bro_prng(long int state); long int bro_prng(long int state);
// Replacement for the system random(), to which it normally falls back // Replacement for the system random(), to which it normally falls back

View file

@ -1,3 +1,9 @@
error: incompatible Bloom filter types
error: incompatible Bloom filter types
error: incompatible Bloom filter types
error: incompatible Bloom filter types
error: false-positive rate must take value between 0 and 1
error: false-positive rate must take value between 0 and 1
0 0
1 1
1 1

View file

@ -1,4 +1,4 @@
# @TEST-EXEC: bro -b %INPUT >output # @TEST-EXEC: bro -b %INPUT >output 2>&1
# @TEST-EXEC: btest-diff output # @TEST-EXEC: btest-diff output
function test_basic_bloom_filter() function test_basic_bloom_filter()