mirror of
https://github.com/zeek/zeek.git
synced 2025-10-07 09:08:20 +00:00
Broifying the code.
Also extending API documentation a bit more and fixing a memory leak.
This commit is contained in:
parent
21685d2529
commit
474107fe40
18 changed files with 1651 additions and 1329 deletions
|
@ -560,7 +560,7 @@ void builtin_error(const char* msg, BroObj* arg)
|
||||||
#include "reporter.bif.func_def"
|
#include "reporter.bif.func_def"
|
||||||
#include "strings.bif.func_def"
|
#include "strings.bif.func_def"
|
||||||
|
|
||||||
// TODO: Add a nicer mechanism to pull subdirectory bifs automatically.
|
// TODO: Add a nicer mechanism to pull in subdirectory bifs automatically.
|
||||||
#include "probabilistic/bloom-filter.bif.h"
|
#include "probabilistic/bloom-filter.bif.h"
|
||||||
|
|
||||||
void init_builtin_funcs()
|
void init_builtin_funcs()
|
||||||
|
@ -577,7 +577,7 @@ void init_builtin_funcs()
|
||||||
#include "reporter.bif.func_init"
|
#include "reporter.bif.func_init"
|
||||||
#include "strings.bif.func_init"
|
#include "strings.bif.func_init"
|
||||||
|
|
||||||
// TODO: Add a nicer mechanism to pull subdirectory bifs automatically.
|
// TODO: Add a nicer mechanism to pull in subdirectory bifs automatically.
|
||||||
#include "probabilistic/bloom-filter.bif.init.cc"
|
#include "probabilistic/bloom-filter.bif.init.cc"
|
||||||
|
|
||||||
did_builtin_init = true;
|
did_builtin_init = true;
|
||||||
|
|
4
src/H3.h
4
src/H3.h
|
@ -100,8 +100,8 @@ public:
|
||||||
// loop optmized with Duff's Device
|
// loop optmized with Duff's Device
|
||||||
register unsigned n = (size + 7) / 8;
|
register unsigned n = (size + 7) / 8;
|
||||||
switch ( size % 8 ) {
|
switch ( size % 8 ) {
|
||||||
case 0: do { result ^= byte_lookup[offset++][*p++];
|
case 0: do { result ^= byte_lookup[offset++][*p++];
|
||||||
case 7: result ^= byte_lookup[offset++][*p++];
|
case 7: result ^= byte_lookup[offset++][*p++];
|
||||||
case 6: result ^= byte_lookup[offset++][*p++];
|
case 6: result ^= byte_lookup[offset++][*p++];
|
||||||
case 5: result ^= byte_lookup[offset++][*p++];
|
case 5: result ^= byte_lookup[offset++][*p++];
|
||||||
case 4: result ^= byte_lookup[offset++][*p++];
|
case 4: result ^= byte_lookup[offset++][*p++];
|
||||||
|
|
159
src/OpaqueVal.cc
159
src/OpaqueVal.cc
|
@ -1,5 +1,6 @@
|
||||||
#include "OpaqueVal.h"
|
// See the file "COPYING" in the main distribution directory for copyright.
|
||||||
|
|
||||||
|
#include "OpaqueVal.h"
|
||||||
#include "NetVar.h"
|
#include "NetVar.h"
|
||||||
#include "Reporter.h"
|
#include "Reporter.h"
|
||||||
#include "Serializer.h"
|
#include "Serializer.h"
|
||||||
|
@ -518,87 +519,89 @@ bool EntropyVal::DoUnserialize(UnserialInfo* info)
|
||||||
}
|
}
|
||||||
|
|
||||||
BloomFilterVal::BloomFilterVal()
|
BloomFilterVal::BloomFilterVal()
|
||||||
: OpaqueVal(bloomfilter_type),
|
: OpaqueVal(bloomfilter_type)
|
||||||
type_(NULL),
|
|
||||||
hash_(NULL),
|
|
||||||
bloom_filter_(NULL)
|
|
||||||
{
|
{
|
||||||
|
type = 0;
|
||||||
|
hash = 0;
|
||||||
|
bloom_filter = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
BloomFilterVal::BloomFilterVal(OpaqueType* t)
|
BloomFilterVal::BloomFilterVal(OpaqueType* t)
|
||||||
: OpaqueVal(t),
|
: OpaqueVal(t)
|
||||||
type_(NULL),
|
|
||||||
hash_(NULL),
|
|
||||||
bloom_filter_(NULL)
|
|
||||||
{
|
{
|
||||||
|
type = 0;
|
||||||
|
hash = 0;
|
||||||
|
bloom_filter = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
BloomFilterVal::BloomFilterVal(probabilistic::BloomFilter* bf)
|
BloomFilterVal::BloomFilterVal(probabilistic::BloomFilter* bf)
|
||||||
: OpaqueVal(bloomfilter_type),
|
: OpaqueVal(bloomfilter_type)
|
||||||
type_(NULL),
|
|
||||||
hash_(NULL),
|
|
||||||
bloom_filter_(bf)
|
|
||||||
{
|
{
|
||||||
|
type = 0;
|
||||||
|
hash = 0;
|
||||||
|
bloom_filter = bf;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool BloomFilterVal::Typify(BroType* type)
|
bool BloomFilterVal::Typify(BroType* arg_type)
|
||||||
{
|
{
|
||||||
if ( type_ )
|
if ( type )
|
||||||
return false;
|
return false;
|
||||||
type_ = type;
|
|
||||||
type_->Ref();
|
type = arg_type;
|
||||||
TypeList* tl = new TypeList(type_);
|
type->Ref();
|
||||||
tl->Append(type_);
|
|
||||||
hash_ = new CompositeHash(tl);
|
TypeList* tl = new TypeList(type);
|
||||||
Unref(tl);
|
tl->Append(type);
|
||||||
return true;
|
hash = new CompositeHash(tl);
|
||||||
}
|
Unref(tl);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
BroType* BloomFilterVal::Type() const
|
BroType* BloomFilterVal::Type() const
|
||||||
{
|
{
|
||||||
return type_;
|
return type;
|
||||||
}
|
}
|
||||||
|
|
||||||
void BloomFilterVal::Add(const Val* val)
|
void BloomFilterVal::Add(const Val* val)
|
||||||
{
|
{
|
||||||
HashKey* key = hash_->ComputeHash(val, 1);
|
HashKey* key = hash->ComputeHash(val, 1);
|
||||||
bloom_filter_->Add(key->Hash());
|
bloom_filter->Add(key->Hash());
|
||||||
}
|
delete key;
|
||||||
|
}
|
||||||
|
|
||||||
size_t BloomFilterVal::Count(const Val* val) const
|
size_t BloomFilterVal::Count(const Val* val) const
|
||||||
{
|
{
|
||||||
HashKey* key = hash_->ComputeHash(val, 1);
|
HashKey* key = hash->ComputeHash(val, 1);
|
||||||
return bloom_filter_->Count(key->Hash());
|
size_t cnt = bloom_filter->Count(key->Hash());
|
||||||
}
|
delete key;
|
||||||
|
return cnt;
|
||||||
|
}
|
||||||
|
|
||||||
BloomFilterVal* BloomFilterVal::Merge(const BloomFilterVal* x,
|
BloomFilterVal* BloomFilterVal::Merge(const BloomFilterVal* x,
|
||||||
const BloomFilterVal* y)
|
const BloomFilterVal* y)
|
||||||
{
|
{
|
||||||
if ( x->Type() != y->Type() )
|
if ( ! same_type(x->Type(), y->Type()) )
|
||||||
{
|
reporter->InternalError("cannot merge Bloom filters with different types");
|
||||||
reporter->InternalError("cannot merge Bloom filters with different types");
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
BloomFilterVal* result;
|
BloomFilterVal* result;
|
||||||
if ( (result = DoMerge<probabilistic::BasicBloomFilter>(x, y)) )
|
|
||||||
return result;
|
|
||||||
else if ( (result = DoMerge<probabilistic::CountingBloomFilter>(x, y)) )
|
|
||||||
return result;
|
|
||||||
|
|
||||||
reporter->InternalError("failed to merge Bloom filters");
|
if ( (result = DoMerge<probabilistic::BasicBloomFilter>(x, y)) )
|
||||||
return NULL;
|
return result;
|
||||||
}
|
|
||||||
|
else if ( (result = DoMerge<probabilistic::CountingBloomFilter>(x, y)) )
|
||||||
|
return result;
|
||||||
|
|
||||||
|
reporter->InternalError("failed to merge Bloom filters");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
BloomFilterVal::~BloomFilterVal()
|
BloomFilterVal::~BloomFilterVal()
|
||||||
{
|
{
|
||||||
if ( type_ )
|
Unref(type);
|
||||||
Unref(type_);
|
delete hash;
|
||||||
if ( hash_ )
|
delete bloom_filter;
|
||||||
delete hash_;
|
}
|
||||||
if ( bloom_filter_ )
|
|
||||||
delete bloom_filter_;
|
|
||||||
}
|
|
||||||
|
|
||||||
IMPLEMENT_SERIAL(BloomFilterVal, SER_BLOOMFILTER_VAL);
|
IMPLEMENT_SERIAL(BloomFilterVal, SER_BLOOMFILTER_VAL);
|
||||||
|
|
||||||
|
@ -606,14 +609,16 @@ bool BloomFilterVal::DoSerialize(SerialInfo* info) const
|
||||||
{
|
{
|
||||||
DO_SERIALIZE(SER_BLOOMFILTER_VAL, OpaqueVal);
|
DO_SERIALIZE(SER_BLOOMFILTER_VAL, OpaqueVal);
|
||||||
|
|
||||||
bool is_typed = type_ != NULL;
|
bool is_typed = (type != 0);
|
||||||
if ( ! SERIALIZE(is_typed) )
|
|
||||||
return false;
|
|
||||||
if ( is_typed && ! type_->Serialize(info) )
|
|
||||||
return false;
|
|
||||||
|
|
||||||
return bloom_filter_->Serialize(info);
|
if ( ! SERIALIZE(is_typed) )
|
||||||
}
|
return false;
|
||||||
|
|
||||||
|
if ( is_typed && ! type->Serialize(info) )
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return bloom_filter->Serialize(info);
|
||||||
|
}
|
||||||
|
|
||||||
bool BloomFilterVal::DoUnserialize(UnserialInfo* info)
|
bool BloomFilterVal::DoUnserialize(UnserialInfo* info)
|
||||||
{
|
{
|
||||||
|
@ -621,15 +626,17 @@ bool BloomFilterVal::DoUnserialize(UnserialInfo* info)
|
||||||
|
|
||||||
bool is_typed;
|
bool is_typed;
|
||||||
if ( ! UNSERIALIZE(&is_typed) )
|
if ( ! UNSERIALIZE(&is_typed) )
|
||||||
return false;
|
return false;
|
||||||
if ( is_typed )
|
|
||||||
{
|
|
||||||
BroType* type = BroType::Unserialize(info);
|
|
||||||
if ( ! Typify(type) )
|
|
||||||
return false;
|
|
||||||
Unref(type);
|
|
||||||
}
|
|
||||||
|
|
||||||
bloom_filter_ = probabilistic::BloomFilter::Unserialize(info);
|
if ( is_typed )
|
||||||
return bloom_filter_ != NULL;
|
{
|
||||||
}
|
BroType* type = BroType::Unserialize(info);
|
||||||
|
if ( ! Typify(type) )
|
||||||
|
return false;
|
||||||
|
|
||||||
|
Unref(type);
|
||||||
|
}
|
||||||
|
|
||||||
|
bloom_filter = probabilistic::BloomFilter::Unserialize(info);
|
||||||
|
return bloom_filter != 0;
|
||||||
|
}
|
||||||
|
|
|
@ -116,21 +116,19 @@ private:
|
||||||
};
|
};
|
||||||
|
|
||||||
class BloomFilterVal : public OpaqueVal {
|
class BloomFilterVal : public OpaqueVal {
|
||||||
BloomFilterVal(const BloomFilterVal&);
|
|
||||||
BloomFilterVal& operator=(const BloomFilterVal&);
|
|
||||||
public:
|
public:
|
||||||
static BloomFilterVal* Merge(const BloomFilterVal* x,
|
|
||||||
const BloomFilterVal* y);
|
|
||||||
|
|
||||||
explicit BloomFilterVal(probabilistic::BloomFilter* bf);
|
explicit BloomFilterVal(probabilistic::BloomFilter* bf);
|
||||||
~BloomFilterVal();
|
virtual ~BloomFilterVal();
|
||||||
|
|
||||||
bool Typify(BroType* type);
|
|
||||||
BroType* Type() const;
|
BroType* Type() const;
|
||||||
|
bool Typify(BroType* type);
|
||||||
|
|
||||||
void Add(const Val* val);
|
void Add(const Val* val);
|
||||||
size_t Count(const Val* val) const;
|
size_t Count(const Val* val) const;
|
||||||
|
|
||||||
|
static BloomFilterVal* Merge(const BloomFilterVal* x,
|
||||||
|
const BloomFilterVal* y);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
friend class Val;
|
friend class Val;
|
||||||
BloomFilterVal();
|
BloomFilterVal();
|
||||||
|
@ -139,32 +137,35 @@ protected:
|
||||||
DECLARE_SERIAL(BloomFilterVal);
|
DECLARE_SERIAL(BloomFilterVal);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
template <typename T>
|
// Disable.
|
||||||
static BloomFilterVal* DoMerge(const BloomFilterVal* x,
|
BloomFilterVal(const BloomFilterVal&);
|
||||||
const BloomFilterVal* y)
|
BloomFilterVal& operator=(const BloomFilterVal&);
|
||||||
{
|
|
||||||
if ( typeid(*x->bloom_filter_) != typeid(*y->bloom_filter_) )
|
|
||||||
{
|
|
||||||
reporter->InternalError("cannot merge different Bloom filter types");
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
if ( typeid(T) != typeid(*x->bloom_filter_) )
|
|
||||||
return NULL;
|
|
||||||
const T* a = static_cast<const T*>(x->bloom_filter_);
|
|
||||||
const T* b = static_cast<const T*>(y->bloom_filter_);
|
|
||||||
BloomFilterVal* merged = new BloomFilterVal(T::Merge(a, b));
|
|
||||||
assert(merged);
|
|
||||||
if ( ! merged->Typify(x->Type()) )
|
|
||||||
{
|
|
||||||
reporter->InternalError("failed to set type on merged Bloom filter");
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
return merged;
|
|
||||||
}
|
|
||||||
|
|
||||||
BroType* type_;
|
template <typename T>
|
||||||
CompositeHash* hash_;
|
static BloomFilterVal* DoMerge(const BloomFilterVal* x,
|
||||||
probabilistic::BloomFilter* bloom_filter_;
|
const BloomFilterVal* y)
|
||||||
};
|
{
|
||||||
|
if ( typeid(*x->bloom_filter) != typeid(*y->bloom_filter) )
|
||||||
|
reporter->InternalError("cannot merge different Bloom filter types");
|
||||||
|
|
||||||
|
if ( typeid(T) != typeid(*x->bloom_filter) )
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
const T* a = static_cast<const T*>(x->bloom_filter);
|
||||||
|
const T* b = static_cast<const T*>(y->bloom_filter);
|
||||||
|
|
||||||
|
BloomFilterVal* merged = new BloomFilterVal(T::Merge(a, b));
|
||||||
|
assert(merged);
|
||||||
|
|
||||||
|
if ( ! merged->Typify(x->Type()) )
|
||||||
|
reporter->InternalError("failed to set type on merged Bloom filter");
|
||||||
|
|
||||||
|
return merged;
|
||||||
|
}
|
||||||
|
|
||||||
|
BroType* type;
|
||||||
|
CompositeHash* hash;
|
||||||
|
probabilistic::BloomFilter* bloom_filter;
|
||||||
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1321,6 +1321,7 @@ bool OpaqueType::DoUnserialize(UnserialInfo* info)
|
||||||
const char* n;
|
const char* n;
|
||||||
if ( ! UNSERIALIZE_STR(&n, 0) )
|
if ( ! UNSERIALIZE_STR(&n, 0) )
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
name = n;
|
name = n;
|
||||||
delete [] n;
|
delete [] n;
|
||||||
|
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
// See the file "COPYING" in the main distribution directory for copyright.
|
||||||
|
|
||||||
#include "BitVector.h"
|
#include "BitVector.h"
|
||||||
|
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
|
@ -8,505 +10,558 @@ using namespace probabilistic;
|
||||||
|
|
||||||
BitVector::size_type BitVector::npos = static_cast<BitVector::size_type>(-1);
|
BitVector::size_type BitVector::npos = static_cast<BitVector::size_type>(-1);
|
||||||
BitVector::block_type BitVector::bits_per_block =
|
BitVector::block_type BitVector::bits_per_block =
|
||||||
std::numeric_limits<BitVector::block_type>::digits;
|
std::numeric_limits<BitVector::block_type>::digits;
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
uint8_t count_table[] = {
|
uint8_t count_table[] = {
|
||||||
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2,
|
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2,
|
||||||
3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3,
|
3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3,
|
||||||
3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3,
|
3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3,
|
||||||
4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4,
|
4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4,
|
||||||
3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5,
|
3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5,
|
||||||
6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4,
|
6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4,
|
||||||
4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5,
|
4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5,
|
||||||
6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5,
|
6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5,
|
||||||
3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3,
|
3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3,
|
||||||
4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6,
|
4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6,
|
||||||
6, 7, 6, 7, 7, 8
|
6, 7, 6, 7, 7, 8
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace <anonymous>
|
} // namespace <anonymous>
|
||||||
|
|
||||||
BitVector::Reference::Reference(block_type& block, block_type i)
|
BitVector::Reference::Reference(block_type& block, block_type i)
|
||||||
: block_(block),
|
: block(block), mask((block_type(1) << i))
|
||||||
mask_(block_type(1) << i)
|
{
|
||||||
{
|
assert(i < bits_per_block);
|
||||||
assert(i < bits_per_block);
|
}
|
||||||
}
|
|
||||||
|
|
||||||
BitVector::Reference& BitVector::Reference::Flip()
|
BitVector::Reference& BitVector::Reference::Flip()
|
||||||
{
|
{
|
||||||
block_ ^= mask_;
|
block ^= mask;
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
BitVector::Reference::operator bool() const
|
BitVector::Reference::operator bool() const
|
||||||
{
|
{
|
||||||
return (block_ & mask_) != 0;
|
return (block & mask) != 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool BitVector::Reference::operator~() const
|
bool BitVector::Reference::operator~() const
|
||||||
{
|
{
|
||||||
return (block_ & mask_) == 0;
|
return (block & mask) == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
BitVector::Reference& BitVector::Reference::operator=(bool x)
|
BitVector::Reference& BitVector::Reference::operator=(bool x)
|
||||||
{
|
{
|
||||||
x ? block_ |= mask_ : block_ &= ~mask_;
|
if ( x )
|
||||||
return *this;
|
block |= mask;
|
||||||
}
|
else
|
||||||
|
block &= ~mask;
|
||||||
|
|
||||||
BitVector::Reference& BitVector::Reference::operator=(Reference const& other)
|
return *this;
|
||||||
{
|
}
|
||||||
other ? block_ |= mask_ : block_ &= ~mask_;
|
|
||||||
return *this;
|
BitVector::Reference& BitVector::Reference::operator=(const Reference& other)
|
||||||
}
|
{
|
||||||
|
if ( other )
|
||||||
|
block |= mask;
|
||||||
|
else
|
||||||
|
block &= ~mask;
|
||||||
|
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
BitVector::Reference& BitVector::Reference::operator|=(bool x)
|
BitVector::Reference& BitVector::Reference::operator|=(bool x)
|
||||||
{
|
{
|
||||||
if (x)
|
if ( x )
|
||||||
block_ |= mask_;
|
block |= mask;
|
||||||
return *this;
|
|
||||||
}
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
BitVector::Reference& BitVector::Reference::operator&=(bool x)
|
BitVector::Reference& BitVector::Reference::operator&=(bool x)
|
||||||
{
|
{
|
||||||
if (! x)
|
if ( ! x )
|
||||||
block_ &= ~mask_;
|
block &= ~mask;
|
||||||
return *this;
|
|
||||||
}
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
BitVector::Reference& BitVector::Reference::operator^=(bool x)
|
BitVector::Reference& BitVector::Reference::operator^=(bool x)
|
||||||
{
|
{
|
||||||
if (x)
|
if ( x )
|
||||||
block_ ^= mask_;
|
block ^= mask;
|
||||||
return *this;
|
|
||||||
}
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
BitVector::Reference& BitVector::Reference::operator-=(bool x)
|
BitVector::Reference& BitVector::Reference::operator-=(bool x)
|
||||||
{
|
{
|
||||||
if (x)
|
if ( x )
|
||||||
block_ &= ~mask_;
|
block &= ~mask;
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
BitVector::BitVector() : num_bits_(0) { }
|
BitVector::BitVector()
|
||||||
|
{
|
||||||
|
num_bits = 0;
|
||||||
|
}
|
||||||
|
|
||||||
BitVector::BitVector(size_type size, bool value)
|
BitVector::BitVector(size_type size, bool value)
|
||||||
: bits_(bits_to_blocks(size), value ? ~block_type(0) : 0),
|
: bits(bits_to_blocks(size), value ? ~block_type(0) : 0)
|
||||||
num_bits_(size)
|
{
|
||||||
{ }
|
num_bits = size;
|
||||||
|
}
|
||||||
|
|
||||||
BitVector::BitVector(BitVector const& other)
|
BitVector::BitVector(BitVector const& other)
|
||||||
: bits_(other.bits_),
|
: bits(other.bits)
|
||||||
num_bits_(other.num_bits_)
|
{
|
||||||
{ }
|
num_bits = other.num_bits;
|
||||||
|
}
|
||||||
|
|
||||||
BitVector BitVector::operator~() const
|
BitVector BitVector::operator~() const
|
||||||
{
|
{
|
||||||
BitVector b(*this);
|
BitVector b(*this);
|
||||||
b.Flip();
|
b.Flip();
|
||||||
return b;
|
return b;
|
||||||
}
|
}
|
||||||
|
|
||||||
BitVector& BitVector::operator=(BitVector const& other)
|
BitVector& BitVector::operator=(BitVector const& other)
|
||||||
{
|
{
|
||||||
bits_ = other.bits_;
|
bits = other.bits;
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
BitVector BitVector::operator<<(size_type n) const
|
BitVector BitVector::operator<<(size_type n) const
|
||||||
{
|
{
|
||||||
BitVector b(*this);
|
BitVector b(*this);
|
||||||
return b <<= n;
|
return b <<= n;
|
||||||
}
|
}
|
||||||
|
|
||||||
BitVector BitVector::operator>>(size_type n) const
|
BitVector BitVector::operator>>(size_type n) const
|
||||||
{
|
{
|
||||||
BitVector b(*this);
|
BitVector b(*this);
|
||||||
return b >>= n;
|
return b >>= n;
|
||||||
}
|
}
|
||||||
|
|
||||||
BitVector& BitVector::operator<<=(size_type n)
|
BitVector& BitVector::operator<<=(size_type n)
|
||||||
{
|
{
|
||||||
if (n >= num_bits_)
|
if ( n >= num_bits )
|
||||||
return Reset();
|
return Reset();
|
||||||
|
|
||||||
if (n > 0)
|
if ( n > 0 )
|
||||||
{
|
{
|
||||||
size_type last = Blocks() - 1;
|
size_type last = Blocks() - 1;
|
||||||
size_type div = n / bits_per_block;
|
size_type div = n / bits_per_block;
|
||||||
block_type r = bit_index(n);
|
block_type r = bit_index(n);
|
||||||
block_type* b = &bits_[0];
|
block_type* b = &bits[0];
|
||||||
assert(Blocks() >= 1);
|
|
||||||
assert(div <= last);
|
|
||||||
|
|
||||||
if (r != 0)
|
assert(Blocks() >= 1);
|
||||||
{
|
assert(div <= last);
|
||||||
for (size_type i = last - div; i > 0; --i)
|
|
||||||
b[i + div] = (b[i] << r) | (b[i - 1] >> (bits_per_block - r));
|
|
||||||
b[div] = b[0] << r;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
for (size_type i = last-div; i > 0; --i)
|
|
||||||
b[i + div] = b[i];
|
|
||||||
b[div] = b[0];
|
|
||||||
}
|
|
||||||
|
|
||||||
std::fill_n(b, div, block_type(0));
|
if ( r != 0 )
|
||||||
zero_unused_bits();
|
{
|
||||||
}
|
for ( size_type i = last - div; i > 0; --i )
|
||||||
|
b[i + div] = (b[i] << r) | (b[i - 1] >> (bits_per_block - r));
|
||||||
|
|
||||||
return *this;
|
b[div] = b[0] << r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for (size_type i = last-div; i > 0; --i)
|
||||||
|
b[i + div] = b[i];
|
||||||
|
|
||||||
|
b[div] = b[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
std::fill_n(b, div, block_type(0));
|
||||||
|
zero_unused_bits();
|
||||||
|
}
|
||||||
|
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
BitVector& BitVector::operator>>=(size_type n)
|
BitVector& BitVector::operator>>=(size_type n)
|
||||||
{
|
{
|
||||||
if (n >= num_bits_)
|
if ( n >= num_bits )
|
||||||
return Reset();
|
return Reset();
|
||||||
|
|
||||||
if (n > 0)
|
if ( n > 0 )
|
||||||
{
|
{
|
||||||
size_type last = Blocks() - 1;
|
size_type last = Blocks() - 1;
|
||||||
size_type div = n / bits_per_block;
|
size_type div = n / bits_per_block;
|
||||||
block_type r = bit_index(n);
|
block_type r = bit_index(n);
|
||||||
block_type* b = &bits_[0];
|
block_type* b = &bits[0];
|
||||||
assert(Blocks() >= 1);
|
|
||||||
assert(div <= last);
|
|
||||||
|
|
||||||
if (r != 0)
|
assert(Blocks() >= 1);
|
||||||
{
|
assert(div <= last);
|
||||||
for (size_type i = last - div; i > 0; --i)
|
|
||||||
b[i - div] = (b[i] >> r) | (b[i + 1] << (bits_per_block - r));
|
|
||||||
b[last - div] = b[last] >> r;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
for (size_type i = div; i <= last; ++i)
|
|
||||||
b[i-div] = b[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
std::fill_n(b + (Blocks() - div), div, block_type(0));
|
if ( r != 0 )
|
||||||
}
|
{
|
||||||
return *this;
|
for (size_type i = last - div; i > 0; --i)
|
||||||
}
|
b[i - div] = (b[i] >> r) | (b[i + 1] << (bits_per_block - r));
|
||||||
|
|
||||||
|
b[last - div] = b[last] >> r;
|
||||||
|
}
|
||||||
|
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for (size_type i = div; i <= last; ++i)
|
||||||
|
b[i-div] = b[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
std::fill_n(b + (Blocks() - div), div, block_type(0));
|
||||||
|
}
|
||||||
|
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
BitVector& BitVector::operator&=(BitVector const& other)
|
BitVector& BitVector::operator&=(BitVector const& other)
|
||||||
{
|
{
|
||||||
assert(Size() >= other.Size());
|
assert(Size() >= other.Size());
|
||||||
for (size_type i = 0; i < Blocks(); ++i)
|
|
||||||
bits_[i] &= other.bits_[i];
|
for ( size_type i = 0; i < Blocks(); ++i )
|
||||||
return *this;
|
bits[i] &= other.bits[i];
|
||||||
}
|
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
BitVector& BitVector::operator|=(BitVector const& other)
|
BitVector& BitVector::operator|=(BitVector const& other)
|
||||||
{
|
{
|
||||||
assert(Size() >= other.Size());
|
assert(Size() >= other.Size());
|
||||||
for (size_type i = 0; i < Blocks(); ++i)
|
|
||||||
bits_[i] |= other.bits_[i];
|
for ( size_type i = 0; i < Blocks(); ++i )
|
||||||
return *this;
|
bits[i] |= other.bits[i];
|
||||||
}
|
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
BitVector& BitVector::operator^=(BitVector const& other)
|
BitVector& BitVector::operator^=(BitVector const& other)
|
||||||
{
|
{
|
||||||
assert(Size() >= other.Size());
|
assert(Size() >= other.Size());
|
||||||
for (size_type i = 0; i < Blocks(); ++i)
|
|
||||||
bits_[i] ^= other.bits_[i];
|
for ( size_type i = 0; i < Blocks(); ++i )
|
||||||
return *this;
|
bits[i] ^= other.bits[i];
|
||||||
}
|
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
BitVector& BitVector::operator-=(BitVector const& other)
|
BitVector& BitVector::operator-=(BitVector const& other)
|
||||||
{
|
{
|
||||||
assert(Size() >= other.Size());
|
assert(Size() >= other.Size());
|
||||||
for (size_type i = 0; i < Blocks(); ++i)
|
|
||||||
bits_[i] &= ~other.bits_[i];
|
for ( size_type i = 0; i < Blocks(); ++i )
|
||||||
return *this;
|
bits[i] &= ~other.bits[i];
|
||||||
}
|
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
namespace probabilistic {
|
namespace probabilistic {
|
||||||
|
|
||||||
BitVector operator&(BitVector const& x, BitVector const& y)
|
BitVector operator&(BitVector const& x, BitVector const& y)
|
||||||
{
|
{
|
||||||
BitVector b(x);
|
BitVector b(x);
|
||||||
return b &= y;
|
return b &= y;
|
||||||
}
|
}
|
||||||
|
|
||||||
BitVector operator|(BitVector const& x, BitVector const& y)
|
BitVector operator|(BitVector const& x, BitVector const& y)
|
||||||
{
|
{
|
||||||
BitVector b(x);
|
BitVector b(x);
|
||||||
return b |= y;
|
return b |= y;
|
||||||
}
|
}
|
||||||
|
|
||||||
BitVector operator^(BitVector const& x, BitVector const& y)
|
BitVector operator^(BitVector const& x, BitVector const& y)
|
||||||
{
|
{
|
||||||
BitVector b(x);
|
BitVector b(x);
|
||||||
return b ^= y;
|
return b ^= y;
|
||||||
}
|
}
|
||||||
|
|
||||||
BitVector operator-(BitVector const& x, BitVector const& y)
|
BitVector operator-(BitVector const& x, BitVector const& y)
|
||||||
{
|
{
|
||||||
BitVector b(x);
|
BitVector b(x);
|
||||||
return b -= y;
|
return b -= y;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool operator==(BitVector const& x, BitVector const& y)
|
bool operator==(BitVector const& x, BitVector const& y)
|
||||||
{
|
{
|
||||||
return x.num_bits_ == y.num_bits_ && x.bits_ == y.bits_;
|
return x.num_bits == y.num_bits && x.bits == y.bits;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool operator!=(BitVector const& x, BitVector const& y)
|
bool operator!=(BitVector const& x, BitVector const& y)
|
||||||
{
|
{
|
||||||
return ! (x == y);
|
return ! (x == y);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool operator<(BitVector const& x, BitVector const& y)
|
bool operator<(BitVector const& x, BitVector const& y)
|
||||||
{
|
{
|
||||||
assert(x.Size() == y.Size());
|
assert(x.Size() == y.Size());
|
||||||
for (BitVector::size_type r = x.Blocks(); r > 0; --r)
|
|
||||||
{
|
for ( BitVector::size_type r = x.Blocks(); r > 0; --r )
|
||||||
BitVector::size_type i = r - 1;
|
{
|
||||||
if (x.bits_[i] < y.bits_[i])
|
BitVector::size_type i = r - 1;
|
||||||
return true;
|
|
||||||
else if (x.bits_[i] > y.bits_[i])
|
if ( x.bits[i] < y.bits[i] )
|
||||||
return false;
|
return true;
|
||||||
}
|
|
||||||
return false;
|
else if ( x.bits[i] > y.bits[i] )
|
||||||
}
|
return false;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void BitVector::Resize(size_type n, bool value)
|
void BitVector::Resize(size_type n, bool value)
|
||||||
{
|
{
|
||||||
size_type old = Blocks();
|
size_type old = Blocks();
|
||||||
size_type required = bits_to_blocks(n);
|
size_type required = bits_to_blocks(n);
|
||||||
block_type block_value = value ? ~block_type(0) : block_type(0);
|
block_type block_value = value ? ~block_type(0) : block_type(0);
|
||||||
|
|
||||||
if (required != old)
|
if ( required != old )
|
||||||
bits_.resize(required, block_value);
|
bits.resize(required, block_value);
|
||||||
|
|
||||||
if (value && (n > num_bits_) && extra_bits())
|
if ( value && (n > num_bits) && extra_bits() )
|
||||||
bits_[old - 1] |= (block_value << extra_bits());
|
bits[old - 1] |= (block_value << extra_bits());
|
||||||
|
|
||||||
num_bits_ = n;
|
num_bits = n;
|
||||||
zero_unused_bits();
|
zero_unused_bits();
|
||||||
}
|
}
|
||||||
|
|
||||||
void BitVector::Clear()
|
void BitVector::Clear()
|
||||||
{
|
{
|
||||||
bits_.clear();
|
bits.clear();
|
||||||
num_bits_ = 0;
|
num_bits = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void BitVector::PushBack(bool bit)
|
void BitVector::PushBack(bool bit)
|
||||||
{
|
{
|
||||||
size_type s = Size();
|
size_type s = Size();
|
||||||
Resize(s + 1);
|
Resize(s + 1);
|
||||||
Set(s, bit);
|
Set(s, bit);
|
||||||
}
|
}
|
||||||
|
|
||||||
void BitVector::Append(block_type block)
|
void BitVector::Append(block_type block)
|
||||||
{
|
{
|
||||||
size_type excess = extra_bits();
|
size_type excess = extra_bits();
|
||||||
if (excess)
|
|
||||||
{
|
if ( excess )
|
||||||
assert(! Empty());
|
{
|
||||||
bits_.push_back(block >> (bits_per_block - excess));
|
assert(! Empty());
|
||||||
bits_[Blocks() - 2] |= (block << excess);
|
bits.push_back(block >> (bits_per_block - excess));
|
||||||
}
|
bits[Blocks() - 2] |= (block << excess);
|
||||||
else
|
}
|
||||||
{
|
|
||||||
bits_.push_back(block);
|
else
|
||||||
}
|
{
|
||||||
num_bits_ += bits_per_block;
|
bits.push_back(block);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
num_bits += bits_per_block;
|
||||||
|
}
|
||||||
|
|
||||||
BitVector& BitVector::Set(size_type i, bool bit)
|
BitVector& BitVector::Set(size_type i, bool bit)
|
||||||
{
|
{
|
||||||
assert(i < num_bits_);
|
assert(i < num_bits);
|
||||||
if (bit)
|
|
||||||
bits_[block_index(i)] |= bit_mask(i);
|
if ( bit )
|
||||||
else
|
bits[block_index(i)] |= bit_mask(i);
|
||||||
Reset(i);
|
else
|
||||||
return *this;
|
Reset(i);
|
||||||
}
|
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
BitVector& BitVector::Set()
|
BitVector& BitVector::Set()
|
||||||
{
|
{
|
||||||
std::fill(bits_.begin(), bits_.end(), ~block_type(0));
|
std::fill(bits.begin(), bits.end(), ~block_type(0));
|
||||||
zero_unused_bits();
|
zero_unused_bits();
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
BitVector& BitVector::Reset(size_type i)
|
BitVector& BitVector::Reset(size_type i)
|
||||||
{
|
{
|
||||||
assert(i < num_bits_);
|
assert(i < num_bits);
|
||||||
bits_[block_index(i)] &= ~bit_mask(i);
|
bits[block_index(i)] &= ~bit_mask(i);
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
BitVector& BitVector::Reset()
|
BitVector& BitVector::Reset()
|
||||||
{
|
{
|
||||||
std::fill(bits_.begin(), bits_.end(), block_type(0));
|
std::fill(bits.begin(), bits.end(), block_type(0));
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
BitVector& BitVector::Flip(size_type i)
|
BitVector& BitVector::Flip(size_type i)
|
||||||
{
|
{
|
||||||
assert(i < num_bits_);
|
assert(i < num_bits);
|
||||||
bits_[block_index(i)] ^= bit_mask(i);
|
bits[block_index(i)] ^= bit_mask(i);
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
BitVector& BitVector::Flip()
|
BitVector& BitVector::Flip()
|
||||||
{
|
{
|
||||||
for (size_type i = 0; i < Blocks(); ++i)
|
for (size_type i = 0; i < Blocks(); ++i)
|
||||||
bits_[i] = ~bits_[i];
|
bits[i] = ~bits[i];
|
||||||
zero_unused_bits();
|
|
||||||
return *this;
|
zero_unused_bits();
|
||||||
}
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
bool BitVector::operator[](size_type i) const
|
bool BitVector::operator[](size_type i) const
|
||||||
{
|
{
|
||||||
assert(i < num_bits_);
|
assert(i < num_bits);
|
||||||
return (bits_[block_index(i)] & bit_mask(i)) != 0;
|
return (bits[block_index(i)] & bit_mask(i)) != 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
BitVector::Reference BitVector::operator[](size_type i)
|
BitVector::Reference BitVector::operator[](size_type i)
|
||||||
{
|
{
|
||||||
assert(i < num_bits_);
|
assert(i < num_bits);
|
||||||
return Reference(bits_[block_index(i)], bit_index(i));
|
return Reference(bits[block_index(i)], bit_index(i));
|
||||||
}
|
}
|
||||||
|
|
||||||
BitVector::size_type BitVector::Count() const
|
BitVector::size_type BitVector::Count() const
|
||||||
{
|
{
|
||||||
std::vector<block_type>::const_iterator first = bits_.begin();
|
std::vector<block_type>::const_iterator first = bits.begin();
|
||||||
size_t n = 0;
|
size_t n = 0;
|
||||||
size_type length = Blocks();
|
size_type length = Blocks();
|
||||||
while (length)
|
|
||||||
{
|
while ( length )
|
||||||
block_type block = *first;
|
{
|
||||||
while (block)
|
block_type block = *first;
|
||||||
{
|
|
||||||
// TODO: use __popcnt if available.
|
while ( block )
|
||||||
n += count_table[block & ((1u << 8) - 1)];
|
{
|
||||||
block >>= 8;
|
// TODO: use _popcnt if available.
|
||||||
}
|
n += count_table[block & ((1u << 8) - 1)];
|
||||||
++first;
|
block >>= 8;
|
||||||
--length;
|
}
|
||||||
}
|
|
||||||
return n;
|
++first;
|
||||||
}
|
--length;
|
||||||
|
}
|
||||||
|
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
|
||||||
BitVector::size_type BitVector::Blocks() const
|
BitVector::size_type BitVector::Blocks() const
|
||||||
{
|
{
|
||||||
return bits_.size();
|
return bits.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
BitVector::size_type BitVector::Size() const
|
BitVector::size_type BitVector::Size() const
|
||||||
{
|
{
|
||||||
return num_bits_;
|
return num_bits;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool BitVector::Empty() const
|
bool BitVector::Empty() const
|
||||||
{
|
{
|
||||||
return bits_.empty();
|
return bits.empty();
|
||||||
}
|
}
|
||||||
|
|
||||||
BitVector::size_type BitVector::FindFirst() const
|
BitVector::size_type BitVector::FindFirst() const
|
||||||
{
|
{
|
||||||
return find_from(0);
|
return find_from(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
BitVector::size_type BitVector::FindNext(size_type i) const
|
BitVector::size_type BitVector::FindNext(size_type i) const
|
||||||
{
|
{
|
||||||
if (i >= (Size() - 1) || Size() == 0)
|
if ( i >= (Size() - 1) || Size() == 0 )
|
||||||
return npos;
|
return npos;
|
||||||
++i;
|
|
||||||
size_type bi = block_index(i);
|
++i;
|
||||||
block_type block = bits_[bi] & (~block_type(0) << bit_index(i));
|
size_type bi = block_index(i);
|
||||||
return block ? bi * bits_per_block + lowest_bit(block) : find_from(bi + 1);
|
block_type block = bits[bi] & (~block_type(0) << bit_index(i));
|
||||||
}
|
return block ? bi * bits_per_block + lowest_bit(block) : find_from(bi + 1);
|
||||||
|
}
|
||||||
|
|
||||||
BitVector::size_type BitVector::lowest_bit(block_type block)
|
BitVector::size_type BitVector::lowest_bit(block_type block)
|
||||||
{
|
{
|
||||||
block_type x = block - (block & (block - 1));
|
block_type x = block - (block & (block - 1));
|
||||||
size_type log = 0;
|
size_type log = 0;
|
||||||
while (x >>= 1)
|
|
||||||
++log;
|
while (x >>= 1)
|
||||||
return log;
|
++log;
|
||||||
}
|
|
||||||
|
return log;
|
||||||
|
}
|
||||||
|
|
||||||
BitVector::block_type BitVector::extra_bits() const
|
BitVector::block_type BitVector::extra_bits() const
|
||||||
{
|
{
|
||||||
return bit_index(Size());
|
return bit_index(Size());
|
||||||
}
|
}
|
||||||
|
|
||||||
void BitVector::zero_unused_bits()
|
void BitVector::zero_unused_bits()
|
||||||
{
|
{
|
||||||
if (extra_bits())
|
if ( extra_bits() )
|
||||||
bits_.back() &= ~(~block_type(0) << extra_bits());
|
bits.back() &= ~(~block_type(0) << extra_bits());
|
||||||
}
|
}
|
||||||
|
|
||||||
BitVector::size_type BitVector::find_from(size_type i) const
|
BitVector::size_type BitVector::find_from(size_type i) const
|
||||||
{
|
{
|
||||||
while (i < Blocks() && bits_[i] == 0)
|
while (i < Blocks() && bits[i] == 0)
|
||||||
++i;
|
++i;
|
||||||
if (i >= Blocks())
|
|
||||||
return npos;
|
if ( i >= Blocks() )
|
||||||
return i * bits_per_block + lowest_bit(bits_[i]);
|
return npos;
|
||||||
}
|
|
||||||
|
return i * bits_per_block + lowest_bit(bits[i]);
|
||||||
|
}
|
||||||
|
|
||||||
bool BitVector::Serialize(SerialInfo* info) const
|
bool BitVector::Serialize(SerialInfo* info) const
|
||||||
{
|
{
|
||||||
return SerialObj::Serialize(info);
|
return SerialObj::Serialize(info);
|
||||||
}
|
}
|
||||||
|
|
||||||
BitVector* BitVector::Unserialize(UnserialInfo* info)
|
BitVector* BitVector::Unserialize(UnserialInfo* info)
|
||||||
{
|
{
|
||||||
return reinterpret_cast<BitVector*>(
|
return reinterpret_cast<BitVector*>(SerialObj::Unserialize(info, SER_BITVECTOR));
|
||||||
SerialObj::Unserialize(info, SER_BITVECTOR));
|
}
|
||||||
}
|
|
||||||
|
|
||||||
IMPLEMENT_SERIAL(BitVector, SER_BITVECTOR);
|
IMPLEMENT_SERIAL(BitVector, SER_BITVECTOR);
|
||||||
|
|
||||||
bool BitVector::DoSerialize(SerialInfo* info) const
|
bool BitVector::DoSerialize(SerialInfo* info) const
|
||||||
{
|
{
|
||||||
DO_SERIALIZE(SER_BITVECTOR, SerialObj);
|
DO_SERIALIZE(SER_BITVECTOR, SerialObj);
|
||||||
|
|
||||||
if ( ! SERIALIZE(static_cast<uint64>(bits_.size())) )
|
if ( ! SERIALIZE(static_cast<uint64>(bits.size())) )
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
for ( size_t i = 0; i < bits_.size(); ++i )
|
for ( size_t i = 0; i < bits.size(); ++i )
|
||||||
if ( ! SERIALIZE(static_cast<uint64>(bits_[i])) )
|
if ( ! SERIALIZE(static_cast<uint64>(bits[i])) )
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
return SERIALIZE(static_cast<uint64>(num_bits_));
|
return SERIALIZE(static_cast<uint64>(num_bits));
|
||||||
}
|
}
|
||||||
|
|
||||||
bool BitVector::DoUnserialize(UnserialInfo* info)
|
bool BitVector::DoUnserialize(UnserialInfo* info)
|
||||||
{
|
{
|
||||||
DO_UNSERIALIZE(SerialObj);
|
DO_UNSERIALIZE(SerialObj);
|
||||||
|
|
||||||
uint64 size;
|
uint64 size;
|
||||||
if ( ! UNSERIALIZE(&size) )
|
if ( ! UNSERIALIZE(&size) )
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
bits_.resize(static_cast<size_t>(size));
|
bits.resize(static_cast<size_t>(size));
|
||||||
uint64 block;
|
|
||||||
for ( size_t i = 0; i < bits_.size(); ++i )
|
|
||||||
{
|
|
||||||
if ( ! UNSERIALIZE(&block) )
|
|
||||||
return false;
|
|
||||||
bits_[i] = static_cast<block_type>(block);
|
|
||||||
}
|
|
||||||
|
|
||||||
uint64 num_bits;
|
for ( size_t i = 0; i < bits.size(); ++i )
|
||||||
if ( ! UNSERIALIZE(&num_bits) )
|
{
|
||||||
return false;
|
uint64 block;
|
||||||
num_bits_ = static_cast<size_type>(num_bits);
|
if ( ! UNSERIALIZE(&block) )
|
||||||
|
return false;
|
||||||
|
|
||||||
return true;
|
bits[i] = static_cast<block_type>(block);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uint64 num_bits;
|
||||||
|
if ( ! UNSERIALIZE(&num_bits) )
|
||||||
|
return false;
|
||||||
|
|
||||||
|
num_bits = static_cast<size_type>(num_bits);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
|
@ -1,8 +1,11 @@
|
||||||
#ifndef BitVector_h
|
// See the file "COPYING" in the main distribution directory for copyright.
|
||||||
#define BitVector_h
|
|
||||||
|
#ifndef PROBABILISTIC_BITVECTOR_H
|
||||||
|
#define PROBABILISTIC_BITVECTOR_H
|
||||||
|
|
||||||
#include <iterator>
|
#include <iterator>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "SerialObj.h"
|
#include "SerialObj.h"
|
||||||
|
|
||||||
namespace probabilistic {
|
namespace probabilistic {
|
||||||
|
@ -12,322 +15,348 @@ namespace probabilistic {
|
||||||
*/
|
*/
|
||||||
class BitVector : public SerialObj {
|
class BitVector : public SerialObj {
|
||||||
public:
|
public:
|
||||||
typedef size_t block_type;
|
typedef size_t block_type;
|
||||||
typedef size_t size_type;
|
typedef size_t size_type;
|
||||||
static size_type npos;
|
typedef bool const_reference;
|
||||||
static block_type bits_per_block;
|
|
||||||
|
|
||||||
public:
|
static size_type npos;
|
||||||
/**
|
static block_type bits_per_block;
|
||||||
* An lvalue proxy for single bits.
|
|
||||||
*/
|
|
||||||
class Reference {
|
|
||||||
friend class BitVector;
|
|
||||||
Reference(block_type& block, block_type i);
|
|
||||||
|
|
||||||
public:
|
/**
|
||||||
Reference& Flip();
|
* An lvalue proxy for individual bits.
|
||||||
operator bool() const;
|
*/
|
||||||
bool operator~() const;
|
class Reference {
|
||||||
Reference& operator=(bool x);
|
public:
|
||||||
Reference& operator=(Reference const& other);
|
/**
|
||||||
Reference& operator|=(bool x);
|
* Inverts the bit's value.
|
||||||
Reference& operator&=(bool x);
|
*/
|
||||||
Reference& operator^=(bool x);
|
Reference& Flip();
|
||||||
Reference& operator-=(bool x);
|
|
||||||
|
|
||||||
private:
|
operator bool() const;
|
||||||
void operator&();
|
bool operator~() const;
|
||||||
block_type& block_;
|
Reference& operator=(bool x);
|
||||||
block_type const mask_;
|
Reference& operator=(const Reference& other);
|
||||||
};
|
Reference& operator|=(bool x);
|
||||||
|
Reference& operator&=(bool x);
|
||||||
|
Reference& operator^=(bool x);
|
||||||
|
Reference& operator-=(bool x);
|
||||||
|
|
||||||
typedef bool const_reference;
|
private:
|
||||||
|
friend class BitVector;
|
||||||
|
|
||||||
/**
|
Reference(block_type& block, block_type i);
|
||||||
* Default-constructs an empty bit vector.
|
void operator&();
|
||||||
*/
|
|
||||||
BitVector();
|
|
||||||
|
|
||||||
/**
|
block_type& block;
|
||||||
* Constructs a bit vector of a given size.
|
const block_type mask;
|
||||||
* @param size The number of bits.
|
};
|
||||||
* @param value The value for each bit.
|
|
||||||
*/
|
|
||||||
explicit BitVector(size_type size, bool value = false);
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructs a bit vector from a sequence of blocks.
|
* Default-constructs an empty bit vector.
|
||||||
*/
|
*/
|
||||||
template <typename InputIterator>
|
BitVector();
|
||||||
BitVector(InputIterator first, InputIterator last)
|
|
||||||
{
|
|
||||||
bits_.insert(bits_.end(), first, last);
|
|
||||||
num_bits_ = bits_.size() * bits_per_block;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Copy-constructs a bit vector.
|
* Constructs a bit vector of a given size.
|
||||||
* @param other The bit vector to copy.
|
* @param size The number of bits.
|
||||||
*/
|
* @param value The value for each bit.
|
||||||
BitVector(const BitVector& other);
|
*/
|
||||||
|
explicit BitVector(size_type size, bool value = false);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Assigns another bit vector to this instance.
|
* Constructs a bit vector from a sequence of blocks.
|
||||||
* @param other The RHS of the assignment.
|
*
|
||||||
*/
|
* @param first Start of range
|
||||||
BitVector& operator=(const BitVector& other);
|
* @param last End of range.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
template <typename InputIterator>
|
||||||
|
BitVector(InputIterator first, InputIterator last)
|
||||||
|
{
|
||||||
|
bits.insert(bits.end(), first, last);
|
||||||
|
num_bits = bits.size() * bits_per_block;
|
||||||
|
}
|
||||||
|
|
||||||
//
|
/**
|
||||||
// Bitwise operations
|
* Copy-constructs a bit vector.
|
||||||
//
|
* @param other The bit vector to copy.
|
||||||
BitVector operator~() const;
|
*/
|
||||||
BitVector operator<<(size_type n) const;
|
BitVector(const BitVector& other);
|
||||||
BitVector operator>>(size_type n) const;
|
|
||||||
BitVector& operator<<=(size_type n);
|
|
||||||
BitVector& operator>>=(size_type n);
|
|
||||||
BitVector& operator&=(BitVector const& other);
|
|
||||||
BitVector& operator|=(BitVector const& other);
|
|
||||||
BitVector& operator^=(BitVector const& other);
|
|
||||||
BitVector& operator-=(BitVector const& other);
|
|
||||||
friend BitVector operator&(BitVector const& x, BitVector const& y);
|
|
||||||
friend BitVector operator|(BitVector const& x, BitVector const& y);
|
|
||||||
friend BitVector operator^(BitVector const& x, BitVector const& y);
|
|
||||||
friend BitVector operator-(BitVector const& x, BitVector const& y);
|
|
||||||
|
|
||||||
//
|
/**
|
||||||
// Relational operators
|
* Assigns another bit vector to this instance.
|
||||||
//
|
* @param other The RHS of the assignment.
|
||||||
friend bool operator==(BitVector const& x, BitVector const& y);
|
*/
|
||||||
friend bool operator!=(BitVector const& x, BitVector const& y);
|
BitVector& operator=(const BitVector& other);
|
||||||
friend bool operator<(BitVector const& x, BitVector const& y);
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// Basic operations
|
// Bitwise operations.
|
||||||
//
|
//
|
||||||
/** Appends the bits in a sequence of values.
|
BitVector operator~() const;
|
||||||
* @tparam ForwardIterator A forward iterator.
|
BitVector operator<<(size_type n) const;
|
||||||
* @param first An iterator pointing to the first element of the sequence.
|
BitVector operator>>(size_type n) const;
|
||||||
* @param last An iterator pointing to one past the last element of the
|
BitVector& operator<<=(size_type n);
|
||||||
* sequence.
|
BitVector& operator>>=(size_type n);
|
||||||
*/
|
BitVector& operator&=(BitVector const& other);
|
||||||
template <typename ForwardIterator>
|
BitVector& operator|=(BitVector const& other);
|
||||||
void Append(ForwardIterator first, ForwardIterator last)
|
BitVector& operator^=(BitVector const& other);
|
||||||
{
|
BitVector& operator-=(BitVector const& other);
|
||||||
if (first == last)
|
friend BitVector operator&(BitVector const& x, BitVector const& y);
|
||||||
return;
|
friend BitVector operator|(BitVector const& x, BitVector const& y);
|
||||||
|
friend BitVector operator^(BitVector const& x, BitVector const& y);
|
||||||
|
friend BitVector operator-(BitVector const& x, BitVector const& y);
|
||||||
|
|
||||||
block_type excess = extra_bits();
|
//
|
||||||
typename std::iterator_traits<ForwardIterator>::difference_type delta =
|
// Relational operators
|
||||||
std::distance(first, last);
|
//
|
||||||
|
friend bool operator==(BitVector const& x, BitVector const& y);
|
||||||
|
friend bool operator!=(BitVector const& x, BitVector const& y);
|
||||||
|
friend bool operator<(BitVector const& x, BitVector const& y);
|
||||||
|
|
||||||
bits_.reserve(Blocks() + delta);
|
//
|
||||||
if (excess == 0)
|
// Basic operations
|
||||||
{
|
//
|
||||||
bits_.back() |= (*first << excess);
|
|
||||||
do
|
|
||||||
{
|
|
||||||
block_type b = *first++ >> (bits_per_block - excess);
|
|
||||||
bits_.push_back(b | (first == last ? 0 : *first << excess));
|
|
||||||
} while (first != last);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
bits_.insert(bits_.end(), first, last);
|
|
||||||
}
|
|
||||||
num_bits_ += bits_per_block * delta;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/** Appends the bits in a sequence of values.
|
||||||
* Appends the bits in a given block.
|
* @tparam ForwardIterator A forward iterator.
|
||||||
* @param block The block containing bits to append.
|
* @param first An iterator pointing to the first element of the sequence.
|
||||||
*/
|
* @param last An iterator pointing to one past the last element of the
|
||||||
void Append(block_type block);
|
* sequence.
|
||||||
|
*/
|
||||||
|
template <typename ForwardIterator>
|
||||||
|
void Append(ForwardIterator first, ForwardIterator last)
|
||||||
|
{
|
||||||
|
if ( first == last )
|
||||||
|
return;
|
||||||
|
|
||||||
/** Appends a single bit to the end of the bit vector.
|
block_type excess = extra_bits();
|
||||||
* @param bit The value of the bit.
|
typename std::iterator_traits<ForwardIterator>::difference_type delta =
|
||||||
*/
|
std::distance(first, last);
|
||||||
void PushBack(bool bit);
|
|
||||||
|
|
||||||
/**
|
bits.reserve(Blocks() + delta);
|
||||||
* Clears all bits in the bitvector.
|
|
||||||
*/
|
|
||||||
void Clear();
|
|
||||||
|
|
||||||
/**
|
if ( excess == 0 )
|
||||||
* Resizes the bit vector to a new number of bits.
|
{
|
||||||
* @param n The new number of bits of the bit vector.
|
bits.back() |= (*first << excess);
|
||||||
* @param value The bit value of new values, if the vector expands.
|
|
||||||
*/
|
|
||||||
void Resize(size_type n, bool value = false);
|
|
||||||
|
|
||||||
/**
|
do {
|
||||||
* Sets a bit at a specific position to a given value.
|
block_type b = *first++ >> (bits_per_block - excess);
|
||||||
* @param i The bit position.
|
bits.push_back(b | (first == last ? 0 : *first << excess));
|
||||||
* @param bit The value assigned to position *i*.
|
} while (first != last);
|
||||||
* @return A reference to the bit vector instance.
|
|
||||||
*/
|
|
||||||
BitVector& Set(size_type i, bool bit = true);
|
|
||||||
|
|
||||||
/**
|
}
|
||||||
* Sets all bits to 1.
|
|
||||||
* @return A reference to the bit vector instance.
|
|
||||||
*/
|
|
||||||
BitVector& Set();
|
|
||||||
|
|
||||||
/**
|
else
|
||||||
* Resets a bit at a specific position, i.e., sets it to 0.
|
bits.insert(bits.end(), first, last);
|
||||||
* @param i The bit position.
|
|
||||||
* @return A reference to the bit vector instance.
|
|
||||||
*/
|
|
||||||
BitVector& Reset(size_type i);
|
|
||||||
|
|
||||||
/**
|
num_bits += bits_per_block * delta;
|
||||||
* Sets all bits to 0.
|
}
|
||||||
* @return A reference to the bit vector instance.
|
|
||||||
*/
|
|
||||||
BitVector& Reset();
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Toggles/flips a bit at a specific position.
|
* Appends the bits in a given block.
|
||||||
* @param i The bit position.
|
* @param block The block containing bits to append.
|
||||||
* @return A reference to the bit vector instance.
|
*/
|
||||||
*/
|
void Append(block_type block);
|
||||||
BitVector& Flip(size_type i);
|
|
||||||
|
|
||||||
/**
|
/** Appends a single bit to the end of the bit vector.
|
||||||
* Computes the complement.
|
* @param bit The value of the bit.
|
||||||
* @return A reference to the bit vector instance.
|
*/
|
||||||
*/
|
void PushBack(bool bit);
|
||||||
BitVector& Flip();
|
|
||||||
|
|
||||||
/** Retrieves a single bit.
|
/**
|
||||||
* @param i The bit position.
|
* Clears all bits in the bitvector.
|
||||||
* @return A mutable reference to the bit at position *i*.
|
*/
|
||||||
*/
|
void Clear();
|
||||||
Reference operator[](size_type i);
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Retrieves a single bit.
|
* Resizes the bit vector to a new number of bits.
|
||||||
* @param i The bit position.
|
* @param n The new number of bits of the bit vector.
|
||||||
* @return A const-reference to the bit at position *i*.
|
* @param value The bit value of new values, if the vector expands.
|
||||||
*/
|
*/
|
||||||
const_reference operator[](size_type i) const;
|
void Resize(size_type n, bool value = false);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Counts the number of 1-bits in the bit vector. Also known as *population
|
* Sets a bit at a specific position to a given value.
|
||||||
* count* or *Hamming weight*.
|
* @param i The bit position.
|
||||||
* @return The number of bits set to 1.
|
* @param bit The value assigned to position *i*.
|
||||||
*/
|
* @return A reference to the bit vector instance.
|
||||||
size_type Count() const;
|
*/
|
||||||
|
BitVector& Set(size_type i, bool bit = true);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Retrieves the number of blocks of the underlying storage.
|
* Sets all bits to 1.
|
||||||
* @return The number of blocks that represent `Size()` bits.
|
* @return A reference to the bit vector instance.
|
||||||
*/
|
*/
|
||||||
size_type Blocks() const;
|
BitVector& Set();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Retrieves the number of bits the bitvector consists of.
|
* Resets a bit at a specific position, i.e., sets it to 0.
|
||||||
* @return The length of the bit vector in bits.
|
* @param i The bit position.
|
||||||
*/
|
* @return A reference to the bit vector instance.
|
||||||
size_type Size() const;
|
*/
|
||||||
|
BitVector& Reset(size_type i);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Checks whether the bit vector is empty.
|
* Sets all bits to 0.
|
||||||
* @return `true` iff the bitvector has zero length.
|
* @return A reference to the bit vector instance.
|
||||||
*/
|
*/
|
||||||
bool Empty() const;
|
BitVector& Reset();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Finds the bit position of the first 1-bit.
|
* Toggles/flips a bit at a specific position.
|
||||||
* @return The position of the first bit that equals to one or `npos` if no
|
* @param i The bit position.
|
||||||
* such bit exists.
|
* @return A reference to the bit vector instance.
|
||||||
*/
|
*/
|
||||||
size_type FindFirst() const;
|
BitVector& Flip(size_type i);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Finds the next 1-bit from a given starting position.
|
* Computes the complement.
|
||||||
*
|
* @return A reference to the bit vector instance.
|
||||||
* @param i The index where to start looking.
|
*/
|
||||||
*
|
BitVector& Flip();
|
||||||
* @return The position of the first bit that equals to 1 after position
|
|
||||||
* *i* or `npos` if no such bit exists.
|
|
||||||
*/
|
|
||||||
size_type FindNext(size_type i) const;
|
|
||||||
|
|
||||||
bool Serialize(SerialInfo* info) const;
|
/** Retrieves a single bit.
|
||||||
static BitVector* Unserialize(UnserialInfo* info);
|
* @param i The bit position.
|
||||||
|
* @return A mutable reference to the bit at position *i*.
|
||||||
|
*/
|
||||||
|
Reference operator[](size_type i);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieves a single bit.
|
||||||
|
* @param i The bit position.
|
||||||
|
* @return A const-reference to the bit at position *i*.
|
||||||
|
*/
|
||||||
|
const_reference operator[](size_type i) const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Counts the number of 1-bits in the bit vector. Also known as *population
|
||||||
|
* count* or *Hamming weight*.
|
||||||
|
* @return The number of bits set to 1.
|
||||||
|
*/
|
||||||
|
size_type Count() const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieves the number of blocks of the underlying storage.
|
||||||
|
* @return The number of blocks that represent `Size()` bits.
|
||||||
|
*/
|
||||||
|
size_type Blocks() const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieves the number of bits the bitvector consists of.
|
||||||
|
* @return The length of the bit vector in bits.
|
||||||
|
*/
|
||||||
|
size_type Size() const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks whether the bit vector is empty.
|
||||||
|
* @return `true` iff the bitvector has zero length.
|
||||||
|
*/
|
||||||
|
bool Empty() const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Finds the bit position of the first 1-bit.
|
||||||
|
* @return The position of the first bit that equals to one or `npos` if no
|
||||||
|
* such bit exists.
|
||||||
|
*/
|
||||||
|
size_type FindFirst() const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Finds the next 1-bit from a given starting position.
|
||||||
|
*
|
||||||
|
* @param i The index where to start looking.
|
||||||
|
*
|
||||||
|
* @return The position of the first bit that equals to 1 after position
|
||||||
|
* *i* or `npos` if no such bit exists.
|
||||||
|
*/
|
||||||
|
size_type FindNext(size_type i) const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Serializes the bit vector.
|
||||||
|
*
|
||||||
|
* @param info The serialization information to use.
|
||||||
|
*
|
||||||
|
* @return True if successful.
|
||||||
|
*/
|
||||||
|
bool Serialize(SerialInfo* info) const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Unserialize the bit vector.
|
||||||
|
*
|
||||||
|
* @param info The serialization information to use.
|
||||||
|
*
|
||||||
|
* @return The unserialized bit vector, or null if an error occurred.
|
||||||
|
*/
|
||||||
|
static BitVector* Unserialize(UnserialInfo* info);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
DECLARE_SERIAL(BitVector);
|
DECLARE_SERIAL(BitVector);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/**
|
/**
|
||||||
* Computes the block index for a given bit position.
|
* Computes the number of excess/unused bits in the bit vector.
|
||||||
*/
|
*/
|
||||||
static size_type block_index(size_type i)
|
block_type extra_bits() const;
|
||||||
{
|
|
||||||
return i / bits_per_block;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Computes the bit index within a given block for a given bit position.
|
* If the number of bits in the vector is not a multiple of
|
||||||
*/
|
* bitvector::bits_per_block, then the last block exhibits unused bits which
|
||||||
static block_type bit_index(size_type i)
|
* this function resets.
|
||||||
{
|
*/
|
||||||
return i % bits_per_block;
|
void zero_unused_bits();
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Computes the bitmask block to extract the bit at a given bit position.
|
* Looks for the first 1-bit starting at a given position.
|
||||||
*/
|
* @param i The block index to start looking.
|
||||||
static block_type bit_mask(size_type i)
|
* @return The bit position of the first 1-bit found at or after block *i*, or
|
||||||
{
|
* `bitvector::npos` if no 1-bit exists.
|
||||||
return block_type(1) << bit_index(i);
|
*/
|
||||||
}
|
size_type find_from(size_type i) const;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Computes the number of blocks needed to represent a given number of
|
* Computes the block index for a given bit position.
|
||||||
* bits.
|
*/
|
||||||
* @param bits the number of bits.
|
static size_type block_index(size_type i)
|
||||||
* @return The number of blocks to represent *bits* number of bits.
|
{
|
||||||
*/
|
return i / bits_per_block;
|
||||||
static size_type bits_to_blocks(size_type bits)
|
}
|
||||||
{
|
|
||||||
return bits / bits_per_block
|
|
||||||
+ static_cast<size_type>(bits % bits_per_block != 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Computes the bit position of the first 1-bit in a given block.
|
* Computes the bit index within a given block for a given bit position.
|
||||||
* @param block The block to inspect.
|
*/
|
||||||
* @return The bit position where *block* has its first bit set to 1.
|
static block_type bit_index(size_type i)
|
||||||
*/
|
{
|
||||||
static size_type lowest_bit(block_type block);
|
return i % bits_per_block;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Computes the number of excess/unused bits in the bit vector.
|
* Computes the bitmask block to extract the bit at a given bit position.
|
||||||
*/
|
*/
|
||||||
block_type extra_bits() const;
|
static block_type bit_mask(size_type i)
|
||||||
|
{
|
||||||
|
return block_type(1) << bit_index(i);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* If the number of bits in the vector is not a multiple of
|
* Computes the number of blocks needed to represent a given number of
|
||||||
* bitvector::bits_per_block, then the last block exhibits unused bits which
|
* bits.
|
||||||
* this function resets.
|
* @param bits the number of bits.
|
||||||
*/
|
* @return The number of blocks to represent *bits* number of bits.
|
||||||
void zero_unused_bits();
|
*/
|
||||||
|
static size_type bits_to_blocks(size_type bits)
|
||||||
|
{
|
||||||
|
return bits / bits_per_block
|
||||||
|
+ static_cast<size_type>(bits % bits_per_block != 0);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Looks for the first 1-bit starting at a given position.
|
* Computes the bit position of the first 1-bit in a given block.
|
||||||
* @param i The block index to start looking.
|
* @param block The block to inspect.
|
||||||
* @return The bit position of the first 1-bit found at or after block *i*, or
|
* @return The bit position where *block* has its first bit set to 1.
|
||||||
* `bitvector::npos` if no 1-bit exists.
|
*/
|
||||||
*/
|
static size_type lowest_bit(block_type block);
|
||||||
size_type find_from(size_type i) const;
|
|
||||||
|
|
||||||
std::vector<block_type> bits_;
|
std::vector<block_type> bits;
|
||||||
size_type num_bits_;
|
size_type num_bits;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
// See the file "COPYING" in the main distribution directory for copyright.
|
||||||
|
|
||||||
#include "BloomFilter.h"
|
#include "BloomFilter.h"
|
||||||
|
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
|
@ -8,181 +10,184 @@
|
||||||
using namespace probabilistic;
|
using namespace probabilistic;
|
||||||
|
|
||||||
BloomFilter::BloomFilter()
|
BloomFilter::BloomFilter()
|
||||||
: hasher_(NULL)
|
{
|
||||||
{
|
hasher = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
BloomFilter::BloomFilter(const Hasher* hasher)
|
BloomFilter::BloomFilter(const Hasher* arg_hasher)
|
||||||
: hasher_(hasher)
|
{
|
||||||
{
|
hasher = arg_hasher;
|
||||||
}
|
}
|
||||||
|
|
||||||
BloomFilter::~BloomFilter()
|
BloomFilter::~BloomFilter()
|
||||||
{
|
{
|
||||||
if ( hasher_ )
|
delete hasher;
|
||||||
delete hasher_;
|
}
|
||||||
}
|
|
||||||
|
|
||||||
bool BloomFilter::Serialize(SerialInfo* info) const
|
bool BloomFilter::Serialize(SerialInfo* info) const
|
||||||
{
|
{
|
||||||
return SerialObj::Serialize(info);
|
return SerialObj::Serialize(info);
|
||||||
}
|
}
|
||||||
|
|
||||||
BloomFilter* BloomFilter::Unserialize(UnserialInfo* info)
|
BloomFilter* BloomFilter::Unserialize(UnserialInfo* info)
|
||||||
{
|
{
|
||||||
return reinterpret_cast<BloomFilter*>(
|
return reinterpret_cast<BloomFilter*>(SerialObj::Unserialize(info, SER_BLOOMFILTER));
|
||||||
SerialObj::Unserialize(info, SER_BLOOMFILTER));
|
}
|
||||||
}
|
|
||||||
|
|
||||||
bool BloomFilter::DoSerialize(SerialInfo* info) const
|
bool BloomFilter::DoSerialize(SerialInfo* info) const
|
||||||
{
|
{
|
||||||
DO_SERIALIZE(SER_BLOOMFILTER, SerialObj);
|
DO_SERIALIZE(SER_BLOOMFILTER, SerialObj);
|
||||||
if ( ! SERIALIZE(static_cast<uint16>(hasher_->K())) )
|
|
||||||
return false;
|
if ( ! SERIALIZE(static_cast<uint16>(hasher->K())) )
|
||||||
return SERIALIZE_STR(hasher_->Name().c_str(), hasher_->Name().size());
|
return false;
|
||||||
}
|
|
||||||
|
return SERIALIZE_STR(hasher->Name().c_str(), hasher->Name().size());
|
||||||
|
}
|
||||||
|
|
||||||
bool BloomFilter::DoUnserialize(UnserialInfo* info)
|
bool BloomFilter::DoUnserialize(UnserialInfo* info)
|
||||||
{
|
{
|
||||||
DO_UNSERIALIZE(SerialObj);
|
DO_UNSERIALIZE(SerialObj);
|
||||||
|
|
||||||
uint16 k;
|
uint16 k;
|
||||||
if ( ! UNSERIALIZE(&k) )
|
if ( ! UNSERIALIZE(&k) )
|
||||||
return false;
|
return false;
|
||||||
const char* name;
|
|
||||||
if ( ! UNSERIALIZE_STR(&name, 0) )
|
const char* name;
|
||||||
return false;
|
if ( ! UNSERIALIZE_STR(&name, 0) )
|
||||||
hasher_ = Hasher::Create(k, name);
|
return false;
|
||||||
|
|
||||||
|
hasher = Hasher::Create(k, name);
|
||||||
|
|
||||||
delete [] name;
|
delete [] name;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
size_t BasicBloomFilter::M(double fp, size_t capacity)
|
size_t BasicBloomFilter::M(double fp, size_t capacity)
|
||||||
{
|
{
|
||||||
double ln2 = std::log(2);
|
double ln2 = std::log(2);
|
||||||
return std::ceil(-(capacity * std::log(fp) / ln2 / ln2));
|
return std::ceil(-(capacity * std::log(fp) / ln2 / ln2));
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t BasicBloomFilter::K(size_t cells, size_t capacity)
|
size_t BasicBloomFilter::K(size_t cells, size_t capacity)
|
||||||
{
|
{
|
||||||
double frac = static_cast<double>(cells) / static_cast<double>(capacity);
|
double frac = static_cast<double>(cells) / static_cast<double>(capacity);
|
||||||
return std::ceil(frac * std::log(2));
|
return std::ceil(frac * std::log(2));
|
||||||
}
|
}
|
||||||
|
|
||||||
BasicBloomFilter* BasicBloomFilter::Merge(const BasicBloomFilter* x,
|
BasicBloomFilter* BasicBloomFilter::Merge(const BasicBloomFilter* x,
|
||||||
const BasicBloomFilter* y)
|
const BasicBloomFilter* y)
|
||||||
{
|
{
|
||||||
if ( ! x->hasher_->Equals(y->hasher_) )
|
if ( ! x->hasher->Equals(y->hasher) )
|
||||||
{
|
reporter->InternalError("incompatible hashers during BasicBloomFilter merge");
|
||||||
reporter->InternalError("incompatible hashers during Bloom filter merge");
|
|
||||||
return NULL;
|
BasicBloomFilter* result = new BasicBloomFilter();
|
||||||
}
|
result->hasher = x->hasher->Clone();
|
||||||
BasicBloomFilter* result = new BasicBloomFilter();
|
result->bits = new BitVector(*x->bits | *y->bits);
|
||||||
result->hasher_ = x->hasher_->Clone();
|
|
||||||
result->bits_ = new BitVector(*x->bits_ | *y->bits_);
|
return result;
|
||||||
return result;
|
}
|
||||||
}
|
|
||||||
|
|
||||||
BasicBloomFilter::BasicBloomFilter()
|
BasicBloomFilter::BasicBloomFilter()
|
||||||
: bits_(NULL)
|
{
|
||||||
{
|
bits = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
BasicBloomFilter::BasicBloomFilter(const Hasher* hasher, size_t cells)
|
BasicBloomFilter::BasicBloomFilter(const Hasher* hasher, size_t cells)
|
||||||
: BloomFilter(hasher),
|
: BloomFilter(hasher)
|
||||||
bits_(new BitVector(cells))
|
{
|
||||||
{
|
bits = new BitVector(cells);
|
||||||
}
|
}
|
||||||
|
|
||||||
IMPLEMENT_SERIAL(BasicBloomFilter, SER_BASICBLOOMFILTER)
|
IMPLEMENT_SERIAL(BasicBloomFilter, SER_BASICBLOOMFILTER)
|
||||||
|
|
||||||
bool BasicBloomFilter::DoSerialize(SerialInfo* info) const
|
bool BasicBloomFilter::DoSerialize(SerialInfo* info) const
|
||||||
{
|
{
|
||||||
DO_SERIALIZE(SER_BASICBLOOMFILTER, BloomFilter);
|
DO_SERIALIZE(SER_BASICBLOOMFILTER, BloomFilter);
|
||||||
return bits_->Serialize(info);
|
return bits->Serialize(info);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool BasicBloomFilter::DoUnserialize(UnserialInfo* info)
|
bool BasicBloomFilter::DoUnserialize(UnserialInfo* info)
|
||||||
{
|
{
|
||||||
DO_UNSERIALIZE(BloomFilter);
|
DO_UNSERIALIZE(BloomFilter);
|
||||||
bits_ = BitVector::Unserialize(info);
|
bits = BitVector::Unserialize(info);
|
||||||
return bits_ != NULL;
|
return (bits != 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void BasicBloomFilter::AddImpl(const Hasher::digest_vector& h)
|
void BasicBloomFilter::AddImpl(const Hasher::digest_vector& h)
|
||||||
{
|
{
|
||||||
for ( size_t i = 0; i < h.size(); ++i )
|
for ( size_t i = 0; i < h.size(); ++i )
|
||||||
bits_->Set(h[i] % bits_->Size());
|
bits->Set(h[i] % bits->Size());
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t BasicBloomFilter::CountImpl(const Hasher::digest_vector& h) const
|
size_t BasicBloomFilter::CountImpl(const Hasher::digest_vector& h) const
|
||||||
{
|
{
|
||||||
for ( size_t i = 0; i < h.size(); ++i )
|
for ( size_t i = 0; i < h.size(); ++i )
|
||||||
if ( ! (*bits_)[h[i] % bits_->Size()] )
|
{
|
||||||
return 0;
|
if ( ! (*bits)[h[i] % bits->Size()] )
|
||||||
return 1;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
CountingBloomFilter* CountingBloomFilter::Merge(const CountingBloomFilter* x,
|
CountingBloomFilter* CountingBloomFilter::Merge(const CountingBloomFilter* x,
|
||||||
const CountingBloomFilter* y)
|
const CountingBloomFilter* y)
|
||||||
{
|
{
|
||||||
if ( ! x->hasher_->Equals(y->hasher_) )
|
if ( ! x->hasher->Equals(y->hasher) )
|
||||||
{
|
reporter->InternalError("incompatible hashers during CountingBloomFilter merge");
|
||||||
reporter->InternalError("incompatible hashers during Bloom filter merge");
|
|
||||||
return NULL;
|
CountingBloomFilter* result = new CountingBloomFilter();
|
||||||
}
|
result->hasher = x->hasher->Clone();
|
||||||
CountingBloomFilter* result = new CountingBloomFilter();
|
result->cells = new CounterVector(*x->cells | *y->cells);
|
||||||
result->hasher_ = x->hasher_->Clone();
|
|
||||||
result->cells_ = new CounterVector(*x->cells_ | *y->cells_);
|
return result;
|
||||||
return result;
|
}
|
||||||
}
|
|
||||||
|
|
||||||
CountingBloomFilter::CountingBloomFilter()
|
CountingBloomFilter::CountingBloomFilter()
|
||||||
: cells_(NULL)
|
{
|
||||||
{
|
cells = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
CountingBloomFilter::CountingBloomFilter(const Hasher* hasher,
|
CountingBloomFilter::CountingBloomFilter(const Hasher* hasher,
|
||||||
size_t cells, size_t width)
|
size_t arg_cells, size_t width)
|
||||||
: BloomFilter(hasher),
|
: BloomFilter(hasher)
|
||||||
cells_(new CounterVector(width, cells))
|
{
|
||||||
{
|
cells = new CounterVector(width, arg_cells);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
IMPLEMENT_SERIAL(CountingBloomFilter, SER_COUNTINGBLOOMFILTER)
|
IMPLEMENT_SERIAL(CountingBloomFilter, SER_COUNTINGBLOOMFILTER)
|
||||||
|
|
||||||
bool CountingBloomFilter::DoSerialize(SerialInfo* info) const
|
bool CountingBloomFilter::DoSerialize(SerialInfo* info) const
|
||||||
{
|
{
|
||||||
DO_SERIALIZE(SER_COUNTINGBLOOMFILTER, BloomFilter);
|
DO_SERIALIZE(SER_COUNTINGBLOOMFILTER, BloomFilter);
|
||||||
return cells_->Serialize(info);
|
return cells->Serialize(info);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool CountingBloomFilter::DoUnserialize(UnserialInfo* info)
|
bool CountingBloomFilter::DoUnserialize(UnserialInfo* info)
|
||||||
{
|
{
|
||||||
DO_UNSERIALIZE(BloomFilter);
|
DO_UNSERIALIZE(BloomFilter);
|
||||||
cells_ = CounterVector::Unserialize(info);
|
cells = CounterVector::Unserialize(info);
|
||||||
return cells_ != NULL;
|
return (cells != 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Use partitioning in add/count to allow for reusing CMS bounds.
|
// TODO: Use partitioning in add/count to allow for reusing CMS bounds.
|
||||||
|
|
||||||
void CountingBloomFilter::AddImpl(const Hasher::digest_vector& h)
|
void CountingBloomFilter::AddImpl(const Hasher::digest_vector& h)
|
||||||
{
|
{
|
||||||
for ( size_t i = 0; i < h.size(); ++i )
|
for ( size_t i = 0; i < h.size(); ++i )
|
||||||
cells_->Increment(h[i] % cells_->Size());
|
cells->Increment(h[i] % cells->Size());
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t CountingBloomFilter::CountImpl(const Hasher::digest_vector& h) const
|
size_t CountingBloomFilter::CountImpl(const Hasher::digest_vector& h) const
|
||||||
{
|
{
|
||||||
CounterVector::size_type min =
|
CounterVector::size_type min =
|
||||||
std::numeric_limits<CounterVector::size_type>::max();
|
std::numeric_limits<CounterVector::size_type>::max();
|
||||||
for ( size_t i = 0; i < h.size(); ++i )
|
|
||||||
{
|
for ( size_t i = 0; i < h.size(); ++i )
|
||||||
CounterVector::size_type cnt = cells_->Count(h[i] % cells_->Size());
|
{
|
||||||
if ( cnt < min )
|
CounterVector::size_type cnt = cells->Count(h[i] % cells->Size());
|
||||||
min = cnt;
|
if ( cnt < min )
|
||||||
}
|
min = cnt;
|
||||||
return min;
|
}
|
||||||
}
|
|
||||||
|
return min;
|
||||||
|
}
|
||||||
|
|
|
@ -1,5 +1,7 @@
|
||||||
#ifndef BloomFilter_h
|
// See the file "COPYING" in the main distribution directory for copyright.
|
||||||
#define BloomFilter_h
|
|
||||||
|
#ifndef PROBABILISTIC_BLOOMFILTER_H
|
||||||
|
#define PROBABILISTIC_BLOOMFILTER_H
|
||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include "BitVector.h"
|
#include "BitVector.h"
|
||||||
|
@ -11,42 +13,65 @@ class CounterVector;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The abstract base class for Bloom filters.
|
* The abstract base class for Bloom filters.
|
||||||
|
*
|
||||||
|
* At this point we won't let the user choose the hasher, but we might open
|
||||||
|
* up the interface in the future.
|
||||||
*/
|
*/
|
||||||
class BloomFilter : public SerialObj {
|
class BloomFilter : public SerialObj {
|
||||||
public:
|
public:
|
||||||
// At this point we won't let the user choose the hasher, but we might
|
/**
|
||||||
// open up the interface in the future.
|
* Destructor.
|
||||||
virtual ~BloomFilter();
|
*/
|
||||||
|
virtual ~BloomFilter();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Adds an element of type T to the Bloom filter.
|
* Adds an element of type T to the Bloom filter.
|
||||||
* @param x The element to add
|
* @param x The element to add
|
||||||
*/
|
*/
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void Add(const T& x)
|
void Add(const T& x)
|
||||||
{
|
{
|
||||||
AddImpl((*hasher_)(x));
|
AddImpl((*hasher)(x));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Retrieves the associated count of a given value.
|
* Retrieves the associated count of a given value.
|
||||||
*
|
*
|
||||||
* @param x The value of type `T` to check.
|
* @param x The value of type `T` to check.
|
||||||
*
|
*
|
||||||
* @return The counter associated with *x*.
|
* @return The counter associated with *x*.
|
||||||
*/
|
*/
|
||||||
template <typename T>
|
template <typename T>
|
||||||
size_t Count(const T& x) const
|
size_t Count(const T& x) const
|
||||||
{
|
{
|
||||||
return CountImpl((*hasher_)(x));
|
return CountImpl((*hasher)(x));
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Serialize(SerialInfo* info) const;
|
/**
|
||||||
static BloomFilter* Unserialize(UnserialInfo* info);
|
* Serializes the Bloom filter.
|
||||||
|
*
|
||||||
|
* @param info The serialization information to use.
|
||||||
|
*
|
||||||
|
* @return True if successful.
|
||||||
|
*/
|
||||||
|
bool Serialize(SerialInfo* info) const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Unserializes a Bloom filter.
|
||||||
|
*
|
||||||
|
* @param info The serialization information to use.
|
||||||
|
*
|
||||||
|
* @return The unserialized Bloom filter, or null if an error
|
||||||
|
* occurred.
|
||||||
|
*/
|
||||||
|
static BloomFilter* Unserialize(UnserialInfo* info);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
DECLARE_ABSTRACT_SERIAL(BloomFilter);
|
DECLARE_ABSTRACT_SERIAL(BloomFilter);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Default constructor.
|
||||||
|
*/
|
||||||
BloomFilter();
|
BloomFilter();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -54,12 +79,28 @@ protected:
|
||||||
*
|
*
|
||||||
* @param hasher The hasher to use for this Bloom filter.
|
* @param hasher The hasher to use for this Bloom filter.
|
||||||
*/
|
*/
|
||||||
BloomFilter(const Hasher* hasher);
|
BloomFilter(const Hasher* hasher);
|
||||||
|
|
||||||
virtual void AddImpl(const Hasher::digest_vector& hashes) = 0;
|
/**
|
||||||
virtual size_t CountImpl(const Hasher::digest_vector& hashes) const = 0;
|
* Abstract method for implementing the *Add* operation.
|
||||||
|
*
|
||||||
|
* @param hashes A set of *k* hashes for the item to add, computed by
|
||||||
|
* the internal hasher object.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
virtual void AddImpl(const Hasher::digest_vector& hashes) = 0;
|
||||||
|
|
||||||
const Hasher* hasher_;
|
/**
|
||||||
|
* Abstract method for implementing the *Count* operation.
|
||||||
|
*
|
||||||
|
* @param hashes A set of *k* hashes for the item to look up, computed by
|
||||||
|
* the internal hasher object.
|
||||||
|
*
|
||||||
|
* @return Returns the counter associated with the hashed element.
|
||||||
|
*/
|
||||||
|
virtual size_t CountImpl(const Hasher::digest_vector& hashes) const = 0;
|
||||||
|
|
||||||
|
const Hasher* hasher;
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -67,50 +108,67 @@ protected:
|
||||||
*/
|
*/
|
||||||
class BasicBloomFilter : public BloomFilter {
|
class BasicBloomFilter : public BloomFilter {
|
||||||
public:
|
public:
|
||||||
/**
|
/**
|
||||||
* Computes the number of cells based a given false-positive rate and
|
* Constructs a basic Bloom filter with a given number of cells. The
|
||||||
* capacity. In the literature, this parameter often has the name *M*.
|
* ideal number of cells can be computed with *M*.
|
||||||
*
|
*
|
||||||
* @param fp The false-positive rate.
|
* @param hasher The hasher to use. The ideal number of hash
|
||||||
*
|
* functions can be computed with *K*.
|
||||||
* @param capacity The number of exepected elements.
|
*
|
||||||
*
|
* @param cells The number of cells.
|
||||||
* Returns: The number cells needed to support a false-positive rate of *fp*
|
*/
|
||||||
* with at most *capacity* elements.
|
BasicBloomFilter(const Hasher* hasher, size_t cells);
|
||||||
*/
|
|
||||||
static size_t M(double fp, size_t capacity);
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Computes the optimal number of hash functions based on the number cells
|
* Computes the number of cells based on a given false positive rate
|
||||||
* and expected number of elements.
|
* and capacity. In the literature, this parameter often has the name
|
||||||
*
|
* *M*.
|
||||||
* @param cells The number of cells (*m*).
|
*
|
||||||
*
|
* @param fp The false positive rate.
|
||||||
* @param capacity The maximum number of elements.
|
*
|
||||||
*
|
* @param capacity The expected number of elements that will be
|
||||||
* Returns: the optimal number of hash functions for a false-positive rate of
|
* stored.
|
||||||
* *fp* for at most *capacity* elements.
|
*
|
||||||
*/
|
* Returns: The number of cells needed to support a false positive rate
|
||||||
static size_t K(size_t cells, size_t capacity);
|
* of *fp* with at most *capacity* elements.
|
||||||
|
*/
|
||||||
|
static size_t M(double fp, size_t capacity);
|
||||||
|
|
||||||
static BasicBloomFilter* Merge(const BasicBloomFilter* x,
|
/**
|
||||||
const BasicBloomFilter* y);
|
* Computes the optimal number of hash functions based on the number of cells
|
||||||
|
* and expected number of elements.
|
||||||
|
*
|
||||||
|
* @param cells The number of cells (*m*).
|
||||||
|
*
|
||||||
|
* @param capacity The maximum number of elements.
|
||||||
|
*
|
||||||
|
* Returns: the optimal number of hash functions for a filter of
|
||||||
|
* *cells* cells holding at most *capacity* elements.
|
||||||
|
*/
|
||||||
|
static size_t K(size_t cells, size_t capacity);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructs a basic Bloom filter with a given number of cells and capacity.
|
* Merges two basic Bloom filters.
|
||||||
*/
|
*
|
||||||
BasicBloomFilter(const Hasher* hasher, size_t cells);
|
* @return The merged Bloom filter.
|
||||||
|
*/
|
||||||
|
static BasicBloomFilter* Merge(const BasicBloomFilter* x,
|
||||||
|
const BasicBloomFilter* y);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
DECLARE_SERIAL(BasicBloomFilter);
|
DECLARE_SERIAL(BasicBloomFilter);
|
||||||
|
|
||||||
BasicBloomFilter();
|
/**
|
||||||
|
* Default constructor.
|
||||||
|
*/
|
||||||
|
BasicBloomFilter();
|
||||||
|
|
||||||
virtual void AddImpl(const Hasher::digest_vector& h);
|
// Overridden from BloomFilter.
|
||||||
virtual size_t CountImpl(const Hasher::digest_vector& h) const;
|
virtual void AddImpl(const Hasher::digest_vector& h);
|
||||||
|
virtual size_t CountImpl(const Hasher::digest_vector& h) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
BitVector* bits_;
|
BitVector* bits;
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -118,21 +176,40 @@ private:
|
||||||
*/
|
*/
|
||||||
class CountingBloomFilter : public BloomFilter {
|
class CountingBloomFilter : public BloomFilter {
|
||||||
public:
|
public:
|
||||||
static CountingBloomFilter* Merge(const CountingBloomFilter* x,
|
/**
|
||||||
const CountingBloomFilter* y);
|
* Constructs a counting Bloom filter.
|
||||||
|
*
|
||||||
|
* @param hasher The hasher to use. The ideal number of hash
|
||||||
|
* functions can be computed with *K*.
|
||||||
|
*
|
||||||
|
* @param cells The number of cells to use.
|
||||||
|
*
|
||||||
|
* @param width The maximal bit-width of counter values.
|
||||||
|
*/
|
||||||
|
CountingBloomFilter(const Hasher* hasher, size_t cells, size_t width);
|
||||||
|
|
||||||
CountingBloomFilter(const Hasher* hasher, size_t cells, size_t width);
|
/**
|
||||||
|
* Merges two counting Bloom filters.
|
||||||
|
*
|
||||||
|
* @return The merged Bloom filter.
|
||||||
|
*/
|
||||||
|
static CountingBloomFilter* Merge(const CountingBloomFilter* x,
|
||||||
|
const CountingBloomFilter* y);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
DECLARE_SERIAL(CountingBloomFilter);
|
DECLARE_SERIAL(CountingBloomFilter);
|
||||||
|
|
||||||
CountingBloomFilter();
|
/**
|
||||||
|
* Default constructor.
|
||||||
|
*/
|
||||||
|
CountingBloomFilter();
|
||||||
|
|
||||||
virtual void AddImpl(const Hasher::digest_vector& h);
|
// Overridden from BloomFilter.
|
||||||
virtual size_t CountImpl(const Hasher::digest_vector& h) const;
|
virtual void AddImpl(const Hasher::digest_vector& h);
|
||||||
|
virtual size_t CountImpl(const Hasher::digest_vector& h) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
CounterVector* cells_;
|
CounterVector* cells;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
// See the file "COPYING" in the main distribution directory for copyright.
|
||||||
|
|
||||||
#include "CounterVector.h"
|
#include "CounterVector.h"
|
||||||
|
|
||||||
#include <limits>
|
#include <limits>
|
||||||
|
@ -6,154 +8,176 @@
|
||||||
|
|
||||||
using namespace probabilistic;
|
using namespace probabilistic;
|
||||||
|
|
||||||
CounterVector::CounterVector(size_t width, size_t cells)
|
CounterVector::CounterVector(size_t arg_width, size_t cells)
|
||||||
: bits_(new BitVector(width * cells)),
|
{
|
||||||
width_(width)
|
bits = new BitVector(arg_width * cells);
|
||||||
{
|
width = arg_width;
|
||||||
}
|
}
|
||||||
|
|
||||||
CounterVector::CounterVector(const CounterVector& other)
|
CounterVector::CounterVector(const CounterVector& other)
|
||||||
: bits_(new BitVector(*other.bits_)),
|
{
|
||||||
width_(other.width_)
|
bits = new BitVector(*other.bits);
|
||||||
{
|
width = other.width;
|
||||||
}
|
}
|
||||||
|
|
||||||
CounterVector::~CounterVector()
|
CounterVector::~CounterVector()
|
||||||
{
|
{
|
||||||
delete bits_;
|
delete bits;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool CounterVector::Increment(size_type cell, count_type value)
|
bool CounterVector::Increment(size_type cell, count_type value)
|
||||||
{
|
{
|
||||||
assert(cell < Size());
|
assert(cell < Size());
|
||||||
assert(value != 0);
|
assert(value != 0);
|
||||||
size_t lsb = cell * width_;
|
|
||||||
bool carry = false;
|
size_t lsb = cell * width;
|
||||||
for ( size_t i = 0; i < width_; ++i )
|
bool carry = false;
|
||||||
{
|
|
||||||
bool b1 = (*bits_)[lsb + i];
|
for ( size_t i = 0; i < width; ++i )
|
||||||
bool b2 = value & (1 << i);
|
{
|
||||||
(*bits_)[lsb + i] = b1 ^ b2 ^ carry;
|
bool b1 = (*bits)[lsb + i];
|
||||||
carry = ( b1 && b2 ) || ( carry && ( b1 != b2 ) );
|
bool b2 = value & (1 << i);
|
||||||
}
|
(*bits)[lsb + i] = b1 ^ b2 ^ carry;
|
||||||
if ( carry )
|
carry = ( b1 && b2 ) || ( carry && ( b1 != b2 ) );
|
||||||
for ( size_t i = 0; i < width_; ++i )
|
}
|
||||||
bits_->Set(lsb + i);
|
|
||||||
return ! carry;
|
if ( carry )
|
||||||
}
|
{
|
||||||
|
for ( size_t i = 0; i < width; ++i )
|
||||||
|
bits->Set(lsb + i);
|
||||||
|
}
|
||||||
|
|
||||||
|
return ! carry;
|
||||||
|
}
|
||||||
|
|
||||||
bool CounterVector::Decrement(size_type cell, count_type value)
|
bool CounterVector::Decrement(size_type cell, count_type value)
|
||||||
{
|
{
|
||||||
assert(cell < Size());
|
assert(cell < Size());
|
||||||
assert(value != 0);
|
assert(value != 0);
|
||||||
value = ~value + 1; // A - B := A + ~B + 1
|
|
||||||
bool carry = false;
|
value = ~value + 1; // A - B := A + ~B + 1
|
||||||
size_t lsb = cell * width_;
|
bool carry = false;
|
||||||
for ( size_t i = 0; i < width_; ++i )
|
size_t lsb = cell * width;
|
||||||
{
|
|
||||||
bool b1 = (*bits_)[lsb + i];
|
for ( size_t i = 0; i < width; ++i )
|
||||||
bool b2 = value & (1 << i);
|
{
|
||||||
(*bits_)[lsb + i] = b1 ^ b2 ^ carry;
|
bool b1 = (*bits)[lsb + i];
|
||||||
carry = ( b1 && b2 ) || ( carry && ( b1 != b2 ) );
|
bool b2 = value & (1 << i);
|
||||||
}
|
(*bits)[lsb + i] = b1 ^ b2 ^ carry;
|
||||||
return carry;
|
carry = ( b1 && b2 ) || ( carry && ( b1 != b2 ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return carry;
|
||||||
|
}
|
||||||
|
|
||||||
CounterVector::count_type CounterVector::Count(size_type cell) const
|
CounterVector::count_type CounterVector::Count(size_type cell) const
|
||||||
{
|
{
|
||||||
assert(cell < Size());
|
assert(cell < Size());
|
||||||
size_t cnt = 0, order = 1;
|
|
||||||
size_t lsb = cell * width_;
|
size_t cnt = 0, order = 1;
|
||||||
for (size_t i = lsb; i < lsb + width_; ++i, order <<= 1)
|
size_t lsb = cell * width;
|
||||||
if ((*bits_)[i])
|
|
||||||
cnt |= order;
|
for ( size_t i = lsb; i < lsb + width; ++i, order <<= 1 )
|
||||||
return cnt;
|
if ( (*bits)[i] )
|
||||||
}
|
cnt |= order;
|
||||||
|
|
||||||
|
return cnt;
|
||||||
|
}
|
||||||
|
|
||||||
CounterVector::size_type CounterVector::Size() const
|
CounterVector::size_type CounterVector::Size() const
|
||||||
{
|
{
|
||||||
return bits_->Size() / width_;
|
return bits->Size() / width;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t CounterVector::Width() const
|
size_t CounterVector::Width() const
|
||||||
{
|
{
|
||||||
return width_;
|
return width;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t CounterVector::Max() const
|
size_t CounterVector::Max() const
|
||||||
{
|
{
|
||||||
return std::numeric_limits<size_t>::max()
|
return std::numeric_limits<size_t>::max()
|
||||||
>> (std::numeric_limits<size_t>::digits - width_);
|
>> (std::numeric_limits<size_t>::digits - width);
|
||||||
}
|
}
|
||||||
|
|
||||||
CounterVector& CounterVector::Merge(const CounterVector& other)
|
CounterVector& CounterVector::Merge(const CounterVector& other)
|
||||||
{
|
{
|
||||||
assert(Size() == other.Size());
|
assert(Size() == other.Size());
|
||||||
assert(Width() == other.Width());
|
assert(Width() == other.Width());
|
||||||
for ( size_t cell = 0; cell < Size(); ++cell )
|
|
||||||
{
|
for ( size_t cell = 0; cell < Size(); ++cell )
|
||||||
size_t lsb = cell * width_;
|
{
|
||||||
bool carry = false;
|
size_t lsb = cell * width;
|
||||||
for ( size_t i = 0; i < width_; ++i )
|
bool carry = false;
|
||||||
{
|
|
||||||
bool b1 = (*bits_)[lsb + i];
|
for ( size_t i = 0; i < width; ++i )
|
||||||
bool b2 = (*other.bits_)[lsb + i];
|
{
|
||||||
(*bits_)[lsb + i] = b1 ^ b2 ^ carry;
|
bool b1 = (*bits)[lsb + i];
|
||||||
carry = ( b1 && b2 ) || ( carry && ( b1 != b2 ) );
|
bool b2 = (*other.bits)[lsb + i];
|
||||||
}
|
(*bits)[lsb + i] = b1 ^ b2 ^ carry;
|
||||||
if ( carry )
|
carry = ( b1 && b2 ) || ( carry && ( b1 != b2 ) );
|
||||||
for ( size_t i = 0; i < width_; ++i )
|
}
|
||||||
bits_->Set(lsb + i);
|
|
||||||
}
|
if ( carry )
|
||||||
return *this;
|
{
|
||||||
}
|
for ( size_t i = 0; i < width; ++i )
|
||||||
|
bits->Set(lsb + i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
namespace probabilistic {
|
namespace probabilistic {
|
||||||
|
|
||||||
CounterVector& CounterVector::operator|=(const CounterVector& other)
|
CounterVector& CounterVector::operator|=(const CounterVector& other)
|
||||||
{
|
{
|
||||||
return Merge(other);
|
return Merge(other);
|
||||||
}
|
}
|
||||||
|
|
||||||
CounterVector operator|(const CounterVector& x, const CounterVector& y)
|
CounterVector operator|(const CounterVector& x, const CounterVector& y)
|
||||||
{
|
{
|
||||||
CounterVector cv(x);
|
CounterVector cv(x);
|
||||||
return cv |= y;
|
return cv |= y;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool CounterVector::Serialize(SerialInfo* info) const
|
bool CounterVector::Serialize(SerialInfo* info) const
|
||||||
{
|
{
|
||||||
return SerialObj::Serialize(info);
|
return SerialObj::Serialize(info);
|
||||||
}
|
}
|
||||||
|
|
||||||
CounterVector* CounterVector::Unserialize(UnserialInfo* info)
|
CounterVector* CounterVector::Unserialize(UnserialInfo* info)
|
||||||
{
|
{
|
||||||
return reinterpret_cast<CounterVector*>(
|
return reinterpret_cast<CounterVector*>(SerialObj::Unserialize(info, SER_COUNTERVECTOR));
|
||||||
SerialObj::Unserialize(info, SER_COUNTERVECTOR));
|
}
|
||||||
}
|
|
||||||
|
|
||||||
IMPLEMENT_SERIAL(CounterVector, SER_COUNTERVECTOR)
|
IMPLEMENT_SERIAL(CounterVector, SER_COUNTERVECTOR)
|
||||||
|
|
||||||
bool CounterVector::DoSerialize(SerialInfo* info) const
|
bool CounterVector::DoSerialize(SerialInfo* info) const
|
||||||
{
|
{
|
||||||
DO_SERIALIZE(SER_COUNTERVECTOR, SerialObj);
|
DO_SERIALIZE(SER_COUNTERVECTOR, SerialObj);
|
||||||
if ( ! bits_->Serialize(info) )
|
|
||||||
return false;
|
if ( ! bits->Serialize(info) )
|
||||||
return SERIALIZE(static_cast<uint64>(width_));
|
return false;
|
||||||
}
|
|
||||||
|
return SERIALIZE(static_cast<uint64>(width));
|
||||||
|
}
|
||||||
|
|
||||||
bool CounterVector::DoUnserialize(UnserialInfo* info)
|
bool CounterVector::DoUnserialize(UnserialInfo* info)
|
||||||
{
|
{
|
||||||
DO_UNSERIALIZE(SerialObj);
|
DO_UNSERIALIZE(SerialObj);
|
||||||
bits_ = BitVector::Unserialize(info);
|
|
||||||
if ( ! bits_ )
|
|
||||||
return false;
|
|
||||||
uint64 width;
|
|
||||||
if ( ! UNSERIALIZE(&width) )
|
|
||||||
return false;
|
|
||||||
width_ = static_cast<size_t>(width);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
bits = BitVector::Unserialize(info);
|
||||||
|
if ( ! bits )
|
||||||
|
return false;
|
||||||
|
|
||||||
|
uint64 width;
|
||||||
|
if ( ! UNSERIALIZE(&width) )
|
||||||
|
return false;
|
||||||
|
|
||||||
|
width = static_cast<size_t>(width);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
|
@ -1,5 +1,7 @@
|
||||||
#ifndef CounterVector_h
|
// See the file "COPYING" in the main distribution directory for copyright.
|
||||||
#define CounterVector_h
|
|
||||||
|
#ifndef PROBABILISTIC_COUNTERVECTOR_H
|
||||||
|
#define PROBABILISTIC_COUNTERVECTOR_H
|
||||||
|
|
||||||
#include "SerialObj.h"
|
#include "SerialObj.h"
|
||||||
|
|
||||||
|
@ -8,123 +10,143 @@ namespace probabilistic {
|
||||||
class BitVector;
|
class BitVector;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A vector of counters, each of which have a fixed number of bits.
|
* A vector of counters, each of which has a fixed number of bits.
|
||||||
*/
|
*/
|
||||||
class CounterVector : public SerialObj {
|
class CounterVector : public SerialObj {
|
||||||
CounterVector& operator=(const CounterVector&);
|
|
||||||
public:
|
public:
|
||||||
typedef size_t size_type;
|
typedef size_t size_type;
|
||||||
typedef uint64 count_type;
|
typedef uint64 count_type;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructs a counter vector having cells of a given width.
|
* Constructs a counter vector having cells of a given width.
|
||||||
*
|
*
|
||||||
* @param width The number of bits that each cell occupies.
|
* @param width The number of bits that each cell occupies.
|
||||||
*
|
*
|
||||||
* @param cells The number of cells in the bitvector.
|
* @param cells The number of cells in the bitvector.
|
||||||
*
|
*
|
||||||
* @pre `cells > 0 && width > 0`
|
* @pre `cells > 0 && width > 0`
|
||||||
*/
|
*/
|
||||||
CounterVector(size_t width, size_t cells = 1024);
|
CounterVector(size_t width, size_t cells = 1024);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Copy-constructs a counter vector.
|
* Copy-constructs a counter vector.
|
||||||
*
|
*
|
||||||
* @param other The counter vector to copy.
|
* @param other The counter vector to copy.
|
||||||
*/
|
*/
|
||||||
CounterVector(const CounterVector& other);
|
CounterVector(const CounterVector& other);
|
||||||
|
|
||||||
~CounterVector();
|
/**
|
||||||
|
* Destructor.
|
||||||
|
*/
|
||||||
|
~CounterVector();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Increments a given cell.
|
* Increments a given cell.
|
||||||
*
|
*
|
||||||
* @param cell The cell to increment.
|
* @param cell The cell to increment.
|
||||||
*
|
*
|
||||||
* @param value The value to add to the current counter in *cell*.
|
* @param value The value to add to the current counter in *cell*.
|
||||||
*
|
*
|
||||||
* @return `true` if adding *value* to the counter in *cell* succeeded.
|
* @return `true` if adding *value* to the counter in *cell* succeeded.
|
||||||
*
|
*
|
||||||
* @pre `cell < Size()`
|
* @pre `cell < Size()`
|
||||||
*/
|
*/
|
||||||
bool Increment(size_type cell, count_type value = 1);
|
bool Increment(size_type cell, count_type value = 1);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Decrements a given cell.
|
* Decrements a given cell.
|
||||||
*
|
*
|
||||||
* @param cell The cell to decrement.
|
* @param cell The cell to decrement.
|
||||||
*
|
*
|
||||||
* @param value The value to subtract from the current counter in *cell*.
|
* @param value The value to subtract from the current counter in *cell*.
|
||||||
*
|
*
|
||||||
* @return `true` if subtracting *value* from the counter in *cell* succeeded.
|
* @return `true` if subtracting *value* from the counter in *cell* succeeded.
|
||||||
*
|
*
|
||||||
* @pre `cell < Size()`
|
* @pre `cell < Size()`
|
||||||
*/
|
*/
|
||||||
bool Decrement(size_type cell, count_type value = 1);
|
bool Decrement(size_type cell, count_type value = 1);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Retrieves the counter of a given cell.
|
* Retrieves the counter of a given cell.
|
||||||
*
|
*
|
||||||
* @param cell The cell index to retrieve the count for.
|
* @param cell The cell index to retrieve the count for.
|
||||||
*
|
*
|
||||||
* @return The counter associated with *cell*.
|
* @return The counter associated with *cell*.
|
||||||
*
|
*
|
||||||
* @pre `cell < Size()`
|
* @pre `cell < Size()`
|
||||||
*/
|
*/
|
||||||
count_type Count(size_type cell) const;
|
count_type Count(size_type cell) const;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Retrieves the number of cells in the storage.
|
* Retrieves the number of cells in the storage.
|
||||||
*
|
*
|
||||||
* @return The number of cells.
|
* @return The number of cells.
|
||||||
*/
|
*/
|
||||||
size_type Size() const;
|
size_type Size() const;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Retrieves the counter width.
|
* Retrieves the counter width.
|
||||||
*
|
*
|
||||||
* @return The number of bits per counter.
|
* @return The number of bits per counter.
|
||||||
*/
|
*/
|
||||||
size_t Width() const;
|
size_t Width() const;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Computes the maximum counter value.
|
* Computes the maximum counter value.
|
||||||
*
|
*
|
||||||
* @return The maximum counter value based on the width.
|
* @return The maximum counter value based on the width.
|
||||||
*/
|
*/
|
||||||
size_t Max() const;
|
size_t Max() const;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Merges another counter vector into this instance by *adding* the counters
|
* Merges another counter vector into this instance by *adding* the
|
||||||
* of each cells.
|
* counters of each cell.
|
||||||
*
|
*
|
||||||
* @param other The counter vector to merge into this instance.
|
* @param other The counter vector to merge into this instance.
|
||||||
*
|
*
|
||||||
* @return A reference to `*this`.
|
* @return A reference to `*this`.
|
||||||
*
|
*
|
||||||
* @pre `Size() == other.Size() && Width() == other.Width()`
|
* @pre `Size() == other.Size() && Width() == other.Width()`
|
||||||
*/
|
*/
|
||||||
CounterVector& Merge(const CounterVector& other);
|
CounterVector& Merge(const CounterVector& other);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* An alias for ::Merge.
|
* An alias for ::Merge.
|
||||||
*/
|
*/
|
||||||
CounterVector& operator|=(const CounterVector& other);
|
CounterVector& operator|=(const CounterVector& other);
|
||||||
|
|
||||||
friend CounterVector operator|(const CounterVector& x,
|
/**
|
||||||
const CounterVector& y);
|
* Serializes the counter vector.
|
||||||
|
*
|
||||||
|
* @param info The serialization information to use.
|
||||||
|
*
|
||||||
|
* @return True if successful.
|
||||||
|
*/
|
||||||
|
bool Serialize(SerialInfo* info) const;
|
||||||
|
|
||||||
bool Serialize(SerialInfo* info) const;
|
/**
|
||||||
static CounterVector* Unserialize(UnserialInfo* info);
|
* Unserializes the counter vector.
|
||||||
|
*
|
||||||
|
* @param info The serialization information to use.
|
||||||
|
*
|
||||||
|
* @return The unserialized counter vector, or null if an error
|
||||||
|
* occurred.
|
||||||
|
*/
|
||||||
|
static CounterVector* Unserialize(UnserialInfo* info);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
DECLARE_SERIAL(CounterVector);
|
friend CounterVector operator|(const CounterVector& x,
|
||||||
|
const CounterVector& y);
|
||||||
|
|
||||||
CounterVector() { }
|
CounterVector() { }
|
||||||
|
|
||||||
|
DECLARE_SERIAL(CounterVector);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
BitVector* bits_;
|
CounterVector& operator=(const CounterVector&); // Disable.
|
||||||
size_t width_;
|
|
||||||
|
BitVector* bits;
|
||||||
|
size_t width;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,66 +1,70 @@
|
||||||
|
// See the file "COPYING" in the main distribution directory for copyright.
|
||||||
|
|
||||||
#include <typeinfo>
|
#include <typeinfo>
|
||||||
|
|
||||||
#include "Hasher.h"
|
#include "Hasher.h"
|
||||||
|
|
||||||
#include "digest.h"
|
#include "digest.h"
|
||||||
|
|
||||||
using namespace probabilistic;
|
using namespace probabilistic;
|
||||||
|
|
||||||
Hasher::UHF::UHF(size_t seed, const std::string& extra)
|
UHF::UHF(size_t seed, const std::string& extra)
|
||||||
: h_(compute_seed(seed, extra))
|
: h(compute_seed(seed, extra))
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
Hasher::digest Hasher::UHF::hash(const void* x, size_t n) const
|
Hasher::digest UHF::hash(const void* x, size_t n) const
|
||||||
{
|
{
|
||||||
assert(n <= UHASH_KEY_SIZE);
|
assert(n <= UHASH_KEY_SIZE);
|
||||||
return n == 0 ? 0 : h_(x, n);
|
return n == 0 ? 0 : h(x, n);
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t Hasher::UHF::compute_seed(size_t seed, const std::string& extra)
|
size_t UHF::compute_seed(size_t seed, const std::string& extra)
|
||||||
{
|
{
|
||||||
u_char buf[SHA256_DIGEST_LENGTH];
|
u_char buf[SHA256_DIGEST_LENGTH];
|
||||||
SHA256_CTX ctx;
|
SHA256_CTX ctx;
|
||||||
sha256_init(&ctx);
|
sha256_init(&ctx);
|
||||||
|
|
||||||
if ( extra.empty() )
|
if ( extra.empty() )
|
||||||
{
|
{
|
||||||
unsigned int first_seed = initial_seed();
|
unsigned int first_seed = initial_seed();
|
||||||
sha256_update(&ctx, &first_seed, sizeof(first_seed));
|
sha256_update(&ctx, &first_seed, sizeof(first_seed));
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
sha256_update(&ctx, extra.c_str(), extra.size());
|
|
||||||
}
|
|
||||||
sha256_update(&ctx, &seed, sizeof(seed));
|
|
||||||
sha256_final(&ctx, buf);
|
|
||||||
// Take the first sizeof(size_t) bytes as seed.
|
|
||||||
return *reinterpret_cast<size_t*>(buf);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
else
|
||||||
|
sha256_update(&ctx, extra.c_str(), extra.size());
|
||||||
|
|
||||||
|
sha256_update(&ctx, &seed, sizeof(seed));
|
||||||
|
sha256_final(&ctx, buf);
|
||||||
|
|
||||||
|
// Take the first sizeof(size_t) bytes as seed.
|
||||||
|
return *reinterpret_cast<size_t*>(buf);
|
||||||
|
}
|
||||||
|
|
||||||
Hasher* Hasher::Create(size_t k, const std::string& name)
|
Hasher* Hasher::Create(size_t k, const std::string& name)
|
||||||
{
|
{
|
||||||
return new DefaultHasher(k, name);
|
return new DefaultHasher(k, name);
|
||||||
}
|
}
|
||||||
|
|
||||||
Hasher::Hasher(size_t k, const std::string& name)
|
Hasher::Hasher(size_t k, const std::string& arg_name)
|
||||||
: k_(k), name_(name)
|
: k(k)
|
||||||
{
|
{
|
||||||
|
name = arg_name;
|
||||||
}
|
}
|
||||||
|
|
||||||
DefaultHasher::DefaultHasher(size_t k, const std::string& name)
|
DefaultHasher::DefaultHasher(size_t k, const std::string& name)
|
||||||
: Hasher(k, name)
|
: Hasher(k, name)
|
||||||
{
|
{
|
||||||
for ( size_t i = 0; i < k; ++i )
|
for ( size_t i = 0; i < k; ++i )
|
||||||
hash_functions_.push_back(UHF(i, name));
|
hash_functions.push_back(UHF(i, name));
|
||||||
}
|
}
|
||||||
|
|
||||||
Hasher::digest_vector DefaultHasher::Hash(const void* x, size_t n) const
|
Hasher::digest_vector DefaultHasher::Hash(const void* x, size_t n) const
|
||||||
{
|
{
|
||||||
digest_vector h(K(), 0);
|
digest_vector h(K(), 0);
|
||||||
|
|
||||||
for ( size_t i = 0; i < h.size(); ++i )
|
for ( size_t i = 0; i < h.size(); ++i )
|
||||||
h[i] = hash_functions_[i](x, n);
|
h[i] = hash_functions[i](x, n);
|
||||||
|
|
||||||
return h;
|
return h;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -73,24 +77,25 @@ bool DefaultHasher::Equals(const Hasher* other) const
|
||||||
{
|
{
|
||||||
if ( typeid(*this) != typeid(*other) )
|
if ( typeid(*this) != typeid(*other) )
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
const DefaultHasher* o = static_cast<const DefaultHasher*>(other);
|
const DefaultHasher* o = static_cast<const DefaultHasher*>(other);
|
||||||
return hash_functions_ == o->hash_functions_;
|
return hash_functions == o->hash_functions;
|
||||||
}
|
}
|
||||||
|
|
||||||
DoubleHasher::DoubleHasher(size_t k, const std::string& name)
|
DoubleHasher::DoubleHasher(size_t k, const std::string& name)
|
||||||
: Hasher(k, name),
|
: Hasher(k, name), h1(1, name), h2(2, name)
|
||||||
h1_(1, name),
|
|
||||||
h2_(2, name)
|
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
Hasher::digest_vector DoubleHasher::Hash(const void* x, size_t n) const
|
Hasher::digest_vector DoubleHasher::Hash(const void* x, size_t n) const
|
||||||
{
|
{
|
||||||
digest h1 = h1_(x, n);
|
digest d1 = h1(x, n);
|
||||||
digest h2 = h2_(x, n);
|
digest d2 = h2(x, n);
|
||||||
digest_vector h(K(), 0);
|
digest_vector h(K(), 0);
|
||||||
|
|
||||||
for ( size_t i = 0; i < h.size(); ++i )
|
for ( size_t i = 0; i < h.size(); ++i )
|
||||||
h[i] = h1 + i * h2;
|
h[i] = d1 + i * d2;
|
||||||
|
|
||||||
return h;
|
return h;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -103,7 +108,7 @@ bool DoubleHasher::Equals(const Hasher* other) const
|
||||||
{
|
{
|
||||||
if ( typeid(*this) != typeid(*other) )
|
if ( typeid(*this) != typeid(*other) )
|
||||||
return false;
|
return false;
|
||||||
const DoubleHasher* o = static_cast<const DoubleHasher*>(other);
|
|
||||||
return h1_ == o->h1_ && h2_ == o->h2_;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
const DoubleHasher* o = static_cast<const DoubleHasher*>(other);
|
||||||
|
return h1 == o->h1 && h2 == o->h2;
|
||||||
|
}
|
||||||
|
|
|
@ -1,5 +1,7 @@
|
||||||
#ifndef Hasher_h
|
// See the file "COPYING" in the main distribution directory for copyright.
|
||||||
#define Hasher_h
|
|
||||||
|
#ifndef PROBABILISTIC_HASHER_H
|
||||||
|
#define PROBABILISTIC_HASHER_H
|
||||||
|
|
||||||
#include "Hash.h"
|
#include "Hash.h"
|
||||||
#include "H3.h"
|
#include "H3.h"
|
||||||
|
@ -7,123 +9,197 @@
|
||||||
namespace probabilistic {
|
namespace probabilistic {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The abstract base class for hashers, i.e., constructs which hash elements
|
* Abstract base class for hashers. A hasher creates a family of hash
|
||||||
* *k* times.
|
* functions to hash an element *k* times.
|
||||||
*/
|
*/
|
||||||
class Hasher {
|
class Hasher {
|
||||||
public:
|
public:
|
||||||
typedef hash_t digest;
|
typedef hash_t digest;
|
||||||
typedef std::vector<digest> digest_vector;
|
typedef std::vector<digest> digest_vector;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructs the hashing policy used by the implementation.
|
* Destructor.
|
||||||
*
|
*/
|
||||||
* @todo This factory function exists because the HashingPolicy class
|
virtual ~Hasher() { }
|
||||||
* hierarchy is not yet serializable.
|
|
||||||
*/
|
/**
|
||||||
|
* Computes hash values for an element.
|
||||||
|
*
|
||||||
|
* @param x The element to hash.
|
||||||
|
*
|
||||||
|
* @return Vector of *k* hash values.
|
||||||
|
*/
|
||||||
|
template <typename T>
|
||||||
|
digest_vector operator()(const T& x) const
|
||||||
|
{
|
||||||
|
return Hash(&x, sizeof(T));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Computes the hashes for a set of bytes.
|
||||||
|
*
|
||||||
|
* @param x Pointer to first byte to hash.
|
||||||
|
*
|
||||||
|
* @param n Number of bytes to hash.
|
||||||
|
*
|
||||||
|
* @return Vector of *k* hash values.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
virtual digest_vector Hash(const void* x, size_t n) const = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a deep copy of the hasher.
|
||||||
|
*/
|
||||||
|
virtual Hasher* Clone() const = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns true if two hashers are identical.
|
||||||
|
*/
|
||||||
|
virtual bool Equals(const Hasher* other) const = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the number *k* of hash functions the hasher applies.
|
||||||
|
*/
|
||||||
|
size_t K() const { return k; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the hasher's name. TODO: What's this?
|
||||||
|
*/
|
||||||
|
const std::string& Name() const { return name; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructs the hasher used by the implementation. This hardcodes a
|
||||||
|
* specific hashing policy. It exists only because the HashingPolicy
|
||||||
|
* class hierarchy is not yet serializable.
|
||||||
|
*
|
||||||
|
* @param k The number of hash functions to apply.
|
||||||
|
*
|
||||||
|
* @param name The hasher's name.
|
||||||
|
*
|
||||||
|
* @return Returns a new hasher instance.
|
||||||
|
*/
|
||||||
static Hasher* Create(size_t k, const std::string& name);
|
static Hasher* Create(size_t k, const std::string& name);
|
||||||
|
|
||||||
virtual ~Hasher() { }
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
digest_vector operator()(const T& x) const
|
|
||||||
{
|
|
||||||
return Hash(&x, sizeof(T));
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual digest_vector Hash(const void* x, size_t n) const = 0;
|
|
||||||
|
|
||||||
virtual Hasher* Clone() const = 0;
|
|
||||||
|
|
||||||
virtual bool Equals(const Hasher* other) const = 0;
|
|
||||||
|
|
||||||
size_t K() const { return k_; }
|
|
||||||
const std::string& Name() const { return name_; }
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
/**
|
Hasher(size_t k, const std::string& name);
|
||||||
* A universal hash function family.
|
|
||||||
*/
|
|
||||||
class UHF {
|
|
||||||
public:
|
|
||||||
/**
|
|
||||||
* Constructs an H3 hash function seeded with a given seed and an optional
|
|
||||||
* extra seed to replace the initial Bro seed.
|
|
||||||
*
|
|
||||||
* @param seed The seed to use for this instance.
|
|
||||||
*
|
|
||||||
* @param extra If not empty, this string replaces the initial Bro
|
|
||||||
* seed as the basis from which the seed for this instance is
|
|
||||||
* computed; it is hashed in place of the initial seed, together
|
|
||||||
* with *seed*.
|
|
||||||
*/
|
|
||||||
UHF(size_t seed, const std::string& extra = "");
|
|
||||||
|
|
||||||
template <typename T>
|
private:
|
||||||
digest operator()(const T& x) const
|
const size_t k;
|
||||||
{
|
std::string name;
|
||||||
return hash(&x, sizeof(T));
|
|
||||||
}
|
|
||||||
|
|
||||||
digest operator()(const void* x, size_t n) const
|
|
||||||
{
|
|
||||||
return hash(x, n);
|
|
||||||
}
|
|
||||||
|
|
||||||
friend bool operator==(const UHF& x, const UHF& y)
|
|
||||||
{
|
|
||||||
return x.h_ == y.h_;
|
|
||||||
}
|
|
||||||
|
|
||||||
friend bool operator!=(const UHF& x, const UHF& y)
|
|
||||||
{
|
|
||||||
return ! (x == y);
|
|
||||||
}
|
|
||||||
|
|
||||||
digest hash(const void* x, size_t n) const;
|
|
||||||
|
|
||||||
private:
|
|
||||||
static size_t compute_seed(size_t seed, const std::string& extra);
|
|
||||||
|
|
||||||
H3<digest, UHASH_KEY_SIZE> h_;
|
|
||||||
};
|
|
||||||
|
|
||||||
Hasher(size_t k, const std::string& name);
|
|
||||||
|
|
||||||
private:
|
|
||||||
const size_t k_;
|
|
||||||
std::string name_;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The default hashing policy. Performs *k* hash function computations.
|
* A universal hash function family. This is a helper class that Hasher
|
||||||
|
* implementations can use in their implementation.
|
||||||
|
*/
|
||||||
|
class UHF {
|
||||||
|
public:
|
||||||
|
/**
|
||||||
|
* Constructs an H3 hash function seeded with a given seed and an
|
||||||
|
* optional extra seed to replace the initial Bro seed.
|
||||||
|
*
|
||||||
|
* @param seed The seed to use for this instance.
|
||||||
|
*
|
||||||
|
* @param extra If not empty, this string replaces the initial Bro
|
||||||
|
* seed as the basis from which the seed for this instance is
|
||||||
|
* computed.
|
||||||
|
*/
|
||||||
|
UHF(size_t seed, const std::string& extra = "");
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
Hasher::digest operator()(const T& x) const
|
||||||
|
{
|
||||||
|
return hash(&x, sizeof(T));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Computes hash values for an element.
|
||||||
|
*
|
||||||
|
* @param x The element to hash.
|
||||||
|
*
|
||||||
|
* @return The hash value of the element.
|
||||||
|
*/
|
||||||
|
Hasher::digest operator()(const void* x, size_t n) const
|
||||||
|
{
|
||||||
|
return hash(x, n);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Computes the hash for a set of bytes.
|
||||||
|
*
|
||||||
|
* @param x Pointer to first byte to hash.
|
||||||
|
*
|
||||||
|
* @param n Number of bytes to hash.
|
||||||
|
*
|
||||||
|
* @return The hash value of the bytes.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
Hasher::digest hash(const void* x, size_t n) const;
|
||||||
|
|
||||||
|
friend bool operator==(const UHF& x, const UHF& y)
|
||||||
|
{
|
||||||
|
return x.h == y.h;
|
||||||
|
}
|
||||||
|
|
||||||
|
friend bool operator!=(const UHF& x, const UHF& y)
|
||||||
|
{
|
||||||
|
return ! (x == y);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
static size_t compute_seed(size_t seed, const std::string& extra);
|
||||||
|
|
||||||
|
H3<Hasher::digest, UHASH_KEY_SIZE> h;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A hasher implementing the default hashing policy. Uses *k* separate hash
|
||||||
|
* functions internally.
|
||||||
*/
|
*/
|
||||||
class DefaultHasher : public Hasher {
|
class DefaultHasher : public Hasher {
|
||||||
public:
|
public:
|
||||||
DefaultHasher(size_t k, const std::string& name);
|
/**
|
||||||
|
* Constructor for a hasher with *k* hash functions.
|
||||||
|
*
|
||||||
|
* @param k The number of hash functions to use.
|
||||||
|
*
|
||||||
|
* @param name The name of the hasher.
|
||||||
|
*/
|
||||||
|
DefaultHasher(size_t k, const std::string& name);
|
||||||
|
|
||||||
virtual digest_vector Hash(const void* x, size_t n) const /* final */;
|
// Overridden from Hasher.
|
||||||
virtual DefaultHasher* Clone() const /* final */;
|
virtual digest_vector Hash(const void* x, size_t n) const /* final */;
|
||||||
virtual bool Equals(const Hasher* other) const /* final */;
|
virtual DefaultHasher* Clone() const /* final */;
|
||||||
|
virtual bool Equals(const Hasher* other) const /* final */;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::vector<UHF> hash_functions_;
|
std::vector<UHF> hash_functions;
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The *double-hashing* policy. Uses a linear combination of two hash functions.
|
* The *double-hashing* policy. Uses a linear combination of two hash
|
||||||
|
* functions.
|
||||||
*/
|
*/
|
||||||
class DoubleHasher : public Hasher {
|
class DoubleHasher : public Hasher {
|
||||||
public:
|
public:
|
||||||
DoubleHasher(size_t k, const std::string& name);
|
/**
|
||||||
|
* Constructor for a double hasher with *k* hash functions.
|
||||||
|
*
|
||||||
|
* @param k The number of hash functions to use.
|
||||||
|
*
|
||||||
|
* @param name The name of the hasher.
|
||||||
|
*/
|
||||||
|
DoubleHasher(size_t k, const std::string& name);
|
||||||
|
|
||||||
virtual digest_vector Hash(const void* x, size_t n) const /* final */;
|
// Overridden from Hasher.
|
||||||
virtual DoubleHasher* Clone() const /* final */;
|
virtual digest_vector Hash(const void* x, size_t n) const /* final */;
|
||||||
virtual bool Equals(const Hasher* other) const /* final */;
|
virtual DoubleHasher* Clone() const /* final */;
|
||||||
|
virtual bool Equals(const Hasher* other) const /* final */;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
UHF h1_;
|
UHF h1;
|
||||||
UHF h2_;
|
UHF h2;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -31,18 +31,19 @@ module GLOBAL;
|
||||||
## Returns: A Bloom filter handle.
|
## Returns: A Bloom filter handle.
|
||||||
function bloomfilter_basic_init%(fp: double, capacity: count,
|
function bloomfilter_basic_init%(fp: double, capacity: count,
|
||||||
name: string &default=""%): opaque of bloomfilter
|
name: string &default=""%): opaque of bloomfilter
|
||||||
%{
|
%{
|
||||||
if ( fp < 0.0 || fp > 1.0 )
|
if ( fp < 0.0 || fp > 1.0 )
|
||||||
{
|
{
|
||||||
reporter->Error("false-positive rate must take value between 0 and 1");
|
reporter->Error("false-positive rate must take value between 0 and 1");
|
||||||
return NULL;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t cells = BasicBloomFilter::M(fp, capacity);
|
size_t cells = BasicBloomFilter::M(fp, capacity);
|
||||||
size_t optimal_k = BasicBloomFilter::K(cells, capacity);
|
size_t optimal_k = BasicBloomFilter::K(cells, capacity);
|
||||||
const Hasher* h = Hasher::Create(optimal_k, name->CheckString());
|
const Hasher* h = Hasher::Create(optimal_k, name->CheckString());
|
||||||
return new BloomFilterVal(new BasicBloomFilter(h, cells));
|
|
||||||
%}
|
return new BloomFilterVal(new BasicBloomFilter(h, cells));
|
||||||
|
%}
|
||||||
|
|
||||||
## Creates a counting Bloom filter.
|
## Creates a counting Bloom filter.
|
||||||
##
|
##
|
||||||
|
@ -59,20 +60,22 @@ function bloomfilter_basic_init%(fp: double, capacity: count,
|
||||||
##
|
##
|
||||||
## Returns: A Bloom filter handle.
|
## Returns: A Bloom filter handle.
|
||||||
function bloomfilter_counting_init%(k: count, cells: count, max: count,
|
function bloomfilter_counting_init%(k: count, cells: count, max: count,
|
||||||
name: string &default=""%): opaque of bloomfilter
|
name: string &default=""%): opaque of bloomfilter
|
||||||
%{
|
%{
|
||||||
if ( max == 0 )
|
if ( max == 0 )
|
||||||
{
|
{
|
||||||
reporter->Error("max counter value must be greater than 0");
|
reporter->Error("max counter value must be greater than 0");
|
||||||
return NULL;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
const Hasher* h = Hasher::Create(k, name->CheckString());
|
const Hasher* h = Hasher::Create(k, name->CheckString());
|
||||||
uint16 width = 1;
|
|
||||||
while ( max >>= 1 )
|
uint16 width = 1;
|
||||||
++width;
|
while ( max >>= 1 )
|
||||||
return new BloomFilterVal(new CountingBloomFilter(h, cells, width));
|
++width;
|
||||||
%}
|
|
||||||
|
return new BloomFilterVal(new CountingBloomFilter(h, cells, width));
|
||||||
|
%}
|
||||||
|
|
||||||
## Adds an element to a Bloom filter.
|
## Adds an element to a Bloom filter.
|
||||||
##
|
##
|
||||||
|
@ -80,16 +83,20 @@ function bloomfilter_counting_init%(k: count, cells: count, max: count,
|
||||||
##
|
##
|
||||||
## x: The element to add.
|
## x: The element to add.
|
||||||
function bloomfilter_add%(bf: opaque of bloomfilter, x: any%): any
|
function bloomfilter_add%(bf: opaque of bloomfilter, x: any%): any
|
||||||
%{
|
%{
|
||||||
BloomFilterVal* bfv = static_cast<BloomFilterVal*>(bf);
|
BloomFilterVal* bfv = static_cast<BloomFilterVal*>(bf);
|
||||||
if ( ! bfv->Type() && ! bfv->Typify(x->Type()) )
|
|
||||||
reporter->Error("failed to set Bloom filter type");
|
if ( ! bfv->Type() && ! bfv->Typify(x->Type()) )
|
||||||
else if ( bfv->Type() != x->Type() )
|
reporter->Error("failed to set Bloom filter type");
|
||||||
reporter->Error("incompatible Bloom filter types");
|
|
||||||
else
|
else if ( ! same_type(bfv->Type(), x->Type()) )
|
||||||
bfv->Add(x);
|
reporter->Error("incompatible Bloom filter types");
|
||||||
return NULL;
|
|
||||||
%}
|
else
|
||||||
|
bfv->Add(x);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
%}
|
||||||
|
|
||||||
## Retrieves the counter for a given element in a Bloom filter.
|
## Retrieves the counter for a given element in a Bloom filter.
|
||||||
##
|
##
|
||||||
|
@ -99,16 +106,20 @@ function bloomfilter_add%(bf: opaque of bloomfilter, x: any%): any
|
||||||
##
|
##
|
||||||
## Returns: the counter associated with *x* in *bf*.
|
## Returns: the counter associated with *x* in *bf*.
|
||||||
function bloomfilter_lookup%(bf: opaque of bloomfilter, x: any%): count
|
function bloomfilter_lookup%(bf: opaque of bloomfilter, x: any%): count
|
||||||
%{
|
%{
|
||||||
const BloomFilterVal* bfv = static_cast<const BloomFilterVal*>(bf);
|
const BloomFilterVal* bfv = static_cast<const BloomFilterVal*>(bf);
|
||||||
if ( ! bfv->Type() )
|
|
||||||
reporter->Error("cannot perform lookup on untyped Bloom filter");
|
if ( ! bfv->Type() )
|
||||||
else if ( bfv->Type() != x->Type() )
|
reporter->Error("cannot perform lookup on untyped Bloom filter");
|
||||||
reporter->Error("incompatible Bloom filter types");
|
|
||||||
else
|
else if ( ! same_type(bfv->Type(), x->Type()) )
|
||||||
return new Val(static_cast<uint64>(bfv->Count(x)), TYPE_COUNT);
|
reporter->Error("incompatible Bloom filter types");
|
||||||
return new Val(0, TYPE_COUNT);
|
|
||||||
%}
|
else
|
||||||
|
return new Val(static_cast<uint64>(bfv->Count(x)), TYPE_COUNT);
|
||||||
|
|
||||||
|
return new Val(0, TYPE_COUNT);
|
||||||
|
%}
|
||||||
|
|
||||||
## Merges two Bloom filters.
|
## Merges two Bloom filters.
|
||||||
##
|
##
|
||||||
|
@ -118,13 +129,16 @@ function bloomfilter_lookup%(bf: opaque of bloomfilter, x: any%): count
|
||||||
##
|
##
|
||||||
## Returns: The union of *bf1* and *bf2*.
|
## Returns: The union of *bf1* and *bf2*.
|
||||||
function bloomfilter_merge%(bf1: opaque of bloomfilter,
|
function bloomfilter_merge%(bf1: opaque of bloomfilter,
|
||||||
bf2: opaque of bloomfilter%): opaque of bloomfilter
|
bf2: opaque of bloomfilter%): opaque of bloomfilter
|
||||||
%{
|
%{
|
||||||
const BloomFilterVal* bfv1 = static_cast<const BloomFilterVal*>(bf1);
|
const BloomFilterVal* bfv1 = static_cast<const BloomFilterVal*>(bf1);
|
||||||
const BloomFilterVal* bfv2 = static_cast<const BloomFilterVal*>(bf2);
|
const BloomFilterVal* bfv2 = static_cast<const BloomFilterVal*>(bf2);
|
||||||
if ( bfv1->Type() != bfv2->Type() )
|
|
||||||
reporter->Error("incompatible Bloom filter types");
|
if ( ! same_type(bfv1->Type(), bfv2->Type()) )
|
||||||
else
|
{
|
||||||
return BloomFilterVal::Merge(bfv1, bfv2);
|
reporter->Error("incompatible Bloom filter types");
|
||||||
return NULL;
|
return 0;
|
||||||
%}
|
}
|
||||||
|
|
||||||
|
return BloomFilterVal::Merge(bfv1, bfv2);
|
||||||
|
%}
|
||||||
|
|
20
src/util.cc
20
src/util.cc
|
@ -803,10 +803,10 @@ void init_random_seed(uint32 seed, const char* read_file, const char* write_file
|
||||||
bro_srandom(seed, seeds_done);
|
bro_srandom(seed, seeds_done);
|
||||||
|
|
||||||
if ( ! first_seed_saved )
|
if ( ! first_seed_saved )
|
||||||
{
|
{
|
||||||
first_seed = seed;
|
first_seed = seed;
|
||||||
first_seed_saved = true;
|
first_seed_saved = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( ! hmac_key_set )
|
if ( ! hmac_key_set )
|
||||||
{
|
{
|
||||||
|
@ -820,9 +820,9 @@ void init_random_seed(uint32 seed, const char* read_file, const char* write_file
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned int initial_seed()
|
unsigned int initial_seed()
|
||||||
{
|
{
|
||||||
return first_seed;
|
return first_seed;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool have_random_seed()
|
bool have_random_seed()
|
||||||
{
|
{
|
||||||
|
@ -830,7 +830,7 @@ bool have_random_seed()
|
||||||
}
|
}
|
||||||
|
|
||||||
long int bro_prng(long int state)
|
long int bro_prng(long int state)
|
||||||
{
|
{
|
||||||
// Use our own simple linear congruence PRNG to make sure we are
|
// Use our own simple linear congruence PRNG to make sure we are
|
||||||
// predictable across platforms.
|
// predictable across platforms.
|
||||||
static const long int m = 2147483647;
|
static const long int m = 2147483647;
|
||||||
|
@ -844,14 +844,14 @@ long int bro_prng(long int state)
|
||||||
state += m;
|
state += m;
|
||||||
|
|
||||||
return state;
|
return state;
|
||||||
}
|
}
|
||||||
|
|
||||||
long int bro_random()
|
long int bro_random()
|
||||||
{
|
{
|
||||||
if ( ! bro_rand_determistic )
|
if ( ! bro_rand_determistic )
|
||||||
return random(); // Use system PRNG.
|
return random(); // Use system PRNG.
|
||||||
|
|
||||||
bro_rand_state = bro_prng(bro_rand_state);
|
bro_rand_state = bro_prng(bro_rand_state);
|
||||||
|
|
||||||
return bro_rand_state;
|
return bro_rand_state;
|
||||||
}
|
}
|
||||||
|
|
|
@ -166,15 +166,15 @@ extern void init_random_seed(uint32 seed, const char* load_file,
|
||||||
const char* write_file);
|
const char* write_file);
|
||||||
|
|
||||||
// Retrieves the initial seed computed after the very first call to
|
// Retrieves the initial seed computed after the very first call to
|
||||||
// init_random_seed(). Repeated calls to init_random_seed() will not affect the
|
// init_random_seed(). Repeated calls to init_random_seed() will not affect
|
||||||
// return value of this function.
|
// the return value of this function.
|
||||||
unsigned int initial_seed();
|
unsigned int initial_seed();
|
||||||
|
|
||||||
// Returns true if the user explicitly set a seed via init_random_seed();
|
// Returns true if the user explicitly set a seed via init_random_seed();
|
||||||
extern bool have_random_seed();
|
extern bool have_random_seed();
|
||||||
|
|
||||||
// A simple linear congruence PRNG. It takes its state as argument and returns
|
// A simple linear congruence PRNG. It takes its state as argument and
|
||||||
// a new random value, which can serve as state for subsequent calls.
|
// returns a new random value, which can serve as state for subsequent calls.
|
||||||
long int bro_prng(long int state);
|
long int bro_prng(long int state);
|
||||||
|
|
||||||
// Replacement for the system random(), to which it normally falls back
|
// Replacement for the system random(), to which it normally falls back
|
||||||
|
|
|
@ -1,3 +1,9 @@
|
||||||
|
error: incompatible Bloom filter types
|
||||||
|
error: incompatible Bloom filter types
|
||||||
|
error: incompatible Bloom filter types
|
||||||
|
error: incompatible Bloom filter types
|
||||||
|
error: false-positive rate must take value between 0 and 1
|
||||||
|
error: false-positive rate must take value between 0 and 1
|
||||||
0
|
0
|
||||||
1
|
1
|
||||||
1
|
1
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
# @TEST-EXEC: bro -b %INPUT >output
|
# @TEST-EXEC: bro -b %INPUT >output 2>&1
|
||||||
# @TEST-EXEC: btest-diff output
|
# @TEST-EXEC: btest-diff output
|
||||||
|
|
||||||
function test_basic_bloom_filter()
|
function test_basic_bloom_filter()
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue