Add more serialization implementation.

This commit is contained in:
Matthias Vallentin 2013-06-04 15:30:27 -07:00
parent a5572dd66f
commit 751cf61293
6 changed files with 129 additions and 42 deletions

View file

@ -46,12 +46,23 @@ CounterVector::size_type CounterVector::Size() const
return bits_->Blocks() / width_; return bits_->Blocks() / width_;
} }
// Public serialization entry point for a counter vector: forwards *info*
// to the generic SerialObj::Serialize() and reports its result.
bool CounterVector::Serialize(SerialInfo* info) const
	{
	const bool ok = SerialObj::Serialize(info);
	return ok;
	}
// Public unserialization entry point for a counter vector.
//
// Asks SerialObj::Unserialize() for an object tagged SER_COUNTERVECTOR and
// hands the result back as a CounterVector pointer. Whatever pointer value
// the base call produces is passed through unchanged.
CounterVector* CounterVector::Unserialize(UnserialInfo* info)
	{
	// A base-to-derived conversion must use static_cast so that the
	// compiler applies any required pointer adjustment; reinterpret_cast
	// merely relabels the address.
	return static_cast<CounterVector*>(
		SerialObj::Unserialize(info, SER_COUNTERVECTOR));
	}
IMPLEMENT_SERIAL(CounterVector, SER_COUNTERVECTOR) IMPLEMENT_SERIAL(CounterVector, SER_COUNTERVECTOR)
bool CounterVector::DoSerialize(SerialInfo* info) const bool CounterVector::DoSerialize(SerialInfo* info) const
{ {
DO_SERIALIZE(SER_COUNTERVECTOR, SerialObj); DO_SERIALIZE(SER_COUNTERVECTOR, SerialObj);
if ( ! SERIALIZE(&bits_) ) if ( ! SERIALIZE(bits_) )
return false; return false;
return SERIALIZE(static_cast<uint64>(width_)); return SERIALIZE(static_cast<uint64>(width_));
} }
@ -60,9 +71,9 @@ bool CounterVector::DoUnserialize(UnserialInfo* info)
{ {
DO_UNSERIALIZE(SerialObj); DO_UNSERIALIZE(SerialObj);
return false; return false;
// TODO: Ask Robin how to unserialize non-pointer members. bits_ = BitVector::Unserialize(info);
//if ( ! UNSERIALIZE(&bits_) ) if ( ! bits_ )
// return false; return false;
uint64 width; uint64 width;
if ( ! UNSERIALIZE(&width) ) if ( ! UNSERIALIZE(&width) )
return false; return false;
@ -90,6 +101,18 @@ HashPolicy::HashVector DoubleHashing::Hash(const void* x, size_t n) const
return h; return h;
} }
// Constructs the common Bloom filter state.
//
// @param k The value handed to the hash_policy constructor; DoSerialize()
// later reads it back through hash_->K().
BloomFilter::BloomFilter(size_t k)
	: hash_(new hash_policy(k))
	{
	// Start the element counter at zero so that it holds a defined value
	// before anything is added or serialized.
	elements_ = 0;
	}
// Releases the hash policy held in hash_.
BloomFilter::~BloomFilter()
	{
	// Deleting a null pointer is a no-op, so no explicit guard is needed.
	delete hash_;
	}
bool BloomFilter::Serialize(SerialInfo* info) const bool BloomFilter::Serialize(SerialInfo* info) const
{ {
return SerialObj::Serialize(info); return SerialObj::Serialize(info);
@ -101,24 +124,21 @@ BloomFilter* BloomFilter::Unserialize(UnserialInfo* info)
SerialObj::Unserialize(info, SER_BLOOMFILTER)); SerialObj::Unserialize(info, SER_BLOOMFILTER));
} }
// FIXME: should abstract base classes also have IMPLEMENT_SERIAL?
//IMPLEMENT_SERIAL(BloomFilter, SER_BLOOMFILTER)
bool BloomFilter::DoSerialize(SerialInfo* info) const bool BloomFilter::DoSerialize(SerialInfo* info) const
{ {
DO_SERIALIZE(SER_BLOOMFILTER, SerialObj); DO_SERIALIZE(SER_BLOOMFILTER, SerialObj);
// TODO: Make the hash policy serializable. if ( ! SERIALIZE(static_cast<uint16>(hash_->K())) )
//if ( ! SERIALIZE(hash_) ) return false;
// return false; return SERIALIZE(static_cast<uint16>(elements_));
return SERIALIZE(static_cast<uint64>(elements_));
} }
bool BloomFilter::DoUnserialize(UnserialInfo* info) bool BloomFilter::DoUnserialize(UnserialInfo* info)
{ {
DO_UNSERIALIZE(SerialObj); DO_UNSERIALIZE(SerialObj);
// TODO: Make the hash policy serializable. uint16 k;
//if ( ! hash_ = HashPolicy::Unserialize(info) ) if ( ! UNSERIALIZE(&k) )
// return false; return false;
hash_ = new hash_policy(static_cast<size_t>(k));
uint64 elements; uint64 elements;
if ( UNSERIALIZE(&elements) ) if ( UNSERIALIZE(&elements) )
return false; return false;
@ -126,7 +146,7 @@ bool BloomFilter::DoUnserialize(UnserialInfo* info)
return true; return true;
} }
size_t BasicBloomFilter::Cells(double fp, size_t capacity) size_t BasicBloomFilter::M(double fp, size_t capacity)
{ {
double ln2 = std::log(2); double ln2 = std::log(2);
return std::ceil(-(capacity * std::log(fp) / ln2 / ln2)); return std::ceil(-(capacity * std::log(fp) / ln2 / ln2));
@ -138,9 +158,16 @@ size_t BasicBloomFilter::K(size_t cells, size_t capacity)
return round<size_t>(frac * std::log(2)); return round<size_t>(frac * std::log(2));
} }
BasicBloomFilter::BasicBloomFilter(size_t cells, HashPolicy* hash) BasicBloomFilter::BasicBloomFilter(double fp, size_t capacity)
: BloomFilter(hash), bits_(cells) : BloomFilter(K(M(fp, capacity), capacity))
{ {
bits_ = new BitVector(M(fp, capacity));
}
BasicBloomFilter::BasicBloomFilter(size_t cells, size_t capacity)
: BloomFilter(K(cells, capacity))
{
bits_ = new BitVector(cells);
} }
IMPLEMENT_SERIAL(BasicBloomFilter, SER_BASICBLOOMFILTER) IMPLEMENT_SERIAL(BasicBloomFilter, SER_BASICBLOOMFILTER)
@ -148,38 +175,50 @@ IMPLEMENT_SERIAL(BasicBloomFilter, SER_BASICBLOOMFILTER)
bool BasicBloomFilter::DoSerialize(SerialInfo* info) const bool BasicBloomFilter::DoSerialize(SerialInfo* info) const
{ {
DO_SERIALIZE(SER_BASICBLOOMFILTER, BloomFilter); DO_SERIALIZE(SER_BASICBLOOMFILTER, BloomFilter);
// TODO: Make the hash policy serializable. return SERIALIZE(bits_);
//if ( ! SERIALIZE(&bits_) )
// return false;
return true;
} }
bool BasicBloomFilter::DoUnserialize(UnserialInfo* info) bool BasicBloomFilter::DoUnserialize(UnserialInfo* info)
{ {
DO_UNSERIALIZE(BloomFilter); DO_UNSERIALIZE(BloomFilter);
// TODO: Non-pointer member deserialization? bits_ = BitVector::Unserialize(info);
return true; return bits_ == NULL;
} }
void BasicBloomFilter::AddImpl(const HashPolicy::HashVector& h) void BasicBloomFilter::AddImpl(const HashPolicy::HashVector& h)
{ {
for ( size_t i = 0; i < h.size(); ++i ) for ( size_t i = 0; i < h.size(); ++i )
bits_.set(h[i] % h.size()); bits_->Set(h[i] % h.size());
} }
size_t BasicBloomFilter::CountImpl(const HashPolicy::HashVector& h) const size_t BasicBloomFilter::CountImpl(const HashPolicy::HashVector& h) const
{ {
for ( size_t i = 0; i < h.size(); ++i ) for ( size_t i = 0; i < h.size(); ++i )
if ( ! bits_[h[i] % h.size()] ) if ( ! (*bits_)[h[i] % h.size()] )
return 0; return 0;
return 1; return 1;
} }
IMPLEMENT_SERIAL(CountingBloomFilter, SER_COUNTINGBLOOMFILTER)
// Writes the counting Bloom filter: first the BloomFilter base state, then
// the counter vector.
//
// Returns: true if everything was written successfully.
bool CountingBloomFilter::DoSerialize(SerialInfo* info) const
	{
	// Use this class's own serial tag (the one passed to IMPLEMENT_SERIAL),
	// not the basic Bloom filter's.
	DO_SERIALIZE(SER_COUNTINGBLOOMFILTER, BloomFilter);

	// DoUnserialize() reads the cells back via CounterVector::Unserialize(),
	// so write them with the matching CounterVector::Serialize().
	return cells_->Serialize(info);
	}
// Reads the counting Bloom filter: first the BloomFilter base state, then
// the counter vector.
//
// Returns: true if the counter vector could be restored, false otherwise.
bool CountingBloomFilter::DoUnserialize(UnserialInfo* info)
	{
	DO_UNSERIALIZE(BloomFilter);
	cells_ = CounterVector::Unserialize(info);

	// A null result means the read failed, so success is "not null".
	return cells_ != NULL;
	}
void CountingBloomFilter::AddImpl(const HashPolicy::HashVector& h) void CountingBloomFilter::AddImpl(const HashPolicy::HashVector& h)
{ {
for ( size_t i = 0; i < h.size(); ++i ) for ( size_t i = 0; i < h.size(); ++i )
cells_.Increment(h[i] % h.size(), 1); cells_->Increment(h[i] % h.size(), 1);
} }
size_t CountingBloomFilter::CountImpl(const HashPolicy::HashVector& h) const size_t CountingBloomFilter::CountImpl(const HashPolicy::HashVector& h) const
@ -188,7 +227,7 @@ size_t CountingBloomFilter::CountImpl(const HashPolicy::HashVector& h) const
std::numeric_limits<CounterVector::size_type>::max(); std::numeric_limits<CounterVector::size_type>::max();
for ( size_t i = 0; i < h.size(); ++i ) for ( size_t i = 0; i < h.size(); ++i )
{ {
CounterVector::size_type cnt = cells_.Count(h[i] % h.size()); CounterVector::size_type cnt = cells_->Count(h[i] % h.size());
if ( cnt < min ) if ( cnt < min )
min = cnt; min = cnt;
} }

View file

@ -151,9 +151,13 @@ private:
/** /**
* The abstract base class for Bloom filters. * The abstract base class for Bloom filters.
*/ */
class BloomFilter : SerialObj { class BloomFilter : public SerialObj {
public: public:
virtual ~BloomFilter() { delete hash_; } // At this point we won't let the user choose the hash policy, but we might
// open up the interface in the future.
typedef DoubleHashing hash_policy;
virtual ~BloomFilter();
/** /**
* Adds an element of type T to the Bloom filter. * Adds an element of type T to the Bloom filter.
@ -193,10 +197,10 @@ public:
static BloomFilter* Unserialize(UnserialInfo* info); static BloomFilter* Unserialize(UnserialInfo* info);
protected: protected:
DECLARE_SERIAL(BloomFilter); DECLARE_ABSTRACT_SERIAL(BloomFilter);
BloomFilter() { }; BloomFilter() { };
BloomFilter(HashPolicy* hash) : hash_(hash) { } BloomFilter(size_t k);
virtual void AddImpl(const HashPolicy::HashVector& hashes) = 0; virtual void AddImpl(const HashPolicy::HashVector& hashes) = 0;
virtual size_t CountImpl(const HashPolicy::HashVector& hashes) const = 0; virtual size_t CountImpl(const HashPolicy::HashVector& hashes) const = 0;
@ -211,10 +215,42 @@ private:
*/ */
class BasicBloomFilter : public BloomFilter { class BasicBloomFilter : public BloomFilter {
public: public:
static size_t Cells(double fp, size_t capacity); /**
* Computes the number of cells based on a given false-positive rate and
* capacity. In the literature, this parameter often has the name *M*.
*
* @param fp The false-positive rate.
*
* @param capacity The number of expected elements.
*
* Returns: The number of cells needed to support a false-positive rate of *fp*
* with at most *capacity* elements.
*/
static size_t M(double fp, size_t capacity);
/**
* Computes the optimal number of hash functions based on the number of cells
* and expected number of elements.
*
* @param cells The number of cells (*m*).
*
* @param capacity The maximum number of elements.
*
* Returns: the optimal number of hash functions for *cells* cells and at
* most *capacity* elements.
*/
static size_t K(size_t cells, size_t capacity); static size_t K(size_t cells, size_t capacity);
BasicBloomFilter(size_t cells, HashPolicy* hash); /**
* Constructs a basic Bloom filter with a given false-positive rate and
* capacity.
*/
BasicBloomFilter(double fp, size_t capacity);
/**
* Constructs a basic Bloom filter with a given number of cells and capacity.
*/
BasicBloomFilter(size_t cells, size_t capacity);
protected: protected:
DECLARE_SERIAL(BasicBloomFilter); DECLARE_SERIAL(BasicBloomFilter);
@ -225,7 +261,7 @@ protected:
virtual size_t CountImpl(const HashPolicy::HashVector& h) const; virtual size_t CountImpl(const HashPolicy::HashVector& h) const;
private: private:
BitVector bits_; BitVector* bits_;
}; };
/** /**
@ -233,18 +269,18 @@ private:
*/ */
class CountingBloomFilter : public BloomFilter { class CountingBloomFilter : public BloomFilter {
public: public:
CountingBloomFilter(unsigned width, HashPolicy* hash); CountingBloomFilter(unsigned width);
protected: protected:
DECLARE_SERIAL(CountingBloomFilter); DECLARE_SERIAL(CountingBloomFilter);
CountingBloomFilter(); CountingBloomFilter() { }
virtual void AddImpl(const HashPolicy::HashVector& h); virtual void AddImpl(const HashPolicy::HashVector& h);
virtual size_t CountImpl(const HashPolicy::HashVector& h) const; virtual size_t CountImpl(const HashPolicy::HashVector& h) const;
private: private:
CounterVector cells_; CounterVector* cells_;
}; };
#endif #endif

View file

@ -249,6 +249,7 @@ extern OpaqueType* md5_type;
extern OpaqueType* sha1_type; extern OpaqueType* sha1_type;
extern OpaqueType* sha256_type; extern OpaqueType* sha256_type;
extern OpaqueType* entropy_type; extern OpaqueType* entropy_type;
extern OpaqueType* bloomfilter_type;
// Initializes globals that don't pertain to network/event analysis. // Initializes globals that don't pertain to network/event analysis.
extern void init_general_global_var(); extern void init_general_global_var();

View file

@ -518,23 +518,31 @@ bool EntropyVal::DoUnserialize(UnserialInfo* info)
return true; return true;
} }
// Default constructor: creates an opaque value of type bloomfilter_type
// that does not yet hold a Bloom filter.
BloomFilterVal::BloomFilterVal() : OpaqueVal(bloomfilter_type)
	{
	// The destructor deletes bloom_filter_, so it must never be left
	// uninitialized.
	bloom_filter_ = NULL;
	}
BloomFilterVal::BloomFilterVal(OpaqueType* t) : OpaqueVal(t) BloomFilterVal::BloomFilterVal(OpaqueType* t) : OpaqueVal(t)
{ {
} }
// Releases the Bloom filter held in bloom_filter_.
BloomFilterVal::~BloomFilterVal()
	{
	// Deleting a null pointer is a no-op, so no explicit guard is needed.
	delete bloom_filter_;
	}
IMPLEMENT_SERIAL(BloomFilterVal, SER_BLOOMFILTER_VAL); IMPLEMENT_SERIAL(BloomFilterVal, SER_BLOOMFILTER_VAL);
bool BloomFilterVal::DoSerialize(SerialInfo* info) const bool BloomFilterVal::DoSerialize(SerialInfo* info) const
{ {
DO_SERIALIZE(SER_BLOOMFILTER_VAL, OpaqueVal); DO_SERIALIZE(SER_BLOOMFILTER_VAL, OpaqueVal);
// TODO: implement. return SERIALIZE(bloom_filter_);
return true;
} }
bool BloomFilterVal::DoUnserialize(UnserialInfo* info) bool BloomFilterVal::DoUnserialize(UnserialInfo* info)
{ {
DO_UNSERIALIZE(OpaqueVal); DO_UNSERIALIZE(OpaqueVal);
// TODO: implement. bloom_filter_ = BloomFilter::Unserialize(info);
return true; return bloom_filter_ == NULL;
} }

View file

@ -112,6 +112,7 @@ private:
class BloomFilterVal : public OpaqueVal { class BloomFilterVal : public OpaqueVal {
public: public:
BloomFilterVal(); BloomFilterVal();
~BloomFilterVal();
protected: protected:
friend class Val; friend class Val;

View file

@ -53,6 +53,7 @@ SERIAL_IS(BITVECTOR, 0x1500)
SERIAL_IS(COUNTERVECTOR, 0xa000) SERIAL_IS(COUNTERVECTOR, 0xa000)
SERIAL_IS(BLOOMFILTER, 0xa100) SERIAL_IS(BLOOMFILTER, 0xa100)
SERIAL_IS(BASICBLOOMFILTER, 0xa200) SERIAL_IS(BASICBLOOMFILTER, 0xa200)
SERIAL_IS(COUNTINGBLOOMFILTER, 0xa300)
// These are the externally visible types. // These are the externally visible types.
const SerialType SER_NONE = 0; const SerialType SER_NONE = 0;
@ -211,5 +212,6 @@ SERIAL_CONST2(BITVECTOR)
SERIAL_CONST2(COUNTERVECTOR) SERIAL_CONST2(COUNTERVECTOR)
SERIAL_CONST2(BLOOMFILTER) SERIAL_CONST2(BLOOMFILTER)
SERIAL_CONST2(BASICBLOOMFILTER) SERIAL_CONST2(BASICBLOOMFILTER)
SERIAL_CONST2(COUNTINGBLOOMFILTER)
#endif #endif