mirror of
https://github.com/zeek/zeek.git
synced 2025-10-06 08:38:20 +00:00
Add more serialization implementation.
This commit is contained in:
parent
a5572dd66f
commit
751cf61293
6 changed files with 129 additions and 42 deletions
|
@ -46,12 +46,23 @@ CounterVector::size_type CounterVector::Size() const
|
|||
return bits_->Blocks() / width_;
|
||||
}
|
||||
|
||||
// Serializes this counter vector through the generic SerialObj entry point
// and returns whatever SerialObj::Serialize() reports.
bool CounterVector::Serialize(SerialInfo* info) const
	{
	const bool ok = SerialObj::Serialize(info);
	return ok;
	}
|
||||
|
||||
// Reads an object tagged SER_COUNTERVECTOR via SerialObj::Unserialize() and
// hands it back typed as a CounterVector.
CounterVector* CounterVector::Unserialize(UnserialInfo* info)
	{
	SerialObj* obj = SerialObj::Unserialize(info, SER_COUNTERVECTOR);
	return reinterpret_cast<CounterVector*>(obj);
	}
|
||||
|
||||
IMPLEMENT_SERIAL(CounterVector, SER_COUNTERVECTOR)
|
||||
|
||||
// Writes the serial header, then the bit storage, then the counter width
// widened to uint64. Returns false as soon as one write fails.
bool CounterVector::DoSerialize(SerialInfo* info) const
	{
	DO_SERIALIZE(SER_COUNTERVECTOR, SerialObj);

	// bits_ is passed directly (not its address); only one check remains so
	// the failure path is not guarded by a second, nested condition.
	if ( ! SERIALIZE(bits_) )
		return false;

	return SERIALIZE(static_cast<uint64>(width_));
	}
|
||||
|
@ -60,9 +71,9 @@ bool CounterVector::DoUnserialize(UnserialInfo* info)
|
|||
{
|
||||
DO_UNSERIALIZE(SerialObj);
|
||||
return false;
|
||||
// TODO: Ask Robin how to unserialize non-pointer members.
|
||||
//if ( ! UNSERIALIZE(&bits_) )
|
||||
// return false;
|
||||
bits_ = BitVector::Unserialize(info);
|
||||
if ( ! bits_ )
|
||||
return false;
|
||||
uint64 width;
|
||||
if ( ! UNSERIALIZE(&width) )
|
||||
return false;
|
||||
|
@ -90,6 +101,18 @@ HashPolicy::HashVector DoubleHashing::Hash(const void* x, size_t n) const
|
|||
return h;
|
||||
}
|
||||
|
||||
|
||||
// Creates the base filter state with a newly allocated hash_policy built
// from k. The destructor releases that policy object.
BloomFilter::BloomFilter(size_t k) : hash_(new hash_policy(k))
	{
	}
|
||||
|
||||
// Releases the hash policy held by this filter.
BloomFilter::~BloomFilter()
	{
	// Deleting a null pointer is a no-op, so no explicit guard is required.
	delete hash_;
	}
|
||||
|
||||
bool BloomFilter::Serialize(SerialInfo* info) const
|
||||
{
|
||||
return SerialObj::Serialize(info);
|
||||
|
@ -101,24 +124,21 @@ BloomFilter* BloomFilter::Unserialize(UnserialInfo* info)
|
|||
SerialObj::Unserialize(info, SER_BLOOMFILTER));
|
||||
}
|
||||
|
||||
// FIXME: should abstract base classes also have IMPLEMENT_SERIAL?
|
||||
//IMPLEMENT_SERIAL(BloomFilter, SER_BLOOMFILTER)
|
||||
|
||||
// Writes the BloomFilter portion of the serialized state: the value of
// hash_->K() as uint16, followed by the element count as uint64.
bool BloomFilter::DoSerialize(SerialInfo* info) const
	{
	DO_SERIALIZE(SER_BLOOMFILTER, SerialObj);

	if ( ! SERIALIZE(static_cast<uint16>(hash_->K())) )
		return false;

	// Must be uint64: DoUnserialize() reads the count back into a uint64, and
	// a 16-bit field would also truncate larger counts.
	return SERIALIZE(static_cast<uint64>(elements_));
	}
|
||||
|
||||
bool BloomFilter::DoUnserialize(UnserialInfo* info)
|
||||
{
|
||||
DO_UNSERIALIZE(SerialObj);
|
||||
// TODO: Make the hash policy serializable.
|
||||
//if ( ! hash_ = HashPolicy::Unserialize(info) )
|
||||
// return false;
|
||||
uint16 k;
|
||||
if ( ! UNSERIALIZE(&k) )
|
||||
return false;
|
||||
hash_ = new hash_policy(static_cast<size_t>(k));
|
||||
uint64 elements;
|
||||
if ( UNSERIALIZE(&elements) )
|
||||
return false;
|
||||
|
@ -126,7 +146,7 @@ bool BloomFilter::DoUnserialize(UnserialInfo* info)
|
|||
return true;
|
||||
}
|
||||
|
||||
size_t BasicBloomFilter::Cells(double fp, size_t capacity)
|
||||
size_t BasicBloomFilter::M(double fp, size_t capacity)
|
||||
{
|
||||
double ln2 = std::log(2);
|
||||
return std::ceil(-(capacity * std::log(fp) / ln2 / ln2));
|
||||
|
@ -138,9 +158,16 @@ size_t BasicBloomFilter::K(size_t cells, size_t capacity)
|
|||
return round<size_t>(frac * std::log(2));
|
||||
}
|
||||
|
||||
BasicBloomFilter::BasicBloomFilter(size_t cells, HashPolicy* hash)
|
||||
: BloomFilter(hash), bits_(cells)
|
||||
// Builds a basic Bloom filter from a false-positive rate and a capacity.
// The cell count comes from M(fp, capacity); the base class receives
// K(cells, capacity).
BasicBloomFilter::BasicBloomFilter(double fp, size_t capacity)
	: BloomFilter(K(M(fp, capacity), capacity))
	{
	const size_t cells = M(fp, capacity);
	bits_ = new BitVector(cells);
	}
|
||||
|
||||
// Builds a basic Bloom filter with an explicit number of cells. The base
// class receives K(cells, capacity).
BasicBloomFilter::BasicBloomFilter(size_t cells, size_t capacity)
	: BloomFilter(K(cells, capacity))
	{
	BitVector* storage = new BitVector(cells);
	bits_ = storage;
	}
|
||||
|
||||
IMPLEMENT_SERIAL(BasicBloomFilter, SER_BASICBLOOMFILTER)
|
||||
|
@ -148,38 +175,50 @@ IMPLEMENT_SERIAL(BasicBloomFilter, SER_BASICBLOOMFILTER)
|
|||
// Writes the BloomFilter base state followed by the bit vector.
bool BasicBloomFilter::DoSerialize(SerialInfo* info) const
	{
	DO_SERIALIZE(SER_BASICBLOOMFILTER, BloomFilter);

	// No unconditional "return true" ahead of this line: the bit vector has
	// to be written for DoUnserialize() to have something to read.
	// NOTE(review): bits_ is a pointer and is read back through
	// BitVector::Unserialize(info); confirm SERIALIZE() emits a matching
	// object for a pointer argument.
	return SERIALIZE(bits_);
	}
|
||||
|
||||
// Restores the BloomFilter base state and then the bit vector.
// Returns true only when a bit vector was actually obtained.
bool BasicBloomFilter::DoUnserialize(UnserialInfo* info)
	{
	DO_UNSERIALIZE(BloomFilter);

	bits_ = BitVector::Unserialize(info);

	// A null result means the read failed, so success is "not null".
	return bits_ != NULL;
	}
|
||||
|
||||
// Sets one bit for every hash value in h.
void BasicBloomFilter::AddImpl(const HashPolicy::HashVector& h)
	{
	// Each hash is reduced modulo the number of bits. Reducing modulo
	// h.size() would confine every index to the first h.size() positions.
	// NOTE(review): relies on BitVector::Size() returning the bit count.
	for ( size_t i = 0; i < h.size(); ++i )
		bits_->Set(h[i] % bits_->Size());
	}
|
||||
|
||||
// Returns 1 if the bit selected by every hash value in h is set, else 0.
size_t BasicBloomFilter::CountImpl(const HashPolicy::HashVector& h) const
	{
	// Indices are reduced modulo the number of bits, not the number of
	// hashes, so the whole vector is addressable.
	// NOTE(review): relies on BitVector::Size() returning the bit count.
	for ( size_t i = 0; i < h.size(); ++i )
		{
		if ( ! (*bits_)[h[i] % bits_->Size()] )
			return 0;
		}

	return 1;
	}
|
||||
|
||||
|
||||
IMPLEMENT_SERIAL(CountingBloomFilter, SER_COUNTINGBLOOMFILTER)
|
||||
|
||||
// Writes the BloomFilter base state followed by the counter vector.
bool CountingBloomFilter::DoSerialize(SerialInfo* info) const
	{
	// Tag with this class's own serial type; it is registered under
	// SER_COUNTINGBLOOMFILTER, not SER_BASICBLOOMFILTER.
	DO_SERIALIZE(SER_COUNTINGBLOOMFILTER, BloomFilter);

	// NOTE(review): cells_ is a pointer and is read back through
	// CounterVector::Unserialize(info); confirm SERIALIZE() emits a matching
	// object for a pointer argument.
	return SERIALIZE(cells_);
	}
|
||||
|
||||
// Restores the BloomFilter base state and then the counter vector.
// Returns true only when a counter vector was actually obtained.
bool CountingBloomFilter::DoUnserialize(UnserialInfo* info)
	{
	DO_UNSERIALIZE(BloomFilter);

	cells_ = CounterVector::Unserialize(info);

	// A null result means the read failed, so success is "not null".
	return cells_ != NULL;
	}
|
||||
|
||||
// Increments by one the counter selected by every hash value in h.
void CountingBloomFilter::AddImpl(const HashPolicy::HashVector& h)
	{
	// Each hash is reduced modulo the number of counters reported by
	// CounterVector::Size(). Reducing modulo h.size() would confine every
	// index to the first h.size() counters.
	for ( size_t i = 0; i < h.size(); ++i )
		cells_->Increment(h[i] % cells_->Size(), 1);
	}
|
||||
|
||||
size_t CountingBloomFilter::CountImpl(const HashPolicy::HashVector& h) const
|
||||
|
@ -188,7 +227,7 @@ size_t CountingBloomFilter::CountImpl(const HashPolicy::HashVector& h) const
|
|||
std::numeric_limits<CounterVector::size_type>::max();
|
||||
for ( size_t i = 0; i < h.size(); ++i )
|
||||
{
|
||||
CounterVector::size_type cnt = cells_.Count(h[i] % h.size());
|
||||
CounterVector::size_type cnt = cells_->Count(h[i] % h.size());
|
||||
if ( cnt < min )
|
||||
min = cnt;
|
||||
}
|
||||
|
|
|
@ -151,9 +151,13 @@ private:
|
|||
/**
|
||||
* The abstract base class for Bloom filters.
|
||||
*/
|
||||
class BloomFilter : SerialObj {
|
||||
class BloomFilter : public SerialObj {
|
||||
public:
|
||||
virtual ~BloomFilter() { delete hash_; }
|
||||
// At this point we won't let the user choose the hash policy, but we might
|
||||
// open up the interface in the future.
|
||||
typedef DoubleHashing hash_policy;
|
||||
|
||||
virtual ~BloomFilter();
|
||||
|
||||
/**
|
||||
* Adds an element of type T to the Bloom filter.
|
||||
|
@ -193,10 +197,10 @@ public:
|
|||
static BloomFilter* Unserialize(UnserialInfo* info);
|
||||
|
||||
protected:
|
||||
DECLARE_SERIAL(BloomFilter);
|
||||
DECLARE_ABSTRACT_SERIAL(BloomFilter);
|
||||
|
||||
BloomFilter() { };
|
||||
BloomFilter(HashPolicy* hash) : hash_(hash) { }
|
||||
BloomFilter(size_t k);
|
||||
|
||||
virtual void AddImpl(const HashPolicy::HashVector& hashes) = 0;
|
||||
virtual size_t CountImpl(const HashPolicy::HashVector& hashes) const = 0;
|
||||
|
@ -211,10 +215,42 @@ private:
|
|||
*/
|
||||
class BasicBloomFilter : public BloomFilter {
|
||||
public:
|
||||
static size_t Cells(double fp, size_t capacity);
|
||||
/**
|
||||
* Computes the number of cells based on a given false-positive rate and
|
||||
* capacity. In the literature, this parameter often has the name *M*.
|
||||
*
|
||||
* @param fp The false-positive rate.
|
||||
*
|
||||
* @param capacity The number of expected elements.
|
||||
*
|
||||
* Returns: The number of cells needed to support a false-positive rate of *fp*
|
||||
* with at most *capacity* elements.
|
||||
*/
|
||||
static size_t M(double fp, size_t capacity);
|
||||
|
||||
/**
|
||||
* Computes the optimal number of hash functions based on the number of cells
|
||||
* and expected number of elements.
|
||||
*
|
||||
* @param cells The number of cells (*m*).
|
||||
*
|
||||
* @param capacity The maximum number of elements.
|
||||
*
|
||||
* Returns: the optimal number of hash functions for a false-positive rate of
|
||||
* *fp* for at most *capacity* elements.
|
||||
*/
|
||||
static size_t K(size_t cells, size_t capacity);
|
||||
|
||||
BasicBloomFilter(size_t cells, HashPolicy* hash);
|
||||
/**
|
||||
* Constructs a basic Bloom filter with a given false-positive rate and
|
||||
* capacity.
|
||||
*/
|
||||
BasicBloomFilter(double fp, size_t capacity);
|
||||
|
||||
/**
|
||||
* Constructs a basic Bloom filter with a given number of cells and capacity.
|
||||
*/
|
||||
BasicBloomFilter(size_t cells, size_t capacity);
|
||||
|
||||
protected:
|
||||
DECLARE_SERIAL(BasicBloomFilter);
|
||||
|
@ -225,7 +261,7 @@ protected:
|
|||
virtual size_t CountImpl(const HashPolicy::HashVector& h) const;
|
||||
|
||||
private:
|
||||
BitVector bits_;
|
||||
BitVector* bits_;
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -233,18 +269,18 @@ private:
|
|||
*/
|
||||
class CountingBloomFilter : public BloomFilter {
|
||||
public:
|
||||
CountingBloomFilter(unsigned width, HashPolicy* hash);
|
||||
CountingBloomFilter(unsigned width);
|
||||
|
||||
protected:
|
||||
DECLARE_SERIAL(CountingBloomFilter);
|
||||
|
||||
CountingBloomFilter();
|
||||
CountingBloomFilter() { }
|
||||
|
||||
virtual void AddImpl(const HashPolicy::HashVector& h);
|
||||
virtual size_t CountImpl(const HashPolicy::HashVector& h) const;
|
||||
|
||||
private:
|
||||
CounterVector cells_;
|
||||
CounterVector* cells_;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
|
@ -249,6 +249,7 @@ extern OpaqueType* md5_type;
|
|||
extern OpaqueType* sha1_type;
|
||||
extern OpaqueType* sha256_type;
|
||||
extern OpaqueType* entropy_type;
|
||||
extern OpaqueType* bloomfilter_type;
|
||||
|
||||
// Initializes globals that don't pertain to network/event analysis.
|
||||
extern void init_general_global_var();
|
||||
|
|
|
@ -518,23 +518,31 @@ bool EntropyVal::DoUnserialize(UnserialInfo* info)
|
|||
return true;
|
||||
}
|
||||
|
||||
// Creates a value of type bloomfilter_type with no filter attached yet.
BloomFilterVal::BloomFilterVal()
	: OpaqueVal(bloomfilter_type), bloom_filter_(NULL)
	{
	// bloom_filter_ starts out null because the destructor deletes it; an
	// uninitialized pointer there would be undefined behavior.
	}
|
||||
|
||||
// Creates a value of the given opaque type with no filter attached yet.
BloomFilterVal::BloomFilterVal(OpaqueType* t)
	: OpaqueVal(t), bloom_filter_(NULL)
	{
	// bloom_filter_ starts out null because the destructor deletes it; an
	// uninitialized pointer there would be undefined behavior.
	}
|
||||
|
||||
// Releases the Bloom filter held by this value.
BloomFilterVal::~BloomFilterVal()
	{
	// Deleting a null pointer is a no-op, so no explicit guard is required.
	delete bloom_filter_;
	}
|
||||
|
||||
IMPLEMENT_SERIAL(BloomFilterVal, SER_BLOOMFILTER_VAL);
|
||||
|
||||
// Writes the OpaqueVal base state followed by the wrapped Bloom filter.
bool BloomFilterVal::DoSerialize(SerialInfo* info) const
	{
	DO_SERIALIZE(SER_BLOOMFILTER_VAL, OpaqueVal);

	// No unconditional "return true" ahead of this line: the filter has to
	// be written for DoUnserialize() to have something to read.
	// NOTE(review): bloom_filter_ is a pointer and is read back through
	// BloomFilter::Unserialize(info); confirm SERIALIZE() emits a matching
	// object for a pointer argument.
	return SERIALIZE(bloom_filter_);
	}
|
||||
|
||||
// Restores the OpaqueVal base state and then the wrapped Bloom filter.
// Returns true only when a filter was actually obtained.
bool BloomFilterVal::DoUnserialize(UnserialInfo* info)
	{
	DO_UNSERIALIZE(OpaqueVal);

	bloom_filter_ = BloomFilter::Unserialize(info);

	// A null result means the read failed, so success is "not null".
	return bloom_filter_ != NULL;
	}
|
||||
|
||||
|
|
|
@ -112,6 +112,7 @@ private:
|
|||
class BloomFilterVal : public OpaqueVal {
|
||||
public:
|
||||
BloomFilterVal();
|
||||
~BloomFilterVal();
|
||||
|
||||
protected:
|
||||
friend class Val;
|
||||
|
|
|
@ -53,6 +53,7 @@ SERIAL_IS(BITVECTOR, 0x1500)
|
|||
SERIAL_IS(COUNTERVECTOR, 0xa000)
|
||||
SERIAL_IS(BLOOMFILTER, 0xa100)
|
||||
SERIAL_IS(BASICBLOOMFILTER, 0xa200)
|
||||
SERIAL_IS(COUNTINGBLOOMFILTER, 0xa300)
|
||||
|
||||
// These are the externally visible types.
|
||||
const SerialType SER_NONE = 0;
|
||||
|
@ -211,5 +212,6 @@ SERIAL_CONST2(BITVECTOR)
|
|||
SERIAL_CONST2(COUNTERVECTOR)
|
||||
SERIAL_CONST2(BLOOMFILTER)
|
||||
SERIAL_CONST2(BASICBLOOMFILTER)
|
||||
SERIAL_CONST2(COUNTINGBLOOMFILTER)
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue