mirror of
https://github.com/zeek/zeek.git
synced 2025-10-06 16:48:19 +00:00
Add more serialization implementation.
This commit is contained in:
parent
a5572dd66f
commit
751cf61293
6 changed files with 129 additions and 42 deletions
|
@ -46,12 +46,23 @@ CounterVector::size_type CounterVector::Size() const
|
||||||
return bits_->Blocks() / width_;
|
return bits_->Blocks() / width_;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Serializes this counter vector by handing off to the generic SerialObj
 * entry point.
 *
 * @param info The serialization context, forwarded unchanged.
 *
 * @return Whatever SerialObj::Serialize() reports.
 */
bool CounterVector::Serialize(SerialInfo* info) const
	{
	const bool ok = SerialObj::Serialize(info);
	return ok;
	}
|
||||||
|
|
||||||
|
/**
 * Reads a counter vector from the given unserialization context.
 *
 * @param info The unserialization context, forwarded unchanged.
 *
 * @return The object produced by SerialObj::Unserialize() for the type tag
 * SER_COUNTERVECTOR, viewed as a CounterVector.
 */
CounterVector* CounterVector::Unserialize(UnserialInfo* info)
	{
	// A base-to-derived conversion within a class hierarchy is a
	// static_cast; reinterpret_cast merely reinterprets the address and
	// performs no pointer adjustment.
	return static_cast<CounterVector*>(
		SerialObj::Unserialize(info, SER_COUNTERVECTOR));
	}
|
||||||
|
|
||||||
IMPLEMENT_SERIAL(CounterVector, SER_COUNTERVECTOR)
|
IMPLEMENT_SERIAL(CounterVector, SER_COUNTERVECTOR)
|
||||||
|
|
||||||
/**
 * Writes the counter vector's state: first the underlying bit vector, then
 * the counter width as a 64-bit unsigned integer.
 *
 * @param info The serialization context.
 *
 * @return false as soon as one of the writes fails, otherwise the result of
 * the final write.
 */
bool CounterVector::DoSerialize(SerialInfo* info) const
	{
	DO_SERIALIZE(SER_COUNTERVECTOR, SerialObj);

	// bits_ is a pointer and DoUnserialize() restores it through
	// BitVector::Unserialize(), so the BitVector object itself has to be
	// written here rather than the pointer value.
	// NOTE(review): assumes BitVector offers Serialize(SerialInfo*) as the
	// counterpart of BitVector::Unserialize() -- confirm.
	if ( ! bits_->Serialize(info) )
		return false;

	return SERIALIZE(static_cast<uint64>(width_));
	}
|
||||||
|
@ -60,9 +71,9 @@ bool CounterVector::DoUnserialize(UnserialInfo* info)
|
||||||
{
|
{
|
||||||
DO_UNSERIALIZE(SerialObj);
|
DO_UNSERIALIZE(SerialObj);
|
||||||
return false;
|
return false;
|
||||||
// TODO: Ask Robin how to unserialize non-pointer members.
|
bits_ = BitVector::Unserialize(info);
|
||||||
//if ( ! UNSERIALIZE(&bits_) )
|
if ( ! bits_ )
|
||||||
// return false;
|
return false;
|
||||||
uint64 width;
|
uint64 width;
|
||||||
if ( ! UNSERIALIZE(&width) )
|
if ( ! UNSERIALIZE(&width) )
|
||||||
return false;
|
return false;
|
||||||
|
@ -90,6 +101,18 @@ HashPolicy::HashVector DoubleHashing::Hash(const void* x, size_t n) const
|
||||||
return h;
|
return h;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Constructs the base filter state with a freshly allocated hash policy.
 *
 * @param k The number of hash functions handed to the hash policy.
 */
BloomFilter::BloomFilter(size_t k)
	{
	hash_ = new hash_policy(k);
	}
|
||||||
|
|
||||||
|
/**
 * Releases the hash policy owned by this filter.
 */
BloomFilter::~BloomFilter()
	{
	// Deleting a null pointer is a no-op, so no explicit check is needed.
	delete hash_;
	}
|
||||||
|
|
||||||
bool BloomFilter::Serialize(SerialInfo* info) const
|
bool BloomFilter::Serialize(SerialInfo* info) const
|
||||||
{
|
{
|
||||||
return SerialObj::Serialize(info);
|
return SerialObj::Serialize(info);
|
||||||
|
@ -101,24 +124,21 @@ BloomFilter* BloomFilter::Unserialize(UnserialInfo* info)
|
||||||
SerialObj::Unserialize(info, SER_BLOOMFILTER));
|
SerialObj::Unserialize(info, SER_BLOOMFILTER));
|
||||||
}
|
}
|
||||||
|
|
||||||
// FIXME: should abstract base classes also have IMPLEMENT_SERIAL?
|
|
||||||
//IMPLEMENT_SERIAL(BloomFilter, SER_BLOOMFILTER)
|
|
||||||
|
|
||||||
/**
 * Writes the state shared by all Bloom filters: the number of hash
 * functions (as uint16) followed by the element count (as uint64).
 *
 * @param info The serialization context.
 *
 * @return false as soon as one of the writes fails, otherwise the result of
 * the final write.
 */
bool BloomFilter::DoSerialize(SerialInfo* info) const
	{
	DO_SERIALIZE(SER_BLOOMFILTER, SerialObj);

	if ( ! SERIALIZE(static_cast<uint16>(hash_->K())) )
		return false;

	// DoUnserialize() reads the element count back as a uint64, so it must
	// be written with the same width; a uint16 would both truncate the
	// count and desynchronize the stream.
	return SERIALIZE(static_cast<uint64>(elements_));
	}
|
||||||
|
|
||||||
bool BloomFilter::DoUnserialize(UnserialInfo* info)
|
bool BloomFilter::DoUnserialize(UnserialInfo* info)
|
||||||
{
|
{
|
||||||
DO_UNSERIALIZE(SerialObj);
|
DO_UNSERIALIZE(SerialObj);
|
||||||
// TODO: Make the hash policy serializable.
|
uint16 k;
|
||||||
//if ( ! hash_ = HashPolicy::Unserialize(info) )
|
if ( ! UNSERIALIZE(&k) )
|
||||||
// return false;
|
return false;
|
||||||
|
hash_ = new hash_policy(static_cast<size_t>(k));
|
||||||
uint64 elements;
|
uint64 elements;
|
||||||
if ( UNSERIALIZE(&elements) )
|
if ( UNSERIALIZE(&elements) )
|
||||||
return false;
|
return false;
|
||||||
|
@ -126,7 +146,7 @@ bool BloomFilter::DoUnserialize(UnserialInfo* info)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t BasicBloomFilter::Cells(double fp, size_t capacity)
|
size_t BasicBloomFilter::M(double fp, size_t capacity)
|
||||||
{
|
{
|
||||||
double ln2 = std::log(2);
|
double ln2 = std::log(2);
|
||||||
return std::ceil(-(capacity * std::log(fp) / ln2 / ln2));
|
return std::ceil(-(capacity * std::log(fp) / ln2 / ln2));
|
||||||
|
@ -138,9 +158,16 @@ size_t BasicBloomFilter::K(size_t cells, size_t capacity)
|
||||||
return round<size_t>(frac * std::log(2));
|
return round<size_t>(frac * std::log(2));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Constructs a basic Bloom filter from a false-positive rate and a capacity.
 * The number of hash functions is K(M(fp, capacity), capacity) and the bit
 * vector is allocated with M(fp, capacity) bits.
 *
 * @param fp The false-positive rate.
 *
 * @param capacity The number of expected elements.
 */
BasicBloomFilter::BasicBloomFilter(double fp, size_t capacity)
	: BloomFilter(K(M(fp, capacity), capacity)),
	  bits_(new BitVector(M(fp, capacity)))
	{
	}
|
||||||
|
|
||||||
|
/**
 * Constructs a basic Bloom filter from an explicit number of cells and a
 * capacity. The number of hash functions is K(cells, capacity).
 *
 * @param cells The number of bits to allocate.
 *
 * @param capacity The maximum number of elements.
 */
BasicBloomFilter::BasicBloomFilter(size_t cells, size_t capacity)
	: BloomFilter(K(cells, capacity)),
	  bits_(new BitVector(cells))
	{
	}
|
||||||
|
|
||||||
IMPLEMENT_SERIAL(BasicBloomFilter, SER_BASICBLOOMFILTER)
|
IMPLEMENT_SERIAL(BasicBloomFilter, SER_BASICBLOOMFILTER)
|
||||||
|
@ -148,38 +175,50 @@ IMPLEMENT_SERIAL(BasicBloomFilter, SER_BASICBLOOMFILTER)
|
||||||
/**
 * Writes the basic Bloom filter: the BloomFilter base state followed by the
 * underlying bit vector.
 *
 * @param info The serialization context.
 *
 * @return The result of serializing the bit vector, or false if an earlier
 * step failed.
 */
bool BasicBloomFilter::DoSerialize(SerialInfo* info) const
	{
	DO_SERIALIZE(SER_BASICBLOOMFILTER, BloomFilter);

	// bits_ is a pointer and DoUnserialize() restores it through
	// BitVector::Unserialize(), so the BitVector object itself has to be
	// written here rather than the pointer value.
	// NOTE(review): assumes BitVector offers Serialize(SerialInfo*) as the
	// counterpart of BitVector::Unserialize() -- confirm.
	return bits_->Serialize(info);
	}
|
||||||
|
|
||||||
/**
 * Restores the basic Bloom filter: the BloomFilter base state followed by
 * the underlying bit vector.
 *
 * @param info The unserialization context.
 *
 * @return true if a bit vector was read successfully, false otherwise.
 */
bool BasicBloomFilter::DoUnserialize(UnserialInfo* info)
	{
	DO_UNSERIALIZE(BloomFilter);

	bits_ = BitVector::Unserialize(info);

	// A null result means the read failed. The previous "== NULL"
	// comparison reported success exactly when no bit vector was obtained.
	return bits_ != NULL;
	}
|
||||||
|
|
||||||
/**
 * Sets the bit selected by each hash value.
 *
 * @param h The hash values computed for the element being added.
 */
void BasicBloomFilter::AddImpl(const HashPolicy::HashVector& h)
	{
	// Each hash must be reduced modulo the number of bits in the filter.
	// Reducing modulo h.size() (the number of hash functions) would confine
	// every element to the first few bits.
	// NOTE(review): assumes BitVector::Size() returns the number of bits --
	// confirm against the BitVector interface.
	const size_t num_bits = bits_->Size();

	if ( num_bits == 0 )
		return;

	for ( size_t i = 0; i < h.size(); ++i )
		bits_->Set(h[i] % num_bits);
	}
|
||||||
|
|
||||||
/**
 * Tests whether every bit selected by the hash values is set.
 *
 * @param h The hash values computed for the element being queried.
 *
 * @return 1 if all selected bits are set, 0 as soon as one is clear.
 */
size_t BasicBloomFilter::CountImpl(const HashPolicy::HashVector& h) const
	{
	// Must use the same index reduction as AddImpl(): modulo the number of
	// bits in the filter, not modulo the number of hash functions.
	// NOTE(review): assumes BitVector::Size() returns the number of bits --
	// confirm against the BitVector interface.
	const size_t num_bits = bits_->Size();

	if ( num_bits == 0 )
		return 0;

	for ( size_t i = 0; i < h.size(); ++i )
		if ( ! (*bits_)[h[i] % num_bits] )
			return 0;

	return 1;
	}
|
||||||
|
|
||||||
|
|
||||||
|
IMPLEMENT_SERIAL(CountingBloomFilter, SER_COUNTINGBLOOMFILTER)
|
||||||
|
|
||||||
|
/**
 * Writes the counting Bloom filter: the BloomFilter base state followed by
 * the counter vector.
 *
 * @param info The serialization context.
 *
 * @return The result of serializing the counter vector, or false if an
 * earlier step failed.
 */
bool CountingBloomFilter::DoSerialize(SerialInfo* info) const
	{
	// Tag the record with this class's own serial type; using
	// SER_BASICBLOOMFILTER here would mislabel the object.
	DO_SERIALIZE(SER_COUNTINGBLOOMFILTER, BloomFilter);

	// cells_ is a pointer and DoUnserialize() restores it through
	// CounterVector::Unserialize(), so write the object itself via its
	// Serialize() entry point rather than the pointer value.
	return cells_->Serialize(info);
	}
|
||||||
|
|
||||||
|
/**
 * Restores the counting Bloom filter: the BloomFilter base state followed by
 * the counter vector.
 *
 * @param info The unserialization context.
 *
 * @return true if a counter vector was read successfully, false otherwise.
 */
bool CountingBloomFilter::DoUnserialize(UnserialInfo* info)
	{
	DO_UNSERIALIZE(BloomFilter);

	cells_ = CounterVector::Unserialize(info);

	// A null result means the read failed. The previous "== NULL"
	// comparison reported success exactly when no counter vector was
	// obtained.
	return cells_ != NULL;
	}
|
||||||
|
|
||||||
/**
 * Increments by one the counter selected by each hash value.
 *
 * @param h The hash values computed for the element being added.
 */
void CountingBloomFilter::AddImpl(const HashPolicy::HashVector& h)
	{
	// Each hash must be reduced modulo the number of counters
	// (CounterVector::Size()). Reducing modulo h.size() (the number of
	// hash functions) would confine every element to the first few cells.
	const CounterVector::size_type num_cells = cells_->Size();

	if ( num_cells == 0 )
		return;

	for ( size_t i = 0; i < h.size(); ++i )
		cells_->Increment(h[i] % num_cells, 1);
	}
|
||||||
|
|
||||||
size_t CountingBloomFilter::CountImpl(const HashPolicy::HashVector& h) const
|
size_t CountingBloomFilter::CountImpl(const HashPolicy::HashVector& h) const
|
||||||
|
@ -188,7 +227,7 @@ size_t CountingBloomFilter::CountImpl(const HashPolicy::HashVector& h) const
|
||||||
std::numeric_limits<CounterVector::size_type>::max();
|
std::numeric_limits<CounterVector::size_type>::max();
|
||||||
for ( size_t i = 0; i < h.size(); ++i )
|
for ( size_t i = 0; i < h.size(); ++i )
|
||||||
{
|
{
|
||||||
CounterVector::size_type cnt = cells_.Count(h[i] % h.size());
|
CounterVector::size_type cnt = cells_->Count(h[i] % h.size());
|
||||||
if ( cnt < min )
|
if ( cnt < min )
|
||||||
min = cnt;
|
min = cnt;
|
||||||
}
|
}
|
||||||
|
|
|
@ -151,9 +151,13 @@ private:
|
||||||
/**
|
/**
|
||||||
* The abstract base class for Bloom filters.
|
* The abstract base class for Bloom filters.
|
||||||
*/
|
*/
|
||||||
class BloomFilter : SerialObj {
|
class BloomFilter : public SerialObj {
|
||||||
public:
|
public:
|
||||||
virtual ~BloomFilter() { delete hash_; }
|
// At this point we won't let the user choose the hash policy, but we might
|
||||||
|
// open up the interface in the future.
|
||||||
|
typedef DoubleHashing hash_policy;
|
||||||
|
|
||||||
|
virtual ~BloomFilter();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Adds an element of type T to the Bloom filter.
|
* Adds an element of type T to the Bloom filter.
|
||||||
|
@ -193,10 +197,10 @@ public:
|
||||||
static BloomFilter* Unserialize(UnserialInfo* info);
|
static BloomFilter* Unserialize(UnserialInfo* info);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
DECLARE_SERIAL(BloomFilter);
|
DECLARE_ABSTRACT_SERIAL(BloomFilter);
|
||||||
|
|
||||||
BloomFilter() { };
|
BloomFilter() { };
|
||||||
BloomFilter(HashPolicy* hash) : hash_(hash) { }
|
BloomFilter(size_t k);
|
||||||
|
|
||||||
virtual void AddImpl(const HashPolicy::HashVector& hashes) = 0;
|
virtual void AddImpl(const HashPolicy::HashVector& hashes) = 0;
|
||||||
virtual size_t CountImpl(const HashPolicy::HashVector& hashes) const = 0;
|
virtual size_t CountImpl(const HashPolicy::HashVector& hashes) const = 0;
|
||||||
|
@ -211,10 +215,42 @@ private:
|
||||||
*/
|
*/
|
||||||
class BasicBloomFilter : public BloomFilter {
|
class BasicBloomFilter : public BloomFilter {
|
||||||
public:
|
public:
|
||||||
static size_t Cells(double fp, size_t capacity);
|
/**
|
||||||
|
* Computes the number of cells based on a given false-positive rate and
|
||||||
|
* capacity. In the literature, this parameter often has the name *M*.
|
||||||
|
*
|
||||||
|
* @param fp The false-positive rate.
|
||||||
|
*
|
||||||
|
* @param capacity The number of expected elements.
|
||||||
|
*
|
||||||
|
* Returns: The number of cells needed to support a false-positive rate of *fp*
|
||||||
|
* with at most *capacity* elements.
|
||||||
|
*/
|
||||||
|
static size_t M(double fp, size_t capacity);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Computes the optimal number of hash functions based on the number of cells
|
||||||
|
* and expected number of elements.
|
||||||
|
*
|
||||||
|
* @param cells The number of cells (*m*).
|
||||||
|
*
|
||||||
|
* @param capacity The maximum number of elements.
|
||||||
|
*
|
||||||
|
* Returns: the optimal number of hash functions for a filter with *cells*
|
||||||
|
* cells and at most *capacity* elements.
|
||||||
|
*/
|
||||||
static size_t K(size_t cells, size_t capacity);
|
static size_t K(size_t cells, size_t capacity);
|
||||||
|
|
||||||
BasicBloomFilter(size_t cells, HashPolicy* hash);
|
/**
|
||||||
|
* Constructs a basic Bloom filter with a given false-positive rate and
|
||||||
|
* capacity.
|
||||||
|
*/
|
||||||
|
BasicBloomFilter(double fp, size_t capacity);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructs a basic Bloom filter with a given number of cells and capacity.
|
||||||
|
*/
|
||||||
|
BasicBloomFilter(size_t cells, size_t capacity);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
DECLARE_SERIAL(BasicBloomFilter);
|
DECLARE_SERIAL(BasicBloomFilter);
|
||||||
|
@ -225,7 +261,7 @@ protected:
|
||||||
virtual size_t CountImpl(const HashPolicy::HashVector& h) const;
|
virtual size_t CountImpl(const HashPolicy::HashVector& h) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
BitVector bits_;
|
BitVector* bits_;
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -233,18 +269,18 @@ private:
|
||||||
*/
|
*/
|
||||||
class CountingBloomFilter : public BloomFilter {
|
class CountingBloomFilter : public BloomFilter {
|
||||||
public:
|
public:
|
||||||
CountingBloomFilter(unsigned width, HashPolicy* hash);
|
CountingBloomFilter(unsigned width);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
DECLARE_SERIAL(CountingBloomFilter);
|
DECLARE_SERIAL(CountingBloomFilter);
|
||||||
|
|
||||||
CountingBloomFilter();
|
CountingBloomFilter() { }
|
||||||
|
|
||||||
virtual void AddImpl(const HashPolicy::HashVector& h);
|
virtual void AddImpl(const HashPolicy::HashVector& h);
|
||||||
virtual size_t CountImpl(const HashPolicy::HashVector& h) const;
|
virtual size_t CountImpl(const HashPolicy::HashVector& h) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
CounterVector cells_;
|
CounterVector* cells_;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -249,6 +249,7 @@ extern OpaqueType* md5_type;
|
||||||
extern OpaqueType* sha1_type;
|
extern OpaqueType* sha1_type;
|
||||||
extern OpaqueType* sha256_type;
|
extern OpaqueType* sha256_type;
|
||||||
extern OpaqueType* entropy_type;
|
extern OpaqueType* entropy_type;
|
||||||
|
extern OpaqueType* bloomfilter_type;
|
||||||
|
|
||||||
// Initializes globals that don't pertain to network/event analysis.
|
// Initializes globals that don't pertain to network/event analysis.
|
||||||
extern void init_general_global_var();
|
extern void init_general_global_var();
|
||||||
|
|
|
@ -518,23 +518,31 @@ bool EntropyVal::DoUnserialize(UnserialInfo* info)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Constructs an empty Bloom filter value of type bloomfilter_type.
 *
 * bloom_filter_ starts out null: the destructor tests and deletes it, so it
 * must never hold an indeterminate value.
 */
BloomFilterVal::BloomFilterVal()
	: OpaqueVal(bloomfilter_type), bloom_filter_(NULL)
	{
	}
|
||||||
|
|
||||||
/**
 * Constructs an empty Bloom filter value with an explicitly given opaque
 * type.
 *
 * bloom_filter_ starts out null: the destructor tests and deletes it, so it
 * must never hold an indeterminate value.
 *
 * @param t The opaque type passed on to OpaqueVal.
 */
BloomFilterVal::BloomFilterVal(OpaqueType* t)
	: OpaqueVal(t), bloom_filter_(NULL)
	{
	}
|
||||||
|
|
||||||
|
/**
 * Releases the Bloom filter held by this value.
 */
BloomFilterVal::~BloomFilterVal()
	{
	// Deleting a null pointer is a no-op, so no explicit check is needed.
	delete bloom_filter_;
	}
|
||||||
|
|
||||||
IMPLEMENT_SERIAL(BloomFilterVal, SER_BLOOMFILTER_VAL);
|
IMPLEMENT_SERIAL(BloomFilterVal, SER_BLOOMFILTER_VAL);
|
||||||
|
|
||||||
/**
 * Writes the value: the OpaqueVal base state followed by the wrapped Bloom
 * filter.
 *
 * @param info The serialization context.
 *
 * @return false if there is no filter to write or a write fails, otherwise
 * the result of serializing the filter.
 */
bool BloomFilterVal::DoSerialize(SerialInfo* info) const
	{
	DO_SERIALIZE(SER_BLOOMFILTER_VAL, OpaqueVal);

	// DoUnserialize() expects a complete filter object in the stream, so
	// a value without a filter cannot be written in a readable form.
	if ( ! bloom_filter_ )
		return false;

	// Write the filter object through its Serialize() entry point, the
	// counterpart of the BloomFilter::Unserialize() call in
	// DoUnserialize(), rather than passing the pointer to SERIALIZE.
	return bloom_filter_->Serialize(info);
	}
|
||||||
|
|
||||||
/**
 * Restores the value: the OpaqueVal base state followed by the wrapped Bloom
 * filter.
 *
 * @param info The unserialization context.
 *
 * @return true if a Bloom filter was read successfully, false otherwise.
 */
bool BloomFilterVal::DoUnserialize(UnserialInfo* info)
	{
	DO_UNSERIALIZE(OpaqueVal);

	bloom_filter_ = BloomFilter::Unserialize(info);

	// A null result means the read failed. The previous "== NULL"
	// comparison reported success exactly when no filter was obtained.
	return bloom_filter_ != NULL;
	}
|
||||||
|
|
||||||
|
|
|
@ -112,6 +112,7 @@ private:
|
||||||
class BloomFilterVal : public OpaqueVal {
|
class BloomFilterVal : public OpaqueVal {
|
||||||
public:
|
public:
|
||||||
BloomFilterVal();
|
BloomFilterVal();
|
||||||
|
~BloomFilterVal();
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
friend class Val;
|
friend class Val;
|
||||||
|
|
|
@ -53,6 +53,7 @@ SERIAL_IS(BITVECTOR, 0x1500)
|
||||||
SERIAL_IS(COUNTERVECTOR, 0xa000)
|
SERIAL_IS(COUNTERVECTOR, 0xa000)
|
||||||
SERIAL_IS(BLOOMFILTER, 0xa100)
|
SERIAL_IS(BLOOMFILTER, 0xa100)
|
||||||
SERIAL_IS(BASICBLOOMFILTER, 0xa200)
|
SERIAL_IS(BASICBLOOMFILTER, 0xa200)
|
||||||
|
SERIAL_IS(COUNTINGBLOOMFILTER, 0xa300)
|
||||||
|
|
||||||
// These are the externally visible types.
|
// These are the externally visible types.
|
||||||
const SerialType SER_NONE = 0;
|
const SerialType SER_NONE = 0;
|
||||||
|
@ -211,5 +212,6 @@ SERIAL_CONST2(BITVECTOR)
|
||||||
SERIAL_CONST2(COUNTERVECTOR)
|
SERIAL_CONST2(COUNTERVECTOR)
|
||||||
SERIAL_CONST2(BLOOMFILTER)
|
SERIAL_CONST2(BLOOMFILTER)
|
||||||
SERIAL_CONST2(BASICBLOOMFILTER)
|
SERIAL_CONST2(BASICBLOOMFILTER)
|
||||||
|
SERIAL_CONST2(COUNTINGBLOOMFILTER)
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue