mirror of
https://github.com/zeek/zeek.git
synced 2025-10-07 00:58:19 +00:00
Merge remote-tracking branch 'origin/master' into topic/bernhard/hyperloglog
This commit is contained in:
commit
5b9d80e50d
247 changed files with 2729 additions and 5372 deletions
|
@ -40,28 +40,15 @@ bool BloomFilter::DoSerialize(SerialInfo* info) const
|
|||
{
|
||||
DO_SERIALIZE(SER_BLOOMFILTER, SerialObj);
|
||||
|
||||
if ( ! SERIALIZE(static_cast<uint16>(hasher->K())) )
|
||||
return false;
|
||||
|
||||
return SERIALIZE_STR(hasher->Name().c_str(), hasher->Name().size());
|
||||
return hasher->Serialize(info);
|
||||
}
|
||||
|
||||
bool BloomFilter::DoUnserialize(UnserialInfo* info)
|
||||
{
|
||||
DO_UNSERIALIZE(SerialObj);
|
||||
|
||||
uint16 k;
|
||||
if ( ! UNSERIALIZE(&k) )
|
||||
return false;
|
||||
|
||||
const char* name;
|
||||
if ( ! UNSERIALIZE_STR(&name, 0) )
|
||||
return false;
|
||||
|
||||
hasher = Hasher::Create(k, name);
|
||||
|
||||
delete [] name;
|
||||
return true;
|
||||
hasher = Hasher::Unserialize(info);
|
||||
return hasher != 0;
|
||||
}
|
||||
|
||||
size_t BasicBloomFilter::M(double fp, size_t capacity)
|
||||
|
|
|
@ -13,9 +13,6 @@ class CounterVector;
|
|||
|
||||
/**
|
||||
* The abstract base class for Bloom filters.
|
||||
*
|
||||
* At this point we won't let the user choose the hasher, but we might open
|
||||
* up the interface in the future.
|
||||
*/
|
||||
class BloomFilter : public SerialObj {
|
||||
public:
|
||||
|
|
|
@ -4,9 +4,59 @@
|
|||
|
||||
#include "Hasher.h"
|
||||
#include "digest.h"
|
||||
#include "Serializer.h"
|
||||
|
||||
using namespace probabilistic;
|
||||
|
||||
// Public entry point for writing a hasher. Simply forwards to
// SerialObj::Serialize() and hands back its result.
bool Hasher::Serialize(SerialInfo* info) const
	{
	const bool written = SerialObj::Serialize(info);
	return written;
	}
|
||||
|
||||
// Public entry point for reading a hasher. Asks SerialObj::Unserialize() for
// an object of type SER_HASHER and returns it as a Hasher*.
//
// NOTE(review): callers appear to treat a null result as failure -- confirm
// against SerialObj::Unserialize().
Hasher* Hasher::Unserialize(UnserialInfo* info)
	{
	// static_cast is the proper named cast for a base-to-derived conversion
	// (Hasher derives from SerialObj). Unlike reinterpret_cast, it applies
	// whatever pointer adjustment the class layout requires.
	return static_cast<Hasher*>(SerialObj::Unserialize(info, SER_HASHER));
	}
|
||||
|
||||
// Writes the hasher's state: first the number of hash functions *k*
// (narrowed to uint16), then the hasher's name as a length-delimited string.
// Stops at the first write that fails and reports failure.
bool Hasher::DoSerialize(SerialInfo* info) const
	{
	DO_SERIALIZE(SER_HASHER, SerialObj);

	// The macro arguments are kept exactly as written; the serialization
	// macros may derive a field tag from the argument text.
	bool ok = SERIALIZE(static_cast<uint16>(k));

	if ( ok )
		ok = SERIALIZE_STR(name.c_str(), name.size());

	return ok;
	}
|
||||
|
||||
// Restores the hasher's state in the order DoSerialize() wrote it: the
// number of hash functions *k* (stored as uint16) followed by the name.
//
// Returns false if either field cannot be read or if the stored *k* is zero.
bool Hasher::DoUnserialize(UnserialInfo* info)
	{
	DO_UNSERIALIZE(SerialObj);

	uint16 serial_k;
	if ( ! UNSERIALIZE(&serial_k) )
		return false;

	// A hasher without any hash functions is invalid. Reject it explicitly
	// instead of relying on assert(): the assertion disappears under NDEBUG
	// and otherwise aborts the whole process on malformed input.
	if ( serial_k == 0 )
		return false;

	k = serial_k;

	const char* serial_name = 0;
	if ( ! UNSERIALIZE_STR(&serial_name, 0) )
		return false;

	// Copy the string into the member, then release the buffer handed back
	// by the unserializer.
	name = serial_name;
	delete [] serial_name;

	return true;
	}
|
||||
|
||||
// Constructs a hasher that applies *k* hash functions and carries the given
// name.
//
// Both members are set in the initializer list. The former body statement
// `k = k;` assigned the constructor parameter to itself (the parameter
// shadows the member) and therefore had no effect.
Hasher::Hasher(size_t k, const std::string& arg_name)
	: k(k), name(arg_name)
	{
	}
|
||||
|
||||
|
||||
UHF::UHF(size_t seed, const std::string& extra)
|
||||
: h(compute_seed(seed, extra))
|
||||
{
|
||||
|
@ -40,17 +90,6 @@ size_t UHF::compute_seed(size_t seed, const std::string& extra)
|
|||
return *reinterpret_cast<size_t*>(buf);
|
||||
}
|
||||
|
||||
// Factory: allocates a DefaultHasher with *k* hash functions and the given
// name. The caller receives the newly allocated object.
Hasher* Hasher::Create(size_t k, const std::string& name)
	{
	Hasher* const created = new DefaultHasher(k, name);
	return created;
	}
|
||||
|
||||
// Constructs a hasher that applies *k* hash functions and carries the given
// name; both members are set directly in the initializer list.
Hasher::Hasher(size_t k, const std::string& arg_name)
	: k(k), name(arg_name)
	{
	}
|
||||
|
||||
DefaultHasher::DefaultHasher(size_t k, const std::string& name)
|
||||
: Hasher(k, name)
|
||||
{
|
||||
|
@ -82,6 +121,27 @@ bool DefaultHasher::Equals(const Hasher* other) const
|
|||
return hash_functions == o->hash_functions;
|
||||
}
|
||||
|
||||
IMPLEMENT_SERIAL(DefaultHasher, SER_DEFAULTHASHER)

// Serializes a DefaultHasher. The class adds no persistent state of its own:
// *k* and the name are written by Hasher::DoSerialize(), and the hash
// functions are rebuilt from those two values when unserializing.
bool DefaultHasher::DoSerialize(SerialInfo* info) const
	{
	DO_SERIALIZE(SER_DEFAULTHASHER, Hasher);

	return true;
	}
|
||||
|
||||
// Restores a DefaultHasher. After the base class has read *k* and the name,
// the K() hash functions are regenerated: function number i is a UHF seeded
// with i and the hasher's name.
bool DefaultHasher::DoUnserialize(UnserialInfo* info)
	{
	DO_UNSERIALIZE(Hasher);

	const size_t count = K();
	const std::string& seed_name = Name();

	// Discard any previous functions before rebuilding the family.
	hash_functions.clear();

	size_t i = 0;
	while ( i < count )
		{
		hash_functions.push_back(UHF(i, seed_name));
		++i;
		}

	return true;
	}
|
||||
|
||||
DoubleHasher::DoubleHasher(size_t k, const std::string& name)
|
||||
: Hasher(k, name), h1(1, name), h2(2, name)
|
||||
{
|
||||
|
@ -112,3 +172,23 @@ bool DoubleHasher::Equals(const Hasher* other) const
|
|||
const DoubleHasher* o = static_cast<const DoubleHasher*>(other);
|
||||
return h1 == o->h1 && h2 == o->h2;
|
||||
}
|
||||
|
||||
IMPLEMENT_SERIAL(DoubleHasher, SER_DOUBLEHASHER)

// Serializes a DoubleHasher. The class adds no persistent state of its own:
// *k* and the name are written by Hasher::DoSerialize(), and the two
// underlying hash functions are rebuilt from the name when unserializing.
bool DoubleHasher::DoSerialize(SerialInfo* info) const
	{
	DO_SERIALIZE(SER_DOUBLEHASHER, Hasher);

	return true;
	}
|
||||
|
||||
// Restores a DoubleHasher. After the base class has read *k* and the name,
// the two underlying hash functions are recreated with the fixed seeds 1 and
// 2 plus the hasher's name.
bool DoubleHasher::DoUnserialize(UnserialInfo* info)
	{
	DO_UNSERIALIZE(Hasher);

	const std::string& seed_name = Name();

	h1 = UHF(1, seed_name);
	h2 = UHF(2, seed_name);

	return true;
	}
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
|
||||
#include "Hash.h"
|
||||
#include "H3.h"
|
||||
#include "SerialObj.h"
|
||||
|
||||
namespace probabilistic {
|
||||
|
||||
|
@ -12,7 +13,7 @@ namespace probabilistic {
|
|||
* Abstract base class for hashers. A hasher creates a family of hash
|
||||
* functions to hash an element *k* times.
|
||||
*/
|
||||
class Hasher {
|
||||
class Hasher : public SerialObj {
|
||||
public:
|
||||
typedef hash_t digest;
|
||||
typedef std::vector<digest> digest_vector;
|
||||
|
@ -63,25 +64,20 @@ public:
|
|||
size_t K() const { return k; }
|
||||
|
||||
/**
|
||||
* Returns the hasher's name. TODO: What's this?
|
||||
* Returns the hasher's name. If not empty, the hasher uses this descriptor
|
||||
* to seed its *k* hash functions. Otherwise the hasher mixes in the initial
|
||||
* seed derived from the environment variable `$BRO_SEED`.
|
||||
*/
|
||||
const std::string& Name() const { return name; }
|
||||
|
||||
/**
|
||||
* Constructs the hasher used by the implementation. This hardcodes a
|
||||
* specific hashing policy. It exists only because the HashingPolicy
|
||||
* class hierarchy is not yet serializable.
|
||||
*
|
||||
* @param k The number of hash functions to apply.
|
||||
*
|
||||
* @param name The hasher's name. Hashers with the same name should
|
||||
* provide consistent results.
|
||||
*
|
||||
* @return Returns a new hasher instance.
|
||||
*/
|
||||
static Hasher* Create(size_t k, const std::string& name);
|
||||
bool Serialize(SerialInfo* info) const;
|
||||
static Hasher* Unserialize(UnserialInfo* info);
|
||||
|
||||
protected:
|
||||
DECLARE_ABSTRACT_SERIAL(Hasher);
|
||||
|
||||
Hasher() { }
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
*
|
||||
|
@ -93,7 +89,7 @@ protected:
|
|||
Hasher(size_t k, const std::string& name);
|
||||
|
||||
private:
|
||||
const size_t k;
|
||||
size_t k;
|
||||
std::string name;
|
||||
};
|
||||
|
||||
|
@ -113,7 +109,7 @@ public:
|
|||
* seed to compute the seed for t to compute the seed NUL-terminated
|
||||
* string as additional seed.
|
||||
*/
|
||||
UHF(size_t seed, const std::string& extra = "");
|
||||
UHF(size_t seed = 0, const std::string& extra = "");
|
||||
|
||||
template <typename T>
|
||||
Hasher::digest operator()(const T& x) const
|
||||
|
@ -175,14 +171,18 @@ public:
|
|||
*
|
||||
* @param name The name of the hasher.
|
||||
*/
|
||||
DefaultHasher(size_t k, const std::string& name);
|
||||
DefaultHasher(size_t k, const std::string& name = "");
|
||||
|
||||
// Overridden from Hasher.
|
||||
virtual digest_vector Hash(const void* x, size_t n) const /* final */;
|
||||
virtual DefaultHasher* Clone() const /* final */;
|
||||
virtual bool Equals(const Hasher* other) const /* final */;
|
||||
|
||||
DECLARE_SERIAL(DefaultHasher);
|
||||
|
||||
private:
|
||||
DefaultHasher() { }
|
||||
|
||||
std::vector<UHF> hash_functions;
|
||||
};
|
||||
|
||||
|
@ -199,14 +199,18 @@ public:
|
|||
*
|
||||
* @param name The name of the hasher.
|
||||
*/
|
||||
DoubleHasher(size_t k, const std::string& name);
|
||||
DoubleHasher(size_t k, const std::string& name = "");
|
||||
|
||||
// Overridden from Hasher.
|
||||
virtual digest_vector Hash(const void* x, size_t n) const /* final */;
|
||||
virtual DoubleHasher* Clone() const /* final */;
|
||||
virtual bool Equals(const Hasher* other) const /* final */;
|
||||
|
||||
DECLARE_SERIAL(DoubleHasher);
|
||||
|
||||
private:
|
||||
DoubleHasher() { }
|
||||
|
||||
UHF h1;
|
||||
UHF h2;
|
||||
};
|
||||
|
|
|
@ -48,7 +48,7 @@ function bloomfilter_basic_init%(fp: double, capacity: count,
|
|||
|
||||
size_t cells = BasicBloomFilter::M(fp, capacity);
|
||||
size_t optimal_k = BasicBloomFilter::K(cells, capacity);
|
||||
const Hasher* h = Hasher::Create(optimal_k, name->CheckString());
|
||||
const Hasher* h = new DefaultHasher(optimal_k, name->CheckString());
|
||||
|
||||
return new BloomFilterVal(new BasicBloomFilter(h, cells));
|
||||
%}
|
||||
|
@ -86,7 +86,7 @@ function bloomfilter_counting_init%(k: count, cells: count, max: count,
|
|||
return 0;
|
||||
}
|
||||
|
||||
const Hasher* h = Hasher::Create(k, name->CheckString());
|
||||
const Hasher* h = new DefaultHasher(k, name->CheckString());
|
||||
|
||||
uint16 width = 1;
|
||||
while ( max >>= 1 )
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue