Mirror of https://github.com/zeek/zeek.git (synced 2025-10-15 21:18:20 +00:00)
Merge remote-tracking branch 'origin/topic/johanna/bit-1612'

Adding a new random seed for external tests. I added a wrapper around the
siphash() function to make calling it a little bit safer at least.

BIT-1612 #merged

* origin/topic/johanna/bit-1612:
  HLL: Fix missing typecast in test case.
  Remove the -K/-J options for setting keys.
  Add test checking the quality of HLL by adding a lot of elements.
  Fix serializing probabilistic hashers.
  Baseline updates after hash function change.
  Also switch BloomFilters from H3 to siphash.
  Change Hashing from H3 to Siphash.
  HLL: Remove unnecessary comparison.
  Hyperloglog: change calculation of Rho
Commit 4d84ee82da

347 changed files with 26269 additions and 26053 deletions
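The merge touches three related pieces shown in the excerpt below: the HyperLogLog cardinality counter, the Hasher classes (H3 replaced by SipHash keyed with a new 128-bit seed_t), and the Bloom filter BIFs that create hashers. The commit message mentions a wrapper around siphash() added to make calling it safer; that wrapper itself is not part of the excerpt, so the following is only a minimal sketch of the idea, reusing the siphash() call signature visible in UHF::hash() further down. The names safe_siphash and Seed128 are illustrative, not Zeek API.

// Minimal sketch (not the actual Zeek wrapper): a thin guard around the raw
// siphash() C call that pins the key width to 128 bits at compile time and
// tolerates empty input. Assumes the call signature used by UHF::hash() below:
// siphash(out, data, len, key) with a 16-byte key.
#include <cstddef>
#include <cstdint>

#include "siphash24.h"   // SipHash implementation referenced by Hasher.cc in this diff

struct Seed128 {         // illustrative stand-in for Hasher::seed_t
	uint64_t h1;
	uint64_t h2;
	};

inline uint64_t safe_siphash(const void* data, size_t n, const Seed128& key)
	{
	static_assert(sizeof(Seed128) == 16, "SipHash-2-4 needs a 128-bit key");

	if ( ! data || n == 0 )
		return 0;   // assumption: hashing nothing yields 0, mirroring the old n == 0 case

	uint64_t digest = 0;
	siphash(&digest, static_cast<const uint8_t*>(data), n,
	        reinterpret_cast<const uint8_t*>(&key));
	return digest;
	}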
@@ -28,10 +28,9 @@ int CardinalityCounter::OptimalB(double error, double confidence) const
	return answer;
	}

void CardinalityCounter::Init(uint64 size)
void CardinalityCounter::Init(uint64_t size)
	{
	m = size;
	buckets = new uint8_t[m];

	// The following magic values are taken directly out of the
	// description of the HyperLogLog algorithn.
@@ -51,60 +50,83 @@ void CardinalityCounter::Init(uint64 size)
	else
		reporter->InternalError("Invalid size %" PRIu64 ". Size either has to be 16, 32, 64 or bigger than 128", size);

	for ( uint64 i = 0; i < m; i++ )
		buckets[i] = 0;
	double calc_p = log2(m);
	if ( trunc(calc_p) != calc_p )
		reporter->InternalError("Invalid size %" PRIu64 ". Size either has to be a power of 2", size);

	p = calc_p;

	buckets.reserve(m);
	for ( uint64_t i = 0; i < m; i++ )
		buckets.push_back(0);

	assert(buckets.size() == m);

	V = m;
	}

CardinalityCounter::CardinalityCounter(CardinalityCounter& other)
	: buckets(other.buckets)
	{
	Init(other.GetM());
	Merge(&other);
	V = other.V;
	alpha_m = other.alpha_m;
	m = other.m;
	p = other.p;
	}

CardinalityCounter::CardinalityCounter(CardinalityCounter&& o)
	{
	V = o.V;
	alpha_m = o.alpha_m;
	m = o.m;
	p = o.p;

	o.m = 0;
	buckets = std::move(o.buckets);
	}

CardinalityCounter::CardinalityCounter(double error_margin, double confidence)
	{
	int b = OptimalB(error_margin, confidence);
	Init((uint64) pow(2, b));

	assert(b == p);
	}

CardinalityCounter::CardinalityCounter(uint64 size)
CardinalityCounter::CardinalityCounter(uint64_t size)
	{
	Init(size);
	}

CardinalityCounter::CardinalityCounter(uint64 arg_size, uint64 arg_V, double arg_alpha_m)
CardinalityCounter::CardinalityCounter(uint64_t arg_size, uint64_t arg_V, double arg_alpha_m)
	{
	m = arg_size;
	buckets = new uint8_t[m];

	buckets.reserve(m);
	for ( uint64_t i = 0; i < m; i++ )
		buckets.push_back(0);

	alpha_m = arg_alpha_m;
	V = arg_V;
	p = log2(m);
	}

CardinalityCounter::~CardinalityCounter()
	{
	delete [] buckets;
	}

uint8_t CardinalityCounter::Rank(uint64 hash_modified) const
uint8_t CardinalityCounter::Rank(uint64_t hash_modified) const
	{
	uint8_t answer = 0;

	hash_modified = (uint64)(hash_modified / m);
	hash_modified *= 2;

	do {
		hash_modified = (uint64)(hash_modified / 2);
		answer++;
	} while ( hash_modified % 2 == 0);
	hash_modified = hash_modified >> p;
	int answer = 64 - p - CardinalityCounter::flsll(hash_modified) + 1;
	assert(answer > 0 && answer < 64);

	return answer;
	}

void CardinalityCounter::AddElement(uint64 hash)
void CardinalityCounter::AddElement(uint64_t hash)
	{
	uint64 index = hash % m;
	uint64_t index = hash % m;
	hash = hash-index;

	if( buckets[index] == 0 )
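The rewritten Rank() above replaces the old divide-and-test loop, which counted the first one-bit from the least significant end, with a closed form that counts from the most significant end of the remaining 64 - p bits, as the HyperLogLog paper's rho function requires (this is the "Hyperloglog: change calculation of Rho" commit in the merge list). Below is a small self-contained check of that arithmetic, using the same flsll() semantics as the FreeBSD routine added at the end of this file; none of this is Zeek code.

// Illustrative check of the new Rank()/rho arithmetic (not Zeek code).
// With the p index bits stripped, rho is the 1-based position of the first
// one-bit of the remaining 64 - p bits, counted from the front.
#include <cassert>
#include <cstdint>
#include <cstdio>

// Same contract as the FreeBSD flsll() added in this diff: 1-based index of
// the highest set bit, 0 for a zero mask.
static int flsll(uint64_t mask)
	{
	int bit;
	if ( mask == 0 )
		return 0;
	for ( bit = 1; mask != 1; bit++ )
		mask >>= 1;
	return bit;
	}

int main()
	{
	const int p = 4;                  // 2^4 = 16 buckets
	uint64_t hash = 0xA0;             // low p bits (the bucket index) are already zero
	uint64_t w = hash >> p;           // w = 0b1010

	int rho = 64 - p - flsll(w) + 1;  // new calculation: counted from the front
	assert(rho == 57);                // 56 leading zeros in the 60-bit suffix, then a one

	// The old loop counted from the back instead (trailing zeros + 1),
	// which would have produced 2 for the same input.
	printf("rho = %d\n", rho);
	return 0;
	}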
@@ -118,7 +140,7 @@ void CardinalityCounter::AddElement(uint64 hash)

/**
 * Estimate the size by using the the "raw" HyperLogLog estimate. Then,
 * check if it's too "large" or "small" because the raw estimate doesn't
 * check if it's too "large" or "small" because the raw estimate doesn't
 * do well in those cases.
 * Thus, we correct for those errors as specified in the paper.
 *
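For reference, the corrections this comment refers to are the standard ones from the HyperLogLog paper; the Size() body itself lies outside this hunk. A compact sketch follows, with the paper's 32-bit-range constants assumed and illustrative names (V is the count of zero-valued buckets, as documented in the header portion of this diff); it is not the Zeek implementation.

// Minimal sketch of the corrections described above, following the constants
// from the Flajolet et al. HyperLogLog paper (32-bit hash range). Names and
// structure are illustrative only.
#include <cmath>
#include <cstdint>
#include <vector>

double hll_size(const std::vector<uint8_t>& buckets, double alpha_m, uint64_t V)
	{
	const double m = static_cast<double>(buckets.size());
	const double two32 = 4294967296.0;

	double sum = 0;
	for ( uint8_t b : buckets )
		sum += std::pow(2.0, -static_cast<double>(b));

	double E = alpha_m * m * m / sum;       // "raw" estimate

	if ( E <= 2.5 * m && V > 0 )            // small-range: fall back to linear counting
		E = m * std::log(m / static_cast<double>(V));
	else if ( E > two32 / 30.0 )            // large-range: correct for hash collisions
		E = -two32 * std::log(1.0 - E / two32);

	return E;
	}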
@@ -149,7 +171,7 @@ bool CardinalityCounter::Merge(CardinalityCounter* c)
	if ( m != c->GetM() )
		return false;

	uint8_t* temp = c->GetBuckets();
	const vector<uint8_t> temp = c->GetBuckets();

	V = 0;

@@ -165,12 +187,12 @@ bool CardinalityCounter::Merge(CardinalityCounter* c)
	return true;
	}

uint8_t* CardinalityCounter::GetBuckets()
const vector<uint8_t> &CardinalityCounter::GetBuckets() const
	{
	return buckets;
	}

uint64 CardinalityCounter::GetM() const
uint64_t CardinalityCounter::GetM() const
	{
	return m;
	}
@@ -192,7 +214,7 @@ bool CardinalityCounter::Serialize(SerialInfo* info) const
CardinalityCounter* CardinalityCounter::Unserialize(UnserialInfo* info)
	{
	uint64_t m;
	uint64 V;
	uint64_t V;
	double alpha_m;

	bool valid = true;

@@ -202,13 +224,13 @@ CardinalityCounter* CardinalityCounter::Unserialize(UnserialInfo* info)

	CardinalityCounter* c = new CardinalityCounter(m, V, alpha_m);

	uint8_t* buckets = c->buckets;
	vector<uint8_t>& buckets = c->buckets;

	for ( unsigned int i = 0; i < m; i++ )
		{
		char c;
		valid &= UNSERIALIZE(&c);
		buckets[i] = (uint8)c;
		buckets[i] = (uint8_t)c;
		}

	if ( ! valid )
@@ -219,3 +241,51 @@ CardinalityCounter* CardinalityCounter::Unserialize(UnserialInfo* info)

	return c;
	}

/**
 * The following function is copied from libc/string/flsll.c from the FreeBSD source
 * tree. Original copyright message follows
 */
/*-
 * Copyright (c) 1990, 1993
 * The Regents of the University of California. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Find Last Set bit
 */
int
CardinalityCounter::flsll(uint64_t mask)
	{
	int bit;

	if (mask == 0)
		return (0);
	for (bit = 1; mask != 1; bit++)
		mask = (uint64_t)mask >> 1;
	return (bit);
	}
@@ -28,13 +28,18 @@ public:
	 *
	 * @param confidence confidence of the error. Default: 0.95
	 */
	CardinalityCounter(double error_margin, double confidence = 0.95);
	explicit CardinalityCounter(double error_margin, double confidence = 0.95);

	/**
	 * Copy-Constructor
	 */
	CardinalityCounter(CardinalityCounter& other);

	/**
	 * Move-Constructor
	 */
	CardinalityCounter(CardinalityCounter&& o);

	/**
	 * Constructor for a known number of buckets.
	 *

@@ -43,7 +48,7 @@ public:
	 *
	 * @param size number of buckets to create
	 */
	CardinalityCounter(uint64 size);
	explicit CardinalityCounter(uint64_t size);

	/**
	 * Destructor.

@@ -58,7 +63,7 @@ public:
	 *
	 * @param hash 64-bit hash value of the element to be added
	 */
	void AddElement(uint64 hash);
	void AddElement(uint64_t hash);

	/**
	 * Get the current estimated number of elements in the data

@@ -104,7 +109,7 @@ protected:
	 *
	 * @return Number of buckets
	 */
	uint64 GetM() const;
	uint64_t GetM() const;

	/**
	 * Returns the buckets array that holds all of the rough cardinality

@@ -114,21 +119,21 @@ protected:
	 *
	 * @return Array containing cardinality estimates
	 */
	uint8_t* GetBuckets();
	const std::vector<uint8_t>& GetBuckets() const;

private:
	/**
	 * Constructor used when unserializing, i.e., all parameters are
	 * known.
	 */
	CardinalityCounter(uint64 size, uint64 V, double alpha_m);
	explicit CardinalityCounter(uint64_t size, uint64_t V, double alpha_m);

	/**
	 * Helper function with code used jointly by multiple constructors.
	 *
	 * @param arg_size: number of buckets that need to be kept
	 */
	void Init(uint64 arg_size);
	void Init(uint64_t arg_size);

	/**
	 * This function calculates the smallest value of b that will
@@ -150,22 +155,28 @@ private:
	int OptimalB(double error, double confidence) const;

	/**
	 * Determines at which index (counted from the back) the first one-bit
	 * Determines at which index (counted from the front) the first one-bit
	 * appears. The last b bits have to be 0 (the element has to be divisible
	 * by m), hence they are ignored.
	 * by m), hence they are ignored. Always adds 1 to the result. This is the
	 * rho function from the original algorithm.
	 *
	 * @param hash_modified hash value
	 *
	 * @returns index of first one-bit
	 */
	uint8_t Rank(uint64 hash_modified) const;
	uint8_t Rank(uint64_t hash_modified) const;

	/**
	 * flsll from FreeBSD; especially Linux does not have this.
	 */
	static int flsll(uint64_t mask);

	/**
	 * This is the number of buckets that will be stored. The standard
	 * error is 1.04/sqrt(m), so the actual cardinality will be the
	 * estimate +/- 1.04/sqrt(m) with approximately 68% probability.
	 */
	uint64 m;
	uint64_t m;

	/**
	 * These are the actual buckets that are storing an estimate of the
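As a worked example of the documented error bound (not taken from the sources): for a target standard error of 1%, 1.04/sqrt(m) <= 0.01 requires m >= (1.04/0.01)^2 ≈ 10,816 buckets; the smallest power of two satisfying that is m = 2^14 = 16,384, i.e. b = 14, which gives 1.04/sqrt(16384) ≈ 0.81%. Since 1.04/sqrt(m) corresponds to roughly 68% confidence, requesting the default 0.95 confidence requires correspondingly more buckets, which is what OptimalB() accounts for when it picks the smallest sufficient b.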
@@ -173,7 +184,7 @@ private:
	 * appears in the bitstring and that location is at most 65, so not
	 * that many bits are needed to store it.
	 */
	uint8_t* buckets;
	std::vector<uint8_t> buckets;

	/**
	 * There are some state constants that need to be kept track of to

@@ -181,8 +192,9 @@ private:
	 * buckets that are 0 and this is used in the small error correction.
	 * alpha_m is a multiplicative constant used in the algorithm.
	 */
	uint64 V;
	uint64_t V;
	double alpha_m;
	int p; // the log2 of m
};

}
@@ -5,18 +5,21 @@

#include "Hasher.h"
#include "NetVar.h"
#include "digest.h"
#include "Serializer.h"
#include "digest.h"
#include "siphash24.h"

using namespace probabilistic;

uint64 Hasher::MakeSeed(const void* data, size_t size)
Hasher::seed_t Hasher::MakeSeed(const void* data, size_t size)
	{
	u_char buf[SHA256_DIGEST_LENGTH];
	uint64 tmpseed;
	seed_t tmpseed;
	SHA256_CTX ctx;
	sha256_init(&ctx);

	assert(sizeof(tmpseed) == 16);

	if ( data )
		sha256_update(&ctx, data, size);
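The remainder of MakeSeed() is cut off in this hunk; the visible part shows only that the result is now a 128-bit seed_t filled from a SHA-256 digest rather than a single uint64. The sketch below illustrates that idea, using OpenSSL's one-shot SHA256() in place of Zeek's internal digest wrappers; the name make_seed, the Seed128 struct, the null-data handling, and the choice of the first 16 digest bytes are assumptions, not the actual function.

// Rough sketch of deriving a 128-bit SipHash key from arbitrary input
// (illustrative; OpenSSL's SHA256() stands in for Zeek's digest wrappers).
#include <openssl/sha.h>
#include <cstdint>
#include <cstring>

struct Seed128 { uint64_t h1; uint64_t h2; };

Seed128 make_seed(const void* data, size_t size)
	{
	if ( ! data )
		size = 0;   // the real function mixes in a global/random seed instead; omitted here

	unsigned char buf[SHA256_DIGEST_LENGTH];
	SHA256(static_cast<const unsigned char*>(data), size, buf);

	Seed128 seed;
	static_assert(sizeof(seed) == 16, "need exactly 128 bits of key material");
	memcpy(&seed, buf, sizeof(seed));   // keep the first 16 of the 32 digest bytes
	return seed;
	}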
@@ -56,7 +59,10 @@ bool Hasher::DoSerialize(SerialInfo* info) const
	if ( ! SERIALIZE(static_cast<uint16>(k)) )
		return false;

	return SERIALIZE(static_cast<uint64>(seed));
	if ( ! SERIALIZE(static_cast<uint64>(seed.h1)) )
		return false;

	return SERIALIZE(static_cast<uint64>(seed.h2));
	}

bool Hasher::DoUnserialize(UnserialInfo* info)

@@ -70,8 +76,11 @@ bool Hasher::DoUnserialize(UnserialInfo* info)
	k = serial_k;
	assert(k > 0);

	uint64 serial_seed;
	if ( ! UNSERIALIZE(&serial_seed) )
	seed_t serial_seed;
	if ( ! UNSERIALIZE(&serial_seed.h1) )
		return false;

	if ( ! UNSERIALIZE(&serial_seed.h2) )
		return false;

	seed = serial_seed;

@@ -79,14 +88,18 @@ bool Hasher::DoUnserialize(UnserialInfo* info)
	return true;
	}

Hasher::Hasher(size_t arg_k, size_t arg_seed)
Hasher::Hasher(size_t arg_k, seed_t arg_seed)
	{
	k = arg_k;
	seed = arg_seed;
	}

UHF::UHF(size_t arg_seed)
	: h(arg_seed)
UHF::UHF()
	{
	memset(&seed, 0, sizeof(seed));
	}

UHF::UHF(Hasher::seed_t arg_seed)
	{
	seed = arg_seed;
	}
@@ -96,8 +109,14 @@ UHF::UHF(size_t arg_seed)
// times.
Hasher::digest UHF::hash(const void* x, size_t n) const
	{
	assert(sizeof(Hasher::seed_t) == SIPHASH_KEYLEN);

	if ( n <= UHASH_KEY_SIZE )
		return n == 0 ? 0 : h(x, n);
		{
		hash_t outdigest;
		siphash(&outdigest, reinterpret_cast<const uint8_t*>(x), n, reinterpret_cast<const uint8_t*>(&seed));
		return outdigest;
		}

	unsigned char d[16];
	MD5(reinterpret_cast<const unsigned char*>(x), n, d);

@@ -111,11 +130,15 @@ Hasher::digest UHF::hash(const void* x, size_t n) const
	return *reinterpret_cast<const Hasher::digest*>(d);
	}

DefaultHasher::DefaultHasher(size_t k, size_t seed)
DefaultHasher::DefaultHasher(size_t k, Hasher::seed_t seed)
	: Hasher(k, seed)
	{
	for ( size_t i = 1; i <= k; ++i )
		hash_functions.push_back(UHF(Seed() + bro_prng(i)));
		{
		seed_t s = Seed();
		s.h1 += bro_prng(i);
		hash_functions.push_back(UHF(s));
		}
	}

Hasher::digest_vector DefaultHasher::Hash(const void* x, size_t n) const

@@ -158,12 +181,16 @@ bool DefaultHasher::DoUnserialize(UnserialInfo* info)

	hash_functions.clear();
	for ( size_t i = 0; i < K(); ++i )
		hash_functions.push_back(UHF(Seed() + bro_prng(i)));
		{
		Hasher::seed_t s = Seed();
		s.h1 += bro_prng(i);
		hash_functions.push_back(UHF(s));
		}

	return true;
	}

DoubleHasher::DoubleHasher(size_t k, size_t seed)
DoubleHasher::DoubleHasher(size_t k, seed_t seed)
	: Hasher(k, seed), h1(seed + bro_prng(1)), h2(seed + bro_prng(2))
	{
	}
@@ -4,7 +4,6 @@
#define PROBABILISTIC_HASHER_H

#include "Hash.h"
#include "H3.h"
#include "SerialObj.h"

namespace probabilistic {

@@ -17,6 +16,15 @@ class Hasher : public SerialObj {
public:
	typedef hash_t digest;
	typedef std::vector<digest> digest_vector;
	struct seed_t {
		uint64_t h1;
		uint64_t h2;

		friend seed_t operator+(seed_t lhs, const uint64_t rhs) {
			lhs.h1 += rhs;
			return lhs;
		}
	};

	/**
	 * Creates a valid hasher seed from an arbitrary string.
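A small usage sketch of the new seed_t (illustrative, not from the sources): the friend operator+ perturbs only h1, which is how callers such as DefaultHasher derive k distinct 128-bit keys from one base seed while h2 stays tied to the original seed.

// Deriving per-function keys from one base seed, mirroring the pattern
// DefaultHasher uses in Hasher.cc (struct copied from the diff; main() is
// just a demonstration).
#include <cstdint>
#include <cstdio>

struct seed_t {
	uint64_t h1;
	uint64_t h2;

	friend seed_t operator+(seed_t lhs, const uint64_t rhs)
		{
		lhs.h1 += rhs;
		return lhs;
		}
	};

int main()
	{
	seed_t base = { 0x0123456789abcdefULL, 0xfedcba9876543210ULL };

	for ( uint64_t i = 1; i <= 3; ++i )
		{
		seed_t s = base + i;   // distinct key per hash function, same h2
		printf("key %llu: h1=%016llx h2=%016llx\n",
		       (unsigned long long)i,
		       (unsigned long long)s.h1,
		       (unsigned long long)s.h2);
		}
	return 0;
	}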
@@ -30,7 +38,7 @@ public:
	 *
	 * @return A seed suitable for hashers.
	 */
	static uint64 MakeSeed(const void* data, size_t size);
	static seed_t MakeSeed(const void* data, size_t size);

	/**
	 * Destructor.

@@ -89,7 +97,7 @@ public:
	/**
	 * Returns the seed used to construct the hasher.
	 */
	size_t Seed() const { return seed; }
	seed_t Seed() const { return seed; }

	bool Serialize(SerialInfo* info) const;
	static Hasher* Unserialize(UnserialInfo* info);

@@ -106,11 +114,11 @@ protected:
	 *
	 * @param arg_seed The seed for the hasher.
	 */
	Hasher(size_t arg_k, size_t arg_seed);
	Hasher(size_t arg_k, seed_t arg_seed);

private:
	size_t k;
	size_t seed;
	seed_t seed;
};

/**

@@ -120,12 +128,17 @@ private:
class UHF {
public:
	/**
	 * Constructs an H3 hash function seeded with a given seed and an
	 * Default constructor with zero seed.
	 */
	UHF();

	/**
	 * Constructs an hash function seeded with a given seed and an
	 * optional extra seed to replace the initial Bro seed.
	 *
	 * @param arg_seed The seed to use for this instance.
	 */
	UHF(size_t arg_seed = 0);
	UHF(Hasher::seed_t arg_seed);

	template <typename T>
	Hasher::digest operator()(const T& x) const

@@ -159,7 +172,8 @@ public:

	friend bool operator==(const UHF& x, const UHF& y)
		{
		return x.h == y.h;
		return (x.seed.h1 == y.seed.h1) &&
			(x.seed.h2 == y.seed.h2);
		}

	friend bool operator!=(const UHF& x, const UHF& y)

@@ -168,10 +182,9 @@ public:
		}

private:
	static size_t compute_seed(size_t seed);
	static size_t compute_seed(Hasher::seed_t seed);

	H3<Hasher::digest, UHASH_KEY_SIZE> h;
	size_t seed;
	Hasher::seed_t seed;
};


@@ -188,7 +201,7 @@ public:
	 *
	 * @param seed The seed for the hasher.
	 */
	DefaultHasher(size_t k, size_t seed);
	DefaultHasher(size_t k, Hasher::seed_t seed);

	// Overridden from Hasher.
	virtual digest_vector Hash(const void* x, size_t n) const final;

@@ -216,7 +229,7 @@ public:
	 *
	 * @param seed The seed for the hasher.
	 */
	DoubleHasher(size_t k, size_t seed);
	DoubleHasher(size_t k, Hasher::seed_t seed);

	// Overridden from Hasher.
	virtual digest_vector Hash(const void* x, size_t n) const final;
@@ -42,7 +42,7 @@ function bloomfilter_basic_init%(fp: double, capacity: count,

	size_t cells = BasicBloomFilter::M(fp, capacity);
	size_t optimal_k = BasicBloomFilter::K(cells, capacity);
	size_t seed = Hasher::MakeSeed(name->Len() > 0 ? name->Bytes() : 0,
	Hasher::seed_t seed = Hasher::MakeSeed(name->Len() > 0 ? name->Bytes() : 0,
	                               name->Len());
	const Hasher* h = new DoubleHasher(optimal_k, seed);

@@ -66,7 +66,7 @@ function bloomfilter_basic_init%(fp: double, capacity: count,
##
## Returns: A Bloom filter handle.
##
## .. bro:see:: bloomfilter_basic_init bloomfilter_counting_init bloomfilter_add
## .. bro:see:: bloomfilter_basic_init bloomfilter_counting_init bloomfilter_add
##    bloomfilter_lookup bloomfilter_clear bloomfilter_merge global_hash_seed
function bloomfilter_basic_init2%(k: count, cells: count,
                                  name: string &default=""%): opaque of bloomfilter

@@ -82,7 +82,7 @@ function bloomfilter_basic_init2%(k: count, cells: count,
		return 0;
		}

	size_t seed = Hasher::MakeSeed(name->Len() > 0 ? name->Bytes() : 0,
	Hasher::seed_t seed = Hasher::MakeSeed(name->Len() > 0 ? name->Bytes() : 0,
	                               name->Len());
	const Hasher* h = new DoubleHasher(k, seed);

@@ -121,7 +121,7 @@ function bloomfilter_counting_init%(k: count, cells: count, max: count,
		return 0;
		}

	size_t seed = Hasher::MakeSeed(name->Len() > 0 ? name->Bytes() : 0,
	Hasher::seed_t seed = Hasher::MakeSeed(name->Len() > 0 ? name->Bytes() : 0,
	                               name->Len());

	const Hasher* h = new DefaultHasher(k, seed);
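For context (not part of the diff itself): because MakeSeed() hashes the filter's name with SHA-256 when one is given, Bloom filters created with the same non-empty name should derive the same seed_t and therefore the same hash functions, which is what makes bloomfilter_merge across processes meaningful; filters created without a name fall back to whatever default seeding MakeSeed() applies (the documentation points at global_hash_seed for that case).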