mirror of
https://github.com/zeek/zeek.git
synced 2025-10-08 09:38:19 +00:00
Merge remote-tracking branch 'origin/master' into topic/matthias/bloom-filter
Conflicts: src/probabilistic/Hasher.h
This commit is contained in:
commit
9ad7121fed
250 changed files with 12762 additions and 5361 deletions
|
@ -466,8 +466,11 @@ bool BitVector::Empty() const
|
|||
bool BitVector::AllZero() const
|
||||
{
|
||||
for ( size_t i = 0; i < bits.size(); ++i )
|
||||
{
|
||||
if ( bits[i] )
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -565,11 +568,11 @@ bool BitVector::DoUnserialize(UnserialInfo* info)
|
|||
bits[i] = static_cast<block_type>(block);
|
||||
}
|
||||
|
||||
uint64 num_bits;
|
||||
if ( ! UNSERIALIZE(&num_bits) )
|
||||
uint64 n;
|
||||
if ( ! UNSERIALIZE(&n) )
|
||||
return false;
|
||||
|
||||
num_bits = static_cast<size_type>(num_bits);
|
||||
num_bits = static_cast<size_type>(n);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -1,9 +1,11 @@
|
|||
// See the file "COPYING" in the main distribution directory for copyright.
|
||||
|
||||
#include <typeinfo>
|
||||
#include <cmath>
|
||||
#include <limits>
|
||||
|
||||
#include "BloomFilter.h"
|
||||
|
||||
#include <cmath>
|
||||
#include <limits>
|
||||
#include "CounterVector.h"
|
||||
#include "Serializer.h"
|
||||
|
||||
|
@ -74,18 +76,19 @@ void BasicBloomFilter::Clear()
|
|||
bool BasicBloomFilter::Merge(const BloomFilter* other)
|
||||
{
|
||||
if ( typeid(*this) != typeid(*other) )
|
||||
return 0;
|
||||
return false;
|
||||
|
||||
const BasicBloomFilter* o = static_cast<const BasicBloomFilter*>(other);
|
||||
|
||||
if ( ! hasher->Equals(o->hasher) )
|
||||
{
|
||||
reporter->InternalError("incompatible hashers in BasicBloomFilter merge");
|
||||
reporter->Error("incompatible hashers in BasicBloomFilter merge");
|
||||
return false;
|
||||
}
|
||||
|
||||
else if ( bits->Size() != o->bits->Size() )
|
||||
{
|
||||
reporter->InternalError("different bitvector size in BasicBloomFilter merge");
|
||||
reporter->Error("different bitvector size in BasicBloomFilter merge");
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -172,18 +175,19 @@ void CountingBloomFilter::Clear()
|
|||
bool CountingBloomFilter::Merge(const BloomFilter* other)
|
||||
{
|
||||
if ( typeid(*this) != typeid(*other) )
|
||||
return 0;
|
||||
return false;
|
||||
|
||||
const CountingBloomFilter* o = static_cast<const CountingBloomFilter*>(other);
|
||||
|
||||
if ( ! hasher->Equals(o->hasher) )
|
||||
{
|
||||
reporter->InternalError("incompatible hashers in CountingBloomFilter merge");
|
||||
reporter->Error("incompatible hashers in CountingBloomFilter merge");
|
||||
return false;
|
||||
}
|
||||
|
||||
else if ( cells->Size() != o->cells->Size() )
|
||||
{
|
||||
reporter->InternalError("different bitvector size in CountingBloomFilter merge");
|
||||
reporter->Error("different bitvector size in CountingBloomFilter merge");
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
@ -13,6 +13,6 @@ set(probabilistic_SRCS
|
|||
Hasher.cc)
|
||||
|
||||
bif_target(bloom-filter.bif)
|
||||
bro_add_subdir_library(probabilistic ${probabilistic_SRCS})
|
||||
|
||||
bro_add_subdir_library(probabilistic ${probabilistic_SRCS} ${BIF_OUTPUT_CC})
|
||||
add_dependencies(bro_probabilistic generate_outputs)
|
||||
|
|
|
@ -183,11 +183,11 @@ bool CounterVector::DoUnserialize(UnserialInfo* info)
|
|||
if ( ! bits )
|
||||
return false;
|
||||
|
||||
uint64 width;
|
||||
if ( ! UNSERIALIZE(&width) )
|
||||
uint64 w;
|
||||
if ( ! UNSERIALIZE(&w) )
|
||||
return false;
|
||||
|
||||
width = static_cast<size_t>(width);
|
||||
width = static_cast<size_t>(w);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -78,6 +78,14 @@ protected:
|
|||
|
||||
Hasher() { }
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
*
|
||||
* @param k the number of hash functions.
|
||||
*
|
||||
* @param name A name for the hasher. Hashers with the same name
|
||||
* should provide consistent results.
|
||||
*/
|
||||
Hasher(size_t k, const std::string& name);
|
||||
|
||||
private:
|
||||
|
|
|
@ -20,15 +20,23 @@ module GLOBAL;
|
|||
|
||||
## Creates a basic Bloom filter.
|
||||
##
|
||||
## .. note:: A Bloom filter can have a name associated with it. In the future,
|
||||
## Bloom filters with the same name will be compatible across independent Bro
|
||||
## instances, i.e., it will be possible to merge them. Currently, however, that is
|
||||
## not yet supported.
|
||||
##
|
||||
## fp: The desired false-positive rate.
|
||||
##
|
||||
## capacity: the maximum number of elements that guarantees a false-positive
|
||||
## rate of *fp*.
|
||||
##
|
||||
## name: A name that uniquely identifies and seeds the Bloom filter. If empty,
|
||||
## the initialization will become dependent on the initial seed.
|
||||
## the filter will remain tied to the current Bro process.
|
||||
##
|
||||
## Returns: A Bloom filter handle.
|
||||
##
|
||||
## .. bro:see:: bloomfilter_counting_init bloomfilter_add bloomfilter_lookup
|
||||
## bloomfilter_clear bloomfilter_merge
|
||||
function bloomfilter_basic_init%(fp: double, capacity: count,
|
||||
name: string &default=""%): opaque of bloomfilter
|
||||
%{
|
||||
|
@ -47,18 +55,28 @@ function bloomfilter_basic_init%(fp: double, capacity: count,
|
|||
|
||||
## Creates a counting Bloom filter.
|
||||
##
|
||||
## .. note:: A Bloom filter can have a name associated with it. In the future,
|
||||
## Bloom filters with the same name will be compatible across independent Bro
|
||||
## instances, i.e., it will be possible to merge them. Currently, however, that is
|
||||
## not yet supported.
|
||||
##
|
||||
## k: The number of hash functions to use.
|
||||
##
|
||||
## cells: The number of cells of the underlying counter vector.
|
||||
## cells: The number of cells of the underlying counter vector. As there's no
|
||||
## single answer to what's the best parameterization for a counting Bloom filter,
|
||||
## we refer to the Bloom filter literature here for choosing an appropriate value.
|
||||
##
|
||||
## max: The maximum counter value associated with each element described
|
||||
## by *w = ceil(log_2(max))* bits. Each bit in the underlying counter vector
|
||||
## becomes a cell of size *w* bits.
|
||||
##
|
||||
## name: A name that uniquely identifies and seeds the Bloom filter. If empty,
|
||||
## the initialization will become dependent on the initial seed.
|
||||
## the filter will remain tied to the current Bro process.
|
||||
##
|
||||
## Returns: A Bloom filter handle.
|
||||
##
|
||||
## .. bro:see:: bloomfilter_basic_init bloomfilter_add bloomfilter_lookup
|
||||
## bloomfilter_clear bloomfilter_merge
|
||||
function bloomfilter_counting_init%(k: count, cells: count, max: count,
|
||||
name: string &default=""%): opaque of bloomfilter
|
||||
%{
|
||||
|
@ -82,6 +100,9 @@ function bloomfilter_counting_init%(k: count, cells: count, max: count,
|
|||
## bf: The Bloom filter handle.
|
||||
##
|
||||
## x: The element to add.
|
||||
##
|
||||
## .. bro:see:: bloomfilter_counting_init bloomfilter_basic_init bloomfilter_lookup
|
||||
## bloomfilter_clear bloomfilter_merge
|
||||
function bloomfilter_add%(bf: opaque of bloomfilter, x: any%): any
|
||||
%{
|
||||
BloomFilterVal* bfv = static_cast<BloomFilterVal*>(bf);
|
||||
|
@ -105,6 +126,9 @@ function bloomfilter_add%(bf: opaque of bloomfilter, x: any%): any
|
|||
## x: The element to count.
|
||||
##
|
||||
## Returns: the counter associated with *x* in *bf*.
|
||||
##
|
||||
## .. bro:see:: bloomfilter_counting_init bloomfilter_basic_init
|
||||
## bloomfilter_add bloomfilter_clear bloomfilter_merge
|
||||
function bloomfilter_lookup%(bf: opaque of bloomfilter, x: any%): count
|
||||
%{
|
||||
const BloomFilterVal* bfv = static_cast<const BloomFilterVal*>(bf);
|
||||
|
@ -124,11 +148,14 @@ function bloomfilter_lookup%(bf: opaque of bloomfilter, x: any%): count
|
|||
return new Val(0, TYPE_COUNT);
|
||||
%}
|
||||
|
||||
## Removes all elements from a Bloom filter. This function sets resets all bits
|
||||
## in the underlying bitvector to 0 but does not change the parameterization of
|
||||
## the Bloom filter, such as the element type and the hasher seed.
|
||||
## Removes all elements from a Bloom filter. This function resets all bits in the
|
||||
## underlying bitvector back to 0 but does not change the parameterization of the
|
||||
## Bloom filter, such as the element type and the hasher seed.
|
||||
##
|
||||
## bf: The Bloom filter handle.
|
||||
##
|
||||
## .. bro:see:: bloomfilter_counting_init bloomfilter_basic_init
|
||||
## bloomfilter_add bloomfilter_lookup bloomfilter_merge
|
||||
function bloomfilter_clear%(bf: opaque of bloomfilter%): any
|
||||
%{
|
||||
BloomFilterVal* bfv = static_cast<BloomFilterVal*>(bf);
|
||||
|
@ -139,14 +166,20 @@ function bloomfilter_clear%(bf: opaque of bloomfilter%): any
|
|||
return 0;
|
||||
%}
|
||||
|
||||
|
||||
## Merges two Bloom filters.
|
||||
##
|
||||
## .. note:: Currently Bloom filters created by different Bro instances cannot
|
||||
## be merged. In the future, this will be supported as long as both filters
|
||||
## are created with the same name.
|
||||
##
|
||||
## bf1: The first Bloom filter handle.
|
||||
##
|
||||
## bf2: The second Bloom filter handle.
|
||||
##
|
||||
## Returns: The union of *bf1* and *bf2*.
|
||||
##
|
||||
## .. bro:see:: bloomfilter_counting_init bloomfilter_basic_init
|
||||
## bloomfilter_add bloomfilter_lookup bloomfilter_clear
|
||||
function bloomfilter_merge%(bf1: opaque of bloomfilter,
|
||||
bf2: opaque of bloomfilter%): opaque of bloomfilter
|
||||
%{
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue