Merge remote-tracking branch 'origin/master' into topic/matthias/bloom-filter

Conflicts:
	src/probabilistic/Hasher.h
This commit is contained in:
Matthias Vallentin 2013-07-30 12:12:27 +02:00
commit 9ad7121fed
250 changed files with 12762 additions and 5361 deletions

View file

@ -466,8 +466,11 @@ bool BitVector::Empty() const
// Reports whether the bit vector contains no set bits. Walks the underlying
// block storage and bails out at the first non-zero block; a vector without
// any blocks yields true.
bool BitVector::AllZero() const
{
	size_t idx = 0;

	while ( idx < bits.size() )
	{
		// A single non-zero block means at least one bit is set.
		if ( bits[idx] )
			return false;

		++idx;
	}

	return true;
}
@ -565,11 +568,11 @@ bool BitVector::DoUnserialize(UnserialInfo* info)
bits[i] = static_cast<block_type>(block);
}
uint64 num_bits;
if ( ! UNSERIALIZE(&num_bits) )
uint64 n;
if ( ! UNSERIALIZE(&n) )
return false;
num_bits = static_cast<size_type>(num_bits);
num_bits = static_cast<size_type>(n);
return true;
}

View file

@ -1,9 +1,11 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include <typeinfo>
#include <cmath>
#include <limits>
#include "BloomFilter.h"
#include <cmath>
#include <limits>
#include "CounterVector.h"
#include "Serializer.h"
@ -74,18 +76,19 @@ void BasicBloomFilter::Clear()
bool BasicBloomFilter::Merge(const BloomFilter* other)
{
if ( typeid(*this) != typeid(*other) )
return 0;
return false;
const BasicBloomFilter* o = static_cast<const BasicBloomFilter*>(other);
if ( ! hasher->Equals(o->hasher) )
{
reporter->InternalError("incompatible hashers in BasicBloomFilter merge");
reporter->Error("incompatible hashers in BasicBloomFilter merge");
return false;
}
else if ( bits->Size() != o->bits->Size() )
{
reporter->InternalError("different bitvector size in BasicBloomFilter merge");
reporter->Error("different bitvector size in BasicBloomFilter merge");
return false;
}
@ -172,18 +175,19 @@ void CountingBloomFilter::Clear()
bool CountingBloomFilter::Merge(const BloomFilter* other)
{
if ( typeid(*this) != typeid(*other) )
return 0;
return false;
const CountingBloomFilter* o = static_cast<const CountingBloomFilter*>(other);
if ( ! hasher->Equals(o->hasher) )
{
reporter->InternalError("incompatible hashers in CountingBloomFilter merge");
reporter->Error("incompatible hashers in CountingBloomFilter merge");
return false;
}
else if ( cells->Size() != o->cells->Size() )
{
reporter->InternalError("different bitvector size in CountingBloomFilter merge");
reporter->Error("different bitvector size in CountingBloomFilter merge");
return false;
}

View file

@ -13,6 +13,6 @@ set(probabilistic_SRCS
Hasher.cc)
bif_target(bloom-filter.bif)
bro_add_subdir_library(probabilistic ${probabilistic_SRCS})
bro_add_subdir_library(probabilistic ${probabilistic_SRCS} ${BIF_OUTPUT_CC})
add_dependencies(bro_probabilistic generate_outputs)

View file

@ -183,11 +183,11 @@ bool CounterVector::DoUnserialize(UnserialInfo* info)
if ( ! bits )
return false;
uint64 width;
if ( ! UNSERIALIZE(&width) )
uint64 w;
if ( ! UNSERIALIZE(&w) )
return false;
width = static_cast<size_t>(width);
width = static_cast<size_t>(w);
return true;
}

View file

@ -78,6 +78,14 @@ protected:
Hasher() { }
/**
* Constructor.
*
* @param k the number of hash functions.
*
* @param name A name for the hasher. Hashers with the same name
* should provide consistent results.
*/
Hasher(size_t k, const std::string& name);
private:

View file

@ -20,15 +20,23 @@ module GLOBAL;
## Creates a basic Bloom filter.
##
## .. note:: A Bloom filter can have a name associated with it. In the future,
## Bloom filters with the same name will be compatible across independent Bro
## instances, i.e., it will be possible to merge them. Currently, however, that is
## not yet supported.
##
## fp: The desired false-positive rate.
##
## capacity: the maximum number of elements that guarantees a false-positive
## rate of *fp*.
##
## name: A name that uniquely identifies and seeds the Bloom filter. If empty,
## the initialization will become dependent on the initial seed.
## the filter will remain tied to the current Bro process.
##
## Returns: A Bloom filter handle.
##
## .. bro:see:: bloomfilter_counting_init bloomfilter_add bloomfilter_lookup
## bloomfilter_clear bloomfilter_merge
function bloomfilter_basic_init%(fp: double, capacity: count,
name: string &default=""%): opaque of bloomfilter
%{
@ -47,18 +55,28 @@ function bloomfilter_basic_init%(fp: double, capacity: count,
## Creates a counting Bloom filter.
##
## .. note:: A Bloom filter can have a name associated with it. In the future,
## Bloom filters with the same name will be compatible across independent Bro
## instances, i.e., it will be possible to merge them. Currently, however, that is
## not yet supported.
##
## k: The number of hash functions to use.
##
## cells: The number of cells of the underlying counter vector.
## cells: The number of cells of the underlying counter vector. As there's no
## single answer to what's the best parameterization for a counting Bloom filter,
## we refer to the Bloom filter literature here for choosing an appropriate value.
##
## max: The maximum counter value associated with each element described
## by *w = ceil(log_2(max))* bits. Each bit in the underlying counter vector
## becomes a cell of size *w* bits.
##
## name: A name that uniquely identifies and seeds the Bloom filter. If empty,
## the initialization will become dependent on the initial seed.
## the filter will remain tied to the current Bro process.
##
## Returns: A Bloom filter handle.
##
## .. bro:see:: bloomfilter_basic_init bloomfilter_add bloomfilter_lookup
## bloomfilter_clear bloomfilter_merge
function bloomfilter_counting_init%(k: count, cells: count, max: count,
name: string &default=""%): opaque of bloomfilter
%{
@ -82,6 +100,9 @@ function bloomfilter_counting_init%(k: count, cells: count, max: count,
## bf: The Bloom filter handle.
##
## x: The element to add.
##
## .. bro:see:: bloomfilter_counting_init bloomfilter_basic_init bloomfilter_lookup
## bloomfilter_clear bloomfilter_merge
function bloomfilter_add%(bf: opaque of bloomfilter, x: any%): any
%{
BloomFilterVal* bfv = static_cast<BloomFilterVal*>(bf);
@ -105,6 +126,9 @@ function bloomfilter_add%(bf: opaque of bloomfilter, x: any%): any
## x: The element to count.
##
## Returns: the counter associated with *x* in *bf*.
##
## .. bro:see:: bloomfilter_counting_init bloomfilter_basic_init
## bloomfilter_add bloomfilter_clear bloomfilter_merge
function bloomfilter_lookup%(bf: opaque of bloomfilter, x: any%): count
%{
const BloomFilterVal* bfv = static_cast<const BloomFilterVal*>(bf);
@ -124,11 +148,14 @@ function bloomfilter_lookup%(bf: opaque of bloomfilter, x: any%): count
return new Val(0, TYPE_COUNT);
%}
## Removes all elements from a Bloom filter. This function sets resets all bits
## in the underlying bitvector to 0 but does not change the parameterization of
## the Bloom filter, such as the element type and the hasher seed.
## Removes all elements from a Bloom filter. This function resets all bits in the
## underlying bitvector back to 0 but does not change the parameterization of the
## Bloom filter, such as the element type and the hasher seed.
##
## bf: The Bloom filter handle.
##
## .. bro:see:: bloomfilter_counting_init bloomfilter_basic_init
## bloomfilter_add bloomfilter_lookup bloomfilter_merge
function bloomfilter_clear%(bf: opaque of bloomfilter%): any
%{
BloomFilterVal* bfv = static_cast<BloomFilterVal*>(bf);
@ -139,14 +166,20 @@ function bloomfilter_clear%(bf: opaque of bloomfilter%): any
return 0;
%}
## Merges two Bloom filters.
##
## .. note:: Currently Bloom filters created by different Bro instances cannot
## be merged. In the future, this will be supported as long as both filters
## are created with the same name.
##
## bf1: The first Bloom filter handle.
##
## bf2: The second Bloom filter handle.
##
## Returns: The union of *bf1* and *bf2*.
##
## .. bro:see:: bloomfilter_counting_init bloomfilter_basic_init
## bloomfilter_add bloomfilter_lookup bloomfilter_clear
function bloomfilter_merge%(bf1: opaque of bloomfilter,
bf2: opaque of bloomfilter%): opaque of bloomfilter
%{