From d50b8a147d739e3fdce9cf235e47d7291adbe212 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Wed, 31 Jul 2013 18:21:37 +0200 Subject: [PATCH] Add new BiF for low-level Bloom filter initialization. For symmetry reasons, the new Bif bloomfilter_basic_init2 also allows users to manually specify the memory bounds and number of hash functions to use. --- NEWS | 1 + src/probabilistic/bloom-filter.bif | 69 +++++++++++++++---- .../btest/Baseline/bifs.bloomfilter/output | 2 + testing/btest/bifs/bloomfilter.bro | 7 ++ 4 files changed, 67 insertions(+), 12 deletions(-) diff --git a/NEWS b/NEWS index c421e7d675..64058054d6 100644 --- a/NEWS +++ b/NEWS @@ -113,6 +113,7 @@ New Functionality the frequency of elements. The corresponding functions are: bloomfilter_basic_init(fp: double, capacity: count, name: string &default=""): opaque of bloomfilter + bloomfilter_basic_init2(k: count, cells: count, name: string &default=""): opaque of bloomfilter bloomfilter_counting_init(k: count, cells: count, max: count, name: string &default=""): opaque of bloomfilter bloomfilter_add(bf: opaque of bloomfilter, x: any) bloomfilter_lookup(bf: opaque of bloomfilter, x: any): count diff --git a/src/probabilistic/bloom-filter.bif b/src/probabilistic/bloom-filter.bif index d936b77e3b..0c4a67ac6f 100644 --- a/src/probabilistic/bloom-filter.bif +++ b/src/probabilistic/bloom-filter.bif @@ -35,8 +35,8 @@ module GLOBAL; ## ## Returns: A Bloom filter handle. ## -## .. bro:see:: bloomfilter_counting_init bloomfilter_add bloomfilter_lookup -## bloomfilter_clear bloomfilter_merge +## .. bro:see:: bloomfilter_basic_init2 bloomfilter_counting_init bloomfilter_add +## bloomfilter_lookup bloomfilter_clear bloomfilter_merge function bloomfilter_basic_init%(fp: double, capacity: count, name: string &default=""%): opaque of bloomfilter %{ @@ -55,6 +55,47 @@ function bloomfilter_basic_init%(fp: double, capacity: count, return new BloomFilterVal(new BasicBloomFilter(h, cells)); %} +## Creates a basic Bloom filter. This function serves as a low-level +## alternative to bloomfilter_basic_init where the user has full control over +## the number of hash functions and cells in the underlying bit vector. +## +## .. note:: A Bloom filter can have a name associated with it. In the future, +## Bloom filters with the same name will be compatible across indepedent Bro +## instances, i.e., it will be possible to merge them. Currently, however, that is +## not yet supported. +## +## k: The number of hash functions to use. +## +## cells: The number of cells of the underlying bit vector. +## +## name: A name that uniquely identifies and seeds the Bloom filter. If empty, +## the filter will remain tied to the current Bro process. +## +## Returns: A Bloom filter handle. +## +## .. bro:see:: bloom_filter_basic_init bloomfilter_counting_init bloomfilter_add +## bloomfilter_lookup bloomfilter_clear bloomfilter_merge +function bloomfilter_basic_init2%(k: count, cells: count, + name: string &default=""%): opaque of bloomfilter + %{ + if ( k == 0 ) + { + reporter->Error("number of hash functions must be non-negative"); + return 0; + } + if ( cells == 0 ) + { + reporter->Error("number of cells must be non-negative"); + return 0; + } + + size_t seed = Hasher::MakeSeed(name->Len() > 0 ? name->Bytes() : 0, + name->Len()); + const Hasher* h = new DefaultHasher(k, seed); + + return new BloomFilterVal(new BasicBloomFilter(h, cells)); + %} + ## Creates a counting Bloom filter. ## ## .. note:: A Bloom filter can have a name associated with it. In the future, @@ -77,8 +118,8 @@ function bloomfilter_basic_init%(fp: double, capacity: count, ## ## Returns: A Bloom filter handle. ## -## .. bro:see:: bloomfilter_basic_init bloomfilter_add bloomfilter_lookup -## bloomfilter_clear bloomfilter_merge +## .. bro:see:: bloomfilter_basic_init bloomfilter_basic_init2 bloomfilter_add +## bloomfilter_lookup bloomfilter_clear bloomfilter_merge function bloomfilter_counting_init%(k: count, cells: count, max: count, name: string &default=""%): opaque of bloomfilter %{ @@ -106,8 +147,9 @@ function bloomfilter_counting_init%(k: count, cells: count, max: count, ## ## x: The element to add. ## -## .. bro:see:: bloomfilter_counting_init bloomfilter_basic_init loomfilter_lookup -## bloomfilter_clear bloomfilter_merge +## .. bro:see:: bloomfilter_basic_init bloomfilter_basic_init2 +## bloomfilter_counting_init bloomfilter_lookup bloomfilter_clear +## bloomfilter_merge function bloomfilter_add%(bf: opaque of bloomfilter, x: any%): any %{ BloomFilterVal* bfv = static_cast(bf); @@ -132,8 +174,9 @@ function bloomfilter_add%(bf: opaque of bloomfilter, x: any%): any ## ## Returns: the counter associated with *x* in *bf*. ## -## .. bro:see:: bloomfilter_counting_init bloomfilter_basic_init -## bloomfilter_add bloomfilter_clear bloomfilter_merge +## .. bro:see:: bloomfilter_basic_init bloomfilter_basic_init2 +## bloomfilter_counting_init bloomfilter_add bloomfilter_clear +## bloomfilter_merge function bloomfilter_lookup%(bf: opaque of bloomfilter, x: any%): count %{ const BloomFilterVal* bfv = static_cast(bf); @@ -159,8 +202,9 @@ function bloomfilter_lookup%(bf: opaque of bloomfilter, x: any%): count ## ## bf: The Bloom filter handle. ## -## .. bro:see:: bloomfilter_counting_init bloomfilter_basic_init -## bloomfilter_add bloomfilter_lookup bloomfilter_merge +## .. bro:see:: bloomfilter_basic_init bloomfilter_counting_init2 +## bloomfilter_counting_init bloomfilter_add bloomfilter_lookup +## bloomfilter_merge function bloomfilter_clear%(bf: opaque of bloomfilter%): any %{ BloomFilterVal* bfv = static_cast(bf); @@ -183,8 +227,9 @@ function bloomfilter_clear%(bf: opaque of bloomfilter%): any ## ## Returns: The union of *bf1* and *bf2*. ## -## .. bro:see:: bloomfilter_counting_init bloomfilter_basic_init -## bloomfilter_add bloomfilter_lookup bloomfilter_clear +## .. bro:see:: bloomfilter_basic_init bloomfilter_basic_init2 +## bloomfilter_counting_init bloomfilter_add bloomfilter_lookup +## bloomfilter_clear function bloomfilter_merge%(bf1: opaque of bloomfilter, bf2: opaque of bloomfilter%): opaque of bloomfilter %{ diff --git a/testing/btest/Baseline/bifs.bloomfilter/output b/testing/btest/Baseline/bifs.bloomfilter/output index 14e1f038c0..731b7c7ce9 100644 --- a/testing/btest/Baseline/bifs.bloomfilter/output +++ b/testing/btest/Baseline/bifs.bloomfilter/output @@ -17,6 +17,8 @@ error: false-positive rate must take value between 0 and 1 1 1 1 +1 +1 2 3 3 diff --git a/testing/btest/bifs/bloomfilter.bro b/testing/btest/bifs/bloomfilter.bro index e6091e25fa..c2a1c47ca8 100644 --- a/testing/btest/bifs/bloomfilter.bro +++ b/testing/btest/bifs/bloomfilter.bro @@ -15,6 +15,13 @@ function test_basic_bloom_filter() bloomfilter_add(bf_cnt, 0.5); # Type mismatch bloomfilter_add(bf_cnt, "foo"); # Type mismatch + # Alternative constructor. + local bf_dbl = bloomfilter_basic_init2(4, 10); + bloomfilter_add(bf_dbl, 4.2); + bloomfilter_add(bf_dbl, 3.14); + print bloomfilter_lookup(bf_dbl, 4.2); + print bloomfilter_lookup(bf_dbl, 3.14); + # Basic usage with strings. local bf_str = bloomfilter_basic_init(0.9, 10); bloomfilter_add(bf_str, "foo");