Merge branch 'topic/robin/bloom-filter-merge'

* topic/robin/bloom-filter-merge:
  Using a real hash function for hashing a BitVector's internal state.
  Support UHF hashing for >= UHASH_KEY_SIZE bytes.
  Changing the Bloom filter hashing so that it's independent of CompositeHash.
  Add new BiF for low-level Bloom filter initialization.
  Introduce global_hash_seed script variable.

Conflicts:
	testing/btest/Baseline/bifs.bloomfilter/output
This commit is contained in:
Robin Sommer 2013-08-01 10:52:08 -07:00
commit 32a403cdaf
19 changed files with 337 additions and 138 deletions

View file

@ -0,0 +1,8 @@
bf1, global_seed, 11979365913534242684
bf2, global_seed, 12550100962110750449
bf3, my_seed, 12550100962110750449
bf4, my_seed, 945716460325754659
bf1, global_seed, 12550100962110750449
bf2, global_seed, 945716460325754659
bf3, my_seed, 12550100962110750449
bf4, my_seed, 945716460325754659

View file

@ -18,6 +18,7 @@ error: false-positive rate must take value between 0 and 1
1
1
1
1
2
3
3

View file

@ -0,0 +1,40 @@
# @TEST-EXEC: bro -b %INPUT global_hash_seed="foo" >>output
# @TEST-EXEC: bro -b %INPUT global_hash_seed="my_seed" >>output
# @TEST-EXEC: btest-diff output
# Record type with one count field and one string field. Instances of it
# are added to Bloom filters below, alongside plain strings.
type Foo: record
{
a: count;
b: string;
};
# Builds four basic Bloom filters and prints the internal state of each.
# bf1 and bf2 are created without a seed argument; bf3 and bf4 pass
# "my_seed" as the third argument. In each pair, one filter receives two
# strings and the other receives two Foo records.
# NOTE(review): the print labels suggest the two-argument form falls back
# to the global_hash_seed script variable -- confirm against the BiF.
function test_bloom_filter()
{
# Strings, no explicit seed.
local bf1 = bloomfilter_basic_init(0.9, 10);
bloomfilter_add(bf1, "foo");
bloomfilter_add(bf1, "bar");
# Records, no explicit seed.
local bf2 = bloomfilter_basic_init(0.9, 10);
bloomfilter_add(bf2, Foo($a=1, $b="xx"));
bloomfilter_add(bf2, Foo($a=2, $b="yy"));
# Same strings as bf1, but with the explicit seed "my_seed".
local bf3 = bloomfilter_basic_init(0.9, 10, "my_seed");
bloomfilter_add(bf3, "foo");
bloomfilter_add(bf3, "bar");
# Same records as bf2, but with the explicit seed "my_seed".
local bf4 = bloomfilter_basic_init(0.9, 10, "my_seed");
bloomfilter_add(bf4, Foo($a=1, $b="xx"));
bloomfilter_add(bf4, Foo($a=2, $b="yy"));
# Print one labelled line per filter; this output is what the test
# compares against its recorded baseline.
print "bf1, global_seed", bloomfilter_internal_state(bf1);
print "bf2, global_seed", bloomfilter_internal_state(bf2);
print "bf3, my_seed", bloomfilter_internal_state(bf3);
print "bf4, my_seed", bloomfilter_internal_state(bf4);
}
# Handler for the bro_init event: runs the Bloom filter seed test.
event bro_init()
{
test_bloom_filter();
}

View file

@ -15,14 +15,21 @@ function test_basic_bloom_filter()
bloomfilter_add(bf_cnt, 0.5); # Type mismatch
bloomfilter_add(bf_cnt, "foo"); # Type mismatch
# Alternative constructor.
local bf_dbl = bloomfilter_basic_init2(4, 10);
bloomfilter_add(bf_dbl, 4.2);
bloomfilter_add(bf_dbl, 3.14);
print bloomfilter_lookup(bf_dbl, 4.2);
print bloomfilter_lookup(bf_dbl, 3.14);
# Basic usage with strings.
local bf_str = bloomfilter_basic_init(0.9, 10);
bloomfilter_add(bf_str, "foo");
bloomfilter_add(bf_str, "bar");
print bloomfilter_lookup(bf_str, "foo");
print bloomfilter_lookup(bf_str, "bar");
print bloomfilter_lookup(bf_str, "b4z"); # FP
print bloomfilter_lookup(bf_str, "quux"); # FP
print bloomfilter_lookup(bf_str, "b4zzz"); # FP
print bloomfilter_lookup(bf_str, "quuux"); # FP
bloomfilter_add(bf_str, 0.5); # Type mismatch
bloomfilter_add(bf_str, 100); # Type mismatch