Add bloomfilter_decrement bif

This bif implements the decrement operation for counting bloom filters.

It also clarifies some of the documentation.
This commit is contained in:
Johanna Amann 2022-01-18 17:57:51 +00:00 committed by Johanna Amann
parent becc966106
commit aa58b6b37b
7 changed files with 91 additions and 7 deletions

View file

@ -638,6 +638,12 @@ void BloomFilterVal::Add(const Val* val)
bloom_filter->Add(key.get()); bloom_filter->Add(key.get());
} }
// Decrements the counter associated with val in the wrapped Bloom filter.
// The value is first turned into a hash key; the result is whatever the
// underlying filter's Decrement() reports.
bool BloomFilterVal::Decrement(const Val* val)
	{
	auto hash_key = hash->MakeHashKey(*val, true);
	const bool decremented = bloom_filter->Decrement(hash_key.get());
	return decremented;
	}
size_t BloomFilterVal::Count(const Val* val) const size_t BloomFilterVal::Count(const Val* val) const
{ {
auto key = hash->MakeHashKey(*val, true); auto key = hash->MakeHashKey(*val, true);

View file

@ -328,6 +328,7 @@ public:
bool Typify(TypePtr type); bool Typify(TypePtr type);
void Add(const Val* val); void Add(const Val* val);
// Decrements the counter for val in the underlying Bloom filter; returns
// true if the underlying filter reports that the decrement succeeded.
bool Decrement(const Val* val);
size_t Count(const Val* val) const; size_t Count(const Val* val) const;
void Clear(); void Clear();
bool Empty() const; bool Empty() const;

View file

@ -163,6 +163,12 @@ void BasicBloomFilter::Add(const zeek::detail::HashKey* key)
bits->Set(h[i] % bits->Size()); bits->Set(h[i] % bits->Size());
} }
// A basic Bloom filter does not support decrementing, so every request is
// rejected and the filter is left untouched.
bool BasicBloomFilter::Decrement(const zeek::detail::HashKey* key)
	{
	constexpr bool decrement_supported = false;
	return decrement_supported;
	}
size_t BasicBloomFilter::Count(const zeek::detail::HashKey* key) const size_t BasicBloomFilter::Count(const zeek::detail::HashKey* key) const
{ {
detail::Hasher::digest_vector h = hasher->Hash(key); detail::Hasher::digest_vector h = hasher->Hash(key);
@ -267,6 +273,20 @@ void CountingBloomFilter::Add(const zeek::detail::HashKey* key)
cells->Increment(h[i] % cells->Size()); cells->Increment(h[i] % cells->Size());
} }
// Decrements every counter cell that key hashes to, provided the key is
// currently reported as a member (Count() != 0). Returns false without
// modifying any cell when the key is not a member, true otherwise.
bool CountingBloomFilter::Decrement(const zeek::detail::HashKey* key)
	{
	// Refuse to touch the counters of a key that is not reported as present.
	const bool is_member = Count(key) != 0;

	if ( ! is_member )
		return false;

	const detail::Hasher::digest_vector digests = hasher->Hash(key);

	for ( const auto& digest : digests )
		cells->Decrement(digest % cells->Size());

	return true;
	}
size_t CountingBloomFilter::Count(const zeek::detail::HashKey* key) const size_t CountingBloomFilter::Count(const zeek::detail::HashKey* key) const
{ {
detail::Hasher::digest_vector h = hasher->Hash(key); detail::Hasher::digest_vector h = hasher->Hash(key);

View file

@ -43,12 +43,23 @@ public:
virtual ~BloomFilter(); virtual ~BloomFilter();
/** /**
* Adds an element to the Bloom filter. * Adds an element to the Bloom filter, or increments its value for counting
* bloom filters
* *
* @param key The key associated with the element to add. * @param key The key associated with the element to add.
*/ */
virtual void Add(const zeek::detail::HashKey* key) = 0; virtual void Add(const zeek::detail::HashKey* key) = 0;
/**
* Decrements the value of an element in the Bloom filter, if the underlying
* filter supports the operation.
*
* @param key The key associated with the element to decrement.
*
* @return True if the decrement operation succeeded, false otherwise (for
* example when the filter type does not support decrementing).
*/
virtual bool Decrement(const zeek::detail::HashKey* key) = 0;
/** /**
* Retrieves the associated count of a given value. * Retrieves the associated count of a given value.
* *
@ -182,6 +193,7 @@ protected:
// Overridden from BloomFilter. // Overridden from BloomFilter.
void Add(const zeek::detail::HashKey* key) override; void Add(const zeek::detail::HashKey* key) override;
bool Decrement(const zeek::detail::HashKey* key) override;
size_t Count(const zeek::detail::HashKey* key) const override; size_t Count(const zeek::detail::HashKey* key) const override;
broker::expected<broker::data> DoSerialize() const override; broker::expected<broker::data> DoSerialize() const override;
bool DoUnserialize(const broker::data& data) override; bool DoUnserialize(const broker::data& data) override;
@ -231,6 +243,7 @@ protected:
// Overridden from BloomFilter. // Overridden from BloomFilter.
void Add(const zeek::detail::HashKey* key) override; void Add(const zeek::detail::HashKey* key) override;
bool Decrement(const zeek::detail::HashKey* key) override;
size_t Count(const zeek::detail::HashKey* key) const override; size_t Count(const zeek::detail::HashKey* key) const override;
broker::expected<broker::data> DoSerialize() const override; broker::expected<broker::data> DoSerialize() const override;
bool DoUnserialize(const broker::data& data) override; bool DoUnserialize(const broker::data& data) override;

View file

@ -133,7 +133,7 @@ function bloomfilter_counting_init%(k: count, cells: count, max: count,
return zeek::make_intrusive<zeek::BloomFilterVal>(new zeek::probabilistic::CountingBloomFilter(h, cells, width)); return zeek::make_intrusive<zeek::BloomFilterVal>(new zeek::probabilistic::CountingBloomFilter(h, cells, width));
%} %}
## Adds an element to a Bloom filter. ## Adds an element to a Bloom filter. For counting bloom filters, the counter is incremented.
## ##
## bf: The Bloom filter handle. ## bf: The Bloom filter handle.
## ##
@ -141,7 +141,7 @@ function bloomfilter_counting_init%(k: count, cells: count, max: count,
## ##
## .. zeek:see:: bloomfilter_basic_init bloomfilter_basic_init2 ## .. zeek:see:: bloomfilter_basic_init bloomfilter_basic_init2
## bloomfilter_counting_init bloomfilter_lookup bloomfilter_clear ## bloomfilter_counting_init bloomfilter_lookup bloomfilter_clear
## bloomfilter_merge ## bloomfilter_merge bloomfilter_decrement
function bloomfilter_add%(bf: opaque of bloomfilter, x: any%): any function bloomfilter_add%(bf: opaque of bloomfilter, x: any%): any
%{ %{
auto* bfv = static_cast<BloomFilterVal*>(bf); auto* bfv = static_cast<BloomFilterVal*>(bf);
@ -158,8 +158,41 @@ function bloomfilter_add%(bf: opaque of bloomfilter, x: any%): any
return nullptr; return nullptr;
%} %}
## Decrements the counter for an element that was added to a counting bloom filter in the past.
##
## bf: The coubting bloom filter handle.
##
## x: The element to decrement
##
## Returns: True on success
##
## .. zeek:see:: bloomfilter_basic_init bloomfilter_basic_init2
## bloomfilter_counting_init bloomfilter_lookup bloomfilter_clear
## bloomfilter_merge
function bloomfilter_decrement%(bf: opaque of bloomfilter, x: any%): bool
%{
auto* bfv = static_cast<BloomFilterVal*>(bf);
if ( ! bfv->Type() && ! bfv->Typify(x->GetType()) )
reporter->Error("failed to set Bloom filter type");
else if ( ! same_type(bfv->Type(), x->GetType()) )
reporter->Error("incompatible Bloom filter types");
if ( bfv->Decrement(x) )
return val_mgr->True();
else
return val_mgr->False();
%}
#
## Retrieves the counter for a given element in a Bloom filter. ## Retrieves the counter for a given element in a Bloom filter.
## ##
## For a basic bloom filter, this is 0 when the element is not part of the bloom filter, or 1
## if it is part of the bloom filter.
##
## For a counting bloom filter, this is the estimate of how often an element was added.
##
## bf: The Bloom filter handle. ## bf: The Bloom filter handle.
## ##
## x: The element to count. ## x: The element to count.
@ -206,10 +239,6 @@ function bloomfilter_clear%(bf: opaque of bloomfilter%): any
## Merges two Bloom filters. ## Merges two Bloom filters.
## ##
## .. note:: Currently Bloom filters created by different Zeek instances cannot
## be merged. In the future, this will be supported as long as both filters
## are created with the same name.
##
## bf1: The first Bloom filter handle. ## bf1: The first Bloom filter handle.
## ##
## bf2: The second Bloom filter handle. ## bf2: The second Bloom filter handle.

View file

@ -23,3 +23,9 @@
3 3
3 3
2 2
2
1
T
0
2
F

View file

@ -98,6 +98,15 @@ function test_counting_bloom_filter()
print bloomfilter_lookup(bf_merged, "foo"); print bloomfilter_lookup(bf_merged, "foo");
print bloomfilter_lookup(bf_merged, "bar"); print bloomfilter_lookup(bf_merged, "bar");
print bloomfilter_lookup(bf_merged, "baz"); print bloomfilter_lookup(bf_merged, "baz");
bloomfilter_decrement(bf, "foo");
print bloomfilter_lookup(bf, "foo"); # 2
bloomfilter_decrement(bf, "foo");
print bloomfilter_lookup(bf, "foo"); # 1
print bloomfilter_decrement(bf, "foo"); # True
print bloomfilter_lookup(bf, "foo"); # 0
print bloomfilter_lookup(bf, "bar"); # still 2
print bloomfilter_decrement(bf, "foo"); # False
} }
event zeek_init() event zeek_init()