diff --git a/src/OpaqueVal.cc b/src/OpaqueVal.cc index e18d6e7e28..87212bc03c 100644 --- a/src/OpaqueVal.cc +++ b/src/OpaqueVal.cc @@ -638,6 +638,12 @@ void BloomFilterVal::Add(const Val* val) bloom_filter->Add(key.get()); } +bool BloomFilterVal::Decrement(const Val* val) + { + auto key = hash->MakeHashKey(*val, true); + return bloom_filter->Decrement(key.get()); + } + size_t BloomFilterVal::Count(const Val* val) const { auto key = hash->MakeHashKey(*val, true); diff --git a/src/OpaqueVal.h b/src/OpaqueVal.h index c02dce0691..1cc6be4b11 100644 --- a/src/OpaqueVal.h +++ b/src/OpaqueVal.h @@ -328,6 +328,7 @@ public: bool Typify(TypePtr type); void Add(const Val* val); + bool Decrement(const Val* val); size_t Count(const Val* val) const; void Clear(); bool Empty() const; diff --git a/src/probabilistic/BloomFilter.cc b/src/probabilistic/BloomFilter.cc index 4c046c3511..7b159940a0 100644 --- a/src/probabilistic/BloomFilter.cc +++ b/src/probabilistic/BloomFilter.cc @@ -163,6 +163,12 @@ void BasicBloomFilter::Add(const zeek::detail::HashKey* key) bits->Set(h[i] % bits->Size()); } +bool BasicBloomFilter::Decrement(const zeek::detail::HashKey* key) + { + // Not supported: a basic Bloom filter stores plain bits, not counters. + return false; + } + size_t BasicBloomFilter::Count(const zeek::detail::HashKey* key) const { detail::Hasher::digest_vector h = hasher->Hash(key); @@ -267,6 +273,20 @@ void CountingBloomFilter::Add(const zeek::detail::HashKey* key) cells->Increment(h[i] % cells->Size()); } +bool CountingBloomFilter::Decrement(const zeek::detail::HashKey* key) + { + // Only decrement if a member.
+ if ( Count(key) == 0 ) + return false; + + detail::Hasher::digest_vector h = hasher->Hash(key); + + for ( size_t i = 0; i < h.size(); ++i ) + cells->Decrement(h[i] % cells->Size()); + + return true; + } + size_t CountingBloomFilter::Count(const zeek::detail::HashKey* key) const { detail::Hasher::digest_vector h = hasher->Hash(key); diff --git a/src/probabilistic/BloomFilter.h b/src/probabilistic/BloomFilter.h index c67b6674c5..809c1151aa 100644 --- a/src/probabilistic/BloomFilter.h +++ b/src/probabilistic/BloomFilter.h @@ -43,12 +43,23 @@ public: virtual ~BloomFilter(); /** - * Adds an element to the Bloom filter. + * Adds an element to the Bloom filter, or increments its value for counting + * Bloom filters. * * @param key The key associated with the element to add. */ virtual void Add(const zeek::detail::HashKey* key) = 0; + /** + * Decrements the value of an element in the Bloom filter, if the underlying + * filter supports the operation. + * + * @param key The key associated with the element to decrement. + * + * @return True if the decrement operation succeeded. + */ + virtual bool Decrement(const zeek::detail::HashKey* key) = 0; + /** * Retrieves the associated count of a given value. * @@ -182,6 +193,7 @@ protected: // Overridden from BloomFilter. void Add(const zeek::detail::HashKey* key) override; + bool Decrement(const zeek::detail::HashKey* key) override; size_t Count(const zeek::detail::HashKey* key) const override; broker::expected DoSerialize() const override; bool DoUnserialize(const broker::data& data) override; @@ -231,6 +243,7 @@ protected: // Overridden from BloomFilter.
void Add(const zeek::detail::HashKey* key) override; + bool Decrement(const zeek::detail::HashKey* key) override; size_t Count(const zeek::detail::HashKey* key) const override; broker::expected DoSerialize() const override; bool DoUnserialize(const broker::data& data) override; diff --git a/src/probabilistic/bloom-filter.bif b/src/probabilistic/bloom-filter.bif index b545e13aa9..bdc220a195 100644 --- a/src/probabilistic/bloom-filter.bif +++ b/src/probabilistic/bloom-filter.bif @@ -133,7 +133,7 @@ function bloomfilter_counting_init%(k: count, cells: count, max: count, return zeek::make_intrusive(new zeek::probabilistic::CountingBloomFilter(h, cells, width)); %} -## Adds an element to a Bloom filter. +## Adds an element to a Bloom filter. For counting bloom filters, the counter is incremented. ## ## bf: The Bloom filter handle. ## @@ -141,7 +141,7 @@ function bloomfilter_counting_init%(k: count, cells: count, max: count, ## ## .. zeek:see:: bloomfilter_basic_init bloomfilter_basic_init2 ## bloomfilter_counting_init bloomfilter_lookup bloomfilter_clear -## bloomfilter_merge +## bloomfilter_merge bloomfilter_decrement function bloomfilter_add%(bf: opaque of bloomfilter, x: any%): any %{ auto* bfv = static_cast(bf); @@ -158,8 +158,41 @@ function bloomfilter_add%(bf: opaque of bloomfilter, x: any%): any return nullptr; %} +## Decrements the counter for an element that was added to a counting Bloom filter in the past. +## +## bf: The counting Bloom filter handle. +## +## x: The element to decrement. +## +## Returns: True if the counter was decremented; false if not (non-member, basic filter, or type error). +## +## .. zeek:see:: bloomfilter_basic_init bloomfilter_basic_init2 +## bloomfilter_counting_init bloomfilter_lookup bloomfilter_clear +## bloomfilter_merge bloomfilter_add +function bloomfilter_decrement%(bf: opaque of bloomfilter, x: any%): bool + %{ + auto* bfv = static_cast<BloomFilterVal*>(bf); + + if ( ! bfv->Type() && ! bfv->Typify(x->GetType()) ) + reporter->Error("failed to set Bloom filter type"); + + else if ( !
same_type(bfv->Type(), x->GetType()) ) + reporter->Error("incompatible Bloom filter types"); + + else if ( bfv->Decrement(x) ) + return val_mgr->True(); + + return val_mgr->False(); + %} +# + ## Retrieves the counter for a given element in a Bloom filter. ## +## For a basic bloom filter, this is 0 when the element is not part of the bloom filter, or 1 +## if it is part of the bloom filter. +## +## For a counting bloom filter, this is the estimate of how often an element was added. +## ## bf: The Bloom filter handle. ## ## x: The element to count. @@ -206,10 +239,6 @@ function bloomfilter_clear%(bf: opaque of bloomfilter%): any ## Merges two Bloom filters. ## -## .. note:: Currently Bloom filters created by different Zeek instances cannot -## be merged. In the future, this will be supported as long as both filters -## are created with the same name. -## ## bf1: The first Bloom filter handle. ## ## bf2: The second Bloom filter handle. diff --git a/testing/btest/Baseline/bifs.bloomfilter/output b/testing/btest/Baseline/bifs.bloomfilter/output index 2e6dfc8abe..f66423110b 100644 --- a/testing/btest/Baseline/bifs.bloomfilter/output +++ b/testing/btest/Baseline/bifs.bloomfilter/output @@ -23,3 +23,9 @@ 3 3 2 +2 +1 +T +0 +2 +F diff --git a/testing/btest/bifs/bloomfilter.zeek b/testing/btest/bifs/bloomfilter.zeek index dcd900d203..94c3b96b26 100644 --- a/testing/btest/bifs/bloomfilter.zeek +++ b/testing/btest/bifs/bloomfilter.zeek @@ -98,6 +98,15 @@ function test_counting_bloom_filter() print bloomfilter_lookup(bf_merged, "foo"); print bloomfilter_lookup(bf_merged, "bar"); print bloomfilter_lookup(bf_merged, "baz"); + + bloomfilter_decrement(bf, "foo"); + print bloomfilter_lookup(bf, "foo"); # 2 + bloomfilter_decrement(bf, "foo"); + print bloomfilter_lookup(bf, "foo"); # 1 + print bloomfilter_decrement(bf, "foo"); # True + print bloomfilter_lookup(bf, "foo"); # 0 + print bloomfilter_lookup(bf, "bar"); # still 2 + print bloomfilter_decrement(bf, "foo"); # False } event
zeek_init()