mirror of
https://github.com/zeek/zeek.git
synced 2025-10-11 02:58:20 +00:00
Reformat the world
This commit is contained in:
parent
194cb24547
commit
b2f171ec69
714 changed files with 35149 additions and 35203 deletions
|
@ -2,36 +2,34 @@
|
|||
|
||||
#include "zeek/probabilistic/BitVector.h"
|
||||
|
||||
#include <broker/data.hh>
|
||||
#include <openssl/sha.h>
|
||||
#include <cassert>
|
||||
#include <limits>
|
||||
#include <openssl/sha.h>
|
||||
#include <broker/data.hh>
|
||||
|
||||
#include "zeek/digest.h"
|
||||
|
||||
namespace zeek::probabilistic::detail {
|
||||
namespace zeek::probabilistic::detail
|
||||
{
|
||||
|
||||
BitVector::size_type BitVector::npos = static_cast<BitVector::size_type>(-1);
|
||||
BitVector::block_type BitVector::bits_per_block =
|
||||
std::numeric_limits<BitVector::block_type>::digits;
|
||||
|
||||
namespace {
|
||||
namespace
|
||||
{
|
||||
|
||||
uint8_t count_table[] = {
|
||||
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2,
|
||||
3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3,
|
||||
3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3,
|
||||
4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4,
|
||||
3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5,
|
||||
6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4,
|
||||
4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5,
|
||||
6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5,
|
||||
3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3,
|
||||
4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6,
|
||||
6, 7, 6, 7, 7, 8
|
||||
};
|
||||
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
||||
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
||||
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
||||
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
||||
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
|
||||
|
||||
} // namespace <anonymous>
|
||||
} // namespace <anonymous>
|
||||
|
||||
BitVector::Reference::Reference(block_type& block, block_type i)
|
||||
: block(block), mask((block_type(1) << i))
|
||||
|
@ -118,8 +116,7 @@ BitVector::BitVector(size_type size, bool value)
|
|||
num_bits = size;
|
||||
}
|
||||
|
||||
BitVector::BitVector(BitVector const& other)
|
||||
: bits(other.bits)
|
||||
BitVector::BitVector(BitVector const& other) : bits(other.bits)
|
||||
{
|
||||
num_bits = other.num_bits;
|
||||
}
|
||||
|
@ -174,7 +171,7 @@ BitVector& BitVector::operator<<=(size_type n)
|
|||
|
||||
else
|
||||
{
|
||||
for (size_type i = last-div; i > 0; --i)
|
||||
for ( size_type i = last - div; i > 0; --i )
|
||||
b[i + div] = b[i];
|
||||
|
||||
b[div] = b[0];
|
||||
|
@ -204,7 +201,7 @@ BitVector& BitVector::operator>>=(size_type n)
|
|||
|
||||
if ( r != 0 )
|
||||
{
|
||||
for (size_type i = last - div; i > 0; --i)
|
||||
for ( size_type i = last - div; i > 0; --i )
|
||||
b[i - div] = (b[i] >> r) | (b[i + 1] << (bits_per_block - r));
|
||||
|
||||
b[last - div] = b[last] >> r;
|
||||
|
@ -212,8 +209,8 @@ BitVector& BitVector::operator>>=(size_type n)
|
|||
|
||||
else
|
||||
{
|
||||
for (size_type i = div; i <= last; ++i)
|
||||
b[i-div] = b[i];
|
||||
for ( size_type i = div; i <= last; ++i )
|
||||
b[i - div] = b[i];
|
||||
}
|
||||
|
||||
std::fill_n(b + (Blocks() - div), div, block_type(0));
|
||||
|
@ -309,7 +306,6 @@ bool operator<(BitVector const& x, BitVector const& y)
|
|||
|
||||
else if ( x.bits[i] > y.bits[i] )
|
||||
return false;
|
||||
|
||||
}
|
||||
|
||||
return false;
|
||||
|
@ -404,7 +400,7 @@ BitVector& BitVector::Flip(size_type i)
|
|||
|
||||
BitVector& BitVector::Flip()
|
||||
{
|
||||
for (size_type i = 0; i < Blocks(); ++i)
|
||||
for ( size_type i = 0; i < Blocks(); ++i )
|
||||
bits[i] = ~bits[i];
|
||||
|
||||
zero_unused_bits();
|
||||
|
@ -549,7 +545,7 @@ BitVector::size_type BitVector::lowest_bit(block_type block)
|
|||
block_type x = block - (block & (block - 1));
|
||||
size_type log = 0;
|
||||
|
||||
while (x >>= 1)
|
||||
while ( x >>= 1 )
|
||||
++log;
|
||||
|
||||
return log;
|
||||
|
@ -568,7 +564,7 @@ void BitVector::zero_unused_bits()
|
|||
|
||||
BitVector::size_type BitVector::find_from(size_type i) const
|
||||
{
|
||||
while (i < Blocks() && bits[i] == 0)
|
||||
while ( i < Blocks() && bits[i] == 0 )
|
||||
++i;
|
||||
|
||||
if ( i >= Blocks() )
|
||||
|
@ -577,4 +573,4 @@ BitVector::size_type BitVector::find_from(size_type i) const
|
|||
return i * bits_per_block + lowest_bit(bits[i]);
|
||||
}
|
||||
|
||||
} // namespace zeek::probabilistic::detail
|
||||
} // namespace zeek::probabilistic::detail
|
||||
|
|
|
@ -2,20 +2,24 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <broker/expected.hh>
|
||||
#include <iterator>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include <broker/expected.hh>
|
||||
namespace broker
|
||||
{
|
||||
class data;
|
||||
}
|
||||
|
||||
namespace broker { class data; }
|
||||
|
||||
namespace zeek::probabilistic::detail {
|
||||
namespace zeek::probabilistic::detail
|
||||
{
|
||||
|
||||
/**
|
||||
* A vector of bits.
|
||||
*/
|
||||
class BitVector {
|
||||
class BitVector
|
||||
{
|
||||
public:
|
||||
typedef uint64_t block_type;
|
||||
typedef size_t size_type;
|
||||
|
@ -27,7 +31,8 @@ public:
|
|||
/**
|
||||
* An lvalue proxy for individual bits.
|
||||
*/
|
||||
class Reference {
|
||||
class Reference
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Inverts the bits' values.
|
||||
|
@ -51,7 +56,7 @@ public:
|
|||
|
||||
block_type& block;
|
||||
const block_type mask;
|
||||
};
|
||||
};
|
||||
|
||||
/**
|
||||
* Default-constructs an empty bit vector.
|
||||
|
@ -72,8 +77,7 @@ public:
|
|||
* @param last End of range.
|
||||
*
|
||||
*/
|
||||
template <typename InputIterator>
|
||||
BitVector(InputIterator first, InputIterator last)
|
||||
template <typename InputIterator> BitVector(InputIterator first, InputIterator last)
|
||||
{
|
||||
bits.insert(bits.end(), first, last);
|
||||
num_bits = bits.size() * bits_per_block;
|
||||
|
@ -125,8 +129,7 @@ public:
|
|||
* @param last An iterator pointing to one past the last element of the
|
||||
* sequence.
|
||||
*/
|
||||
template <typename ForwardIterator>
|
||||
void Append(ForwardIterator first, ForwardIterator last)
|
||||
template <typename ForwardIterator> void Append(ForwardIterator first, ForwardIterator last)
|
||||
{
|
||||
if ( first == last )
|
||||
return;
|
||||
|
@ -141,11 +144,11 @@ public:
|
|||
{
|
||||
bits.back() |= (*first << excess);
|
||||
|
||||
do {
|
||||
do
|
||||
{
|
||||
block_type b = *first++ >> (bits_per_block - excess);
|
||||
bits.push_back(b | (first == last ? 0 : *first << excess));
|
||||
} while (first != last);
|
||||
|
||||
} while ( first != last );
|
||||
}
|
||||
|
||||
else
|
||||
|
@ -279,10 +282,10 @@ public:
|
|||
size_type FindNext(size_type i) const;
|
||||
|
||||
/** Computes a hash value of the internal representation.
|
||||
* This is mainly for debugging/testing purposes.
|
||||
*
|
||||
* @return The hash.
|
||||
*/
|
||||
* This is mainly for debugging/testing purposes.
|
||||
*
|
||||
* @return The hash.
|
||||
*/
|
||||
uint64_t Hash() const;
|
||||
|
||||
broker::expected<broker::data> Serialize() const;
|
||||
|
@ -312,26 +315,17 @@ private:
|
|||
/**
|
||||
* Computes the block index for a given bit position.
|
||||
*/
|
||||
static size_type block_index(size_type i)
|
||||
{
|
||||
return i / bits_per_block;
|
||||
}
|
||||
static size_type block_index(size_type i) { return i / bits_per_block; }
|
||||
|
||||
/**
|
||||
* Computes the bit index within a given block for a given bit position.
|
||||
*/
|
||||
static block_type bit_index(size_type i)
|
||||
{
|
||||
return i % bits_per_block;
|
||||
}
|
||||
static block_type bit_index(size_type i) { return i % bits_per_block; }
|
||||
|
||||
/**
|
||||
* Computes the bitmask block to extract a bit a given bit position.
|
||||
*/
|
||||
static block_type bit_mask(size_type i)
|
||||
{
|
||||
return block_type(1) << bit_index(i);
|
||||
}
|
||||
static block_type bit_mask(size_type i) { return block_type(1) << bit_index(i); }
|
||||
|
||||
/**
|
||||
* Computes the number of blocks needed to represent a given number of
|
||||
|
@ -341,8 +335,7 @@ private:
|
|||
*/
|
||||
static size_type bits_to_blocks(size_type bits)
|
||||
{
|
||||
return bits / bits_per_block
|
||||
+ static_cast<size_type>(bits % bits_per_block != 0);
|
||||
return bits / bits_per_block + static_cast<size_type>(bits % bits_per_block != 0);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -354,6 +347,6 @@ private:
|
|||
|
||||
std::vector<block_type> bits;
|
||||
size_type num_bits;
|
||||
};
|
||||
};
|
||||
|
||||
} // namespace zeek::probabilistic::detail
|
||||
} // namespace zeek::probabilistic::detail
|
||||
|
|
|
@ -2,17 +2,17 @@
|
|||
|
||||
#include "zeek/probabilistic/BloomFilter.h"
|
||||
|
||||
#include <broker/data.hh>
|
||||
#include <broker/error.hh>
|
||||
#include <cmath>
|
||||
#include <limits>
|
||||
|
||||
#include <broker/data.hh>
|
||||
#include <broker/error.hh>
|
||||
|
||||
#include "zeek/Reporter.h"
|
||||
#include "zeek/probabilistic/CounterVector.h"
|
||||
#include "zeek/util.h"
|
||||
#include "zeek/Reporter.h"
|
||||
|
||||
namespace zeek::probabilistic {
|
||||
namespace zeek::probabilistic
|
||||
{
|
||||
|
||||
BloomFilter::BloomFilter()
|
||||
{
|
||||
|
@ -61,15 +61,16 @@ std::unique_ptr<BloomFilter> BloomFilter::Unserialize(const broker::data& data)
|
|||
|
||||
std::unique_ptr<BloomFilter> bf;
|
||||
|
||||
switch ( *type ) {
|
||||
case Basic:
|
||||
bf = std::unique_ptr<BloomFilter>(new BasicBloomFilter());
|
||||
break;
|
||||
switch ( *type )
|
||||
{
|
||||
case Basic:
|
||||
bf = std::unique_ptr<BloomFilter>(new BasicBloomFilter());
|
||||
break;
|
||||
|
||||
case Counting:
|
||||
bf = std::unique_ptr<BloomFilter>(new CountingBloomFilter());
|
||||
break;
|
||||
}
|
||||
case Counting:
|
||||
bf = std::unique_ptr<BloomFilter>(new CountingBloomFilter());
|
||||
break;
|
||||
}
|
||||
|
||||
if ( ! bf->DoUnserialize((*v)[2]) )
|
||||
return nullptr;
|
||||
|
@ -144,8 +145,7 @@ BasicBloomFilter::BasicBloomFilter()
|
|||
bits = nullptr;
|
||||
}
|
||||
|
||||
BasicBloomFilter::BasicBloomFilter(const detail::Hasher* hasher, size_t cells)
|
||||
: BloomFilter(hasher)
|
||||
BasicBloomFilter::BasicBloomFilter(const detail::Hasher* hasher, size_t cells) : BloomFilter(hasher)
|
||||
{
|
||||
bits = new detail::BitVector(cells);
|
||||
}
|
||||
|
@ -197,8 +197,8 @@ CountingBloomFilter::CountingBloomFilter()
|
|||
cells = nullptr;
|
||||
}
|
||||
|
||||
CountingBloomFilter::CountingBloomFilter(const detail::Hasher* hasher,
|
||||
size_t arg_cells, size_t width)
|
||||
CountingBloomFilter::CountingBloomFilter(const detail::Hasher* hasher, size_t arg_cells,
|
||||
size_t width)
|
||||
: BloomFilter(hasher)
|
||||
{
|
||||
cells = new detail::CounterVector(width, arg_cells);
|
||||
|
@ -277,7 +277,7 @@ size_t CountingBloomFilter::Count(const zeek::detail::HashKey* key) const
|
|||
for ( size_t i = 0; i < h.size(); ++i )
|
||||
{
|
||||
detail::CounterVector::size_type cnt = cells->Count(h[i] % cells->Size());
|
||||
if ( cnt < min )
|
||||
if ( cnt < min )
|
||||
min = cnt;
|
||||
}
|
||||
|
||||
|
@ -300,4 +300,4 @@ bool CountingBloomFilter::DoUnserialize(const broker::data& data)
|
|||
return true;
|
||||
}
|
||||
|
||||
} // namespace zeek::probabilistic
|
||||
} // namespace zeek::probabilistic
|
||||
|
|
|
@ -2,29 +2,39 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "zeek/zeek-config.h"
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
#include <broker/expected.hh>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "zeek/probabilistic/BitVector.h"
|
||||
#include "zeek/probabilistic/Hasher.h"
|
||||
#include "zeek/zeek-config.h"
|
||||
|
||||
namespace broker { class data; }
|
||||
namespace broker
|
||||
{
|
||||
class data;
|
||||
}
|
||||
|
||||
namespace zeek::probabilistic {
|
||||
namespace detail { class CounterVector; }
|
||||
namespace zeek::probabilistic
|
||||
{
|
||||
namespace detail
|
||||
{
|
||||
class CounterVector;
|
||||
}
|
||||
|
||||
/** Types of derived BloomFilter classes. */
|
||||
enum BloomFilterType { Basic, Counting };
|
||||
enum BloomFilterType
|
||||
{
|
||||
Basic,
|
||||
Counting
|
||||
};
|
||||
|
||||
/**
|
||||
* The abstract base class for Bloom filters.
|
||||
*/
|
||||
class BloomFilter {
|
||||
class BloomFilter
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Destructor.
|
||||
|
@ -102,12 +112,13 @@ protected:
|
|||
virtual BloomFilterType Type() const = 0;
|
||||
|
||||
const detail::Hasher* hasher;
|
||||
};
|
||||
};
|
||||
|
||||
/**
|
||||
* A basic Bloom filter.
|
||||
*/
|
||||
class BasicBloomFilter : public BloomFilter {
|
||||
class BasicBloomFilter : public BloomFilter
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Constructs a basic Bloom filter with a given number of cells. The
|
||||
|
@ -173,17 +184,17 @@ protected:
|
|||
size_t Count(const zeek::detail::HashKey* key) const override;
|
||||
broker::expected<broker::data> DoSerialize() const override;
|
||||
bool DoUnserialize(const broker::data& data) override;
|
||||
BloomFilterType Type() const override
|
||||
{ return BloomFilterType::Basic; }
|
||||
BloomFilterType Type() const override { return BloomFilterType::Basic; }
|
||||
|
||||
private:
|
||||
detail::BitVector* bits;
|
||||
};
|
||||
};
|
||||
|
||||
/**
|
||||
* A counting Bloom filter.
|
||||
*/
|
||||
class CountingBloomFilter : public BloomFilter {
|
||||
class CountingBloomFilter : public BloomFilter
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Constructs a counting Bloom filter.
|
||||
|
@ -222,11 +233,10 @@ protected:
|
|||
size_t Count(const zeek::detail::HashKey* key) const override;
|
||||
broker::expected<broker::data> DoSerialize() const override;
|
||||
bool DoUnserialize(const broker::data& data) override;
|
||||
BloomFilterType Type() const override
|
||||
{ return BloomFilterType::Counting; }
|
||||
BloomFilterType Type() const override { return BloomFilterType::Counting; }
|
||||
|
||||
private:
|
||||
detail::CounterVector* cells;
|
||||
};
|
||||
};
|
||||
|
||||
} // namespace zeek::probabilistic
|
||||
} // namespace zeek::probabilistic
|
||||
|
|
|
@ -2,30 +2,31 @@
|
|||
|
||||
#include "zeek/probabilistic/CardinalityCounter.h"
|
||||
|
||||
#include <broker/data.hh>
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include <utility>
|
||||
|
||||
#include <broker/data.hh>
|
||||
|
||||
#include "zeek/Reporter.h"
|
||||
|
||||
namespace zeek::probabilistic::detail {
|
||||
namespace zeek::probabilistic::detail
|
||||
{
|
||||
|
||||
int CardinalityCounter::OptimalB(double error, double confidence) const
|
||||
{
|
||||
double initial_estimate = 2 * (log(1.04) - log(error)) / log(2);
|
||||
int answer = (int) floor(initial_estimate);
|
||||
int answer = (int)floor(initial_estimate);
|
||||
|
||||
// k is the number of standard deviations that we have to go to have
|
||||
// a confidence level of conf.
|
||||
|
||||
double k = 0;
|
||||
|
||||
do {
|
||||
do
|
||||
{
|
||||
answer++;
|
||||
k = pow(2, (answer - initial_estimate) / 2);
|
||||
} while ( erf(k / sqrt(2)) < confidence );
|
||||
} while ( erf(k / sqrt(2)) < confidence );
|
||||
|
||||
return answer;
|
||||
}
|
||||
|
@ -50,11 +51,13 @@ void CardinalityCounter::Init(uint64_t size)
|
|||
alpha_m = 0.7213 / (1 + 1.079 / m);
|
||||
|
||||
else
|
||||
reporter->InternalError("Invalid size %" PRIu64 ". Size either has to be 16, 32, 64 or bigger than 128", size);
|
||||
reporter->InternalError(
|
||||
"Invalid size %" PRIu64 ". Size either has to be 16, 32, 64 or bigger than 128", size);
|
||||
|
||||
double calc_p = log2(m);
|
||||
if ( trunc(calc_p) != calc_p )
|
||||
reporter->InternalError("Invalid size %" PRIu64 ". Size either has to be a power of 2", size);
|
||||
reporter->InternalError("Invalid size %" PRIu64 ". Size either has to be a power of 2",
|
||||
size);
|
||||
|
||||
p = calc_p;
|
||||
|
||||
|
@ -67,8 +70,7 @@ void CardinalityCounter::Init(uint64_t size)
|
|||
V = m;
|
||||
}
|
||||
|
||||
CardinalityCounter::CardinalityCounter(CardinalityCounter& other)
|
||||
: buckets(other.buckets)
|
||||
CardinalityCounter::CardinalityCounter(CardinalityCounter& other) : buckets(other.buckets)
|
||||
{
|
||||
V = other.V;
|
||||
alpha_m = other.alpha_m;
|
||||
|
@ -90,7 +92,7 @@ CardinalityCounter::CardinalityCounter(CardinalityCounter&& o) noexcept
|
|||
CardinalityCounter::CardinalityCounter(double error_margin, double confidence)
|
||||
{
|
||||
int b = OptimalB(error_margin, confidence);
|
||||
Init((uint64_t) pow(2, b));
|
||||
Init((uint64_t)pow(2, b));
|
||||
|
||||
assert(b == p);
|
||||
}
|
||||
|
@ -113,9 +115,7 @@ CardinalityCounter::CardinalityCounter(uint64_t arg_size, uint64_t arg_V, double
|
|||
p = log2(m);
|
||||
}
|
||||
|
||||
CardinalityCounter::~CardinalityCounter()
|
||||
{
|
||||
}
|
||||
CardinalityCounter::~CardinalityCounter() { }
|
||||
|
||||
uint8_t CardinalityCounter::Rank(uint64_t hash_modified) const
|
||||
{
|
||||
|
@ -129,9 +129,9 @@ uint8_t CardinalityCounter::Rank(uint64_t hash_modified) const
|
|||
void CardinalityCounter::AddElement(uint64_t hash)
|
||||
{
|
||||
uint64_t index = hash % m;
|
||||
hash = hash-index;
|
||||
hash = hash - index;
|
||||
|
||||
if( buckets[index] == 0 )
|
||||
if ( buckets[index] == 0 )
|
||||
V--;
|
||||
|
||||
uint8_t temp = Rank(hash);
|
||||
|
@ -158,7 +158,7 @@ double CardinalityCounter::Size() const
|
|||
answer = 1 / answer;
|
||||
answer = (alpha_m * m * m * answer);
|
||||
|
||||
if ( answer <= 5.0 * (m/2) )
|
||||
if ( answer <= 5.0 * (m / 2) )
|
||||
return m * log(((double)m) / V);
|
||||
|
||||
else if ( answer <= (pow(2, 64) / 30) )
|
||||
|
@ -189,7 +189,7 @@ bool CardinalityCounter::Merge(CardinalityCounter* c)
|
|||
return true;
|
||||
}
|
||||
|
||||
const std::vector<uint8_t> &CardinalityCounter::GetBuckets() const
|
||||
const std::vector<uint8_t>& CardinalityCounter::GetBuckets() const
|
||||
{
|
||||
return buckets;
|
||||
}
|
||||
|
@ -228,7 +228,7 @@ std::unique_ptr<CardinalityCounter> CardinalityCounter::Unserialize(const broker
|
|||
auto cc = std::unique_ptr<CardinalityCounter>(new CardinalityCounter(*m, *V, *alpha_m));
|
||||
if ( *m != cc->m )
|
||||
return nullptr;
|
||||
if ( cc->buckets.size() != * m )
|
||||
if ( cc->buckets.size() != *m )
|
||||
return nullptr;
|
||||
|
||||
for ( size_t i = 0; i < *m; ++i )
|
||||
|
@ -283,11 +283,11 @@ int CardinalityCounter::flsll(uint64_t mask)
|
|||
{
|
||||
int bit;
|
||||
|
||||
if (mask == 0)
|
||||
if ( mask == 0 )
|
||||
return (0);
|
||||
for (bit = 1; mask != 1; bit++)
|
||||
for ( bit = 1; mask != 1; bit++ )
|
||||
mask = (uint64_t)mask >> 1;
|
||||
return (bit);
|
||||
}
|
||||
|
||||
} // namespace zeek::probabilistic::detail
|
||||
} // namespace zeek::probabilistic::detail
|
||||
|
|
|
@ -2,21 +2,24 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <broker/expected.hh>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include <broker/expected.hh>
|
||||
namespace broker
|
||||
{
|
||||
class data;
|
||||
}
|
||||
|
||||
namespace broker { class data; }
|
||||
|
||||
namespace zeek::probabilistic::detail {
|
||||
namespace zeek::probabilistic::detail
|
||||
{
|
||||
|
||||
/**
|
||||
* A probabilistic cardinality counter using the HyperLogLog algorithm.
|
||||
*/
|
||||
class CardinalityCounter {
|
||||
class CardinalityCounter
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Constructor.
|
||||
|
@ -184,6 +187,6 @@ private:
|
|||
uint64_t V;
|
||||
double alpha_m;
|
||||
int p; // the log2 of m
|
||||
};
|
||||
};
|
||||
|
||||
} // namespace zeek::probabilistic::detail
|
||||
} // namespace zeek::probabilistic::detail
|
||||
|
|
|
@ -2,16 +2,16 @@
|
|||
|
||||
#include "zeek/probabilistic/CounterVector.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <limits>
|
||||
|
||||
#include <broker/data.hh>
|
||||
#include <broker/error.hh>
|
||||
#include <cassert>
|
||||
#include <limits>
|
||||
|
||||
#include "zeek/probabilistic/BitVector.h"
|
||||
#include "zeek/util.h"
|
||||
|
||||
namespace zeek::probabilistic::detail {
|
||||
namespace zeek::probabilistic::detail
|
||||
{
|
||||
|
||||
CounterVector::CounterVector(size_t arg_width, size_t cells)
|
||||
{
|
||||
|
@ -43,7 +43,7 @@ bool CounterVector::Increment(size_type cell, count_type value)
|
|||
bool b1 = (*bits)[lsb + i];
|
||||
bool b2 = value & (1 << i);
|
||||
(*bits)[lsb + i] = b1 ^ b2 ^ carry;
|
||||
carry = ( b1 && b2 ) || ( carry && ( b1 != b2 ) );
|
||||
carry = (b1 && b2) || (carry && (b1 != b2));
|
||||
}
|
||||
|
||||
if ( carry )
|
||||
|
@ -69,7 +69,7 @@ bool CounterVector::Decrement(size_type cell, count_type value)
|
|||
bool b1 = (*bits)[lsb + i];
|
||||
bool b2 = value & (1 << i);
|
||||
(*bits)[lsb + i] = b1 ^ b2 ^ carry;
|
||||
carry = ( b1 && b2 ) || ( carry && ( b1 != b2 ) );
|
||||
carry = (b1 && b2) || (carry && (b1 != b2));
|
||||
}
|
||||
|
||||
return carry;
|
||||
|
@ -111,8 +111,7 @@ size_t CounterVector::Width() const
|
|||
|
||||
size_t CounterVector::Max() const
|
||||
{
|
||||
return std::numeric_limits<size_t>::max()
|
||||
>> (std::numeric_limits<size_t>::digits - width);
|
||||
return std::numeric_limits<size_t>::max() >> (std::numeric_limits<size_t>::digits - width);
|
||||
}
|
||||
|
||||
CounterVector& CounterVector::Merge(const CounterVector& other)
|
||||
|
@ -130,7 +129,7 @@ CounterVector& CounterVector::Merge(const CounterVector& other)
|
|||
bool b1 = (*bits)[lsb + i];
|
||||
bool b2 = (*other.bits)[lsb + i];
|
||||
(*bits)[lsb + i] = b1 ^ b2 ^ carry;
|
||||
carry = ( b1 && b2 ) || ( carry && ( b1 != b2 ) );
|
||||
carry = (b1 && b2) || (carry && (b1 != b2));
|
||||
}
|
||||
|
||||
if ( carry )
|
||||
|
@ -186,4 +185,4 @@ std::unique_ptr<CounterVector> CounterVector::Unserialize(const broker::data& da
|
|||
return cv;
|
||||
}
|
||||
|
||||
} // namespace zeek::probabilistic::detail
|
||||
} // namespace zeek::probabilistic::detail
|
||||
|
|
|
@ -2,24 +2,28 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "zeek/zeek-config.h"
|
||||
|
||||
#include <broker/expected.hh>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
|
||||
#include <broker/expected.hh>
|
||||
#include "zeek/zeek-config.h"
|
||||
|
||||
namespace broker { class data; }
|
||||
namespace broker
|
||||
{
|
||||
class data;
|
||||
}
|
||||
|
||||
namespace zeek::probabilistic::detail {
|
||||
namespace zeek::probabilistic::detail
|
||||
{
|
||||
|
||||
class BitVector;
|
||||
|
||||
/**
|
||||
* A vector of counters, each of which has a fixed number of bits.
|
||||
*/
|
||||
class CounterVector {
|
||||
class CounterVector
|
||||
{
|
||||
public:
|
||||
typedef size_t size_type;
|
||||
typedef uint64_t count_type;
|
||||
|
@ -134,18 +138,17 @@ public:
|
|||
CounterVector& operator|=(const CounterVector& other);
|
||||
|
||||
/** Computes a hash value of the internal representation.
|
||||
* This is mainly for debugging/testing purposes.
|
||||
*
|
||||
* @return The hash.
|
||||
*/
|
||||
* This is mainly for debugging/testing purposes.
|
||||
*
|
||||
* @return The hash.
|
||||
*/
|
||||
uint64_t Hash() const;
|
||||
|
||||
broker::expected<broker::data> Serialize() const;
|
||||
static std::unique_ptr<CounterVector> Unserialize(const broker::data& data);
|
||||
|
||||
protected:
|
||||
friend CounterVector operator|(const CounterVector& x,
|
||||
const CounterVector& y);
|
||||
friend CounterVector operator|(const CounterVector& x, const CounterVector& y);
|
||||
|
||||
CounterVector() { }
|
||||
|
||||
|
@ -154,6 +157,6 @@ private:
|
|||
|
||||
BitVector* bits;
|
||||
size_t width;
|
||||
};
|
||||
};
|
||||
|
||||
} // namespace zeek::probabilistic::detail
|
||||
} // namespace zeek::probabilistic::detail
|
||||
|
|
|
@ -2,16 +2,17 @@
|
|||
|
||||
#include "zeek/probabilistic/Hasher.h"
|
||||
|
||||
#include <typeinfo>
|
||||
#include <openssl/evp.h>
|
||||
#include <broker/data.hh>
|
||||
#include <highwayhash/sip_hash.h>
|
||||
#include <openssl/evp.h>
|
||||
#include <typeinfo>
|
||||
|
||||
#include "zeek/NetVar.h"
|
||||
#include "zeek/Var.h"
|
||||
#include "zeek/digest.h"
|
||||
|
||||
namespace zeek::probabilistic::detail {
|
||||
namespace zeek::probabilistic::detail
|
||||
{
|
||||
|
||||
Hasher::seed_t Hasher::MakeSeed(const void* data, size_t size)
|
||||
{
|
||||
|
@ -53,9 +54,8 @@ Hasher::Hasher(size_t arg_k, seed_t arg_seed)
|
|||
|
||||
broker::expected<broker::data> Hasher::Serialize() const
|
||||
{
|
||||
return {broker::vector{
|
||||
static_cast<uint64_t>(Type()), static_cast<uint64_t>(k),
|
||||
seed.h[0], seed.h[1] }};
|
||||
return {broker::vector{static_cast<uint64_t>(Type()), static_cast<uint64_t>(k), seed.h[0],
|
||||
seed.h[1]}};
|
||||
}
|
||||
|
||||
std::unique_ptr<Hasher> Hasher::Unserialize(const broker::data& data)
|
||||
|
@ -75,15 +75,16 @@ std::unique_ptr<Hasher> Hasher::Unserialize(const broker::data& data)
|
|||
|
||||
std::unique_ptr<Hasher> hasher;
|
||||
|
||||
switch ( *type ) {
|
||||
case Default:
|
||||
hasher = std::unique_ptr<Hasher>(new DefaultHasher(*k, {*h1, *h2}));
|
||||
break;
|
||||
switch ( *type )
|
||||
{
|
||||
case Default:
|
||||
hasher = std::unique_ptr<Hasher>(new DefaultHasher(*k, {*h1, *h2}));
|
||||
break;
|
||||
|
||||
case Double:
|
||||
hasher = std::unique_ptr<Hasher>(new DoubleHasher(*k, {*h1, *h2}));
|
||||
break;
|
||||
}
|
||||
case Double:
|
||||
hasher = std::unique_ptr<Hasher>(new DoubleHasher(*k, {*h1, *h2}));
|
||||
break;
|
||||
}
|
||||
|
||||
// Note that the derived classed don't hold any further state of
|
||||
// their own. They reconstruct all their information from their
|
||||
|
@ -107,12 +108,12 @@ UHF::UHF(Hasher::seed_t arg_seed)
|
|||
// times.
|
||||
Hasher::digest UHF::hash(const void* x, size_t n) const
|
||||
{
|
||||
static_assert(std::is_same<highwayhash::SipHashState::Key, decltype(seed.h)>::value, "Seed value is not the same type as highwayhash key");
|
||||
static_assert(std::is_same<highwayhash::SipHashState::Key, decltype(seed.h)>::value,
|
||||
"Seed value is not the same type as highwayhash key");
|
||||
return highwayhash::SipHash(seed.h, reinterpret_cast<const char*>(x), n);
|
||||
}
|
||||
|
||||
DefaultHasher::DefaultHasher(size_t k, Hasher::seed_t seed)
|
||||
: Hasher(k, seed)
|
||||
DefaultHasher::DefaultHasher(size_t k, Hasher::seed_t seed) : Hasher(k, seed)
|
||||
{
|
||||
for ( size_t i = 1; i <= k; ++i )
|
||||
{
|
||||
|
@ -177,4 +178,4 @@ bool DoubleHasher::Equals(const Hasher* other) const
|
|||
return h1 == o->h1 && h2 == o->h2;
|
||||
}
|
||||
|
||||
} // namespace zeek::probabilistic::detail
|
||||
} // namespace zeek::probabilistic::detail
|
||||
|
|
|
@ -2,35 +2,46 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <broker/expected.hh>
|
||||
#include <memory>
|
||||
|
||||
#include "zeek/Hash.h"
|
||||
|
||||
namespace broker { class data; }
|
||||
namespace broker
|
||||
{
|
||||
class data;
|
||||
}
|
||||
|
||||
namespace zeek::probabilistic::detail {
|
||||
namespace zeek::probabilistic::detail
|
||||
{
|
||||
|
||||
/** Types of derived Hasher classes. */
|
||||
enum HasherType { Default, Double };
|
||||
enum HasherType
|
||||
{
|
||||
Default,
|
||||
Double
|
||||
};
|
||||
|
||||
/**
|
||||
* Abstract base class for hashers. A hasher creates a family of hash
|
||||
* functions to hash an element *k* times.
|
||||
*/
|
||||
class Hasher {
|
||||
class Hasher
|
||||
{
|
||||
public:
|
||||
typedef zeek::detail::hash_t digest;
|
||||
typedef std::vector<digest> digest_vector;
|
||||
struct seed_t {
|
||||
struct seed_t
|
||||
{
|
||||
// actually HH_U64, which has the same type
|
||||
alignas(16) unsigned long long h[2];
|
||||
|
||||
friend seed_t operator+(seed_t lhs, const uint64_t rhs) {
|
||||
friend seed_t operator+(seed_t lhs, const uint64_t rhs)
|
||||
{
|
||||
lhs.h[0] += rhs;
|
||||
return lhs;
|
||||
}
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Creates a valid hasher seed from an arbitrary string.
|
||||
|
@ -58,11 +69,7 @@ public:
|
|||
*
|
||||
* @return Vector of *k* hash values.
|
||||
*/
|
||||
template <typename T>
|
||||
digest_vector operator()(const T& x) const
|
||||
{
|
||||
return Hash(&x, sizeof(T));
|
||||
}
|
||||
template <typename T> digest_vector operator()(const T& x) const { return Hash(&x, sizeof(T)); }
|
||||
|
||||
/**
|
||||
* Computes hash values for an element.
|
||||
|
@ -98,12 +105,12 @@ public:
|
|||
/**
|
||||
* Returns the number *k* of hash functions the hashers applies.
|
||||
*/
|
||||
size_t K() const { return k; }
|
||||
size_t K() const { return k; }
|
||||
|
||||
/**
|
||||
* Returns the seed used to construct the hasher.
|
||||
*/
|
||||
seed_t Seed() const { return seed; }
|
||||
seed_t Seed() const { return seed; }
|
||||
|
||||
broker::expected<broker::data> Serialize() const;
|
||||
static std::unique_ptr<Hasher> Unserialize(const broker::data& data);
|
||||
|
@ -125,13 +132,14 @@ protected:
|
|||
private:
|
||||
size_t k;
|
||||
seed_t seed;
|
||||
};
|
||||
};
|
||||
|
||||
/**
|
||||
* A universal hash function family. This is a helper class that Hasher
|
||||
* implementations can use in their implementation.
|
||||
*/
|
||||
class UHF {
|
||||
class UHF
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Default constructor with zero seed.
|
||||
|
@ -146,8 +154,7 @@ public:
|
|||
*/
|
||||
explicit UHF(Hasher::seed_t arg_seed);
|
||||
|
||||
template <typename T>
|
||||
Hasher::digest operator()(const T& x) const
|
||||
template <typename T> Hasher::digest operator()(const T& x) const
|
||||
{
|
||||
return hash(&x, sizeof(T));
|
||||
}
|
||||
|
@ -159,10 +166,7 @@ public:
|
|||
*
|
||||
* @return Vector of *k* hash values.
|
||||
*/
|
||||
Hasher::digest operator()(const void* x, size_t n) const
|
||||
{
|
||||
return hash(x, n);
|
||||
}
|
||||
Hasher::digest operator()(const void* x, size_t n) const { return hash(x, n); }
|
||||
|
||||
/**
|
||||
* Computes the hashes for a set of bytes.
|
||||
|
@ -178,14 +182,10 @@ public:
|
|||
|
||||
friend bool operator==(const UHF& x, const UHF& y)
|
||||
{
|
||||
return (x.seed.h[0] == y.seed.h[0]) &&
|
||||
(x.seed.h[1] == y.seed.h[1]);
|
||||
return (x.seed.h[0] == y.seed.h[0]) && (x.seed.h[1] == y.seed.h[1]);
|
||||
}
|
||||
|
||||
friend bool operator!=(const UHF& x, const UHF& y)
|
||||
{
|
||||
return ! (x == y);
|
||||
}
|
||||
friend bool operator!=(const UHF& x, const UHF& y) { return ! (x == y); }
|
||||
|
||||
broker::expected<broker::data> Serialize() const;
|
||||
static UHF Unserialize(const broker::data& data);
|
||||
|
@ -194,14 +194,14 @@ private:
|
|||
static size_t compute_seed(Hasher::seed_t seed);
|
||||
|
||||
Hasher::seed_t seed;
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
/**
|
||||
* A hasher implementing the default hashing policy. Uses *k* separate hash
|
||||
* functions internally.
|
||||
*/
|
||||
class DefaultHasher : public Hasher {
|
||||
class DefaultHasher : public Hasher
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Constructor for a hasher with *k* hash functions.
|
||||
|
@ -220,17 +220,17 @@ public:
|
|||
private:
|
||||
DefaultHasher() { }
|
||||
|
||||
HasherType Type() const override
|
||||
{ return HasherType::Default; }
|
||||
HasherType Type() const override { return HasherType::Default; }
|
||||
|
||||
std::vector<UHF> hash_functions;
|
||||
};
|
||||
};
|
||||
|
||||
/**
|
||||
* The *double-hashing* policy. Uses a linear combination of two hash
|
||||
* functions.
|
||||
*/
|
||||
class DoubleHasher : public Hasher {
|
||||
class DoubleHasher : public Hasher
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Constructor for a double hasher with *k* hash functions.
|
||||
|
@ -249,11 +249,10 @@ public:
|
|||
private:
|
||||
DoubleHasher() { }
|
||||
|
||||
HasherType Type() const override
|
||||
{ return HasherType::Double; }
|
||||
HasherType Type() const override { return HasherType::Double; }
|
||||
|
||||
UHF h1;
|
||||
UHF h2;
|
||||
};
|
||||
};
|
||||
|
||||
} // namespace zeek::probabilistic::detail
|
||||
} // namespace zeek::probabilistic::detail
|
||||
|
|
|
@ -4,22 +4,23 @@
|
|||
|
||||
#include <broker/error.hh>
|
||||
|
||||
#include "zeek/broker/Data.h"
|
||||
#include "zeek/CompHash.h"
|
||||
#include "zeek/Reporter.h"
|
||||
#include "zeek/Dict.h"
|
||||
#include "zeek/Reporter.h"
|
||||
#include "zeek/broker/Data.h"
|
||||
|
||||
namespace zeek::probabilistic::detail {
|
||||
namespace zeek::probabilistic::detail
|
||||
{
|
||||
|
||||
static void topk_element_hash_delete_func(void* val)
|
||||
{
|
||||
Element* e = (Element*) val;
|
||||
Element* e = (Element*)val;
|
||||
delete e;
|
||||
}
|
||||
|
||||
void TopkVal::Typify(TypePtr t)
|
||||
{
|
||||
assert(!hash && !type);
|
||||
assert(! hash && ! type);
|
||||
type = std::move(t);
|
||||
auto tl = make_intrusive<TypeList>(type);
|
||||
tl->Append(type);
|
||||
|
@ -104,7 +105,7 @@ void TopkVal::Merge(const TopkVal* value, bool doPrune)
|
|||
Element* e = *eit;
|
||||
// lookup if we already know this one...
|
||||
zeek::detail::HashKey* key = GetHash(e->value);
|
||||
Element* olde = (Element*) elementDict->Lookup(key);
|
||||
Element* olde = (Element*)elementDict->Lookup(key);
|
||||
|
||||
if ( olde == nullptr )
|
||||
{
|
||||
|
@ -114,7 +115,7 @@ void TopkVal::Merge(const TopkVal* value, bool doPrune)
|
|||
// insert at bucket position 0
|
||||
if ( buckets.size() > 0 )
|
||||
{
|
||||
assert (buckets.front()-> count > 0 );
|
||||
assert(buckets.front()->count > 0);
|
||||
}
|
||||
|
||||
Bucket* newbucket = new Bucket();
|
||||
|
@ -126,7 +127,6 @@ void TopkVal::Merge(const TopkVal* value, bool doPrune)
|
|||
|
||||
elementDict->Insert(key, olde);
|
||||
numElements++;
|
||||
|
||||
}
|
||||
|
||||
// now that we are sure that the old element is present - increment epsilon
|
||||
|
@ -153,7 +153,7 @@ void TopkVal::Merge(const TopkVal* value, bool doPrune)
|
|||
while ( numElements > size )
|
||||
{
|
||||
pruned = true;
|
||||
assert(buckets.size() > 0 );
|
||||
assert(buckets.size() > 0);
|
||||
Bucket* b = buckets.front();
|
||||
assert(b->elements.size() > 0);
|
||||
|
||||
|
@ -199,13 +199,13 @@ VectorValPtr TopkVal::GetTopK(int k) const // returns vector
|
|||
int read = 0;
|
||||
std::list<Bucket*>::const_iterator it = buckets.end();
|
||||
it--;
|
||||
while (read < k )
|
||||
while ( read < k )
|
||||
{
|
||||
//printf("Bucket %llu\n", (*it)->count);
|
||||
// printf("Bucket %llu\n", (*it)->count);
|
||||
std::list<Element*>::iterator eit = (*it)->elements.begin();
|
||||
while ( eit != (*it)->elements.end() )
|
||||
{
|
||||
//printf("Size: %ld\n", (*it)->elements.size());
|
||||
// printf("Size: %ld\n", (*it)->elements.size());
|
||||
t->Assign(read, (*eit)->value);
|
||||
read++;
|
||||
eit++;
|
||||
|
@ -223,7 +223,7 @@ VectorValPtr TopkVal::GetTopK(int k) const // returns vector
|
|||
uint64_t TopkVal::GetCount(Val* value) const
|
||||
{
|
||||
zeek::detail::HashKey* key = GetHash(value);
|
||||
Element* e = (Element*) elementDict->Lookup(key);
|
||||
Element* e = (Element*)elementDict->Lookup(key);
|
||||
delete key;
|
||||
|
||||
if ( e == nullptr )
|
||||
|
@ -238,7 +238,7 @@ uint64_t TopkVal::GetCount(Val* value) const
|
|||
uint64_t TopkVal::GetEpsilon(Val* value) const
|
||||
{
|
||||
zeek::detail::HashKey* key = GetHash(value);
|
||||
Element* e = (Element*) elementDict->Lookup(key);
|
||||
Element* e = (Element*)elementDict->Lookup(key);
|
||||
delete key;
|
||||
|
||||
if ( e == nullptr )
|
||||
|
@ -263,7 +263,8 @@ uint64_t TopkVal::GetSum() const
|
|||
}
|
||||
|
||||
if ( pruned )
|
||||
reporter->Warning("TopkVal::GetSum() was used on a pruned data structure. Result values do not represent total element count");
|
||||
reporter->Warning("TopkVal::GetSum() was used on a pruned data structure. Result values do "
|
||||
"not represent total element count");
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
@ -274,16 +275,15 @@ void TopkVal::Encountered(ValPtr encountered)
|
|||
|
||||
if ( numElements == 0 )
|
||||
Typify(encountered->GetType());
|
||||
else
|
||||
if ( ! same_type(type, encountered->GetType()) )
|
||||
{
|
||||
reporter->Error("Trying to add element to topk with differing type from other elements");
|
||||
return;
|
||||
}
|
||||
else if ( ! same_type(type, encountered->GetType()) )
|
||||
{
|
||||
reporter->Error("Trying to add element to topk with differing type from other elements");
|
||||
return;
|
||||
}
|
||||
|
||||
// Step 1 - get the hash.
|
||||
zeek::detail::HashKey* key = GetHash(encountered);
|
||||
Element* e = (Element*) elementDict->Lookup(key);
|
||||
Element* e = (Element*)elementDict->Lookup(key);
|
||||
|
||||
if ( e == nullptr )
|
||||
{
|
||||
|
@ -328,7 +328,7 @@ void TopkVal::Encountered(ValPtr encountered)
|
|||
assert(b->elements.size() > 0);
|
||||
zeek::detail::HashKey* deleteKey = GetHash((*(b->elements.begin()))->value);
|
||||
b->elements.erase(b->elements.begin());
|
||||
Element* deleteElement = (Element*) elementDict->RemoveEntry(deleteKey);
|
||||
Element* deleteElement = (Element*)elementDict->RemoveEntry(deleteKey);
|
||||
assert(deleteElement); // there has to have been a minimal element...
|
||||
delete deleteElement;
|
||||
delete deleteKey;
|
||||
|
@ -341,7 +341,6 @@ void TopkVal::Encountered(ValPtr encountered)
|
|||
|
||||
// fallthrough, increment operation has to run!
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// ok, we now have an element in e
|
||||
|
@ -362,10 +361,10 @@ void TopkVal::IncrementCounter(Element* e, unsigned int count)
|
|||
|
||||
bucketIter++;
|
||||
|
||||
while ( bucketIter != buckets.end() && (*bucketIter)->count < currcount+count )
|
||||
while ( bucketIter != buckets.end() && (*bucketIter)->count < currcount + count )
|
||||
bucketIter++;
|
||||
|
||||
if ( bucketIter != buckets.end() && (*bucketIter)->count == currcount+count )
|
||||
if ( bucketIter != buckets.end() && (*bucketIter)->count == currcount + count )
|
||||
nextBucket = *bucketIter;
|
||||
|
||||
if ( nextBucket == nullptr )
|
||||
|
@ -374,7 +373,7 @@ void TopkVal::IncrementCounter(Element* e, unsigned int count)
|
|||
// create it...
|
||||
|
||||
Bucket* b = new Bucket();
|
||||
b->count = currcount+count;
|
||||
b->count = currcount + count;
|
||||
|
||||
std::list<Bucket*>::iterator nextBucketPos = buckets.insert(bucketIter, b);
|
||||
b->bucketPos = nextBucketPos; // and give it the iterator we know now.
|
||||
|
@ -446,7 +445,6 @@ broker::expected<broker::data> TopkVal::DoSerialize() const
|
|||
return {std::move(d)};
|
||||
}
|
||||
|
||||
|
||||
bool TopkVal::DoUnserialize(const broker::data& data)
|
||||
{
|
||||
auto v = caf::get_if<broker::vector>(&data);
|
||||
|
@ -507,7 +505,7 @@ bool TopkVal::DoUnserialize(const broker::data& data)
|
|||
b->elements.insert(b->elements.end(), e);
|
||||
|
||||
zeek::detail::HashKey* key = GetHash(e->value);
|
||||
assert (elementDict->Lookup(key) == nullptr);
|
||||
assert(elementDict->Lookup(key) == nullptr);
|
||||
|
||||
elementDict->Insert(key, e);
|
||||
delete key;
|
||||
|
@ -520,4 +518,4 @@ bool TopkVal::DoUnserialize(const broker::data& data)
|
|||
return true;
|
||||
}
|
||||
|
||||
} // namespace zeek::probabilistic::detail
|
||||
} // namespace zeek::probabilistic::detail
|
||||
|
|
|
@ -3,19 +3,25 @@
|
|||
#pragma once
|
||||
|
||||
#include <list>
|
||||
#include "zeek/Val.h"
|
||||
|
||||
#include "zeek/OpaqueVal.h"
|
||||
#include "zeek/Val.h"
|
||||
|
||||
// This class implements the top-k algorithm. Or - to be more precise - an
|
||||
// interpretation of it.
|
||||
|
||||
namespace zeek::detail { class CompositeHash; }
|
||||
namespace zeek::detail
|
||||
{
|
||||
class CompositeHash;
|
||||
}
|
||||
|
||||
namespace zeek::probabilistic::detail {
|
||||
namespace zeek::probabilistic::detail
|
||||
{
|
||||
|
||||
struct Element;
|
||||
|
||||
struct Bucket {
|
||||
struct Bucket
|
||||
{
|
||||
uint64_t count;
|
||||
std::list<Element*> elements;
|
||||
|
||||
|
@ -23,15 +29,17 @@ struct Bucket {
|
|||
// points to us - so it is invalid when we are no longer there. Cute,
|
||||
// isn't it?
|
||||
std::list<Bucket*>::iterator bucketPos;
|
||||
};
|
||||
};
|
||||
|
||||
struct Element {
|
||||
struct Element
|
||||
{
|
||||
uint64_t epsilon;
|
||||
ValPtr value;
|
||||
Bucket* parent;
|
||||
};
|
||||
};
|
||||
|
||||
class TopkVal : public OpaqueVal {
|
||||
class TopkVal : public OpaqueVal
|
||||
{
|
||||
|
||||
public:
|
||||
/**
|
||||
|
@ -77,7 +85,7 @@ public:
|
|||
*
|
||||
* @returns internal count for val, 0 if unknown
|
||||
*/
|
||||
uint64_t GetCount(Val* value) const;
|
||||
uint64_t GetCount(Val* value) const;
|
||||
|
||||
/**
|
||||
* Get the current epsilon tracked in the top-k data structure for a
|
||||
|
@ -116,7 +124,7 @@ public:
|
|||
*
|
||||
* @param doPrune prune resulting TopkVal to size after merging
|
||||
*/
|
||||
void Merge(const TopkVal* value, bool doPrune=false);
|
||||
void Merge(const TopkVal* value, bool doPrune = false);
|
||||
|
||||
/**
|
||||
* Clone the Opaque Type
|
||||
|
@ -153,8 +161,7 @@ private:
|
|||
* @returns HashKey for value
|
||||
*/
|
||||
zeek::detail::HashKey* GetHash(Val* v) const; // this probably should go somewhere else.
|
||||
zeek::detail::HashKey* GetHash(const ValPtr& v) const
|
||||
{ return GetHash(v.get()); }
|
||||
zeek::detail::HashKey* GetHash(const ValPtr& v) const { return GetHash(v.get()); }
|
||||
|
||||
/**
|
||||
* Set the type that this TopK instance tracks
|
||||
|
@ -170,6 +177,6 @@ private:
|
|||
uint64_t size; // how many elements are we tracking?
|
||||
uint64_t numElements; // how many elements do we have at the moment
|
||||
bool pruned; // was this data structure pruned?
|
||||
};
|
||||
};
|
||||
|
||||
} // namespace zeek::probabilistic::detail
|
||||
} // namespace zeek::probabilistic::detail
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue