Reformat the world

This commit is contained in:
Tim Wojtulewicz 2021-09-16 15:35:39 -07:00
parent 194cb24547
commit b2f171ec69
714 changed files with 35149 additions and 35203 deletions

View file

@ -2,36 +2,34 @@
#include "zeek/probabilistic/BitVector.h"
#include <broker/data.hh>
#include <openssl/sha.h>
#include <cassert>
#include <limits>
#include <openssl/sha.h>
#include <broker/data.hh>
#include "zeek/digest.h"
namespace zeek::probabilistic::detail {
namespace zeek::probabilistic::detail
{
BitVector::size_type BitVector::npos = static_cast<BitVector::size_type>(-1);
BitVector::block_type BitVector::bits_per_block =
std::numeric_limits<BitVector::block_type>::digits;
namespace {
namespace
{
uint8_t count_table[] = {
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2,
3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3,
3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3,
4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4,
3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5,
6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4,
4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5,
6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5,
3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3,
4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6,
6, 7, 6, 7, 7, 8
};
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
} // namespace <anonymous>
} // namespace <anonymous>
BitVector::Reference::Reference(block_type& block, block_type i)
: block(block), mask((block_type(1) << i))
@ -118,8 +116,7 @@ BitVector::BitVector(size_type size, bool value)
num_bits = size;
}
BitVector::BitVector(BitVector const& other)
: bits(other.bits)
BitVector::BitVector(BitVector const& other) : bits(other.bits)
{
num_bits = other.num_bits;
}
@ -174,7 +171,7 @@ BitVector& BitVector::operator<<=(size_type n)
else
{
for (size_type i = last-div; i > 0; --i)
for ( size_type i = last - div; i > 0; --i )
b[i + div] = b[i];
b[div] = b[0];
@ -204,7 +201,7 @@ BitVector& BitVector::operator>>=(size_type n)
if ( r != 0 )
{
for (size_type i = last - div; i > 0; --i)
for ( size_type i = last - div; i > 0; --i )
b[i - div] = (b[i] >> r) | (b[i + 1] << (bits_per_block - r));
b[last - div] = b[last] >> r;
@ -212,8 +209,8 @@ BitVector& BitVector::operator>>=(size_type n)
else
{
for (size_type i = div; i <= last; ++i)
b[i-div] = b[i];
for ( size_type i = div; i <= last; ++i )
b[i - div] = b[i];
}
std::fill_n(b + (Blocks() - div), div, block_type(0));
@ -309,7 +306,6 @@ bool operator<(BitVector const& x, BitVector const& y)
else if ( x.bits[i] > y.bits[i] )
return false;
}
return false;
@ -404,7 +400,7 @@ BitVector& BitVector::Flip(size_type i)
BitVector& BitVector::Flip()
{
for (size_type i = 0; i < Blocks(); ++i)
for ( size_type i = 0; i < Blocks(); ++i )
bits[i] = ~bits[i];
zero_unused_bits();
@ -549,7 +545,7 @@ BitVector::size_type BitVector::lowest_bit(block_type block)
block_type x = block - (block & (block - 1));
size_type log = 0;
while (x >>= 1)
while ( x >>= 1 )
++log;
return log;
@ -568,7 +564,7 @@ void BitVector::zero_unused_bits()
BitVector::size_type BitVector::find_from(size_type i) const
{
while (i < Blocks() && bits[i] == 0)
while ( i < Blocks() && bits[i] == 0 )
++i;
if ( i >= Blocks() )
@ -577,4 +573,4 @@ BitVector::size_type BitVector::find_from(size_type i) const
return i * bits_per_block + lowest_bit(bits[i]);
}
} // namespace zeek::probabilistic::detail
} // namespace zeek::probabilistic::detail

View file

@ -2,20 +2,24 @@
#pragma once
#include <broker/expected.hh>
#include <iterator>
#include <memory>
#include <vector>
#include <broker/expected.hh>
namespace broker
{
class data;
}
namespace broker { class data; }
namespace zeek::probabilistic::detail {
namespace zeek::probabilistic::detail
{
/**
* A vector of bits.
*/
class BitVector {
class BitVector
{
public:
typedef uint64_t block_type;
typedef size_t size_type;
@ -27,7 +31,8 @@ public:
/**
* An lvalue proxy for individual bits.
*/
class Reference {
class Reference
{
public:
/**
* Inverts the bits' values.
@ -51,7 +56,7 @@ public:
block_type& block;
const block_type mask;
};
};
/**
* Default-constructs an empty bit vector.
@ -72,8 +77,7 @@ public:
* @param last End of range.
*
*/
template <typename InputIterator>
BitVector(InputIterator first, InputIterator last)
template <typename InputIterator> BitVector(InputIterator first, InputIterator last)
{
bits.insert(bits.end(), first, last);
num_bits = bits.size() * bits_per_block;
@ -125,8 +129,7 @@ public:
* @param last An iterator pointing to one past the last element of the
* sequence.
*/
template <typename ForwardIterator>
void Append(ForwardIterator first, ForwardIterator last)
template <typename ForwardIterator> void Append(ForwardIterator first, ForwardIterator last)
{
if ( first == last )
return;
@ -141,11 +144,11 @@ public:
{
bits.back() |= (*first << excess);
do {
do
{
block_type b = *first++ >> (bits_per_block - excess);
bits.push_back(b | (first == last ? 0 : *first << excess));
} while (first != last);
} while ( first != last );
}
else
@ -279,10 +282,10 @@ public:
size_type FindNext(size_type i) const;
/** Computes a hash value of the internal representation.
* This is mainly for debugging/testing purposes.
*
* @return The hash.
*/
* This is mainly for debugging/testing purposes.
*
* @return The hash.
*/
uint64_t Hash() const;
broker::expected<broker::data> Serialize() const;
@ -312,26 +315,17 @@ private:
/**
* Computes the block index for a given bit position.
*/
static size_type block_index(size_type i)
{
return i / bits_per_block;
}
static size_type block_index(size_type i) { return i / bits_per_block; }
/**
* Computes the bit index within a given block for a given bit position.
*/
static block_type bit_index(size_type i)
{
return i % bits_per_block;
}
static block_type bit_index(size_type i) { return i % bits_per_block; }
/**
* Computes the bitmask block to extract a bit at a given bit position.
*/
static block_type bit_mask(size_type i)
{
return block_type(1) << bit_index(i);
}
static block_type bit_mask(size_type i) { return block_type(1) << bit_index(i); }
/**
* Computes the number of blocks needed to represent a given number of
@ -341,8 +335,7 @@ private:
*/
static size_type bits_to_blocks(size_type bits)
{
return bits / bits_per_block
+ static_cast<size_type>(bits % bits_per_block != 0);
return bits / bits_per_block + static_cast<size_type>(bits % bits_per_block != 0);
}
/**
@ -354,6 +347,6 @@ private:
std::vector<block_type> bits;
size_type num_bits;
};
};
} // namespace zeek::probabilistic::detail
} // namespace zeek::probabilistic::detail

View file

@ -2,17 +2,17 @@
#include "zeek/probabilistic/BloomFilter.h"
#include <broker/data.hh>
#include <broker/error.hh>
#include <cmath>
#include <limits>
#include <broker/data.hh>
#include <broker/error.hh>
#include "zeek/Reporter.h"
#include "zeek/probabilistic/CounterVector.h"
#include "zeek/util.h"
#include "zeek/Reporter.h"
namespace zeek::probabilistic {
namespace zeek::probabilistic
{
BloomFilter::BloomFilter()
{
@ -61,15 +61,16 @@ std::unique_ptr<BloomFilter> BloomFilter::Unserialize(const broker::data& data)
std::unique_ptr<BloomFilter> bf;
switch ( *type ) {
case Basic:
bf = std::unique_ptr<BloomFilter>(new BasicBloomFilter());
break;
switch ( *type )
{
case Basic:
bf = std::unique_ptr<BloomFilter>(new BasicBloomFilter());
break;
case Counting:
bf = std::unique_ptr<BloomFilter>(new CountingBloomFilter());
break;
}
case Counting:
bf = std::unique_ptr<BloomFilter>(new CountingBloomFilter());
break;
}
if ( ! bf->DoUnserialize((*v)[2]) )
return nullptr;
@ -144,8 +145,7 @@ BasicBloomFilter::BasicBloomFilter()
bits = nullptr;
}
BasicBloomFilter::BasicBloomFilter(const detail::Hasher* hasher, size_t cells)
: BloomFilter(hasher)
BasicBloomFilter::BasicBloomFilter(const detail::Hasher* hasher, size_t cells) : BloomFilter(hasher)
{
bits = new detail::BitVector(cells);
}
@ -197,8 +197,8 @@ CountingBloomFilter::CountingBloomFilter()
cells = nullptr;
}
CountingBloomFilter::CountingBloomFilter(const detail::Hasher* hasher,
size_t arg_cells, size_t width)
CountingBloomFilter::CountingBloomFilter(const detail::Hasher* hasher, size_t arg_cells,
size_t width)
: BloomFilter(hasher)
{
cells = new detail::CounterVector(width, arg_cells);
@ -277,7 +277,7 @@ size_t CountingBloomFilter::Count(const zeek::detail::HashKey* key) const
for ( size_t i = 0; i < h.size(); ++i )
{
detail::CounterVector::size_type cnt = cells->Count(h[i] % cells->Size());
if ( cnt < min )
if ( cnt < min )
min = cnt;
}
@ -300,4 +300,4 @@ bool CountingBloomFilter::DoUnserialize(const broker::data& data)
return true;
}
} // namespace zeek::probabilistic
} // namespace zeek::probabilistic

View file

@ -2,29 +2,39 @@
#pragma once
#include "zeek/zeek-config.h"
#include <memory>
#include <vector>
#include <string>
#include <broker/expected.hh>
#include <memory>
#include <string>
#include <vector>
#include "zeek/probabilistic/BitVector.h"
#include "zeek/probabilistic/Hasher.h"
#include "zeek/zeek-config.h"
namespace broker { class data; }
namespace broker
{
class data;
}
namespace zeek::probabilistic {
namespace detail { class CounterVector; }
namespace zeek::probabilistic
{
namespace detail
{
class CounterVector;
}
/** Types of derived BloomFilter classes. */
enum BloomFilterType { Basic, Counting };
enum BloomFilterType
{
Basic,
Counting
};
/**
* The abstract base class for Bloom filters.
*/
class BloomFilter {
class BloomFilter
{
public:
/**
* Destructor.
@ -102,12 +112,13 @@ protected:
virtual BloomFilterType Type() const = 0;
const detail::Hasher* hasher;
};
};
/**
* A basic Bloom filter.
*/
class BasicBloomFilter : public BloomFilter {
class BasicBloomFilter : public BloomFilter
{
public:
/**
* Constructs a basic Bloom filter with a given number of cells. The
@ -173,17 +184,17 @@ protected:
size_t Count(const zeek::detail::HashKey* key) const override;
broker::expected<broker::data> DoSerialize() const override;
bool DoUnserialize(const broker::data& data) override;
BloomFilterType Type() const override
{ return BloomFilterType::Basic; }
BloomFilterType Type() const override { return BloomFilterType::Basic; }
private:
detail::BitVector* bits;
};
};
/**
* A counting Bloom filter.
*/
class CountingBloomFilter : public BloomFilter {
class CountingBloomFilter : public BloomFilter
{
public:
/**
* Constructs a counting Bloom filter.
@ -222,11 +233,10 @@ protected:
size_t Count(const zeek::detail::HashKey* key) const override;
broker::expected<broker::data> DoSerialize() const override;
bool DoUnserialize(const broker::data& data) override;
BloomFilterType Type() const override
{ return BloomFilterType::Counting; }
BloomFilterType Type() const override { return BloomFilterType::Counting; }
private:
detail::CounterVector* cells;
};
};
} // namespace zeek::probabilistic
} // namespace zeek::probabilistic

View file

@ -2,30 +2,31 @@
#include "zeek/probabilistic/CardinalityCounter.h"
#include <broker/data.hh>
#include <math.h>
#include <stdint.h>
#include <utility>
#include <broker/data.hh>
#include "zeek/Reporter.h"
namespace zeek::probabilistic::detail {
namespace zeek::probabilistic::detail
{
int CardinalityCounter::OptimalB(double error, double confidence) const
{
double initial_estimate = 2 * (log(1.04) - log(error)) / log(2);
int answer = (int) floor(initial_estimate);
int answer = (int)floor(initial_estimate);
// k is the number of standard deviations that we have to go to have
// a confidence level of conf.
double k = 0;
do {
do
{
answer++;
k = pow(2, (answer - initial_estimate) / 2);
} while ( erf(k / sqrt(2)) < confidence );
} while ( erf(k / sqrt(2)) < confidence );
return answer;
}
@ -50,11 +51,13 @@ void CardinalityCounter::Init(uint64_t size)
alpha_m = 0.7213 / (1 + 1.079 / m);
else
reporter->InternalError("Invalid size %" PRIu64 ". Size either has to be 16, 32, 64 or bigger than 128", size);
reporter->InternalError(
"Invalid size %" PRIu64 ". Size either has to be 16, 32, 64 or bigger than 128", size);
double calc_p = log2(m);
if ( trunc(calc_p) != calc_p )
reporter->InternalError("Invalid size %" PRIu64 ". Size either has to be a power of 2", size);
reporter->InternalError("Invalid size %" PRIu64 ". Size either has to be a power of 2",
size);
p = calc_p;
@ -67,8 +70,7 @@ void CardinalityCounter::Init(uint64_t size)
V = m;
}
CardinalityCounter::CardinalityCounter(CardinalityCounter& other)
: buckets(other.buckets)
CardinalityCounter::CardinalityCounter(CardinalityCounter& other) : buckets(other.buckets)
{
V = other.V;
alpha_m = other.alpha_m;
@ -90,7 +92,7 @@ CardinalityCounter::CardinalityCounter(CardinalityCounter&& o) noexcept
CardinalityCounter::CardinalityCounter(double error_margin, double confidence)
{
int b = OptimalB(error_margin, confidence);
Init((uint64_t) pow(2, b));
Init((uint64_t)pow(2, b));
assert(b == p);
}
@ -113,9 +115,7 @@ CardinalityCounter::CardinalityCounter(uint64_t arg_size, uint64_t arg_V, double
p = log2(m);
}
CardinalityCounter::~CardinalityCounter()
{
}
CardinalityCounter::~CardinalityCounter() { }
uint8_t CardinalityCounter::Rank(uint64_t hash_modified) const
{
@ -129,9 +129,9 @@ uint8_t CardinalityCounter::Rank(uint64_t hash_modified) const
void CardinalityCounter::AddElement(uint64_t hash)
{
uint64_t index = hash % m;
hash = hash-index;
hash = hash - index;
if( buckets[index] == 0 )
if ( buckets[index] == 0 )
V--;
uint8_t temp = Rank(hash);
@ -158,7 +158,7 @@ double CardinalityCounter::Size() const
answer = 1 / answer;
answer = (alpha_m * m * m * answer);
if ( answer <= 5.0 * (m/2) )
if ( answer <= 5.0 * (m / 2) )
return m * log(((double)m) / V);
else if ( answer <= (pow(2, 64) / 30) )
@ -189,7 +189,7 @@ bool CardinalityCounter::Merge(CardinalityCounter* c)
return true;
}
const std::vector<uint8_t> &CardinalityCounter::GetBuckets() const
const std::vector<uint8_t>& CardinalityCounter::GetBuckets() const
{
return buckets;
}
@ -228,7 +228,7 @@ std::unique_ptr<CardinalityCounter> CardinalityCounter::Unserialize(const broker
auto cc = std::unique_ptr<CardinalityCounter>(new CardinalityCounter(*m, *V, *alpha_m));
if ( *m != cc->m )
return nullptr;
if ( cc->buckets.size() != * m )
if ( cc->buckets.size() != *m )
return nullptr;
for ( size_t i = 0; i < *m; ++i )
@ -283,11 +283,11 @@ int CardinalityCounter::flsll(uint64_t mask)
{
int bit;
if (mask == 0)
if ( mask == 0 )
return (0);
for (bit = 1; mask != 1; bit++)
for ( bit = 1; mask != 1; bit++ )
mask = (uint64_t)mask >> 1;
return (bit);
}
} // namespace zeek::probabilistic::detail
} // namespace zeek::probabilistic::detail

View file

@ -2,21 +2,24 @@
#pragma once
#include <broker/expected.hh>
#include <stdint.h>
#include <memory>
#include <vector>
#include <broker/expected.hh>
namespace broker
{
class data;
}
namespace broker { class data; }
namespace zeek::probabilistic::detail {
namespace zeek::probabilistic::detail
{
/**
* A probabilistic cardinality counter using the HyperLogLog algorithm.
*/
class CardinalityCounter {
class CardinalityCounter
{
public:
/**
* Constructor.
@ -184,6 +187,6 @@ private:
uint64_t V;
double alpha_m;
int p; // the log2 of m
};
};
} // namespace zeek::probabilistic::detail
} // namespace zeek::probabilistic::detail

View file

@ -2,16 +2,16 @@
#include "zeek/probabilistic/CounterVector.h"
#include <cassert>
#include <limits>
#include <broker/data.hh>
#include <broker/error.hh>
#include <cassert>
#include <limits>
#include "zeek/probabilistic/BitVector.h"
#include "zeek/util.h"
namespace zeek::probabilistic::detail {
namespace zeek::probabilistic::detail
{
CounterVector::CounterVector(size_t arg_width, size_t cells)
{
@ -43,7 +43,7 @@ bool CounterVector::Increment(size_type cell, count_type value)
bool b1 = (*bits)[lsb + i];
bool b2 = value & (1 << i);
(*bits)[lsb + i] = b1 ^ b2 ^ carry;
carry = ( b1 && b2 ) || ( carry && ( b1 != b2 ) );
carry = (b1 && b2) || (carry && (b1 != b2));
}
if ( carry )
@ -69,7 +69,7 @@ bool CounterVector::Decrement(size_type cell, count_type value)
bool b1 = (*bits)[lsb + i];
bool b2 = value & (1 << i);
(*bits)[lsb + i] = b1 ^ b2 ^ carry;
carry = ( b1 && b2 ) || ( carry && ( b1 != b2 ) );
carry = (b1 && b2) || (carry && (b1 != b2));
}
return carry;
@ -111,8 +111,7 @@ size_t CounterVector::Width() const
size_t CounterVector::Max() const
{
return std::numeric_limits<size_t>::max()
>> (std::numeric_limits<size_t>::digits - width);
return std::numeric_limits<size_t>::max() >> (std::numeric_limits<size_t>::digits - width);
}
CounterVector& CounterVector::Merge(const CounterVector& other)
@ -130,7 +129,7 @@ CounterVector& CounterVector::Merge(const CounterVector& other)
bool b1 = (*bits)[lsb + i];
bool b2 = (*other.bits)[lsb + i];
(*bits)[lsb + i] = b1 ^ b2 ^ carry;
carry = ( b1 && b2 ) || ( carry && ( b1 != b2 ) );
carry = (b1 && b2) || (carry && (b1 != b2));
}
if ( carry )
@ -186,4 +185,4 @@ std::unique_ptr<CounterVector> CounterVector::Unserialize(const broker::data& da
return cv;
}
} // namespace zeek::probabilistic::detail
} // namespace zeek::probabilistic::detail

View file

@ -2,24 +2,28 @@
#pragma once
#include "zeek/zeek-config.h"
#include <broker/expected.hh>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <broker/expected.hh>
#include "zeek/zeek-config.h"
namespace broker { class data; }
namespace broker
{
class data;
}
namespace zeek::probabilistic::detail {
namespace zeek::probabilistic::detail
{
class BitVector;
/**
* A vector of counters, each of which has a fixed number of bits.
*/
class CounterVector {
class CounterVector
{
public:
typedef size_t size_type;
typedef uint64_t count_type;
@ -134,18 +138,17 @@ public:
CounterVector& operator|=(const CounterVector& other);
/** Computes a hash value of the internal representation.
* This is mainly for debugging/testing purposes.
*
* @return The hash.
*/
* This is mainly for debugging/testing purposes.
*
* @return The hash.
*/
uint64_t Hash() const;
broker::expected<broker::data> Serialize() const;
static std::unique_ptr<CounterVector> Unserialize(const broker::data& data);
protected:
friend CounterVector operator|(const CounterVector& x,
const CounterVector& y);
friend CounterVector operator|(const CounterVector& x, const CounterVector& y);
CounterVector() { }
@ -154,6 +157,6 @@ private:
BitVector* bits;
size_t width;
};
};
} // namespace zeek::probabilistic::detail
} // namespace zeek::probabilistic::detail

View file

@ -2,16 +2,17 @@
#include "zeek/probabilistic/Hasher.h"
#include <typeinfo>
#include <openssl/evp.h>
#include <broker/data.hh>
#include <highwayhash/sip_hash.h>
#include <openssl/evp.h>
#include <typeinfo>
#include "zeek/NetVar.h"
#include "zeek/Var.h"
#include "zeek/digest.h"
namespace zeek::probabilistic::detail {
namespace zeek::probabilistic::detail
{
Hasher::seed_t Hasher::MakeSeed(const void* data, size_t size)
{
@ -53,9 +54,8 @@ Hasher::Hasher(size_t arg_k, seed_t arg_seed)
broker::expected<broker::data> Hasher::Serialize() const
{
return {broker::vector{
static_cast<uint64_t>(Type()), static_cast<uint64_t>(k),
seed.h[0], seed.h[1] }};
return {broker::vector{static_cast<uint64_t>(Type()), static_cast<uint64_t>(k), seed.h[0],
seed.h[1]}};
}
std::unique_ptr<Hasher> Hasher::Unserialize(const broker::data& data)
@ -75,15 +75,16 @@ std::unique_ptr<Hasher> Hasher::Unserialize(const broker::data& data)
std::unique_ptr<Hasher> hasher;
switch ( *type ) {
case Default:
hasher = std::unique_ptr<Hasher>(new DefaultHasher(*k, {*h1, *h2}));
break;
switch ( *type )
{
case Default:
hasher = std::unique_ptr<Hasher>(new DefaultHasher(*k, {*h1, *h2}));
break;
case Double:
hasher = std::unique_ptr<Hasher>(new DoubleHasher(*k, {*h1, *h2}));
break;
}
case Double:
hasher = std::unique_ptr<Hasher>(new DoubleHasher(*k, {*h1, *h2}));
break;
}
// Note that the derived classes don't hold any further state of
// their own. They reconstruct all their information from their
@ -107,12 +108,12 @@ UHF::UHF(Hasher::seed_t arg_seed)
// times.
Hasher::digest UHF::hash(const void* x, size_t n) const
{
static_assert(std::is_same<highwayhash::SipHashState::Key, decltype(seed.h)>::value, "Seed value is not the same type as highwayhash key");
static_assert(std::is_same<highwayhash::SipHashState::Key, decltype(seed.h)>::value,
"Seed value is not the same type as highwayhash key");
return highwayhash::SipHash(seed.h, reinterpret_cast<const char*>(x), n);
}
DefaultHasher::DefaultHasher(size_t k, Hasher::seed_t seed)
: Hasher(k, seed)
DefaultHasher::DefaultHasher(size_t k, Hasher::seed_t seed) : Hasher(k, seed)
{
for ( size_t i = 1; i <= k; ++i )
{
@ -177,4 +178,4 @@ bool DoubleHasher::Equals(const Hasher* other) const
return h1 == o->h1 && h2 == o->h2;
}
} // namespace zeek::probabilistic::detail
} // namespace zeek::probabilistic::detail

View file

@ -2,35 +2,46 @@
#pragma once
#include <memory>
#include <broker/expected.hh>
#include <memory>
#include "zeek/Hash.h"
namespace broker { class data; }
namespace broker
{
class data;
}
namespace zeek::probabilistic::detail {
namespace zeek::probabilistic::detail
{
/** Types of derived Hasher classes. */
enum HasherType { Default, Double };
enum HasherType
{
Default,
Double
};
/**
* Abstract base class for hashers. A hasher creates a family of hash
* functions to hash an element *k* times.
*/
class Hasher {
class Hasher
{
public:
typedef zeek::detail::hash_t digest;
typedef std::vector<digest> digest_vector;
struct seed_t {
struct seed_t
{
// actually HH_U64, which has the same type
alignas(16) unsigned long long h[2];
friend seed_t operator+(seed_t lhs, const uint64_t rhs) {
friend seed_t operator+(seed_t lhs, const uint64_t rhs)
{
lhs.h[0] += rhs;
return lhs;
}
};
}
};
/**
* Creates a valid hasher seed from an arbitrary string.
@ -58,11 +69,7 @@ public:
*
* @return Vector of *k* hash values.
*/
template <typename T>
digest_vector operator()(const T& x) const
{
return Hash(&x, sizeof(T));
}
template <typename T> digest_vector operator()(const T& x) const { return Hash(&x, sizeof(T)); }
/**
* Computes hash values for an element.
@ -98,12 +105,12 @@ public:
/**
* Returns the number *k* of hash functions the hasher applies.
*/
size_t K() const { return k; }
size_t K() const { return k; }
/**
* Returns the seed used to construct the hasher.
*/
seed_t Seed() const { return seed; }
seed_t Seed() const { return seed; }
broker::expected<broker::data> Serialize() const;
static std::unique_ptr<Hasher> Unserialize(const broker::data& data);
@ -125,13 +132,14 @@ protected:
private:
size_t k;
seed_t seed;
};
};
/**
* A universal hash function family. This is a helper class that Hasher
* implementations can use in their implementation.
*/
class UHF {
class UHF
{
public:
/**
* Default constructor with zero seed.
@ -146,8 +154,7 @@ public:
*/
explicit UHF(Hasher::seed_t arg_seed);
template <typename T>
Hasher::digest operator()(const T& x) const
template <typename T> Hasher::digest operator()(const T& x) const
{
return hash(&x, sizeof(T));
}
@ -159,10 +166,7 @@ public:
*
* @return Vector of *k* hash values.
*/
Hasher::digest operator()(const void* x, size_t n) const
{
return hash(x, n);
}
Hasher::digest operator()(const void* x, size_t n) const { return hash(x, n); }
/**
* Computes the hashes for a set of bytes.
@ -178,14 +182,10 @@ public:
friend bool operator==(const UHF& x, const UHF& y)
{
return (x.seed.h[0] == y.seed.h[0]) &&
(x.seed.h[1] == y.seed.h[1]);
return (x.seed.h[0] == y.seed.h[0]) && (x.seed.h[1] == y.seed.h[1]);
}
friend bool operator!=(const UHF& x, const UHF& y)
{
return ! (x == y);
}
friend bool operator!=(const UHF& x, const UHF& y) { return ! (x == y); }
broker::expected<broker::data> Serialize() const;
static UHF Unserialize(const broker::data& data);
@ -194,14 +194,14 @@ private:
static size_t compute_seed(Hasher::seed_t seed);
Hasher::seed_t seed;
};
};
/**
* A hasher implementing the default hashing policy. Uses *k* separate hash
* functions internally.
*/
class DefaultHasher : public Hasher {
class DefaultHasher : public Hasher
{
public:
/**
* Constructor for a hasher with *k* hash functions.
@ -220,17 +220,17 @@ public:
private:
DefaultHasher() { }
HasherType Type() const override
{ return HasherType::Default; }
HasherType Type() const override { return HasherType::Default; }
std::vector<UHF> hash_functions;
};
};
/**
* The *double-hashing* policy. Uses a linear combination of two hash
* functions.
*/
class DoubleHasher : public Hasher {
class DoubleHasher : public Hasher
{
public:
/**
* Constructor for a double hasher with *k* hash functions.
@ -249,11 +249,10 @@ public:
private:
DoubleHasher() { }
HasherType Type() const override
{ return HasherType::Double; }
HasherType Type() const override { return HasherType::Double; }
UHF h1;
UHF h2;
};
};
} // namespace zeek::probabilistic::detail
} // namespace zeek::probabilistic::detail

View file

@ -4,22 +4,23 @@
#include <broker/error.hh>
#include "zeek/broker/Data.h"
#include "zeek/CompHash.h"
#include "zeek/Reporter.h"
#include "zeek/Dict.h"
#include "zeek/Reporter.h"
#include "zeek/broker/Data.h"
namespace zeek::probabilistic::detail {
namespace zeek::probabilistic::detail
{
static void topk_element_hash_delete_func(void* val)
{
Element* e = (Element*) val;
Element* e = (Element*)val;
delete e;
}
void TopkVal::Typify(TypePtr t)
{
assert(!hash && !type);
assert(! hash && ! type);
type = std::move(t);
auto tl = make_intrusive<TypeList>(type);
tl->Append(type);
@ -104,7 +105,7 @@ void TopkVal::Merge(const TopkVal* value, bool doPrune)
Element* e = *eit;
// lookup if we already know this one...
zeek::detail::HashKey* key = GetHash(e->value);
Element* olde = (Element*) elementDict->Lookup(key);
Element* olde = (Element*)elementDict->Lookup(key);
if ( olde == nullptr )
{
@ -114,7 +115,7 @@ void TopkVal::Merge(const TopkVal* value, bool doPrune)
// insert at bucket position 0
if ( buckets.size() > 0 )
{
assert (buckets.front()-> count > 0 );
assert(buckets.front()->count > 0);
}
Bucket* newbucket = new Bucket();
@ -126,7 +127,6 @@ void TopkVal::Merge(const TopkVal* value, bool doPrune)
elementDict->Insert(key, olde);
numElements++;
}
// now that we are sure that the old element is present - increment epsilon
@ -153,7 +153,7 @@ void TopkVal::Merge(const TopkVal* value, bool doPrune)
while ( numElements > size )
{
pruned = true;
assert(buckets.size() > 0 );
assert(buckets.size() > 0);
Bucket* b = buckets.front();
assert(b->elements.size() > 0);
@ -199,13 +199,13 @@ VectorValPtr TopkVal::GetTopK(int k) const // returns vector
int read = 0;
std::list<Bucket*>::const_iterator it = buckets.end();
it--;
while (read < k )
while ( read < k )
{
//printf("Bucket %llu\n", (*it)->count);
// printf("Bucket %llu\n", (*it)->count);
std::list<Element*>::iterator eit = (*it)->elements.begin();
while ( eit != (*it)->elements.end() )
{
//printf("Size: %ld\n", (*it)->elements.size());
// printf("Size: %ld\n", (*it)->elements.size());
t->Assign(read, (*eit)->value);
read++;
eit++;
@ -223,7 +223,7 @@ VectorValPtr TopkVal::GetTopK(int k) const // returns vector
uint64_t TopkVal::GetCount(Val* value) const
{
zeek::detail::HashKey* key = GetHash(value);
Element* e = (Element*) elementDict->Lookup(key);
Element* e = (Element*)elementDict->Lookup(key);
delete key;
if ( e == nullptr )
@ -238,7 +238,7 @@ uint64_t TopkVal::GetCount(Val* value) const
uint64_t TopkVal::GetEpsilon(Val* value) const
{
zeek::detail::HashKey* key = GetHash(value);
Element* e = (Element*) elementDict->Lookup(key);
Element* e = (Element*)elementDict->Lookup(key);
delete key;
if ( e == nullptr )
@ -263,7 +263,8 @@ uint64_t TopkVal::GetSum() const
}
if ( pruned )
reporter->Warning("TopkVal::GetSum() was used on a pruned data structure. Result values do not represent total element count");
reporter->Warning("TopkVal::GetSum() was used on a pruned data structure. Result values do "
"not represent total element count");
return sum;
}
@ -274,16 +275,15 @@ void TopkVal::Encountered(ValPtr encountered)
if ( numElements == 0 )
Typify(encountered->GetType());
else
if ( ! same_type(type, encountered->GetType()) )
{
reporter->Error("Trying to add element to topk with differing type from other elements");
return;
}
else if ( ! same_type(type, encountered->GetType()) )
{
reporter->Error("Trying to add element to topk with differing type from other elements");
return;
}
// Step 1 - get the hash.
zeek::detail::HashKey* key = GetHash(encountered);
Element* e = (Element*) elementDict->Lookup(key);
Element* e = (Element*)elementDict->Lookup(key);
if ( e == nullptr )
{
@ -328,7 +328,7 @@ void TopkVal::Encountered(ValPtr encountered)
assert(b->elements.size() > 0);
zeek::detail::HashKey* deleteKey = GetHash((*(b->elements.begin()))->value);
b->elements.erase(b->elements.begin());
Element* deleteElement = (Element*) elementDict->RemoveEntry(deleteKey);
Element* deleteElement = (Element*)elementDict->RemoveEntry(deleteKey);
assert(deleteElement); // there has to have been a minimal element...
delete deleteElement;
delete deleteKey;
@ -341,7 +341,6 @@ void TopkVal::Encountered(ValPtr encountered)
// fallthrough, increment operation has to run!
}
}
// ok, we now have an element in e
@ -362,10 +361,10 @@ void TopkVal::IncrementCounter(Element* e, unsigned int count)
bucketIter++;
while ( bucketIter != buckets.end() && (*bucketIter)->count < currcount+count )
while ( bucketIter != buckets.end() && (*bucketIter)->count < currcount + count )
bucketIter++;
if ( bucketIter != buckets.end() && (*bucketIter)->count == currcount+count )
if ( bucketIter != buckets.end() && (*bucketIter)->count == currcount + count )
nextBucket = *bucketIter;
if ( nextBucket == nullptr )
@ -374,7 +373,7 @@ void TopkVal::IncrementCounter(Element* e, unsigned int count)
// create it...
Bucket* b = new Bucket();
b->count = currcount+count;
b->count = currcount + count;
std::list<Bucket*>::iterator nextBucketPos = buckets.insert(bucketIter, b);
b->bucketPos = nextBucketPos; // and give it the iterator we know now.
@ -446,7 +445,6 @@ broker::expected<broker::data> TopkVal::DoSerialize() const
return {std::move(d)};
}
bool TopkVal::DoUnserialize(const broker::data& data)
{
auto v = caf::get_if<broker::vector>(&data);
@ -507,7 +505,7 @@ bool TopkVal::DoUnserialize(const broker::data& data)
b->elements.insert(b->elements.end(), e);
zeek::detail::HashKey* key = GetHash(e->value);
assert (elementDict->Lookup(key) == nullptr);
assert(elementDict->Lookup(key) == nullptr);
elementDict->Insert(key, e);
delete key;
@ -520,4 +518,4 @@ bool TopkVal::DoUnserialize(const broker::data& data)
return true;
}
} // namespace zeek::probabilistic::detail
} // namespace zeek::probabilistic::detail

View file

@ -3,19 +3,25 @@
#pragma once
#include <list>
#include "zeek/Val.h"
#include "zeek/OpaqueVal.h"
#include "zeek/Val.h"
// This class implements the top-k algorithm. Or - to be more precise - an
// interpretation of it.
namespace zeek::detail { class CompositeHash; }
namespace zeek::detail
{
class CompositeHash;
}
namespace zeek::probabilistic::detail {
namespace zeek::probabilistic::detail
{
struct Element;
struct Bucket {
struct Bucket
{
uint64_t count;
std::list<Element*> elements;
@ -23,15 +29,17 @@ struct Bucket {
// points to us - so it is invalid when we are no longer there. Cute,
// isn't it?
std::list<Bucket*>::iterator bucketPos;
};
};
struct Element {
struct Element
{
uint64_t epsilon;
ValPtr value;
Bucket* parent;
};
};
class TopkVal : public OpaqueVal {
class TopkVal : public OpaqueVal
{
public:
/**
@ -77,7 +85,7 @@ public:
*
* @returns internal count for val, 0 if unknown
*/
uint64_t GetCount(Val* value) const;
uint64_t GetCount(Val* value) const;
/**
* Get the current epsilon tracked in the top-k data structure for a
@ -116,7 +124,7 @@ public:
*
* @param doPrune prune resulting TopkVal to size after merging
*/
void Merge(const TopkVal* value, bool doPrune=false);
void Merge(const TopkVal* value, bool doPrune = false);
/**
* Clone the Opaque Type
@ -153,8 +161,7 @@ private:
* @returns HashKey for value
*/
zeek::detail::HashKey* GetHash(Val* v) const; // this probably should go somewhere else.
zeek::detail::HashKey* GetHash(const ValPtr& v) const
{ return GetHash(v.get()); }
zeek::detail::HashKey* GetHash(const ValPtr& v) const { return GetHash(v.get()); }
/**
* Set the type that this TopK instance tracks
@ -170,6 +177,6 @@ private:
uint64_t size; // how many elements are we tracking?
uint64_t numElements; // how many elements do we have at the moment
bool pruned; // was this data structure pruned?
};
};
} // namespace zeek::probabilistic::detail
} // namespace zeek::probabilistic::detail