zeek/src/Dict.h
// See the file "COPYING" in the main distribution directory for copyright.
#pragma once
#include <algorithm>
#include <cinttypes>
#include <cmath>
#include <cstdint>
#include <fstream>
#include <memory>
#include <vector>
#include "zeek/Hash.h"
#include "zeek/Obj.h"
#include "zeek/Reporter.h"
// Type for function to be called when deleting elements.
using dict_delete_func = void (*)(void*);
#if defined(DEBUG) && defined(ZEEK_DICT_DEBUG)
#define ASSERT_VALID(o) o->AssertValid()
#define ASSERT_EQUAL(a, b) ASSERT(a == b)
#else
#define ASSERT_VALID(o)
#define ASSERT_EQUAL(a, b)
#endif // DEBUG
namespace zeek {
template<typename T>
class Dictionary;
enum DictOrder : uint8_t { ORDERED, UNORDERED };
// A dict_delete_func that just calls delete.
extern void generic_delete_func(void*);
namespace detail {
// Mask selecting the lower 32 bits of the 64-bit hash; only these bits are used for hash
// comparison and bucket placement.
constexpr uint32_t HASH_MASK = 0xFFFFFFFF; // only lower 32 bits.
// These four variables can be used to build different targets with -Dxxx for performance
// or for debugging purposes.
// When incrementally resizing and remapping, DICT_REMAP_ENTRIES entries are remapped per
// step. Use 2 for debugging; 16 works best for a release build.
constexpr uint8_t DICT_REMAP_ENTRIES = 16;
// Load factor is 100% while the capacity is at most (1 << DICT_THRESHOLD_BITS) + DICT_THRESHOLD_BITS,
// 75% for larger tables; 25% is the minimum load factor.
constexpr int MIN_DICT_LOAD_FACTOR_100 = 25;
constexpr int DICT_LOAD_FACTOR_100 = 75;
// When the average space distance exceeds SPACE_DISTANCE_THRESHOLD, double the table size
// unless the load factor is already below MIN_DICT_LOAD_FACTOR_100.
constexpr int SPACE_DISTANCE_THRESHOLD = 32;
// To ignore occasional far-away gaps: only once space_distance_samples exceeds
// MIN_SPACE_DISTANCE_SAMPLES do we consider the observed space distance meaningful rather than chance.
constexpr int MIN_SPACE_DISTANCE_SAMPLES = 128;
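// For example, 200 samples summing to a space distance of 8000 give an average gap of 40,
// which exceeds SPACE_DISTANCE_THRESHOLD and triggers an early size-up (provided the load
// factor is above MIN_DICT_LOAD_FACTOR_100).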
// Default initial size of a dictionary. A value of 0 means the hash table is allocated
// lazily on the first insertion and grows as needed.
constexpr uint8_t DEFAULT_DICT_SIZE = 0;
// When log2_buckets > DICT_THRESHOLD_BITS, DICT_LOAD_FACTOR_100 becomes effective.
// Basically, while the dict capacity is at most 2^DICT_THRESHOLD_BITS + DICT_THRESHOLD_BITS,
// we only size up when strictly necessary (i.e. when the table is full).
constexpr uint8_t DICT_THRESHOLD_BITS = 3;
// Sentinel distance value marking an empty entry; also the largest representable distance.
constexpr uint16_t TOO_FAR_TO_REACH = 0xFFFF;
/**
* An entry stored in the dictionary.
*/
template<typename T>
class DictEntry {
public:
#ifdef DEBUG
int bucket = 0;
#endif
// Distance from the expected position in the table. 0xFFFF means that the entry is empty.
uint16_t distance = TOO_FAR_TO_REACH;
// The size of the key. Keys of up to 8 bytes are stored directly in the entry; larger keys
// are stored via a pointer. This avoids extra allocations when we can help it.
uint32_t key_size = 0;
// The maximum representable key size. Keys larger than this are rejected (via a
// RuntimeError) before they would be stored into an entry, avoiding overflow errors.
static constexpr uint32_t MAX_KEY_SIZE = UINT32_MAX;
// Lower 4 bytes of the 8-byte hash, which is used to calculate the position in the table.
uint32_t hash = 0;
T* value = nullptr;
union {
char key_here[8]; // Holds the key in place when key_size <= 8.
char* key;        // Otherwise points to separately allocated key bytes.
};
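// For example, a 4-byte key is copied into key_here directly, while a 20-byte key lives in
// separately allocated storage (copied or adopted from the caller) referenced through `key`.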
DictEntry(void* arg_key, uint32_t key_size = 0, hash_t hash = 0, T* value = nullptr, uint16_t d = TOO_FAR_TO_REACH,
bool copy_key = false)
: distance(d), key_size(key_size), hash((uint32_t)hash), value(value) {
if ( ! arg_key )
return;
if ( key_size <= 8 ) {
memcpy(key_here, arg_key, key_size);
if ( ! copy_key )
delete[] (char*)arg_key; // We own arg_key; free it now that its bytes have been copied.
}
else {
if ( copy_key ) {
key = new char[key_size];
memcpy(key, arg_key, key_size);
}
else {
key = (char*)arg_key;
}
}
}
bool Empty() const { return distance == TOO_FAR_TO_REACH; }
void SetEmpty() {
distance = TOO_FAR_TO_REACH;
#ifdef DEBUG
hash = 0;
key = nullptr;
value = nullptr;
key_size = 0;
bucket = 0;
#endif // DEBUG
}
void Clear() {
if ( key_size > 8 )
delete[] key;
SetEmpty();
}
const char* GetKey() const { return key_size <= 8 ? key_here : key; }
std::unique_ptr<detail::HashKey> GetHashKey() const {
return std::make_unique<detail::HashKey>(GetKey(), key_size, hash);
}
bool Equal(const char* arg_key, uint32_t arg_key_size, hash_t arg_hash) const { // Only the lower 32 bits of the hash are compared.
return (0 == ((hash ^ arg_hash) & HASH_MASK)) && key_size == arg_key_size &&
0 == memcmp(GetKey(), arg_key, key_size);
}
bool operator==(const DictEntry& r) const { return Equal(r.GetKey(), r.key_size, r.hash); }
bool operator!=(const DictEntry& r) const { return ! Equal(r.GetKey(), r.key_size, r.hash); }
};
using DictEntryVec = std::vector<detail::HashKey>;
} // namespace detail
template<typename T>
class DictIterator {
public:
using value_type = detail::DictEntry<T>;
using reference = detail::DictEntry<T>&;
using pointer = detail::DictEntry<T>*;
using difference_type = std::ptrdiff_t;
using iterator_category = std::forward_iterator_tag;
DictIterator() = default;
~DictIterator() {
if ( dict ) {
assert(dict->num_iterators > 0);
dict->DecrIters();
}
}
DictIterator(const DictIterator& that) {
if ( this == &that )
return;
if ( dict ) {
assert(dict->num_iterators > 0);
dict->DecrIters();
}
dict = that.dict;
curr = that.curr;
end = that.end;
ordered_iter = that.ordered_iter;
if ( dict )
dict->IncrIters();
}
DictIterator& operator=(const DictIterator& that) {
if ( this == &that )
return *this;
if ( dict ) {
assert(dict->num_iterators > 0);
dict->DecrIters();
}
dict = that.dict;
curr = that.curr;
end = that.end;
ordered_iter = that.ordered_iter;
if ( dict )
dict->IncrIters();
return *this;
}
DictIterator(DictIterator&& that) noexcept {
if ( this == &that )
return;
if ( dict ) {
assert(dict->num_iterators > 0);
dict->DecrIters();
}
dict = that.dict;
curr = that.curr;
end = that.end;
ordered_iter = that.ordered_iter;
that.dict = nullptr;
}
DictIterator& operator=(DictIterator&& that) noexcept {
if ( this == &that )
return *this;
if ( dict ) {
assert(dict->num_iterators > 0);
dict->DecrIters();
}
dict = that.dict;
curr = that.curr;
end = that.end;
ordered_iter = that.ordered_iter;
that.dict = nullptr;
return *this;
}
reference operator*() {
if ( dict->IsOrdered() ) {
// TODO: How does this work if ordered_iter == end()? LookupEntry() will return a nullptr,
// which the dereference below will fail on. That's undefined behavior, correct? Is that any
// different from the unordered version dereferencing its end?
auto e = dict->LookupEntry(*ordered_iter);
return *e;
}
return *curr;
}
reference operator*() const {
if ( dict->IsOrdered() ) {
auto e = dict->LookupEntry(*ordered_iter);
return *e;
}
return *curr;
}
pointer operator->() {
if ( dict->IsOrdered() )
return dict->LookupEntry(*ordered_iter);
return curr;
}
pointer operator->() const {
if ( dict->IsOrdered() )
return dict->LookupEntry(*ordered_iter);
return curr;
}
DictIterator& operator++() {
if ( dict->IsOrdered() )
++ordered_iter;
else {
// The non-robust case is easy: just advance the current position forward until we
// find an entry that isn't empty or we reach the end.
do {
++curr;
} while ( curr != end && curr->Empty() );
}
return *this;
}
DictIterator operator++(int) {
auto temp(*this);
++*this;
return temp;
}
bool operator==(const DictIterator& that) const {
if ( dict != that.dict )
return false;
if ( dict->IsOrdered() )
return ordered_iter == that.ordered_iter;
return curr == that.curr;
}
bool operator!=(const DictIterator& that) const { return ! (*this == that); }
private:
friend class Dictionary<T>;
DictIterator(const Dictionary<T>* d, detail::DictEntry<T>* begin, detail::DictEntry<T>* end)
: curr(begin), end(end) {
// Cast away the constness so that the number of iterators can be modified in the
// dictionary. This does violate the constness guarantees of const-begin()/end() and
// cbegin()/cend(), but we're not modifying the actual data in the collection, just a
// counter in the wrapper of the collection.
dict = const_cast<Dictionary<T>*>(d);
// Make sure that we're starting on a non-empty element.
while ( curr != end && curr->Empty() )
++curr;
dict->IncrIters();
}
DictIterator(const Dictionary<T>* d, detail::DictEntryVec::iterator iter) : ordered_iter(iter) {
// Cast away the constness so that the number of iterators can be modified in the
// dictionary. This does violate the constness guarantees of const-begin()/end() and
// cbegin()/cend(), but we're not modifying the actual data in the collection, just a
// counter in the wrapper of the collection.
dict = const_cast<Dictionary<T>*>(d);
dict->IncrIters();
}
Dictionary<T>* dict = nullptr;
detail::DictEntry<T>* curr = nullptr;
detail::DictEntry<T>* end = nullptr;
detail::DictEntryVec::iterator ordered_iter;
};
template<typename T>
class RobustDictIterator {
public:
using value_type = detail::DictEntry<T>;
using reference = detail::DictEntry<T>&;
using pointer = detail::DictEntry<T>*;
using difference_type = std::ptrdiff_t;
using iterator_category = std::forward_iterator_tag;
RobustDictIterator() : curr(nullptr) {}
RobustDictIterator(Dictionary<T>* d) : curr(nullptr), dict(d) {
next = -1;
inserted = new std::vector<detail::DictEntry<T>>();
visited = new std::vector<detail::DictEntry<T>>();
dict->IncrIters();
dict->iterators->push_back(this);
// Advance the iterator one step so that we're at the first element.
curr = dict->GetNextRobustIteration(this);
}
RobustDictIterator(const RobustDictIterator& other) : curr(nullptr), dict(nullptr) { *this = other; }
RobustDictIterator(RobustDictIterator&& other) noexcept : curr(nullptr), dict(nullptr) { *this = std::move(other); }
~RobustDictIterator() { Complete(); }
reference operator*() { return curr; }
pointer operator->() { return &curr; }
RobustDictIterator& operator++() {
if ( dict )
curr = dict->GetNextRobustIteration(this);
return *this;
}
RobustDictIterator operator++(int) {
auto temp(*this);
++*this;
return temp;
}
RobustDictIterator& operator=(const RobustDictIterator& other) {
if ( this == &other )
return *this;
delete inserted;
inserted = nullptr;
delete visited;
visited = nullptr;
dict = nullptr;
curr.Clear();
next = -1;
if ( other.dict ) {
next = other.next;
inserted = new std::vector<detail::DictEntry<T>>();
visited = new std::vector<detail::DictEntry<T>>();
if ( other.inserted )
std::copy(other.inserted->begin(), other.inserted->end(), std::back_inserter(*inserted));
if ( other.visited )
std::copy(other.visited->begin(), other.visited->end(), std::back_inserter(*visited));
dict = other.dict;
dict->IncrIters();
dict->iterators->push_back(this);
curr = other.curr;
}
return *this;
}
RobustDictIterator& operator=(RobustDictIterator&& other) noexcept {
delete inserted;
inserted = nullptr;
delete visited;
visited = nullptr;
dict = nullptr;
curr.Clear();
next = -1;
if ( other.dict ) {
next = other.next;
inserted = other.inserted;
visited = other.visited;
dict = other.dict;
dict->iterators->push_back(this);
dict->iterators->erase(std::remove(dict->iterators->begin(), dict->iterators->end(), &other),
dict->iterators->end());
other.dict = nullptr;
curr = std::move(other.curr);
}
return *this;
}
bool operator==(const RobustDictIterator& that) const { return curr == that.curr; }
bool operator!=(const RobustDictIterator& that) const { return ! (*this == that); }
private:
friend class Dictionary<T>;
void Complete() {
if ( dict ) {
assert(dict->num_iterators > 0);
dict->DecrIters();
dict->iterators->erase(std::remove(dict->iterators->begin(), dict->iterators->end(), this),
dict->iterators->end());
delete inserted;
delete visited;
inserted = nullptr;
visited = nullptr;
dict = nullptr;
curr = nullptr; // make this same as robust_end()
}
}
// Tracks the new entries inserted while iterating.
std::vector<detail::DictEntry<T>>* inserted = nullptr;
// Tracks entries that were already visited but got moved past the next iteration
// point due to an insertion.
std::vector<detail::DictEntry<T>>* visited = nullptr;
detail::DictEntry<T> curr;
Dictionary<T>* dict = nullptr;
int next = -1;
};
/**
* A dictionary type that uses clustered hashing, a variation of Robinhood/Open Addressing
* hashing. The following posts help to understand the implementation:
* - https://jasonlue.github.io/algo/2019/08/20/clustered-hashing.html
* - https://jasonlue.github.io/algo/2019/08/27/clustered-hashing-basic-operations.html
* - https://jasonlue.github.io/algo/2019/09/03/clustered-hashing-incremental-resize.html
* - https://jasonlue.github.io/algo/2019/09/10/clustered-hashing-modify-on-iteration.html
*
* The dictionary is effectively a hashmap from hashed keys to values. The dictionary owns
* the keys but not the values. The dictionary is intended for sizes up to around 100K
* entries; 1M entries is the absolute limit. Only Connections use that many entries, and that is rare.
*/
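// A minimal, illustrative usage sketch (not part of the API defined here; the lambda delete
// function and the example key/value are the caller's own):
//
//     zeek::Dictionary<int> dict;
//     dict.SetDeleteFunc([](void* v) { delete static_cast<int*>(v); });
//
//     dict.Insert("answer", new int(42));
//     if ( int* v = dict.Lookup("answer") )
//         printf("found %d (dictionary holds %d entries)\n", *v, dict.Length());
//
//     // Remove()/RemoveEntry() hand the value back to the caller; it is no longer covered
//     // by the delete function installed above.
//     zeek::detail::HashKey key("answer");
//     delete dict.RemoveEntry(key);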
template<typename T>
class Dictionary {
public:
explicit Dictionary(DictOrder ordering = UNORDERED, int initial_size = detail::DEFAULT_DICT_SIZE) {
if ( initial_size > 0 ) {
// If an initial size is specified, init the table right away. Otherwise wait until the
// first insertion to init.
SetLog2Buckets(static_cast<uint16_t>(std::log2(initial_size)));
Init();
}
if ( ordering == ORDERED )
order = std::make_unique<std::vector<detail::HashKey>>();
}
~Dictionary() { Clear(); }
// Member functions for looking up a key, inserting/changing its
// contents, and deleting it. These come in two flavors: one
// which takes a zeek::detail::HashKey, and the other which takes a raw key,
// its size, and its (unmodulated) hash.
// Lookup may move an entry found at an old (pre-resize) position to its proper place, to speed up the next lookup.
T* Lookup(const detail::HashKey* key) const { return Lookup(key->Key(), key->Size(), key->Hash()); }
T* Lookup(const void* key, int key_size, detail::hash_t h) const {
if ( auto e = LookupEntry(key, key_size, h) )
return e->value;
return nullptr;
}
T* Lookup(const char* key) const {
detail::HashKey h(key);
return Dictionary<T>::Lookup(&h);
}
// Returns the previous value, or nullptr if none.
// If iterators_invalidated is supplied, its value is set to true
// if the insertion may have invalidated any existing iterators.
T* Insert(detail::HashKey* key, T* val, bool* iterators_invalidated = nullptr) {
return Insert(key->TakeKey(), key->Size(), key->Hash(), val, false, iterators_invalidated);
}
// If copy_key is true, then the key is copied, otherwise it's assumed
// that it's a heap pointer that now belongs to the Dictionary to
// manage as needed.
// If iterators_invalidated is supplied, its value is set to true
// if the insertion may have invalidated any existing iterators.
T* Insert(void* key, uint64_t key_size, detail::hash_t hash, T* val, bool copy_key,
bool* iterators_invalidated = nullptr) {
ASSERT_VALID(this);
// Initialize the table if it hasn't been done yet. This saves memory storing a bunch
// of empty dicts.
if ( ! table )
Init();
T* v = nullptr;
if ( key_size > detail::DictEntry<T>::MAX_KEY_SIZE ) {
// If the key is bigger than something that will fit in a DictEntry, report a
// RuntimeError. This will throw an exception. If this call came from a script
// context, it'll cause the script interpreter to unwind and stop the script
// execution. If called elsewhere, Zeek will likely abort due to an unhandled
// exception. This is all entirely intentional, since if you got to this point
// something went really wrong with your input data.
auto loc = detail::GetCurrentLocation();
reporter->RuntimeError(&loc,
"Attempted to create DictEntry with excessively large key, "
"truncating key (%" PRIu64 " > %u)",
key_size, detail::DictEntry<T>::MAX_KEY_SIZE);
}
// Look to see if this key is already in the table. If found, the returned position is that
// of the existing element. If not, insert_position is where it'll be inserted and
// insert_distance is the new entry's distance from its bucket at that position.
int insert_position = -1;
int insert_distance = -1;
int position = LookupIndex(key, key_size, hash, &insert_position, &insert_distance);
if ( position >= 0 ) {
v = table[position].value;
table[position].value = val;
if ( ! copy_key )
delete[] (char*)key;
if ( iterators && ! iterators->empty() )
// Need to propagate the new value to the robust iterators' copies too.
for ( auto c : *iterators ) {
// Check to see if this iterator points at the entry we're replacing. The
// iterator keeps a copy of the element, so we need to update it too.
if ( **c == table[position] )
(*c)->value = val;
// Check if any of the inserted elements in this iterator point at the entry
// being replaced. Update those too.
auto it = std::find(c->inserted->begin(), c->inserted->end(), table[position]);
if ( it != c->inserted->end() )
it->value = val;
}
}
else {
if ( ! HaveOnlyRobustIterators() ) {
if ( iterators_invalidated )
*iterators_invalidated = true;
else
reporter->InternalWarning("Dictionary::Insert() possibly caused iterator invalidation");
}
// Do this before the actual insertion since creating the DictEntry is going to delete
// the key data. We need a copy of it first.
if ( order )
order->emplace_back(detail::HashKey{key, static_cast<size_t>(key_size), hash});
// Allocate memory for key if necessary. Key is updated to reflect internal key if
// necessary.
detail::DictEntry<T> entry(key, key_size, hash, val, insert_distance, copy_key);
InsertRelocateAndAdjust(entry, insert_position);
num_entries++;
cum_entries++;
if ( max_entries < num_entries )
max_entries = num_entries;
if ( num_entries > ThresholdEntries() )
SizeUp(); // NOLINT(bugprone-branch-clone)
// If the space distance grows too large, performance decreases, so we also size up
// for performance in that case.
else if ( space_distance_samples > detail::MIN_SPACE_DISTANCE_SAMPLES &&
static_cast<uint64_t>(space_distance_sum) >
static_cast<uint64_t>(space_distance_samples) * detail::SPACE_DISTANCE_THRESHOLD &&
static_cast<int>(num_entries) > detail::MIN_DICT_LOAD_FACTOR_100 * Capacity() / 100 )
SizeUp();
}
// Remapping right after insert lets the table adjust ASAP and shortens the period of a mixed table.
// TODO: however, if a remap happens right after a size-up, it consumes more CPU in this
// cycle, a possible hiccup point.
if ( Remapping() )
Remap();
ASSERT_VALID(this);
return v;
}
T* Insert(const char* key, T* val, bool* iterators_invalidated = nullptr) {
detail::HashKey h(key);
return Insert(&h, val, iterators_invalidated);
}
// Removes the given element. Returns a pointer to the element in
// case it needs to be deleted. Returns 0 if no such element exists.
// If dontdelete is true, the key's bytes will not be deleted.
// If iterators_invalidated is supplied, its value is set to true
// if the removal may have invalidated any existing iterators.
T* Remove(const detail::HashKey* key, bool* iterators_invalidated = nullptr) {
return Remove(key->Key(), key->Size(), key->Hash(), false, iterators_invalidated);
}
T* Remove(const void* key, int key_size, detail::hash_t hash, bool dont_delete = false,
bool* iterators_invalidated = nullptr) {
// Robust iterators are adjusted as part of the removal (see RemoveRelocateAndAdjust()).
ASSERT_VALID(this);
ASSERT(! dont_delete); // This is a poorly designed flag: if set, the removed key has nowhere
// to be returned to and its memory would leak.
int position = LookupIndex(key, key_size, hash);
if ( position < 0 )
return nullptr;
if ( ! HaveOnlyRobustIterators() ) {
if ( iterators_invalidated )
*iterators_invalidated = true;
else
reporter->InternalWarning("Dictionary::Remove() possibly caused iterator invalidation");
}
detail::DictEntry<T> entry = RemoveRelocateAndAdjust(position);
num_entries--;
ASSERT(num_entries >= 0);
// The removed entry is about to become invalid; drop any remaining references to it.
if ( order ) {
for ( auto it = order->begin(); it != order->end(); ++it ) {
if ( it->Equal(key, key_size, hash) ) {
it = order->erase(it);
break;
}
}
}
T* v = entry.value;
entry.Clear();
ASSERT_VALID(this);
return v;
}
// TODO: these came from PDict. They could probably be deprecated and removed in favor of
// just using Remove().
T* RemoveEntry(const detail::HashKey* key, bool* iterators_invalidated = nullptr) {
return Remove(key->Key(), key->Size(), key->Hash(), false, iterators_invalidated);
}
T* RemoveEntry(const detail::HashKey& key, bool* iterators_invalidated = nullptr) {
return Remove(key.Key(), key.Size(), key.Hash(), false, iterators_invalidated);
}
// Number of entries.
int Length() const { return num_entries; }
// Largest it's ever been.
int MaxLength() const { return max_entries; }
// Total number of entries ever.
uint64_t NumCumulativeInserts() const { return cum_entries; }
// True if the dictionary is ordered, false otherwise.
int IsOrdered() const { return order != nullptr; }
// If the dictionary is ordered then returns the n'th entry's value;
// the second method also returns the key. The first entry inserted
// corresponds to n=0.
//
// Returns nil if the dictionary is not ordered or if "n" is out
// of range.
T* NthEntry(int n) const {
const void* key;
int key_len;
return NthEntry(n, key, key_len);
}
T* NthEntry(int n, const void*& key, int& key_size) const {
if ( ! order || n < 0 || n >= Length() )
return nullptr;
auto& hk = order->at(n);
auto entry = Lookup(&hk);
key = hk.Key();
key_size = hk.Size();
return entry;
}
T* NthEntry(int n, const char*& key) const {
int key_len;
return NthEntry(n, (const void*&)key, key_len);
}
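// For example, with an ordered dictionary positional access follows insertion order
// (illustrative sketch only; the int values belong to the caller):
//
//     zeek::Dictionary<int> d(zeek::ORDERED);
//     d.Insert("first", new int(1));
//     d.Insert("second", new int(2));
//     int* v = d.NthEntry(0); // the value inserted first, i.e. the one stored under "first"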
void SetDeleteFunc(dict_delete_func f) { delete_func = f; }
// Remove all entries.
void Clear() {
if ( table ) {
for ( int i = Capacity() - 1; i >= 0; i-- ) {
if ( table[i].Empty() )
continue;
if ( delete_func )
delete_func(table[i].value);
table[i].Clear();
}
free(table);
table = nullptr;
}
if ( order )
order.reset();
if ( iterators ) {
// Complete() erases from this Dictionary's iterators member, use a copy.
auto copied_iterators = *iterators;
for ( auto* i : copied_iterators )
i->Complete();
delete iterators;
iterators = nullptr;
}
log2_buckets = 0;
num_iterators = 0;
remaps = 0;
remap_end = -1;
num_entries = 0;
max_entries = 0;
}
/// The capacity of the table, Buckets + Overflow Size.
int Capacity() const { return table ? bucket_capacity : 0; }
int ExpectedCapacity() const { return bucket_capacity; }
// Debugging
#ifdef ZEEK_DICT_DEBUG
void DumpIfInvalid(bool valid) const {
if ( ! valid ) {
Dump(1);
abort();
}
}
void AssertValid() const {
bool valid = true;
int n = num_entries;
if ( table )
for ( int i = Capacity() - 1; i >= 0; i-- )
if ( ! table[i].Empty() )
n--;
valid = (n == 0);
DumpIfInvalid(valid);
// Entries must be clustered together.
for ( int i = 1; i < Capacity(); i++ ) {
if ( ! table || table[i].Empty() )
continue;
if ( table[i - 1].Empty() ) {
valid = (table[i].distance == 0);
DumpIfInvalid(valid);
}
else {
valid = (table[i].bucket >= table[i - 1].bucket);
DumpIfInvalid(valid);
if ( table[i].bucket == table[i - 1].bucket ) {
valid = (table[i].distance == table[i - 1].distance + 1);
DumpIfInvalid(valid);
}
else {
valid = (table[i].distance <= table[i - 1].distance);
DumpIfInvalid(valid);
}
}
}
}
#endif // ZEEK_DICT_DEBUG
static constexpr size_t DICT_NUM_DISTANCES = 5;
void Dump(int level = 0) const {
int key_size = 0;
for ( int i = 0; i < Capacity(); i++ ) {
if ( table[i].Empty() )
continue;
key_size += zeek::util::pad_size(table[i].key_size);
if ( ! table[i].value )
continue;
}
int distances[DICT_NUM_DISTANCES];
int max_distance = 0;
DistanceStats(max_distance, distances, DICT_NUM_DISTANCES);
printf(
"cap %'7d ent %'7d %'-7d load %.2f max_dist %2d key/ent %3d lg "
"%2d remaps %1d remap_end %4d ",
Capacity(), Length(), MaxLength(), (double)Length() / (table ? Capacity() : 1), max_distance,
key_size / (Length() ? Length() : 1), log2_buckets, remaps, remap_end);
if ( Length() > 0 ) {
for ( size_t i = 0; i < DICT_NUM_DISTANCES - 1; i++ )
printf("[%zu]%2d%% ", i, 100 * distances[i] / Length());
printf("[%zu+]%2d%% ", DICT_NUM_DISTANCES - 1, 100 * distances[DICT_NUM_DISTANCES - 1] / Length());
}
else
printf("\n");
printf("\n");
if ( level >= 1 ) {
printf("%-10s %1s %-10s %-4s %-4s %-10s %-18s %-2s\n", "Index", "*", "Bucket", "Dist", "Off", "Hash",
"FibHash", "KeySize");
for ( int i = 0; i < Capacity(); i++ )
if ( table[i].Empty() )
printf("%'10d \n", i);
else
printf("%'10d %1s %'10d %4d %4d 0x%08x 0x%016" PRIx64 "(%3d) %2d\n", i, (i <= remap_end ? "*" : ""),
BucketByPosition(i), (int)table[i].distance, OffsetInClusterByPosition(i),
uint(table[i].hash), FibHash(table[i].hash), (int)FibHash(table[i].hash) & 0xFF,
(int)table[i].key_size);
}
}
void DistanceStats(int& max_distance, int* distances = nullptr, int num_distances = 0) const {
max_distance = 0;
for ( int i = 0; i < num_distances; i++ )
distances[i] = 0;
for ( int i = 0; i < Capacity(); i++ ) {
if ( table[i].Empty() )
continue;
if ( table[i].distance > max_distance )
max_distance = table[i].distance;
if ( num_distances <= 0 || ! distances )
continue;
if ( table[i].distance >= num_distances - 1 )
distances[num_distances - 1]++;
else
distances[table[i].distance]++;
}
}
void DumpKeys() const {
if ( ! table )
return;
char key_file[100];
// Detect string or binary content from the first non-empty key.
int i = 0;
while ( i < Capacity() && table[i].Empty() )
i++;
if ( i == Capacity() )
return;
bool binary = false;
const char* key = table[i].GetKey();
for ( int j = 0; j < table[i].key_size; j++ )
if ( ! isprint(key[j]) ) {
binary = true;
break;
}
int max_distance = 0;
DistanceStats(max_distance);
if ( binary ) {
char suffix = char(random() % 26) + 'A';
snprintf(key_file, 100, "%d.%d-%c.key", Length(), max_distance, suffix);
std::ofstream f(key_file, std::ios::binary | std::ios::out | std::ios::trunc);
for ( int idx = 0; idx < Capacity(); idx++ )
if ( ! table[idx].Empty() ) {
int key_size = table[idx].key_size;
f.write((const char*)&key_size, sizeof(int));
f.write(table[idx].GetKey(), table[idx].key_size);
}
}
else {
char suffix = char(random() % 26) + 'A';
snprintf(key_file, 100, "%d.%d-%c.ckey", Length(), max_distance, suffix);
std::ofstream f(key_file, std::ios::out | std::ios::trunc);
for ( int idx = 0; idx < Capacity(); idx++ )
if ( ! table[idx].Empty() ) {
std::string s((char*)table[idx].GetKey(), table[idx].key_size);
f << s << "\n";
}
f << std::flush;
}
}
// Type traits needed for some of the std algorithms to work
using value_type = detail::DictEntry<T>;
using pointer = detail::DictEntry<T>*;
using const_pointer = const detail::DictEntry<T>*;
// Iterator support
using iterator = DictIterator<T>;
using const_iterator = const iterator;
using reverse_iterator = std::reverse_iterator<iterator>;
using const_reverse_iterator = std::reverse_iterator<const_iterator>;
iterator begin() {
if ( IsOrdered() )
return {this, order->begin()};
return {this, table, table + Capacity()};
}
iterator end() {
if ( IsOrdered() )
return {this, order->end()};
return {this, table + Capacity(), table + Capacity()};
}
const_iterator begin() const {
if ( IsOrdered() )
return {this, order->begin()};
return {this, table, table + Capacity()};
}
const_iterator end() const {
if ( IsOrdered() )
return {this, order->end()};
return {this, table + Capacity(), table + Capacity()};
}
const_iterator cbegin() {
if ( IsOrdered() )
return {this, order->begin()};
return {this, table, table + Capacity()};
}
const_iterator cend() {
if ( IsOrdered() )
return {this, order->end()};
return {this, table + Capacity(), table + Capacity()};
}
RobustDictIterator<T> begin_robust() { return MakeRobustIterator(); }
RobustDictIterator<T> end_robust() { return RobustDictIterator<T>(); }
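// A hedged sketch of robust iteration, which tolerates insertions and removals while
// iterating (ShouldExpire() is a hypothetical caller-side predicate; values are assumed to
// be heap-allocated and owned by the caller):
//
//     for ( auto it = dict.begin_robust(); it != dict.end_robust(); ++it )
//         if ( ShouldExpire(it->value) )
//             delete dict.Remove(it->GetHashKey().get());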
private:
friend zeek::DictIterator<T>;
friend zeek::RobustDictIterator<T>;
void SetLog2Buckets(int value) {
log2_buckets = value;
bucket_count = 1 << log2_buckets;
bucket_capacity = (1 << log2_buckets) + log2_buckets;
}
/// Buckets of the table, not including overflow size.
int Buckets() const { return table ? bucket_count : 0; }
// bucket math
uint32_t ThresholdEntries() const {
// Increase the size of the dictionary when it is 75% full. However, when the dictionary
// is small ( bucket_capacity <= 2^3+3=11 elements ), only resize it when it's 100% full.
// The dictionary will always resize when the current insertion causes it to be full. This
// ensures that the current insertion should always be successful.
int capacity = Capacity();
if ( log2_buckets <= detail::DICT_THRESHOLD_BITS )
return capacity;
return capacity * detail::DICT_LOAD_FACTOR_100 / 100;
}
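// For example, with log2_buckets = 10 the capacity is 2^10 + 10 = 1034 and the dictionary
// sizes up once it holds 1034 * 75 / 100 = 775 entries; with log2_buckets = 3 it only sizes
// up when all 2^3 + 3 = 11 slots are occupied.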
// Used to improve the distribution of the original hash.
detail::hash_t FibHash(detail::hash_t h) const {
// GoldenRatio phi = (sqrt(5)+1)/2 = 1.6180339887...
// 1/phi = phi - 1
h &= detail::HASH_MASK;
h *= 11400714819323198485llu; // 2^64/phi
return h;
}
// Maps a hash to the appropriate n-bit table bucket.
int BucketByHash(detail::hash_t h, int bit) const {
ASSERT(bit >= 0);
if ( ! bit )
return 0; // Shifting by the full 64-bit width below would be undefined.
#ifdef DICT_NO_FIB_HASH
detail::hash_t hash = h;
#else
detail::hash_t hash = FibHash(h);
#endif
int m = 64 - bit;
hash <<= m;
hash >>= m;
return hash;
}
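// For example, with bit = 10 the bucket is simply the low 10 bits of FibHash(h),
// i.e. a value in [0, 1023].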
// Given a position of a non-empty item in the table, find the related bucket.
int BucketByPosition(int position) const {
ASSERT(table && position >= 0 && position < Capacity() && ! table[position].Empty());
return position - table[position].distance;
}
// Given a bucket of a non-empty item in the table, find the end of its cluster.
// The end should be equal to tail+1 if tail exists. Otherwise it's the tail of
// the just-smaller cluster + 1.
int EndOfClusterByBucket(int bucket) const {
ASSERT(bucket >= 0 && bucket < Buckets());
int i = bucket;
int current_cap = Capacity();
while ( i < current_cap && ! table[i].Empty() && BucketByPosition(i) <= bucket )
i++;
return i;
}
// Given a position of a non-empty item in the table, find the head of its cluster.
int HeadOfClusterByPosition(int position) const {
// Finding the first entry in the bucket chain.
ASSERT(0 <= position && position < Capacity() && ! table[position].Empty());
// Look backward for the first item with the same bucket as myself.
int bucket = BucketByPosition(position);
int i = position;
while ( i >= bucket && BucketByPosition(i) == bucket )
i--;
return i == bucket ? i : i + 1;
}
// Given a position of a non-empty item in the table, find the tail of its cluster.
int TailOfClusterByPosition(int position) const {
ASSERT(0 <= position && position < Capacity() && ! table[position].Empty());
int bucket = BucketByPosition(position);
int i = position;
int current_cap = Capacity();
while ( i < current_cap && ! table[i].Empty() && BucketByPosition(i) == bucket )
i++; // stop just over the tail.
return i - 1;
}
// Given a position of a non-empty item in the table, find the end of its cluster.
// The end should be equal to tail+1 if tail exists. Otherwise it's the tail of
// the just-smaller cluster + 1.
int EndOfClusterByPosition(int position) const { return TailOfClusterByPosition(position) + 1; }
// Given a position of a non-empty item in the table, find the offset of it within
// its cluster.
int OffsetInClusterByPosition(int position) const {
ASSERT(0 <= position && position < Capacity() && ! table[position].Empty());
int head = HeadOfClusterByPosition(position);
return position - head;
}
// Next non-empty item position in the table, starting at the specified position.
int Next(int position) const {
ASSERT(table && -1 <= position && position < Capacity());
int current_cap = Capacity();
do {
position++;
} while ( position < current_cap && table[position].Empty() );
return position;
}
void Init() {
ASSERT(! table);
table = (detail::DictEntry<T>*)malloc(sizeof(detail::DictEntry<T>) * ExpectedCapacity());
for ( int i = Capacity() - 1; i >= 0; i-- )
table[i].SetEmpty();
}
// Lookup
int LinearLookupIndex(const void* key, int key_size, detail::hash_t hash) const {
auto current_cap = Capacity();
for ( int i = 0; i < current_cap; i++ )
if ( ! table[i].Empty() && table[i].Equal((const char*)key, key_size, hash) )
return i;
return -1;
}
// Lookup position for all possible table_sizes caused by remapping. Remap it immediately
// if not in the middle of iteration.
int LookupIndex(const void* key, int key_size, detail::hash_t hash, int* insert_position = nullptr,
int* insert_distance = nullptr) {
ASSERT_VALID(this);
if ( ! table )
return -1;
int bucket = BucketByHash(hash, log2_buckets);
#ifdef ZEEK_DICT_DEBUG
int linear_position = LinearLookupIndex(key, key_size, hash);
#endif // ZEEK_DICT_DEBUG
int position = LookupIndex(key, key_size, hash, bucket, Capacity(), insert_position, insert_distance);
if ( position >= 0 ) {
ASSERT_EQUAL(position, linear_position); // same as linearLookup
return position;
}
for ( int i = 1; i <= remaps; i++ ) {
int prev_bucket = BucketByHash(hash, log2_buckets - i);
if ( prev_bucket <= remap_end ) {
// The entry could be here. Note that the insert_position/insert_distance returned on a
// failed lookup are not valid for previous table sizes.
position = LookupIndex(key, key_size, hash, prev_bucket, remap_end + 1);
if ( position >= 0 ) {
ASSERT_EQUAL(position, linear_position); // same as linearLookup
// remap immediately if no iteration is on.
if ( ! num_iterators ) {
Remap(position, &position);
ASSERT_EQUAL(position, LookupIndex(key, key_size, hash));
}
return position;
}
}
}
// not found
#ifdef ZEEK_DICT_DEBUG
if ( linear_position >= 0 ) { // Results differ; stop and investigate what's happening.
ASSERT(false);
// rerun the function in debugger to track down the bug.
LookupIndex(key, key_size, hash);
}
#endif // ZEEK_DICT_DEBUG
return -1;
}
// Returns the position of the item if it exists. Otherwise returns -1, but set the insert
// position/distance if required. The starting point for the search may not be the bucket
// for the current table size since this method is also used to search for an item in the
// previous table size.
int LookupIndex(const void* key, int key_size, detail::hash_t hash, int begin, int end,
int* insert_position = nullptr, int* insert_distance = nullptr) {
ASSERT(begin >= 0 && begin < Buckets());
int i = begin;
for ( ; i < end && ! table[i].Empty() && BucketByPosition(i) <= begin; i++ )
if ( BucketByPosition(i) == begin && table[i].Equal((char*)key, key_size, hash) )
return i;
// no such cluster, or not found in the cluster.
if ( insert_position )
*insert_position = i;
if ( insert_distance ) {
*insert_distance = i - begin;
if ( *insert_distance >= detail::TOO_FAR_TO_REACH )
reporter->FatalErrorWithCore("Dictionary (size %d) insertion distance too far: %d", Length(),
*insert_distance);
}
return -1;
}
/// Insert entry, Adjust iterators when necessary.
void InsertRelocateAndAdjust(detail::DictEntry<T>& entry, int insert_position) {
/// e.distance is adjusted to be the one at insert_position.
#ifdef DEBUG
entry.bucket = BucketByHash(entry.hash, log2_buckets);
#endif // DEBUG
int last_affected_position = insert_position;
InsertAndRelocate(entry, insert_position, &last_affected_position);
space_distance_sum += last_affected_position - insert_position;
space_distance_samples++;
// If remapping in progress, adjust the remap_end to step back a little to cover the new
// range if the changed range straddles over remap_end.
if ( Remapping() && insert_position <= remap_end && remap_end < last_affected_position ) {
// The [insert_position, last_affected_position] range changed and remap_end lies inside it,
// so an old entry may have been pushed down across remap_end.
remap_end = last_affected_position; // Adjust to the conservative side.
}
if ( iterators && ! iterators->empty() )
for ( auto c : *iterators )
AdjustOnInsert(c, entry, insert_position, last_affected_position);
}
/// insert entry into position, relocate other entries when necessary.
void InsertAndRelocate(
detail::DictEntry<T>& entry, int insert_position,
int* last_affected_position = nullptr) { /// take out the head of cluster and append to the end of the cluster.
while ( true ) {
if ( insert_position >= Capacity() ) {
ASSERT(insert_position == Capacity());
SizeUp(); // copied all the items to new table. as it's just copying without
// remapping, insert_position is now empty.
table[insert_position] = entry;
if ( last_affected_position )
*last_affected_position = insert_position;
return;
}
if ( table[insert_position].Empty() ) { // the condition to end the loop.
table[insert_position] = entry;
if ( last_affected_position )
*last_affected_position = insert_position;
return;
}
// the to-be-swapped-out item appends to the end of its original cluster.
auto t = table[insert_position];
int next = EndOfClusterByPosition(insert_position);
t.distance += next - insert_position;
// swap
table[insert_position] = entry;
entry = t;
insert_position = next; // append to the end of the current cluster.
}
}
/// Adjust Iterators on Insert.
void AdjustOnInsert(RobustDictIterator<T>* c, const detail::DictEntry<T>& entry, int insert_position,
int last_affected_position) {
// Drop any stale copies of this entry from the iterator's bookkeeping before re-recording it below.
c->inserted->erase(std::remove(c->inserted->begin(), c->inserted->end(), entry), c->inserted->end());
c->visited->erase(std::remove(c->visited->begin(), c->visited->end(), entry), c->visited->end());
if ( insert_position < c->next )
c->inserted->push_back(entry);
if ( insert_position < c->next && c->next <= last_affected_position ) {
int k = TailOfClusterByPosition(c->next);
ASSERT(k >= 0 && k < Capacity());
c->visited->push_back(table[k]);
}
}
/// Remove, Relocate & Adjust iterators.
detail::DictEntry<T> RemoveRelocateAndAdjust(int position) {
int last_affected_position = position;
detail::DictEntry<T> entry = RemoveAndRelocate(position, &last_affected_position);
#ifdef ZEEK_DICT_DEBUG
// validation: index to i-1 should be continuous without empty spaces.
for ( int k = position; k < last_affected_position; k++ )
ASSERT(! table[k].Empty());
#endif // ZEEK_DICT_DEBUG
if ( iterators && ! iterators->empty() )
for ( auto c : *iterators )
AdjustOnRemove(c, entry, position, last_affected_position);
return entry;
}
/// Remove & Relocate
detail::DictEntry<T> RemoveAndRelocate(int position, int* last_affected_position = nullptr) {
// fill the empty position with the tail of the cluster of position+1.
ASSERT(position >= 0 && position < Capacity() && ! table[position].Empty());
detail::DictEntry<T> entry = table[position];
while ( true ) {
if ( position == Capacity() - 1 || table[position + 1].Empty() || table[position + 1].distance == 0 ) {
// no next cluster to fill, or next position is empty or next position is already in
// perfect bucket.
table[position].SetEmpty();
if ( last_affected_position )
*last_affected_position = position;
return entry;
}
int next = TailOfClusterByPosition(position + 1);
table[position] = table[next];
table[position].distance -= next - position; // distance improved for the item.
position = next;
}
return entry;
}
/// Adjust safe iterators after Removal of entry at position.
void AdjustOnRemove(RobustDictIterator<T>* c, const detail::DictEntry<T>& entry, int position,
int last_affected_position) {
// Drop any stale copies of this entry from the iterator's bookkeeping.
c->inserted->erase(std::remove(c->inserted->begin(), c->inserted->end(), entry), c->inserted->end());
c->visited->erase(std::remove(c->visited->begin(), c->visited->end(), entry), c->visited->end());
if ( position < c->next && c->next <= last_affected_position ) {
int moved = HeadOfClusterByPosition(c->next - 1);
if ( moved < position )
moved = position;
c->inserted->push_back(table[moved]);
}
// if not already the end of the dictionary, adjust next to a valid one.
if ( c->next < Capacity() && table[c->next].Empty() )
c->next = Next(c->next);
if ( c->curr == entry ) {
if ( c->next >= 0 && c->next < Capacity() && ! table[c->next].Empty() )
c->curr = table[c->next];
else
c->curr = detail::DictEntry<T>(nullptr); // -> c == end_robust()
}
}
bool Remapping() const { return remap_end >= 0; } // remap in reverse order.
/// One round of remap.
void Remap() {
/// Since each remap step should be very fast, several entries are handled per round.
/// Remapping is delayed while iterators ("cookies") are active; handling iteration while
/// positions change is hard.
/// Remapping proceeds from remap_end downward toward index 0.
/// It splits the dict into two parts: [0, remap_end] still mixes entries placed for the old
/// and new sizes, while (remap_end, capacity) contains only entries placed for the new size.
///
if ( num_iterators > 0 )
return;
int left = detail::DICT_REMAP_ENTRIES;
while ( remap_end >= 0 && left > 0 ) {
if ( ! table[remap_end].Empty() && Remap(remap_end) )
left--;
else
// A successful Remap() may increase remap_end again if an insert triggers SizeUp(); in
// that case remap_end needs to be worked on again, so it is only decremented here,
// on the empty/failed path.
remap_end--;
}
if ( remap_end < 0 )
remaps = 0; // done remapping.
}
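// For example, growing from log2_buckets = 9 (capacity 2^9 + 9 = 521) sets remap_end to 521;
// each round then walks remap_end downward, relocating up to DICT_REMAP_ENTRIES occupied
// slots into their buckets for the new table size.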
// Remap an item in position to a new position. Returns true if the relocation was
// successful, false otherwise. new_position will be set to the new position if a
// pointer is provided to store the new value.
bool Remap(int position, int* new_position = nullptr) {
ASSERT_VALID(this);
/// Remap changes item positions via remove() and insert(). To avoid excessive adjustment
/// work, it is skipped while safe (robust) iteration is in progress.
ASSERT(! iterators || iterators->empty());
int current = BucketByPosition(position); // current bucket
int expected = BucketByHash(table[position].hash, log2_buckets); // expected bucket in new
// table.
// Equal either because (1) it's a new item, or (2) it's an old item whose new bucket happens
// to match the old one; about 50% of old items fall into this case thanks to FibHash.
if ( current == expected )
return false;
detail::DictEntry<T> entry =
RemoveAndRelocate(position); // no iteration cookies to adjust, no need for last_affected_position.
#ifdef DEBUG
entry.bucket = expected;
#endif // DEBUG
// find insert position.
int insert_position = EndOfClusterByBucket(expected);
if ( new_position )
*new_position = insert_position;
entry.distance = insert_position - expected;
InsertAndRelocate(entry,
insert_position); // no iteration cookies to adjust, no need for last_affected_position.
ASSERT_VALID(this);
return true;
}
void SizeUp() {
int prev_capacity = Capacity();
SetLog2Buckets(log2_buckets + 1);
int capacity = Capacity();
table = (detail::DictEntry<T>*)realloc(table, capacity * sizeof(detail::DictEntry<T>));
for ( int i = prev_capacity; i < capacity; i++ )
table[i].SetEmpty();
// Remap from last to first in reverse order. SizeUp can be triggered by two conditions, one
// of which is that the last slot of the table is occupied and there's nowhere to put new
// items. In that case the table doubles in capacity and the item is put at the
// prev_capacity position with the old hash, so remapping must cover that slot as well.
remap_end = prev_capacity; // prev_capacity instead of prev_capacity - 1.
// another remap starts.
remaps++; // used in Lookup() to cover SizeUp with incomplete remaps.
ASSERT(remaps <= log2_buckets); // because we only sizeUp, one direction. we know the
// previous log2_buckets.
// reset performance metrics.
space_distance_sum = 0;
space_distance_samples = 0;
}
/**
* Retrieves a pointer to a full DictEntry in the table based on a hash key.
*
* @param key the key to lookup.
* @return A pointer to the entry or a nullptr if no entry has a matching key.
*/
detail::DictEntry<T>* LookupEntry(const detail::HashKey& key) {
return LookupEntry(key.Key(), key.Size(), key.Hash());
}
/**
* Retrieves a pointer to a full DictEntry in the table based on key data.
*
* @param key the key to lookup
* @param key_size the size of the key data
* @param h a hash of the key data.
* @return A pointer to the entry or a nullptr if no entry has a matching key.
*/
detail::DictEntry<T>* LookupEntry(const void* key, int key_size, detail::hash_t h) const {
// Lookup may modify the table: if the entry is found at a position from before a SizeUp
// (i.e. placed for an older table size), it is moved to its proper position for the
// current size so that the next lookup is fast.
Dictionary* d = const_cast<Dictionary*>(this);
int position = d->LookupIndex(key, key_size, h);
return position >= 0 ? &(table[position]) : nullptr;
}
bool HaveOnlyRobustIterators() const {
return (num_iterators == 0) || ((iterators ? iterators->size() : 0) == num_iterators);
}
RobustDictIterator<T> MakeRobustIterator() {
if ( IsOrdered() )
reporter->InternalError("RobustIterators are not currently supported for ordered dictionaries");
if ( ! iterators )
iterators = new std::vector<RobustDictIterator<T>*>;
return {this};
}
detail::DictEntry<T> GetNextRobustIteration(RobustDictIterator<T>* iter) {
// If there's no table in the dictionary, then the iterator needs to be
// cleaned up because it's not pointing at anything.
if ( ! table ) {
iter->Complete();
return detail::DictEntry<T>(nullptr); // end of iteration
}
// If there are any inserted entries, return them first.
// That keeps the list small and helps avoiding searching
// a large list when deleting an entry.
if ( iter->inserted && ! iter->inserted->empty() ) {
// Return the last one. Order doesn't matter,
// and removing from the tail is cheaper.
detail::DictEntry<T> e = iter->inserted->back();
iter->inserted->pop_back();
return e;
}
// First iteration.
if ( iter->next < 0 )
iter->next = Next(-1);
if ( iter->next < Capacity() && table[iter->next].Empty() ) {
// [Robin] I believe this means that the table has resized in a way
// that we're now inside the overflow area where elements are empty,
// because elsewhere empty slots aren't allowed. Assuming that's right,
// then it means we'll always be at the end of the table now and could
// also just set `next` to capacity. However, just to be sure, we
// instead reuse logic from below to move forward "to a valid position"
// and then double check, through an assertion in debug mode, that it's
// actually the end. If this ever triggered, the above assumption would
// be wrong (but the Next() call would probably still be right).
iter->next = Next(iter->next);
ASSERT(iter->next == Capacity());
}
// Filter out visited keys.
int capacity = Capacity();
if ( iter->visited && ! iter->visited->empty() )
// Filter out visited entries.
while ( iter->next < capacity ) {
ASSERT(! table[iter->next].Empty());
auto it = std::find(iter->visited->begin(), iter->visited->end(), table[iter->next]);
if ( it == iter->visited->end() )
break;
iter->visited->erase(it);
iter->next = Next(iter->next);
}
if ( iter->next >= capacity ) {
iter->Complete();
return detail::DictEntry<T>(nullptr); // end of iteration
}
ASSERT(! table[iter->next].Empty());
detail::DictEntry<T> e = table[iter->next];
// prepare for next time.
iter->next = Next(iter->next);
return e;
}
void IncrIters() { ++num_iterators; }
void DecrIters() { --num_iterators; }
// Members are ordered to keep the dictionary object compact (8-byte alignment).
// If a SizeUp happens while a remap is still in progress, the in-progress remap is simply
// superseded, since all entries will be remapped for the new size anyway. However, the
// number of outstanding remaps is recorded for lookup: if a key is not found at its position
// for the current table size, it may still sit at its position from one of the previous
// `remaps` sizes.
uint16_t remaps = 0;
uint16_t log2_buckets = 0;
uint32_t bucket_capacity = 1;
uint32_t bucket_count = 1;
// Pending number of iterators on the Dict, including both robust and non-robust.
// This is used to avoid remapping if there are any active iterators.
uint16_t num_iterators = 0;
// The last index to be remapped.
int32_t remap_end = -1;
uint32_t num_entries = 0;
uint32_t max_entries = 0;
uint64_t cum_entries = 0;
uint32_t space_distance_samples = 0;
// Accumulated distance between requested insert positions and the last positions affected by
// those inserts; used together with space_distance_samples to decide on early size-ups.
int64_t space_distance_sum = 0;
dict_delete_func delete_func = nullptr;
detail::DictEntry<T>* table = nullptr;
std::vector<RobustDictIterator<T>*>* iterators = nullptr;
// Ordered dictionaries keep the order based on some criteria, by default the order of
// insertion. We only store a copy of the keys here for memory savings and for safety
// around reallocs and such.
std::unique_ptr<detail::DictEntryVec> order;
};
template<typename T>
using PDict = Dictionary<T>;
} // namespace zeek