Overwrite old Dict.h/cc with new ones

This commit is contained in:
Tim Wojtulewicz 2020-05-08 16:06:11 -07:00
parent b8287a3375
commit a243c0e4a6
4 changed files with 1201 additions and 2217 deletions

File diff suppressed because it is too large Load diff

View file

@ -1,85 +1,206 @@
// See the file "COPYING" in the main distribution directory for copyright.
#ifdef USE_OPEN_DICT
#include "OpenDict.h"
#else//USE_OPEN_DICT
#pragma once
#include "zeek-config.h"
#include <cstdint>
#include <vector>
#include "List.h"
#include "Hash.h"
ZEEK_FORWARD_DECLARE_NAMESPACED(DictEntry, zeek::detail);
ZEEK_FORWARD_DECLARE_NAMESPACED(IterCookie, zeek);
ZEEK_FORWARD_DECLARE_NAMESPACED(DictEntry, zeek::detail);
// Type for function to be called when deleting elements.
typedef void (*dict_delete_func)(void*);
namespace zeek {
// Type indicating whether the dictionary should keep track of the order
// of insertions.
// Default number of hash buckets in dictionary. The dictionary will increase the size
// of the hash table as needed.
constexpr uint32_t HASH_MASK = 0xFFFFFFFF; //only lower 32 bits.
// These four variables can be used to build different targets with -Dxxx for performance
// or for debugging purposes.
// When incrementally resizing and remapping, it remaps DICT_REMAP_ENTRIES each step. Use
// 2 for debug. 16 is best for a release build.
#ifndef DICT_REMAP_ENTRIES
constexpr uint8_t DICT_REMAP_ENTRIES = 16;
#endif
// Load factor = 1 - 0.5 ^ LOAD_FACTOR_BITS. 0.75 is the optimal value for release builds.
#ifndef DICT_LOAD_FACTOR_BITS
constexpr uint8_t DICT_LOAD_FACTOR_BITS = 2;
#endif
// Default number of hash buckets in dictionary. The dictionary will
// increase the size of the hash table as needed.
#ifndef DEFAULT_DICT_SIZE
constexpr uint8_t DEFAULT_DICT_SIZE = 0;
#endif
// When log2_buckets > DICT_THRESHOLD_BITS, DICT_LOAD_FACTOR_BITS becomes effective.
// Basically if dict size < 2^DICT_THRESHOLD_BITS + n, we size up only if necessary.
#ifndef DICT_THRESHOLD_BITS
constexpr uint8_t DICT_THRESHOLD_BITS = 3;
#endif
// The value of an iteration cookie is the bucket and offset within the
// bucket at which to start looking for the next value to return.
constexpr uint16_t TOO_FAR_TO_REACH = 0xFFFF;
enum DictOrder { ORDERED, UNORDERED };
// A dict_delete_func that just calls delete.
extern void generic_delete_func(void*);
namespace detail {
/**
* An entry stored in the dictionary.
*/
class DictEntry{
public:
#ifdef DEBUG
int bucket = 0;
#endif
// Distance from the expected position in the table. 0xFFFF means that the entry is empty.
uint16_t distance = TOO_FAR_TO_REACH;
// The size of the key. Less than 8 bytes we'll store directly in the entry, otherwise we'll
// store it as a pointer. This avoids extra allocations if we can help it.
uint16_t key_size = 0;
// Lower 4 bytes of the 8-byte hash, which is used to calculate the position in the table.
uint32_t hash = 0;
void* value = nullptr;
union{
char key_here[8]; //hold key len<=8. when over 8, it's a pointer to real keys.
char* key;
};
DictEntry(void* arg_key, int key_size = 0, zeek::detail::hash_t hash = 0, void* value = nullptr,
int16_t d = TOO_FAR_TO_REACH, bool copy_key = false)
: distance(d), key_size(key_size), hash((uint32_t)hash), value(value)
{
if ( key_size <= 8 )
{
memcpy(key_here, arg_key, key_size);
if ( ! copy_key )
delete (char*)arg_key; //own the arg_key, now don't need it.
}
else
{
if ( copy_key )
{
key = new char[key_size];
memcpy(key, arg_key, key_size);
}
else
{
key = (char*)arg_key;
}
}
}
bool Empty() const { return distance == TOO_FAR_TO_REACH; }
void SetEmpty()
{
distance = TOO_FAR_TO_REACH;
#ifdef DEBUG
hash = 0;
key = nullptr;
value = nullptr;
key_size = 0;
bucket = 0;
#endif//DEBUG
}
void Clear()
{
if( key_size > 8 )
delete key;
SetEmpty();
}
const char* GetKey() const { return key_size <= 8? key_here : key; }
bool Equal(const char* arg_key, int arg_key_size, zeek::detail::hash_t arg_hash) const
{//only 40-bit hash comparison.
return ( 0 == ((hash ^ arg_hash) & HASH_MASK) )
&& key_size == arg_key_size && 0 == memcmp(GetKey(), arg_key, key_size);
}
bool operator==(const DictEntry& r) const
{
return Equal(r.GetKey(), r.key_size, r.hash);
}
bool operator!=(const DictEntry& r) const
{
return ! Equal(r.GetKey(), r.key_size, r.hash);
}
};
} // namespace detail
/**
* A dictionary type that uses clustered hashing, a variation of Robinhood/Open Addressing
* hashing. The following posts help to understand the implementation:
* - https://jasonlue.github.io/algo/2019/08/20/clustered-hashing.html
* - https://jasonlue.github.io/algo/2019/08/27/clustered-hashing-basic-operations.html
* - https://jasonlue.github.io/algo/2019/09/03/clustered-hashing-incremental-resize.html
* - https://jasonlue.github.io/algo/2019/09/10/clustered-hashing-modify-on-iteration.html
*
* The dictionary is effectively a hashmap from hashed keys to values. The dictionary owns
* the keys but not the values. The dictionary size will be bounded at around 100K. 1M
* entries is the absolute limit. Only Connections use that many entries, and that is rare.
*/
class Dictionary{
public:
explicit Dictionary(DictOrder ordering = UNORDERED,
int initial_size = 0);
explicit Dictionary(DictOrder ordering = UNORDERED, int initial_size = DEFAULT_DICT_SIZE);
~Dictionary();
// Member functions for looking up a key, inserting/changing its
// contents, and deleting it. These come in two flavors: one
// which takes a HashKey, and the other which takes a raw key,
// which takes a zeek::detail::HashKey, and the other which takes a raw key,
// its size, and its (unmodulated) hash.
void* Lookup(const zeek::detail::HashKey* key) const
{ return Lookup(key->Key(), key->Size(), key->Hash()); }
void* Lookup(const void* key, int key_size, zeek::detail::hash_t hash) const;
//lookup may move the key to right place if in the old zone to speed up the next lookup.
void* Lookup(const zeek::detail::HashKey* key) const;
void* Lookup(const void* key, int key_size, zeek::detail::hash_t h) const;
// Returns previous value, or 0 if none.
void* Insert(zeek::detail::HashKey* key, void* val)
{
return Insert(key->TakeKey(), key->Size(), key->Hash(), val, 0);
}
{ return Insert(key->TakeKey(), key->Size(), key->Hash(), val, false); }
// If copy_key is true, then the key is copied, otherwise it's assumed
// that it's a heap pointer that now belongs to the Dictionary to
// manage as needed.
void* Insert(void* key, int key_size, zeek::detail::hash_t hash, void* val,
bool copy_key);
void* Insert(void* key, int key_size, zeek::detail::hash_t hash, void* val, bool copy_key);
// Removes the given element. Returns a pointer to the element in
// case it needs to be deleted. Returns 0 if no such element exists.
// If dontdelete is true, the key's bytes will not be deleted.
void* Remove(const zeek::detail::HashKey* key)
{ return Remove(key->Key(), key->Size(), key->Hash()); }
void* Remove(const void* key, int key_size, zeek::detail::hash_t hash,
bool dont_delete = false);
void* Remove(const void* key, int key_size, zeek::detail::hash_t hash, bool dont_delete = false);
// Number of entries.
int Length() const
{ return tbl2 ? num_entries + num_entries2 : num_entries; }
{ return num_entries; }
// Largest it's ever been.
int MaxLength() const
{
return tbl2 ?
max_num_entries + max_num_entries2 : max_num_entries;
}
{ return max_entries; }
// Total number of entries ever.
uint64_t NumCumulativeInserts() const
{
return cumulative_entries;
}
{ return cum_entries; }
// True if the dictionary is ordered, false otherwise.
bool IsOrdered() const { return order != nullptr; }
int IsOrdered() const { return order != nullptr; }
// If the dictionary is ordered then returns the n'th entry's value;
// the second method also returns the key. The first entry inserted
@ -109,7 +230,7 @@ public:
// If return_hash is true, a HashKey for the entry is returned in h,
// which should be delete'd when no longer needed.
IterCookie* InitForIteration() const;
void* NextEntry(zeek::detail::HashKey*& h, IterCookie*& cookie, int return_hash) const;
void* NextEntry(zeek::detail::HashKey*& h, IterCookie*& cookie, bool return_hash) const;
void StopIteration(IterCookie* cookie) const;
void SetDeleteFunc(dict_delete_func f) { delete_func = f; }
@ -120,78 +241,141 @@ public:
// and (ii) we won't visit any still-unseen entries which are getting
// removed. (We don't get this for free, so only use it if
// necessary.)
void MakeRobustCookie(IterCookie* cookie)
{ cookies.push_back(cookie); }
void MakeRobustCookie(IterCookie* cookie);
// Remove all entries.
void Clear();
unsigned int MemoryAllocation() const;
size_t MemoryAllocation() const;
/// The capacity of the table, Buckets + Overflow Size.
int Capacity(bool expected=false) const;
//Debugging
#ifdef DEBUG
void AssertValid() const;
#endif//DEBUG
void Dump(int level=0) const;
void DistanceStats(int& max_distance, int* distances=0, int num_distances=0) const;
void DumpKeys() const;
private:
void Init(int size);
void Init2(int size); // initialize second table for resizing
void DeInit();
friend zeek::IterCookie;
// Internal version of Insert().
void* Insert(zeek::detail::DictEntry* entry, bool copy_key);
/// Buckets of the table, not including overflow size.
int Buckets(bool expected=false) const;
void* DoRemove(zeek::detail::DictEntry* entry, zeek::detail::hash_t h,
zeek::PList<zeek::detail::DictEntry>* chain, int chain_offset);
//bucket math
int Log2(int num) const;
int ThresholdEntries() const;
int NextPrime(int n) const;
bool IsPrime(int n) const;
void StartChangeSize(int new_size);
void FinishChangeSize();
void MoveChains();
// Used to improve the distribution of the original hash.
zeek::detail::hash_t FibHash(zeek::detail::hash_t h) const;
// The following get and set the "density" threshold - if the
// average hash chain length exceeds this threshold, the
// table will be resized. The default value is 3.0.
double DensityThresh() const { return den_thresh; }
// Maps a hash to the appropriate n-bit table bucket.
int BucketByHash(zeek::detail::hash_t h, int bit) const;
void SetDensityThresh(double thresh)
{
den_thresh = thresh;
thresh_entries = int(thresh * double(num_buckets));
}
// Given a position of a non-empty item in the table, find the related bucket.
int BucketByPosition(int position) const;
// Same for the second table, when resizing.
void SetDensityThresh2(double thresh)
{
den_thresh2 = thresh;
thresh_entries2 = int(thresh * double(num_buckets2));
}
// Given a bucket of a non-empty item in the table, find the end of its cluster.
// The end should be equal to tail+1 if tail exists. Otherwise it's the tail of
// the just-smaller cluster + 1.
int EndOfClusterByBucket(int bucket) const;
// Given a position of a non-empty item in the table, find the head of its cluster.
int HeadOfClusterByPosition(int position) const;
// Given a position of a non-empty item in the table, find the tail of its cluster.
int TailOfClusterByPosition(int position) const;
// Given a position of a non-empty item in the table, find the end of its cluster.
// The end should be equal to tail+1 if tail exists. Otherwise it's the tail of
// the just-smaller cluster + 1.
int EndOfClusterByPosition(int position) const;
// Given a position of a non-empty item in the table, find the offset of it within
// its cluster.
int OffsetInClusterByPosition(int position) const;
// Next non-empty item position in the table.
int Next(int i) const;
void Init();
//Iteration
IterCookie* InitForIterationNonConst();
void* NextEntryNonConst(zeek::detail::HashKey*& h, IterCookie*& cookie, bool return_hash);
void StopIterationNonConst(IterCookie* cookie);
//Lookup
int LinearLookupIndex(const void* key, int key_size, zeek::detail::hash_t hash) const;
int LookupIndex(const void* key, int key_size, zeek::detail::hash_t hash, int* insert_position = nullptr,
int* insert_distance = nullptr);
int LookupIndex(const void* key, int key_size, zeek::detail::hash_t hash, int begin, int end,
int* insert_position = nullptr, int* insert_distance = nullptr);
/// Insert entry, Adjust cookies when necessary.
void InsertRelocateAndAdjust(detail::DictEntry& entry, int insert_position);
/// insert entry into position, relocate other entries when necessary.
void InsertAndRelocate(detail::DictEntry& entry, int insert_position, int* last_affected_position = nullptr);
/// Adjust Cookies on Insert.
void AdjustOnInsert(IterCookie* c, const detail::DictEntry& entry, int insert_position, int last_affected_position);
///Remove, Relocate & Adjust cookies.
detail::DictEntry RemoveRelocateAndAdjust(int position);
///Remove & Relocate
detail::DictEntry RemoveAndRelocate(int position, int* last_affected_position = nullptr);
///Adjust safe cookies after Removal of entry at position.
void AdjustOnRemove(IterCookie* c, const detail::DictEntry& entry, int position, int last_affected_position);
bool Remapping() const { return remap_end >= 0;} //remap in reverse order.
///One round of remap.
void Remap();
// Remap an item in position to a new position. Returns true if the relocation was
// successful, false otherwise. new_position will be set to the new position if a
// pointer is provided to store the new value.
bool Remap(int position, int* new_position = nullptr);
void SizeUp();
//alligned on 8-bytes with 4-leading bytes. 7*8=56 bytes a dictionary.
// when sizeup but the current mapping is in progress. the current mapping will be ignored
// as it will be remapped to new dict size anyway. however, the missed count is recorded
// for lookup. if position not found for a key in the position of dict of current size, it
// still could be in the position of dict of previous N sizes.
unsigned char remaps = 0;
unsigned char log2_buckets = 0;
// Pending number of iterators on the Dict, including both robust and non-robust.
// This is used to avoid remapping if there are any active iterators.
unsigned short num_iterators = 0;
// The last index to be remapped.
int remap_end = -1;
// Normally we only have tbl.
// When we're resizing, we'll have tbl (old) and tbl2 (new)
// tbl_next_ind keeps track of how much we've moved to tbl2
// (it's the next index we're going to move).
zeek::PList<zeek::detail::DictEntry>** tbl = nullptr;
int num_buckets = 0;
int num_entries = 0;
int max_num_entries = 0;
int thresh_entries = 0;
uint64_t cumulative_entries = 0;
double den_thresh = 0.0;
int max_entries = 0;
// Resizing table (replicates tbl above).
zeek::PList<zeek::detail::DictEntry>** tbl2 = nullptr;
int num_buckets2 = 0;
int num_entries2 = 0;
int max_num_entries2 = 0;
int thresh_entries2 = 0;
double den_thresh2 = 0;
zeek::detail::hash_t tbl_next_ind = 0;
zeek::PList<zeek::detail::DictEntry>* order = nullptr;
uint64_t cum_entries = 0;
dict_delete_func delete_func = nullptr;
detail::DictEntry* table = nullptr;
std::vector<IterCookie*>* cookies = nullptr;
zeek::PList<IterCookie> cookies;
// Order means the order of insertion. means no deletion until exit. will be inefficient.
std::vector<detail::DictEntry>* order = nullptr;
};
/*
* Template specialization of Dictionary that stores pointers for values.
*/
template<typename T>
class PDict : public Dictionary {
public:
@ -221,10 +405,10 @@ public:
T* NextEntry(IterCookie*& cookie) const
{
zeek::detail::HashKey* h;
return (T*) Dictionary::NextEntry(h, cookie, 0);
return (T*) Dictionary::NextEntry(h, cookie, false);
}
T* NextEntry(zeek::detail::HashKey*& h, IterCookie*& cookie) const
{ return (T*) Dictionary::NextEntry(h, cookie, 1); }
{ return (T*) Dictionary::NextEntry(h, cookie, true); }
T* RemoveEntry(const zeek::detail::HashKey* key)
{ return (T*) Remove(key->Key(), key->Size(), key->Hash()); }
T* RemoveEntry(const zeek::detail::HashKey& key)
@ -235,5 +419,3 @@ public:
using Dictionary [[deprecated("Remove in v4.1. Use zeek::Dictionary instead.")]] = zeek::Dictionary;
template<typename T> using PDict [[deprecated("Remove in v4.1. Use zeek::PDict instead.")]] = zeek::PDict<T>;
#endif//USE_OPEN_DICT

File diff suppressed because it is too large Load diff

View file

@ -1,421 +0,0 @@
// See the file "COPYING" in the main distribution directory for copyright.
#pragma once
#include <cstdint>
#include <vector>
#include "Hash.h"
ZEEK_FORWARD_DECLARE_NAMESPACED(IterCookie, zeek);
ZEEK_FORWARD_DECLARE_NAMESPACED(DictEntry, zeek::detail);
// Type for function to be called when deleting elements.
typedef void (*dict_delete_func)(void*);
namespace zeek {
// Default number of hash buckets in dictionary. The dictionary will increase the size
// of the hash table as needed.
constexpr uint32_t HASH_MASK = 0xFFFFFFFF; //only lower 32 bits.
// These four variables can be used to build different targets with -Dxxx for performance
// or for debugging purposes.
// When incrementally resizing and remapping, it remaps DICT_REMAP_ENTRIES each step. Use
// 2 for debug. 16 is best for a release build.
#ifndef DICT_REMAP_ENTRIES
constexpr uint8_t DICT_REMAP_ENTRIES = 16;
#endif
// Load factor = 1 - 0.5 ^ LOAD_FACTOR_BITS. 0.75 is the optimal value for release builds.
#ifndef DICT_LOAD_FACTOR_BITS
constexpr uint8_t DICT_LOAD_FACTOR_BITS = 2;
#endif
// Default number of hash buckets in dictionary. The dictionary will
// increase the size of the hash table as needed.
#ifndef DEFAULT_DICT_SIZE
constexpr uint8_t DEFAULT_DICT_SIZE = 0;
#endif
// When log2_buckets > DICT_THRESHOLD_BITS, DICT_LOAD_FACTOR_BITS becomes effective.
// Basically if dict size < 2^DICT_THRESHOLD_BITS + n, we size up only if necessary.
#ifndef DICT_THRESHOLD_BITS
constexpr uint8_t DICT_THRESHOLD_BITS = 3;
#endif
// The value of an iteration cookie is the bucket and offset within the
// bucket at which to start looking for the next value to return.
constexpr uint16_t TOO_FAR_TO_REACH = 0xFFFF;
enum DictOrder { ORDERED, UNORDERED };
// A dict_delete_func that just calls delete.
extern void generic_delete_func(void*);
namespace detail {
/**
* An entry stored in the dictionary.
*/
class DictEntry{
public:
#ifdef DEBUG
int bucket = 0;
#endif
// Distance from the expected position in the table. 0xFFFF means that the entry is empty.
uint16_t distance = TOO_FAR_TO_REACH;
// The size of the key. Less than 8 bytes we'll store directly in the entry, otherwise we'll
// store it as a pointer. This avoids extra allocations if we can help it.
uint16_t key_size = 0;
// Lower 4 bytes of the 8-byte hash, which is used to calculate the position in the table.
uint32_t hash = 0;
void* value = nullptr;
union{
char key_here[8]; //hold key len<=8. when over 8, it's a pointer to real keys.
char* key;
};
DictEntry(void* arg_key, int key_size = 0, hash_t hash = 0, void* value = nullptr,
int16_t d = TOO_FAR_TO_REACH, bool copy_key = false)
: distance(d), key_size(key_size), hash((uint32_t)hash), value(value)
{
if ( key_size <= 8 )
{
memcpy(key_here, arg_key, key_size);
if ( ! copy_key )
delete (char*)arg_key; //own the arg_key, now don't need it.
}
else
{
if ( copy_key )
{
key = new char[key_size];
memcpy(key, arg_key, key_size);
}
else
{
key = (char*)arg_key;
}
}
}
bool Empty() const { return distance == TOO_FAR_TO_REACH; }
void SetEmpty()
{
distance = TOO_FAR_TO_REACH;
#ifdef DEBUG
hash = 0;
key = nullptr;
value = nullptr;
key_size = 0;
bucket = 0;
#endif//DEBUG
}
void Clear()
{
if( key_size > 8 )
delete key;
SetEmpty();
}
const char* GetKey() const { return key_size <= 8? key_here : key; }
bool Equal(const char* arg_key, int arg_key_size, hash_t arg_hash) const
{//only 40-bit hash comparison.
return ( 0 == ((hash ^ arg_hash) & HASH_MASK) )
&& key_size == arg_key_size && 0 == memcmp(GetKey(), arg_key, key_size);
}
bool operator==(const DictEntry& r) const
{
return Equal(r.GetKey(), r.key_size, r.hash);
}
bool operator!=(const DictEntry& r) const
{
return ! Equal(r.GetKey(), r.key_size, r.hash);
}
};
} // namespace detail
/**
* A dictionary type that uses clustered hashing, a variation of Robinhood/Open Addressing
* hashing. The following posts help to understand the implementation:
* - https://jasonlue.github.io/algo/2019/08/20/clustered-hashing.html
* - https://jasonlue.github.io/algo/2019/08/27/clustered-hashing-basic-operations.html
* - https://jasonlue.github.io/algo/2019/09/03/clustered-hashing-incremental-resize.html
* - https://jasonlue.github.io/algo/2019/09/10/clustered-hashing-modify-on-iteration.html
*
* The dictionary is effectively a hashmap from hashed keys to values. The dictionary owns
* the keys but not the values. The dictionary size will be bounded at around 100K. 1M
* entries is the absolute limit. Only Connections use that many entries, and that is rare.
*/
class Dictionary{
public:
explicit Dictionary(DictOrder ordering = UNORDERED, int initial_size = DEFAULT_DICT_SIZE);
~Dictionary();
// Member functions for looking up a key, inserting/changing its
// contents, and deleting it. These come in two flavors: one
// which takes a HashKey, and the other which takes a raw key,
// its size, and its (unmodulated) hash.
//lookup may move the key to right place if in the old zone to speed up the next lookup.
void* Lookup(const HashKey* key) const;
void* Lookup(const void* key, int key_size, hash_t h) const;
// Returns previous value, or 0 if none.
void* Insert(HashKey* key, void* val)
{ return Insert(key->TakeKey(), key->Size(), key->Hash(), val, false); }
// If copy_key is true, then the key is copied, otherwise it's assumed
// that it's a heap pointer that now belongs to the Dictionary to
// manage as needed.
void* Insert(void* key, int key_size, hash_t hash, void* val, bool copy_key);
// Removes the given element. Returns a pointer to the element in
// case it needs to be deleted. Returns 0 if no such element exists.
// If dontdelete is true, the key's bytes will not be deleted.
void* Remove(const HashKey* key)
{ return Remove(key->Key(), key->Size(), key->Hash()); }
void* Remove(const void* key, int key_size, hash_t hash, bool dont_delete = false);
// Number of entries.
int Length() const
{ return num_entries; }
// Largest it's ever been.
int MaxLength() const
{ return max_entries; }
// Total number of entries ever.
uint64_t NumCumulativeInserts() const
{ return cum_entries; }
// True if the dictionary is ordered, false otherwise.
int IsOrdered() const { return order != nullptr; }
// If the dictionary is ordered then returns the n'th entry's value;
// the second method also returns the key. The first entry inserted
// corresponds to n=0.
//
// Returns nil if the dictionary is not ordered or if "n" is out
// of range.
void* NthEntry(int n) const
{
const void* key;
int key_len;
return NthEntry(n, key, key_len);
}
void* NthEntry(int n, const void*& key, int& key_len) const;
// To iterate through the dictionary, first call InitForIteration()
// to get an "iteration cookie". The cookie can then be handed
// to NextEntry() to get the next entry in the iteration and update
// the cookie. If NextEntry() indicates no more entries, it will
// also delete the cookie, or the cookie can be manually deleted
// prior to this if no longer needed.
//
// Unexpected results will occur if the elements of
// the dictionary are changed between calls to NextEntry() without
// first calling InitForIteration().
//
// If return_hash is true, a HashKey for the entry is returned in h,
// which should be delete'd when no longer needed.
IterCookie* InitForIteration() const;
void* NextEntry(HashKey*& h, IterCookie*& cookie, bool return_hash) const;
void StopIteration(IterCookie* cookie) const;
void SetDeleteFunc(dict_delete_func f) { delete_func = f; }
// With a robust cookie, it is safe to change the dictionary while
// iterating. This means that (i) we will eventually visit all
// unmodified entries as well as all entries added during iteration,
// and (ii) we won't visit any still-unseen entries which are getting
// removed. (We don't get this for free, so only use it if
// necessary.)
void MakeRobustCookie(IterCookie* cookie);
// Remove all entries.
void Clear();
size_t MemoryAllocation() const;
/// The capacity of the table, Buckets + Overflow Size.
int Capacity(bool expected=false) const;
//Debugging
#ifdef DEBUG
void AssertValid() const;
#endif//DEBUG
void Dump(int level=0) const;
void DistanceStats(int& max_distance, int* distances=0, int num_distances=0) const;
void DumpKeys() const;
private:
friend zeek::IterCookie;
/// Buckets of the table, not including overflow size.
int Buckets(bool expected=false) const;
//bucket math
int Log2(int num) const;
int ThresholdEntries() const;
// Used to improve the distribution of the original hash.
hash_t FibHash(hash_t h) const;
// Maps a hash to the appropriate n-bit table bucket.
int BucketByHash(hash_t h, int bit) const;
// Given a position of a non-empty item in the table, find the related bucket.
int BucketByPosition(int position) const;
// Given a bucket of a non-empty item in the table, find the end of its cluster.
// The end should be equal to tail+1 if tail exists. Otherwise it's the tail of
// the just-smaller cluster + 1.
int EndOfClusterByBucket(int bucket) const;
// Given a position of a non-empty item in the table, find the head of its cluster.
int HeadOfClusterByPosition(int position) const;
// Given a position of a non-empty item in the table, find the tail of its cluster.
int TailOfClusterByPosition(int position) const;
// Given a position of a non-empty item in the table, find the end of its cluster.
// The end should be equal to tail+1 if tail exists. Otherwise it's the tail of
// the just-smaller cluster + 1.
int EndOfClusterByPosition(int position) const;
// Given a position of a non-empty item in the table, find the offset of it within
// its cluster.
int OffsetInClusterByPosition(int position) const;
// Next non-empty item position in the table.
int Next(int i) const;
void Init();
//Iteration
IterCookie* InitForIterationNonConst();
void* NextEntryNonConst(HashKey*& h, IterCookie*& cookie, bool return_hash);
void StopIterationNonConst(IterCookie* cookie);
//Lookup
int LinearLookupIndex(const void* key, int key_size, hash_t hash) const;
int LookupIndex(const void* key, int key_size, hash_t hash, int* insert_position = nullptr,
int* insert_distance = nullptr);
int LookupIndex(const void* key, int key_size, hash_t hash, int begin, int end,
int* insert_position = nullptr, int* insert_distance = nullptr);
/// Insert entry, Adjust cookies when necessary.
void InsertRelocateAndAdjust(detail::DictEntry& entry, int insert_position);
/// insert entry into position, relocate other entries when necessary.
void InsertAndRelocate(detail::DictEntry& entry, int insert_position, int* last_affected_position = nullptr);
/// Adjust Cookies on Insert.
void AdjustOnInsert(IterCookie* c, const detail::DictEntry& entry, int insert_position, int last_affected_position);
///Remove, Relocate & Adjust cookies.
detail::DictEntry RemoveRelocateAndAdjust(int position);
///Remove & Relocate
detail::DictEntry RemoveAndRelocate(int position, int* last_affected_position = nullptr);
///Adjust safe cookies after Removal of entry at position.
void AdjustOnRemove(IterCookie* c, const detail::DictEntry& entry, int position, int last_affected_position);
bool Remapping() const { return remap_end >= 0;} //remap in reverse order.
///One round of remap.
void Remap();
// Remap an item in position to a new position. Returns true if the relocation was
// successful, false otherwise. new_position will be set to the new position if a
// pointer is provided to store the new value.
bool Remap(int position, int* new_position = nullptr);
void SizeUp();
//alligned on 8-bytes with 4-leading bytes. 7*8=56 bytes a dictionary.
// when sizeup but the current mapping is in progress. the current mapping will be ignored
// as it will be remapped to new dict size anyway. however, the missed count is recorded
// for lookup. if position not found for a key in the position of dict of current size, it
// still could be in the position of dict of previous N sizes.
unsigned char remaps = 0;
unsigned char log2_buckets = 0;
// Pending number of iterators on the Dict, including both robust and non-robust.
// This is used to avoid remapping if there are any active iterators.
unsigned short num_iterators = 0;
// The last index to be remapped.
int remap_end = -1;
int num_entries = 0;
int max_entries = 0;
uint64_t cum_entries = 0;
dict_delete_func delete_func = nullptr;
detail::DictEntry* table = nullptr;
std::vector<IterCookie*>* cookies = nullptr;
// Order means the order of insertion. means no deletion until exit. will be inefficient.
std::vector<detail::DictEntry>* order = nullptr;
};
/*
* Template specialization of Dictionary that stores pointers for values.
*/
template<typename T>
class PDict : public Dictionary {
public:
explicit PDict(DictOrder ordering = UNORDERED, int initial_size = 0) :
Dictionary(ordering, initial_size) {}
T* Lookup(const char* key) const
{
HashKey h(key);
return (T*) Dictionary::Lookup(&h);
}
T* Lookup(const HashKey* key) const
{ return (T*) Dictionary::Lookup(key); }
T* Insert(const char* key, T* val)
{
HashKey h(key);
return (T*) Dictionary::Insert(&h, (void*) val);
}
T* Insert(HashKey* key, T* val)
{ return (T*) Dictionary::Insert(key, (void*) val); }
T* NthEntry(int n) const
{ return (T*) Dictionary::NthEntry(n); }
T* NthEntry(int n, const char*& key) const
{
int key_len;
return (T*) Dictionary::NthEntry(n, (const void*&) key, key_len);
}
T* NextEntry(IterCookie*& cookie) const
{
HashKey* h;
return (T*) Dictionary::NextEntry(h, cookie, false);
}
T* NextEntry(HashKey*& h, IterCookie*& cookie) const
{ return (T*) Dictionary::NextEntry(h, cookie, true); }
T* RemoveEntry(const HashKey* key)
{ return (T*) Remove(key->Key(), key->Size(), key->Hash()); }
T* RemoveEntry(const HashKey& key)
{ return (T*) Remove(key.Key(), key.Size(), key.Hash()); }
};
} // namespace zeek
using Dictionary [[deprecated("Remove in v4.1. Use zeek::Dictionary instead.")]] = zeek::Dictionary;
template<typename T> using PDict [[deprecated("Remove in v4.1. Use zeek::PDict instead.")]] = zeek::PDict<T>;