// See the file "COPYING" in the main distribution directory for copyright. #pragma once #include #include #include #include "zeek/Hash.h" // Type for function to be called when deleting elements. typedef void (*dict_delete_func)(void*); namespace zeek { class IterCookie; class Dictionary; enum DictOrder { ORDERED, UNORDERED }; // A dict_delete_func that just calls delete. extern void generic_delete_func(void*); namespace detail { class DictEntry; // Default number of hash buckets in dictionary. The dictionary will increase the size // of the hash table as needed. constexpr uint32_t HASH_MASK = 0xFFFFFFFF; //only lower 32 bits. // These four variables can be used to build different targets with -Dxxx for performance // or for debugging purposes. // When incrementally resizing and remapping, it remaps DICT_REMAP_ENTRIES each step. Use // 2 for debug. 16 is best for a release build. constexpr uint8_t DICT_REMAP_ENTRIES = 16; // Load factor = 1 - 0.5 ^ LOAD_FACTOR_BITS. 0.75 is the optimal value for release builds. constexpr uint8_t DICT_LOAD_FACTOR_BITS = 2; // Default number of hash buckets in dictionary. The dictionary will // increase the size of the hash table as needed. constexpr uint8_t DEFAULT_DICT_SIZE = 0; // When log2_buckets > DICT_THRESHOLD_BITS, DICT_LOAD_FACTOR_BITS becomes effective. // Basically if dict size < 2^DICT_THRESHOLD_BITS + n, we size up only if necessary. constexpr uint8_t DICT_THRESHOLD_BITS = 3; // The value of an iteration cookie is the bucket and offset within the // bucket at which to start looking for the next value to return. constexpr uint16_t TOO_FAR_TO_REACH = 0xFFFF; /** * An entry stored in the dictionary. */ class DictEntry { public: #ifdef DEBUG int bucket = 0; #endif // Distance from the expected position in the table. 0xFFFF means that the entry is empty. uint16_t distance = TOO_FAR_TO_REACH; // The size of the key. Less than 8 bytes we'll store directly in the entry, otherwise we'll // store it as a pointer. This avoids extra allocations if we can help it. uint16_t key_size = 0; // Lower 4 bytes of the 8-byte hash, which is used to calculate the position in the table. uint32_t hash = 0; void* value = nullptr; union { char key_here[8]; //hold key len<=8. when over 8, it's a pointer to real keys. char* key; }; DictEntry(void* arg_key, int key_size = 0, hash_t hash = 0, void* value = nullptr, int16_t d = TOO_FAR_TO_REACH, bool copy_key = false) : distance(d), key_size(key_size), hash((uint32_t)hash), value(value) { if ( ! arg_key ) return; if ( key_size <= 8 ) { memcpy(key_here, arg_key, key_size); if ( ! copy_key ) delete [] (char*)arg_key; //own the arg_key, now don't need it. } else { if ( copy_key ) { key = new char[key_size]; memcpy(key, arg_key, key_size); } else { key = (char*)arg_key; } } } bool Empty() const { return distance == TOO_FAR_TO_REACH; } void SetEmpty() { distance = TOO_FAR_TO_REACH; #ifdef DEBUG hash = 0; key = nullptr; value = nullptr; key_size = 0; bucket = 0; #endif//DEBUG } void Clear() { if( key_size > 8 ) delete [] key; SetEmpty(); } const char* GetKey() const { return key_size <= 8 ? key_here : key; } std::unique_ptr GetHashKey() const { return std::make_unique(GetKey(), key_size, hash); } template T GetValue() const { return static_cast(value); } bool Equal(const char* arg_key, int arg_key_size, hash_t arg_hash) const {//only 40-bit hash comparison. return ( 0 == ((hash ^ arg_hash) & HASH_MASK) ) && key_size == arg_key_size && 0 == memcmp(GetKey(), arg_key, key_size); } bool operator==(const DictEntry& r) const { return Equal(r.GetKey(), r.key_size, r.hash); } bool operator!=(const DictEntry& r) const { return ! Equal(r.GetKey(), r.key_size, r.hash); } }; } // namespace detail class DictIterator { public: using value_type = detail::DictEntry; using reference = detail::DictEntry&; using pointer = detail::DictEntry*; using difference_type = std::ptrdiff_t; using iterator_category = std::forward_iterator_tag; DictIterator() = default; ~DictIterator(); DictIterator(const DictIterator& that); DictIterator& operator=(const DictIterator& that); DictIterator(DictIterator&& that); DictIterator& operator=(DictIterator&& that); reference operator*() { return *curr; } pointer operator->() { return curr; } DictIterator& operator++(); DictIterator operator++(int) { auto temp(*this); ++*this; return temp; } bool operator==( const DictIterator& that ) const { return curr == that.curr; } bool operator!=( const DictIterator& that ) const { return !(*this == that); } private: friend class Dictionary; DictIterator(const Dictionary* d, detail::DictEntry* begin, detail::DictEntry* end); Dictionary* dict = nullptr; detail::DictEntry* curr = nullptr; detail::DictEntry* end = nullptr; }; class RobustDictIterator { public: using value_type = detail::DictEntry; using reference = detail::DictEntry&; using pointer = detail::DictEntry*; using difference_type = std::ptrdiff_t; using iterator_category = std::forward_iterator_tag; RobustDictIterator() : curr(nullptr) {} RobustDictIterator(Dictionary* d); RobustDictIterator(const RobustDictIterator& other); RobustDictIterator(RobustDictIterator&& other); ~RobustDictIterator(); reference operator*() { return curr; } pointer operator->() { return &curr; } RobustDictIterator& operator++(); RobustDictIterator operator++(int) { auto temp(*this); ++*this; return temp; } bool operator==( const RobustDictIterator& that ) const { return curr == that.curr; } bool operator!=( const RobustDictIterator& that ) const { return !(*this == that); } private: friend class Dictionary; void Complete(); // Tracks the new entries inserted while iterating. std::vector* inserted = nullptr; // Tracks the entries already visited but were moved across the next iteration // point due to an insertion. std::vector* visited = nullptr; detail::DictEntry curr; Dictionary* dict = nullptr; int next = -1; }; /** * A dictionary type that uses clustered hashing, a variation of Robinhood/Open Addressing * hashing. The following posts help to understand the implementation: * - https://jasonlue.github.io/algo/2019/08/20/clustered-hashing.html * - https://jasonlue.github.io/algo/2019/08/27/clustered-hashing-basic-operations.html * - https://jasonlue.github.io/algo/2019/09/03/clustered-hashing-incremental-resize.html * - https://jasonlue.github.io/algo/2019/09/10/clustered-hashing-modify-on-iteration.html * * The dictionary is effectively a hashmap from hashed keys to values. The dictionary owns * the keys but not the values. The dictionary size will be bounded at around 100K. 1M * entries is the absolute limit. Only Connections use that many entries, and that is rare. */ class Dictionary { public: explicit Dictionary(DictOrder ordering = UNORDERED, int initial_size = detail::DEFAULT_DICT_SIZE); ~Dictionary(); // Member functions for looking up a key, inserting/changing its // contents, and deleting it. These come in two flavors: one // which takes a zeek::detail::HashKey, and the other which takes a raw key, // its size, and its (unmodulated) hash. //lookup may move the key to right place if in the old zone to speed up the next lookup. void* Lookup(const detail::HashKey* key) const; void* Lookup(const void* key, int key_size, detail::hash_t h) const; // Returns previous value, or 0 if none. // If iterators_invalidated is supplied, its value is set to true // if the removal may have invalidated any existing iterators. void* Insert(detail::HashKey* key, void* val, bool* iterators_invalidated = nullptr) { return Insert(key->TakeKey(), key->Size(), key->Hash(), val, false, iterators_invalidated); } // If copy_key is true, then the key is copied, otherwise it's assumed // that it's a heap pointer that now belongs to the Dictionary to // manage as needed. // If iterators_invalidated is supplied, its value is set to true // if the removal may have invalidated any existing iterators. void* Insert(void* key, int key_size, detail::hash_t hash, void* val, bool copy_key, bool* iterators_invalidated = nullptr); // Removes the given element. Returns a pointer to the element in // case it needs to be deleted. Returns 0 if no such element exists. // If dontdelete is true, the key's bytes will not be deleted. // If iterators_invalidated is supplied, its value is set to true // if the removal may have invalidated any existing iterators. void* Remove(const detail::HashKey* key, bool* iterators_invalidated = nullptr) { return Remove(key->Key(), key->Size(), key->Hash(), false, iterators_invalidated); } void* Remove(const void* key, int key_size, detail::hash_t hash, bool dont_delete = false, bool* iterators_invalidated = nullptr); // Number of entries. int Length() const { return num_entries; } // Largest it's ever been. int MaxLength() const { return max_entries; } // Total number of entries ever. uint64_t NumCumulativeInserts() const { return cum_entries; } // True if the dictionary is ordered, false otherwise. int IsOrdered() const { return order != nullptr; } // If the dictionary is ordered then returns the n'th entry's value; // the second method also returns the key. The first entry inserted // corresponds to n=0. // // Returns nil if the dictionary is not ordered or if "n" is out // of range. void* NthEntry(int n) const { const void* key; int key_len; return NthEntry(n, key, key_len); } void* NthEntry(int n, const void*& key, int& key_len) const; // To iterate through the dictionary, first call InitForIteration() // to get an "iteration cookie". The cookie can then be handed // to NextEntry() to get the next entry in the iteration and update // the cookie. If NextEntry() indicates no more entries, it will // also delete the cookie, or the cookie can be manually deleted // prior to this if no longer needed. // // Unexpected results will occur if the elements of // the dictionary are changed between calls to NextEntry() without // first calling InitForIteration(). // // If return_hash is true, a HashKey for the entry is returned in h, // which should be delete'd when no longer needed. [[deprecated("Remove in v5.1. Use begin() and the standard-library-compatible version of iteration.")]] IterCookie* InitForIteration() const; [[deprecated("Remove in v5.1. Use begin() and the standard-library-compatible version of iteration.")]] void* NextEntry(detail::HashKey*& h, IterCookie*& cookie, bool return_hash) const; [[deprecated("Remove in v5.1. Use begin() and the standard-library-compatible version of iteration.")]] void StopIteration(IterCookie* cookie) const; void SetDeleteFunc(dict_delete_func f) { delete_func = f; } // With a robust cookie, it is safe to change the dictionary while // iterating. This means that (i) we will eventually visit all // unmodified entries as well as all entries added during iteration, // and (ii) we won't visit any still-unseen entries which are getting // removed. (We don't get this for free, so only use it if // necessary.) [[deprecated("Remove in v5.1. Use begin_robust() and the standard-library-compatible version of iteration.")]] void MakeRobustCookie(IterCookie* cookie); // Remove all entries. void Clear(); size_t MemoryAllocation() const; /// The capacity of the table, Buckets + Overflow Size. int Capacity(bool expected = false) const; //Debugging #ifdef DEBUG void AssertValid() const; #endif//DEBUG void Dump(int level=0) const; void DistanceStats(int& max_distance, int* distances = 0, int num_distances = 0) const; void DumpKeys() const; // Type traits needed for some of the std algorithms to work using value_type = detail::DictEntry; using pointer = detail::DictEntry*; using const_pointer = const detail::DictEntry*; // Iterator support using iterator = DictIterator; using const_iterator = const iterator; using reverse_iterator = std::reverse_iterator; using const_reverse_iterator = std::reverse_iterator; iterator begin() { return { this, table, table + Capacity() }; } iterator end() { return { this, table + Capacity(), table + Capacity() }; } const_iterator begin() const { return { this, table, table + Capacity() }; } const_iterator end() const { return { this, table + Capacity(), table + Capacity() }; } const_iterator cbegin() { return { this, table, table + Capacity() }; } const_iterator cend() { return { this, table + Capacity(), table + Capacity() }; } RobustDictIterator begin_robust() { return MakeRobustIterator(); } RobustDictIterator end_robust() { return RobustDictIterator(); } private: friend zeek::IterCookie; friend zeek::DictIterator; friend zeek::RobustDictIterator; /// Buckets of the table, not including overflow size. int Buckets(bool expected = false) const; //bucket math int Log2(int num) const; int ThresholdEntries() const; // Used to improve the distribution of the original hash. detail::hash_t FibHash(detail::hash_t h) const; // Maps a hash to the appropriate n-bit table bucket. int BucketByHash(detail::hash_t h, int bit) const; // Given a position of a non-empty item in the table, find the related bucket. int BucketByPosition(int position) const; // Given a bucket of a non-empty item in the table, find the end of its cluster. // The end should be equal to tail+1 if tail exists. Otherwise it's the tail of // the just-smaller cluster + 1. int EndOfClusterByBucket(int bucket) const; // Given a position of a non-empty item in the table, find the head of its cluster. int HeadOfClusterByPosition(int position) const; // Given a position of a non-empty item in the table, find the tail of its cluster. int TailOfClusterByPosition(int position) const; // Given a position of a non-empty item in the table, find the end of its cluster. // The end should be equal to tail+1 if tail exists. Otherwise it's the tail of // the just-smaller cluster + 1. int EndOfClusterByPosition(int position) const; // Given a position of a non-empty item in the table, find the offset of it within // its cluster. int OffsetInClusterByPosition(int position) const; // Next non-empty item position in the table. int Next(int i) const; void Init(); // Iteration [[deprecated("Remove in v5.1. Use begin() and the standard-library-compatible version of iteration.")]] IterCookie* InitForIterationNonConst(); [[deprecated("Remove in v5.1. Use begin() and the standard-library-compatible version of iteration.")]] void* NextEntryNonConst(detail::HashKey*& h, IterCookie*& cookie, bool return_hash); [[deprecated("Remove in v5.1. Use begin() and the standard-library-compatible version of iteration.")]] void StopIterationNonConst(IterCookie* cookie); //Lookup int LinearLookupIndex(const void* key, int key_size, detail::hash_t hash) const; int LookupIndex(const void* key, int key_size, detail::hash_t hash, int* insert_position = nullptr, int* insert_distance = nullptr); int LookupIndex(const void* key, int key_size, detail::hash_t hash, int begin, int end, int* insert_position = nullptr, int* insert_distance = nullptr); /// Insert entry, Adjust cookies when necessary. void InsertRelocateAndAdjust(detail::DictEntry& entry, int insert_position); /// insert entry into position, relocate other entries when necessary. void InsertAndRelocate(detail::DictEntry& entry, int insert_position, int* last_affected_position = nullptr); /// Adjust Cookies on Insert. [[deprecated("Remove in v5.1. Use the standard-library-compatible version of iteration and the version that takes a RobustDictIterator.")]] void AdjustOnInsert(IterCookie* c, const detail::DictEntry& entry, int insert_position, int last_affected_position); void AdjustOnInsert(RobustDictIterator* c, const detail::DictEntry& entry, int insert_position, int last_affected_position); ///Remove, Relocate & Adjust cookies. detail::DictEntry RemoveRelocateAndAdjust(int position); ///Remove & Relocate detail::DictEntry RemoveAndRelocate(int position, int* last_affected_position = nullptr); ///Adjust safe cookies after Removal of entry at position. [[deprecated("Remove in v5.1. Use the standard-library-compatible version of iteration and the version that takes a RobustDictIterator.")]] void AdjustOnRemove(IterCookie* c, const detail::DictEntry& entry, int position, int last_affected_position); void AdjustOnRemove(RobustDictIterator* c, const detail::DictEntry& entry, int position, int last_affected_position); bool Remapping() const { return remap_end >= 0;} //remap in reverse order. ///One round of remap. void Remap(); // Remap an item in position to a new position. Returns true if the relocation was // successful, false otherwise. new_position will be set to the new position if a // pointer is provided to store the new value. bool Remap(int position, int* new_position = nullptr); void SizeUp(); bool HaveOnlyRobustIterators() const { return (num_iterators == 0) || ((cookies ? cookies->size() : 0) + (iterators ? iterators->size() : 0) == num_iterators); } RobustDictIterator MakeRobustIterator(); detail::DictEntry GetNextRobustIteration(RobustDictIterator* iter); //alligned on 8-bytes with 4-leading bytes. 7*8=56 bytes a dictionary. // when sizeup but the current mapping is in progress. the current mapping will be ignored // as it will be remapped to new dict size anyway. however, the missed count is recorded // for lookup. if position not found for a key in the position of dict of current size, it // still could be in the position of dict of previous N sizes. unsigned char remaps = 0; unsigned char log2_buckets = 0; // Pending number of iterators on the Dict, including both robust and non-robust. // This is used to avoid remapping if there are any active iterators. unsigned short num_iterators = 0; // The last index to be remapped. int remap_end = -1; int num_entries = 0; int max_entries = 0; uint64_t cum_entries = 0; dict_delete_func delete_func = nullptr; detail::DictEntry* table = nullptr; std::vector* cookies = nullptr; std::vector* iterators = nullptr; // Order means the order of insertion. means no deletion until exit. will be inefficient. std::vector* order = nullptr; }; /* * Template specialization of Dictionary that stores pointers for values. */ template class PDict : public Dictionary { public: explicit PDict(DictOrder ordering = UNORDERED, int initial_size = 0) : Dictionary(ordering, initial_size) {} T* Lookup(const char* key) const { detail::HashKey h(key); return (T*) Dictionary::Lookup(&h); } T* Lookup(const detail::HashKey* key) const { return (T*) Dictionary::Lookup(key); } T* Insert(const char* key, T* val, bool* iterators_invalidated = nullptr) { detail::HashKey h(key); return (T*) Dictionary::Insert(&h, (void*) val, iterators_invalidated); } T* Insert(detail::HashKey* key, T* val, bool* iterators_invalidated = nullptr) { return (T*) Dictionary::Insert(key, (void*) val, iterators_invalidated); } T* NthEntry(int n) const { return (T*) Dictionary::NthEntry(n); } T* NthEntry(int n, const char*& key) const { int key_len; return (T*) Dictionary::NthEntry(n, (const void*&) key, key_len); } [[deprecated("Remove in v5.1. Use the standard-library-compatible version of iteration.")]] T* NextEntry(IterCookie*& cookie) const { detail::HashKey* h; #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" return (T*) Dictionary::NextEntry(h, cookie, false); #pragma GCC diagnostic pop } [[deprecated("Remove in v5.1. Use the standard-library-compatible version of iteration.")]] T* NextEntry(detail::HashKey*& h, IterCookie*& cookie) const { #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" return (T*) Dictionary::NextEntry(h, cookie, true); #pragma GCC diagnostic pop } T* RemoveEntry(const detail::HashKey* key, bool* iterators_invalidated = nullptr) { return (T*) Remove(key->Key(), key->Size(), key->Hash(), false, iterators_invalidated); } T* RemoveEntry(const detail::HashKey& key, bool* iterators_invalidated = nullptr) { return (T*) Remove(key.Key(), key.Size(), key.Hash(), false, iterators_invalidated); } }; } // namespace zeek