Implement standard-library-compatible iterators for Dictionary

This commit is contained in:
Tim Wojtulewicz 2020-09-22 13:59:13 -07:00
parent 9e9998c6e5
commit 892124378c
16 changed files with 834 additions and 254 deletions

View file

@ -4,10 +4,12 @@
#include <cstdint>
#include <vector>
#include <memory>
#include "zeek/Hash.h"
ZEEK_FORWARD_DECLARE_NAMESPACED(IterCookie, zeek);
ZEEK_FORWARD_DECLARE_NAMESPACED(Dictionary, zeek);
ZEEK_FORWARD_DECLARE_NAMESPACED(DictEntry, zeek::detail);
// Type for function to be called when deleting elements.
@ -69,7 +71,7 @@ public:
uint32_t hash = 0;
void* value = nullptr;
union{
union {
char key_here[8]; //hold key len<=8. when over 8, it's a pointer to real keys.
char* key;
};
@ -78,6 +80,9 @@ public:
int16_t d = TOO_FAR_TO_REACH, bool copy_key = false)
: distance(d), key_size(key_size), hash((uint32_t)hash), value(value)
{
if ( ! arg_key )
return;
if ( key_size <= 8 )
{
memcpy(key_here, arg_key, key_size);
@ -120,6 +125,13 @@ public:
}
const char* GetKey() const { return key_size <= 8 ? key_here : key; }
std::unique_ptr<detail::HashKey> GetHashKey() const
{
return std::make_unique<detail::HashKey>(GetKey(), key_size, hash);
}
template <typename T>
T GetValue() const { return static_cast<T>(value); }
bool Equal(const char* arg_key, int arg_key_size, hash_t arg_hash) const
{//only 40-bit hash comparison.
@ -138,6 +150,76 @@ public:
} // namespace detail
class DictIterator {
public:
using value_type = detail::DictEntry;
using reference = detail::DictEntry&;
using pointer = detail::DictEntry*;
using difference_type = std::ptrdiff_t;
using iterator_category = std::forward_iterator_tag;
~DictIterator();
reference operator*() { return *curr; }
pointer operator->() { return curr; }
DictIterator& operator++();
DictIterator operator++(int) { auto temp(*this); ++*this; return temp; }
bool operator==( const DictIterator& that ) const { return curr == that.curr; }
bool operator!=( const DictIterator& that ) const { return !(*this == that); }
private:
friend class Dictionary;
DictIterator() = default;
DictIterator(const Dictionary* d, detail::DictEntry* begin, detail::DictEntry* end);
Dictionary* dict = nullptr;
detail::DictEntry* curr = nullptr;
detail::DictEntry* end = nullptr;
};
class RobustDictIterator {
public:
using value_type = detail::DictEntry;
using reference = detail::DictEntry&;
using pointer = detail::DictEntry*;
using difference_type = std::ptrdiff_t;
using iterator_category = std::forward_iterator_tag;
RobustDictIterator() : curr(nullptr) {}
RobustDictIterator(Dictionary* d);
RobustDictIterator(const RobustDictIterator& other);
RobustDictIterator(RobustDictIterator&& other);
~RobustDictIterator();
reference operator*() { return curr; }
pointer operator->() { return &curr; }
RobustDictIterator& operator++();
// RobustDictIterator operator++(int) { auto temp(*this); ++*this; return temp; }
bool operator==( const RobustDictIterator& that ) const { return curr == that.curr; }
bool operator!=( const RobustDictIterator& that ) const { return !(*this == that); }
private:
friend class Dictionary;
void Complete();
// Tracks the new entries inserted while iterating.
std::vector<detail::DictEntry>* inserted = nullptr;
// Tracks the entries already visited but were moved across the next iteration
// point due to an insertion.
std::vector<detail::DictEntry>* visited = nullptr;
detail::DictEntry curr;
Dictionary* dict = nullptr;
int next = -1;
};
/**
* A dictionary type that uses clustered hashing, a variation of Robinhood/Open Addressing
* hashing. The following posts help to understand the implementation:
@ -227,8 +309,11 @@ public:
//
// If return_hash is true, a HashKey for the entry is returned in h,
// which should be delete'd when no longer needed.
[[deprecated("Remove in v5.1. Use begin() and the standard-library-compatible version of iteration.")]]
IterCookie* InitForIteration() const;
[[deprecated("Remove in v5.1. Use begin() and the standard-library-compatible version of iteration.")]]
void* NextEntry(detail::HashKey*& h, IterCookie*& cookie, bool return_hash) const;
[[deprecated("Remove in v5.1. Use begin() and the standard-library-compatible version of iteration.")]]
void StopIteration(IterCookie* cookie) const;
void SetDeleteFunc(dict_delete_func f) { delete_func = f; }
@ -239,6 +324,7 @@ public:
// and (ii) we won't visit any still-unseen entries which are getting
// removed. (We don't get this for free, so only use it if
// necessary.)
[[deprecated("Remove in v5.1. Use begin_robust() and the standard-library-compatible version of iteration.")]]
void MakeRobustCookie(IterCookie* cookie);
// Remove all entries.
@ -257,8 +343,35 @@ public:
void DistanceStats(int& max_distance, int* distances = 0, int num_distances = 0) const;
void DumpKeys() const;
// Type traits needed for some of the std algorithms to work
using value_type = detail::DictEntry;
using pointer = detail::DictEntry*;
using const_pointer = const detail::DictEntry*;
// Iterator support
using iterator = DictIterator;
using const_iterator = const iterator;
using reverse_iterator = std::reverse_iterator<iterator>;
using const_reverse_iterator = std::reverse_iterator<const_iterator>;
iterator begin() { return { this, table, table + Capacity() }; }
iterator end() { return { this, table + Capacity(), table + Capacity() }; }
const_iterator begin() const { return { this, table, table + Capacity() }; }
const_iterator end() const { return { this, table + Capacity(), table + Capacity() }; }
const_iterator cbegin() { return { this, table, table + Capacity() }; }
const_iterator cend() { return { this, table + Capacity(), table + Capacity() }; }
// begin_robust returns a shared pointer here because we tend to store them for long
// periods of time (see TableVal::expire_iterator). We don't want to keep the whole
// object in a TableVal if it's never used. end_robust() does not return a pointer
// since it's only used in the loop.
RobustDictIterator begin_robust() { return MakeRobustIterator(); }
RobustDictIterator end_robust() { return RobustDictIterator(); }
private:
friend zeek::IterCookie;
friend zeek::DictIterator;
friend zeek::RobustDictIterator;
/// Buckets of the table, not including overflow size.
int Buckets(bool expected = false) const;
@ -301,9 +414,12 @@ private:
void Init();
//Iteration
// Iteration
[[deprecated("Remove in v5.1. Use begin() and the standard-library-compatible version of iteration.")]]
IterCookie* InitForIterationNonConst();
[[deprecated("Remove in v5.1. Use begin() and the standard-library-compatible version of iteration.")]]
void* NextEntryNonConst(detail::HashKey*& h, IterCookie*& cookie, bool return_hash);
[[deprecated("Remove in v5.1. Use begin() and the standard-library-compatible version of iteration.")]]
void StopIterationNonConst(IterCookie* cookie);
//Lookup
@ -320,7 +436,10 @@ private:
void InsertAndRelocate(detail::DictEntry& entry, int insert_position, int* last_affected_position = nullptr);
/// Adjust Cookies on Insert.
[[deprecated("Remove in v5.1. Use the standard-library-compatible version of iteration and the version that takes a RobustDictIterator.")]]
void AdjustOnInsert(IterCookie* c, const detail::DictEntry& entry, int insert_position, int last_affected_position);
void AdjustOnInsert(RobustDictIterator* c, const detail::DictEntry& entry,
int insert_position, int last_affected_position);
///Remove, Relocate & Adjust cookies.
detail::DictEntry RemoveRelocateAndAdjust(int position);
@ -329,7 +448,10 @@ private:
detail::DictEntry RemoveAndRelocate(int position, int* last_affected_position = nullptr);
///Adjust safe cookies after Removal of entry at position.
[[deprecated("Remove in v5.1. Use the standard-library-compatible version of iteration and the version that takes a RobustDictIterator.")]]
void AdjustOnRemove(IterCookie* c, const detail::DictEntry& entry, int position, int last_affected_position);
void AdjustOnRemove(RobustDictIterator* c, const detail::DictEntry& entry,
int position, int last_affected_position);
bool Remapping() const { return remap_end >= 0;} //remap in reverse order.
@ -344,7 +466,12 @@ private:
void SizeUp();
bool HaveOnlyRobustIterators() const
{ return num_iterators == 0 || (cookies && cookies->size() == num_iterators); }
{
return (num_iterators == 0) || ((cookies ? cookies->size() : 0) + (iterators ? iterators->size() : 0) == num_iterators);
}
RobustDictIterator MakeRobustIterator();
detail::DictEntry GetNextRobustIteration(RobustDictIterator* iter);
//alligned on 8-bytes with 4-leading bytes. 7*8=56 bytes a dictionary.
@ -364,11 +491,12 @@ private:
int num_entries = 0;
int max_entries = 0;
uint64_t cum_entries = 0;
dict_delete_func delete_func = nullptr;
detail::DictEntry* table = nullptr;
std::vector<IterCookie*>* cookies = nullptr;
std::vector<RobustDictIterator*>* iterators = nullptr;
// Order means the order of insertion. means no deletion until exit. will be inefficient.
std::vector<detail::DictEntry>* order = nullptr;
@ -403,13 +531,23 @@ public:
int key_len;
return (T*) Dictionary::NthEntry(n, (const void*&) key, key_len);
}
[[deprecated("Remove in v5.1. Use the standard-library-compatible version of iteration.")]]
T* NextEntry(IterCookie*& cookie) const
{
detail::HashKey* h;
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
return (T*) Dictionary::NextEntry(h, cookie, false);
#pragma GCC diagnostic pop
}
[[deprecated("Remove in v5.1. Use the standard-library-compatible version of iteration.")]]
T* NextEntry(detail::HashKey*& h, IterCookie*& cookie) const
{ return (T*) Dictionary::NextEntry(h, cookie, true); }
{
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
return (T*) Dictionary::NextEntry(h, cookie, true);
#pragma GCC diagnostic pop
}
T* RemoveEntry(const detail::HashKey* key, bool* iterators_invalidated = nullptr)
{ return (T*) Remove(key->Key(), key->Size(), key->Hash(), false, iterators_invalidated); }
T* RemoveEntry(const detail::HashKey& key, bool* iterators_invalidated = nullptr)