From 3b69dd38f360d29e6ef8f245bf63d074227dca93 Mon Sep 17 00:00:00 2001 From: Tim Wojtulewicz Date: Wed, 7 Sep 2022 16:11:03 -0700 Subject: [PATCH 1/3] Add equality, inequality, copy, and move operators to HashKey --- src/Hash.cc | 135 ++++++++++++++++++++++++++++++++++++++++++++++++++++ src/Hash.h | 24 ++++++++-- 2 files changed, 154 insertions(+), 5 deletions(-) diff --git a/src/Hash.cc b/src/Hash.cc index 9e07af5041..9229655800 100644 --- a/src/Hash.cc +++ b/src/Hash.cc @@ -8,6 +8,7 @@ #include #include +#include "zeek/3rdparty/doctest.h" #include "zeek/DebugLogger.h" #include "zeek/Desc.h" #include "zeek/Reporter.h" @@ -184,6 +185,29 @@ HashKey::HashKey(const void* arg_key, size_t arg_size, hash_t arg_hash, bool /* key = (char*)arg_key; } +HashKey::HashKey(const HashKey& other) : HashKey(other.key, other.size, other.hash) { } + +HashKey::HashKey(HashKey&& other) noexcept + { + hash = other.hash; + size = other.size; + write_size = other.write_size; + read_size = other.read_size; + + is_our_dynamic = other.is_our_dynamic; + key = other.key; + + other.size = 0; + other.is_our_dynamic = false; + other.key = nullptr; + } + +HashKey::~HashKey() + { + if ( is_our_dynamic ) + delete[] reinterpret_cast(key); + } + hash_t HashKey::Hash() const { if ( hash == 0 ) @@ -543,4 +567,115 @@ void HashKey::EnsureReadSpace(size_t n) const n, size - read_size); } +bool HashKey::operator==(const HashKey& other) const + { + // Quick exit for the same object. + if ( this == &other ) + return true; + + return Equal(other.key, other.size, other.hash); + } + +bool HashKey::operator!=(const HashKey& other) const + { + // Quick exit for different objects. + if ( this != &other ) + return true; + + return ! Equal(other.key, other.size, other.hash); + } + +bool HashKey::Equal(const void* other_key, size_t other_size, hash_t other_hash) const + { + // If the key memory is the same just return true. + if ( key == other_key && size == other_size ) + return true; + + // If either key is nullptr, return false. If they were both nullptr, it + // would have fallen in to the above block already. + if ( key == nullptr || other_key == nullptr ) + return false; + + return (hash == other_hash) && (size == other_size) && (memcmp(key, other_key, size) == 0); + } + +HashKey& HashKey::operator=(const HashKey& other) + { + if ( this == &other ) + return *this; + + if ( is_our_dynamic && IsAllocated() ) + delete[] key; + + hash = other.hash; + size = other.size; + is_our_dynamic = true; + write_size = other.write_size; + read_size = other.read_size; + + key = CopyKey(other.key, other.size); + + return *this; + } + +HashKey& HashKey::operator=(HashKey&& other) noexcept + { + if ( this == &other ) + return *this; + + hash = other.hash; + size = other.size; + write_size = other.write_size; + read_size = other.read_size; + + if ( is_our_dynamic && IsAllocated() ) + delete[] key; + + is_our_dynamic = other.is_our_dynamic; + key = other.key; + + other.size = 0; + other.is_our_dynamic = false; + other.key = nullptr; + + return *this; + } + +TEST_SUITE_BEGIN("Hash"); + +TEST_CASE("equality") + { + HashKey h1(12345); + HashKey h2(12345); + HashKey h3(67890); + + CHECK(h1 == h2); + CHECK(h1 != h3); + } + +TEST_CASE("copy assignment") + { + HashKey h1(12345); + HashKey h2 = h1; + HashKey h3{h1}; + + CHECK(h1 == h2); + CHECK(h1 == h3); + } + +TEST_CASE("move assignment") + { + HashKey h1(12345); + HashKey h2(12345); + HashKey h3(12345); + + HashKey h4 = std::move(h2); + HashKey h5{h3}; + + CHECK(h1 == h4); + CHECK(h1 == h5); + } + +TEST_SUITE_END(); + } // namespace zeek::detail diff --git a/src/Hash.h b/src/Hash.h index ad3e50245d..4f70ce1219 100644 --- a/src/Hash.h +++ b/src/Hash.h @@ -255,11 +255,14 @@ public: // function from the one above; its value is not used. HashKey(const void* key, size_t size, hash_t hash, bool dont_copy); - ~HashKey() - { - if ( is_our_dynamic ) - delete[](char*) key; - } + // Copy constructor. Always copies the key. + HashKey(const HashKey& other); + + // Move constructor. Takes ownership of the key. + HashKey(HashKey&& other) noexcept; + + // Destructor + ~HashKey(); // Hands over the key to the caller. This means that if the // key is our dynamic, we give it to the caller and mark it @@ -343,6 +346,17 @@ public: void Describe(ODesc* d) const; + bool operator==(const HashKey& other) const; + bool operator!=(const HashKey& other) const; + + bool Equal(const void* other_key, size_t other_size, hash_t other_hash) const; + + // Copy operator. Always copies the key. + HashKey& operator=(const HashKey& other); + + // Move operator. Takes ownership of the key. + HashKey& operator=(HashKey&& other) noexcept; + protected: char* CopyKey(const char* key, size_t size) const; From 57ae03dd7d145beaaef4ed1616bdeb4508124a15 Mon Sep 17 00:00:00 2001 From: Tim Wojtulewicz Date: Wed, 7 Sep 2022 17:08:41 -0700 Subject: [PATCH 2/3] Add support for itertors with ordered dictionaries --- src/Dict.cc | 90 +++++++++++++++++++++ src/Dict.h | 225 ++++++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 272 insertions(+), 43 deletions(-) diff --git a/src/Dict.cc b/src/Dict.cc index 8fc2bede4f..a1540787dd 100644 --- a/src/Dict.cc +++ b/src/Dict.cc @@ -246,6 +246,96 @@ TEST_CASE("dict robust iteration") delete key3; } +TEST_CASE("dict ordered iteration") + { + PDict dict(DictOrder::ORDERED); + + // These key values are specifically contrived to be inserted + // into the dictionary in a different order by default. + uint32_t val = 15; + uint32_t key_val = 5; + auto key = std::make_unique(key_val); + + uint32_t val2 = 10; + uint32_t key_val2 = 25; + auto key2 = std::make_unique(key_val2); + + uint32_t val3 = 30; + uint32_t key_val3 = 45; + auto key3 = std::make_unique(key_val3); + + uint32_t val4 = 20; + uint32_t key_val4 = 35; + auto key4 = std::make_unique(key_val4); + + // Only insert the first three to start with so we can test the order + // being the same after a later insertion. + dict.Insert(key.get(), &val); + dict.Insert(key2.get(), &val2); + dict.Insert(key3.get(), &val3); + + int count = 0; + + for ( const auto& entry : dict ) + { + auto* v = static_cast(entry.value); + uint32_t k = *(uint32_t*)entry.GetKey(); + + // The keys should be returned in the same order we inserted + // them, which is 5, 25, 45. + if ( count == 0 ) + CHECK(k == 5); + else if ( count == 1 ) + CHECK(k == 25); + else if ( count == 2 ) + CHECK(k == 45); + + count++; + } + + dict.Insert(key4.get(), &val4); + count = 0; + + for ( const auto& entry : dict ) + { + auto* v = static_cast(entry.value); + uint32_t k = *(uint32_t*)entry.GetKey(); + + // The keys should be returned in the same order we inserted + // them, which is 5, 25, 45, 35. + if ( count == 0 ) + CHECK(k == 5); + else if ( count == 1 ) + CHECK(k == 25); + else if ( count == 2 ) + CHECK(k == 45); + else if ( count == 3 ) + CHECK(k == 35); + + count++; + } + + dict.Remove(key2.get()); + count = 0; + + for ( const auto& entry : dict ) + { + auto* v = static_cast(entry.value); + uint32_t k = *(uint32_t*)entry.GetKey(); + + // The keys should be returned in the same order we inserted + // them, which is 5, 45, 35. + if ( count == 0 ) + CHECK(k == 5); + else if ( count == 1 ) + CHECK(k == 45); + else if ( count == 2 ) + CHECK(k == 35); + + count++; + } + } + class DictTestDummy { public: diff --git a/src/Dict.h b/src/Dict.h index b257766a67..9a0a40b8b0 100644 --- a/src/Dict.h +++ b/src/Dict.h @@ -163,6 +163,8 @@ public: bool operator!=(const DictEntry& r) const { return ! Equal(r.GetKey(), r.key_size, r.hash); } }; +using DictEntryVec = std::vector; + } // namespace detail template class DictIterator @@ -198,6 +200,8 @@ public: dict = that.dict; curr = that.curr; end = that.end; + ordered_iter = that.ordered_iter; + dict->IncrIters(); } @@ -215,6 +219,8 @@ public: dict = that.dict; curr = that.curr; end = that.end; + ordered_iter = that.ordered_iter; + dict->IncrIters(); return *this; @@ -234,6 +240,7 @@ public: dict = that.dict; curr = that.curr; end = that.end; + ordered_iter = that.ordered_iter; that.dict = nullptr; } @@ -252,25 +259,64 @@ public: dict = that.dict; curr = that.curr; end = that.end; + ordered_iter = that.ordered_iter; that.dict = nullptr; return *this; } - reference operator*() { return *curr; } - reference operator*() const { return *curr; } - pointer operator->() { return curr; } - pointer operator->() const { return curr; } + reference operator*() + { + if ( dict->IsOrdered() ) + { + // TODO: how does this work if ordered_iter == end(). LookupEntry will return a nullptr, + // which the dereference will fail on. That's undefined behavior, correct? Is that any + // different than if the unordered version returns a dereference of it's end? + auto e = dict->LookupEntry(*ordered_iter); + return *e; + } + + return *curr; + } + reference operator*() const + { + if ( dict->IsOrdered() ) + { + auto e = dict->LookupEntry(*ordered_iter); + return *e; + } + + return *curr; + } + pointer operator->() + { + if ( dict->IsOrdered() ) + return dict->LookupEntry(*ordered_iter); + + return curr; + } + pointer operator->() const + { + if ( dict->IsOrdered() ) + return dict->LookupEntry(*ordered_iter); + + return curr; + } DictIterator& operator++() { - // The non-robust case is easy. Just advanced the current position forward until you find - // one isn't empty and isn't the end. - do + if ( dict->IsOrdered() ) + ++ordered_iter; + else { - ++curr; - } while ( curr != end && curr->Empty() ); + // The non-robust case is easy. Just advance the current position forward until you + // find one isn't empty and isn't the end. + do + { + ++curr; + } while ( curr != end && curr->Empty() ); + } return *this; } @@ -282,7 +328,17 @@ public: return temp; } - bool operator==(const DictIterator& that) const { return curr == that.curr; } + bool operator==(const DictIterator& that) const + { + if ( dict != that.dict ) + return false; + + if ( dict->IsOrdered() ) + return ordered_iter == that.ordered_iter; + + return curr == that.curr; + } + bool operator!=(const DictIterator& that) const { return ! (*this == that); } private: @@ -291,10 +347,21 @@ private: DictIterator(const Dictionary* d, detail::DictEntry* begin, detail::DictEntry* end) : curr(begin), end(end) { + // Cast away the constness so that the number of iterators can be modified in the + // dictionary. This does violate the constness guarantees of const-begin()/end() and + // cbegin()/cend(), but we're not modifying the actual data in the collection, just a + // counter in the wrapper of the collection. + dict = const_cast*>(d); + // Make sure that we're starting on a non-empty element. while ( curr != end && curr->Empty() ) ++curr; + dict->IncrIters(); + } + + DictIterator(const Dictionary* d, detail::DictEntryVec::iterator iter) : ordered_iter(iter) + { // Cast away the constness so that the number of iterators can be modified in the // dictionary. This does violate the constness guarantees of const-begin()/end() and // cbegin()/cend(), but we're not modifying the actual data in the collection, just a @@ -306,6 +373,7 @@ private: Dictionary* dict = nullptr; detail::DictEntry* curr = nullptr; detail::DictEntry* end = nullptr; + detail::DictEntryVec::iterator ordered_iter; }; template class RobustDictIterator @@ -462,7 +530,7 @@ public: } if ( ordering == ORDERED ) - order = new std::vector>; + order = std::make_unique>(); } ~Dictionary() { Clear(); } @@ -479,12 +547,10 @@ public: T* Lookup(const void* key, int key_size, detail::hash_t h) const { - // Look up possibly modifies the entry. Why? if the entry is found but not positioned - // according to the current dict (so it's before SizeUp), it will be moved to the right - // position so next lookup is fast. - Dictionary* d = const_cast(this); - int position = d->LookupIndex(key, key_size, h); - return position >= 0 ? table[position].value : nullptr; + if ( auto e = LookupEntry(key, key_size, h) ) + return e->value; + + return nullptr; } T* Lookup(const char* key) const @@ -530,13 +596,6 @@ public: if ( ! copy_key ) delete[](char*) key; - if ( order ) - { // set new v to order too. - auto it = std::find(order->begin(), order->end(), table[position]); - ASSERT(it != order->end()); - it->value = val; - } - if ( iterators && ! iterators->empty() ) // need to set new v for iterators too. for ( auto c : *iterators ) @@ -564,12 +623,15 @@ public: "Dictionary::Insert() possibly caused iterator invalidation"); } + // Do this before the actual insertion since creating the DictEntry is going to delete + // the key data. We need a copy of it first. + if ( order ) + order->emplace_back(detail::HashKey{key, static_cast(key_size), hash}); + // Allocate memory for key if necesary. Key is updated to reflect internal key if // necessary. detail::DictEntry entry(key, key_size, hash, val, insert_distance, copy_key); InsertRelocateAndAdjust(entry, insert_position); - if ( order ) - order->push_back(entry); num_entries++; cum_entries++; @@ -629,7 +691,16 @@ public: ASSERT(num_entries >= 0); // e is about to be invalid. remove it from all references. if ( order ) - order->erase(std::remove(order->begin(), order->end(), entry), order->end()); + { + for ( auto it = order->begin(); it != order->end(); ++it ) + { + if ( it->Equal(key, key_size, hash) ) + { + it = order->erase(it); + break; + } + } + } T* v = entry.value; entry.Clear(); @@ -677,10 +748,13 @@ public: { if ( ! order || n < 0 || n >= Length() ) return nullptr; - detail::DictEntry entry = (*order)[n]; - key = entry.GetKey(); - key_size = entry.key_size; - return entry.value; + + auto& hk = order->at(n); + auto entry = Lookup(&hk); + + key = hk.Key(); + key_size = hk.Size(); + return entry; } T* NthEntry(int n, const char*& key) const @@ -709,10 +783,8 @@ public: } if ( order ) - { - delete order; - order = nullptr; - } + order.reset(); + if ( iterators ) { delete iterators; @@ -913,12 +985,48 @@ public: using reverse_iterator = std::reverse_iterator; using const_reverse_iterator = std::reverse_iterator; - iterator begin() { return {this, table, table + Capacity()}; } - iterator end() { return {this, table + Capacity(), table + Capacity()}; } - const_iterator begin() const { return {this, table, table + Capacity()}; } - const_iterator end() const { return {this, table + Capacity(), table + Capacity()}; } - const_iterator cbegin() { return {this, table, table + Capacity()}; } - const_iterator cend() { return {this, table + Capacity(), table + Capacity()}; } + iterator begin() + { + if ( IsOrdered() ) + return {this, order->begin()}; + + return {this, table, table + Capacity()}; + } + iterator end() + { + if ( IsOrdered() ) + return {this, order->end()}; + + return {this, table + Capacity(), table + Capacity()}; + } + const_iterator begin() const + { + if ( IsOrdered() ) + return {this, order->begin()}; + + return {this, table, table + Capacity()}; + } + const_iterator end() const + { + if ( IsOrdered() ) + return {this, order->end()}; + + return {this, table + Capacity(), table + Capacity()}; + } + const_iterator cbegin() + { + if ( IsOrdered() ) + return {this, order->begin()}; + + return {this, table, table + Capacity()}; + } + const_iterator cend() + { + if ( IsOrdered() ) + return {this, order->end()}; + + return {this, table + Capacity(), table + Capacity()}; + } RobustDictIterator begin_robust() { return MakeRobustIterator(); } RobustDictIterator end_robust() { return RobustDictIterator(); } @@ -1400,6 +1508,35 @@ private: // previous log2_buckets. } + /** + * Retrieves a pointer to a full DictEntry in the table based on a hash key. + * + * @param key the key to lookup. + * @return A pointer to the entry or a nullptr if no entry has a matching key. + */ + detail::DictEntry* LookupEntry(const detail::HashKey& key) + { + return LookupEntry(key.Key(), key.Size(), key.Hash()); + } + + /** + * Retrieves a pointer to a full DictEntry in the table based on key data. + * + * @param key the key to lookup + * @param key_size the size of the key data + * @param h a hash of the key data. + * @return A pointer to the entry or a nullptr if no entry has a matching key. + */ + detail::DictEntry* LookupEntry(const void* key, int key_size, detail::hash_t h) const + { + // Look up possibly modifies the entry. Why? if the entry is found but not positioned + // according to the current dict (so it's before SizeUp), it will be moved to the right + // position so next lookup is fast. + Dictionary* d = const_cast(this); + int position = d->LookupIndex(key, key_size, h); + return position >= 0 ? &(table[position]) : nullptr; + } + bool HaveOnlyRobustIterators() const { return (num_iterators == 0) || ((iterators ? iterators->size() : 0) == num_iterators); @@ -1509,8 +1646,10 @@ private: detail::DictEntry* table = nullptr; std::vector*>* iterators = nullptr; - // Order means the order of insertion. means no deletion until exit. will be inefficient. - std::vector>* order = nullptr; + // Ordered dictionaries keep the order based on some criteria, by default the order of + // insertion. We only store a copy of the keys here for memory savings and for safety + // around reallocs and such. + std::unique_ptr order; }; template using PDict = Dictionary; From 20292b02102c3e48981b93d60df4cb5b73b50a87 Mon Sep 17 00:00:00 2001 From: Tim Wojtulewicz Date: Thu, 8 Sep 2022 15:37:34 -0700 Subject: [PATCH 3/3] Disable robust iteration for ordered dictionaries This also includes some minor commenting cleanup in that class --- src/Dict.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/Dict.h b/src/Dict.h index 9a0a40b8b0..163dbb1158 100644 --- a/src/Dict.h +++ b/src/Dict.h @@ -1544,6 +1544,10 @@ private: RobustDictIterator MakeRobustIterator() { + if ( IsOrdered() ) + reporter->InternalError( + "RobustIterators are not currently supported for ordered dictionaries"); + if ( ! iterators ) iterators = new std::vector*>; @@ -1552,15 +1556,17 @@ private: detail::DictEntry GetNextRobustIteration(RobustDictIterator* iter) { - // If there are any inserted entries, return them first. - // That keeps the list small and helps avoiding searching - // a large list when deleting an entry. + // If there's no table in the dictionary, then the iterator needs to be + // cleaned up because it's not pointing at anything. if ( ! table ) { iter->Complete(); return detail::DictEntry(nullptr); // end of iteration } + // If there are any inserted entries, return them first. + // That keeps the list small and helps avoiding searching + // a large list when deleting an entry. if ( iter->inserted && ! iter->inserted->empty() ) { // Return the last one. Order doesn't matter, @@ -1570,6 +1576,7 @@ private: return e; } + // First iteration. if ( iter->next < 0 ) iter->next = Next(-1);