diff --git a/src/Dict.cc b/src/Dict.cc index 8fc2bede4f..a1540787dd 100644 --- a/src/Dict.cc +++ b/src/Dict.cc @@ -246,6 +246,96 @@ TEST_CASE("dict robust iteration") delete key3; } +TEST_CASE("dict ordered iteration") + { + PDict dict(DictOrder::ORDERED); + + // These key values are specifically contrived to be inserted + // into the dictionary in a different order by default. + uint32_t val = 15; + uint32_t key_val = 5; + auto key = std::make_unique(key_val); + + uint32_t val2 = 10; + uint32_t key_val2 = 25; + auto key2 = std::make_unique(key_val2); + + uint32_t val3 = 30; + uint32_t key_val3 = 45; + auto key3 = std::make_unique(key_val3); + + uint32_t val4 = 20; + uint32_t key_val4 = 35; + auto key4 = std::make_unique(key_val4); + + // Only insert the first three to start with so we can test the order + // being the same after a later insertion. + dict.Insert(key.get(), &val); + dict.Insert(key2.get(), &val2); + dict.Insert(key3.get(), &val3); + + int count = 0; + + for ( const auto& entry : dict ) + { + auto* v = static_cast(entry.value); + uint32_t k = *(uint32_t*)entry.GetKey(); + + // The keys should be returned in the same order we inserted + // them, which is 5, 25, 45. + if ( count == 0 ) + CHECK(k == 5); + else if ( count == 1 ) + CHECK(k == 25); + else if ( count == 2 ) + CHECK(k == 45); + + count++; + } + + dict.Insert(key4.get(), &val4); + count = 0; + + for ( const auto& entry : dict ) + { + auto* v = static_cast(entry.value); + uint32_t k = *(uint32_t*)entry.GetKey(); + + // The keys should be returned in the same order we inserted + // them, which is 5, 25, 45, 35. + if ( count == 0 ) + CHECK(k == 5); + else if ( count == 1 ) + CHECK(k == 25); + else if ( count == 2 ) + CHECK(k == 45); + else if ( count == 3 ) + CHECK(k == 35); + + count++; + } + + dict.Remove(key2.get()); + count = 0; + + for ( const auto& entry : dict ) + { + auto* v = static_cast(entry.value); + uint32_t k = *(uint32_t*)entry.GetKey(); + + // The keys should be returned in the same order we inserted + // them, which is 5, 45, 35. + if ( count == 0 ) + CHECK(k == 5); + else if ( count == 1 ) + CHECK(k == 45); + else if ( count == 2 ) + CHECK(k == 35); + + count++; + } + } + class DictTestDummy { public: diff --git a/src/Dict.h b/src/Dict.h index b257766a67..9a0a40b8b0 100644 --- a/src/Dict.h +++ b/src/Dict.h @@ -163,6 +163,8 @@ public: bool operator!=(const DictEntry& r) const { return ! Equal(r.GetKey(), r.key_size, r.hash); } }; +using DictEntryVec = std::vector; + } // namespace detail template class DictIterator @@ -198,6 +200,8 @@ public: dict = that.dict; curr = that.curr; end = that.end; + ordered_iter = that.ordered_iter; + dict->IncrIters(); } @@ -215,6 +219,8 @@ public: dict = that.dict; curr = that.curr; end = that.end; + ordered_iter = that.ordered_iter; + dict->IncrIters(); return *this; @@ -234,6 +240,7 @@ public: dict = that.dict; curr = that.curr; end = that.end; + ordered_iter = that.ordered_iter; that.dict = nullptr; } @@ -252,25 +259,64 @@ public: dict = that.dict; curr = that.curr; end = that.end; + ordered_iter = that.ordered_iter; that.dict = nullptr; return *this; } - reference operator*() { return *curr; } - reference operator*() const { return *curr; } - pointer operator->() { return curr; } - pointer operator->() const { return curr; } + reference operator*() + { + if ( dict->IsOrdered() ) + { + // TODO: how does this work if ordered_iter == end(). LookupEntry will return a nullptr, + // which the dereference will fail on. That's undefined behavior, correct? Is that any + // different than if the unordered version returns a dereference of it's end? + auto e = dict->LookupEntry(*ordered_iter); + return *e; + } + + return *curr; + } + reference operator*() const + { + if ( dict->IsOrdered() ) + { + auto e = dict->LookupEntry(*ordered_iter); + return *e; + } + + return *curr; + } + pointer operator->() + { + if ( dict->IsOrdered() ) + return dict->LookupEntry(*ordered_iter); + + return curr; + } + pointer operator->() const + { + if ( dict->IsOrdered() ) + return dict->LookupEntry(*ordered_iter); + + return curr; + } DictIterator& operator++() { - // The non-robust case is easy. Just advanced the current position forward until you find - // one isn't empty and isn't the end. - do + if ( dict->IsOrdered() ) + ++ordered_iter; + else { - ++curr; - } while ( curr != end && curr->Empty() ); + // The non-robust case is easy. Just advance the current position forward until you + // find one isn't empty and isn't the end. + do + { + ++curr; + } while ( curr != end && curr->Empty() ); + } return *this; } @@ -282,7 +328,17 @@ public: return temp; } - bool operator==(const DictIterator& that) const { return curr == that.curr; } + bool operator==(const DictIterator& that) const + { + if ( dict != that.dict ) + return false; + + if ( dict->IsOrdered() ) + return ordered_iter == that.ordered_iter; + + return curr == that.curr; + } + bool operator!=(const DictIterator& that) const { return ! (*this == that); } private: @@ -291,10 +347,21 @@ private: DictIterator(const Dictionary* d, detail::DictEntry* begin, detail::DictEntry* end) : curr(begin), end(end) { + // Cast away the constness so that the number of iterators can be modified in the + // dictionary. This does violate the constness guarantees of const-begin()/end() and + // cbegin()/cend(), but we're not modifying the actual data in the collection, just a + // counter in the wrapper of the collection. + dict = const_cast*>(d); + // Make sure that we're starting on a non-empty element. while ( curr != end && curr->Empty() ) ++curr; + dict->IncrIters(); + } + + DictIterator(const Dictionary* d, detail::DictEntryVec::iterator iter) : ordered_iter(iter) + { // Cast away the constness so that the number of iterators can be modified in the // dictionary. This does violate the constness guarantees of const-begin()/end() and // cbegin()/cend(), but we're not modifying the actual data in the collection, just a @@ -306,6 +373,7 @@ private: Dictionary* dict = nullptr; detail::DictEntry* curr = nullptr; detail::DictEntry* end = nullptr; + detail::DictEntryVec::iterator ordered_iter; }; template class RobustDictIterator @@ -462,7 +530,7 @@ public: } if ( ordering == ORDERED ) - order = new std::vector>; + order = std::make_unique>(); } ~Dictionary() { Clear(); } @@ -479,12 +547,10 @@ public: T* Lookup(const void* key, int key_size, detail::hash_t h) const { - // Look up possibly modifies the entry. Why? if the entry is found but not positioned - // according to the current dict (so it's before SizeUp), it will be moved to the right - // position so next lookup is fast. - Dictionary* d = const_cast(this); - int position = d->LookupIndex(key, key_size, h); - return position >= 0 ? table[position].value : nullptr; + if ( auto e = LookupEntry(key, key_size, h) ) + return e->value; + + return nullptr; } T* Lookup(const char* key) const @@ -530,13 +596,6 @@ public: if ( ! copy_key ) delete[](char*) key; - if ( order ) - { // set new v to order too. - auto it = std::find(order->begin(), order->end(), table[position]); - ASSERT(it != order->end()); - it->value = val; - } - if ( iterators && ! iterators->empty() ) // need to set new v for iterators too. for ( auto c : *iterators ) @@ -564,12 +623,15 @@ public: "Dictionary::Insert() possibly caused iterator invalidation"); } + // Do this before the actual insertion since creating the DictEntry is going to delete + // the key data. We need a copy of it first. + if ( order ) + order->emplace_back(detail::HashKey{key, static_cast(key_size), hash}); + // Allocate memory for key if necesary. Key is updated to reflect internal key if // necessary. detail::DictEntry entry(key, key_size, hash, val, insert_distance, copy_key); InsertRelocateAndAdjust(entry, insert_position); - if ( order ) - order->push_back(entry); num_entries++; cum_entries++; @@ -629,7 +691,16 @@ public: ASSERT(num_entries >= 0); // e is about to be invalid. remove it from all references. if ( order ) - order->erase(std::remove(order->begin(), order->end(), entry), order->end()); + { + for ( auto it = order->begin(); it != order->end(); ++it ) + { + if ( it->Equal(key, key_size, hash) ) + { + it = order->erase(it); + break; + } + } + } T* v = entry.value; entry.Clear(); @@ -677,10 +748,13 @@ public: { if ( ! order || n < 0 || n >= Length() ) return nullptr; - detail::DictEntry entry = (*order)[n]; - key = entry.GetKey(); - key_size = entry.key_size; - return entry.value; + + auto& hk = order->at(n); + auto entry = Lookup(&hk); + + key = hk.Key(); + key_size = hk.Size(); + return entry; } T* NthEntry(int n, const char*& key) const @@ -709,10 +783,8 @@ public: } if ( order ) - { - delete order; - order = nullptr; - } + order.reset(); + if ( iterators ) { delete iterators; @@ -913,12 +985,48 @@ public: using reverse_iterator = std::reverse_iterator; using const_reverse_iterator = std::reverse_iterator; - iterator begin() { return {this, table, table + Capacity()}; } - iterator end() { return {this, table + Capacity(), table + Capacity()}; } - const_iterator begin() const { return {this, table, table + Capacity()}; } - const_iterator end() const { return {this, table + Capacity(), table + Capacity()}; } - const_iterator cbegin() { return {this, table, table + Capacity()}; } - const_iterator cend() { return {this, table + Capacity(), table + Capacity()}; } + iterator begin() + { + if ( IsOrdered() ) + return {this, order->begin()}; + + return {this, table, table + Capacity()}; + } + iterator end() + { + if ( IsOrdered() ) + return {this, order->end()}; + + return {this, table + Capacity(), table + Capacity()}; + } + const_iterator begin() const + { + if ( IsOrdered() ) + return {this, order->begin()}; + + return {this, table, table + Capacity()}; + } + const_iterator end() const + { + if ( IsOrdered() ) + return {this, order->end()}; + + return {this, table + Capacity(), table + Capacity()}; + } + const_iterator cbegin() + { + if ( IsOrdered() ) + return {this, order->begin()}; + + return {this, table, table + Capacity()}; + } + const_iterator cend() + { + if ( IsOrdered() ) + return {this, order->end()}; + + return {this, table + Capacity(), table + Capacity()}; + } RobustDictIterator begin_robust() { return MakeRobustIterator(); } RobustDictIterator end_robust() { return RobustDictIterator(); } @@ -1400,6 +1508,35 @@ private: // previous log2_buckets. } + /** + * Retrieves a pointer to a full DictEntry in the table based on a hash key. + * + * @param key the key to lookup. + * @return A pointer to the entry or a nullptr if no entry has a matching key. + */ + detail::DictEntry* LookupEntry(const detail::HashKey& key) + { + return LookupEntry(key.Key(), key.Size(), key.Hash()); + } + + /** + * Retrieves a pointer to a full DictEntry in the table based on key data. + * + * @param key the key to lookup + * @param key_size the size of the key data + * @param h a hash of the key data. + * @return A pointer to the entry or a nullptr if no entry has a matching key. + */ + detail::DictEntry* LookupEntry(const void* key, int key_size, detail::hash_t h) const + { + // Look up possibly modifies the entry. Why? if the entry is found but not positioned + // according to the current dict (so it's before SizeUp), it will be moved to the right + // position so next lookup is fast. + Dictionary* d = const_cast(this); + int position = d->LookupIndex(key, key_size, h); + return position >= 0 ? &(table[position]) : nullptr; + } + bool HaveOnlyRobustIterators() const { return (num_iterators == 0) || ((iterators ? iterators->size() : 0) == num_iterators); @@ -1509,8 +1646,10 @@ private: detail::DictEntry* table = nullptr; std::vector*>* iterators = nullptr; - // Order means the order of insertion. means no deletion until exit. will be inefficient. - std::vector>* order = nullptr; + // Ordered dictionaries keep the order based on some criteria, by default the order of + // insertion. We only store a copy of the keys here for memory savings and for safety + // around reallocs and such. + std::unique_ptr order; }; template using PDict = Dictionary;