// See the file "COPYING" in the main distribution directory for copyright. #include "zeek-config.h" #include "zeek/Dict.h" #ifdef HAVE_MEMORY_H #include #endif #include #include #include #include #include "zeek/3rdparty/doctest.h" #include "zeek/Reporter.h" #include "zeek/util.h" #if defined(DEBUG) && defined(ZEEK_DICT_DEBUG) #define ASSERT_VALID(o) o->AssertValid() #else #define ASSERT_VALID(o) #endif//DEBUG namespace zeek { class [[deprecated("Remove in v5.1. Use the standard-library-compatible version of iteration.")]] IterCookie { public: IterCookie(Dictionary* d) : d(d) {} bool robust = false; Dictionary* d = nullptr; // Index for the next valid entry. -1 is the default, meaning we haven't started // iterating yet. int next = -1; //index for next valid entry. -1 is default not started yet. // Tracks the new entries inserted while iterating. Only used for robust cookies. std::vector* inserted = nullptr; // Tracks the entries already visited but were moved across the next iteration // point due to an insertion. Only used for robust cookies. std::vector* visited = nullptr; void MakeRobust() { // IterCookies can't be made robust after iteration has started. ASSERT(next < 0); ASSERT(d && d->cookies); robust = true; inserted = new std::vector(); visited = new std::vector(); d->cookies->push_back(this); } void AssertValid() const { ASSERT(d && -1 <= next && next <= d->Capacity()); ASSERT(( ! robust && ! inserted && ! visited ) || ( robust && inserted && visited )); } ~IterCookie() { ASSERT_VALID(this); if( robust ) { d->cookies->erase(std::remove(d->cookies->begin(), d->cookies->end(), this), d->cookies->end()); delete inserted; delete visited; } } }; // namespace detail TEST_SUITE_BEGIN("Dict"); TEST_CASE("dict construction") { PDict dict; CHECK(! 
dict.IsOrdered()); CHECK(dict.Length() == 0); PDict dict2(ORDERED); CHECK(dict2.IsOrdered()); CHECK(dict2.Length() == 0); } TEST_CASE("dict operation") { PDict dict; uint32_t val = 10; uint32_t key_val = 5; detail::HashKey* key = new detail::HashKey(key_val); dict.Insert(key, &val); CHECK(dict.Length() == 1); detail::HashKey* key2 = new detail::HashKey(key_val); uint32_t* lookup = dict.Lookup(key2); CHECK(*lookup == val); dict.Remove(key2); CHECK(dict.Length() == 0); uint32_t* lookup2 = dict.Lookup(key2); CHECK(lookup2 == (uint32_t*)0); delete key2; CHECK(dict.MaxLength() == 1); CHECK(dict.NumCumulativeInserts() == 1); dict.Insert(key, &val); dict.Remove(key); CHECK(dict.MaxLength() == 1); CHECK(dict.NumCumulativeInserts() == 2); uint32_t val2 = 15; uint32_t key_val2 = 25; key2 = new detail::HashKey(key_val2); dict.Insert(key, &val); dict.Insert(key2, &val2); CHECK(dict.Length() == 2); CHECK(dict.NumCumulativeInserts() == 4); dict.Clear(); CHECK(dict.Length() == 0); delete key; delete key2; } TEST_CASE("dict nthentry") { PDict unordered(UNORDERED); PDict ordered(ORDERED); uint32_t val = 15; uint32_t key_val = 5; detail::HashKey* okey = new detail::HashKey(key_val); detail::HashKey* ukey = new detail::HashKey(key_val); uint32_t val2 = 10; uint32_t key_val2 = 25; detail::HashKey* okey2 = new detail::HashKey(key_val2); detail::HashKey* ukey2 = new detail::HashKey(key_val2); unordered.Insert(ukey, &val); unordered.Insert(ukey2, &val2); ordered.Insert(okey, &val); ordered.Insert(okey2, &val2); // NthEntry returns null for unordered dicts uint32_t* lookup = unordered.NthEntry(0); CHECK(lookup == (uint32_t*)0); // Ordered dicts are based on order of insertion, nothing about the // data itself lookup = ordered.NthEntry(0); CHECK(*lookup == 15); delete okey; delete okey2; delete ukey; delete ukey2; } TEST_CASE("dict iteration") { PDict dict; uint32_t val = 15; uint32_t key_val = 5; detail::HashKey* key = new detail::HashKey(key_val); uint32_t val2 = 10; uint32_t key_val2 = 
25; detail::HashKey* key2 = new detail::HashKey(key_val2); dict.Insert(key, &val); dict.Insert(key2, &val2); #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" detail::HashKey* it_key; IterCookie* it = dict.InitForIteration(); CHECK(it != nullptr); int count = 0; while ( uint32_t* entry = dict.NextEntry(it_key, it) ) { switch ( count ) { case 0: // The DictEntry constructor typecasts this down to a uint32_t, so // we can't just check the value directly. // Explanation: hash_t is 64bit, open-dict only uses 32bit hash to // save space for each item (24 bytes aligned). OpenDict has table // size of 2^N and only take the lower bits of the hash. (The // original hash takes transformation in FibHash() to map into a // smaller 2^N range). CHECK(it_key->Hash() == (uint32_t)key2->Hash()); CHECK(*entry == 10); break; case 1: CHECK(it_key->Hash() == (uint32_t)key->Hash()); CHECK(*entry == 15); break; default: break; } count++; delete it_key; } CHECK(count == 2); #pragma GCC diagnostic pop delete key; delete key2; } TEST_CASE("dict new iteration") { PDict dict; uint32_t val = 15; uint32_t key_val = 5; detail::HashKey* key = new detail::HashKey(key_val); uint32_t val2 = 10; uint32_t key_val2 = 25; detail::HashKey* key2 = new detail::HashKey(key_val2); dict.Insert(key, &val); dict.Insert(key2, &val2); int count = 0; for ( const auto& entry : dict ) { auto* v = static_cast(entry.value); uint64_t k = *(uint32_t*) entry.GetKey(); switch ( count ) { case 0: CHECK(k == key_val2); CHECK(*v == val2); break; case 1: CHECK(k == key_val); CHECK(*v == val); break; default: break; } count++; } CHECK(count == 2); delete key; delete key2; } TEST_CASE("dict robust iteration") { PDict dict; uint32_t val = 15; uint32_t key_val = 5; detail::HashKey* key = new detail::HashKey(key_val); uint32_t val2 = 10; uint32_t key_val2 = 25; detail::HashKey* key2 = new detail::HashKey(key_val2); uint32_t val3 = 20; uint32_t key_val3 = 35; detail::HashKey* key3 = new 
detail::HashKey(key_val3); dict.Insert(key, &val); dict.Insert(key2, &val2); #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" detail::HashKey* it_key; IterCookie* it = dict.InitForIteration(); CHECK(it != nullptr); dict.MakeRobustCookie(it); int count = 0; while ( uint32_t* entry = dict.NextEntry(it_key, it) ) { switch ( count ) { case 0: CHECK(it_key->Hash() == (uint32_t)key2->Hash()); CHECK(*entry == 10); dict.Insert(key3, &val3); break; case 1: CHECK(it_key->Hash() == (uint32_t)key->Hash()); CHECK(*entry == 15); break; case 2: CHECK(it_key->Hash() == (uint32_t)key3->Hash()); CHECK(*entry == 20); break; default: // We shouldn't get here. CHECK(false); break; } count++; delete it_key; } CHECK(count == 3); IterCookie* it2 = dict.InitForIteration(); CHECK(it2 != nullptr); dict.MakeRobustCookie(it2); count = 0; while ( uint32_t* entry = dict.NextEntry(it_key, it2) ) { switch ( count ) { case 0: CHECK(it_key->Hash() == (uint32_t)key2->Hash()); CHECK(*entry == 10); dict.Remove(key3); break; case 1: CHECK(it_key->Hash() == (uint32_t)key->Hash()); CHECK(*entry == 15); break; default: // We shouldn't get here. 
CHECK(false);
                break;
        }
        count++;
        delete it_key;
    }

    CHECK(count == 2);
#pragma GCC diagnostic pop

    delete key;
    delete key2;
    delete key3;
}

TEST_CASE("dict new robust iteration") {
    PDict<uint32_t> dict;

    uint32_t val = 15;
    uint32_t key_val = 5;
    detail::HashKey* key = new detail::HashKey(key_val);

    uint32_t val2 = 10;
    uint32_t key_val2 = 25;
    detail::HashKey* key2 = new detail::HashKey(key_val2);

    uint32_t val3 = 20;
    uint32_t key_val3 = 35;
    detail::HashKey* key3 = new detail::HashKey(key_val3);

    dict.Insert(key, &val);
    dict.Insert(key2, &val2);

    {
        int count = 0;
        auto it = dict.begin_robust();

        for ( ; it != dict.end_robust(); ++it ) {
            // NOTE(review): GetValue's template argument was stripped from the
            // original; restored as uint32_t* to match the dereferences below --
            // TODO confirm against DictEntry::GetValue in Dict.h.
            auto* v = it->GetValue<uint32_t*>();
            uint64_t k = *(uint32_t*)it->GetKey();

            switch ( count ) {
                case 0:
                    CHECK(k == key_val2);
                    CHECK(*v == val2);
                    // Insert mid-iteration; the robust iterator must deliver key3 later.
                    dict.Insert(key3, &val3);
                    break;
                case 1:
                    CHECK(k == key_val);
                    CHECK(*v == val);
                    break;
                case 2:
                    CHECK(k == key_val3);
                    CHECK(*v == val3);
                    break;
                default:
                    // We shouldn't get here.
                    CHECK(false);
                    break;
            }
            count++;
        }
        CHECK(count == 3);
    }

    {
        int count = 0;
        auto it = dict.begin_robust();

        for ( ; it != dict.end_robust(); ++it ) {
            auto* v = it->GetValue<uint32_t*>();
            uint64_t k = *(uint32_t*)it->GetKey();

            switch ( count ) {
                case 0:
                    CHECK(k == key_val2);
                    CHECK(*v == val2);
                    // Insert followed by remove of the same key: net no-op for iteration.
                    dict.Insert(key3, &val3);
                    dict.Remove(key3);
                    break;
                case 1:
                    CHECK(k == key_val);
                    CHECK(*v == val);
                    break;
                default:
                    // We shouldn't get here.
CHECK(false); break; } count++; } CHECK(count == 2); } delete key; delete key2; delete key3; } TEST_CASE("dict iterator invalidation") { PDict dict; uint32_t val = 15; uint32_t key_val = 5; auto key = new detail::HashKey(key_val); uint32_t val2 = 10; uint32_t key_val2 = 25; auto key2 = new detail::HashKey(key_val2); uint32_t val3 = 42; uint32_t key_val3 = 37; auto key3 = new detail::HashKey(key_val3); dict.Insert(key, &val); dict.Insert(key2, &val2); detail::HashKey* it_key; bool iterators_invalidated = false; #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" IterCookie* it = dict.InitForIteration(); CHECK(it != nullptr); while ( uint32_t* entry = dict.NextEntry(it_key, it) ) { iterators_invalidated = false; dict.Remove(key3, &iterators_invalidated); // Key doesn't exist, nothing to remove, iteration not invalidated. CHECK(!iterators_invalidated); iterators_invalidated = false; dict.Insert(key, &val2, &iterators_invalidated); // Key exists, value gets overwritten, iteration not invalidated. CHECK(!iterators_invalidated); iterators_invalidated = false; dict.Remove(key2, &iterators_invalidated); // Key exists, gets removed, iteration is invalidated. CHECK(iterators_invalidated); delete it_key; dict.StopIteration(it); break; } it = dict.InitForIteration(); CHECK(it != nullptr); while ( uint32_t* entry = dict.NextEntry(it_key, it) ) { iterators_invalidated = false; dict.Insert(key3, &val3, &iterators_invalidated); // Key doesn't exist, gets inserted, iteration is invalidated. 
CHECK(iterators_invalidated); delete it_key; dict.StopIteration(it); break; } #pragma GCC diagnostic pop CHECK(dict.Length() == 2); CHECK(*static_cast(dict.Lookup(key)) == val2); CHECK(*static_cast(dict.Lookup(key3)) == val3); CHECK(static_cast(dict.Lookup(key2)) == nullptr); delete key; delete key2; delete key3; } TEST_SUITE_END(); ///////////////////////////////////////////////////////////////////////////////////////////////// //bucket math int Dictionary::Log2(int num) const { int i = 0; while ( num >>= 1 ) i++; return i; } int Dictionary::Buckets(bool expected) const { int buckets = ( 1 << log2_buckets ); if ( expected ) return buckets; return table ? buckets : 0; } int Dictionary::Capacity(bool expected) const { int capacity = ( 1 << log2_buckets ) + ( log2_buckets+0 ); if ( expected ) return capacity; return table ? capacity : 0; } int Dictionary::ThresholdEntries() const { // Increase the size of the dictionary when it is 75% full. However, when the dictionary // is small ( <= 20 elements ), only resize it when it's 100% full. The dictionary will // always resize when the current insertion causes it to be full. This ensures that the // current insertion should always be successful. int capacity = Capacity(); if ( log2_buckets <= detail::DICT_THRESHOLD_BITS ) return capacity; //20 or less elements, 1.0, only size up when necessary. return capacity - ( capacity >> detail::DICT_LOAD_FACTOR_BITS ); } detail::hash_t Dictionary::FibHash(detail::hash_t h) const { //GoldenRatio phi = (sqrt(5)+1)/2 = 1.6180339887... //1/phi = phi - 1 h &= detail::HASH_MASK; h *= 11400714819323198485llu; //2^64/phi return h; } // return position in dict with 2^bit size. int Dictionary::BucketByHash(detail::hash_t h, int log2_table_size) const //map h to n-bit { ASSERT(log2_table_size>=0); if ( ! log2_table_size ) return 0; //<< >> breaks on 64. 
#ifdef DICT_NO_FIB_HASH
    detail::hash_t hash = h;
#else
    detail::hash_t hash = FibHash(h);
#endif

    // Keep the top log2_table_size bits by shifting them down to the bottom.
    int m = 64 - log2_table_size;
    hash <<= m;
    hash >>= m;
    return hash;
}

// given entry at index i, return it's perfect bucket position.
int Dictionary::BucketByPosition(int position) const {
    // 'distance' is how far the entry was displaced from its ideal bucket.
    ASSERT(table && position >= 0 && position < Capacity() && ! table[position].Empty());
    return position - table[position].distance;
}

////////////////////////////////////////////////////////////////////////////////////////////////
// Cluster Math. A "cluster" is the contiguous run of entries that hash to the same bucket.
////////////////////////////////////////////////////////////////////////////////////////////////

// First position past the cluster rooted at 'bucket'; also the insert position
// for a new entry of that bucket.
int Dictionary::EndOfClusterByBucket(int bucket) const {
    ASSERT(bucket >= 0 && bucket < Buckets());
    int i = bucket;
    while ( i < Capacity() && ! table[i].Empty() && BucketByPosition(i) <= bucket )
        i++;
    return i;
}

int Dictionary::HeadOfClusterByPosition(int position) const {
    // Finding the first entry in the bucket chain.
    ASSERT(0 <= position && position < Capacity() && ! table[position].Empty());

    // Look backward for the first item with the same bucket as myself.
    int bucket = BucketByPosition(position);
    int i = position;
    while ( i >= bucket && BucketByPosition(i) == bucket )
        i--;

    return i == bucket ? i : i + 1;
}

int Dictionary::TailOfClusterByPosition(int position) const {
    ASSERT(0 <= position && position < Capacity() && ! table[position].Empty());

    int bucket = BucketByPosition(position);
    int i = position;
    while ( i < Capacity() && ! table[i].Empty() && BucketByPosition(i) == bucket )
        i++; // stop just over the tail.

    return i - 1;
}

int Dictionary::EndOfClusterByPosition(int position) const {
    return TailOfClusterByPosition(position) + 1;
}

int Dictionary::OffsetInClusterByPosition(int position) const {
    ASSERT(0 <= position && position < Capacity() && ! table[position].Empty());
    int head = HeadOfClusterByPosition(position);
    return position - head;
}

// Find the next valid entry after the position.
Position can be -1, which means // look for the next valid entry point altogether. int Dictionary::Next(int position) const { ASSERT(table && -1 <= position && position < Capacity()); do { position++; } while ( position < Capacity() && table[position].Empty() ); return position; } /////////////////////////////////////////////////////////////////////////////////////////////////////// //Debugging /////////////////////////////////////////////////////////////////////////////////////////////////////// #define DUMPIF(f) if(f) Dump(1) #ifdef DEBUG void Dictionary::AssertValid() const { bool valid = true; int n = num_entries; if ( table ) for ( int i = Capacity()-1; i >= 0; i-- ) if ( ! table[i].Empty() ) n--; valid = (n == 0); ASSERT(valid); DUMPIF(! valid); //entries must clustered together for ( int i = 1; i < Capacity(); i++ ) { if ( ! table || table[i].Empty() ) continue; if ( table[i-1].Empty() ) { valid = (table[i].distance == 0); ASSERT(valid); DUMPIF(! valid); } else { valid = (table[i].bucket >= table[i-1].bucket); ASSERT(valid); DUMPIF(! valid); if ( table[i].bucket == table[i-1].bucket ) { valid = (table[i].distance == table[i-1].distance+1); ASSERT(valid); DUMPIF(! valid); } else { valid = (table[i].distance <= table[i-1].distance); ASSERT(valid); DUMPIF(! valid); } } } } #endif//DEBUG size_t Dictionary::MemoryAllocation() const { size_t size = padded_sizeof(*this); if ( table ) { size += zeek::util::pad_size(Capacity() * sizeof(detail::DictEntry)); for ( int i = Capacity()-1; i>=0; i-- ) if ( ! table[i].Empty() && table[i].key_size > 8 ) size += zeek::util::pad_size(table[i].key_size); } if ( order ) size += padded_sizeof(std::vector) + zeek::util::pad_size(sizeof(detail::DictEntry) * order->capacity()); return size; } void Dictionary::DumpKeys() const { if ( ! table ) return; char key_file[100]; // Detect string or binary from first key. 
int i=0; while ( table[i].Empty() && i < Capacity() ) i++; bool binary = false; const char* key = table[i].GetKey(); for ( int j = 0; j < table[i].key_size; j++ ) if ( ! isprint(key[j]) ) { binary = true; break; } int max_distance = 0; DistanceStats(max_distance); if ( binary ) { char key = char(random() % 26) + 'A'; sprintf(key_file, "%d.%d.%zu-%c.key", Length(), max_distance, MemoryAllocation()/Length(), key); std::ofstream f(key_file, std::ios::binary|std::ios::out|std::ios::trunc); for ( int idx = 0; idx < Capacity(); idx++ ) if ( ! table[idx].Empty() ) { int key_size = table[idx].key_size; f.write((const char*)&key_size, sizeof(int)); f.write(table[idx].GetKey(), table[idx].key_size); } } else { char key = char(random() % 26) + 'A'; sprintf(key_file, "%d.%d.%zu-%d.ckey",Length(), max_distance, MemoryAllocation()/Length(), key); std::ofstream f(key_file, std::ios::out|std::ios::trunc); for ( int idx = 0; idx < Capacity(); idx++ ) if ( ! table[idx].Empty() ) { std::string s((char*)table[idx].GetKey(), table[idx].key_size); f << s << std::endl; } } } void Dictionary::DistanceStats(int& max_distance, int* distances, int num_distances) const { max_distance = 0; for ( int i = 0; i < num_distances; i++ ) distances[i] = 0; for ( int i = 0; i < Capacity(); i++ ) { if ( table[i].Empty() ) continue; if ( table[i].distance > max_distance ) max_distance = table[i].distance; if ( num_distances <= 0 || ! distances ) continue; if ( table[i].distance >= num_distances-1 ) distances[num_distances-1]++; else distances[table[i].distance]++; } } void Dictionary::Dump(int level) const { int key_size = 0; for ( int i = 0; i < Capacity(); i++ ) { if ( table[i].Empty() ) continue; key_size += zeek::util::pad_size(table[i].key_size); if ( ! 
table[i].value )
            continue;
    }

#define DICT_NUM_DISTANCES 5
    int distances[DICT_NUM_DISTANCES];
    int max_distance = 0;
    DistanceStats(max_distance, distances, DICT_NUM_DISTANCES);
    // Summary line; denominators are clamped to 1 to avoid dividing by zero on
    // an empty or unallocated table.
    printf("cap %'7d ent %'7d %'-7d load %.2f max_dist %2d mem %10zu mem/ent %3zu key/ent %3d lg %2d remaps %1d remap_end %4d ", Capacity(), Length(), MaxLength(), (double)Length()/(table? Capacity() : 1), max_distance, MemoryAllocation(), (MemoryAllocation())/(Length()?Length():1), key_size / (Length()?Length():1), log2_buckets, remaps, remap_end);

    // Displacement histogram as percentages; last bin is "DICT_NUM_DISTANCES-1 or more".
    if ( Length() > 0 ) {
        for ( int i = 0; i < DICT_NUM_DISTANCES-1; i++ )
            printf("[%d]%2d%% ", i, 100*distances[i]/Length());
        printf("[%d+]%2d%% ", DICT_NUM_DISTANCES-1, 100*distances[DICT_NUM_DISTANCES-1]/Length());
    }
    else
        printf("\n");

    printf("\n");

    // level >= 1 additionally dumps one row per slot.
    if ( level >= 1 ) {
        printf("%-10s %1s %-10s %-4s %-4s %-10s %-18s %-2s\n", "Index", "*", "Bucket", "Dist", "Off", "Hash", "FibHash", "KeySize");
        for ( int i = 0; i < Capacity(); i++ )
            if ( table[i].Empty() )
                printf("%'10d \n", i);
            else
                // '*' marks slots still within the not-yet-remapped region.
                printf("%'10d %1s %'10d %4d %4d 0x%08x 0x%016" PRIx64 "(%3d) %2d\n", i, (i<=remap_end? "*": ""), BucketByPosition(i), (int)table[i].distance, OffsetInClusterByPosition(i), uint(table[i].hash), FibHash(table[i].hash), (int)FibHash(table[i].hash)&0xFF, (int)table[i].key_size);
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////
// Initialization.
////////////////////////////////////////////////////////////////////////////////////////////////////

Dictionary::Dictionary(DictOrder ordering, int initial_size) {
    if ( initial_size > 0 ) {
        // If an initial size is specified, init the table right away. Otherwise wait until the
        // first insertion to init.
log2_buckets = Log2(initial_size); Init(); } if ( ordering == ORDERED ) order = new std::vector; } Dictionary::~Dictionary() { Clear(); } void Dictionary::Clear() { if ( table ) { for ( int i = Capacity() - 1; i >= 0; i-- ) { if ( table[i].Empty() ) continue; if ( delete_func ) delete_func(table[i].value); table[i].Clear(); } free(table); table = nullptr; } if ( order ) { delete order; order = nullptr; } if ( cookies ) { delete cookies; cookies = nullptr; } if ( iterators ) { delete iterators; iterators = nullptr; } log2_buckets = 0; num_iterators = 0; remaps = 0; remap_end = -1; num_entries = 0; max_entries = 0; } void Dictionary::Init() { ASSERT(! table); table = (detail::DictEntry*)malloc(sizeof(detail::DictEntry) * Capacity(true)); for ( int i = Capacity() - 1; i >= 0; i-- ) table[i].SetEmpty(); } // private void generic_delete_func(void* v) { free(v); } ////////////////////////////////////////////////////////////////////////////////////////// //Lookup // Look up now also possibly modifies the entry. Why? if the entry is found but not positioned // according to the current dict (so it's before SizeUp), it will be moved to the right // position so next lookup is fast. void* Dictionary::Lookup(const detail::HashKey* key) const { return Lookup(key->Key(), key->Size(), key->Hash()); } void* Dictionary::Lookup(const void* key, int key_size, detail::hash_t h) const { Dictionary* d = const_cast(this); int position = d->LookupIndex(key, key_size, h); return position >= 0 ? table[position].value : nullptr; } //for verification purposes int Dictionary::LinearLookupIndex(const void* key, int key_size, detail::hash_t hash) const { for ( int i = 0; i < Capacity(); i++ ) if ( ! table[i].Empty() && table[i].Equal((const char*)key, key_size, hash) ) return i; return -1; } // Lookup position for all possible table_sizes caused by remapping. Remap it immediately // if not in the middle of iteration. 
int Dictionary::LookupIndex(const void* key, int key_size, detail::hash_t hash, int* insert_position, int* insert_distance) {
    ASSERT_VALID(this);
    if ( ! table )
        return -1;

    int bucket = BucketByHash(hash, log2_buckets);
#ifdef DEBUG
    int linear_position = LinearLookupIndex(key, key_size, hash);
#endif // DEBUG
    // Try the current table size first.
    int position = LookupIndex(key, key_size, hash, bucket, Capacity(), insert_position, insert_distance);
    if ( position >= 0 ) {
        ASSERT(position == linear_position); // same as linearLookup
        return position;
    }

    // Not at its current-size home: try each previous (smaller) table size that
    // still has unremapped entries.
    for ( int i = 1; i <= remaps; i++ ) {
        int prev_bucket = BucketByHash(hash, log2_buckets - i);
        if ( prev_bucket <= remap_end ) {
            // possibly here. insert_position & insert_distance returned on failed lookup is
            // not valid in previous table_sizes.
            position = LookupIndex(key, key_size, hash, prev_bucket, remap_end+1);
            if ( position >= 0 ) {
                ASSERT(position == linear_position); // same as linearLookup
                // remap immediately if no iteration is on.
                if ( ! num_iterators ) {
                    Remap(position, &position);
                    ASSERT(position == LookupIndex(key, key_size, hash));
                }
                return position;
            }
        }
    }

    // not found
#ifdef DEBUG
    if ( linear_position >= 0 ) { // different. stop and try to see whats happending.
        ASSERT(false);
        // rerun the function in debugger to track down the bug.
        LookupIndex(key, key_size, hash);
    }
#endif // DEBUG
    return -1;
}

// Returns the position of the item if it exists. Otherwise returns -1, but set the insert
// position/distance if required. The starting point for the search may not be the bucket
// for the current table size since this method is also used to search for an item in the
// previous table size.
int Dictionary::LookupIndex(const void* key, int key_size, detail::hash_t hash, int bucket, int end, int* insert_position /*output*/, int* insert_distance /*output*/) {
    ASSERT(bucket >= 0 && bucket < Buckets());
    int i = bucket;
    // Scan the cluster for 'bucket'; it ends at the first empty slot or at the
    // first entry belonging to a later bucket.
    for ( ; i < end && ! table[i].Empty() && BucketByPosition(i) <= bucket; i++ )
        if ( BucketByPosition(i) == bucket && table[i].Equal((char*)key, key_size, hash) )
            return i;

    // no such cluster, or not found in the cluster.
    if ( insert_position )
        *insert_position = i;
    if ( insert_distance )
        *insert_distance = i - bucket;
    return -1;
}

///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Insert
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////

// Inserts or overwrites. Returns the previous value for an existing key
// (caller owns it), nullptr for a fresh insertion.
void* Dictionary::Insert(void* key, int key_size, detail::hash_t hash, void* val, bool copy_key, bool* iterators_invalidated) {
    ASSERT_VALID(this);

    // Initialize the table if it hasn't been done yet. This saves memory storing a bunch
    // of empty dicts.
    if ( ! table )
        Init();

    void* v = nullptr;
    // if found, i is the position
    // if not found, i is the insert position, d is the distance of key on position i.
    int insert_position = -1, insert_distance = -1;
    int position = LookupIndex(key, key_size, hash, &insert_position, &insert_distance);
    if ( position >= 0 ) {
        // Key already present: swap in the new value, free the redundant key.
        v = table[position].value;
        table[position].value = val;
        if ( ! copy_key )
            delete [] (char*)key;

        if ( order ) { // set new v to order too.
            auto it = std::find(order->begin(), order->end(), table[position]);
            ASSERT(it != order->end());
            it->value = val;
        }

        if ( cookies && ! cookies->empty() )
            // need to set new v for cookies too.
            for ( auto c : *cookies ) {
                ASSERT_VALID(c);
                // ASSERT(false);
                auto it = std::find(c->inserted->begin(), c->inserted->end(), table[position]);
                if ( it != c->inserted->end() )
                    it->value = val;
            }

        if ( iterators && ! iterators->empty() )
            // need to set new v for iterators too.
            for ( auto c : *iterators ) {
                auto it = std::find(c->inserted->begin(), c->inserted->end(), table[position]);
                if ( it != c->inserted->end() )
                    it->value = val;
            }
    }
    else {
        // A genuine insertion can shift entries, which breaks non-robust iteration.
        if ( ! HaveOnlyRobustIterators() ) {
            if ( iterators_invalidated )
                *iterators_invalidated = true;
            else
                reporter->InternalWarning("Dictionary::Insert() possibly caused iterator invalidation");
        }

        // Allocate memory for key if necessary. Key is updated to reflect internal key if necessary.
        detail::DictEntry entry(key, key_size, hash, val, insert_distance, copy_key);
        InsertRelocateAndAdjust(entry, insert_position);
        if ( order )
            order->push_back(entry);

        num_entries++;
        cum_entries++;
        if ( max_entries < num_entries )
            max_entries = num_entries;
        if ( num_entries > ThresholdEntries() )
            SizeUp();
    }

    // Remap after insert can adjust asap to shorten period of mixed table.
    // TODO: however, if remap happens right after size up, then it consumes more cpu for this
    // cycle, a possible hiccup point.
    if ( Remapping() )
        Remap();
    ASSERT_VALID(this);
    return v;
}

/// e.distance is adjusted to be the one at insert_position.
void Dictionary::InsertRelocateAndAdjust(detail::DictEntry& entry, int insert_position) {
#ifdef DEBUG
    entry.bucket = BucketByHash(entry.hash, log2_buckets);
#endif // DEBUG
    int last_affected_position = insert_position;
    InsertAndRelocate(entry, insert_position, &last_affected_position);

    // If remapping in progress, adjust the remap_end to step back a little to cover the new
    // range if the changed range straddles over remap_end.
    if ( Remapping() && insert_position <= remap_end && remap_end < last_affected_position ) {
        // [i,j] range changed. if map_end in between. then possibly old entry
        // pushed down across map_end.
        remap_end = last_affected_position; // adjust to j on the conservative side.
    }

    if ( cookies && ! cookies->empty() )
        for ( auto c : *cookies )
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
            AdjustOnInsert(c, entry, insert_position, last_affected_position);
#pragma GCC diagnostic pop

    if ( iterators && ! iterators->empty() )
        for ( auto c : *iterators )
            AdjustOnInsert(c, entry, insert_position, last_affected_position);
}

/// insert entry into position, relocate other entries when necessary.
void Dictionary::InsertAndRelocate(detail::DictEntry& entry, int insert_position, int* last_affected_position) {
    /// take out the head of cluster and append to the end of the cluster.
    while ( true ) {
        if ( insert_position >= Capacity() ) {
            ASSERT(insert_position == Capacity());
            SizeUp(); // copied all the items to new table. as it's just copying without
                      // remapping, insert_position is now empty.
            table[insert_position] = entry;
            if ( last_affected_position )
                *last_affected_position = insert_position;
            return;
        }
        if ( table[insert_position].Empty() ) {
            // the condition to end the loop.
            table[insert_position] = entry;
            if ( last_affected_position )
                *last_affected_position = insert_position;
            return;
        }

        // the to-be-swapped-out item appends to the end of its original cluster.
        auto t = table[insert_position];
        int next = EndOfClusterByPosition(insert_position);
        t.distance += next - insert_position;

        // swap
        table[insert_position] = entry;
        entry = t;
        insert_position = next; // append to the end of the current cluster.
    }
}

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
/// Adjust Cookies on Insert.
/// Adjusts a legacy robust cookie after an entry was inserted at
/// insert_position, with relocations reaching last_affected_position.
void Dictionary::AdjustOnInsert(IterCookie* c, const detail::DictEntry& entry, int insert_position,
                                int last_affected_position)
	{
	ASSERT(c);
	ASSERT_VALID(c);
	// The new entry landed before the iteration point: record it so the
	// iteration still returns it.
	if ( insert_position < c->next )
		c->inserted->push_back(entry);
	// The relocation pushed an already-visited entry across the iteration
	// point: record it so it is not returned a second time.
	if ( insert_position < c->next && c->next <= last_affected_position )
		{
		int k = TailOfClusterByPosition(c->next);
		ASSERT(k >= 0 && k < Capacity());
		c->visited->push_back(table[k]);
		}
	}
#pragma GCC diagnostic pop

/// RobustDictIterator flavor of the insert adjustment above; same logic.
void Dictionary::AdjustOnInsert(RobustDictIterator* c, const detail::DictEntry& entry,
                                int insert_position, int last_affected_position)
	{
	if ( insert_position < c->next )
		c->inserted->push_back(entry);
	if ( insert_position < c->next && c->next <= last_affected_position )
		{
		int k = TailOfClusterByPosition(c->next);
		ASSERT(k >= 0 && k < Capacity());
		c->visited->push_back(table[k]);
		}
	}

/// Doubles the table capacity and starts (another) incremental remap pass.
void Dictionary::SizeUp()
	{
	int prev_capacity = Capacity();
	log2_buckets++;
	int capacity = Capacity();
	table = (detail::DictEntry*)realloc(table, capacity * sizeof(detail::DictEntry));
	for ( int i = prev_capacity; i < capacity; i++ )
		table[i].SetEmpty();

	// Remap from last to first in reverse order. SizeUp can be triggered by 2
	// conditions, one of which is that the last space in the table is occupied
	// and there's nowhere to put new items. In that case the table doubles in
	// capacity and the item is put at the prev_capacity position with the old
	// hash. We need to cover that item, hence prev_capacity rather than
	// prev_capacity - 1.
	remap_end = prev_capacity;

	// Another remap starts.
	remaps++; // Used in Lookup() to cover SizeUp with incomplete remaps.
	// Since we only ever size up (one direction), remaps cannot exceed
	// log2_buckets.
	ASSERT(remaps <= log2_buckets);
	}

///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Remove
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////

/// Removes the entry for key, returning its value (ownership passes to the
/// caller). Cookie adjustment: maintain inserts here; maintain next in the
/// lower-level version.
void* Dictionary::Remove(const void* key, int key_size, detail::hash_t hash, bool dont_delete,
                         bool* iterators_invalidated)
	{
	ASSERT_VALID(this);
	// dont_delete is a poorly designed flag: if on, the internal key has
	// nowhere to be returned and its memory is lost.
	ASSERT(! dont_delete);

	int position = LookupIndex(key, key_size, hash);
	if ( position < 0 )
		return nullptr;

	if ( ! HaveOnlyRobustIterators() )
		{
		// A non-robust iterator may be invalidated by the backward shift below.
		if ( iterators_invalidated )
			*iterators_invalidated = true;
		else
			reporter->InternalWarning("Dictionary::Remove() possibly caused iterator invalidation");
		}

	detail::DictEntry entry = RemoveRelocateAndAdjust(position);
	num_entries--;
	ASSERT(num_entries >= 0);

	// The entry is about to be invalid; remove it from all references.
	if ( order )
		order->erase(std::remove(order->begin(), order->end(), entry), order->end());

	void* v = entry.value;
	entry.Clear();
	ASSERT_VALID(this);
	return v;
	}

/// Removes the entry at position, shifting cluster members up, and fixes up
/// all active robust iteration points.
detail::DictEntry Dictionary::RemoveRelocateAndAdjust(int position)
	{
	int last_affected_position = position;
	detail::DictEntry entry = RemoveAndRelocate(position, &last_affected_position);

#ifdef DEBUG
	// Validation: [position, last_affected_position) should be continuous,
	// without empty slots.
	for ( int k = position; k < last_affected_position; k++ )
		ASSERT(! table[k].Empty());
#endif//DEBUG

	if ( cookies && ! cookies->empty() )
		for ( auto c : *cookies )
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
			AdjustOnRemove(c, entry, position, last_affected_position);
#pragma GCC diagnostic pop

	if ( iterators && ! iterators->empty() )
		for ( auto c : *iterators )
			AdjustOnRemove(c, entry, position, last_affected_position);

	return entry;
	}

/// Removes the entry at position and fills the hole with the tail of the
/// cluster starting at position+1 (backward-shift deletion), repeating until
/// the end of the run.
detail::DictEntry Dictionary::RemoveAndRelocate(int position, int* last_affected_position)
	{
	ASSERT(position >= 0 && position < Capacity() && ! table[position].Empty());

	detail::DictEntry entry = table[position];
	while ( true )
		{
		if ( position == Capacity() - 1 || table[position+1].Empty() || table[position+1].distance == 0 )
			{
			// No next cluster to fill, or the next position is empty, or the
			// next position is already in its perfect bucket.
			table[position].SetEmpty();
			if ( last_affected_position )
				*last_affected_position = position;
			return entry;
			}
		int next = TailOfClusterByPosition(position+1);
		table[position] = table[next];
		table[position].distance -= next - position; // Distance improved for this item.
		position = next;
		}

	return entry;
	}

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
/// Adjusts a legacy robust cookie after the entry at position was removed,
/// with shifts reaching last_affected_position.
void Dictionary::AdjustOnRemove(IterCookie* c, const detail::DictEntry& entry, int position,
                                int last_affected_position)
	{
	ASSERT_VALID(c);
	// The removed entry must no longer be returned by the iteration.
	c->inserted->erase(std::remove(c->inserted->begin(), c->inserted->end(), entry), c->inserted->end());
	// An unvisited entry was shifted up across the iteration point: record it
	// so the iteration still returns it.
	if ( position < c->next && c->next <= last_affected_position )
		{
		int moved = HeadOfClusterByPosition(c->next-1);
		if ( moved < position )
			moved = position;
		c->inserted->push_back(table[moved]);
		}

	// If not already at the end of the dictionary, adjust next to a valid slot.
	if ( c->next < Capacity() && table[c->next].Empty() )
		c->next = Next(c->next);
	}
#pragma GCC diagnostic pop

/// RobustDictIterator flavor of the remove adjustment above; same logic.
void Dictionary::AdjustOnRemove(RobustDictIterator* c, const detail::DictEntry& entry, int position,
                                int last_affected_position)
	{
	c->inserted->erase(std::remove(c->inserted->begin(), c->inserted->end(), entry), c->inserted->end());
	if ( position < c->next && c->next <= last_affected_position )
		{
		int moved = HeadOfClusterByPosition(c->next-1);
		if ( moved < position )
			moved = position;
		c->inserted->push_back(table[moved]);
		}

	// If not already at the end of the dictionary, adjust next to a valid slot.
	if ( c->next < Capacity() && table[c->next].Empty() )
		c->next = Next(c->next);
	}

///////////////////////////////////////////////////////////////////////////////////////////////////
// Remap
///////////////////////////////////////////////////////////////////////////////////////////////////

/// Performs one bounded chunk of the incremental remap.
/// Since remap should be very fast, take several entries at a time. Remap is
/// delayed while any iterator exists -- it is hard to handle iteration while
/// positions change. Remapping proceeds from the bottom up, so the dictionary
/// has two parts: [0, remap_end] is mixed old/new entries; (remap_end, ...]
/// contains only new entries.
void Dictionary::Remap()
	{
	if ( num_iterators > 0 )
		return;

	int left = detail::DICT_REMAP_ENTRIES;
	while ( remap_end >= 0 && left > 0 )
		{
		if ( ! table[remap_end].Empty() && Remap(remap_end) )
			left--;
		else
			// A successful Remap may increase remap_end (SizeUp due to
			// insert); if so, remap_end needs to be worked on again, so only
			// decrement when nothing was moved.
			remap_end--;
		}
	if ( remap_end < 0 )
		remaps = 0; // Done remapping.
	}

/// Moves the entry at position to the bucket its hash maps to under the
/// current log2_buckets. Returns true if the entry actually moved.
bool Dictionary::Remap(int position, int* new_position)
	{
	ASSERT_VALID(this);
	// Remap changes item positions via remove/insert. To avoid excessive
	// work, it must not run while safe iteration is in progress.
	ASSERT( ( ! cookies || cookies->empty() ) && ( ! iterators || iterators->empty() ) );

	int current = BucketByPosition(position); // Current bucket.
	int expected = BucketByHash(table[position].hash, log2_buckets); // Expected bucket in the new table.
	// Equal either because 1) it's a new item, or 2) it's an old item whose
	// new bucket equals the old one -- 50% of old items behave this way due to
	// fibhash.
	if ( current == expected )
		return false;

	// No iteration cookies to adjust, so no need for last_affected_position.
	detail::DictEntry entry = RemoveAndRelocate(position);
#ifdef DEBUG
	entry.bucket = expected;
#endif//DEBUG

	// Find the insert position in the target cluster.
	int insert_position = EndOfClusterByBucket(expected);
	if ( new_position )
		*new_position = insert_position;
	entry.distance = insert_position - expected;
	// No iteration cookies to adjust, so no need for last_affected_position.
	InsertAndRelocate(entry, insert_position);
	ASSERT_VALID(this);
	return true;
	}

/// Returns the value (and key via out-parameters) of the n-th inserted entry.
/// Only meaningful for ordered dictionaries; returns nullptr otherwise.
void* Dictionary::NthEntry(int n, const void*& key, int& key_size) const
	{
	if ( ! order || n < 0 || n >= Length() )
		return nullptr;
	detail::DictEntry entry = (*order)[n];
	key = entry.GetKey();
	key_size = entry.key_size;
	return entry.value;
	}

///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Iteration
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
/// Registers a legacy cookie as robust. Must be called before iteration
/// starts (i.e. while cookie->next < 0 -- enforced in MakeRobust()).
void Dictionary::MakeRobustCookie(IterCookie* cookie)
	{
	if ( ! cookies )
		cookies = new std::vector<IterCookie*>;
	cookie->MakeRobust();
	ASSERT_VALID(cookie);
	}

/// Allocates a fresh (non-robust) legacy iteration cookie.
IterCookie* Dictionary::InitForIterationNonConst() //const
	{
	num_iterators++;
	return new IterCookie(const_cast<Dictionary*>(this));
	}

/// Ends a legacy iteration, releasing the cookie.
void Dictionary::StopIterationNonConst(IterCookie* cookie) //const
	{
	ASSERT(num_iterators > 0);
	if ( num_iterators > 0 )
		num_iterators--;
	delete cookie;
	}

/// Returns the next value of a legacy iteration (optionally its hash key via
/// h), or nullptr at the end -- in which case the cookie is freed and c is
/// nulled out.
void* Dictionary::NextEntryNonConst(detail::HashKey*& h, IterCookie*& c, bool return_hash) //const
	{
	// If there are any inserted entries, return them first.
	// That keeps the list small and helps avoiding searching
	// a large list when deleting an entry.
	ASSERT(c);
	ASSERT_VALID(c);
	if ( ! table )
		{
		if ( num_iterators > 0 )
			num_iterators--;
		delete c;
		c = nullptr;
		return nullptr; // End of iteration.
		}

	if ( c->inserted && ! c->inserted->empty() )
		{
		// Return the last one. Order doesn't matter,
		// and removing from the tail is cheaper.
		detail::DictEntry e = c->inserted->back();
		if ( return_hash )
			h = new detail::HashKey(e.GetKey(), e.key_size, e.hash);
		void* v = e.value;
		c->inserted->pop_back();
		return v;
		}

	// First call: position on the first occupied slot.
	if ( c->next < 0 )
		c->next = Next(-1);

	ASSERT(c->next >= Capacity() || ! table[c->next].Empty());

	// Filter out entries already visited (they were shifted across the
	// iteration point by an insert).
	int capacity = Capacity();
	if ( c->visited && ! c->visited->empty() )
		while ( c->next < capacity )
			{
			ASSERT(! table[c->next].Empty());
			auto it = std::find(c->visited->begin(), c->visited->end(), table[c->next]);
			if ( it == c->visited->end() )
				break;
			c->visited->erase(it);
			c->next = Next(c->next);
			}

	if ( c->next >= capacity )
		{
		// End of iteration: clean up the cookie.
		if ( num_iterators > 0 )
			num_iterators--;
		delete c;
		c = nullptr;
		return nullptr;
		}

	ASSERT(! table[c->next].Empty());
	void* v = table[c->next].value;
	if ( return_hash )
		h = new detail::HashKey(table[c->next].GetKey(), table[c->next].key_size, table[c->next].hash);

	// Prepare for next time.
	c->next = Next(c->next);
	ASSERT_VALID(c);
	return v;
	}

/// Const wrapper around InitForIterationNonConst().
IterCookie* Dictionary::InitForIteration() const
	{
	Dictionary* dp = const_cast<Dictionary*>(this);
	return dp->InitForIterationNonConst();
	}

/// Const wrapper around NextEntryNonConst().
void* Dictionary::NextEntry(detail::HashKey*& h, IterCookie*& cookie, bool return_hash) const
	{
	Dictionary* dp = const_cast<Dictionary*>(this);
	return dp->NextEntryNonConst(h, cookie, return_hash);
	}

/// Const wrapper around StopIterationNonConst().
void Dictionary::StopIteration(IterCookie* cookie) const
	{
	Dictionary* dp = const_cast<Dictionary*>(this);
	dp->StopIterationNonConst(cookie);
	}
#pragma GCC diagnostic pop

///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// New Iteration
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////

DictIterator::DictIterator(const Dictionary* d, detail::DictEntry* begin, detail::DictEntry* end)
	: curr(begin), end(end)
	{
	// Make sure that we're starting on a non-empty element.
	while ( curr != end && curr->Empty() )
		++curr;

	// Cast away the constness so that the number of iterators can be modified
	// in the dictionary. This does violate the constness guarantees of
	// const-begin()/end() and cbegin()/cend(), but we're not modifying the
	// actual data in the collection, just a counter in the wrapper of the
	// collection.
	dict = const_cast<Dictionary*>(d);
	dict->num_iterators++;
	}

DictIterator::~DictIterator()
	{
	assert(dict->num_iterators > 0);
	dict->num_iterators--;
	}

DictIterator& DictIterator::operator++()
	{
	// The non-robust case is easy. Just advance the current position forward
	// until you find one that isn't empty and isn't the end.
	do
		{
		++curr;
		} while ( curr != end && curr->Empty() );

	return *this;
	}

/// Creates a robust iterator positioned at the first element.
RobustDictIterator Dictionary::MakeRobustIterator()
	{
	if ( ! iterators )
		iterators = new std::vector<RobustDictIterator*>;

	return { this };
	}

/// Returns the next entry of a robust iteration, or DictEntry(nullptr) at
/// the end -- in which case the iterator is Complete()d.
detail::DictEntry Dictionary::GetNextRobustIteration(RobustDictIterator* iter)
	{
	// If there are any inserted entries, return them first.
	// That keeps the list small and helps avoiding searching
	// a large list when deleting an entry.
	if ( ! table )
		{
		iter->Complete();
		return detail::DictEntry(nullptr); // End of iteration.
		}

	if ( iter->inserted && ! iter->inserted->empty() )
		{
		// Return the last one. Order doesn't matter,
		// and removing from the tail is cheaper.
		detail::DictEntry e = iter->inserted->back();
		iter->inserted->pop_back();
		return e;
		}

	// First call: position on the first occupied slot.
	if ( iter->next < 0 )
		iter->next = Next(-1);

	ASSERT(iter->next >= Capacity() || ! table[iter->next].Empty());

	// Filter out entries already visited (they were shifted across the
	// iteration point by an insert).
	int capacity = Capacity();
	if ( iter->visited && ! iter->visited->empty() )
		while ( iter->next < capacity )
			{
			ASSERT(! table[iter->next].Empty());
			auto it = std::find(iter->visited->begin(), iter->visited->end(), table[iter->next]);
			if ( it == iter->visited->end() )
				break;
			iter->visited->erase(it);
			iter->next = Next(iter->next);
			}

	if ( iter->next >= capacity )
		{
		iter->Complete();
		return detail::DictEntry(nullptr); // End of iteration.
		}

	ASSERT(! table[iter->next].Empty());
	detail::DictEntry e = table[iter->next];

	// Prepare for next time.
	iter->next = Next(iter->next);
	return e;
	}

RobustDictIterator::RobustDictIterator(Dictionary* d) : curr(nullptr), dict(d)
	{
	next = -1;
	inserted = new std::vector<detail::DictEntry>();
	visited = new std::vector<detail::DictEntry>();

	// Register with the dictionary so mutations keep this iterator adjusted.
	dict->num_iterators++;
	dict->iterators->push_back(this);

	// Advance the iterator one step so that we're at the first element.
	curr = dict->GetNextRobustIteration(this);
	}

RobustDictIterator::RobustDictIterator(const RobustDictIterator& other) : curr(nullptr)
	{
	dict = nullptr;

	if ( other.dict )
		{
		next = other.next;
		inserted = new std::vector<detail::DictEntry>();
		visited = new std::vector<detail::DictEntry>();

		// Deep-copy the bookkeeping lists so the two iterators advance
		// independently.
		if ( other.inserted )
			std::copy(other.inserted->begin(), other.inserted->end(), std::back_inserter(*inserted));

		if ( other.visited )
			std::copy(other.visited->begin(), other.visited->end(), std::back_inserter(*visited));

		dict = other.dict;
		dict->num_iterators++;
		dict->iterators->push_back(this);

		curr = other.curr;
		}
	}

RobustDictIterator::RobustDictIterator(RobustDictIterator&& other) : curr(nullptr)
	{
	dict = nullptr;

	if ( other.dict )
		{
		// Steal the bookkeeping lists and replace the source in the
		// dictionary's iterator registry; num_iterators stays balanced since
		// one registration is handed over.
		next = other.next;
		inserted = other.inserted;
		visited = other.visited;
		dict = other.dict;

		dict->iterators->push_back(this);
		dict->iterators->erase(std::remove(dict->iterators->begin(), dict->iterators->end(), &other),
		                       dict->iterators->end());
		other.dict = nullptr;

		curr = std::move(other.curr);
		}
	}

RobustDictIterator::~RobustDictIterator()
	{
	Complete();
	}

/// Deregisters the iterator from the dictionary and releases its bookkeeping
/// lists. Safe to call more than once (no-op when already completed).
void RobustDictIterator::Complete()
	{
	if ( dict )
		{
		assert(dict->num_iterators > 0);
		dict->num_iterators--;
		dict->iterators->erase(std::remove(dict->iterators->begin(), dict->iterators->end(), this),
		                       dict->iterators->end());

		delete inserted;
		delete visited;

		inserted = nullptr;
		visited = nullptr;
		dict = nullptr;
		}
	}

RobustDictIterator& RobustDictIterator::operator++()
	{
	curr = dict->GetNextRobustIteration(this);
	return *this;
	}

} // namespace zeek