From 47e7fe2cd18d28731f74b38d65987c124ce1143c Mon Sep 17 00:00:00 2001 From: Tim Wojtulewicz Date: Mon, 27 Jun 2022 14:22:21 -0700 Subject: [PATCH] Convert Dictionary types to be templated classes This has the fortunate side-effect of also making it so we can store the value objects as typed pointers, instead of void*. --- src/Dict.cc | 1143 +--------------------------- src/Dict.h | 1232 ++++++++++++++++++++++++++++--- src/Stmt.cc | 2 +- src/Val.cc | 23 +- src/Val.h | 2 +- src/broker/Data.cc | 4 +- src/broker/messaging.bif | 1 - src/file_analysis/AnalyzerSet.h | 3 +- src/file_analysis/File.cc | 8 +- src/input/Manager.cc | 4 +- src/logging/Manager.cc | 2 +- src/reporter.bif | 1 - src/script_opt/ZAM/IterInfo.h | 6 +- src/supervisor/Supervisor.cc | 4 +- src/telemetry/telemetry.bif | 2 +- 15 files changed, 1153 insertions(+), 1284 deletions(-) diff --git a/src/Dict.cc b/src/Dict.cc index 98b8320869..96e6bd8977 100644 --- a/src/Dict.cc +++ b/src/Dict.cc @@ -16,14 +16,6 @@ #include "zeek/Reporter.h" #include "zeek/util.h" -#if defined(DEBUG) && defined(ZEEK_DICT_DEBUG) -#define ASSERT_VALID(o) o->AssertValid() -#define ASSERT_EQUAL(a, b) ASSERT(a == b) -#else -#define ASSERT_VALID(o) -#define ASSERT_EQUAL(a, b) -#endif // DEBUG - namespace zeek { @@ -199,7 +191,7 @@ TEST_CASE("dict robust iteration") for ( ; it != dict.end_robust(); ++it ) { - auto* v = it->GetValue(); + auto* v = it->value; uint64_t k = *(uint32_t*)it->GetKey(); switch ( count ) @@ -234,7 +226,7 @@ TEST_CASE("dict robust iteration") for ( ; it != dict.end_robust(); ++it ) { - auto* v = it->GetValue(); + auto* v = it->value; uint64_t k = *(uint32_t*)it->GetKey(); switch ( count ) @@ -301,7 +293,7 @@ TEST_CASE("dict robust iteration replacement") for ( ; count != 2 && it != dict.end_robust(); ++count, ++it ) { } // Store off the value at this iterator index - auto* v = it->GetValue(); + auto* v = it->value; // Replace it with something else auto k = it->GetHashKey(); @@ -315,7 +307,7 @@ TEST_CASE("dict 
robust iteration replacement") for ( ; it != dict.end_robust(); ++it ) { uint64_t k = *(uint32_t*)it->GetKey(); - auto* v = it->GetValue(); + auto* v = it->value; CHECK(v->v == 50); } @@ -382,1137 +374,10 @@ TEST_CASE("dict iterator invalidation") delete key3; } -///////////////////////////////////////////////////////////////////////////////////////////////// -// bucket math -int Dictionary::Log2(int num) const - { - int i = 0; - while ( num >>= 1 ) - i++; - return i; - } - -int Dictionary::Buckets(bool expected) const - { - int buckets = (1 << log2_buckets); - if ( expected ) - return buckets; - return table ? buckets : 0; - } - -int Dictionary::Capacity(bool expected) const - { - int capacity = (1 << log2_buckets) + (log2_buckets + 0); - if ( expected ) - return capacity; - return table ? capacity : 0; - } - -int Dictionary::ThresholdEntries() const - { - // Increase the size of the dictionary when it is 75% full. However, when the dictionary - // is small ( <= 20 elements ), only resize it when it's 100% full. The dictionary will - // always resize when the current insertion causes it to be full. This ensures that the - // current insertion should always be successful. - int capacity = Capacity(); - if ( log2_buckets <= detail::DICT_THRESHOLD_BITS ) - return capacity; // 20 or less elements, 1.0, only size up when necessary. - return capacity - (capacity >> detail::DICT_LOAD_FACTOR_BITS); - } - -detail::hash_t Dictionary::FibHash(detail::hash_t h) const - { - // GoldenRatio phi = (sqrt(5)+1)/2 = 1.6180339887... - // 1/phi = phi - 1 - h &= detail::HASH_MASK; - h *= 11400714819323198485llu; // 2^64/phi - return h; - } - -// return position in dict with 2^bit size. -int Dictionary::BucketByHash(detail::hash_t h, int log2_table_size) const // map h to n-bit - { - ASSERT(log2_table_size >= 0); - if ( ! log2_table_size ) - return 0; //<< >> breaks on 64. 
- -#ifdef DICT_NO_FIB_HASH - detail::hash_t hash = h; -#else - detail::hash_t hash = FibHash(h); -#endif - - int m = 64 - log2_table_size; - hash <<= m; - hash >>= m; - - return hash; - } - -// given entry at index i, return it's perfect bucket position. -int Dictionary::BucketByPosition(int position) const - { - ASSERT(table && position >= 0 && position < Capacity() && ! table[position].Empty()); - return position - table[position].distance; - } - -//////////////////////////////////////////////////////////////////////////////////////////////// -// Cluster Math -//////////////////////////////////////////////////////////////////////////////////////////////// - -int Dictionary::EndOfClusterByBucket(int bucket) const - { - ASSERT(bucket >= 0 && bucket < Buckets()); - int i = bucket; - while ( i < Capacity() && ! table[i].Empty() && BucketByPosition(i) <= bucket ) - i++; - return i; - } - -int Dictionary::HeadOfClusterByPosition(int position) const - { - // Finding the first entry in the bucket chain. - ASSERT(0 <= position && position < Capacity() && ! table[position].Empty()); - - // Look backward for the first item with the same bucket as myself. - int bucket = BucketByPosition(position); - int i = position; - while ( i >= bucket && BucketByPosition(i) == bucket ) - i--; - - return i == bucket ? i : i + 1; - } - -int Dictionary::TailOfClusterByPosition(int position) const - { - ASSERT(0 <= position && position < Capacity() && ! table[position].Empty()); - - int bucket = BucketByPosition(position); - int i = position; - while ( i < Capacity() && ! table[i].Empty() && BucketByPosition(i) == bucket ) - i++; // stop just over the tail. - - return i - 1; - } - -int Dictionary::EndOfClusterByPosition(int position) const - { - return TailOfClusterByPosition(position) + 1; - } - -int Dictionary::OffsetInClusterByPosition(int position) const - { - ASSERT(0 <= position && position < Capacity() && ! 
table[position].Empty()); - int head = HeadOfClusterByPosition(position); - return position - head; - } - -// Find the next valid entry after the position. Position can be -1, which means -// look for the next valid entry point altogether. -int Dictionary::Next(int position) const - { - ASSERT(table && -1 <= position && position < Capacity()); - - do - { - position++; - } while ( position < Capacity() && table[position].Empty() ); - - return position; - } - -/////////////////////////////////////////////////////////////////////////////////////////////////////// -// Debugging -/////////////////////////////////////////////////////////////////////////////////////////////////////// -#define DUMPIF(f) \ - if ( f ) \ - Dump(1) -#ifdef ZEEK_DICT_DEBUG -void Dictionary::AssertValid() const - { - bool valid = true; - int n = num_entries; - - if ( table ) - for ( int i = Capacity() - 1; i >= 0; i-- ) - if ( ! table[i].Empty() ) - n--; - - valid = (n == 0); - ASSERT(valid); - DUMPIF(! valid); - - // entries must clustered together - for ( int i = 1; i < Capacity(); i++ ) - { - if ( ! table || table[i].Empty() ) - continue; - - if ( table[i - 1].Empty() ) - { - valid = (table[i].distance == 0); - ASSERT(valid); - DUMPIF(! valid); - } - else - { - valid = (table[i].bucket >= table[i - 1].bucket); - ASSERT(valid); - DUMPIF(! valid); - - if ( table[i].bucket == table[i - 1].bucket ) - { - valid = (table[i].distance == table[i - 1].distance + 1); - ASSERT(valid); - DUMPIF(! valid); - } - else - { - valid = (table[i].distance <= table[i - 1].distance); - ASSERT(valid); - DUMPIF(! valid); - } - } - } - } -#endif // ZEEK_DICT_DEBUG - -void Dictionary::DumpKeys() const - { - if ( ! table ) - return; - - char key_file[100]; - // Detect string or binary from first key. - int i = 0; - while ( table[i].Empty() && i < Capacity() ) - i++; - - bool binary = false; - const char* key = table[i].GetKey(); - for ( int j = 0; j < table[i].key_size; j++ ) - if ( ! 
isprint(key[j]) ) - { - binary = true; - break; - } - int max_distance = 0; - - DistanceStats(max_distance); - if ( binary ) - { - char key = char(random() % 26) + 'A'; - sprintf(key_file, "%d.%d-%c.key", Length(), max_distance, key); - std::ofstream f(key_file, std::ios::binary | std::ios::out | std::ios::trunc); - for ( int idx = 0; idx < Capacity(); idx++ ) - if ( ! table[idx].Empty() ) - { - int key_size = table[idx].key_size; - f.write((const char*)&key_size, sizeof(int)); - f.write(table[idx].GetKey(), table[idx].key_size); - } - } - else - { - char key = char(random() % 26) + 'A'; - sprintf(key_file, "%d.%d-%d.ckey", Length(), max_distance, key); - std::ofstream f(key_file, std::ios::out | std::ios::trunc); - for ( int idx = 0; idx < Capacity(); idx++ ) - if ( ! table[idx].Empty() ) - { - std::string s((char*)table[idx].GetKey(), table[idx].key_size); - f << s << std::endl; - } - } - } - -void Dictionary::DistanceStats(int& max_distance, int* distances, int num_distances) const - { - max_distance = 0; - for ( int i = 0; i < num_distances; i++ ) - distances[i] = 0; - - for ( int i = 0; i < Capacity(); i++ ) - { - if ( table[i].Empty() ) - continue; - if ( table[i].distance > max_distance ) - max_distance = table[i].distance; - if ( num_distances <= 0 || ! distances ) - continue; - if ( table[i].distance >= num_distances - 1 ) - distances[num_distances - 1]++; - else - distances[table[i].distance]++; - } - } - -void Dictionary::Dump(int level) const - { - int key_size = 0; - for ( int i = 0; i < Capacity(); i++ ) - { - if ( table[i].Empty() ) - continue; - key_size += zeek::util::pad_size(table[i].key_size); - if ( ! 
table[i].value ) - continue; - } - -#define DICT_NUM_DISTANCES 5 - int distances[DICT_NUM_DISTANCES]; - int max_distance = 0; - DistanceStats(max_distance, distances, DICT_NUM_DISTANCES); - printf("cap %'7d ent %'7d %'-7d load %.2f max_dist %2d key/ent %3d lg " - "%2d remaps %1d remap_end %4d ", - Capacity(), Length(), MaxLength(), (double)Length() / (table ? Capacity() : 1), - max_distance, key_size / (Length() ? Length() : 1), log2_buckets, remaps, remap_end); - if ( Length() > 0 ) - { - for ( int i = 0; i < DICT_NUM_DISTANCES - 1; i++ ) - printf("[%d]%2d%% ", i, 100 * distances[i] / Length()); - printf("[%d+]%2d%% ", DICT_NUM_DISTANCES - 1, - 100 * distances[DICT_NUM_DISTANCES - 1] / Length()); - } - else - printf("\n"); - - printf("\n"); - if ( level >= 1 ) - { - printf("%-10s %1s %-10s %-4s %-4s %-10s %-18s %-2s\n", "Index", "*", "Bucket", "Dist", - "Off", "Hash", "FibHash", "KeySize"); - for ( int i = 0; i < Capacity(); i++ ) - if ( table[i].Empty() ) - printf("%'10d \n", i); - else - printf("%'10d %1s %'10d %4d %4d 0x%08x 0x%016" PRIx64 "(%3d) %2d\n", i, - (i <= remap_end ? "*" : ""), BucketByPosition(i), (int)table[i].distance, - OffsetInClusterByPosition(i), uint(table[i].hash), FibHash(table[i].hash), - (int)FibHash(table[i].hash) & 0xFF, (int)table[i].key_size); - } - } - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Initialization. -//////////////////////////////////////////////////////////////////////////////////////////////////// -Dictionary::Dictionary(DictOrder ordering, int initial_size) - { - if ( initial_size > 0 ) - { - // If an initial size is speicified, init the table right away. Otherwise wait until the - // first insertion to init. 
- log2_buckets = Log2(initial_size); - Init(); - } - - if ( ordering == ORDERED ) - order = new std::vector; - } - -Dictionary::~Dictionary() - { - Clear(); - } - -void Dictionary::Clear() - { - if ( table ) - { - for ( int i = Capacity() - 1; i >= 0; i-- ) - { - if ( table[i].Empty() ) - continue; - if ( delete_func ) - delete_func(table[i].value); - table[i].Clear(); - } - free(table); - table = nullptr; - } - - if ( order ) - { - delete order; - order = nullptr; - } - if ( iterators ) - { - delete iterators; - iterators = nullptr; - } - log2_buckets = 0; - num_iterators = 0; - remaps = 0; - remap_end = -1; - num_entries = 0; - max_entries = 0; - } - -void Dictionary::Init() - { - ASSERT(! table); - table = (detail::DictEntry*)malloc(sizeof(detail::DictEntry) * Capacity(true)); - for ( int i = Capacity() - 1; i >= 0; i-- ) - table[i].SetEmpty(); - } - // private void generic_delete_func(void* v) { free(v); } -////////////////////////////////////////////////////////////////////////////////////////// -// Lookup - -// Look up now also possibly modifies the entry. Why? if the entry is found but not positioned -// according to the current dict (so it's before SizeUp), it will be moved to the right -// position so next lookup is fast. -void* Dictionary::Lookup(const detail::HashKey* key) const - { - return Lookup(key->Key(), key->Size(), key->Hash()); - } - -void* Dictionary::Lookup(const void* key, int key_size, detail::hash_t h) const - { - Dictionary* d = const_cast(this); - int position = d->LookupIndex(key, key_size, h); - return position >= 0 ? table[position].value : nullptr; - } - -// for verification purposes -int Dictionary::LinearLookupIndex(const void* key, int key_size, detail::hash_t hash) const - { - for ( int i = 0; i < Capacity(); i++ ) - if ( ! table[i].Empty() && table[i].Equal((const char*)key, key_size, hash) ) - return i; - return -1; - } - -// Lookup position for all possible table_sizes caused by remapping. 
Remap it immediately -// if not in the middle of iteration. -int Dictionary::LookupIndex(const void* key, int key_size, detail::hash_t hash, - int* insert_position, int* insert_distance) - { - ASSERT_VALID(this); - if ( ! table ) - return -1; - - int bucket = BucketByHash(hash, log2_buckets); -#ifdef ZEEK_DICT_DEBUG - int linear_position = LinearLookupIndex(key, key_size, hash); -#endif // ZEEK_DICT_DEBUG - int position = LookupIndex(key, key_size, hash, bucket, Capacity(), insert_position, - insert_distance); - if ( position >= 0 ) - { - ASSERT_EQUAL(position, linear_position); // same as linearLookup - return position; - } - - for ( int i = 1; i <= remaps; i++ ) - { - int prev_bucket = BucketByHash(hash, log2_buckets - i); - if ( prev_bucket <= remap_end ) - { - // possibly here. insert_position & insert_distance returned on failed lookup is - // not valid in previous table_sizes. - position = LookupIndex(key, key_size, hash, prev_bucket, remap_end + 1); - if ( position >= 0 ) - { - ASSERT_EQUAL(position, linear_position); // same as linearLookup - // remap immediately if no iteration is on. - if ( ! num_iterators ) - { - Remap(position, &position); - ASSERT_EQUAL(position, LookupIndex(key, key_size, hash)); - } - return position; - } - } - } - // not found -#ifdef ZEEK_DICT_DEBUG - if ( linear_position >= 0 ) - { // different. stop and try to see whats happending. - ASSERT(false); - // rerun the function in debugger to track down the bug. - LookupIndex(key, key_size, hash); - } -#endif // ZEEK_DICT_DEBUG - return -1; - } - -// Returns the position of the item if it exists. Otherwise returns -1, but set the insert -// position/distance if required. The starting point for the search may not be the bucket -// for the current table size since this method is also used to search for an item in the -// previous table size. 
-int Dictionary::LookupIndex(const void* key, int key_size, detail::hash_t hash, int bucket, int end, - int* insert_position /*output*/, int* insert_distance /*output*/) - { - ASSERT(bucket >= 0 && bucket < Buckets()); - int i = bucket; - for ( ; i < end && ! table[i].Empty() && BucketByPosition(i) <= bucket; i++ ) - if ( BucketByPosition(i) == bucket && table[i].Equal((char*)key, key_size, hash) ) - return i; - - // no such cluster, or not found in the cluster. - if ( insert_position ) - *insert_position = i; - - if ( insert_distance ) - { - *insert_distance = i - bucket; - - if ( *insert_distance >= detail::TOO_FAR_TO_REACH ) - reporter->FatalErrorWithCore("Dictionary (size %d) insertion distance too far: %d", - Length(), *insert_distance); - } - - return -1; - } - -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// Insert -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -void* Dictionary::Insert(void* key, int key_size, detail::hash_t hash, void* val, bool copy_key, - bool* iterators_invalidated) - { - ASSERT_VALID(this); - - // Initialize the table if it hasn't been done yet. This saves memory storing a bunch - // of empty dicts. - if ( ! table ) - Init(); - - void* v = nullptr; - - // Look to see if this key is already in the table. If found, insert_position is the - // position of the existing element. If not, insert_position is where it'll be inserted - // and insert_distance is the distance of the key for the position. - int insert_position = -1, insert_distance = -1; - int position = LookupIndex(key, key_size, hash, &insert_position, &insert_distance); - if ( position >= 0 ) - { - v = table[position].value; - table[position].value = val; - if ( ! copy_key ) - delete[](char*) key; - - if ( order ) - { // set new v to order too. 
- auto it = std::find(order->begin(), order->end(), table[position]); - ASSERT(it != order->end()); - it->value = val; - } - - if ( iterators && ! iterators->empty() ) - // need to set new v for iterators too. - for ( auto c : *iterators ) - { - // Check to see if this iterator points at the entry we're replacing. The iterator - // keeps a copy of the element, so we need to update it too. - if ( **c == table[position] ) - (*c)->value = val; - - // Check if any of the inserted elements in this iterator point at the entry being - // replaced. Update those too. - auto it = std::find(c->inserted->begin(), c->inserted->end(), table[position]); - if ( it != c->inserted->end() ) - it->value = val; - } - } - else - { - if ( ! HaveOnlyRobustIterators() ) - { - if ( iterators_invalidated ) - *iterators_invalidated = true; - else - reporter->InternalWarning( - "Dictionary::Insert() possibly caused iterator invalidation"); - } - - // Allocate memory for key if necesary. Key is updated to reflect internal key if necessary. - detail::DictEntry entry(key, key_size, hash, val, insert_distance, copy_key); - InsertRelocateAndAdjust(entry, insert_position); - if ( order ) - order->push_back(entry); - - num_entries++; - cum_entries++; - if ( max_entries < num_entries ) - max_entries = num_entries; - if ( num_entries > ThresholdEntries() ) - SizeUp(); - } - - // Remap after insert can adjust asap to shorten period of mixed table. - // TODO: however, if remap happens right after size up, then it consumes more cpu for this - // cycle, a possible hiccup point. - if ( Remapping() ) - Remap(); - ASSERT_VALID(this); - return v; - } - -/// e.distance is adjusted to be the one at insert_position. 
-void Dictionary::InsertRelocateAndAdjust(detail::DictEntry& entry, int insert_position) - { -#ifdef ZEEK_DICT_DEBUG - entry.bucket = BucketByHash(entry.hash, log2_buckets); -#endif // ZEEK_DICT_DEBUG - int last_affected_position = insert_position; - InsertAndRelocate(entry, insert_position, &last_affected_position); - - // If remapping in progress, adjust the remap_end to step back a little to cover the new - // range if the changed range straddles over remap_end. - if ( Remapping() && insert_position <= remap_end && remap_end < last_affected_position ) - { //[i,j] range changed. if map_end in between. then possibly old entry pushed down across - // map_end. - remap_end = last_affected_position; // adjust to j on the conservative side. - } - - if ( iterators && ! iterators->empty() ) - for ( auto c : *iterators ) - AdjustOnInsert(c, entry, insert_position, last_affected_position); - } - -/// insert entry into position, relocate other entries when necessary. -void Dictionary::InsertAndRelocate(detail::DictEntry& entry, int insert_position, - int* last_affected_position) - { /// take out the head of cluster and append to the end of the cluster. - while ( true ) - { - if ( insert_position >= Capacity() ) - { - ASSERT(insert_position == Capacity()); - SizeUp(); // copied all the items to new table. as it's just copying without remapping, - // insert_position is now empty. - table[insert_position] = entry; - if ( last_affected_position ) - *last_affected_position = insert_position; - return; - } - if ( table[insert_position].Empty() ) - { // the condition to end the loop. - table[insert_position] = entry; - if ( last_affected_position ) - *last_affected_position = insert_position; - return; - } - - // the to-be-swapped-out item appends to the end of its original cluster. 
- auto t = table[insert_position]; - int next = EndOfClusterByPosition(insert_position); - t.distance += next - insert_position; - - // swap - table[insert_position] = entry; - entry = t; - insert_position = next; // append to the end of the current cluster. - } - } - -void Dictionary::AdjustOnInsert(RobustDictIterator* c, const detail::DictEntry& entry, - int insert_position, int last_affected_position) - { - // See note in Dictionary::AdjustOnInsert() above. - c->inserted->erase(std::remove(c->inserted->begin(), c->inserted->end(), entry), - c->inserted->end()); - c->visited->erase(std::remove(c->visited->begin(), c->visited->end(), entry), - c->visited->end()); - - if ( insert_position < c->next ) - c->inserted->push_back(entry); - if ( insert_position < c->next && c->next <= last_affected_position ) - { - int k = TailOfClusterByPosition(c->next); - ASSERT(k >= 0 && k < Capacity()); - c->visited->push_back(table[k]); - } - } - -void Dictionary::SizeUp() - { - int prev_capacity = Capacity(); - log2_buckets++; - int capacity = Capacity(); - table = (detail::DictEntry*)realloc(table, capacity * sizeof(detail::DictEntry)); - for ( int i = prev_capacity; i < capacity; i++ ) - table[i].SetEmpty(); - - // REmap from last to first in reverse order. SizeUp can be triggered by 2 conditions, one of - // which is that the last space in the table is occupied and there's nowhere to put new items. - // In this case, the table doubles in capacity and the item is put at the prev_capacity - // position with the old hash. We need to cover this item (?). - remap_end = prev_capacity; // prev_capacity instead of prev_capacity-1. - - // another remap starts. - remaps++; // used in Lookup() to cover SizeUp with incomplete remaps. - ASSERT( - remaps <= - log2_buckets); // because we only sizeUp, one direction. we know the previous log2_buckets. 
- } - -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// Remove -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -void* Dictionary::Remove(const void* key, int key_size, detail::hash_t hash, bool dont_delete, - bool* iterators_invalidated) - { // cookie adjustment: maintain inserts here. maintain next in lower level version. - ASSERT_VALID(this); - - ASSERT(! dont_delete); // this is a poorly designed flag. if on, the internal has nowhere to - // return and memory is lost. - - int position = LookupIndex(key, key_size, hash); - if ( position < 0 ) - return nullptr; - - if ( ! HaveOnlyRobustIterators() ) - { - if ( iterators_invalidated ) - *iterators_invalidated = true; - else - reporter->InternalWarning("Dictionary::Remove() possibly caused iterator invalidation"); - } - - detail::DictEntry entry = RemoveRelocateAndAdjust(position); - num_entries--; - ASSERT(num_entries >= 0); - // e is about to be invalid. remove it from all references. - if ( order ) - order->erase(std::remove(order->begin(), order->end(), entry), order->end()); - - void* v = entry.value; - entry.Clear(); - ASSERT_VALID(this); - return v; - } - -detail::DictEntry Dictionary::RemoveRelocateAndAdjust(int position) - { - int last_affected_position = position; - detail::DictEntry entry = RemoveAndRelocate(position, &last_affected_position); - -#ifdef ZEEK_DICT_DEBUG - // validation: index to i-1 should be continuous without empty spaces. - for ( int k = position; k < last_affected_position; k++ ) - ASSERT(! table[k].Empty()); -#endif // ZEEK_DICT_DEBUG - - if ( iterators && ! 
iterators->empty() ) - for ( auto c : *iterators ) - AdjustOnRemove(c, entry, position, last_affected_position); - - return entry; - } - -detail::DictEntry Dictionary::RemoveAndRelocate(int position, int* last_affected_position) - { - // fill the empty position with the tail of the cluster of position+1. - ASSERT(position >= 0 && position < Capacity() && ! table[position].Empty()); - - detail::DictEntry entry = table[position]; - while ( true ) - { - if ( position == Capacity() - 1 || table[position + 1].Empty() || - table[position + 1].distance == 0 ) - { - // no next cluster to fill, or next position is empty or next position is already in - // perfect bucket. - table[position].SetEmpty(); - if ( last_affected_position ) - *last_affected_position = position; - return entry; - } - int next = TailOfClusterByPosition(position + 1); - table[position] = table[next]; - table[position].distance -= next - position; // distance improved for the item. - position = next; - } - - return entry; - } - -void Dictionary::AdjustOnRemove(RobustDictIterator* c, const detail::DictEntry& entry, int position, - int last_affected_position) - { - // See note in Dictionary::AdjustOnInsert() above. - c->inserted->erase(std::remove(c->inserted->begin(), c->inserted->end(), entry), - c->inserted->end()); - c->visited->erase(std::remove(c->visited->begin(), c->visited->end(), entry), - c->visited->end()); - - if ( position < c->next && c->next <= last_affected_position ) - { - int moved = HeadOfClusterByPosition(c->next - 1); - if ( moved < position ) - moved = position; - c->inserted->push_back(table[moved]); - } - - // if not already the end of the dictionary, adjust next to a valid one. - if ( c->next < Capacity() && table[c->next].Empty() ) - c->next = Next(c->next); - - if ( c->curr == entry ) - { - if ( c->next >= 0 && c->next < Capacity() && ! 
table[c->next].Empty() ) - c->curr = table[c->next]; - else - c->curr = detail::DictEntry(nullptr); // -> c == end_robust() - } - } - -/////////////////////////////////////////////////////////////////////////////////////////////////// -// Remap -/////////////////////////////////////////////////////////////////////////////////////////////////// - -void Dictionary::Remap() - { - /// since remap should be very fast. take more at a time. - /// delay Remap when cookie is there. hard to handle cookie iteration while size changes. - /// remap from bottom up. - /// remap creates two parts of the dict: [0,remap_end] (remap_end, ...]. the former is mixed - /// with old/new entries; the latter contains all new entries. - /// - if ( num_iterators > 0 ) - return; - - int left = detail::DICT_REMAP_ENTRIES; - while ( remap_end >= 0 && left > 0 ) - { - if ( ! table[remap_end].Empty() && Remap(remap_end) ) - left--; - else //< successful Remap may increase remap_end in the case of SizeUp due to insert. if so, - // remap_end need to be worked on again. - remap_end--; - } - if ( remap_end < 0 ) - remaps = 0; // done remapping. - } - -bool Dictionary::Remap(int position, int* new_position) - { - ASSERT_VALID(this); - /// Remap changes item positions by remove() and insert(). to avoid excessive operation. avoid - /// it when safe iteration is in progress. - ASSERT(! iterators || iterators->empty()); - int current = BucketByPosition(position); // current bucket - int expected = BucketByHash(table[position].hash, log2_buckets); // expected bucket in new - // table. - // equal because 1: it's a new item, 2: it's an old item, but new bucket is the same as old. 50% - // of old items act this way due to fibhash. - if ( current == expected ) - return false; - detail::DictEntry entry = RemoveAndRelocate( - position); // no iteration cookies to adjust, no need for last_affected_position. -#ifdef ZEEK_DICT_DEBUG - entry.bucket = expected; -#endif // ZEEK_DICT_DEBUG - - // find insert position. 
- int insert_position = EndOfClusterByBucket(expected); - if ( new_position ) - *new_position = insert_position; - entry.distance = insert_position - expected; - InsertAndRelocate( - entry, - insert_position); // no iteration cookies to adjust, no need for last_affected_position. - ASSERT_VALID(this); - return true; - } - -void* Dictionary::NthEntry(int n, const void*& key, int& key_size) const - { - if ( ! order || n < 0 || n >= Length() ) - return nullptr; - detail::DictEntry entry = (*order)[n]; - key = entry.GetKey(); - key_size = entry.key_size; - return entry.value; - } - -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// Iteration -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -DictIterator::DictIterator(const Dictionary* d, detail::DictEntry* begin, detail::DictEntry* end) - : curr(begin), end(end) - { - // Make sure that we're starting on a non-empty element. - while ( curr != end && curr->Empty() ) - ++curr; - - // Cast away the constness so that the number of iterators can be modified in the dictionary. - // This does violate the constness guarantees of const-begin()/end() and cbegin()/cend(), but - // we're not modifying the actual data in the collection, just a counter in the wrapper of the - // collection. - dict = const_cast(d); - dict->IncrIters(); - } - -DictIterator::~DictIterator() - { - if ( dict ) - { - assert(dict->num_iterators > 0); - dict->DecrIters(); - } - } - -DictIterator& DictIterator::operator++() - { - // The non-robust case is easy. Just advanced the current position forward until you find - // one isn't empty and isn't the end. 
- do - { - ++curr; - } while ( curr != end && curr->Empty() ); - - return *this; - } - -DictIterator::DictIterator(const DictIterator& that) - { - if ( this == &that ) - return; - - if ( dict ) - { - assert(dict->num_iterators > 0); - dict->DecrIters(); - } - - dict = that.dict; - curr = that.curr; - end = that.end; - dict->IncrIters(); - } - -DictIterator& DictIterator::operator=(const DictIterator& that) - { - if ( this == &that ) - return *this; - - if ( dict ) - { - assert(dict->num_iterators > 0); - dict->DecrIters(); - } - - dict = that.dict; - curr = that.curr; - end = that.end; - dict->IncrIters(); - - return *this; - } - -DictIterator::DictIterator(DictIterator&& that) - { - if ( this == &that ) - return; - - if ( dict ) - { - assert(dict->num_iterators > 0); - dict->DecrIters(); - } - - dict = that.dict; - curr = that.curr; - end = that.end; - - that.dict = nullptr; - } - -DictIterator& DictIterator::operator=(DictIterator&& that) - { - if ( this == &that ) - return *this; - - if ( dict ) - { - assert(dict->num_iterators > 0); - dict->DecrIters(); - } - - dict = that.dict; - curr = that.curr; - end = that.end; - - that.dict = nullptr; - - return *this; - } - -RobustDictIterator Dictionary::MakeRobustIterator() - { - if ( ! iterators ) - iterators = new std::vector; - - return {this}; - } - -detail::DictEntry Dictionary::GetNextRobustIteration(RobustDictIterator* iter) - { - // If there are any inserted entries, return them first. - // That keeps the list small and helps avoiding searching - // a large list when deleting an entry. - if ( ! table ) - { - iter->Complete(); - return detail::DictEntry(nullptr); // end of iteration - } - - if ( iter->inserted && ! iter->inserted->empty() ) - { - // Return the last one. Order doesn't matter, - // and removing from the tail is cheaper. 
- detail::DictEntry e = iter->inserted->back(); - iter->inserted->pop_back(); - return e; - } - - if ( iter->next < 0 ) - iter->next = Next(-1); - - if ( iter->next < Capacity() && table[iter->next].Empty() ) - { - // [Robin] I believe this means that the table has resized in a way - // that we're now inside the overflow area where elements are empty, - // because elsewhere empty slots aren't allowed. Assuming that's right, - // then it means we'll always be at the end of the table now and could - // also just set `next` to capacity. However, just to be sure, we - // instead reuse logic from below to move forward "to a valid position" - // and then double check, through an assertion in debug mode, that it's - // actually the end. If this ever triggered, the above assumption would - // be wrong (but the Next() call would probably still be right). - iter->next = Next(iter->next); - ASSERT(iter->next == Capacity()); - } - - // Filter out visited keys. - int capacity = Capacity(); - if ( iter->visited && ! iter->visited->empty() ) - // Filter out visited entries. - while ( iter->next < capacity ) - { - ASSERT(! table[iter->next].Empty()); - auto it = std::find(iter->visited->begin(), iter->visited->end(), table[iter->next]); - if ( it == iter->visited->end() ) - break; - iter->visited->erase(it); - iter->next = Next(iter->next); - } - - if ( iter->next >= capacity ) - { - iter->Complete(); - return detail::DictEntry(nullptr); // end of iteration - } - - ASSERT(! table[iter->next].Empty()); - detail::DictEntry e = table[iter->next]; - - // prepare for next time. - iter->next = Next(iter->next); - return e; - } - -RobustDictIterator::RobustDictIterator(Dictionary* d) : curr(nullptr), dict(d) - { - next = -1; - inserted = new std::vector(); - visited = new std::vector(); - - dict->IncrIters(); - dict->iterators->push_back(this); - - // Advance the iterator one step so that we're at the first element. 
- curr = dict->GetNextRobustIteration(this); - } - -RobustDictIterator::RobustDictIterator(const RobustDictIterator& other) : curr(nullptr) - { - dict = nullptr; - - if ( other.dict ) - { - next = other.next; - inserted = new std::vector(); - visited = new std::vector(); - - if ( other.inserted ) - std::copy(other.inserted->begin(), other.inserted->end(), - std::back_inserter(*inserted)); - - if ( other.visited ) - std::copy(other.visited->begin(), other.visited->end(), std::back_inserter(*visited)); - - dict = other.dict; - dict->IncrIters(); - dict->iterators->push_back(this); - - curr = other.curr; - } - } - -RobustDictIterator::RobustDictIterator(RobustDictIterator&& other) : curr(nullptr) - { - dict = nullptr; - - if ( other.dict ) - { - next = other.next; - inserted = other.inserted; - visited = other.visited; - - dict = other.dict; - dict->iterators->push_back(this); - dict->iterators->erase( - std::remove(dict->iterators->begin(), dict->iterators->end(), &other), - dict->iterators->end()); - other.dict = nullptr; - - curr = std::move(other.curr); - } - } - -RobustDictIterator::~RobustDictIterator() - { - Complete(); - } - -void RobustDictIterator::Complete() - { - if ( dict ) - { - assert(dict->num_iterators > 0); - dict->DecrIters(); - - dict->iterators->erase(std::remove(dict->iterators->begin(), dict->iterators->end(), this), - dict->iterators->end()); - - delete inserted; - delete visited; - - inserted = nullptr; - visited = nullptr; - dict = nullptr; - } - } - -RobustDictIterator& RobustDictIterator::operator++() - { - curr = dict->GetNextRobustIteration(this); - return *this; - } - } // namespace zeek diff --git a/src/Dict.h b/src/Dict.h index dadedda2a4..31cf94a606 100644 --- a/src/Dict.h +++ b/src/Dict.h @@ -2,19 +2,30 @@ #pragma once +#include #include +#include #include #include #include "zeek/Hash.h" +#include "zeek/Reporter.h" // Type for function to be called when deleting elements. 
using dict_delete_func = void (*)(void*); +#if defined(DEBUG) && defined(ZEEK_DICT_DEBUG) +#define ASSERT_VALID(o) o->AssertValid() +#define ASSERT_EQUAL(a, b) ASSERT(a == b) +#else +#define ASSERT_VALID(o) +#define ASSERT_EQUAL(a, b) +#endif // DEBUG + namespace zeek { -class Dictionary; +template class Dictionary; enum DictOrder { @@ -28,8 +39,6 @@ extern void generic_delete_func(void*); namespace detail { -class DictEntry; - // Default number of hash buckets in dictionary. The dictionary will increase the size // of the hash table as needed. constexpr uint32_t HASH_MASK = 0xFFFFFFFF; // only lower 32 bits. @@ -59,7 +68,7 @@ constexpr uint16_t TOO_FAR_TO_REACH = 0xFFFF; /** * An entry stored in the dictionary. */ -class DictEntry +template class DictEntry { public: #ifdef DEBUG @@ -76,13 +85,13 @@ public: // Lower 4 bytes of the 8-byte hash, which is used to calculate the position in the table. uint32_t hash = 0; - void* value = nullptr; + T* value = nullptr; union { char key_here[8]; // hold key len<=8. when over 8, it's a pointer to real keys. char* key; }; - DictEntry(void* arg_key, int key_size = 0, hash_t hash = 0, void* value = nullptr, + DictEntry(void* arg_key, int key_size = 0, hash_t hash = 0, T* value = nullptr, int16_t d = TOO_FAR_TO_REACH, bool copy_key = false) : distance(d), key_size(key_size), hash((uint32_t)hash), value(value) { @@ -136,42 +145,135 @@ public: return std::make_unique(GetKey(), key_size, hash); } - template T GetValue() const { return static_cast(value); } - bool Equal(const char* arg_key, int arg_key_size, hash_t arg_hash) const { // only 40-bit hash comparison. return (0 == ((hash ^ arg_hash) & HASH_MASK)) && key_size == arg_key_size && 0 == memcmp(GetKey(), arg_key, key_size); } + + template + [[deprecated("Remove in v6.1. 
Access the value in the entry directly.")]] T* GetValue() const + { + static_assert(std::is_same_v, "Type of DictEntry and type requested are different"); + return value; + } + bool operator==(const DictEntry& r) const { return Equal(r.GetKey(), r.key_size, r.hash); } bool operator!=(const DictEntry& r) const { return ! Equal(r.GetKey(), r.key_size, r.hash); } }; } // namespace detail -class DictIterator +template class DictIterator { public: - using value_type = detail::DictEntry; - using reference = detail::DictEntry&; - using pointer = detail::DictEntry*; + using value_type = detail::DictEntry; + using reference = detail::DictEntry&; + using pointer = detail::DictEntry*; using difference_type = std::ptrdiff_t; using iterator_category = std::forward_iterator_tag; DictIterator() = default; - ~DictIterator(); + ~DictIterator() + { + if ( dict ) + { + assert(dict->num_iterators > 0); + dict->DecrIters(); + } + } - DictIterator(const DictIterator& that); - DictIterator& operator=(const DictIterator& that); - DictIterator(DictIterator&& that); - DictIterator& operator=(DictIterator&& that); + DictIterator(const DictIterator& that) + { + if ( this == &that ) + return; + + if ( dict ) + { + assert(dict->num_iterators > 0); + dict->DecrIters(); + } + + dict = that.dict; + curr = that.curr; + end = that.end; + dict->IncrIters(); + } + + DictIterator& operator=(const DictIterator& that) + { + if ( this == &that ) + return *this; + + if ( dict ) + { + assert(dict->num_iterators > 0); + dict->DecrIters(); + } + + dict = that.dict; + curr = that.curr; + end = that.end; + dict->IncrIters(); + + return *this; + } + + DictIterator(DictIterator&& that) + { + if ( this == &that ) + return; + + if ( dict ) + { + assert(dict->num_iterators > 0); + dict->DecrIters(); + } + + dict = that.dict; + curr = that.curr; + end = that.end; + + that.dict = nullptr; + } + + DictIterator& operator=(DictIterator&& that) + { + if ( this == &that ) + return *this; + + if ( dict ) + { + 
assert(dict->num_iterators > 0); + dict->DecrIters(); + } + + dict = that.dict; + curr = that.curr; + end = that.end; + + that.dict = nullptr; + + return *this; + } reference operator*() { return *curr; } reference operator*() const { return *curr; } pointer operator->() { return curr; } pointer operator->() const { return curr; } - DictIterator& operator++(); + DictIterator& operator++() + { + // The non-robust case is easy. Just advanced the current position forward until you find + // one isn't empty and isn't the end. + do + { + ++curr; + } while ( curr != end && curr->Empty() ); + + return *this; + } + DictIterator operator++(int) { auto temp(*this); @@ -183,34 +285,110 @@ public: bool operator!=(const DictIterator& that) const { return ! (*this == that); } private: - friend class Dictionary; + friend class Dictionary; - DictIterator(const Dictionary* d, detail::DictEntry* begin, detail::DictEntry* end); + DictIterator(const Dictionary* d, detail::DictEntry* begin, detail::DictEntry* end) + : curr(begin), end(end) + { + // Make sure that we're starting on a non-empty element. + while ( curr != end && curr->Empty() ) + ++curr; - Dictionary* dict = nullptr; - detail::DictEntry* curr = nullptr; - detail::DictEntry* end = nullptr; + // Cast away the constness so that the number of iterators can be modified in the + // dictionary. This does violate the constness guarantees of const-begin()/end() and + // cbegin()/cend(), but we're not modifying the actual data in the collection, just a + // counter in the wrapper of the collection. 
+ dict = const_cast*>(d); + dict->IncrIters(); + } + + Dictionary* dict = nullptr; + detail::DictEntry* curr = nullptr; + detail::DictEntry* end = nullptr; }; -class RobustDictIterator +template class RobustDictIterator { public: - using value_type = detail::DictEntry; - using reference = detail::DictEntry&; - using pointer = detail::DictEntry*; + using value_type = detail::DictEntry; + using reference = detail::DictEntry&; + using pointer = detail::DictEntry*; using difference_type = std::ptrdiff_t; using iterator_category = std::forward_iterator_tag; RobustDictIterator() : curr(nullptr) { } - RobustDictIterator(Dictionary* d); - RobustDictIterator(const RobustDictIterator& other); - RobustDictIterator(RobustDictIterator&& other); - ~RobustDictIterator(); + + RobustDictIterator(Dictionary* d) : curr(nullptr), dict(d) + { + next = -1; + inserted = new std::vector>(); + visited = new std::vector>(); + + dict->IncrIters(); + dict->iterators->push_back(this); + + // Advance the iterator one step so that we're at the first element. 
+ curr = dict->GetNextRobustIteration(this); + } + + RobustDictIterator(const RobustDictIterator& other) : curr(nullptr) + { + dict = nullptr; + + if ( other.dict ) + { + next = other.next; + inserted = new std::vector>(); + visited = new std::vector>(); + + if ( other.inserted ) + std::copy(other.inserted->begin(), other.inserted->end(), + std::back_inserter(*inserted)); + + if ( other.visited ) + std::copy(other.visited->begin(), other.visited->end(), + std::back_inserter(*visited)); + + dict = other.dict; + dict->IncrIters(); + dict->iterators->push_back(this); + + curr = other.curr; + } + } + + RobustDictIterator(RobustDictIterator&& other) : curr(nullptr) + { + dict = nullptr; + + if ( other.dict ) + { + next = other.next; + inserted = other.inserted; + visited = other.visited; + + dict = other.dict; + dict->iterators->push_back(this); + dict->iterators->erase( + std::remove(dict->iterators->begin(), dict->iterators->end(), &other), + dict->iterators->end()); + other.dict = nullptr; + + curr = std::move(other.curr); + } + } + + ~RobustDictIterator() { Complete(); } reference operator*() { return curr; } pointer operator->() { return &curr; } - RobustDictIterator& operator++(); + RobustDictIterator& operator++() + { + curr = dict->GetNextRobustIteration(this); + return *this; + } + RobustDictIterator operator++(int) { auto temp(*this); @@ -222,19 +400,37 @@ public: bool operator!=(const RobustDictIterator& that) const { return ! (*this == that); } private: - friend class Dictionary; + friend class Dictionary; - void Complete(); + void Complete() + { + if ( dict ) + { + assert(dict->num_iterators > 0); + dict->DecrIters(); + + dict->iterators->erase( + std::remove(dict->iterators->begin(), dict->iterators->end(), this), + dict->iterators->end()); + + delete inserted; + delete visited; + + inserted = nullptr; + visited = nullptr; + dict = nullptr; + } + } // Tracks the new entries inserted while iterating. 
- std::vector* inserted = nullptr; + std::vector>* inserted = nullptr; // Tracks the entries already visited but were moved across the next iteration // point due to an insertion. - std::vector* visited = nullptr; + std::vector>* visited = nullptr; - detail::DictEntry curr; - Dictionary* dict = nullptr; + detail::DictEntry curr; + Dictionary* dict = nullptr; int next = -1; }; @@ -250,25 +446,50 @@ private: * the keys but not the values. The dictionary size will be bounded at around 100K. 1M * entries is the absolute limit. Only Connections use that many entries, and that is rare. */ -class Dictionary +template class Dictionary { public: explicit Dictionary(DictOrder ordering = UNORDERED, - int initial_size = detail::DEFAULT_DICT_SIZE); - ~Dictionary(); + int initial_size = detail::DEFAULT_DICT_SIZE) + { + if ( initial_size > 0 ) + { + // If an initial size is speicified, init the table right away. Otherwise wait until the + // first insertion to init. + log2_buckets = Log2(initial_size); + Init(); + } + + if ( ordering == ORDERED ) + order = new std::vector>; + } + + ~Dictionary() { Clear(); } // Member functions for looking up a key, inserting/changing its // contents, and deleting it. These come in two flavors: one // which takes a zeek::detail::HashKey, and the other which takes a raw key, // its size, and its (unmodulated) hash. // lookup may move the key to right place if in the old zone to speed up the next lookup. - void* Lookup(const detail::HashKey* key) const; - void* Lookup(const void* key, int key_size, detail::hash_t h) const; + T* Lookup(const detail::HashKey* key) const + { + return Lookup(key->Key(), key->Size(), key->Hash()); + } + + T* Lookup(const void* key, int key_size, detail::hash_t h) const + { + // Look up possibly modifies the entry. Why? if the entry is found but not positioned + // according to the current dict (so it's before SizeUp), it will be moved to the right + // position so next lookup is fast. 
+ Dictionary* d = const_cast(this); + int position = d->LookupIndex(key, key_size, h); + return position >= 0 ? table[position].value : nullptr; + } // Returns previous value, or 0 if none. // If iterators_invalidated is supplied, its value is set to true // if the removal may have invalidated any existing iterators. - void* Insert(detail::HashKey* key, void* val, bool* iterators_invalidated = nullptr) + T* Insert(detail::HashKey* key, T* val, bool* iterators_invalidated = nullptr) { return Insert(key->TakeKey(), key->Size(), key->Hash(), val, false, iterators_invalidated); } @@ -278,20 +499,130 @@ public: // manage as needed. // If iterators_invalidated is supplied, its value is set to true // if the removal may have invalidated any existing iterators. - void* Insert(void* key, int key_size, detail::hash_t hash, void* val, bool copy_key, - bool* iterators_invalidated = nullptr); + T* Insert(void* key, int key_size, detail::hash_t hash, T* val, bool copy_key, + bool* iterators_invalidated = nullptr) + { + ASSERT_VALID(this); + + // Initialize the table if it hasn't been done yet. This saves memory storing a bunch + // of empty dicts. + if ( ! table ) + Init(); + + T* v = nullptr; + + // Look to see if this key is already in the table. If found, insert_position is the + // position of the existing element. If not, insert_position is where it'll be inserted + // and insert_distance is the distance of the key for the position. + int insert_position = -1, insert_distance = -1; + int position = LookupIndex(key, key_size, hash, &insert_position, &insert_distance); + if ( position >= 0 ) + { + v = table[position].value; + table[position].value = val; + if ( ! copy_key ) + delete[](char*) key; + + if ( order ) + { // set new v to order too. + auto it = std::find(order->begin(), order->end(), table[position]); + ASSERT(it != order->end()); + it->value = val; + } + + if ( iterators && ! iterators->empty() ) + // need to set new v for iterators too. 
+ for ( auto c : *iterators ) + { + // Check to see if this iterator points at the entry we're replacing. The + // iterator keeps a copy of the element, so we need to update it too. + if ( **c == table[position] ) + (*c)->value = val; + + // Check if any of the inserted elements in this iterator point at the entry + // being replaced. Update those too. + auto it = std::find(c->inserted->begin(), c->inserted->end(), table[position]); + if ( it != c->inserted->end() ) + it->value = val; + } + } + else + { + if ( ! HaveOnlyRobustIterators() ) + { + if ( iterators_invalidated ) + *iterators_invalidated = true; + else + reporter->InternalWarning( + "Dictionary::Insert() possibly caused iterator invalidation"); + } + + // Allocate memory for key if necesary. Key is updated to reflect internal key if + // necessary. + detail::DictEntry entry(key, key_size, hash, val, insert_distance, copy_key); + InsertRelocateAndAdjust(entry, insert_position); + if ( order ) + order->push_back(entry); + + num_entries++; + cum_entries++; + if ( max_entries < num_entries ) + max_entries = num_entries; + if ( num_entries > ThresholdEntries() ) + SizeUp(); + } + + // Remap after insert can adjust asap to shorten period of mixed table. + // TODO: however, if remap happens right after size up, then it consumes more cpu for this + // cycle, a possible hiccup point. + if ( Remapping() ) + Remap(); + ASSERT_VALID(this); + return v; + } // Removes the given element. Returns a pointer to the element in // case it needs to be deleted. Returns 0 if no such element exists. // If dontdelete is true, the key's bytes will not be deleted. // If iterators_invalidated is supplied, its value is set to true // if the removal may have invalidated any existing iterators. 
- void* Remove(const detail::HashKey* key, bool* iterators_invalidated = nullptr) + T* Remove(const detail::HashKey* key, bool* iterators_invalidated = nullptr) { return Remove(key->Key(), key->Size(), key->Hash(), false, iterators_invalidated); } - void* Remove(const void* key, int key_size, detail::hash_t hash, bool dont_delete = false, - bool* iterators_invalidated = nullptr); + T* Remove(const void* key, int key_size, detail::hash_t hash, bool dont_delete = false, + bool* iterators_invalidated = nullptr) + { // cookie adjustment: maintain inserts here. maintain next in lower level version. + ASSERT_VALID(this); + + ASSERT(! dont_delete); // this is a poorly designed flag. if on, the internal has nowhere to + // return and memory is lost. + + int position = LookupIndex(key, key_size, hash); + if ( position < 0 ) + return nullptr; + + if ( ! HaveOnlyRobustIterators() ) + { + if ( iterators_invalidated ) + *iterators_invalidated = true; + else + reporter->InternalWarning( + "Dictionary::Remove() possibly caused iterator invalidation"); + } + + detail::DictEntry entry = RemoveRelocateAndAdjust(position); + num_entries--; + ASSERT(num_entries >= 0); + // e is about to be invalid. remove it from all references. + if ( order ) + order->erase(std::remove(order->begin(), order->end(), entry), order->end()); + + T* v = entry.value; + entry.Clear(); + ASSERT_VALID(this); + return v; + } // Number of entries. int Length() const { return num_entries; } @@ -311,37 +642,248 @@ public: // // Returns nil if the dictionary is not ordered or if "n" is out // of range. - void* NthEntry(int n) const + T* NthEntry(int n) const { const void* key; int key_len; return NthEntry(n, key, key_len); } - void* NthEntry(int n, const void*& key, int& key_len) const; + + T* NthEntry(int n, const void*& key, int& key_size) const + { + if ( ! 
order || n < 0 || n >= Length() ) + return nullptr; + detail::DictEntry entry = (*order)[n]; + key = entry.GetKey(); + key_size = entry.key_size; + return entry.value; + } void SetDeleteFunc(dict_delete_func f) { delete_func = f; } // Remove all entries. - void Clear(); + void Clear() + { + if ( table ) + { + for ( int i = Capacity() - 1; i >= 0; i-- ) + { + if ( table[i].Empty() ) + continue; + if ( delete_func ) + delete_func(table[i].value); + table[i].Clear(); + } + free(table); + table = nullptr; + } + + if ( order ) + { + delete order; + order = nullptr; + } + if ( iterators ) + { + delete iterators; + iterators = nullptr; + } + log2_buckets = 0; + num_iterators = 0; + remaps = 0; + remap_end = -1; + num_entries = 0; + max_entries = 0; + } /// The capacity of the table, Buckets + Overflow Size. - int Capacity(bool expected = false) const; + int Capacity(bool expected = false) const + { + int capacity = (1 << log2_buckets) + (log2_buckets + 0); + if ( expected ) + return capacity; + return table ? capacity : 0; + } // Debugging -#ifdef DEBUG - void AssertValid() const; -#endif // DEBUG - void Dump(int level = 0) const; - void DistanceStats(int& max_distance, int* distances = 0, int num_distances = 0) const; - void DumpKeys() const; +#define DUMPIF(f) \ + if ( f ) \ + Dump(1) + +#ifdef ZEEK_DICT_DEBUG + void AssertValid() const + { + bool valid = true; + int n = num_entries; + + if ( table ) + for ( int i = Capacity() - 1; i >= 0; i-- ) + if ( ! table[i].Empty() ) + n--; + + valid = (n == 0); + ASSERT(valid); + DUMPIF(! valid); + + // entries must clustered together + for ( int i = 1; i < Capacity(); i++ ) + { + if ( ! table || table[i].Empty() ) + continue; + + if ( table[i - 1].Empty() ) + { + valid = (table[i].distance == 0); + ASSERT(valid); + DUMPIF(! valid); + } + else + { + valid = (table[i].bucket >= table[i - 1].bucket); + ASSERT(valid); + DUMPIF(! 
valid); + + if ( table[i].bucket == table[i - 1].bucket ) + { + valid = (table[i].distance == table[i - 1].distance + 1); + ASSERT(valid); + DUMPIF(! valid); + } + else + { + valid = (table[i].distance <= table[i - 1].distance); + ASSERT(valid); + DUMPIF(! valid); + } + } + } + } +#endif // ZEEK_DICT_DEBUG + + void Dump(int level = 0) const + { + int key_size = 0; + for ( int i = 0; i < Capacity(); i++ ) + { + if ( table[i].Empty() ) + continue; + key_size += zeek::util::pad_size(table[i].key_size); + if ( ! table[i].value ) + continue; + } + +#define DICT_NUM_DISTANCES 5 + int distances[DICT_NUM_DISTANCES]; + int max_distance = 0; + DistanceStats(max_distance, distances, DICT_NUM_DISTANCES); + printf("cap %'7d ent %'7d %'-7d load %.2f max_dist %2d key/ent %3d lg " + "%2d remaps %1d remap_end %4d ", + Capacity(), Length(), MaxLength(), (double)Length() / (table ? Capacity() : 1), + max_distance, key_size / (Length() ? Length() : 1), log2_buckets, remaps, remap_end); + if ( Length() > 0 ) + { + for ( int i = 0; i < DICT_NUM_DISTANCES - 1; i++ ) + printf("[%d]%2d%% ", i, 100 * distances[i] / Length()); + printf("[%d+]%2d%% ", DICT_NUM_DISTANCES - 1, + 100 * distances[DICT_NUM_DISTANCES - 1] / Length()); + } + else + printf("\n"); + + printf("\n"); + if ( level >= 1 ) + { + printf("%-10s %1s %-10s %-4s %-4s %-10s %-18s %-2s\n", "Index", "*", "Bucket", "Dist", + "Off", "Hash", "FibHash", "KeySize"); + for ( int i = 0; i < Capacity(); i++ ) + if ( table[i].Empty() ) + printf("%'10d \n", i); + else + printf("%'10d %1s %'10d %4d %4d 0x%08x 0x%016" PRIx64 "(%3d) %2d\n", i, + (i <= remap_end ? 
"*" : ""), BucketByPosition(i), (int)table[i].distance, + OffsetInClusterByPosition(i), uint(table[i].hash), + FibHash(table[i].hash), (int)FibHash(table[i].hash) & 0xFF, + (int)table[i].key_size); + } + } + + void DistanceStats(int& max_distance, int* distances = 0, int num_distances = 0) const + { + max_distance = 0; + for ( int i = 0; i < num_distances; i++ ) + distances[i] = 0; + + for ( int i = 0; i < Capacity(); i++ ) + { + if ( table[i].Empty() ) + continue; + if ( table[i].distance > max_distance ) + max_distance = table[i].distance; + if ( num_distances <= 0 || ! distances ) + continue; + if ( table[i].distance >= num_distances - 1 ) + distances[num_distances - 1]++; + else + distances[table[i].distance]++; + } + } + + void DumpKeys() const + { + if ( ! table ) + return; + + char key_file[100]; + // Detect string or binary from first key. + int i = 0; + while ( table[i].Empty() && i < Capacity() ) + i++; + + bool binary = false; + const char* key = table[i].GetKey(); + for ( int j = 0; j < table[i].key_size; j++ ) + if ( ! isprint(key[j]) ) + { + binary = true; + break; + } + int max_distance = 0; + + DistanceStats(max_distance); + if ( binary ) + { + char key = char(random() % 26) + 'A'; + sprintf(key_file, "%d.%d-%c.key", Length(), max_distance, key); + std::ofstream f(key_file, std::ios::binary | std::ios::out | std::ios::trunc); + for ( int idx = 0; idx < Capacity(); idx++ ) + if ( ! table[idx].Empty() ) + { + int key_size = table[idx].key_size; + f.write((const char*)&key_size, sizeof(int)); + f.write(table[idx].GetKey(), table[idx].key_size); + } + } + else + { + char key = char(random() % 26) + 'A'; + sprintf(key_file, "%d.%d-%d.ckey", Length(), max_distance, key); + std::ofstream f(key_file, std::ios::out | std::ios::trunc); + for ( int idx = 0; idx < Capacity(); idx++ ) + if ( ! 
table[idx].Empty() ) + { + std::string s((char*)table[idx].GetKey(), table[idx].key_size); + f << s << std::endl; + } + } + } // Type traits needed for some of the std algorithms to work - using value_type = detail::DictEntry; - using pointer = detail::DictEntry*; - using const_pointer = const detail::DictEntry*; + using value_type = detail::DictEntry; + using pointer = detail::DictEntry*; + using const_pointer = const detail::DictEntry*; // Iterator support - using iterator = DictIterator; + using iterator = DictIterator; using const_iterator = const iterator; using reverse_iterator = std::reverse_iterator; using const_reverse_iterator = std::reverse_iterator; @@ -353,101 +895,567 @@ public: const_iterator cbegin() { return {this, table, table + Capacity()}; } const_iterator cend() { return {this, table + Capacity(), table + Capacity()}; } - RobustDictIterator begin_robust() { return MakeRobustIterator(); } - RobustDictIterator end_robust() { return RobustDictIterator(); } + RobustDictIterator begin_robust() { return MakeRobustIterator(); } + RobustDictIterator end_robust() { return RobustDictIterator(); } private: - friend zeek::DictIterator; - friend zeek::RobustDictIterator; + friend zeek::DictIterator; + friend zeek::RobustDictIterator; /// Buckets of the table, not including overflow size. - int Buckets(bool expected = false) const; + int Buckets(bool expected = false) const + { + int buckets = (1 << log2_buckets); + if ( expected ) + return buckets; + return table ? buckets : 0; + } // bucket math - int Log2(int num) const; - int ThresholdEntries() const; + int Log2(int num) const + { + int i = 0; + while ( num >>= 1 ) + i++; + return i; + } + + int ThresholdEntries() const + { + // Increase the size of the dictionary when it is 75% full. However, when the dictionary + // is small ( <= 20 elements ), only resize it when it's 100% full. The dictionary will + // always resize when the current insertion causes it to be full. 
This ensures that the + // current insertion should always be successful. + int capacity = Capacity(); + if ( log2_buckets <= detail::DICT_THRESHOLD_BITS ) + return capacity; // 20 or less elements, 1.0, only size up when necessary. + return capacity - (capacity >> detail::DICT_LOAD_FACTOR_BITS); + } // Used to improve the distribution of the original hash. - detail::hash_t FibHash(detail::hash_t h) const; + detail::hash_t FibHash(detail::hash_t h) const + { + // GoldenRatio phi = (sqrt(5)+1)/2 = 1.6180339887... + // 1/phi = phi - 1 + h &= detail::HASH_MASK; + h *= 11400714819323198485llu; // 2^64/phi + return h; + } // Maps a hash to the appropriate n-bit table bucket. - int BucketByHash(detail::hash_t h, int bit) const; + int BucketByHash(detail::hash_t h, int bit) const + { + ASSERT(bit >= 0); + if ( ! bit ) + return 0; //<< >> breaks on 64. + +#ifdef DICT_NO_FIB_HASH + detail::hash_t hash = h; +#else + detail::hash_t hash = FibHash(h); +#endif + + int m = 64 - bit; + hash <<= m; + hash >>= m; + + return hash; + } // Given a position of a non-empty item in the table, find the related bucket. - int BucketByPosition(int position) const; + int BucketByPosition(int position) const + { + ASSERT(table && position >= 0 && position < Capacity() && ! table[position].Empty()); + return position - table[position].distance; + } // Given a bucket of a non-empty item in the table, find the end of its cluster. // The end should be equal to tail+1 if tail exists. Otherwise it's the tail of // the just-smaller cluster + 1. - int EndOfClusterByBucket(int bucket) const; + int EndOfClusterByBucket(int bucket) const + { + ASSERT(bucket >= 0 && bucket < Buckets()); + int i = bucket; + while ( i < Capacity() && ! table[i].Empty() && BucketByPosition(i) <= bucket ) + i++; + return i; + } // Given a position of a non-empty item in the table, find the head of its cluster. 
- int HeadOfClusterByPosition(int position) const; + int HeadOfClusterByPosition(int position) const + { + // Finding the first entry in the bucket chain. + ASSERT(0 <= position && position < Capacity() && ! table[position].Empty()); + + // Look backward for the first item with the same bucket as myself. + int bucket = BucketByPosition(position); + int i = position; + while ( i >= bucket && BucketByPosition(i) == bucket ) + i--; + + return i == bucket ? i : i + 1; + } // Given a position of a non-empty item in the table, find the tail of its cluster. - int TailOfClusterByPosition(int position) const; + int TailOfClusterByPosition(int position) const + { + ASSERT(0 <= position && position < Capacity() && ! table[position].Empty()); + + int bucket = BucketByPosition(position); + int i = position; + while ( i < Capacity() && ! table[i].Empty() && BucketByPosition(i) == bucket ) + i++; // stop just over the tail. + + return i - 1; + } // Given a position of a non-empty item in the table, find the end of its cluster. // The end should be equal to tail+1 if tail exists. Otherwise it's the tail of // the just-smaller cluster + 1. - int EndOfClusterByPosition(int position) const; + int EndOfClusterByPosition(int position) const { return TailOfClusterByPosition(position) + 1; } // Given a position of a non-empty item in the table, find the offset of it within // its cluster. - int OffsetInClusterByPosition(int position) const; + int OffsetInClusterByPosition(int position) const + { + ASSERT(0 <= position && position < Capacity() && ! table[position].Empty()); + int head = HeadOfClusterByPosition(position); + return position - head; + } - // Next non-empty item position in the table. - int Next(int i) const; + // Next non-empty item position in the table, starting at the specified position. 
+ int Next(int position) const + { + ASSERT(table && -1 <= position && position < Capacity()); - void Init(); + do + { + position++; + } while ( position < Capacity() && table[position].Empty() ); + + return position; + } + + void Init() + { + ASSERT(! table); + table = (detail::DictEntry*)malloc(sizeof(detail::DictEntry) * Capacity(true)); + for ( int i = Capacity() - 1; i >= 0; i-- ) + table[i].SetEmpty(); + } // Lookup - int LinearLookupIndex(const void* key, int key_size, detail::hash_t hash) const; + int LinearLookupIndex(const void* key, int key_size, detail::hash_t hash) const + { + for ( int i = 0; i < Capacity(); i++ ) + if ( ! table[i].Empty() && table[i].Equal((const char*)key, key_size, hash) ) + return i; + return -1; + } + + // Lookup position for all possible table_sizes caused by remapping. Remap it immediately + // if not in the middle of iteration. int LookupIndex(const void* key, int key_size, detail::hash_t hash, - int* insert_position = nullptr, int* insert_distance = nullptr); + int* insert_position = nullptr, int* insert_distance = nullptr) + { + ASSERT_VALID(this); + if ( ! table ) + return -1; + + int bucket = BucketByHash(hash, log2_buckets); +#ifdef ZEEK_DICT_DEBUG + int linear_position = LinearLookupIndex(key, key_size, hash); +#endif // ZEEK_DICT_DEBUG + int position = LookupIndex(key, key_size, hash, bucket, Capacity(), insert_position, + insert_distance); + if ( position >= 0 ) + { + ASSERT_EQUAL(position, linear_position); // same as linearLookup + return position; + } + + for ( int i = 1; i <= remaps; i++ ) + { + int prev_bucket = BucketByHash(hash, log2_buckets - i); + if ( prev_bucket <= remap_end ) + { + // possibly here. insert_position & insert_distance returned on failed lookup is + // not valid in previous table_sizes. 
+ position = LookupIndex(key, key_size, hash, prev_bucket, remap_end + 1); + if ( position >= 0 ) + { + ASSERT_EQUAL(position, linear_position); // same as linearLookup + // remap immediately if no iteration is on. + if ( ! num_iterators ) + { + Remap(position, &position); + ASSERT_EQUAL(position, LookupIndex(key, key_size, hash)); + } + return position; + } + } + } + // not found +#ifdef ZEEK_DICT_DEBUG + if ( linear_position >= 0 ) + { // different. stop and try to see whats happending. + ASSERT(false); + // rerun the function in debugger to track down the bug. + LookupIndex(key, key_size, hash); + } +#endif // ZEEK_DICT_DEBUG + return -1; + } + + // Returns the position of the item if it exists. Otherwise returns -1, but set the insert + // position/distance if required. The starting point for the search may not be the bucket + // for the current table size since this method is also used to search for an item in the + // previous table size. int LookupIndex(const void* key, int key_size, detail::hash_t hash, int begin, int end, - int* insert_position = nullptr, int* insert_distance = nullptr); + int* insert_position = nullptr, int* insert_distance = nullptr) + { + ASSERT(begin >= 0 && begin < Buckets()); + int i = begin; + for ( ; i < end && ! table[i].Empty() && BucketByPosition(i) <= begin; i++ ) + if ( BucketByPosition(i) == begin && table[i].Equal((char*)key, key_size, hash) ) + return i; + + // no such cluster, or not found in the cluster. + if ( insert_position ) + *insert_position = i; + + if ( insert_distance ) + { + *insert_distance = i - begin; + + if ( *insert_distance >= detail::TOO_FAR_TO_REACH ) + reporter->FatalErrorWithCore("Dictionary (size %d) insertion distance too far: %d", + Length(), *insert_distance); + } + + return -1; + } /// Insert entry, Adjust iterators when necessary. 
- void InsertRelocateAndAdjust(detail::DictEntry& entry, int insert_position); + void InsertRelocateAndAdjust(detail::DictEntry& entry, int insert_position) + { +/// e.distance is adjusted to be the one at insert_position. +#ifdef ZEEK_DICT_DEBUG + entry.bucket = BucketByHash(entry.hash, log2_buckets); +#endif // ZEEK_DICT_DEBUG + int last_affected_position = insert_position; + InsertAndRelocate(entry, insert_position, &last_affected_position); + + // If remapping in progress, adjust the remap_end to step back a little to cover the new + // range if the changed range straddles over remap_end. + if ( Remapping() && insert_position <= remap_end && remap_end < last_affected_position ) + { //[i,j] range changed. if map_end in between. then possibly old entry pushed down + // across + // map_end. + remap_end = last_affected_position; // adjust to j on the conservative side. + } + + if ( iterators && ! iterators->empty() ) + for ( auto c : *iterators ) + AdjustOnInsert(c, entry, insert_position, last_affected_position); + } /// insert entry into position, relocate other entries when necessary. - void InsertAndRelocate(detail::DictEntry& entry, int insert_position, - int* last_affected_position = nullptr); + void InsertAndRelocate(detail::DictEntry& entry, int insert_position, + int* last_affected_position = nullptr) + { /// take out the head of cluster and append to the end of the cluster. + while ( true ) + { + if ( insert_position >= Capacity() ) + { + ASSERT(insert_position == Capacity()); + SizeUp(); // copied all the items to new table. as it's just copying without + // remapping, insert_position is now empty. + table[insert_position] = entry; + if ( last_affected_position ) + *last_affected_position = insert_position; + return; + } + if ( table[insert_position].Empty() ) + { // the condition to end the loop. 
+ table[insert_position] = entry; + if ( last_affected_position ) + *last_affected_position = insert_position; + return; + } + + // the to-be-swapped-out item appends to the end of its original cluster. + auto t = table[insert_position]; + int next = EndOfClusterByPosition(insert_position); + t.distance += next - insert_position; + + // swap + table[insert_position] = entry; + entry = t; + insert_position = next; // append to the end of the current cluster. + } + } /// Adjust Iterators on Insert. - void AdjustOnInsert(RobustDictIterator* c, const detail::DictEntry& entry, int insert_position, - int last_affected_position); + void AdjustOnInsert(RobustDictIterator* c, const detail::DictEntry& entry, + int insert_position, int last_affected_position) + { + // See note in Dictionary::AdjustOnInsert() above. + c->inserted->erase(std::remove(c->inserted->begin(), c->inserted->end(), entry), + c->inserted->end()); + c->visited->erase(std::remove(c->visited->begin(), c->visited->end(), entry), + c->visited->end()); + + if ( insert_position < c->next ) + c->inserted->push_back(entry); + if ( insert_position < c->next && c->next <= last_affected_position ) + { + int k = TailOfClusterByPosition(c->next); + ASSERT(k >= 0 && k < Capacity()); + c->visited->push_back(table[k]); + } + } /// Remove, Relocate & Adjust iterators. - detail::DictEntry RemoveRelocateAndAdjust(int position); + detail::DictEntry RemoveRelocateAndAdjust(int position) + { + int last_affected_position = position; + detail::DictEntry entry = RemoveAndRelocate(position, &last_affected_position); + +#ifdef ZEEK_DICT_DEBUG + // validation: index to i-1 should be continuous without empty spaces. + for ( int k = position; k < last_affected_position; k++ ) + ASSERT(! table[k].Empty()); +#endif // ZEEK_DICT_DEBUG + + if ( iterators && ! 
iterators->empty() ) + for ( auto c : *iterators ) + AdjustOnRemove(c, entry, position, last_affected_position); + + return entry; + } /// Remove & Relocate - detail::DictEntry RemoveAndRelocate(int position, int* last_affected_position = nullptr); + detail::DictEntry RemoveAndRelocate(int position, int* last_affected_position = nullptr) + { + // fill the empty position with the tail of the cluster of position+1. + ASSERT(position >= 0 && position < Capacity() && ! table[position].Empty()); + + detail::DictEntry entry = table[position]; + while ( true ) + { + if ( position == Capacity() - 1 || table[position + 1].Empty() || + table[position + 1].distance == 0 ) + { + // no next cluster to fill, or next position is empty or next position is already in + // perfect bucket. + table[position].SetEmpty(); + if ( last_affected_position ) + *last_affected_position = position; + return entry; + } + int next = TailOfClusterByPosition(position + 1); + table[position] = table[next]; + table[position].distance -= next - position; // distance improved for the item. + position = next; + } + + return entry; + } /// Adjust safe iterators after Removal of entry at position. - void AdjustOnRemove(RobustDictIterator* c, const detail::DictEntry& entry, int position, - int last_affected_position); + void AdjustOnRemove(RobustDictIterator* c, const detail::DictEntry& entry, int position, + int last_affected_position) + { + // See note in Dictionary::AdjustOnInsert() above. + c->inserted->erase(std::remove(c->inserted->begin(), c->inserted->end(), entry), + c->inserted->end()); + c->visited->erase(std::remove(c->visited->begin(), c->visited->end(), entry), + c->visited->end()); + + if ( position < c->next && c->next <= last_affected_position ) + { + int moved = HeadOfClusterByPosition(c->next - 1); + if ( moved < position ) + moved = position; + c->inserted->push_back(table[moved]); + } + + // if not already the end of the dictionary, adjust next to a valid one. 
+ if ( c->next < Capacity() && table[c->next].Empty() ) + c->next = Next(c->next); + + if ( c->curr == entry ) + { + if ( c->next >= 0 && c->next < Capacity() && ! table[c->next].Empty() ) + c->curr = table[c->next]; + else + c->curr = detail::DictEntry(nullptr); // -> c == end_robust() + } + } bool Remapping() const { return remap_end >= 0; } // remap in reverse order. /// One round of remap. - void Remap(); + void Remap() + { + /// since remap should be very fast. take more at a time. + /// delay Remap when cookie is there. hard to handle cookie iteration while size changes. + /// remap from bottom up. + /// remap creates two parts of the dict: [0,remap_end] (remap_end, ...]. the former is mixed + /// with old/new entries; the latter contains all new entries. + /// + if ( num_iterators > 0 ) + return; + + int left = detail::DICT_REMAP_ENTRIES; + while ( remap_end >= 0 && left > 0 ) + { + if ( ! table[remap_end].Empty() && Remap(remap_end) ) + left--; + else //< successful Remap may increase remap_end in the case of SizeUp due to insert. if + // so, + // remap_end need to be worked on again. + remap_end--; + } + if ( remap_end < 0 ) + remaps = 0; // done remapping. + } // Remap an item in position to a new position. Returns true if the relocation was // successful, false otherwise. new_position will be set to the new position if a // pointer is provided to store the new value. - bool Remap(int position, int* new_position = nullptr); + bool Remap(int position, int* new_position = nullptr) + { + ASSERT_VALID(this); + /// Remap changes item positions by remove() and insert(). to avoid excessive operation. + /// avoid it when safe iteration is in progress. + ASSERT(! iterators || iterators->empty()); + int current = BucketByPosition(position); // current bucket + int expected = BucketByHash(table[position].hash, log2_buckets); // expected bucket in new + // table. + // equal because 1: it's a new item, 2: it's an old item, but new bucket is the same as old. 
+ // 50% of old items act this way due to fibhash. + if ( current == expected ) + return false; + detail::DictEntry entry = RemoveAndRelocate( + position); // no iteration cookies to adjust, no need for last_affected_position. +#ifdef ZEEK_DICT_DEBUG + entry.bucket = expected; +#endif // ZEEK_DICT_DEBUG - void SizeUp(); + // find insert position. + int insert_position = EndOfClusterByBucket(expected); + if ( new_position ) + *new_position = insert_position; + entry.distance = insert_position - expected; + InsertAndRelocate( + entry, + insert_position); // no iteration cookies to adjust, no need for last_affected_position. + ASSERT_VALID(this); + return true; + } + + void SizeUp() + { + int prev_capacity = Capacity(); + log2_buckets++; + int capacity = Capacity(); + table = (detail::DictEntry*)realloc(table, capacity * sizeof(detail::DictEntry)); + for ( int i = prev_capacity; i < capacity; i++ ) + table[i].SetEmpty(); + + // Remap from last to first in reverse order. SizeUp can be triggered by 2 conditions, one + // of which is that the last space in the table is occupied and there's nowhere to put new + // items. In this case, the table doubles in capacity and the item is put at the + // prev_capacity position with the old hash. We need to cover this item (?). + remap_end = prev_capacity; // prev_capacity instead of prev_capacity-1. + + // another remap starts. + remaps++; // used in Lookup() to cover SizeUp with incomplete remaps. + ASSERT(remaps <= log2_buckets); // because we only SizeUp, one direction. we know the + // previous log2_buckets. + } bool HaveOnlyRobustIterators() const { return (num_iterators == 0) || ((iterators ? iterators->size() : 0) == num_iterators); } - RobustDictIterator MakeRobustIterator(); - detail::DictEntry GetNextRobustIteration(RobustDictIterator* iter); + RobustDictIterator MakeRobustIterator() + { + if ( ! 
iterators ) + iterators = new std::vector*>; + + return {this}; + } + + detail::DictEntry GetNextRobustIteration(RobustDictIterator* iter) + { + // If there are any inserted entries, return them first. + // That keeps the list small and helps avoiding searching + // a large list when deleting an entry. + if ( ! table ) + { + iter->Complete(); + return detail::DictEntry(nullptr); // end of iteration + } + + if ( iter->inserted && ! iter->inserted->empty() ) + { + // Return the last one. Order doesn't matter, + // and removing from the tail is cheaper. + detail::DictEntry e = iter->inserted->back(); + iter->inserted->pop_back(); + return e; + } + + if ( iter->next < 0 ) + iter->next = Next(-1); + + if ( iter->next < Capacity() && table[iter->next].Empty() ) + { + // [Robin] I believe this means that the table has resized in a way + // that we're now inside the overflow area where elements are empty, + // because elsewhere empty slots aren't allowed. Assuming that's right, + // then it means we'll always be at the end of the table now and could + // also just set `next` to capacity. However, just to be sure, we + // instead reuse logic from below to move forward "to a valid position" + // and then double check, through an assertion in debug mode, that it's + // actually the end. If this ever triggered, the above assumption would + // be wrong (but the Next() call would probably still be right). + iter->next = Next(iter->next); + ASSERT(iter->next == Capacity()); + } + + // Filter out visited keys. + int capacity = Capacity(); + if ( iter->visited && ! iter->visited->empty() ) + // Filter out visited entries. + while ( iter->next < capacity ) + { + ASSERT(! 
table[iter->next].Empty()); + auto it = std::find(iter->visited->begin(), iter->visited->end(), + table[iter->next]); + if ( it == iter->visited->end() ) + break; + iter->visited->erase(it); + iter->next = Next(iter->next); + } + + if ( iter->next >= capacity ) + { + iter->Complete(); + return detail::DictEntry(nullptr); // end of iteration + } + + ASSERT(! table[iter->next].Empty()); + detail::DictEntry e = table[iter->next]; + + // prepare for next time. + iter->next = Next(iter->next); + return e; + } void IncrIters() { ++num_iterators; } void DecrIters() { --num_iterators; } @@ -473,51 +1481,53 @@ private: uint64_t cum_entries = 0; dict_delete_func delete_func = nullptr; - detail::DictEntry* table = nullptr; - std::vector* iterators = nullptr; + detail::DictEntry* table = nullptr; + std::vector*>* iterators = nullptr; // Order means the order of insertion. means no deletion until exit. will be inefficient. - std::vector* order = nullptr; + std::vector>* order = nullptr; }; /* * Template specialization of Dictionary that stores pointers for values. 
*/ -template class PDict : public Dictionary +template class PDict : public Dictionary { public: explicit PDict(DictOrder ordering = UNORDERED, int initial_size = 0) - : Dictionary(ordering, initial_size) + : Dictionary(ordering, initial_size) { } T* Lookup(const char* key) const { detail::HashKey h(key); - return (T*)Dictionary::Lookup(&h); + return Dictionary::Lookup(&h); } - T* Lookup(const detail::HashKey* key) const { return (T*)Dictionary::Lookup(key); } + T* Lookup(const detail::HashKey* key) const { return Dictionary::Lookup(key); } T* Insert(const char* key, T* val, bool* iterators_invalidated = nullptr) { detail::HashKey h(key); - return (T*)Dictionary::Insert(&h, (void*)val, iterators_invalidated); + return Dictionary::Insert(&h, val, iterators_invalidated); } T* Insert(detail::HashKey* key, T* val, bool* iterators_invalidated = nullptr) { - return (T*)Dictionary::Insert(key, (void*)val, iterators_invalidated); + return Dictionary::Insert(key, val, iterators_invalidated); } - T* NthEntry(int n) const { return (T*)Dictionary::NthEntry(n); } + T* NthEntry(int n) const { return Dictionary::NthEntry(n); } T* NthEntry(int n, const char*& key) const { int key_len; - return (T*)Dictionary::NthEntry(n, (const void*&)key, key_len); + return Dictionary::NthEntry(n, (const void*&)key, key_len); } T* RemoveEntry(const detail::HashKey* key, bool* iterators_invalidated = nullptr) { - return (T*)Remove(key->Key(), key->Size(), key->Hash(), false, iterators_invalidated); + return Dictionary::Remove(key->Key(), key->Size(), key->Hash(), false, + iterators_invalidated); } T* RemoveEntry(const detail::HashKey& key, bool* iterators_invalidated = nullptr) { - return (T*)Remove(key.Key(), key.Size(), key.Hash(), false, iterators_invalidated); + return Dictionary::Remove(key.Key(), key.Size(), key.Hash(), false, + iterators_invalidated); } }; diff --git a/src/Stmt.cc b/src/Stmt.cc index a17d59b2de..6b3dd99542 100644 --- a/src/Stmt.cc +++ b/src/Stmt.cc @@ -1331,7 +1331,7 @@ 
ValPtr ForStmt::DoExec(Frame* f, Val* v, StmtFlowType& flow) for ( const auto& lve : *loop_vals ) { auto k = lve.GetHashKey(); - auto* current_tev = lve.GetValue(); + auto* current_tev = lve.value; auto ind_lv = tv->RecreateIndex(*k); if ( value_var ) diff --git a/src/Val.cc b/src/Val.cc index 83dd413c08..e50932b807 100644 --- a/src/Val.cc +++ b/src/Val.cc @@ -493,13 +493,10 @@ static void BuildJSON(threading::formatter::JSON::NullDoubleWriter& writer, Val* else writer.StartObject(); - std::unique_ptr k; - TableEntryVal* entry; - for ( const auto& te : *table ) { - entry = te.GetValue(); - k = te.GetHashKey(); + auto* entry = te.value; + auto k = te.GetHashKey(); auto lv = tval->RecreateIndex(*k); Val* entry_key = lv->Length() == 1 ? lv->Idx(0).get() : lv.get(); @@ -1465,7 +1462,7 @@ int TableVal::RecursiveSize() const for ( const auto& ve : *table_val ) { - auto* tv = ve.GetValue(); + auto* tv = ve.value; if ( tv->GetVal() ) n += tv->GetVal()->AsTableVal()->RecursiveSize(); } @@ -1634,7 +1631,7 @@ bool TableVal::AddTo(Val* val, bool is_first_init, bool propagate_ops) const for ( const auto& tble : *table_val ) { auto k = tble.GetHashKey(); - auto* v = tble.GetValue(); + auto* v = tble.value; if ( is_first_init && t->AsTable()->Lookup(k.get()) ) { @@ -2255,7 +2252,7 @@ std::unordered_map TableVal::ToMap() const for ( const auto& iter : *table_val ) { auto k = iter.GetHashKey(); - auto v = iter.GetValue(); + auto v = iter.value; auto vl = table_hash->RecoverVals(*k); res[std::move(vl)] = v->GetVal(); @@ -2298,7 +2295,7 @@ void TableVal::Describe(ODesc* d) const reporter->InternalError("hash table underflow in TableVal::Describe"); auto k = iter->GetHashKey(); - auto* v = iter->GetValue(); + auto* v = iter->value; auto vl = table_hash->RecoverVals(*k); int dim = vl->Length(); @@ -2445,7 +2442,7 @@ void TableVal::DoExpire(double t) i < zeek::detail::table_incremental_step && *expire_iterator != table_val->end_robust(); ++i, ++(*expire_iterator) ) { - auto v = 
(*expire_iterator)->GetValue(); + auto v = (*expire_iterator)->value; if ( v->ExpireAccessTime() == 0 ) { @@ -2624,7 +2621,7 @@ ValPtr TableVal::DoClone(CloneState* state) for ( const auto& tble : *table_val ) { auto key = tble.GetHashKey(); - auto* val = tble.GetValue(); + auto* val = tble.value; TableEntryVal* nval = val->Clone(state); tv->table_val->Insert(key.get(), nval); @@ -2664,7 +2661,7 @@ unsigned int TableVal::ComputeFootprint(std::unordered_set* analyzed { auto k = iter.GetHashKey(); auto vl = table_hash->RecoverVals(*k); - auto v = iter.GetValue()->GetVal(); + auto v = iter.value->GetVal(); fp += vl->Footprint(analyzed_vals); if ( v ) @@ -2711,7 +2708,7 @@ TableVal::ParseTimeTableState TableVal::DumpTableState() for ( const auto& tble : *table_val ) { auto key = tble.GetHashKey(); - auto* val = tble.GetValue(); + auto* val = tble.value; rval.emplace_back(RecreateIndex(*key), val->GetVal()); } diff --git a/src/Val.h b/src/Val.h index 9ad33c5a6b..6daa352a1f 100644 --- a/src/Val.h +++ b/src/Val.h @@ -1044,7 +1044,7 @@ protected: detail::ExprPtr expire_time; detail::ExprPtr expire_func; TableValTimer* timer; - RobustDictIterator* expire_iterator; + RobustDictIterator* expire_iterator; detail::PrefixTable* subnets; ValPtr def_val; detail::ExprPtr change_func; diff --git a/src/broker/Data.cc b/src/broker/Data.cc index ec001045c9..da0ddca878 100644 --- a/src/broker/Data.cc +++ b/src/broker/Data.cc @@ -924,8 +924,6 @@ broker::expected val_to_data(const Val* v) for ( const auto& te : *table ) { auto hk = te.GetHashKey(); - auto* entry = te.GetValue(); - auto vl = table_val->RecreateIndex(*hk); broker::vector composite_key; @@ -952,7 +950,7 @@ broker::expected val_to_data(const Val* v) get(rval).emplace(move(key)); else { - auto val = val_to_data(entry->GetVal().get()); + auto val = val_to_data(te.value->GetVal().get()); if ( ! 
val ) return broker::ec::invalid_data; diff --git a/src/broker/messaging.bif b/src/broker/messaging.bif index 53f3d6fbaf..4997177bc7 100644 --- a/src/broker/messaging.bif +++ b/src/broker/messaging.bif @@ -37,7 +37,6 @@ std::set val_to_topic_set(zeek::Val* val) for ( const auto& te : *tbl ) { auto k = te.GetHashKey(); - auto* v = te.GetValue(); auto index = val->AsTableVal()->RecreateIndex(*k); rval.emplace(index->Idx(0)->AsString()->CheckString()); diff --git a/src/file_analysis/AnalyzerSet.h b/src/file_analysis/AnalyzerSet.h index 757a454a38..c1d609091c 100644 --- a/src/file_analysis/AnalyzerSet.h +++ b/src/file_analysis/AnalyzerSet.h @@ -96,7 +96,8 @@ public: void DrainModifications(); // Iterator support - using iterator = zeek::DictIterator; + using iterator = zeek::DictIterator; + ; using const_iterator = const iterator; using reverse_iterator = std::reverse_iterator; using const_reverse_iterator = std::reverse_iterator; diff --git a/src/file_analysis/File.cc b/src/file_analysis/File.cc index 4c4229e5bd..88500e9e50 100644 --- a/src/file_analysis/File.cc +++ b/src/file_analysis/File.cc @@ -377,7 +377,7 @@ void File::DeliverStream(const u_char* data, uint64_t len) for ( const auto& entry : analyzers ) { - auto* a = entry.GetValue(); + auto* a = entry.value; DBG_LOG(DBG_FILE_ANALYSIS, "stream delivery to analyzer %s", file_mgr->GetComponentName(a->Tag()).c_str()); @@ -475,7 +475,7 @@ void File::DeliverChunk(const u_char* data, uint64_t len, uint64_t offset) for ( const auto& entry : analyzers ) { - auto* a = entry.GetValue(); + auto* a = entry.value; DBG_LOG(DBG_FILE_ANALYSIS, "chunk delivery to analyzer %s", file_mgr->GetComponentName(a->Tag()).c_str()); @@ -539,7 +539,7 @@ void File::EndOfFile() for ( const auto& entry : analyzers ) { - auto* a = entry.GetValue(); + auto* a = entry.value; if ( ! 
a->EndOfFile() ) analyzers.QueueRemove(a->Tag(), a->GetArgs()); @@ -574,7 +574,7 @@ void File::Gap(uint64_t offset, uint64_t len) for ( const auto& entry : analyzers ) { - auto* a = entry.GetValue(); + auto* a = entry.value; if ( ! a->Undelivered(offset, len) ) analyzers.QueueRemove(a->Tag(), a->GetArgs()); diff --git a/src/input/Manager.cc b/src/input/Manager.cc index e93571077e..9e93314f5f 100644 --- a/src/input/Manager.cc +++ b/src/input/Manager.cc @@ -272,7 +272,7 @@ bool Manager::CreateStream(Stream* info, RecordVal* description) for ( const auto& icte : *info_config_table ) { auto k = icte.GetHashKey(); - auto* v = icte.GetValue(); + auto* v = icte.value; auto index = info->config->RecreateIndex(*k); string key = index->Idx(0)->AsString()->CheckString(); @@ -1402,7 +1402,7 @@ void Manager::EndCurrentSend(ReaderFrontend* reader) for ( auto it = stream->lastDict->begin_robust(); it != stream->lastDict->end_robust(); ++it ) { auto lastDictIdxKey = it->GetHashKey(); - InputHash* ih = it->GetValue(); + InputHash* ih = it->value; ValPtr val; ValPtr predidx; diff --git a/src/logging/Manager.cc b/src/logging/Manager.cc index 67e674838c..9db7235da7 100644 --- a/src/logging/Manager.cc +++ b/src/logging/Manager.cc @@ -889,7 +889,7 @@ bool Manager::Write(EnumVal* id, RecordVal* columns_arg) for ( const auto& fcte : *filter_config_table ) { auto k = fcte.GetHashKey(); - auto* v = fcte.GetValue(); + auto* v = fcte.value; auto index = filter->config->RecreateIndex(*k); string key = index->Idx(0)->AsString()->CheckString(); diff --git a/src/reporter.bif b/src/reporter.bif index 523195debb..3f2172b8c0 100644 --- a/src/reporter.bif +++ b/src/reporter.bif @@ -186,7 +186,6 @@ function Reporter::set_weird_sampling_whitelist%(weird_sampling_whitelist: strin for ( const auto& tble : *wl_table ) { auto k = tble.GetHashKey(); - auto* v = tble.GetValue(); auto index = wl_val->RecreateIndex(*k); string key = index->Idx(0)->AsString()->CheckString(); diff --git 
a/src/script_opt/ZAM/IterInfo.h b/src/script_opt/ZAM/IterInfo.h index 4724daa380..2fedf0a023 100644 --- a/src/script_opt/ZAM/IterInfo.h +++ b/src/script_opt/ZAM/IterInfo.h @@ -65,7 +65,7 @@ public: // For the current iteration, returns the corresponding value. ZVal IterValue() { - auto tev = (*tbl_iter)->GetValue(); + auto tev = (*tbl_iter)->value; return ZVal(tev->GetVal(), aux->value_var_type); } @@ -88,8 +88,8 @@ private: // Associated auxiliary information. ZInstAux* aux = nullptr; - std::optional tbl_iter; - std::optional tbl_end; + std::optional> tbl_iter; + std::optional> tbl_end; }; // Class for simple step-wise iteration across an integer range. diff --git a/src/supervisor/Supervisor.cc b/src/supervisor/Supervisor.cc index 556be2a5fe..b74f88db41 100644 --- a/src/supervisor/Supervisor.cc +++ b/src/supervisor/Supervisor.cc @@ -1271,7 +1271,7 @@ Supervisor::NodeConfig Supervisor::NodeConfig::FromRecord(const RecordVal* node) for ( const auto& ee : *env_table ) { auto k = ee.GetHashKey(); - auto* v = ee.GetValue(); + auto* v = ee.value; auto key = env_table_val->RecreateIndex(*k); auto name = key->Idx(0)->AsStringVal()->ToStdString(); @@ -1286,7 +1286,7 @@ Supervisor::NodeConfig Supervisor::NodeConfig::FromRecord(const RecordVal* node) for ( const auto& cte : *cluster_table ) { auto k = cte.GetHashKey(); - auto* v = cte.GetValue(); + auto* v = cte.value; auto key = cluster_table_val->RecreateIndex(*k); auto name = key->Idx(0)->AsStringVal()->ToStdString(); diff --git a/src/telemetry/telemetry.bif b/src/telemetry/telemetry.bif index 3c4d6d2d1b..5aa23abae0 100644 --- a/src/telemetry/telemetry.bif +++ b/src/telemetry/telemetry.bif @@ -55,7 +55,7 @@ std::vector sv_tbl(zeek::TableVal* xs) { for ( auto& val : *xs->Get() ) { - auto val_ptr = val.GetValue()->GetVal(); + auto val_ptr = val.value->GetVal(); result.emplace_back(std::string_view{val.GetKey(), val.key_size}, sv(val_ptr->AsStringVal())); }