// See the file "COPYING" in the main distribution directory for copyright.
#include "zeek-config.h"
#include "zeek/Dict.h"
#ifdef HAVE_MEMORY_H
#include <memory.h>
#endif
#include <algorithm>
#include <signal.h>
#include <climits>
#include <fstream>
#include "zeek/3rdparty/doctest.h"
#include "zeek/Reporter.h"
#include "zeek/util.h"
#if defined(DEBUG) && defined(ZEEK_DICT_DEBUG)
#define ASSERT_VALID(o) o->AssertValid()
#else
#define ASSERT_VALID(o)
#endif//DEBUG
namespace zeek {
class [[deprecated("Remove in v5.1. Use the standard-library-compatible version of iteration.")]] IterCookie {
public:
IterCookie(Dictionary* d) : d(d) {}
bool robust = false;
Dictionary* d = nullptr;
// Index for the next valid entry. -1 is the default, meaning we haven't started
// iterating yet.
int next = -1;
// Tracks the new entries inserted while iterating. Only used for robust cookies.
std::vector<detail::DictEntry>* inserted = nullptr;
// Tracks the entries that were already visited but got moved across the next iteration
// point due to an insertion. Only used for robust cookies.
std::vector<detail::DictEntry>* visited = nullptr;
void MakeRobust()
{
// IterCookies can't be made robust after iteration has started.
ASSERT(next < 0);
ASSERT(d && d->cookies);
robust = true;
inserted = new std::vector<detail::DictEntry>();
visited = new std::vector<detail::DictEntry>();
d->cookies->push_back(this);
}
void AssertValid() const
{
ASSERT(d && -1 <= next && next <= d->Capacity());
ASSERT(( ! robust && ! inserted && ! visited ) || ( robust && inserted && visited ));
}
~IterCookie()
{
ASSERT_VALID(this);
if( robust )
{
d->cookies->erase(std::remove(d->cookies->begin(), d->cookies->end(), this), d->cookies->end());
delete inserted;
delete visited;
}
}
};
// namespace detail
TEST_SUITE_BEGIN("Dict");
TEST_CASE("dict construction")
{
PDict<int> dict;
CHECK(! dict.IsOrdered());
CHECK(dict.Length() == 0);
PDict<int> dict2(ORDERED);
CHECK(dict2.IsOrdered());
CHECK(dict2.Length() == 0);
}
TEST_CASE("dict operation")
{
PDict<uint32_t> dict;
uint32_t val = 10;
uint32_t key_val = 5;
detail::HashKey* key = new detail::HashKey(key_val);
dict.Insert(key, &val);
CHECK(dict.Length() == 1);
detail::HashKey* key2 = new detail::HashKey(key_val);
uint32_t* lookup = dict.Lookup(key2);
CHECK(*lookup == val);
dict.Remove(key2);
CHECK(dict.Length() == 0);
uint32_t* lookup2 = dict.Lookup(key2);
CHECK(lookup2 == (uint32_t*)0);
delete key2;
CHECK(dict.MaxLength() == 1);
CHECK(dict.NumCumulativeInserts() == 1);
dict.Insert(key, &val);
dict.Remove(key);
CHECK(dict.MaxLength() == 1);
CHECK(dict.NumCumulativeInserts() == 2);
uint32_t val2 = 15;
uint32_t key_val2 = 25;
key2 = new detail::HashKey(key_val2);
dict.Insert(key, &val);
dict.Insert(key2, &val2);
CHECK(dict.Length() == 2);
CHECK(dict.NumCumulativeInserts() == 4);
dict.Clear();
CHECK(dict.Length() == 0);
delete key;
delete key2;
}
TEST_CASE("dict nthentry")
{
PDict<uint32_t> unordered(UNORDERED);
PDict<uint32_t> ordered(ORDERED);
uint32_t val = 15;
uint32_t key_val = 5;
detail::HashKey* okey = new detail::HashKey(key_val);
detail::HashKey* ukey = new detail::HashKey(key_val);
uint32_t val2 = 10;
uint32_t key_val2 = 25;
detail::HashKey* okey2 = new detail::HashKey(key_val2);
detail::HashKey* ukey2 = new detail::HashKey(key_val2);
unordered.Insert(ukey, &val);
unordered.Insert(ukey2, &val2);
ordered.Insert(okey, &val);
ordered.Insert(okey2, &val2);
// NthEntry returns null for unordered dicts
uint32_t* lookup = unordered.NthEntry(0);
CHECK(lookup == (uint32_t*)0);
// Ordered dicts return entries in order of insertion; the order has nothing to do with
// the data itself.
lookup = ordered.NthEntry(0);
CHECK(*lookup == 15);
delete okey;
delete okey2;
delete ukey;
delete ukey2;
}
TEST_CASE("dict iteration")
{
PDict<uint32_t> dict;
uint32_t val = 15;
uint32_t key_val = 5;
detail::HashKey* key = new detail::HashKey(key_val);
uint32_t val2 = 10;
uint32_t key_val2 = 25;
detail::HashKey* key2 = new detail::HashKey(key_val2);
dict.Insert(key, &val);
dict.Insert(key2, &val2);
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
detail::HashKey* it_key;
IterCookie* it = dict.InitForIteration();
CHECK(it != nullptr);
int count = 0;
while ( uint32_t* entry = dict.NextEntry(it_key, it) )
{
switch ( count )
{
case 0:
// The DictEntry constructor typecasts this down to a uint32_t, so
// we can't just check the value directly.
// Explanation: hash_t is 64-bit, but the open dict only stores a 32-bit hash to
// save space per item (24 bytes, aligned). The table size is 2^N and only the
// lower bits of the hash are used. (The original hash is transformed by
// FibHash() to map it into the smaller 2^N range.)
CHECK(it_key->Hash() == (uint32_t)key2->Hash());
CHECK(*entry == 10);
break;
case 1:
CHECK(it_key->Hash() == (uint32_t)key->Hash());
CHECK(*entry == 15);
break;
default:
break;
}
count++;
delete it_key;
}
CHECK(count == 2);
#pragma GCC diagnostic pop
delete key;
delete key2;
}
TEST_CASE("dict new iteration")
{
PDict<uint32_t> dict;
uint32_t val = 15;
uint32_t key_val = 5;
detail::HashKey* key = new detail::HashKey(key_val);
uint32_t val2 = 10;
uint32_t key_val2 = 25;
detail::HashKey* key2 = new detail::HashKey(key_val2);
dict.Insert(key, &val);
dict.Insert(key2, &val2);
int count = 0;
for ( const auto& entry : dict )
{
auto* v = static_cast<uint32_t*>(entry.value);
uint64_t k = *(uint32_t*) entry.GetKey();
switch ( count )
{
case 0:
CHECK(k == key_val2);
CHECK(*v == val2);
break;
case 1:
CHECK(k == key_val);
CHECK(*v == val);
break;
default:
break;
}
count++;
}
CHECK(count == 2);
delete key;
delete key2;
}
TEST_CASE("dict robust iteration")
{
PDict<uint32_t> dict;
uint32_t val = 15;
uint32_t key_val = 5;
detail::HashKey* key = new detail::HashKey(key_val);
uint32_t val2 = 10;
uint32_t key_val2 = 25;
detail::HashKey* key2 = new detail::HashKey(key_val2);
uint32_t val3 = 20;
uint32_t key_val3 = 35;
detail::HashKey* key3 = new detail::HashKey(key_val3);
dict.Insert(key, &val);
dict.Insert(key2, &val2);
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
detail::HashKey* it_key;
IterCookie* it = dict.InitForIteration();
CHECK(it != nullptr);
dict.MakeRobustCookie(it);
int count = 0;
while ( uint32_t* entry = dict.NextEntry(it_key, it) )
{
switch ( count )
{
case 0:
CHECK(it_key->Hash() == (uint32_t)key2->Hash());
CHECK(*entry == 10);
dict.Insert(key3, &val3);
break;
case 1:
CHECK(it_key->Hash() == (uint32_t)key->Hash());
CHECK(*entry == 15);
break;
case 2:
CHECK(it_key->Hash() == (uint32_t)key3->Hash());
CHECK(*entry == 20);
break;
default:
// We shouldn't get here.
CHECK(false);
break;
}
count++;
delete it_key;
}
CHECK(count == 3);
IterCookie* it2 = dict.InitForIteration();
CHECK(it2 != nullptr);
dict.MakeRobustCookie(it2);
count = 0;
while ( uint32_t* entry = dict.NextEntry(it_key, it2) )
{
switch ( count )
{
case 0:
CHECK(it_key->Hash() == (uint32_t)key2->Hash());
CHECK(*entry == 10);
dict.Remove(key3);
break;
case 1:
CHECK(it_key->Hash() == (uint32_t)key->Hash());
CHECK(*entry == 15);
break;
default:
// We shouldn't get here.
CHECK(false);
break;
}
count++;
delete it_key;
}
CHECK(count == 2);
#pragma GCC diagnostic pop
delete key;
delete key2;
delete key3;
}
TEST_CASE("dict new robust iteration")
{
PDict<uint32_t> dict;
uint32_t val = 15;
uint32_t key_val = 5;
detail::HashKey* key = new detail::HashKey(key_val);
uint32_t val2 = 10;
uint32_t key_val2 = 25;
detail::HashKey* key2 = new detail::HashKey(key_val2);
uint32_t val3 = 20;
uint32_t key_val3 = 35;
detail::HashKey* key3 = new detail::HashKey(key_val3);
dict.Insert(key, &val);
dict.Insert(key2, &val2);
{
int count = 0;
auto it = dict.begin_robust();
for ( ; it != dict.end_robust(); ++it )
{
auto* v = it->GetValue<uint32_t*>();
uint64_t k = *(uint32_t*) it->GetKey();
switch ( count )
{
case 0:
CHECK(k == key_val2);
CHECK(*v == val2);
dict.Insert(key3, &val3);
break;
case 1:
CHECK(k == key_val);
CHECK(*v == val);
break;
case 2:
CHECK(k == key_val3);
CHECK(*v == val3);
break;
default:
// We shouldn't get here.
CHECK(false);
break;
}
count++;
}
CHECK(count == 3);
}
{
int count = 0;
auto it = dict.begin_robust();
for ( ; it != dict.end_robust(); ++it )
{
auto* v = it->GetValue<uint32_t*>();
uint64_t k = *(uint32_t*) it->GetKey();
switch ( count )
{
case 0:
CHECK(k == key_val2);
CHECK(*v == val2);
dict.Insert(key3, &val3);
dict.Remove(key3);
break;
case 1:
CHECK(k == key_val);
CHECK(*v == val);
break;
default:
// We shouldn't get here.
CHECK(false);
break;
}
count++;
}
CHECK(count == 2);
}
delete key;
delete key2;
delete key3;
}
TEST_CASE("dict iterator invalidation")
{
PDict<uint32_t> dict;
uint32_t val = 15;
uint32_t key_val = 5;
auto key = new detail::HashKey(key_val);
uint32_t val2 = 10;
uint32_t key_val2 = 25;
auto key2 = new detail::HashKey(key_val2);
uint32_t val3 = 42;
uint32_t key_val3 = 37;
auto key3 = new detail::HashKey(key_val3);
dict.Insert(key, &val);
dict.Insert(key2, &val2);
detail::HashKey* it_key;
bool iterators_invalidated = false;
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
IterCookie* it = dict.InitForIteration();
CHECK(it != nullptr);
while ( uint32_t* entry = dict.NextEntry(it_key, it) )
{
iterators_invalidated = false;
dict.Remove(key3, &iterators_invalidated);
// Key doesn't exist, nothing to remove, iteration not invalidated.
CHECK(!iterators_invalidated);
iterators_invalidated = false;
dict.Insert(key, &val2, &iterators_invalidated);
// Key exists, value gets overwritten, iteration not invalidated.
CHECK(!iterators_invalidated);
iterators_invalidated = false;
dict.Remove(key2, &iterators_invalidated);
// Key exists, gets removed, iteration is invalidated.
CHECK(iterators_invalidated);
delete it_key;
dict.StopIteration(it);
break;
}
it = dict.InitForIteration();
CHECK(it != nullptr);
while ( uint32_t* entry = dict.NextEntry(it_key, it) )
{
iterators_invalidated = false;
dict.Insert(key3, &val3, &iterators_invalidated);
// Key doesn't exist, gets inserted, iteration is invalidated.
CHECK(iterators_invalidated);
delete it_key;
dict.StopIteration(it);
break;
}
#pragma GCC diagnostic pop
CHECK(dict.Length() == 2);
CHECK(*static_cast<uint32_t*>(dict.Lookup(key)) == val2);
CHECK(*static_cast<uint32_t*>(dict.Lookup(key3)) == val3);
CHECK(static_cast<uint32_t*>(dict.Lookup(key2)) == nullptr);
delete key;
delete key2;
delete key3;
}
TEST_SUITE_END();
/////////////////////////////////////////////////////////////////////////////////////////////////
//bucket math
int Dictionary::Log2(int num) const
{
int i = 0;
while ( num >>= 1 )
i++;
return i;
}
int Dictionary::Buckets(bool expected) const
{
int buckets = ( 1 << log2_buckets );
if ( expected )
return buckets;
return table ? buckets : 0;
}
int Dictionary::Capacity(bool expected) const
{
int capacity = ( 1 << log2_buckets ) + ( log2_buckets+0 );
if ( expected )
return capacity;
return table ? capacity : 0;
}
int Dictionary::ThresholdEntries() const
{
// Increase the size of the dictionary when it is 75% full. However, when the dictionary
// is small (<= 20 elements), only resize it when it's 100% full. The dictionary always
// resizes when the current insertion would make it full, which ensures that the current
// insertion always succeeds.
int capacity = Capacity();
if ( log2_buckets <= detail::DICT_THRESHOLD_BITS )
return capacity; // 20 or fewer elements: load factor 1.0, only size up when necessary.
return capacity - ( capacity >> detail::DICT_LOAD_FACTOR_BITS );
}
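// Worked example of the threshold math (assuming detail::DICT_LOAD_FACTOR_BITS is 2, which
// matches the 75% figure above): with log2_buckets = 10, Capacity() = (1 << 10) + 10 = 1034,
// so the dictionary sizes up once num_entries exceeds 1034 - (1034 >> 2) = 776 entries,
// i.e. at roughly 75% occupancy.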
detail::hash_t Dictionary::FibHash(detail::hash_t h) const
{
//GoldenRatio phi = (sqrt(5)+1)/2 = 1.6180339887...
//1/phi = phi - 1
h &= detail::HASH_MASK;
h *= 11400714819323198485llu; //2^64/phi
return h;
}
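// Illustration of the constant above: 11400714819323198485 is floor(2^64 / phi), i.e.
// 0x9E3779B97F4A7C15, the usual 64-bit Fibonacci-hashing multiplier. Multiplying by it
// scatters even adjacent inputs: for example, h = 1 maps to 11400714819323198485 while
// h = 2 maps to 4354685564936845354 (mod 2^64), so the bits later extracted by
// BucketByHash() differ widely between nearby hashes.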
// Return the bucket position for a hash in a table with 2^log2_table_size buckets.
int Dictionary::BucketByHash(detail::hash_t h, int log2_table_size) const //map h to n-bit
{
ASSERT(log2_table_size>=0);
if ( ! log2_table_size )
return 0; // Shifting a 64-bit value by 64 below would be undefined, so handle this case explicitly.
#ifdef DICT_NO_FIB_HASH
detail::hash_t hash = h;
#else
detail::hash_t hash = FibHash(h);
#endif
int m = 64 - log2_table_size;
hash <<= m;
hash >>= m;
return hash;
}
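// Sketch of the bit selection (assuming detail::hash_t is an unsigned 64-bit type, as the
// shift math above implies): with log2_table_size = 10, m = 54, so (hash << 54) >> 54
// keeps only the low 10 bits of the Fibonacci-scrambled hash. The result is equivalent to
// FibHash(h) & 0x3FF and always lies in [0, 1024), i.e. within the bucket range.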
// Given the entry at a position, return its perfect bucket position.
int Dictionary::BucketByPosition(int position) const
{
ASSERT(table && position>=0 && position < Capacity() && ! table[position].Empty());
return position - table[position].distance;
}
////////////////////////////////////////////////////////////////////////////////////////////////
//Cluster Math
////////////////////////////////////////////////////////////////////////////////////////////////
int Dictionary::EndOfClusterByBucket(int bucket) const
{
ASSERT(bucket>=0 && bucket < Buckets());
int i = bucket;
while ( i < Capacity() && ! table[i].Empty() && BucketByPosition(i) <= bucket )
i++;
return i;
}
int Dictionary::HeadOfClusterByPosition( int position) const
{
// Find the first entry in the bucket chain.
ASSERT(0 <= position && position < Capacity() && ! table[position].Empty());
// Look backward for the first item in the same bucket as this one.
int bucket = BucketByPosition(position);
int i = position;
while ( i >= bucket && BucketByPosition(i) == bucket )
i--;
return i == bucket ? i : i + 1;
}
int Dictionary::TailOfClusterByPosition(int position) const
{
ASSERT(0 <= position && position < Capacity() && ! table[position].Empty());
int bucket = BucketByPosition(position);
int i = position;
while ( i < Capacity() && ! table[i].Empty() && BucketByPosition(i) == bucket )
i++; //stop just over the tail.
return i - 1;
}
int Dictionary::EndOfClusterByPosition(int position) const
{
return TailOfClusterByPosition(position)+1;
}
int Dictionary::OffsetInClusterByPosition(int position) const
{
ASSERT(0 <= position && position < Capacity() && ! table[position].Empty());
int head = HeadOfClusterByPosition(position);
return position - head;
}
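// Cluster example tying these helpers together: suppose positions 4, 5 and 6 all hash to
// bucket 4 (distances 0, 1, 2) and position 7 is empty or belongs to a later bucket. Then
// for position 6: BucketByPosition(6) == 4, HeadOfClusterByPosition(6) == 4,
// TailOfClusterByPosition(6) == 6, EndOfClusterByPosition(6) == 7 and
// OffsetInClusterByPosition(6) == 2.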
// Find the next valid entry after the position. Position can be -1, which means
// look for the next valid entry point altogether.
int Dictionary::Next(int position) const
{
ASSERT(table && -1 <= position && position < Capacity());
do
{
position++;
} while ( position < Capacity() && table[position].Empty() );
return position;
}
///////////////////////////////////////////////////////////////////////////////////////////////////////
//Debugging
///////////////////////////////////////////////////////////////////////////////////////////////////////
#define DUMPIF(f) if(f) Dump(1)
#ifdef DEBUG
void Dictionary::AssertValid() const
{
bool valid = true;
int n = num_entries;
if ( table )
for ( int i = Capacity()-1; i >= 0; i-- )
if ( ! table[i].Empty() )
n--;
valid = (n == 0);
ASSERT(valid);
DUMPIF(! valid);
// Entries must be clustered together.
for ( int i = 1; i < Capacity(); i++ )
{
if ( ! table || table[i].Empty() )
continue;
if ( table[i-1].Empty() )
{
valid = (table[i].distance == 0);
ASSERT(valid);
DUMPIF(! valid);
}
else
{
valid = (table[i].bucket >= table[i-1].bucket);
ASSERT(valid);
DUMPIF(! valid);
if ( table[i].bucket == table[i-1].bucket )
{
valid = (table[i].distance == table[i-1].distance+1);
ASSERT(valid);
DUMPIF(! valid);
}
else
{
valid = (table[i].distance <= table[i-1].distance);
ASSERT(valid);
DUMPIF(! valid);
}
}
}
}
#endif//DEBUG
size_t Dictionary::MemoryAllocation() const
{
size_t size = padded_sizeof(*this);
if ( table )
{
size += zeek::util::pad_size(Capacity() * sizeof(detail::DictEntry));
for ( int i = Capacity()-1; i>=0; i-- )
if ( ! table[i].Empty() && table[i].key_size > 8 )
size += zeek::util::pad_size(table[i].key_size);
}
if ( order )
size += padded_sizeof(std::vector<detail::DictEntry>) + zeek::util::pad_size(sizeof(detail::DictEntry) * order->capacity());
return size;
}
void Dictionary::DumpKeys() const
{
if ( ! table )
return;
char key_file[100];
// Detect string or binary from the first key.
int i = 0;
while ( i < Capacity() && table[i].Empty() )
i++;
bool binary = false;
const char* key = table[i].GetKey();
for ( int j = 0; j < table[i].key_size; j++ )
if ( ! isprint(key[j]) )
{
binary = true;
break;
}
int max_distance = 0;
DistanceStats(max_distance);
if ( binary )
{
char suffix = char(random() % 26) + 'A';
snprintf(key_file, sizeof(key_file), "%d.%d.%zu-%c.key", Length(), max_distance, MemoryAllocation()/Length(), suffix);
std::ofstream f(key_file, std::ios::binary|std::ios::out|std::ios::trunc);
for ( int idx = 0; idx < Capacity(); idx++ )
if ( ! table[idx].Empty() )
{
int key_size = table[idx].key_size;
f.write((const char*)&key_size, sizeof(int));
f.write(table[idx].GetKey(), table[idx].key_size);
}
}
else
{
char suffix = char(random() % 26) + 'A';
snprintf(key_file, sizeof(key_file), "%d.%d.%zu-%d.ckey", Length(), max_distance, MemoryAllocation()/Length(), suffix);
std::ofstream f(key_file, std::ios::out|std::ios::trunc);
for ( int idx = 0; idx < Capacity(); idx++ )
if ( ! table[idx].Empty() )
{
std::string s((char*)table[idx].GetKey(), table[idx].key_size);
f << s << std::endl;
}
}
}
void Dictionary::DistanceStats(int& max_distance, int* distances, int num_distances) const
{
max_distance = 0;
for ( int i = 0; i < num_distances; i++ )
distances[i] = 0;
for ( int i = 0; i < Capacity(); i++ )
{
if ( table[i].Empty() )
continue;
if ( table[i].distance > max_distance )
max_distance = table[i].distance;
if ( num_distances <= 0 || ! distances )
continue;
if ( table[i].distance >= num_distances-1 )
distances[num_distances-1]++;
else
distances[table[i].distance]++;
}
}
void Dictionary::Dump(int level) const
{
int key_size = 0;
for ( int i = 0; i < Capacity(); i++ )
{
if ( table[i].Empty() )
continue;
key_size += zeek::util::pad_size(table[i].key_size);
if ( ! table[i].value )
continue;
}
#define DICT_NUM_DISTANCES 5
int distances[DICT_NUM_DISTANCES];
int max_distance = 0;
DistanceStats(max_distance, distances, DICT_NUM_DISTANCES);
printf("cap %'7d ent %'7d %'-7d load %.2f max_dist %2d mem %10zu mem/ent %3zu key/ent %3d lg %2d remaps %1d remap_end %4d ",
Capacity(), Length(), MaxLength(), (double)Length()/(table? Capacity() : 1),
max_distance, MemoryAllocation(), (MemoryAllocation())/(Length()?Length():1), key_size / (Length()?Length():1),
log2_buckets, remaps, remap_end);
if ( Length() > 0 )
{
for (int i = 0; i < DICT_NUM_DISTANCES-1; i++)
printf("[%d]%2d%% ", i, 100*distances[i]/Length());
printf("[%d+]%2d%% ", DICT_NUM_DISTANCES-1, 100*distances[DICT_NUM_DISTANCES-1]/Length());
}
else
printf("\n");
printf("\n");
if ( level >= 1 )
{
printf("%-10s %1s %-10s %-4s %-4s %-10s %-18s %-2s\n", "Index", "*","Bucket", "Dist", "Off", "Hash", "FibHash", "KeySize");
for ( int i = 0; i < Capacity(); i++ )
if ( table[i].Empty() )
printf("%'10d \n", i);
else
printf("%'10d %1s %'10d %4d %4d 0x%08x 0x%016" PRIx64 "(%3d) %2d\n",
i, (i<=remap_end? "*": ""), BucketByPosition(i), (int)table[i].distance, OffsetInClusterByPosition(i),
uint(table[i].hash), FibHash(table[i].hash), (int)FibHash(table[i].hash)&0xFF, (int)table[i].key_size);
}
}
////////////////////////////////////////////////////////////////////////////////////////////////////
//Initialization.
////////////////////////////////////////////////////////////////////////////////////////////////////
Dictionary::Dictionary(DictOrder ordering, int initial_size)
{
if ( initial_size > 0 )
{
// If an initial size is specified, initialize the table right away. Otherwise wait until
// the first insertion to initialize.
log2_buckets = Log2(initial_size);
Init();
}
if ( ordering == ORDERED )
order = new std::vector<detail::DictEntry>;
}
Dictionary::~Dictionary()
{
Clear();
}
void Dictionary::Clear()
{
if ( table )
{
for ( int i = Capacity() - 1; i >= 0; i-- )
{
if ( table[i].Empty() )
continue;
if ( delete_func )
delete_func(table[i].value);
table[i].Clear();
}
free(table);
table = nullptr;
}
if ( order )
{
delete order;
order = nullptr;
}
if ( cookies )
{
delete cookies;
cookies = nullptr;
}
if ( iterators )
{
delete iterators;
iterators = nullptr;
}
log2_buckets = 0;
num_iterators = 0;
remaps = 0;
remap_end = -1;
num_entries = 0;
max_entries = 0;
}
void Dictionary::Init()
{
ASSERT(! table);
table = (detail::DictEntry*)malloc(sizeof(detail::DictEntry) * Capacity(true));
for ( int i = Capacity() - 1; i >= 0; i-- )
table[i].SetEmpty();
}
// private
void generic_delete_func(void* v)
{
free(v);
}
//////////////////////////////////////////////////////////////////////////////////////////
//Lookup
// Lookup may also modify the entry: if the entry is found but is not positioned
// according to the current table size (i.e. it predates a SizeUp), it is moved to the
// right position so that the next lookup is fast.
void* Dictionary::Lookup(const detail::HashKey* key) const
{
return Lookup(key->Key(), key->Size(), key->Hash());
}
void* Dictionary::Lookup(const void* key, int key_size, detail::hash_t h) const
{
Dictionary* d = const_cast<Dictionary*>(this);
int position = d->LookupIndex(key, key_size, h);
return position >= 0 ? table[position].value : nullptr;
}
//for verification purposes
int Dictionary::LinearLookupIndex(const void* key, int key_size, detail::hash_t hash) const
{
for ( int i = 0; i < Capacity(); i++ )
if ( ! table[i].Empty() && table[i].Equal((const char*)key, key_size, hash) )
return i;
return -1;
}
// Look up the position for all possible table sizes caused by remapping. Remap the entry
// immediately if we are not in the middle of an iteration.
int Dictionary::LookupIndex(const void* key, int key_size, detail::hash_t hash, int* insert_position, int* insert_distance)
{
ASSERT_VALID(this);
if ( ! table )
return -1;
int bucket = BucketByHash(hash, log2_buckets);
#ifdef DEBUG
int linear_position = LinearLookupIndex(key, key_size, hash);
#endif//DEBUG
int position = LookupIndex(key, key_size, hash, bucket, Capacity(), insert_position, insert_distance);
if ( position >= 0 )
{
ASSERT(position == linear_position);//same as linearLookup
return position;
}
for ( int i = 1; i <= remaps; i++ )
{
int prev_bucket = BucketByHash(hash,log2_buckets - i);
if ( prev_bucket <= remap_end )
{
// The entry may be here. insert_position & insert_distance returned on a failed lookup
// are not valid for previous table sizes.
position = LookupIndex(key, key_size, hash, prev_bucket, remap_end+1);
if ( position >= 0 )
{
ASSERT(position == linear_position);//same as linearLookup
//remap immediately if no iteration is on.
if ( ! num_iterators )
{
Remap(position, &position);
ASSERT(position == LookupIndex(key, key_size, hash));
}
return position;
}
}
}
//not found
#ifdef DEBUG
if ( linear_position >= 0 )
{ // Different results; stop and try to see what's happening.
ASSERT(false);
//rerun the function in debugger to track down the bug.
LookupIndex(key, key_size, hash);
}
#endif//DEBUG
return -1;
}
// Returns the position of the item if it exists. Otherwise returns -1, but sets the insert
// position/distance if required. The starting point for the search may not be the bucket
// for the current table size since this method is also used to search for an item in the
// previous table size.
int Dictionary::LookupIndex(const void* key, int key_size, detail::hash_t hash, int bucket, int end,
int* insert_position/*output*/, int* insert_distance/*output*/)
{
ASSERT(bucket>=0 && bucket < Buckets());
int i = bucket;
for ( ; i < end && ! table[i].Empty() && BucketByPosition(i) <= bucket; i++ )
if ( BucketByPosition(i) == bucket && table[i].Equal((char*)key, key_size, hash) )
return i;
//no such cluster, or not found in the cluster.
if ( insert_position )
*insert_position = i;
if ( insert_distance )
*insert_distance = i - bucket;
return -1;
}
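// Probe sketch: the scan starts at the key's bucket and walks forward while the entries it
// passes still belong to this bucket or an earlier one (BucketByPosition(i) <= bucket). It
// stops at the first empty slot, at `end`, or at the first entry whose perfect bucket lies
// beyond ours; that stopping index is reported as the insert position for a missing key,
// with insert_distance measuring how far it sits from the perfect bucket.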
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Insert
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
void* Dictionary::Insert(void* key, int key_size, detail::hash_t hash, void* val, bool copy_key, bool* iterators_invalidated)
{
ASSERT_VALID(this);
// Initialize the table if it hasn't been done yet. Deferring this avoids allocating
// memory for dictionaries that stay empty.
if ( ! table )
Init();
void* v = nullptr;
// If found, position is the index of the existing entry.
// If not found, insert_position is where to insert and insert_distance is the distance
// of the key from its perfect bucket at that position.
int insert_position = -1, insert_distance = -1;
int position = LookupIndex(key, key_size, hash, &insert_position, &insert_distance);
if ( position >= 0 )
{
v = table[position].value;
table[position].value = val;
if ( ! copy_key )
delete [] (char*)key;
if ( order )
{//set new v to order too.
auto it = std::find(order->begin(), order->end(), table[position]);
ASSERT(it != order->end());
it->value = val;
}
if ( cookies && ! cookies->empty() )
//need to set new v for cookies too.
for ( auto c: *cookies )
{
ASSERT_VALID(c);
//ASSERT(false);
auto it = std::find(c->inserted->begin(), c->inserted->end(), table[position]);
if ( it != c->inserted->end() )
it->value = val;
}
if ( iterators && ! iterators->empty() )
//need to set new v for iterators too.
for ( auto c: *iterators )
{
auto it = std::find(c->inserted->begin(), c->inserted->end(), table[position]);
if ( it != c->inserted->end() )
it->value = val;
}
}
else
{
if ( ! HaveOnlyRobustIterators() )
{
if ( iterators_invalidated )
*iterators_invalidated = true;
else
reporter->InternalWarning("Dictionary::Insert() possibly caused iterator invalidation");
}
// Allocate memory for the key if necessary. Key is updated to reflect the internal key if necessary.
detail::DictEntry entry(key, key_size, hash, val, insert_distance, copy_key);
InsertRelocateAndAdjust(entry, insert_position);
if ( order )
order->push_back(entry);
num_entries++;
cum_entries++;
if ( max_entries < num_entries )
max_entries = num_entries;
if ( num_entries > ThresholdEntries() )
SizeUp();
}
// Remapping right after an insert lets the table adjust as soon as possible, shortening the
// period during which the table is in a mixed (old/new) state.
// TODO: however, if a remap happens right after a size-up, it consumes more CPU during this
// cycle and can be a hiccup point.
if ( Remapping() )
Remap();
ASSERT_VALID(this);
return v;
}
/// entry.distance is adjusted to the distance at insert_position.
void Dictionary::InsertRelocateAndAdjust(detail::DictEntry& entry, int insert_position)
{
#ifdef DEBUG
entry.bucket = BucketByHash(entry.hash,log2_buckets);
#endif//DEBUG
int last_affected_position = insert_position;
InsertAndRelocate(entry, insert_position, &last_affected_position);
// If remapping in progress, adjust the remap_end to step back a little to cover the new
// range if the changed range straddles over remap_end.
if ( Remapping() && insert_position <= remap_end && remap_end < last_affected_position )
{ // The changed range [insert_position, last_affected_position] straddles remap_end, so an
// old entry may have been pushed down across remap_end.
remap_end = last_affected_position; // Adjust to the end of the range, on the conservative side.
}
if ( cookies && ! cookies->empty() )
for ( auto c: *cookies )
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
AdjustOnInsert(c, entry, insert_position, last_affected_position);
#pragma GCC diagnostic pop
if ( iterators && ! iterators->empty() )
for ( auto c: *iterators )
AdjustOnInsert(c, entry, insert_position, last_affected_position);
}
/// insert entry into position, relocate other entries when necessary.
void Dictionary::InsertAndRelocate(detail::DictEntry& entry, int insert_position, int* last_affected_position)
{ /// Take out the head of the displaced cluster and append it to the end of that cluster.
while ( true )
{
if ( insert_position >= Capacity() )
{
ASSERT(insert_position == Capacity());
SizeUp(); // All items were copied to the new table; since this copies without remapping, insert_position is now empty.
table[insert_position] = entry;
if ( last_affected_position )
*last_affected_position = insert_position;
return;
}
if ( table[insert_position].Empty() )
{ //the condition to end the loop.
table[insert_position] = entry;
if ( last_affected_position )
*last_affected_position = insert_position;
return;
}
//the to-be-swapped-out item appends to the end of its original cluster.
auto t = table[insert_position];
int next = EndOfClusterByPosition(insert_position);
t.distance += next - insert_position;
//swap
table[insert_position] = entry;
entry = t;
insert_position = next; //append to the end of the current cluster.
}
}
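// Displacement example: if the insert lands on an occupied slot, the resident entry is
// swapped out and re-appended at the end of its own cluster. For instance, inserting at
// position 5 while positions 5..7 form one cluster moves the old occupant of slot 5 to
// position 8 with its distance increased by 3 (8 - 5); the loop then repeats from
// position 8 if that slot was occupied as well.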
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
/// Adjust Cookies on Insert.
void Dictionary::AdjustOnInsert(IterCookie* c, const detail::DictEntry& entry, int insert_position, int last_affected_position)
{
ASSERT(c);
ASSERT_VALID(c);
if ( insert_position < c->next )
c->inserted->push_back(entry);
if ( insert_position < c->next && c->next <= last_affected_position )
{
int k = TailOfClusterByPosition(c->next);
ASSERT(k >= 0 && k < Capacity());
c->visited->push_back(table[k]);
}
}
#pragma GCC diagnostic pop
void Dictionary::AdjustOnInsert(RobustDictIterator* c, const detail::DictEntry& entry,
int insert_position, int last_affected_position)
{
if ( insert_position < c->next )
c->inserted->push_back(entry);
if ( insert_position < c->next && c->next <= last_affected_position )
{
int k = TailOfClusterByPosition(c->next);
ASSERT(k >= 0 && k < Capacity());
c->visited->push_back(table[k]);
}
}
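// Bookkeeping rationale for the two lists above: an entry inserted before the iterator's
// resume point (c->next) would be skipped by the forward scan, so it is queued in
// c->inserted to be returned later. If the insertion also shifted a cluster across
// c->next, the entry that ended up at that cluster's tail was pushed forward from an
// earlier, already-visited position, so it is queued in c->visited to be filtered out
// when the scan reaches it.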
void Dictionary::SizeUp()
{
int prev_capacity = Capacity();
log2_buckets++;
int capacity = Capacity();
table = (detail::DictEntry*)realloc(table, capacity * sizeof(detail::DictEntry));
for ( int i = prev_capacity; i < capacity; i++ )
table[i].SetEmpty();
// Remap from last to first, in reverse order. SizeUp can be triggered by two conditions, one of
// which is that the last slot in the table is occupied and there's nowhere to put new items.
// In that case the table doubles in capacity and the item is placed at the prev_capacity
// position with the old hash, so remapping needs to cover that position too.
remap_end = prev_capacity; // prev_capacity instead of prev_capacity - 1, to cover that slot.
//another remap starts.
remaps++; //used in Lookup() to cover SizeUp with incomplete remaps.
ASSERT(remaps <= log2_buckets);//because we only sizeUp, one direction. we know the previous log2_buckets.
}
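// Example of the bookkeeping above: growing from log2_buckets = 9 to 10 doubles the bucket
// count from 512 to 1024 and changes Capacity() from 521 to 1034. remap_end is set to the
// old capacity (521), so positions [0, 521] may still hold entries placed under the old
// bucket mapping, while everything above them is guaranteed new. remaps counts how many of
// these older mappings Lookup() still has to consider.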
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Remove
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
void* Dictionary::Remove(const void* key, int key_size, detail::hash_t hash, bool dont_delete, bool* iterators_invalidated)
{ // Cookie adjustment: maintain the inserted lists here; maintain next in the lower-level helpers.
ASSERT_VALID(this);
ASSERT(! dont_delete); // This is a poorly designed flag: if set, the internal key has nowhere to go and its memory is leaked.
int position = LookupIndex(key, key_size, hash);
if ( position < 0 )
return nullptr;
if ( ! HaveOnlyRobustIterators() )
{
if ( iterators_invalidated )
*iterators_invalidated = true;
else
reporter->InternalWarning("Dictionary::Remove() possibly caused iterator invalidation");
}
detail::DictEntry entry = RemoveRelocateAndAdjust(position);
num_entries--;
ASSERT(num_entries >= 0);
//e is about to be invalid. remove it from all references.
if ( order )
order->erase(std::remove(order->begin(), order->end(), entry), order->end());
void* v = entry.value;
entry.Clear();
ASSERT_VALID(this);
return v;
}
detail::DictEntry Dictionary::RemoveRelocateAndAdjust(int position)
{
int last_affected_position = position;
detail::DictEntry entry = RemoveAndRelocate(position, &last_affected_position);
#ifdef DEBUG
// Validation: positions from position up to last_affected_position-1 should be contiguous, without empty slots.
for ( int k = position; k < last_affected_position; k++ )
ASSERT(! table[k].Empty());
#endif//DEBUG
if ( cookies && ! cookies->empty() )
for ( auto c: *cookies )
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
AdjustOnRemove(c, entry, position, last_affected_position);
#pragma GCC diagnostic pop
if ( iterators && ! iterators->empty() )
for ( auto c: *iterators )
AdjustOnRemove(c, entry, position, last_affected_position);
return entry;
}
detail::DictEntry Dictionary::RemoveAndRelocate(int position, int* last_affected_position)
{
//fill the empty position with the tail of the cluster of position+1.
ASSERT(position >= 0 && position < Capacity() && ! table[position].Empty());
detail::DictEntry entry = table[position];
while ( true )
{
if ( position == Capacity() - 1 || table[position+1].Empty() || table[position+1].distance == 0 )
{
//no next cluster to fill, or next position is empty or next position is already in perfect bucket.
table[position].SetEmpty();
if ( last_affected_position )
*last_affected_position = position;
return entry;
}
int next = TailOfClusterByPosition(position+1);
table[position] = table[next];
table[position].distance -= next - position; //distance improved for the item.
position = next;
}
return entry;
}
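// Backward-fill example: removing the entry at position 4 when positions 5..7 hold a
// displaced cluster for bucket 4 (distances 1, 2, 3) pulls table[7], that cluster's tail,
// into slot 4 with its distance reduced by 3 (to 0, its perfect bucket); the hole moves to
// position 7 and the loop repeats there until the slot to the right is empty or already
// sits in its perfect bucket (distance 0).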
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
void Dictionary::AdjustOnRemove(IterCookie* c, const detail::DictEntry& entry, int position, int last_affected_position)
{
ASSERT_VALID(c);
c->inserted->erase(std::remove(c->inserted->begin(), c->inserted->end(), entry), c->inserted->end());
if ( position < c->next && c->next <= last_affected_position )
{
int moved = HeadOfClusterByPosition(c->next-1);
if ( moved < position )
moved = position;
c->inserted->push_back(table[moved]);
}
//if not already the end of the dictionary, adjust next to a valid one.
if ( c->next < Capacity() && table[c->next].Empty() )
c->next = Next(c->next);
}
#pragma GCC diagnostic pop
void Dictionary::AdjustOnRemove(RobustDictIterator* c, const detail::DictEntry& entry,
int position, int last_affected_position)
{
c->inserted->erase(std::remove(c->inserted->begin(), c->inserted->end(), entry), c->inserted->end());
if ( position < c->next && c->next <= last_affected_position )
{
int moved = HeadOfClusterByPosition(c->next-1);
if ( moved < position )
moved = position;
c->inserted->push_back(table[moved]);
}
//if not already the end of the dictionary, adjust next to a valid one.
if ( c->next < Capacity() && table[c->next].Empty() )
c->next = Next(c->next);
}
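// Bookkeeping rationale: the removed entry is purged from c->inserted in case it was queued
// but not yet returned. If the removal shifted a cluster across the resume point, the entry
// that was pulled back across c->next (approximated here by the head of the cluster just
// before c->next, clamped to the removal position) would otherwise be skipped, so it is
// queued in c->inserted. Finally, if the slot at c->next was left empty, next is advanced
// to the following valid entry.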
///////////////////////////////////////////////////////////////////////////////////////////////////
//Remap
///////////////////////////////////////////////////////////////////////////////////////////////////
void Dictionary::Remap()
{
/// Remapping should be very fast, so handle several entries at a time.
/// Delay remapping while iteration cookies exist; handling iteration while positions change is hard.
/// Remap from the bottom up.
/// Remapping creates two parts of the dict: [0, remap_end] is mixed with old/new entries; (remap_end, ...] contains only new entries.
if ( num_iterators > 0 )
return;
int left = detail::DICT_REMAP_ENTRIES;
while ( remap_end >= 0 && left > 0 )
{
if ( ! table[remap_end].Empty() && Remap(remap_end) )
left--;
else // Note for the branch above: a successful Remap() may increase remap_end if it triggers a SizeUp; if so, remap_end needs to be worked on again, so it is only decremented here.
remap_end--;
}
if ( remap_end < 0 )
remaps = 0; //done remapping.
}
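// Incremental remapping sketch: each call relocates at most detail::DICT_REMAP_ENTRIES
// misplaced entries, stepping remap_end downward past slots that are empty or already in
// the right place. The cost of rehashing after a SizeUp is thereby spread across later
// insertions rather than paid all at once; once remap_end drops below zero, remaps is reset
// and Lookup() stops probing the older bucket mappings.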
bool Dictionary::Remap(int position, int* new_position)
{
ASSERT_VALID(this);
/// Remap changes item positions via remove and insert. To avoid excessive adjustment work, skip it while safe (robust) iteration is in progress.
ASSERT( ( ! cookies || cookies->empty() ) && ( ! iterators || iterators->empty() ) );
int current = BucketByPosition(position);//current bucket
int expected = BucketByHash(table[position].hash, log2_buckets); //expected bucket in new table.
// Equal if either (1) it's a new item, or (2) it's an old item whose new bucket happens to equal its old one; about 50% of old items fall in this case due to FibHash.
if ( current == expected )
return false;
detail::DictEntry entry = RemoveAndRelocate(position); // no iteration cookies to adjust, no need for last_affected_position.
#ifdef DEBUG
entry.bucket = expected;
#endif//DEBUG
//find insert position.
int insert_position = EndOfClusterByBucket(expected);
if ( new_position )
*new_position = insert_position;
entry.distance = insert_position - expected;
InsertAndRelocate(entry, insert_position);// no iteration cookies to adjust, no need for last_affected_position.
ASSERT_VALID(this);
return true;
}
void* Dictionary::NthEntry(int n, const void*& key, int& key_size) const
{
if ( ! order || n < 0 || n >= Length() )
return nullptr;
detail::DictEntry entry = (*order)[n];
key = entry.GetKey();
key_size = entry.key_size;
return entry.value;
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Iteration
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
void Dictionary::MakeRobustCookie(IterCookie* cookie)
{ //make sure c->next >= 0.
if ( ! cookies )
cookies = new std::vector<IterCookie*>;
cookie->MakeRobust();
ASSERT_VALID(cookie);
}
IterCookie* Dictionary::InitForIterationNonConst() //const
{
num_iterators++;
return new IterCookie(const_cast<Dictionary*>(this));
}
void Dictionary::StopIterationNonConst(IterCookie* cookie) //const
{
ASSERT(num_iterators > 0);
if ( num_iterators > 0 )
num_iterators--;
delete cookie;
}
void* Dictionary::NextEntryNonConst(detail::HashKey*& h, IterCookie*& c, bool return_hash) //const
{
// If there are any inserted entries, return them first.
// That keeps the list small and helps avoid searching
// a large list when deleting an entry.
ASSERT(c);
ASSERT_VALID(c);
if ( ! table )
{
if ( num_iterators > 0 )
num_iterators--;
delete c;
c = nullptr;
return nullptr; //end of iteration.
}
if ( c->inserted && ! c->inserted->empty() )
{
// Return the last one. Order doesn't matter,
// and removing from the tail is cheaper.
detail::DictEntry e = c->inserted->back();
if ( return_hash )
h = new detail::HashKey(e.GetKey(), e.key_size, e.hash);
void* v = e.value;
c->inserted->pop_back();
return v;
}
if ( c->next < 0 )
c->next = Next(-1);
ASSERT(c->next >= Capacity() || ! table[c->next].Empty());
//filter out visited keys.
int capacity = Capacity();
if ( c->visited && ! c->visited->empty() )
//filter out visited entries.
while ( c->next < capacity )
{
ASSERT(! table[c->next].Empty());
auto it = std::find(c->visited->begin(), c->visited->end(), table[c->next]);
if ( it == c->visited->end() )
break;
c->visited->erase(it);
c->next = Next(c->next);
}
if ( c->next >= capacity )
{//end.
if ( num_iterators > 0 )
num_iterators--;
delete c;
c = nullptr;
return nullptr; //end of iteration.
}
ASSERT(! table[c->next].Empty());
void* v = table[c->next].value;
if ( return_hash )
h = new detail::HashKey(table[c->next].GetKey(), table[c->next].key_size, table[c->next].hash);
//prepare for next time.
c->next = Next(c->next);
ASSERT_VALID(c);
return v;
}
IterCookie* Dictionary::InitForIteration() const
{
Dictionary* dp = const_cast<Dictionary*>(this);
return dp->InitForIterationNonConst();
}
void* Dictionary::NextEntry(detail::HashKey*& h, IterCookie*& cookie, bool return_hash) const
{
Dictionary* dp = const_cast<Dictionary*>(this);
return dp->NextEntryNonConst(h, cookie, return_hash);
}
void Dictionary::StopIteration(IterCookie* cookie) const
{
Dictionary* dp = const_cast<Dictionary*>(this);
dp->StopIterationNonConst(cookie);
}
#pragma GCC diagnostic pop
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// New Iteration
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
DictIterator::DictIterator(const Dictionary* d, detail::DictEntry* begin, detail::DictEntry* end)
: curr(begin), end(end)
{
// Make sure that we're starting on a non-empty element.
while ( curr != end && curr->Empty() )
++curr;
// Cast away the constness so that the number of iterators can be modified in the dictionary. This does
// violate the constness guarantees of const-begin()/end() and cbegin()/cend(), but we're not modifying the
// actual data in the collection, just a counter in the wrapper of the collection.
dict = const_cast<Dictionary*>(d);
dict->num_iterators++;
}
DictIterator::~DictIterator()
{
assert(dict->num_iterators > 0);
dict->num_iterators--;
}
DictIterator& DictIterator::operator++()
{
// The non-robust case is easy: just advance the current position forward until you find
// one that isn't empty, or you reach the end.
do {
++curr;
}
while ( curr != end && curr->Empty() );
return *this;
}
RobustDictIterator Dictionary::MakeRobustIterator()
{
if ( ! iterators )
iterators = new std::vector<RobustDictIterator*>;
return { this };
}
detail::DictEntry Dictionary::GetNextRobustIteration(RobustDictIterator* iter)
{
// If there are any inserted entries, return them first.
// That keeps the list small and helps avoid searching
// a large list when deleting an entry.
if ( ! table )
{
iter->Complete();
return detail::DictEntry(nullptr); // end of iteration
}
if ( iter->inserted && ! iter->inserted->empty() )
{
// Return the last one. Order doesn't matter,
// and removing from the tail is cheaper.
detail::DictEntry e = iter->inserted->back();
iter->inserted->pop_back();
return e;
}
if ( iter->next < 0 )
iter->next = Next(-1);
ASSERT(iter->next >= Capacity() || ! table[iter->next].Empty());
// Filter out visited keys.
int capacity = Capacity();
if ( iter->visited && ! iter->visited->empty() )
// Filter out visited entries.
while ( iter->next < capacity )
{
ASSERT(! table[iter->next].Empty());
auto it = std::find(iter->visited->begin(), iter->visited->end(), table[iter->next]);
if ( it == iter->visited->end() )
break;
iter->visited->erase(it);
iter->next = Next(iter->next);
}
if ( iter->next >= capacity )
{
iter->Complete();
return detail::DictEntry(nullptr); // end of iteration
}
ASSERT(! table[iter->next].Empty());
detail::DictEntry e = table[iter->next];
//prepare for next time.
iter->next = Next(iter->next);
return e;
}
RobustDictIterator::RobustDictIterator(Dictionary* d) : curr(nullptr), dict(d)
{
next = -1;
inserted = new std::vector<detail::DictEntry>();
visited = new std::vector<detail::DictEntry>();
dict->num_iterators++;
dict->iterators->push_back(this);
// Advance the iterator one step so that we're at the first element.
curr = dict->GetNextRobustIteration(this);
}
RobustDictIterator::RobustDictIterator(const RobustDictIterator& other) : curr(nullptr)
{
dict = nullptr;
if ( other.dict )
{
next = other.next;
inserted = new std::vector<detail::DictEntry>();
visited = new std::vector<detail::DictEntry>();
if ( other.inserted )
std::copy(other.inserted->begin(), other.inserted->end(), std::back_inserter(*inserted));
if ( other.visited)
std::copy(other.visited->begin(), other.visited->end(), std::back_inserter(*visited));
dict = other.dict;
dict->num_iterators++;
dict->iterators->push_back(this);
curr = other.curr;
}
}
RobustDictIterator::RobustDictIterator(RobustDictIterator&& other) : curr(nullptr)
{
dict = nullptr;
if ( other.dict )
{
next = other.next;
inserted = other.inserted;
visited = other.visited;
dict = other.dict;
dict->iterators->push_back(this);
dict->iterators->erase(std::remove(dict->iterators->begin(), dict->iterators->end(), &other),
dict->iterators->end());
other.dict = nullptr;
curr = std::move(other.curr);
}
}
RobustDictIterator::~RobustDictIterator()
{
Complete();
}
void RobustDictIterator::Complete()
{
if ( dict )
{
assert(dict->num_iterators > 0);
dict->num_iterators--;
dict->iterators->erase(std::remove(dict->iterators->begin(), dict->iterators->end(), this),
dict->iterators->end());
delete inserted;
delete visited;
inserted = nullptr;
visited = nullptr;
dict = nullptr;
}
}
RobustDictIterator& RobustDictIterator::operator++()
{
curr = dict->GetNextRobustIteration(this);
return *this;
}
} // namespace zeek