mirror of
https://github.com/zeek/zeek.git
synced 2025-10-04 07:38:19 +00:00
603 lines · 12 KiB · C++
// See the file "COPYING" in the main distribution directory for copyright.
|
|
|
|
#include "config.h"
|
|
|
|
#ifdef HAVE_MEMORY_H
|
|
#include <memory.h>
|
|
#endif
|
|
|
|
#include "Dict.h"
|
|
#include "Reporter.h"
|
|
|
|
// If the mean bucket length exceeds the following then Insert() will
|
|
// increase the size of the hash table.
|
|
#define DEFAULT_DENSITY_THRESH 3.0
|
|
|
|
// Threshold above which we do not try to ensure that the hash size
|
|
// is prime.
|
|
#define PRIME_THRESH 1000
|
|
|
|
class DictEntry {
|
|
public:
|
|
DictEntry(void* k, int l, hash_t h, void* val)
|
|
{ key = k; len = l; hash = h; value = val; }
|
|
|
|
~DictEntry()
|
|
{
|
|
delete [] (char*) key;
|
|
}
|
|
|
|
void* key;
|
|
int len;
|
|
hash_t hash;
|
|
void* value;
|
|
};
|
|
|
|
// The value of an iteration cookie is the bucket and offset within the
|
|
// bucket at which to start looking for the next value to return.
|
|
class IterCookie {
|
|
public:
|
|
IterCookie(int b, int o)
|
|
{
|
|
bucket = b;
|
|
offset = o;
|
|
ttbl = 0;
|
|
num_buckets_p = 0;
|
|
}
|
|
|
|
int bucket, offset;
|
|
PList(DictEntry)** ttbl;
|
|
const int* num_buckets_p;
|
|
PList(DictEntry) inserted; // inserted while iterating
|
|
};
|
|
|
|
// Sets up an empty dictionary. If ordering is ORDERED, insertion order
// is tracked so NthEntry() works.
Dictionary::Dictionary(dict_order ordering, int initial_size)
	{
	// Build the primary hash table.
	Init(initial_size);
	SetDensityThresh(DEFAULT_DENSITY_THRESH);

	// No resize is in progress, so the secondary table is inactive.
	tbl2 = 0;
	tbl_next_ind = 0;
	num_buckets2 = 0;
	num_entries2 = 0;
	max_num_entries2 = 0;
	thresh_entries2 = 0;
	den_thresh2 = 0;

	// No per-value destructor installed by default.
	delete_func = 0;

	// Track insertion order only when the caller asked for it.
	order = (ordering == ORDERED) ? new PList(DictEntry) : 0;
	}
|
|
|
|
Dictionary::~Dictionary()
	{
	// Frees both hash tables and every entry (and, via delete_func if
	// one was installed, the stored values too).
	DeInit();
	delete order;
	}
|
|
|
|
void Dictionary::Clear()
	{
	// Drop every entry and restart with a minimal table.
	DeInit();
	Init(2);
	tbl2 = 0;	// no resize is in progress after a clear
	}
|
|
|
|
void Dictionary::DeInit()
|
|
{
|
|
for ( int i = 0; i < num_buckets; ++i )
|
|
if ( tbl[i] )
|
|
{
|
|
PList(DictEntry)* chain = tbl[i];
|
|
loop_over_list(*chain, j)
|
|
{
|
|
DictEntry* e = (*chain)[j];
|
|
if ( delete_func )
|
|
delete_func(e->value);
|
|
delete e;
|
|
}
|
|
|
|
delete chain;
|
|
}
|
|
|
|
delete [] tbl;
|
|
|
|
if ( tbl2 == 0 )
|
|
return;
|
|
|
|
for ( int i = 0; i < num_buckets2; ++i )
|
|
if ( tbl2[i] )
|
|
{
|
|
PList(DictEntry)* chain = tbl2[i];
|
|
loop_over_list(*chain, j)
|
|
{
|
|
DictEntry* e = (*chain)[j];
|
|
if ( delete_func )
|
|
delete_func(e->value);
|
|
delete e;
|
|
}
|
|
|
|
delete chain;
|
|
}
|
|
|
|
delete [] tbl2;
|
|
tbl2 = 0;
|
|
}
|
|
|
|
void* Dictionary::Lookup(const void* key, int key_size, hash_t hash) const
|
|
{
|
|
hash_t h;
|
|
PList(DictEntry)* chain;
|
|
|
|
// Figure out which hash table to look in.
|
|
h = hash % num_buckets;
|
|
if ( ! tbl2 || h >= tbl_next_ind )
|
|
chain = tbl[h];
|
|
else
|
|
chain = tbl2[hash % num_buckets2];
|
|
|
|
if ( chain )
|
|
{
|
|
for ( int i = 0; i < chain->length(); ++i )
|
|
{
|
|
DictEntry* entry = (*chain)[i];
|
|
|
|
if ( entry->hash == hash && entry->len == key_size &&
|
|
! memcmp(key, entry->key, key_size) )
|
|
return entry->value;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
void* Dictionary::Insert(void* key, int key_size, hash_t hash, void* val,
|
|
int copy_key)
|
|
{
|
|
DictEntry* new_entry = new DictEntry(key, key_size, hash, val);
|
|
void* old_val = Insert(new_entry, copy_key);
|
|
|
|
if ( old_val )
|
|
{
|
|
// We didn't need the new DictEntry, the key was already
|
|
// present.
|
|
delete new_entry;
|
|
}
|
|
else if ( order )
|
|
order->append(new_entry);
|
|
|
|
// Resize logic.
|
|
if ( tbl2 )
|
|
MoveChains();
|
|
else if ( num_entries >= thresh_entries )
|
|
StartChangeSize(num_buckets * 2 + 1);
|
|
|
|
return old_val;
|
|
}
|
|
|
|
// Removes the entry matching (key, key_size, hash) and returns its
// value, or 0 if no such entry exists. If dont_delete is true, the
// entry's key storage is not freed (the caller retains ownership).
void* Dictionary::Remove(const void* key, int key_size, hash_t hash,
			bool dont_delete)
	{
	hash_t h;
	PList(DictEntry)* chain;
	int* num_entries_ptr;

	// Figure out which hash table to look in
	h = hash % num_buckets;
	if ( ! tbl2 || h >= tbl_next_ind )
		{
		// No resize in progress, or this bucket hasn't been migrated
		// yet: use the primary table.
		chain = tbl[h];
		num_entries_ptr = &num_entries;
		}
	else
		{
		chain = tbl2[hash % num_buckets2];
		num_entries_ptr = &num_entries2;
		}

	if ( ! chain )
		return 0;

	for ( int i = 0; i < chain->length(); ++i )
		{
		DictEntry* entry = (*chain)[i];

		if ( entry->hash == hash && entry->len == key_size &&
		! memcmp(key, entry->key, key_size) )
			{
			// NOTE(review): h is the primary-table bucket index even
			// when the entry was found in tbl2; DoRemove() uses it to
			// adjust iteration cookies — confirm that's intended while
			// a resize is in progress.
			void* entry_value = DoRemove(entry, h, chain, i);

			if ( dont_delete )
				// Zero the key so ~DictEntry() won't free it.
				entry->key = 0;

			delete entry;
			--*num_entries_ptr;
			return entry_value;
			}
		}

	return 0;
	}
|
|
|
|
// Unlinks entry (at chain[chain_offset] in bucket h) from its chain and
// from the insertion-order list, fixes up any in-progress iteration
// cookies, and returns the entry's value. The entry itself is not
// deleted here — the caller owns it.
void* Dictionary::DoRemove(DictEntry* entry, hash_t h,
			PList(DictEntry)* chain, int chain_offset)
	{
	void* entry_value = entry->value;

	chain->remove_nth(chain_offset);
	if ( order )
		order->remove(entry);

	// Adjust existing cookies.
	loop_over_list(cookies, i)
		{
		IterCookie* c = cookies[i];

		// Is the affected bucket the current one?
		if ( (unsigned int) c->bucket == h )
			{
			if ( c->offset > chain_offset )
				// The cookie points past the removed slot; shift it
				// back so no entry gets skipped.
				--c->offset;

			// The only other important case here occurs when we
			// are deleting the current entry which
			// simultaneously happens to be the last one in this
			// bucket. This means that we would have to move on
			// to the next non-empty bucket. Fortunately,
			// NextEntry() will do exactly the right thing in
			// this case. :-)
			}

		// This item may have been inserted during this iteration.
		if ( (unsigned int) c->bucket > h )
			c->inserted.remove(entry);
		}

	return entry_value;
	}
|
|
|
|
void* Dictionary::NthEntry(int n, const void*& key, int& key_len) const
|
|
{
|
|
if ( ! order || n < 0 || n >= Length() )
|
|
return 0;
|
|
|
|
DictEntry* entry = (*order)[n];
|
|
key = entry->key;
|
|
key_len = entry->len;
|
|
return entry->value;
|
|
}
|
|
|
|
// Creates an iteration cookie positioned at the very beginning
// (bucket 0, offset 0). The caller drives iteration via NextEntry().
IterCookie* Dictionary::InitForIteration() const
	{
	return new IterCookie(0, 0);
	}
|
|
|
|
// Abandons an iteration early, releasing the cookie. (NextEntry() frees
// the cookie itself when iteration runs off the end.)
// NOTE(review): the cookie is not removed from the `cookies` list here;
// if it was registered there, a dangling pointer would remain — confirm
// registration/removal happens elsewhere.
void Dictionary::StopIteration(IterCookie* cookie) const
	{
	delete cookie;
	}
|
|
|
|
// Returns the next entry's value (and, if return_hash is set, a freshly
// allocated HashKey the caller must delete). When iteration completes,
// the cookie is removed from the cookie list, deleted, and set to 0,
// and 0 is returned.
void* Dictionary::NextEntry(HashKey*& h, IterCookie*& cookie, int return_hash) const
	{
	// If there are any inserted entries, return them first.
	// That keeps the list small and helps avoiding searching
	// a large list when deleting an entry.

	DictEntry* entry;

	if ( cookie->inserted.length() )
		{
		// Return the last one. Order doesn't matter,
		// and removing from the tail is cheaper.
		entry = cookie->inserted.remove_nth(cookie->inserted.length()-1);
		if ( return_hash )
			h = new HashKey(entry->key, entry->len, entry->hash);

		return entry->value;
		}

	int b = cookie->bucket;
	int o = cookie->offset;
	PList(DictEntry)** ttbl;
	const int* num_buckets_p;

	if ( ! cookie->ttbl )
		{
		// First call for this cookie: bind it to the primary table.
		// XXX maybe we could update cookie->b from tbl_next_ind here?
		cookie->ttbl = tbl;
		cookie->num_buckets_p = &num_buckets;
		}

	ttbl = cookie->ttbl;
	num_buckets_p = cookie->num_buckets_p;

	// Fast path: the current bucket still has entries left.
	if ( ttbl[b] && ttbl[b]->length() > o )
		{
		entry = (*ttbl[b])[o];
		++cookie->offset;
		if ( return_hash )
			h = new HashKey(entry->key, entry->len, entry->hash);
		return entry->value;
		}

	++b;	// Move on to next non-empty bucket.
	while ( b < *num_buckets_p && (! ttbl[b] || ttbl[b]->length() == 0) )
		++b;

	if ( b >= *num_buckets_p )
		{
		// Ran off the end of the current table.

		// If we're resizing, we need to search the 2nd table too.
		if ( ttbl == tbl && tbl2 )
			{
			// Restart the walk at the beginning of the second table.
			cookie->ttbl = tbl2;
			cookie->num_buckets_p = &num_buckets2;
			cookie->bucket = 0;
			cookie->offset = 0;
			return Dictionary::NextEntry(h, cookie, return_hash);
			}

		// All done.

		// FIXME: I don't like removing the const here. But is there
		// a better way?
		const_cast<PList(IterCookie)*>(&cookies)->remove(cookie);
		delete cookie;
		cookie = 0;
		return 0;
		}

	// Found a later non-empty bucket: hand out its first entry and
	// position the cookie at the second.
	entry = (*ttbl[b])[0];
	if ( return_hash )
		h = new HashKey(entry->key, entry->len, entry->hash);

	cookie->bucket = b;
	cookie->offset = 1;

	return entry->value;
	}
|
|
|
|
void Dictionary::Init(int size)
|
|
{
|
|
num_buckets = NextPrime(size);
|
|
tbl = new PList(DictEntry)*[num_buckets];
|
|
|
|
for ( int i = 0; i < num_buckets; ++i )
|
|
tbl[i] = 0;
|
|
|
|
max_num_entries = num_entries = 0;
|
|
}
|
|
|
|
void Dictionary::Init2(int size)
|
|
{
|
|
num_buckets2 = NextPrime(size);
|
|
tbl2 = new PList(DictEntry)*[num_buckets2];
|
|
|
|
for ( int i = 0; i < num_buckets2; ++i )
|
|
tbl2[i] = 0;
|
|
|
|
max_num_entries2 = num_entries2 = 0;
|
|
}
|
|
|
|
// private
|
|
void* Dictionary::Insert(DictEntry* new_entry, int copy_key)
|
|
{
|
|
PList(DictEntry)** ttbl;
|
|
int* num_entries_ptr;
|
|
int* max_num_entries_ptr;
|
|
hash_t h = new_entry->hash % num_buckets;
|
|
|
|
// We must be careful when we are in the middle of resizing.
|
|
// If the new entry hashes to a bucket in the old table we
|
|
// haven't moved yet, we need to put it in the old table. If
|
|
// we didn't do it this way, we would sometimes have to
|
|
// search both tables which is probably more expensive.
|
|
|
|
if ( ! tbl2 || h >= tbl_next_ind )
|
|
{
|
|
ttbl = tbl;
|
|
num_entries_ptr = &num_entries;
|
|
max_num_entries_ptr = &max_num_entries;
|
|
}
|
|
else
|
|
{
|
|
ttbl = tbl2;
|
|
h = new_entry->hash % num_buckets2;
|
|
num_entries_ptr = &num_entries2;
|
|
max_num_entries_ptr = &max_num_entries2;
|
|
}
|
|
|
|
PList(DictEntry)* chain = ttbl[h];
|
|
|
|
int n = new_entry->len;
|
|
|
|
if ( chain )
|
|
{
|
|
for ( int i = 0; i < chain->length(); ++i )
|
|
{
|
|
DictEntry* entry = (*chain)[i];
|
|
|
|
if ( entry->hash == new_entry->hash &&
|
|
entry->len == n &&
|
|
! memcmp(entry->key, new_entry->key, n) )
|
|
{
|
|
void* old_value = entry->value;
|
|
entry->value = new_entry->value;
|
|
return old_value;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
// Create new chain.
|
|
chain = ttbl[h] = new PList(DictEntry);
|
|
|
|
// If we got this far, then we couldn't use an existing copy
|
|
// of the key, so make a new one if necessary.
|
|
if ( copy_key )
|
|
{
|
|
void* old_key = new_entry->key;
|
|
new_entry->key = (void*) new char[n];
|
|
memcpy(new_entry->key, old_key, n);
|
|
delete (char*) old_key;
|
|
}
|
|
|
|
// We happen to know (:-() that appending is more efficient
|
|
// on lists than prepending.
|
|
chain->append(new_entry);
|
|
|
|
if ( *max_num_entries_ptr < ++*num_entries_ptr )
|
|
*max_num_entries_ptr = *num_entries_ptr;
|
|
|
|
// For ongoing iterations: If we already passed the bucket where this
|
|
// entry was put, add it to the cookie's list of inserted entries.
|
|
loop_over_list(cookies, i)
|
|
{
|
|
IterCookie* c = cookies[i];
|
|
if ( h < (unsigned int) c->bucket )
|
|
c->inserted.append(new_entry);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int Dictionary::NextPrime(int n) const
|
|
{
|
|
if ( (n & 0x1) == 0 )
|
|
// Even.
|
|
++n;
|
|
|
|
if ( n > PRIME_THRESH )
|
|
// Too expensive to test for primality, just stick with it.
|
|
return n;
|
|
|
|
while ( ! IsPrime(n) )
|
|
n += 2;
|
|
|
|
return n;
|
|
}
|
|
|
|
int Dictionary::IsPrime(int n) const
|
|
{
|
|
for ( int j = 3; j * j <= n; ++j )
|
|
if ( n % j == 0 )
|
|
return 0;
|
|
|
|
return 1;
|
|
}
|
|
|
|
// Begins an incremental resize to roughly new_size buckets by allocating
// the secondary table; entries then migrate gradually via MoveChains().
void Dictionary::StartChangeSize(int new_size)
	{
	// Only start resizing if there isn't any iteration in progress.
	if ( cookies.length() > 0 )
		return;

	// A resize must not already be underway.
	if ( tbl2 )
		reporter->InternalError("Dictionary::StartChangeSize() tbl2 not NULL");

	Init2(new_size);

	// Migration starts from the first bucket of the old table.
	tbl_next_ind = 0;

	// Preserve threshold density
	SetDensityThresh2(DensityThresh());
	}
|
|
|
|
// Migrates a batch of chains from the old table into the new one,
// completing the resize once every bucket has been moved. Called from
// the public Insert() while a resize is in progress.
void Dictionary::MoveChains()
	{
	// Do not change current distribution if there an ongoing iteration.
	if ( cookies.length() > 0 )
		return;

	// Attempt to move this many entries (must do at least 2)
	int num = 8;

	do
		{
		PList(DictEntry)* chain = tbl[tbl_next_ind++];

		if ( ! chain )
			continue;

		// Detach the chain from the old bucket before re-inserting.
		tbl[tbl_next_ind - 1] = 0;

		for ( int j = 0; j < chain->length(); ++j )
			{
			// The bucket index is now below tbl_next_ind, so the
			// private Insert() places the entry into tbl2 (and bumps
			// num_entries2); shift the count off the old table here.
			Insert((*chain)[j], 0);
			--num_entries;
			--num;
			}

		delete chain;
		}
	while ( num > 0 && int(tbl_next_ind) < num_buckets );

	if ( int(tbl_next_ind) >= num_buckets )
		FinishChangeSize();
	}
|
|
|
|
void Dictionary::FinishChangeSize()
|
|
{
|
|
// Cheap safety check.
|
|
if ( num_entries != 0 )
|
|
reporter->InternalError(
|
|
"Dictionary::FinishChangeSize: num_entries is %d\n",
|
|
num_entries);
|
|
|
|
for ( int i = 0; i < num_buckets; ++i )
|
|
delete tbl[i];
|
|
delete [] tbl;
|
|
|
|
tbl = tbl2;
|
|
tbl2 = 0;
|
|
|
|
num_buckets = num_buckets2;
|
|
num_entries = num_entries2;
|
|
max_num_entries = max_num_entries2;
|
|
den_thresh = den_thresh2;
|
|
thresh_entries = thresh_entries2;
|
|
|
|
num_buckets2 = 0;
|
|
num_entries2 = 0;
|
|
max_num_entries2 = 0;
|
|
den_thresh2 = 0;
|
|
thresh_entries2 = 0;
|
|
}
|
|
|
|
unsigned int Dictionary::MemoryAllocation() const
|
|
{
|
|
int size = padded_sizeof(*this);
|
|
|
|
for ( int i = 0; i < num_buckets; ++i )
|
|
if ( tbl[i] )
|
|
{
|
|
PList(DictEntry)* chain = tbl[i];
|
|
loop_over_list(*chain, j)
|
|
size += padded_sizeof(DictEntry) + pad_size((*chain)[j]->len);
|
|
size += chain->MemoryAllocation();
|
|
}
|
|
|
|
size += pad_size(num_buckets * sizeof(PList(DictEntry)*));
|
|
|
|
if ( order )
|
|
size += order->MemoryAllocation();
|
|
|
|
if ( tbl2 )
|
|
{
|
|
for ( int i = 0; i < num_buckets2; ++i )
|
|
if ( tbl2[i] )
|
|
{
|
|
PList(DictEntry)* chain = tbl2[i];
|
|
loop_over_list(*chain, j)
|
|
size += padded_sizeof(DictEntry) + pad_size((*chain)[j]->len);
|
|
size += chain->MemoryAllocation();
|
|
}
|
|
|
|
size += pad_size(num_buckets2 * sizeof(PList(DictEntry)*));
|
|
}
|
|
|
|
return size;
|
|
}
|
|
|
|
// Default value-deletion callback: treats the value as malloc()'d
// memory. Only valid for values actually allocated with malloc/realloc,
// never for objects created with new.
void generic_delete_func(void* v)
	{
	free(v);
	}
|