Initial import of svn+ssh:://svn.icir.org/bro/trunk/bro as of r7088

2025-10-08 09:38:19 +00:00 · 2010-09-27 20:42:30 -07:00 · 2010-09-27 20:42:30 -07:00 · 61757ac78b
commit 61757ac78b
1383 changed files with 380824 additions and 0 deletions
--- a/src/Dict.cc
+++ b/src/Dict.cc
@ -0,0 +1,586 @@
+// $Id: Dict.cc 6219 2008-10-01 05:39:07Z vern $
+//
+// See the file "COPYING" in the main distribution directory for copyright.
+
+#include "config.h"
+
+#ifdef HAVE_MEMORY_H
+#include <memory.h>
+#endif
+
+#include "Dict.h"
+
+// If the mean bucket length exceeds the following then Insert() will
+// increase the size of the hash table.
+#define DEFAULT_DENSITY_THRESH 3.0
+
+// Threshold above which we do not try to ensure that the hash size
+// is prime.
+#define PRIME_THRESH 1000
+
+class DictEntry {
+public:
+	DictEntry(void* k, int l, hash_t h, void* val)
+		{ key = k; len = l; hash = h; value = val; }
+
+	~DictEntry()
+		{
+		delete [] (char*) key;
+		}
+
+	void* key;
+	int len;
+	hash_t hash;
+	void* value;
+};
+
+// The value of an iteration cookie is the bucket and offset within the
+// bucket at which to start looking for the next value to return.
+class IterCookie {
+public:
+	IterCookie(int b, int o)
+		{
+		bucket = b;
+		offset = o;
+		ttbl = 0;
+		num_buckets_p = 0;
+		}
+
+	int bucket, offset;
+	PList(DictEntry)** ttbl;
+	const int* num_buckets_p;
+	PList(DictEntry) inserted;	// inserted while iterating
+};
+
+Dictionary::Dictionary(dict_order ordering, int initial_size)
+	{
+	Init(initial_size);
+	tbl2 = 0;
+
+	if ( ordering == ORDERED )
+		order = new PList(DictEntry);
+	else
+		order = 0;
+
+	SetDensityThresh(DEFAULT_DENSITY_THRESH);
+
+	delete_func = 0;
+	}
+
+Dictionary::~Dictionary()
+	{
+	for ( int i = 0; i < num_buckets; ++i )
+		if ( tbl[i] )
+			{
+			PList(DictEntry)* chain = tbl[i];
+			loop_over_list(*chain, j)
+				{
+				DictEntry* e = (*chain)[j];
+				if ( delete_func )
+					delete_func(e->value);
+				delete e;
+				}
+
+			delete chain;
+			}
+
+	delete [] tbl;
+	delete order;
+
+	if ( tbl2 == 0 )
+		return;
+
+	for ( int i = 0; i < num_buckets2; ++i )
+		if ( tbl2[i] )
+			{
+			PList(DictEntry)* chain = tbl2[i];
+			loop_over_list(*chain, j)
+				{
+				DictEntry* e = (*chain)[j];
+				if ( delete_func )
+					delete_func(e->value);
+				delete e;
+				}
+
+			delete chain;
+			}
+	delete [] tbl2;
+	}
+
+void* Dictionary::Lookup(const void* key, int key_size, hash_t hash) const
+	{
+	hash_t h;
+	PList(DictEntry)* chain;
+
+	// Figure out which hash table to look in.
+	h = hash % num_buckets;
+	if ( ! tbl2 || h >= tbl_next_ind )
+		chain = tbl[h];
+	else
+		chain = tbl2[hash % num_buckets2];
+
+	if ( chain )
+		{
+		for ( int i = 0; i < chain->length(); ++i )
+			{
+			DictEntry* entry = (*chain)[i];
+
+			if ( entry->hash == hash && entry->len == key_size &&
+			     ! memcmp(key, entry->key, key_size) )
+				return entry->value;
+			}
+		}
+
+	return 0;
+	}
+
+void* Dictionary::Insert(void* key, int key_size, hash_t hash, void* val,
+				int copy_key)
+	{
+	DictEntry* new_entry = new DictEntry(key, key_size, hash, val);
+	void* old_val = Insert(new_entry, copy_key);
+
+	if ( old_val )
+		{
+		// We didn't need the new DictEntry, the key was already
+		// present.
+		delete new_entry;
+		}
+	else if ( order )
+		order->append(new_entry);
+
+	// Resize logic.
+	if ( tbl2 )
+		MoveChains();
+	else if ( num_entries >= thresh_entries )
+		StartChangeSize(num_buckets * 2 + 1);
+
+	return old_val;
+	}
+
+void* Dictionary::Remove(const void* key, int key_size, hash_t hash,
+				bool dont_delete)
+	{
+	hash_t h;
+	PList(DictEntry)* chain;
+	int* num_entries_ptr;
+
+	// Figure out which hash table to look in
+	h = hash % num_buckets;
+	if ( ! tbl2 || h >= tbl_next_ind )
+		{
+		chain = tbl[h];
+		num_entries_ptr = &num_entries;
+		}
+	else
+		{
+		chain = tbl2[hash % num_buckets2];
+		num_entries_ptr = &num_entries2;
+		}
+
+	if ( ! chain )
+		return 0;
+
+	for ( int i = 0; i < chain->length(); ++i )
+		{
+		DictEntry* entry = (*chain)[i];
+
+		if ( entry->hash == hash && entry->len == key_size &&
+		     ! memcmp(key, entry->key, key_size) )
+			{
+			void* entry_value = DoRemove(entry, h, chain, i);
+
+			if ( dont_delete )
+				entry->key = 0;
+
+			delete entry;
+			--*num_entries_ptr;
+			return entry_value;
+			}
+		}
+
+	return 0;
+	}
+
+void* Dictionary::DoRemove(DictEntry* entry, hash_t h,
+				PList(DictEntry)* chain, int chain_offset)
+	{
+	void* entry_value = entry->value;
+
+	chain->remove_nth(chain_offset);
+	if ( order )
+		order->remove(entry);
+
+	// Adjust existing cookies.
+	loop_over_list(cookies, i)
+		{
+		IterCookie* c = cookies[i];
+
+		// Is the affected bucket the current one?
+		if ( (unsigned int) c->bucket == h )
+			{
+			if ( c->offset > chain_offset )
+				--c->offset;
+
+			// The only other important case here occurs when we
+			// are deleting the current entry which
+			// simultaniously happens to be the last one in this
+			// bucket. This means that we would have to move on
+			// to the next non-empty bucket. Fortunately,
+			// NextEntry() will do exactly the right thing in
+			// this case. :-)
+			}
+
+		// This item may have been inserted during this iteration.
+		if ( (unsigned int) c->bucket > h )
+			c->inserted.remove(entry);
+		}
+
+	return entry_value;
+	}
+
+void* Dictionary::NthEntry(int n, const void*& key, int& key_len) const
+	{
+	if ( ! order || n < 0 || n >= Length() )
+		return 0;
+
+	DictEntry* entry = (*order)[n];
+	key = entry->key;
+	key_len = entry->len;
+	return entry->value;
+	}
+
+IterCookie* Dictionary::InitForIteration() const
+	{
+	return new IterCookie(0, 0);
+	}
+
+void Dictionary::StopIteration(IterCookie* cookie) const
+	{
+	delete cookie;
+	}
+
+void* Dictionary::NextEntry(HashKey*& h, IterCookie*& cookie, int return_hash) const
+	{
+	// If there are any inserted entries, return them first.
+	// That keeps the list small and helps avoiding searching
+	// a large list when deleting an entry.
+
+	DictEntry* entry;
+
+	if ( cookie->inserted.length() )
+		{
+		// Return the last one. Order doesn't matter,
+		// and removing from the tail is cheaper.
+		entry = cookie->inserted.remove_nth(cookie->inserted.length()-1);
+		if ( return_hash )
+			h = new HashKey(entry->key, entry->len, entry->hash);
+
+		return entry->value;
+		}
+
+	int b = cookie->bucket;
+	int o = cookie->offset;
+	PList(DictEntry)** ttbl;
+	const int* num_buckets_p;
+
+	if ( ! cookie->ttbl )
+		{
+		// XXX maybe we could update cookie->b from tbl_next_ind here?
+		cookie->ttbl = tbl;
+		cookie->num_buckets_p = &num_buckets;
+		}
+
+	ttbl = cookie->ttbl;
+	num_buckets_p = cookie->num_buckets_p;
+
+	if ( ttbl[b] && ttbl[b]->length() > o )
+		{
+		entry = (*ttbl[b])[o];
+		++cookie->offset;
+		if ( return_hash )
+			h = new HashKey(entry->key, entry->len, entry->hash);
+		return entry->value;
+		}
+
+	++b;	// Move on to next non-empty bucket.
+	while ( b < *num_buckets_p && (! ttbl[b] || ttbl[b]->length() == 0) )
+		++b;
+
+	if ( b >= *num_buckets_p )
+		{
+		// If we're resizing, we need to search the 2nd table too.
+		if ( ttbl == tbl && tbl2 )
+			{
+			cookie->ttbl = tbl2;
+			cookie->num_buckets_p = &num_buckets2;
+			cookie->bucket = 0;
+			cookie->offset = 0;
+			return Dictionary::NextEntry(h, cookie, return_hash);
+			}
+
+		// All done.
+
+		// FIXME: I don't like removing the const here. But is there
+		// a better way?
+		const_cast<PList(IterCookie)*>(&cookies)->remove(cookie);
+		delete cookie;
+		cookie = 0;
+		return 0;
+		}
+
+	entry = (*ttbl[b])[0];
+	if ( return_hash )
+		h = new HashKey(entry->key, entry->len, entry->hash);
+
+	cookie->bucket = b;
+	cookie->offset = 1;
+
+	return entry->value;
+	}
+
+void Dictionary::Init(int size)
+	{
+	num_buckets = NextPrime(size);
+	tbl = new PList(DictEntry)*[num_buckets];
+
+	for ( int i = 0; i < num_buckets; ++i )
+		tbl[i] = 0;
+
+	max_num_entries = num_entries = 0;
+	}
+
+void Dictionary::Init2(int size)
+	{
+	num_buckets2 = NextPrime(size);
+	tbl2 = new PList(DictEntry)*[num_buckets2];
+
+	for ( int i = 0; i < num_buckets2; ++i )
+		tbl2[i] = 0;
+
+	max_num_entries2 = num_entries2 = 0;
+	}
+
+// private
+void* Dictionary::Insert(DictEntry* new_entry, int copy_key)
+	{
+	PList(DictEntry)** ttbl;
+	int* num_entries_ptr;
+	int* max_num_entries_ptr;
+	hash_t h = new_entry->hash % num_buckets;
+
+	// We must be careful when we are in the middle of resizing.
+	// If the new entry hashes to a bucket in the old table we
+	// haven't moved yet, we need to put it in the old table. If
+	// we didn't do it this way, we would sometimes have to
+	// search both tables which is probably more expensive.
+
+	if ( ! tbl2 || h >= tbl_next_ind )
+		{
+		ttbl = tbl;
+		num_entries_ptr = &num_entries;
+		max_num_entries_ptr = &max_num_entries;
+		}
+	else
+		{
+		ttbl = tbl2;
+		h = new_entry->hash % num_buckets2;
+		num_entries_ptr = &num_entries2;
+		max_num_entries_ptr = &max_num_entries2;
+		}
+
+	PList(DictEntry)* chain = ttbl[h];
+
+	int n = new_entry->len;
+
+	if ( chain )
+		{
+		for ( int i = 0; i < chain->length(); ++i )
+			{
+			DictEntry* entry = (*chain)[i];
+
+			if ( entry->hash == new_entry->hash &&
+			     entry->len == n &&
+			     ! memcmp(entry->key, new_entry->key, n) )
+				{
+				void* old_value = entry->value;
+				entry->value = new_entry->value;
+				return old_value;
+				}
+			}
+		}
+	else
+		// Create new chain.
+		chain = ttbl[h] = new PList(DictEntry);
+
+	// If we got this far, then we couldn't use an existing copy
+	// of the key, so make a new one if necessary.
+	if ( copy_key )
+		{
+		void* old_key = new_entry->key;
+		new_entry->key = (void*) new char[n];
+		memcpy(new_entry->key, old_key, n);
+		delete (char*) old_key;
+		}
+
+	// We happen to know (:-() that appending is more efficient
+	// on lists than prepending.
+	chain->append(new_entry);
+
+	if ( *max_num_entries_ptr < ++*num_entries_ptr )
+		*max_num_entries_ptr = *num_entries_ptr;
+
+	// For ongoing iterations: If we already passed the bucket where this
+	// entry was put, add it to the cookie's list of inserted entries.
+	loop_over_list(cookies, i)
+		{
+		IterCookie* c = cookies[i];
+		if ( h < (unsigned int) c->bucket )
+			c->inserted.append(new_entry);
+		}
+
+	return 0;
+	}
+
+int Dictionary::NextPrime(int n) const
+	{
+	if ( (n & 0x1) == 0 )
+		// Even.
+		++n;
+
+	if ( n > PRIME_THRESH )
+		// Too expensive to test for primality, just stick with it.
+		return n;
+
+	while ( ! IsPrime(n) )
+		n += 2;
+
+	return n;
+	}
+
+int Dictionary::IsPrime(int n) const
+	{
+	for ( int j = 3; j * j <= n; ++j )
+		if ( n % j == 0 )
+			return 0;
+
+	return 1;
+	}
+
+void Dictionary::StartChangeSize(int new_size)
+	{
+	// Only start resizing if there isn't any iteration in progress.
+	if ( cookies.length() > 0 )
+		return;
+
+	if ( tbl2 )
+		internal_error("Dictionary::StartChangeSize() tbl2 not NULL");
+
+	Init2(new_size);
+
+	tbl_next_ind = 0;
+
+	// Preserve threshold density
+	SetDensityThresh2(DensityThresh());
+	}
+
+void Dictionary::MoveChains()
+	{
+	// Do not change current distribution if there an ongoing iteration.
+	if ( cookies.length() > 0 )
+		return;
+
+	// Attempt to move this many entries (must do at least 2)
+	int num = 8;
+
+	do
+		{
+		PList(DictEntry)* chain = tbl[tbl_next_ind++];
+
+		if ( ! chain )
+			continue;
+
+		tbl[tbl_next_ind - 1] = 0;
+
+		for ( int j = 0; j < chain->length(); ++j )
+			{
+			Insert((*chain)[j], 0);
+			--num_entries;
+			--num;
+			}
+
+		delete chain;
+		}
+	while ( num > 0 && int(tbl_next_ind) < num_buckets );
+
+	if ( int(tbl_next_ind) >= num_buckets )
+		FinishChangeSize();
+	}
+
+void Dictionary::FinishChangeSize()
+	{
+	// Cheap safety check.
+	if ( num_entries != 0 )
+		internal_error(
+		    "Dictionary::FinishChangeSize: num_entries is %d\n",
+		    num_entries);
+
+	for ( int i = 0; i < num_buckets; ++i )
+		delete tbl[i];
+	delete [] tbl;
+
+	tbl = tbl2;
+	tbl2 = 0;
+
+	num_buckets = num_buckets2;
+	num_entries = num_entries2;
+	max_num_entries = max_num_entries2;
+	den_thresh = den_thresh2;
+	thresh_entries = thresh_entries2;
+
+	num_buckets2 = 0;
+	num_entries2 = 0;
+	max_num_entries2 = 0;
+	den_thresh2 = 0;
+	thresh_entries2 = 0;
+	}
+
+unsigned int Dictionary::MemoryAllocation() const
+	{
+	int size = padded_sizeof(*this);
+
+	for ( int i = 0; i < num_buckets; ++i )
+		if ( tbl[i] )
+			{
+			PList(DictEntry)* chain = tbl[i];
+			loop_over_list(*chain, j)
+				size += padded_sizeof(DictEntry) + pad_size((*chain)[j]->len);
+			size += chain->MemoryAllocation();
+			}
+
+	size += pad_size(num_buckets * sizeof(PList(DictEntry)*));
+
+	if ( order )
+		size += order->MemoryAllocation();
+
+	if ( tbl2 )
+		{
+		for ( int i = 0; i < num_buckets2; ++i )
+			if ( tbl2[i] )
+				{
+				PList(DictEntry)* chain = tbl2[i];
+				loop_over_list(*chain, j)
+					size += padded_sizeof(DictEntry) + pad_size((*chain)[j]->len);
+				size += chain->MemoryAllocation();
+				}
+
+		size += pad_size(num_buckets2 * sizeof(PList(DictEntry)*));
+		}
+
+	return size;
+	}
+
+void generic_delete_func(void* v)
+	{
+	free(v);
+	}