Merge remote-tracking branch 'origin/topic/jazoff/datastructures-defer-init'

* origin/topic/jazoff/datastructures-defer-init: Defer initialization of lists and dicts until an item is added.
2025-10-02 14:48:21 +00:00 · 2019-03-13 17:28:26 -07:00 · 2019-03-13 17:28:26 -07:00 · 389fe2bc2a
commit 389fe2bc2a
parent 1239a286e2 52dbaef6da
6 changed files with 59 additions and 44 deletions
--- a/4
+++ b/4
@ -1,4 +1,8 @@

+2.6-154 | 2019-03-13 17:28:26 -0700
+
+  * Decrease memory usage via deferred list/dict initialization (Justin Azoff, Corelight)
+
 2.6-152 | 2019-03-13 13:46:17 -0700

  * Add field to the default http.log for the Origin header (Nate Guagenti)
--- a/2
+++ b/2
@ -1 +1 @@
-2.6-152
+2.6-154
--- a/src/Dict.cc
+++ b/src/Dict.cc
@ -17,6 +17,10 @@
 // is prime.
 #define PRIME_THRESH 1000

+// Default number of hash buckets in dictionary.  The dictionary will
+// increase the size of the hash table as needed.
+#define DEFAULT_DICT_SIZE 16
+
 class DictEntry {
 public:
 	DictEntry(void* k, int l, hash_t h, void* val)
@ -53,7 +57,7 @@ public:

 Dictionary::Dictionary(dict_order ordering, int initial_size)
 	{
-	Init(initial_size);
+	tbl = 0;
 	tbl2 = 0;

 	if ( ordering == ORDERED )
@ -61,14 +65,17 @@ Dictionary::Dictionary(dict_order ordering, int initial_size)
 	else
 		order = 0;

-	SetDensityThresh(DEFAULT_DENSITY_THRESH);
-
 	delete_func = 0;
 	tbl_next_ind = 0;

 	cumulative_entries = 0;
+	num_buckets = num_entries = max_num_entries = thresh_entries = 0;
+	den_thresh = 0;
 	num_buckets2 = num_entries2 = max_num_entries2 = thresh_entries2 = 0;
 	den_thresh2 = 0;
+
+	if ( initial_size > 0 )
+		Init(initial_size);
 	}

 Dictionary::~Dictionary()
@ -80,12 +87,15 @@ Dictionary::~Dictionary()
 void Dictionary::Clear()
 	{
 	DeInit();
-	Init(2);
+	tbl = 0;
 	tbl2 = 0;
 	}

 void Dictionary::DeInit()
 	{
+	if ( ! tbl )
+		return;
+
 	for ( int i = 0; i < num_buckets; ++i )
 		if ( tbl[i] )
 			{
@ -127,6 +137,9 @@ void Dictionary::DeInit()

 void* Dictionary::Lookup(const void* key, int key_size, hash_t hash) const
 	{
+	if ( ! tbl && ! tbl2 )
+		return 0;
+
 	hash_t h;
 	PList(DictEntry)* chain;

@ -155,6 +168,9 @@ void* Dictionary::Lookup(const void* key, int key_size, hash_t hash) const
 void* Dictionary::Insert(void* key, int key_size, hash_t hash, void* val,
 				int copy_key)
 	{
+	if ( ! tbl )
+		Init(DEFAULT_DICT_SIZE);
+
 	DictEntry* new_entry = new DictEntry(key, key_size, hash, val);
 	void* old_val = Insert(new_entry, copy_key);

@ -179,6 +195,9 @@ void* Dictionary::Insert(void* key, int key_size, hash_t hash, void* val,
 void* Dictionary::Remove(const void* key, int key_size, hash_t hash,
 				bool dont_delete)
 	{
+	if ( ! tbl && ! tbl2 )
+		return 0;
+
 	hash_t h;
 	PList(DictEntry)* chain;
 	int* num_entries_ptr;
@ -280,6 +299,14 @@ void Dictionary::StopIteration(IterCookie* cookie) const

 void* Dictionary::NextEntry(HashKey*& h, IterCookie*& cookie, int return_hash) const
 	{
+	if ( ! tbl && ! tbl2 )
+		{
+		const_cast<PList(IterCookie)*>(&cookies)->remove(cookie);
+		delete cookie;
+		cookie = 0;
+		return 0;
+		}
+
 	// If there are any inserted entries, return them first.
 	// That keeps the list small and helps avoiding searching
 	// a large list when deleting an entry.
@ -366,6 +393,7 @@ void Dictionary::Init(int size)
 		tbl[i] = 0;

 	max_num_entries = num_entries = 0;
+	SetDensityThresh(DEFAULT_DENSITY_THRESH);
 	}

 void Dictionary::Init2(int size)
@ -382,6 +410,9 @@ void Dictionary::Init2(int size)
 // private
 void* Dictionary::Insert(DictEntry* new_entry, int copy_key)
 	{
+	if ( ! tbl )
+		Init(DEFAULT_DICT_SIZE);
+
 	PList(DictEntry)** ttbl;
 	int* num_entries_ptr;
 	int* max_num_entries_ptr;
@ -568,6 +599,9 @@ unsigned int Dictionary::MemoryAllocation() const
 	{
 	int size = padded_sizeof(*this);

+	if ( ! tbl )
+		return size;
+
 	for ( int i = 0; i < num_buckets; ++i )
 		if ( tbl[i] )
 			{
--- a/src/Dict.h
+++ b/src/Dict.h
@ -13,10 +13,6 @@ class IterCookie;
 declare(PList,DictEntry);
 declare(PList,IterCookie);

-// Default number of hash buckets in dictionary.  The dictionary will
-// increase the size of the hash table as needed.
-#define DEFAULT_DICT_SIZE 16
-
 // Type indicating whether the dictionary should keep track of the order
 // of insertions.
 typedef enum { ORDERED, UNORDERED } dict_order;
@ -30,7 +26,7 @@ extern void generic_delete_func(void*);
 class Dictionary {
 public:
 	explicit Dictionary(dict_order ordering = UNORDERED,
-			int initial_size = DEFAULT_DICT_SIZE);
+			int initial_size = 0);
 	virtual ~Dictionary();

 	// Member functions for looking up a key, inserting/changing its
@ -196,7 +192,7 @@ private:
 class PDict(type) : public Dictionary {	\
 public:	\
 	explicit PDict(type)(dict_order ordering = UNORDERED,	\
-			int initial_size = DEFAULT_DICT_SIZE) :	\
+			int initial_size = 0) :	\
 		Dictionary(ordering, initial_size) {}	\
 	type* Lookup(const char* key) const	\
 		{	\
--- a/src/List.cc
+++ b/src/List.cc
@ -6,33 +6,27 @@
 #include "List.h"
 #include "util.h"

-static const int DEFAULT_CHUNK_SIZE = 10;
+#define DEFAULT_LIST_SIZE 10
+#define GROWTH_FACTOR 2

 BaseList::BaseList(int size)
 	{
-	chunk_size = DEFAULT_CHUNK_SIZE;
-
-	if ( size < 0 )
-		{
-		num_entries = max_entries = 0;
-		entry = 0;
-		}
-	else
-		{
-		if ( size > 0 )
-			chunk_size = size;
-
 	num_entries = 0;
-		entry = (ent *) safe_malloc(chunk_size * sizeof(ent));
-		max_entries = chunk_size;
-		}
+	max_entries = 0;
+	entry = 0;
+
+	if ( size <= 0 )
+		return;
+
+	max_entries = size;
+
+	entry = (ent *) safe_malloc(max_entries * sizeof(ent));
 	}


 BaseList::BaseList(BaseList& b)
 	{
 	max_entries = b.max_entries;
-	chunk_size = b.chunk_size;
 	num_entries = b.num_entries;

 	if ( max_entries )
@ -58,7 +52,6 @@ void BaseList::operator=(BaseList& b)
 		free(entry);

 	max_entries = b.max_entries;
-	chunk_size = b.chunk_size;
 	num_entries = b.num_entries;

 	if ( max_entries )
@ -73,10 +66,7 @@ void BaseList::operator=(BaseList& b)
 void BaseList::insert(ent a)
 	{
 	if ( num_entries == max_entries )
-		{
-		resize(max_entries + chunk_size);	// make more room
-		chunk_size *= 2;
-		}
+		resize(max_entries ? max_entries * GROWTH_FACTOR : DEFAULT_LIST_SIZE);

 	for ( int i = num_entries; i > 0; --i )
 		entry[i] = entry[i-1];	// move all pointers up one
@ -94,10 +84,7 @@ void BaseList::sortedinsert(ent a, list_cmp_func cmp_func)

 	// First append element.
 	if ( num_entries == max_entries )
-		{
-		resize(max_entries + chunk_size);
-		chunk_size *= 2;
-		}
+		resize(max_entries ? max_entries * GROWTH_FACTOR : DEFAULT_LIST_SIZE);

 	entry[num_entries++] = a;

@ -141,10 +128,7 @@ ent BaseList::remove_nth(int n)
 void BaseList::append(ent a)
 	{
 	if ( num_entries == max_entries )
-		{
-		resize(max_entries + chunk_size);	// make more room
-		chunk_size *= 2;
-		}
+		resize(max_entries ? max_entries * GROWTH_FACTOR : DEFAULT_LIST_SIZE);

 	entry[num_entries++] = a;
 	}
@ -168,7 +152,6 @@ void BaseList::clear()
 		}

 	num_entries = max_entries = 0;
-	chunk_size = DEFAULT_CHUNK_SIZE;
 	}

 ent BaseList::replace(int ent_index, ent new_ent)
--- a/src/List.h
+++ b/src/List.h
@ -11,7 +11,7 @@
 //	element up, and resizing the list, which involves getting new space
 //	and moving the data.  Resizing occurs automatically when inserting
 //	more elements than the list can currently hold.  Automatic
-//	resizing is done one "chunk_size" of elements at a time and
+//	resizing multiplies the capacity by GROWTH_FACTOR (an empty list starts at DEFAULT_LIST_SIZE) and
 //	always increases the size of the list.  Resizing to zero
 //	(or to less than the current value of num_entries)
 //	will decrease the size of the list to the current number of
@ -32,7 +32,6 @@ public:

 	void clear();		// remove all entries
 	int length() const	{ return num_entries; }
-	int chunk() const	{ return chunk_size; }
 	int max() const		{ return max_entries; }
 	int resize(int = 0);	// 0 => size to fit current number of entries

@ -79,7 +78,6 @@ protected:
 	void operator=(BaseList&);

 	ent* entry;
-	int chunk_size;		// increase size by this amount when necessary
 	int max_entries;
 	int num_entries;
 	};
 @ -1 +1 @@
 .6-152
 .6-154