From 50a0835b41f3bee692c767c4f6b680f1f592d68e Mon Sep 17 00:00:00 2001
From: Tim Wojtulewicz <tim@corelight.com>
Date: Thu, 7 Nov 2019 12:19:43 -0700
Subject: [PATCH 1/5] Convert type-checking macros to actual functions

---
 src/Type.h | 64 +++++++++++++++++++++++++++---------------------------
 1 file changed, 32 insertions(+), 32 deletions(-)

diff --git a/src/Type.h b/src/Type.h
index 7be4e01946..371e4385a5 100644
--- a/src/Type.h
+++ b/src/Type.h
@@ -698,53 +698,53 @@ extern BroType* init_type(Expr* init);
 // Returns true if argument is an atomic type.
 bool is_atomic_type(const BroType* t);
 
-// True if the given type tag corresponds to an integral type.
-#define IsIntegral(t)	(t == TYPE_INT || t == TYPE_COUNT || t == TYPE_COUNTER)
-
-// True if the given type tag corresponds to an arithmetic type.
-#define IsArithmetic(t)	(IsIntegral(t) || t == TYPE_DOUBLE)
-
-// True if the given type tag corresponds to a boolean type.
-#define IsBool(t)	(t == TYPE_BOOL)
-
-// True if the given type tag corresponds to an interval type.
-#define IsInterval(t)	(t == TYPE_INTERVAL)
-
-// True if the given type tag corresponds to a record type.
-#define IsRecord(t)	(t == TYPE_RECORD || t == TYPE_UNION)
-
-// True if the given type tag corresponds to a function type.
-#define IsFunc(t)	(t == TYPE_FUNC)
-
-// True if the given type type is a vector.
-#define IsVector(t)	(t == TYPE_VECTOR)
-
-// True if the given type type is a string.
-#define IsString(t)	(t == TYPE_STRING)
-
 // True if the given type tag corresponds to type that can be assigned to.
 extern int is_assignable(BroType* t);
 
+// True if the given type tag corresponds to an integral type.
+inline bool IsIntegral(TypeTag t) { return (t == TYPE_INT || t == TYPE_COUNT || t == TYPE_COUNTER); }
+
+// True if the given type tag corresponds to an arithmetic type.
+inline bool IsArithmetic(TypeTag t)	{ return (IsIntegral(t) || t == TYPE_DOUBLE); }
+
+// True if the given type tag corresponds to a boolean type.
+inline bool IsBool(TypeTag t)	{ return (t == TYPE_BOOL); }
+
+// True if the given type tag corresponds to an interval type.
+inline bool IsInterval(TypeTag t)	{ return (t == TYPE_INTERVAL); }
+
+// True if the given type tag corresponds to a record type.
+inline bool IsRecord(TypeTag t)	{ return (t == TYPE_RECORD || t == TYPE_UNION); }
+
+// True if the given type tag corresponds to a function type.
+inline bool IsFunc(TypeTag t)	{ return (t == TYPE_FUNC); }
+
+// True if the given type type is a vector.
+inline bool IsVector(TypeTag t)	{ return (t == TYPE_VECTOR); }
+
+// True if the given type type is a string.
+inline bool IsString(TypeTag t)	{ return (t == TYPE_STRING); }
+
 // True if the given type tag corresponds to the error type.
-#define IsErrorType(t)	(t == TYPE_ERROR)
+inline bool IsErrorType(TypeTag t)	{ return (t == TYPE_ERROR); }
 
 // True if both tags are integral types.
-#define BothIntegral(t1, t2) (IsIntegral(t1) && IsIntegral(t2))
+inline bool BothIntegral(TypeTag t1, TypeTag t2) { return (IsIntegral(t1) && IsIntegral(t2)); }
 
 // True if both tags are arithmetic types.
-#define BothArithmetic(t1, t2) (IsArithmetic(t1) && IsArithmetic(t2))
+inline bool BothArithmetic(TypeTag t1, TypeTag t2) { return (IsArithmetic(t1) && IsArithmetic(t2)); }
 
 // True if either tags is an arithmetic type.
-#define EitherArithmetic(t1, t2) (IsArithmetic(t1) || IsArithmetic(t2))
+inline bool EitherArithmetic(TypeTag t1, TypeTag t2) { return (IsArithmetic(t1) || IsArithmetic(t2)); }
 
 // True if both tags are boolean types.
-#define BothBool(t1, t2) (IsBool(t1) && IsBool(t2))
+inline bool BothBool(TypeTag t1, TypeTag t2) { return (IsBool(t1) && IsBool(t2)); }
 
 // True if both tags are interval types.
-#define BothInterval(t1, t2) (IsInterval(t1) && IsInterval(t2))
+inline bool BothInterval(TypeTag t1, TypeTag t2) { return (IsInterval(t1) && IsInterval(t2)); }
 
 // True if both tags are string types.
-#define BothString(t1, t2) (IsString(t1) && IsString(t2))
+inline bool BothString(TypeTag t1, TypeTag t2) { return (IsString(t1) && IsString(t2)); }
 
 // True if either tag is the error type.
-#define EitherError(t1, t2) (IsErrorType(t1) || IsErrorType(t2))
+inline bool EitherError(TypeTag t1, TypeTag t2) { return (IsErrorType(t1) || IsErrorType(t2)); }

From 46e730842297d16d1ea9e02cb06edd95b02c81dc Mon Sep 17 00:00:00 2001
From: Tim Wojtulewicz <tim@corelight.com>
Date: Thu, 7 Nov 2019 12:30:30 -0700
Subject: [PATCH 2/5] GHI-595: Convert from nlohmann/json to rapidjson for
 performance reasons

---
 src/3rdparty                                  |   2 +-
 src/CMakeLists.txt                            |   9 +-
 src/Val.cc                                    | 167 +++++++++++-------
 src/threading/formatters/JSON.cc              |  99 +++++++----
 src/threading/formatters/JSON.h               |  26 ++-
 .../json.log                                  |  10 +-
 .../Baseline/scripts.base.utils.json/output   |   2 +-
 7 files changed, 183 insertions(+), 132 deletions(-)

diff --git a/src/3rdparty b/src/3rdparty
index 2b3206b7ad..e2f0a1b3f1 160000
--- a/src/3rdparty
+++ b/src/3rdparty
@@ -1 +1 @@
-Subproject commit 2b3206b7add3472ea0736f2841473e11d506a85e
+Subproject commit e2f0a1b3f1c54f9dc97a806261cc329e4dd596c8
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 2c47f7dd3b..c64f9608fa 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -413,15 +413,14 @@ install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/
 )
 
 install(FILES
-        ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/json.hpp
         ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/sqlite3.h
         DESTINATION include/zeek/3rdparty
-)
+	)
 
 install(FILES
-        ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/tsl-ordered-map/ordered_map.h
-        ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/tsl-ordered-map/ordered_hash.h
-        DESTINATION include/zeek/3rdparty/tsl-ordered-map
+        ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/rapidjson/include/rapidjson/document.h
+        ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/rapidjson/include/rapidjson/writer.h
+        DESTINATION include/zeek/3rdparty/rapidjson/include/rapidjson
 )
 
 ########################################################################
diff --git a/src/Val.cc b/src/Val.cc
index 98ef6da532..b5b667aa56 100644
--- a/src/Val.cc
+++ b/src/Val.cc
@@ -12,6 +12,11 @@
 #include <stdio.h>
 #include <stdlib.h>
 
+#define RAPIDJSON_HAS_STDSTRING 1
+#include "3rdparty/rapidjson/include/rapidjson/document.h"
+#include "3rdparty/rapidjson/include/rapidjson/stringbuffer.h"
+#include "3rdparty/rapidjson/include/rapidjson/writer.h"
+
 #include "Val.h"
 #include "Net.h"
 #include "File.h"
@@ -27,20 +32,17 @@
 
 #include "broker/Data.h"
 
-#include "3rdparty/json.hpp"
-#include "3rdparty/tsl-ordered-map/ordered_map.h"
+class NullDoubleWriter : public rapidjson::Writer<rapidjson::StringBuffer> {
+public:
+	NullDoubleWriter(rapidjson::StringBuffer& buffer) : rapidjson::Writer<rapidjson::StringBuffer>(buffer) {}
+	bool Double(double d)
+	{
+	if ( rapidjson::internal::Double(d).IsNanOrInf() )
+		return rapidjson::Writer<rapidjson::StringBuffer>::Null();
 
-
-// Define a class for use with the json library that orders the keys in the same order that
-// they were inserted. By default, the json library orders them alphabetically and we don't
-// want it like that.
-template<class Key, class T, class Ignore, class Allocator,
-         class Hash = std::hash<Key>, class KeyEqual = std::equal_to<Key>,
-         class AllocatorPair = typename std::allocator_traits<Allocator>::template rebind_alloc<std::pair<Key, T>>,
-         class ValueTypeContainer = std::vector<std::pair<Key, T>, AllocatorPair>>
-using ordered_map = tsl::ordered_map<Key, T, Hash, KeyEqual, AllocatorPair, ValueTypeContainer>;
-
-using ZeekJson = nlohmann::basic_json<ordered_map>;
+	return rapidjson::Writer<rapidjson::StringBuffer>::Double(d);
+	}
+};
 
 Val::Val(Func* f)
 	{
@@ -433,46 +435,56 @@ TableVal* Val::GetRecordFields()
 	return rt->GetRecordFieldsVal(rv);
 	}
 
-// This is a static method in this file to avoid including json.hpp in Val.h since it's huge.
-static ZeekJson BuildJSON(Val* val, bool only_loggable=false, RE_Matcher* re=nullptr)
+// This is a static method in this file to avoid including rapidjson's headers in Val.h because they're huge.
+static void BuildJSON(NullDoubleWriter& writer, Val* val, bool only_loggable=false, RE_Matcher* re=nullptr, const string& key="")
 	{
-	// If the value wasn't set, return a nullptr. This will get turned into a 'null' in the json output.
-	if ( ! val )
-		return nullptr;
+	if ( !key.empty() )
+		writer.Key(key);
 
-	ZeekJson j;
+	// If the value wasn't set, write a null into the stream and return.
+	if ( ! val )
+		{
+		writer.Null();
+		return;
+		}
+
+	rapidjson::Value j;
 	BroType* type = val->Type();
 	switch ( type->Tag() )
 		{
 		case TYPE_BOOL:
-			j = val->AsBool();
+			writer.Bool(val->AsBool());
 			break;
 
 		case TYPE_INT:
-			j = val->AsInt();
+			writer.Int64(val->AsInt());
 			break;
 
 		case TYPE_COUNT:
-			j = val->AsCount();
+			writer.Uint64(val->AsCount());
 			break;
 
 		case TYPE_COUNTER:
-			j = val->AsCounter();
+			writer.Uint64(val->AsCounter());
 			break;
 
 		case TYPE_TIME:
-			j = val->AsTime();
+			writer.Double(val->AsTime());
 			break;
 
 		case TYPE_DOUBLE:
-			j = val->AsDouble();
+			writer.Double(val->AsDouble());
 			break;
 
 		case TYPE_PORT:
 			{
 			auto* pval = val->AsPortVal();
-			j.emplace("port", pval->Port());
-			j.emplace("proto", pval->Protocol());
+			writer.StartObject();
+			writer.Key("port");
+			writer.Int64(pval->Port());
+			writer.Key("proto");
+			writer.String(pval->Protocol());
+			writer.EndObject();
 			break;
 			}
 
@@ -484,7 +496,7 @@ static ZeekJson BuildJSON(Val* val, bool only_loggable=false, RE_Matcher* re=nul
 			ODesc d;
 			d.SetStyle(RAW_STYLE);
 			val->Describe(&d);
-			j = string(reinterpret_cast<const char*>(d.Bytes()), d.Len());
+			writer.String(reinterpret_cast<const char*>(d.Bytes()), d.Len());
 			break;
 			}
 
@@ -496,7 +508,7 @@ static ZeekJson BuildJSON(Val* val, bool only_loggable=false, RE_Matcher* re=nul
 			ODesc d;
 			d.SetStyle(RAW_STYLE);
 			val->Describe(&d);
-			j = json_escape_utf8(string(reinterpret_cast<const char*>(d.Bytes()), d.Len()));
+			writer.String(json_escape_utf8(string(reinterpret_cast<const char*>(d.Bytes()), d.Len())));
 			break;
 			}
 
@@ -506,9 +518,9 @@ static ZeekJson BuildJSON(Val* val, bool only_loggable=false, RE_Matcher* re=nul
 			auto* tval = val->AsTableVal();
 
 			if ( tval->Type()->IsSet() )
-				j = ZeekJson::array();
+				writer.StartArray();
 			else
-				j = ZeekJson::object();
+				writer.StartObject();
 
 			HashKey* k;
 			TableEntryVal* entry;
@@ -524,102 +536,125 @@ static ZeekJson BuildJSON(Val* val, bool only_loggable=false, RE_Matcher* re=nul
 				else
 					entry_key = lv->Ref();
 
-				ZeekJson key_json = BuildJSON(entry_key, only_loggable, re);
-
 				if ( tval->Type()->IsSet() )
-					j.emplace_back(std::move(key_json));
+					BuildJSON(writer, entry_key, only_loggable, re);
 				else
 					{
 					Val* entry_value = entry->Value();
 
-					string key_string;
-					if ( key_json.is_string() )
-						key_string = key_json;
-					else
-						key_string = key_json.dump();
+					rapidjson::StringBuffer buffer;
+					NullDoubleWriter key_writer(buffer);
+					BuildJSON(key_writer, entry_key, only_loggable, re);
+					string key_str = buffer.GetString();
+					if ( key_str[0] == '"')
+						key_str = key_str.substr(1);
+					if ( key_str[key_str.length()-1] == '"')
+						key_str = key_str.substr(0, key_str.length()-1);
 
-					j.emplace(key_string, BuildJSON(entry_value, only_loggable, re));
+					BuildJSON(writer, entry_value, only_loggable, re, key_str);
 					}
 
 				Unref(entry_key);
 				Unref(lv);
 				}
 
+			if ( tval->Type()->IsSet() )
+				writer.EndArray();
+			else
+				writer.EndObject();
+
 			break;
 			}
 
 		case TYPE_RECORD:
 			{
-			j = ZeekJson::object();
+			writer.StartObject();
+
 			auto* rval = val->AsRecordVal();
 			auto rt = rval->Type()->AsRecordType();
 
 			for ( auto i = 0; i < rt->NumFields(); ++i )
 				{
-				auto field_name = rt->FieldName(i);
-				std::string key_string;
-
-				if ( re && re->MatchAnywhere(field_name) != 0 )
-					{
-					StringVal blank("");
-					StringVal fn_val(field_name);
-					auto key_val = fn_val.Substitute(re, &blank, 0)->AsStringVal();
-					key_string = key_val->ToStdString();
-					Unref(key_val);
-					}
-				else
-					key_string = field_name;
-
 				Val* value = rval->LookupWithDefault(i);
 
 				if ( value && ( ! only_loggable || rt->FieldHasAttr(i, ATTR_LOG) ) )
-					j.emplace(key_string, BuildJSON(value, only_loggable, re));
+					{
+					string key_str;
+					auto field_name = rt->FieldName(i);
+
+					if ( re && re->MatchAnywhere(field_name) != 0 )
+						{
+						StringVal blank("");
+						StringVal fn_val(field_name);
+						auto key_val = fn_val.Substitute(re, &blank, 0)->AsStringVal();
+						key_str = key_val->ToStdString();
+						Unref(key_val);
+						}
+					else
+						key_str = field_name;
+
+					BuildJSON(writer, value, only_loggable, re, key_str);
+					}
 
 				Unref(value);
 				}
 
+			writer.EndObject();
 			break;
 			}
 
 		case TYPE_LIST:
 			{
-			j = ZeekJson::array();
+			writer.StartArray();
+
 			auto* lval = val->AsListVal();
 			size_t size = lval->Length();
 			for (size_t i = 0; i < size; i++)
-				j.push_back(BuildJSON(lval->Index(i), only_loggable, re));
+				BuildJSON(writer, lval->Index(i), only_loggable, re);
 
+			writer.EndArray();
 			break;
 			}
 
 		case TYPE_VECTOR:
 			{
-			j = ZeekJson::array();
+			writer.StartArray();
+
 			auto* vval = val->AsVectorVal();
 			size_t size = vval->SizeVal()->AsCount();
 			for (size_t i = 0; i < size; i++)
-				j.push_back(BuildJSON(vval->Lookup(i), only_loggable, re));
+				BuildJSON(writer, vval->Lookup(i), only_loggable, re);
 
+			writer.EndArray();
 			break;
 			}
 
 		case TYPE_OPAQUE:
 			{
+			writer.StartObject();
+
+			writer.Key("opaque_type");
 			auto* oval = val->AsOpaqueVal();
-			j = { { "opaque_type", OpaqueMgr::mgr()->TypeID(oval) } };
+			writer.String(OpaqueMgr::mgr()->TypeID(oval));
+
+			writer.EndObject();
 			break;
 			}
 
-		default: break;
+		default:
+		  writer.Null();
+		  break;
 		}
-
-	return j;
 	}
 
 StringVal* Val::ToJSON(bool only_loggable, RE_Matcher* re)
 	{
-	ZeekJson j = BuildJSON(this, only_loggable, re);
-	return new StringVal(j.dump());
+	rapidjson::StringBuffer buffer;
+	NullDoubleWriter writer(buffer);
+
+	BuildJSON(writer, this, only_loggable, re, "");
+
+	return new StringVal(buffer.GetString());
 	}
 
 IntervalVal::IntervalVal(double quantity, double units) :
diff --git a/src/threading/formatters/JSON.cc b/src/threading/formatters/JSON.cc
index 919ecd4a2b..6944af81cd 100644
--- a/src/threading/formatters/JSON.cc
+++ b/src/threading/formatters/JSON.cc
@@ -12,9 +12,18 @@
 #include <stdint.h>
 
 #include "JSON.h"
+#include "3rdparty/rapidjson/include/rapidjson/internal/ieee754.h"
 
 using namespace threading::formatter;
 
+bool JSON::NullDoubleWriter::Double(double d)
+	{
+	if ( rapidjson::internal::Double(d).IsNanOrInf() )
+		return rapidjson::Writer<rapidjson::StringBuffer>::Null();
+
+	return rapidjson::Writer<rapidjson::StringBuffer>::Double(d);
+	}
+
 JSON::JSON(MsgThread* t, TimeFormat tf) : Formatter(t), surrounding_braces(true)
 	{
 	timestamps = tf;
@@ -27,21 +36,19 @@ JSON::~JSON()
 bool JSON::Describe(ODesc* desc, int num_fields, const Field* const * fields,
                     Value** vals) const
 	{
-	ZeekJson j = ZeekJson::object();
+	rapidjson::StringBuffer buffer;
+	NullDoubleWriter writer(buffer);
+
+	writer.StartObject();
 
 	for ( int i = 0; i < num_fields; i++ )
 		{
 		if ( vals[i]->present )
-			{
-			ZeekJson new_entry = BuildJSON(vals[i]);
-			if ( new_entry.is_null() )
-				return false;
-
-			j.emplace(fields[i]->name, new_entry);
-			}
+			BuildJSON(writer, vals[i], fields[i]->name);
 		}
 
-	desc->Add(j.dump());
+	writer.EndObject();
+	desc->Add(buffer.GetString());
 
 	return true;
 	}
@@ -54,14 +61,18 @@ bool JSON::Describe(ODesc* desc, Value* val, const string& name) const
 		return false;
 		}
 
-	if ( ! val->present )
+	if ( ! val->present || name.empty() )
 		return true;
 
-	ZeekJson j = BuildJSON(val, name);
-	if ( j.is_null() )
-		return false;
+	rapidjson::Document doc;
+	rapidjson::StringBuffer buffer;
+	NullDoubleWriter writer(buffer);
 
-	desc->Add(j.dump());
+	writer.StartObject();
+	BuildJSON(writer, val, name);
+	writer.EndObject();
+
+	desc->Add(buffer.GetString());
 	return true;
 	}
 
@@ -71,47 +82,56 @@ threading::Value* JSON::ParseValue(const string& s, const string& name, TypeTag
 	return nullptr;
 	}
 
-ZeekJson JSON::BuildJSON(Value* val, const string& name) const
+void JSON::BuildJSON(NullDoubleWriter& writer, Value* val, const string& name) const
 	{
-	// If the value wasn't set, return a nullptr. This will get turned into a 'null' in the json output.
 	if ( ! val->present )
-		return nullptr;
+		{
+		writer.Null();
+		return;
+		}
 
-	ZeekJson j;
 	switch ( val->type )
 		{
 		case TYPE_BOOL:
-			j = val->val.int_val != 0;
+			if ( ! name.empty() ) writer.Key(name);
+			writer.Bool(val->val.int_val != 0);
 			break;
 
 		case TYPE_INT:
-			j = val->val.int_val;
+			if ( ! name.empty() ) writer.Key(name);
+			writer.Int64(val->val.int_val);
 			break;
 
 		case TYPE_COUNT:
 		case TYPE_COUNTER:
-			j = val->val.uint_val;
+			if ( ! name.empty() ) writer.Key(name);
+			writer.Uint64(val->val.uint_val);
 			break;
 
 		case TYPE_PORT:
-			j = val->val.port_val.port;
+			if ( ! name.empty() ) writer.Key(name);
+			writer.Uint64(val->val.port_val.port);
 			break;
 
 		case TYPE_SUBNET:
-			j = Formatter::Render(val->val.subnet_val);
+			if ( ! name.empty() ) writer.Key(name);
+			writer.String(Formatter::Render(val->val.subnet_val));
 			break;
 
 		case TYPE_ADDR:
-			j = Formatter::Render(val->val.addr_val);
+			if ( ! name.empty() ) writer.Key(name);
+			writer.String(Formatter::Render(val->val.addr_val));
 			break;
 
 		case TYPE_DOUBLE:
 		case TYPE_INTERVAL:
-			j = val->val.double_val;
+			if ( ! name.empty() ) writer.Key(name);
+			writer.Double(val->val.double_val);
 			break;
 
 		case TYPE_TIME:
 			{
+			if ( ! name.empty() ) writer.Key(name);
 			if ( timestamps == TS_ISO8601 )
 				{
 				char buffer[40];
@@ -125,7 +145,7 @@ ZeekJson JSON::BuildJSON(Value* val, const string& name) const
 					GetThread()->Error(GetThread()->Fmt("json formatter: failure getting time: (%lf)", val->val.double_val));
 					// This was a failure, doesn't really matter what gets put here
 					// but it should probably stand out...
-					j = "2000-01-01T00:00:00.000000";
+					writer.String("2000-01-01T00:00:00.000000");
 					}
 				else
 					{
@@ -136,17 +156,17 @@ ZeekJson JSON::BuildJSON(Value* val, const string& name) const
 						frac += 1;
 
 					snprintf(buffer2, sizeof(buffer2), "%s.%06.0fZ", buffer, fabs(frac) * 1000000);
-					j = buffer2;
+					writer.String(buffer2, strlen(buffer2));
 					}
 				}
 
 			else if ( timestamps == TS_EPOCH )
-				j = val->val.double_val;
+				writer.Double(val->val.double_val);
 
 			else if ( timestamps == TS_MILLIS )
 				{
 				// ElasticSearch uses milliseconds for timestamps
-				j = (uint64_t) (val->val.double_val * 1000);
+				writer.Uint64((uint64_t) (val->val.double_val * 1000));
 				}
 
 			break;
@@ -157,36 +177,37 @@ ZeekJson JSON::BuildJSON(Value* val, const string& name) const
 		case TYPE_FILE:
 		case TYPE_FUNC:
 			{
-			j = json_escape_utf8(string(val->val.string_val.data, val->val.string_val.length));
+			if ( ! name.empty() ) writer.Key(name);
+			writer.String(json_escape_utf8(string(val->val.string_val.data, val->val.string_val.length)));
 			break;
 			}
 
 		case TYPE_TABLE:
 			{
-			j = ZeekJson::array();
+			if ( ! name.empty() ) writer.Key(name);
+			writer.StartArray();
 
 			for ( int idx = 0; idx < val->val.set_val.size; idx++ )
-				j.push_back(BuildJSON(val->val.set_val.vals[idx]));
+				BuildJSON(writer, val->val.set_val.vals[idx]);
 
+			writer.EndArray();
 			break;
 			}
 
 		case TYPE_VECTOR:
 			{
-			j = ZeekJson::array();
+			if ( ! name.empty() ) writer.Key(name);
+			writer.StartArray();
 
 			for ( int idx = 0; idx < val->val.vector_val.size; idx++ )
-				j.push_back(BuildJSON(val->val.vector_val.vals[idx]));
+				BuildJSON(writer, val->val.vector_val.vals[idx]);
 
+			writer.EndArray();
 			break;
 			}
 
 		default:
+			reporter->Warning("Unhandled type in JSON::BuildJSON");
 			break;
 		}
-
-	if ( ! name.empty() && ! j.is_null() )
-		return { { name, j } };
-
-	return j;
 	}
diff --git a/src/threading/formatters/JSON.h b/src/threading/formatters/JSON.h
index 71edadc61d..e640264a3a 100644
--- a/src/threading/formatters/JSON.h
+++ b/src/threading/formatters/JSON.h
@@ -2,24 +2,14 @@
 
 #pragma once
 
-#include "../Formatter.h"
-#include "3rdparty/json.hpp"
-#include "3rdparty/tsl-ordered-map/ordered_map.h"
+#define RAPIDJSON_HAS_STDSTRING 1
+#include "3rdparty/rapidjson/include/rapidjson/document.h"
+#include "3rdparty/rapidjson/include/rapidjson/writer.h"
 
+#include "../Formatter.h"
 
 namespace threading { namespace formatter {
 
-// Define a class for use with the json library that orders the keys in the same order that
-// they were inserted. By default, the json library orders them alphabetically and we don't
-// want it like that.
-template<class Key, class T, class Ignore, class Allocator,
-         class Hash = std::hash<Key>, class KeyEqual = std::equal_to<Key>,
-         class AllocatorPair = typename std::allocator_traits<Allocator>::template rebind_alloc<std::pair<Key, T>>,
-         class ValueTypeContainer = std::vector<std::pair<Key, T>, AllocatorPair>>
-using ordered_map = tsl::ordered_map<Key, T, Hash, KeyEqual, AllocatorPair, ValueTypeContainer>;
-
-using ZeekJson = nlohmann::basic_json<ordered_map>;
-
 /**
   * A thread-safe class for converting values into a JSON representation
   * and vice versa.
@@ -42,7 +32,13 @@ public:
 
 private:
 
-	ZeekJson BuildJSON(Value* val, const string& name = "") const;
+	class NullDoubleWriter : public rapidjson::Writer<rapidjson::StringBuffer> {
+	public:
+		NullDoubleWriter(rapidjson::StringBuffer& stream) : rapidjson::Writer<rapidjson::StringBuffer>(stream) {}
+		bool Double(double d);
+	};
+
+	void BuildJSON(NullDoubleWriter& writer, Value* val, const string& name = "") const;
 
 	TimeFormat timestamps;
 	bool surrounding_braces;
diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.ascii-double/json.log b/testing/btest/Baseline/scripts.base.frameworks.logging.ascii-double/json.log
index bb0f950b13..99d353b197 100644
--- a/testing/btest/Baseline/scripts.base.frameworks.logging.ascii-double/json.log
+++ b/testing/btest/Baseline/scripts.base.frameworks.logging.ascii-double/json.log
@@ -9,11 +9,11 @@
 {"d":0.1234}
 {"d":50000.0}
 {"d":-50000.0}
-{"d":3.14e+15}
-{"d":-3.14e+15}
-{"d":1.79e+308}
-{"d":-1.79e+308}
-{"d":1.23456789e-05}
+{"d":3140000000000000.0}
+{"d":-3140000000000000.0}
+{"d":1.79e308}
+{"d":-1.79e308}
+{"d":0.0000123456789}
 {"d":2.23e-308}
 {"d":-2.23e-308}
 {"d":null}
diff --git a/testing/btest/Baseline/scripts.base.utils.json/output b/testing/btest/Baseline/scripts.base.utils.json/output
index 8757a51433..0794bd9587 100644
--- a/testing/btest/Baseline/scripts.base.utils.json/output
+++ b/testing/btest/Baseline/scripts.base.utils.json/output
@@ -2,7 +2,7 @@ true
 123
 -999
 3.14
--1.23456789e+308
+-1.23456789e308
 9e-308
 1480788576.868945
 "-12.0 hrs"

From ee0619f99905bfbdb7590010ed217e659bf78890 Mon Sep 17 00:00:00 2001
From: Tim Wojtulewicz <tim@corelight.com>
Date: Wed, 8 Jan 2020 12:06:52 -0700
Subject: [PATCH 3/5] Expand unit test for json_escape_utf8 to include all of
 the strings from the ascii-json-utf8 btest

---
 src/util.cc | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/src/util.cc b/src/util.cc
index 2b46e38f7b..16ec14515a 100644
--- a/src/util.cc
+++ b/src/util.cc
@@ -2190,6 +2190,60 @@ TEST_CASE("util json_escape_utf8")
 	CHECK(json_escape_utf8("string") == "string");
 	CHECK(json_escape_utf8("string\n") == "string\n");
 	CHECK(json_escape_utf8("string\x82") == "string\\x82");
+	CHECK(json_escape_utf8("\x07\xd4\xb7o") == "\\x07Էo");
+
+	// These strings are duplicated from the scripts.base.frameworks.logging.ascii-json-utf8 btest
+
+	// Valid ASCII and valid ASCII control characters
+	CHECK(json_escape_utf8("a") == "a");
+	CHECK(json_escape_utf8("\b\f\n\r\t\x00\x15") == "\b\f\n\r\t\x00\x15");
+
+	// Table 3-7 in https://www.unicode.org/versions/Unicode12.0.0/ch03.pdf describes what is
+	// valid and invalid for the tests below
+
+	// Valid 2 Octet Sequence
+	CHECK(json_escape_utf8("\xc3\xb1") == "\xc3\xb1");
+
+	// Invalid 2 Octet Sequence
+	CHECK(json_escape_utf8("\xc3\x28") == "\\xc3(");
+	CHECK(json_escape_utf8("\xc0\x81") == "\\xc0\\x81");
+	CHECK(json_escape_utf8("\xc1\x81") == "\\xc1\\x81");
+	CHECK(json_escape_utf8("\xc2\xcf") == "\\xc2\\xcf");
+
+	// Invalid Sequence Identifier
+	CHECK(json_escape_utf8("\xa0\xa1") == "\\xa0\\xa1");
+
+	// Valid 3 Octet Sequence
+	CHECK(json_escape_utf8("\xe2\x82\xa1") == "\xe2\x82\xa1");
+	CHECK(json_escape_utf8("\xe0\xa3\xa1") == "\xe0\xa3\xa1");
+
+	// Invalid 3 Octet Sequence (in 2nd Octet)
+	CHECK(json_escape_utf8("\xe0\x80\xa1") == "\\xe0\\x80\\xa1");
+	CHECK(json_escape_utf8("\xe2\x28\xa1") == "\\xe2(\\xa1");
+	CHECK(json_escape_utf8("\xed\xa0\xa1") == "\\xed\\xa0\\xa1");
+
+	// Invalid 3 Octet Sequence (in 3rd Octet)
+	CHECK(json_escape_utf8("\xe2\x82\x28") == "\\xe2\\x82(");
+
+	// Valid 4 Octet Sequence
+	CHECK(json_escape_utf8("\xf0\x90\x8c\xbc") == "\xf0\x90\x8c\xbc");
+	CHECK(json_escape_utf8("\xf1\x80\x8c\xbc") == "\xf1\x80\x8c\xbc");
+	CHECK(json_escape_utf8("\xf4\x80\x8c\xbc") == "\xf4\x80\x8c\xbc");
+
+	// Invalid 4 Octet Sequence (in 2nd Octet)
+	CHECK(json_escape_utf8("\xf0\x80\x8c\xbc") == "\\xf0\\x80\\x8c\\xbc");
+	CHECK(json_escape_utf8("\xf2\x28\x8c\xbc") == "\\xf2(\\x8c\\xbc");
+	CHECK(json_escape_utf8("\xf4\x90\x8c\xbc") == "\\xf4\\x90\\x8c\\xbc");
+
+	// Invalid 4 Octet Sequence (in 3rd Octet)
+	CHECK(json_escape_utf8("\xf0\x90\x28\xbc") == "\\xf0\\x90(\\xbc");
+
+	// Invalid 4 Octet Sequence (in 4th Octet)
+	CHECK(json_escape_utf8("\xf0\x28\x8c\x28") == "\\xf0(\\x8c(");
+
+	// Invalid 4 Octet Sequence (too short)
+	CHECK(json_escape_utf8("\xf4\x80\x8c") == "\\xf4\\x80\\x8c");
+	CHECK(json_escape_utf8("\xf0") == "\\xf0");
 	}
 
 string json_escape_utf8(const string& val)

From 23f551876cc21ce98d9d180d95f81ae6d28c9e13 Mon Sep 17 00:00:00 2001
From: Tim Wojtulewicz <tim@corelight.com>
Date: Wed, 8 Jan 2020 12:08:32 -0700
Subject: [PATCH 4/5] Optimize json_escape_utf8 a bit by removing repeated
 calls to string methods

---
 src/util.cc | 46 +++++++++++++++++++++++++---------------------
 src/util.h  |  2 +-
 2 files changed, 26 insertions(+), 22 deletions(-)

diff --git a/src/util.cc b/src/util.cc
index 16ec14515a..d7a699ab6e 100644
--- a/src/util.cc
+++ b/src/util.cc
@@ -2248,50 +2248,54 @@ TEST_CASE("util json_escape_utf8")
 
 string json_escape_utf8(const string& val)
 	{
-	string result;
-	result.reserve(val.length());
-
 	auto val_data = reinterpret_cast<const unsigned char*>(val.c_str());
+	auto val_size = val.length();
+
+	// Reserve at least the size of the existing string to avoid resizing the string in the best-case
+	// scenario where we don't have any multi-byte characters.
+	string result;
+	result.reserve(val_size);
 
 	size_t idx;
-	for ( idx = 0; idx < val.length(); )
+	for ( idx = 0; idx < val_size; )
 		{
-		// Normal ASCII characters plus a few of the control characters can be inserted directly. The rest of
-		// the control characters should be escaped as regular bytes.
-		if ( ( val[idx] >= 32 && val[idx] <= 127 ) ||
-		       val[idx] == '\b' || val[idx] == '\f' || val[idx] == '\n' || val[idx] == '\r' || val[idx] == '\t' )
+		char ch = val[idx];
+
+		// Normal ASCII characters plus a few of the control characters can be inserted directly. The
+		// rest of the control characters should be escaped as regular bytes.
+		if ( ( ch >= 32 && ch <= 127 ) ||
+		       ch == '\b' || ch == '\f' || ch == '\n' || ch == '\r' || ch == '\t' )
 			{
-			result.push_back(val[idx]);
+			result.push_back(ch);
 			++idx;
 			continue;
 			}
-		else if ( val[idx] >= 0 && val[idx] < 32 )
+		else if ( ch >= 0 && ch < 32 )
 			{
-			result.append(json_escape_byte(val[idx]));
+			result.append(json_escape_byte(ch));
 			++idx;
 			continue;
 			}
 
 		// Find out how long the next character should be.
-		unsigned int char_size = getNumBytesForUTF8(val[idx]);
+		unsigned int char_size = getNumBytesForUTF8(ch);
 
-		// If it says that it's a single character or it's not an invalid string UTF8 sequence, insert the one
-		// escaped byte into the string, step forward one, and go to the next character.
-		if ( char_size == 0 || idx+char_size > val.length() || isLegalUTF8Sequence(val_data+idx, val_data+idx+char_size) == 0 )
+		// If it says that it's a single character or it's not an valid string UTF8 sequence, insert
+		// the one escaped byte into the string, step forward one, and go to the next character.
+		if ( char_size == 0 || idx+char_size > val_size || isLegalUTF8Sequence(val_data+idx, val_data+idx+char_size) == 0 )
 			{
-			result.append(json_escape_byte(val[idx]));
+			result.append(json_escape_byte(ch));
 			++idx;
 			continue;
 			}
 
-		for ( size_t step = 0; step < char_size; step++, idx++ )
-			result.push_back(val[idx]);
+		result.append(val, idx, char_size);
+		idx += char_size;
 		}
 
 	// Insert any of the remaining bytes into the string as escaped bytes
-	if ( idx != val.length() )
-		for ( ; idx < val.length(); ++idx )
-			result.append(json_escape_byte(val[idx]));
+	for ( ; idx < val_size; ++idx )
+		result.append(json_escape_byte(val[idx]));
 
 	return result;
 	}
diff --git a/src/util.h b/src/util.h
index 3665518f96..5a5a8c5159 100644
--- a/src/util.h
+++ b/src/util.h
@@ -118,7 +118,7 @@ std::string extract_ip_and_len(const std::string& i, int* len);
 
 inline void bytetohex(unsigned char byte, char* hex_out)
 	{
-	static const char hex_chars[] = "0123456789abcdef";
+	static constexpr char hex_chars[] = "0123456789abcdef";
 	hex_out[0] = hex_chars[(byte & 0xf0) >> 4];
 	hex_out[1] = hex_chars[byte & 0x0f];
 	}

From 227d29db80f0769216de526a9fad83f6adfa5623 Mon Sep 17 00:00:00 2001
From: Tim Wojtulewicz <tim@corelight.com>
Date: Tue, 14 Jan 2020 08:29:32 -0700
Subject: [PATCH 5/5] Use the list of files from clang-tidy when searching for
 unit tests

The previous method for searching for these files included everything from src/3rdparty, which breaks when rapidjson is included. We don't want to include that directory anyways. We already had a good list of files to scan from the previous clang-tidy and adding any that are missing is an easy task.
---
 src/CMakeLists.txt | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index c64f9608fa..0a2cfa388a 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -438,9 +438,8 @@ create_clang_tidy_target()
 
 # Scan all .cc files for TEST_CASE macros and generate CTest targets.
 if (ENABLE_ZEEK_UNIT_TESTS)
-    file(GLOB_RECURSE all_cc_files "*.cc")
     set(test_cases "")
-    foreach (cc_file ${all_cc_files})
+    foreach (cc_file ${TIDY_SRCS})
         file (STRINGS ${cc_file} test_case_lines REGEX "TEST_CASE")
         foreach (line ${test_case_lines})
             string(REGEX REPLACE "TEST_CASE\\(\"(.+)\"\\)" "\\1" test_case "${line}")