Merge remote-tracking branch 'origin/topic/timw/595-rapidjson'

Tweaks:

    - Small change to the logic for removing quotes around strings.
    - Updated NEWS & COPYING.3rdparty
    - Use of intrusive_ptr for stack-allocated StringVals
    - Little bit of refactoring (I would love to merge the two BuildJSON()
      functions, too, but that's a larger task)

* origin/topic/timw/595-rapidjson:
  Use the list of files from clang-tidy when searching for unit tests
  Optimize json_escape_utf8 a bit by removing repeated calls to string methods
  Expand unit test for json_escape_utf8 to include all of the strings from the ascii-json-utf8 btest
  GHI-595: Convert from nlohmann/json to rapidjson for performance reasons
  Convert type-checking macros to actual functions
2025-10-13 20:18:20 +00:00 · 2020-01-17 13:05:59 +00:00 · 2020-01-17 13:05:59 +00:00 · 8170baabef
commit 8170baabef
parent c8c6621a0e 227d29db80
12 changed files with 335 additions and 210 deletions
--- a/src/3rdparty
+++ b/src/3rdparty
@ -1 +1 @@
-Subproject commit 2b3206b7add3472ea0736f2841473e11d506a85e
+Subproject commit fae32236391d9117bf996e75d56ebd01ef076bc2
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@ -413,15 +413,14 @@ install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/
 )

 install(FILES
-        ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/json.hpp
        ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/sqlite3.h
        DESTINATION include/zeek/3rdparty
 )

 install(FILES
-        ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/tsl-ordered-map/ordered_map.h
-        ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/tsl-ordered-map/ordered_hash.h
-        DESTINATION include/zeek/3rdparty/tsl-ordered-map
+        ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/rapidjson/include/rapidjson/document.h
+        ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/rapidjson/include/rapidjson/writer.h
+        DESTINATION include/zeek/3rdparty/rapidjson/include/rapidjson
 )

 ########################################################################
@ -439,9 +438,8 @@ create_clang_tidy_target()

 # Scan all .cc files for TEST_CASE macros and generate CTest targets.
 if (ENABLE_ZEEK_UNIT_TESTS)
-    file(GLOB_RECURSE all_cc_files "*.cc")
    set(test_cases "")
-    foreach (cc_file ${all_cc_files})
+    foreach (cc_file ${TIDY_SRCS})
        file (STRINGS ${cc_file} test_case_lines REGEX "TEST_CASE")
        foreach (line ${test_case_lines})
            string(REGEX REPLACE "TEST_CASE\\(\"(.+)\"\\)" "\\1" test_case "${line}")
--- a/src/Type.h
+++ b/src/Type.h
@ -698,53 +698,53 @@ extern BroType* init_type(Expr* init);
 // Returns true if argument is an atomic type.
 bool is_atomic_type(const BroType* t);

-// True if the given type tag corresponds to an integral type.
-#define IsIntegral(t)	(t == TYPE_INT || t == TYPE_COUNT || t == TYPE_COUNTER)
-
-// True if the given type tag corresponds to an arithmetic type.
-#define IsArithmetic(t)	(IsIntegral(t) || t == TYPE_DOUBLE)
-
-// True if the given type tag corresponds to a boolean type.
-#define IsBool(t)	(t == TYPE_BOOL)
-
-// True if the given type tag corresponds to an interval type.
-#define IsInterval(t)	(t == TYPE_INTERVAL)
-
-// True if the given type tag corresponds to a record type.
-#define IsRecord(t)	(t == TYPE_RECORD || t == TYPE_UNION)
-
-// True if the given type tag corresponds to a function type.
-#define IsFunc(t)	(t == TYPE_FUNC)
-
-// True if the given type type is a vector.
-#define IsVector(t)	(t == TYPE_VECTOR)
-
-// True if the given type type is a string.
-#define IsString(t)	(t == TYPE_STRING)
-
 // True if the given type tag corresponds to type that can be assigned to.
 extern int is_assignable(BroType* t);

+// True if the given type tag corresponds to an integral type.
+inline bool IsIntegral(TypeTag t) { return (t == TYPE_INT || t == TYPE_COUNT || t == TYPE_COUNTER); }
+
+// True if the given type tag corresponds to an arithmetic type.
+inline bool IsArithmetic(TypeTag t)	{ return (IsIntegral(t) || t == TYPE_DOUBLE); }
+
+// True if the given type tag corresponds to a boolean type.
+inline bool IsBool(TypeTag t)	{ return (t == TYPE_BOOL); }
+
+// True if the given type tag corresponds to an interval type.
+inline bool IsInterval(TypeTag t)	{ return (t == TYPE_INTERVAL); }
+
+// True if the given type tag corresponds to a record type.
+inline bool IsRecord(TypeTag t)	{ return (t == TYPE_RECORD || t == TYPE_UNION); }
+
+// True if the given type tag corresponds to a function type.
+inline bool IsFunc(TypeTag t)	{ return (t == TYPE_FUNC); }
+
+// True if the given type tag is a vector.
+inline bool IsVector(TypeTag t)	{ return (t == TYPE_VECTOR); }
+
+// True if the given type tag is a string.
+inline bool IsString(TypeTag t)	{ return (t == TYPE_STRING); }
+
 // True if the given type tag corresponds to the error type.
-#define IsErrorType(t)	(t == TYPE_ERROR)
+inline bool IsErrorType(TypeTag t)	{ return (t == TYPE_ERROR); }

 // True if both tags are integral types.
-#define BothIntegral(t1, t2) (IsIntegral(t1) && IsIntegral(t2))
+inline bool BothIntegral(TypeTag t1, TypeTag t2) { return (IsIntegral(t1) && IsIntegral(t2)); }

 // True if both tags are arithmetic types.
-#define BothArithmetic(t1, t2) (IsArithmetic(t1) && IsArithmetic(t2))
+inline bool BothArithmetic(TypeTag t1, TypeTag t2) { return (IsArithmetic(t1) && IsArithmetic(t2)); }

 // True if either tags is an arithmetic type.
-#define EitherArithmetic(t1, t2) (IsArithmetic(t1) || IsArithmetic(t2))
+inline bool EitherArithmetic(TypeTag t1, TypeTag t2) { return (IsArithmetic(t1) || IsArithmetic(t2)); }

 // True if both tags are boolean types.
-#define BothBool(t1, t2) (IsBool(t1) && IsBool(t2))
+inline bool BothBool(TypeTag t1, TypeTag t2) { return (IsBool(t1) && IsBool(t2)); }

 // True if both tags are interval types.
-#define BothInterval(t1, t2) (IsInterval(t1) && IsInterval(t2))
+inline bool BothInterval(TypeTag t1, TypeTag t2) { return (IsInterval(t1) && IsInterval(t2)); }

 // True if both tags are string types.
-#define BothString(t1, t2) (IsString(t1) && IsString(t2))
+inline bool BothString(TypeTag t1, TypeTag t2) { return (IsString(t1) && IsString(t2)); }

 // True if either tag is the error type.
-#define EitherError(t1, t2) (IsErrorType(t1) || IsErrorType(t2))
+inline bool EitherError(TypeTag t1, TypeTag t2) { return (IsErrorType(t1) || IsErrorType(t2)); }
--- a/src/Val.cc
+++ b/src/Val.cc
@ -27,20 +27,7 @@

 #include "broker/Data.h"

-#include "3rdparty/json.hpp"
-#include "3rdparty/tsl-ordered-map/ordered_map.h"
-
-
-// Define a class for use with the json library that orders the keys in the same order that
-// they were inserted. By default, the json library orders them alphabetically and we don't
-// want it like that.
-template<class Key, class T, class Ignore, class Allocator,
-         class Hash = std::hash<Key>, class KeyEqual = std::equal_to<Key>,
-         class AllocatorPair = typename std::allocator_traits<Allocator>::template rebind_alloc<std::pair<Key, T>>,
-         class ValueTypeContainer = std::vector<std::pair<Key, T>, AllocatorPair>>
-using ordered_map = tsl::ordered_map<Key, T, Hash, KeyEqual, AllocatorPair, ValueTypeContainer>;
-
-using ZeekJson = nlohmann::basic_json<ordered_map>;
+#include "threading/formatters/JSON.h"

 Val::Val(Func* f)
 	{
@ -433,46 +420,56 @@ TableVal* Val::GetRecordFields()
 	return rt->GetRecordFieldsVal(rv);
 	}

-// This is a static method in this file to avoid including json.hpp in Val.h since it's huge.
-static ZeekJson BuildJSON(Val* val, bool only_loggable=false, RE_Matcher* re=nullptr)
+// This is a static method in this file to avoid including rapidjson's headers in Val.h because they're huge.
+static void BuildJSON(threading::formatter::JSON::NullDoubleWriter& writer, Val* val, bool only_loggable=false, RE_Matcher* re=nullptr, const string& key="")
 	{
-	// If the value wasn't set, return a nullptr. This will get turned into a 'null' in the json output.
-	if ( ! val )
-		return nullptr;
+	if ( !key.empty() )
+		writer.Key(key);

-	ZeekJson j;
+	// If the value wasn't set, write a null into the stream and return.
+	if ( ! val )
+		{
+		writer.Null();
+		return;
+		}
+
+	rapidjson::Value j;
 	BroType* type = val->Type();
 	switch ( type->Tag() )
 		{
 		case TYPE_BOOL:
-			j = val->AsBool();
+			writer.Bool(val->AsBool());
 			break;

 		case TYPE_INT:
-			j = val->AsInt();
+			writer.Int64(val->AsInt());
 			break;

 		case TYPE_COUNT:
-			j = val->AsCount();
+			writer.Uint64(val->AsCount());
 			break;

 		case TYPE_COUNTER:
-			j = val->AsCounter();
+			writer.Uint64(val->AsCounter());
 			break;

 		case TYPE_TIME:
-			j = val->AsTime();
+			writer.Double(val->AsTime());
 			break;

 		case TYPE_DOUBLE:
-			j = val->AsDouble();
+			writer.Double(val->AsDouble());
 			break;

 		case TYPE_PORT:
 			{
 			auto* pval = val->AsPortVal();
-			j.emplace("port", pval->Port());
-			j.emplace("proto", pval->Protocol());
+			writer.StartObject();
+			writer.Key("port");
+			writer.Int64(pval->Port());
+			writer.Key("proto");
+			writer.String(pval->Protocol());
+			writer.EndObject();
 			break;
 			}

@ -484,7 +481,7 @@ static ZeekJson BuildJSON(Val* val, bool only_loggable=false, RE_Matcher* re=nul
 			ODesc d;
 			d.SetStyle(RAW_STYLE);
 			val->Describe(&d);
-			j = string(reinterpret_cast<const char*>(d.Bytes()), d.Len());
+			writer.String(reinterpret_cast<const char*>(d.Bytes()), d.Len());
 			break;
 			}

@ -496,7 +493,7 @@ static ZeekJson BuildJSON(Val* val, bool only_loggable=false, RE_Matcher* re=nul
 			ODesc d;
 			d.SetStyle(RAW_STYLE);
 			val->Describe(&d);
-			j = json_escape_utf8(string(reinterpret_cast<const char*>(d.Bytes()), d.Len()));
+			writer.String(json_escape_utf8(string(reinterpret_cast<const char*>(d.Bytes()), d.Len())));
 			break;
 			}

@ -506,9 +503,9 @@ static ZeekJson BuildJSON(Val* val, bool only_loggable=false, RE_Matcher* re=nul
 			auto* tval = val->AsTableVal();

 			if ( tval->Type()->IsSet() )
-				j = ZeekJson::array();
+				writer.StartArray();
 			else
-				j = ZeekJson::object();
+				writer.StartObject();

 			HashKey* k;
 			TableEntryVal* entry;
@ -524,102 +521,125 @@ static ZeekJson BuildJSON(Val* val, bool only_loggable=false, RE_Matcher* re=nul
 				else
 					entry_key = lv->Ref();

-				ZeekJson key_json = BuildJSON(entry_key, only_loggable, re);
-
 				if ( tval->Type()->IsSet() )
-					j.emplace_back(std::move(key_json));
+					BuildJSON(writer, entry_key, only_loggable, re);
 				else
 					{
-					Val* entry_value = entry->Value();
+					rapidjson::StringBuffer buffer;
+					threading::formatter::JSON::NullDoubleWriter key_writer(buffer);
+					BuildJSON(key_writer, entry_key, only_loggable, re);
+					string key_str = buffer.GetString();

-					string key_string;
-					if ( key_json.is_string() )
-						key_string = key_json;
-					else
-						key_string = key_json.dump();
+					if ( key_str.length() >= 2 &&
+					     key_str[0] == '"' &&
+					     key_str[key_str.length() - 1] == '"' )
+						// Strip quotes.
+						key_str = key_str.substr(1, key_str.length() - 2);

-					j.emplace(key_string, BuildJSON(entry_value, only_loggable, re));
+					BuildJSON(writer, entry->Value(), only_loggable, re, key_str);
 					}

 				Unref(entry_key);
 				Unref(lv);
 				}

+			if ( tval->Type()->IsSet() )
+				writer.EndArray();
+			else
+				writer.EndObject();
+
 			break;
 			}

 		case TYPE_RECORD:
 			{
-			j = ZeekJson::object();
+			writer.StartObject();
+
 			auto* rval = val->AsRecordVal();
 			auto rt = rval->Type()->AsRecordType();

 			for ( auto i = 0; i < rt->NumFields(); ++i )
 				{
-				auto field_name = rt->FieldName(i);
-				std::string key_string;
-
-				if ( re && re->MatchAnywhere(field_name) != 0 )
-					{
-					StringVal blank("");
-					StringVal fn_val(field_name);
-					auto key_val = fn_val.Substitute(re, &blank, 0)->AsStringVal();
-					key_string = key_val->ToStdString();
-					Unref(key_val);
-					}
-				else
-					key_string = field_name;
-
 				Val* value = rval->LookupWithDefault(i);

 				if ( value && ( ! only_loggable || rt->FieldHasAttr(i, ATTR_LOG) ) )
-					j.emplace(key_string, BuildJSON(value, only_loggable, re));
+					{
+					string key_str;
+					auto field_name = rt->FieldName(i);
+
+					if ( re && re->MatchAnywhere(field_name) != 0 )
+						{
+						auto blank = make_intrusive<StringVal>("");
+						auto fn_val = make_intrusive<StringVal>(field_name);
+						auto key_val = fn_val->Substitute(re, blank.get(), 0)->AsStringVal();
+						key_str = key_val->ToStdString();
+						Unref(key_val);
+						}
+					else
+						key_str = field_name;
+
+					BuildJSON(writer, value, only_loggable, re, key_str);
+					}

 				Unref(value);
 				}

+			writer.EndObject();
 			break;
 			}

 		case TYPE_LIST:
 			{
-			j = ZeekJson::array();
+			writer.StartArray();
+
 			auto* lval = val->AsListVal();
 			size_t size = lval->Length();
 			for (size_t i = 0; i < size; i++)
-				j.push_back(BuildJSON(lval->Index(i), only_loggable, re));
+				BuildJSON(writer, lval->Index(i), only_loggable, re);

+			writer.EndArray();
 			break;
 			}

 		case TYPE_VECTOR:
 			{
-			j = ZeekJson::array();
+			writer.StartArray();
+
 			auto* vval = val->AsVectorVal();
 			size_t size = vval->SizeVal()->AsCount();
 			for (size_t i = 0; i < size; i++)
-				j.push_back(BuildJSON(vval->Lookup(i), only_loggable, re));
+				BuildJSON(writer, vval->Lookup(i), only_loggable, re);

+			writer.EndArray();
 			break;
 			}

 		case TYPE_OPAQUE:
 			{
+			writer.StartObject();
+
+			writer.Key("opaque_type");
 			auto* oval = val->AsOpaqueVal();
-			j = { { "opaque_type", OpaqueMgr::mgr()->TypeID(oval) } };
+			writer.String(OpaqueMgr::mgr()->TypeID(oval));
+
+			writer.EndObject();
 			break;
 			}

-		default: break;
+		default:
+		  writer.Null();
+		  break;
 		}
-
-	return j;
 	}

 StringVal* Val::ToJSON(bool only_loggable, RE_Matcher* re)
 	{
-	ZeekJson j = BuildJSON(this, only_loggable, re);
-	return new StringVal(j.dump());
+	rapidjson::StringBuffer buffer;
+	threading::formatter::JSON::NullDoubleWriter writer(buffer);
+
+	BuildJSON(writer, this, only_loggable, re, "");
+
+	return new StringVal(buffer.GetString());
 	}

 IntervalVal::IntervalVal(double quantity, double units) :
--- a/src/threading/formatters/JSON.cc
+++ b/src/threading/formatters/JSON.cc
@ -12,9 +12,18 @@
 #include <stdint.h>

 #include "JSON.h"
+#include "3rdparty/rapidjson/include/rapidjson/internal/ieee754.h"

 using namespace threading::formatter;

+bool JSON::NullDoubleWriter::Double(double d)
+	{
+	if ( rapidjson::internal::Double(d).IsNanOrInf() )
+		return rapidjson::Writer<rapidjson::StringBuffer>::Null();
+
+	return rapidjson::Writer<rapidjson::StringBuffer>::Double(d);
+	}
+
 JSON::JSON(MsgThread* t, TimeFormat tf) : Formatter(t), surrounding_braces(true)
 	{
 	timestamps = tf;
@ -27,21 +36,19 @@ JSON::~JSON()
 bool JSON::Describe(ODesc* desc, int num_fields, const Field* const * fields,
                    Value** vals) const
 	{
-	ZeekJson j = ZeekJson::object();
+	rapidjson::StringBuffer buffer;
+	NullDoubleWriter writer(buffer);
+
+	writer.StartObject();

 	for ( int i = 0; i < num_fields; i++ )
 		{
 		if ( vals[i]->present )
-			{
-			ZeekJson new_entry = BuildJSON(vals[i]);
-			if ( new_entry.is_null() )
-				return false;
-
-			j.emplace(fields[i]->name, new_entry);
-			}
+			BuildJSON(writer, vals[i], fields[i]->name);
 		}

-	desc->Add(j.dump());
+	writer.EndObject();
+	desc->Add(buffer.GetString());

 	return true;
 	}
@ -54,14 +61,18 @@ bool JSON::Describe(ODesc* desc, Value* val, const string& name) const
 		return false;
 		}

-	if ( ! val->present )
+	if ( ! val->present || name.empty() )
 		return true;

-	ZeekJson j = BuildJSON(val, name);
-	if ( j.is_null() )
-		return false;
+	rapidjson::Document doc;
+	rapidjson::StringBuffer buffer;
+	NullDoubleWriter writer(buffer);

-	desc->Add(j.dump());
+	writer.StartObject();
+	BuildJSON(writer, val, name);
+	writer.EndObject();
+
+	desc->Add(buffer.GetString());
 	return true;
 	}

@ -71,43 +82,47 @@ threading::Value* JSON::ParseValue(const string& s, const string& name, TypeTag
 	return nullptr;
 	}

-ZeekJson JSON::BuildJSON(Value* val, const string& name) const
+void JSON::BuildJSON(NullDoubleWriter& writer, Value* val, const string& name) const
 	{
-	// If the value wasn't set, return a nullptr. This will get turned into a 'null' in the json output.
 	if ( ! val->present )
-		return nullptr;
+		{
+		writer.Null();
+		return;
+		}
+
+	if ( ! name.empty() )
+		writer.Key(name);

-	ZeekJson j;
 	switch ( val->type )
 		{
 		case TYPE_BOOL:
-			j = val->val.int_val != 0;
+			writer.Bool(val->val.int_val != 0);
 			break;

 		case TYPE_INT:
-			j = val->val.int_val;
+			writer.Int64(val->val.int_val);
 			break;

 		case TYPE_COUNT:
 		case TYPE_COUNTER:
-			j = val->val.uint_val;
+			writer.Uint64(val->val.uint_val);
 			break;

 		case TYPE_PORT:
-			j = val->val.port_val.port;
+			writer.Uint64(val->val.port_val.port);
 			break;

 		case TYPE_SUBNET:
-			j = Formatter::Render(val->val.subnet_val);
+			writer.String(Formatter::Render(val->val.subnet_val));
 			break;

 		case TYPE_ADDR:
-			j = Formatter::Render(val->val.addr_val);
+			writer.String(Formatter::Render(val->val.addr_val));
 			break;

 		case TYPE_DOUBLE:
 		case TYPE_INTERVAL:
-			j = val->val.double_val;
+			writer.Double(val->val.double_val);
 			break;

 		case TYPE_TIME:
@ -125,7 +140,7 @@ ZeekJson JSON::BuildJSON(Value* val, const string& name) const
 					GetThread()->Error(GetThread()->Fmt("json formatter: failure getting time: (%lf)", val->val.double_val));
 					// This was a failure, doesn't really matter what gets put here
 					// but it should probably stand out...
-					j = "2000-01-01T00:00:00.000000";
+					writer.String("2000-01-01T00:00:00.000000");
 					}
 				else
 					{
@ -136,17 +151,17 @@ ZeekJson JSON::BuildJSON(Value* val, const string& name) const
 						frac += 1;

 					snprintf(buffer2, sizeof(buffer2), "%s.%06.0fZ", buffer, fabs(frac) * 1000000);
-					j = buffer2;
+					writer.String(buffer2, strlen(buffer2));
 					}
 				}

 			else if ( timestamps == TS_EPOCH )
-				j = val->val.double_val;
+				writer.Double(val->val.double_val);

 			else if ( timestamps == TS_MILLIS )
 				{
 				// ElasticSearch uses milliseconds for timestamps
-				j = (uint64_t) (val->val.double_val * 1000);
+				writer.Uint64((uint64_t) (val->val.double_val * 1000));
 				}

 			break;
@ -157,36 +172,34 @@ ZeekJson JSON::BuildJSON(Value* val, const string& name) const
 		case TYPE_FILE:
 		case TYPE_FUNC:
 			{
-			j = json_escape_utf8(string(val->val.string_val.data, val->val.string_val.length));
+			writer.String(json_escape_utf8(string(val->val.string_val.data, val->val.string_val.length)));
 			break;
 			}

 		case TYPE_TABLE:
 			{
-			j = ZeekJson::array();
+			writer.StartArray();

 			for ( int idx = 0; idx < val->val.set_val.size; idx++ )
-				j.push_back(BuildJSON(val->val.set_val.vals[idx]));
+				BuildJSON(writer, val->val.set_val.vals[idx]);

+			writer.EndArray();
 			break;
 			}

 		case TYPE_VECTOR:
 			{
-			j = ZeekJson::array();
+			writer.StartArray();

 			for ( int idx = 0; idx < val->val.vector_val.size; idx++ )
-				j.push_back(BuildJSON(val->val.vector_val.vals[idx]));
+				BuildJSON(writer, val->val.vector_val.vals[idx]);

+			writer.EndArray();
 			break;
 			}

 		default:
+			reporter->Warning("Unhandled type in JSON::BuildJSON");
 			break;
 		}
-
-	if ( ! name.empty() && ! j.is_null() )
-		return { { name, j } };
-
-	return j;
 	}
--- a/src/threading/formatters/JSON.h
+++ b/src/threading/formatters/JSON.h
@ -2,24 +2,14 @@

 #pragma once

-#include "../Formatter.h"
-#include "3rdparty/json.hpp"
-#include "3rdparty/tsl-ordered-map/ordered_map.h"
+#define RAPIDJSON_HAS_STDSTRING 1
+#include "3rdparty/rapidjson/include/rapidjson/document.h"
+#include "3rdparty/rapidjson/include/rapidjson/writer.h"

+#include "../Formatter.h"

 namespace threading { namespace formatter {

-// Define a class for use with the json library that orders the keys in the same order that
-// they were inserted. By default, the json library orders them alphabetically and we don't
-// want it like that.
-template<class Key, class T, class Ignore, class Allocator,
-         class Hash = std::hash<Key>, class KeyEqual = std::equal_to<Key>,
-         class AllocatorPair = typename std::allocator_traits<Allocator>::template rebind_alloc<std::pair<Key, T>>,
-         class ValueTypeContainer = std::vector<std::pair<Key, T>, AllocatorPair>>
-using ordered_map = tsl::ordered_map<Key, T, Hash, KeyEqual, AllocatorPair, ValueTypeContainer>;
-
-using ZeekJson = nlohmann::basic_json<ordered_map>;
-
 /**
  * A thread-safe class for converting values into a JSON representation
  * and vice versa.
@ -40,9 +30,14 @@ public:
 	                      threading::Value** vals) const override;
 	threading::Value* ParseValue(const string& s, const string& name, TypeTag type, TypeTag subtype = TYPE_ERROR) const override;

-private:
+	class NullDoubleWriter : public rapidjson::Writer<rapidjson::StringBuffer> {
+	public:
+		NullDoubleWriter(rapidjson::StringBuffer& stream) : rapidjson::Writer<rapidjson::StringBuffer>(stream) {}
+		bool Double(double d);
+	};

-	ZeekJson BuildJSON(Value* val, const string& name = "") const;
+private:
+	void BuildJSON(NullDoubleWriter& writer, Value* val, const string& name = "") const;

 	TimeFormat timestamps;
 	bool surrounding_braces;
--- a/src/util.cc
+++ b/src/util.cc
@ -2190,54 +2190,112 @@ TEST_CASE("util json_escape_utf8")
 	CHECK(json_escape_utf8("string") == "string");
 	CHECK(json_escape_utf8("string\n") == "string\n");
 	CHECK(json_escape_utf8("string\x82") == "string\\x82");
+	CHECK(json_escape_utf8("\x07\xd4\xb7o") == "\\x07Էo");
+
+	// These strings are duplicated from the scripts.base.frameworks.logging.ascii-json-utf8 btest
+
+	// Valid ASCII and valid ASCII control characters
+	CHECK(json_escape_utf8("a") == "a");
+	CHECK(json_escape_utf8("\b\f\n\r\t\x00\x15") == "\b\f\n\r\t\x00\x15");
+
+	// Table 3-7 in https://www.unicode.org/versions/Unicode12.0.0/ch03.pdf describes what is
+	// valid and invalid for the tests below
+
+	// Valid 2 Octet Sequence
+	CHECK(json_escape_utf8("\xc3\xb1") == "\xc3\xb1");
+
+	// Invalid 2 Octet Sequence
+	CHECK(json_escape_utf8("\xc3\x28") == "\\xc3(");
+	CHECK(json_escape_utf8("\xc0\x81") == "\\xc0\\x81");
+	CHECK(json_escape_utf8("\xc1\x81") == "\\xc1\\x81");
+	CHECK(json_escape_utf8("\xc2\xcf") == "\\xc2\\xcf");
+
+	// Invalid Sequence Identifier
+	CHECK(json_escape_utf8("\xa0\xa1") == "\\xa0\\xa1");
+
+	// Valid 3 Octet Sequence
+	CHECK(json_escape_utf8("\xe2\x82\xa1") == "\xe2\x82\xa1");
+	CHECK(json_escape_utf8("\xe0\xa3\xa1") == "\xe0\xa3\xa1");
+
+	// Invalid 3 Octet Sequence (in 2nd Octet)
+	CHECK(json_escape_utf8("\xe0\x80\xa1") == "\\xe0\\x80\\xa1");
+	CHECK(json_escape_utf8("\xe2\x28\xa1") == "\\xe2(\\xa1");
+	CHECK(json_escape_utf8("\xed\xa0\xa1") == "\\xed\\xa0\\xa1");
+
+	// Invalid 3 Octet Sequence (in 3rd Octet)
+	CHECK(json_escape_utf8("\xe2\x82\x28") == "\\xe2\\x82(");
+
+	// Valid 4 Octet Sequence
+	CHECK(json_escape_utf8("\xf0\x90\x8c\xbc") == "\xf0\x90\x8c\xbc");
+	CHECK(json_escape_utf8("\xf1\x80\x8c\xbc") == "\xf1\x80\x8c\xbc");
+	CHECK(json_escape_utf8("\xf4\x80\x8c\xbc") == "\xf4\x80\x8c\xbc");
+
+	// Invalid 4 Octet Sequence (in 2nd Octet)
+	CHECK(json_escape_utf8("\xf0\x80\x8c\xbc") == "\\xf0\\x80\\x8c\\xbc");
+	CHECK(json_escape_utf8("\xf2\x28\x8c\xbc") == "\\xf2(\\x8c\\xbc");
+	CHECK(json_escape_utf8("\xf4\x90\x8c\xbc") == "\\xf4\\x90\\x8c\\xbc");
+
+	// Invalid 4 Octet Sequence (in 3rd Octet)
+	CHECK(json_escape_utf8("\xf0\x90\x28\xbc") == "\\xf0\\x90(\\xbc");
+
+	// Invalid 4 Octet Sequence (in 4th Octet)
+	CHECK(json_escape_utf8("\xf0\x28\x8c\x28") == "\\xf0(\\x8c(");
+
+	// Invalid 4 Octet Sequence (too short)
+	CHECK(json_escape_utf8("\xf4\x80\x8c") == "\\xf4\\x80\\x8c");
+	CHECK(json_escape_utf8("\xf0") == "\\xf0");
 	}

 string json_escape_utf8(const string& val)
 	{
-	string result;
-	result.reserve(val.length());
-
 	auto val_data = reinterpret_cast<const unsigned char*>(val.c_str());
+	auto val_size = val.length();
+
+	// Reserve at least the size of the existing string to avoid resizing the string in the best-case
+	// scenario where we don't have any multi-byte characters.
+	string result;
+	result.reserve(val_size);

 	size_t idx;
-	for ( idx = 0; idx < val.length(); )
+	for ( idx = 0; idx < val_size; )
 		{
-		// Normal ASCII characters plus a few of the control characters can be inserted directly. The rest of
-		// the control characters should be escaped as regular bytes.
-		if ( ( val[idx] >= 32 && val[idx] <= 127 ) ||
-		       val[idx] == '\b' || val[idx] == '\f' || val[idx] == '\n' || val[idx] == '\r' || val[idx] == '\t' )
+		const char ch = val[idx];
+
+		// Normal ASCII characters plus a few of the control characters can be inserted directly. The
+		// rest of the control characters should be escaped as regular bytes.
+		if ( ( ch >= 32 && ch <= 127 ) ||
+		       ch == '\b' || ch == '\f' || ch == '\n' || ch == '\r' || ch == '\t' )
 			{
-			result.push_back(val[idx]);
+			result.push_back(ch);
 			++idx;
 			continue;
 			}
-		else if ( val[idx] >= 0 && val[idx] < 32 )
+		else if ( ch >= 0 && ch < 32 )
 			{
-			result.append(json_escape_byte(val[idx]));
+			result.append(json_escape_byte(ch));
 			++idx;
 			continue;
 			}

 		// Find out how long the next character should be.
-		unsigned int char_size = getNumBytesForUTF8(val[idx]);
+		unsigned int char_size = getNumBytesForUTF8(ch);

-		// If it says that it's a single character or it's not an invalid string UTF8 sequence, insert the one
-		// escaped byte into the string, step forward one, and go to the next character.
-		if ( char_size == 0 || idx+char_size > val.length() || isLegalUTF8Sequence(val_data+idx, val_data+idx+char_size) == 0 )
+		// If it says that it's a single character or it's not a valid UTF-8 sequence, insert
+		// the one escaped byte into the string, step forward one, and go to the next character.
+		if ( char_size == 0 || idx+char_size > val_size || isLegalUTF8Sequence(val_data+idx, val_data+idx+char_size) == 0 )
 			{
-			result.append(json_escape_byte(val[idx]));
+			result.append(json_escape_byte(ch));
 			++idx;
 			continue;
 			}

-		for ( size_t step = 0; step < char_size; step++, idx++ )
-			result.push_back(val[idx]);
+		result.append(val, idx, char_size);
+		idx += char_size;
 		}

 	// Insert any of the remaining bytes into the string as escaped bytes
-	if ( idx != val.length() )
-		for ( ; idx < val.length(); ++idx )
-			result.append(json_escape_byte(val[idx]));
+	for ( ; idx < val_size; ++idx )
+		result.append(json_escape_byte(val[idx]));

 	return result;
 	}
--- a/src/util.h
+++ b/src/util.h
@ -118,7 +118,7 @@ std::string extract_ip_and_len(const std::string& i, int* len);

 inline void bytetohex(unsigned char byte, char* hex_out)
 	{
-	static const char hex_chars[] = "0123456789abcdef";
+	static constexpr char hex_chars[] = "0123456789abcdef";
 	hex_out[0] = hex_chars[(byte & 0xf0) >> 4];
 	hex_out[1] = hex_chars[byte & 0x0f];
 	}