GHI-595: Convert from nlohmann/json to rapidjson for performance reasons

This commit is contained in:
Tim Wojtulewicz 2019-11-07 12:30:30 -07:00
parent 50a0835b41
commit 46e7308422
7 changed files with 183 additions and 132 deletions

@ -1 +1 @@
Subproject commit 2b3206b7add3472ea0736f2841473e11d506a85e Subproject commit e2f0a1b3f1c54f9dc97a806261cc329e4dd596c8

View file

@ -413,15 +413,14 @@ install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/
) )
install(FILES install(FILES
${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/json.hpp
${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/sqlite3.h ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/sqlite3.h
DESTINATION include/zeek/3rdparty DESTINATION include/zeek/3rdparty
) )
install(FILES install(FILES
${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/tsl-ordered-map/ordered_map.h ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/rapidjson/include/rapidjson/document.h
${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/tsl-ordered-map/ordered_hash.h ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/rapidjson/include/rapidjson/writer.h
DESTINATION include/zeek/3rdparty/tsl-ordered-map DESTINATION include/zeek/3rdparty/rapidjson/include/rapidjson
) )
######################################################################## ########################################################################

View file

@ -12,6 +12,11 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#define RAPIDJSON_HAS_STDSTRING 1
#include "3rdparty/rapidjson/include/rapidjson/document.h"
#include "3rdparty/rapidjson/include/rapidjson/stringbuffer.h"
#include "3rdparty/rapidjson/include/rapidjson/writer.h"
#include "Val.h" #include "Val.h"
#include "Net.h" #include "Net.h"
#include "File.h" #include "File.h"
@ -27,20 +32,17 @@
#include "broker/Data.h" #include "broker/Data.h"
#include "3rdparty/json.hpp" class NullDoubleWriter : public rapidjson::Writer<rapidjson::StringBuffer> {
#include "3rdparty/tsl-ordered-map/ordered_map.h" public:
NullDoubleWriter(rapidjson::StringBuffer& buffer) : rapidjson::Writer<rapidjson::StringBuffer>(buffer) {}
bool Double(double d)
{
if ( rapidjson::internal::Double(d).IsNanOrInf() )
return rapidjson::Writer<rapidjson::StringBuffer>::Null();
return rapidjson::Writer<rapidjson::StringBuffer>::Double(d);
// Define a class for use with the json library that orders the keys in the same order that }
// they were inserted. By default, the json library orders them alphabetically and we don't };
// want it like that.
template<class Key, class T, class Ignore, class Allocator,
class Hash = std::hash<Key>, class KeyEqual = std::equal_to<Key>,
class AllocatorPair = typename std::allocator_traits<Allocator>::template rebind_alloc<std::pair<Key, T>>,
class ValueTypeContainer = std::vector<std::pair<Key, T>, AllocatorPair>>
using ordered_map = tsl::ordered_map<Key, T, Hash, KeyEqual, AllocatorPair, ValueTypeContainer>;
using ZeekJson = nlohmann::basic_json<ordered_map>;
Val::Val(Func* f) Val::Val(Func* f)
{ {
@ -433,46 +435,56 @@ TableVal* Val::GetRecordFields()
return rt->GetRecordFieldsVal(rv); return rt->GetRecordFieldsVal(rv);
} }
// This is a static method in this file to avoid including json.hpp in Val.h since it's huge. // This is a static method in this file to avoid including rapidjson's headers in Val.h because they're huge.
static ZeekJson BuildJSON(Val* val, bool only_loggable=false, RE_Matcher* re=nullptr) static void BuildJSON(NullDoubleWriter& writer, Val* val, bool only_loggable=false, RE_Matcher* re=nullptr, const string& key="")
{ {
// If the value wasn't set, return a nullptr. This will get turned into a 'null' in the json output. if ( !key.empty() )
if ( ! val ) writer.Key(key);
return nullptr;
ZeekJson j; // If the value wasn't set, write a null into the stream and return.
if ( ! val )
{
writer.Null();
return;
}
rapidjson::Value j;
BroType* type = val->Type(); BroType* type = val->Type();
switch ( type->Tag() ) switch ( type->Tag() )
{ {
case TYPE_BOOL: case TYPE_BOOL:
j = val->AsBool(); writer.Bool(val->AsBool());
break; break;
case TYPE_INT: case TYPE_INT:
j = val->AsInt(); writer.Int64(val->AsInt());
break; break;
case TYPE_COUNT: case TYPE_COUNT:
j = val->AsCount(); writer.Uint64(val->AsCount());
break; break;
case TYPE_COUNTER: case TYPE_COUNTER:
j = val->AsCounter(); writer.Uint64(val->AsCounter());
break; break;
case TYPE_TIME: case TYPE_TIME:
j = val->AsTime(); writer.Double(val->AsTime());
break; break;
case TYPE_DOUBLE: case TYPE_DOUBLE:
j = val->AsDouble(); writer.Double(val->AsDouble());
break; break;
case TYPE_PORT: case TYPE_PORT:
{ {
auto* pval = val->AsPortVal(); auto* pval = val->AsPortVal();
j.emplace("port", pval->Port()); writer.StartObject();
j.emplace("proto", pval->Protocol()); writer.Key("port");
writer.Int64(pval->Port());
writer.Key("proto");
writer.String(pval->Protocol());
writer.EndObject();
break; break;
} }
@ -484,7 +496,7 @@ static ZeekJson BuildJSON(Val* val, bool only_loggable=false, RE_Matcher* re=nul
ODesc d; ODesc d;
d.SetStyle(RAW_STYLE); d.SetStyle(RAW_STYLE);
val->Describe(&d); val->Describe(&d);
j = string(reinterpret_cast<const char*>(d.Bytes()), d.Len()); writer.String(reinterpret_cast<const char*>(d.Bytes()), d.Len());
break; break;
} }
@ -496,7 +508,7 @@ static ZeekJson BuildJSON(Val* val, bool only_loggable=false, RE_Matcher* re=nul
ODesc d; ODesc d;
d.SetStyle(RAW_STYLE); d.SetStyle(RAW_STYLE);
val->Describe(&d); val->Describe(&d);
j = json_escape_utf8(string(reinterpret_cast<const char*>(d.Bytes()), d.Len())); writer.String(json_escape_utf8(string(reinterpret_cast<const char*>(d.Bytes()), d.Len())));
break; break;
} }
@ -506,9 +518,9 @@ static ZeekJson BuildJSON(Val* val, bool only_loggable=false, RE_Matcher* re=nul
auto* tval = val->AsTableVal(); auto* tval = val->AsTableVal();
if ( tval->Type()->IsSet() ) if ( tval->Type()->IsSet() )
j = ZeekJson::array(); writer.StartArray();
else else
j = ZeekJson::object(); writer.StartObject();
HashKey* k; HashKey* k;
TableEntryVal* entry; TableEntryVal* entry;
@ -524,102 +536,125 @@ static ZeekJson BuildJSON(Val* val, bool only_loggable=false, RE_Matcher* re=nul
else else
entry_key = lv->Ref(); entry_key = lv->Ref();
ZeekJson key_json = BuildJSON(entry_key, only_loggable, re);
if ( tval->Type()->IsSet() ) if ( tval->Type()->IsSet() )
j.emplace_back(std::move(key_json)); BuildJSON(writer, entry_key, only_loggable, re);
else else
{ {
Val* entry_value = entry->Value(); Val* entry_value = entry->Value();
string key_string; rapidjson::StringBuffer buffer;
if ( key_json.is_string() ) NullDoubleWriter key_writer(buffer);
key_string = key_json; BuildJSON(key_writer, entry_key, only_loggable, re);
else string key_str = buffer.GetString();
key_string = key_json.dump(); if ( key_str[0] == '"')
key_str = key_str.substr(1);
if ( key_str[key_str.length()-1] == '"')
key_str = key_str.substr(0, key_str.length()-1);
j.emplace(key_string, BuildJSON(entry_value, only_loggable, re)); BuildJSON(writer, entry_value, only_loggable, re, key_str);
} }
Unref(entry_key); Unref(entry_key);
Unref(lv); Unref(lv);
} }
if ( tval->Type()->IsSet() )
writer.EndArray();
else
writer.EndObject();
break; break;
} }
case TYPE_RECORD: case TYPE_RECORD:
{ {
j = ZeekJson::object(); writer.StartObject();
auto* rval = val->AsRecordVal(); auto* rval = val->AsRecordVal();
auto rt = rval->Type()->AsRecordType(); auto rt = rval->Type()->AsRecordType();
for ( auto i = 0; i < rt->NumFields(); ++i ) for ( auto i = 0; i < rt->NumFields(); ++i )
{ {
auto field_name = rt->FieldName(i);
std::string key_string;
if ( re && re->MatchAnywhere(field_name) != 0 )
{
StringVal blank("");
StringVal fn_val(field_name);
auto key_val = fn_val.Substitute(re, &blank, 0)->AsStringVal();
key_string = key_val->ToStdString();
Unref(key_val);
}
else
key_string = field_name;
Val* value = rval->LookupWithDefault(i); Val* value = rval->LookupWithDefault(i);
if ( value && ( ! only_loggable || rt->FieldHasAttr(i, ATTR_LOG) ) ) if ( value && ( ! only_loggable || rt->FieldHasAttr(i, ATTR_LOG) ) )
j.emplace(key_string, BuildJSON(value, only_loggable, re)); {
string key_str;
auto field_name = rt->FieldName(i);
if ( re && re->MatchAnywhere(field_name) != 0 )
{
StringVal blank("");
StringVal fn_val(field_name);
auto key_val = fn_val.Substitute(re, &blank, 0)->AsStringVal();
key_str = key_val->ToStdString();
Unref(key_val);
}
else
key_str = field_name;
BuildJSON(writer, value, only_loggable, re, key_str);
}
Unref(value); Unref(value);
} }
writer.EndObject();
break; break;
} }
case TYPE_LIST: case TYPE_LIST:
{ {
j = ZeekJson::array(); writer.StartArray();
auto* lval = val->AsListVal(); auto* lval = val->AsListVal();
size_t size = lval->Length(); size_t size = lval->Length();
for (size_t i = 0; i < size; i++) for (size_t i = 0; i < size; i++)
j.push_back(BuildJSON(lval->Index(i), only_loggable, re)); BuildJSON(writer, lval->Index(i), only_loggable, re);
writer.EndArray();
break; break;
} }
case TYPE_VECTOR: case TYPE_VECTOR:
{ {
j = ZeekJson::array(); writer.StartArray();
auto* vval = val->AsVectorVal(); auto* vval = val->AsVectorVal();
size_t size = vval->SizeVal()->AsCount(); size_t size = vval->SizeVal()->AsCount();
for (size_t i = 0; i < size; i++) for (size_t i = 0; i < size; i++)
j.push_back(BuildJSON(vval->Lookup(i), only_loggable, re)); BuildJSON(writer, vval->Lookup(i), only_loggable, re);
writer.EndArray();
break; break;
} }
case TYPE_OPAQUE: case TYPE_OPAQUE:
{ {
writer.StartObject();
writer.Key("opaque_type");
auto* oval = val->AsOpaqueVal(); auto* oval = val->AsOpaqueVal();
j = { { "opaque_type", OpaqueMgr::mgr()->TypeID(oval) } }; writer.String(OpaqueMgr::mgr()->TypeID(oval));
writer.EndObject();
break; break;
} }
default: break; default:
writer.Null();
break;
} }
return j;
} }
StringVal* Val::ToJSON(bool only_loggable, RE_Matcher* re) StringVal* Val::ToJSON(bool only_loggable, RE_Matcher* re)
{ {
ZeekJson j = BuildJSON(this, only_loggable, re); rapidjson::StringBuffer buffer;
return new StringVal(j.dump()); NullDoubleWriter writer(buffer);
BuildJSON(writer, this, only_loggable, re, "");
return new StringVal(buffer.GetString());
} }
IntervalVal::IntervalVal(double quantity, double units) : IntervalVal::IntervalVal(double quantity, double units) :

View file

@ -12,9 +12,18 @@
#include <stdint.h> #include <stdint.h>
#include "JSON.h" #include "JSON.h"
#include "3rdparty/rapidjson/include/rapidjson/internal/ieee754.h"
using namespace threading::formatter; using namespace threading::formatter;
bool JSON::NullDoubleWriter::Double(double d)
{
if ( rapidjson::internal::Double(d).IsNanOrInf() )
return rapidjson::Writer<rapidjson::StringBuffer>::Null();
return rapidjson::Writer<rapidjson::StringBuffer>::Double(d);
}
JSON::JSON(MsgThread* t, TimeFormat tf) : Formatter(t), surrounding_braces(true) JSON::JSON(MsgThread* t, TimeFormat tf) : Formatter(t), surrounding_braces(true)
{ {
timestamps = tf; timestamps = tf;
@ -27,21 +36,19 @@ JSON::~JSON()
bool JSON::Describe(ODesc* desc, int num_fields, const Field* const * fields, bool JSON::Describe(ODesc* desc, int num_fields, const Field* const * fields,
Value** vals) const Value** vals) const
{ {
ZeekJson j = ZeekJson::object(); rapidjson::StringBuffer buffer;
NullDoubleWriter writer(buffer);
writer.StartObject();
for ( int i = 0; i < num_fields; i++ ) for ( int i = 0; i < num_fields; i++ )
{ {
if ( vals[i]->present ) if ( vals[i]->present )
{ BuildJSON(writer, vals[i], fields[i]->name);
ZeekJson new_entry = BuildJSON(vals[i]);
if ( new_entry.is_null() )
return false;
j.emplace(fields[i]->name, new_entry);
}
} }
desc->Add(j.dump()); writer.EndObject();
desc->Add(buffer.GetString());
return true; return true;
} }
@ -54,14 +61,18 @@ bool JSON::Describe(ODesc* desc, Value* val, const string& name) const
return false; return false;
} }
if ( ! val->present ) if ( ! val->present || name.empty() )
return true; return true;
ZeekJson j = BuildJSON(val, name); rapidjson::Document doc;
if ( j.is_null() ) rapidjson::StringBuffer buffer;
return false; NullDoubleWriter writer(buffer);
desc->Add(j.dump()); writer.StartObject();
BuildJSON(writer, val, name);
writer.EndObject();
desc->Add(buffer.GetString());
return true; return true;
} }
@ -71,47 +82,56 @@ threading::Value* JSON::ParseValue(const string& s, const string& name, TypeTag
return nullptr; return nullptr;
} }
ZeekJson JSON::BuildJSON(Value* val, const string& name) const void JSON::BuildJSON(NullDoubleWriter& writer, Value* val, const string& name) const
{ {
// If the value wasn't set, return a nullptr. This will get turned into a 'null' in the json output.
if ( ! val->present ) if ( ! val->present )
return nullptr; {
writer.Null();
return;
}
ZeekJson j;
switch ( val->type ) switch ( val->type )
{ {
case TYPE_BOOL: case TYPE_BOOL:
j = val->val.int_val != 0; if ( ! name.empty() ) writer.Key(name);
writer.Bool(val->val.int_val != 0);
break; break;
case TYPE_INT: case TYPE_INT:
j = val->val.int_val; if ( ! name.empty() ) writer.Key(name);
writer.Int64(val->val.int_val);
break; break;
case TYPE_COUNT: case TYPE_COUNT:
case TYPE_COUNTER: case TYPE_COUNTER:
j = val->val.uint_val; if ( ! name.empty() ) writer.Key(name);
writer.Uint64(val->val.uint_val);
break; break;
case TYPE_PORT: case TYPE_PORT:
j = val->val.port_val.port; if ( ! name.empty() ) writer.Key(name);
writer.Uint64(val->val.port_val.port);
break; break;
case TYPE_SUBNET: case TYPE_SUBNET:
j = Formatter::Render(val->val.subnet_val); if ( ! name.empty() ) writer.Key(name);
writer.String(Formatter::Render(val->val.subnet_val));
break; break;
case TYPE_ADDR: case TYPE_ADDR:
j = Formatter::Render(val->val.addr_val); if ( ! name.empty() ) writer.Key(name);
writer.String(Formatter::Render(val->val.addr_val));
break; break;
case TYPE_DOUBLE: case TYPE_DOUBLE:
case TYPE_INTERVAL: case TYPE_INTERVAL:
j = val->val.double_val; if ( ! name.empty() ) writer.Key(name);
writer.Double(val->val.double_val);
break; break;
case TYPE_TIME: case TYPE_TIME:
{ {
if ( ! name.empty() ) writer.Key(name);
if ( timestamps == TS_ISO8601 ) if ( timestamps == TS_ISO8601 )
{ {
char buffer[40]; char buffer[40];
@ -125,7 +145,7 @@ ZeekJson JSON::BuildJSON(Value* val, const string& name) const
GetThread()->Error(GetThread()->Fmt("json formatter: failure getting time: (%lf)", val->val.double_val)); GetThread()->Error(GetThread()->Fmt("json formatter: failure getting time: (%lf)", val->val.double_val));
// This was a failure, doesn't really matter what gets put here // This was a failure, doesn't really matter what gets put here
// but it should probably stand out... // but it should probably stand out...
j = "2000-01-01T00:00:00.000000"; writer.String("2000-01-01T00:00:00.000000");
} }
else else
{ {
@ -136,17 +156,17 @@ ZeekJson JSON::BuildJSON(Value* val, const string& name) const
frac += 1; frac += 1;
snprintf(buffer2, sizeof(buffer2), "%s.%06.0fZ", buffer, fabs(frac) * 1000000); snprintf(buffer2, sizeof(buffer2), "%s.%06.0fZ", buffer, fabs(frac) * 1000000);
j = buffer2; writer.String(buffer2, strlen(buffer2));
} }
} }
else if ( timestamps == TS_EPOCH ) else if ( timestamps == TS_EPOCH )
j = val->val.double_val; writer.Double(val->val.double_val);
else if ( timestamps == TS_MILLIS ) else if ( timestamps == TS_MILLIS )
{ {
// ElasticSearch uses milliseconds for timestamps // ElasticSearch uses milliseconds for timestamps
j = (uint64_t) (val->val.double_val * 1000); writer.Uint64((uint64_t) (val->val.double_val * 1000));
} }
break; break;
@ -157,36 +177,37 @@ ZeekJson JSON::BuildJSON(Value* val, const string& name) const
case TYPE_FILE: case TYPE_FILE:
case TYPE_FUNC: case TYPE_FUNC:
{ {
j = json_escape_utf8(string(val->val.string_val.data, val->val.string_val.length)); if ( ! name.empty() ) writer.Key(name);
writer.String(json_escape_utf8(string(val->val.string_val.data, val->val.string_val.length)));
break; break;
} }
case TYPE_TABLE: case TYPE_TABLE:
{ {
j = ZeekJson::array(); if ( ! name.empty() ) writer.Key(name);
writer.StartArray();
for ( int idx = 0; idx < val->val.set_val.size; idx++ ) for ( int idx = 0; idx < val->val.set_val.size; idx++ )
j.push_back(BuildJSON(val->val.set_val.vals[idx])); BuildJSON(writer, val->val.set_val.vals[idx]);
writer.EndArray();
break; break;
} }
case TYPE_VECTOR: case TYPE_VECTOR:
{ {
j = ZeekJson::array(); if ( ! name.empty() ) writer.Key(name);
writer.StartArray();
for ( int idx = 0; idx < val->val.vector_val.size; idx++ ) for ( int idx = 0; idx < val->val.vector_val.size; idx++ )
j.push_back(BuildJSON(val->val.vector_val.vals[idx])); BuildJSON(writer, val->val.vector_val.vals[idx]);
writer.EndArray();
break; break;
} }
default: default:
reporter->Warning("Unhandled type in JSON::BuildJSON");
break; break;
} }
if ( ! name.empty() && ! j.is_null() )
return { { name, j } };
return j;
} }

View file

@ -2,24 +2,14 @@
#pragma once #pragma once
#include "../Formatter.h" #define RAPIDJSON_HAS_STDSTRING 1
#include "3rdparty/json.hpp" #include "3rdparty/rapidjson/include/rapidjson/document.h"
#include "3rdparty/tsl-ordered-map/ordered_map.h" #include "3rdparty/rapidjson/include/rapidjson/writer.h"
#include "../Formatter.h"
namespace threading { namespace formatter { namespace threading { namespace formatter {
// Define a class for use with the json library that orders the keys in the same order that
// they were inserted. By default, the json library orders them alphabetically and we don't
// want it like that.
template<class Key, class T, class Ignore, class Allocator,
class Hash = std::hash<Key>, class KeyEqual = std::equal_to<Key>,
class AllocatorPair = typename std::allocator_traits<Allocator>::template rebind_alloc<std::pair<Key, T>>,
class ValueTypeContainer = std::vector<std::pair<Key, T>, AllocatorPair>>
using ordered_map = tsl::ordered_map<Key, T, Hash, KeyEqual, AllocatorPair, ValueTypeContainer>;
using ZeekJson = nlohmann::basic_json<ordered_map>;
/** /**
* A thread-safe class for converting values into a JSON representation * A thread-safe class for converting values into a JSON representation
* and vice versa. * and vice versa.
@ -42,7 +32,13 @@ public:
private: private:
ZeekJson BuildJSON(Value* val, const string& name = "") const; class NullDoubleWriter : public rapidjson::Writer<rapidjson::StringBuffer> {
public:
NullDoubleWriter(rapidjson::StringBuffer& stream) : rapidjson::Writer<rapidjson::StringBuffer>(stream) {}
bool Double(double d);
};
void BuildJSON(NullDoubleWriter& writer, Value* val, const string& name = "") const;
TimeFormat timestamps; TimeFormat timestamps;
bool surrounding_braces; bool surrounding_braces;

View file

@ -9,11 +9,11 @@
{"d":0.1234} {"d":0.1234}
{"d":50000.0} {"d":50000.0}
{"d":-50000.0} {"d":-50000.0}
{"d":3.14e+15} {"d":3140000000000000.0}
{"d":-3.14e+15} {"d":-3140000000000000.0}
{"d":1.79e+308} {"d":1.79e308}
{"d":-1.79e+308} {"d":-1.79e308}
{"d":1.23456789e-05} {"d":0.0000123456789}
{"d":2.23e-308} {"d":2.23e-308}
{"d":-2.23e-308} {"d":-2.23e-308}
{"d":null} {"d":null}

View file

@ -2,7 +2,7 @@ true
123 123
-999 -999
3.14 3.14
-1.23456789e+308 -1.23456789e308
9e-308 9e-308
1480788576.868945 1480788576.868945
"-12.0 hrs" "-12.0 hrs"