GHI-595: Convert from nlohmann/json to rapidjson for performance reasons

This commit is contained in:
Tim Wojtulewicz 2019-11-07 12:30:30 -07:00
parent 50a0835b41
commit 46e7308422
7 changed files with 183 additions and 132 deletions

@ -1 +1 @@
Subproject commit 2b3206b7add3472ea0736f2841473e11d506a85e
Subproject commit e2f0a1b3f1c54f9dc97a806261cc329e4dd596c8

View file

@ -413,15 +413,14 @@ install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/
)
install(FILES
${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/json.hpp
${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/sqlite3.h
DESTINATION include/zeek/3rdparty
)
)
install(FILES
${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/tsl-ordered-map/ordered_map.h
${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/tsl-ordered-map/ordered_hash.h
DESTINATION include/zeek/3rdparty/tsl-ordered-map
${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/rapidjson/include/rapidjson/document.h
${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/rapidjson/include/rapidjson/writer.h
DESTINATION include/zeek/3rdparty/rapidjson/include/rapidjson
)
########################################################################

View file

@ -12,6 +12,11 @@
#include <stdio.h>
#include <stdlib.h>
#define RAPIDJSON_HAS_STDSTRING 1
#include "3rdparty/rapidjson/include/rapidjson/document.h"
#include "3rdparty/rapidjson/include/rapidjson/stringbuffer.h"
#include "3rdparty/rapidjson/include/rapidjson/writer.h"
#include "Val.h"
#include "Net.h"
#include "File.h"
@ -27,20 +32,17 @@
#include "broker/Data.h"
#include "3rdparty/json.hpp"
#include "3rdparty/tsl-ordered-map/ordered_map.h"
// JSON writer that substitutes a JSON null for any double that is NaN or
// infinite. Only Double() is overridden here; everything else is inherited
// from rapidjson::Writer<rapidjson::StringBuffer>.
class NullDoubleWriter : public rapidjson::Writer<rapidjson::StringBuffer> {
public:
NullDoubleWriter(rapidjson::StringBuffer& buffer) : rapidjson::Writer<rapidjson::StringBuffer>(buffer) {}
// Emits null for NaN/Inf, otherwise defers to the base Writer::Double().
bool Double(double d)
{
if ( rapidjson::internal::Double(d).IsNanOrInf() )
return rapidjson::Writer<rapidjson::StringBuffer>::Null();
// NOTE(review): the ordered_map/ZeekJson declarations below look like the
// nlohmann-era lines this change removes, interleaved by the diff view — confirm.
// Define a class for use with the json library that orders the keys in the same order that
// they were inserted. By default, the json library orders them alphabetically and we don't
// want it like that.
template<class Key, class T, class Ignore, class Allocator,
class Hash = std::hash<Key>, class KeyEqual = std::equal_to<Key>,
class AllocatorPair = typename std::allocator_traits<Allocator>::template rebind_alloc<std::pair<Key, T>>,
class ValueTypeContainer = std::vector<std::pair<Key, T>, AllocatorPair>>
using ordered_map = tsl::ordered_map<Key, T, Hash, KeyEqual, AllocatorPair, ValueTypeContainer>;
using ZeekJson = nlohmann::basic_json<ordered_map>;
return rapidjson::Writer<rapidjson::StringBuffer>::Double(d);
}
};
Val::Val(Func* f)
{
@ -433,46 +435,56 @@ TableVal* Val::GetRecordFields()
return rt->GetRecordFieldsVal(rv);
}
// This is a static method in this file to avoid including json.hpp in Val.h since it's huge.
static ZeekJson BuildJSON(Val* val, bool only_loggable=false, RE_Matcher* re=nullptr)
// This is a static method in this file to avoid including rapidjson's headers in Val.h because they're huge.
static void BuildJSON(NullDoubleWriter& writer, Val* val, bool only_loggable=false, RE_Matcher* re=nullptr, const string& key="")
{
// If the value wasn't set, return a nullptr. This will get turned into a 'null' in the json output.
if ( ! val )
return nullptr;
if ( !key.empty() )
writer.Key(key);
ZeekJson j;
// If the value wasn't set, write a null into the stream and return.
if ( ! val )
{
writer.Null();
return;
}
rapidjson::Value j;
BroType* type = val->Type();
switch ( type->Tag() )
{
case TYPE_BOOL:
j = val->AsBool();
writer.Bool(val->AsBool());
break;
case TYPE_INT:
j = val->AsInt();
writer.Int64(val->AsInt());
break;
case TYPE_COUNT:
j = val->AsCount();
writer.Uint64(val->AsCount());
break;
case TYPE_COUNTER:
j = val->AsCounter();
writer.Uint64(val->AsCounter());
break;
case TYPE_TIME:
j = val->AsTime();
writer.Double(val->AsTime());
break;
case TYPE_DOUBLE:
j = val->AsDouble();
writer.Double(val->AsDouble());
break;
case TYPE_PORT:
{
auto* pval = val->AsPortVal();
j.emplace("port", pval->Port());
j.emplace("proto", pval->Protocol());
writer.StartObject();
writer.Key("port");
writer.Int64(pval->Port());
writer.Key("proto");
writer.String(pval->Protocol());
writer.EndObject();
break;
}
@ -484,7 +496,7 @@ static ZeekJson BuildJSON(Val* val, bool only_loggable=false, RE_Matcher* re=nul
ODesc d;
d.SetStyle(RAW_STYLE);
val->Describe(&d);
j = string(reinterpret_cast<const char*>(d.Bytes()), d.Len());
writer.String(reinterpret_cast<const char*>(d.Bytes()), d.Len());
break;
}
@ -496,7 +508,7 @@ static ZeekJson BuildJSON(Val* val, bool only_loggable=false, RE_Matcher* re=nul
ODesc d;
d.SetStyle(RAW_STYLE);
val->Describe(&d);
j = json_escape_utf8(string(reinterpret_cast<const char*>(d.Bytes()), d.Len()));
writer.String(json_escape_utf8(string(reinterpret_cast<const char*>(d.Bytes()), d.Len())));
break;
}
@ -506,9 +518,9 @@ static ZeekJson BuildJSON(Val* val, bool only_loggable=false, RE_Matcher* re=nul
auto* tval = val->AsTableVal();
if ( tval->Type()->IsSet() )
j = ZeekJson::array();
writer.StartArray();
else
j = ZeekJson::object();
writer.StartObject();
HashKey* k;
TableEntryVal* entry;
@ -524,102 +536,125 @@ static ZeekJson BuildJSON(Val* val, bool only_loggable=false, RE_Matcher* re=nul
else
entry_key = lv->Ref();
ZeekJson key_json = BuildJSON(entry_key, only_loggable, re);
if ( tval->Type()->IsSet() )
j.emplace_back(std::move(key_json));
BuildJSON(writer, entry_key, only_loggable, re);
else
{
Val* entry_value = entry->Value();
string key_string;
if ( key_json.is_string() )
key_string = key_json;
else
key_string = key_json.dump();
rapidjson::StringBuffer buffer;
NullDoubleWriter key_writer(buffer);
BuildJSON(key_writer, entry_key, only_loggable, re);
string key_str = buffer.GetString();
if ( key_str[0] == '"')
key_str = key_str.substr(1);
if ( key_str[key_str.length()-1] == '"')
key_str = key_str.substr(0, key_str.length()-1);
j.emplace(key_string, BuildJSON(entry_value, only_loggable, re));
BuildJSON(writer, entry_value, only_loggable, re, key_str);
}
Unref(entry_key);
Unref(lv);
}
if ( tval->Type()->IsSet() )
writer.EndArray();
else
writer.EndObject();
break;
}
case TYPE_RECORD:
{
j = ZeekJson::object();
writer.StartObject();
auto* rval = val->AsRecordVal();
auto rt = rval->Type()->AsRecordType();
for ( auto i = 0; i < rt->NumFields(); ++i )
{
auto field_name = rt->FieldName(i);
std::string key_string;
if ( re && re->MatchAnywhere(field_name) != 0 )
{
StringVal blank("");
StringVal fn_val(field_name);
auto key_val = fn_val.Substitute(re, &blank, 0)->AsStringVal();
key_string = key_val->ToStdString();
Unref(key_val);
}
else
key_string = field_name;
Val* value = rval->LookupWithDefault(i);
if ( value && ( ! only_loggable || rt->FieldHasAttr(i, ATTR_LOG) ) )
j.emplace(key_string, BuildJSON(value, only_loggable, re));
{
string key_str;
auto field_name = rt->FieldName(i);
if ( re && re->MatchAnywhere(field_name) != 0 )
{
StringVal blank("");
StringVal fn_val(field_name);
auto key_val = fn_val.Substitute(re, &blank, 0)->AsStringVal();
key_str = key_val->ToStdString();
Unref(key_val);
}
else
key_str = field_name;
BuildJSON(writer, value, only_loggable, re, key_str);
}
Unref(value);
}
writer.EndObject();
break;
}
case TYPE_LIST:
{
j = ZeekJson::array();
writer.StartArray();
auto* lval = val->AsListVal();
size_t size = lval->Length();
for (size_t i = 0; i < size; i++)
j.push_back(BuildJSON(lval->Index(i), only_loggable, re));
BuildJSON(writer, lval->Index(i), only_loggable, re);
writer.EndArray();
break;
}
case TYPE_VECTOR:
{
j = ZeekJson::array();
writer.StartArray();
auto* vval = val->AsVectorVal();
size_t size = vval->SizeVal()->AsCount();
for (size_t i = 0; i < size; i++)
j.push_back(BuildJSON(vval->Lookup(i), only_loggable, re));
BuildJSON(writer, vval->Lookup(i), only_loggable, re);
writer.EndArray();
break;
}
case TYPE_OPAQUE:
{
writer.StartObject();
writer.Key("opaque_type");
auto* oval = val->AsOpaqueVal();
j = { { "opaque_type", OpaqueMgr::mgr()->TypeID(oval) } };
writer.String(OpaqueMgr::mgr()->TypeID(oval));
writer.EndObject();
break;
}
default: break;
default:
writer.Null();
break;
}
return j;
}
// Serializes this value to JSON text and returns it wrapped in a newly
// allocated StringVal. only_loggable and re are forwarded unchanged to
// BuildJSON.
StringVal* Val::ToJSON(bool only_loggable, RE_Matcher* re)
{
// NOTE(review): the next two lines look like the previous nlohmann-based
// body, interleaved with its rapidjson replacement by the diff view — confirm.
ZeekJson j = BuildJSON(this, only_loggable, re);
return new StringVal(j.dump());
// rapidjson path: stream the value into an in-memory string buffer. The
// empty key means BuildJSON writes no member name before the top-level value.
rapidjson::StringBuffer buffer;
NullDoubleWriter writer(buffer);
BuildJSON(writer, this, only_loggable, re, "");
return new StringVal(buffer.GetString());
}
IntervalVal::IntervalVal(double quantity, double units) :

View file

@ -12,9 +12,18 @@
#include <stdint.h>
#include "JSON.h"
#include "3rdparty/rapidjson/include/rapidjson/internal/ieee754.h"
using namespace threading::formatter;
bool JSON::NullDoubleWriter::Double(double d)
{
if ( rapidjson::internal::Double(d).IsNanOrInf() )
return rapidjson::Writer<rapidjson::StringBuffer>::Null();
return rapidjson::Writer<rapidjson::StringBuffer>::Double(d);
}
JSON::JSON(MsgThread* t, TimeFormat tf) : Formatter(t), surrounding_braces(true)
{
timestamps = tf;
@ -27,21 +36,19 @@ JSON::~JSON()
// Renders num_fields field/value pairs as one JSON object and appends the
// resulting text to desc.
// NOTE(review): statements using ZeekJson look like the previous
// nlohmann-based body, interleaved with the rapidjson replacement by the
// diff view — confirm which lines belong to the current version.
bool JSON::Describe(ODesc* desc, int num_fields, const Field* const * fields,
Value** vals) const
{
ZeekJson j = ZeekJson::object();
rapidjson::StringBuffer buffer;
NullDoubleWriter writer(buffer);
// A single enclosing object holds every field written below.
writer.StartObject();
for ( int i = 0; i < num_fields; i++ )
{
if ( vals[i]->present )
{
ZeekJson new_entry = BuildJSON(vals[i]);
if ( new_entry.is_null() )
return false;
j.emplace(fields[i]->name, new_entry);
}
// The field name is passed along so BuildJSON can emit it as the member key.
BuildJSON(writer, vals[i], fields[i]->name);
}
desc->Add(j.dump());
writer.EndObject();
// Hand the serialized text accumulated in the buffer to the caller's ODesc.
desc->Add(buffer.GetString());
return true;
}
@ -54,14 +61,18 @@ bool JSON::Describe(ODesc* desc, Value* val, const string& name) const
return false;
}
if ( ! val->present )
if ( ! val->present || name.empty() )
return true;
ZeekJson j = BuildJSON(val, name);
if ( j.is_null() )
return false;
rapidjson::Document doc;
rapidjson::StringBuffer buffer;
NullDoubleWriter writer(buffer);
desc->Add(j.dump());
writer.StartObject();
BuildJSON(writer, val, name);
writer.EndObject();
desc->Add(buffer.GetString());
return true;
}
@ -71,47 +82,56 @@ threading::Value* JSON::ParseValue(const string& s, const string& name, TypeTag
return nullptr;
}
ZeekJson JSON::BuildJSON(Value* val, const string& name) const
void JSON::BuildJSON(NullDoubleWriter& writer, Value* val, const string& name) const
{
// If the value wasn't set, return a nullptr. This will get turned into a 'null' in the json output.
if ( ! val->present )
return nullptr;
{
writer.Null();
return;
}
ZeekJson j;
switch ( val->type )
{
case TYPE_BOOL:
j = val->val.int_val != 0;
if ( ! name.empty() ) writer.Key(name);
writer.Bool(val->val.int_val != 0);
break;
case TYPE_INT:
j = val->val.int_val;
if ( ! name.empty() ) writer.Key(name);
writer.Int64(val->val.int_val);
break;
case TYPE_COUNT:
case TYPE_COUNTER:
j = val->val.uint_val;
if ( ! name.empty() ) writer.Key(name);
writer.Uint64(val->val.uint_val);
break;
case TYPE_PORT:
j = val->val.port_val.port;
if ( ! name.empty() ) writer.Key(name);
writer.Uint64(val->val.port_val.port);
break;
case TYPE_SUBNET:
j = Formatter::Render(val->val.subnet_val);
if ( ! name.empty() ) writer.Key(name);
writer.String(Formatter::Render(val->val.subnet_val));
break;
case TYPE_ADDR:
j = Formatter::Render(val->val.addr_val);
if ( ! name.empty() ) writer.Key(name);
writer.String(Formatter::Render(val->val.addr_val));
break;
case TYPE_DOUBLE:
case TYPE_INTERVAL:
j = val->val.double_val;
if ( ! name.empty() ) writer.Key(name);
writer.Double(val->val.double_val);
break;
case TYPE_TIME:
{
if ( ! name.empty() ) writer.Key(name);
if ( timestamps == TS_ISO8601 )
{
char buffer[40];
@ -125,7 +145,7 @@ ZeekJson JSON::BuildJSON(Value* val, const string& name) const
GetThread()->Error(GetThread()->Fmt("json formatter: failure getting time: (%lf)", val->val.double_val));
// This was a failure, doesn't really matter what gets put here
// but it should probably stand out...
j = "2000-01-01T00:00:00.000000";
writer.String("2000-01-01T00:00:00.000000");
}
else
{
@ -136,17 +156,17 @@ ZeekJson JSON::BuildJSON(Value* val, const string& name) const
frac += 1;
snprintf(buffer2, sizeof(buffer2), "%s.%06.0fZ", buffer, fabs(frac) * 1000000);
j = buffer2;
writer.String(buffer2, strlen(buffer2));
}
}
else if ( timestamps == TS_EPOCH )
j = val->val.double_val;
writer.Double(val->val.double_val);
else if ( timestamps == TS_MILLIS )
{
// ElasticSearch uses milliseconds for timestamps
j = (uint64_t) (val->val.double_val * 1000);
writer.Uint64((uint64_t) (val->val.double_val * 1000));
}
break;
@ -157,36 +177,37 @@ ZeekJson JSON::BuildJSON(Value* val, const string& name) const
case TYPE_FILE:
case TYPE_FUNC:
{
j = json_escape_utf8(string(val->val.string_val.data, val->val.string_val.length));
if ( ! name.empty() ) writer.Key(name);
writer.String(json_escape_utf8(string(val->val.string_val.data, val->val.string_val.length)));
break;
}
case TYPE_TABLE:
{
j = ZeekJson::array();
if ( ! name.empty() ) writer.Key(name);
writer.StartArray();
for ( int idx = 0; idx < val->val.set_val.size; idx++ )
j.push_back(BuildJSON(val->val.set_val.vals[idx]));
BuildJSON(writer, val->val.set_val.vals[idx]);
writer.EndArray();
break;
}
case TYPE_VECTOR:
{
j = ZeekJson::array();
if ( ! name.empty() ) writer.Key(name);
writer.StartArray();
for ( int idx = 0; idx < val->val.vector_val.size; idx++ )
j.push_back(BuildJSON(val->val.vector_val.vals[idx]));
BuildJSON(writer, val->val.vector_val.vals[idx]);
writer.EndArray();
break;
}
default:
reporter->Warning("Unhandled type in JSON::BuildJSON");
break;
}
if ( ! name.empty() && ! j.is_null() )
return { { name, j } };
return j;
}

View file

@ -2,24 +2,14 @@
#pragma once
#include "../Formatter.h"
#include "3rdparty/json.hpp"
#include "3rdparty/tsl-ordered-map/ordered_map.h"
#define RAPIDJSON_HAS_STDSTRING 1
#include "3rdparty/rapidjson/include/rapidjson/document.h"
#include "3rdparty/rapidjson/include/rapidjson/writer.h"
#include "../Formatter.h"
namespace threading { namespace formatter {
// Define a class for use with the json library that orders the keys in the same order that
// they were inserted. By default, the json library orders them alphabetically and we don't
// want it like that.
template<class Key, class T, class Ignore, class Allocator,
class Hash = std::hash<Key>, class KeyEqual = std::equal_to<Key>,
class AllocatorPair = typename std::allocator_traits<Allocator>::template rebind_alloc<std::pair<Key, T>>,
class ValueTypeContainer = std::vector<std::pair<Key, T>, AllocatorPair>>
using ordered_map = tsl::ordered_map<Key, T, Hash, KeyEqual, AllocatorPair, ValueTypeContainer>;
using ZeekJson = nlohmann::basic_json<ordered_map>;
/**
* A thread-safe class for converting values into a JSON representation
* and vice versa.
@ -42,7 +32,13 @@ public:
private:
ZeekJson BuildJSON(Value* val, const string& name = "") const;
/**
 * A rapidjson writer whose Double() is declared separately from the base
 * class's so that the formatter can supply its own handling of doubles.
 * All other output methods are inherited unchanged.
 */
class NullDoubleWriter : public rapidjson::Writer<rapidjson::StringBuffer> {
	using Base = rapidjson::Writer<rapidjson::StringBuffer>;

public:
	/**
	 * @param out The string buffer that receives the serialized JSON.
	 */
	NullDoubleWriter(rapidjson::StringBuffer& out) : Base(out) {}

	/**
	 * Writes a double value to the stream.
	 *
	 * @param d The value to write.
	 * @return The result reported by the underlying writer call.
	 */
	bool Double(double d);
};
void BuildJSON(NullDoubleWriter& writer, Value* val, const string& name = "") const;
TimeFormat timestamps;
bool surrounding_braces;

View file

@ -9,11 +9,11 @@
{"d":0.1234}
{"d":50000.0}
{"d":-50000.0}
{"d":3.14e+15}
{"d":-3.14e+15}
{"d":1.79e+308}
{"d":-1.79e+308}
{"d":1.23456789e-05}
{"d":3140000000000000.0}
{"d":-3140000000000000.0}
{"d":1.79e308}
{"d":-1.79e308}
{"d":0.0000123456789}
{"d":2.23e-308}
{"d":-2.23e-308}
{"d":null}

View file

@ -2,7 +2,7 @@ true
123
-999
3.14
-1.23456789e+308
-1.23456789e308
9e-308
1480788576.868945
"-12.0 hrs"