Mirror of https://github.com/zeek/zeek.git, synced 2025-10-13 20:18:20 +00:00
Merge remote-tracking branch 'origin/topic/timw/595-rapidjson'
Tweaks:
- Small change to the logic for removing quotes around strings.
- Updated NEWS & COPYING.3rdparty
- Use of intrusive_ptr for stack-allocated StringVals
- Little bit of refactoring (I would love to merge the two BuildJSON() functions, too, but that's a larger task)

* origin/topic/timw/595-rapidjson:
  Use the list of files from clang-tidy when searching for unit tests
  Optimize json_escape_utf8 a bit by removing repeated calls to string methods
  Expand unit test for json_escape_utf8 to include all of the strings from the ascii-json-utf8 btest
  GHI-595: Convert from nlohmann/json to rapidjson for performance reasons
  Convert type-checking macros to actual functions
commit 8170baabef

12 changed files with 335 additions and 210 deletions
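For readers skimming the diff below: the conversion swaps DOM-style JSON construction (building an nlohmann::json value and calling dump()) for rapidjson's streaming Writer, which emits tokens straight into a StringBuffer. A minimal sketch of the Writer style using stock rapidjson calls (the write_port helper and its values are illustrative, not code from this commit):

    #include <string>

    #include "rapidjson/stringbuffer.h"
    #include "rapidjson/writer.h"

    // Streaming style: emit events directly into a buffer, no intermediate tree.
    std::string write_port(int port, const std::string& proto)
        {
        rapidjson::StringBuffer buffer;
        rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);

        writer.StartObject();
        writer.Key("port");
        writer.Int64(port);
        writer.Key("proto");
        writer.String(proto.c_str());
        writer.EndObject();

        return buffer.GetString();   // e.g. {"port":80,"proto":"tcp"}
        }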
@@ -1 +1 @@
Subproject commit 2b3206b7add3472ea0736f2841473e11d506a85e
Subproject commit fae32236391d9117bf996e75d56ebd01ef076bc2
@@ -413,15 +413,14 @@ install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/
)

install(FILES
    ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/json.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/sqlite3.h
    DESTINATION include/zeek/3rdparty
)

install(FILES
    ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/tsl-ordered-map/ordered_map.h
    ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/tsl-ordered-map/ordered_hash.h
    DESTINATION include/zeek/3rdparty/tsl-ordered-map
    ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/rapidjson/include/rapidjson/document.h
    ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/rapidjson/include/rapidjson/writer.h
    DESTINATION include/zeek/3rdparty/rapidjson/include/rapidjson
)

########################################################################

@@ -439,9 +438,8 @@ create_clang_tidy_target()

# Scan all .cc files for TEST_CASE macros and generate CTest targets.
if (ENABLE_ZEEK_UNIT_TESTS)
    file(GLOB_RECURSE all_cc_files "*.cc")
    set(test_cases "")
    foreach (cc_file ${all_cc_files})
    foreach (cc_file ${TIDY_SRCS})
        file (STRINGS ${cc_file} test_case_lines REGEX "TEST_CASE")
        foreach (line ${test_case_lines})
            string(REGEX REPLACE "TEST_CASE\\(\"(.+)\"\\)" "\\1" test_case "${line}")
src/Type.h: 64 changed lines
@@ -698,53 +698,53 @@ extern BroType* init_type(Expr* init);
// Returns true if argument is an atomic type.
bool is_atomic_type(const BroType* t);

// True if the given type tag corresponds to an integral type.
#define IsIntegral(t) (t == TYPE_INT || t == TYPE_COUNT || t == TYPE_COUNTER)

// True if the given type tag corresponds to an arithmetic type.
#define IsArithmetic(t) (IsIntegral(t) || t == TYPE_DOUBLE)

// True if the given type tag corresponds to a boolean type.
#define IsBool(t) (t == TYPE_BOOL)

// True if the given type tag corresponds to an interval type.
#define IsInterval(t) (t == TYPE_INTERVAL)

// True if the given type tag corresponds to a record type.
#define IsRecord(t) (t == TYPE_RECORD || t == TYPE_UNION)

// True if the given type tag corresponds to a function type.
#define IsFunc(t) (t == TYPE_FUNC)

// True if the given type type is a vector.
#define IsVector(t) (t == TYPE_VECTOR)

// True if the given type type is a string.
#define IsString(t) (t == TYPE_STRING)

// True if the given type tag corresponds to type that can be assigned to.
extern int is_assignable(BroType* t);

// True if the given type tag corresponds to an integral type.
inline bool IsIntegral(TypeTag t) { return (t == TYPE_INT || t == TYPE_COUNT || t == TYPE_COUNTER); }

// True if the given type tag corresponds to an arithmetic type.
inline bool IsArithmetic(TypeTag t) { return (IsIntegral(t) || t == TYPE_DOUBLE); }

// True if the given type tag corresponds to a boolean type.
inline bool IsBool(TypeTag t) { return (t == TYPE_BOOL); }

// True if the given type tag corresponds to an interval type.
inline bool IsInterval(TypeTag t) { return (t == TYPE_INTERVAL); }

// True if the given type tag corresponds to a record type.
inline bool IsRecord(TypeTag t) { return (t == TYPE_RECORD || t == TYPE_UNION); }

// True if the given type tag corresponds to a function type.
inline bool IsFunc(TypeTag t) { return (t == TYPE_FUNC); }

// True if the given type type is a vector.
inline bool IsVector(TypeTag t) { return (t == TYPE_VECTOR); }

// True if the given type type is a string.
inline bool IsString(TypeTag t) { return (t == TYPE_STRING); }

// True if the given type tag corresponds to the error type.
#define IsErrorType(t) (t == TYPE_ERROR)
inline bool IsErrorType(TypeTag t) { return (t == TYPE_ERROR); }

// True if both tags are integral types.
#define BothIntegral(t1, t2) (IsIntegral(t1) && IsIntegral(t2))
inline bool BothIntegral(TypeTag t1, TypeTag t2) { return (IsIntegral(t1) && IsIntegral(t2)); }

// True if both tags are arithmetic types.
#define BothArithmetic(t1, t2) (IsArithmetic(t1) && IsArithmetic(t2))
inline bool BothArithmetic(TypeTag t1, TypeTag t2) { return (IsArithmetic(t1) && IsArithmetic(t2)); }

// True if either tags is an arithmetic type.
#define EitherArithmetic(t1, t2) (IsArithmetic(t1) || IsArithmetic(t2))
inline bool EitherArithmetic(TypeTag t1, TypeTag t2) { return (IsArithmetic(t1) || IsArithmetic(t2)); }

// True if both tags are boolean types.
#define BothBool(t1, t2) (IsBool(t1) && IsBool(t2))
inline bool BothBool(TypeTag t1, TypeTag t2) { return (IsBool(t1) && IsBool(t2)); }

// True if both tags are interval types.
#define BothInterval(t1, t2) (IsInterval(t1) && IsInterval(t2))
inline bool BothInterval(TypeTag t1, TypeTag t2) { return (IsInterval(t1) && IsInterval(t2)); }

// True if both tags are string types.
#define BothString(t1, t2) (IsString(t1) && IsString(t2))
inline bool BothString(TypeTag t1, TypeTag t2) { return (IsString(t1) && IsString(t2)); }

// True if either tag is the error type.
#define EitherError(t1, t2) (IsErrorType(t1) || IsErrorType(t2))
inline bool EitherError(TypeTag t1, TypeTag t2) { return (IsErrorType(t1) || IsErrorType(t2)); }
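A short aside on the macro-to-function conversion above: the inline functions behave the same as the macros but constrain their argument to TypeTag, so a wrong argument becomes a compile error rather than a silently accepted expansion. A standalone sketch of the difference (the enum below is a stand-in, not Zeek's real TypeTag):

    #include <iostream>

    enum TypeTag { TYPE_INT, TYPE_COUNT, TYPE_COUNTER, TYPE_DOUBLE, TYPE_BOOL };

    // Macro version: no type checking; any expression that happens to compile is accepted.
    #define IsIntegralMacro(t) (t == TYPE_INT || t == TYPE_COUNT || t == TYPE_COUNTER)

    // Function version: the argument must be (convertible to) a TypeTag.
    inline bool IsIntegral(TypeTag t) { return t == TYPE_INT || t == TYPE_COUNT || t == TYPE_COUNTER; }

    int main()
        {
        std::cout << IsIntegral(TYPE_COUNT) << "\n";        // 1
        std::cout << IsIntegralMacro(TYPE_DOUBLE) << "\n";  // 0
        // IsIntegral(3.5) fails to compile (no implicit double-to-enum conversion),
        // while IsIntegralMacro(3.5) silently compares a double against enum values.
        return 0;
        }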
src/Val.cc: 156 changed lines
@@ -27,20 +27,7 @@

#include "broker/Data.h"

#include "3rdparty/json.hpp"
#include "3rdparty/tsl-ordered-map/ordered_map.h"


// Define a class for use with the json library that orders the keys in the same order that
// they were inserted. By default, the json library orders them alphabetically and we don't
// want it like that.
template<class Key, class T, class Ignore, class Allocator,
         class Hash = std::hash<Key>, class KeyEqual = std::equal_to<Key>,
         class AllocatorPair = typename std::allocator_traits<Allocator>::template rebind_alloc<std::pair<Key, T>>,
         class ValueTypeContainer = std::vector<std::pair<Key, T>, AllocatorPair>>
using ordered_map = tsl::ordered_map<Key, T, Hash, KeyEqual, AllocatorPair, ValueTypeContainer>;

using ZeekJson = nlohmann::basic_json<ordered_map>;
#include "threading/formatters/JSON.h"

Val::Val(Func* f)
    {
@@ -433,46 +420,56 @@ TableVal* Val::GetRecordFields()
    return rt->GetRecordFieldsVal(rv);
    }

// This is a static method in this file to avoid including json.hpp in Val.h since it's huge.
static ZeekJson BuildJSON(Val* val, bool only_loggable=false, RE_Matcher* re=nullptr)
// This is a static method in this file to avoid including rapidjson's headers in Val.h because they're huge.
static void BuildJSON(threading::formatter::JSON::NullDoubleWriter& writer, Val* val, bool only_loggable=false, RE_Matcher* re=nullptr, const string& key="")
    {
    // If the value wasn't set, return a nullptr. This will get turned into a 'null' in the json output.
    if ( ! val )
        return nullptr;
    if ( !key.empty() )
        writer.Key(key);

    ZeekJson j;
    // If the value wasn't set, write a null into the stream and return.
    if ( ! val )
        {
        writer.Null();
        return;
        }

    rapidjson::Value j;
    BroType* type = val->Type();
    switch ( type->Tag() )
        {
        case TYPE_BOOL:
            j = val->AsBool();
            writer.Bool(val->AsBool());
            break;

        case TYPE_INT:
            j = val->AsInt();
            writer.Int64(val->AsInt());
            break;

        case TYPE_COUNT:
            j = val->AsCount();
            writer.Uint64(val->AsCount());
            break;

        case TYPE_COUNTER:
            j = val->AsCounter();
            writer.Uint64(val->AsCounter());
            break;

        case TYPE_TIME:
            j = val->AsTime();
            writer.Double(val->AsTime());
            break;

        case TYPE_DOUBLE:
            j = val->AsDouble();
            writer.Double(val->AsDouble());
            break;

        case TYPE_PORT:
            {
            auto* pval = val->AsPortVal();
            j.emplace("port", pval->Port());
            j.emplace("proto", pval->Protocol());
            writer.StartObject();
            writer.Key("port");
            writer.Int64(pval->Port());
            writer.Key("proto");
            writer.String(pval->Protocol());
            writer.EndObject();
            break;
            }

@@ -484,7 +481,7 @@ static ZeekJson BuildJSON(Val* val, bool only_loggable=false, RE_Matcher* re=nul
            ODesc d;
            d.SetStyle(RAW_STYLE);
            val->Describe(&d);
            j = string(reinterpret_cast<const char*>(d.Bytes()), d.Len());
            writer.String(reinterpret_cast<const char*>(d.Bytes()), d.Len());
            break;
            }

@@ -496,7 +493,7 @@ static ZeekJson BuildJSON(Val* val, bool only_loggable=false, RE_Matcher* re=nul
            ODesc d;
            d.SetStyle(RAW_STYLE);
            val->Describe(&d);
            j = json_escape_utf8(string(reinterpret_cast<const char*>(d.Bytes()), d.Len()));
            writer.String(json_escape_utf8(string(reinterpret_cast<const char*>(d.Bytes()), d.Len())));
            break;
            }

@@ -506,9 +503,9 @@ static ZeekJson BuildJSON(Val* val, bool only_loggable=false, RE_Matcher* re=nul
            auto* tval = val->AsTableVal();

            if ( tval->Type()->IsSet() )
                j = ZeekJson::array();
                writer.StartArray();
            else
                j = ZeekJson::object();
                writer.StartObject();

            HashKey* k;
            TableEntryVal* entry;
@@ -524,102 +521,125 @@ static ZeekJson BuildJSON(Val* val, bool only_loggable=false, RE_Matcher* re=nul
                else
                    entry_key = lv->Ref();

                ZeekJson key_json = BuildJSON(entry_key, only_loggable, re);

                if ( tval->Type()->IsSet() )
                    j.emplace_back(std::move(key_json));
                    BuildJSON(writer, entry_key, only_loggable, re);
                else
                    {
                    Val* entry_value = entry->Value();
                    rapidjson::StringBuffer buffer;
                    threading::formatter::JSON::NullDoubleWriter key_writer(buffer);
                    BuildJSON(key_writer, entry_key, only_loggable, re);
                    string key_str = buffer.GetString();

                    string key_string;
                    if ( key_json.is_string() )
                        key_string = key_json;
                    else
                        key_string = key_json.dump();
                    if ( key_str.length() >= 2 &&
                         key_str[0] == '"' &&
                         key_str[key_str.length() - 1] == '"' )
                        // Strip quotes.
                        key_str = key_str.substr(1, key_str.length() - 2);

                    j.emplace(key_string, BuildJSON(entry_value, only_loggable, re));
                    BuildJSON(writer, entry->Value(), only_loggable, re, key_str);
                    }

                Unref(entry_key);
                Unref(lv);
                }

            if ( tval->Type()->IsSet() )
                writer.EndArray();
            else
                writer.EndObject();

            break;
            }

        case TYPE_RECORD:
            {
            j = ZeekJson::object();
            writer.StartObject();

            auto* rval = val->AsRecordVal();
            auto rt = rval->Type()->AsRecordType();

            for ( auto i = 0; i < rt->NumFields(); ++i )
                {
                auto field_name = rt->FieldName(i);
                std::string key_string;

                if ( re && re->MatchAnywhere(field_name) != 0 )
                    {
                    StringVal blank("");
                    StringVal fn_val(field_name);
                    auto key_val = fn_val.Substitute(re, &blank, 0)->AsStringVal();
                    key_string = key_val->ToStdString();
                    Unref(key_val);
                    }
                else
                    key_string = field_name;

                Val* value = rval->LookupWithDefault(i);

                if ( value && ( ! only_loggable || rt->FieldHasAttr(i, ATTR_LOG) ) )
                    j.emplace(key_string, BuildJSON(value, only_loggable, re));
                    {
                    string key_str;
                    auto field_name = rt->FieldName(i);

                    if ( re && re->MatchAnywhere(field_name) != 0 )
                        {
                        auto blank = make_intrusive<StringVal>("");
                        auto fn_val = make_intrusive<StringVal>(field_name);
                        auto key_val = fn_val->Substitute(re, blank.get(), 0)->AsStringVal();
                        key_str = key_val->ToStdString();
                        Unref(key_val);
                        }
                    else
                        key_str = field_name;

                    BuildJSON(writer, value, only_loggable, re, key_str);
                    }

                Unref(value);
                }

            writer.EndObject();
            break;
            }

        case TYPE_LIST:
            {
            j = ZeekJson::array();
            writer.StartArray();

            auto* lval = val->AsListVal();
            size_t size = lval->Length();
            for (size_t i = 0; i < size; i++)
                j.push_back(BuildJSON(lval->Index(i), only_loggable, re));
                BuildJSON(writer, lval->Index(i), only_loggable, re);

            writer.EndArray();
            break;
            }

        case TYPE_VECTOR:
            {
            j = ZeekJson::array();
            writer.StartArray();

            auto* vval = val->AsVectorVal();
            size_t size = vval->SizeVal()->AsCount();
            for (size_t i = 0; i < size; i++)
                j.push_back(BuildJSON(vval->Lookup(i), only_loggable, re));
                BuildJSON(writer, vval->Lookup(i), only_loggable, re);

            writer.EndArray();
            break;
            }

        case TYPE_OPAQUE:
            {
            writer.StartObject();

            writer.Key("opaque_type");
            auto* oval = val->AsOpaqueVal();
            j = { { "opaque_type", OpaqueMgr::mgr()->TypeID(oval) } };
            writer.String(OpaqueMgr::mgr()->TypeID(oval));

            writer.EndObject();
            break;
            }

        default: break;
        default:
            writer.Null();
            break;
        }

    return j;
    }

StringVal* Val::ToJSON(bool only_loggable, RE_Matcher* re)
    {
    ZeekJson j = BuildJSON(this, only_loggable, re);
    return new StringVal(j.dump());
    rapidjson::StringBuffer buffer;
    threading::formatter::JSON::NullDoubleWriter writer(buffer);

    BuildJSON(writer, this, only_loggable, re, "");

    return new StringVal(buffer.GetString());
    }

IntervalVal::IntervalVal(double quantity, double units) :
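One detail worth calling out in the TYPE_TABLE branch above (the "removing quotes around strings" tweak from the merge message): a table or set index is first rendered into its own StringBuffer, and if the rendered key is a quoted JSON string the surrounding quotes are stripped before it is reused as an object key, so keys don't end up double-quoted. A hypothetical standalone helper showing just that step (strip_outer_quotes is not a function in this diff):

    #include <string>

    // A key rendered by the writer as "\"orig_h\"" becomes the object key orig_h;
    // non-string keys (e.g. [1,2]) are left untouched.
    static std::string strip_outer_quotes(const std::string& key_str)
        {
        if ( key_str.length() >= 2 &&
             key_str.front() == '"' &&
             key_str.back() == '"' )
            return key_str.substr(1, key_str.length() - 2);

        return key_str;
        }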
@@ -12,9 +12,18 @@
#include <stdint.h>

#include "JSON.h"
#include "3rdparty/rapidjson/include/rapidjson/internal/ieee754.h"

using namespace threading::formatter;

bool JSON::NullDoubleWriter::Double(double d)
    {
    if ( rapidjson::internal::Double(d).IsNanOrInf() )
        return rapidjson::Writer<rapidjson::StringBuffer>::Null();

    return rapidjson::Writer<rapidjson::StringBuffer>::Double(d);
    }

JSON::JSON(MsgThread* t, TimeFormat tf) : Formatter(t), surrounding_braces(true)
    {
    timestamps = tf;
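The NullDoubleWriter above exists because JSON has no literal for NaN or infinity; rapidjson's stock Writer::Double() rejects such values by default (it returns false unless kWriteNanAndInfFlag is set), which would leave the record truncated. Overriding Double() to fall back to Null() keeps the output well-formed. A self-contained sketch of the same idea (using std::isnan/std::isinf instead of rapidjson's internal helper; the main() scaffolding is illustrative):

    #include <cmath>
    #include <iostream>

    #include "rapidjson/stringbuffer.h"
    #include "rapidjson/writer.h"

    // NaN/Inf doubles become JSON null instead of aborting the write.
    class NullDoubleWriter : public rapidjson::Writer<rapidjson::StringBuffer> {
    public:
        explicit NullDoubleWriter(rapidjson::StringBuffer& stream)
            : rapidjson::Writer<rapidjson::StringBuffer>(stream) {}

        bool Double(double d)
            {
            if ( std::isnan(d) || std::isinf(d) )
                return rapidjson::Writer<rapidjson::StringBuffer>::Null();

            return rapidjson::Writer<rapidjson::StringBuffer>::Double(d);
            }
    };

    int main()
        {
        rapidjson::StringBuffer buffer;
        NullDoubleWriter writer(buffer);

        writer.StartArray();
        writer.Double(3.14);
        writer.Double(NAN);
        writer.EndArray();

        std::cout << buffer.GetString() << "\n";   // [3.14,null]
        return 0;
        }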
@@ -27,21 +36,19 @@ JSON::~JSON()
bool JSON::Describe(ODesc* desc, int num_fields, const Field* const * fields,
                    Value** vals) const
    {
    ZeekJson j = ZeekJson::object();
    rapidjson::StringBuffer buffer;
    NullDoubleWriter writer(buffer);

    writer.StartObject();

    for ( int i = 0; i < num_fields; i++ )
        {
        if ( vals[i]->present )
            {
            ZeekJson new_entry = BuildJSON(vals[i]);
            if ( new_entry.is_null() )
                return false;

            j.emplace(fields[i]->name, new_entry);
            }
        BuildJSON(writer, vals[i], fields[i]->name);
        }

    desc->Add(j.dump());
    writer.EndObject();
    desc->Add(buffer.GetString());

    return true;
    }
@@ -54,14 +61,18 @@ bool JSON::Describe(ODesc* desc, Value* val, const string& name) const
        return false;
        }

    if ( ! val->present )
    if ( ! val->present || name.empty() )
        return true;

    ZeekJson j = BuildJSON(val, name);
    if ( j.is_null() )
        return false;
    rapidjson::Document doc;
    rapidjson::StringBuffer buffer;
    NullDoubleWriter writer(buffer);

    desc->Add(j.dump());
    writer.StartObject();
    BuildJSON(writer, val, name);
    writer.EndObject();

    desc->Add(buffer.GetString());
    return true;
    }

@@ -71,43 +82,47 @@ threading::Value* JSON::ParseValue(const string& s, const string& name, TypeTag
    return nullptr;
    }

ZeekJson JSON::BuildJSON(Value* val, const string& name) const
void JSON::BuildJSON(NullDoubleWriter& writer, Value* val, const string& name) const
    {
    // If the value wasn't set, return a nullptr. This will get turned into a 'null' in the json output.
    if ( ! val->present )
        return nullptr;
        {
        writer.Null();
        return;
        }

    if ( ! name.empty() )
        writer.Key(name);

    ZeekJson j;
    switch ( val->type )
        {
        case TYPE_BOOL:
            j = val->val.int_val != 0;
            writer.Bool(val->val.int_val != 0);
            break;

        case TYPE_INT:
            j = val->val.int_val;
            writer.Int64(val->val.int_val);
            break;

        case TYPE_COUNT:
        case TYPE_COUNTER:
            j = val->val.uint_val;
            writer.Uint64(val->val.uint_val);
            break;

        case TYPE_PORT:
            j = val->val.port_val.port;
            writer.Uint64(val->val.port_val.port);
            break;

        case TYPE_SUBNET:
            j = Formatter::Render(val->val.subnet_val);
            writer.String(Formatter::Render(val->val.subnet_val));
            break;

        case TYPE_ADDR:
            j = Formatter::Render(val->val.addr_val);
            writer.String(Formatter::Render(val->val.addr_val));
            break;

        case TYPE_DOUBLE:
        case TYPE_INTERVAL:
            j = val->val.double_val;
            writer.Double(val->val.double_val);
            break;

        case TYPE_TIME:
@@ -125,7 +140,7 @@ ZeekJson JSON::BuildJSON(Value* val, const string& name) const
                GetThread()->Error(GetThread()->Fmt("json formatter: failure getting time: (%lf)", val->val.double_val));
                // This was a failure, doesn't really matter what gets put here
                // but it should probably stand out...
                j = "2000-01-01T00:00:00.000000";
                writer.String("2000-01-01T00:00:00.000000");
                }
            else
                {
@@ -136,17 +151,17 @@ ZeekJson JSON::BuildJSON(Value* val, const string& name) const
                    frac += 1;

                snprintf(buffer2, sizeof(buffer2), "%s.%06.0fZ", buffer, fabs(frac) * 1000000);
                j = buffer2;
                writer.String(buffer2, strlen(buffer2));
                }
            }

        else if ( timestamps == TS_EPOCH )
            j = val->val.double_val;
            writer.Double(val->val.double_val);

        else if ( timestamps == TS_MILLIS )
            {
            // ElasticSearch uses milliseconds for timestamps
            j = (uint64_t) (val->val.double_val * 1000);
            writer.Uint64((uint64_t) (val->val.double_val * 1000));
            }

        break;
@@ -157,36 +172,34 @@ ZeekJson JSON::BuildJSON(Value* val, const string& name) const
        case TYPE_FILE:
        case TYPE_FUNC:
            {
            j = json_escape_utf8(string(val->val.string_val.data, val->val.string_val.length));
            writer.String(json_escape_utf8(string(val->val.string_val.data, val->val.string_val.length)));
            break;
            }

        case TYPE_TABLE:
            {
            j = ZeekJson::array();
            writer.StartArray();

            for ( int idx = 0; idx < val->val.set_val.size; idx++ )
                j.push_back(BuildJSON(val->val.set_val.vals[idx]));
                BuildJSON(writer, val->val.set_val.vals[idx]);

            writer.EndArray();
            break;
            }

        case TYPE_VECTOR:
            {
            j = ZeekJson::array();
            writer.StartArray();

            for ( int idx = 0; idx < val->val.vector_val.size; idx++ )
                j.push_back(BuildJSON(val->val.vector_val.vals[idx]));
                BuildJSON(writer, val->val.vector_val.vals[idx]);

            writer.EndArray();
            break;
            }

        default:
            reporter->Warning("Unhandled type in JSON::BuildJSON");
            break;
        }

    if ( ! name.empty() && ! j.is_null() )
        return { { name, j } };

    return j;
    }
@@ -2,24 +2,14 @@

#pragma once

#include "../Formatter.h"
#include "3rdparty/json.hpp"
#include "3rdparty/tsl-ordered-map/ordered_map.h"
#define RAPIDJSON_HAS_STDSTRING 1
#include "3rdparty/rapidjson/include/rapidjson/document.h"
#include "3rdparty/rapidjson/include/rapidjson/writer.h"

#include "../Formatter.h"

namespace threading { namespace formatter {

// Define a class for use with the json library that orders the keys in the same order that
// they were inserted. By default, the json library orders them alphabetically and we don't
// want it like that.
template<class Key, class T, class Ignore, class Allocator,
         class Hash = std::hash<Key>, class KeyEqual = std::equal_to<Key>,
         class AllocatorPair = typename std::allocator_traits<Allocator>::template rebind_alloc<std::pair<Key, T>>,
         class ValueTypeContainer = std::vector<std::pair<Key, T>, AllocatorPair>>
using ordered_map = tsl::ordered_map<Key, T, Hash, KeyEqual, AllocatorPair, ValueTypeContainer>;

using ZeekJson = nlohmann::basic_json<ordered_map>;

/**
 * A thread-safe class for converting values into a JSON representation
 * and vice versa.
@@ -40,9 +30,14 @@ public:
                  threading::Value** vals) const override;
    threading::Value* ParseValue(const string& s, const string& name, TypeTag type, TypeTag subtype = TYPE_ERROR) const override;

private:
    class NullDoubleWriter : public rapidjson::Writer<rapidjson::StringBuffer> {
    public:
        NullDoubleWriter(rapidjson::StringBuffer& stream) : rapidjson::Writer<rapidjson::StringBuffer>(stream) {}
        bool Double(double d);
    };

    ZeekJson BuildJSON(Value* val, const string& name = "") const;
private:
    void BuildJSON(NullDoubleWriter& writer, Value* val, const string& name = "") const;

    TimeFormat timestamps;
    bool surrounding_braces;
src/util.cc: 100 changed lines
@@ -2190,54 +2190,112 @@ TEST_CASE("util json_escape_utf8")
    CHECK(json_escape_utf8("string") == "string");
    CHECK(json_escape_utf8("string\n") == "string\n");
    CHECK(json_escape_utf8("string\x82") == "string\\x82");
    CHECK(json_escape_utf8("\x07\xd4\xb7o") == "\\x07Էo");

    // These strings are duplicated from the scripts.base.frameworks.logging.ascii-json-utf8 btest

    // Valid ASCII and valid ASCII control characters
    CHECK(json_escape_utf8("a") == "a");
    CHECK(json_escape_utf8("\b\f\n\r\t\x00\x15") == "\b\f\n\r\t\x00\x15");

    // Table 3-7 in https://www.unicode.org/versions/Unicode12.0.0/ch03.pdf describes what is
    // valid and invalid for the tests below

    // Valid 2 Octet Sequence
    CHECK(json_escape_utf8("\xc3\xb1") == "\xc3\xb1");

    // Invalid 2 Octet Sequence
    CHECK(json_escape_utf8("\xc3\x28") == "\\xc3(");
    CHECK(json_escape_utf8("\xc0\x81") == "\\xc0\\x81");
    CHECK(json_escape_utf8("\xc1\x81") == "\\xc1\\x81");
    CHECK(json_escape_utf8("\xc2\xcf") == "\\xc2\\xcf");

    // Invalid Sequence Identifier
    CHECK(json_escape_utf8("\xa0\xa1") == "\\xa0\\xa1");

    // Valid 3 Octet Sequence
    CHECK(json_escape_utf8("\xe2\x82\xa1") == "\xe2\x82\xa1");
    CHECK(json_escape_utf8("\xe0\xa3\xa1") == "\xe0\xa3\xa1");

    // Invalid 3 Octet Sequence (in 2nd Octet)
    CHECK(json_escape_utf8("\xe0\x80\xa1") == "\\xe0\\x80\\xa1");
    CHECK(json_escape_utf8("\xe2\x28\xa1") == "\\xe2(\\xa1");
    CHECK(json_escape_utf8("\xed\xa0\xa1") == "\\xed\\xa0\\xa1");

    // Invalid 3 Octet Sequence (in 3rd Octet)
    CHECK(json_escape_utf8("\xe2\x82\x28") == "\\xe2\\x82(");

    // Valid 4 Octet Sequence
    CHECK(json_escape_utf8("\xf0\x90\x8c\xbc") == "\xf0\x90\x8c\xbc");
    CHECK(json_escape_utf8("\xf1\x80\x8c\xbc") == "\xf1\x80\x8c\xbc");
    CHECK(json_escape_utf8("\xf4\x80\x8c\xbc") == "\xf4\x80\x8c\xbc");

    // Invalid 4 Octet Sequence (in 2nd Octet)
    CHECK(json_escape_utf8("\xf0\x80\x8c\xbc") == "\\xf0\\x80\\x8c\\xbc");
    CHECK(json_escape_utf8("\xf2\x28\x8c\xbc") == "\\xf2(\\x8c\\xbc");
    CHECK(json_escape_utf8("\xf4\x90\x8c\xbc") == "\\xf4\\x90\\x8c\\xbc");

    // Invalid 4 Octet Sequence (in 3rd Octet)
    CHECK(json_escape_utf8("\xf0\x90\x28\xbc") == "\\xf0\\x90(\\xbc");

    // Invalid 4 Octet Sequence (in 4th Octet)
    CHECK(json_escape_utf8("\xf0\x28\x8c\x28") == "\\xf0(\\x8c(");

    // Invalid 4 Octet Sequence (too short)
    CHECK(json_escape_utf8("\xf4\x80\x8c") == "\\xf4\\x80\\x8c");
    CHECK(json_escape_utf8("\xf0") == "\\xf0");
    }

string json_escape_utf8(const string& val)
    {
    string result;
    result.reserve(val.length());

    auto val_data = reinterpret_cast<const unsigned char*>(val.c_str());
    auto val_size = val.length();

    // Reserve at least the size of the existing string to avoid resizing the string in the best-case
    // scenario where we don't have any multi-byte characters.
    string result;
    result.reserve(val_size);

    size_t idx;
    for ( idx = 0; idx < val.length(); )
    for ( idx = 0; idx < val_size; )
        {
        // Normal ASCII characters plus a few of the control characters can be inserted directly. The rest of
        // the control characters should be escaped as regular bytes.
        if ( ( val[idx] >= 32 && val[idx] <= 127 ) ||
             val[idx] == '\b' || val[idx] == '\f' || val[idx] == '\n' || val[idx] == '\r' || val[idx] == '\t' )
        const char ch = val[idx];

        // Normal ASCII characters plus a few of the control characters can be inserted directly. The
        // rest of the control characters should be escaped as regular bytes.
        if ( ( ch >= 32 && ch <= 127 ) ||
             ch == '\b' || ch == '\f' || ch == '\n' || ch == '\r' || ch == '\t' )
            {
            result.push_back(val[idx]);
            result.push_back(ch);
            ++idx;
            continue;
            }
        else if ( val[idx] >= 0 && val[idx] < 32 )
        else if ( ch >= 0 && ch < 32 )
            {
            result.append(json_escape_byte(val[idx]));
            result.append(json_escape_byte(ch));
            ++idx;
            continue;
            }

        // Find out how long the next character should be.
        unsigned int char_size = getNumBytesForUTF8(val[idx]);
        unsigned int char_size = getNumBytesForUTF8(ch);

        // If it says that it's a single character or it's not an invalid string UTF8 sequence, insert the one
        // escaped byte into the string, step forward one, and go to the next character.
        if ( char_size == 0 || idx+char_size > val.length() || isLegalUTF8Sequence(val_data+idx, val_data+idx+char_size) == 0 )
        // If it says that it's a single character or it's not an valid string UTF8 sequence, insert
        // the one escaped byte into the string, step forward one, and go to the next character.
        if ( char_size == 0 || idx+char_size > val_size || isLegalUTF8Sequence(val_data+idx, val_data+idx+char_size) == 0 )
            {
            result.append(json_escape_byte(val[idx]));
            result.append(json_escape_byte(ch));
            ++idx;
            continue;
            }

        for ( size_t step = 0; step < char_size; step++, idx++ )
            result.push_back(val[idx]);
        result.append(val, idx, char_size);
        idx += char_size;
        }

    // Insert any of the remaining bytes into the string as escaped bytes
    if ( idx != val.length() )
        for ( ; idx < val.length(); ++idx )
            result.append(json_escape_byte(val[idx]));
    for ( ; idx < val_size; ++idx )
        result.append(json_escape_byte(val[idx]));

    return result;
    }
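A quick note on the copy optimization near the end of the loop above: once a multi-byte sequence has been validated, std::string::append(const std::string&, size_t pos, size_t count) copies it in one call instead of pushing each byte individually. A tiny standalone illustration (the values below are made up for the example):

    #include <cassert>
    #include <string>

    int main()
        {
        const std::string val = "abc\xc3\xb1" "def";  // "ñ" encoded as the two bytes 0xc3 0xb1
        std::string result;

        size_t idx = 3;        // offset of the validated UTF-8 sequence
        size_t char_size = 2;  // byte length reported for that sequence

        // One call copies both bytes, equivalent to push_back'ing each byte in a loop.
        result.append(val, idx, char_size);
        assert(result == "\xc3\xb1");

        return 0;
        }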
@@ -118,7 +118,7 @@ std::string extract_ip_and_len(const std::string& i, int* len);

inline void bytetohex(unsigned char byte, char* hex_out)
    {
    static const char hex_chars[] = "0123456789abcdef";
    static constexpr char hex_chars[] = "0123456789abcdef";
    hex_out[0] = hex_chars[(byte & 0xf0) >> 4];
    hex_out[1] = hex_chars[byte & 0x0f];
    }
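The constexpr change above only strengthens the lookup table to a compile-time constant; behaviour and usage are unchanged. A small usage sketch of bytetohex as declared here (the main() harness is illustrative):

    #include <iostream>

    inline void bytetohex(unsigned char byte, char* hex_out)
        {
        static constexpr char hex_chars[] = "0123456789abcdef";
        hex_out[0] = hex_chars[(byte & 0xf0) >> 4];
        hex_out[1] = hex_chars[byte & 0x0f];
        }

    int main()
        {
        char hex[3] = { 0 };   // bytetohex writes exactly two characters and no terminator
        bytetohex(0xc3, hex);
        std::cout << hex << "\n";   // prints "c3"
        return 0;
        }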