Add proper JSON serialization via C++, deprecate json.zeek

This commit is contained in:
Tim Wojtulewicz 2019-07-02 12:22:13 -07:00
parent 385de9b0e7
commit 33a7927704
5 changed files with 244 additions and 119 deletions

View file

@ -3,7 +3,6 @@
@load base/frameworks/openflow
@load base/utils/active-http
@load base/utils/exec
@load base/utils/json
module OpenFlow;

View file

@ -1,109 +1,2 @@
##! Functions to assist with generating JSON data from Zeek data structures.
# We might want to implement this in core sometime, this looks... hacky at best.
@load base/utils/strings
## A function to convert arbitrary Zeek data into a JSON string.
##
## v: The value to convert to JSON. Typically a record.
##
## only_loggable: If the v value is a record this will only cause
##                fields with the &log attribute to be included in the JSON.
##
## field_escape_pattern: If the v value is a record, the part of each field
##                       name that matches this pattern is removed (via sub)
##                       before the name is emitted as a JSON key. The pattern
##                       is applied at every nesting level.
##
## returns: a JSON formatted string.
function to_json(v: any, only_loggable: bool &default=F, field_escape_pattern: pattern &default=/^_/): string
	{
	local tn = type_name(v);

	# Scalars are dispatched on the textual type name.
	switch ( tn )
		{
		case "type":
		return "";

		case "string":
		# Escape backslashes first, then double quotes, and wrap in quotes.
		return cat("\"", gsub(gsub(clean(v), /\\/, "\\\\"), /\"/, "\\\""), "\"");

		case "port":
		# Ports are emitted as their bare numeric value.
		return cat(port_to_count(to_port(cat(v))));

		case "enum":
		fallthrough;
		case "interval":
		fallthrough;
		case "addr":
		fallthrough;
		case "subnet":
		return cat("\"", v, "\"");

		case "int":
		fallthrough;
		case "count":
		fallthrough;
		case "time":
		return cat(v);

		case "double":
		return fmt("%.16g", v);

		case "bool":
		local bval: bool = v;
		return bval ? "true" : "false";

		default:
		break;
		}

	if ( /^record/ in tn )
		{
		local rec_parts: string_vec = vector();

		local ft = record_fields(v);
		for ( field, field_desc in ft )
			{
			# replace the escape pattern in the field.
			if( field_escape_pattern in field )
				field = cat(sub(field, field_escape_pattern, ""));

			# Skip unset fields and, if requested, fields without &log.
			if ( field_desc?$value && (!only_loggable || field_desc$log) )
				{
				# Pass the caller's escape pattern down so that nested
				# records are rendered with the same pattern instead of
				# silently reverting to the default.
				local onepart = cat("\"", field, "\": ",
				                    to_json(field_desc$value, only_loggable, field_escape_pattern));
				rec_parts += onepart;
				}
			}
		return cat("{", join_string_vec(rec_parts, ", "), "}");
		}

	# None of the following are supported.
	else if ( /^set/ in tn )
		{
		local set_parts: string_vec = vector();
		local sa: set[bool] = v;
		for ( sv in sa )
			{
			set_parts += to_json(sv, only_loggable, field_escape_pattern);
			}
		return cat("[", join_string_vec(set_parts, ", "), "]");
		}
	else if ( /^table/ in tn )
		{
		local tab_parts: vector of string = vector();
		local ta: table[bool] of any = v;
		for ( ti, tv in ta )
			{
			# As before, keys are rendered without the only_loggable
			# filter; only the escape pattern is forwarded.
			local ts = to_json(ti, F, field_escape_pattern);
			# JSON object keys must be strings: add quotes unless the
			# rendered key already starts with one.
			local if_quotes = (ts[0] == "\"") ? "" : "\"";
			tab_parts += cat(if_quotes, ts, if_quotes, ": ",
			                 to_json(tv, only_loggable, field_escape_pattern));
			}
		return cat("{", join_string_vec(tab_parts, ", "), "}");
		}
	else if ( /^vector/ in tn )
		{
		local vec_parts: string_vec = vector();
		local va: vector of any = v;
		for ( vi in va )
			{
			vec_parts += to_json(va[vi], only_loggable, field_escape_pattern);
			}
		return cat("[", join_string_vec(vec_parts, ", "), "]");
		}

	# Anything not handled above becomes an empty JSON string.
	return "\"\"";
	}
## This file is deprecated in favor of to_json in zeek.bif
@deprecated="Remove in 3.1. to_json is now always available as a built-in function."

View file

@ -27,6 +27,16 @@
#include "broker/Data.h"
#include "3rdparty/json.hpp"
#include "3rdparty/fifo_map.hpp"
// Define a class for use with the json library that orders the keys in the same order that
// they were inserted. By default, the json library orders them alphabetically and we don't
// want it like that.
//
// json_fifo_map takes the four template parameters (key, value, comparator, allocator)
// but deliberately ignores the supplied comparator and always substitutes
// nlohmann::fifo_map_compare<K>. ZeekJson is the basic_json instantiation that uses
// this map as its object type.
template<class K, class V, class compare, class A>
using json_fifo_map = nlohmann::fifo_map<K, V, nlohmann::fifo_map_compare<K>, A>;
using ZeekJson = nlohmann::basic_json<json_fifo_map>;
Val::Val(Func* f)
{
val.func_val = f;
@ -444,7 +454,208 @@ TableVal* Val::GetRecordFields()
}
return fields;
}
// This is a static method in this file to avoid including json.hpp in Val.h since it's huge.
static ZeekJson BuildJSON(Val* val, bool only_loggable=false, RE_Matcher* re=new RE_Matcher("^_"))
{
ZeekJson j;
BroType* type = val->Type();
switch ( type->Tag() )
{
case TYPE_BOOL:
j = val->AsBool();
break;
case TYPE_INT:
j = val->AsInt();
break;
case TYPE_COUNT:
j = val->AsCount();
break;
case TYPE_COUNTER:
j = val->AsCounter();
break;
case TYPE_TIME:
j = val->AsTime();
break;
case TYPE_DOUBLE:
j = val->AsDouble();
break;
case TYPE_PORT:
{
auto* pval = val->AsPortVal();
j["port"] = pval->Port();
j["proto"] = pval->Protocol();
break;
}
case TYPE_PATTERN:
case TYPE_INTERVAL:
case TYPE_ADDR:
case TYPE_SUBNET:
{
ODesc d;
d.SetStyle(RAW_STYLE);
val->Describe(&d);
auto* bs = new BroString(1, d.TakeBytes(), d.Len());
j = string((char*)bs->Bytes(), bs->Len());
delete bs;
break;
}
case TYPE_FILE:
case TYPE_FUNC:
case TYPE_ENUM:
case TYPE_STRING:
{
ODesc d;
d.SetStyle(RAW_STYLE);
val->Describe(&d);
auto* bs = new BroString(1, d.TakeBytes(), d.Len());
j = json_escape_utf8(string((char*)bs->Bytes(), bs->Len()));
delete bs;
break;
}
case TYPE_TABLE:
{
auto* table = val->AsTable();
auto* tval = val->AsTableVal();
if ( tval->Type()->IsSet() )
j = ZeekJson::array();
else
j = ZeekJson::object();
HashKey* k;
auto c = table->InitForIteration();
while ( table->NextEntry(k, c) )
{
auto lv = tval->RecoverIndex(k);
delete k;
if ( tval->Type()->IsSet() )
{
auto* value = lv->Index(0)->Ref();
j.push_back(BuildJSON(value, only_loggable, re));
Unref(value);
}
else
{
ZeekJson key_json;
Val* entry_value;
if ( lv->Length() == 1 )
{
Val* entry_key = lv->Index(0)->Ref();
entry_value = tval->Lookup(entry_key, true);
key_json = BuildJSON(entry_key, only_loggable, re);
Unref(entry_key);
}
else
{
entry_value = tval->Lookup(lv, true);
key_json = BuildJSON(lv, only_loggable, re);
}
string key_string;
if ( key_json.is_string() )
key_string = key_json;
else
key_string = key_json.dump();
j[key_string] = BuildJSON(entry_value, only_loggable, re);
}
Unref(lv);
}
break;
}
case TYPE_RECORD:
{
j = ZeekJson::object();
auto* rval = val->AsRecordVal();
TableVal* fields = rval->GetRecordFields();
auto* field_indexes = fields->ConvertToPureList();
int num_indexes = field_indexes->Length();
for ( int i = 0; i < num_indexes; ++i )
{
Val* key = field_indexes->Index(i);
auto* key_field = fields->Lookup(key)->AsRecordVal();
auto* key_val = key->AsStringVal();
string key_string;
if ( re->MatchAnywhere(key_val->AsString()) != 0 )
{
key_val = key_val->Substitute(re, new StringVal(""), 0)->AsStringVal();
key_string = key_val->ToStdString();
delete key_val;
}
else
key_string = key_val->ToStdString();
Val* value = key_field->Lookup("value", true);
if ( value && ( ! only_loggable || key_field->Lookup("log")->AsBool() ) )
j[key_string] = BuildJSON(value, only_loggable, re);
}
delete fields;
break;
}
case TYPE_LIST:
{
j = ZeekJson::array();
auto* lval = val->AsListVal();
size_t size = lval->Length();
for (size_t i = 0; i < size; i++)
j.push_back(BuildJSON(lval->Index(i), only_loggable, re));
break;
}
case TYPE_VECTOR:
{
j = ZeekJson::array();
auto* vval = val->AsVectorVal();
size_t size = vval->SizeVal()->AsCount();
for (size_t i = 0; i < size; i++)
j.push_back(BuildJSON(vval->Lookup(i), only_loggable, re));
break;
}
case TYPE_OPAQUE:
{
j = ZeekJson::object();
auto* oval = val->AsOpaqueVal();
j["opaque_type"] = OpaqueMgr::mgr()->TypeID(oval);
break;
}
default: break;
}
return j;
}
// Serializes this value to JSON text and wraps it in a newly allocated
// StringVal. The conversion itself is done by the file-local BuildJSON().
StringVal* Val::ToJSON(bool only_loggable, RE_Matcher* re)
	{
	return new StringVal(BuildJSON(this, only_loggable, re).dump());
	}
IntervalVal::IntervalVal(double quantity, double units) :
@ -558,6 +769,18 @@ uint32 PortVal::Port() const
return p & ~PORT_SPACE_MASK;
}
// Returns the lower-case name of this port's protocol: "udp", "tcp",
// "icmp", or "unknown" when none of the three predicates holds.
string PortVal::Protocol() const
	{
	// Checked in the order UDP, TCP, ICMP; the first hit wins.
	const char* proto_name =
		IsUDP()  ? "udp"  :
		IsTCP()  ? "tcp"  :
		IsICMP() ? "icmp" :
		           "unknown";

	return proto_name;
	}
int PortVal::IsTCP() const
{
return (val.uint_val & PORT_SPACE_MASK) == TCP_PORT_MASK;
@ -577,14 +800,8 @@ void PortVal::ValDescribe(ODesc* d) const
{
uint32 p = static_cast<uint32>(val.uint_val);
d->Add(p & ~PORT_SPACE_MASK);
if ( IsUDP() )
d->Add("/udp");
else if ( IsTCP() )
d->Add("/tcp");
else if ( IsICMP() )
d->Add("/icmp");
else
d->Add("/unknown");
d->Add("/");
d->Add(Protocol());
}
Val* PortVal::DoClone(CloneState* state)

View file

@ -20,6 +20,7 @@
#include "Notifier.h"
#include "IPAddr.h"
#include "DebugLogger.h"
#include "RE.h"
// We have four different port name spaces: TCP, UDP, ICMP, and UNKNOWN.
// We distinguish between them based on the bits specified in the *_PORT_MASK
@ -34,7 +35,6 @@
class Val;
class Func;
class BroFile;
class RE_Matcher;
class PrefixTable;
class PortVal;
@ -349,6 +349,8 @@ public:
TableVal* GetRecordFields();
// Serializes this value to JSON and returns the text as a newly allocated
// StringVal. If only_loggable is true, record fields are emitted only when
// their "log" flag is set. The matcher re is applied to record field names;
// the matching part is replaced with an empty string in the JSON key.
// NOTE(review): the default argument allocates a new RE_Matcher on each call
// that omits it; nothing visible here frees it -- confirm ownership.
StringVal* ToJSON(bool only_loggable=false, RE_Matcher* re=new RE_Matcher("^_"));
protected:
friend class EnumType;
@ -532,6 +534,7 @@ public:
// Returns the port number in host order (not including the mask).
uint32 Port() const;
// Returns the protocol name as a string: "udp", "tcp", "icmp" or "unknown".
string Protocol() const;
// Tests for protocol types.
int IsTCP() const;

View file

@ -5039,3 +5039,16 @@ function anonymize_addr%(a: addr, cl: IPAddrAnonymizationClass%): addr
(enum ip_addr_anonymization_class_t) anon_class));
}
%}
## A function to convert arbitrary Zeek data into a JSON string.
##
## val: The value to convert to JSON. Typically a record.
##
## only_loggable: If the val value is a record this will only cause
##                fields with the &log attribute to be included in the JSON.
##
## field_escape_pattern: If the val value is a record, the part of each field
##                       name that matches this pattern is removed from the
##                       name used as the JSON key.
##
## returns: a JSON formatted string.
function to_json%(val: any, only_loggable: bool &default=F, field_escape_pattern: pattern &default=/^_/%): string
%{
return val->ToJSON(only_loggable, field_escape_pattern);
%}