Implement from_json bif

This commit is contained in:
Fupeng Zhao 2023-04-16 14:45:19 +00:00 committed by mAsk°
parent b739bdbbfb
commit 584e68434d
27 changed files with 531 additions and 1 deletions

View file

@ -1093,6 +1093,14 @@ type entropy_test_result: record {
serial_correlation: double; ##< Serial correlation coefficient. serial_correlation: double; ##< Serial correlation coefficient.
}; };
## Return type for the from_json BIF.
##
## .. zeek:see:: from_json
type from_json_result: record {
v: any &optional; ##< Parsed value; left unset when parsing was not successful.
valid: bool; ##< True if parsing was successful.
};
# TCP values for :zeek:see:`endpoint` *state* field. # TCP values for :zeek:see:`endpoint` *state* field.
# todo:: these should go into an enum to make them autodoc'able. # todo:: these should go into an enum to make them autodoc'able.
const TCP_INACTIVE = 0; ##< Endpoint is still inactive. const TCP_INACTIVE = 0; ##< Endpoint is still inactive.

View file

@ -6,6 +6,8 @@
#include <netdb.h> #include <netdb.h>
#include <netinet/in.h> #include <netinet/in.h>
#include <rapidjson/document.h>
#include <rapidjson/error/en.h>
#include <sys/param.h> #include <sys/param.h>
#include <sys/types.h> #include <sys/types.h>
#include <unistd.h> #include <unistd.h>
@ -1061,6 +1063,305 @@ StringValPtr StringVal::Replace(RE_Matcher* re, const String& repl, bool do_all)
return make_intrusive<StringVal>(new String(true, result, r - result)); return make_intrusive<StringVal>(new String(true, result, r - result));
} }
// Converts a parsed JSON value into a Zeek value of the requested type.
//
// j: The RapidJSON value to convert.
// t: The Zeek type the result should have.
//
// Returns a ValPtr on success, or a std::string holding an error message if the JSON value
// cannot be represented as type t. A JSON null yields a nil ValPtr: set and vector elements
// that are null are skipped, null record fields are treated like missing fields, and a null
// component inside a multi-index set entry is rejected.
static std::variant<ValPtr, std::string> BuildVal(const rapidjson::Value& j, const TypePtr& t)
	{
	if ( j.IsNull() )
		return Val::nil;

	switch ( t->Tag() )
		{
		case TYPE_BOOL:
			{
			if ( ! j.IsBool() )
				goto mismatch_err;

			return val_mgr->Bool(j.GetBool());
			}

		case TYPE_INT:
			{
			if ( ! j.IsInt64() )
				goto mismatch_err;

			return val_mgr->Int(j.GetInt64());
			}

		case TYPE_COUNT:
			{
			if ( ! j.IsUint64() )
				goto mismatch_err;

			return val_mgr->Count(j.GetUint64());
			}

		case TYPE_TIME:
			{
			if ( ! j.IsNumber() )
				goto mismatch_err;

			return make_intrusive<TimeVal>(j.GetDouble());
			}

		case TYPE_DOUBLE:
			{
			if ( ! j.IsNumber() )
				goto mismatch_err;

			return make_intrusive<DoubleVal>(j.GetDouble());
			}

		case TYPE_INTERVAL:
			{
			if ( ! j.IsNumber() )
				goto mismatch_err;

			return make_intrusive<IntervalVal>(j.GetDouble());
			}

		case TYPE_PORT:
			{
			if ( ! j.IsString() )
				goto mismatch_err;

			if ( j.GetStringLength() > 0 && j.GetStringLength() < 10 )
				{
				const char* start = j.GetString();
				char* slash = nullptr;
				errno = 0;
				long port = strtol(start, &slash, 10);

				// The number must start with a digit, be a valid port, and be followed
				// by a literal '/'. Without the separator check, an input such as "80"
				// leaves `slash` on the terminating NUL and stepping past it would read
				// beyond the end of the string.
				bool starts_with_digit = start[0] >= '0' && start[0] <= '9';

				if ( ! errno && starts_with_digit && *slash == '/' && port >= 0 &&
				     port <= 65535 )
					{
					const char* proto = slash + 1;
					auto port_num = static_cast<uint32_t>(port);

					if ( util::streq(proto, "tcp") )
						return val_mgr->Port(port_num, TRANSPORT_TCP);
					else if ( util::streq(proto, "udp") )
						return val_mgr->Port(port_num, TRANSPORT_UDP);
					else if ( util::streq(proto, "icmp") )
						return val_mgr->Port(port_num, TRANSPORT_ICMP);
					else if ( util::streq(proto, "unknown") )
						return val_mgr->Port(port_num, TRANSPORT_UNKNOWN);
					}
				}

			return "wrong port format, must be /[0-9]{1,5}\\/(tcp|udp|icmp|unknown)/";
			}

		case TYPE_PATTERN:
			{
			if ( ! j.IsString() )
				goto mismatch_err;

			std::string candidate(j.GetString(), j.GetStringLength());

			if ( candidate.size() > 2 && candidate.front() == candidate.back() &&
			     candidate.back() == '/' )
				{
				// Remove the '/'s
				candidate.erase(0, 1);
				candidate.erase(candidate.size() - 1);
				}

			// Hold the matcher in a unique_ptr so it is freed if compilation fails.
			auto re = std::make_unique<RE_Matcher>(candidate.c_str());

			if ( ! re->Compile() )
				return "error compiling pattern";

			return make_intrusive<PatternVal>(re.release());
			}

		case TYPE_ADDR:
		case TYPE_SUBNET:
			{
			if ( ! j.IsString() )
				goto mismatch_err;

			const bool is_addr = t->Tag() == TYPE_ADDR;
			int width = 0;
			std::string candidate;

			if ( is_addr )
				candidate = std::string(j.GetString(), j.GetStringLength());
			else
				{
				std::string_view subnet_sv(j.GetString(), j.GetStringLength());
				auto pos = subnet_sv.find('/');

				if ( pos == subnet_sv.npos )
					return util::fmt("Invalid value for subnet: %s", j.GetString());

				candidate = std::string(j.GetString(), pos);

				errno = 0;
				char* end;
				long parsed_width = strtol(subnet_sv.data() + pos + 1, &end, 10);

				// Trailing characters after the width are tolerated so that the
				// bracketed form "[::1/128]" keeps working; the width itself must be
				// present and must fit the widest (IPv6) prefix.
				if ( subnet_sv.data() + pos + 1 == end || errno || parsed_width < 0 ||
				     parsed_width > 128 )
					return util::fmt("Invalid value for subnet: %s", j.GetString());

				width = static_cast<int>(parsed_width);
				}

			// Strip optional brackets. The emptiness checks matter: front()/back() on
			// an empty string (e.g. JSON "" or "/24") is undefined behaviour.
			if ( ! candidate.empty() && candidate.front() == '[' )
				candidate.erase(0, 1);

			if ( ! candidate.empty() && candidate.back() == ']' )
				candidate.erase(candidate.size() - 1);

			if ( candidate.empty() )
				return util::fmt("Invalid value for %s: %s", is_addr ? "addr" : "subnet",
				                 j.GetString());

			if ( is_addr )
				return make_intrusive<AddrVal>(candidate);
			else
				return make_intrusive<SubNetVal>(candidate.c_str(), width);
			}

		case TYPE_ENUM:
			{
			if ( ! j.IsString() )
				goto mismatch_err;

			auto et = t->AsEnumType();
			auto intval = et->Lookup({j.GetString(), j.GetStringLength()});

			if ( intval < 0 )
				return util::fmt("'%s' is not a valid enum for '%s'.", j.GetString(),
				                 et->GetName().c_str());

			return et->GetEnumVal(intval);
			}

		case TYPE_STRING:
			{
			if ( ! j.IsString() )
				goto mismatch_err;

			return make_intrusive<StringVal>(j.GetStringLength(), j.GetString());
			}

		case TYPE_TABLE:
			{
			if ( ! j.IsArray() )
				goto mismatch_err;

			// Only sets are supported; tables with a yield type are rejected.
			if ( ! t->IsSet() )
				goto unsupport_err;

			auto tt = t->AsSetType();
			auto tl = tt->GetIndices();
			auto tv = make_intrusive<TableVal>(IntrusivePtr{NewRef{}, tt});

			for ( const auto& item : j.GetArray() )
				{
				std::variant<ValPtr, std::string> v;

				// A single-index set stores its elements directly; a multi-index
				// set expects each element to be an array handled by TYPE_LIST.
				if ( tl->GetTypes().size() == 1 )
					v = BuildVal(item, tl->GetPureType());
				else
					v = BuildVal(item, tl);

				if ( ! get_if<ValPtr>(&v) )
					return v;

				// Null elements are skipped.
				if ( ! std::get<ValPtr>(v) )
					continue;

				tv->Assign(std::move(std::get<ValPtr>(v)), nullptr);
				}

			return tv;
			}

		case TYPE_RECORD:
			{
			if ( ! j.IsObject() )
				goto mismatch_err;

			auto rt = t->AsRecordType();
			auto rv = make_intrusive<RecordVal>(IntrusivePtr{NewRef{}, rt});

			for ( int i = 0; i < rt->NumFields(); ++i )
				{
				auto td_i = rt->FieldDecl(i);
				auto m_it = j.FindMember(td_i->id);
				bool has_member = m_it != j.MemberEnd();
				bool member_is_null = has_member ? m_it->value.IsNull() : true;

				if ( ! has_member || member_is_null )
					{
					// Missing/null is only acceptable for &optional or &default fields.
					if ( ! td_i->GetAttr(detail::ATTR_OPTIONAL) &&
					     ! td_i->GetAttr(detail::ATTR_DEFAULT) )
						return util::fmt("Record '%s' field '%s' is null or missing",
						                 t->GetName().c_str(), td_i->id);

					continue;
					}

				auto v = BuildVal(m_it->value, td_i->type);

				if ( ! get_if<ValPtr>(&v) )
					return v;

				rv->Assign(i, std::move(std::get<ValPtr>(v)));
				}

			return rv;
			}

		case TYPE_LIST:
			{
			if ( ! j.IsArray() )
				goto mismatch_err;

			auto lt = t->AsTypeList();
			const auto& types = lt->GetTypes();
			auto arr = j.GetArray();

			// The tuple must have exactly one component per index type; surplus
			// components used to be dropped silently.
			if ( arr.Size() != types.size() )
				return "index type doesn't match";

			auto lv = make_intrusive<ListVal>(TYPE_ANY);

			for ( size_t i = 0; i < types.size(); i++ )
				{
				auto v = BuildVal(arr[static_cast<rapidjson::SizeType>(i)], types[i]);

				if ( ! get_if<ValPtr>(&v) )
					return v;

				// A null component has no Zeek value that could be appended to the
				// index, so reject it instead of handing a nil pointer to Append().
				if ( ! std::get<ValPtr>(v) )
					return "index type doesn't match";

				lv->Append(std::move(std::get<ValPtr>(v)));
				}

			return lv;
			}

		case TYPE_VECTOR:
			{
			if ( ! j.IsArray() )
				goto mismatch_err;

			auto vt = t->AsVectorType();
			auto vv = make_intrusive<VectorVal>(IntrusivePtr{NewRef{}, vt});

			for ( const auto& item : j.GetArray() )
				{
				auto v = BuildVal(item, vt->Yield());

				if ( ! get_if<ValPtr>(&v) )
					return v;

				// Null elements are skipped, so later elements move up.
				if ( ! std::get<ValPtr>(v) )
					continue;

				vv->Assign(vv->Size(), std::move(std::get<ValPtr>(v)));
				}

			return vv;
			}

		default:
		unsupport_err:
			return util::fmt("type '%s' unsupport", type_name(t->Tag()));
		}

mismatch_err:
	return util::fmt("type '%s' mismatch", type_name(t->Tag()));
	}
std::variant<ValPtr, std::string> ValFromJSON(std::string_view json_str, const TypePtr& t)
{
rapidjson::Document doc;
rapidjson::ParseResult ok = doc.Parse(json_str.data(), json_str.length());
if ( ! ok )
return util::fmt("JSON parse error: %s Offset: %lu", rapidjson::GetParseError_En(ok.Code()),
ok.Offset());
return BuildVal(doc, t);
}
ValPtr StringVal::DoClone(CloneState* state) ValPtr StringVal::DoClone(CloneState* state)
{ {
// We could likely treat this type as immutable and return a reference // We could likely treat this type as immutable and return a reference

View file

@ -6,6 +6,7 @@
#include <array> #include <array>
#include <list> #include <list>
#include <unordered_map> #include <unordered_map>
#include <variant>
#include <vector> #include <vector>
#include "zeek/IntrusivePtr.h" #include "zeek/IntrusivePtr.h"
@ -1760,4 +1761,9 @@ extern bool can_cast_value_to_type(const Val* v, Type* t);
// specific instance later. // specific instance later.
extern bool can_cast_value_to_type(const Type* s, Type* t); extern bool can_cast_value_to_type(const Type* s, Type* t);
// Parses a JSON string into Zeek data of type t. The std::variant return value is used in place
// of exceptions: it holds a ValPtr if parsing was successful, or a std::string containing an
// error message if an error occurred. The ValPtr is nil when the JSON value is null.
extern std::variant<ValPtr, std::string> ValFromJSON(std::string_view json_str, const TypePtr& t);
} // namespace zeek } // namespace zeek

View file

@ -5576,12 +5576,52 @@ function anonymize_addr%(a: addr, cl: IPAddrAnonymizationClass%): addr
## ##
## returns: a JSON formatted string. ## returns: a JSON formatted string.
## ##
## .. zeek:see:: fmt cat cat_sep string_cat print_raw ## .. zeek:see:: fmt cat cat_sep string_cat print_raw from_json
function to_json%(val: any, only_loggable: bool &default=F, field_escape_pattern: pattern &default=/^_/%): string function to_json%(val: any, only_loggable: bool &default=F, field_escape_pattern: pattern &default=/^_/%): string
%{ %{
return val->ToJSON(only_loggable, field_escape_pattern); return val->ToJSON(only_loggable, field_escape_pattern);
%} %}
## A function to convert a JSON string into arbitrary Zeek data.
##
## json_str: The JSON string.
##
## t: Type of zeek data.
##
## returns: A value of type t.
##
## .. zeek:see:: to_json
function from_json%(json_str: string, t: any%): from_json_result
%{
static auto result_type = zeek::id::find_type<RecordType>("from_json_result");
static auto v_idx = result_type->FieldOffset("v");
static auto valid_idx = result_type->FieldOffset("valid");
auto rval = zeek::make_intrusive<RecordVal>(result_type);
if ( t->GetType()->Tag() != zeek::TYPE_TYPE )
{
rval->Assign(valid_idx, false);
zeek::emit_builtin_error("from_json() requires a type argument");
return rval;
}
auto res = zeek::ValFromJSON(json_str->ToStdStringView(), t->AsType()->AsTypeType()->GetType());
if ( auto val = std::get_if<zeek::ValPtr>(&res) )
{
rval->Assign(v_idx, *val);
rval->Assign(valid_idx, true);
}
else
{
rval->Assign(valid_idx, false);
zeek::emit_builtin_error(std::get<std::string>(res).c_str());
}
return rval;
%}
## Compresses a given path by removing '..'s and the parent directory it ## Compresses a given path by removing '..'s and the parent directory it
## references and also removing dual '/'s and extraneous '/./'s. ## references and also removing dual '/'s and extraneous '/./'s.
## ##

View file

@ -0,0 +1 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.

View file

@ -0,0 +1,6 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
[v={
fe80::/64,
192.168.0.0/16
}, valid=T]
[v=[1, 3, 4], valid=T]

View file

@ -0,0 +1,2 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
error in <...>/from_json.zeek, line 8: Record 'Foo' field 'hello' is null or missing (from_json({"t":null}, to_any_coerceFoo))

View file

@ -0,0 +1,2 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
[v=<uninitialized>, valid=F]

View file

@ -0,0 +1,2 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
error in <...>/from_json.zeek, line 4: from_json() requires a type argument (from_json([], to_any_coerce10))

View file

@ -0,0 +1,2 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
[v=<uninitialized>, valid=F]

View file

@ -0,0 +1,2 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
error in <...>/from_json.zeek, line 4: JSON parse error: Missing a closing quotation mark in string. Offset: 5 (from_json({"hel, to_any_coercestring_vec))

View file

@ -0,0 +1,2 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
[v=<uninitialized>, valid=F]

View file

@ -0,0 +1,2 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
error in <...>/from_json.zeek, line 5: type 'bool' mismatch (from_json([], to_any_coercebool_t))

View file

@ -0,0 +1,2 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
[v=<uninitialized>, valid=F]

View file

@ -0,0 +1,2 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
error in <...>/from_json.zeek, line 4: type 'table' unsupport (from_json([], to_any_coercetable_string_of_string))

View file

@ -0,0 +1,2 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
[v=<uninitialized>, valid=F]

View file

@ -0,0 +1,2 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
error in <...>/from_json.zeek, line 5: wrong port format, must be <...>/(tcp|udp|icmp|unknown)/ (from_json("80", to_any_coerceport_t))

View file

@ -0,0 +1,2 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
[v=<uninitialized>, valid=F]

View file

@ -0,0 +1,3 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
error in <...>/from_json.zeek, line 5: index type doesn't match (from_json([[1, false], [2]], to_any_coerceset_t))
error in <...>/from_json.zeek, line 6: type 'bool' mismatch (from_json([[1, false], [2, 1]], to_any_coerceset_t))

View file

@ -0,0 +1,3 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
[v=<uninitialized>, valid=F]
[v=<uninitialized>, valid=F]

View file

@ -0,0 +1,3 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
error: error compiling pattern /^?(.|\n)*(([[:print:]]{-}[[:alnum:]]foo))/
error in <...>/from_json.zeek, line 5: error compiling pattern (from_json("/([[:print:]]{-}[[:alnum:]]foo)/", to_any_coercepattern_t))

View file

@ -0,0 +1,2 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
[v=<uninitialized>, valid=F]

View file

@ -0,0 +1,2 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
error in <...>/from_json.zeek, line 7: 'Yellow' is not a valid enum for 'Color'. (from_json("Yellow", to_any_coerceColor))

View file

@ -0,0 +1,2 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
[v=<uninitialized>, valid=F]

View file

@ -0,0 +1 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.

View file

@ -0,0 +1,8 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
[v=[hello=world, t=T, f=F, n=<uninitialized>, def=123, i=123, pi=3.1416, a=[1, 2, 3, 4], c1=A::Blue, p=1500/tcp, ti=XXXXXXXXXX.XXXXXX, it=1.0 hr 23.0 mins 20.0 secs, ad=127.0.0.1, s=::1/128, re=/^?(a)$?/, su={
aa:bb::/32,
192.168.0.0/16
}, se={
[192.168.0.1, 80/tcp] ,
[2001:db8::1, 8080/udp]
}], valid=T]

View file

@ -0,0 +1,120 @@
# @TEST-EXEC: ASAN_OPTIONS="$ASAN_OPTIONS,detect_leaks=0" zeek -b %INPUT
# @TEST-EXEC: btest-diff .stdout
# @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-remove-abspath btest-diff .stderr
module A; # The JSON below refers to the enum value by its qualified name "A::Blue".
type Color: enum {
Red = 10,
White = 20,
Blue = 30
};
type Foo: record {
hello: string;
t: bool;
f: bool;
n: count &optional; # The JSON supplies null for this field.
def: count &default = 123; # Absent from the JSON.
i: int;
pi: double;
a: string_vec;
c1: Color;
p: port;
ti: time;
it: interval;
ad: addr;
s: subnet; # The JSON uses the bracketed form "[::1/128]".
re: pattern;
su: subnet_set;
se: set[addr, port]; # Multi-index set; each JSON element is itself an array.
};
event zeek_init()
{
local json = "{\"hello\":\"world\",\"t\":true,\"f\":false,\"se\":[[\"192.168.0.1\", \"80/tcp\"], [\"2001:db8::1\", \"8080/udp\"]],\"n\":null,\"i\":123,\"pi\":3.1416,\"a\":[\"1\",\"2\",\"3\",\"4\"],\"su\":[\"[aa:bb::0]/32\",\"192.168.0.0/16\"],\"c1\":\"A::Blue\",\"p\":\"1500/tcp\",\"it\":5000,\"ad\":\"127.0.0.1\",\"s\":\"[::1/128]\",\"re\":\"/a/\",\"ti\":1681652265.042767}";
print from_json(json, Foo); # Converts one JSON object into a Foo record covering every field type above.
}
@TEST-START-NEXT
# Argument type mismatch: the second argument is the value 10 rather than a type.
event zeek_init()
{
print from_json("[]", 10);
}
@TEST-START-NEXT
# JSON parse error: the input ends in the middle of a string literal.
event zeek_init()
{
print from_json("{\"hel", string_vec);
}
@TEST-START-NEXT
type bool_t: bool;
# Type mismatch error: a JSON array is given where a bool is requested.
event zeek_init()
{
print from_json("[]", bool_t);
}
@TEST-START-NEXT
# Unsupported type error: table types that are not sets cannot be built from JSON.
event zeek_init()
{
print from_json("[]", table_string_of_string);
}
@TEST-START-NEXT
type port_t: port;
# Wrong port format: the string has a port number but no "/<protocol>" suffix.
event zeek_init()
{
print from_json("\"80\"", port_t);
}
@TEST-START-NEXT
type set_t: set[int, bool];
# Index type doesn't match: each set element must be an [int, bool] pair.
event zeek_init()
{
print from_json("[[1, false], [2]]", set_t); # Second element has too few components.
print from_json("[[1, false], [2, 1]]", set_t); # Second component is a number, not a bool.
}
@TEST-START-NEXT
type pattern_t: pattern;
# Pattern compile error: the text between the slashes is not a valid pattern.
event zeek_init()
{
print from_json("\"/([[:print:]]{-}[[:alnum:]]foo)/\"", pattern_t);
}
@TEST-START-NEXT
type Color: enum {
Red = 10
};
# Enum error: "Yellow" is not a member of Color.
event zeek_init()
{
print from_json("\"Yellow\"", Color);
}
@TEST-START-NEXT
# Container null: null elements inside a set or a vector are skipped.
event zeek_init()
{
print from_json("[\"fe80::/64\",null,\"192.168.0.0/16\"]", subnet_set);
print from_json("[\"1\",null,\"3\",\"4\"]", string_vec);
}
@TEST-START-NEXT
type Foo: record {
hello: string;
t: bool;
};
# Record field null or missing: "hello" is absent and "t" is null; neither is &optional nor has a &default.
event zeek_init()
{
print from_json("{\"t\":null}", Foo);
}