from_json: Support function to normalize key names

When a JSON document contains key names containing colons or other
special characters that are not valid in Zeek identifiers, from_json()
cannot be used to parse such input.

This change adds an optional key_func argument to from_json() that lets
callers supply a custom function for normalizing JSON key names.

Closes #3142.
This commit is contained in:
Arne Welzel 2023-06-21 11:03:28 +02:00
parent 3a8b299c35
commit 480d52ca1f
14 changed files with 102 additions and 29 deletions

4
NEWS
View file

@ -31,6 +31,10 @@ New Functionality
the break statement within ``assertion_failure()`` or ``assertion_result()``
allows suppressing the default message.
- The ``from_json()`` function now takes an optional ``key_func`` argument to
normalize JSON object key names. This can be useful if the keys in a JSON
object are not valid Zeek identifiers or are reserved keywords.
Changed Functionality
---------------------

View file

@ -131,6 +131,13 @@ type files_tag_set: set[Files::Tag];
## directly and then remove this alias.
type interval_set: set[interval];
## Function mapping a string to a string.
##
## Used as the type of the ``key_func`` argument of ``from_json()``.
##
## .. todo:: We need this type definition only for declaring builtin functions
## via ``bifcl``. We should extend ``bifcl`` to understand composite types
## directly and then remove this alias.
type string_mapper: function(s: string): string;
## A structure indicating a MIME type and strength of a match against
## file magic signatures.
##
@ -1129,6 +1136,12 @@ type entropy_test_result: record {
serial_correlation: double; ##< Serial correlation coefficient.
};
## The default JSON key mapper function. Identity function.
##
## This is the default value of the ``key_func`` argument of ``from_json()``.
## When ``from_json()`` receives this function as ``key_func``, it skips key
## normalization entirely instead of invoking this function for every key.
##
## s: The JSON object key name.
##
## returns: *s*, unchanged.
function from_json_default_key_mapper(s: string): string
{
return s;
}
## Return type for from_json BIF.
##
## .. zeek:see:: from_json

View file

@ -1064,7 +1064,8 @@ StringValPtr StringVal::Replace(RE_Matcher* re, const String& repl, bool do_all)
return make_intrusive<StringVal>(new String(true, result, r - result));
}
static std::variant<ValPtr, std::string> BuildVal(const rapidjson::Value& j, const TypePtr& t)
static std::variant<ValPtr, std::string> BuildVal(const rapidjson::Value& j, const TypePtr& t,
const FuncPtr& key_func)
{
auto mismatch_err = [t, &j]()
{
@ -1278,9 +1279,9 @@ static std::variant<ValPtr, std::string> BuildVal(const rapidjson::Value& j, con
std::variant<ValPtr, std::string> v;
if ( tl->GetTypes().size() == 1 )
v = BuildVal(item, tl->GetPureType());
v = BuildVal(item, tl->GetPureType(), key_func);
else
v = BuildVal(item, tl);
v = BuildVal(item, tl, key_func);
if ( ! get_if<ValPtr>(&v) )
return v;
@ -1301,24 +1302,62 @@ static std::variant<ValPtr, std::string> BuildVal(const rapidjson::Value& j, con
auto rt = t->AsRecordType();
auto rv = make_intrusive<RecordVal>(IntrusivePtr{NewRef{}, rt});
std::map<std::string, const rapidjson::Value*> normalized_keys;
// If key_func is given, map all JSON keys and store in above map.
if ( key_func )
{
for ( auto it = j.MemberBegin(); it != j.MemberEnd(); it++ )
{
ValPtr result;
try
{
result = key_func->Invoke(
zeek::make_intrusive<StringVal>(it->name.GetString()));
}
catch ( InterpreterException& )
{
/* Already reported. */
}
if ( ! result )
return "key function error";
normalized_keys[result->AsStringVal()->CheckString()] = &it->value;
}
}
// Now lookup record fields using the normalized input.
for ( int i = 0; i < rt->NumFields(); ++i )
{
auto td_i = rt->FieldDecl(i);
auto m_it = j.FindMember(td_i->id);
bool has_member = m_it != j.MemberEnd();
bool member_is_null = has_member && m_it->value.IsNull();
const auto td_i = rt->FieldDecl(i);
const rapidjson::Value* jval = nullptr;
if ( ! has_member || member_is_null )
if ( key_func )
{
auto m_it = normalized_keys.find(td_i->id);
jval = m_it != normalized_keys.end() ? m_it->second : nullptr;
}
else
{
auto m_it = j.FindMember(td_i->id);
jval = m_it != j.MemberEnd() ? &m_it->value : nullptr;
}
if ( ! jval || jval->IsNull() )
{
if ( ! td_i->GetAttr(detail::ATTR_OPTIONAL) &&
! td_i->GetAttr(detail::ATTR_DEFAULT) )
// A non-null jval here means the key was present with a JSON
// null value; otherwise the key was missing entirely.
return util::fmt("required field %s$%s is %s in JSON", t->GetName().c_str(),
td_i->id, member_is_null ? "null" : "missing");
td_i->id, jval ? "null" : "missing");
continue;
}
auto v = BuildVal(m_it->value, td_i->type);
auto v = BuildVal(*jval, td_i->type, key_func);
if ( ! get_if<ValPtr>(&v) )
return v;
@ -1342,7 +1381,7 @@ static std::variant<ValPtr, std::string> BuildVal(const rapidjson::Value& j, con
for ( size_t i = 0; i < lt->GetTypes().size(); i++ )
{
auto v = BuildVal(j.GetArray()[i], lt->GetTypes()[i]);
auto v = BuildVal(j.GetArray()[i], lt->GetTypes()[i], key_func);
if ( ! get_if<ValPtr>(&v) )
return v;
@ -1361,7 +1400,7 @@ static std::variant<ValPtr, std::string> BuildVal(const rapidjson::Value& j, con
auto vv = make_intrusive<VectorVal>(IntrusivePtr{NewRef{}, vt});
for ( const auto& item : j.GetArray() )
{
auto v = BuildVal(item, vt->Yield());
auto v = BuildVal(item, vt->Yield(), key_func);
if ( ! get_if<ValPtr>(&v) )
return v;
@ -1379,7 +1418,8 @@ static std::variant<ValPtr, std::string> BuildVal(const rapidjson::Value& j, con
}
}
std::variant<ValPtr, std::string> detail::ValFromJSON(std::string_view json_str, const TypePtr& t)
std::variant<ValPtr, std::string> detail::ValFromJSON(std::string_view json_str, const TypePtr& t,
const FuncPtr& key_func)
{
rapidjson::Document doc;
rapidjson::ParseResult ok = doc.Parse(json_str.data(), json_str.length());
@ -1388,7 +1428,7 @@ std::variant<ValPtr, std::string> detail::ValFromJSON(std::string_view json_str,
return util::fmt("JSON parse error: %s Offset: %lu", rapidjson::GetParseError_En(ok.Code()),
ok.Offset());
return BuildVal(doc, t);
return BuildVal(doc, t, key_func);
}
ValPtr StringVal::DoClone(CloneState* state)

View file

@ -1800,7 +1800,11 @@ namespace detail
// Parses a JSON string into arbitrary Zeek data using std::variant to simulate functional exception
// handling. Returns a ValPtr if parsing was successful, or a std::string containing an error
// message if an error occurred.
extern std::variant<ValPtr, std::string> ValFromJSON(std::string_view json_str, const TypePtr& t);
//
// The *key_func* parameter is a Zeek script function called for every JSON key
// for normalization. If Func::nil is passed, no normalization happens.
extern std::variant<ValPtr, std::string> ValFromJSON(std::string_view json_str, const TypePtr& t,
const FuncPtr& key_func);
}
} // namespace zeek

View file

@ -5522,14 +5522,19 @@ function to_json%(val: any, only_loggable: bool &default=F, field_escape_pattern
##
## t: Type of Zeek data.
##
## key_func: Optional function to normalize key names in JSON objects. Useful
## when keys are not valid field identifiers, or represent reserved
## keywords like **port** or **type**.
##
## returns: A value of type t.
##
## .. zeek:see:: to_json
function from_json%(s: string, t: any%): from_json_result
function from_json%(s: string, t: any, key_func: string_mapper &default=from_json_default_key_mapper%): from_json_result
%{
static auto result_type = zeek::id::find_type<RecordType>("from_json_result");
static auto v_idx = result_type->FieldOffset("v");
static auto valid_idx = result_type->FieldOffset("valid");
static auto default_key_func_ptr = zeek::id::find_func("from_json_default_key_mapper");
auto rval = zeek::make_intrusive<RecordVal>(result_type);
@ -5540,7 +5545,14 @@ function from_json%(s: string, t: any%): from_json_result
return rval;
}
auto res = zeek::detail::ValFromJSON(s->ToStdStringView(), t->AsType()->AsTypeType()->GetType());
// If key_func is the same as "from_json_default_key_mapper",
// null it out so that no key normalization happens.
auto key_func_ptr = key_func->AsFuncVal()->AsFuncPtr();
if ( key_func_ptr == default_key_func_ptr )
key_func_ptr = Func::nil;
auto res = zeek::detail::ValFromJSON(s->ToStdStringView(), t->AsType()->AsTypeType()->GetType(),
key_func_ptr);
if ( auto val = std::get_if<zeek::ValPtr>(&res) )
{

View file

@ -1,3 +1,3 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
error in <...>/from_json.zeek, line 8: required field Foo$hello is missing in JSON (from_json({"t":null}, to_any_coerceFoo))
error in <...>/from_json.zeek, line 9: required field Foo$hello is null in JSON (from_json({"hello": null, "t": true}, to_any_coerceFoo))
error in <...>/from_json.zeek, line 8: required field Foo$hello is missing in JSON (from_json({"t":null}, to_any_coerceFoo, from_json_default_key_mapper))
error in <...>/from_json.zeek, line 9: required field Foo$hello is null in JSON (from_json({"hello": null, "t": true}, to_any_coerceFoo, from_json_default_key_mapper))

View file

@ -1,2 +1,2 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
error in <...>/from_json.zeek, line 4: from_json() requires a type argument (from_json([], to_any_coerce10))
error in <...>/from_json.zeek, line 4: from_json() requires a type argument (from_json([], to_any_coerce10, from_json_default_key_mapper))

View file

@ -1,2 +1,2 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
error in <...>/from_json.zeek, line 4: JSON parse error: Missing a closing quotation mark in string. Offset: 5 (from_json({"hel, to_any_coercestring_vec))
error in <...>/from_json.zeek, line 4: JSON parse error: Missing a closing quotation mark in string. Offset: 5 (from_json({"hel, to_any_coercestring_vec, from_json_default_key_mapper))

View file

@ -1,3 +1,3 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
error in <...>/from_json.zeek, line 9: cannot convert JSON type 'array' to Zeek type 'bool' (from_json([], to_any_coercebool_t))
error in <...>/from_json.zeek, line 10: cannot convert JSON type 'string' to Zeek type 'bool' (from_json({"a": "hello"}, to_any_coerceFoo))
error in <...>/from_json.zeek, line 9: cannot convert JSON type 'array' to Zeek type 'bool' (from_json([], to_any_coercebool_t, from_json_default_key_mapper))
error in <...>/from_json.zeek, line 10: cannot convert JSON type 'string' to Zeek type 'bool' (from_json({"a": "hello"}, to_any_coerceFoo, from_json_default_key_mapper))

View file

@ -1,2 +1,2 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
error in <...>/from_json.zeek, line 4: tables are not supported (from_json([], to_any_coercetable_string_of_string))
error in <...>/from_json.zeek, line 4: tables are not supported (from_json([], to_any_coercetable_string_of_string, from_json_default_key_mapper))

View file

@ -1,2 +1,2 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
error in <...>/from_json.zeek, line 5: wrong port format, must be <...>/(tcp|udp|icmp|unknown)/ (from_json("80", to_any_coerceport_t))
error in <...>/from_json.zeek, line 5: wrong port format, must be <...>/(tcp|udp|icmp|unknown)/ (from_json("80", to_any_coerceport_t, from_json_default_key_mapper))

View file

@ -1,3 +1,3 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
error in <...>/from_json.zeek, line 5: index type doesn't match (from_json([[1, false], [2]], to_any_coerceset_t))
error in <...>/from_json.zeek, line 6: cannot convert JSON type 'number' to Zeek type 'bool' (from_json([[1, false], [2, 1]], to_any_coerceset_t))
error in <...>/from_json.zeek, line 5: index type doesn't match (from_json([[1, false], [2]], to_any_coerceset_t, from_json_default_key_mapper))
error in <...>/from_json.zeek, line 6: cannot convert JSON type 'number' to Zeek type 'bool' (from_json([[1, false], [2, 1]], to_any_coerceset_t, from_json_default_key_mapper))

View file

@ -1,3 +1,3 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
error: error compiling pattern /^?(.|\n)*(([[:print:]]{-}[[:alnum:]]foo))/
error in <...>/from_json.zeek, line 5: error compiling pattern (from_json("/([[:print:]]{-}[[:alnum:]]foo)/", to_any_coercepattern_t))
error in <...>/from_json.zeek, line 5: error compiling pattern (from_json("/([[:print:]]{-}[[:alnum:]]foo)/", to_any_coercepattern_t, from_json_default_key_mapper))

View file

@ -1,2 +1,2 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
error in <...>/from_json.zeek, line 7: 'Yellow' is not a valid enum for 'Color'. (from_json("Yellow", to_any_coerceColor))
error in <...>/from_json.zeek, line 7: 'Yellow' is not a valid enum for 'Color'. (from_json("Yellow", to_any_coerceColor, from_json_default_key_mapper))