mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 14:48:21 +00:00
from_json: Support function to normalize key names
When a JSON document contains key names with colons or other special characters that are not valid in Zeek identifiers, from_json() cannot be used to parse such input. This change adds an optional, customizable key normalization function to from_json() that maps each JSON key name to a valid Zeek field name before record fields are looked up. Closes #3142.
This commit is contained in:
parent
3a8b299c35
commit
480d52ca1f
14 changed files with 102 additions and 29 deletions
4
NEWS
4
NEWS
|
@ -31,6 +31,10 @@ New Functionality
|
||||||
the break statement within ``assertion_failure()`` or ``assertion_result()``
|
the break statement within ``assertion_failure()`` or ``assertion_result()``
|
||||||
allows to suppress the default message.
|
allows to suppress the default message.
|
||||||
|
|
||||||
|
- The ``from_json()`` function now takes an optional key_func argument to
|
||||||
|
normalize JSON object key names. This can be useful if the keys in a JSON
|
||||||
|
object are not valid Zeek identifiers or are reserved keywords.
|
||||||
|
|
||||||
Changed Functionality
|
Changed Functionality
|
||||||
---------------------
|
---------------------
|
||||||
|
|
||||||
|
|
|
@ -131,6 +131,13 @@ type files_tag_set: set[Files::Tag];
|
||||||
## directly and then remove this alias.
|
## directly and then remove this alias.
|
||||||
type interval_set: set[interval];
|
type interval_set: set[interval];
|
||||||
|
|
||||||
|
## Function mapping a string to a string.
|
||||||
|
##
|
||||||
|
## .. todo:: We need this type definition only for declaring builtin functions
|
||||||
|
## via ``bifcl``. We should extend ``bifcl`` to understand composite types
|
||||||
|
## directly and then remove this alias.
|
||||||
|
type string_mapper: function(s: string): string;
|
||||||
|
|
||||||
## A structure indicating a MIME type and strength of a match against
|
## A structure indicating a MIME type and strength of a match against
|
||||||
## file magic signatures.
|
## file magic signatures.
|
||||||
##
|
##
|
||||||
|
@ -1129,6 +1136,12 @@ type entropy_test_result: record {
|
||||||
serial_correlation: double; ##< Serial correlation coefficient.
|
serial_correlation: double; ##< Serial correlation coefficient.
|
||||||
};
|
};
|
||||||
|
|
||||||
|
## The default JSON key mapper function. Identity function.
|
||||||
|
function from_json_default_key_mapper(s: string): string
|
||||||
|
{
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
## Return type for from_json BIF.
|
## Return type for from_json BIF.
|
||||||
##
|
##
|
||||||
## .. zeek:see:: from_json
|
## .. zeek:see:: from_json
|
||||||
|
|
68
src/Val.cc
68
src/Val.cc
|
@ -1064,7 +1064,8 @@ StringValPtr StringVal::Replace(RE_Matcher* re, const String& repl, bool do_all)
|
||||||
return make_intrusive<StringVal>(new String(true, result, r - result));
|
return make_intrusive<StringVal>(new String(true, result, r - result));
|
||||||
}
|
}
|
||||||
|
|
||||||
static std::variant<ValPtr, std::string> BuildVal(const rapidjson::Value& j, const TypePtr& t)
|
static std::variant<ValPtr, std::string> BuildVal(const rapidjson::Value& j, const TypePtr& t,
|
||||||
|
const FuncPtr& key_func)
|
||||||
{
|
{
|
||||||
auto mismatch_err = [t, &j]()
|
auto mismatch_err = [t, &j]()
|
||||||
{
|
{
|
||||||
|
@ -1278,9 +1279,9 @@ static std::variant<ValPtr, std::string> BuildVal(const rapidjson::Value& j, con
|
||||||
std::variant<ValPtr, std::string> v;
|
std::variant<ValPtr, std::string> v;
|
||||||
|
|
||||||
if ( tl->GetTypes().size() == 1 )
|
if ( tl->GetTypes().size() == 1 )
|
||||||
v = BuildVal(item, tl->GetPureType());
|
v = BuildVal(item, tl->GetPureType(), key_func);
|
||||||
else
|
else
|
||||||
v = BuildVal(item, tl);
|
v = BuildVal(item, tl, key_func);
|
||||||
|
|
||||||
if ( ! get_if<ValPtr>(&v) )
|
if ( ! get_if<ValPtr>(&v) )
|
||||||
return v;
|
return v;
|
||||||
|
@ -1301,24 +1302,62 @@ static std::variant<ValPtr, std::string> BuildVal(const rapidjson::Value& j, con
|
||||||
|
|
||||||
auto rt = t->AsRecordType();
|
auto rt = t->AsRecordType();
|
||||||
auto rv = make_intrusive<RecordVal>(IntrusivePtr{NewRef{}, rt});
|
auto rv = make_intrusive<RecordVal>(IntrusivePtr{NewRef{}, rt});
|
||||||
|
|
||||||
|
std::map<std::string, const rapidjson::Value*> normalized_keys;
|
||||||
|
|
||||||
|
// If key_func is given, map all JSON keys and store in above map.
|
||||||
|
if ( key_func )
|
||||||
|
{
|
||||||
|
for ( auto it = j.MemberBegin(); it != j.MemberEnd(); it++ )
|
||||||
|
{
|
||||||
|
ValPtr result;
|
||||||
|
try
|
||||||
|
{
|
||||||
|
result = key_func->Invoke(
|
||||||
|
zeek::make_intrusive<StringVal>(it->name.GetString()));
|
||||||
|
}
|
||||||
|
catch ( InterpreterException& )
|
||||||
|
{
|
||||||
|
/* Already reported. */
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( ! result )
|
||||||
|
return "key function error";
|
||||||
|
|
||||||
|
normalized_keys[result->AsStringVal()->CheckString()] = &it->value;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now lookup record fields using the normalized input.
|
||||||
for ( int i = 0; i < rt->NumFields(); ++i )
|
for ( int i = 0; i < rt->NumFields(); ++i )
|
||||||
{
|
{
|
||||||
auto td_i = rt->FieldDecl(i);
|
const auto td_i = rt->FieldDecl(i);
|
||||||
auto m_it = j.FindMember(td_i->id);
|
const rapidjson::Value* jval = nullptr;
|
||||||
bool has_member = m_it != j.MemberEnd();
|
|
||||||
bool member_is_null = has_member && m_it->value.IsNull();
|
|
||||||
|
|
||||||
if ( ! has_member || member_is_null )
|
if ( key_func )
|
||||||
|
{
|
||||||
|
auto m_it = normalized_keys.find(td_i->id);
|
||||||
|
jval = m_it != normalized_keys.end() ? m_it->second : nullptr;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
auto m_it = j.FindMember(td_i->id);
|
||||||
|
jval = m_it != j.MemberEnd() ? &m_it->value : nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( ! jval || jval->IsNull() )
|
||||||
{
|
{
|
||||||
if ( ! td_i->GetAttr(detail::ATTR_OPTIONAL) &&
|
if ( ! td_i->GetAttr(detail::ATTR_OPTIONAL) &&
|
||||||
! td_i->GetAttr(detail::ATTR_DEFAULT) )
|
! td_i->GetAttr(detail::ATTR_DEFAULT) )
|
||||||
|
// jval being set means it is a null JSON value else
|
||||||
|
// it wasn't even there.
|
||||||
return util::fmt("required field %s$%s is %s in JSON", t->GetName().c_str(),
|
return util::fmt("required field %s$%s is %s in JSON", t->GetName().c_str(),
|
||||||
td_i->id, member_is_null ? "null" : "missing");
|
td_i->id, jval ? "null" : "missing");
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto v = BuildVal(m_it->value, td_i->type);
|
auto v = BuildVal(*jval, td_i->type, key_func);
|
||||||
if ( ! get_if<ValPtr>(&v) )
|
if ( ! get_if<ValPtr>(&v) )
|
||||||
return v;
|
return v;
|
||||||
|
|
||||||
|
@ -1342,7 +1381,7 @@ static std::variant<ValPtr, std::string> BuildVal(const rapidjson::Value& j, con
|
||||||
|
|
||||||
for ( size_t i = 0; i < lt->GetTypes().size(); i++ )
|
for ( size_t i = 0; i < lt->GetTypes().size(); i++ )
|
||||||
{
|
{
|
||||||
auto v = BuildVal(j.GetArray()[i], lt->GetTypes()[i]);
|
auto v = BuildVal(j.GetArray()[i], lt->GetTypes()[i], key_func);
|
||||||
if ( ! get_if<ValPtr>(&v) )
|
if ( ! get_if<ValPtr>(&v) )
|
||||||
return v;
|
return v;
|
||||||
|
|
||||||
|
@ -1361,7 +1400,7 @@ static std::variant<ValPtr, std::string> BuildVal(const rapidjson::Value& j, con
|
||||||
auto vv = make_intrusive<VectorVal>(IntrusivePtr{NewRef{}, vt});
|
auto vv = make_intrusive<VectorVal>(IntrusivePtr{NewRef{}, vt});
|
||||||
for ( const auto& item : j.GetArray() )
|
for ( const auto& item : j.GetArray() )
|
||||||
{
|
{
|
||||||
auto v = BuildVal(item, vt->Yield());
|
auto v = BuildVal(item, vt->Yield(), key_func);
|
||||||
if ( ! get_if<ValPtr>(&v) )
|
if ( ! get_if<ValPtr>(&v) )
|
||||||
return v;
|
return v;
|
||||||
|
|
||||||
|
@ -1379,7 +1418,8 @@ static std::variant<ValPtr, std::string> BuildVal(const rapidjson::Value& j, con
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::variant<ValPtr, std::string> detail::ValFromJSON(std::string_view json_str, const TypePtr& t)
|
std::variant<ValPtr, std::string> detail::ValFromJSON(std::string_view json_str, const TypePtr& t,
|
||||||
|
const FuncPtr& key_func)
|
||||||
{
|
{
|
||||||
rapidjson::Document doc;
|
rapidjson::Document doc;
|
||||||
rapidjson::ParseResult ok = doc.Parse(json_str.data(), json_str.length());
|
rapidjson::ParseResult ok = doc.Parse(json_str.data(), json_str.length());
|
||||||
|
@ -1388,7 +1428,7 @@ std::variant<ValPtr, std::string> detail::ValFromJSON(std::string_view json_str,
|
||||||
return util::fmt("JSON parse error: %s Offset: %lu", rapidjson::GetParseError_En(ok.Code()),
|
return util::fmt("JSON parse error: %s Offset: %lu", rapidjson::GetParseError_En(ok.Code()),
|
||||||
ok.Offset());
|
ok.Offset());
|
||||||
|
|
||||||
return BuildVal(doc, t);
|
return BuildVal(doc, t, key_func);
|
||||||
}
|
}
|
||||||
|
|
||||||
ValPtr StringVal::DoClone(CloneState* state)
|
ValPtr StringVal::DoClone(CloneState* state)
|
||||||
|
|
|
@ -1800,7 +1800,11 @@ namespace detail
|
||||||
// Parses a JSON string into arbitrary Zeek data using std::variant to simulate functional exception
|
// Parses a JSON string into arbitrary Zeek data using std::variant to simulate functional exception
|
||||||
// handling. Returns a ValPtr if parsing was successful, or a std::string containing an error
|
// handling. Returns a ValPtr if parsing was successful, or a std::string containing an error
|
||||||
// message if an error occurred.
|
// message if an error occurred.
|
||||||
extern std::variant<ValPtr, std::string> ValFromJSON(std::string_view json_str, const TypePtr& t);
|
//
|
||||||
|
// The *key_func* parameter is a Zeek script function called for every JSON key
|
||||||
|
// for normalization. If Func::nil is passed, no normalization happens.
|
||||||
|
extern std::variant<ValPtr, std::string> ValFromJSON(std::string_view json_str, const TypePtr& t,
|
||||||
|
const FuncPtr& key_func);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace zeek
|
} // namespace zeek
|
||||||
|
|
16
src/zeek.bif
16
src/zeek.bif
|
@ -5522,14 +5522,19 @@ function to_json%(val: any, only_loggable: bool &default=F, field_escape_pattern
|
||||||
##
|
##
|
||||||
## t: Type of Zeek data.
|
## t: Type of Zeek data.
|
||||||
##
|
##
|
||||||
|
## key_func: Optional function to normalize key names in JSON objects. Useful
|
||||||
|
## when keys are not valid field identifiers, or represent reserved
|
||||||
|
## keywords like **port** or **type**.
|
||||||
|
##
|
||||||
## returns: A value of type t.
|
## returns: A value of type t.
|
||||||
##
|
##
|
||||||
## .. zeek:see:: to_json
|
## .. zeek:see:: to_json
|
||||||
function from_json%(s: string, t: any%): from_json_result
|
function from_json%(s: string, t: any, key_func: string_mapper &default=from_json_default_key_mapper%): from_json_result
|
||||||
%{
|
%{
|
||||||
static auto result_type = zeek::id::find_type<RecordType>("from_json_result");
|
static auto result_type = zeek::id::find_type<RecordType>("from_json_result");
|
||||||
static auto v_idx = result_type->FieldOffset("v");
|
static auto v_idx = result_type->FieldOffset("v");
|
||||||
static auto valid_idx = result_type->FieldOffset("valid");
|
static auto valid_idx = result_type->FieldOffset("valid");
|
||||||
|
static auto default_key_func_ptr = zeek::id::find_func("from_json_default_key_mapper");
|
||||||
|
|
||||||
auto rval = zeek::make_intrusive<RecordVal>(result_type);
|
auto rval = zeek::make_intrusive<RecordVal>(result_type);
|
||||||
|
|
||||||
|
@ -5540,7 +5545,14 @@ function from_json%(s: string, t: any%): from_json_result
|
||||||
return rval;
|
return rval;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto res = zeek::detail::ValFromJSON(s->ToStdStringView(), t->AsType()->AsTypeType()->GetType());
|
// If key_func is the same as "from_json_default_key_mapper",
|
||||||
|
// null it out so that no key normalization happens.
|
||||||
|
auto key_func_ptr = key_func->AsFuncVal()->AsFuncPtr();
|
||||||
|
if ( key_func_ptr == default_key_func_ptr )
|
||||||
|
key_func_ptr = Func::nil;
|
||||||
|
|
||||||
|
auto res = zeek::detail::ValFromJSON(s->ToStdStringView(), t->AsType()->AsTypeType()->GetType(),
|
||||||
|
key_func_ptr);
|
||||||
|
|
||||||
if ( auto val = std::get_if<zeek::ValPtr>(&res) )
|
if ( auto val = std::get_if<zeek::ValPtr>(&res) )
|
||||||
{
|
{
|
||||||
|
|
|
@ -1,3 +1,3 @@
|
||||||
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
|
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
|
||||||
error in <...>/from_json.zeek, line 8: required field Foo$hello is missing in JSON (from_json({"t":null}, to_any_coerceFoo))
|
error in <...>/from_json.zeek, line 8: required field Foo$hello is missing in JSON (from_json({"t":null}, to_any_coerceFoo, from_json_default_key_mapper))
|
||||||
error in <...>/from_json.zeek, line 9: required field Foo$hello is null in JSON (from_json({"hello": null, "t": true}, to_any_coerceFoo))
|
error in <...>/from_json.zeek, line 9: required field Foo$hello is null in JSON (from_json({"hello": null, "t": true}, to_any_coerceFoo, from_json_default_key_mapper))
|
||||||
|
|
|
@ -1,2 +1,2 @@
|
||||||
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
|
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
|
||||||
error in <...>/from_json.zeek, line 4: from_json() requires a type argument (from_json([], to_any_coerce10))
|
error in <...>/from_json.zeek, line 4: from_json() requires a type argument (from_json([], to_any_coerce10, from_json_default_key_mapper))
|
||||||
|
|
|
@ -1,2 +1,2 @@
|
||||||
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
|
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
|
||||||
error in <...>/from_json.zeek, line 4: JSON parse error: Missing a closing quotation mark in string. Offset: 5 (from_json({"hel, to_any_coercestring_vec))
|
error in <...>/from_json.zeek, line 4: JSON parse error: Missing a closing quotation mark in string. Offset: 5 (from_json({"hel, to_any_coercestring_vec, from_json_default_key_mapper))
|
||||||
|
|
|
@ -1,3 +1,3 @@
|
||||||
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
|
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
|
||||||
error in <...>/from_json.zeek, line 9: cannot convert JSON type 'array' to Zeek type 'bool' (from_json([], to_any_coercebool_t))
|
error in <...>/from_json.zeek, line 9: cannot convert JSON type 'array' to Zeek type 'bool' (from_json([], to_any_coercebool_t, from_json_default_key_mapper))
|
||||||
error in <...>/from_json.zeek, line 10: cannot convert JSON type 'string' to Zeek type 'bool' (from_json({"a": "hello"}, to_any_coerceFoo))
|
error in <...>/from_json.zeek, line 10: cannot convert JSON type 'string' to Zeek type 'bool' (from_json({"a": "hello"}, to_any_coerceFoo, from_json_default_key_mapper))
|
||||||
|
|
|
@ -1,2 +1,2 @@
|
||||||
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
|
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
|
||||||
error in <...>/from_json.zeek, line 4: tables are not supported (from_json([], to_any_coercetable_string_of_string))
|
error in <...>/from_json.zeek, line 4: tables are not supported (from_json([], to_any_coercetable_string_of_string, from_json_default_key_mapper))
|
||||||
|
|
|
@ -1,2 +1,2 @@
|
||||||
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
|
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
|
||||||
error in <...>/from_json.zeek, line 5: wrong port format, must be <...>/(tcp|udp|icmp|unknown)/ (from_json("80", to_any_coerceport_t))
|
error in <...>/from_json.zeek, line 5: wrong port format, must be <...>/(tcp|udp|icmp|unknown)/ (from_json("80", to_any_coerceport_t, from_json_default_key_mapper))
|
||||||
|
|
|
@ -1,3 +1,3 @@
|
||||||
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
|
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
|
||||||
error in <...>/from_json.zeek, line 5: index type doesn't match (from_json([[1, false], [2]], to_any_coerceset_t))
|
error in <...>/from_json.zeek, line 5: index type doesn't match (from_json([[1, false], [2]], to_any_coerceset_t, from_json_default_key_mapper))
|
||||||
error in <...>/from_json.zeek, line 6: cannot convert JSON type 'number' to Zeek type 'bool' (from_json([[1, false], [2, 1]], to_any_coerceset_t))
|
error in <...>/from_json.zeek, line 6: cannot convert JSON type 'number' to Zeek type 'bool' (from_json([[1, false], [2, 1]], to_any_coerceset_t, from_json_default_key_mapper))
|
||||||
|
|
|
@ -1,3 +1,3 @@
|
||||||
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
|
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
|
||||||
error: error compiling pattern /^?(.|\n)*(([[:print:]]{-}[[:alnum:]]foo))/
|
error: error compiling pattern /^?(.|\n)*(([[:print:]]{-}[[:alnum:]]foo))/
|
||||||
error in <...>/from_json.zeek, line 5: error compiling pattern (from_json("/([[:print:]]{-}[[:alnum:]]foo)/", to_any_coercepattern_t))
|
error in <...>/from_json.zeek, line 5: error compiling pattern (from_json("/([[:print:]]{-}[[:alnum:]]foo)/", to_any_coercepattern_t, from_json_default_key_mapper))
|
||||||
|
|
|
@ -1,2 +1,2 @@
|
||||||
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
|
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
|
||||||
error in <...>/from_json.zeek, line 7: 'Yellow' is not a valid enum for 'Color'. (from_json("Yellow", to_any_coerceColor))
|
error in <...>/from_json.zeek, line 7: 'Yellow' is not a valid enum for 'Color'. (from_json("Yellow", to_any_coerceColor, from_json_default_key_mapper))
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue