mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 06:38:20 +00:00
from_json: Support function to normalize key names
When a JSON document contains key names with colons or other special characters that are not valid in Zeek identifiers, from_json() cannot be used to parse such input. This change adds an optional key_func argument to from_json() that lets callers supply a function to normalize JSON key names before they are matched against record field names. Closes #3142.
This commit is contained in:
parent
3a8b299c35
commit
480d52ca1f
14 changed files with 102 additions and 29 deletions
4
NEWS
4
NEWS
|
@ -31,6 +31,10 @@ New Functionality
|
|||
the break statement within ``assertion_failure()`` or ``assertion_result()``
|
||||
allows suppressing the default message.
|
||||
|
||||
- The ``from_json()`` function now takes an optional ``key_func`` argument to
|
||||
normalize JSON object key names. This can be useful if the keys in a JSON
|
||||
object are not valid Zeek identifiers or are reserved keywords.
|
||||
|
||||
Changed Functionality
|
||||
---------------------
|
||||
|
||||
|
|
|
@ -131,6 +131,13 @@ type files_tag_set: set[Files::Tag];
|
|||
## directly and then remove this alias.
|
||||
type interval_set: set[interval];
|
||||
|
||||
## Function mapping a string to a string.
|
||||
##
|
||||
## .. todo:: We need this type definition only for declaring builtin functions
|
||||
## via ``bifcl``. We should extend ``bifcl`` to understand composite types
|
||||
## directly and then remove this alias.
|
||||
type string_mapper: function(s: string): string;
|
||||
|
||||
## A structure indicating a MIME type and strength of a match against
|
||||
## file magic signatures.
|
||||
##
|
||||
|
@ -1129,6 +1136,12 @@ type entropy_test_result: record {
|
|||
serial_correlation: double; ##< Serial correlation coefficient.
|
||||
};
|
||||
|
||||
## The default JSON key mapper function. Identity function.
|
||||
function from_json_default_key_mapper(s: string): string
|
||||
{
|
||||
return s;
|
||||
}
|
||||
|
||||
## Return type for from_json BIF.
|
||||
##
|
||||
## .. zeek:see:: from_json
|
||||
|
|
68
src/Val.cc
68
src/Val.cc
|
@ -1064,7 +1064,8 @@ StringValPtr StringVal::Replace(RE_Matcher* re, const String& repl, bool do_all)
|
|||
return make_intrusive<StringVal>(new String(true, result, r - result));
|
||||
}
|
||||
|
||||
static std::variant<ValPtr, std::string> BuildVal(const rapidjson::Value& j, const TypePtr& t)
|
||||
static std::variant<ValPtr, std::string> BuildVal(const rapidjson::Value& j, const TypePtr& t,
|
||||
const FuncPtr& key_func)
|
||||
{
|
||||
auto mismatch_err = [t, &j]()
|
||||
{
|
||||
|
@ -1278,9 +1279,9 @@ static std::variant<ValPtr, std::string> BuildVal(const rapidjson::Value& j, con
|
|||
std::variant<ValPtr, std::string> v;
|
||||
|
||||
if ( tl->GetTypes().size() == 1 )
|
||||
v = BuildVal(item, tl->GetPureType());
|
||||
v = BuildVal(item, tl->GetPureType(), key_func);
|
||||
else
|
||||
v = BuildVal(item, tl);
|
||||
v = BuildVal(item, tl, key_func);
|
||||
|
||||
if ( ! get_if<ValPtr>(&v) )
|
||||
return v;
|
||||
|
@ -1301,24 +1302,62 @@ static std::variant<ValPtr, std::string> BuildVal(const rapidjson::Value& j, con
|
|||
|
||||
auto rt = t->AsRecordType();
|
||||
auto rv = make_intrusive<RecordVal>(IntrusivePtr{NewRef{}, rt});
|
||||
|
||||
std::map<std::string, const rapidjson::Value*> normalized_keys;
|
||||
|
||||
// If key_func is given, map all JSON keys and store in above map.
|
||||
if ( key_func )
|
||||
{
|
||||
for ( auto it = j.MemberBegin(); it != j.MemberEnd(); it++ )
|
||||
{
|
||||
ValPtr result;
|
||||
try
|
||||
{
|
||||
result = key_func->Invoke(
|
||||
zeek::make_intrusive<StringVal>(it->name.GetString()));
|
||||
}
|
||||
catch ( InterpreterException& )
|
||||
{
|
||||
/* Already reported. */
|
||||
}
|
||||
|
||||
if ( ! result )
|
||||
return "key function error";
|
||||
|
||||
normalized_keys[result->AsStringVal()->CheckString()] = &it->value;
|
||||
}
|
||||
}
|
||||
|
||||
// Now lookup record fields using the normalized input.
|
||||
for ( int i = 0; i < rt->NumFields(); ++i )
|
||||
{
|
||||
auto td_i = rt->FieldDecl(i);
|
||||
auto m_it = j.FindMember(td_i->id);
|
||||
bool has_member = m_it != j.MemberEnd();
|
||||
bool member_is_null = has_member && m_it->value.IsNull();
|
||||
const auto td_i = rt->FieldDecl(i);
|
||||
const rapidjson::Value* jval = nullptr;
|
||||
|
||||
if ( ! has_member || member_is_null )
|
||||
if ( key_func )
|
||||
{
|
||||
auto m_it = normalized_keys.find(td_i->id);
|
||||
jval = m_it != normalized_keys.end() ? m_it->second : nullptr;
|
||||
}
|
||||
else
|
||||
{
|
||||
auto m_it = j.FindMember(td_i->id);
|
||||
jval = m_it != j.MemberEnd() ? &m_it->value : nullptr;
|
||||
}
|
||||
|
||||
if ( ! jval || jval->IsNull() )
|
||||
{
|
||||
if ( ! td_i->GetAttr(detail::ATTR_OPTIONAL) &&
|
||||
! td_i->GetAttr(detail::ATTR_DEFAULT) )
|
||||
// A non-null jval here means the field holds a JSON null value;
|
||||
// a null jval means the key was not present at all.
|
||||
return util::fmt("required field %s$%s is %s in JSON", t->GetName().c_str(),
|
||||
td_i->id, member_is_null ? "null" : "missing");
|
||||
td_i->id, jval ? "null" : "missing");
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
auto v = BuildVal(m_it->value, td_i->type);
|
||||
auto v = BuildVal(*jval, td_i->type, key_func);
|
||||
if ( ! get_if<ValPtr>(&v) )
|
||||
return v;
|
||||
|
||||
|
@ -1342,7 +1381,7 @@ static std::variant<ValPtr, std::string> BuildVal(const rapidjson::Value& j, con
|
|||
|
||||
for ( size_t i = 0; i < lt->GetTypes().size(); i++ )
|
||||
{
|
||||
auto v = BuildVal(j.GetArray()[i], lt->GetTypes()[i]);
|
||||
auto v = BuildVal(j.GetArray()[i], lt->GetTypes()[i], key_func);
|
||||
if ( ! get_if<ValPtr>(&v) )
|
||||
return v;
|
||||
|
||||
|
@ -1361,7 +1400,7 @@ static std::variant<ValPtr, std::string> BuildVal(const rapidjson::Value& j, con
|
|||
auto vv = make_intrusive<VectorVal>(IntrusivePtr{NewRef{}, vt});
|
||||
for ( const auto& item : j.GetArray() )
|
||||
{
|
||||
auto v = BuildVal(item, vt->Yield());
|
||||
auto v = BuildVal(item, vt->Yield(), key_func);
|
||||
if ( ! get_if<ValPtr>(&v) )
|
||||
return v;
|
||||
|
||||
|
@ -1379,7 +1418,8 @@ static std::variant<ValPtr, std::string> BuildVal(const rapidjson::Value& j, con
|
|||
}
|
||||
}
|
||||
|
||||
std::variant<ValPtr, std::string> detail::ValFromJSON(std::string_view json_str, const TypePtr& t)
|
||||
std::variant<ValPtr, std::string> detail::ValFromJSON(std::string_view json_str, const TypePtr& t,
|
||||
const FuncPtr& key_func)
|
||||
{
|
||||
rapidjson::Document doc;
|
||||
rapidjson::ParseResult ok = doc.Parse(json_str.data(), json_str.length());
|
||||
|
@ -1388,7 +1428,7 @@ std::variant<ValPtr, std::string> detail::ValFromJSON(std::string_view json_str,
|
|||
return util::fmt("JSON parse error: %s Offset: %lu", rapidjson::GetParseError_En(ok.Code()),
|
||||
ok.Offset());
|
||||
|
||||
return BuildVal(doc, t);
|
||||
return BuildVal(doc, t, key_func);
|
||||
}
|
||||
|
||||
ValPtr StringVal::DoClone(CloneState* state)
|
||||
|
|
|
@ -1800,7 +1800,11 @@ namespace detail
|
|||
// Parses a JSON string into arbitrary Zeek data using std::variant to simulate functional exception
|
||||
// handling. Returns a ValPtr if parsing was successful, or a std::string containing an error
|
||||
// message if an error occurred.
|
||||
extern std::variant<ValPtr, std::string> ValFromJSON(std::string_view json_str, const TypePtr& t);
|
||||
//
|
||||
// The *key_func* parameter is a Zeek script function called for every JSON key
|
||||
// for normalization. If Func::nil is passed, no normalization happens.
|
||||
extern std::variant<ValPtr, std::string> ValFromJSON(std::string_view json_str, const TypePtr& t,
|
||||
const FuncPtr& key_func);
|
||||
}
|
||||
|
||||
} // namespace zeek
|
||||
|
|
16
src/zeek.bif
16
src/zeek.bif
|
@ -5522,14 +5522,19 @@ function to_json%(val: any, only_loggable: bool &default=F, field_escape_pattern
|
|||
##
|
||||
## t: Type of Zeek data.
|
||||
##
|
||||
## key_func: Optional function to normalize key names in JSON objects. Useful
|
||||
## when keys are not valid field identifiers, or represent reserved
|
||||
## keywords like **port** or **type**.
|
||||
##
|
||||
## returns: A value of type t.
|
||||
##
|
||||
## .. zeek:see:: to_json
|
||||
function from_json%(s: string, t: any%): from_json_result
|
||||
function from_json%(s: string, t: any, key_func: string_mapper &default=from_json_default_key_mapper%): from_json_result
|
||||
%{
|
||||
static auto result_type = zeek::id::find_type<RecordType>("from_json_result");
|
||||
static auto v_idx = result_type->FieldOffset("v");
|
||||
static auto valid_idx = result_type->FieldOffset("valid");
|
||||
static auto default_key_func_ptr = zeek::id::find_func("from_json_default_key_mapper");
|
||||
|
||||
auto rval = zeek::make_intrusive<RecordVal>(result_type);
|
||||
|
||||
|
@ -5540,7 +5545,14 @@ function from_json%(s: string, t: any%): from_json_result
|
|||
return rval;
|
||||
}
|
||||
|
||||
auto res = zeek::detail::ValFromJSON(s->ToStdStringView(), t->AsType()->AsTypeType()->GetType());
|
||||
// If key_func is the same as "from_json_default_key_mapper",
|
||||
// null it out so that no key normalization happens.
|
||||
auto key_func_ptr = key_func->AsFuncVal()->AsFuncPtr();
|
||||
if ( key_func_ptr == default_key_func_ptr )
|
||||
key_func_ptr = Func::nil;
|
||||
|
||||
auto res = zeek::detail::ValFromJSON(s->ToStdStringView(), t->AsType()->AsTypeType()->GetType(),
|
||||
key_func_ptr);
|
||||
|
||||
if ( auto val = std::get_if<zeek::ValPtr>(&res) )
|
||||
{
|
||||
|
|
|
@ -1,3 +1,3 @@
|
|||
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
|
||||
error in <...>/from_json.zeek, line 8: required field Foo$hello is missing in JSON (from_json({"t":null}, to_any_coerceFoo))
|
||||
error in <...>/from_json.zeek, line 9: required field Foo$hello is null in JSON (from_json({"hello": null, "t": true}, to_any_coerceFoo))
|
||||
error in <...>/from_json.zeek, line 8: required field Foo$hello is missing in JSON (from_json({"t":null}, to_any_coerceFoo, from_json_default_key_mapper))
|
||||
error in <...>/from_json.zeek, line 9: required field Foo$hello is null in JSON (from_json({"hello": null, "t": true}, to_any_coerceFoo, from_json_default_key_mapper))
|
||||
|
|
|
@ -1,2 +1,2 @@
|
|||
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
|
||||
error in <...>/from_json.zeek, line 4: from_json() requires a type argument (from_json([], to_any_coerce10))
|
||||
error in <...>/from_json.zeek, line 4: from_json() requires a type argument (from_json([], to_any_coerce10, from_json_default_key_mapper))
|
||||
|
|
|
@ -1,2 +1,2 @@
|
|||
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
|
||||
error in <...>/from_json.zeek, line 4: JSON parse error: Missing a closing quotation mark in string. Offset: 5 (from_json({"hel, to_any_coercestring_vec))
|
||||
error in <...>/from_json.zeek, line 4: JSON parse error: Missing a closing quotation mark in string. Offset: 5 (from_json({"hel, to_any_coercestring_vec, from_json_default_key_mapper))
|
||||
|
|
|
@ -1,3 +1,3 @@
|
|||
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
|
||||
error in <...>/from_json.zeek, line 9: cannot convert JSON type 'array' to Zeek type 'bool' (from_json([], to_any_coercebool_t))
|
||||
error in <...>/from_json.zeek, line 10: cannot convert JSON type 'string' to Zeek type 'bool' (from_json({"a": "hello"}, to_any_coerceFoo))
|
||||
error in <...>/from_json.zeek, line 9: cannot convert JSON type 'array' to Zeek type 'bool' (from_json([], to_any_coercebool_t, from_json_default_key_mapper))
|
||||
error in <...>/from_json.zeek, line 10: cannot convert JSON type 'string' to Zeek type 'bool' (from_json({"a": "hello"}, to_any_coerceFoo, from_json_default_key_mapper))
|
||||
|
|
|
@ -1,2 +1,2 @@
|
|||
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
|
||||
error in <...>/from_json.zeek, line 4: tables are not supported (from_json([], to_any_coercetable_string_of_string))
|
||||
error in <...>/from_json.zeek, line 4: tables are not supported (from_json([], to_any_coercetable_string_of_string, from_json_default_key_mapper))
|
||||
|
|
|
@ -1,2 +1,2 @@
|
|||
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
|
||||
error in <...>/from_json.zeek, line 5: wrong port format, must be <...>/(tcp|udp|icmp|unknown)/ (from_json("80", to_any_coerceport_t))
|
||||
error in <...>/from_json.zeek, line 5: wrong port format, must be <...>/(tcp|udp|icmp|unknown)/ (from_json("80", to_any_coerceport_t, from_json_default_key_mapper))
|
||||
|
|
|
@ -1,3 +1,3 @@
|
|||
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
|
||||
error in <...>/from_json.zeek, line 5: index type doesn't match (from_json([[1, false], [2]], to_any_coerceset_t))
|
||||
error in <...>/from_json.zeek, line 6: cannot convert JSON type 'number' to Zeek type 'bool' (from_json([[1, false], [2, 1]], to_any_coerceset_t))
|
||||
error in <...>/from_json.zeek, line 5: index type doesn't match (from_json([[1, false], [2]], to_any_coerceset_t, from_json_default_key_mapper))
|
||||
error in <...>/from_json.zeek, line 6: cannot convert JSON type 'number' to Zeek type 'bool' (from_json([[1, false], [2, 1]], to_any_coerceset_t, from_json_default_key_mapper))
|
||||
|
|
|
@ -1,3 +1,3 @@
|
|||
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
|
||||
error: error compiling pattern /^?(.|\n)*(([[:print:]]{-}[[:alnum:]]foo))/
|
||||
error in <...>/from_json.zeek, line 5: error compiling pattern (from_json("/([[:print:]]{-}[[:alnum:]]foo)/", to_any_coercepattern_t))
|
||||
error in <...>/from_json.zeek, line 5: error compiling pattern (from_json("/([[:print:]]{-}[[:alnum:]]foo)/", to_any_coercepattern_t, from_json_default_key_mapper))
|
||||
|
|
|
@ -1,2 +1,2 @@
|
|||
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
|
||||
error in <...>/from_json.zeek, line 7: 'Yellow' is not a valid enum for 'Color'. (from_json("Yellow", to_any_coerceColor))
|
||||
error in <...>/from_json.zeek, line 7: 'Yellow' is not a valid enum for 'Color'. (from_json("Yellow", to_any_coerceColor, from_json_default_key_mapper))
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue