diff --git a/NEWS b/NEWS index 7d38482c91..f40752aa08 100644 --- a/NEWS +++ b/NEWS @@ -31,6 +31,10 @@ New Functionality the break statement within ``assertion_failure()`` or ``assertion_result()`` allows to suppress the default message. +- The ``from_json()`` function now takes an optional ``key_func`` argument to + normalize JSON object key names. This can be useful if the keys in a JSON + object are not valid Zeek identifiers or are reserved keywords. + Changed Functionality --------------------- diff --git a/scripts/base/init-bare.zeek b/scripts/base/init-bare.zeek index b891e0792f..2352566129 100644 --- a/scripts/base/init-bare.zeek +++ b/scripts/base/init-bare.zeek @@ -131,6 +131,13 @@ type files_tag_set: set[Files::Tag]; ## directly and then remove this alias. type interval_set: set[interval]; +## Function mapping a string to a string. +## +## .. todo:: We need this type definition only for declaring builtin functions +## via ``bifcl``. We should extend ``bifcl`` to understand composite types +## directly and then remove this alias. +type string_mapper: function(s: string): string; + ## A structure indicating a MIME type and strength of a match against ## file magic signatures. ## @@ -1129,6 +1136,12 @@ type entropy_test_result: record { serial_correlation: double; ##< Serial correlation coefficient. }; +## The default JSON key mapper function. Identity function. +function from_json_default_key_mapper(s: string): string + { + return s; + } + ## Return type for from_json BIF. ## ## .. 
zeek:see:: from_json diff --git a/src/Val.cc b/src/Val.cc index 3cb1d08a63..0288cc1738 100644 --- a/src/Val.cc +++ b/src/Val.cc @@ -1064,7 +1064,8 @@ StringValPtr StringVal::Replace(RE_Matcher* re, const String& repl, bool do_all) return make_intrusive(new String(true, result, r - result)); } -static std::variant BuildVal(const rapidjson::Value& j, const TypePtr& t) +static std::variant BuildVal(const rapidjson::Value& j, const TypePtr& t, + const FuncPtr& key_func) { auto mismatch_err = [t, &j]() { @@ -1278,9 +1279,9 @@ static std::variant BuildVal(const rapidjson::Value& j, con std::variant v; if ( tl->GetTypes().size() == 1 ) - v = BuildVal(item, tl->GetPureType()); + v = BuildVal(item, tl->GetPureType(), key_func); else - v = BuildVal(item, tl); + v = BuildVal(item, tl, key_func); if ( ! get_if(&v) ) return v; @@ -1301,24 +1302,62 @@ static std::variant BuildVal(const rapidjson::Value& j, con auto rt = t->AsRecordType(); auto rv = make_intrusive(IntrusivePtr{NewRef{}, rt}); + + std::map normalized_keys; + + // If key_func is given, map all JSON keys and store in above map. + if ( key_func ) + { + for ( auto it = j.MemberBegin(); it != j.MemberEnd(); it++ ) + { + ValPtr result; + try + { + result = key_func->Invoke( + zeek::make_intrusive(it->name.GetString())); + } + catch ( InterpreterException& ) + { + /* Already reported. */ + } + + if ( ! result ) + return "key function error"; + + normalized_keys[result->AsStringVal()->CheckString()] = &it->value; + } + } + + // Now lookup record fields using the normalized input. for ( int i = 0; i < rt->NumFields(); ++i ) { - auto td_i = rt->FieldDecl(i); - auto m_it = j.FindMember(td_i->id); - bool has_member = m_it != j.MemberEnd(); - bool member_is_null = has_member && m_it->value.IsNull(); + const auto td_i = rt->FieldDecl(i); + const rapidjson::Value* jval = nullptr; - if ( ! has_member || member_is_null ) + if ( key_func ) + { + auto m_it = normalized_keys.find(td_i->id); + jval = m_it != normalized_keys.end() ? 
m_it->second : nullptr; + } + else + { + auto m_it = j.FindMember(td_i->id); + jval = m_it != j.MemberEnd() ? &m_it->value : nullptr; + } + + if ( ! jval || jval->IsNull() ) { if ( ! td_i->GetAttr(detail::ATTR_OPTIONAL) && ! td_i->GetAttr(detail::ATTR_DEFAULT) ) + // jval being set means it is a null JSON value else + // it wasn't even there. return util::fmt("required field %s$%s is %s in JSON", t->GetName().c_str(), - td_i->id, member_is_null ? "null" : "missing"); + td_i->id, jval ? "null" : "missing"); continue; } - auto v = BuildVal(m_it->value, td_i->type); + auto v = BuildVal(*jval, td_i->type, key_func); if ( ! get_if(&v) ) return v; @@ -1342,7 +1381,7 @@ static std::variant BuildVal(const rapidjson::Value& j, con for ( size_t i = 0; i < lt->GetTypes().size(); i++ ) { - auto v = BuildVal(j.GetArray()[i], lt->GetTypes()[i]); + auto v = BuildVal(j.GetArray()[i], lt->GetTypes()[i], key_func); if ( ! get_if(&v) ) return v; @@ -1361,7 +1400,7 @@ static std::variant BuildVal(const rapidjson::Value& j, con auto vv = make_intrusive(IntrusivePtr{NewRef{}, vt}); for ( const auto& item : j.GetArray() ) { - auto v = BuildVal(item, vt->Yield()); + auto v = BuildVal(item, vt->Yield(), key_func); if ( ! 
get_if(&v) ) return v; @@ -1379,7 +1418,8 @@ static std::variant BuildVal(const rapidjson::Value& j, con } } -std::variant detail::ValFromJSON(std::string_view json_str, const TypePtr& t) +std::variant detail::ValFromJSON(std::string_view json_str, const TypePtr& t, + const FuncPtr& key_func) { rapidjson::Document doc; rapidjson::ParseResult ok = doc.Parse(json_str.data(), json_str.length()); @@ -1388,7 +1428,7 @@ std::variant detail::ValFromJSON(std::string_view json_str, return util::fmt("JSON parse error: %s Offset: %lu", rapidjson::GetParseError_En(ok.Code()), ok.Offset()); - return BuildVal(doc, t); + return BuildVal(doc, t, key_func); } ValPtr StringVal::DoClone(CloneState* state) diff --git a/src/Val.h b/src/Val.h index 629dbe624e..75dd006df3 100644 --- a/src/Val.h +++ b/src/Val.h @@ -1800,7 +1800,11 @@ namespace detail // Parses a JSON string into arbitrary Zeek data using std::variant to simulate functional exception // handling. Returns a ValPtr if parsing was successful, or a std::string containing an error // message if an error occurred. -extern std::variant ValFromJSON(std::string_view json_str, const TypePtr& t); +// +// The *key_func* parameter is a Zeek script function called for every JSON key +// for normalization. If Func::nil is passed, no normalization happens. +extern std::variant ValFromJSON(std::string_view json_str, const TypePtr& t, + const FuncPtr& key_func); } } // namespace zeek diff --git a/src/zeek.bif b/src/zeek.bif index f1ddc87274..b3b57eb6b3 100644 --- a/src/zeek.bif +++ b/src/zeek.bif @@ -5522,14 +5522,19 @@ function to_json%(val: any, only_loggable: bool &default=F, field_escape_pattern ## ## t: Type of Zeek data. ## +## key_func: Optional function to normalize key names in JSON objects. Useful +## when keys are not valid field identifiers, or represent reserved +## keywords like **port** or **type**. +## ## returns: A value of type t. ## ## .. 
zeek:see:: to_json -function from_json%(s: string, t: any%): from_json_result +function from_json%(s: string, t: any, key_func: string_mapper &default=from_json_default_key_mapper%): from_json_result %{ static auto result_type = zeek::id::find_type("from_json_result"); static auto v_idx = result_type->FieldOffset("v"); static auto valid_idx = result_type->FieldOffset("valid"); + static auto default_key_func_ptr = zeek::id::find_func("from_json_default_key_mapper"); auto rval = zeek::make_intrusive(result_type); @@ -5540,7 +5545,14 @@ function from_json%(s: string, t: any%): from_json_result return rval; } - auto res = zeek::detail::ValFromJSON(s->ToStdStringView(), t->AsType()->AsTypeType()->GetType()); + // If key_func is the same as "from_json_default_key_mapper", + // null it out so that no key normalization happens. + auto key_func_ptr = key_func->AsFuncVal()->AsFuncPtr(); + if ( key_func_ptr == default_key_func_ptr ) + key_func_ptr = Func::nil; + + auto res = zeek::detail::ValFromJSON(s->ToStdStringView(), t->AsType()->AsTypeType()->GetType(), + key_func_ptr); if ( auto val = std::get_if(&res) ) { diff --git a/testing/btest/Baseline/bifs.from_json-11/.stderr b/testing/btest/Baseline/bifs.from_json-11/.stderr index 1438154b7a..1d41754de1 100644 --- a/testing/btest/Baseline/bifs.from_json-11/.stderr +++ b/testing/btest/Baseline/bifs.from_json-11/.stderr @@ -1,3 +1,3 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. 
-error in <...>/from_json.zeek, line 8: required field Foo$hello is missing in JSON (from_json({"t":null}, to_any_coerceFoo)) -error in <...>/from_json.zeek, line 9: required field Foo$hello is null in JSON (from_json({"hello": null, "t": true}, to_any_coerceFoo)) +error in <...>/from_json.zeek, line 8: required field Foo$hello is missing in JSON (from_json({"t":null}, to_any_coerceFoo, from_json_default_key_mapper)) +error in <...>/from_json.zeek, line 9: required field Foo$hello is null in JSON (from_json({"hello": null, "t": true}, to_any_coerceFoo, from_json_default_key_mapper)) diff --git a/testing/btest/Baseline/bifs.from_json-2/.stderr b/testing/btest/Baseline/bifs.from_json-2/.stderr index 1ce3c885b8..e038a036a5 100644 --- a/testing/btest/Baseline/bifs.from_json-2/.stderr +++ b/testing/btest/Baseline/bifs.from_json-2/.stderr @@ -1,2 +1,2 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. -error in <...>/from_json.zeek, line 4: from_json() requires a type argument (from_json([], to_any_coerce10)) +error in <...>/from_json.zeek, line 4: from_json() requires a type argument (from_json([], to_any_coerce10, from_json_default_key_mapper)) diff --git a/testing/btest/Baseline/bifs.from_json-3/.stderr b/testing/btest/Baseline/bifs.from_json-3/.stderr index cd9437efcb..473f1078b5 100644 --- a/testing/btest/Baseline/bifs.from_json-3/.stderr +++ b/testing/btest/Baseline/bifs.from_json-3/.stderr @@ -1,2 +1,2 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. -error in <...>/from_json.zeek, line 4: JSON parse error: Missing a closing quotation mark in string. Offset: 5 (from_json({"hel, to_any_coercestring_vec)) +error in <...>/from_json.zeek, line 4: JSON parse error: Missing a closing quotation mark in string. 
Offset: 5 (from_json({"hel, to_any_coercestring_vec, from_json_default_key_mapper)) diff --git a/testing/btest/Baseline/bifs.from_json-4/.stderr b/testing/btest/Baseline/bifs.from_json-4/.stderr index 210255a35f..1d5ac55355 100644 --- a/testing/btest/Baseline/bifs.from_json-4/.stderr +++ b/testing/btest/Baseline/bifs.from_json-4/.stderr @@ -1,3 +1,3 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. -error in <...>/from_json.zeek, line 9: cannot convert JSON type 'array' to Zeek type 'bool' (from_json([], to_any_coercebool_t)) -error in <...>/from_json.zeek, line 10: cannot convert JSON type 'string' to Zeek type 'bool' (from_json({"a": "hello"}, to_any_coerceFoo)) +error in <...>/from_json.zeek, line 9: cannot convert JSON type 'array' to Zeek type 'bool' (from_json([], to_any_coercebool_t, from_json_default_key_mapper)) +error in <...>/from_json.zeek, line 10: cannot convert JSON type 'string' to Zeek type 'bool' (from_json({"a": "hello"}, to_any_coerceFoo, from_json_default_key_mapper)) diff --git a/testing/btest/Baseline/bifs.from_json-5/.stderr b/testing/btest/Baseline/bifs.from_json-5/.stderr index b08347df71..ca29a3113e 100644 --- a/testing/btest/Baseline/bifs.from_json-5/.stderr +++ b/testing/btest/Baseline/bifs.from_json-5/.stderr @@ -1,2 +1,2 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. 
-error in <...>/from_json.zeek, line 4: tables are not supported (from_json([], to_any_coercetable_string_of_string)) +error in <...>/from_json.zeek, line 4: tables are not supported (from_json([], to_any_coercetable_string_of_string, from_json_default_key_mapper)) diff --git a/testing/btest/Baseline/bifs.from_json-6/.stderr b/testing/btest/Baseline/bifs.from_json-6/.stderr index fa015b8031..64c105aca7 100644 --- a/testing/btest/Baseline/bifs.from_json-6/.stderr +++ b/testing/btest/Baseline/bifs.from_json-6/.stderr @@ -1,2 +1,2 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. -error in <...>/from_json.zeek, line 5: wrong port format, must be <...>/(tcp|udp|icmp|unknown)/ (from_json("80", to_any_coerceport_t)) +error in <...>/from_json.zeek, line 5: wrong port format, must be <...>/(tcp|udp|icmp|unknown)/ (from_json("80", to_any_coerceport_t, from_json_default_key_mapper)) diff --git a/testing/btest/Baseline/bifs.from_json-7/.stderr b/testing/btest/Baseline/bifs.from_json-7/.stderr index b3789904c6..69de81bdc1 100644 --- a/testing/btest/Baseline/bifs.from_json-7/.stderr +++ b/testing/btest/Baseline/bifs.from_json-7/.stderr @@ -1,3 +1,3 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. 
-error in <...>/from_json.zeek, line 5: index type doesn't match (from_json([[1, false], [2]], to_any_coerceset_t)) -error in <...>/from_json.zeek, line 6: cannot convert JSON type 'number' to Zeek type 'bool' (from_json([[1, false], [2, 1]], to_any_coerceset_t)) +error in <...>/from_json.zeek, line 5: index type doesn't match (from_json([[1, false], [2]], to_any_coerceset_t, from_json_default_key_mapper)) +error in <...>/from_json.zeek, line 6: cannot convert JSON type 'number' to Zeek type 'bool' (from_json([[1, false], [2, 1]], to_any_coerceset_t, from_json_default_key_mapper)) diff --git a/testing/btest/Baseline/bifs.from_json-8/.stderr b/testing/btest/Baseline/bifs.from_json-8/.stderr index 18bb4fa2eb..58419f633f 100644 --- a/testing/btest/Baseline/bifs.from_json-8/.stderr +++ b/testing/btest/Baseline/bifs.from_json-8/.stderr @@ -1,3 +1,3 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. error: error compiling pattern /^?(.|\n)*(([[:print:]]{-}[[:alnum:]]foo))/ -error in <...>/from_json.zeek, line 5: error compiling pattern (from_json("/([[:print:]]{-}[[:alnum:]]foo)/", to_any_coercepattern_t)) +error in <...>/from_json.zeek, line 5: error compiling pattern (from_json("/([[:print:]]{-}[[:alnum:]]foo)/", to_any_coercepattern_t, from_json_default_key_mapper)) diff --git a/testing/btest/Baseline/bifs.from_json-9/.stderr b/testing/btest/Baseline/bifs.from_json-9/.stderr index 60cbecd370..1e4425a1cc 100644 --- a/testing/btest/Baseline/bifs.from_json-9/.stderr +++ b/testing/btest/Baseline/bifs.from_json-9/.stderr @@ -1,2 +1,2 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. -error in <...>/from_json.zeek, line 7: 'Yellow' is not a valid enum for 'Color'. (from_json("Yellow", to_any_coerceColor)) +error in <...>/from_json.zeek, line 7: 'Yellow' is not a valid enum for 'Color'. 
(from_json("Yellow", to_any_coerceColor, from_json_default_key_mapper))