From 7294bb34becf15ff018e54180b9fe15fd67b54e2 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 18 Mar 2021 08:40:48 -0700 Subject: [PATCH 1/4] fixes for propagating optimization options, and pruning script function analysis --- src/script_opt/ScriptOpt.cc | 4 ++++ src/zeek-setup.cc | 8 ++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/script_opt/ScriptOpt.cc b/src/script_opt/ScriptOpt.cc index ecfd939e4a..78d99ff0e5 100644 --- a/src/script_opt/ScriptOpt.cc +++ b/src/script_opt/ScriptOpt.cc @@ -146,6 +146,10 @@ void FuncInfo::SetProfile(std::shared_ptr _pf) void analyze_func(ScriptFuncPtr f) { + if ( analysis_options.only_func && + *analysis_options.only_func != f->Name() ) + return; + funcs.emplace_back(f, ScopePtr{NewRef{}, f->GetScope()}, f->CurrentBody()); } diff --git a/src/zeek-setup.cc b/src/zeek-setup.cc index bf3db660f0..51e3162c2b 100644 --- a/src/zeek-setup.cc +++ b/src/zeek-setup.cc @@ -394,6 +394,10 @@ SetupResult setup(int argc, char** argv, Options* zopts) auto options = zopts ? *zopts : parse_cmdline(argc, argv); + // Set up the global that facilitates access to analysis/optimization + // options from deep within some modules. + analysis_options = options.analysis_options; + if ( options.print_usage ) usage(argv[0], 0); @@ -748,10 +752,6 @@ SetupResult setup(int argc, char** argv, Options* zopts) } } - // Set up the global that facilitates access to analysis/optimization - // options from deep within some modules. - analysis_options = options.analysis_options; - analyze_scripts(); if ( analysis_options.report_recursive ) From b473bc48e122468719bbf9ad4e65b71b25b22f50 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 18 Mar 2021 08:46:18 -0700 Subject: [PATCH 2/4] remove iffy reliance on type punning that relies on interpreter's behavior --- scripts/base/frameworks/config/main.zeek | 10 +++------ src/option.bif | 27 ++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/scripts/base/frameworks/config/main.zeek b/scripts/base/frameworks/config/main.zeek index 5fe7f6eaaf..2d87e3c09e 100644 --- a/scripts/base/frameworks/config/main.zeek +++ b/scripts/base/frameworks/config/main.zeek @@ -127,13 +127,9 @@ function format_value(value: any) : string if ( /^set/ in tn && strstr(tn, ",") == 0 ) { - # The conversion to set here is tricky and assumes - # that the set isn't indexed via a tuple of types. - # The above check for commas in the type name - # ensures this. - local it: set[bool] = value; - for ( sv in it ) - part += cat(sv); + local vec = Option::any_set_to_any_vec(value); + for ( sv in vec ) + part += cat(vec[sv]); return join_string_vec(part, ","); } else if ( /^vector/ in tn ) diff --git a/src/option.bif b/src/option.bif index 496baae0cc..ad40ae388c 100644 --- a/src/option.bif +++ b/src/option.bif @@ -213,3 +213,30 @@ function Option::set_change_handler%(ID: string, on_change: any, priority: int & i->AddOptionHandler(func, -priority); return zeek::val_mgr->True(); %} + +## Helper function that converts a set (of arbitrary index type) to +## a "vector of any". +## +## v: an "any" type corresponding to a set. +## +## Returns: a vector-of-any with one element for each member of v. +function Option::any_set_to_any_vec%(v: any%): any_vec + %{ + auto ret_type = make_intrusive(base_type(TYPE_ANY)); + auto ret = make_intrusive(ret_type); + const auto& t = v->GetType(); + + if ( ! t->IsSet() ) + { + zeek::emit_builtin_error("argument is not a set"); + return ret; + } + + auto lv = v->AsTableVal()->ToListVal(); + auto n = lv->Length(); + + for ( int i = 0; i < n; ++i ) + ret->Assign(i, lv->Idx(i)); + + return ret; + %} From b3ee7ec6757f6aef7f72cc0fa3bdc6ae18b63876 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 18 Mar 2021 08:56:58 -0700 Subject: [PATCH 3/4] avoid infinite recursion in same_type() if it is analyzing recursive types --- src/Type.cc | 197 ++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 161 insertions(+), 36 deletions(-) diff --git a/src/Type.cc b/src/Type.cc index 4f86a97253..4969d56dc2 100644 --- a/src/Type.cc +++ b/src/Type.cc @@ -1583,6 +1583,19 @@ bool same_type(const Type& arg_t1, const Type& arg_t2, return false; } + // A major complication we have to deal with is the potential + // presence of recursive types (records, in particular). If + // we simply traverse a type's members recursively, then if the + // type is itself recursive we will end up with infinite recursion. + // To prevent this, we need to instead track our analysis process + + // Which types we're in the process of analyzing. We add (compound) + // types to this as we recurse into their elements, and remove them + // when we're done processing them. + static std::unordered_set analyzed_types; + + // First do all checks that don't require any recursion. + switch ( t1->Tag() ) { case TYPE_VOID: case TYPE_BOOL: @@ -1608,6 +1621,13 @@ bool same_type(const Type& arg_t1, const Type& arg_t2, // true per default? return true; + case TYPE_OPAQUE: + { + const OpaqueType* ot1 = (const OpaqueType*) t1; + const OpaqueType* ot2 = (const OpaqueType*) t2; + return ot1->Name() == ot2->Name(); + } + case TYPE_TABLE: { const IndexType* it1 = (const IndexType*) t1; @@ -1616,22 +1636,16 @@ bool same_type(const Type& arg_t1, const Type& arg_t2, const auto& tl1 = it1->GetIndices(); const auto& tl2 = it2->GetIndices(); - if ( tl1 || tl2 ) - { - if ( ! tl1 || ! tl2 || ! same_type(tl1, tl2, is_init, match_record_field_names) ) - return false; - } + if ( (tl1 || tl2) && ! (tl1 && tl2) ) + return false; const auto& y1 = t1->Yield(); const auto& y2 = t2->Yield(); - if ( y1 || y2 ) - { - if ( ! y1 || ! y2 || ! same_type(y1, y2, is_init, match_record_field_names) ) - return false; - } + if ( (y1 || y2) && ! (y1 && y2) ) + return false; - return true; + break; } case TYPE_FUNC: @@ -1642,14 +1656,12 @@ bool same_type(const Type& arg_t1, const Type& arg_t2, if ( ft1->Flavor() != ft2->Flavor() ) return false; - if ( t1->Yield() || t2->Yield() ) - { - if ( ! t1->Yield() || ! t2->Yield() || - ! same_type(t1->Yield(), t2->Yield(), is_init, match_record_field_names) ) - return false; - } + const auto& y1 = t1->Yield(); + const auto& y2 = t2->Yield(); + if ( (y1 || y2) && ! (y1 && y2) ) + return false; - return ft1->CheckArgs(ft2->ParamList()->GetTypes(), is_init, false); + break; } case TYPE_RECORD: @@ -1665,15 +1677,15 @@ bool same_type(const Type& arg_t1, const Type& arg_t2, const TypeDecl* td1 = rt1->FieldDecl(i); const TypeDecl* td2 = rt2->FieldDecl(i); - if ( (match_record_field_names && ! util::streq(td1->id, td2->id)) || - ! same_type(td1->type, td2->type, is_init, match_record_field_names) ) + if ( match_record_field_names && + ! util::streq(td1->id, td2->id) ) return false; if ( ! same_attrs(td1->attrs.get(), td2->attrs.get()) ) return false; } - return true; + break; } case TYPE_LIST: @@ -1684,36 +1696,149 @@ bool same_type(const Type& arg_t1, const Type& arg_t2, if ( tl1.size() != tl2.size() ) return false; - for ( auto i = 0u; i < tl1.size(); ++i ) - if ( ! same_type(tl1[i], tl2[i], is_init, match_record_field_names) ) - return false; - - return true; + break; } case TYPE_VECTOR: case TYPE_FILE: - return same_type(t1->Yield(), t2->Yield(), is_init, match_record_field_names); + case TYPE_TYPE: + break; - case TYPE_OPAQUE: + case TYPE_UNION: + reporter->Error("union type in same_type()"); + } + + // If we get to here, then we're dealing with a type with + // subtypes, and thus potentially recursive. + + if ( analyzed_types.count(t1) > 0 || analyzed_types.count(t2) > 0 ) { - const OpaqueType* ot1 = (const OpaqueType*) t1; - const OpaqueType* ot2 = (const OpaqueType*) t2; - return ot1->Name() == ot2->Name(); + // We've analyzed at least one of the types previously. + // Avoid infinite recursion. + + if ( analyzed_types.count(t1) > 0 && + analyzed_types.count(t2) > 0 ) + // We've analyzed them both. In theory, this + // could happen while the types are still different. + // Checking for that is a pain - we could do so + // by recursively expanding all of the types present + // when traversing them (suppressing repeats), and + // see that they individually match in a non-recursive + // manner. For now, we assume they're a direct match. + return true; + + // One is definitely recursive and the other has not yet + // manifested as such. In theory, they again could still + // be a match, if the non-recursive one would manifest + // becoming recursive if only we traversed it further, but + // for now we assume they're not a match. + return false; } + // Track the two types for when we recurse. + analyzed_types.insert(t1); + analyzed_types.insert(t2); + + bool result; + + switch ( t1->Tag() ) { + case TYPE_TABLE: + { + const IndexType* it1 = (const IndexType*) t1; + const IndexType* it2 = (const IndexType*) t2; + + const auto& tl1 = it1->GetIndices(); + const auto& tl2 = it2->GetIndices(); + + if ( ! same_type(tl1, tl2, is_init, match_record_field_names) ) + result = false; + else + { + const auto& y1 = t1->Yield(); + const auto& y2 = t2->Yield(); + + result = same_type(y1, y2, is_init, + match_record_field_names); + } + break; + } + + case TYPE_FUNC: + { + const FuncType* ft1 = (const FuncType*) t1; + const FuncType* ft2 = (const FuncType*) t2; + + if ( ! same_type(t1->Yield(), t2->Yield(), is_init, + match_record_field_names) ) + result = false; + else + result = ft1->CheckArgs(ft2->ParamList()->GetTypes(), + is_init, false); + break; + } + + case TYPE_RECORD: + { + const RecordType* rt1 = (const RecordType*) t1; + const RecordType* rt2 = (const RecordType*) t2; + + result = true; + + for ( int i = 0; i < rt1->NumFields(); ++i ) + { + const TypeDecl* td1 = rt1->FieldDecl(i); + const TypeDecl* td2 = rt2->FieldDecl(i); + + if ( ! same_type(td1->type, td2->type, is_init, + match_record_field_names) ) + { + result = false; + break; + } + } + break; + } + + case TYPE_LIST: + { + const auto& tl1 = t1->AsTypeList()->GetTypes(); + const auto& tl2 = t2->AsTypeList()->GetTypes(); + + result = true; + + for ( auto i = 0u; i < tl1.size(); ++i ) + if ( ! same_type(tl1[i], tl2[i], is_init, + match_record_field_names) ) + { + result = false; + break; + } + break; + } + + case TYPE_VECTOR: + case TYPE_FILE: + result = same_type(t1->Yield(), t2->Yield(), + is_init, match_record_field_names); + break; + case TYPE_TYPE: { auto tt1 = t1->AsTypeType(); auto tt2 = t2->AsTypeType(); - return same_type(tt1->GetType(), tt1->GetType(), - is_init, match_record_field_names); + result = same_type(tt1->GetType(), tt1->GetType(), + is_init, match_record_field_names); + break; } - case TYPE_UNION: - reporter->Error("union type in same_type()"); + default: + result = false; } - return false; + + analyzed_types.erase(t1); + analyzed_types.erase(t2); + + return result; } bool same_attrs(const detail::Attributes* a1, const detail::Attributes* a2) From e407d8ab51f196fb867e834cff09b0dc7ca1ecf1 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 18 Mar 2021 08:58:03 -0700 Subject: [PATCH 4/4] fix for associating current scope with the name of enums; name tidying --- src/parse.y | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/parse.y b/src/parse.y index d8ce7ec819..ac7a924e40 100644 --- a/src/parse.y +++ b/src/parse.y @@ -143,18 +143,21 @@ static zeek::detail::Location func_hdr_location; zeek::EnumType* cur_enum_type = nullptr; static zeek::detail::ID* cur_decl_type_id = nullptr; -static void parser_new_enum (void) +static void parse_new_enum (void) { /* Starting a new enum definition. */ assert(cur_enum_type == nullptr); if ( cur_decl_type_id ) - cur_enum_type = new zeek::EnumType(cur_decl_type_id->Name()); + { + auto name = zeek::detail::make_full_var_name(current_module.c_str(), cur_decl_type_id->Name()); + cur_enum_type = new zeek::EnumType(name); + } else zeek::reporter->FatalError("incorrect syntax for enum type declaration"); } -static void parser_redef_enum (zeek::detail::ID *id) +static void parse_redef_enum (zeek::detail::ID* id) { /* Redef an enum. id points to the enum to be redefined. Let cur_enum_type point to it. */ @@ -958,7 +961,7 @@ type: $$ = 0; } - | TOK_ENUM '{' { zeek::detail::set_location(@1); parser_new_enum(); } enum_body '}' + | TOK_ENUM '{' { zeek::detail::set_location(@1); parse_new_enum(); } enum_body '}' { zeek::detail::set_location(@1, @5); $4->UpdateLocationEndInfo(@5); @@ -1151,7 +1154,7 @@ decl: } | TOK_REDEF TOK_ENUM global_id TOK_ADD_TO '{' - { ++in_enum_redef; parser_redef_enum($3); zeek::detail::zeekygen_mgr->Redef($3, ::filename); } + { ++in_enum_redef; parse_redef_enum($3); zeek::detail::zeekygen_mgr->Redef($3, ::filename); } enum_body '}' ';' { --in_enum_redef;