From e73351a6e1881da6615b99ee1c190c35fb4c798d Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 24 Nov 2021 15:19:01 -0800 Subject: [PATCH] track the use of conditionals in functions and files --- src/Var.cc | 5 +- src/Var.h | 2 +- src/parse.y | 27 +++++++-- src/scan.l | 113 ++++++++++++++++++++++------------- src/script_opt/StmtOptInfo.h | 4 ++ 5 files changed, 103 insertions(+), 48 deletions(-) diff --git a/src/Var.cc b/src/Var.cc index 4067f17e4c..dc8a04251b 100644 --- a/src/Var.cc +++ b/src/Var.cc @@ -18,6 +18,7 @@ #include "zeek/Val.h" #include "zeek/module_util.h" #include "zeek/script_opt/ScriptOpt.h" +#include "zeek/script_opt/StmtOptInfo.h" namespace zeek::detail { @@ -717,7 +718,7 @@ TraversalCode OuterIDBindingFinder::PostExpr(const Expr* expr) static bool duplicate_ASTs = getenv("ZEEK_DUPLICATE_ASTS"); -void end_func(StmtPtr body) +void end_func(StmtPtr body, bool free_of_conditionals) { if ( duplicate_ASTs && reporter->Errors() == 0 ) // Only try duplication in the absence of errors. If errors @@ -729,6 +730,8 @@ void end_func(StmtPtr body) // by duplicating can itself be correctly duplicated. body = body->Duplicate()->Duplicate(); + body->GetOptInfo()->is_free_of_conditionals = free_of_conditionals; + auto ingredients = std::make_unique(pop_scope(), std::move(body)); if ( ingredients->id->HasVal() ) diff --git a/src/Var.h b/src/Var.h index 2950afa2bf..85f62f4d0f 100644 --- a/src/Var.h +++ b/src/Var.h @@ -45,7 +45,7 @@ extern void add_type(ID* id, TypePtr t, std::unique_ptr> at extern void begin_func(IDPtr id, const char* module_name, FunctionFlavor flavor, bool is_redef, FuncTypePtr t, std::unique_ptr> attrs = nullptr); -extern void end_func(StmtPtr body); +extern void end_func(StmtPtr body, bool free_of_conditionals); // Gather all IDs referenced inside a body that aren't part of a given scope. extern IDPList gather_outer_ids(ScopePtr scope, StmtPtr body); diff --git a/src/parse.y b/src/parse.y index d3d3710840..e7bf8fddac 100644 --- a/src/parse.y +++ b/src/parse.y @@ -105,6 +105,11 @@ extern const char* last_filename; // Absolute path of last file parsed. extern const char* last_tok_filename; extern const char* last_last_tok_filename; +extern int conditional_epoch; // let's us track embedded conditionals + +// Whether the file we're currently parsing includes @if conditionals. +extern bool current_file_has_conditionals; + YYLTYPE GetCurrentLocation(); extern int yyerror(const char[]); extern int brolex(); @@ -138,6 +143,7 @@ bool defining_global_ID = false; std::vector saved_in_init; static Location func_hdr_location; +static int func_hdr_cond_epoch = 0; EnumType* cur_enum_type = nullptr; static ID* cur_decl_type_id = nullptr; @@ -1214,16 +1220,19 @@ decl: zeekygen_mgr->Identifier(std::move(id)); } - | func_hdr { func_hdr_location = @1; } func_body - - | func_hdr { func_hdr_location = @1; } conditional_list func_body + | func_hdr + { + func_hdr_location = @1; + func_hdr_cond_epoch = conditional_epoch; + } + conditional_list func_body | conditional ; conditional_list: - conditional - | conditional conditional_list + | conditional_list conditional + ; conditional: TOK_ATIF '(' expr ')' @@ -1296,7 +1305,13 @@ func_body: '}' { set_location(func_hdr_location, @5); - end_func({AdoptRef{}, $3}); + + bool free_of_conditionals = true; + if ( current_file_has_conditionals || + conditional_epoch > func_hdr_cond_epoch ) + free_of_conditionals = false; + + end_func({AdoptRef{}, $3}, free_of_conditionals); } ; diff --git a/src/scan.l b/src/scan.l index 0301494e2f..6b1c7bf138 100644 --- a/src/scan.l +++ b/src/scan.l @@ -48,7 +48,22 @@ extern YYLTYPE yylloc; // holds start line and column of token extern zeek::EnumType* cur_enum_type; // Track the @if... depth. -std::intptr_t current_depth = 0; +static std::intptr_t conditional_depth = 0; + +zeek::detail::int_list entry_cond_depth; // @if depth upon starting file + +// Tracks how many conditionals there have been. This value only +// increases. Its value is to support logic such as figuring out +// whether a function body has a conditional within it by comparing +// the epoch at the beginning of parsing the body with that at the end. +int conditional_epoch = 0; + +// Whether the current file has included conditionals (so far). +bool current_file_has_conditionals = false; + +// The files that include conditionals. Not currently used, but will be +// in the future once we add --optimize-files=/pat/. +std::unordered_set files_with_conditionals; zeek::detail::int_list if_stack; @@ -99,6 +114,18 @@ static std::string find_relative_script_file(const std::string& filename) return zeek::util::find_script_file(filename, zeek::util::zeek_path()); } +static void start_conditional() + { + ++conditional_depth; + ++conditional_epoch; + + if ( ! current_file_has_conditionals ) + // First time we've observed that this file includes conditionals. + files_with_conditionals.insert(::filename); + + current_file_has_conditionals = true; + } + class FileInfo { public: FileInfo(std::string restore_module = ""); @@ -418,11 +445,11 @@ when return TOK_WHEN; @ifdef return TOK_ATIFDEF; @ifndef return TOK_ATIFNDEF; @else return TOK_ATELSE; -@endif --current_depth; +@endif do_atendif(); -@if ++current_depth; -@ifdef ++current_depth; -@ifndef ++current_depth; +@if start_conditional(); +@ifdef start_conditional(); +@ifndef start_conditional(); @else return TOK_ATELSE; @endif return TOK_ATENDIF; [^@\r\n]+ /* eat */ @@ -639,17 +666,19 @@ static int load_files(const char* orig_file) zeek::detail::zeekygen_mgr->Script(file_path); - // "orig_file", could be an alias for yytext, which is ephemeral + // "orig_file" could be an alias for yytext, which is ephemeral // and will be zapped after the yy_switch_to_buffer() below. YY_BUFFER_STATE buffer; - if ( rc.first == 1 ) { + if ( rc.first == 1 ) + { // Parse code provided by plugin. assert(rc.second); DBG_LOG(zeek::DBG_SCRIPTS, "Loading %s from code supplied by plugin ", file_path.c_str()); buffer = yy_scan_bytes(rc.second->data(), rc.second->size()); // this copies the data } - else { + else + { // Parse from file. assert(f); DBG_LOG(zeek::DBG_SCRIPTS, "Loading %s", file_path.c_str()); @@ -663,6 +692,8 @@ static int load_files(const char* orig_file) // every Obj created when parsing it. yylloc.filename = filename = zeek::util::copy_string(file_path.c_str()); + entry_cond_depth.push_back(conditional_depth); + return 1; } @@ -693,9 +724,21 @@ public: std::vector local_names; }; +static void begin_ignoring() + { + if_stack.push_back(conditional_depth); + BEGIN(IGNORE); + } + +static void resume_processing() + { + if_stack.pop_back(); + BEGIN(INITIAL); + } + void do_atif(zeek::detail::Expr* expr) { - ++current_depth; + start_conditional(); LocalNameFinder cb; expr->Traverse(&cb); @@ -716,70 +759,52 @@ void do_atif(zeek::detail::Expr* expr) } if ( ! val->AsBool() ) - { - if_stack.push_back(current_depth); - BEGIN(IGNORE); - } + begin_ignoring(); } void do_atifdef(const char* id) { - ++current_depth; + start_conditional(); const auto& i = zeek::detail::lookup_ID(id, zeek::detail::current_module.c_str()); if ( ! i ) - { - if_stack.push_back(current_depth); - BEGIN(IGNORE); - } + begin_ignoring(); } void do_atifndef(const char *id) { - ++current_depth; + start_conditional(); const auto& i = zeek::detail::lookup_ID(id, zeek::detail::current_module.c_str()); if ( i ) - { - if_stack.push_back(current_depth); - BEGIN(IGNORE); - } + begin_ignoring(); } void do_atelse() { - if ( current_depth == 0 ) + if ( conditional_depth == 0 ) zeek::reporter->Error("@else without @if..."); - if ( ! if_stack.empty() && current_depth > if_stack.back() ) + if ( ! if_stack.empty() && conditional_depth > if_stack.back() ) return; if ( YY_START == INITIAL ) - { - if_stack.push_back(current_depth); - BEGIN(IGNORE); - } + begin_ignoring(); else - { - if_stack.pop_back(); - BEGIN(INITIAL); - } + resume_processing(); } void do_atendif() { - if ( current_depth == 0 ) + if ( conditional_depth <= entry_cond_depth.back() ) zeek::reporter->Error("unbalanced @if... @endif"); - if ( current_depth == if_stack.back() ) - { - BEGIN(INITIAL); - if_stack.pop_back(); - } + if ( ! if_stack.empty() && conditional_depth == if_stack.back() ) + resume_processing(); - --current_depth; + --conditional_depth; } // Be careful to never delete things from this list, as the strings @@ -840,7 +865,15 @@ void add_to_name_list(char* s, char delim, zeek::name_list& nl) int yywrap() { + if ( entry_cond_depth.size() > 0 ) + { + if ( conditional_depth > entry_cond_depth.back() ) + zeek::reporter->FatalError("unbalanced @if... @endif"); + entry_cond_depth.pop_back(); + } + last_filename = ::filename; + current_file_has_conditionals = false; if ( zeek::reporter->Errors() > 0 ) return 1; diff --git a/src/script_opt/StmtOptInfo.h b/src/script_opt/StmtOptInfo.h index 2edcb47e7e..449ecc0e61 100644 --- a/src/script_opt/StmtOptInfo.h +++ b/src/script_opt/StmtOptInfo.h @@ -22,6 +22,10 @@ public: // True if we observe that there is a branch out of the statement // to just beyond its extent, such as due to a "break". bool contains_branch_beyond = false; + + // Whether this statement is free of the possible influence + // of conditional code. + bool is_free_of_conditionals = true; }; } // namespace zeek::detail