diff --git a/CHANGES b/CHANGES index b88e7fb324..5938f8a746 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,15 @@ +4.2.0-dev.403 | 2021-12-01 10:25:32 -0700 + + * fix btest comment to more accurately describe the test (Vern Paxson, Corelight) + + * btests for erroneous script conditionals (Vern Paxson, Corelight) + + * avoid compiling-to-C++ for functions potentially influenced by conditionals (Vern Paxson, Corelight) + + * track the use of conditionals in functions and files (Vern Paxson, Corelight) + + * AST profiles track the associated function/body/expression (Vern Paxson, Corelight) + 4.2.0-dev.396 | 2021-12-01 09:44:03 -0700 * GH-1873: Deprecate the tag types differently to avoid type clashes (Tim Wojtulewicz, Corelight) diff --git a/VERSION b/VERSION index 6d817459fa..af5d9a7a37 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -4.2.0-dev.396 +4.2.0-dev.403 diff --git a/src/Var.cc b/src/Var.cc index 4067f17e4c..dc8a04251b 100644 --- a/src/Var.cc +++ b/src/Var.cc @@ -18,6 +18,7 @@ #include "zeek/Val.h" #include "zeek/module_util.h" #include "zeek/script_opt/ScriptOpt.h" +#include "zeek/script_opt/StmtOptInfo.h" namespace zeek::detail { @@ -717,7 +718,7 @@ TraversalCode OuterIDBindingFinder::PostExpr(const Expr* expr) static bool duplicate_ASTs = getenv("ZEEK_DUPLICATE_ASTS"); -void end_func(StmtPtr body) +void end_func(StmtPtr body, bool free_of_conditionals) { if ( duplicate_ASTs && reporter->Errors() == 0 ) // Only try duplication in the absence of errors. If errors @@ -729,6 +730,8 @@ void end_func(StmtPtr body) // by duplicating can itself be correctly duplicated. body = body->Duplicate()->Duplicate(); + body->GetOptInfo()->is_free_of_conditionals = free_of_conditionals; + auto ingredients = std::make_unique(pop_scope(), std::move(body)); if ( ingredients->id->HasVal() ) diff --git a/src/Var.h b/src/Var.h index 2950afa2bf..85f62f4d0f 100644 --- a/src/Var.h +++ b/src/Var.h @@ -45,7 +45,7 @@ extern void add_type(ID* id, TypePtr t, std::unique_ptr> at extern void begin_func(IDPtr id, const char* module_name, FunctionFlavor flavor, bool is_redef, FuncTypePtr t, std::unique_ptr> attrs = nullptr); -extern void end_func(StmtPtr body); +extern void end_func(StmtPtr body, bool free_of_conditionals); // Gather all IDs referenced inside a body that aren't part of a given scope. extern IDPList gather_outer_ids(ScopePtr scope, StmtPtr body); diff --git a/src/parse.y b/src/parse.y index d3d3710840..e7bf8fddac 100644 --- a/src/parse.y +++ b/src/parse.y @@ -105,6 +105,11 @@ extern const char* last_filename; // Absolute path of last file parsed. extern const char* last_tok_filename; extern const char* last_last_tok_filename; +extern int conditional_epoch; // let's us track embedded conditionals + +// Whether the file we're currently parsing includes @if conditionals. +extern bool current_file_has_conditionals; + YYLTYPE GetCurrentLocation(); extern int yyerror(const char[]); extern int brolex(); @@ -138,6 +143,7 @@ bool defining_global_ID = false; std::vector saved_in_init; static Location func_hdr_location; +static int func_hdr_cond_epoch = 0; EnumType* cur_enum_type = nullptr; static ID* cur_decl_type_id = nullptr; @@ -1214,16 +1220,19 @@ decl: zeekygen_mgr->Identifier(std::move(id)); } - | func_hdr { func_hdr_location = @1; } func_body - - | func_hdr { func_hdr_location = @1; } conditional_list func_body + | func_hdr + { + func_hdr_location = @1; + func_hdr_cond_epoch = conditional_epoch; + } + conditional_list func_body | conditional ; conditional_list: - conditional - | conditional conditional_list + | conditional_list conditional + ; conditional: TOK_ATIF '(' expr ')' @@ -1296,7 +1305,13 @@ func_body: '}' { set_location(func_hdr_location, @5); - end_func({AdoptRef{}, $3}); + + bool free_of_conditionals = true; + if ( current_file_has_conditionals || + conditional_epoch > func_hdr_cond_epoch ) + free_of_conditionals = false; + + end_func({AdoptRef{}, $3}, free_of_conditionals); } ; diff --git a/src/scan.l b/src/scan.l index 0301494e2f..6b1c7bf138 100644 --- a/src/scan.l +++ b/src/scan.l @@ -48,7 +48,22 @@ extern YYLTYPE yylloc; // holds start line and column of token extern zeek::EnumType* cur_enum_type; // Track the @if... depth. -std::intptr_t current_depth = 0; +static std::intptr_t conditional_depth = 0; + +zeek::detail::int_list entry_cond_depth; // @if depth upon starting file + +// Tracks how many conditionals there have been. This value only +// increases. Its value is to support logic such as figuring out +// whether a function body has a conditional within it by comparing +// the epoch at the beginning of parsing the body with that at the end. +int conditional_epoch = 0; + +// Whether the current file has included conditionals (so far). +bool current_file_has_conditionals = false; + +// The files that include conditionals. Not currently used, but will be +// in the future once we add --optimize-files=/pat/. +std::unordered_set files_with_conditionals; zeek::detail::int_list if_stack; @@ -99,6 +114,18 @@ static std::string find_relative_script_file(const std::string& filename) return zeek::util::find_script_file(filename, zeek::util::zeek_path()); } +static void start_conditional() + { + ++conditional_depth; + ++conditional_epoch; + + if ( ! current_file_has_conditionals ) + // First time we've observed that this file includes conditionals. + files_with_conditionals.insert(::filename); + + current_file_has_conditionals = true; + } + class FileInfo { public: FileInfo(std::string restore_module = ""); @@ -418,11 +445,11 @@ when return TOK_WHEN; @ifdef return TOK_ATIFDEF; @ifndef return TOK_ATIFNDEF; @else return TOK_ATELSE; -@endif --current_depth; +@endif do_atendif(); -@if ++current_depth; -@ifdef ++current_depth; -@ifndef ++current_depth; +@if start_conditional(); +@ifdef start_conditional(); +@ifndef start_conditional(); @else return TOK_ATELSE; @endif return TOK_ATENDIF; [^@\r\n]+ /* eat */ @@ -639,17 +666,19 @@ static int load_files(const char* orig_file) zeek::detail::zeekygen_mgr->Script(file_path); - // "orig_file", could be an alias for yytext, which is ephemeral + // "orig_file" could be an alias for yytext, which is ephemeral // and will be zapped after the yy_switch_to_buffer() below. YY_BUFFER_STATE buffer; - if ( rc.first == 1 ) { + if ( rc.first == 1 ) + { // Parse code provided by plugin. assert(rc.second); DBG_LOG(zeek::DBG_SCRIPTS, "Loading %s from code supplied by plugin ", file_path.c_str()); buffer = yy_scan_bytes(rc.second->data(), rc.second->size()); // this copies the data } - else { + else + { // Parse from file. assert(f); DBG_LOG(zeek::DBG_SCRIPTS, "Loading %s", file_path.c_str()); @@ -663,6 +692,8 @@ static int load_files(const char* orig_file) // every Obj created when parsing it. yylloc.filename = filename = zeek::util::copy_string(file_path.c_str()); + entry_cond_depth.push_back(conditional_depth); + return 1; } @@ -693,9 +724,21 @@ public: std::vector local_names; }; +static void begin_ignoring() + { + if_stack.push_back(conditional_depth); + BEGIN(IGNORE); + } + +static void resume_processing() + { + if_stack.pop_back(); + BEGIN(INITIAL); + } + void do_atif(zeek::detail::Expr* expr) { - ++current_depth; + start_conditional(); LocalNameFinder cb; expr->Traverse(&cb); @@ -716,70 +759,52 @@ void do_atif(zeek::detail::Expr* expr) } if ( ! val->AsBool() ) - { - if_stack.push_back(current_depth); - BEGIN(IGNORE); - } + begin_ignoring(); } void do_atifdef(const char* id) { - ++current_depth; + start_conditional(); const auto& i = zeek::detail::lookup_ID(id, zeek::detail::current_module.c_str()); if ( ! i ) - { - if_stack.push_back(current_depth); - BEGIN(IGNORE); - } + begin_ignoring(); } void do_atifndef(const char *id) { - ++current_depth; + start_conditional(); const auto& i = zeek::detail::lookup_ID(id, zeek::detail::current_module.c_str()); if ( i ) - { - if_stack.push_back(current_depth); - BEGIN(IGNORE); - } + begin_ignoring(); } void do_atelse() { - if ( current_depth == 0 ) + if ( conditional_depth == 0 ) zeek::reporter->Error("@else without @if..."); - if ( ! if_stack.empty() && current_depth > if_stack.back() ) + if ( ! if_stack.empty() && conditional_depth > if_stack.back() ) return; if ( YY_START == INITIAL ) - { - if_stack.push_back(current_depth); - BEGIN(IGNORE); - } + begin_ignoring(); else - { - if_stack.pop_back(); - BEGIN(INITIAL); - } + resume_processing(); } void do_atendif() { - if ( current_depth == 0 ) + if ( conditional_depth <= entry_cond_depth.back() ) zeek::reporter->Error("unbalanced @if... @endif"); - if ( current_depth == if_stack.back() ) - { - BEGIN(INITIAL); - if_stack.pop_back(); - } + if ( ! if_stack.empty() && conditional_depth == if_stack.back() ) + resume_processing(); - --current_depth; + --conditional_depth; } // Be careful to never delete things from this list, as the strings @@ -840,7 +865,15 @@ void add_to_name_list(char* s, char delim, zeek::name_list& nl) int yywrap() { + if ( entry_cond_depth.size() > 0 ) + { + if ( conditional_depth > entry_cond_depth.back() ) + zeek::reporter->FatalError("unbalanced @if... @endif"); + entry_cond_depth.pop_back(); + } + last_filename = ::filename; + current_file_has_conditionals = false; if ( zeek::reporter->Errors() > 0 ) return 1; diff --git a/src/script_opt/CPP/Util.cc b/src/script_opt/CPP/Util.cc index 1c64c9af21..8c6e6d0a91 100644 --- a/src/script_opt/CPP/Util.cc +++ b/src/script_opt/CPP/Util.cc @@ -6,6 +6,8 @@ #include #include +#include "zeek/script_opt/StmtOptInfo.h" + namespace zeek::detail { @@ -50,6 +52,14 @@ bool is_CPP_compilable(const ProfileFunc* pf, const char** reason) return false; } + auto body = pf->ProfiledBody(); + if ( body && ! body->GetOptInfo()->is_free_of_conditionals ) + { + if ( reason ) + *reason = "body may be affected by @if conditional"; + return false; + } + return true; } diff --git a/src/script_opt/ProfileFunc.cc b/src/script_opt/ProfileFunc.cc index da6802ff99..b7ccae6f68 100644 --- a/src/script_opt/ProfileFunc.cc +++ b/src/script_opt/ProfileFunc.cc @@ -60,12 +60,23 @@ p_hash_type script_specific_hash(const StmtPtr& body, p_hash_type generic_hash) ProfileFunc::ProfileFunc(const Func* func, const StmtPtr& body, bool _abs_rec_fields) { + profiled_func = func; + profiled_body = body.get(); abs_rec_fields = _abs_rec_fields; Profile(func->GetType().get(), body); } +ProfileFunc::ProfileFunc(const Stmt* s, bool _abs_rec_fields) + { + profiled_body = s; + abs_rec_fields = _abs_rec_fields; + s->Traverse(this); + } + ProfileFunc::ProfileFunc(const Expr* e, bool _abs_rec_fields) { + profiled_expr = e; + abs_rec_fields = _abs_rec_fields; if ( e->Tag() == EXPR_LAMBDA ) @@ -84,12 +95,6 @@ ProfileFunc::ProfileFunc(const Expr* e, bool _abs_rec_fields) e->Traverse(this); } -ProfileFunc::ProfileFunc(const Stmt* s, bool _abs_rec_fields) - { - abs_rec_fields = _abs_rec_fields; - s->Traverse(this); - } - void ProfileFunc::Profile(const FuncType* ft, const StmtPtr& body) { num_params = ft->Params()->NumFields(); diff --git a/src/script_opt/ProfileFunc.h b/src/script_opt/ProfileFunc.h index dbc275eb66..2ccb47f6d6 100644 --- a/src/script_opt/ProfileFunc.h +++ b/src/script_opt/ProfileFunc.h @@ -100,6 +100,12 @@ public: ProfileFunc(const Stmt* body, bool abs_rec_fields = false); ProfileFunc(const Expr* func, bool abs_rec_fields = false); + // Returns the function, body, or expression profiled. Each can be + // null depending on the constructor used. + const Func* ProfiledFunc() const { return profiled_func; } + const Stmt* ProfiledBody() const { return profiled_body; } + const Expr* ProfiledExpr() const { return profiled_expr; } + // See the comments for the associated member variables for each // of these accessors. const std::unordered_set& Globals() const { return globals; } @@ -157,6 +163,12 @@ protected: // Take note of an assignment to an identifier. void TrackAssignment(const ID* id); + // The function, body, or expression profiled. Can be null + // depending on which constructor was used. + const Func* profiled_func = nullptr; + const Stmt* profiled_body = nullptr; + const Expr* profiled_expr = nullptr; + // Globals seen in the function. // // Does *not* include globals solely seen as the function being diff --git a/src/script_opt/StmtOptInfo.h b/src/script_opt/StmtOptInfo.h index 2edcb47e7e..449ecc0e61 100644 --- a/src/script_opt/StmtOptInfo.h +++ b/src/script_opt/StmtOptInfo.h @@ -22,6 +22,10 @@ public: // True if we observe that there is a branch out of the statement // to just beyond its extent, such as due to a "break". bool contains_branch_beyond = false; + + // Whether this statement is free of the possible influence + // of conditional code. + bool is_free_of_conditionals = true; }; } // namespace zeek::detail diff --git a/testing/btest/Baseline/language.dangling-at/.stderr b/testing/btest/Baseline/language.dangling-at/.stderr new file mode 100644 index 0000000000..dac4eed6f6 --- /dev/null +++ b/testing/btest/Baseline/language.dangling-at/.stderr @@ -0,0 +1,2 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +fatal error in <...>/dangling-at.zeek, line 9: unbalanced @if... @endif diff --git a/testing/btest/Baseline/language.orphan-endif/.stderr b/testing/btest/Baseline/language.orphan-endif/.stderr new file mode 100644 index 0000000000..087a9dd1ed --- /dev/null +++ b/testing/btest/Baseline/language.orphan-endif/.stderr @@ -0,0 +1,2 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +error in <...>/orphan-endif.zeek, line 8: unbalanced @if... @endif diff --git a/testing/btest/language/dangling-at.zeek b/testing/btest/language/dangling-at.zeek new file mode 100644 index 0000000000..f8e7eceef7 --- /dev/null +++ b/testing/btest/language/dangling-at.zeek @@ -0,0 +1,8 @@ +# @TEST-EXEC-FAIL: zeek -b %INPUT +# @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-remove-abspath btest-diff .stderr +# Check that dangling conditionals are detected. + +@if ( 1==1 ) +print "it's true!"; +@else +lalala diff --git a/testing/btest/language/orphan-endif.zeek b/testing/btest/language/orphan-endif.zeek new file mode 100644 index 0000000000..ecead2e6a2 --- /dev/null +++ b/testing/btest/language/orphan-endif.zeek @@ -0,0 +1,9 @@ +# @TEST-EXEC-FAIL: zeek -b %INPUT +# @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-remove-abspath btest-diff .stderr +# Check that orphan endif's are detected. + +@if ( T ) +print "so far, so good"; +@endif +@endif +print "whoops!";