// See the file "COPYING" in the main distribution directory for copyright. #include "zeek/script_opt/GenIDDefs.h" #include "zeek/Desc.h" #include "zeek/Expr.h" #include "zeek/Reporter.h" #include "zeek/Scope.h" #include "zeek/script_opt/Expr.h" #include "zeek/script_opt/ScriptOpt.h" #include "zeek/script_opt/StmtOptInfo.h" namespace zeek::detail { GenIDDefs::GenIDDefs(std::shared_ptr _pf, const FuncPtr& f, ScopePtr scope, StmtPtr body) : pf(std::move(_pf)) { TraverseFunction(f, scope, body); } void GenIDDefs::TraverseFunction(const FuncPtr& f, ScopePtr scope, StmtPtr body) { func_flavor = f->Flavor(); // Establish the outermost set of identifiers. modified_IDs.emplace_back(); for ( const auto& g : pf->Globals() ) { g->GetOptInfo()->Clear(); TrackID(g); } // Clear the locals before processing the arguments, since // they're included among the locals. for ( const auto& l : pf->Locals() ) l->GetOptInfo()->Clear(); const auto& args = scope->OrderedVars(); int nparam = f->GetType()->Params()->NumFields(); for ( const auto& a : args ) { if ( --nparam < 0 ) break; a->GetOptInfo()->Clear(); TrackID(a); } stmt_num = 0; // 0 = "before the first statement" body->Traverse(this); } TraversalCode GenIDDefs::PreStmt(const Stmt* s) { last_stmt_traversed = s; auto si = s->GetOptInfo(); si->stmt_num = ++stmt_num; si->block_level = confluence_blocks.size() + 1; switch ( s->Tag() ) { case STMT_CATCH_RETURN: { auto cr = s->AsCatchReturnStmt(); auto block = cr->Block(); cr_active.push_back(confluence_blocks.size()); // Confluence for the bodies of catch-return's is a bit complex. // We would like any expressions computed at the outermost level // of the body to be available for script optimization *outside* // the catch-return; this in particular is helpful in optimizing // coalesced event handlers, but has other benefits as well. // // However, if one of the outermost statements executes a "return", // then any outermost expressions computed after it might not // be available. Put another way, the potentially-returning // statement starts a confluence region that runs through the end // of the body. // // To deal with this, we start off without a new confluence block, // but create one upon encountering a statement that could return. bool did_confluence = false; if ( block->Tag() == STMT_LIST ) { auto prev_stmt = s; auto& stmts = block->AsStmtList()->Stmts(); for ( auto& st : stmts ) { if ( ! did_confluence && st->CouldReturn(false) ) { StartConfluenceBlock(prev_stmt); did_confluence = true; } st->Traverse(this); } } else // If there's just a single statement then there are no // expressions computed subsequent to it that we need to // worry about, so just do ordinary traversal. block->Traverse(this); if ( did_confluence ) EndConfluenceBlock(); cr_active.pop_back(); auto retvar = cr->RetVar(); if ( retvar ) TrackID(retvar->Id()); return TC_ABORTSTMT; } case STMT_IF: { auto i = s->AsIfStmt(); auto cond = i->StmtExpr(); auto t_branch = i->TrueBranch(); auto f_branch = i->FalseBranch(); cond->Traverse(this); StartConfluenceBlock(s); t_branch->Traverse(this); if ( ! t_branch->NoFlowAfter(false) ) BranchBeyond(last_stmt_traversed, s, true); f_branch->Traverse(this); if ( ! f_branch->NoFlowAfter(false) ) BranchBeyond(last_stmt_traversed, s, true); EndConfluenceBlock(true); return TC_ABORTSTMT; } case STMT_SWITCH: AnalyzeSwitch(s->AsSwitchStmt()); return TC_ABORTSTMT; case STMT_FOR: { auto f = s->AsForStmt(); auto ids = f->LoopVars(); auto e = f->LoopExpr(); auto body = f->LoopBody(); auto val_var = f->ValueVar(); e->Traverse(this); for ( const auto& id : *ids ) TrackID(id); if ( val_var ) TrackID(val_var); StartConfluenceBlock(s); body->Traverse(this); if ( ! body->NoFlowAfter(false) ) BranchBackTo(last_stmt_traversed, s, true); EndConfluenceBlock(); return TC_ABORTSTMT; } case STMT_WHILE: { auto w = s->AsWhileStmt(); StartConfluenceBlock(s); auto cond_pred_stmt = w->CondPredStmt(); if ( cond_pred_stmt ) cond_pred_stmt->Traverse(this); // Important to traverse the condition in its version // interpreted as a statement, so that when evaluating // its variable usage, that's done in the context of // *after* cond_pred_stmt executes, rather than as // part of that execution. auto cond_stmt = w->ConditionAsStmt(); cond_stmt->Traverse(this); auto body = w->Body(); body->Traverse(this); if ( ! body->NoFlowAfter(false) ) BranchBackTo(last_stmt_traversed, s, true); EndConfluenceBlock(); return TC_ABORTSTMT; } default: return TC_CONTINUE; } } void GenIDDefs::AnalyzeSwitch(const SwitchStmt* sw) { sw->StmtExpr()->Traverse(this); for ( const auto& c : *sw->Cases() ) { // Important: the confluence block is the switch statement // itself, not the case body. This is needed so that variable // assignments made inside case bodies that end with // "fallthrough" are correctly propagated to the next case // body. StartConfluenceBlock(sw); auto body = c->Body(); auto exprs = c->ExprCases(); if ( exprs ) exprs->Traverse(this); auto type_ids = c->TypeCases(); if ( type_ids ) { for ( const auto& id : *type_ids ) if ( id->Name() ) TrackID(id); } body->Traverse(this); EndConfluenceBlock(false); } } TraversalCode GenIDDefs::PostStmt(const Stmt* s) { switch ( s->Tag() ) { case STMT_INIT: { auto init = s->AsInitStmt(); auto& inits = init->Inits(); for ( const auto& id : inits ) { auto id_t = id->GetType(); // Only aggregates get initialized. if ( zeek::IsAggr(id->GetType()->Tag()) ) TrackID(id); } break; } case STMT_RETURN: ReturnAt(s); break; case STMT_NEXT: BranchBackTo(last_stmt_traversed, FindLoop(), false); break; case STMT_BREAK: { auto target = FindBreakTarget(); if ( target ) BranchBeyond(s, target, false); else { ASSERT(func_flavor == FUNC_FLAVOR_HOOK); ReturnAt(s); } break; } case STMT_FALLTHROUGH: // No need to do anything, the work all occurs // with NoFlowAfter. break; default: break; } return TC_CONTINUE; } TraversalCode GenIDDefs::PreExpr(const Expr* e) { e->GetOptInfo()->stmt_num = stmt_num; switch ( e->Tag() ) { case EXPR_NAME: CheckVarUsage(e, e->AsNameExpr()->Id()); break; case EXPR_ASSIGN: { auto lhs = e->GetOp1(); auto op2 = e->GetOp2(); if ( lhs->Tag() == EXPR_LIST && op2->GetType()->Tag() != TYPE_ANY ) { // This combination occurs only for assignments used // to initialize table entries. Treat it as references // to both the lhs and the rhs, not as an assignment. return TC_CONTINUE; } op2->Traverse(this); if ( ! CheckLHS(lhs, op2) ) // Not a simple assignment (or group of assignments), // so analyze the accesses to check for use of // possibly undefined values. lhs->Traverse(this); return TC_ABORTSTMT; } case EXPR_COND: // Special hack. We turn off checking for usage issues // inside conditionals. This is because we use them heavily // to deconstruct logical expressions for which the actual // operand access is safe (guaranteed not to access a value // that hasn't been undefined), but the flow analysis has // trouble determining that. ++suppress_usage; e->GetOp1()->Traverse(this); e->GetOp2()->Traverse(this); e->GetOp3()->Traverse(this); --suppress_usage; return TC_ABORTSTMT; case EXPR_LAMBDA: { auto l = static_cast(e); const auto& ids = l->OuterIDs(); for ( auto& id : ids ) CheckVarUsage(e, id); // Don't descend into the lambda body - we'll analyze and // optimize it separately, as its own function. return TC_ABORTSTMT; } default: break; } return TC_CONTINUE; } TraversalCode GenIDDefs::PostExpr(const Expr* e) { // Attend to expressions that reflect assignments after // execution, but for which the assignment target was // also an accessed value (so if we analyzed them // in PreExpr then we'd have had to do manual traversals // of their operands). auto t = e->Tag(); if ( t == EXPR_INCR || t == EXPR_DECR || t == EXPR_ADD_TO || t == EXPR_REMOVE_FROM ) { auto op = e->GetOp1(); if ( ! IsAggr(op) ) (void)CheckLHS(op); } return TC_CONTINUE; } bool GenIDDefs::CheckLHS(const ExprPtr& lhs, const ExprPtr& rhs) { switch ( lhs->Tag() ) { case EXPR_REF: return CheckLHS(lhs->GetOp1(), rhs); case EXPR_NAME: { auto n = lhs->AsNameExpr(); TrackID(n->Id(), rhs); return true; } case EXPR_LIST: { // look for [a, b, c] = any_val auto l = lhs->AsListExpr(); for ( const auto& expr : l->Exprs() ) { if ( expr->Tag() != EXPR_NAME ) // This will happen for table initializers, // for example. return false; auto n = expr->AsNameExpr(); TrackID(n->Id()); } return true; } case EXPR_FIELD: // If we want to track record field initializations, // we'd handle that here. return false; case EXPR_INDEX: // If we wanted to track potential alterations of // aggregates, we'd do that here. return false; default: reporter->InternalError("bad tag in GenIDDefs::CheckLHS"); } } bool GenIDDefs::IsAggr(const Expr* e) const { if ( e->Tag() != EXPR_NAME ) return false; auto n = e->AsNameExpr(); auto id = n->Id(); auto tag = id->GetType()->Tag(); return zeek::IsAggr(tag); } void GenIDDefs::CheckVarUsage(const Expr* e, const ID* id) { if ( analysis_options.usage_issues != 1 || id->IsGlobal() || suppress_usage > 0 ) return; auto oi = id->GetOptInfo(); if ( ! oi->DidUndefinedWarning() && ! oi->IsDefinedBefore(last_stmt_traversed) && ! id->GetAttr(ATTR_IS_ASSIGNED) ) { if ( ! oi->IsPossiblyDefinedBefore(last_stmt_traversed) ) { e->Warn("used without definition"); oi->SetDidUndefinedWarning(); } else if ( ! oi->DidPossiblyUndefinedWarning() ) { e->Warn("possibly used without definition"); oi->SetDidPossiblyUndefinedWarning(); } } } void GenIDDefs::StartConfluenceBlock(const Stmt* s) { confluence_blocks.push_back(s); modified_IDs.emplace_back(); } void GenIDDefs::EndConfluenceBlock(bool no_orig) { for ( auto id : modified_IDs.back() ) id->GetOptInfo()->ConfluenceBlockEndsAfter(last_stmt_traversed, no_orig); confluence_blocks.pop_back(); modified_IDs.pop_back(); } void GenIDDefs::BranchBackTo(const Stmt* from, const Stmt* to, bool close_all) { for ( auto id : modified_IDs.back() ) id->GetOptInfo()->BranchBackTo(from, to, close_all); } void GenIDDefs::BranchBeyond(const Stmt* from, const Stmt* to, bool close_all) { for ( auto id : modified_IDs.back() ) id->GetOptInfo()->BranchBeyond(from, to, close_all); to->GetOptInfo()->contains_branch_beyond = true; } const Stmt* GenIDDefs::FindLoop() { int i = confluence_blocks.size() - 1; while ( i >= 0 ) { auto t = confluence_blocks[i]->Tag(); if ( t == STMT_WHILE || t == STMT_FOR ) break; --i; } ASSERT(i >= 0); return confluence_blocks[i]; } const Stmt* GenIDDefs::FindBreakTarget() { int i = confluence_blocks.size() - 1; while ( i >= 0 ) { auto cb = confluence_blocks[i]; auto t = cb->Tag(); if ( t == STMT_WHILE || t == STMT_FOR || t == STMT_SWITCH ) return cb; --i; } return nullptr; } void GenIDDefs::ReturnAt(const Stmt* s) { // If we're right at a catch-return then we don't want to make the // identifier as encountering a scope-ending "return" here. By avoiding // that, we're able to do optimization across catch-return blocks. if ( cr_active.empty() || cr_active.back() != confluence_blocks.size() ) for ( auto id : modified_IDs.back() ) id->GetOptInfo()->ReturnAt(s); } void GenIDDefs::TrackID(const ID* id, const ExprPtr& e) { auto oi = id->GetOptInfo(); // The 4th argument here is hardwired to 0, meaning "assess across all // confluence blocks". If we want definitions inside catch-return bodies // to not propagate outside those bodies, we'd instead create new // confluence blocks for catch-return statements, and use their identifier // here to set the lowest limit for definitions. For now we leave // DefinedAfter as capable of supporting that distinction in case we // find need to revive it in the future. oi->DefinedAfter(last_stmt_traversed, e, confluence_blocks, 0); // Ensure we track this identifier across all relevant // confluence regions. for ( auto i = 0U; i < confluence_blocks.size(); ++i ) // Add one because modified_IDs includes outer non-confluence // block. modified_IDs[i + 1].insert(id); if ( confluence_blocks.empty() ) // This is a definition at the outermost level. modified_IDs[0].insert(id); } } // namespace zeek::detail