mirror of
https://github.com/zeek/zeek.git
synced 2025-10-11 02:58:20 +00:00
507 lines
15 KiB
C++
507 lines
15 KiB
C++
// See the file "COPYING" in the main distribution directory for copyright.
|
|
|
|
#include "zeek/script_opt/GenIDDefs.h"
|
|
|
|
#include "zeek/Desc.h"
|
|
#include "zeek/Expr.h"
|
|
#include "zeek/Reporter.h"
|
|
#include "zeek/Scope.h"
|
|
#include "zeek/script_opt/Expr.h"
|
|
#include "zeek/script_opt/ScriptOpt.h"
|
|
#include "zeek/script_opt/StmtOptInfo.h"
|
|
|
|
namespace zeek::detail {
|
|
|
|
// Takes ownership of the function profile and immediately analyzes the
// given function body.
//
//   _pf:   profile of the function; supplies its globals and locals.
//   f:     the function being analyzed.
//   scope: the function's scope; its ordered variables begin with the
//          parameters.
//   body:  the function body to traverse.
GenIDDefs::GenIDDefs(std::shared_ptr<ProfileFunc> _pf, const FuncPtr& f, ScopePtr scope, StmtPtr body)
    : pf(std::move(_pf)) {
    // "scope" and "body" are our own by-value copies and aren't used
    // again here, so hand them off instead of copying them a second time.
    TraverseFunction(f, std::move(scope), std::move(body));
}
|
|
|
|
void GenIDDefs::TraverseFunction(const FuncPtr& f, ScopePtr scope, StmtPtr body) {
|
|
func_flavor = f->Flavor();
|
|
|
|
// Establish the outermost set of identifiers.
|
|
modified_IDs.emplace_back();
|
|
|
|
for ( const auto& g : pf->Globals() ) {
|
|
g->GetOptInfo()->Clear();
|
|
TrackID(g);
|
|
}
|
|
|
|
// Clear the locals before processing the arguments, since
|
|
// they're included among the locals.
|
|
for ( const auto& l : pf->Locals() )
|
|
l->GetOptInfo()->Clear();
|
|
|
|
const auto& args = scope->OrderedVars();
|
|
int nparam = f->GetType()->Params()->NumFields();
|
|
|
|
for ( const auto& a : args ) {
|
|
if ( --nparam < 0 )
|
|
break;
|
|
|
|
a->GetOptInfo()->Clear();
|
|
TrackID(a);
|
|
}
|
|
|
|
stmt_num = 0; // 0 = "before the first statement"
|
|
|
|
body->Traverse(this);
|
|
}
|
|
|
|
// Pre-order statement callback. Assigns the statement its number and
// block level, and for statements that introduce confluence
// (catch-return, if, switch, for, while) performs the traversal of their
// components manually, returning TC_ABORTSTMT so that the generic
// traversal doesn't visit them a second time.
TraversalCode GenIDDefs::PreStmt(const Stmt* s) {
    last_stmt_traversed = s;

    auto opt_info = s->GetOptInfo();
    opt_info->stmt_num = ++stmt_num;
    opt_info->block_level = confluence_blocks.size() + 1;

    switch ( s->Tag() ) {
        case STMT_CATCH_RETURN: {
            auto catch_ret = s->AsCatchReturnStmt();
            auto cr_body = catch_ret->Block();

            cr_active.push_back(confluence_blocks.size());

            // Confluence for catch-return bodies needs some care. We
            // want expressions computed at the outermost level of the
            // body to remain available for optimization *outside* the
            // catch-return, which helps in particular with coalesced
            // event handlers (and has other benefits too).
            //
            // If an outermost statement might execute a "return",
            // though, then outermost expressions computed after it might
            // not be available. In effect, the potentially-returning
            // statement begins a confluence region that extends to the
            // end of the body.
            //
            // So: begin without a confluence block, and open one the
            // first time we reach a statement that could return.
            bool opened_confluence = false;

            if ( cr_body->Tag() != STMT_LIST ) {
                // A lone statement has nothing computed after it that
                // we'd need to worry about, so ordinary traversal
                // suffices.
                cr_body->Traverse(this);
            }
            else {
                for ( const auto& stmt : cr_body->AsStmtList()->Stmts() ) {
                    if ( ! opened_confluence && stmt->CouldReturn(false) ) {
                        StartConfluenceBlock(s);
                        opened_confluence = true;
                    }

                    stmt->Traverse(this);
                }
            }

            if ( opened_confluence )
                EndConfluenceBlock();

            cr_active.pop_back();

            // The return variable, if present, is defined once the
            // catch-return has executed.
            if ( auto ret_var = catch_ret->RetVar() )
                TrackID(ret_var->Id());

            return TC_ABORTSTMT;
        }

        case STMT_IF: {
            auto if_stmt = s->AsIfStmt();
            auto true_br = if_stmt->TrueBranch();
            auto false_br = if_stmt->FalseBranch();

            // The condition is evaluated before the confluence block
            // for the two branches begins.
            if_stmt->StmtExpr()->Traverse(this);

            StartConfluenceBlock(s);

            true_br->Traverse(this);
            if ( ! true_br->NoFlowAfter(false) )
                BranchBeyond(last_stmt_traversed, s, true);

            false_br->Traverse(this);
            if ( ! false_br->NoFlowAfter(false) )
                BranchBeyond(last_stmt_traversed, s, true);

            EndConfluenceBlock(true);

            return TC_ABORTSTMT;
        }

        case STMT_SWITCH: {
            AnalyzeSwitch(s->AsSwitchStmt());
            return TC_ABORTSTMT;
        }

        case STMT_FOR: {
            auto for_stmt = s->AsForStmt();
            auto loop_body = for_stmt->LoopBody();

            for_stmt->LoopExpr()->Traverse(this);

            // The loop variables (and the optional value variable) are
            // tracked as defined before the body's confluence block
            // starts.
            for ( const auto& loop_var : *for_stmt->LoopVars() )
                TrackID(loop_var);

            if ( auto value_var = for_stmt->ValueVar() )
                TrackID(value_var);

            StartConfluenceBlock(s);
            loop_body->Traverse(this);

            if ( ! loop_body->NoFlowAfter(false) )
                BranchBackTo(last_stmt_traversed, s, true);

            EndConfluenceBlock();

            return TC_ABORTSTMT;
        }

        case STMT_WHILE: {
            auto while_stmt = s->AsWhileStmt();

            StartConfluenceBlock(s);

            if ( auto pred_stmt = while_stmt->CondPredStmt() )
                pred_stmt->Traverse(this);

            // Traverse the condition in its statement form. That way
            // its variable usage is assessed in the context of *after*
            // the condition's predecessor statement has executed,
            // rather than as part of that execution.
            while_stmt->ConditionAsStmt()->Traverse(this);

            auto loop_body = while_stmt->Body();
            loop_body->Traverse(this);

            if ( ! loop_body->NoFlowAfter(false) )
                BranchBackTo(last_stmt_traversed, s, true);

            EndConfluenceBlock();

            return TC_ABORTSTMT;
        }

        default: break;
    }

    return TC_CONTINUE;
}
|
|
|
|
void GenIDDefs::AnalyzeSwitch(const SwitchStmt* sw) {
|
|
sw->StmtExpr()->Traverse(this);
|
|
|
|
for ( const auto& c : *sw->Cases() ) {
|
|
// Important: the confluence block is the switch statement
|
|
// itself, not the case body. This is needed so that variable
|
|
// assignments made inside case bodies that end with
|
|
// "fallthrough" are correctly propagated to the next case
|
|
// body.
|
|
StartConfluenceBlock(sw);
|
|
|
|
auto body = c->Body();
|
|
|
|
auto exprs = c->ExprCases();
|
|
if ( exprs )
|
|
exprs->Traverse(this);
|
|
|
|
auto type_ids = c->TypeCases();
|
|
if ( type_ids ) {
|
|
for ( const auto& id : *type_ids )
|
|
if ( id->Name() )
|
|
TrackID(id);
|
|
}
|
|
|
|
body->Traverse(this);
|
|
EndConfluenceBlock(false);
|
|
}
|
|
}
|
|
|
|
// Post-order statement callback. Handles the statements whose effect on
// definitions is only known once they've been fully traversed:
// initializations, returns, and the loop/switch control-flow statements.
// Always returns TC_CONTINUE.
TraversalCode GenIDDefs::PostStmt(const Stmt* s) {
    switch ( s->Tag() ) {
        case STMT_INIT: {
            for ( const auto& id : s->AsInitStmt()->Inits() ) {
                // Only aggregates get initialized.
                if ( zeek::IsAggr(id->GetType()->Tag()) )
                    TrackID(id);
            }

            break;
        }

        case STMT_RETURN: ReturnAt(s); break;

        case STMT_NEXT: BranchBackTo(last_stmt_traversed, FindLoop(), false); break;

        case STMT_BREAK: {
            auto target = FindBreakTarget();

            if ( target )
                BranchBeyond(s, target, false);

            else {
                // With no enclosing loop or switch, the "break" is
                // expected to be inside a hook body, and is treated the
                // same as a return.
                ASSERT(func_flavor == FUNC_FLAVOR_HOOK);
                ReturnAt(s);
            }

            break;
        }

        case STMT_FALLTHROUGH:
            // No need to do anything, the work all occurs
            // with NoFlowAfter.
            break;

        default: break;
    }

    return TC_CONTINUE;
}
|
|
|
|
// Pre-order expression callback. Stamps the expression with the current
// statement number, checks uses of variables, and manually traverses
// assignments, conditionals and lambdas (returning TC_ABORTSTMT for
// those so that their operands aren't visited again).
TraversalCode GenIDDefs::PreExpr(const Expr* e) {
    e->GetOptInfo()->stmt_num = stmt_num;

    switch ( e->Tag() ) {
        case EXPR_NAME: {
            CheckVarUsage(e, e->AsNameExpr()->Id());
            break;
        }

        case EXPR_ASSIGN: {
            auto target = e->GetOp1();
            auto value = e->GetOp2();

            // A list on the left with a non-"any" right-hand side only
            // arises for assignments that initialize table entries.
            // Those are treated as references to both sides rather than
            // as an assignment, so let the regular traversal proceed.
            if ( target->Tag() == EXPR_LIST && value->GetType()->Tag() != TYPE_ANY )
                return TC_CONTINUE;

            value->Traverse(this);

            // If this isn't a simple assignment (or group of
            // assignments), analyze the accesses made by the left-hand
            // side to check for use of possibly undefined values.
            if ( ! CheckLHS(target, value) )
                target->Traverse(this);

            return TC_ABORTSTMT;
        }

        case EXPR_COND: {
            // Special hack: usage checking is switched off inside
            // conditionals. They're used heavily to deconstruct logical
            // expressions whose operand accesses are in fact safe
            // (guaranteed not to access a value that hasn't been
            // defined), but the flow analysis has trouble working that
            // out.
            ++suppress_usage;
            e->GetOp1()->Traverse(this);
            e->GetOp2()->Traverse(this);
            e->GetOp3()->Traverse(this);
            --suppress_usage;

            return TC_ABORTSTMT;
        }

        case EXPR_LAMBDA: {
            auto lambda = static_cast<const LambdaExpr*>(e);

            // The lambda's outer identifiers count as being used at
            // this point.
            for ( auto& outer_id : lambda->OuterIDs() )
                CheckVarUsage(e, outer_id);

            // The lambda's body isn't descended into - it gets analyzed
            // and optimized separately, as its own function.
            return TC_ABORTSTMT;
        }

        default: break;
    }

    return TC_CONTINUE;
}
|
|
|
|
// Post-order expression callback. Deals with expressions that assign to
// their first operand after execution but also read it (++, --, +=, -=).
// Handling them here rather than in PreExpr avoids having to traverse
// their operands manually. Always returns TC_CONTINUE.
TraversalCode GenIDDefs::PostExpr(const Expr* e) {
    switch ( e->Tag() ) {
        case EXPR_INCR:
        case EXPR_DECR:
        case EXPR_ADD_TO:
        case EXPR_REMOVE_FROM: {
            auto target = e->GetOp1();

            // Aggregate-typed variables are skipped here.
            if ( ! IsAggr(target) )
                (void)CheckLHS(target);

            break;
        }

        default: break;
    }

    return TC_CONTINUE;
}
|
|
|
|
bool GenIDDefs::CheckLHS(const ExprPtr& lhs, const ExprPtr& rhs) {
|
|
switch ( lhs->Tag() ) {
|
|
case EXPR_REF: return CheckLHS(lhs->GetOp1(), rhs);
|
|
|
|
case EXPR_NAME: {
|
|
auto n = lhs->AsNameExpr();
|
|
TrackID(n->Id(), rhs);
|
|
return true;
|
|
}
|
|
|
|
case EXPR_LIST: { // look for [a, b, c] = any_val
|
|
auto l = lhs->AsListExpr();
|
|
for ( const auto& expr : l->Exprs() ) {
|
|
if ( expr->Tag() != EXPR_NAME )
|
|
// This will happen for table initializers,
|
|
// for example.
|
|
return false;
|
|
|
|
auto n = expr->AsNameExpr();
|
|
TrackID(n->Id());
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
case EXPR_FIELD:
|
|
// If we want to track record field initializations,
|
|
// we'd handle that here.
|
|
return false;
|
|
|
|
case EXPR_INDEX:
|
|
// If we wanted to track potential alterations of
|
|
// aggregates, we'd do that here.
|
|
return false;
|
|
|
|
default: reporter->InternalError("bad tag in GenIDDefs::CheckLHS");
|
|
}
|
|
}
|
|
|
|
bool GenIDDefs::IsAggr(const Expr* e) const {
|
|
if ( e->Tag() != EXPR_NAME )
|
|
return false;
|
|
|
|
auto n = e->AsNameExpr();
|
|
auto id = n->Id();
|
|
auto tag = id->GetType()->Tag();
|
|
|
|
return zeek::IsAggr(tag);
|
|
}
|
|
|
|
void GenIDDefs::CheckVarUsage(const Expr* e, const ID* id) {
|
|
if ( analysis_options.usage_issues != 1 || id->IsGlobal() || suppress_usage > 0 )
|
|
return;
|
|
|
|
auto oi = id->GetOptInfo();
|
|
|
|
if ( ! oi->DidUndefinedWarning() && ! oi->IsDefinedBefore(last_stmt_traversed) &&
|
|
! id->GetAttr(ATTR_IS_ASSIGNED) ) {
|
|
if ( ! oi->IsPossiblyDefinedBefore(last_stmt_traversed) ) {
|
|
e->Warn("used without definition");
|
|
oi->SetDidUndefinedWarning();
|
|
}
|
|
|
|
else if ( ! oi->DidPossiblyUndefinedWarning() ) {
|
|
e->Warn("possibly used without definition");
|
|
oi->SetDidPossiblyUndefinedWarning();
|
|
}
|
|
}
|
|
}
|
|
|
|
// Opens a new confluence block associated with statement "s", along
// with a fresh, empty set for the identifiers modified inside it.
void GenIDDefs::StartConfluenceBlock(const Stmt* s) {
    modified_IDs.emplace_back();
    confluence_blocks.push_back(s);
}
|
|
|
|
void GenIDDefs::EndConfluenceBlock(bool no_orig) {
|
|
for ( auto id : modified_IDs.back() )
|
|
id->GetOptInfo()->ConfluenceBlockEndsAfter(last_stmt_traversed, no_orig);
|
|
|
|
confluence_blocks.pop_back();
|
|
modified_IDs.pop_back();
|
|
}
|
|
|
|
void GenIDDefs::BranchBackTo(const Stmt* from, const Stmt* to, bool close_all) {
|
|
for ( auto id : modified_IDs.back() )
|
|
id->GetOptInfo()->BranchBackTo(from, to, close_all);
|
|
}
|
|
|
|
void GenIDDefs::BranchBeyond(const Stmt* from, const Stmt* to, bool close_all) {
|
|
for ( auto id : modified_IDs.back() )
|
|
id->GetOptInfo()->BranchBeyond(from, to, close_all);
|
|
|
|
to->GetOptInfo()->contains_branch_beyond = true;
|
|
}
|
|
|
|
// Returns the innermost enclosing "for" or "while" statement among the
// currently open confluence blocks.
//
// There must be such a loop; if there isn't, that's reported as an
// internal error rather than (as a bare ASSERT would allow when
// assertions are compiled out) indexing before the start of
// confluence_blocks.
const Stmt* GenIDDefs::FindLoop() {
    // Search from the innermost block outwards. Iterators avoid
    // squeezing the container's unsigned size into an int.
    for ( auto cb = confluence_blocks.rbegin(); cb != confluence_blocks.rend(); ++cb ) {
        auto t = (*cb)->Tag();
        if ( t == STMT_WHILE || t == STMT_FOR )
            return *cb;
    }

    reporter->InternalError("no enclosing loop in GenIDDefs::FindLoop");
}
|
|
|
|
// Returns the innermost open confluence block that a "break" can
// target - a "while", "for" or "switch" statement - or nil if there's
// none.
const Stmt* GenIDDefs::FindBreakTarget() {
    // Walk from the innermost block outwards.
    for ( auto blk = confluence_blocks.rbegin(); blk != confluence_blocks.rend(); ++blk ) {
        switch ( (*blk)->Tag() ) {
            case STMT_WHILE:
            case STMT_FOR:
            case STMT_SWITCH: return *blk;

            default: break;
        }
    }

    return nullptr;
}
|
|
|
|
void GenIDDefs::ReturnAt(const Stmt* s) {
|
|
// If we're right at a catch-return then we don't want to make the
|
|
// identifier as encountering a scope-ending "return" here. By avoiding
|
|
// that, we're able to do optimization across catch-return blocks.
|
|
if ( cr_active.empty() || cr_active.back() != confluence_blocks.size() )
|
|
for ( auto id : modified_IDs.back() )
|
|
id->GetOptInfo()->ReturnAt(s);
|
|
}
|
|
|
|
void GenIDDefs::TrackID(const ID* id, const ExprPtr& e) {
|
|
auto oi = id->GetOptInfo();
|
|
|
|
// The 4th argument here is hardwired to 0, meaning "assess across all
|
|
// confluence blocks". If we want definitions inside catch-return bodies
|
|
// to not propagate outside those bodies, we'd instead create new
|
|
// confluence blocks for catch-return statements, and use their identifier
|
|
// here to set the lowest limit for definitions. For now we leave
|
|
// DefinedAfter as capable of supporting that distinction in case we
|
|
// find need to revive it in the future.
|
|
oi->DefinedAfter(last_stmt_traversed, e, confluence_blocks, 0);
|
|
|
|
// Ensure we track this identifier across all relevant
|
|
// confluence regions.
|
|
for ( auto i = 0U; i < confluence_blocks.size(); ++i )
|
|
// Add one because modified_IDs includes outer non-confluence
|
|
// block.
|
|
modified_IDs[i + 1].insert(id);
|
|
|
|
if ( confluence_blocks.empty() )
|
|
// This is a definition at the outermost level.
|
|
modified_IDs[0].insert(id);
|
|
}
|
|
|
|
} // namespace zeek::detail
|