computing of identifier definition regions

This commit is contained in:
Vern Paxson 2021-08-16 10:57:47 -07:00
parent 16bd121ea0
commit cffc954905
5 changed files with 658 additions and 3 deletions

View file

@ -385,6 +385,8 @@ set(MAIN_SRCS
script_opt/DefSetsMgr.cc script_opt/DefSetsMgr.cc
script_opt/Expr.cc script_opt/Expr.cc
script_opt/GenRDs.cc script_opt/GenRDs.cc
script_opt/GenIDDefs.cc
script_opt/IDOptInfo.cc
script_opt/Inline.cc script_opt/Inline.cc
script_opt/ProfileFunc.cc script_opt/ProfileFunc.cc
script_opt/ReachingDefs.cc script_opt/ReachingDefs.cc

View file

@ -17,7 +17,7 @@ enum DefPointType {
// Used to capture the notion "the variable may have no definition // Used to capture the notion "the variable may have no definition
// at this point" (or "has no definition", depending on whether we're // at this point" (or "has no definition", depending on whether we're
// concerned with minimal or maximal RDs). // concerned with minimal or maximal RDs).
NO_DEF, NO_DEF_POINT,
// Assigned at the given statement. // Assigned at the given statement.
STMT_DEF, STMT_DEF,
@ -49,7 +49,7 @@ public:
DefinitionPoint() DefinitionPoint()
{ {
o = nullptr; o = nullptr;
t = NO_DEF; t = NO_DEF_POINT;
} }
DefinitionPoint(const Stmt* s) DefinitionPoint(const Stmt* s)

537
src/script_opt/GenIDDefs.cc Normal file
View file

@ -0,0 +1,537 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include "zeek/Expr.h"
#include "zeek/Scope.h"
#include "zeek/Reporter.h"
#include "zeek/Desc.h"
#include "zeek/script_opt/GenIDDefs.h"
#include "zeek/script_opt/ScriptOpt.h"
#include "zeek/script_opt/ExprOptInfo.h"
#include "zeek/script_opt/StmtOptInfo.h"
namespace zeek::detail {
GenIDDefs::GenIDDefs(std::shared_ptr<ProfileFunc> _pf, const Func* f,
ScopePtr scope, StmtPtr body)
: pf(std::move(_pf))
{
TraverseFunction(f, scope, body);
}
// Initializes the definition state for the function's globals and
// parameters, then walks the body to compute the definition information.
void GenIDDefs::TraverseFunction(const Func* f, ScopePtr scope, StmtPtr body)
{
    func_flavor = f->Flavor();

    // Establish the outermost barrier along with its (initially empty)
    // set of modified identifiers.
    barrier_blocks.push_back(0);
    modified_IDs.emplace_back();

    // Every global starts from a clean slate and is treated as defined
    // on entry.
    for ( const auto& global : pf->Globals() )
    {
        global->GetOptInfo()->Clear();
        TrackID(global);
    }

    // The locals get cleared before we process the parameters, because
    // the parameters are included among the locals.
    for ( const auto& local : pf->Locals() )
        local->GetOptInfo()->Clear();

    // Only the leading entries of the scope's ordered variables, one per
    // declared parameter, are tracked as defined on entry.
    const auto& scope_vars = scope->OrderedVars();
    int params_left = f->GetType()->Params()->NumFields();

    for ( const auto& var : scope_vars )
    {
        if ( params_left <= 0 )
            break;

        --params_left;

        var->GetOptInfo()->Clear();
        TrackID(var);
    }

    stmt_num = 0; // 0 = "before the first statement"
    body->Traverse(this);
}
// Pre-order statement callback. Records the statement as the current one,
// assigns it the next statement number and its block level, and then, for
// statements that introduce control-flow confluence (catch-return, if,
// switch, for, while), traverses their pieces manually so that the
// confluence blocks are started and ended around the right sub-trees.
//
// Returns TC_ABORTSTMT for every statement whose children were traversed
// here by hand (and for "when" statements, which are skipped), and
// TC_CONTINUE otherwise.
// NOTE(review): presumably TC_ABORTSTMT tells the traversal framework not
// to descend into the statement's children itself - confirm against the
// TraversalCallback documentation.
TraversalCode GenIDDefs::PreStmt(const Stmt* s)
{
curr_stmt = s;
auto si = s->GetOptInfo();
// Statement numbers start at 1; 0 is reserved for "before the first
// statement" (see TraverseFunction).
si->stmt_num = ++stmt_num;
// Block level is one more than the number of enclosing confluence blocks.
si->block_level = confluence_blocks.size() + 1;
switch ( s->Tag() ) {
case STMT_CATCH_RETURN:
{
auto cr = s->AsCatchReturnStmt();
auto block = cr->Block();
StartConfluenceBlock(s);
block->Traverse(this);
EndConfluenceBlock();
// If the catch-return has a return variable, it counts as defined
// once the block has finished.
auto retvar = cr->RetVar();
if ( retvar )
TrackID(retvar->Id());
return TC_ABORTSTMT;
}
case STMT_IF:
{
auto i = s->AsIfStmt();
auto cond = i->StmtExpr();
auto t_branch = i->TrueBranch();
auto f_branch = i->FalseBranch();
// The condition is evaluated before (outside of) the confluence block.
cond->Traverse(this);
StartConfluenceBlock(s);
t_branch->Traverse(this);
// After traversing a branch, curr_stmt is the last statement visited
// in it; if control can flow out of the branch, note a branch from
// there to just beyond the "if".
if ( ! t_branch->NoFlowAfter(false) )
BranchBeyond(curr_stmt, s, true);
f_branch->Traverse(this);
if ( ! f_branch->NoFlowAfter(false) )
BranchBeyond(curr_stmt, s, true);
// "true" = no control flow from the origin: one of the two branches
// is always taken.
EndConfluenceBlock(true);
return TC_ABORTSTMT;
}
case STMT_SWITCH:
{
auto sw = s->AsSwitchStmt();
auto e = sw->StmtExpr();
// As with "if", the switch expression is evaluated outside the block.
e->Traverse(this);
StartConfluenceBlock(sw);
for ( const auto& c : *sw->Cases() )
{
auto body = c->Body();
auto exprs = c->ExprCases();
if ( exprs )
exprs->Traverse(this);
auto type_ids = c->TypeCases();
if ( type_ids )
{
// Named identifiers in type cases are treated as defined when
// the case body runs.
for ( const auto& id : *type_ids )
if ( id->Name() )
TrackID(id);
}
body->Traverse(this);
}
// Flow from the origin is only ruled out if the switch has a default.
EndConfluenceBlock(sw->HasDefault());
return TC_ABORTSTMT;
}
case STMT_FOR:
{
auto f = s->AsForStmt();
auto ids = f->LoopVars();
auto e = f->LoopExpr();
auto body = f->LoopBody();
auto val_var = f->ValueVar();
e->Traverse(this);
// The loop variables (and optional value variable) are tracked as
// defined before the loop's confluence block is started.
for ( const auto& id : *ids )
TrackID(id);
if ( val_var )
TrackID(val_var);
StartConfluenceBlock(s);
body->Traverse(this);
// If control can reach the end of the body, it branches back to the
// top of the loop.
if ( ! body->NoFlowAfter(false) )
BranchBackTo(curr_stmt, s, true);
EndConfluenceBlock();
return TC_ABORTSTMT;
}
case STMT_WHILE:
{
auto w = s->AsWhileStmt();
// Unlike "if"/"for", the block is started before the condition is
// traversed, so the condition is analyzed inside the loop's block.
StartConfluenceBlock(s);
auto cond_pred_stmt = w->CondPredStmt();
if ( cond_pred_stmt )
cond_pred_stmt->Traverse(this);
// Important to traverse the condition in its version
// interpreted as a statement, so that when evaluating
// its variable usage, that's done in the context of
// *after* cond_pred_stmt executes, rather than as
// part of that execution.
auto cond_stmt = w->ConditionAsStmt();
cond_stmt->Traverse(this);
auto body = w->Body();
body->Traverse(this);
if ( ! body->NoFlowAfter(false) )
BranchBackTo(curr_stmt, s, true);
EndConfluenceBlock();
return TC_ABORTSTMT;
}
case STMT_WHEN:
{
// ### punt on these for now, need to reflect on bindings.
return TC_ABORTSTMT;
}
default:
return TC_CONTINUE;
}
}
// Post-order statement callback. Handles statements whose effect on
// definitions or control flow is only known once their operands have
// been traversed:
//
//   - init statements define the aggregate-typed identifiers they
//     initialize;
//   - "return" is recorded via ReturnAt();
//   - "next" branches back to the innermost enclosing loop;
//   - "break" branches beyond the innermost loop/switch, or - if there
//     is none - is treated like a return (asserted to occur only in
//     hooks).
//
// Always returns TC_CONTINUE.
TraversalCode GenIDDefs::PostStmt(const Stmt* s)
{
    switch ( s->Tag() ) {
    case STMT_INIT:
    {
        auto init = s->AsInitStmt();
        auto& inits = init->Inits();

        for ( const auto& id : inits )
        {
            // Bind by reference: we only need to inspect the type's tag.
            const auto& id_t = id->GetType();

            // Only aggregates get initialized.
            if ( zeek::IsAggr(id_t->Tag()) )
                TrackID(id);
        }

        break;
    }

    case STMT_RETURN:
        ReturnAt(s);
        break;

    case STMT_NEXT:
        BranchBackTo(curr_stmt, FindLoop(), false);
        break;

    case STMT_BREAK:
    {
        auto target = FindBreakTarget();

        if ( target )
            BranchBeyond(s, target, false);
        else
        {
            // No enclosing loop or switch: this is an outer "break",
            // which acts as a return.
            ASSERT(func_flavor == FUNC_FLAVOR_HOOK);
            ReturnAt(s);
        }

        break;
    }

    case STMT_FALLTHROUGH:
        // No need to do anything, the work all occurs
        // with NoFlowAfter.
        break;

    default:
        break;
    }

    return TC_CONTINUE;
}
// Pre-order expression callback. Stamps the expression with the current
// statement number and then:
//
//   - for names, checks whether the identifier is used without a definition;
//   - for assignments, traverses the RHS first, then records the LHS as a
//     definition (or, if it is not a trackable target, traverses it as a
//     use);
//   - for conditional expressions, traverses the operands with usage
//     warnings suppressed;
//   - for lambdas, checks usage of the captured outer identifiers without
//     descending into the lambda body.
//
// Returns TC_ABORTSTMT for the cases whose operands were traversed here by
// hand, TC_CONTINUE otherwise.
// NOTE(review): presumably TC_ABORTSTMT, returned from an expression
// callback, stops the framework from descending into the expression's
// operands - confirm against the TraversalCallback documentation.
TraversalCode GenIDDefs::PreExpr(const Expr* e)
{
e->GetOptInfo()->stmt_num = stmt_num;
switch ( e->Tag() ) {
case EXPR_NAME:
CheckVarUsage(e, e->AsNameExpr()->Id());
break;
case EXPR_ASSIGN:
{
auto lhs = e->GetOp1();
auto op2 = e->GetOp2();
if ( lhs->Tag() == EXPR_LIST &&
op2->GetType()->Tag() != TYPE_ANY )
{
// This combination occurs only for assignments used
// to initialize table entries. Treat it as references
// to both the lhs and the rhs, not as an assignment.
return TC_CONTINUE;
}
// The RHS is analyzed before the LHS is marked as defined, so a
// use of the target on the RHS sees the state prior to the assignment.
op2->Traverse(this);
if ( ! CheckLHS(lhs, op2) )
// Not a simple assignment (or group of assignments),
// so analyze the accesses to check for use of
// possibly undefined values.
lhs->Traverse(this);
return TC_ABORTSTMT;
}
case EXPR_COND:
// Special hack. We turn off checking for usage issues
// inside conditionals. This is because we use them heavily
// to deconstruct logical expressions for which the actual
// operand access is safe (guaranteed not to access a value
// that hasn't been defined), but the flow analysis has
// trouble determining that.
++suppress_usage;
e->GetOp1()->Traverse(this);
e->GetOp2()->Traverse(this);
e->GetOp3()->Traverse(this);
--suppress_usage;
return TC_ABORTSTMT;
case EXPR_LAMBDA:
{
auto l = static_cast<const LambdaExpr*>(e);
const auto& ids = l->OuterIDs();
// The captured outer identifiers count as uses at the point where
// the lambda expression appears.
for ( auto& id : ids )
CheckVarUsage(e, id);
// Don't descend into the lambda body - we'll analyze and
// optimize it separately, as its own function.
return TC_ABORTSTMT;
}
default:
break;
}
return TC_CONTINUE;
}
// Post-order expression callback. Deals with expressions that assign to
// their first operand after also reading it (++, --, +=, -=). Handling
// them here rather than in PreExpr means the regular traversal has
// already analyzed the operand as a use, so no manual traversal of the
// operands is needed. Always returns TC_CONTINUE.
TraversalCode GenIDDefs::PostExpr(const Expr* e)
{
    switch ( e->Tag() ) {
    case EXPR_INCR:
    case EXPR_DECR:
    case EXPR_ADD_TO:
    case EXPR_REMOVE_FROM:
    {
        auto target = e->GetOp1();

        // Targets that are aggregate-typed names are not recorded as
        // new definitions.
        if ( ! IsAggr(target) )
            static_cast<void>(CheckLHS(target));

        break;
    }

    default:
        break;
    }

    return TC_CONTINUE;
}
// Analyzes the target of an assignment, recording definitions for any
// identifiers it assigns directly. "rhs" is the assigned expression and
// is only passed along for plain name targets.
//
// Returns true if the target was fully handled as one or more identifier
// definitions, false if the caller needs to analyze it as an access
// instead. Any other kind of target is reported as an internal error.
bool GenIDDefs::CheckLHS(const ExprPtr& lhs, const ExprPtr& rhs)
{
    switch ( lhs->Tag() ) {
    case EXPR_REF:
        // Look through the reference to the underlying target.
        return CheckLHS(lhs->GetOp1(), rhs);

    case EXPR_NAME:
        TrackID(lhs->AsNameExpr()->Id(), rhs);
        return true;

    case EXPR_LIST:
    {
        // Look for [a, b, c] = any_val.
        for ( const auto& elem : lhs->AsListExpr()->Exprs() )
        {
            // A non-name element happens for table initializers, for
            // example. Names seen before it have already been tracked.
            if ( elem->Tag() != EXPR_NAME )
                return false;

            TrackID(elem->AsNameExpr()->Id());
        }

        return true;
    }

    case EXPR_FIELD:
        // This is where tracking of record field initializations would
        // go, if we wanted it.
    case EXPR_INDEX:
        // Likewise for tracking potential alterations of aggregates.
        return false;

    default:
        reporter->InternalError("bad tag in GenIDDefs::CheckLHS");
    }
}
// Returns true if the expression is a plain name whose identifier has an
// aggregate type; any other kind of expression yields false.
bool GenIDDefs::IsAggr(const Expr* e) const
{
    if ( e->Tag() == EXPR_NAME )
        return zeek::IsAggr(e->AsNameExpr()->Id()->GetType()->Tag());

    return false;
}
// Warns, at most once per identifier and per kind of warning, if "id" is
// used in expression "e" at a point where it is not known to be defined.
// Nothing is reported when usage analysis is off, for globals, or while
// usage checking is suppressed.
void GenIDDefs::CheckVarUsage(const Expr* e, const ID* id)
{
    if ( analysis_options.usage_issues == 0 )
        return;

    if ( id->IsGlobal() )
        return;

    if ( suppress_usage > 0 )
        return;

    auto info = id->GetOptInfo();

    // Nothing to report if we already issued the stronger warning, if the
    // identifier is definitely defined here, or if it carries the
    // is-assigned attribute.
    if ( info->DidUndefinedWarning() || info->IsDefinedBefore(curr_stmt) ||
         id->GetAttr(ATTR_IS_ASSIGNED) )
        return;

    if ( info->IsPossiblyDefinedBefore(curr_stmt) )
    {
        // Defined along some paths but not all of them.
        if ( ! info->DidPossiblyUndefinedWarning() )
        {
            e->Warn("possibly used without definition");
            info->SetDidPossiblyUndefinedWarning();
        }
    }
    else
    {
        // Not defined along any path.
        e->Warn("used without definition");
        info->SetDidUndefinedWarning();
    }
}
// Opens a new confluence block for statement "s", together with a fresh,
// empty set of identifiers modified inside it. A catch-return statement
// additionally becomes a new barrier, recorded as the index its block is
// about to occupy in confluence_blocks.
void GenIDDefs::StartConfluenceBlock(const Stmt* s)
{
    const bool is_barrier = s->Tag() == STMT_CATCH_RETURN;

    if ( is_barrier )
        barrier_blocks.push_back(confluence_blocks.size());

    confluence_blocks.push_back(s);
    modified_IDs.emplace_back();
}
// Closes the innermost confluence block. Every identifier modified inside
// the block is told that the block ends after the current statement;
// "no_orig_flow" is passed along to indicate that there is no control
// flow from the statement that started the block. If the closed block
// was the innermost (non-outermost) barrier, that barrier is removed too.
void GenIDDefs::EndConfluenceBlock(bool no_orig_flow)
{
    for ( auto id : modified_IDs.back() )
        id->GetOptInfo()->ConfluenceBlockEndsAfter(curr_stmt, no_orig_flow);

    confluence_blocks.pop_back();

    // A barrier value of 0 is never popped here, so the outermost barrier
    // stays in place. Compare as ints to avoid a signed/unsigned mix.
    const int barrier = barrier_blocks.back();
    const int num_blocks = static_cast<int>(confluence_blocks.size());

    if ( barrier > 0 && num_blocks == barrier )
        barrier_blocks.pop_back();

    modified_IDs.pop_back();
}
void GenIDDefs::BranchBackTo(const Stmt* from, const Stmt* to, bool close_all)
{
for ( auto id : modified_IDs.back() )
id->GetOptInfo()->BranchBackTo(from, to, close_all);
}
void GenIDDefs::BranchBeyond(const Stmt* from, const Stmt* to, bool close_all)
{
for ( auto id : modified_IDs.back() )
id->GetOptInfo()->BranchBeyond(from, to, close_all);
to->GetOptInfo()->contains_branch_beyond = true;
}
// Returns the innermost active confluence block that is a loop ("while"
// or "for"). Callers use this for "next" statements, so an enclosing
// loop is expected to exist; not finding one is reported as an internal
// error rather than indexing confluence_blocks out of bounds.
const Stmt* GenIDDefs::FindLoop()
{
    for ( auto it = confluence_blocks.rbegin(); it != confluence_blocks.rend(); ++it )
    {
        auto t = (*it)->Tag();

        if ( t == STMT_WHILE || t == STMT_FOR )
            return *it;
    }

    reporter->InternalError("no enclosing loop in GenIDDefs::FindLoop");

    // NOTE(review): InternalError is expected not to return (CheckLHS
    // relies on the same); this only keeps every path returning a value.
    return nullptr;
}
// Returns the innermost active confluence block that a "break" would
// exit - a "while", "for" or "switch" - or nullptr if there is none.
const Stmt* GenIDDefs::FindBreakTarget()
{
    // Walk from the innermost block outwards.
    for ( auto it = confluence_blocks.rbegin(); it != confluence_blocks.rend(); ++it )
    {
        const Stmt* candidate = *it;

        switch ( candidate->Tag() ) {
        case STMT_WHILE:
        case STMT_FOR:
        case STMT_SWITCH:
            return candidate;

        default:
            break;
        }
    }

    return nullptr;
}
void GenIDDefs::ReturnAt(const Stmt* s)
{
for ( auto id : modified_IDs.back() )
id->GetOptInfo()->ReturnAt(s);
}
// Records that "id" is defined after the current statement, with "e" as
// the defining expression for simple direct assignments (may be null),
// and adds the identifier to the modified-ID set of every confluence
// block from the innermost barrier inwards.
void GenIDDefs::TrackID(const ID* id, const ExprPtr& e)
{
    auto oi = id->GetOptInfo();

    ASSERT(barrier_blocks.size() > 0);

    oi->DefinedAfter(curr_stmt, e,
                     confluence_blocks, barrier_blocks.back());

    // Work with a signed count so the index comparison below does not mix
    // signed and unsigned values.
    const int num_blocks = static_cast<int>(confluence_blocks.size());

    // Ensure we track this identifier across all relevant
    // confluence regions.
    for ( int i = barrier_blocks.back(); i < num_blocks; ++i )
        // Add one because modified_IDs includes outer non-confluence
        // block.
        modified_IDs[i + 1].insert(id);

    if ( num_blocks == 0 )
        // This is a definition at the outermost level.
        modified_IDs[0].insert(id);
}
} // zeek::detail

116
src/script_opt/GenIDDefs.h Normal file
View file

@ -0,0 +1,116 @@
// See the file "COPYING" in the main distribution directory for copyright.
// Class for generating identifier definition information by traversing
// a function body's AST.
#pragma once
#include "zeek/script_opt/IDOptInfo.h"
#include "zeek/script_opt/ProfileFunc.h"
namespace zeek::detail {
// Traversal callback that walks a function body's AST and records, in
// each identifier's optimization info, where that identifier is defined.
// All of the work is done by the constructor.
class GenIDDefs : public TraversalCallback {
public:
    // Analyzes the given function body. "_pf" is the function's profile,
    // "f" the function, and "scope" its scope.
    GenIDDefs(std::shared_ptr<ProfileFunc> _pf, const Func* f,
              ScopePtr scope, StmtPtr body);

private:
    // Traverses the given function body, using the first two
    // arguments for context.
    void TraverseFunction(const Func* f, ScopePtr scope, StmtPtr body);

    TraversalCode PreStmt(const Stmt*) override;
    TraversalCode PostStmt(const Stmt*) override;
    TraversalCode PreExpr(const Expr*) override;
    TraversalCode PostExpr(const Expr*) override;

    // Analyzes the target of an assignment. Returns true if the LHS
    // was an expression for which we can track it as a definition
    // (e.g., assignments to variables, but not to elements of
    // aggregates). "rhs" gives the expression used for simple direct
    // assignments.
    bool CheckLHS(const ExprPtr& lhs, const ExprPtr& rhs = nullptr);

    // True if the given expression directly represents an aggregate,
    // i.e., it is a name whose identifier has an aggregate type.
    bool IsAggr(const ExprPtr& e) const { return IsAggr(e.get()); }
    bool IsAggr(const Expr* e) const;

    // If -u is active, checks for whether the given identifier present
    // in the given expression is undefined at that point.
    void CheckVarUsage(const Expr* e, const ID* id);

    // Begin a new confluence block with the given statement.
    void StartConfluenceBlock(const Stmt* s);

    // Finish up the current confluence block. If no_orig_flow is true,
    // then there's no control flow from the origin (the statement that
    // starts the block).
    void EndConfluenceBlock(bool no_orig_flow = false);

    // Note branches from the given "from" statement back up to the
    // beginning of, or just past, the "to" statement. If "close_all"
    // is true then the nature of the branch is that it terminates
    // all pending confluence blocks.
    void BranchBackTo(const Stmt* from, const Stmt* to, bool close_all);
    void BranchBeyond(const Stmt* from, const Stmt* to, bool close_all);

    // These search back through the active confluence blocks looking
    // for either the innermost loop, or the innermost block for which
    // a "break" would target going beyond that block.
    const Stmt* FindLoop();
    const Stmt* FindBreakTarget();

    // Note that the given statement executes a "return" (which could
    // instead be an outer "break" for a hook).
    void ReturnAt(const Stmt* s);

    // Tracks that the given identifier is defined at the current
    // statement in the current confluence block. 'e' is the
    // expression used to define the identifier, for simple direct
    // assignments.
    void TrackID(const IDPtr& id, const ExprPtr& e = nullptr)
        { TrackID(id.get(), e); }
    void TrackID(const ID* id, const ExprPtr& e = nullptr);

    // Profile for the function. Currently, all we actually need from
    // this is the list of globals and locals.
    std::shared_ptr<ProfileFunc> pf;

    // Whether the Func is an event/hook/function. We currently only
    // need to know whether it's a hook, so we correctly interpret an
    // outer "break" in that context. Value-initialized so it never
    // holds an indeterminate value; the real flavor is assigned when
    // the function is traversed.
    FunctionFlavor func_flavor{};

    // The statement we are currently traversing.
    const Stmt* curr_stmt = nullptr;

    // Used to number Stmt objects found during AST traversal.
    // 0 means "before the first statement".
    int stmt_num = 0;

    // A stack of confluence blocks, with the innermost at the top/back.
    std::vector<const Stmt*> confluence_blocks;

    // Index into confluence_blocks of "barrier" blocks that
    // represent unavoidable confluence blocks (no branching
    // out of them). These include the outermost block and
    // any catch-return blocks. We track these because
    // (1) there's no need for an IDOptInfo to track previously
    // unseen confluence regions outer to those, and (2) they
    // can get quite deep when inlining, so there are savings
    // to avoid having to track outer to them.
    std::vector<int> barrier_blocks;

    // The following is parallel to confluence_blocks except
    // the front entry tracks identifiers at the outermost
    // (non-confluence) scope. Thus, to index it for a given
    // confluence block i, we need to use i+1.
    std::vector<std::unordered_set<const ID*>> modified_IDs;

    // If non-zero, indicates we should suspend any generation
    // of usage errors. A counter rather than a boolean because
    // such situations might nest.
    int suppress_usage = 0;
};
} // zeek::detail

View file

@ -190,7 +190,7 @@ public:
if ( ! dps || dps->length() != 1 ) if ( ! dps || dps->length() != 1 )
return false; return false;
return (*dps)[0].Tag() != NO_DEF; return (*dps)[0].Tag() != NO_DEF_POINT;
} }
// Whether the given definition item has an RD at the given // Whether the given definition item has an RD at the given