mirror of
https://github.com/zeek/zeek.git
synced 2025-10-10 10:38:20 +00:00
Reworked the AST optimizer's analysis of side effects during aggregate operations & calls
This commit is contained in:
parent
c028901146
commit
740a087765
13 changed files with 1119 additions and 223 deletions
|
@ -7,11 +7,32 @@
|
|||
|
||||
namespace zeek::detail {
|
||||
|
||||
// Records the outcome of side-effect analysis for a single expression.
// At present the only thing tracked is a yes/no verdict, which is why the
// class is little more than a wrapper around a boolean.
class ExprSideEffects {
public:
    ExprSideEffects(bool _has_side_effects) : has_side_effects{_has_side_effects} {}

    // Returns the verdict that was supplied at construction.
    bool HasSideEffects() const { return has_side_effects; }

protected:
    // True if the associated expression has side effects.
    bool has_side_effects;
};
|
||||
|
||||
class ExprOptInfo {
|
||||
public:
|
||||
// The AST number of the statement in which this expression
|
||||
// appears.
|
||||
int stmt_num = -1; // -1 = not assigned yet
|
||||
|
||||
auto& SideEffects() { return side_effects; }
|
||||
|
||||
protected:
|
||||
// This optional value missing means "we haven't yet determined the
|
||||
// side effects".
|
||||
std::optional<ExprSideEffects> side_effects;
|
||||
};
|
||||
|
||||
} // namespace zeek::detail
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#include "zeek/Desc.h"
|
||||
#include "zeek/Func.h"
|
||||
#include "zeek/Stmt.h"
|
||||
#include "zeek/script_opt/FuncInfo.h"
|
||||
#include "zeek/script_opt/IDOptInfo.h"
|
||||
|
||||
namespace zeek::detail {
|
||||
|
@ -85,7 +86,16 @@ TraversalCode ProfileFunc::PreStmt(const Stmt* s) {
|
|||
case STMT_INIT:
|
||||
for ( const auto& id : s->AsInitStmt()->Inits() ) {
|
||||
inits.insert(id.get());
|
||||
TrackType(id->GetType());
|
||||
|
||||
auto& t = id->GetType();
|
||||
TrackType(t);
|
||||
|
||||
auto attrs = id->GetAttrs();
|
||||
if ( attrs )
|
||||
constructor_attrs[attrs.get()] = t;
|
||||
|
||||
if ( t->Tag() == TYPE_RECORD )
|
||||
CheckRecordConstructor(t);
|
||||
}
|
||||
|
||||
// Don't traverse further into the statement, since we
|
||||
|
@ -147,6 +157,14 @@ TraversalCode ProfileFunc::PreStmt(const Stmt* s) {
|
|||
expr_switches.insert(sw);
|
||||
} break;
|
||||
|
||||
case STMT_ADD:
|
||||
case STMT_DELETE: {
|
||||
auto ad_stmt = static_cast<const AddDelStmt*>(s);
|
||||
auto ad_e = ad_stmt->StmtExpr();
|
||||
auto& lhs_t = ad_e->GetOp1()->GetType();
|
||||
aggr_mods.insert(lhs_t.get());
|
||||
} break;
|
||||
|
||||
default: break;
|
||||
}
|
||||
|
||||
|
@ -221,19 +239,31 @@ TraversalCode ProfileFunc::PreExpr(const Expr* e) {
|
|||
}
|
||||
break;
|
||||
|
||||
case EXPR_INDEX: {
|
||||
auto lhs_t = e->GetOp1()->GetType();
|
||||
if ( lhs_t->Tag() == TYPE_TABLE )
|
||||
tbl_refs.insert(lhs_t.get());
|
||||
} break;
|
||||
|
||||
case EXPR_INCR:
|
||||
case EXPR_DECR:
|
||||
case EXPR_ADD_TO:
|
||||
case EXPR_REMOVE_FROM:
|
||||
case EXPR_ASSIGN: {
|
||||
if ( e->GetOp1()->Tag() != EXPR_REF )
|
||||
// this isn't a direct assignment
|
||||
auto lhs = e->GetOp1();
|
||||
|
||||
if ( lhs->Tag() == EXPR_REF )
|
||||
lhs = lhs->GetOp1();
|
||||
|
||||
else if ( e->Tag() == EXPR_ASSIGN )
|
||||
// This isn't a direct assignment, but instead an overloaded
|
||||
// use of "=" such as in a table constructor.
|
||||
break;
|
||||
|
||||
auto lhs = e->GetOp1()->GetOp1();
|
||||
if ( lhs->Tag() != EXPR_NAME )
|
||||
break;
|
||||
auto lhs_t = lhs->GetType();
|
||||
|
||||
switch ( lhs->Tag() ) {
|
||||
case EXPR_NAME: {
|
||||
auto id = lhs->AsNameExpr()->Id();
|
||||
TrackAssignment(id);
|
||||
|
||||
|
@ -245,8 +275,53 @@ TraversalCode ProfileFunc::PreExpr(const Expr* e) {
|
|||
// inside a when clause.
|
||||
when_locals.insert(id);
|
||||
}
|
||||
break;
|
||||
else if ( IsAggr(lhs_t->Tag()) )
|
||||
aggr_mods.insert(lhs_t.get());
|
||||
} break;
|
||||
|
||||
case EXPR_INDEX: {
|
||||
auto lhs_aggr = lhs->GetOp1();
|
||||
auto lhs_aggr_t = lhs_aggr->GetType();
|
||||
|
||||
// Determine which aggregate is being modified. For an
|
||||
// assignment "a[b] = aggr", it's not a[b]'s type but
|
||||
// rather a's type. However, for any of the others,
|
||||
// e.g. "a[b] -= aggr" it is a[b]'s type.
|
||||
if ( e->Tag() == EXPR_ASSIGN )
|
||||
aggr_mods.insert(lhs_aggr_t.get());
|
||||
else
|
||||
aggr_mods.insert(lhs_t.get());
|
||||
|
||||
if ( lhs_aggr_t->Tag() == TYPE_TABLE ) {
|
||||
// We don't want the default recursion into the
|
||||
// expression's LHS because that will treat this
|
||||
// table modification as a reference instead. So
|
||||
// do it manually. Given that, we need to do the
|
||||
// expression's RHS manually too.
|
||||
lhs->GetOp1()->Traverse(this);
|
||||
lhs->GetOp2()->Traverse(this);
|
||||
|
||||
auto rhs = e->GetOp2();
|
||||
if ( rhs )
|
||||
rhs->Traverse(this);
|
||||
|
||||
return TC_ABORTSTMT;
|
||||
}
|
||||
} break;
|
||||
|
||||
case EXPR_FIELD: aggr_mods.insert(lhs_t.get()); break;
|
||||
|
||||
case EXPR_LIST: {
|
||||
for ( auto id : lhs->AsListExpr()->Exprs() ) {
|
||||
auto id_t = id->GetType();
|
||||
if ( IsAggr(id_t->Tag()) )
|
||||
aggr_mods.insert(id_t.get());
|
||||
}
|
||||
} break;
|
||||
|
||||
default: reporter->InternalError("bad expression in ProfileFunc: %s", obj_desc(e).c_str());
|
||||
}
|
||||
} break;
|
||||
|
||||
case EXPR_CALL: {
|
||||
auto c = e->AsCallExpr();
|
||||
|
@ -272,8 +347,8 @@ TraversalCode ProfileFunc::PreExpr(const Expr* e) {
|
|||
auto func_vf = func_v->AsFunc();
|
||||
|
||||
if ( func_vf->GetKind() == Func::SCRIPT_FUNC ) {
|
||||
auto bf = static_cast<ScriptFunc*>(func_vf);
|
||||
script_calls.insert(bf);
|
||||
auto sf = static_cast<ScriptFunc*>(func_vf);
|
||||
script_calls.insert(sf);
|
||||
}
|
||||
else
|
||||
BiF_globals.insert(func);
|
||||
|
@ -329,18 +404,20 @@ TraversalCode ProfileFunc::PreExpr(const Expr* e) {
|
|||
// In general, we don't want to recurse into the body.
|
||||
// However, we still want to *profile* it so we can
|
||||
// identify calls within it.
|
||||
ProfileFunc body_pf(l->Ingredients()->Body().get(), false);
|
||||
script_calls.insert(body_pf.ScriptCalls().begin(), body_pf.ScriptCalls().end());
|
||||
auto pf = std::make_shared<ProfileFunc>(l->Ingredients()->Body().get(), false);
|
||||
script_calls.insert(pf->ScriptCalls().begin(), pf->ScriptCalls().end());
|
||||
|
||||
return TC_ABORTSTMT;
|
||||
}
|
||||
|
||||
case EXPR_RECORD_CONSTRUCTOR: CheckRecordConstructor(e->GetType()); break;
|
||||
|
||||
case EXPR_SET_CONSTRUCTOR: {
|
||||
auto sc = static_cast<const SetConstructorExpr*>(e);
|
||||
const auto& attrs = sc->GetAttrs();
|
||||
|
||||
if ( attrs )
|
||||
constructor_attrs.insert(attrs.get());
|
||||
constructor_attrs[attrs.get()] = sc->GetType();
|
||||
} break;
|
||||
|
||||
case EXPR_TABLE_CONSTRUCTOR: {
|
||||
|
@ -348,7 +425,24 @@ TraversalCode ProfileFunc::PreExpr(const Expr* e) {
|
|||
const auto& attrs = tc->GetAttrs();
|
||||
|
||||
if ( attrs )
|
||||
constructor_attrs.insert(attrs.get());
|
||||
constructor_attrs[attrs.get()] = tc->GetType();
|
||||
} break;
|
||||
|
||||
case EXPR_RECORD_COERCE:
|
||||
// This effectively does a record construction of the target
|
||||
// type, so check that.
|
||||
CheckRecordConstructor(e->GetType());
|
||||
break;
|
||||
|
||||
case EXPR_TABLE_COERCE: {
|
||||
// This is written without casting so it can work with other
|
||||
// types if needed.
|
||||
auto res_type = e->GetType().get();
|
||||
auto orig_type = e->GetOp1()->GetType().get();
|
||||
if ( type_aliases.count(res_type) == 0 )
|
||||
type_aliases[orig_type] = {res_type};
|
||||
else
|
||||
type_aliases[orig_type].insert(res_type);
|
||||
} break;
|
||||
|
||||
default: break;
|
||||
|
@ -395,20 +489,42 @@ void ProfileFunc::TrackAssignment(const ID* id) {
|
|||
++assignees[id];
|
||||
else
|
||||
assignees[id] = 1;
|
||||
|
||||
if ( id->IsGlobal() || captures.count(id) > 0 )
|
||||
non_local_assignees.insert(id);
|
||||
}
|
||||
|
||||
// Records the attributes attached to the fields of record type "t", which
// is being constructed (explicitly, via implicit initialization, or via a
// coercion). Each field's attributes are entered both into
// "constructor_attrs" (mapped to the record type) and into
// "rec_constructor_attrs" (grouped by record type).
//
// "t" must be a record type; callers check the tag before calling.
void ProfileFunc::CheckRecordConstructor(TypePtr t) {
    auto rt = cast_intrusive<RecordType>(t);
    auto fields = rt->Types();

    if ( ! fields )
        // The record has no field declarations available, so there are
        // no attributes to track.
        return;

    for ( const auto& td : *fields ) {
        if ( ! td->attrs )
            continue;

        // In principle we could figure out whether this particular
        // constructor happens to explicitly specify &default fields, and
        // not include those attributes if it does since they won't come
        // into play. However that seems like added complexity for almost
        // surely no ultimate gain.
        auto attrs = td->attrs.get();
        constructor_attrs[attrs] = rt;

        // operator[] creates the (empty) per-record set on first use.
        rec_constructor_attrs[rt.get()].insert(attrs);
    }
}
|
||||
|
||||
ProfileFuncs::ProfileFuncs(std::vector<FuncInfo>& funcs, is_compilable_pred pred, bool _full_record_hashes) {
|
||||
full_record_hashes = _full_record_hashes;
|
||||
|
||||
for ( auto& f : funcs ) {
|
||||
if ( f.ShouldSkip() )
|
||||
continue;
|
||||
|
||||
auto pf = std::make_unique<ProfileFunc>(f.Func(), f.Body(), full_record_hashes);
|
||||
auto pf = std::make_shared<ProfileFunc>(f.Func(), f.Body(), full_record_hashes);
|
||||
|
||||
if ( ! pred || (*pred)(pf.get(), nullptr) )
|
||||
MergeInProfile(pf.get());
|
||||
|
||||
// Track the profile even if we're not compiling the function, since
|
||||
// the AST optimizer will still need it to reason about function-call
|
||||
// side effects.
|
||||
f.SetProfile(std::move(pf));
|
||||
func_profs[f.Func()] = f.ProfilePtr();
|
||||
}
|
||||
|
@ -432,6 +548,81 @@ ProfileFuncs::ProfileFuncs(std::vector<FuncInfo>& funcs, is_compilable_pred pred
|
|||
// Computing those hashes could have led to traversals that
|
||||
// create more pending expressions to analyze.
|
||||
} while ( ! pending_exprs.empty() );
|
||||
|
||||
// Now that we have everything profiled, we can proceed to analyses
|
||||
// that require full global information.
|
||||
ComputeSideEffects();
|
||||
}
|
||||
|
||||
bool ProfileFuncs::IsTableWithDefaultAggr(const Type* t) {
|
||||
auto analy = tbl_has_aggr_default.find(t);
|
||||
if ( analy != tbl_has_aggr_default.end() )
|
||||
// We already have the answer.
|
||||
return analy->second;
|
||||
|
||||
// See whether an alias for the type has already been resolved.
|
||||
if ( t->AsTableType()->Yield() ) {
|
||||
for ( auto& at : tbl_has_aggr_default )
|
||||
if ( same_type(at.first, t) ) {
|
||||
tbl_has_aggr_default[t] = at.second;
|
||||
return at.second;
|
||||
}
|
||||
}
|
||||
|
||||
tbl_has_aggr_default[t] = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Returns true if accessing type "t" in the manner given by "access" has
// any side effects: either unknown ones, or modifications to specific
// non-local identifiers or aggregates.
bool ProfileFuncs::HasSideEffects(SideEffectsOp::AccessType access, const TypePtr& t) const {
    IDSet mod_ids;
    TypeSet mod_aggrs;

    // A true return here means "unknown side effects", which counts.
    const bool unknown = GetSideEffects(access, t.get(), mod_ids, mod_aggrs);

    return unknown || ! mod_ids.empty() || ! mod_aggrs.empty();
}
|
||||
|
||||
// Gathers the side effects of accessing type "t" via "access" by consulting
// every recorded SideEffectsOp. Known modifications are accumulated into
// "non_local_ids" and "aggrs". Returns true as soon as one applicable
// operation reports unknown changes, false otherwise.
bool ProfileFuncs::GetSideEffects(SideEffectsOp::AccessType access, const Type* t, IDSet& non_local_ids,
                                  TypeSet& aggrs) const {
    for ( const auto& op : side_effects_ops ) {
        const bool unknown = AssessSideEffects(op.get(), access, t, non_local_ids, aggrs);

        if ( unknown )
            return true;
    }

    return false;
}
|
||||
|
||||
bool ProfileFuncs::GetCallSideEffects(const NameExpr* n, IDSet& non_local_ids, TypeSet& aggrs, bool& is_unknown) {
|
||||
auto fid = n->Id();
|
||||
auto fv = fid->GetVal();
|
||||
|
||||
if ( ! fv || ! fid->IsConst() ) {
|
||||
// The value is unavailable (likely a bug), or might change at run-time.
|
||||
is_unknown = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
auto func = fv->AsFunc();
|
||||
if ( func->GetKind() == Func::BUILTIN_FUNC ) {
|
||||
if ( ! is_side_effect_free(func->Name()) )
|
||||
is_unknown = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
auto sf = static_cast<ScriptFunc*>(func);
|
||||
auto seo = GetCallSideEffects(sf);
|
||||
if ( ! seo )
|
||||
return false;
|
||||
|
||||
if ( seo->HasUnknownChanges() )
|
||||
is_unknown = true;
|
||||
|
||||
for ( auto a : seo->ModAggrs() )
|
||||
aggrs.insert(a);
|
||||
for ( auto nl : seo->ModNonLocals() )
|
||||
non_local_ids.insert(nl);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void ProfileFuncs::MergeInProfile(ProfileFunc* pf) {
|
||||
|
@ -460,7 +651,7 @@ void ProfileFuncs::MergeInProfile(ProfileFunc* pf) {
|
|||
|
||||
auto& attrs = g->GetAttrs();
|
||||
if ( attrs )
|
||||
AnalyzeAttrs(attrs.get());
|
||||
AnalyzeAttrs(attrs.get(), t.get());
|
||||
}
|
||||
|
||||
constants.insert(pf->Constants().begin(), pf->Constants().end());
|
||||
|
@ -475,7 +666,13 @@ void ProfileFuncs::MergeInProfile(ProfileFunc* pf) {
|
|||
}
|
||||
|
||||
for ( auto& a : pf->ConstructorAttrs() )
|
||||
AnalyzeAttrs(a);
|
||||
AnalyzeAttrs(a.first, a.second.get());
|
||||
|
||||
for ( auto& ta : pf->TypeAliases() ) {
|
||||
if ( type_aliases.count(ta.first) == 0 )
|
||||
type_aliases[ta.first] = std::set<const Type*>{};
|
||||
type_aliases[ta.first].insert(ta.second.begin(), ta.second.end());
|
||||
}
|
||||
}
|
||||
|
||||
void ProfileFuncs::TraverseValue(const ValPtr& v) {
|
||||
|
@ -579,8 +776,12 @@ void ProfileFuncs::ComputeBodyHashes(std::vector<FuncInfo>& funcs) {
|
|||
if ( ! f.ShouldSkip() )
|
||||
ComputeProfileHash(f.ProfilePtr());
|
||||
|
||||
for ( auto& l : lambdas )
|
||||
ComputeProfileHash(ExprProf(l));
|
||||
for ( auto& l : lambdas ) {
|
||||
auto pf = ExprProf(l);
|
||||
func_profs[l->PrimaryFunc().get()] = pf;
|
||||
lambda_primaries[l->Name()] = l->PrimaryFunc().get();
|
||||
ComputeProfileHash(pf);
|
||||
}
|
||||
}
|
||||
|
||||
void ProfileFuncs::ComputeProfileHash(std::shared_ptr<ProfileFunc> pf) {
|
||||
|
@ -710,11 +911,8 @@ p_hash_type ProfileFuncs::HashType(const Type* t) {
|
|||
// We don't hash the field name, as in some contexts
|
||||
// those are ignored.
|
||||
|
||||
if ( f->attrs ) {
|
||||
if ( do_hash )
|
||||
if ( f->attrs && do_hash )
|
||||
h = merge_p_hashes(h, HashAttrs(f->attrs));
|
||||
AnalyzeAttrs(f->attrs.get());
|
||||
}
|
||||
}
|
||||
} break;
|
||||
|
||||
|
@ -731,8 +929,24 @@ p_hash_type ProfileFuncs::HashType(const Type* t) {
|
|||
auto ft = t->AsFuncType();
|
||||
auto flv = ft->FlavorString();
|
||||
h = merge_p_hashes(h, p_hash(flv));
|
||||
|
||||
// We deal with the parameters individually, rather than just
|
||||
// recursing into the RecordType that's used (for convenience)
|
||||
// to represent them. We do so because their properties are
|
||||
// somewhat different - in particular, an &default on a parameter
|
||||
// field is resolved in the context of the caller, not the
|
||||
// function itself, and so we don't want to track those as
|
||||
// attributes associated with the function body's execution.
|
||||
h = merge_p_hashes(h, p_hash("params"));
|
||||
h = merge_p_hashes(h, HashType(ft->Params()));
|
||||
auto params = ft->Params()->Types();
|
||||
|
||||
if ( params ) {
|
||||
h = merge_p_hashes(h, p_hash(params->length()));
|
||||
|
||||
for ( auto p : *params )
|
||||
h = merge_p_hashes(h, HashType(p->type));
|
||||
}
|
||||
|
||||
h = merge_p_hashes(h, p_hash("func-yield"));
|
||||
h = merge_p_hashes(h, HashType(ft->Yield()));
|
||||
} break;
|
||||
|
@ -803,18 +1017,367 @@ p_hash_type ProfileFuncs::HashAttrs(const AttributesPtr& Attrs) {
|
|||
return h;
|
||||
}
|
||||
|
||||
void ProfileFuncs::AnalyzeAttrs(const Attributes* Attrs) {
|
||||
auto attrs = Attrs->GetAttrs();
|
||||
void ProfileFuncs::AnalyzeAttrs(const Attributes* attrs, const Type* t) {
|
||||
for ( const auto& a : attrs->GetAttrs() ) {
|
||||
auto& e = a->GetExpr();
|
||||
|
||||
for ( const auto& a : attrs ) {
|
||||
const Expr* e = a->GetExpr().get();
|
||||
if ( ! e )
|
||||
continue;
|
||||
|
||||
pending_exprs.push_back(e.get());
|
||||
|
||||
auto prev_ea = expr_attrs.find(a.get());
|
||||
if ( prev_ea == expr_attrs.end() )
|
||||
expr_attrs[a.get()] = {t};
|
||||
else {
|
||||
// Add it if new. This is rare, but can arise due to attributes
|
||||
// being shared for example from initializers with a variable
|
||||
// itself.
|
||||
bool found = false;
|
||||
for ( auto ea : prev_ea->second )
|
||||
if ( ea == t ) {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if ( ! found )
|
||||
prev_ea->second.push_back(t);
|
||||
}
|
||||
|
||||
if ( e ) {
|
||||
pending_exprs.push_back(e);
|
||||
if ( e->Tag() == EXPR_LAMBDA )
|
||||
lambdas.insert(e->AsLambdaExpr());
|
||||
}
|
||||
}
|
||||
|
||||
void ProfileFuncs::ComputeSideEffects() {
|
||||
// Computing side effects is an iterative process, because whether
|
||||
// a given expression has a side effect can depend on whether it
|
||||
// includes accesses to types that themselves have side effects.
|
||||
|
||||
// Step one: assemble the candidate pool of attributes to assess.
|
||||
for ( auto& ea : expr_attrs ) {
|
||||
// Is this an attribute that can be triggered by
|
||||
// statement/expression execution?
|
||||
auto a = ea.first;
|
||||
auto at = a->Tag();
|
||||
if ( at == ATTR_DEFAULT || at == ATTR_DEFAULT_INSERT || at == ATTR_ON_CHANGE ) {
|
||||
if ( at == ATTR_DEFAULT ) {
|
||||
// Look for tables with &default's returning aggregate values.
|
||||
for ( auto t : ea.second ) {
|
||||
if ( t->Tag() != TYPE_TABLE )
|
||||
continue;
|
||||
|
||||
auto y = t->AsTableType()->Yield();
|
||||
|
||||
if ( y && IsAggr(y->Tag()) ) {
|
||||
tbl_has_aggr_default[t] = true;
|
||||
for ( auto ta : type_aliases[t] )
|
||||
tbl_has_aggr_default[ta] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Weed out very-common-and-completely-safe expressions.
|
||||
if ( ! DefinitelyHasNoSideEffects(a->GetExpr()) )
|
||||
candidates.insert(a);
|
||||
}
|
||||
}
|
||||
|
||||
// At this point, very often there are no candidates and we're done.
|
||||
// However, if we have candidates then we need to process them in an
|
||||
// iterative fashion because it's possible that the side effects of
|
||||
// some of them depend on the side effects of other candidates.
|
||||
|
||||
while ( ! candidates.empty() ) {
|
||||
// For which attributes have we resolved their status.
|
||||
AttrSet made_decision;
|
||||
|
||||
for ( auto c : candidates ) {
|
||||
IDSet non_local_ids;
|
||||
TypeSet aggrs;
|
||||
bool is_unknown = false;
|
||||
|
||||
// Track the candidate we're currently analyzing, since sometimes
|
||||
// it's self-referential and we need to identify that fact.
|
||||
curr_candidate = c;
|
||||
|
||||
if ( ! AssessSideEffects(c->GetExpr(), non_local_ids, aggrs, is_unknown) )
|
||||
// Can't make a decision yet.
|
||||
continue;
|
||||
|
||||
// We've resolved this candidate.
|
||||
made_decision.insert(c);
|
||||
SetSideEffects(c, non_local_ids, aggrs, is_unknown);
|
||||
}
|
||||
|
||||
if ( made_decision.empty() ) {
|
||||
// We weren't able to make forward progress. This happens when
|
||||
// the pending candidates are mutually dependent. While in
|
||||
// principle we could scope the worst-case resolution of their
|
||||
// side effects, this is such an unlikely situation that we just
|
||||
// mark them all as unknown.
|
||||
|
||||
// We keep these empty.
|
||||
IDSet non_local_ids;
|
||||
TypeSet aggrs;
|
||||
|
||||
for ( auto c : candidates )
|
||||
SetSideEffects(c, non_local_ids, aggrs, true);
|
||||
|
||||
// We're now all done.
|
||||
break;
|
||||
}
|
||||
|
||||
for ( auto md : made_decision )
|
||||
candidates.erase(md);
|
||||
}
|
||||
}
|
||||
|
||||
bool ProfileFuncs::DefinitelyHasNoSideEffects(const ExprPtr& e) const {
|
||||
if ( e->Tag() == EXPR_CONST || e->Tag() == EXPR_VECTOR_CONSTRUCTOR )
|
||||
return true;
|
||||
|
||||
if ( e->Tag() == EXPR_NAME )
|
||||
return e->GetType()->Tag() != TYPE_FUNC;
|
||||
|
||||
auto ep = expr_profs.find(e.get());
|
||||
ASSERT(ep != expr_profs.end());
|
||||
|
||||
const auto& pf = ep->second;
|
||||
|
||||
if ( ! pf->NonLocalAssignees().empty() || ! pf->TableRefs().empty() || ! pf->AggrMods().empty() ||
|
||||
! pf->ScriptCalls().empty() )
|
||||
return false;
|
||||
|
||||
for ( auto& b : pf->BiFGlobals() )
|
||||
if ( ! is_side_effect_free(b->Name()) )
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void ProfileFuncs::SetSideEffects(const Attr* a, IDSet& non_local_ids, TypeSet& aggrs, bool is_unknown) {
|
||||
auto seo_vec = std::vector<std::shared_ptr<SideEffectsOp>>{};
|
||||
bool is_rec = expr_attrs[a][0]->Tag() == TYPE_RECORD;
|
||||
|
||||
SideEffectsOp::AccessType at;
|
||||
if ( is_rec )
|
||||
at = SideEffectsOp::CONSTRUCTION;
|
||||
else if ( a->Tag() == ATTR_ON_CHANGE )
|
||||
at = SideEffectsOp::WRITE;
|
||||
else
|
||||
at = SideEffectsOp::READ;
|
||||
|
||||
if ( non_local_ids.empty() && aggrs.empty() && ! is_unknown )
|
||||
// Definitely no side effects.
|
||||
seo_vec.push_back(std::make_shared<SideEffectsOp>());
|
||||
else {
|
||||
attrs_with_side_effects.insert(a);
|
||||
|
||||
// Set side effects for all of the types associated with this attribute.
|
||||
for ( auto ea_t : expr_attrs[a] ) {
|
||||
auto seo = std::make_shared<SideEffectsOp>(at, ea_t);
|
||||
seo->AddModNonGlobal(non_local_ids);
|
||||
seo->AddModAggrs(aggrs);
|
||||
|
||||
if ( is_unknown )
|
||||
seo->SetUnknownChanges();
|
||||
|
||||
side_effects_ops.push_back(seo);
|
||||
seo_vec.push_back(std::move(seo));
|
||||
}
|
||||
}
|
||||
|
||||
if ( is_rec )
|
||||
record_constr_with_side_effects[a] = std::move(seo_vec);
|
||||
else
|
||||
aggr_side_effects[a] = std::move(seo_vec);
|
||||
}
|
||||
|
||||
// Returns the attributes associated with type "t", drawn from both the
// still-pending candidates and the attributes already found to have side
// effects.
AttrVec ProfileFuncs::AssociatedAttrs(const Type* t) {
    AttrVec matches;

    // It might seem enough to consult only the already-identified
    // attributes. The pending ones are included as well so that callers
    // can spot dependencies that haven't been resolved yet.
    FindAssociatedAttrs(candidates, t, matches);
    FindAssociatedAttrs(attrs_with_side_effects, t, matches);

    return matches;
}
|
||||
|
||||
void ProfileFuncs::FindAssociatedAttrs(const AttrSet& attrs, const Type* t, AttrVec& assoc_attrs) {
|
||||
for ( auto a : attrs ) {
|
||||
for ( auto ea_t : expr_attrs[a] ) {
|
||||
if ( same_type(t, ea_t) ) {
|
||||
assoc_attrs.push_back(a);
|
||||
break;
|
||||
}
|
||||
|
||||
for ( auto ta : type_aliases[ea_t] )
|
||||
if ( same_type(t, ta) ) {
|
||||
assoc_attrs.push_back(a);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool ProfileFuncs::AssessSideEffects(const ExprPtr& e, IDSet& non_local_ids, TypeSet& aggrs, bool& is_unknown) {
|
||||
if ( e->Tag() == EXPR_NAME && e->GetType()->Tag() == TYPE_FUNC )
|
||||
// This occurs when the expression is itself a function name, and
|
||||
// in an attribute context indicates an implicit call.
|
||||
return GetCallSideEffects(e->AsNameExpr(), non_local_ids, aggrs, is_unknown);
|
||||
|
||||
ASSERT(expr_profs.count(e.get()) != 0);
|
||||
auto pf = expr_profs[e.get()];
|
||||
return AssessSideEffects(pf.get(), non_local_ids, aggrs, is_unknown);
|
||||
}
|
||||
|
||||
bool ProfileFuncs::AssessSideEffects(const ProfileFunc* pf, IDSet& non_local_ids, TypeSet& aggrs, bool& is_unknown) {
|
||||
if ( pf->DoesIndirectCalls() ) {
|
||||
is_unknown = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
for ( auto& b : pf->BiFGlobals() )
|
||||
if ( ! is_side_effect_free(b->Name()) ) {
|
||||
is_unknown = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
IDSet nla;
|
||||
TypeSet mod_aggrs;
|
||||
|
||||
for ( auto& a : pf->NonLocalAssignees() )
|
||||
nla.insert(a);
|
||||
|
||||
for ( auto& r : pf->RecordConstructorAttrs() )
|
||||
if ( ! AssessAggrEffects(SideEffectsOp::CONSTRUCTION, r.first, nla, mod_aggrs, is_unknown) )
|
||||
// Not enough information yet to know all of the side effects.
|
||||
return false;
|
||||
|
||||
for ( auto& tr : pf->TableRefs() )
|
||||
if ( ! AssessAggrEffects(SideEffectsOp::READ, tr, nla, mod_aggrs, is_unknown) )
|
||||
return false;
|
||||
|
||||
for ( auto& tm : pf->AggrMods() ) {
|
||||
if ( tm->Tag() == TYPE_TABLE && ! AssessAggrEffects(SideEffectsOp::WRITE, tm, nla, mod_aggrs, is_unknown) )
|
||||
return false;
|
||||
|
||||
mod_aggrs.insert(tm);
|
||||
}
|
||||
|
||||
for ( auto& f : pf->ScriptCalls() ) {
|
||||
if ( f->Flavor() != FUNC_FLAVOR_FUNCTION ) {
|
||||
// A hook (since events can't be called) - not something
|
||||
// to analyze further.
|
||||
is_unknown = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
auto pff = func_profs[f];
|
||||
if ( active_func_profiles.count(pff) > 0 )
|
||||
// We're already processing this function and arrived here via
|
||||
// recursion. Skip further analysis here, we'll do it instead
|
||||
// for the original instance.
|
||||
continue;
|
||||
|
||||
// Track this analysis so we can detect recursion.
|
||||
active_func_profiles.insert(pff);
|
||||
auto a = AssessSideEffects(pff.get(), nla, mod_aggrs, is_unknown);
|
||||
active_func_profiles.erase(pff);
|
||||
|
||||
if ( ! a )
|
||||
return false;
|
||||
}
|
||||
|
||||
non_local_ids.insert(nla.begin(), nla.end());
|
||||
aggrs.insert(mod_aggrs.begin(), mod_aggrs.end());
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Assesses the side effects of accessing aggregate type "t" in the manner
// given by "access", based on the attributes associated with the type.
//
// Returns false if one of those attributes hasn't been resolved yet.
// Otherwise returns true, with "is_unknown" set if an applicable attribute
// has unknown effects, and with specific modifications added to
// "non_local_ids" and "aggrs".
bool ProfileFuncs::AssessAggrEffects(SideEffectsOp::AccessType access, const Type* t, IDSet& non_local_ids,
                                     TypeSet& aggrs, bool& is_unknown) {
    for ( auto a : AssociatedAttrs(t) ) {
        if ( a == curr_candidate )
            // A self-reference. The lack of a determination for it
            // mustn't be taken to mean the candidate is unresolvable.
            continue;

        // Check whether the side effects associated with this attribute
        // have already been determined, looking first among the aggregate
        // attributes and then among the record-construction ones.
        auto ase = aggr_side_effects.find(a);

        if ( ase == aggr_side_effects.end() ) {
            ase = record_constr_with_side_effects.find(a);

            if ( ase == record_constr_with_side_effects.end() )
                // Not resolved yet, so neither is the current candidate.
                return false;
        }

        for ( const auto& se : ase->second ) {
            if ( ! AssessSideEffects(se.get(), access, t, non_local_ids, aggrs) )
                continue;

            // An applicable operation makes unknown changes.
            is_unknown = true;
            return true;
        }
    }

    return true;
}
|
||||
|
||||
bool ProfileFuncs::AssessSideEffects(const SideEffectsOp* se, SideEffectsOp::AccessType access, const Type* t,
|
||||
IDSet& non_local_ids, TypeSet& aggrs) const {
|
||||
// First determine whether the SideEffectsOp applies.
|
||||
if ( se->GetAccessType() != access )
|
||||
return false;
|
||||
|
||||
if ( ! same_type(se->GetType(), t) )
|
||||
return false;
|
||||
|
||||
// It applies, return its effects.
|
||||
if ( se->HasUnknownChanges() )
|
||||
return true;
|
||||
|
||||
for ( auto a : se->ModAggrs() )
|
||||
aggrs.insert(a);
|
||||
for ( auto nl : se->ModNonLocals() )
|
||||
non_local_ids.insert(nl);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
std::shared_ptr<SideEffectsOp> ProfileFuncs::GetCallSideEffects(const ScriptFunc* sf) {
|
||||
if ( lambda_primaries.count(sf->Name()) > 0 )
|
||||
sf = lambda_primaries[sf->Name()];
|
||||
|
||||
auto sf_se = func_side_effects.find(sf);
|
||||
if ( sf_se != func_side_effects.end() )
|
||||
// Return cached result.
|
||||
return sf_se->second;
|
||||
|
||||
bool is_unknown = false;
|
||||
IDSet nla;
|
||||
TypeSet mod_aggrs;
|
||||
|
||||
ASSERT(func_profs.count(sf) != 0);
|
||||
auto pf = func_profs[sf];
|
||||
if ( ! AssessSideEffects(pf.get(), nla, mod_aggrs, is_unknown) )
|
||||
// Can't figure it out yet.
|
||||
return nullptr;
|
||||
|
||||
auto seo = std::make_shared<SideEffectsOp>(SideEffectsOp::CALL);
|
||||
seo->AddModNonGlobal(nla);
|
||||
seo->AddModAggrs(mod_aggrs);
|
||||
|
||||
if ( is_unknown )
|
||||
seo->SetUnknownChanges();
|
||||
|
||||
func_side_effects[sf] = seo;
|
||||
|
||||
return seo;
|
||||
}
|
||||
|
||||
} // namespace zeek::detail
|
||||
|
|
|
@ -63,6 +63,9 @@ inline p_hash_type merge_p_hashes(p_hash_type h1, p_hash_type h2) {
|
|||
return h1 ^ (h2 + 0x9e3779b9 + (h1 << 6) + (h1 >> 2));
|
||||
}
|
||||
|
||||
using AttrSet = std::unordered_set<const Attr*>;
|
||||
using AttrVec = std::vector<const Attr*>;
|
||||
|
||||
// Class for profiling the components of a single function (or expression).
|
||||
class ProfileFunc : public TraversalCallback {
|
||||
public:
|
||||
|
@ -93,6 +96,9 @@ public:
|
|||
const IDSet& WhenLocals() const { return when_locals; }
|
||||
const IDSet& Params() const { return params; }
|
||||
const std::unordered_map<const ID*, int>& Assignees() const { return assignees; }
|
||||
const IDSet& NonLocalAssignees() const { return non_local_assignees; }
|
||||
const auto& TableRefs() const { return tbl_refs; }
|
||||
const auto& AggrMods() const { return aggr_mods; }
|
||||
const IDSet& Inits() const { return inits; }
|
||||
const std::vector<const Stmt*>& Stmts() const { return stmts; }
|
||||
const std::vector<const Expr*>& Exprs() const { return exprs; }
|
||||
|
@ -100,16 +106,20 @@ public:
|
|||
const std::vector<const ConstExpr*>& Constants() const { return constants; }
|
||||
const IDSet& UnorderedIdentifiers() const { return ids; }
|
||||
const std::vector<const ID*>& OrderedIdentifiers() const { return ordered_ids; }
|
||||
const std::unordered_set<const Type*>& UnorderedTypes() const { return types; }
|
||||
const TypeSet& UnorderedTypes() const { return types; }
|
||||
const std::vector<const Type*>& OrderedTypes() const { return ordered_types; }
|
||||
const auto& TypeAliases() const { return type_aliases; }
|
||||
const std::unordered_set<ScriptFunc*>& ScriptCalls() const { return script_calls; }
|
||||
const IDSet& BiFGlobals() const { return BiF_globals; }
|
||||
const std::unordered_set<std::string>& Events() const { return events; }
|
||||
const std::unordered_set<const Attributes*>& ConstructorAttrs() const { return constructor_attrs; }
|
||||
const std::unordered_map<const Attributes*, TypePtr>& ConstructorAttrs() const { return constructor_attrs; }
|
||||
const std::unordered_map<const Type*, std::set<const Attributes*>>& RecordConstructorAttrs() const {
|
||||
return rec_constructor_attrs;
|
||||
}
|
||||
const std::unordered_set<const SwitchStmt*>& ExprSwitches() const { return expr_switches; }
|
||||
const std::unordered_set<const SwitchStmt*>& TypeSwitches() const { return type_switches; }
|
||||
|
||||
bool DoesIndirectCalls() { return does_indirect_calls; }
|
||||
bool DoesIndirectCalls() const { return does_indirect_calls; }
|
||||
|
||||
int NumParams() const { return num_params; }
|
||||
int NumLambdas() const { return lambdas.size(); }
|
||||
|
@ -139,6 +149,10 @@ protected:
|
|||
// Take note of an assignment to an identifier.
|
||||
void TrackAssignment(const ID* id);
|
||||
|
||||
// Extracts attributes of a record type used in a constructor (or implicit
|
||||
// initialization, or coercion, which does an implicit construction).
|
||||
void CheckRecordConstructor(TypePtr t);
|
||||
|
||||
// The function, body, or expression profiled. Can be null
|
||||
// depending on which constructor was used.
|
||||
const Func* profiled_func = nullptr;
|
||||
|
@ -175,6 +189,15 @@ protected:
|
|||
// captured in "inits".
|
||||
std::unordered_map<const ID*, int> assignees;
|
||||
|
||||
// A subset of assignees reflecting those that are globals or captures.
|
||||
IDSet non_local_assignees;
|
||||
|
||||
// TableType's that are used in table references (i.e., index operations).
|
||||
TypeSet tbl_refs;
|
||||
|
||||
// Types corresponding to aggregates that are modified.
|
||||
TypeSet aggr_mods;
|
||||
|
||||
// Same for locals seen in initializations, so we can find,
|
||||
// for example, unused aggregates.
|
||||
IDSet inits;
|
||||
|
@ -209,11 +232,15 @@ protected:
|
|||
|
||||
// Types seen in the function. A set rather than a vector because
|
||||
// the same type can be seen numerous times.
|
||||
std::unordered_set<const Type*> types;
|
||||
TypeSet types;
|
||||
|
||||
// The same, but in a deterministic order, with duplicates removed.
|
||||
std::vector<const Type*> ordered_types;
|
||||
|
||||
// For a given type (seen in an attribute), tracks other types that
|
||||
// are effectively aliased with it via coercions.
|
||||
std::unordered_map<const Type*, std::set<const Type*>> type_aliases;
|
||||
|
||||
// Script functions that this script calls. Includes calls made
|
||||
// by lambdas and when bodies, as the goal is to identify recursion.
|
||||
std::unordered_set<ScriptFunc*> script_calls;
|
||||
|
@ -228,8 +255,13 @@ protected:
|
|||
// Names of generated events.
|
||||
std::unordered_set<std::string> events;
|
||||
|
||||
// Attributes seen in set or table constructors.
|
||||
std::unordered_set<const Attributes*> constructor_attrs;
|
||||
// Attributes seen in set, table, or record constructors, mapped back
|
||||
// to the type where they appear.
|
||||
std::unordered_map<const Attributes*, TypePtr> constructor_attrs;
|
||||
|
||||
// Attributes associated with record constructors. There can be several,
|
||||
// so we use a set.
|
||||
std::unordered_map<const Type*, std::set<const Attributes*>> rec_constructor_attrs;
|
||||
|
||||
// Switch statements with either expression cases or type cases.
|
||||
std::unordered_set<const SwitchStmt*> expr_switches;
|
||||
|
@ -256,6 +288,50 @@ protected:
|
|||
bool abs_rec_fields;
|
||||
};
|
||||
|
||||
// Describes an operation for which some forms of access can lead to state
|
||||
// modifications.
|
||||
class SideEffectsOp {
|
||||
public:
|
||||
// Access types correspond to:
|
||||
// NONE - there are no side effects
|
||||
// CALL - relevant for function calls
|
||||
// CONSTRUCTION - relevant for constructing/coercing a record
|
||||
// READ - relevant for reading a table element
|
||||
// WRITE - relevant for modifying a table element
|
||||
enum AccessType { NONE, CALL, CONSTRUCTION, READ, WRITE };
|
||||
|
||||
SideEffectsOp(AccessType at = NONE, const Type* t = nullptr) : access(at), type(t) {}
|
||||
|
||||
auto GetAccessType() const { return access; }
|
||||
const Type* GetType() const { return type; }
|
||||
|
||||
void SetUnknownChanges() { has_unknown_changes = true; }
|
||||
bool HasUnknownChanges() const { return has_unknown_changes; }
|
||||
|
||||
void AddModNonGlobal(IDSet ids) { mod_non_locals.insert(ids.begin(), ids.end()); }
|
||||
void AddModAggrs(TypeSet types) { mod_aggrs.insert(types.begin(), types.end()); }
|
||||
|
||||
const auto& ModNonLocals() const { return mod_non_locals; }
|
||||
const auto& ModAggrs() const { return mod_aggrs; }
|
||||
|
||||
private:
|
||||
AccessType access;
|
||||
const Type* type; // type for which some operations alter state
|
||||
|
||||
// Globals and/or captures that the operation potentially modifies.
|
||||
IDSet mod_non_locals;
|
||||
|
||||
// Aggregates (specified by types) that potentially modified.
|
||||
TypeSet mod_aggrs;
|
||||
|
||||
// Sometimes the side effects are not known (such as when making
|
||||
// indirect function calls, so we can't know statically what function
|
||||
// will be called). We refer to as Unknown, and their implications are
|
||||
// presumed to be worst-case - any non-local or aggregate is potentially
|
||||
// affected.
|
||||
bool has_unknown_changes = false;
|
||||
};
|
||||
|
||||
// Function pointer for a predicate that determines whether a given
|
||||
// profile is compilable. Alternatively we could derive subclasses
|
||||
// from ProfileFuncs and use a virtual method for this, but that seems
|
||||
|
@ -286,11 +362,38 @@ public:
|
|||
const std::unordered_set<const LambdaExpr*>& Lambdas() const { return lambdas; }
|
||||
const std::unordered_set<std::string>& Events() const { return events; }
|
||||
|
||||
std::shared_ptr<ProfileFunc> FuncProf(const ScriptFunc* f) { return func_profs[f]; }
|
||||
const auto& FuncProfs() const { return func_profs; }
|
||||
|
||||
// This is only externally germane for LambdaExpr's.
|
||||
// Profiles associated with LambdaExpr's and expressions appearing in
|
||||
// attributes.
|
||||
std::shared_ptr<ProfileFunc> ExprProf(const Expr* e) { return expr_profs[e]; }
|
||||
|
||||
// Returns true if the given type corresponds to a table that has a
|
||||
// &default attribute that returns an aggregate value.
|
||||
bool IsTableWithDefaultAggr(const Type* t);
|
||||
|
||||
// Returns true if the given operation has non-zero side effects.
|
||||
bool HasSideEffects(SideEffectsOp::AccessType access, const TypePtr& t) const;
|
||||
|
||||
// Retrieves the side effects of the given operation, updating non_local_ids
|
||||
// and aggrs with identifiers and aggregate types that are modified.
|
||||
//
|
||||
// A return value of true means the side effects are Unknown. If false,
|
||||
// then there are side effects iff either (or both) of non_local_ids
|
||||
// or aggrs are non-empty.
|
||||
bool GetSideEffects(SideEffectsOp::AccessType access, const Type* t, IDSet& non_local_ids, TypeSet& aggrs) const;
|
||||
|
||||
// Retrieves the side effects of calling the function corresponding to
|
||||
// the NameExpr, updating non_local_ids and aggrs with identifiers and
|
||||
// aggregate types that are modified. is_unknown is set to true if the
|
||||
// call has Unknown side effects (which overrides the relevance of the
|
||||
// updates to the sets).
|
||||
//
|
||||
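// NOTE(review): CSE_ValidityChecker::CheckCall() asserts that this return
// value is true, which suggests "true" means the side effects *were*
// resolved - confirm the polarity described below against the implementation.
// A return value of true means that side effects cannot yet be determined,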
|
||||
// due to dependencies on other side effects. This can happen when
|
||||
// constructing a ProfileFuncs, but should not happen once it's constructed.
|
||||
bool GetCallSideEffects(const NameExpr* n, IDSet& non_local_ids, TypeSet& aggrs, bool& is_unknown);
|
||||
|
||||
// Returns the "representative" Type* for the hash associated with
|
||||
// the parameter (which might be the parameter itself).
|
||||
const Type* TypeRep(const Type* orig) {
|
||||
|
@ -332,8 +435,56 @@ protected:
|
|||
void ComputeProfileHash(std::shared_ptr<ProfileFunc> pf);
|
||||
|
||||
// Analyze the expressions and lambdas appearing in a set of
|
||||
// attributes.
|
||||
void AnalyzeAttrs(const Attributes* Attrs);
|
||||
// attributes, in the context of a given type.
|
||||
void AnalyzeAttrs(const Attributes* attrs, const Type* t);
|
||||
|
||||
// In the abstract, computes side-effects associated with operations other
|
||||
// than explicit function calls. Currently, this means tables and records
|
||||
// that can implicitly call functions that have side effects due to
|
||||
// attributes such as &default. The machinery also applies to assessing
|
||||
// the side effects of explicit function calls, which is done by
|
||||
// (the two versions of) GetCallSideEffects().
|
||||
void ComputeSideEffects();
|
||||
|
||||
// True if the given expression for sure has no side effects, which is
|
||||
// almost always the case. False if the expression *may* have side effects
|
||||
// and requires further analysis.
|
||||
bool DefinitelyHasNoSideEffects(const ExprPtr& e) const;
|
||||
|
||||
// Records the side effects associated with the given attribute.
|
||||
void SetSideEffects(const Attr* a, IDSet& non_local_ids, TypeSet& aggrs, bool is_unknown);
|
||||
|
||||
// Returns the attributes associated with the given type *and its aliases*.
|
||||
AttrVec AssociatedAttrs(const Type* t);
|
||||
|
||||
// For a given set of attributes, assesses which ones are associated with
|
||||
// the given type or its aliases and adds them to the given vector.
|
||||
void FindAssociatedAttrs(const AttrSet& candidate_attrs, const Type* t, AttrVec& assoc_attrs);
|
||||
|
||||
// Assesses the side effects associated with the given expression. Returns
|
||||
// true if a complete assessment was possible, false if not because the
|
||||
// results depend on resolving other potential side effects first.
|
||||
bool AssessSideEffects(const ExprPtr& e, IDSet& non_local_ids, TypeSet& types, bool& is_unknown);
|
||||
|
||||
// Same, but for the given profile.
|
||||
bool AssessSideEffects(const ProfileFunc* pf, IDSet& non_local_ids, TypeSet& types, bool& is_unknown);
|
||||
|
||||
// Same but for the particular case of a relevant access to an aggregate
|
||||
// (which can be constructing a record; reading a table element; or
|
||||
// modifying a table element).
|
||||
bool AssessAggrEffects(SideEffectsOp::AccessType access, const Type* t, IDSet& non_local_ids, TypeSet& aggrs,
|
||||
bool& is_unknown);
|
||||
|
||||
// For a given set of side effects, determines whether the given aggregate
|
||||
// access applies. If so, updates non_local_ids and aggrs and returns true
|
||||
// if there are Unknown side effects; otherwise returns false.
|
||||
bool AssessSideEffects(const SideEffectsOp* se, SideEffectsOp::AccessType access, const Type* t,
|
||||
IDSet& non_local_ids, TypeSet& aggrs) const;
|
||||
|
||||
// Returns nil if side effects are not available. That should never be
|
||||
// the case after we've done our initial analysis, but is provided
|
||||
// as a signal so that this method can also be used during that analysis.
|
||||
std::shared_ptr<SideEffectsOp> GetCallSideEffects(const ScriptFunc* f);
|
||||
|
||||
// Globals seen across the functions, other than those solely seen
|
||||
// as the function being called in a call.
|
||||
|
@ -357,6 +508,11 @@ protected:
|
|||
// Maps a type to its representative (which might be itself).
|
||||
std::unordered_map<const Type*, const Type*> type_to_rep;
|
||||
|
||||
// For a given type, tracks which other types are aliased to it.
|
||||
// Alias occurs via operations that can propagate attributes, which
|
||||
// are various forms of aggregate coercions.
|
||||
std::unordered_map<const Type*, std::set<const Type*>> type_aliases;
|
||||
|
||||
// Script functions that get called.
|
||||
std::unordered_set<ScriptFunc*> script_calls;
|
||||
|
||||
|
@ -369,35 +525,77 @@ protected:
|
|||
// Names of generated events.
|
||||
std::unordered_set<std::string> events;
|
||||
|
||||
// Maps script functions to associated profiles. This isn't
|
||||
// actually well-defined in the case of event handlers and hooks,
|
||||
// which can have multiple bodies. However, the need for this
|
||||
// is temporary (it's for skipping compilation of functions that
|
||||
// appear in "when" clauses), and in that context it suffices.
|
||||
// Maps script functions to associated profiles. This isn't actually
|
||||
// well-defined in the case of event handlers and hooks, which can have
|
||||
// multiple bodies. However, we only use this in the context of calls
|
||||
// to regular functions, and for that it suffices.
|
||||
std::unordered_map<const ScriptFunc*, std::shared_ptr<ProfileFunc>> func_profs;
|
||||
|
||||
// Maps expressions to their profiles. This is only germane
|
||||
// externally for LambdaExpr's, but internally it abets memory
|
||||
// management.
|
||||
// Map lambda names to their primary functions
|
||||
std::unordered_map<std::string, const ScriptFunc*> lambda_primaries;
|
||||
|
||||
// Tracks side effects associated with script functions. If we decide in
|
||||
// the future to associate richer side-effect information with BiFs then
|
||||
// we could expand this to track Func*'s instead.
|
||||
std::unordered_map<const ScriptFunc*, std::shared_ptr<SideEffectsOp>> func_side_effects;
|
||||
|
||||
// Maps expressions to their profiles.
|
||||
std::unordered_map<const Expr*, std::shared_ptr<ProfileFunc>> expr_profs;
|
||||
|
||||
// These remaining member variables are only used internally,
|
||||
// not provided via accessors:
|
||||
|
||||
// Maps expression-valued attributes to a collection of types in which
|
||||
// the attribute appears. Usually there's just one type, but there are
|
||||
// some scripting constructs that can result in the same attribute being
|
||||
// shared across multiple distinct (though compatible) types.
|
||||
std::unordered_map<const Attr*, std::vector<const Type*>> expr_attrs;
|
||||
|
||||
// Tracks whether a given TableType has a &default that returns an
|
||||
// aggregate. Expressions involving indexing tables with such types
|
||||
// cannot be optimized out using CSE because each returned value is
|
||||
// distinct.
|
||||
std::unordered_map<const Type*, bool> tbl_has_aggr_default;
|
||||
|
||||
// For a given attribute, maps it to side effects associated with aggregate
|
||||
// operations (table reads/writes).
|
||||
std::unordered_map<const Attr*, std::vector<std::shared_ptr<SideEffectsOp>>> aggr_side_effects;
|
||||
|
||||
// The same, but for record constructors.
|
||||
std::unordered_map<const Attr*, std::vector<std::shared_ptr<SideEffectsOp>>> record_constr_with_side_effects;
|
||||
|
||||
// The set of attributes that may have side effects but we haven't yet
|
||||
// resolved if that's the case. Empty after we're done analyzing for
|
||||
// side effects.
|
||||
AttrSet candidates;
|
||||
|
||||
// The current candidate we're analyzing. We track this to deal with
|
||||
// the possibility of the candidate's side effects recursively referring
|
||||
// to the candidate itself.
|
||||
const Attr* curr_candidate;
|
||||
|
||||
// The set of attributes that definitely have side effects.
|
||||
AttrSet attrs_with_side_effects;
|
||||
|
||||
// The full collection of operations with side effects.
|
||||
std::vector<std::shared_ptr<SideEffectsOp>> side_effects_ops;
|
||||
|
||||
// Which function profiles we are currently analyzing. Used to detect
|
||||
// recursion and prevent it from leading to non-termination of the analysis.
|
||||
std::unordered_set<std::shared_ptr<ProfileFunc>> active_func_profiles;
|
||||
|
||||
// Maps types to their hashes.
|
||||
std::unordered_map<const Type*, p_hash_type> type_hashes;
|
||||
|
||||
// An inverse mapping, to a representative for each distinct hash.
|
||||
std::unordered_map<p_hash_type, const Type*> type_hash_reps;
|
||||
|
||||
// For types with names, tracks the ones we've already hashed,
|
||||
// so we can avoid work for distinct pointers that refer to the
|
||||
// same underlying type.
|
||||
// For types with names, tracks the ones we've already hashed, so we can
|
||||
// avoid work for distinct pointers that refer to the same underlying type.
|
||||
std::unordered_map<std::string, const Type*> seen_type_names;
|
||||
|
||||
// Expressions that we've discovered that we need to further
|
||||
// profile. These can arise for example due to lambdas or
|
||||
// record attributes.
|
||||
// Expressions that we've discovered that we need to further profile.
|
||||
// These can arise for example due to lambdas or record attributes.
|
||||
std::vector<const Expr*> pending_exprs;
|
||||
|
||||
// Whether the hashes for extended records should cover their final,
|
||||
|
|
|
@ -11,12 +11,14 @@
|
|||
#include "zeek/Stmt.h"
|
||||
#include "zeek/Var.h"
|
||||
#include "zeek/script_opt/ExprOptInfo.h"
|
||||
#include "zeek/script_opt/FuncInfo.h"
|
||||
#include "zeek/script_opt/StmtOptInfo.h"
|
||||
#include "zeek/script_opt/TempVar.h"
|
||||
|
||||
namespace zeek::detail {
|
||||
|
||||
Reducer::Reducer(const ScriptFunc* func, std::shared_ptr<ProfileFunc> _pf) : pf(std::move(_pf)) {
|
||||
Reducer::Reducer(const ScriptFunc* func, std::shared_ptr<ProfileFunc> _pf, ProfileFuncs& _pfs)
|
||||
: pf(std::move(_pf)), pfs(_pfs) {
|
||||
auto& ft = func->GetType();
|
||||
|
||||
// Track the parameters so we don't remap them.
|
||||
|
@ -424,6 +426,41 @@ IDPtr Reducer::FindExprTmp(const Expr* rhs, const Expr* a, const std::shared_ptr
|
|||
}
|
||||
|
||||
bool Reducer::ExprValid(const ID* id, const Expr* e1, const Expr* e2) const {
|
||||
// First check for whether e1 is already known to itself have side effects.
|
||||
// If so, then it's never safe to reuse its associated identifier in lieu
|
||||
// of e2.
|
||||
std::optional<ExprSideEffects>& e1_se = e1->GetOptInfo()->SideEffects();
|
||||
if ( ! e1_se ) {
|
||||
bool has_side_effects = false;
|
||||
auto e1_t = e1->GetType();
|
||||
|
||||
if ( e1_t->Tag() == TYPE_OPAQUE || e1_t->Tag() == TYPE_ANY )
|
||||
// These have difficult-to-analyze semantics.
|
||||
has_side_effects = true;
|
||||
|
||||
else if ( e1->Tag() == EXPR_INDEX ) {
|
||||
auto aggr = e1->GetOp1();
|
||||
auto aggr_t = aggr->GetType();
|
||||
|
||||
if ( pfs.HasSideEffects(SideEffectsOp::READ, aggr_t) )
|
||||
has_side_effects = true;
|
||||
|
||||
else if ( aggr_t->Tag() == TYPE_TABLE && pfs.IsTableWithDefaultAggr(aggr_t.get()) )
|
||||
has_side_effects = true;
|
||||
}
|
||||
|
||||
else if ( e1->Tag() == EXPR_RECORD_CONSTRUCTOR || e1->Tag() == EXPR_RECORD_COERCE )
|
||||
has_side_effects = pfs.HasSideEffects(SideEffectsOp::CONSTRUCTION, e1->GetType());
|
||||
|
||||
e1_se = ExprSideEffects(has_side_effects);
|
||||
}
|
||||
|
||||
if ( e1_se->HasSideEffects() ) {
|
||||
// We already know that e2 is structurally identical to e1.
|
||||
e2->GetOptInfo()->SideEffects() = ExprSideEffects(true);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Here are the considerations for expression validity.
|
||||
//
|
||||
// * None of the operands used in the given expression can
|
||||
|
@ -437,11 +474,14 @@ bool Reducer::ExprValid(const ID* id, const Expr* e1, const Expr* e2) const {
|
|||
// * Same goes to modifications of aggregates via "add" or "delete"
|
||||
// or "+=" append.
|
||||
//
|
||||
// * No propagation of expressions that are based on aggregates
|
||||
// across function calls.
|
||||
// * Assessment of any record constructors or coercions, or
|
||||
// table references or modifications, for possible invocation of
|
||||
// associated handlers that have side effects.
|
||||
//
|
||||
// * No propagation of expressions that are based on globals
|
||||
// across calls.
|
||||
// * Assessment of function calls for potential side effects.
|
||||
//
|
||||
// These latter two are guided by the global profile of the full set
|
||||
// of script functions.
|
||||
|
||||
// Tracks which ID's are germane for our analysis.
|
||||
std::vector<const ID*> ids;
|
||||
|
@ -456,7 +496,7 @@ bool Reducer::ExprValid(const ID* id, const Expr* e1, const Expr* e2) const {
|
|||
if ( e1->Tag() == EXPR_NAME )
|
||||
ids.push_back(e1->AsNameExpr()->Id());
|
||||
|
||||
CSE_ValidityChecker vc(ids, e1, e2);
|
||||
CSE_ValidityChecker vc(pfs, ids, e1, e2);
|
||||
reduction_root->Traverse(&vc);
|
||||
|
||||
return vc.IsValid();
|
||||
|
@ -785,15 +825,12 @@ std::shared_ptr<TempVar> Reducer::FindTemporary(const ID* id) const {
|
|||
return tmp->second;
|
||||
}
|
||||
|
||||
CSE_ValidityChecker::CSE_ValidityChecker(const std::vector<const ID*>& _ids, const Expr* _start_e, const Expr* _end_e)
|
||||
: ids(_ids) {
|
||||
CSE_ValidityChecker::CSE_ValidityChecker(ProfileFuncs& _pfs, const std::vector<const ID*>& _ids, const Expr* _start_e,
|
||||
const Expr* _end_e)
|
||||
: pfs(_pfs), ids(_ids) {
|
||||
start_e = _start_e;
|
||||
end_e = _end_e;
|
||||
|
||||
for ( auto i : ids )
|
||||
if ( i->IsGlobal() || IsAggr(i->GetType()) )
|
||||
sensitive_to_calls = true;
|
||||
|
||||
// Track whether this is a record assignment, in which case
|
||||
// we're attuned to assignments to the same field for the
|
||||
// same type of record.
|
||||
|
@ -811,7 +848,16 @@ CSE_ValidityChecker::CSE_ValidityChecker(const std::vector<const ID*>& _ids, con
|
|||
}
|
||||
|
||||
TraversalCode CSE_ValidityChecker::PreStmt(const Stmt* s) {
|
||||
if ( s->Tag() == STMT_ADD || s->Tag() == STMT_DELETE )
|
||||
auto t = s->Tag();
|
||||
|
||||
if ( t == STMT_WHEN ) {
|
||||
// These are too hard to analyze - they result in lambda calls
|
||||
// that can affect aggregates, etc.
|
||||
is_valid = false;
|
||||
return TC_ABORTALL;
|
||||
}
|
||||
|
||||
if ( t == STMT_ADD || t == STMT_DELETE )
|
||||
in_aggr_mod_stmt = true;
|
||||
|
||||
return TC_CONTINUE;
|
||||
|
@ -831,7 +877,7 @@ TraversalCode CSE_ValidityChecker::PreExpr(const Expr* e) {
|
|||
|
||||
// Don't analyze the expression, as it's our starting
|
||||
// point and we don't want to conflate its properties
|
||||
// with those of any intervening expression.
|
||||
// with those of any intervening expressions.
|
||||
return TC_CONTINUE;
|
||||
}
|
||||
|
||||
|
@ -858,25 +904,26 @@ TraversalCode CSE_ValidityChecker::PreExpr(const Expr* e) {
|
|||
auto lhs_ref = e->GetOp1()->AsRefExprPtr();
|
||||
auto lhs = lhs_ref->GetOp1()->AsNameExpr();
|
||||
|
||||
if ( CheckID(ids, lhs->Id(), false) ) {
|
||||
is_valid = false;
|
||||
if ( CheckID(lhs->Id(), false) )
|
||||
return TC_ABORTALL;
|
||||
}
|
||||
|
||||
// Note, we don't use CheckAggrMod() because this
|
||||
// is a plain assignment. It might be changing a variable's
|
||||
// binding to an aggregate, but it's not changing the
|
||||
// aggregate itself.
|
||||
// Note, we don't use CheckAggrMod() because this is a plain
|
||||
// assignment. It might be changing a variable's binding to
|
||||
// an aggregate ("aggr_var = new_aggr_val"), but we don't
|
||||
// introduce temporaries that are simply aliases of existing
|
||||
// variables (e.g., we don't have "<internal>::#8 = aggr_var"),
|
||||
// and so there's no concern that the temporary could now be
|
||||
// referring to the wrong aggregate. If instead we have
|
||||
// "<internal>::#8 = aggr_var$foo", then a reassignment here
|
||||
// to "aggr_var" will already be caught by CheckID().
|
||||
} break;
|
||||
|
||||
case EXPR_INDEX_ASSIGN: {
|
||||
auto lhs_aggr = e->GetOp1();
|
||||
auto lhs_aggr_id = lhs_aggr->AsNameExpr()->Id();
|
||||
|
||||
if ( CheckID(ids, lhs_aggr_id, true) || CheckAggrMod(ids, e) ) {
|
||||
is_valid = false;
|
||||
if ( CheckID(lhs_aggr_id, true) || CheckTableMod(lhs_aggr->GetType()) )
|
||||
return TC_ABORTALL;
|
||||
}
|
||||
} break;
|
||||
|
||||
case EXPR_FIELD_LHS_ASSIGN: {
|
||||
|
@ -884,17 +931,9 @@ TraversalCode CSE_ValidityChecker::PreExpr(const Expr* e) {
|
|||
auto lhs_aggr_id = lhs->AsNameExpr()->Id();
|
||||
auto lhs_field = e->AsFieldLHSAssignExpr()->Field();
|
||||
|
||||
if ( lhs_field == field && same_type(lhs_aggr_id->GetType(), field_type) ) {
|
||||
// Potential assignment to the same field as for
|
||||
// our expression of interest. Even if the
|
||||
// identifier involved is not one we have our eye
|
||||
// on, due to aggregate aliasing this could be
|
||||
// altering the value of our expression, so bail.
|
||||
is_valid = false;
|
||||
if ( CheckID(lhs_aggr_id, true) )
|
||||
return TC_ABORTALL;
|
||||
}
|
||||
|
||||
if ( CheckID(ids, lhs_aggr_id, true) || CheckAggrMod(ids, e) ) {
|
||||
if ( lhs_field == field && same_type(lhs_aggr_id->GetType(), field_type) ) {
|
||||
is_valid = false;
|
||||
return TC_ABORTALL;
|
||||
}
|
||||
|
@ -903,17 +942,13 @@ TraversalCode CSE_ValidityChecker::PreExpr(const Expr* e) {
|
|||
case EXPR_APPEND_TO:
|
||||
// This doesn't directly change any identifiers, but does
|
||||
// alter an aggregate.
|
||||
if ( CheckAggrMod(ids, e) ) {
|
||||
is_valid = false;
|
||||
if ( CheckAggrMod(e->GetType()) )
|
||||
return TC_ABORTALL;
|
||||
}
|
||||
break;
|
||||
|
||||
case EXPR_CALL:
|
||||
if ( sensitive_to_calls ) {
|
||||
is_valid = false;
|
||||
if ( CheckCall(e->AsCallExpr()) )
|
||||
return TC_ABORTALL;
|
||||
}
|
||||
break;
|
||||
|
||||
case EXPR_TABLE_CONSTRUCTOR:
|
||||
|
@ -922,49 +957,35 @@ TraversalCode CSE_ValidityChecker::PreExpr(const Expr* e) {
|
|||
// so we don't want to traverse them.
|
||||
return TC_ABORTSTMT;
|
||||
|
||||
case EXPR_RECORD_COERCE:
|
||||
case EXPR_RECORD_CONSTRUCTOR:
|
||||
// If these have initializations done at construction
|
||||
// time, those can include function calls.
|
||||
if ( sensitive_to_calls ) {
|
||||
auto& et = e->GetType();
|
||||
if ( et->Tag() == TYPE_RECORD && ! et->AsRecordType()->IdempotentCreation() ) {
|
||||
is_valid = false;
|
||||
// Note, record coercion behaves like constructors in terms of
|
||||
// potentially executing &default functions. In either case,
|
||||
// the type of the expression reflects the type we want to analyze
|
||||
// for side effects.
|
||||
if ( CheckRecordConstructor(e->GetType()) )
|
||||
return TC_ABORTALL;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case EXPR_INDEX:
|
||||
case EXPR_FIELD:
|
||||
// We treat these together because they both have
|
||||
// to be checked when inside an "add" or "delete"
|
||||
// statement.
|
||||
if ( in_aggr_mod_stmt ) {
|
||||
case EXPR_FIELD: {
|
||||
// We treat these together because they both have to be checked
|
||||
// when inside an "add" or "delete" statement.
|
||||
auto aggr = e->GetOp1();
|
||||
auto aggr_t = aggr->GetType();
|
||||
|
||||
if ( in_aggr_mod_stmt ) {
|
||||
auto aggr_id = aggr->AsNameExpr()->Id();
|
||||
|
||||
if ( CheckID(ids, aggr_id, true) ) {
|
||||
is_valid = false;
|
||||
return TC_ABORTALL;
|
||||
}
|
||||
}
|
||||
|
||||
if ( t == EXPR_INDEX && sensitive_to_calls ) {
|
||||
// Unfortunately in isolation we can't
|
||||
// statically determine whether this table
|
||||
// has a &default associated with it. In
|
||||
// principle we could track all instances
|
||||
// of the table type seen (across the
|
||||
// entire set of scripts), and note whether
|
||||
// any of those include an expression, but
|
||||
// that's a lot of work for what might be
|
||||
// minimal gain.
|
||||
|
||||
is_valid = false;
|
||||
if ( CheckID(aggr_id, true) || CheckAggrMod(aggr_t) )
|
||||
return TC_ABORTALL;
|
||||
}
|
||||
|
||||
break;
|
||||
else if ( t == EXPR_INDEX && aggr_t->Tag() == TYPE_TABLE ) {
|
||||
if ( CheckTableRef(aggr_t) )
|
||||
return TC_ABORTALL;
|
||||
}
|
||||
} break;
|
||||
|
||||
default: break;
|
||||
}
|
||||
|
@ -972,33 +993,92 @@ TraversalCode CSE_ValidityChecker::PreExpr(const Expr* e) {
|
|||
return TC_CONTINUE;
|
||||
}
|
||||
|
||||
bool CSE_ValidityChecker::CheckID(const std::vector<const ID*>& ids, const ID* id, bool ignore_orig) const {
|
||||
// Only check type info for aggregates.
|
||||
auto id_t = IsAggr(id->GetType()) ? id->GetType() : nullptr;
|
||||
|
||||
bool CSE_ValidityChecker::CheckID(const ID* id, bool ignore_orig) {
|
||||
for ( auto i : ids ) {
|
||||
if ( ignore_orig && i == ids.front() )
|
||||
continue;
|
||||
|
||||
if ( id == i )
|
||||
return true; // reassignment
|
||||
|
||||
if ( id_t && same_type(id_t, i->GetType()) )
|
||||
// Same-type aggregate.
|
||||
return true;
|
||||
return Invalid(); // reassignment
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool CSE_ValidityChecker::CheckAggrMod(const std::vector<const ID*>& ids, const Expr* e) const {
|
||||
const auto& e_i_t = e->GetType();
|
||||
if ( IsAggr(e_i_t) ) {
|
||||
// This assignment sets an aggregate value.
|
||||
// Look for type matches.
|
||||
bool CSE_ValidityChecker::CheckAggrMod(const TypePtr& t) {
|
||||
if ( ! IsAggr(t) )
|
||||
return false;
|
||||
|
||||
for ( auto i : ids )
|
||||
if ( same_type(e_i_t, i->GetType()) )
|
||||
if ( same_type(t, i->GetType()) )
|
||||
return Invalid();
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool CSE_ValidityChecker::CheckRecordConstructor(const TypePtr& t) {
|
||||
if ( t->Tag() != TYPE_RECORD )
|
||||
return false;
|
||||
|
||||
return CheckSideEffects(SideEffectsOp::CONSTRUCTION, t);
|
||||
}
|
||||
|
||||
bool CSE_ValidityChecker::CheckTableMod(const TypePtr& t) {
|
||||
if ( CheckAggrMod(t) )
|
||||
return true;
|
||||
|
||||
if ( t->Tag() != TYPE_TABLE )
|
||||
return false;
|
||||
|
||||
return CheckSideEffects(SideEffectsOp::WRITE, t);
|
||||
}
|
||||
|
||||
// Returns true if reading an element of an aggregate of the given type has
// side effects that conflict with the CSE opportunity.
bool CSE_ValidityChecker::CheckTableRef(const TypePtr& t) {
    const bool conflicts = CheckSideEffects(SideEffectsOp::READ, t);
    return conflicts;
}
|
||||
|
||||
bool CSE_ValidityChecker::CheckCall(const CallExpr* c) {
|
||||
auto func = c->Func();
|
||||
std::string desc;
|
||||
if ( func->Tag() != EXPR_NAME )
|
||||
// Can't analyze indirect calls.
|
||||
return Invalid();
|
||||
|
||||
IDSet non_local_ids;
|
||||
TypeSet aggrs;
|
||||
bool is_unknown = false;
|
||||
|
||||
auto resolved = pfs.GetCallSideEffects(func->AsNameExpr(), non_local_ids, aggrs, is_unknown);
|
||||
ASSERT(resolved);
|
||||
|
||||
if ( is_unknown || CheckSideEffects(non_local_ids, aggrs) )
|
||||
return Invalid();
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Returns true (having marked the CSE opportunity invalid) if the given
// form of access to the given type has side effects that are either
// Unknown or that conflict with the identifiers we're tracking.
bool CSE_ValidityChecker::CheckSideEffects(SideEffectsOp::AccessType access, const TypePtr& t) {
    IDSet modified_ids;
    TypeSet modified_aggrs;

    // A true return from GetSideEffects() is treated as an immediate
    // invalidation; otherwise the populated sets decide the outcome.
    const bool unknown = pfs.GetSideEffects(access, t.get(), modified_ids, modified_aggrs);

    return unknown ? Invalid() : CheckSideEffects(modified_ids, modified_aggrs);
}
|
||||
|
||||
bool CSE_ValidityChecker::CheckSideEffects(const IDSet& non_local_ids, const TypeSet& aggrs) {
|
||||
if ( non_local_ids.empty() && aggrs.empty() )
|
||||
// This is far and away the most common case.
|
||||
return false;
|
||||
|
||||
for ( auto i : ids ) {
|
||||
for ( auto nli : non_local_ids )
|
||||
if ( nli == i )
|
||||
return Invalid();
|
||||
|
||||
auto i_t = i->GetType();
|
||||
for ( auto a : aggrs )
|
||||
if ( same_type(a, i_t.get()) )
|
||||
return Invalid();
|
||||
}
|
||||
|
||||
return false;
|
||||
|
|
|
@ -16,7 +16,7 @@ class TempVar;
|
|||
|
||||
class Reducer {
|
||||
public:
|
||||
Reducer(const ScriptFunc* func, std::shared_ptr<ProfileFunc> pf);
|
||||
Reducer(const ScriptFunc* func, std::shared_ptr<ProfileFunc> pf, ProfileFuncs& pfs);
|
||||
|
||||
StmtPtr Reduce(StmtPtr s);
|
||||
|
||||
|
@ -131,24 +131,22 @@ public:
|
|||
replaced_stmts.clear();
|
||||
}
|
||||
|
||||
// Given the LHS and RHS of an assignment, returns true
|
||||
// if the RHS is a common subexpression (meaning that the
|
||||
// current assignment statement should be deleted). In
|
||||
// that case, has the side effect of associating an alias
|
||||
// for the LHS with the temporary variable that holds the
|
||||
// equivalent RHS; or if the LHS is a local that has no other
|
||||
// assignments, and the same for the RHS.
|
||||
// Given the LHS and RHS of an assignment, returns true if the RHS is
|
||||
// a common subexpression (meaning that the current assignment statement
|
||||
// should be deleted). In that case, has the side effect of associating
|
||||
// an alias for the LHS with the temporary variable that holds the
|
||||
// equivalent RHS; or if the LHS is a local that has no other assignments,
|
||||
// and the same for the RHS.
|
||||
//
|
||||
// Assumes reduction (including alias propagation) has
|
||||
// already been applied.
|
||||
// Assumes reduction (including alias propagation) has already been applied.
|
||||
|
||||
bool IsCSE(const AssignExpr* a, const NameExpr* lhs, const Expr* rhs);
|
||||
|
||||
// Returns a constant representing folding of the given expression
|
||||
// (which must have constant operands).
|
||||
ConstExprPtr Fold(ExprPtr e);
|
||||
|
||||
// Notes that the given expression has been folded to the
|
||||
// given constant.
|
||||
// Notes that the given expression has been folded to the given constant.
|
||||
void FoldedTo(ExprPtr orig, ConstExprPtr c);
|
||||
|
||||
// Given an lhs=rhs statement followed by succ_stmt, returns
|
||||
|
@ -237,6 +235,9 @@ protected:
|
|||
// Profile associated with the function.
|
||||
std::shared_ptr<ProfileFunc> pf;
|
||||
|
||||
// Profile across all script functions - used for optimization decisions.
|
||||
ProfileFuncs& pfs;
|
||||
|
||||
// Tracks the temporary variables created during the reduction/
|
||||
// optimization process.
|
||||
std::vector<std::shared_ptr<TempVar>> temps;
|
||||
|
@ -324,7 +325,7 @@ protected:
|
|||
|
||||
class CSE_ValidityChecker : public TraversalCallback {
|
||||
public:
|
||||
CSE_ValidityChecker(const std::vector<const ID*>& ids, const Expr* start_e, const Expr* end_e);
|
||||
CSE_ValidityChecker(ProfileFuncs& pfs, const std::vector<const ID*>& ids, const Expr* start_e, const Expr* end_e);
|
||||
|
||||
TraversalCode PreStmt(const Stmt*) override;
|
||||
TraversalCode PostStmt(const Stmt*) override;
|
||||
|
@ -342,21 +343,47 @@ public:
|
|||
|
||||
protected:
|
||||
// Returns true if an assignment involving the given identifier on
|
||||
// the LHS is in conflict with the given list of identifiers.
|
||||
bool CheckID(const std::vector<const ID*>& ids, const ID* id, bool ignore_orig) const;
|
||||
// the LHS is in conflict with the identifiers we're tracking.
|
||||
bool CheckID(const ID* id, bool ignore_orig);
|
||||
|
||||
// Returns true if the assignment given by 'e' modifies an aggregate
|
||||
// with the same type as that of one of the identifiers.
|
||||
bool CheckAggrMod(const std::vector<const ID*>& ids, const Expr* e) const;
|
||||
// Returns true if a modification to an aggregate of the given type
|
||||
// potentially aliases with one of the identifiers we're tracking.
|
||||
bool CheckAggrMod(const TypePtr& t);
|
||||
|
||||
// Returns true if a record constructor/coercion of the given type has
|
||||
// side effects and invalidates the CSE opportunity.
|
||||
bool CheckRecordConstructor(const TypePtr& t);
|
||||
|
||||
// The same for modifications to tables.
|
||||
bool CheckTableMod(const TypePtr& t);
|
||||
|
||||
// The same for accessing (reading) tables.
|
||||
bool CheckTableRef(const TypePtr& t);
|
||||
|
||||
// The same for the given function call.
|
||||
bool CheckCall(const CallExpr* c);
|
||||
|
||||
// True if the given form of access to the given type has side effects.
|
||||
bool CheckSideEffects(SideEffectsOp::AccessType access, const TypePtr& t);
|
||||
|
||||
// True if side effects to the given identifiers and aggregates invalidate
|
||||
// the CSE opportunity.
|
||||
bool CheckSideEffects(const IDSet& non_local_ids, const TypeSet& aggrs);
|
||||
|
||||
// Helper function that marks the CSE opportunity as invalid and returns
|
||||
// "true" (used by various methods to signal invalidation).
|
||||
bool Invalid() {
|
||||
is_valid = false;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Profile across all script functions.
|
||||
ProfileFuncs& pfs;
|
||||
|
||||
// The list of identifiers for which an assignment to one of them
|
||||
// renders the CSE unsafe.
|
||||
const std::vector<const ID*>& ids;
|
||||
|
||||
// Whether the list of identifiers includes some that we should
|
||||
// consider potentially altered by a function call.
|
||||
bool sensitive_to_calls = false;
|
||||
|
||||
// Where in the AST to start our analysis. This is the initial
|
||||
// assignment expression.
|
||||
const Expr* start_e;
|
||||
|
@ -379,8 +406,9 @@ protected:
|
|||
bool have_start_e = false;
|
||||
bool have_end_e = false;
|
||||
|
||||
// Whether analyzed expressions occur in the context of
|
||||
// a statement that modifies an aggregate ("add" or "delete").
|
||||
// Whether analyzed expressions occur in the context of a statement
|
||||
// that modifies an aggregate ("add" or "delete"), which changes the
|
||||
// interpretation of the expressions.
|
||||
bool in_aggr_mod_stmt = false;
|
||||
};
|
||||
|
||||
|
|
|
@ -147,7 +147,8 @@ static bool optimize_AST(ScriptFunc* f, std::shared_ptr<ProfileFunc>& pf, std::s
|
|||
return true;
|
||||
}
|
||||
|
||||
static void optimize_func(ScriptFunc* f, std::shared_ptr<ProfileFunc> pf, ScopePtr scope, StmtPtr& body) {
|
||||
static void optimize_func(ScriptFunc* f, std::shared_ptr<ProfileFunc> pf, ProfileFuncs& pfs, ScopePtr scope,
|
||||
StmtPtr& body) {
|
||||
if ( reporter->Errors() > 0 )
|
||||
return;
|
||||
|
||||
|
@ -167,7 +168,7 @@ static void optimize_func(ScriptFunc* f, std::shared_ptr<ProfileFunc> pf, ScopeP
|
|||
|
||||
push_existing_scope(scope);
|
||||
|
||||
auto rc = std::make_shared<Reducer>(f, pf);
|
||||
auto rc = std::make_shared<Reducer>(f, pf, pfs);
|
||||
auto new_body = rc->Reduce(body);
|
||||
|
||||
if ( reporter->Errors() > 0 ) {
|
||||
|
@ -230,7 +231,7 @@ static void optimize_func(ScriptFunc* f, std::shared_ptr<ProfileFunc> pf, ScopeP
|
|||
f->SetFrameSize(new_frame_size);
|
||||
|
||||
if ( analysis_options.gen_ZAM_code ) {
|
||||
ZAMCompiler ZAM(f, pf, scope, new_body, ud, rc);
|
||||
ZAMCompiler ZAM(f, pfs, pf, scope, new_body, ud, rc);
|
||||
|
||||
new_body = ZAM.CompileBody();
|
||||
|
||||
|
@ -413,16 +414,18 @@ static void use_CPP() {
|
|||
reporter->FatalError("no C++ functions found to use");
|
||||
}
|
||||
|
||||
static void generate_CPP(std::unique_ptr<ProfileFuncs>& pfs) {
|
||||
static void generate_CPP() {
|
||||
const auto gen_name = CPP_dir + "CPP-gen.cc";
|
||||
|
||||
const bool standalone = analysis_options.gen_standalone_CPP;
|
||||
const bool report = analysis_options.report_uncompilable;
|
||||
|
||||
auto pfs = std::make_unique<ProfileFuncs>(funcs, is_CPP_compilable, false);
|
||||
|
||||
CPPCompile cpp(funcs, *pfs, gen_name, standalone, report);
|
||||
}
|
||||
|
||||
static void analyze_scripts_for_ZAM(std::unique_ptr<ProfileFuncs>& pfs) {
|
||||
static void analyze_scripts_for_ZAM() {
|
||||
if ( analysis_options.usage_issues > 0 && analysis_options.optimize_AST ) {
|
||||
fprintf(stderr,
|
||||
"warning: \"-O optimize-AST\" option is incompatible with -u option, "
|
||||
|
@ -430,15 +433,7 @@ static void analyze_scripts_for_ZAM(std::unique_ptr<ProfileFuncs>& pfs) {
|
|||
analysis_options.optimize_AST = false;
|
||||
}
|
||||
|
||||
// Re-profile the functions, now without worrying about compatibility
|
||||
// with compilation to C++.
|
||||
|
||||
// The first profiling pass earlier may have marked some of the
|
||||
// functions as to-skip, so clear those markings.
|
||||
for ( auto& f : funcs )
|
||||
f.SetSkip(false);
|
||||
|
||||
pfs = std::make_unique<ProfileFuncs>(funcs, nullptr, true);
|
||||
auto pfs = std::make_unique<ProfileFuncs>(funcs, nullptr, true);
|
||||
|
||||
bool report_recursive = analysis_options.report_recursive;
|
||||
std::unique_ptr<Inliner> inl;
|
||||
|
@ -492,7 +487,7 @@ static void analyze_scripts_for_ZAM(std::unique_ptr<ProfileFuncs>& pfs) {
|
|||
}
|
||||
|
||||
auto new_body = f.Body();
|
||||
optimize_func(func, f.ProfilePtr(), f.Scope(), new_body);
|
||||
optimize_func(func, f.ProfilePtr(), *pfs, f.Scope(), new_body);
|
||||
f.SetBody(new_body);
|
||||
|
||||
if ( is_lambda )
|
||||
|
@ -566,10 +561,6 @@ void analyze_scripts(bool no_unused_warnings) {
|
|||
if ( ! have_one_to_do )
|
||||
reporter->FatalError("no matching functions/files for C++ compilation");
|
||||
|
||||
// Now that everything's parsed and BiF's have been initialized,
|
||||
// profile the functions.
|
||||
auto pfs = std::make_unique<ProfileFuncs>(funcs, is_CPP_compilable, false);
|
||||
|
||||
if ( CPP_init_hook ) {
|
||||
(*CPP_init_hook)();
|
||||
if ( compiled_scripts.empty() )
|
||||
|
@ -591,13 +582,13 @@ void analyze_scripts(bool no_unused_warnings) {
|
|||
if ( analysis_options.gen_ZAM )
|
||||
reporter->FatalError("-O ZAM and -O gen-C++ conflict");
|
||||
|
||||
generate_CPP(pfs);
|
||||
generate_CPP();
|
||||
exit(0);
|
||||
}
|
||||
|
||||
// At this point we're done with C++ considerations, so instead
|
||||
// we're compiling to ZAM.
|
||||
analyze_scripts_for_ZAM(pfs);
|
||||
analyze_scripts_for_ZAM();
|
||||
|
||||
if ( reporter->Errors() > 0 )
|
||||
reporter->FatalError("Optimized script execution aborted due to errors");
|
||||
|
|
|
@ -18,6 +18,8 @@ struct Options;
|
|||
|
||||
namespace zeek::detail {
|
||||
|
||||
using TypeSet = std::unordered_set<const Type*>;
|
||||
|
||||
// Flags controlling what sorts of analysis to do.
|
||||
|
||||
struct AnalyOpt {
|
||||
|
|
|
@ -16,8 +16,6 @@ public:
|
|||
UsageAnalyzer(std::vector<FuncInfo>& funcs);
|
||||
|
||||
private:
|
||||
using IDSet = std::unordered_set<const ID*>;
|
||||
|
||||
// Finds the set of identifiers that serve as a starting point of
|
||||
// what's-known-to-be-used. An identifier qualifies as such if it is
|
||||
// (1) an event that was newly introduced by scripting (so, known to
|
||||
|
@ -67,10 +65,10 @@ private:
|
|||
// of why the first needs to be per-traversal.
|
||||
|
||||
// All of the identifiers we've analyzed during the current traversal.
|
||||
std::unordered_set<const ID*> analyzed_IDs;
|
||||
IDSet analyzed_IDs;
|
||||
|
||||
// All of the types we've analyzed to date.
|
||||
std::unordered_set<const Type*> analyzed_types;
|
||||
TypeSet analyzed_types;
|
||||
};
|
||||
|
||||
// Marks a given identifier as referring to a script-level event (one
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
#pragma once
|
||||
|
||||
#include "zeek/Event.h"
|
||||
#include "zeek/script_opt/ProfileFunc.h"
|
||||
#include "zeek/script_opt/UseDefs.h"
|
||||
#include "zeek/script_opt/ZAM/ZBody.h"
|
||||
|
||||
|
@ -23,8 +24,6 @@ class Stmt;
|
|||
class SwitchStmt;
|
||||
class CatchReturnStmt;
|
||||
|
||||
class ProfileFunc;
|
||||
|
||||
using InstLabel = ZInstI*;
|
||||
|
||||
// Class representing a single compiled statement. (This is different from,
|
||||
|
@ -53,7 +52,7 @@ public:
|
|||
|
||||
class ZAMCompiler {
|
||||
public:
|
||||
ZAMCompiler(ScriptFunc* f, std::shared_ptr<ProfileFunc> pf, ScopePtr scope, StmtPtr body,
|
||||
ZAMCompiler(ScriptFunc* f, ProfileFuncs& pfs, std::shared_ptr<ProfileFunc> pf, ScopePtr scope, StmtPtr body,
|
||||
std::shared_ptr<UseDefs> ud, std::shared_ptr<Reducer> rd);
|
||||
~ZAMCompiler();
|
||||
|
||||
|
@ -503,6 +502,7 @@ private:
|
|||
std::vector<const NameExpr*> retvars;
|
||||
|
||||
ScriptFunc* func;
|
||||
ProfileFuncs& pfs;
|
||||
std::shared_ptr<ProfileFunc> pf;
|
||||
ScopePtr scope;
|
||||
StmtPtr body;
|
||||
|
|
|
@ -8,14 +8,14 @@
|
|||
#include "zeek/Reporter.h"
|
||||
#include "zeek/Scope.h"
|
||||
#include "zeek/module_util.h"
|
||||
#include "zeek/script_opt/ProfileFunc.h"
|
||||
#include "zeek/script_opt/ScriptOpt.h"
|
||||
#include "zeek/script_opt/ZAM/Compile.h"
|
||||
|
||||
namespace zeek::detail {
|
||||
|
||||
ZAMCompiler::ZAMCompiler(ScriptFunc* f, std::shared_ptr<ProfileFunc> _pf, ScopePtr _scope, StmtPtr _body,
|
||||
std::shared_ptr<UseDefs> _ud, std::shared_ptr<Reducer> _rd) {
|
||||
ZAMCompiler::ZAMCompiler(ScriptFunc* f, ProfileFuncs& _pfs, std::shared_ptr<ProfileFunc> _pf, ScopePtr _scope,
|
||||
StmtPtr _body, std::shared_ptr<UseDefs> _ud, std::shared_ptr<Reducer> _rd)
|
||||
: pfs(_pfs) {
|
||||
func = f;
|
||||
pf = std::move(_pf);
|
||||
scope = std::move(_scope);
|
||||
|
|
|
@ -4,7 +4,6 @@
|
|||
|
||||
#include "zeek/Desc.h"
|
||||
#include "zeek/Reporter.h"
|
||||
#include "zeek/script_opt/ProfileFunc.h"
|
||||
#include "zeek/script_opt/ZAM/Compile.h"
|
||||
|
||||
namespace zeek::detail {
|
||||
|
@ -667,11 +666,10 @@ const ZAMStmt ZAMCompiler::CompileIndex(const NameExpr* n1, int n2_slot, const T
|
|||
z = ZInstI(zop, Frame1Slot(n1, zop), n2_slot, c3);
|
||||
}
|
||||
|
||||
// See the discussion in CSE_ValidityChecker::PreExpr
|
||||
// regarding always needing to treat this as potentially
|
||||
// modifying globals.
|
||||
if ( pfs.HasSideEffects(SideEffectsOp::READ, n2t) ) {
|
||||
z.aux = new ZInstAux(0);
|
||||
z.aux->can_change_non_locals = true;
|
||||
}
|
||||
|
||||
return AddInst(z);
|
||||
}
|
||||
|
@ -853,6 +851,9 @@ const ZAMStmt ZAMCompiler::AssignTableElem(const Expr* e) {
|
|||
z.aux = InternalBuildVals(op2);
|
||||
z.t = op3->GetType();
|
||||
|
||||
if ( pfs.HasSideEffects(SideEffectsOp::WRITE, op1->GetType()) )
|
||||
z.aux->can_change_non_locals = true;
|
||||
|
||||
return AddInst(z);
|
||||
}
|
||||
|
||||
|
@ -1004,8 +1005,21 @@ const ZAMStmt ZAMCompiler::DoCall(const CallExpr* c, const NameExpr* n) {
|
|||
if ( ! z.aux )
|
||||
z.aux = new ZInstAux(0);
|
||||
|
||||
if ( indirect )
|
||||
z.aux->can_change_non_locals = true;
|
||||
|
||||
else {
|
||||
IDSet non_local_ids;
|
||||
TypeSet aggrs;
|
||||
bool is_unknown = false;
|
||||
|
||||
auto resolved = pfs.GetCallSideEffects(func, non_local_ids, aggrs, is_unknown);
|
||||
ASSERT(resolved);
|
||||
|
||||
if ( is_unknown || ! non_local_ids.empty() || ! aggrs.empty() )
|
||||
z.aux->can_change_non_locals = true;
|
||||
}
|
||||
|
||||
z.call_expr = {NewRef{}, const_cast<CallExpr*>(c)};
|
||||
|
||||
if ( in_when )
|
||||
|
@ -1089,7 +1103,7 @@ const ZAMStmt ZAMCompiler::ConstructRecord(const NameExpr* n, const Expr* e) {
|
|||
|
||||
z.t = e->GetType();
|
||||
|
||||
if ( ! rc->GetType<RecordType>()->IdempotentCreation() )
|
||||
if ( pfs.HasSideEffects(SideEffectsOp::CONSTRUCTION, z.t) )
|
||||
z.aux->can_change_non_locals = true;
|
||||
|
||||
return AddInst(z);
|
||||
|
@ -1188,6 +1202,9 @@ const ZAMStmt ZAMCompiler::RecordCoerce(const NameExpr* n, const Expr* e) {
|
|||
// Mark the integer entries in z.aux as not being frame slots as usual.
|
||||
z.aux->slots = nullptr;
|
||||
|
||||
if ( pfs.HasSideEffects(SideEffectsOp::CONSTRUCTION, e->GetType()) )
|
||||
z.aux->can_change_non_locals = true;
|
||||
|
||||
return AddInst(z);
|
||||
}
|
||||
|
||||
|
|
|
@ -5,7 +5,6 @@
|
|||
#include "zeek/IPAddr.h"
|
||||
#include "zeek/Reporter.h"
|
||||
#include "zeek/ZeekString.h"
|
||||
#include "zeek/script_opt/ProfileFunc.h"
|
||||
#include "zeek/script_opt/ZAM/Compile.h"
|
||||
|
||||
namespace zeek::detail {
|
||||
|
|
|
@ -4,7 +4,6 @@
|
|||
|
||||
#include "zeek/Desc.h"
|
||||
#include "zeek/Reporter.h"
|
||||
#include "zeek/script_opt/ProfileFunc.h"
|
||||
#include "zeek/script_opt/Reduce.h"
|
||||
#include "zeek/script_opt/ZAM/Compile.h"
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue