mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 06:38:20 +00:00
1561 lines
49 KiB
C++
1561 lines
49 KiB
C++
// See the file "COPYING" in the main distribution directory for copyright.
|
|
|
|
#include "zeek/script_opt/ProfileFunc.h"
|
|
|
|
#include <unistd.h>
|
|
#include <cerrno>
|
|
|
|
#include "zeek/Desc.h"
|
|
#include "zeek/Func.h"
|
|
#include "zeek/Stmt.h"
|
|
#include "zeek/script_opt/FuncInfo.h"
|
|
#include "zeek/script_opt/IDOptInfo.h"
|
|
|
|
namespace zeek::detail {
|
|
|
|
// Computes the profiling hash of a Obj based on its (deterministic)
|
|
// description.
|
|
p_hash_type p_hash(const Obj* o) {
|
|
ODesc d;
|
|
d.SetDeterminism(true);
|
|
o->Describe(&d);
|
|
return p_hash(d.Description());
|
|
}
|
|
|
|
// Profiles the given function body. Any captures listed in the function's
// type are recorded (along with their positional offsets) before the body
// is traversed.
ProfileFunc::ProfileFunc(const Func* func, const StmtPtr& body, bool _abs_rec_fields) {
    abs_rec_fields = _abs_rec_fields;
    profiled_func = func;
    profiled_body = body.get();

    auto func_type = func->GetType()->AsFuncType();

    if ( auto& cap_list = func_type->GetCaptures() ) {
        // Offsets are assigned in the order the captures are listed.
        int next_offset = 0;

        for ( auto& cap : *cap_list ) {
            auto cap_id = cap.Id().get();
            captures.insert(cap_id);
            captures_offsets[cap_id] = next_offset;
            ++next_offset;
        }
    }

    Profile(func_type, body);
}
|
|
|
|
// Profiles a standalone statement by traversing it directly. Unlike the
// Func-based constructor, no captures or parameter count are set up here.
ProfileFunc::ProfileFunc(const Stmt* s, bool _abs_rec_fields) {
    abs_rec_fields = _abs_rec_fields;
    profiled_body = s;

    s->Traverse(this);
}
|
|
|
|
// Profiles an expression. Lambdas are profiled like functions (their outer
// identifiers are treated as captures); anything else is traversed as-is.
ProfileFunc::ProfileFunc(const Expr* e, bool _abs_rec_fields) {
    profiled_expr = e;
    abs_rec_fields = _abs_rec_fields;

    if ( e->Tag() != EXPR_LAMBDA ) {
        // We don't have a function type, so do the traversal
        // directly.
        e->Traverse(this);
        return;
    }

    auto lambda = e->AsLambdaExpr();

    // Outer identifiers become captures, numbered in iteration order.
    int next_offset = 0;

    for ( auto outer_id : lambda->OuterIDs() ) {
        captures.insert(outer_id);
        captures_offsets[outer_id] = next_offset;
        ++next_offset;
    }

    Profile(lambda->GetType()->AsFuncType(), lambda->Ingredients()->Body());
}
|
|
|
|
// Common profiling entry point for anything that has a function type:
// notes the number of parameters, tracks the function type itself, and
// then traverses the body.
void ProfileFunc::Profile(const FuncType* ft, const StmtPtr& body) {
    // The parameter count is needed during traversal to tell parameters
    // apart from other locals, so it must be set first.
    num_params = ft->Params()->NumFields();

    TrackType(ft);

    body->Traverse(this);
}
|
|
|
|
// Traversal hook invoked for each statement. Records the statement and
// collects statement-specific information: initialized identifiers,
// "when" locals, loop variables, and type-switch identifiers.
TraversalCode ProfileFunc::PreStmt(const Stmt* s) {
    stmts.push_back(s);

    const auto tag = s->Tag();

    if ( tag == STMT_INIT ) {
        for ( const auto& init_id : s->AsInitStmt()->Inits() ) {
            inits.insert(init_id.get());

            auto& id_type = init_id->GetType();
            TrackType(id_type);

            if ( auto id_attrs = init_id->GetAttrs() )
                constructor_attrs[id_attrs.get()] = id_type;

            if ( id_type->Tag() == TYPE_RECORD )
                CheckRecordConstructor(id_type);
        }

        // Don't traverse further into the statement, since we
        // don't want to view the identifiers as locals unless
        // they're also used elsewhere.
        return TC_ABORTSTMT;
    }

    if ( tag == STMT_WHEN ) {
        ++num_when_stmts;

        auto when_info = s->AsWhenStmt()->Info();

        for ( auto new_local : when_info->WhenNewLocals() )
            when_locals.insert(new_local);

        return TC_CONTINUE;
    }

    if ( tag == STMT_FOR ) {
        auto for_stmt = s->AsForStmt();
        auto loop_ids = for_stmt->LoopVars();
        auto val_id = for_stmt->ValueVar();

        for ( auto loop_id : *loop_ids )
            locals.insert(loop_id);

        // The value variable is optional.
        if ( val_id )
            locals.insert(val_id.get());

        return TC_CONTINUE;
    }

    if ( tag == STMT_SWITCH ) {
        // If this is a type-case switch statement, then find the
        // identifiers created so we can add them to our list of
        // locals. Ideally this wouldn't be necessary since *surely*
        // if one bothers to define such an identifier then it'll be
        // subsequently used, and we'll pick up the local that way ...
        // but if for some reason it's not, then we would have an
        // incomplete list of locals that need to be tracked.
        auto switch_stmt = s->AsSwitchStmt();
        bool saw_type_case = false;

        for ( auto& sw_case : *switch_stmt->Cases() ) {
            auto type_ids = sw_case->TypeCases();
            if ( ! type_ids )
                continue;

            saw_type_case = true;

            for ( auto type_id : *type_ids )
                // Make sure it's not a placeholder identifier, used
                // when there's no explicit one.
                if ( type_id->Name() )
                    locals.insert(type_id);
        }

        if ( saw_type_case )
            type_switches.insert(switch_stmt);
        else
            expr_switches.insert(switch_stmt);
    }

    return TC_CONTINUE;
}
|
|
|
|
// Traversal hook invoked for each expression. Records the expression and
// its type, and then gathers expression-specific information: constants,
// globals/locals/parameters, record field hashes, table references,
// assignment targets and modified aggregates, function calls (direct and
// indirect), events, lambdas, and attributes associated with constructors.
//
// Returns TC_ABORTSTMT for those expressions whose sub-expressions are
// either traversed manually here or deliberately skipped, and TC_CONTINUE
// otherwise.
TraversalCode ProfileFunc::PreExpr(const Expr* e) {
    exprs.push_back(e);

    TrackType(e->GetType());

    switch ( e->Tag() ) {
        case EXPR_CONST: constants.push_back(e->AsConstExpr()); break;

        case EXPR_NAME: {
            auto n = e->AsNameExpr();
            auto id = n->Id();

            // Turns out that NameExpr's can be constructed using a
            // different Type* than that of the identifier itself,
            // so be sure we track the latter too.
            TrackType(id->GetType());

            if ( id->IsGlobal() ) {
                globals.insert(id);
                all_globals.insert(id);

                const auto& t = id->GetType();
                if ( t->Tag() == TYPE_FUNC )
                    if ( t->AsFuncType()->Flavor() == FUNC_FLAVOR_EVENT )
                        events.insert(id->Name());

                break;
            }

            // This is a tad ugly. Unfortunately due to the weird way
            // that Zeek function *declarations* work, there's no reliable
            // way to get the list of parameters for a function *definition*,
            // since they can have different names than what's present in the
            // declaration. So we identify them directly, by knowing that
            // they come at the beginning of the frame ... and being careful
            // to avoid mistaking a lambda capture with a low frame offset
            // for a parameter.
            if ( captures.count(id) == 0 && id->Offset() < num_params )
                params.insert(id);

            locals.insert(id);

            break;
        }

        case EXPR_FIELD:
            if ( abs_rec_fields ) {
                auto f = e->AsFieldExpr()->Field();
                addl_hashes.push_back(p_hash(f));
            }
            else {
                auto fn = e->AsFieldExpr()->FieldName();
                addl_hashes.push_back(p_hash(fn));
            }
            break;

        case EXPR_HAS_FIELD:
            if ( abs_rec_fields ) {
                auto f = e->AsHasFieldExpr()->Field();
                addl_hashes.push_back(std::hash<int>{}(f));
            }
            else {
                auto fn = e->AsHasFieldExpr()->FieldName();
                addl_hashes.push_back(std::hash<std::string>{}(fn));
            }
            break;

        case EXPR_INDEX: {
            auto lhs_t = e->GetOp1()->GetType();
            if ( lhs_t->Tag() == TYPE_TABLE )
                tbl_refs.insert(lhs_t.get());
        } break;

        case EXPR_INCR:
        case EXPR_DECR:
        case EXPR_ADD_TO:
        case EXPR_REMOVE_FROM:
        case EXPR_ASSIGN: {
            auto lhs = e->GetOp1();
            bool is_assign = e->Tag() == EXPR_ASSIGN;

            if ( is_assign ) {
                // Check for this being an assignment to a function (as
                // opposed to a call). If so, then the function can be
                // used indirectly.
                auto rhs = e->GetOp2();
                if ( rhs->Tag() == EXPR_NAME ) {
                    auto& rhs_id = rhs->AsNameExpr()->IdPtr();
                    const auto& t = rhs_id->GetType();
                    if ( t->Tag() == TYPE_FUNC && t->AsFuncType()->Flavor() == FUNC_FLAVOR_FUNCTION )
                        indirect_funcs.insert(rhs_id.get());
                }
            }

            if ( lhs->Tag() == EXPR_REF )
                lhs = lhs->GetOp1();

            else if ( is_assign )
                // This isn't a direct assignment, but instead an overloaded
                // use of "=" such as in a table constructor.
                break;

            auto lhs_t = lhs->GetType();

            switch ( lhs->Tag() ) {
                case EXPR_NAME: {
                    auto id = lhs->AsNameExpr()->Id();
                    TrackAssignment(id);

                    if ( is_assign ) {
                        auto a_e = static_cast<const AssignExpr*>(e);
                        auto& av = a_e->AssignVal();
                        if ( av )
                            // This is a funky "local" assignment
                            // inside a when clause.
                            when_locals.insert(id);
                    }
                    else if ( IsAggr(lhs_t->Tag()) )
                        aggr_mods.insert(lhs_t.get());
                } break;

                case EXPR_INDEX: {
                    auto lhs_aggr = lhs->GetOp1();
                    auto lhs_aggr_t = lhs_aggr->GetType();

                    // Determine which aggregate is being modified. For an
                    // assignment "a[b] = aggr", it's not a[b]'s type but
                    // rather a's type. However, for any of the others,
                    // e.g. "a[b] -= aggr" it is a[b]'s type.
                    if ( is_assign )
                        aggr_mods.insert(lhs_aggr_t.get());
                    else
                        aggr_mods.insert(lhs_t.get());

                    if ( lhs_aggr_t->Tag() == TYPE_TABLE ) {
                        // We don't want the default recursion into the
                        // expression's LHS because that will treat this
                        // table modification as a reference instead. So
                        // do it manually. Given that, we need to do the
                        // expression's RHS manually too.
                        lhs->GetOp1()->Traverse(this);
                        lhs->GetOp2()->Traverse(this);

                        auto rhs = e->GetOp2();
                        if ( rhs )
                            rhs->Traverse(this);

                        return TC_ABORTSTMT;
                    }
                } break;

                case EXPR_FIELD: aggr_mods.insert(lhs_t.get()); break;

                case EXPR_LIST: {
                    for ( auto id : lhs->AsListExpr()->Exprs() ) {
                        auto id_t = id->GetType();
                        if ( IsAggr(id_t->Tag()) )
                            aggr_mods.insert(id_t.get());
                    }
                } break;

                default: reporter->InternalError("bad expression in ProfileFunc: %s", obj_desc(e).c_str());
            }
        } break;

        case EXPR_AGGR_ADD:
        case EXPR_AGGR_DEL: {
            auto lhs = e->GetOp1();
            if ( lhs )
                aggr_mods.insert(lhs->GetType().get());
            else
                aggr_mods.insert(e->GetType().get());
        } break;

        case EXPR_CALL: {
            auto c = e->AsCallExpr();
            auto args = c->Args();
            auto f = c->Func();

            const NameExpr* n = nullptr;
            const ID* func = nullptr;

            // Whether *this* call is indirect, i.e., not a call through
            // a global name. We track this separately from the sticky
            // does_indirect_calls member: that member stays set once any
            // indirect call has been seen, so using it for the decisions
            // below would cause every subsequent direct call to be
            // treated as indirect and thus never recorded as a script
            // call or BiF.
            bool is_indirect_call = true;

            if ( f->Tag() == EXPR_NAME ) {
                n = f->AsNameExpr();
                func = n->Id();
                is_indirect_call = ! func->IsGlobal();
            }

            if ( is_indirect_call )
                does_indirect_calls = true;

            // Check for whether any of the arguments is a bare function.
            // If so, then note that that function may be used indirectly,
            // unless the function being called is known to be idempotent.
            // ("func" is only consulted for direct calls, where it's
            // guaranteed to be non-nil.)
            if ( is_indirect_call || ! is_idempotent(func->Name()) ) {
                for ( auto& arg : args->Exprs() )
                    if ( arg->Tag() == EXPR_NAME ) {
                        auto& arg_id = arg->AsNameExpr()->IdPtr();
                        const auto& t = arg_id->GetType();
                        if ( t->Tag() == TYPE_FUNC && t->AsFuncType()->Flavor() == FUNC_FLAVOR_FUNCTION )
                            indirect_funcs.insert(arg_id.get());
                    }
            }

            if ( is_indirect_call )
                // We waited on doing this until after checking for
                // indirect functions.
                return TC_CONTINUE;

            all_globals.insert(func);

            auto func_v = func->GetVal();
            if ( func_v ) {
                auto func_vf = func_v->AsFunc();

                if ( func_vf->GetKind() == Func::SCRIPT_FUNC ) {
                    auto sf = static_cast<ScriptFunc*>(func_vf);
                    script_calls.insert(sf);
                }
                else
                    BiF_globals.insert(func);
            }
            else {
                // We could complain, but for now we don't, because
                // if we're invoked prior to full Zeek initialization,
                // the value might indeed not be there yet.
                // printf("no function value for global %s\n", func->Name());
            }

            // Recurse into the arguments.
            args->Traverse(this);

            // Do the following explicitly, since we won't be recursing
            // into the LHS global.

            // Note that the type of the expression and the type of the
            // function can actually be *different* due to the NameExpr
            // being constructed based on a forward reference and then
            // the global getting a different (constructed) type when
            // the function is actually declared. Geez. So hedge our
            // bets.
            TrackType(n->GetType());
            TrackType(func->GetType());

            TrackID(func);

            return TC_ABORTSTMT;
        }

        case EXPR_EVENT: {
            auto ev = e->AsEventExpr()->Name();
            events.insert(ev);
            addl_hashes.push_back(p_hash(ev));
        } break;

        case EXPR_LAMBDA: {
            auto l = e->AsLambdaExpr();
            lambdas.push_back(l);

            for ( const auto& i : l->OuterIDs() ) {
                locals.insert(i);
                TrackID(i);

                // See above re EXPR_NAME regarding the following
                // logic.
                if ( captures.count(i) == 0 && i->Offset() < num_params )
                    params.insert(i);
            }

            // In general, we don't want to recurse into the body.
            // However, we still want to *profile* it so we can
            // identify calls within it.
            auto pf = std::make_shared<ProfileFunc>(l->Ingredients()->Body().get(), false);
            script_calls.insert(pf->ScriptCalls().begin(), pf->ScriptCalls().end());

            return TC_ABORTSTMT;
        }

        case EXPR_RECORD_CONSTRUCTOR:
        case EXPR_REC_CONSTRUCT_WITH_REC: CheckRecordConstructor(e->GetType()); break;

        case EXPR_SET_CONSTRUCTOR: {
            auto sc = static_cast<const SetConstructorExpr*>(e);
            const auto& attrs = sc->GetAttrs();

            if ( attrs )
                constructor_attrs[attrs.get()] = sc->GetType();
        } break;

        case EXPR_TABLE_CONSTRUCTOR: {
            auto tc = static_cast<const TableConstructorExpr*>(e);
            const auto& attrs = tc->GetAttrs();

            if ( attrs )
                constructor_attrs[attrs.get()] = tc->GetType();
        } break;

        case EXPR_RECORD_COERCE:
            // This effectively does a record construction of the target
            // type, so check that.
            CheckRecordConstructor(e->GetType());
            break;

        case EXPR_TABLE_COERCE: {
            // This is written without casting so it can work with other
            // types if needed.
            auto res_type = e->GetType().get();
            auto orig_type = e->GetOp1()->GetType().get();

            // Accumulate the alias under the original type's entry
            // (created on demand), so that multiple coercions of the
            // same original type don't overwrite one another.
            type_aliases[orig_type].insert(res_type);
        } break;

        default: break;
    }

    return TC_CONTINUE;
}
|
|
|
|
// Traversal hook invoked for each identifier: records it, and then cuts
// the traversal short since the ID itself needs no further analysis.
TraversalCode ProfileFunc::PreID(const ID* id) {
    TrackID(id);
    return TC_ABORTSTMT;
}
|
|
|
|
void ProfileFunc::TrackType(const Type* t) {
|
|
if ( ! t )
|
|
return;
|
|
|
|
auto [it, inserted] = types.insert(t);
|
|
|
|
if ( ! inserted )
|
|
// We've already tracked it.
|
|
return;
|
|
|
|
ordered_types.push_back(t);
|
|
}
|
|
|
|
void ProfileFunc::TrackID(const ID* id) {
|
|
if ( ! id )
|
|
return;
|
|
|
|
auto [it, inserted] = ids.insert(id);
|
|
|
|
if ( ! inserted )
|
|
// Already tracked.
|
|
return;
|
|
|
|
ordered_ids.push_back(id);
|
|
}
|
|
|
|
void ProfileFunc::TrackAssignment(const ID* id) {
|
|
if ( assignees.count(id) > 0 )
|
|
++assignees[id];
|
|
else
|
|
assignees[id] = 1;
|
|
|
|
if ( id->IsGlobal() || captures.count(id) > 0 )
|
|
non_local_assignees.insert(id);
|
|
}
|
|
|
|
// Given a record type that's being constructed, notes the attributes
// associated with each of its fields, both in the general map of
// constructor attributes and in the per-record-type collection.
void ProfileFunc::CheckRecordConstructor(TypePtr t) {
    auto rec_type = cast_intrusive<RecordType>(t);
    auto rec_key = rec_type.get();

    for ( auto field_decl : *rec_type->Types() ) {
        if ( ! field_decl->attrs )
            continue;

        // In principle we could figure out whether this particular
        // constructor happens to explicitly specify &default fields, and
        // not include those attributes if it does since they won't come
        // into play. However that seems like added complexity for almost
        // surely no ultimate gain.
        auto field_attrs = field_decl->attrs.get();

        constructor_attrs[field_attrs] = rec_type;

        // Indexing creates an empty collection for a record type we
        // haven't seen before.
        rec_constructor_attrs[rec_key].insert(field_attrs);
    }
}
|
|
|
|
// Builds profiles for every function in "funcs", merging into the global
// profile those that satisfy "pred" (or all of them, if there's no
// predicate). It then computes type hashes and function-body hashes, and
// finally analyzes side effects.
ProfileFuncs::ProfileFuncs(std::vector<FuncInfo>& funcs, is_compilable_pred pred, bool _compute_func_hashes,
                           bool _full_record_hashes) {
    compute_func_hashes = _compute_func_hashes;
    full_record_hashes = _full_record_hashes;

    for ( auto& fi : funcs ) {
        auto prof = std::make_shared<ProfileFunc>(fi.Func(), fi.Body(), full_record_hashes);

        const bool should_merge = ! pred || (*pred)(prof.get(), nullptr);
        if ( should_merge )
            MergeInProfile(prof.get());

        // Track the profile even if we're not compiling the function, since
        // the AST optimizer will still need it to reason about function-call
        // side effects.

        // Propagate previous hash if requested.
        if ( ! compute_func_hashes ) {
            if ( fi.Profile() )
                prof->SetHashVal(fi.Profile()->HashVal());
        }

        fi.SetProfile(std::move(prof));
        func_profs[fi.Func()] = fi.ProfilePtr();
    }

    // We now have the main (starting) types used by all of the
    // functions. Recursively compute their hashes.
    ComputeTypeHashes(main_types);

    for ( ;; ) {
        // Computing the hashes can have marked expressions (seen in
        // record attributes) for further analysis. Likewise, when
        // doing the profile merges above we may have noted lambda
        // expressions. Analyze these, and iteratively any further
        // expressions that the analysis uncovers.
        DrainPendingExprs();

        // We now have all the information we need to form definitive,
        // deterministic hashes.
        ComputeBodyHashes(funcs);

        // Computing those hashes could have led to traversals that
        // create more pending expressions to analyze. If not, we're
        // done.
        if ( pending_exprs.empty() )
            break;
    }

    // Now that we have everything profiled, we can proceed to analyses
    // that require full global information.
    ComputeSideEffects();
}
|
|
|
|
bool ProfileFuncs::IsTableWithDefaultAggr(const Type* t) {
|
|
auto analy = tbl_has_aggr_default.find(t);
|
|
if ( analy != tbl_has_aggr_default.end() )
|
|
// We already have the answer.
|
|
return analy->second;
|
|
|
|
// See whether an alias for the type has already been resolved.
|
|
if ( t->AsTableType()->Yield() ) {
|
|
for ( auto& at : tbl_has_aggr_default )
|
|
if ( same_type(at.first, t) ) {
|
|
tbl_has_aggr_default[t] = at.second;
|
|
return at.second;
|
|
}
|
|
}
|
|
|
|
tbl_has_aggr_default[t] = false;
|
|
return false;
|
|
}
|
|
|
|
// Returns true if the given form of access to the given type has any side
// effects at all: either GetSideEffects() itself returns true, or it
// reports at least one modified non-local or aggregate.
bool ProfileFuncs::HasSideEffects(SideEffectsOp::AccessType access, const TypePtr& t) const {
    IDSet mod_non_locals;
    TypeSet mod_aggrs;

    const bool flagged = GetSideEffects(access, t.get(), mod_non_locals, mod_aggrs);

    return flagged || ! (mod_non_locals.empty() && mod_aggrs.empty());
}
|
|
|
|
// Assesses each known side-effects operation against the given access and
// type, accumulating affected non-locals and aggregates into the given
// sets. Stops and returns true as soon as any assessment returns true;
// otherwise returns false.
bool ProfileFuncs::GetSideEffects(SideEffectsOp::AccessType access, const Type* t, IDSet& non_local_ids,
                                  TypeSet& aggrs) const {
    for ( const auto& op : side_effects_ops ) {
        const bool flagged = AssessSideEffects(op.get(), access, t, non_local_ids, aggrs);

        if ( flagged )
            return true;
    }

    return false;
}
|
|
|
|
bool ProfileFuncs::GetCallSideEffects(const NameExpr* n, IDSet& non_local_ids, TypeSet& aggrs, bool& is_unknown) {
|
|
auto fid = n->Id();
|
|
auto fv = fid->GetVal();
|
|
|
|
if ( ! fv || ! fid->IsConst() ) {
|
|
// The value is unavailable (likely a bug), or might change at run-time.
|
|
is_unknown = true;
|
|
return true;
|
|
}
|
|
|
|
auto func = fv->AsFunc();
|
|
if ( func->GetKind() == Func::BUILTIN_FUNC ) {
|
|
if ( has_script_side_effects(func->Name()) )
|
|
is_unknown = true;
|
|
return true;
|
|
}
|
|
|
|
auto sf = static_cast<ScriptFunc*>(func);
|
|
auto seo = GetCallSideEffects(sf);
|
|
if ( ! seo )
|
|
return false;
|
|
|
|
if ( seo->HasUnknownChanges() )
|
|
is_unknown = true;
|
|
|
|
for ( auto a : seo->ModAggrs() )
|
|
aggrs.insert(a);
|
|
for ( auto nl : seo->ModNonLocals() )
|
|
non_local_ids.insert(nl);
|
|
|
|
return true;
|
|
}
|
|
|
|
// Folds the information from an individual profile into the global
// profile. Globals seen for the first time additionally have their values,
// types, initialization expressions and attributes analyzed.
void ProfileFuncs::MergeInProfile(ProfileFunc* pf) {
    const auto& pf_all_globals = pf->AllGlobals();
    all_globals.insert(pf_all_globals.begin(), pf_all_globals.end());

    for ( auto& g : pf->Globals() ) {
        if ( ! globals.emplace(g).second )
            // We've already processed this global.
            continue;

        TraverseValue(g->GetVal());

        const auto& g_type = g->GetType();
        if ( g_type->Tag() == TYPE_TYPE )
            (void)HashType(g_type->AsTypeType()->GetType());

        // Queue up any initialization expressions for later analysis.
        for ( const auto& init_expr : g->GetOptInfo()->GetInitExprs() ) {
            if ( ! init_expr )
                continue;

            pending_exprs.push_back(init_expr.get());

            if ( init_expr->Tag() == EXPR_LAMBDA )
                lambdas.insert(init_expr->AsLambdaExpr());
        }

        if ( auto& g_attrs = g->GetAttrs() )
            AnalyzeAttrs(g_attrs.get(), g_type.get());
    }

    const auto& pf_constants = pf->Constants();
    constants.insert(pf_constants.begin(), pf_constants.end());

    const auto& pf_types = pf->OrderedTypes();
    main_types.insert(main_types.end(), pf_types.begin(), pf_types.end());

    const auto& pf_script_calls = pf->ScriptCalls();
    script_calls.insert(pf_script_calls.begin(), pf_script_calls.end());

    const auto& pf_bifs = pf->BiFGlobals();
    BiF_globals.insert(pf_bifs.begin(), pf_bifs.end());

    const auto& pf_events = pf->Events();
    events.insert(pf_events.begin(), pf_events.end());

    // Lambdas are both remembered and queued for their own profiling.
    for ( auto& lambda : pf->Lambdas() ) {
        lambdas.insert(lambda);
        pending_exprs.push_back(lambda);
    }

    for ( auto& ctor_attr : pf->ConstructorAttrs() )
        AnalyzeAttrs(ctor_attr.first, ctor_attr.second.get());

    // Indexing creates an empty alias set for any type not yet present.
    for ( auto& [orig_t, alias_set] : pf->TypeAliases() )
        type_aliases[orig_t].insert(alias_set.begin(), alias_set.end());
}
|
|
|
|
void ProfileFuncs::TraverseValue(const ValPtr& v) {
|
|
if ( ! v )
|
|
return;
|
|
|
|
const auto& t = v->GetType();
|
|
(void)HashType(t);
|
|
|
|
switch ( t->Tag() ) {
|
|
case TYPE_ADDR:
|
|
case TYPE_ANY:
|
|
case TYPE_BOOL:
|
|
case TYPE_COUNT:
|
|
case TYPE_DOUBLE:
|
|
case TYPE_ENUM:
|
|
case TYPE_ERROR:
|
|
case TYPE_FILE:
|
|
case TYPE_FUNC:
|
|
case TYPE_INT:
|
|
case TYPE_INTERVAL:
|
|
case TYPE_OPAQUE:
|
|
case TYPE_PATTERN:
|
|
case TYPE_PORT:
|
|
case TYPE_STRING:
|
|
case TYPE_SUBNET:
|
|
case TYPE_TIME:
|
|
case TYPE_VOID: break;
|
|
|
|
case TYPE_RECORD: {
|
|
auto r = cast_intrusive<RecordVal>(v);
|
|
auto n = r->NumFields();
|
|
|
|
for ( auto i = 0u; i < n; ++i )
|
|
TraverseValue(r->GetField(i));
|
|
} break;
|
|
|
|
case TYPE_TABLE: {
|
|
auto tv = cast_intrusive<TableVal>(v);
|
|
auto tv_map = tv->ToMap();
|
|
|
|
for ( auto& tv_i : tv_map ) {
|
|
TraverseValue(tv_i.first);
|
|
TraverseValue(tv_i.second);
|
|
}
|
|
} break;
|
|
|
|
case TYPE_LIST: {
|
|
auto lv = cast_intrusive<ListVal>(v);
|
|
auto n = lv->Length();
|
|
|
|
for ( auto i = 0; i < n; ++i )
|
|
TraverseValue(lv->Idx(i));
|
|
} break;
|
|
|
|
case TYPE_VECTOR: {
|
|
auto vv = cast_intrusive<VectorVal>(v);
|
|
auto n = vv->Size();
|
|
|
|
for ( auto i = 0u; i < n; ++i )
|
|
TraverseValue(vv->ValAt(i));
|
|
} break;
|
|
|
|
case TYPE_TYPE: (void)HashType(t->AsTypeType()->GetType()); break;
|
|
}
|
|
}
|
|
|
|
void ProfileFuncs::DrainPendingExprs() {
|
|
while ( pending_exprs.size() > 0 ) {
|
|
// Copy the pending expressions so we can loop over them
|
|
// while accruing additions.
|
|
auto pe = pending_exprs;
|
|
pending_exprs.clear();
|
|
|
|
for ( auto e : pe ) {
|
|
auto pf = std::make_shared<ProfileFunc>(e, full_record_hashes);
|
|
|
|
expr_profs[e] = pf;
|
|
MergeInProfile(pf.get());
|
|
|
|
// It's important to compute the hashes over the
|
|
// ordered types rather than the unordered. If type
|
|
// T1 depends on a recursive type T2, then T1's hash
|
|
// will vary with depending on whether we arrive at
|
|
// T1 via an in-progress traversal of T2 (in which
|
|
// case T1 will see the "stub" in-progress hash for
|
|
// T2), or via a separate type T3 (in which case it
|
|
// will see the full hash).
|
|
ComputeTypeHashes(pf->OrderedTypes());
|
|
}
|
|
}
|
|
}
|
|
|
|
// Computes the hash for each of the given types, in order. The hash
// values themselves are discarded here.
void ProfileFuncs::ComputeTypeHashes(const std::vector<const Type*>& types) {
    for ( const Type* type : types ) {
        (void)HashType(type);
    }
}
|
|
|
|
void ProfileFuncs::ComputeBodyHashes(std::vector<FuncInfo>& funcs) {
|
|
if ( compute_func_hashes )
|
|
for ( auto& f : funcs )
|
|
if ( ! f.ShouldSkip() )
|
|
ComputeProfileHash(f.ProfilePtr());
|
|
|
|
for ( auto& l : lambdas ) {
|
|
auto pf = ExprProf(l);
|
|
func_profs[l->PrimaryFunc().get()] = pf;
|
|
lambda_primaries[l->Name()] = l->PrimaryFunc().get();
|
|
|
|
if ( compute_func_hashes )
|
|
ComputeProfileHash(pf);
|
|
}
|
|
}
|
|
|
|
void ProfileFuncs::ComputeProfileHash(std::shared_ptr<ProfileFunc> pf) {
|
|
p_hash_type h = 0;
|
|
|
|
// We add markers between each class of hash component, to
|
|
// prevent collisions due to elements with simple hashes
|
|
// (such as Stmt's or Expr's that are only represented by
|
|
// the hash of their tag).
|
|
h = merge_p_hashes(h, p_hash("stmts"));
|
|
for ( auto i : pf->Stmts() )
|
|
h = merge_p_hashes(h, p_hash(i->Tag()));
|
|
|
|
h = merge_p_hashes(h, p_hash("exprs"));
|
|
for ( auto i : pf->Exprs() )
|
|
h = merge_p_hashes(h, p_hash(i->Tag()));
|
|
|
|
h = merge_p_hashes(h, p_hash("ids"));
|
|
for ( auto i : pf->OrderedIdentifiers() )
|
|
h = merge_p_hashes(h, p_hash(i->Name()));
|
|
|
|
h = merge_p_hashes(h, p_hash("constants"));
|
|
for ( auto i : pf->Constants() )
|
|
h = merge_p_hashes(h, p_hash(i->Value()));
|
|
|
|
h = merge_p_hashes(h, p_hash("types"));
|
|
for ( auto i : pf->OrderedTypes() )
|
|
h = merge_p_hashes(h, HashType(i));
|
|
|
|
h = merge_p_hashes(h, p_hash("lambdas"));
|
|
for ( auto i : pf->Lambdas() )
|
|
h = merge_p_hashes(h, p_hash(i));
|
|
|
|
h = merge_p_hashes(h, p_hash("addl"));
|
|
for ( auto i : pf->AdditionalHashes() )
|
|
h = merge_p_hashes(h, i);
|
|
|
|
pf->SetHashVal(h);
|
|
}
|
|
|
|
// Returns the hash for the given type, computing it (recursively over any
// constituent types) if we haven't already done so. As a side effect, the
// type is associated with a "representative" type (the first type seen
// with the same hash), and any attributes on record fields are queued for
// analysis. Returns 0 for a nil type.
//
// A named type whose name matches that of a previously hashed type
// inherits that type's hash and representative, even if the Type*
// pointers differ.
p_hash_type ProfileFuncs::HashType(const Type* t) {
    if ( ! t )
        return 0;

    auto it = type_hashes.find(t);

    if ( it != type_hashes.end() )
        // We've already done this Type*.
        return it->second;

    auto& tn = t->GetName();
    if ( ! tn.empty() ) {
        auto seen_it = seen_type_names.find(tn);

        if ( seen_it != seen_type_names.end() ) {
            // We've already done a type with the same name, even
            // though with a different Type*. Reuse its results.
            auto seen_t = seen_it->second;
            auto h = type_hashes[seen_t];

            type_hashes[t] = h;
            type_to_rep[t] = type_to_rep[seen_t];

            return h;
        }
    }

    auto h = p_hash(t->Tag());
    if ( ! tn.empty() )
        h = merge_p_hashes(h, p_hash(tn));

    // Enter an initial value for this type's hash. We'll update it
    // at the end, but having it here first will prevent recursive
    // records from leading to infinite recursion as we traverse them.
    // It's okay that the initial value is degenerate, because if we access
    // it during the traversal that will only happen due to a recursive
    // type, in which case the other elements of that type will serve
    // to differentiate its hash.
    type_hashes[t] = h;

    switch ( t->Tag() ) {
        case TYPE_ADDR:
        case TYPE_ANY:
        case TYPE_BOOL:
        case TYPE_COUNT:
        case TYPE_DOUBLE:
        case TYPE_ENUM:
        case TYPE_ERROR:
        case TYPE_INT:
        case TYPE_INTERVAL:
        case TYPE_OPAQUE:
        case TYPE_PATTERN:
        case TYPE_PORT:
        case TYPE_STRING:
        case TYPE_SUBNET:
        case TYPE_TIME:
        case TYPE_VOID: h = merge_p_hashes(h, p_hash(t)); break;

        case TYPE_RECORD: {
            const auto& ft = t->AsRecordType();
            auto n = ft->NumFields();
            auto orig_n = ft->NumOrigFields();

            h = merge_p_hashes(h, p_hash("record"));

            if ( full_record_hashes )
                h = merge_p_hashes(h, p_hash(n));
            else
                h = merge_p_hashes(h, p_hash(orig_n));

            for ( auto i = 0; i < n; ++i ) {
                // Unless we're doing full record hashes, only the
                // record's original fields contribute to its hash, so
                // that fields added later don't perturb it.
                bool do_hash = full_record_hashes;
                if ( ! do_hash )
                    do_hash = (i < orig_n);

                const auto& f = ft->FieldDecl(i);

                // We always hash the field's type, even if it won't
                // contribute to this record's hash, so that the type
                // itself gets processed.
                auto type_h = HashType(f->type);

                if ( do_hash ) {
                    h = merge_p_hashes(h, p_hash(f->id));
                    h = merge_p_hashes(h, type_h);
                }

                if ( f->attrs ) {
                    if ( do_hash )
                        h = merge_p_hashes(h, HashAttrs(f->attrs));

                    // The attributes get analyzed regardless of whether
                    // the field contributes to the hash.
                    AnalyzeAttrs(f->attrs.get(), ft);
                }
            }
        } break;

        case TYPE_TABLE: {
            auto tbl = t->AsTableType();
            h = merge_p_hashes(h, p_hash("table"));
            h = merge_p_hashes(h, p_hash("indices"));
            h = merge_p_hashes(h, HashType(tbl->GetIndices()));
            h = merge_p_hashes(h, p_hash("tbl-yield"));
            h = merge_p_hashes(h, HashType(tbl->Yield()));
        } break;

        case TYPE_FUNC: {
            auto ft = t->AsFuncType();
            auto flv = ft->FlavorString();
            h = merge_p_hashes(h, p_hash(flv));
            h = merge_p_hashes(h, p_hash("params"));
            h = merge_p_hashes(h, HashType(ft->Params()));
            h = merge_p_hashes(h, p_hash("func-yield"));
            h = merge_p_hashes(h, HashType(ft->Yield()));
        } break;

        case TYPE_LIST: {
            auto& tl = t->AsTypeList()->GetTypes();

            h = merge_p_hashes(h, p_hash("list"));
            h = merge_p_hashes(h, p_hash(tl.size()));

            for ( const auto& tl_i : tl )
                h = merge_p_hashes(h, HashType(tl_i));
        } break;

        case TYPE_VECTOR:
            h = merge_p_hashes(h, p_hash("vec"));
            h = merge_p_hashes(h, HashType(t->AsVectorType()->Yield()));
            break;

        case TYPE_FILE:
            h = merge_p_hashes(h, p_hash("file"));
            h = merge_p_hashes(h, HashType(t->AsFileType()->Yield()));
            break;

        case TYPE_TYPE:
            h = merge_p_hashes(h, p_hash("type"));
            h = merge_p_hashes(h, HashType(t->AsTypeType()->GetType()));
            break;
    }

    // Replace the degenerate in-progress value with the final hash.
    type_hashes[t] = h;

    auto [rep_it, rep_inserted] = type_hash_reps.emplace(h, t);

    if ( rep_inserted ) { // No previous rep, so use this Type* for that.
        type_to_rep[t] = t;
        rep_types.push_back(t);
    }
    else
        type_to_rep[t] = rep_it->second;

    if ( ! tn.empty() )
        seen_type_names[tn] = t;

    return h;
}
|
|
|
|
// Returns a hash for a set of attributes, built up attribute-by-attribute
// from each one's tag plus (if it has an associated expression) the hash
// of that expression's type and the hash of the expression itself.
p_hash_type ProfileFuncs::HashAttrs(const AttributesPtr& Attrs) {
    // It's tempting to just use p_hash on the attributes as a whole, but
    // that won't work if the attributes wind up with extensible records in
    // their descriptions, if we're not doing full record hashes.
    p_hash_type h = 0;

    for ( const auto& attr : Attrs->GetAttrs() ) {
        h = merge_p_hashes(h, p_hash(attr->Tag()));

        const auto& attr_expr = attr->GetExpr();
        if ( ! attr_expr )
            continue;

        // Enforce consistency for the type of the associated expression.
        h = merge_p_hashes(h, HashType(attr_expr->GetType()));

        // NOTE(review): the expression itself is also hashed here, even
        // though such expressions can reportedly vary in structure due to
        // compilation of elements -- confirm that this is intended.
        h = merge_p_hashes(h, p_hash(attr_expr.get()));
    }

    return h;
}
|
|
|
|
void ProfileFuncs::AnalyzeAttrs(const Attributes* attrs, const Type* t) {
|
|
for ( const auto& a : attrs->GetAttrs() ) {
|
|
auto& e = a->GetExpr();
|
|
|
|
if ( ! e )
|
|
continue;
|
|
|
|
pending_exprs.push_back(e.get());
|
|
|
|
auto prev_ea = expr_attrs.find(a.get());
|
|
if ( prev_ea == expr_attrs.end() )
|
|
expr_attrs[a.get()] = {t};
|
|
else {
|
|
// Add it if new. This is rare, but can arise due to attributes
|
|
// being shared for example from initializers with a variable
|
|
// itself.
|
|
bool found = false;
|
|
for ( auto ea : prev_ea->second )
|
|
if ( ea == t ) {
|
|
found = true;
|
|
break;
|
|
}
|
|
|
|
if ( ! found )
|
|
prev_ea->second.push_back(t);
|
|
}
|
|
|
|
if ( e->Tag() == EXPR_LAMBDA )
|
|
lambdas.insert(e->AsLambdaExpr());
|
|
}
|
|
}
|
|
|
|
void ProfileFuncs::ComputeSideEffects() {
|
|
// Computing side effects is an iterative process, because whether
|
|
// a given expression has a side effect can depend on whether it
|
|
// includes accesses to types that themselves have side effects.
|
|
|
|
// Step one: assemble the candidate pool of attributes to assess.
|
|
for ( auto& ea : expr_attrs ) {
|
|
// Is this an attribute that can be triggered by
|
|
// statement/expression execution?
|
|
auto a = ea.first;
|
|
auto at = a->Tag();
|
|
if ( at == ATTR_DEFAULT || at == ATTR_DEFAULT_INSERT || at == ATTR_ON_CHANGE ) {
|
|
if ( at == ATTR_DEFAULT ) {
|
|
// Look for tables with &default's returning aggregate values.
|
|
for ( auto t : ea.second ) {
|
|
if ( t->Tag() != TYPE_TABLE )
|
|
continue;
|
|
|
|
auto y = t->AsTableType()->Yield();
|
|
|
|
if ( y && IsAggr(y->Tag()) ) {
|
|
tbl_has_aggr_default[t] = true;
|
|
for ( auto ta : type_aliases[t] )
|
|
tbl_has_aggr_default[ta] = true;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Weed out very-common-and-completely-safe expressions.
|
|
if ( ! DefinitelyHasNoSideEffects(a->GetExpr()) )
|
|
candidates.insert(a);
|
|
}
|
|
}
|
|
|
|
// At this point, very often there are no candidates and we're done.
|
|
// However, if we have candidates then we need to process them in an
|
|
// iterative fashion because it's possible that the side effects of
|
|
// some of them depend on the side effects of other candidates.
|
|
|
|
while ( ! candidates.empty() ) {
|
|
// For which attributes have we resolved their status.
|
|
AttrSet made_decision;
|
|
|
|
for ( auto c : candidates ) {
|
|
IDSet non_local_ids;
|
|
TypeSet aggrs;
|
|
bool is_unknown = false;
|
|
|
|
// Track the candidate we're currently analyzing, since sometimes
|
|
// it's self-referential and we need to identify that fact.
|
|
curr_candidate = c;
|
|
|
|
if ( ! AssessSideEffects(c->GetExpr(), non_local_ids, aggrs, is_unknown) )
|
|
// Can't make a decision yet.
|
|
continue;
|
|
|
|
// We've resolved this candidate.
|
|
made_decision.insert(c);
|
|
SetSideEffects(c, non_local_ids, aggrs, is_unknown);
|
|
}
|
|
|
|
if ( made_decision.empty() ) {
|
|
// We weren't able to make forward progress. This happens when
|
|
// the pending candidates are mutually dependent. While in
|
|
// principle we could scope the worst-case resolution of their
|
|
// side effects, this is such an unlikely situation that we just
|
|
// mark them all as unknown.
|
|
|
|
// We keep these empty.
|
|
IDSet non_local_ids;
|
|
TypeSet aggrs;
|
|
|
|
for ( auto c : candidates )
|
|
SetSideEffects(c, non_local_ids, aggrs, true);
|
|
|
|
// We're now all done.
|
|
break;
|
|
}
|
|
|
|
for ( auto md : made_decision )
|
|
candidates.erase(md);
|
|
}
|
|
}
|
|
|
|
bool ProfileFuncs::DefinitelyHasNoSideEffects(const ExprPtr& e) const {
|
|
if ( e->Tag() == EXPR_CONST || e->Tag() == EXPR_VECTOR_CONSTRUCTOR )
|
|
return true;
|
|
|
|
if ( e->Tag() == EXPR_NAME )
|
|
return e->GetType()->Tag() != TYPE_FUNC;
|
|
|
|
auto ep = expr_profs.find(e.get());
|
|
ASSERT(ep != expr_profs.end());
|
|
|
|
const auto& pf = ep->second;
|
|
|
|
if ( ! pf->NonLocalAssignees().empty() || ! pf->TableRefs().empty() || ! pf->AggrMods().empty() ||
|
|
! pf->ScriptCalls().empty() )
|
|
return false;
|
|
|
|
for ( auto& b : pf->BiFGlobals() )
|
|
if ( has_script_side_effects(b->Name()) )
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
void ProfileFuncs::SetSideEffects(const Attr* a, IDSet& non_local_ids, TypeSet& aggrs, bool is_unknown) {
|
|
auto seo_vec = std::vector<std::shared_ptr<SideEffectsOp>>{};
|
|
bool is_rec = expr_attrs[a][0]->Tag() == TYPE_RECORD;
|
|
|
|
SideEffectsOp::AccessType at;
|
|
if ( is_rec )
|
|
at = SideEffectsOp::CONSTRUCTION;
|
|
else if ( a->Tag() == ATTR_ON_CHANGE )
|
|
at = SideEffectsOp::WRITE;
|
|
else
|
|
at = SideEffectsOp::READ;
|
|
|
|
if ( non_local_ids.empty() && aggrs.empty() && ! is_unknown )
|
|
// Definitely no side effects.
|
|
seo_vec.push_back(std::make_shared<SideEffectsOp>());
|
|
else {
|
|
attrs_with_side_effects.insert(a);
|
|
|
|
// Set side effects for all of the types associated with this attribute.
|
|
for ( auto ea_t : expr_attrs[a] ) {
|
|
auto seo = std::make_shared<SideEffectsOp>(at, ea_t);
|
|
seo->AddModNonGlobal(non_local_ids);
|
|
seo->AddModAggrs(aggrs);
|
|
|
|
if ( is_unknown )
|
|
seo->SetUnknownChanges();
|
|
|
|
side_effects_ops.push_back(seo);
|
|
seo_vec.push_back(std::move(seo));
|
|
}
|
|
}
|
|
|
|
if ( is_rec )
|
|
record_constr_with_side_effects[a] = std::move(seo_vec);
|
|
else
|
|
aggr_side_effects[a] = std::move(seo_vec);
|
|
}
|
|
|
|
// Returns the attributes associated with type "t", drawn from both the
// still-pending candidates and the attributes already known to have side
// effects.
AttrVec ProfileFuncs::AssociatedAttrs(const Type* t) {
    AttrVec matches;

    // Pending candidates are included (not just resolved attributes) so that
    // callers can spot dependencies on attributes that haven't been resolved
    // yet.
    FindAssociatedAttrs(candidates, t, matches);

    // ... followed by the ones already identified.
    FindAssociatedAttrs(attrs_with_side_effects, t, matches);

    return matches;
}
|
|
|
|
void ProfileFuncs::FindAssociatedAttrs(const AttrSet& attrs, const Type* t, AttrVec& assoc_attrs) {
|
|
for ( auto a : attrs ) {
|
|
for ( auto ea_t : expr_attrs[a] ) {
|
|
if ( same_type(t, ea_t) ) {
|
|
assoc_attrs.push_back(a);
|
|
break;
|
|
}
|
|
|
|
for ( auto ta : type_aliases[ea_t] )
|
|
if ( same_type(t, ta) ) {
|
|
assoc_attrs.push_back(a);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
bool ProfileFuncs::AssessSideEffects(const ExprPtr& e, IDSet& non_local_ids, TypeSet& aggrs, bool& is_unknown) {
|
|
if ( e->Tag() == EXPR_NAME && e->GetType()->Tag() == TYPE_FUNC )
|
|
// This occurs when the expression is itself a function name, and
|
|
// in an attribute context indicates an implicit call.
|
|
return GetCallSideEffects(e->AsNameExpr(), non_local_ids, aggrs, is_unknown);
|
|
|
|
ASSERT(expr_profs.count(e.get()) != 0);
|
|
auto pf = expr_profs[e.get()];
|
|
return AssessSideEffects(pf.get(), non_local_ids, aggrs, is_unknown);
|
|
}
|
|
|
|
bool ProfileFuncs::AssessSideEffects(const ProfileFunc* pf, IDSet& non_local_ids, TypeSet& aggrs, bool& is_unknown) {
|
|
if ( pf->DoesIndirectCalls() ) {
|
|
is_unknown = true;
|
|
return true;
|
|
}
|
|
|
|
for ( auto& b : pf->BiFGlobals() )
|
|
if ( has_script_side_effects(b->Name()) ) {
|
|
is_unknown = true;
|
|
return true;
|
|
}
|
|
|
|
IDSet nla;
|
|
TypeSet mod_aggrs;
|
|
|
|
for ( auto& a : pf->NonLocalAssignees() )
|
|
nla.insert(a);
|
|
|
|
for ( auto& r : pf->RecordConstructorAttrs() )
|
|
if ( ! AssessAggrEffects(SideEffectsOp::CONSTRUCTION, r.first, nla, mod_aggrs, is_unknown) )
|
|
// Not enough information yet to know all of the side effects.
|
|
return false;
|
|
|
|
for ( auto& tr : pf->TableRefs() )
|
|
if ( ! AssessAggrEffects(SideEffectsOp::READ, tr, nla, mod_aggrs, is_unknown) )
|
|
return false;
|
|
|
|
for ( auto& tm : pf->AggrMods() ) {
|
|
if ( tm->Tag() == TYPE_TABLE && ! AssessAggrEffects(SideEffectsOp::WRITE, tm, nla, mod_aggrs, is_unknown) )
|
|
return false;
|
|
|
|
mod_aggrs.insert(tm);
|
|
}
|
|
|
|
for ( auto& f : pf->ScriptCalls() ) {
|
|
if ( f->Flavor() != FUNC_FLAVOR_FUNCTION ) {
|
|
// A hook (since events can't be called) - not something
|
|
// to analyze further.
|
|
is_unknown = true;
|
|
return true;
|
|
}
|
|
|
|
auto pff = func_profs[f];
|
|
if ( active_func_profiles.count(pff) > 0 )
|
|
// We're already processing this function and arrived here via
|
|
// recursion. Skip further analysis here, we'll do it instead
|
|
// for the original instance.
|
|
continue;
|
|
|
|
// Track this analysis so we can detect recursion.
|
|
active_func_profiles.insert(pff);
|
|
auto a = AssessSideEffects(pff.get(), nla, mod_aggrs, is_unknown);
|
|
active_func_profiles.erase(pff);
|
|
|
|
if ( ! a )
|
|
return false;
|
|
}
|
|
|
|
non_local_ids.insert(nla.begin(), nla.end());
|
|
aggrs.insert(mod_aggrs.begin(), mod_aggrs.end());
|
|
|
|
return true;
|
|
}
|
|
|
|
// Assesses the side effects of performing "access" on aggregate type "t",
// based on the attributes associated with that type.
//
// Returns false if one of those attributes (other than the candidate
// currently being analyzed) hasn't been resolved yet. Otherwise returns
// true, having added any applicable effects to "non_local_ids" and "aggrs",
// or having set "is_unknown" if an applicable effect has unknown changes.
bool ProfileFuncs::AssessAggrEffects(SideEffectsOp::AccessType access, const Type* t, IDSet& non_local_ids,
                                     TypeSet& aggrs, bool& is_unknown) {
    for ( auto attr : AssociatedAttrs(t) ) {
        if ( attr == curr_candidate )
            // Self-reference. The lack of a determination for the candidate
            // itself must not block resolving it.
            continue;

        // Have the side effects associated with this attribute already been
        // determined? They can live in either of two maps.
        auto effects = aggr_side_effects.find(attr);
        bool resolved = effects != aggr_side_effects.end();

        if ( ! resolved ) {
            effects = record_constr_with_side_effects.find(attr);
            resolved = effects != record_constr_with_side_effects.end();
        }

        if ( ! resolved )
            // Not resolved yet, so the current candidate can't be either.
            return false;

        for ( auto& op : effects->second ) {
            if ( AssessSideEffects(op.get(), access, t, non_local_ids, aggrs) ) {
                is_unknown = true;
                return true;
            }
        }
    }

    return true;
}
|
|
|
|
bool ProfileFuncs::AssessSideEffects(const SideEffectsOp* se, SideEffectsOp::AccessType access, const Type* t,
|
|
IDSet& non_local_ids, TypeSet& aggrs) const {
|
|
// First determine whether the SideEffectsOp applies.
|
|
if ( se->GetAccessType() != access )
|
|
return false;
|
|
|
|
if ( ! same_type(se->GetType(), t) )
|
|
return false;
|
|
|
|
// It applies, return its effects.
|
|
if ( se->HasUnknownChanges() )
|
|
return true;
|
|
|
|
for ( auto a : se->ModAggrs() )
|
|
aggrs.insert(a);
|
|
for ( auto nl : se->ModNonLocals() )
|
|
non_local_ids.insert(nl);
|
|
|
|
return false;
|
|
}
|
|
|
|
std::shared_ptr<SideEffectsOp> ProfileFuncs::GetCallSideEffects(const ScriptFunc* sf) {
|
|
if ( lambda_primaries.count(sf->Name()) > 0 )
|
|
sf = lambda_primaries[sf->Name()];
|
|
|
|
auto sf_se = func_side_effects.find(sf);
|
|
if ( sf_se != func_side_effects.end() )
|
|
// Return cached result.
|
|
return sf_se->second;
|
|
|
|
bool is_unknown = false;
|
|
IDSet nla;
|
|
TypeSet mod_aggrs;
|
|
|
|
ASSERT(func_profs.count(sf) != 0);
|
|
auto pf = func_profs[sf];
|
|
if ( ! AssessSideEffects(pf.get(), nla, mod_aggrs, is_unknown) )
|
|
// Can't figure it out yet.
|
|
return nullptr;
|
|
|
|
auto seo = std::make_shared<SideEffectsOp>(SideEffectsOp::CALL);
|
|
seo->AddModNonGlobal(nla);
|
|
seo->AddModAggrs(mod_aggrs);
|
|
|
|
if ( is_unknown )
|
|
seo->SetUnknownChanges();
|
|
|
|
func_side_effects[sf] = seo;
|
|
|
|
return seo;
|
|
}
|
|
|
|
// We associate modules with filenames, and take the first one we see.
|
|
static std::unordered_map<std::string, std::string> filename_module;
|
|
|
|
void switch_to_module(const char* module_name) {
|
|
auto loc = GetCurrentLocation();
|
|
if ( loc.first_line != 0 && filename_module.count(loc.filename) == 0 )
|
|
filename_module[loc.filename] = module_name;
|
|
}
|
|
|
|
std::string func_name_at_loc(std::string fname, const Location* loc) {
|
|
auto find_module = filename_module.find(loc->filename);
|
|
if ( find_module == filename_module.end() )
|
|
// No associated module.
|
|
return fname;
|
|
|
|
auto& module = find_module->second;
|
|
if ( module.empty() || module == "GLOBAL" )
|
|
// Trivial associated module.
|
|
return fname;
|
|
|
|
auto mod_prefix = module + "::";
|
|
|
|
if ( fname.find(mod_prefix) == 0 )
|
|
return fname; // it already has the module name
|
|
|
|
return mod_prefix + fname;
|
|
}
|
|
|
|
// On entering a statement: normalizes its location, folds it into the range
// of the enclosing statement (if any), and then opens a new range for the
// statement itself.
TraversalCode SetBlockLineNumbers::PreStmt(const Stmt* s) {
    // The location is updated in place, hence the cast.
    auto stmt_loc = const_cast<Location*>(s->GetLocationInfo());

    // Must come before pushing our own range, so that it's the enclosing
    // statement's range that gets widened.
    UpdateLocInfo(stmt_loc);

    block_line_range.emplace_back(stmt_loc->first_line, stmt_loc->last_line);

    return TC_CONTINUE;
}
|
|
|
|
// On leaving a statement: writes the range accumulated for it back into its
// location, closes that range, and widens the enclosing statement's range
// (if any) to cover it.
TraversalCode SetBlockLineNumbers::PostStmt(const Stmt* s) {
    // Take a copy of our range before removing it from the stack.
    const auto finished = block_line_range.back();
    block_line_range.pop_back();

    auto stmt_loc = const_cast<Location*>(s->GetLocationInfo());
    stmt_loc->first_line = finished.first;
    stmt_loc->last_line = finished.second;

    if ( block_line_range.empty() )
        return TC_CONTINUE;

    // Our range may have grown while traversing our children, so pass that
    // on to the enclosing statement.
    auto& enclosing = block_line_range.back();
    enclosing.first = std::min(enclosing.first, finished.first);
    enclosing.second = std::max(enclosing.second, finished.second);

    return TC_CONTINUE;
}
|
|
|
|
// On entering an expression: normalizes its location and folds it into the
// range of the statement currently being traversed.
TraversalCode SetBlockLineNumbers::PreExpr(const Expr* e) {
    // Expressions are only expected to be visited inside a statement.
    ASSERT(! block_line_range.empty());

    auto expr_loc = const_cast<Location*>(e->GetLocationInfo());
    UpdateLocInfo(expr_loc);

    return TC_CONTINUE;
}
|
|
|
|
// Normalizes "loc" so that first_line <= last_line, and widens the innermost
// open range (if there is one) to cover it.
void SetBlockLineNumbers::UpdateLocInfo(Location* loc) {
    // Locations sometimes come with their line coverage inverted.
    if ( loc->last_line < loc->first_line )
        std::swap(loc->first_line, loc->last_line);

    if ( block_line_range.empty() )
        return;

    auto& current = block_line_range.back();
    current.first = std::min(current.first, loc->first_line);
    current.second = std::max(current.second, loc->last_line);
}
|
|
|
|
// Analyzes the body of every function in "funcs" that is marked for
// analysis: first fixes up the line numbers of the body's statements and
// expressions, then traverses the body with this analyzer, using the
// (module-qualified) function name as the outermost parent.
ASTBlockAnalyzer::ASTBlockAnalyzer(std::vector<FuncInfo>& funcs) {
    for ( auto& fi : funcs ) {
        if ( ! fi.ShouldAnalyze() )
            continue;

        std::string base_name = fi.Func()->Name();
        auto body = fi.Body();

        // Line numbers need to be sorted out before our own traversal.
        SetBlockLineNumbers line_fixer;
        body->Traverse(&line_fixer);

        const auto qualified_name = func_name_at_loc(std::move(base_name), body->GetLocationInfo());

        // The function acts as the root parent; both elements of the pair
        // are simply its name.
        parents.emplace_back(qualified_name, qualified_name);
        func_name_prefix = qualified_name + ":";
        body->Traverse(this);
        parents.pop_back();
    }

    // Leave behind a conspicuous prefix; it is not expected to ever appear.
    func_name_prefix = "<MISSING>:";
}
|
|
|
|
static bool is_compound_stmt(const Stmt* s) {
|
|
static std::set<StmtTag> compound_stmts = {STMT_FOR, STMT_IF, STMT_LIST, STMT_SWITCH, STMT_WHEN, STMT_WHILE};
|
|
return compound_stmts.count(s->Tag()) > 0;
|
|
}
|
|
|
|
// On entering a statement: builds (and records) the expanded description
// for its location and, for compound statements, makes the statement the
// current parent for everything nested inside it.
TraversalCode ASTBlockAnalyzer::PreStmt(const Stmt* s) {
    auto stmt_loc = s->GetLocationInfo();

    // Done for every statement, compound or not.
    auto expanded = BuildExpandedDescription(stmt_loc);

    if ( is_compound_stmt(s) )
        parents.emplace_back(LocWithFunc(stmt_loc), std::move(expanded));

    return TC_CONTINUE;
}
|
|
|
|
// On leaving a statement: removes the parent entry that PreStmt() pushed
// for it, which it only does for compound statements.
TraversalCode ASTBlockAnalyzer::PostStmt(const Stmt* s) {
    if ( ! is_compound_stmt(s) )
        return TC_CONTINUE;

    parents.pop_back();
    return TC_CONTINUE;
}
|
|
|
|
// On entering an expression: makes sure an expanded description has been
// recorded for its location.
TraversalCode ASTBlockAnalyzer::PreExpr(const Expr* e) {
    auto expr_loc = e->GetLocationInfo();

    // Called only for the recording it does; the returned string isn't needed.
    (void)BuildExpandedDescription(expr_loc);

    return TC_CONTINUE;
}
|
|
|
|
std::string ASTBlockAnalyzer::BuildExpandedDescription(const Location* loc) {
|
|
ASSERT(loc && loc->first_line != 0);
|
|
|
|
auto ls = LocWithFunc(loc);
|
|
if ( ! parents.empty() ) {
|
|
auto& parent_pair = parents.back();
|
|
if ( parent_pair.first == ls )
|
|
ls = parent_pair.second;
|
|
else
|
|
ls = parent_pair.second + ";" + ls;
|
|
}
|
|
|
|
auto lk = LocKey(loc);
|
|
if ( exp_desc.count(lk) == 0 )
|
|
exp_desc[lk] = ls;
|
|
|
|
return ls;
|
|
}
|
|
|
|
// Global handle to an ASTBlockAnalyzer. It is default-initialized (null)
// here; whatever creates the analyzer is not part of this section of the file.
std::unique_ptr<ASTBlockAnalyzer> AST_blocks;
|
|
|
|
} // namespace zeek::detail
|