initial elements of switching over CSE validity to full assessment of side effects

This commit is contained in:
Vern Paxson 2023-11-28 15:01:52 -08:00
parent 8a18d5f8a2
commit eab33725f8
5 changed files with 186 additions and 28 deletions

View file

@ -7,11 +7,27 @@
namespace zeek::detail { namespace zeek::detail {
class ExprSideEffects {
public:
ExprSideEffects(bool _has_side_effects) : has_side_effects(_has_side_effects) {}
bool HasSideEffects() const { return has_side_effects; }
protected:
bool has_side_effects;
};
class ExprOptInfo { class ExprOptInfo {
public: public:
// The AST number of the statement in which this expression // The AST number of the statement in which this expression
// appears. // appears.
int stmt_num = -1; // -1 = not assigned yet int stmt_num = -1; // -1 = not assigned yet
auto& SideEffects() { return side_effects; }
protected:
// ###
std::optional<ExprSideEffects> side_effects;
}; };
} // namespace zeek::detail } // namespace zeek::detail

View file

@ -988,8 +988,10 @@ void ProfileFuncs::ComputeSideEffects() {
// Weed out very-common-and-completely-safe expressions. // Weed out very-common-and-completely-safe expressions.
if ( ! DefinitelyHasNoSideEffects(a->GetExpr()) ) if ( ! DefinitelyHasNoSideEffects(a->GetExpr()) )
candidates.insert(a); candidates.insert(a);
#if 0
else else
printf("attribute %s definitely has no side effects\n", obj_desc(a).c_str()); printf("attribute %s definitely has no side effects\n", obj_desc(a).c_str());
#endif
} }
} }
@ -1020,7 +1022,7 @@ void ProfileFuncs::ComputeSideEffects() {
std::unordered_set<const Type*> aggrs; std::unordered_set<const Type*> aggrs;
bool is_unknown = true; bool is_unknown = true;
for ( auto c : candidates ) { for ( auto c : candidates ) {
printf("jackpot for %s\n", obj_desc(c).c_str()); // printf("jackpot for %s\n", obj_desc(c).c_str());
SetSideEffects(c, non_local_ids, aggrs, is_unknown); SetSideEffects(c, non_local_ids, aggrs, is_unknown);
} }
break; break;
@ -1045,12 +1047,12 @@ void ProfileFuncs::SetSideEffects(const Attr* a, IDSet& non_local_ids, std::unor
at = SideEffectsOp::READ; at = SideEffectsOp::READ;
if ( non_local_ids.empty() && aggrs.empty() && ! is_unknown ) { if ( non_local_ids.empty() && aggrs.empty() && ! is_unknown ) {
printf("%s has no side effects\n", obj_desc(a).c_str()); // printf("%s has no side effects\n", obj_desc(a).c_str());
// Definitely no side effects. // Definitely no side effects.
seo_vec.push_back(std::make_shared<SideEffectsOp>()); seo_vec.push_back(std::make_shared<SideEffectsOp>());
} }
else { else {
printf("%s has side effects\n", obj_desc(a).c_str()); // printf("%s has side effects\n", obj_desc(a).c_str());
for ( auto ea_t : expr_attrs[a] ) { for ( auto ea_t : expr_attrs[a] ) {
auto seo = std::make_shared<SideEffectsOp>(at, ea_t); auto seo = std::make_shared<SideEffectsOp>(at, ea_t);
@ -1060,6 +1062,7 @@ void ProfileFuncs::SetSideEffects(const Attr* a, IDSet& non_local_ids, std::unor
if ( is_unknown ) if ( is_unknown )
seo->SetUnknownChanges(); seo->SetUnknownChanges();
side_effects_ops.push_back(seo);
seo_vec.push_back(std::move(seo)); seo_vec.push_back(std::move(seo));
} }
} }
@ -1221,23 +1224,51 @@ bool ProfileFuncs::AssessAggrEffects(SideEffectsOp::AccessType access, const Typ
return false; return false;
} }
for ( auto& se : ase->second ) { for ( auto& se : ase->second )
if ( se->GetAccessType() != access ) if ( AssessSideEffects(se.get(), access, t, non_local_ids, aggrs) ) {
continue;
if ( se->HasUnknownChanges() ) {
is_unknown = true; is_unknown = true;
return true; // no point doing any more work
}
}
return true; return true;
} }
bool ProfileFuncs::AssessSideEffects(const SideEffectsOp* se, SideEffectsOp::AccessType access, const Type* t,
IDSet& non_local_ids, std::unordered_set<const Type*>& aggrs) const {
if ( se->GetAccessType() != access )
return false;
if ( ! same_type(se->GetType(), t) )
return false;
if ( se->HasUnknownChanges() )
return true;
for ( auto a : se->ModAggrs() ) for ( auto a : se->ModAggrs() )
aggrs.insert(a); aggrs.insert(a);
for ( auto nl : se->ModNonLocals() ) for ( auto nl : se->ModNonLocals() )
non_local_ids.insert(nl); non_local_ids.insert(nl);
}
return false;
} }
bool ProfileFuncs::GetSideEffects(SideEffectsOp::AccessType access, const Type* t) const {
IDSet nli;
std::unordered_set<const Type*> aggrs;
if ( GetSideEffects(access, t, nli, aggrs) )
return true; return true;
return ! nli.empty() || ! aggrs.empty();
}
bool ProfileFuncs::GetSideEffects(SideEffectsOp::AccessType access, const Type* t, IDSet& non_local_ids,
std::unordered_set<const Type*>& aggrs) const {
for ( auto se : side_effects_ops )
if ( AssessSideEffects(se.get(), access, t, non_local_ids, aggrs) )
return true;
return false;
} }
} // namespace zeek::detail } // namespace zeek::detail

View file

@ -318,7 +318,16 @@ public:
// Expression-valued attributes that appear in the context of different // Expression-valued attributes that appear in the context of different
// types. // types.
const auto& ExprAttrs() const { return expr_attrs; } // ### const auto& ExprAttrs() const { return expr_attrs; }
// ###
// true = unknown
bool GetSideEffects(SideEffectsOp::AccessType access, const Type* t) const;
bool GetSideEffects(SideEffectsOp::AccessType access, const Type* t, IDSet& non_local_ids,
std::unordered_set<const Type*>& aggrs) const;
const auto& AttrSideEffects() const { return attr_side_effects; }
const auto& RecordConstructorEffects() const { return record_constr_with_side_effects; }
// Returns the "representative" Type* for the hash associated with // Returns the "representative" Type* for the hash associated with
// the parameter (which might be the parameter itself). // the parameter (which might be the parameter itself).
@ -381,6 +390,10 @@ protected:
bool AssessAggrEffects(SideEffectsOp::AccessType access, const Type* t, IDSet& non_local_ids, bool AssessAggrEffects(SideEffectsOp::AccessType access, const Type* t, IDSet& non_local_ids,
std::unordered_set<const Type*>& aggrs, bool& is_unknown); std::unordered_set<const Type*>& aggrs, bool& is_unknown);
// true = is unknown
bool AssessSideEffects(const SideEffectsOp* se, SideEffectsOp::AccessType access, const Type* t,
IDSet& non_local_ids, std::unordered_set<const Type*>& aggrs) const;
// Globals seen across the functions, other than those solely seen // Globals seen across the functions, other than those solely seen
// as the function being called in a call. // as the function being called in a call.
IDSet globals; IDSet globals;
@ -465,7 +478,6 @@ protected:
// record attributes. // record attributes.
std::vector<const Expr*> pending_exprs; std::vector<const Expr*> pending_exprs;
// ### used?
std::vector<std::shared_ptr<SideEffectsOp>> side_effects_ops; std::vector<std::shared_ptr<SideEffectsOp>> side_effects_ops;

View file

@ -425,6 +425,32 @@ IDPtr Reducer::FindExprTmp(const Expr* rhs, const Expr* a, const std::shared_ptr
} }
bool Reducer::ExprValid(const ID* id, const Expr* e1, const Expr* e2) const { bool Reducer::ExprValid(const ID* id, const Expr* e1, const Expr* e2) const {
// First check for whether e1 is already known to itself have side effects.
// If so, then it's never safe to reuse its associated identifier in lieu
// of e2.
std::optional<ExprSideEffects>& e1_se = e1->GetOptInfo()->SideEffects();
if ( ! e1_se )
{
bool has_side_effects;
if ( e1->Tag() == EXPR_INDEX )
has_side_effects = pfs.GetSideEffects(SideEffectsOp::READ, e1->GetOp1()->GetType().get());
else if ( e1->Tag() == EXPR_RECORD_CONSTRUCTOR || e1->Tag() == EXPR_RECORD_COERCE )
has_side_effects = pfs.GetSideEffects(SideEffectsOp::CONSTRUCTION, e1->GetType().get());
else
has_side_effects = false;
e1_se = ExprSideEffects(has_side_effects);
}
if ( e1_se->HasSideEffects() )
{
// We already know that e2 is structurally identical to e1.
e2->GetOptInfo()->SideEffects() = ExprSideEffects(true);
return false;
}
// Here are the considerations for expression validity. // Here are the considerations for expression validity.
// //
// * None of the operands used in the given expression can // * None of the operands used in the given expression can
@ -964,19 +990,27 @@ TraversalCode CSE_ValidityChecker::PreExpr(const Expr* e) {
break; break;
case EXPR_INDEX: case EXPR_INDEX:
case EXPR_FIELD: case EXPR_FIELD: {
// We treat these together because they both have to be checked // We treat these together because they both have to be checked
// when inside an "add" or "delete" statement. // when inside an "add" or "delete" statement.
if ( in_aggr_mod_stmt ) {
auto aggr = e->GetOp1(); auto aggr = e->GetOp1();
if ( in_aggr_mod_stmt ) {
auto aggr_id = aggr->AsNameExpr()->Id(); auto aggr_id = aggr->AsNameExpr()->Id();
if ( CheckID(aggr_id, true) ) { if ( CheckID(aggr_id, true) || CheckAggrMod(e) ) {
is_valid = false; is_valid = false;
return TC_ABORTALL; return TC_ABORTALL;
} }
} }
else if ( t == EXPR_INDEX && aggr->GetType()->Tag() == TYPE_TABLE ) {
if ( CheckTableRef(e) ) {
is_valid = false;
return TC_ABORTALL;
}
}
#if 0
if ( t == EXPR_INDEX && have_sensitive_IDs ) { if ( t == EXPR_INDEX && have_sensitive_IDs ) {
// Unfortunately in isolation we can't statically determine // Unfortunately in isolation we can't statically determine
// whether this table has a &default associated with it. // whether this table has a &default associated with it.
@ -987,6 +1021,8 @@ TraversalCode CSE_ValidityChecker::PreExpr(const Expr* e) {
is_valid = false; is_valid = false;
return TC_ABORTALL; return TC_ABORTALL;
} }
#endif
}
break; break;
@ -1009,9 +1045,7 @@ bool CSE_ValidityChecker::CheckID(const ID* id, bool ignore_orig) const {
if ( id_t && same_type(id_t, i->GetType()) ) { if ( id_t && same_type(id_t, i->GetType()) ) {
// Same-type aggregate. // Same-type aggregate.
if ( ! ignore_orig ) // if ( ! ignore_orig ) printf("identifier %s (%d), start %s, end %s\n", id->Name(), ignore_orig, obj_desc(start_e).c_str(), obj_desc(end_e).c_str());
printf("identifier %s (%d), start %s, end %s\n", id->Name(), ignore_orig, obj_desc(start_e).c_str(),
obj_desc(end_e).c_str());
if ( ignore_orig ) if ( ignore_orig )
return true; return true;
} }
@ -1023,10 +1057,72 @@ bool CSE_ValidityChecker::CheckID(const ID* id, bool ignore_orig) const {
bool CSE_ValidityChecker::CheckAggrMod(const Expr* e) const { bool CSE_ValidityChecker::CheckAggrMod(const Expr* e) const {
const auto& e_i_t = e->GetType(); const auto& e_i_t = e->GetType();
if ( IsAggr(e_i_t) ) { if ( IsAggr(e_i_t) ) {
// This assignment sets an aggregate value. Look for type matches. // This assignment sets an aggregate value.
for ( auto i : ids )
if ( same_type(e_i_t, i->GetType()) ) // Note, the following will almost always remain empty, so spinning
// through them in the loop below will be very quick.
IDSet non_local_ids;
std::unordered_set<const Type*> aggrs;
// Here we do a bit of speed optimization by wiring in knowledge
// the aggregate WRITE effects only occur for tables.
if ( e_i_t->Tag() == TYPE_TABLE && pfs.GetSideEffects(SideEffectsOp::WRITE, e_i_t.get(), non_local_ids, aggrs) )
return true; return true;
for ( auto i : ids ) {
for ( auto nli : non_local_ids )
if ( nli == i ) {
// printf("non-local ID on WRITE: %s\n", i->Name());
return true;
}
auto i_t = i->GetType();
if ( i_t->Tag() == TYPE_ANY )
continue;
if ( same_type(e_i_t, i_t) )
return true;
for ( auto a : aggrs )
if ( same_type(a, i_t.get()) ) {
// printf("aggr type on WRITE: %s\n", i->Name());
return true;
}
}
}
return false;
}
bool CSE_ValidityChecker::CheckTableRef(const Expr* e) const {
const auto& e_i_t = e->GetType();
// This assignment sets an aggregate value.
// Note, the following will almost always remain empty, so spinning
// through them in the loop below will be very quick.
IDSet non_local_ids;
std::unordered_set<const Type*> aggrs;
if ( pfs.GetSideEffects(SideEffectsOp::READ, e_i_t.get(), non_local_ids, aggrs) )
return true;
for ( auto i : ids ) {
for ( auto nli : non_local_ids )
if ( nli == i ) {
// printf("non-local ID on READ: %s\n", i->Name());
return true;
}
auto i_t = i->GetType();
if ( i_t->Tag() == TYPE_ANY )
continue;
for ( auto a : aggrs )
if ( same_type(a, i->GetType().get()) ) {
// printf("aggr type on READ: %p %s\n", e, obj_desc(e).c_str());
return true;
}
} }
return false; return false;

View file

@ -350,8 +350,11 @@ protected:
// Returns true if the assignment given by 'e' modifies an aggregate // Returns true if the assignment given by 'e' modifies an aggregate
// with the same type as that of one of the identifiers we're tracking. // with the same type as that of one of the identifiers we're tracking.
// ###
bool CheckAggrMod(const Expr* e) const; bool CheckAggrMod(const Expr* e) const;
bool CheckTableRef(const Expr* e) const;
// Profile across all script functions. // Profile across all script functions.
ProfileFuncs& pfs; ProfileFuncs& pfs;