diff --git a/src/script_opt/ExprOptInfo.h b/src/script_opt/ExprOptInfo.h index 30452aa344..b12cde6d97 100644 --- a/src/script_opt/ExprOptInfo.h +++ b/src/script_opt/ExprOptInfo.h @@ -7,11 +7,27 @@ namespace zeek::detail { +class ExprSideEffects { +public: + ExprSideEffects(bool _has_side_effects) : has_side_effects(_has_side_effects) {} + + bool HasSideEffects() const { return has_side_effects; } + +protected: + bool has_side_effects; +}; + class ExprOptInfo { public: // The AST number of the statement in which this expression // appears. int stmt_num = -1; // -1 = not assigned yet + + auto& SideEffects() { return side_effects; } + +protected: + // ### + std::optional side_effects; }; } // namespace zeek::detail diff --git a/src/script_opt/ProfileFunc.cc b/src/script_opt/ProfileFunc.cc index bee5c688e2..0a5c3d456f 100644 --- a/src/script_opt/ProfileFunc.cc +++ b/src/script_opt/ProfileFunc.cc @@ -988,8 +988,10 @@ void ProfileFuncs::ComputeSideEffects() { // Weed out very-common-and-completely-safe expressions. if ( ! DefinitelyHasNoSideEffects(a->GetExpr()) ) candidates.insert(a); +#if 0 else printf("attribute %s definitely has no side effects\n", obj_desc(a).c_str()); +#endif } } @@ -1020,7 +1022,7 @@ void ProfileFuncs::ComputeSideEffects() { std::unordered_set aggrs; bool is_unknown = true; for ( auto c : candidates ) { - printf("jackpot for %s\n", obj_desc(c).c_str()); + // printf("jackpot for %s\n", obj_desc(c).c_str()); SetSideEffects(c, non_local_ids, aggrs, is_unknown); } break; @@ -1045,12 +1047,12 @@ void ProfileFuncs::SetSideEffects(const Attr* a, IDSet& non_local_ids, std::unor at = SideEffectsOp::READ; if ( non_local_ids.empty() && aggrs.empty() && ! is_unknown ) { - printf("%s has no side effects\n", obj_desc(a).c_str()); + // printf("%s has no side effects\n", obj_desc(a).c_str()); // Definitely no side effects. seo_vec.push_back(std::make_shared()); } else { - printf("%s has side effects\n", obj_desc(a).c_str()); + // printf("%s has side effects\n", obj_desc(a).c_str()); for ( auto ea_t : expr_attrs[a] ) { auto seo = std::make_shared(at, ea_t); @@ -1060,6 +1062,7 @@ void ProfileFuncs::SetSideEffects(const Attr* a, IDSet& non_local_ids, std::unor if ( is_unknown ) seo->SetUnknownChanges(); + side_effects_ops.push_back(seo); seo_vec.push_back(std::move(seo)); } } @@ -1221,23 +1224,51 @@ bool ProfileFuncs::AssessAggrEffects(SideEffectsOp::AccessType access, const Typ return false; } - for ( auto& se : ase->second ) { - if ( se->GetAccessType() != access ) - continue; - - if ( se->HasUnknownChanges() ) { + for ( auto& se : ase->second ) + if ( AssessSideEffects(se.get(), access, t, non_local_ids, aggrs) ) { is_unknown = true; - return true; + return true; // no point doing any more work } - - for ( auto a : se->ModAggrs() ) - aggrs.insert(a); - for ( auto nl : se->ModNonLocals() ) - non_local_ids.insert(nl); - } } return true; } +bool ProfileFuncs::AssessSideEffects(const SideEffectsOp* se, SideEffectsOp::AccessType access, const Type* t, + IDSet& non_local_ids, std::unordered_set& aggrs) const { + if ( se->GetAccessType() != access ) + return false; + + if ( ! same_type(se->GetType(), t) ) + return false; + + if ( se->HasUnknownChanges() ) + return true; + + for ( auto a : se->ModAggrs() ) + aggrs.insert(a); + for ( auto nl : se->ModNonLocals() ) + non_local_ids.insert(nl); + + return false; +} + +bool ProfileFuncs::GetSideEffects(SideEffectsOp::AccessType access, const Type* t) const { + IDSet nli; + std::unordered_set aggrs; + + if ( GetSideEffects(access, t, nli, aggrs) ) + return true; + + return ! nli.empty() || ! aggrs.empty(); +} + +bool ProfileFuncs::GetSideEffects(SideEffectsOp::AccessType access, const Type* t, IDSet& non_local_ids, + std::unordered_set& aggrs) const { + for ( auto se : side_effects_ops ) + if ( AssessSideEffects(se.get(), access, t, non_local_ids, aggrs) ) + return true; + return false; +} + } // namespace zeek::detail diff --git a/src/script_opt/ProfileFunc.h b/src/script_opt/ProfileFunc.h index d4d55dff73..ca48f13cf9 100644 --- a/src/script_opt/ProfileFunc.h +++ b/src/script_opt/ProfileFunc.h @@ -318,7 +318,16 @@ public: // Expression-valued attributes that appear in the context of different // types. - const auto& ExprAttrs() const { return expr_attrs; } + // ### const auto& ExprAttrs() const { return expr_attrs; } + + // ### + // true = unknown + bool GetSideEffects(SideEffectsOp::AccessType access, const Type* t) const; + bool GetSideEffects(SideEffectsOp::AccessType access, const Type* t, IDSet& non_local_ids, + std::unordered_set& aggrs) const; + + const auto& AttrSideEffects() const { return attr_side_effects; } + const auto& RecordConstructorEffects() const { return record_constr_with_side_effects; } // Returns the "representative" Type* for the hash associated with // the parameter (which might be the parameter itself). @@ -381,6 +390,10 @@ protected: bool AssessAggrEffects(SideEffectsOp::AccessType access, const Type* t, IDSet& non_local_ids, std::unordered_set& aggrs, bool& is_unknown); + // true = is unknown + bool AssessSideEffects(const SideEffectsOp* se, SideEffectsOp::AccessType access, const Type* t, + IDSet& non_local_ids, std::unordered_set& aggrs) const; + // Globals seen across the functions, other than those solely seen // as the function being called in a call. IDSet globals; @@ -465,7 +478,6 @@ protected: // record attributes. std::vector pending_exprs; - // ### used? std::vector> side_effects_ops; diff --git a/src/script_opt/Reduce.cc b/src/script_opt/Reduce.cc index 4159cc646a..207e2da22c 100644 --- a/src/script_opt/Reduce.cc +++ b/src/script_opt/Reduce.cc @@ -425,6 +425,32 @@ IDPtr Reducer::FindExprTmp(const Expr* rhs, const Expr* a, const std::shared_ptr } bool Reducer::ExprValid(const ID* id, const Expr* e1, const Expr* e2) const { + // First check for whether e1 is already known to itself have side effects. + // If so, then it's never safe to reuse its associated identifier in lieu + // of e2. + std::optional& e1_se = e1->GetOptInfo()->SideEffects(); + if ( ! e1_se ) + { + bool has_side_effects; + + if ( e1->Tag() == EXPR_INDEX ) + has_side_effects = pfs.GetSideEffects(SideEffectsOp::READ, e1->GetOp1()->GetType().get()); + + else if ( e1->Tag() == EXPR_RECORD_CONSTRUCTOR || e1->Tag() == EXPR_RECORD_COERCE ) + has_side_effects = pfs.GetSideEffects(SideEffectsOp::CONSTRUCTION, e1->GetType().get()); + else + has_side_effects = false; + + e1_se = ExprSideEffects(has_side_effects); + } + + if ( e1_se->HasSideEffects() ) + { + // We already know that e2 is structurally identical to e1. + e2->GetOptInfo()->SideEffects() = ExprSideEffects(true); + return false; + } + // Here are the considerations for expression validity. // // * None of the operands used in the given expression can @@ -964,19 +990,27 @@ TraversalCode CSE_ValidityChecker::PreExpr(const Expr* e) { break; case EXPR_INDEX: - case EXPR_FIELD: + case EXPR_FIELD: { // We treat these together because they both have to be checked // when inside an "add" or "delete" statement. + auto aggr = e->GetOp1(); + if ( in_aggr_mod_stmt ) { - auto aggr = e->GetOp1(); auto aggr_id = aggr->AsNameExpr()->Id(); - if ( CheckID(aggr_id, true) ) { + if ( CheckID(aggr_id, true) || CheckAggrMod(e) ) { is_valid = false; return TC_ABORTALL; } } + else if ( t == EXPR_INDEX && aggr->GetType()->Tag() == TYPE_TABLE ) { + if ( CheckTableRef(e) ) { + is_valid = false; + return TC_ABORTALL; + } + } +#if 0 if ( t == EXPR_INDEX && have_sensitive_IDs ) { // Unfortunately in isolation we can't statically determine // whether this table has a &default associated with it. @@ -986,9 +1020,11 @@ TraversalCode CSE_ValidityChecker::PreExpr(const Expr* e) { // lot of work for what might be minimal gain. is_valid = false; return TC_ABORTALL; - } + } +#endif + } - break; + break; default: break; } @@ -1009,9 +1045,7 @@ bool CSE_ValidityChecker::CheckID(const ID* id, bool ignore_orig) const { if ( id_t && same_type(id_t, i->GetType()) ) { // Same-type aggregate. - if ( ! ignore_orig ) - printf("identifier %s (%d), start %s, end %s\n", id->Name(), ignore_orig, obj_desc(start_e).c_str(), - obj_desc(end_e).c_str()); + // if ( ! ignore_orig ) printf("identifier %s (%d), start %s, end %s\n", id->Name(), ignore_orig, obj_desc(start_e).c_str(), obj_desc(end_e).c_str()); if ( ignore_orig ) return true; } @@ -1023,10 +1057,72 @@ bool CSE_ValidityChecker::CheckID(const ID* id, bool ignore_orig) const { bool CSE_ValidityChecker::CheckAggrMod(const Expr* e) const { const auto& e_i_t = e->GetType(); if ( IsAggr(e_i_t) ) { - // This assignment sets an aggregate value. Look for type matches. - for ( auto i : ids ) - if ( same_type(e_i_t, i->GetType()) ) + // This assignment sets an aggregate value. + + // Note, the following will almost always remain empty, so spinning + // through them in the loop below will be very quick. + IDSet non_local_ids; + std::unordered_set aggrs; + + // Here we do a bit of speed optimization by wiring in knowledge + // the aggregate WRITE effects only occur for tables. + if ( e_i_t->Tag() == TYPE_TABLE && pfs.GetSideEffects(SideEffectsOp::WRITE, e_i_t.get(), non_local_ids, aggrs) ) + return true; + + for ( auto i : ids ) { + for ( auto nli : non_local_ids ) + if ( nli == i ) { + // printf("non-local ID on WRITE: %s\n", i->Name()); + return true; + } + + auto i_t = i->GetType(); + + if ( i_t->Tag() == TYPE_ANY ) + continue; + + if ( same_type(e_i_t, i_t) ) return true; + + for ( auto a : aggrs ) + if ( same_type(a, i_t.get()) ) { + // printf("aggr type on WRITE: %s\n", i->Name()); + return true; + } + } + } + + return false; +} + +bool CSE_ValidityChecker::CheckTableRef(const Expr* e) const { + const auto& e_i_t = e->GetType(); + // This assignment sets an aggregate value. + + // Note, the following will almost always remain empty, so spinning + // through them in the loop below will be very quick. + IDSet non_local_ids; + std::unordered_set aggrs; + + if ( pfs.GetSideEffects(SideEffectsOp::READ, e_i_t.get(), non_local_ids, aggrs) ) + return true; + + for ( auto i : ids ) { + for ( auto nli : non_local_ids ) + if ( nli == i ) { + // printf("non-local ID on READ: %s\n", i->Name()); + return true; + } + + auto i_t = i->GetType(); + if ( i_t->Tag() == TYPE_ANY ) + continue; + + for ( auto a : aggrs ) + if ( same_type(a, i->GetType().get()) ) { + // printf("aggr type on READ: %p %s\n", e, obj_desc(e).c_str()); + return true; + } } return false; diff --git a/src/script_opt/Reduce.h b/src/script_opt/Reduce.h index 7553803802..75ebc391ed 100644 --- a/src/script_opt/Reduce.h +++ b/src/script_opt/Reduce.h @@ -350,8 +350,11 @@ protected: // Returns true if the assignment given by 'e' modifies an aggregate // with the same type as that of one of the identifiers we're tracking. + // ### bool CheckAggrMod(const Expr* e) const; + bool CheckTableRef(const Expr* e) const; + // Profile across all script functions. ProfileFuncs& pfs;