From f8478def21675c29e9de74d2b0ed90d16ebe580d Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 30 Nov 2023 13:26:55 -0800 Subject: [PATCH] CSE across function calls --- src/Func.h | 2 +- src/script_opt/FuncInfo.cc | 87 +-------------------------- src/script_opt/ProfileFunc.cc | 107 +++++++++++++++++++++++++--------- src/script_opt/ProfileFunc.h | 10 ++++ src/script_opt/Reduce.cc | 86 +++++++++++---------------- src/script_opt/Reduce.h | 2 + src/script_opt/SideEffects.h | 12 +--- 7 files changed, 132 insertions(+), 174 deletions(-) diff --git a/src/Func.h b/src/Func.h index f434f6021f..243b0ea67b 100644 --- a/src/Func.h +++ b/src/Func.h @@ -264,7 +264,7 @@ public: * If this function is an instantiated lambda, returns the primary from * which it was constructed. Otherwise, returns this function itself. */ - const ScriptFunc* Primary() { return primary ? primary.get() : this; } + const ScriptFunc* Primary() const { return primary ? primary.get() : this; } /** * Returns the function's frame size. diff --git a/src/script_opt/FuncInfo.cc b/src/script_opt/FuncInfo.cc index a7da9977e2..9b9752add8 100644 --- a/src/script_opt/FuncInfo.cc +++ b/src/script_opt/FuncInfo.cc @@ -15,90 +15,6 @@ static std::unordered_set side_effects_free_BiFs = { "Analyzer::__register_for_port", "Analyzer::__schedule_analyzer", "Analyzer::__tag", - "Broker::__append", - "Broker::__auto_publish", - "Broker::__auto_unpublish", - "Broker::__clear", - "Broker::__close", - "Broker::__create_clone", - "Broker::__create_master", - "Broker::__data", - "Broker::__data_type", - "Broker::__decrement", - "Broker::__erase", - "Broker::__exists", - "Broker::__flush_logs", - "Broker::__forward", - "Broker::__get", - "Broker::__get_index_from_value", - "Broker::__increment", - "Broker::__insert_into_set", - "Broker::__insert_into_table", - "Broker::__is_closed", - "Broker::__keys", - "Broker::__listen", - "Broker::__node_id", - "Broker::__opaque_clone_through_serialization", - "Broker::__peer", - "Broker::__peer_no_retry", - "Broker::__peers", - "Broker::__pop", - "Broker::__publish_id", - "Broker::__push", - "Broker::__put", - "Broker::__put_unique", - "Broker::__record_assign", - "Broker::__record_create", - "Broker::__record_iterator", - "Broker::__record_iterator_last", - "Broker::__record_iterator_next", - "Broker::__record_iterator_value", - "Broker::__record_lookup", - "Broker::__record_size", - "Broker::__remove_from", - "Broker::__set_clear", - "Broker::__set_contains", - "Broker::__set_create", - "Broker::__set_insert", - "Broker::__set_iterator", - "Broker::__set_iterator_last", - "Broker::__set_iterator_next", - "Broker::__set_iterator_value", - "Broker::__set_metrics_export_endpoint_name", - "Broker::__set_metrics_export_interval", - "Broker::__set_metrics_export_prefixes", - "Broker::__set_metrics_export_topic", - "Broker::__set_metrics_import_topics", - "Broker::__set_remove", - "Broker::__set_size", - "Broker::__store_name", - "Broker::__subscribe", - "Broker::__table_clear", - "Broker::__table_contains", - "Broker::__table_create", - "Broker::__table_insert", - "Broker::__table_iterator", - "Broker::__table_iterator_last", - "Broker::__table_iterator_next", - "Broker::__table_iterator_value", - "Broker::__table_lookup", - "Broker::__table_remove", - "Broker::__table_size", - "Broker::__unpeer", - "Broker::__unsubscribe", - "Broker::__vector_clear", - "Broker::__vector_create", - "Broker::__vector_insert", - "Broker::__vector_iterator", - "Broker::__vector_iterator_last", - "Broker::__vector_iterator_next", - "Broker::__vector_iterator_value", - "Broker::__vector_lookup", - "Broker::__vector_remove", - "Broker::__vector_replace", - "Broker::__vector_size", - "Broker::make_event", - "Broker::publish", "FileExtract::__set_limit", "Files::__add_analyzer", "Files::__analyzer_enabled", @@ -544,6 +460,9 @@ static std::unordered_set side_effects_free_BiFs = { // Ones not listed: // +// Broker::* +// These can manipulate unspecified (at script level) records. +// // Cluster::publish_hrw // Cluster::publish_rr // Call script functions to get topic names. diff --git a/src/script_opt/ProfileFunc.cc b/src/script_opt/ProfileFunc.cc index 70b907614c..e3818d3c25 100644 --- a/src/script_opt/ProfileFunc.cc +++ b/src/script_opt/ProfileFunc.cc @@ -1137,34 +1137,11 @@ bool ProfileFuncs::AssessSideEffects(const ExprPtr& e, IDSet& non_local_ids, std bool& is_unknown) { std::shared_ptr pf; - if ( e->Tag() == EXPR_NAME && e->GetType()->Tag() == TYPE_FUNC ) { - // This occurs when the expression is itself a function name, and - // in an attribute context indicates an implicit call. - auto fid = e->AsNameExpr()->Id(); - auto fv = fid->GetVal(); + if ( e->Tag() == EXPR_NAME && e->GetType()->Tag() == TYPE_FUNC ) + return GetCallSideEffects(e->AsNameExpr(), non_local_ids, aggrs, is_unknown); - if ( ! fv || ! fid->IsConst() ) { - // The value is unavailable (likely a bug), or might change - // at run-time. - is_unknown = true; - return true; - } - - auto func = fv->AsFunc(); - if ( func->GetKind() == Func::BUILTIN_FUNC ) { - if ( ! is_side_effect_free(func->Name()) ) - is_unknown = true; - return true; - } - - auto sf = static_cast(func)->Primary(); - ASSERT(func_profs.count(sf) != 0); - pf = func_profs[sf]; - } - else { - ASSERT(expr_profs.count(e.get()) != 0); - pf = expr_profs[e.get()]; - } + ASSERT(expr_profs.count(e.get()) != 0); + pf = expr_profs[e.get()]; return AssessSideEffects(pf.get(), non_local_ids, aggrs, is_unknown); } @@ -1207,6 +1184,13 @@ bool ProfileFuncs::AssessSideEffects(const ProfileFunc* pf, IDSet& non_local_ids } for ( auto& f : pf->ScriptCalls() ) { + if ( f->Flavor() != FUNC_FLAVOR_FUNCTION ) { + // A hook (since events can't be called) - not something + // to analyze further. + is_unknown = true; + return true; + } + auto pff = func_profs[f]; if ( active_func_profiles.count(pff) > 0 ) continue; @@ -1305,4 +1289,73 @@ bool ProfileFuncs::GetSideEffects(SideEffectsOp::AccessType access, const Type* return false; } +std::shared_ptr ProfileFuncs::GetCallSideEffects(const ScriptFunc* sf) { + sf = sf->Primary(); + + auto sf_se = func_side_effects.find(sf); + + if ( sf_se != func_side_effects.end() ) + return sf_se->second; + + bool is_unknown = false; + IDSet nla; + std::unordered_set mod_aggrs; + + ASSERT(func_profs.count(sf) != 0); + auto pf = func_profs[sf]; + + if ( ! AssessSideEffects(pf.get(), nla, mod_aggrs, is_unknown) ) + // Can't figure it out yet. + return nullptr; + + auto seo = std::make_shared(SideEffectsOp::CALL); + seo->AddModNonGlobal(nla); + seo->AddModAggrs(mod_aggrs); + + if ( is_unknown ) + seo->SetUnknownChanges(); + + func_side_effects[sf] = seo; + + return seo; +} + +bool ProfileFuncs::GetCallSideEffects(const NameExpr* n, IDSet& non_local_ids, std::unordered_set& aggrs, + bool& is_unknown) { + // This occurs when the expression is itself a function name, and + // in an attribute context indicates an implicit call. + auto fid = n->Id(); + auto fv = fid->GetVal(); + + if ( ! fv || ! fid->IsConst() ) { + // The value is unavailable (likely a bug), or might change + // at run-time. + is_unknown = true; + return true; + } + + auto func = fv->AsFunc(); + if ( func->GetKind() == Func::BUILTIN_FUNC ) { + if ( ! is_side_effect_free(func->Name()) ) + is_unknown = true; + return true; + } + + auto sf = static_cast(func); + auto seo = GetCallSideEffects(sf); + + if ( ! seo ) + return false; + + if ( seo->HasUnknownChanges() ) + is_unknown = true; + + for ( auto a : seo->ModAggrs() ) + aggrs.insert(a); + for ( auto nl : seo->ModNonLocals() ) + non_local_ids.insert(nl); + + return true; +} + } // namespace zeek::detail diff --git a/src/script_opt/ProfileFunc.h b/src/script_opt/ProfileFunc.h index fae9b650fd..2ae051711c 100644 --- a/src/script_opt/ProfileFunc.h +++ b/src/script_opt/ProfileFunc.h @@ -328,6 +328,13 @@ public: bool GetSideEffects(SideEffectsOp::AccessType access, const Type* t, IDSet& non_local_ids, std::unordered_set& aggrs) const; + // Returns nil if side effects are not available. That should never be + // the case after we've done our initial analysis, but is provided + // as a signal so that this method can also be used during that analysis. + bool GetCallSideEffects(const NameExpr* n, IDSet& non_local_ids, std::unordered_set& aggrs, + bool& is_unknown); + std::shared_ptr GetCallSideEffects(const ScriptFunc* f); + const auto& AttrSideEffects() const { return attr_side_effects; } const auto& RecordConstructorEffects() const { return record_constr_with_side_effects; } @@ -440,6 +447,9 @@ protected: // appear in "when" clauses), and in that context it suffices. std::unordered_map> func_profs; + // ### ScriptFunc not Func, assumes is_side_effect_free() is cheap + std::unordered_map> func_side_effects; + // Maps expressions to their profiles. This is only germane // externally for LambdaExpr's, but internally it abets memory // management. diff --git a/src/script_opt/Reduce.cc b/src/script_opt/Reduce.cc index 2057850795..2f186c3ac3 100644 --- a/src/script_opt/Reduce.cc +++ b/src/script_opt/Reduce.cc @@ -11,6 +11,7 @@ #include "zeek/Stmt.h" #include "zeek/Var.h" #include "zeek/script_opt/ExprOptInfo.h" +#include "zeek/script_opt/FuncInfo.h" #include "zeek/script_opt/StmtOptInfo.h" #include "zeek/script_opt/TempVar.h" @@ -951,32 +952,7 @@ TraversalCode CSE_ValidityChecker::PreExpr(const Expr* e) { break; case EXPR_CALL: - if ( have_sensitive_IDs ) { - auto c = e->AsCallExpr(); - auto func = c->Func(); - std::string desc; - if ( func->Tag() == EXPR_NAME ) { - auto f = func->AsNameExpr()->Id(); - if ( f->IsGlobal() ) { - auto func_v = f->GetVal(); - if ( func_v ) { - auto func_vf = func_v->AsFunc(); - - if ( func_vf->GetKind() == Func::SCRIPT_FUNC ) - desc = "script"; - else - desc = "BiF"; - } - else - desc = "missing"; - } - else - desc = "indirect"; - } - else - desc = "compound-indirect"; - - // printf("call sensitivity: %s %s\n", desc.c_str(), obj_desc(e).c_str()); + if ( CheckCall(e->AsCallExpr()) ) { is_valid = false; return TC_ABORTALL; } @@ -990,13 +966,13 @@ TraversalCode CSE_ValidityChecker::PreExpr(const Expr* e) { case EXPR_RECORD_COERCE: case EXPR_RECORD_CONSTRUCTOR: - // Note, record coercion behaves like constructors in terms of - // potentially executing &default functions. In either case, - // the type of the expression reflects the type we want to analyze - // for side effects. + // Note, record coercion behaves like constructors in terms of + // potentially executing &default functions. In either case, + // the type of the expression reflects the type we want to analyze + // for side effects. if ( CheckRecordConstructor(e->GetType()) ) { - is_valid = false; - return TC_ABORTALL; + is_valid = false; + return TC_ABORTALL; } break; @@ -1022,18 +998,6 @@ TraversalCode CSE_ValidityChecker::PreExpr(const Expr* e) { return TC_ABORTALL; } } -#if 0 - if ( t == EXPR_INDEX && have_sensitive_IDs ) { - // Unfortunately in isolation we can't statically determine - // whether this table has a &default associated with it. - // In principle we could track all instances of the table - // type seen (across the entire set of scripts), and note - // whether any of those include an expression, but that's a - // lot of work for what might be minimal gain. - is_valid = false; - return TC_ABORTALL; - } -#endif } break; @@ -1095,8 +1059,23 @@ bool CSE_ValidityChecker::CheckTableMod(const TypePtr& t) const { return CheckSideEffects(SideEffectsOp::WRITE, t); } -bool CSE_ValidityChecker::CheckTableRef(const TypePtr& t) const { - return CheckSideEffects(SideEffectsOp::READ, t); +bool CSE_ValidityChecker::CheckTableRef(const TypePtr& t) const { return CheckSideEffects(SideEffectsOp::READ, t); } + +bool CSE_ValidityChecker::CheckCall(const CallExpr* c) const { + auto func = c->Func(); + std::string desc; + if ( func->Tag() != EXPR_NAME ) + // Can't analyze indirect calls. + return true; + + IDSet non_local_ids; + std::unordered_set aggrs; + bool is_unknown = false; + + auto resolved = pfs.GetCallSideEffects(func->AsNameExpr(), non_local_ids, aggrs, is_unknown); + ASSERT(resolved); + + return is_unknown || CheckSideEffects(non_local_ids, aggrs); } bool CSE_ValidityChecker::CheckSideEffects(SideEffectsOp::AccessType access, const TypePtr& t) const { @@ -1106,23 +1085,24 @@ bool CSE_ValidityChecker::CheckSideEffects(SideEffectsOp::AccessType access, con if ( pfs.GetSideEffects(access, t.get(), non_local_ids, aggrs) ) return true; + return CheckSideEffects(non_local_ids, aggrs); +} + +bool CSE_ValidityChecker::CheckSideEffects(const IDSet& non_local_ids, + const std::unordered_set& aggrs) const { if ( non_local_ids.empty() && aggrs.empty() ) - // This is far and away the most common case. + // This is far and away the most common case. return false; for ( auto i : ids ) { for ( auto nli : non_local_ids ) - if ( nli == i ) { - // printf("non-local ID on %d: %s\n", access, i->Name()); + if ( nli == i ) return true; - } auto i_t = i->GetType(); for ( auto a : aggrs ) - if ( same_type(a, i_t.get()) ) { - // printf("aggr type on %d: %s\n", access, i->Name()); + if ( same_type(a, i_t.get()) ) return true; - } } return false; diff --git a/src/script_opt/Reduce.h b/src/script_opt/Reduce.h index daf2014d61..7748d7e953 100644 --- a/src/script_opt/Reduce.h +++ b/src/script_opt/Reduce.h @@ -355,7 +355,9 @@ protected: bool CheckRecordConstructor(const TypePtr& t) const; bool CheckTableMod(const TypePtr& t) const; bool CheckTableRef(const TypePtr& t) const; + bool CheckCall(const CallExpr* c) const; bool CheckSideEffects(SideEffectsOp::AccessType access, const TypePtr& t) const; + bool CheckSideEffects(const IDSet& non_local_ids, const std::unordered_set& aggrs) const; // Profile across all script functions. ProfileFuncs& pfs; diff --git a/src/script_opt/SideEffects.h b/src/script_opt/SideEffects.h index a95fface91..d37bdd91fd 100644 --- a/src/script_opt/SideEffects.h +++ b/src/script_opt/SideEffects.h @@ -14,18 +14,12 @@ namespace zeek::detail { // modifications. class SideEffectsOp { public: - // ### remove NONE? - enum AccessType { NONE, READ, WRITE, CONSTRUCTION }; + enum AccessType { NONE, CALL, CONSTRUCTION, READ, WRITE }; - SideEffectsOp() : access(NONE), type(nullptr) {} - SideEffectsOp(AccessType at, const Type* t) : access(at), type(t) {} + // Type can be left off for CALL access. + SideEffectsOp(AccessType at = NONE, const Type* t = nullptr) : access(at), type(t) {} auto GetAccessType() const { return access; } - bool NoSideEffects() const { return access == NONE; } - bool OnReadAccess() const { return access == READ; } - bool OnWriteAccess() const { return access == WRITE; } - bool OnConstruction() const { return access == CONSTRUCTION; } - const Type* GetType() const { return type; } void SetUnknownChanges() { has_unknown_changes = true; }