From 2e335a1fc48452c5ec0152cbb7549cbfc987081e Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 4 Dec 2023 12:24:15 -0800 Subject: [PATCH] done with code cleanup --- src/script_opt/ProfileFunc.h | 45 ++++++++++++++++++++++++++++++++- src/script_opt/Reduce.cc | 30 +++------------------- src/script_opt/Reduce.h | 24 +++++++++++++----- src/script_opt/SideEffects.h | 44 -------------------------------- src/script_opt/ZAM/Expr.cc | 6 +++-- src/script_opt/ZAM/Stmt.cc | 1 - src/script_opt/ZAM/Vars.cc | 1 - src/script_opt/ZAM/maint/README | 4 +-- 8 files changed, 71 insertions(+), 84 deletions(-) delete mode 100644 src/script_opt/SideEffects.h diff --git a/src/script_opt/ProfileFunc.h b/src/script_opt/ProfileFunc.h index c8a6b10389..af1206623a 100644 --- a/src/script_opt/ProfileFunc.h +++ b/src/script_opt/ProfileFunc.h @@ -37,7 +37,6 @@ #include "zeek/Stmt.h" #include "zeek/Traverse.h" #include "zeek/script_opt/ScriptOpt.h" -#include "zeek/script_opt/SideEffects.h" namespace zeek::detail { @@ -289,6 +288,50 @@ protected: bool abs_rec_fields; }; +// Describes an operation for which some forms of access can lead to state +// modifications. +class SideEffectsOp { +public: + // Access types correspond to: + // NONE - there are no side effects + // CALL - relevant for function calls + // CONSTRUCTION - relevant for constructing/coercing a record + // READ - relevant for reading a table element + // WRITE - relevant for modifying a table element + enum AccessType { NONE, CALL, CONSTRUCTION, READ, WRITE }; + + SideEffectsOp(AccessType at = NONE, const Type* t = nullptr) : access(at), type(t) {} + + auto GetAccessType() const { return access; } + const Type* GetType() const { return type; } + + void SetUnknownChanges() { has_unknown_changes = true; } + bool HasUnknownChanges() const { return has_unknown_changes; } + + void AddModNonGlobal(IDSet ids) { mod_non_locals.insert(ids.begin(), ids.end()); } + void AddModAggrs(TypeSet types) { mod_aggrs.insert(types.begin(), types.end()); } + + const auto& ModNonLocals() const { return mod_non_locals; } + const auto& ModAggrs() const { return mod_aggrs; } + +private: + AccessType access; + const Type* type; // type for which some operations alter state + + // Globals and/or captures that the operation potentially modifies. + IDSet mod_non_locals; + + // Aggregates (specified by types) that potentially modified. + TypeSet mod_aggrs; + + // Sometimes the side effects are not known (such as when making + // indirect function calls, so we can't know statically what function + // will be called). We refer to as Unknown, and their implications are + // presumed to be worst-case - any non-local or aggregate is potentially + // affected. + bool has_unknown_changes = false; +}; + // Function pointer for a predicate that determines whether a given // profile is compilable. Alternatively we could derive subclasses // from ProfileFuncs and use a virtual method for this, but that seems diff --git a/src/script_opt/Reduce.cc b/src/script_opt/Reduce.cc index b6dd3fd4ed..4e7e35cdde 100644 --- a/src/script_opt/Reduce.cc +++ b/src/script_opt/Reduce.cc @@ -492,8 +492,6 @@ bool Reducer::ExprValid(const ID* id, const Expr* e1, const Expr* e2) const { ids.push_back(e1->AsNameExpr()->Id()); CSE_ValidityChecker vc(pfs, ids, e1, e2); - - auto loc = reduction_root->GetLocationInfo(); reduction_root->Traverse(&vc); return vc.IsValid(); @@ -978,9 +976,7 @@ TraversalCode CSE_ValidityChecker::PreExpr(const Expr* e) { if ( CheckTableRef(aggr_t) ) return TC_ABORTALL; } - } - - break; + } break; default: break; } @@ -989,23 +985,12 @@ TraversalCode CSE_ValidityChecker::PreExpr(const Expr* e) { } bool CSE_ValidityChecker::CheckID(const ID* id, bool ignore_orig) { - // Only check type info for aggregates. - auto id_t = IsAggr(id->GetType()) ? id->GetType() : nullptr; - for ( auto i : ids ) { if ( ignore_orig && i == ids.front() ) continue; if ( id == i ) return Invalid(); // reassignment - - if ( id_t && same_type(id_t, i->GetType()) ) { - // Same-type aggregate. - // if ( ! ignore_orig ) printf("identifier %s (%d), start %s, end %s\n", id->Name(), ignore_orig, - // obj_desc(start_e).c_str(), obj_desc(end_e).c_str()); - if ( ignore_orig ) - return Invalid(); - } } return false; @@ -1065,15 +1050,8 @@ bool CSE_ValidityChecker::CheckSideEffects(SideEffectsOp::AccessType access, con IDSet non_local_ids; TypeSet aggrs; - // if ( access == SideEffectsOp::CONSTRUCTION ) printf("assessing construction\n"); - - if ( pfs.GetSideEffects(access, t.get(), non_local_ids, aggrs) ) { - // if ( access == SideEffectsOp::CONSTRUCTION ) printf("bailing directly on construction\n"); + if ( pfs.GetSideEffects(access, t.get(), non_local_ids, aggrs) ) return Invalid(); - } - - // if ( access == SideEffectsOp::CONSTRUCTION ) printf("construction has %ld/%ld non-locals/aggrs\n", - // non_local_ids.size(), aggrs.size()); return CheckSideEffects(non_local_ids, aggrs); } @@ -1090,10 +1068,8 @@ bool CSE_ValidityChecker::CheckSideEffects(const IDSet& non_local_ids, const Typ auto i_t = i->GetType(); for ( auto a : aggrs ) - if ( same_type(a, i_t.get()) ) { - // printf("invalid identifier %s\n", i->Name()); + if ( same_type(a, i_t.get()) ) return Invalid(); - } } return false; diff --git a/src/script_opt/Reduce.h b/src/script_opt/Reduce.h index 1618be30fd..b4f4cb4743 100644 --- a/src/script_opt/Reduce.h +++ b/src/script_opt/Reduce.h @@ -2,7 +2,6 @@ #pragma once -#include "zeek/Desc.h" // ### #include "zeek/Expr.h" #include "zeek/Scope.h" #include "zeek/Stmt.h" @@ -351,15 +350,28 @@ protected: // potentially aliases with one of the identifiers we're tracking. bool CheckAggrMod(const TypePtr& t); - // About elements ... - // ### + // Returns true if a record constructor/coercion of the given type has + // side effects and invalides the CSE opportunity. bool CheckRecordConstructor(const TypePtr& t); + + // The same for modifications to tables. bool CheckTableMod(const TypePtr& t); + + // The same for accessing (reading) tables. bool CheckTableRef(const TypePtr& t); + + // The same for the given function call. bool CheckCall(const CallExpr* c); + + // True if the given form of access to the given type has side effects. bool CheckSideEffects(SideEffectsOp::AccessType access, const TypePtr& t); + + // True if side effects to the given identifiers and aggregates invalidate + // the CSE opportunity. bool CheckSideEffects(const IDSet& non_local_ids, const TypeSet& aggrs); + // Helper function that marks the CSE opportunity as invalid and returns + // "true" (used by various methods to signal invalidation). bool Invalid() { is_valid = false; return true; @@ -394,9 +406,9 @@ protected: bool have_start_e = false; bool have_end_e = false; - // Whether analyzed expressions occur in the context of - // a statement that modifies an aggregate ("add" or "delete"). - // ### + // Whether analyzed expressions occur in the context of a statement + // that modifies an aggregate ("add" or "delete"), which changes the + // interpretation of the expressions. bool in_aggr_mod_stmt = false; }; diff --git a/src/script_opt/SideEffects.h b/src/script_opt/SideEffects.h deleted file mode 100644 index bf5cd28bfa..0000000000 --- a/src/script_opt/SideEffects.h +++ /dev/null @@ -1,44 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -// Analyses regarding operations where non-locals or aggregates can be modified -// indirectly, in support of ensuring that after such an operation, script -// optimization doesn't use a stale version of the non-local/aggregate. - -#pragma once - -#include "zeek/ID.h" - -namespace zeek::detail { - -// Describes an operation for which some forms of access can lead to state -// modifications. -class SideEffectsOp { -public: - enum AccessType { NONE, CALL, CONSTRUCTION, READ, WRITE }; - - // Type can be left off for CALL access. - SideEffectsOp(AccessType at = NONE, const Type* t = nullptr) : access(at), type(t) {} - - auto GetAccessType() const { return access; } - const Type* GetType() const { return type; } - - void SetUnknownChanges() { has_unknown_changes = true; } - bool HasUnknownChanges() const { return has_unknown_changes; } - - void AddModNonGlobal(IDSet ids) { mod_non_locals.insert(ids.begin(), ids.end()); } - void AddModAggrs(TypeSet types) { mod_aggrs.insert(types.begin(), types.end()); } - - const auto& ModNonLocals() const { return mod_non_locals; } - const auto& ModAggrs() const { return mod_aggrs; } - -private: - AccessType access; - const Type* type; // type for which some operations alter state - - IDSet mod_non_locals; - TypeSet mod_aggrs; - - bool has_unknown_changes = false; -}; - -} // namespace zeek::detail diff --git a/src/script_opt/ZAM/Expr.cc b/src/script_opt/ZAM/Expr.cc index bf6d005918..9e3b30caf4 100644 --- a/src/script_opt/ZAM/Expr.cc +++ b/src/script_opt/ZAM/Expr.cc @@ -4,7 +4,6 @@ #include "zeek/Desc.h" #include "zeek/Reporter.h" -#include "zeek/script_opt/ProfileFunc.h" #include "zeek/script_opt/ZAM/Compile.h" namespace zeek::detail { @@ -983,7 +982,10 @@ const ZAMStmt ZAMCompiler::DoCall(const CallExpr* c, const NameExpr* n) { if ( ! z.aux ) z.aux = new ZInstAux(0); - if ( ! indirect ) { + if ( indirect ) + z.aux->can_change_non_locals = true; + + else { IDSet non_local_ids; TypeSet aggrs; bool is_unknown = false; diff --git a/src/script_opt/ZAM/Stmt.cc b/src/script_opt/ZAM/Stmt.cc index c5540bdc6b..d38b99f95f 100644 --- a/src/script_opt/ZAM/Stmt.cc +++ b/src/script_opt/ZAM/Stmt.cc @@ -5,7 +5,6 @@ #include "zeek/IPAddr.h" #include "zeek/Reporter.h" #include "zeek/ZeekString.h" -#include "zeek/script_opt/ProfileFunc.h" #include "zeek/script_opt/ZAM/Compile.h" namespace zeek::detail { diff --git a/src/script_opt/ZAM/Vars.cc b/src/script_opt/ZAM/Vars.cc index bab4ee0761..3994c12f2d 100644 --- a/src/script_opt/ZAM/Vars.cc +++ b/src/script_opt/ZAM/Vars.cc @@ -4,7 +4,6 @@ #include "zeek/Desc.h" #include "zeek/Reporter.h" -#include "zeek/script_opt/ProfileFunc.h" #include "zeek/script_opt/Reduce.h" #include "zeek/script_opt/ZAM/Compile.h" diff --git a/src/script_opt/ZAM/maint/README b/src/script_opt/ZAM/maint/README index 0ebe927645..ee69f88149 100644 --- a/src/script_opt/ZAM/maint/README +++ b/src/script_opt/ZAM/maint/README @@ -2,8 +2,8 @@ This directory holds scripts and associated data to support maintenance of ZAM optimization: list-bifs.zeek - A Zeek script that prints to stdout a list of the BiFs available - for the Zeek invocation. + A Zeek script that prints to stdout a sorted list of the BiFs + available for the Zeek invocation. Use this to compare with BiFs.list to see whether there are any new BiFs (or old ones that have been removed). If so, update