From b489cfc508efbd2e6df83fac304c74b1e28fe551 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 6 Nov 2023 07:03:17 -0800 Subject: [PATCH] greater ZAM optimization of inlined function calls --- src/Expr.h | 4 +- src/Stmt.cc | 2 +- src/script_opt/Expr.cc | 13 +++--- src/script_opt/GenIDDefs.cc | 36 ++++++++--------- src/script_opt/GenIDDefs.h | 15 +++---- src/script_opt/Inline.cc | 18 ++++++--- src/script_opt/Inline.h | 6 ++- src/script_opt/Reduce.cc | 80 ++++++++++++++++++++----------------- src/script_opt/Reduce.h | 30 +++++++++++--- src/script_opt/ScriptOpt.cc | 2 +- src/script_opt/Stmt.cc | 4 +- src/script_opt/TempVar.cc | 7 +++- src/script_opt/TempVar.h | 8 ++-- 13 files changed, 130 insertions(+), 95 deletions(-) diff --git a/src/Expr.h b/src/Expr.h index da1f2e13a3..3bec879dbb 100644 --- a/src/Expr.h +++ b/src/Expr.h @@ -1588,7 +1588,8 @@ private: class InlineExpr : public Expr { public: - InlineExpr(ListExprPtr arg_args, std::vector params, StmtPtr body, int frame_offset, TypePtr ret_type); + InlineExpr(ListExprPtr arg_args, std::vector params, std::vector param_is_modified, StmtPtr body, + int frame_offset, TypePtr ret_type); bool IsPure() const override; @@ -1610,6 +1611,7 @@ protected: void ExprDescribe(ODesc* d) const override; std::vector params; + std::vector param_is_modified; int frame_offset; ListExprPtr args; StmtPtr body; diff --git a/src/Stmt.cc b/src/Stmt.cc index 23a52b4bb5..a0747b6757 100644 --- a/src/Stmt.cc +++ b/src/Stmt.cc @@ -361,7 +361,7 @@ void do_print_stmt(const std::vector& vals) { } ExprStmt::ExprStmt(ExprPtr arg_e) : Stmt(STMT_EXPR), e(std::move(arg_e)) { - if ( e && e->Tag() != EXPR_CALL && e->IsPure() && e->GetType()->Tag() != TYPE_ERROR ) + if ( e && e->Tag() != EXPR_CALL && e->Tag() != EXPR_INLINE && e->IsPure() && e->GetType()->Tag() != TYPE_ERROR ) Warn("expression value ignored"); SetLocationInfo(e->GetLocationInfo()); diff --git a/src/script_opt/Expr.cc b/src/script_opt/Expr.cc index 14f3717ea6..146c21800b 100644 --- a/src/script_opt/Expr.cc +++ b/src/script_opt/Expr.cc @@ -2272,10 +2272,11 @@ ExprPtr CastExpr::Duplicate() { return SetSucc(new CastExpr(op->Duplicate(), typ ExprPtr IsExpr::Duplicate() { return SetSucc(new IsExpr(op->Duplicate(), t)); } -InlineExpr::InlineExpr(ListExprPtr arg_args, std::vector arg_params, StmtPtr arg_body, int _frame_offset, - TypePtr ret_type) +InlineExpr::InlineExpr(ListExprPtr arg_args, std::vector arg_params, std ::vector arg_param_is_modified, + StmtPtr arg_body, int _frame_offset, TypePtr ret_type) : Expr(EXPR_INLINE), args(std::move(arg_args)), body(std::move(arg_body)) { params = std::move(arg_params); + param_is_modified = std::move(arg_param_is_modified); frame_offset = _frame_offset; type = std::move(ret_type); } @@ -2316,7 +2317,7 @@ ValPtr InlineExpr::Eval(Frame* f) const { ExprPtr InlineExpr::Duplicate() { auto args_d = args->Duplicate()->AsListExprPtr(); auto body_d = body->Duplicate(); - return SetSucc(new InlineExpr(args_d, params, body_d, frame_offset, type)); + return SetSucc(new InlineExpr(args_d, params, param_is_modified, body_d, frame_offset, type)); } bool InlineExpr::IsReduced(Reducer* c) const { return NonReduced(this); } @@ -2334,11 +2335,7 @@ ExprPtr InlineExpr::Reduce(Reducer* c, StmtPtr& red_stmt) { loop_over_list(args_list, i) { StmtPtr arg_red_stmt; auto red_i = args_list[i]->Reduce(c, arg_red_stmt); - - auto param_i = c->GenInlineBlockName(params[i]); - auto assign = make_intrusive(param_i, red_i, false, nullptr, nullptr, false); - auto assign_stmt = make_intrusive(assign); - + auto assign_stmt = c->GenParam(params[i], red_i, param_is_modified[i]); red_stmt = MergeStmts(red_stmt, arg_red_stmt, assign_stmt); } diff --git a/src/script_opt/GenIDDefs.cc b/src/script_opt/GenIDDefs.cc index d268b18794..209176b3d7 100644 --- a/src/script_opt/GenIDDefs.cc +++ b/src/script_opt/GenIDDefs.cc @@ -20,9 +20,7 @@ GenIDDefs::GenIDDefs(std::shared_ptr _pf, const Func* f, ScopePtr s void GenIDDefs::TraverseFunction(const Func* f, ScopePtr scope, StmtPtr body) { func_flavor = f->Flavor(); - // Establish the outermost barrier and associated set of - // identifiers. - barrier_blocks.push_back(0); + // Establish the outermost set of identifiers. modified_IDs.emplace_back(); for ( const auto& g : pf->Globals() ) { @@ -63,9 +61,9 @@ TraversalCode GenIDDefs::PreStmt(const Stmt* s) { auto cr = s->AsCatchReturnStmt(); auto block = cr->Block(); - StartConfluenceBlock(s); + cr_active.push_back(confluence_blocks.size()); block->Traverse(this); - EndConfluenceBlock(); + cr_active.pop_back(); auto retvar = cr->RetVar(); if ( retvar ) @@ -381,9 +379,6 @@ void GenIDDefs::CheckVarUsage(const Expr* e, const ID* id) { } void GenIDDefs::StartConfluenceBlock(const Stmt* s) { - if ( s->Tag() == STMT_CATCH_RETURN ) - barrier_blocks.push_back(confluence_blocks.size()); - confluence_blocks.push_back(s); modified_IDs.emplace_back(); } @@ -393,11 +388,6 @@ void GenIDDefs::EndConfluenceBlock(bool no_orig) { id->GetOptInfo()->ConfluenceBlockEndsAfter(curr_stmt, no_orig); confluence_blocks.pop_back(); - - auto bb = barrier_blocks.back(); - if ( bb > 0 && confluence_blocks.size() == bb ) - barrier_blocks.pop_back(); - modified_IDs.pop_back(); } @@ -443,19 +433,29 @@ const Stmt* GenIDDefs::FindBreakTarget() { } void GenIDDefs::ReturnAt(const Stmt* s) { - for ( auto id : modified_IDs.back() ) - id->GetOptInfo()->ReturnAt(s); + // If we're right at a catch-return then we don't want to make the + // identifier as encountering a scope-ending "return" here. By avoiding + // that, we're able to do optimization across catch-return blocks. + if ( cr_active.empty() || cr_active.back() != confluence_blocks.size() ) + for ( auto id : modified_IDs.back() ) + id->GetOptInfo()->ReturnAt(s); } void GenIDDefs::TrackID(const ID* id, const ExprPtr& e) { auto oi = id->GetOptInfo(); - ASSERT(! barrier_blocks.empty()); - oi->DefinedAfter(curr_stmt, e, confluence_blocks, barrier_blocks.back()); + // The 4th argument here is hardwired to 0, meaning "assess across all + // confluence blocks". If we want definitions inside catch-return bodies + // to not propagate outside those bodies, we'd instead create new + // confluence blocks for catch-return statements, and use their identifier + // here to set the lowest limit for definitions. For now we leave + // DefinedAfter as capable of supporting that distinction in case we + // find need to revive it in the future. + oi->DefinedAfter(curr_stmt, e, confluence_blocks, 0); // Ensure we track this identifier across all relevant // confluence regions. - for ( auto i = barrier_blocks.back(); i < confluence_blocks.size(); ++i ) + for ( auto i = 0U; i < confluence_blocks.size(); ++i ) // Add one because modified_IDs includes outer non-confluence // block. modified_IDs[i + 1].insert(id); diff --git a/src/script_opt/GenIDDefs.h b/src/script_opt/GenIDDefs.h index ba68da3282..bc1c2785bd 100644 --- a/src/script_opt/GenIDDefs.h +++ b/src/script_opt/GenIDDefs.h @@ -91,15 +91,12 @@ private: // A stack of confluence blocks, with the innermost at the top/back. std::vector confluence_blocks; - // Index into confluence_blocks of "barrier" blocks that - // represent unavoidable confluence blocks (no branching - // out of them). These include the outermost block and - // any catch-return blocks. We track these because - // (1) there's no need for an IDOptInfo to track previously - // unseen confluence regions outer to those, and (2) they - // can get quite deep due when inlining, so there are savings - // to avoid having to track outer to them. - std::vector barrier_blocks; + // Stack of confluence blocks corresponding to activate catch-return + // statements. We used to stop identifier definitions at these + // boundaries, but given there's no confluence (i.e., the body of the + // catch-return *will* execute), we can do broader optimization if we + // don't treat them as their own (new) confluence blocks. + std::vector cr_active; // The following is parallel to confluence_blocks except // the front entry tracks identifiers at the outermost diff --git a/src/script_opt/Inline.cc b/src/script_opt/Inline.cc index 49270fb41c..e3f6e90c9b 100644 --- a/src/script_opt/Inline.cc +++ b/src/script_opt/Inline.cc @@ -117,7 +117,7 @@ void Inliner::Analyze() { if ( body->Tag() == STMT_CPP ) continue; - inline_ables.insert(func); + inline_ables[func] = f.Profile(); } for ( auto& f : funcs ) @@ -173,7 +173,8 @@ ExprPtr Inliner::CheckForInlining(CallExprPtr c) { auto func_vf = static_cast(function); - if ( inline_ables.count(func_vf) == 0 ) + auto ia = inline_ables.find(func_vf); + if ( ia == inline_ables.end() ) return c; if ( c->IsInWhen() ) { @@ -220,14 +221,18 @@ ExprPtr Inliner::CheckForInlining(CallExprPtr c) { // the function, *using the knowledge that the parameters are // declared first*. auto scope = func_vf->GetScope(); + auto& pf = ia->second; auto& vars = scope->OrderedVars(); int nparam = func_vf->GetType()->Params()->NumFields(); std::vector params; - params.reserve(nparam); + std::vector param_is_modified; - for ( int i = 0; i < nparam; ++i ) - params.emplace_back(vars[i]); + for ( int i = 0; i < nparam; ++i ) { + auto& vi = vars[i]; + params.emplace_back(vi); + param_is_modified.emplace_back((pf->Assignees().count(vi.get()) > 0)); + } // Recursively inline the body. This is safe to do because we've // ensured there are no recursive loops ... but we have to be @@ -251,7 +256,8 @@ ExprPtr Inliner::CheckForInlining(CallExprPtr c) { max_inlined_frame_size = hold_max_inlined_frame_size; auto t = c->GetType(); - auto ie = make_intrusive(c->ArgsPtr(), std::move(params), body_dup, curr_frame_size, t); + auto ie = make_intrusive(c->ArgsPtr(), std::move(params), std::move(param_is_modified), body_dup, + curr_frame_size, t); ie->SetOriginal(c); return ie; diff --git a/src/script_opt/Inline.h b/src/script_opt/Inline.h index 288f28c2ce..1f4594535d 100644 --- a/src/script_opt/Inline.h +++ b/src/script_opt/Inline.h @@ -13,6 +13,7 @@ namespace zeek::detail { class FuncInfo; +class ProfileFunc; class Inliner { public: @@ -43,8 +44,9 @@ protected: // the full set of scripts. std::vector& funcs; - // Functions that we've determined to be suitable for inlining. - std::unordered_set inline_ables; + // Functions that we've determined to be suitable for inlining, and + // their associated profiles. + std::unordered_map inline_ables; // Functions that we inlined. std::unordered_set did_inline; diff --git a/src/script_opt/Reduce.cc b/src/script_opt/Reduce.cc index 9ed76c31a7..4999bc6399 100644 --- a/src/script_opt/Reduce.cc +++ b/src/script_opt/Reduce.cc @@ -16,7 +16,7 @@ namespace zeek::detail { -Reducer::Reducer(const ScriptFunc* func) { +Reducer::Reducer(const ScriptFunc* func, std::shared_ptr _pf) : pf(std::move(_pf)) { auto& ft = func->GetType(); // Track the parameters so we don't remap them. @@ -121,6 +121,34 @@ bool Reducer::ID_IsReduced(const ID* id) const { return inline_block_level == 0 || tracked_ids.count(id) > 0 || id->IsGlobal() || IsTemporary(id); } +StmtPtr Reducer::GenParam(const IDPtr& id, ExprPtr rhs, bool is_modified) { + auto param = GenInlineBlockName(id); + auto rhs_id = rhs->Tag() == EXPR_NAME ? rhs->AsNameExpr()->IdPtr() : nullptr; + + if ( rhs_id && pf->Locals().count(rhs_id.get()) == 0 && ! rhs_id->IsConst() ) + // It's hard to guarantee the RHS won't change during + // the inline block's execution. + is_modified = true; + + if ( ! is_modified ) { + // Can use a temporary variable, which then supports + // optimization via alias propagation. + auto param_id = GenTemporary(id->GetType(), rhs, param->IdPtr()); + auto& tv = ids_to_temps[param_id.get()]; + + if ( rhs_id ) + tv->SetAlias(rhs_id); + else if ( rhs->Tag() == EXPR_CONST ) + tv->SetConst(rhs->AsConstExpr()); + + param_temps.insert(param_id.get()); + param = make_intrusive(param_id); + } + + auto assign = make_intrusive(param, rhs, false, nullptr, nullptr, false); + return make_intrusive(assign); +} + NameExprPtr Reducer::GenInlineBlockName(const IDPtr& id) { // We do this during reduction, not optimization, so no need // to associate with curr_stmt. @@ -469,23 +497,14 @@ bool Reducer::IsCSE(const AssignExpr* a, const NameExpr* lhs, const Expr* rhs) { auto rhs_id = rhs->AsNameExpr()->IdPtr(); auto rhs_tmp_var = FindTemporary(rhs_id.get()); - if ( rhs_tmp_var && rhs_tmp_var->Const() ) { // temporary can be replaced with constant - lhs_tmp->SetConst(rhs_tmp_var->Const()); + if ( rhs_tmp_var ) { + if ( rhs_tmp_var->Const() ) + // temporary can be replaced with constant + lhs_tmp->SetConst(rhs_tmp_var->Const()); + else + lhs_tmp->SetAlias(rhs_id); return true; } - - // Treat the LHS as either an alias for the RHS, - // or as a constant if the RHS is a constant in - // this context. - auto stmt_num = a->GetOptInfo()->stmt_num; - auto rhs_const = CheckForConst(rhs_id, stmt_num); - - if ( rhs_const ) - lhs_tmp->SetConst(rhs_const); - else - lhs_tmp->SetAlias(rhs_id); - - return true; } expr_temps.emplace_back(lhs_tmp); @@ -599,23 +618,7 @@ ExprPtr Reducer::UpdateExpr(ExprPtr e) { alias_tmp = FindTemporary(alias.get()); } - if ( alias->GetOptInfo()->IsTemp() ) { - // Temporaries always have only one definition, - // so no need to check for consistency. - auto new_usage = NewVarUsage(alias, e.get()); - return new_usage; - } - - auto e_stmt_1 = e->GetOptInfo()->stmt_num; - auto e_stmt_2 = tmp_var->RHS()->GetOptInfo()->stmt_num; - - auto def_1 = alias->GetOptInfo()->DefinitionBefore(e_stmt_1); - auto def_2 = tmp_var->Id()->GetOptInfo()->DefinitionBefore(e_stmt_2); - - if ( def_1 == def_2 && def_1 != NO_DEF ) - return NewVarUsage(alias, e.get()); - else - return e; + return NewVarUsage(alias, e.get()); } auto rhs = tmp_var->RHS(); @@ -690,15 +693,20 @@ StmtPtr Reducer::MergeStmts(const NameExpr* lhs, ExprPtr rhs, const StmtPtr& suc return merge_e_stmt; } -IDPtr Reducer::GenTemporary(const TypePtr& t, ExprPtr rhs) { +IDPtr Reducer::GenTemporary(TypePtr t, ExprPtr rhs, IDPtr id) { if ( Optimizing() ) reporter->InternalError("Generating a new temporary while optimizing"); if ( omitted_stmts.size() > 0 ) reporter->InternalError("Generating a new temporary while pruning statements"); - auto temp = std::make_shared(temps.size(), t, rhs); - IDPtr temp_id = install_ID(temp->Name(), "", false, false); + auto temp = std::make_shared(temps.size(), rhs); + + IDPtr temp_id; + if ( id ) + temp_id = id; + else + temp_id = install_ID(temp->Name(), "", false, false); temp->SetID(temp_id); temp_id->SetType(t); diff --git a/src/script_opt/Reduce.h b/src/script_opt/Reduce.h index de0a545bb9..8c254b2f8c 100644 --- a/src/script_opt/Reduce.h +++ b/src/script_opt/Reduce.h @@ -6,6 +6,7 @@ #include "zeek/Scope.h" #include "zeek/Stmt.h" #include "zeek/Traverse.h" +#include "zeek/script_opt/ProfileFunc.h" namespace zeek::detail { @@ -14,7 +15,7 @@ class TempVar; class Reducer { public: - Reducer(const ScriptFunc* func); + Reducer(const ScriptFunc* func, std::shared_ptr pf); StmtPtr Reduce(StmtPtr s); @@ -41,8 +42,18 @@ public: bool ID_IsReducedOrTopLevel(const IDPtr& id) { return ID_IsReducedOrTopLevel(id.get()); } bool ID_IsReducedOrTopLevel(const ID* id); - // This is called *prior* to pushing a new inline block, in - // order to generate the equivalent of function parameters. + // This is called *prior* to pushing a new inline block, in order + // to generate the equivalent of function parameters. "rhs" is + // the concrete argument to which the (inline version of the) + // identifier will be assigned, and "is_modified" is true if the + // parameter is assigned to in the body of the block. + // + // The return value is a statement that performs an assignment + // to initialize the parameter to the RHS. + StmtPtr GenParam(const IDPtr& id, ExprPtr rhs, bool is_modified); + + // Returns an expression for referring to an identifier in the + // context of an inline block. NameExprPtr GenInlineBlockName(const IDPtr& id); int NumNewLocals() const { return new_locals.size(); } @@ -69,6 +80,7 @@ public: bool IsNewLocal(const ID* id) const; bool IsTemporary(const ID* id) const { return FindTemporary(id) != nullptr; } + bool IsParamTemp(const ID* id) const { return param_temps.count(id) > 0; } bool IsConstantVar(const ID* id) const { return constant_vars.find(id) != constant_vars.end(); } @@ -114,7 +126,8 @@ public: // current assignment statement should be deleted). In // that case, has the side effect of associating an alias // for the LHS with the temporary variable that holds the - // equivalent RHS. + // equivalent RHS; or if the LHS is a local that has no other + // assignments, and the same for the RHS. // // Assumes reduction (including alias propagation) has // already been applied. @@ -190,7 +203,7 @@ protected: // into compound expressions. void CheckIDs(const Expr* e, std::vector& ids) const; - IDPtr GenTemporary(const TypePtr& t, ExprPtr rhs); + IDPtr GenTemporary(TypePtr t, ExprPtr rhs, IDPtr id = nullptr); std::shared_ptr FindTemporary(const ID* id) const; // Retrieve the identifier corresponding to the new local for @@ -211,6 +224,9 @@ protected: // the corresponding ConstExpr with the value. const ConstExpr* CheckForConst(const IDPtr& id, int stmt_num) const; + // Profile associated with the function. + std::shared_ptr pf; + // Tracks the temporary variables created during the reduction/ // optimization process. std::vector> temps; @@ -229,6 +245,10 @@ protected: // Local variables created during reduction/optimization. IDSet new_locals; + // Parameters that we're treating as temporaries to facilitate CSE + // across inlined functions. + IDSet param_temps; + // Mapping of original identifiers to new locals. Used to // rename local variables when inlining. std::unordered_map orig_to_new_locals; diff --git a/src/script_opt/ScriptOpt.cc b/src/script_opt/ScriptOpt.cc index db24694d04..f29b946d80 100644 --- a/src/script_opt/ScriptOpt.cc +++ b/src/script_opt/ScriptOpt.cc @@ -162,7 +162,7 @@ static void optimize_func(ScriptFunc* f, std::shared_ptr pf, ScopeP push_existing_scope(scope); - auto rc = std::make_shared(f); + auto rc = std::make_shared(f, pf); auto new_body = rc->Reduce(body); if ( reporter->Errors() > 0 ) { diff --git a/src/script_opt/Stmt.cc b/src/script_opt/Stmt.cc index 3edb9f183d..73f14649f9 100644 --- a/src/script_opt/Stmt.cc +++ b/src/script_opt/Stmt.cc @@ -740,8 +740,8 @@ bool StmtList::ReduceStmt(unsigned int& s_i, std::vector& f_stmts, Redu } } - if ( c->IsCSE(a, var, rhs.get()) ) { - // printf("discarding %s as unnecessary\n", obj_desc(a)); + if ( c->IsTemporary(var->Id()) && ! c->IsParamTemp(var->Id()) && c->IsCSE(a, var, rhs.get()) ) { + // printf("discarding %s as unnecessary\n", var->Id()->Name()); // Skip this now unnecessary statement. return true; } diff --git a/src/script_opt/TempVar.cc b/src/script_opt/TempVar.cc index a95f751fd1..e3007350cf 100644 --- a/src/script_opt/TempVar.cc +++ b/src/script_opt/TempVar.cc @@ -6,7 +6,7 @@ namespace zeek::detail { -TempVar::TempVar(size_t num, const TypePtr& t, ExprPtr _rhs) : type(t) { +TempVar::TempVar(size_t num, ExprPtr _rhs) { char buf[8192]; snprintf(buf, sizeof buf, "#%zu", num); name = buf; @@ -14,6 +14,11 @@ TempVar::TempVar(size_t num, const TypePtr& t, ExprPtr _rhs) : type(t) { } void TempVar::SetAlias(IDPtr _alias) { + if ( _alias == alias ) + // This can happen when treating function parameters as + // temporary variables. + return; + if ( alias ) reporter->InternalError("Re-aliasing a temporary"); diff --git a/src/script_opt/TempVar.h b/src/script_opt/TempVar.h index 6146ed90e0..c28959078f 100644 --- a/src/script_opt/TempVar.h +++ b/src/script_opt/TempVar.h @@ -2,8 +2,8 @@ #pragma once -// Class for managing temporary variables created during statement reduction -// for compilation. +// Reducer helper class for managing temporary variables created during +// statement reduction for compilation. #include @@ -15,10 +15,9 @@ namespace zeek::detail { class TempVar { public: - TempVar(size_t num, const TypePtr& t, ExprPtr rhs); + TempVar(size_t num, ExprPtr rhs); const char* Name() const { return name.data(); } - const zeek::Type* Type() const { return type.get(); } const Expr* RHS() const { return rhs.get(); } IDPtr Id() const { return id; } @@ -42,7 +41,6 @@ public: protected: std::string name; IDPtr id; - const TypePtr& type; ExprPtr rhs; bool active = true; IDPtr alias;