From 35e157a0abef10593e61f2e54d76341714ebeebc Mon Sep 17 00:00:00 2001
From: Vern Paxson
Date: Fri, 29 Nov 2024 16:12:05 -0800
Subject: [PATCH] skip optimization of functions with AST nodes unknown to script optimization

---
Reviewer note (not part of the commit message): this copy of the patch had
its line breaks collapsed and every angle-bracketed span stripped. The line
structure below was rebuilt so that each hunk matches its @@ counts and the
diffstat; indentation and blank-line placement are inferred. Template
arguments (const_cast<Stmt*>, std::vector<StmtPtr>, std::set<StmtTag>, and
those on untouched context lines such as AttrSet/AttrVec/Lambdas/Constants)
are presumed from usage - verify against upstream before applying. The
author e-mail address on the From: line is missing and was not guessed.

 src/script_opt/CPP/Util.cc    |   6 ++
 src/script_opt/Inline.cc      |   3 +-
 src/script_opt/ProfileFunc.cc |   8 +--
 src/script_opt/ProfileFunc.h  |  10 +--
 src/script_opt/ScriptOpt.cc   | 119 ++++++++++++++++++++++++++++++++++
 src/script_opt/ScriptOpt.h    |   5 ++
 src/script_opt/ZAM/Support.cc |   6 ++
 7 files changed, 148 insertions(+), 9 deletions(-)

diff --git a/src/script_opt/CPP/Util.cc b/src/script_opt/CPP/Util.cc
index 15fc981ede..84e1846151 100644
--- a/src/script_opt/CPP/Util.cc
+++ b/src/script_opt/CPP/Util.cc
@@ -39,6 +39,12 @@ string scope_prefix(const string& scope) { return "zeek::detail::CPP_" + scope;
 string scope_prefix(int scope) { return scope_prefix(to_string(scope)); }
 
 bool is_CPP_compilable(const ProfileFunc* pf, const char** reason) {
+    if ( has_AST_node_unknown_to_script_opt(pf, false) ) {
+        if ( reason )
+            *reason = "unknown AST node type";
+        return false;
+    }
+
     if ( analysis_options.allow_cond )
         return true;
 
diff --git a/src/script_opt/Inline.cc b/src/script_opt/Inline.cc
index ad727dd908..16c88cbd81 100644
--- a/src/script_opt/Inline.cc
+++ b/src/script_opt/Inline.cc
@@ -10,6 +10,7 @@
 #include "zeek/script_opt/ProfileFunc.h"
 #include "zeek/script_opt/ScriptOpt.h"
 #include "zeek/script_opt/StmtOptInfo.h"
+#include "zeek/script_opt/ZAM/Support.h"
 
 namespace zeek::detail {
 
@@ -160,7 +161,7 @@ void Inliner::Analyze() {
         if ( non_recursive_funcs.count(func) == 0 )
             continue;
 
-        if ( body->Tag() == STMT_CPP )
+        if ( ! is_ZAM_compilable(f.Profile()) )
             continue;
 
         inline_ables[func] = f.Profile();
diff --git a/src/script_opt/ProfileFunc.cc b/src/script_opt/ProfileFunc.cc
index 25d6a5e01d..c489921f46 100644
--- a/src/script_opt/ProfileFunc.cc
+++ b/src/script_opt/ProfileFunc.cc
@@ -101,7 +101,7 @@ ProfileFunc::ProfileFunc(const Expr* e, bool _abs_rec_fields) {
 }
 
 TraversalCode ProfileFunc::PreStmt(const Stmt* s) {
-    stmts.push_back(s);
+    stmts.push_back({NewRef{}, const_cast<Stmt*>(s)});
 
     switch ( s->Tag() ) {
         case STMT_INIT:
@@ -185,7 +185,7 @@ TraversalCode ProfileFunc::PreStmt(const Stmt* s) {
 }
 
 TraversalCode ProfileFunc::PreExpr(const Expr* e) {
-    exprs.push_back(e);
+    exprs.push_back({NewRef{}, const_cast<Expr*>(e)});
 
     TrackType(e->GetType());
 
@@ -867,11 +867,11 @@ void ProfileFuncs::ComputeProfileHash(std::shared_ptr<ProfileFunc> pf) {
         h = merge_p_hashes(h, p_hash(ov[i]->Name()));
 
     h = merge_p_hashes(h, p_hash("stmts"));
-    for ( auto i : pf->Stmts() )
+    for ( auto& i : pf->Stmts() )
         h = merge_p_hashes(h, p_hash(i->Tag()));
 
     h = merge_p_hashes(h, p_hash("exprs"));
-    for ( auto i : pf->Exprs() )
+    for ( auto& i : pf->Exprs() )
         h = merge_p_hashes(h, p_hash(i->Tag()));
 
     h = merge_p_hashes(h, p_hash("ids"));
diff --git a/src/script_opt/ProfileFunc.h b/src/script_opt/ProfileFunc.h
index 9a2c53bc02..c8bcd71d9a 100644
--- a/src/script_opt/ProfileFunc.h
+++ b/src/script_opt/ProfileFunc.h
@@ -66,6 +66,8 @@ inline p_hash_type merge_p_hashes(p_hash_type h1, p_hash_type h2) {
 using AttrSet = std::unordered_set<const Attr*>;
 using AttrVec = std::vector<const Attr*>;
 
+class ProfileFuncs;
+
 // Class for profiling the components of a single function (or expression).
 class ProfileFunc : public TraversalCallback {
 public:
@@ -101,8 +103,8 @@ public:
     const auto& TableRefs() const { return tbl_refs; }
     const auto& AggrMods() const { return aggr_mods; }
     const IDSet& Inits() const { return inits; }
-    const std::vector<const Stmt*>& Stmts() const { return stmts; }
-    const std::vector<const Expr*>& Exprs() const { return exprs; }
+    const std::vector<StmtPtr>& Stmts() const { return stmts; }
+    const std::vector<ExprPtr>& Exprs() const { return exprs; }
     const std::vector<const LambdaExpr*>& Lambdas() const { return lambdas; }
     const std::vector<const ConstExpr*>& Constants() const { return constants; }
     const IDSet& UnorderedIdentifiers() const { return ids; }
@@ -213,11 +215,11 @@ protected:
 
     // Statements seen in the function. Does not include indirect
     // statements, such as those in lambda bodies.
-    std::vector<const Stmt*> stmts;
+    std::vector<StmtPtr> stmts;
 
     // Expressions seen in the function. Does not include indirect
     // expressions (such as those appearing in attributes of types).
-    std::vector<const Expr*> exprs;
+    std::vector<ExprPtr> exprs;
 
     // Lambdas seen in the function. We don't profile lambda bodies,
     // but rather make them available for separate profiling if
diff --git a/src/script_opt/ScriptOpt.cc b/src/script_opt/ScriptOpt.cc
index 47effda307..de53cd41ca 100644
--- a/src/script_opt/ScriptOpt.cc
+++ b/src/script_opt/ScriptOpt.cc
@@ -657,4 +657,123 @@ void profile_script_execution() {
 
 void finish_script_execution() { profile_script_execution(); }
 
+// For now, we have equivalent concerns between ZAM and compile-to-C++.
+bool has_AST_node_unknown_to_script_opt(const ProfileFunc* prof, bool /* is_ZAM */) {
+    // Note that the following sets are not comprehensive across the
+    // standard tags, because some tags are only generated *by* script
+    // optimization
+    // clang-format off
+    static const std::set<StmtTag> known_stmts = {
+        STMT_PRINT,
+        STMT_EVENT,
+        STMT_EXPR,
+        STMT_IF,
+        STMT_WHEN,
+        STMT_SWITCH,
+        STMT_FOR,
+        STMT_NEXT,
+        STMT_BREAK,
+        STMT_RETURN,
+        STMT_LIST,
+        // STMT_EVENT_BODY_LIST,
+        STMT_INIT,
+        STMT_FALLTHROUGH,
+        STMT_WHILE,
+        // STMT_CATCH_RETURN,
+        // STMT_CHECK_ANY_LEN,
+        // STMT_CPP,
+        // STMT_ZAM,
+        STMT_NULL,
+        STMT_ASSERT,
+        // STMT_EXTERN,
+        // STMT_STD_FUNCTION,
+    };
+    // clang-format on
+
+    for ( auto& s : prof->Stmts() )
+        if ( known_stmts.count(s->Tag()) == 0 )
+            return true;
+
+    // clang-format off
+    static const std::set<ExprTag> known_exprs = {
+        // EXPR_ANY,
+        EXPR_NAME,
+        EXPR_CONST,
+        EXPR_CLONE,
+        EXPR_INCR,
+        EXPR_DECR,
+        EXPR_NOT,
+        EXPR_COMPLEMENT,
+        EXPR_POSITIVE,
+        EXPR_NEGATE,
+        EXPR_ADD, EXPR_SUB,
+        EXPR_AGGR_ADD,
+        EXPR_AGGR_DEL,
+        EXPR_ADD_TO,
+        EXPR_REMOVE_FROM,
+        EXPR_TIMES,
+        EXPR_DIVIDE,
+        EXPR_MASK,
+        EXPR_MOD,
+        EXPR_AND,
+        EXPR_OR,
+        EXPR_XOR,
+        EXPR_LSHIFT,
+        EXPR_RSHIFT,
+        EXPR_AND_AND,
+        EXPR_OR_OR,
+        EXPR_LT,
+        EXPR_LE,
+        EXPR_EQ,
+        EXPR_NE,
+        EXPR_GE,
+        EXPR_GT,
+        EXPR_COND,
+        EXPR_REF,
+        EXPR_ASSIGN,
+        EXPR_INDEX,
+        EXPR_FIELD,
+        EXPR_HAS_FIELD,
+        EXPR_RECORD_CONSTRUCTOR,
+        EXPR_TABLE_CONSTRUCTOR,
+        EXPR_SET_CONSTRUCTOR,
+        EXPR_VECTOR_CONSTRUCTOR,
+        EXPR_FIELD_ASSIGN,
+        EXPR_IN,
+        EXPR_LIST,
+        EXPR_CALL,
+        EXPR_LAMBDA,
+        EXPR_EVENT,
+        EXPR_SCHEDULE,
+        EXPR_ARITH_COERCE,
+        EXPR_RECORD_COERCE,
+        EXPR_TABLE_COERCE,
+        EXPR_VECTOR_COERCE,
+        EXPR_TO_ANY_COERCE,
+        EXPR_FROM_ANY_COERCE,
+        EXPR_SIZE,
+        EXPR_CAST,
+        EXPR_IS,
+        // EXPR_INDEX_SLICE_ASSIGN,
+        EXPR_INLINE,
+        // EXPR_APPEND_TO,
+        // EXPR_INDEX_ASSIGN,
+        // EXPR_FIELD_LHS_ASSIGN,
+        // EXPR_REC_ASSIGN_FIELDS,
+        // EXPR_REC_ADD_FIELDS,
+        // EXPR_REC_CONSTRUCT_WITH_REC,
+        // EXPR_FROM_ANY_VEC_COERCE,
+        // EXPR_ANY_INDEX,
+        // EXPR_SCRIPT_OPT_BUILTIN,
+        // EXPR_NOP,
+    };
+    // clang-format on
+
+    for ( auto& e : prof->Exprs() )
+        if ( known_exprs.count(e->Tag()) == 0 )
+            return true;
+
+    return false;
+}
+
 } // namespace zeek::detail
diff --git a/src/script_opt/ScriptOpt.h b/src/script_opt/ScriptOpt.h
index 441c8d6ec2..4ae68870cc 100644
--- a/src/script_opt/ScriptOpt.h
+++ b/src/script_opt/ScriptOpt.h
@@ -263,6 +263,11 @@ extern void clear_script_analysis();
 // Called when Zeek is terminating.
 extern void finish_script_execution();
 
+// Returns true if the given profile indicates the presence of an AST
+// node not known to script optimization. The second argument specifies
+// whether we're doing ZAM optimization; if not, compile-to-C++ is assumed.
+extern bool has_AST_node_unknown_to_script_opt(const ProfileFunc* prof, bool /* is_ZAM */);
+
 // Returns true if the given call has a specialized ZAM equivalent when
 // used in a conditional.
 extern bool IsZAM_BuiltInCond(const CallExpr* c);
diff --git a/src/script_opt/ZAM/Support.cc b/src/script_opt/ZAM/Support.cc
index cf8c31b301..849d0a26cc 100644
--- a/src/script_opt/ZAM/Support.cc
+++ b/src/script_opt/ZAM/Support.cc
@@ -117,6 +117,12 @@ bool file_mgr_set_reassembly_buffer(StringVal* file_id, uint64_t max) {
 bool ZAM_error = false;
 
 bool is_ZAM_compilable(const ProfileFunc* pf, const char** reason) {
+    if ( has_AST_node_unknown_to_script_opt(pf, true) ) {
+        if ( reason )
+            *reason = "unknown AST node type";
+        return false;
+    }
+
     auto b = pf->ProfiledBody();
     auto is_hook = pf->ProfiledFunc()->Flavor() == FUNC_FLAVOR_HOOK;
     if ( b && ! script_is_valid(b, is_hook) ) {