From 134f8f2ef528bafe9fa01574b153e7b00674507a Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 20 Nov 2022 12:16:25 -0800 Subject: [PATCH] script optimization fixes: new initialization model for standalone C++ scripts type coercion fix ZAM fix for compiling using C++ optimizer disambiguate empty constructors --- src/script_opt/CPP/Attrs.cc | 10 ++++- src/script_opt/CPP/CPP-load.bif | 10 +---- src/script_opt/CPP/Driver.cc | 17 +++++--- src/script_opt/CPP/Exprs.cc | 8 +++- src/script_opt/CPP/Func.cc | 2 +- src/script_opt/CPP/Func.h | 5 +-- src/script_opt/CPP/Inits.cc | 4 +- src/script_opt/CPP/RuntimeInitSupport.cc | 53 +++++++++--------------- src/script_opt/CPP/RuntimeInitSupport.h | 10 ++--- src/script_opt/CPP/Stmts.cc | 4 ++ src/script_opt/CPP/Tracker.cc | 13 ------ src/script_opt/CPP/Tracker.h | 8 +--- src/script_opt/ScriptOpt.cc | 18 +++----- src/script_opt/ZAM/Low-Level.cc | 14 +++++-- 14 files changed, 80 insertions(+), 96 deletions(-) diff --git a/src/script_opt/CPP/Attrs.cc b/src/script_opt/CPP/Attrs.cc index c4ba3345b6..a34a481a35 100644 --- a/src/script_opt/CPP/Attrs.cc +++ b/src/script_opt/CPP/Attrs.cc @@ -49,7 +49,15 @@ shared_ptr CPPCompile::RegisterAttr(const AttrPtr& attr) const auto& e = a->GetExpr(); if ( e && ! IsSimpleInitExpr(e) ) - init_exprs.AddKey(e, p_hash(e)); + { + auto h = p_hash(e); + + // Include the type in the hash, otherwise expressions + // like "vector()" are ambiguous. + h = merge_p_hashes(h, p_hash(e->GetType())); + + init_exprs.AddKey(e, h); + } auto gi = make_shared(this, attr); attr_info->AddInstance(gi); diff --git a/src/script_opt/CPP/CPP-load.bif b/src/script_opt/CPP/CPP-load.bif index 87528ce658..8376ae06a4 100644 --- a/src/script_opt/CPP/CPP-load.bif +++ b/src/script_opt/CPP/CPP-load.bif @@ -29,15 +29,7 @@ function load_CPP%(h: count%): bool return zeek::val_mgr->False(); } - // Ensure that any compiled scripts are used. If instead - // the AST is used, then when we activate the standalone - // scripts, they won't be able to avoid installing redundant - // event handlers. - detail::analysis_options.use_CPP = true; - - // Mark this script as one we should activate after loading - // compiled scripts. - detail::standalone_activations.push_back(cb->second); + (*cb->second)(); return zeek::val_mgr->True(); %} diff --git a/src/script_opt/CPP/Driver.cc b/src/script_opt/CPP/Driver.cc index f64c192367..3c57192f4e 100644 --- a/src/script_opt/CPP/Driver.cc +++ b/src/script_opt/CPP/Driver.cc @@ -494,13 +494,18 @@ void CPPCompile::GenFinishInit() Emit("\tfield_mapping.push_back(fm.ComputeOffset(&im));"); NL(); - Emit("load_BiFs__CPP();"); if ( standalone ) - { - NL(); + // BiFs need to be loaded later, because the main + // initialization finishes upon loading of the activation + // script, rather than after all scripts have been parsed + // and BiFs have been loaded. Emit("init_globals__CPP();"); - } + else + // For non-standalone, we're only initializing after all + // scripts have been parsed and BiFs loaded, so it's fine + // to go ahead and do so now. + Emit("load_BiFs__CPP();"); EndBlock(); } @@ -513,7 +518,9 @@ void CPPCompile::GenRegisterBodies() Emit("for ( auto& b : CPP__bodies_to_register )"); StartBlock(); Emit("auto f = make_intrusive(b.func_name.c_str(), b.func, b.type_signature);"); - Emit("register_body__CPP(f, b.priority, b.h, b.events, finish_init__CPP);"); + + auto reg = standalone ? "register_standalone_body" : "register_body"; + Emit("%s__CPP(f, b.priority, b.h, b.events, finish_init__CPP);", reg); EndBlock(); EndBlock(); diff --git a/src/script_opt/CPP/Exprs.cc b/src/script_opt/CPP/Exprs.cc index c8123a8a7a..8ce26668e9 100644 --- a/src/script_opt/CPP/Exprs.cc +++ b/src/script_opt/CPP/Exprs.cc @@ -869,7 +869,13 @@ string CPPCompile::GenUnary(const Expr* e, GenType gt, const char* op, const cha if ( e->GetType()->Tag() == TYPE_VECTOR ) return GenVectorOp(e, GenExpr(e->GetOp1(), GEN_NATIVE), vec_op); - return NativeToGT(string(op) + "(" + GenExpr(e->GetOp1(), GEN_NATIVE) + ")", e->GetType(), gt); + // Look for coercions that the interpreter does implicitly. + auto op1 = e->GetOp1(); + if ( op1->GetType()->Tag() == TYPE_COUNT && + (e->Tag() == EXPR_POSITIVE || e->Tag() == EXPR_NEGATE) ) + op1 = make_intrusive(op1, TYPE_INT); + + return NativeToGT(string(op) + "(" + GenExpr(op1, GEN_NATIVE) + ")", e->GetType(), gt); } string CPPCompile::GenBinary(const Expr* e, GenType gt, const char* op, const char* vec_op) diff --git a/src/script_opt/CPP/Func.cc b/src/script_opt/CPP/Func.cc index cd173c7332..708f9f0880 100644 --- a/src/script_opt/CPP/Func.cc +++ b/src/script_opt/CPP/Func.cc @@ -15,7 +15,7 @@ using namespace std; unordered_map compiled_scripts; unordered_map> added_bodies; unordered_map standalone_callbacks; -vector standalone_activations; +vector standalone_finalizations; void CPPFunc::Describe(ODesc* d) const { diff --git a/src/script_opt/CPP/Func.h b/src/script_opt/CPP/Func.h index ece8380815..b1077d75ea 100644 --- a/src/script_opt/CPP/Func.h +++ b/src/script_opt/CPP/Func.h @@ -119,9 +119,8 @@ extern std::unordered_map> added_bo // Maps hashes to standalone script initialization callbacks. extern std::unordered_map standalone_callbacks; -// Standalone callbacks marked for activation by calls to the -// load_CPP() BiF. -extern std::vector standalone_activations; +// Callbacks to finalize initialization of standalone compiled scripts. +extern std::vector standalone_finalizations; } // namespace detail diff --git a/src/script_opt/CPP/Inits.cc b/src/script_opt/CPP/Inits.cc index d77bd2f5e8..79228db09f 100644 --- a/src/script_opt/CPP/Inits.cc +++ b/src/script_opt/CPP/Inits.cc @@ -331,8 +331,8 @@ void CPPCompile::GenStandaloneActivation() Emit("void standalone_init__CPP()"); StartBlock(); Emit("init__CPP();"); - Emit("CPP_activation_funcs.push_back(standalone_activation__CPP);"); - Emit("CPP_activation_hook = activate__CPPs;"); + Emit("standalone_activation__CPP();"); + Emit("standalone_finalizations.push_back(load_BiFs__CPP);"); EndBlock(); } diff --git a/src/script_opt/CPP/RuntimeInitSupport.cc b/src/script_opt/CPP/RuntimeInitSupport.cc index 4758659faf..3f74c24f1e 100644 --- a/src/script_opt/CPP/RuntimeInitSupport.cc +++ b/src/script_opt/CPP/RuntimeInitSupport.cc @@ -11,7 +11,6 @@ namespace zeek::detail using namespace std; vector CPP_init_funcs; -vector CPP_activation_funcs; // Calls all of the initialization hooks, in the order they were added. void init_CPPs() @@ -25,18 +24,6 @@ void init_CPPs() need_init = false; } -// Calls all of the registered activation hooks for standalone code. -void activate__CPPs() - { - static bool need_init = true; - - if ( need_init ) - for ( auto f : CPP_activation_funcs ) - f(); - - need_init = false; - } - // This is a trick used to register the presence of compiled code. // The initialization of the static variable will make CPP_init_hook // non-null, which the main part of Zeek uses to tell that there's @@ -68,6 +55,16 @@ void register_body__CPP(CPPStmtPtr body, int priority, p_hash_type hash, vector< compiled_scripts[hash] = {move(body), priority, move(events), finish_init}; } +static unordered_map compiled_standalone_scripts; + +void register_standalone_body__CPP(CPPStmtPtr body, int priority, p_hash_type hash, + vector events, void (*finish_init)()) + { + // For standalone scripts we don't actually need finish_init, but + // we keep it for symmetry with compiled_scripts. + compiled_standalone_scripts[hash] = {move(body), priority, move(events), finish_init}; + } + void register_lambda__CPP(CPPStmtPtr body, p_hash_type hash, const char* name, TypePtr t, bool has_captures) { @@ -111,6 +108,13 @@ void activate_bodies__CPP(const char* fn, const char* module, bool exported, Typ fg->SetType(ft); } + if ( ! fg->GetAttr(ATTR_IS_USED) ) + { + vector used_attr; + used_attr.emplace_back(make_intrusive(ATTR_IS_USED)); + fg->AddAttrs(make_intrusive(used_attr, nullptr, false, true)); + } + auto v = fg->GetVal(); if ( ! v ) { // Create it. @@ -123,19 +127,6 @@ void activate_bodies__CPP(const char* fn, const char* module, bool exported, Typ } auto f = v->AsFunc(); - const auto& bodies = f->GetBodies(); - - // Track hashes of compiled bodies already associated with f. - unordered_set existing_CPP_bodies; - for ( auto& b : bodies ) - { - auto s = b.stmts; - if ( s->Tag() != STMT_CPP ) - continue; - - const auto& cpp_s = cast_intrusive(s); - existing_CPP_bodies.insert(cpp_s->GetHash()); - } // Events we need to register. unordered_set events; @@ -148,15 +139,9 @@ void activate_bodies__CPP(const char* fn, const char* module, bool exported, Typ for ( auto h : hashes ) { - if ( existing_CPP_bodies.count(h) > 0 ) - // We're presumably running with the original script, - // and have already incorporated this compiled body - // into f. - continue; - // Add in the new body. - auto csi = compiled_scripts.find(h); - ASSERT(csi != compiled_scripts.end()); + auto csi = compiled_standalone_scripts.find(h); + ASSERT(csi != compiled_standalone_scripts.end()); auto cs = csi->second; f->AddBody(cs.body, no_inits, num_params, cs.priority); diff --git a/src/script_opt/CPP/RuntimeInitSupport.h b/src/script_opt/CPP/RuntimeInitSupport.h index e7998716e4..06e5132edd 100644 --- a/src/script_opt/CPP/RuntimeInitSupport.h +++ b/src/script_opt/CPP/RuntimeInitSupport.h @@ -23,12 +23,6 @@ using CPP_init_func = void (*)(); // Tracks the initialization hooks for different compilation runs. extern std::vector CPP_init_funcs; -// Tracks the activation hooks for different "standalone" compilations. -extern std::vector CPP_activation_funcs; - -// Activates all previously registered standalone code. -extern void activate__CPPs(); - // Registers the given global type, if not already present. extern void register_type__CPP(TypePtr t, const std::string& name); @@ -39,6 +33,10 @@ extern void register_type__CPP(TypePtr t, const std::string& name); extern void register_body__CPP(CPPStmtPtr body, int priority, p_hash_type hash, std::vector events, void (*finish_init)()); +// Same but for standalone function bodies. +extern void register_standalone_body__CPP(CPPStmtPtr body, int priority, p_hash_type hash, + std::vector events, void (*finish_init)()); + // Registers a lambda body as associated with the given hash. Includes // the name of the lambda (so it can be made available as a quasi-global // identifier), its type, and whether it needs captures. diff --git a/src/script_opt/CPP/Stmts.cc b/src/script_opt/CPP/Stmts.cc index 98ccbd2b0c..6200d12ab0 100644 --- a/src/script_opt/CPP/Stmts.cc +++ b/src/script_opt/CPP/Stmts.cc @@ -11,6 +11,10 @@ using namespace std; void CPPCompile::GenStmt(const Stmt* s) { + auto loc = s->GetLocationInfo(); + if ( loc != &detail::no_location && s->Tag() != STMT_LIST ) + Emit("// %s:%s", loc->filename, to_string(loc->first_line)); + switch ( s->Tag() ) { case STMT_INIT: diff --git a/src/script_opt/CPP/Tracker.cc b/src/script_opt/CPP/Tracker.cc index de4a2bfdf4..23b3fb240d 100644 --- a/src/script_opt/CPP/Tracker.cc +++ b/src/script_opt/CPP/Tracker.cc @@ -16,9 +16,6 @@ template void CPPTracker::AddKey(IntrusivePtr key, p_hash_type h if ( HasKey(key) ) return; - if ( h == 0 ) - h = Hash(key); - if ( map2.count(h) == 0 ) { auto index = keys.size(); @@ -57,16 +54,6 @@ template string CPPTracker::KeyName(const T* key) return full_name; } -template p_hash_type CPPTracker::Hash(IntrusivePtr key) const - { - ODesc d; - d.SetDeterminism(true); - key->Describe(&d); - string desc = d.Description(); - auto h = hash{}(base_name + desc); - return p_hash_type(h); - } - // Instantiate the templates we'll need. template class CPPTracker; template class CPPTracker; diff --git a/src/script_opt/CPP/Tracker.h b/src/script_opt/CPP/Tracker.h index d234ba5a73..1aaf72429a 100644 --- a/src/script_opt/CPP/Tracker.h +++ b/src/script_opt/CPP/Tracker.h @@ -35,9 +35,8 @@ public: bool HasKey(const T* key) const { return map.count(key) > 0; } bool HasKey(IntrusivePtr key) const { return HasKey(key.get()); } - // Only adds the key if it's not already present. If a hash - // is provided, then refrains from computing it. - void AddKey(IntrusivePtr key, p_hash_type h = 0); + // Only adds the key if it's not already present. + void AddKey(IntrusivePtr key, p_hash_type h); void AddInitInfo(const T* rep, std::shared_ptr gi) { gi_s[rep] = std::move(gi); } @@ -58,9 +57,6 @@ public: const T* GetRep(IntrusivePtr key) { return GetRep(key.get()); } private: - // Compute a hash for the given key. - p_hash_type Hash(IntrusivePtr key) const; - // Maps keys to internal representations (i.e., hashes). std::unordered_map map; diff --git a/src/script_opt/ScriptOpt.cc b/src/script_opt/ScriptOpt.cc index fc4096e4ea..d877535b63 100644 --- a/src/script_opt/ScriptOpt.cc +++ b/src/script_opt/ScriptOpt.cc @@ -418,13 +418,8 @@ static void use_CPP() } } - if ( num_used == 0 && standalone_activations.empty() ) + if ( num_used == 0 ) reporter->FatalError("no C++ functions found to use"); - - // Now that we've loaded all of the compiled scripts - // relevant for the AST, activate standalone ones. - for ( auto cb : standalone_activations ) - (*cb)(); } static void generate_CPP(std::unique_ptr& pfs) @@ -528,13 +523,12 @@ static void analyze_scripts_for_ZAM(std::unique_ptr& pfs) void analyze_scripts(bool no_unused_warnings) { - static bool did_init = false; + init_options(); - if ( ! did_init ) - { - init_options(); - did_init = true; - } + // Any standalone compiled scripts have already been instantiated + // at this point, but may require post-loading-of-scripts finalization. + for ( auto cb : standalone_finalizations ) + (*cb)(); std::unique_ptr ua; if ( ! no_unused_warnings ) diff --git a/src/script_opt/ZAM/Low-Level.cc b/src/script_opt/ZAM/Low-Level.cc index 36e080d83b..6aefbea1f0 100644 --- a/src/script_opt/ZAM/Low-Level.cc +++ b/src/script_opt/ZAM/Low-Level.cc @@ -72,11 +72,16 @@ int ZAMCompiler::InternalAddVal(ZInstAux* zi, int i, Expr* e) auto& indices = e->GetOp1()->AsListExpr()->Exprs(); auto val = e->GetOp2(); int width = indices.length(); + int num_vals; for ( int j = 0; j < width; ++j ) - ASSERT(InternalAddVal(zi, i + j, indices[j]) == 1); + { + num_vals = InternalAddVal(zi, i + j, indices[j]); + ASSERT(num_vals == 1); + } - ASSERT(InternalAddVal(zi, i + width, val.get()) == 1); + num_vals = InternalAddVal(zi, i + width, val.get()); + ASSERT(num_vals == 1); return width + 1; } @@ -87,7 +92,10 @@ int ZAMCompiler::InternalAddVal(ZInstAux* zi, int i, Expr* e) int width = indices.length(); for ( int j = 0; j < width; ++j ) - ASSERT(InternalAddVal(zi, i + j, indices[j]) == 1); + { + int num_vals = InternalAddVal(zi, i + j, indices[j]); + ASSERT(num_vals == 1); + } return width; }