From eb13ff3110376cb88237a115aa08e87aa5a6c5ac Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 30 Sep 2025 11:27:53 -0700 Subject: [PATCH 1/2] Reworked initialization of globals for -O gen-standalone-C++ code --- src/script_opt/CPP/Consts.h | 6 +++- src/script_opt/CPP/Driver.cc | 37 ++++++++++++--------- src/script_opt/CPP/Exprs.cc | 26 +++++++++++++-- src/script_opt/CPP/Exprs.h | 5 +++ src/script_opt/CPP/Inits.cc | 41 +++++++++--------------- src/script_opt/CPP/Inits.h | 4 +++ src/script_opt/CPP/InitsInfo.cc | 32 +++++++++++++++--- src/script_opt/CPP/InitsInfo.h | 10 ++++++ src/script_opt/CPP/RuntimeInitSupport.cc | 9 ++++++ src/script_opt/CPP/RuntimeInits.cc | 3 ++ src/script_opt/CPP/Stmts.cc | 12 ++++++- src/script_opt/CPP/Vars.cc | 14 ++++---- src/script_opt/CPP/Vars.h | 9 +++--- src/script_opt/IDOptInfo.cc | 1 + src/script_opt/IDOptInfo.h | 8 ++++- 15 files changed, 155 insertions(+), 62 deletions(-) diff --git a/src/script_opt/CPP/Consts.h b/src/script_opt/CPP/Consts.h index 4bac98d31e..14ff82d9f3 100644 --- a/src/script_opt/CPP/Consts.h +++ b/src/script_opt/CPP/Consts.h @@ -15,8 +15,12 @@ public: // Returns the associated initialization info. In addition, consts_offset // returns an offset into an initialization-time global that tracks all // constructed globals, providing general access to them for aggregate -// constants. +// constants. The second form is for when this isn't needed. std::shared_ptr RegisterConstant(const ValPtr& vp, int& consts_offset); +std::shared_ptr RegisterConstant(const ValPtr& vp) { + int consts_offset; // ignored + return RegisterConstant(vp, consts_offset); +} private: // Maps (non-native) constants to associated C++ globals. 
diff --git a/src/script_opt/CPP/Driver.cc b/src/script_opt/CPP/Driver.cc index 06037eeb42..d497d4ecb9 100644 --- a/src/script_opt/CPP/Driver.cc +++ b/src/script_opt/CPP/Driver.cc @@ -125,8 +125,16 @@ void CPPCompile::Compile(bool report_uncompilable) { NL(); + IDSet globals_to_initialize; for ( auto& g : all_accessed_globals ) - CreateGlobal(g); + if ( CreateGlobal(g) ) + globals_to_initialize.insert(g); + + for ( auto& g : globals_to_initialize ) { + auto gi = GenerateGlobalInit(g); + global_id_info->AddInstance(gi); + global_gis[g] = std::move(gi); + } for ( const auto& e : accessed_events ) if ( AddGlobal(e, "gl") ) @@ -407,11 +415,6 @@ void CPPCompile::RegisterCompiledBody(const string& f) { } void CPPCompile::GenEpilog() { - if ( standalone ) { - NL(); - InitializeGlobals(); - } - NL(); for ( const auto& ii : init_infos ) GenInitExpr(ii.second); @@ -533,6 +536,7 @@ void CPPCompile::GenFinishInit() { Emit("generate_indices_set(CPP__Indices__init, InitIndices);"); Emit("std::map> InitConsts;"); + Emit("Frame* f__CPP = nullptr;"); NL(); for ( const auto& ci : const_info ) { @@ -551,9 +555,17 @@ void CPPCompile::GenFinishInit() { max_cohort = std::max(max_cohort, gi->MaxCohort()); for ( auto c = 0; c <= max_cohort; ++c ) - for ( const auto& gi : all_global_info ) - if ( gi->CohortSize(c) > 0 ) - Emit("%s.InitializeCohort(&im, %s);", gi->InitializersName(), Fmt(c)); + for ( const auto& gi : all_global_info ) { + if ( gi->CohortSize(c) == 0 ) + continue; + + Emit("%s.InitializeCohort(&im, %s);", gi->InitializersName(), Fmt(c)); + vector init_ids; + gi->GetCohortIDs(c, init_ids); + + for ( auto& ii : init_ids ) + InitializeGlobal(ii); + } // Populate mappings for dynamic offsets. 
NL(); @@ -567,13 +579,6 @@ void CPPCompile::GenFinishInit() { Emit("load_BiFs__CPP();"); - if ( standalone ) - // Note, BiFs will also be loaded again later, because the - // main initialization finishes upon loading of the activation - // script, rather than after all scripts have been parsed - // and plugins (with BiFs) have been loaded. - Emit("init_globals__CPP();"); - EndBlock(); } diff --git a/src/script_opt/CPP/Exprs.cc b/src/script_opt/CPP/Exprs.cc index 9cacd2506b..98341772ee 100644 --- a/src/script_opt/CPP/Exprs.cc +++ b/src/script_opt/CPP/Exprs.cc @@ -176,8 +176,7 @@ string CPPCompile::GenConstExpr(const ConstExpr* c, GenType gt) { if ( ! IsNativeType(t) ) { auto v = c->ValuePtr(); - int consts_offset; // ignored - (void)RegisterConstant(v, consts_offset); + (void)RegisterConstant(v); return NativeToGT(const_vals[v.get()]->Name(), t, gt); } @@ -1296,4 +1295,27 @@ string CPPCompile::GenEnum(const TypePtr& t, const ValPtr& ev) { return string("enum_mapping[") + Fmt(mapping_slot) + "]"; } +int CPPCompile::ReadyExpr(const ExprPtr& e) { + auto pf = make_unique(e.get()); + int max_cohort = 0; + + for ( const auto& g : pf->AllGlobals() ) + max_cohort = max(max_cohort, GenerateGlobalInit(g)->FinalInitCohort() + 1); + for ( const auto& c : pf->Constants() ) + max_cohort = max(max_cohort, RegisterConstant(c->ValuePtr())->FinalInitCohort() + 1); + + for ( const auto& t : pf->OrderedTypes() ) { + TypePtr tp{NewRef{}, const_cast(t)}; + max_cohort = max(max_cohort, RegisterType(tp)->FinalInitCohort() + 1); + } + + for ( auto& [attrs, t] : pf->ConstructorAttrs() ) { + AttributesPtr ap{NewRef{}, const_cast(attrs)}; + max_cohort = max(max_cohort, RegisterAttributes(ap)->FinalInitCohort() + 1); + max_cohort = max(max_cohort, RegisterType(t)->FinalInitCohort() + 1); + } + + return max_cohort; +} + } // namespace zeek::detail diff --git a/src/script_opt/CPP/Exprs.h b/src/script_opt/CPP/Exprs.h index 422f00f9a7..cb3ca2ec9f 100644 --- a/src/script_opt/CPP/Exprs.h +++ 
b/src/script_opt/CPP/Exprs.h @@ -117,6 +117,11 @@ std::string GenIntVector(const std::vector& vec); std::string GenField(const ExprPtr& rec, int field); std::string GenEnum(const TypePtr& et, const ValPtr& ev); +// Creates all the initializations needed to evaluate the given expression. +// Returns the maximum cohort associated with these. +friend class GlobalInitInfo; +int ReadyExpr(const ExprPtr& e); + // For record that are extended via redef's, maps fields beyond the original // definition to locations in the global (in the compiled code) "field_mapping" // array. diff --git a/src/script_opt/CPP/Inits.cc b/src/script_opt/CPP/Inits.cc index e828986beb..c665b5263b 100644 --- a/src/script_opt/CPP/Inits.cc +++ b/src/script_opt/CPP/Inits.cc @@ -185,42 +185,35 @@ void CPPCompile::InitializeConsts() { EndBlock(true); } -void CPPCompile::InitializeGlobals() { - Emit("static void init_globals__CPP()"); - StartBlock(); +void CPPCompile::InitializeGlobal(const IDPtr& g) { + const auto& oi = g->GetOptInfo(); + if ( ! oi ) + return; - Emit("Frame* f__CPP = nullptr;"); - NL(); + const auto& exprs = oi->GetInitExprs(); + const auto& init_classes = oi->GetInitClasses(); - auto& ofiles = analysis_options.only_files; + ASSERT(exprs.size() == init_classes.size()); - for ( const auto& ginit : IDOptInfo::GetGlobalInitExprs() ) { - IDPtr g{NewRef{}, const_cast(ginit.Id())}; + auto init = exprs.begin(); + auto ic = init_classes.begin(); - if ( ! ofiles.empty() && obj_matches_opt_files(g) != AnalyzeDecision::SHOULD ) - continue; - - if ( ! accessed_globals.contains(g) ) - continue; - - auto ic = ginit.IC(); - auto& init = ginit.Init(); - - if ( ic == INIT_NONE ) - Emit(GenExpr(init, GEN_NATIVE, true) + ";"); + for ( ; init != exprs.end(); ++init, ++ic ) { + if ( *ic == INIT_NONE ) + Emit(GenExpr(*init, GEN_NATIVE, true) + ";"); else { // This branch occurs for += or -= initializations that // use associated functions. 
string ics; - if ( ic == INIT_EXTRA ) + if ( *ic == INIT_EXTRA ) ics = "INIT_EXTRA"; - else if ( ic == INIT_REMOVE ) + else if ( *ic == INIT_REMOVE ) ics = "INIT_REMOVE"; else - reporter->FatalError("bad initialization class in CPPCompile::InitializeGlobals()"); + reporter->FatalError("bad initialization class in CPPCompile::InitializeGlobal()"); - Emit("%s->SetValue(%s, %s);", globals[g->Name()], GenExpr(init, GEN_NATIVE, true), ics); + Emit("%s->SetValue(%s, %s);", globals[g->Name()], GenExpr(*init, GEN_NATIVE, true), ics); } const auto& attrs = g->GetAttrs(); @@ -230,8 +223,6 @@ void CPPCompile::InitializeGlobals() { Emit("%s->SetAttrs(%s);", globals[g->Name()], attrs_str); } } - - EndBlock(); } void CPPCompile::GenInitHook() { diff --git a/src/script_opt/CPP/Inits.h b/src/script_opt/CPP/Inits.h index cb5646924b..737784c060 100644 --- a/src/script_opt/CPP/Inits.h +++ b/src/script_opt/CPP/Inits.h @@ -93,6 +93,10 @@ void InitializeHashes(); // Generate code to initialize indirect references to constants. void InitializeConsts(); +// Generate code to initialize a global (using dynamic statements rather than +// constants). +void InitializeGlobal(const IDPtr& g); + // Generate code to initialize globals (using dynamic statements rather than // constants). 
void InitializeGlobals(); diff --git a/src/script_opt/CPP/InitsInfo.cc b/src/script_opt/CPP/InitsInfo.cc index 8b14045c40..8df4a60e8e 100644 --- a/src/script_opt/CPP/InitsInfo.cc +++ b/src/script_opt/CPP/InitsInfo.cc @@ -8,6 +8,7 @@ #include "zeek/script_opt/CPP/AttrExprType.h" #include "zeek/script_opt/CPP/Compile.h" #include "zeek/script_opt/CPP/RuntimeInits.h" +#include "zeek/script_opt/IdOptInfo.h" using namespace std; @@ -15,6 +16,15 @@ namespace zeek::detail { string CPP_InitsInfo::Name(int index) const { return base_name + "[" + Fmt(index) + "]"; } +void CPP_InitsInfo::GetCohortIDs(int c, std::vector& ids) const { + if ( c > MaxCohort() ) + return; + + for ( auto& co : instances[c] ) + if ( auto id = co->InitIdentifier() ) + ids.emplace_back(std::move(id)); +} + void CPP_InitsInfo::AddInstance(shared_ptr g) { auto final_init_cohort = g->FinalInitCohort(); @@ -375,16 +385,17 @@ void GlobalLookupInitInfo::InitializerVals(std::vector& ivs) const ivs.push_back(val); } -GlobalInitInfo::GlobalInitInfo(CPPCompile* c, IDPtr g, string _CPP_name) - : GlobalLookupInitInfo(c, g, std::move(_CPP_name)) { +GlobalInitInfo::GlobalInitInfo(CPPCompile* c, IDPtr _g, string _CPP_name) + : GlobalLookupInitInfo(c, _g, std::move(_CPP_name)) { + g = std::move(_g); auto& gt = g->GetType(); auto gi = c->RegisterType(gt); - init_cohort = max(init_cohort, gi->InitCohort() + 1); + init_cohort = max(init_cohort, gi->FinalInitCohort() + 1); type = gi->Offset(); gi = c->RegisterAttributes(g->GetAttrs()); if ( gi ) { - init_cohort = max(init_cohort, gi->InitCohort() + 1); + init_cohort = max(init_cohort, gi->FinalInitCohort() + 1); attrs = gi->Offset(); } else @@ -396,7 +407,18 @@ GlobalInitInfo::GlobalInitInfo(CPPCompile* c, IDPtr g, string _CPP_name) gc.is_enum_const = g->IsEnumConst(); gc.is_type = g->IsType(); - val = ValElem(c, nullptr); // empty because we initialize dynamically + // We don't initialize the global directly because its initialization + // might be an expression rather 
than a simple constant. Instead we + // make sure that it can be generated per the use of GetCohortIDs() + // in CPPCompile::GenFinishInit(). + val = ValElem(c, nullptr); + + // This code here parallels that of CPPCompile::InitializeGlobal(). + const auto& oi = g->GetOptInfo(); + for ( auto& init : oi->GetInitExprs() ) + // We use GetOp2() because initialization expressions are + // captured in the form of some sort of assignment. + init_cohort = max(init_cohort, c->ReadyExpr(init->GetOp2()) + 1); if ( gt->Tag() == TYPE_FUNC && (! g->GetVal() || g->GetVal()->AsFunc()->GetKind() == Func::BUILTIN_FUNC) ) // Be sure not to try to create BiFs. In addition, GetVal() can be diff --git a/src/script_opt/CPP/InitsInfo.h b/src/script_opt/CPP/InitsInfo.h index 997d357163..63fcd94cbd 100644 --- a/src/script_opt/CPP/InitsInfo.h +++ b/src/script_opt/CPP/InitsInfo.h @@ -126,6 +126,10 @@ public: // to the given cohort c. int CohortSize(int c) const { return c > MaxCohort() ? 0 : instances[c].size(); } + // Populates the given vector with associated identifiers seen + // in the cohort, if any. + void GetCohortIDs(int c, std::vector& ids) const; + // Returns the C++ type associated with this collection's run-time vector. // This might be, for example, "PatternVal" const std::string& CPPType() const { return CPP_type; } @@ -302,6 +306,9 @@ public: // constructor parameter. virtual void InitializerVals(std::vector& ivs) const = 0; + // Returns any associated identifier, or nil if none.
+ virtual IDPtr InitIdentifier() const { return nullptr; } + const Obj* InitObj() const { return o; } protected: @@ -517,7 +524,10 @@ public: std::string InitializerType() const override { return "CPP_GlobalInit"; } void InitializerVals(std::vector& ivs) const override; + IDPtr InitIdentifier() const override { return g; } + protected: + IDPtr g; int type; int attrs; std::string val; diff --git a/src/script_opt/CPP/RuntimeInitSupport.cc b/src/script_opt/CPP/RuntimeInitSupport.cc index bfb3dcdffd..572635c971 100644 --- a/src/script_opt/CPP/RuntimeInitSupport.cc +++ b/src/script_opt/CPP/RuntimeInitSupport.cc @@ -114,6 +114,12 @@ void activate_bodies__CPP(const char* fn, const char* module, bool exported, Typ fg->SetType(ft); } + if ( ! fg->GetType() ) + // This can happen both because we just installed the ID and + // because events registered by Spicy don't have types associated + // with them initially. + fg->SetType(ft); + if ( ! fg->GetAttr(ATTR_IS_USED) ) fg->AddAttr(make_intrusive(ATTR_IS_USED)); @@ -179,6 +185,9 @@ IDPtr lookup_global__CPP(const char* g, const TypePtr& t, const GlobalCharacteri gl->MakeType(); } + else if ( ! 
gl->GetType() ) + gl->SetType(t); + return gl; } diff --git a/src/script_opt/CPP/RuntimeInits.cc b/src/script_opt/CPP/RuntimeInits.cc index eba85bc9cf..b5edcc83d5 100644 --- a/src/script_opt/CPP/RuntimeInits.cc +++ b/src/script_opt/CPP/RuntimeInits.cc @@ -507,6 +507,9 @@ void CPP_GlobalInit::Generate(InitsManager* im, std::vector& /* inits_vec if ( attrs >= 0 ) global->SetAttrs(im->Attributes(attrs)); + + if ( t->Tag() == TYPE_FUNC ) + global->AddAttr(make_intrusive(ATTR_IS_USED)); } size_t generate_indices_set(int* inits, std::vector>& indices_set) { diff --git a/src/script_opt/CPP/Stmts.cc b/src/script_opt/CPP/Stmts.cc index 0d684a5b3e..9bcbfa212f 100644 --- a/src/script_opt/CPP/Stmts.cc +++ b/src/script_opt/CPP/Stmts.cc @@ -87,7 +87,17 @@ void CPPCompile::GenInitStmt(const InitStmt* init) { continue; } - Emit("%s = make_intrusive<%s>(cast_intrusive<%s>(%s));", IDName(aggr), type_name, type_type, type_ind); + auto aggr_name = IDName(aggr); + + Emit("%s = make_intrusive<%s>(cast_intrusive<%s>(%s));", aggr_name, type_name, type_type, type_ind); + + const auto& attrs = aggr->GetAttrs(); + if ( ! attrs ) + continue; + + auto attrs_offset = AttributesOffset(attrs); + auto attrs_str = "CPP__Attributes__[" + Fmt(attrs_offset) + "]"; + Emit("%s->SetAttrs(%s);", aggr_name, attrs_str); } } diff --git a/src/script_opt/CPP/Vars.cc b/src/script_opt/CPP/Vars.cc index bc00b96970..de49d9cfd2 100644 --- a/src/script_opt/CPP/Vars.cc +++ b/src/script_opt/CPP/Vars.cc @@ -7,7 +7,7 @@ namespace zeek::detail { using namespace std; -void CPPCompile::CreateGlobal(IDPtr g) { +bool CPPCompile::CreateGlobal(IDPtr g) { auto gn = string(g->Name()); bool is_bif = pfs->BiFGlobals().contains(g); @@ -16,15 +16,17 @@ void CPPCompile::CreateGlobal(IDPtr g) { // then we'll call it directly. 
if ( compilable_funcs.contains(gn) ) { AddGlobal(gn, "zf"); - return; + return false; } if ( is_bif ) { AddBiF(g, false); - return; + return false; } } + bool should_init = false; + if ( AddGlobal(gn, "gl") ) { // We'll be creating this global. Emit("IDPtr %s;", globals[gn]); @@ -32,9 +34,7 @@ void CPPCompile::CreateGlobal(IDPtr g) { // This is an event that's also used as a variable. Emit("EventHandlerPtr %s_ev;", globals[gn]); - auto gi = GenerateGlobalInit(g); - global_id_info->AddInstance(gi); - global_gis[g] = std::move(gi); + should_init = true; } if ( is_bif ) @@ -43,6 +43,8 @@ void CPPCompile::CreateGlobal(IDPtr g) { AddBiF(g, true); global_vars.emplace(g); + + return should_init; } std::shared_ptr CPPCompile::RegisterGlobal(IDPtr g) { diff --git a/src/script_opt/CPP/Vars.h b/src/script_opt/CPP/Vars.h index 37326a9734..53e2ead2b2 100644 --- a/src/script_opt/CPP/Vars.h +++ b/src/script_opt/CPP/Vars.h @@ -12,7 +12,10 @@ std::shared_ptr RegisterGlobal(IDPtr g); private: // Generate declarations associated with the given global, and, if it's used // as a variable (not just as a function being called), track it as such. -void CreateGlobal(IDPtr g); +// +// Returns true if it needs initialization (which we do separately to avoid +// tripping across dependencies between globals). +bool CreateGlobal(IDPtr g); // Low-level function for generating an initializer for a global. Takes // into account differences for standalone-compilation. @@ -50,10 +53,6 @@ std::string CaptureName(const IDPtr& l) const; // stripped or transformed, and guaranteed not to conflict with C++ keywords. std::string Canonicalize(const std::string& name) const; -// Returns the name of the global corresponding to an expression (which must -// be a EXPR_NAME). -std::string GlobalName(const ExprPtr& e) { return globals[e->AsNameExpr()->Id()->Name()]; } - // Globals that are used (appear in the profiles) of the bodies we're // compiling. Includes globals just used as functions to call. 
std::unordered_set all_accessed_globals; diff --git a/src/script_opt/IDOptInfo.cc b/src/script_opt/IDOptInfo.cc index 8290ec04d6..a5577fa64a 100644 --- a/src/script_opt/IDOptInfo.cc +++ b/src/script_opt/IDOptInfo.cc @@ -71,6 +71,7 @@ void IDOptInfo::AddInitExpr(ExprPtr init_expr, InitClass ic) { global_init_exprs.emplace_back(my_id, init_expr, ic); init_exprs.emplace_back(std::move(init_expr)); + init_classes.emplace_back(ic); } void IDOptInfo::SetDefinedAfter(const Stmt* s, const ExprPtr& e, const std::vector& conf_blocks, diff --git a/src/script_opt/IDOptInfo.h b/src/script_opt/IDOptInfo.h index 1d5ca43259..8d8b68d9fa 100644 --- a/src/script_opt/IDOptInfo.h +++ b/src/script_opt/IDOptInfo.h @@ -148,8 +148,9 @@ public: // be done with the ExprPtr form of ID::SetVal. void AddInitExpr(ExprPtr init_expr, InitClass ic = INIT_NONE); - // Returns the initialization expressions for this identifier. + // Returns the initialization expressions or classes for this identifier. const std::vector& GetInitExprs() const { return init_exprs; } + const std::vector& GetInitClasses() const { return init_classes; } // Returns a list of the initialization expressions seen for all // globals, ordered by when they were processed. @@ -253,6 +254,11 @@ private: // one of the earlier instances rather than the last one. std::vector init_exprs; + // A parallel array of the associated initialization classes. + // We keep the two separate rather than a std::pair because the + // most common use is to just loop over the expressions. + std::vector init_classes; + // Tracks initializations of globals in the order they're seen. static std::vector global_init_exprs; From 7278b7f40ec87dd0eeb77e199dbe8669ec159996 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 30 Sep 2025 12:03:27 -0700 Subject: [PATCH 2/2] fixup! 
Reworked initialization of globals for -O gen-standalone-C++ code --- src/script_opt/CPP/InitsInfo.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/script_opt/CPP/InitsInfo.cc b/src/script_opt/CPP/InitsInfo.cc index 8df4a60e8e..f1731e9d31 100644 --- a/src/script_opt/CPP/InitsInfo.cc +++ b/src/script_opt/CPP/InitsInfo.cc @@ -8,7 +8,7 @@ #include "zeek/script_opt/CPP/AttrExprType.h" #include "zeek/script_opt/CPP/Compile.h" #include "zeek/script_opt/CPP/RuntimeInits.h" -#include "zeek/script_opt/IdOptInfo.h" +#include "zeek/script_opt/IDOptInfo.h" using namespace std;