From 36797a600ec1122be3302290112f4dc4a2463c9b Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 26 Sep 2025 16:46:21 -0700 Subject: [PATCH] fixes for globals in initialization expressions - still has vestigial late-build of globals --- src/script_opt/CPP/Consts.h | 6 ++++- src/script_opt/CPP/Driver.cc | 19 +++++++++++----- src/script_opt/CPP/Exprs.cc | 26 +++++++++++++++++++-- src/script_opt/CPP/Exprs.h | 5 +++++ src/script_opt/CPP/Inits.cc | 40 +++++++++++++++++++++++++++++++++ src/script_opt/CPP/Inits.h | 4 ++++ src/script_opt/CPP/InitsInfo.cc | 32 +++++++++++++++++++++----- src/script_opt/CPP/InitsInfo.h | 10 +++++++++ src/script_opt/IDOptInfo.cc | 1 + src/script_opt/IDOptInfo.h | 8 ++++++- 10 files changed, 137 insertions(+), 14 deletions(-) diff --git a/src/script_opt/CPP/Consts.h b/src/script_opt/CPP/Consts.h index 4bac98d31e..14ff82d9f3 100644 --- a/src/script_opt/CPP/Consts.h +++ b/src/script_opt/CPP/Consts.h @@ -15,8 +15,12 @@ public: // Returns the associated initialization info. In addition, consts_offset // returns an offset into an initialization-time global that tracks all // constructed globals, providing general access to them for aggregate -// constants. +// constants. The second form is for when this isn't needed. std::shared_ptr RegisterConstant(const ValPtr& vp, int& consts_offset); +std::shared_ptr RegisterConstant(const ValPtr& vp) { + int consts_offset; // ignored + return RegisterConstant(vp, consts_offset); +} private: // Maps (non-native) constants to associated C++ globals. 
diff --git a/src/script_opt/CPP/Driver.cc b/src/script_opt/CPP/Driver.cc index 215d74f061..241279a402 100644 --- a/src/script_opt/CPP/Driver.cc +++ b/src/script_opt/CPP/Driver.cc @@ -413,7 +413,7 @@ void CPPCompile::RegisterCompiledBody(const string& f) { void CPPCompile::GenEpilog() { if ( standalone ) { NL(); - InitializeGlobals(); + // InitializeGlobals(); } NL(); @@ -537,6 +537,7 @@ void CPPCompile::GenFinishInit() { Emit("generate_indices_set(CPP__Indices__init, InitIndices);"); Emit("std::map> InitConsts;"); + Emit("Frame* f__CPP = nullptr;"); NL(); for ( const auto& ci : const_info ) { @@ -555,9 +556,17 @@ void CPPCompile::GenFinishInit() { max_cohort = std::max(max_cohort, gi->MaxCohort()); for ( auto c = 0; c <= max_cohort; ++c ) - for ( const auto& gi : all_global_info ) - if ( gi->CohortSize(c) > 0 ) - Emit("%s.InitializeCohort(&im, %s);", gi->InitializersName(), Fmt(c)); + for ( const auto& gi : all_global_info ) { + if ( gi->CohortSize(c) == 0 ) + continue; + + Emit("%s.InitializeCohort(&im, %s);", gi->InitializersName(), Fmt(c)); + vector init_ids; + gi->GetCohortIDs(c, init_ids); + + for ( auto& ii : init_ids ) + InitializeGlobal(ii); + } // Populate mappings for dynamic offsets. NL(); @@ -571,7 +580,7 @@ void CPPCompile::GenFinishInit() { Emit("load_BiFs__CPP();"); - if ( standalone ) + if ( standalone && false ) // Note, BiFs will also be loaded again later, because the // main initialization finishes upon loading of the activation // script, rather than after all scripts have been parsed diff --git a/src/script_opt/CPP/Exprs.cc b/src/script_opt/CPP/Exprs.cc index 9cacd2506b..0f230be1b1 100644 --- a/src/script_opt/CPP/Exprs.cc +++ b/src/script_opt/CPP/Exprs.cc @@ -176,8 +176,7 @@ string CPPCompile::GenConstExpr(const ConstExpr* c, GenType gt) { if ( ! 
IsNativeType(t) ) { auto v = c->ValuePtr(); - int consts_offset; // ignored - (void)RegisterConstant(v, consts_offset); + (void)RegisterConstant(v); return NativeToGT(const_vals[v.get()]->Name(), t, gt); } @@ -1296,4 +1295,27 @@ string CPPCompile::GenEnum(const TypePtr& t, const ValPtr& ev) { return string("enum_mapping[") + Fmt(mapping_slot) + "]"; } +int CPPCompile::ReadyExpr(const ExprPtr& e) { + auto pf = make_unique(e.get()); + int max_cohort = 0; + + for ( const auto& g : pf->AllGlobals() ) + max_cohort = max(max_cohort, GenerateGlobalInit(g)->FinalInitCohort()); + for ( const auto& c : pf->Constants() ) + max_cohort = max(max_cohort, RegisterConstant(c->ValuePtr())->FinalInitCohort()); + + for ( const auto& t : pf->OrderedTypes() ) { + TypePtr tp{NewRef{}, const_cast(t)}; + max_cohort = max(max_cohort, RegisterType(tp)->FinalInitCohort()); + } + + for ( auto& [attrs, t] : pf->ConstructorAttrs() ) { + AttributesPtr ap{NewRef{}, const_cast(attrs)}; + max_cohort = max(max_cohort, RegisterAttributes(ap)->FinalInitCohort()); + max_cohort = max(max_cohort, RegisterType(t)->FinalInitCohort()); + } + + return max_cohort; +} + } // namespace zeek::detail diff --git a/src/script_opt/CPP/Exprs.h b/src/script_opt/CPP/Exprs.h index 422f00f9a7..cb3ca2ec9f 100644 --- a/src/script_opt/CPP/Exprs.h +++ b/src/script_opt/CPP/Exprs.h @@ -117,6 +117,11 @@ std::string GenIntVector(const std::vector& vec); std::string GenField(const ExprPtr& rec, int field); std::string GenEnum(const TypePtr& et, const ValPtr& ev); +// Creates all the initializations needed to evaluate the given expression. +// Returns the maximum cohort associated with these. +friend class GlobalInitInfo; +int ReadyExpr(const ExprPtr& e); + // For record that are extended via redef's, maps fields beyond the original // definition to locations in the global (in the compiled code) "field_mapping" // array. 
diff --git a/src/script_opt/CPP/Inits.cc b/src/script_opt/CPP/Inits.cc index 45c73589e7..eff3e5a876 100644 --- a/src/script_opt/CPP/Inits.cc +++ b/src/script_opt/CPP/Inits.cc @@ -185,6 +185,46 @@ void CPPCompile::InitializeConsts() { EndBlock(true); } +void CPPCompile::InitializeGlobal(const IDPtr& g) { + const auto& oi = g->GetOptInfo(); + if ( ! oi ) + return; + + const auto& exprs = oi->GetInitExprs(); + const auto& init_classes = oi->GetInitClasses(); + + ASSERT(exprs.size() == init_classes.size()); + + auto init = exprs.begin(); + auto ic = init_classes.begin(); + + for ( ; init != exprs.end(); ++init, ++ic ) { + if ( *ic == INIT_NONE ) + Emit(GenExpr(*init, GEN_NATIVE, true) + ";"); + + else { + // This branch occurs for += or -= initializations that + // use associated functions. + string ics; + if ( *ic == INIT_EXTRA ) + ics = "INIT_EXTRA"; + else if ( *ic == INIT_REMOVE ) + ics = "INIT_REMOVE"; + else + reporter->FatalError("bad initialization class in CPPCompile::InitializeGlobal()"); + + Emit("%s->SetValue(%s, %s);", globals[g->Name()], GenExpr(*init, GEN_NATIVE, true), ics); + } + + const auto& attrs = g->GetAttrs(); + if ( attrs ) { + auto attrs_offset = AttributesOffset(attrs); + auto attrs_str = "CPP__Attributes__[" + Fmt(attrs_offset) + "]"; + Emit("%s->SetAttrs(%s);", globals[g->Name()], attrs_str); + } + } +} + void CPPCompile::InitializeGlobals() { Emit("static void init_globals__CPP()"); StartBlock(); diff --git a/src/script_opt/CPP/Inits.h b/src/script_opt/CPP/Inits.h index cb5646924b..737784c060 100644 --- a/src/script_opt/CPP/Inits.h +++ b/src/script_opt/CPP/Inits.h @@ -93,6 +93,10 @@ void InitializeHashes(); // Generate code to initialize indirect references to constants. void InitializeConsts(); +// Generate code to initialize a global (using dynamic statements rather than +// constants). +void InitializeGlobal(const IDPtr& g); + // Generate code to initialize globals (using dynamic statements rather than // constants). 
void InitializeGlobals(); diff --git a/src/script_opt/CPP/InitsInfo.cc b/src/script_opt/CPP/InitsInfo.cc index 8b14045c40..8df4a60e8e 100644 --- a/src/script_opt/CPP/InitsInfo.cc +++ b/src/script_opt/CPP/InitsInfo.cc @@ -8,6 +8,7 @@ #include "zeek/script_opt/CPP/AttrExprType.h" #include "zeek/script_opt/CPP/Compile.h" #include "zeek/script_opt/CPP/RuntimeInits.h" +#include "zeek/script_opt/IDOptInfo.h" using namespace std; @@ -15,6 +16,15 @@ namespace zeek::detail { string CPP_InitsInfo::Name(int index) const { return base_name + "[" + Fmt(index) + "]"; } +void CPP_InitsInfo::GetCohortIDs(int c, std::vector& ids) const { + if ( c > MaxCohort() ) + return; + + for ( auto& co : instances[c] ) + if ( auto id = co->InitIdentifier() ) + ids.emplace_back(std::move(id)); +} + void CPP_InitsInfo::AddInstance(shared_ptr g) { auto final_init_cohort = g->FinalInitCohort(); @@ -375,16 +385,17 @@ void GlobalLookupInitInfo::InitializerVals(std::vector& ivs) const ivs.push_back(val); } -GlobalInitInfo::GlobalInitInfo(CPPCompile* c, IDPtr g, string _CPP_name) - : GlobalLookupInitInfo(c, g, std::move(_CPP_name)) { +GlobalInitInfo::GlobalInitInfo(CPPCompile* c, IDPtr _g, string _CPP_name) + : GlobalLookupInitInfo(c, _g, std::move(_CPP_name)) { + g = std::move(_g); auto& gt = g->GetType(); auto gi = c->RegisterType(gt); - init_cohort = max(init_cohort, gi->InitCohort() + 1); + init_cohort = max(init_cohort, gi->FinalInitCohort() + 1); type = gi->Offset(); gi = c->RegisterAttributes(g->GetAttrs()); if ( gi ) { - init_cohort = max(init_cohort, gi->InitCohort() + 1); + init_cohort = max(init_cohort, gi->FinalInitCohort() + 1); attrs = gi->Offset(); } else @@ -396,7 +407,18 @@ GlobalInitInfo::GlobalInitInfo(CPPCompile* c, IDPtr g, string _CPP_name) gc.is_enum_const = g->IsEnumConst(); gc.is_type = g->IsType(); - val = ValElem(c, nullptr); // empty because we initialize dynamically + // We don't initialize the global directly because its initialization + // might be an expression rather 
than a simple constant. Instead we + // make sure that it can be generated per the use of GetCohortIDs() + // in CPPCompile::GenFinishInit(). + val = ValElem(c, nullptr); + + // This code here parallels that of CPPCompile::InitializeGlobal(). + const auto& oi = g->GetOptInfo(); + for ( auto& init : oi->GetInitExprs() ) + // We use GetOp2() because initialization expressions are + // captured in the form of some sort of assignment. + init_cohort = max(init_cohort, c->ReadyExpr(init->GetOp2()) + 1); if ( gt->Tag() == TYPE_FUNC && (! g->GetVal() || g->GetVal()->AsFunc()->GetKind() == Func::BUILTIN_FUNC) ) // Be sure not to try to create BiFs. In addition, GetVal() can be diff --git a/src/script_opt/CPP/InitsInfo.h b/src/script_opt/CPP/InitsInfo.h index 997d357163..63fcd94cbd 100644 --- a/src/script_opt/CPP/InitsInfo.h +++ b/src/script_opt/CPP/InitsInfo.h @@ -126,6 +126,10 @@ public: // to the given cohort c. int CohortSize(int c) const { return c > MaxCohort() ? 0 : instances[c].size(); } + // Populates the given vector with associated identifiers seen + // in the cohort, if any. + void GetCohortIDs(int c, std::vector& ids) const; + // Returns the C++ type associated with this collection's run-time vector. // This might be, for example, "PatternVal" const std::string& CPPType() const { return CPP_type; } @@ -302,6 +306,9 @@ public: // constructor parameter. virtual void InitializerVals(std::vector& ivs) const = 0; + // Returns any associated identifier, or nil if none. 
+ virtual IDPtr InitIdentifier() const { return nullptr; } + const Obj* InitObj() const { return o; } protected: @@ -517,7 +524,10 @@ public: std::string InitializerType() const override { return "CPP_GlobalInit"; } void InitializerVals(std::vector& ivs) const override; + IDPtr InitIdentifier() const override { return g; } + protected: + IDPtr g; int type; int attrs; std::string val; diff --git a/src/script_opt/IDOptInfo.cc b/src/script_opt/IDOptInfo.cc index 8290ec04d6..a5577fa64a 100644 --- a/src/script_opt/IDOptInfo.cc +++ b/src/script_opt/IDOptInfo.cc @@ -71,6 +71,7 @@ void IDOptInfo::AddInitExpr(ExprPtr init_expr, InitClass ic) { global_init_exprs.emplace_back(my_id, init_expr, ic); init_exprs.emplace_back(std::move(init_expr)); + init_classes.emplace_back(ic); } void IDOptInfo::SetDefinedAfter(const Stmt* s, const ExprPtr& e, const std::vector& conf_blocks, diff --git a/src/script_opt/IDOptInfo.h b/src/script_opt/IDOptInfo.h index 1d5ca43259..8d8b68d9fa 100644 --- a/src/script_opt/IDOptInfo.h +++ b/src/script_opt/IDOptInfo.h @@ -148,8 +148,9 @@ public: // be done with the ExprPtr form of ID::SetVal. void AddInitExpr(ExprPtr init_expr, InitClass ic = INIT_NONE); - // Returns the initialization expressions for this identifier. + // Returns the initialization expressions or classes for this identifier. const std::vector& GetInitExprs() const { return init_exprs; } + const std::vector& GetInitClasses() const { return init_classes; } // Returns a list of the initialization expressions seen for all // globals, ordered by when they were processed. @@ -253,6 +254,11 @@ private: // one of the earlier instances rather than the last one. std::vector init_exprs; + // A parallel array of the associated initialization classes. + // We keep the two separate rather than a std::pair because the + // most common use is to just loop over the expressions. + std::vector init_classes; + // Tracks initializations of globals in the order they're seen. 
static std::vector global_init_exprs;