Reworked initialization of globals for -O gen-standalone-C++ code

This commit is contained in:
Vern Paxson 2025-09-30 11:27:53 -07:00
parent 0700427bac
commit eb13ff3110
15 changed files with 155 additions and 62 deletions

View file

@ -15,8 +15,12 @@ public:
// Returns the associated initialization info. In addition, consts_offset // Returns the associated initialization info. In addition, consts_offset
// returns an offset into an initialization-time global that tracks all // returns an offset into an initialization-time global that tracks all
// constructed globals, providing general access to them for aggregate // constructed globals, providing general access to them for aggregate
// constants. // constants. The second form is for when this isn't needed.
std::shared_ptr<CPP_InitInfo> RegisterConstant(const ValPtr& vp, int& consts_offset); std::shared_ptr<CPP_InitInfo> RegisterConstant(const ValPtr& vp, int& consts_offset);
// Convenience form for callers that have no use for the constants offset:
// forwards to the two-argument overload and discards the offset it reports.
std::shared_ptr<CPP_InitInfo> RegisterConstant(const ValPtr& vp) {
    // Value-initialized so the callee is never handed an indeterminate
    // value through the reference, even though the result is ignored here.
    int consts_offset = 0;
    return RegisterConstant(vp, consts_offset);
}
private: private:
// Maps (non-native) constants to associated C++ globals. // Maps (non-native) constants to associated C++ globals.

View file

@ -125,8 +125,16 @@ void CPPCompile::Compile(bool report_uncompilable) {
NL(); NL();
IDSet globals_to_initialize;
for ( auto& g : all_accessed_globals ) for ( auto& g : all_accessed_globals )
CreateGlobal(g); if ( CreateGlobal(g) )
globals_to_initialize.insert(g);
for ( auto& g : globals_to_initialize ) {
auto gi = GenerateGlobalInit(g);
global_id_info->AddInstance(gi);
global_gis[g] = std::move(gi);
}
for ( const auto& e : accessed_events ) for ( const auto& e : accessed_events )
if ( AddGlobal(e, "gl") ) if ( AddGlobal(e, "gl") )
@ -407,11 +415,6 @@ void CPPCompile::RegisterCompiledBody(const string& f) {
} }
void CPPCompile::GenEpilog() { void CPPCompile::GenEpilog() {
if ( standalone ) {
NL();
InitializeGlobals();
}
NL(); NL();
for ( const auto& ii : init_infos ) for ( const auto& ii : init_infos )
GenInitExpr(ii.second); GenInitExpr(ii.second);
@ -533,6 +536,7 @@ void CPPCompile::GenFinishInit() {
Emit("generate_indices_set(CPP__Indices__init, InitIndices);"); Emit("generate_indices_set(CPP__Indices__init, InitIndices);");
Emit("std::map<TypeTag, std::shared_ptr<CPP_AbstractInitAccessor>> InitConsts;"); Emit("std::map<TypeTag, std::shared_ptr<CPP_AbstractInitAccessor>> InitConsts;");
Emit("Frame* f__CPP = nullptr;");
NL(); NL();
for ( const auto& ci : const_info ) { for ( const auto& ci : const_info ) {
@ -551,9 +555,17 @@ void CPPCompile::GenFinishInit() {
max_cohort = std::max(max_cohort, gi->MaxCohort()); max_cohort = std::max(max_cohort, gi->MaxCohort());
for ( auto c = 0; c <= max_cohort; ++c ) for ( auto c = 0; c <= max_cohort; ++c )
for ( const auto& gi : all_global_info ) for ( const auto& gi : all_global_info ) {
if ( gi->CohortSize(c) > 0 ) if ( gi->CohortSize(c) == 0 )
continue;
Emit("%s.InitializeCohort(&im, %s);", gi->InitializersName(), Fmt(c)); Emit("%s.InitializeCohort(&im, %s);", gi->InitializersName(), Fmt(c));
vector<IDPtr> init_ids;
gi->GetCohortIDs(c, init_ids);
for ( auto& ii : init_ids )
InitializeGlobal(ii);
}
// Populate mappings for dynamic offsets. // Populate mappings for dynamic offsets.
NL(); NL();
@ -567,13 +579,6 @@ void CPPCompile::GenFinishInit() {
Emit("load_BiFs__CPP();"); Emit("load_BiFs__CPP();");
if ( standalone )
// Note, BiFs will also be loaded again later, because the
// main initialization finishes upon loading of the activation
// script, rather than after all scripts have been parsed
// and plugins (with BiFs) have been loaded.
Emit("init_globals__CPP();");
EndBlock(); EndBlock();
} }

View file

@ -176,8 +176,7 @@ string CPPCompile::GenConstExpr(const ConstExpr* c, GenType gt) {
if ( ! IsNativeType(t) ) { if ( ! IsNativeType(t) ) {
auto v = c->ValuePtr(); auto v = c->ValuePtr();
int consts_offset; // ignored (void)RegisterConstant(v);
(void)RegisterConstant(v, consts_offset);
return NativeToGT(const_vals[v.get()]->Name(), t, gt); return NativeToGT(const_vals[v.get()]->Name(), t, gt);
} }
@ -1296,4 +1295,27 @@ string CPPCompile::GenEnum(const TypePtr& t, const ValPtr& ev) {
return string("enum_mapping[") + Fmt(mapping_slot) + "]"; return string("enum_mapping[") + Fmt(mapping_slot) + "]";
} }
int CPPCompile::ReadyExpr(const ExprPtr& e) {
auto pf = make_unique<ProfileFunc>(e.get());
int max_cohort = 0;
for ( const auto& g : pf->AllGlobals() )
max_cohort = max(max_cohort, GenerateGlobalInit(g)->FinalInitCohort() + 1);
for ( const auto& c : pf->Constants() )
max_cohort = max(max_cohort, RegisterConstant(c->ValuePtr())->FinalInitCohort() + 1);
for ( const auto& t : pf->OrderedTypes() ) {
TypePtr tp{NewRef{}, const_cast<Type*>(t)};
max_cohort = max(max_cohort, RegisterType(tp)->FinalInitCohort() + 1);
}
for ( auto& [attrs, t] : pf->ConstructorAttrs() ) {
AttributesPtr ap{NewRef{}, const_cast<Attributes*>(attrs)};
max_cohort = max(max_cohort, RegisterAttributes(ap)->FinalInitCohort() + 1);
max_cohort = max(max_cohort, RegisterType(t)->FinalInitCohort() + 1);
}
return max_cohort;
}
} // namespace zeek::detail } // namespace zeek::detail

View file

@ -117,6 +117,11 @@ std::string GenIntVector(const std::vector<int>& vec);
std::string GenField(const ExprPtr& rec, int field); std::string GenField(const ExprPtr& rec, int field);
std::string GenEnum(const TypePtr& et, const ValPtr& ev); std::string GenEnum(const TypePtr& et, const ValPtr& ev);
// Creates all the initializations needed to evaluate the given expression.
// Returns the maximum cohort associated with these.
friend class GlobalInitInfo;
int ReadyExpr(const ExprPtr& e);
// For records that are extended via redef's, maps fields beyond the original // For records that are extended via redef's, maps fields beyond the original
// definition to locations in the global (in the compiled code) "field_mapping" // definition to locations in the global (in the compiled code) "field_mapping"
// array. // array.

View file

@ -185,42 +185,35 @@ void CPPCompile::InitializeConsts() {
EndBlock(true); EndBlock(true);
} }
void CPPCompile::InitializeGlobals() { void CPPCompile::InitializeGlobal(const IDPtr& g) {
Emit("static void init_globals__CPP()"); const auto& oi = g->GetOptInfo();
StartBlock(); if ( ! oi )
return;
Emit("Frame* f__CPP = nullptr;"); const auto& exprs = oi->GetInitExprs();
NL(); const auto& init_classes = oi->GetInitClasses();
auto& ofiles = analysis_options.only_files; ASSERT(exprs.size() == init_classes.size());
for ( const auto& ginit : IDOptInfo::GetGlobalInitExprs() ) { auto init = exprs.begin();
IDPtr g{NewRef{}, const_cast<ID*>(ginit.Id())}; auto ic = init_classes.begin();
if ( ! ofiles.empty() && obj_matches_opt_files(g) != AnalyzeDecision::SHOULD ) for ( ; init != exprs.end(); ++init, ++ic ) {
continue; if ( *ic == INIT_NONE )
Emit(GenExpr(*init, GEN_NATIVE, true) + ";");
if ( ! accessed_globals.contains(g) )
continue;
auto ic = ginit.IC();
auto& init = ginit.Init();
if ( ic == INIT_NONE )
Emit(GenExpr(init, GEN_NATIVE, true) + ";");
else { else {
// This branch occurs for += or -= initializations that // This branch occurs for += or -= initializations that
// use associated functions. // use associated functions.
string ics; string ics;
if ( ic == INIT_EXTRA ) if ( *ic == INIT_EXTRA )
ics = "INIT_EXTRA"; ics = "INIT_EXTRA";
else if ( ic == INIT_REMOVE ) else if ( *ic == INIT_REMOVE )
ics = "INIT_REMOVE"; ics = "INIT_REMOVE";
else else
reporter->FatalError("bad initialization class in CPPCompile::InitializeGlobals()"); reporter->FatalError("bad initialization class in CPPCompile::InitializeGlobal()");
Emit("%s->SetValue(%s, %s);", globals[g->Name()], GenExpr(init, GEN_NATIVE, true), ics); Emit("%s->SetValue(%s, %s);", globals[g->Name()], GenExpr(*init, GEN_NATIVE, true), ics);
} }
const auto& attrs = g->GetAttrs(); const auto& attrs = g->GetAttrs();
@ -230,8 +223,6 @@ void CPPCompile::InitializeGlobals() {
Emit("%s->SetAttrs(%s);", globals[g->Name()], attrs_str); Emit("%s->SetAttrs(%s);", globals[g->Name()], attrs_str);
} }
} }
EndBlock();
} }
void CPPCompile::GenInitHook() { void CPPCompile::GenInitHook() {

View file

@ -93,6 +93,10 @@ void InitializeHashes();
// Generate code to initialize indirect references to constants. // Generate code to initialize indirect references to constants.
void InitializeConsts(); void InitializeConsts();
// Generate code to initialize a global (using dynamic statements rather than
// constants).
void InitializeGlobal(const IDPtr& g);
// Generate code to initialize globals (using dynamic statements rather than // Generate code to initialize globals (using dynamic statements rather than
// constants). // constants).
void InitializeGlobals(); void InitializeGlobals();

View file

@ -8,6 +8,7 @@
#include "zeek/script_opt/CPP/AttrExprType.h" #include "zeek/script_opt/CPP/AttrExprType.h"
#include "zeek/script_opt/CPP/Compile.h" #include "zeek/script_opt/CPP/Compile.h"
#include "zeek/script_opt/CPP/RuntimeInits.h" #include "zeek/script_opt/CPP/RuntimeInits.h"
#include "zeek/script_opt/IdOptInfo.h"
using namespace std; using namespace std;
@ -15,6 +16,15 @@ namespace zeek::detail {
string CPP_InitsInfo::Name(int index) const { return base_name + "[" + Fmt(index) + "]"; } string CPP_InitsInfo::Name(int index) const { return base_name + "[" + Fmt(index) + "]"; }
// Appends to "ids" the identifier of each instance in cohort "c" whose
// InitIdentifier() is non-nil. Leaves "ids" untouched if "c" is beyond the
// maximum cohort.
void CPP_InitsInfo::GetCohortIDs(int c, std::vector<IDPtr>& ids) const {
    if ( c <= MaxCohort() ) {
        for ( const auto& inst : instances[c] ) {
            auto ident = inst->InitIdentifier();
            if ( ident )
                ids.emplace_back(std::move(ident));
        }
    }
}
void CPP_InitsInfo::AddInstance(shared_ptr<CPP_InitInfo> g) { void CPP_InitsInfo::AddInstance(shared_ptr<CPP_InitInfo> g) {
auto final_init_cohort = g->FinalInitCohort(); auto final_init_cohort = g->FinalInitCohort();
@ -375,16 +385,17 @@ void GlobalLookupInitInfo::InitializerVals(std::vector<std::string>& ivs) const
ivs.push_back(val); ivs.push_back(val);
} }
GlobalInitInfo::GlobalInitInfo(CPPCompile* c, IDPtr g, string _CPP_name) GlobalInitInfo::GlobalInitInfo(CPPCompile* c, IDPtr _g, string _CPP_name)
: GlobalLookupInitInfo(c, g, std::move(_CPP_name)) { : GlobalLookupInitInfo(c, _g, std::move(_CPP_name)) {
g = std::move(_g);
auto& gt = g->GetType(); auto& gt = g->GetType();
auto gi = c->RegisterType(gt); auto gi = c->RegisterType(gt);
init_cohort = max(init_cohort, gi->InitCohort() + 1); init_cohort = max(init_cohort, gi->FinalInitCohort() + 1);
type = gi->Offset(); type = gi->Offset();
gi = c->RegisterAttributes(g->GetAttrs()); gi = c->RegisterAttributes(g->GetAttrs());
if ( gi ) { if ( gi ) {
init_cohort = max(init_cohort, gi->InitCohort() + 1); init_cohort = max(init_cohort, gi->FinalInitCohort() + 1);
attrs = gi->Offset(); attrs = gi->Offset();
} }
else else
@ -396,7 +407,18 @@ GlobalInitInfo::GlobalInitInfo(CPPCompile* c, IDPtr g, string _CPP_name)
gc.is_enum_const = g->IsEnumConst(); gc.is_enum_const = g->IsEnumConst();
gc.is_type = g->IsType(); gc.is_type = g->IsType();
val = ValElem(c, nullptr); // empty because we initialize dynamically // We don't initialize the global directly because its initialization
// might be an expression rather than a simple constant. Instead we
// make sure that it can be generated per the use of GetCohortIDs()
// in CPPCompile::GenFinishInit().
val = ValElem(c, nullptr);
// This code here parallels that of CPPCompile::InitializeGlobal().
const auto& oi = g->GetOptInfo();
for ( auto& init : oi->GetInitExprs() )
// We use GetOp2() because initialization expressions are
// captured in the form of some sort of assignment.
init_cohort = max(init_cohort, c->ReadyExpr(init->GetOp2()) + 1);
if ( gt->Tag() == TYPE_FUNC && (! g->GetVal() || g->GetVal()->AsFunc()->GetKind() == Func::BUILTIN_FUNC) ) if ( gt->Tag() == TYPE_FUNC && (! g->GetVal() || g->GetVal()->AsFunc()->GetKind() == Func::BUILTIN_FUNC) )
// Be sure not to try to create BiFs. In addition, GetVal() can be // Be sure not to try to create BiFs. In addition, GetVal() can be

View file

@ -126,6 +126,10 @@ public:
// to the given cohort c. // to the given cohort c.
int CohortSize(int c) const { return c > MaxCohort() ? 0 : instances[c].size(); } int CohortSize(int c) const { return c > MaxCohort() ? 0 : instances[c].size(); }
// Populates the given vector with associated identifiers seen
// in the cohort, if any.
void GetCohortIDs(int c, std::vector<IDPtr>& ids) const;
// Returns the C++ type associated with this collection's run-time vector. // Returns the C++ type associated with this collection's run-time vector.
// This might be, for example, "PatternVal" // This might be, for example, "PatternVal"
const std::string& CPPType() const { return CPP_type; } const std::string& CPPType() const { return CPP_type; }
@ -302,6 +306,9 @@ public:
// constructor parameter. // constructor parameter.
virtual void InitializerVals(std::vector<std::string>& ivs) const = 0; virtual void InitializerVals(std::vector<std::string>& ivs) const = 0;
// Returns any associated identifier, or nil if none.
virtual IDPtr InitIdentifier() const { return nullptr; }
const Obj* InitObj() const { return o; } const Obj* InitObj() const { return o; }
protected: protected:
@ -517,7 +524,10 @@ public:
std::string InitializerType() const override { return "CPP_GlobalInit"; } std::string InitializerType() const override { return "CPP_GlobalInit"; }
void InitializerVals(std::vector<std::string>& ivs) const override; void InitializerVals(std::vector<std::string>& ivs) const override;
IDPtr InitIdentifier() const override { return g; }
protected: protected:
IDPtr g;
int type; int type;
int attrs; int attrs;
std::string val; std::string val;

View file

@ -114,6 +114,12 @@ void activate_bodies__CPP(const char* fn, const char* module, bool exported, Typ
fg->SetType(ft); fg->SetType(ft);
} }
if ( ! fg->GetType() )
// This can happen both because we just installed the ID and
// because events registered by Spicy don't have types associated
// with them initially.
fg->SetType(ft);
if ( ! fg->GetAttr(ATTR_IS_USED) ) if ( ! fg->GetAttr(ATTR_IS_USED) )
fg->AddAttr(make_intrusive<Attr>(ATTR_IS_USED)); fg->AddAttr(make_intrusive<Attr>(ATTR_IS_USED));
@ -179,6 +185,9 @@ IDPtr lookup_global__CPP(const char* g, const TypePtr& t, const GlobalCharacteri
gl->MakeType(); gl->MakeType();
} }
else if ( ! gl->GetType() )
gl->SetType(t);
return gl; return gl;
} }

View file

@ -507,6 +507,9 @@ void CPP_GlobalInit::Generate(InitsManager* im, std::vector<void*>& /* inits_vec
if ( attrs >= 0 ) if ( attrs >= 0 )
global->SetAttrs(im->Attributes(attrs)); global->SetAttrs(im->Attributes(attrs));
if ( t->Tag() == TYPE_FUNC )
global->AddAttr(make_intrusive<Attr>(ATTR_IS_USED));
} }
size_t generate_indices_set(int* inits, std::vector<std::vector<int>>& indices_set) { size_t generate_indices_set(int* inits, std::vector<std::vector<int>>& indices_set) {

View file

@ -87,7 +87,17 @@ void CPPCompile::GenInitStmt(const InitStmt* init) {
continue; continue;
} }
Emit("%s = make_intrusive<%s>(cast_intrusive<%s>(%s));", IDName(aggr), type_name, type_type, type_ind); auto aggr_name = IDName(aggr);
Emit("%s = make_intrusive<%s>(cast_intrusive<%s>(%s));", aggr_name, type_name, type_type, type_ind);
const auto& attrs = aggr->GetAttrs();
if ( ! attrs )
continue;
auto attrs_offset = AttributesOffset(attrs);
auto attrs_str = "CPP__Attributes__[" + Fmt(attrs_offset) + "]";
Emit("%s->SetAttrs(%s);", aggr_name, attrs_str);
} }
} }

View file

@ -7,7 +7,7 @@ namespace zeek::detail {
using namespace std; using namespace std;
void CPPCompile::CreateGlobal(IDPtr g) { bool CPPCompile::CreateGlobal(IDPtr g) {
auto gn = string(g->Name()); auto gn = string(g->Name());
bool is_bif = pfs->BiFGlobals().contains(g); bool is_bif = pfs->BiFGlobals().contains(g);
@ -16,15 +16,17 @@ void CPPCompile::CreateGlobal(IDPtr g) {
// then we'll call it directly. // then we'll call it directly.
if ( compilable_funcs.contains(gn) ) { if ( compilable_funcs.contains(gn) ) {
AddGlobal(gn, "zf"); AddGlobal(gn, "zf");
return; return false;
} }
if ( is_bif ) { if ( is_bif ) {
AddBiF(g, false); AddBiF(g, false);
return; return false;
} }
} }
bool should_init = false;
if ( AddGlobal(gn, "gl") ) { // We'll be creating this global. if ( AddGlobal(gn, "gl") ) { // We'll be creating this global.
Emit("IDPtr %s;", globals[gn]); Emit("IDPtr %s;", globals[gn]);
@ -32,9 +34,7 @@ void CPPCompile::CreateGlobal(IDPtr g) {
// This is an event that's also used as a variable. // This is an event that's also used as a variable.
Emit("EventHandlerPtr %s_ev;", globals[gn]); Emit("EventHandlerPtr %s_ev;", globals[gn]);
auto gi = GenerateGlobalInit(g); should_init = true;
global_id_info->AddInstance(gi);
global_gis[g] = std::move(gi);
} }
if ( is_bif ) if ( is_bif )
@ -43,6 +43,8 @@ void CPPCompile::CreateGlobal(IDPtr g) {
AddBiF(g, true); AddBiF(g, true);
global_vars.emplace(g); global_vars.emplace(g);
return should_init;
} }
std::shared_ptr<CPP_InitInfo> CPPCompile::RegisterGlobal(IDPtr g) { std::shared_ptr<CPP_InitInfo> CPPCompile::RegisterGlobal(IDPtr g) {

View file

@ -12,7 +12,10 @@ std::shared_ptr<CPP_InitInfo> RegisterGlobal(IDPtr g);
private: private:
// Generate declarations associated with the given global, and, if it's used // Generate declarations associated with the given global, and, if it's used
// as a variable (not just as a function being called), track it as such. // as a variable (not just as a function being called), track it as such.
void CreateGlobal(IDPtr g); //
// Returns true if it needs initialization (which we do separately to avoid
// tripping across dependencies between globals).
bool CreateGlobal(IDPtr g);
// Low-level function for generating an initializer for a global. Takes // Low-level function for generating an initializer for a global. Takes
// into account differences for standalone-compilation. // into account differences for standalone-compilation.
@ -50,10 +53,6 @@ std::string CaptureName(const IDPtr& l) const;
// stripped or transformed, and guaranteed not to conflict with C++ keywords. // stripped or transformed, and guaranteed not to conflict with C++ keywords.
std::string Canonicalize(const std::string& name) const; std::string Canonicalize(const std::string& name) const;
// Returns the name of the global corresponding to an expression (which must
// be a EXPR_NAME).
std::string GlobalName(const ExprPtr& e) { return globals[e->AsNameExpr()->Id()->Name()]; }
// Globals that are used (appear in the profiles) of the bodies we're // Globals that are used (appear in the profiles) of the bodies we're
// compiling. Includes globals just used as functions to call. // compiling. Includes globals just used as functions to call.
std::unordered_set<IDPtr> all_accessed_globals; std::unordered_set<IDPtr> all_accessed_globals;

View file

@ -71,6 +71,7 @@ void IDOptInfo::AddInitExpr(ExprPtr init_expr, InitClass ic) {
global_init_exprs.emplace_back(my_id, init_expr, ic); global_init_exprs.emplace_back(my_id, init_expr, ic);
init_exprs.emplace_back(std::move(init_expr)); init_exprs.emplace_back(std::move(init_expr));
init_classes.emplace_back(ic);
} }
void IDOptInfo::SetDefinedAfter(const Stmt* s, const ExprPtr& e, const std::vector<const Stmt*>& conf_blocks, void IDOptInfo::SetDefinedAfter(const Stmt* s, const ExprPtr& e, const std::vector<const Stmt*>& conf_blocks,

View file

@ -148,8 +148,9 @@ public:
// be done with the ExprPtr form of ID::SetVal. // be done with the ExprPtr form of ID::SetVal.
void AddInitExpr(ExprPtr init_expr, InitClass ic = INIT_NONE); void AddInitExpr(ExprPtr init_expr, InitClass ic = INIT_NONE);
// Returns the initialization expressions for this identifier. // Returns the initialization expressions or classes for this identifier.
const std::vector<ExprPtr>& GetInitExprs() const { return init_exprs; } const std::vector<ExprPtr>& GetInitExprs() const { return init_exprs; }
const std::vector<InitClass>& GetInitClasses() const { return init_classes; }
// Returns a list of the initialization expressions seen for all // Returns a list of the initialization expressions seen for all
// globals, ordered by when they were processed. // globals, ordered by when they were processed.
@ -253,6 +254,11 @@ private:
// one of the earlier instances rather than the last one. // one of the earlier instances rather than the last one.
std::vector<ExprPtr> init_exprs; std::vector<ExprPtr> init_exprs;
// A parallel array of the associated initialization classes.
// We keep the two separate rather than a std::pair because the
// most common use is to just loop over the expressions.
std::vector<InitClass> init_classes;
// Tracks initializations of globals in the order they're seen. // Tracks initializations of globals in the order they're seen.
static std::vector<IDInitInfo> global_init_exprs; static std::vector<IDInitInfo> global_init_exprs;