Reworked initialization of globals for -O gen-standalone-C++ code

This commit is contained in:
Vern Paxson 2025-09-30 11:27:53 -07:00
parent 0700427bac
commit eb13ff3110
15 changed files with 155 additions and 62 deletions

View file

@ -15,8 +15,12 @@ public:
// Returns the associated initialization info. In addition, consts_offset
// returns an offset into an initialization-time global that tracks all
// constructed globals, providing general access to them for aggregate
// constants.
// constants. The second form is for when this isn't needed.
std::shared_ptr<CPP_InitInfo> RegisterConstant(const ValPtr& vp, int& consts_offset);
// Convenience form for callers that have no use for the offset into the
// initialization-time global: forwards to the two-argument version and
// throws away the offset it reports.
std::shared_ptr<CPP_InitInfo> RegisterConstant(const ValPtr& vp) {
    int unused_offset; // filled in by the call below, then discarded
    auto init_info = RegisterConstant(vp, unused_offset);
    return init_info;
}
private:
// Maps (non-native) constants to associated C++ globals.

View file

@ -125,8 +125,16 @@ void CPPCompile::Compile(bool report_uncompilable) {
NL();
IDSet globals_to_initialize;
for ( auto& g : all_accessed_globals )
CreateGlobal(g);
if ( CreateGlobal(g) )
globals_to_initialize.insert(g);
for ( auto& g : globals_to_initialize ) {
auto gi = GenerateGlobalInit(g);
global_id_info->AddInstance(gi);
global_gis[g] = std::move(gi);
}
for ( const auto& e : accessed_events )
if ( AddGlobal(e, "gl") )
@ -407,11 +415,6 @@ void CPPCompile::RegisterCompiledBody(const string& f) {
}
void CPPCompile::GenEpilog() {
if ( standalone ) {
NL();
InitializeGlobals();
}
NL();
for ( const auto& ii : init_infos )
GenInitExpr(ii.second);
@ -533,6 +536,7 @@ void CPPCompile::GenFinishInit() {
Emit("generate_indices_set(CPP__Indices__init, InitIndices);");
Emit("std::map<TypeTag, std::shared_ptr<CPP_AbstractInitAccessor>> InitConsts;");
Emit("Frame* f__CPP = nullptr;");
NL();
for ( const auto& ci : const_info ) {
@ -551,9 +555,17 @@ void CPPCompile::GenFinishInit() {
max_cohort = std::max(max_cohort, gi->MaxCohort());
for ( auto c = 0; c <= max_cohort; ++c )
for ( const auto& gi : all_global_info )
if ( gi->CohortSize(c) > 0 )
for ( const auto& gi : all_global_info ) {
if ( gi->CohortSize(c) == 0 )
continue;
Emit("%s.InitializeCohort(&im, %s);", gi->InitializersName(), Fmt(c));
vector<IDPtr> init_ids;
gi->GetCohortIDs(c, init_ids);
for ( auto& ii : init_ids )
InitializeGlobal(ii);
}
// Populate mappings for dynamic offsets.
NL();
@ -567,13 +579,6 @@ void CPPCompile::GenFinishInit() {
Emit("load_BiFs__CPP();");
if ( standalone )
// Note, BiFs will also be loaded again later, because the
// main initialization finishes upon loading of the activation
// script, rather than after all scripts have been parsed
// and plugins (with BiFs) have been loaded.
Emit("init_globals__CPP();");
EndBlock();
}

View file

@ -176,8 +176,7 @@ string CPPCompile::GenConstExpr(const ConstExpr* c, GenType gt) {
if ( ! IsNativeType(t) ) {
auto v = c->ValuePtr();
int consts_offset; // ignored
(void)RegisterConstant(v, consts_offset);
(void)RegisterConstant(v);
return NativeToGT(const_vals[v.get()]->Name(), t, gt);
}
@ -1296,4 +1295,27 @@ string CPPCompile::GenEnum(const TypePtr& t, const ValPtr& ev) {
return string("enum_mapping[") + Fmt(mapping_slot) + "]";
}
// Ensures that initialization information exists for everything the given
// expression refers to: its globals, its constants, its types, and the
// attributes (plus their types) used by its constructors, as reported by
// profiling the expression.
//
// Returns one more than the largest FinalInitCohort() across all of those
// items, or 0 if the expression refers to none of them.
int CPPCompile::ReadyExpr(const ExprPtr& e) {
    auto profile = make_unique<ProfileFunc>(e.get());
    int ready_cohort = 0;

    // Bumps ready_cohort so that it lies beyond the final cohort of the
    // given piece of initialization information.
    auto note_dependency = [&ready_cohort](const auto& info) {
        ready_cohort = max(ready_cohort, info->FinalInitCohort() + 1);
    };

    for ( const auto& gl : profile->AllGlobals() )
        note_dependency(GenerateGlobalInit(gl));

    for ( const auto& cnst : profile->Constants() )
        note_dependency(RegisterConstant(cnst->ValuePtr()));

    for ( const auto& raw_type : profile->OrderedTypes() ) {
        // The profile hands back bare const pointers; wrap them in a
        // new reference for registration.
        TypePtr tp{NewRef{}, const_cast<Type*>(raw_type)};
        note_dependency(RegisterType(tp));
    }

    for ( auto& [raw_attrs, attrs_type] : profile->ConstructorAttrs() ) {
        AttributesPtr ap{NewRef{}, const_cast<Attributes*>(raw_attrs)};
        note_dependency(RegisterAttributes(ap));
        note_dependency(RegisterType(attrs_type));
    }

    return ready_cohort;
}
} // namespace zeek::detail

View file

@ -117,6 +117,11 @@ std::string GenIntVector(const std::vector<int>& vec);
std::string GenField(const ExprPtr& rec, int field);
std::string GenEnum(const TypePtr& et, const ValPtr& ev);
// Creates all the initializations needed to evaluate the given expression.
// Returns one more than the largest final cohort among these (0 if none).
friend class GlobalInitInfo;
int ReadyExpr(const ExprPtr& e);
// For records that are extended via redef's, maps fields beyond the original
// definition to locations in the global (in the compiled code) "field_mapping"
// array.

View file

@ -185,42 +185,35 @@ void CPPCompile::InitializeConsts() {
EndBlock(true);
}
void CPPCompile::InitializeGlobals() {
Emit("static void init_globals__CPP()");
StartBlock();
void CPPCompile::InitializeGlobal(const IDPtr& g) {
const auto& oi = g->GetOptInfo();
if ( ! oi )
return;
Emit("Frame* f__CPP = nullptr;");
NL();
const auto& exprs = oi->GetInitExprs();
const auto& init_classes = oi->GetInitClasses();
auto& ofiles = analysis_options.only_files;
ASSERT(exprs.size() == init_classes.size());
for ( const auto& ginit : IDOptInfo::GetGlobalInitExprs() ) {
IDPtr g{NewRef{}, const_cast<ID*>(ginit.Id())};
auto init = exprs.begin();
auto ic = init_classes.begin();
if ( ! ofiles.empty() && obj_matches_opt_files(g) != AnalyzeDecision::SHOULD )
continue;
if ( ! accessed_globals.contains(g) )
continue;
auto ic = ginit.IC();
auto& init = ginit.Init();
if ( ic == INIT_NONE )
Emit(GenExpr(init, GEN_NATIVE, true) + ";");
for ( ; init != exprs.end(); ++init, ++ic ) {
if ( *ic == INIT_NONE )
Emit(GenExpr(*init, GEN_NATIVE, true) + ";");
else {
// This branch occurs for += or -= initializations that
// use associated functions.
string ics;
if ( ic == INIT_EXTRA )
if ( *ic == INIT_EXTRA )
ics = "INIT_EXTRA";
else if ( ic == INIT_REMOVE )
else if ( *ic == INIT_REMOVE )
ics = "INIT_REMOVE";
else
reporter->FatalError("bad initialization class in CPPCompile::InitializeGlobals()");
reporter->FatalError("bad initialization class in CPPCompile::InitializeGlobal()");
Emit("%s->SetValue(%s, %s);", globals[g->Name()], GenExpr(init, GEN_NATIVE, true), ics);
Emit("%s->SetValue(%s, %s);", globals[g->Name()], GenExpr(*init, GEN_NATIVE, true), ics);
}
const auto& attrs = g->GetAttrs();
@ -230,8 +223,6 @@ void CPPCompile::InitializeGlobals() {
Emit("%s->SetAttrs(%s);", globals[g->Name()], attrs_str);
}
}
EndBlock();
}
void CPPCompile::GenInitHook() {

View file

@ -93,6 +93,10 @@ void InitializeHashes();
// Generate code to initialize indirect references to constants.
void InitializeConsts();
// Generate code to initialize a global (using dynamic statements rather than
// constants).
void InitializeGlobal(const IDPtr& g);
// Generate code to initialize globals (using dynamic statements rather than
// constants).
void InitializeGlobals();

View file

@ -8,6 +8,7 @@
#include "zeek/script_opt/CPP/AttrExprType.h"
#include "zeek/script_opt/CPP/Compile.h"
#include "zeek/script_opt/CPP/RuntimeInits.h"
#include "zeek/script_opt/IdOptInfo.h"
using namespace std;
@ -15,6 +16,15 @@ namespace zeek::detail {
string CPP_InitsInfo::Name(int index) const { return base_name + "[" + Fmt(index) + "]"; }
// Appends to "ids" the identifier of every instance in cohort "c" that
// reports one via InitIdentifier(). Instances without an identifier are
// passed over, and a cohort beyond MaxCohort() contributes nothing.
// Existing contents of "ids" are left in place.
void CPP_InitsInfo::GetCohortIDs(int c, std::vector<IDPtr>& ids) const {
    if ( c <= MaxCohort() ) {
        for ( auto& member : instances[c] ) {
            auto member_id = member->InitIdentifier();
            if ( member_id )
                ids.emplace_back(std::move(member_id));
        }
    }
}
void CPP_InitsInfo::AddInstance(shared_ptr<CPP_InitInfo> g) {
auto final_init_cohort = g->FinalInitCohort();
@ -375,16 +385,17 @@ void GlobalLookupInitInfo::InitializerVals(std::vector<std::string>& ivs) const
ivs.push_back(val);
}
GlobalInitInfo::GlobalInitInfo(CPPCompile* c, IDPtr g, string _CPP_name)
: GlobalLookupInitInfo(c, g, std::move(_CPP_name)) {
GlobalInitInfo::GlobalInitInfo(CPPCompile* c, IDPtr _g, string _CPP_name)
: GlobalLookupInitInfo(c, _g, std::move(_CPP_name)) {
g = std::move(_g);
auto& gt = g->GetType();
auto gi = c->RegisterType(gt);
init_cohort = max(init_cohort, gi->InitCohort() + 1);
init_cohort = max(init_cohort, gi->FinalInitCohort() + 1);
type = gi->Offset();
gi = c->RegisterAttributes(g->GetAttrs());
if ( gi ) {
init_cohort = max(init_cohort, gi->InitCohort() + 1);
init_cohort = max(init_cohort, gi->FinalInitCohort() + 1);
attrs = gi->Offset();
}
else
@ -396,7 +407,18 @@ GlobalInitInfo::GlobalInitInfo(CPPCompile* c, IDPtr g, string _CPP_name)
gc.is_enum_const = g->IsEnumConst();
gc.is_type = g->IsType();
val = ValElem(c, nullptr); // empty because we initialize dynamically
// We don't initialize the global directly because its initialization
// might be an expression rather than a simple constant. Instead we
// make sure that it can be generated per the use of GetCohortIDs()
// in CPPCompile::GenFinishInit().
val = ValElem(c, nullptr);
// This code here parallels that of CPPCompile::InitializeGlobal().
const auto& oi = g->GetOptInfo();
for ( auto& init : oi->GetInitExprs() )
// We use GetOp2() because initialization expressions are
// captured in the form of some sort of assignment.
init_cohort = max(init_cohort, c->ReadyExpr(init->GetOp2()) + 1);
if ( gt->Tag() == TYPE_FUNC && (! g->GetVal() || g->GetVal()->AsFunc()->GetKind() == Func::BUILTIN_FUNC) )
// Be sure not to try to create BiFs. In addition, GetVal() can be

View file

@ -126,6 +126,10 @@ public:
// to the given cohort c.
int CohortSize(int c) const { return c > MaxCohort() ? 0 : instances[c].size(); }
// Populates the given vector with associated identifiers seen
// in the cohort, if any.
void GetCohortIDs(int c, std::vector<IDPtr>& ids) const;
// Returns the C++ type associated with this collection's run-time vector.
// This might be, for example, "PatternVal"
const std::string& CPPType() const { return CPP_type; }
@ -302,6 +306,9 @@ public:
// constructor parameter.
virtual void InitializerVals(std::vector<std::string>& ivs) const = 0;
// Returns any associated identifier, or nil if none.
virtual IDPtr InitIdentifier() const { return nullptr; }
const Obj* InitObj() const { return o; }
protected:
@ -517,7 +524,10 @@ public:
std::string InitializerType() const override { return "CPP_GlobalInit"; }
void InitializerVals(std::vector<std::string>& ivs) const override;
IDPtr InitIdentifier() const override { return g; }
protected:
IDPtr g;
int type;
int attrs;
std::string val;

View file

@ -114,6 +114,12 @@ void activate_bodies__CPP(const char* fn, const char* module, bool exported, Typ
fg->SetType(ft);
}
if ( ! fg->GetType() )
// This can happen not only because we just installed the ID, but also
// because events registered by Spicy don't have types associated
// with them initially.
fg->SetType(ft);
if ( ! fg->GetAttr(ATTR_IS_USED) )
fg->AddAttr(make_intrusive<Attr>(ATTR_IS_USED));
@ -179,6 +185,9 @@ IDPtr lookup_global__CPP(const char* g, const TypePtr& t, const GlobalCharacteri
gl->MakeType();
}
else if ( ! gl->GetType() )
gl->SetType(t);
return gl;
}

View file

@ -507,6 +507,9 @@ void CPP_GlobalInit::Generate(InitsManager* im, std::vector<void*>& /* inits_vec
if ( attrs >= 0 )
global->SetAttrs(im->Attributes(attrs));
if ( t->Tag() == TYPE_FUNC )
global->AddAttr(make_intrusive<Attr>(ATTR_IS_USED));
}
size_t generate_indices_set(int* inits, std::vector<std::vector<int>>& indices_set) {

View file

@ -87,7 +87,17 @@ void CPPCompile::GenInitStmt(const InitStmt* init) {
continue;
}
Emit("%s = make_intrusive<%s>(cast_intrusive<%s>(%s));", IDName(aggr), type_name, type_type, type_ind);
auto aggr_name = IDName(aggr);
Emit("%s = make_intrusive<%s>(cast_intrusive<%s>(%s));", aggr_name, type_name, type_type, type_ind);
const auto& attrs = aggr->GetAttrs();
if ( ! attrs )
continue;
auto attrs_offset = AttributesOffset(attrs);
auto attrs_str = "CPP__Attributes__[" + Fmt(attrs_offset) + "]";
Emit("%s->SetAttrs(%s);", aggr_name, attrs_str);
}
}

View file

@ -7,7 +7,7 @@ namespace zeek::detail {
using namespace std;
void CPPCompile::CreateGlobal(IDPtr g) {
bool CPPCompile::CreateGlobal(IDPtr g) {
auto gn = string(g->Name());
bool is_bif = pfs->BiFGlobals().contains(g);
@ -16,15 +16,17 @@ void CPPCompile::CreateGlobal(IDPtr g) {
// then we'll call it directly.
if ( compilable_funcs.contains(gn) ) {
AddGlobal(gn, "zf");
return;
return false;
}
if ( is_bif ) {
AddBiF(g, false);
return;
return false;
}
}
bool should_init = false;
if ( AddGlobal(gn, "gl") ) { // We'll be creating this global.
Emit("IDPtr %s;", globals[gn]);
@ -32,9 +34,7 @@ void CPPCompile::CreateGlobal(IDPtr g) {
// This is an event that's also used as a variable.
Emit("EventHandlerPtr %s_ev;", globals[gn]);
auto gi = GenerateGlobalInit(g);
global_id_info->AddInstance(gi);
global_gis[g] = std::move(gi);
should_init = true;
}
if ( is_bif )
@ -43,6 +43,8 @@ void CPPCompile::CreateGlobal(IDPtr g) {
AddBiF(g, true);
global_vars.emplace(g);
return should_init;
}
std::shared_ptr<CPP_InitInfo> CPPCompile::RegisterGlobal(IDPtr g) {

View file

@ -12,7 +12,10 @@ std::shared_ptr<CPP_InitInfo> RegisterGlobal(IDPtr g);
private:
// Generate declarations associated with the given global, and, if it's used
// as a variable (not just as a function being called), track it as such.
void CreateGlobal(IDPtr g);
//
// Returns true if it needs initialization (which we do separately to avoid
// tripping across dependencies between globals).
bool CreateGlobal(IDPtr g);
// Low-level function for generating an initializer for a global. Takes
// into account differences for standalone-compilation.
@ -50,10 +53,6 @@ std::string CaptureName(const IDPtr& l) const;
// stripped or transformed, and guaranteed not to conflict with C++ keywords.
std::string Canonicalize(const std::string& name) const;
// Returns the name of the global corresponding to an expression (which must
// be a EXPR_NAME).
std::string GlobalName(const ExprPtr& e) { return globals[e->AsNameExpr()->Id()->Name()]; }
// Globals that are used (appear in the profiles) of the bodies we're
// compiling. Includes globals just used as functions to call.
std::unordered_set<IDPtr> all_accessed_globals;

View file

@ -71,6 +71,7 @@ void IDOptInfo::AddInitExpr(ExprPtr init_expr, InitClass ic) {
global_init_exprs.emplace_back(my_id, init_expr, ic);
init_exprs.emplace_back(std::move(init_expr));
init_classes.emplace_back(ic);
}
void IDOptInfo::SetDefinedAfter(const Stmt* s, const ExprPtr& e, const std::vector<const Stmt*>& conf_blocks,

View file

@ -148,8 +148,9 @@ public:
// be done with the ExprPtr form of ID::SetVal.
void AddInitExpr(ExprPtr init_expr, InitClass ic = INIT_NONE);
// Returns the initialization expressions for this identifier.
// Returns the initialization expressions or classes for this identifier.
const std::vector<ExprPtr>& GetInitExprs() const { return init_exprs; }
const std::vector<InitClass>& GetInitClasses() const { return init_classes; }
// Returns a list of the initialization expressions seen for all
// globals, ordered by when they were processed.
@ -253,6 +254,11 @@ private:
// one of the earlier instances rather than the last one.
std::vector<ExprPtr> init_exprs;
// A parallel array of the associated initialization classes.
// We keep the two separate rather than a std::pair because the
// most common use is to just loop over the expressions.
std::vector<InitClass> init_classes;
// Tracks initializations of globals in the order they're seen.
static std::vector<IDInitInfo> global_init_exprs;