introduced simplified initialization for non-standalone -O gen-C++ code

tied -O gen-standalone-C++ to use of --optimize-files
This commit is contained in:
Vern Paxson 2024-12-06 16:25:22 -08:00
parent 612d99e751
commit 2e69a8870a
11 changed files with 189 additions and 56 deletions

View file

@ -60,6 +60,11 @@ void GenFinishInit();
// Generate the function that registers compiled script bodies. // Generate the function that registers compiled script bodies.
void GenRegisterBodies(); void GenRegisterBodies();
public:
// Whether we're generating "standalone" code.
bool TargetingStandalone() const { return standalone; }
private:
// True if the given function (plus body and profile) is one that should be // True if the given function (plus body and profile) is one that should be
// compiled. If non-nil, sets reason to the the reason why, if there's a // compiled. If non-nil, sets reason to the the reason why, if there's a
// fundamental problem. If however the function should be skipped for other // fundamental problem. If however the function should be skipped for other

View file

@ -104,11 +104,18 @@ void CPPCompile::InitializeFieldMappings() {
StartBlock(); StartBlock();
string type_arg, attrs_arg;
if ( ! standalone )
type_arg = attrs_arg = "DO_NOT_CONSTRUCT_VALUE_MARKER";
for ( const auto& mapping : field_decls ) { for ( const auto& mapping : field_decls ) {
auto rt_arg = Fmt(mapping.first); auto rt_arg = Fmt(mapping.first);
auto td = mapping.second; auto td = mapping.second;
auto type_arg = Fmt(TypeOffset(td->type));
auto attrs_arg = Fmt(AttributesOffset(td->attrs)); if ( standalone ) {
type_arg = Fmt(TypeOffset(td->type));
attrs_arg = Fmt(AttributesOffset(td->attrs));
}
Emit("CPP_FieldMapping(%s, \"%s\", %s, %s),", rt_arg, td->id, type_arg, attrs_arg); Emit("CPP_FieldMapping(%s, \"%s\", %s, %s),", rt_arg, td->id, type_arg, attrs_arg);
} }
@ -121,8 +128,10 @@ void CPPCompile::InitializeEnumMappings() {
StartBlock(); StartBlock();
auto create_if_missing = standalone ? "true" : "false";
for ( const auto& mapping : enum_names ) for ( const auto& mapping : enum_names )
Emit("CPP_EnumMapping(%s, \"%s\"),", Fmt(mapping.first), mapping.second); Emit("CPP_EnumMapping(%s, \"%s\", %s),", Fmt(mapping.first), mapping.second, create_if_missing);
EndBlock(true); EndBlock(true);
} }
@ -178,9 +187,15 @@ void CPPCompile::InitializeGlobals() {
Emit("Frame* f__CPP = nullptr;"); Emit("Frame* f__CPP = nullptr;");
NL(); NL();
auto& ofiles = analysis_options.only_files;
for ( const auto& ginit : IDOptInfo::GetGlobalInitExprs() ) { for ( const auto& ginit : IDOptInfo::GetGlobalInitExprs() ) {
auto g = ginit.Id(); auto g = ginit.Id();
if ( pfs->Globals().count(g) == 0 )
if ( ! ofiles.empty() && ! obj_matches_opt_files(g) )
continue;
if ( accessed_globals.count(g) == 0 )
continue; continue;
auto ic = ginit.IC(); auto ic = ginit.IC();

View file

@ -355,10 +355,18 @@ AttrsInfo::AttrsInfo(CPPCompile* _c, const AttributesPtr& _attrs) : CompoundItem
} }
} }
GlobalInitInfo::GlobalInitInfo(CPPCompile* c, const ID* g, string _CPP_name) GlobalLookupInitInfo::GlobalLookupInitInfo(CPPCompile* c, const ID* g, string _CPP_name)
: CPP_InitInfo(g), CPP_name(std::move(_CPP_name)) { : CPP_InitInfo(g), CPP_name(std::move(_CPP_name)) {
Zeek_name = g->Name(); Zeek_name = g->Name();
}
void GlobalLookupInitInfo::InitializerVals(std::vector<std::string>& ivs) const {
ivs.push_back(CPP_name);
ivs.push_back(string("\"") + Zeek_name + "\"");
}
GlobalInitInfo::GlobalInitInfo(CPPCompile* c, const ID* g, string _CPP_name)
: GlobalLookupInitInfo(c, g, std::move(_CPP_name)) {
auto& gt = g->GetType(); auto& gt = g->GetType();
auto gi = c->RegisterType(gt); auto gi = c->RegisterType(gt);
init_cohort = max(init_cohort, gi->InitCohort() + 1); init_cohort = max(init_cohort, gi->InitCohort() + 1);
@ -375,7 +383,7 @@ GlobalInitInfo::GlobalInitInfo(CPPCompile* c, const ID* g, string _CPP_name)
exported = g->IsExport(); exported = g->IsExport();
val = ValElem(c, nullptr); // empty because we initialize dynamically val = ValElem(c, nullptr); // empty because we initialize dynamically
if ( gt->Tag() == TYPE_FUNC && ! g->GetVal() ) if ( gt->Tag() == TYPE_FUNC && (! g->GetVal() || g->GetVal()->AsFunc()->GetKind() == Func::BUILTIN_FUNC) )
// Remember this peculiarity so we can recreate it for // Remember this peculiarity so we can recreate it for
// error-behavior-compatibility. // error-behavior-compatibility.
func_with_no_val = true; func_with_no_val = true;
@ -549,7 +557,7 @@ RecordTypeInfo::RecordTypeInfo(CPPCompile* _c, TypePtr _t) : AbstractTypeInfo(_c
field_types.push_back(r_i->type); field_types.push_back(r_i->type);
if ( r_i->attrs ) { if ( c->TargetingStandalone() && r_i->attrs ) {
gi = c->RegisterAttributes(r_i->attrs); gi = c->RegisterAttributes(r_i->attrs);
final_init_cohort = max(final_init_cohort, gi->InitCohort() + 1); final_init_cohort = max(final_init_cohort, gi->InitCohort() + 1);
field_attrs.push_back(gi->Offset()); field_attrs.push_back(gi->Offset());
@ -576,6 +584,13 @@ void RecordTypeInfo::AddInitializerVals(std::vector<std::string>& ivs) const {
} }
} }
NamedTypeInfo::NamedTypeInfo(CPPCompile* _c, TypePtr _t) : AbstractTypeInfo(_c, std::move(_t)) {}
void NamedTypeInfo::AddInitializerVals(std::vector<std::string>& ivs) const {
ivs.emplace_back(Fmt(NAMED_TYPE_MARKER));
ivs.emplace_back(Fmt(c->TrackString(t->GetName())));
}
void IndicesManager::Generate(CPPCompile* c) { void IndicesManager::Generate(CPPCompile* c) {
c->Emit("int CPP__Indices__init[] ="); c->Emit("int CPP__Indices__init[] =");
c->StartBlock(); c->StartBlock();

View file

@ -478,8 +478,22 @@ public:
AttrsInfo(CPPCompile* c, const AttributesPtr& attrs); AttrsInfo(CPPCompile* c, const AttributesPtr& attrs);
}; };
// Information for initialization a Zeek global. // A lightweight initializer for a Zeek global that will look it up at
class GlobalInitInfo : public CPP_InitInfo { // initialization time but not create it if missing.
class GlobalLookupInitInfo : public CPP_InitInfo {
public:
GlobalLookupInitInfo(CPPCompile* c, const ID* g, std::string CPP_name);
std::string InitializerType() const override { return "CPP_GlobalLookupInit"; }
void InitializerVals(std::vector<std::string>& ivs) const override;
protected:
std::string Zeek_name;
std::string CPP_name;
};
// Information for initializing a Zeek global.
class GlobalInitInfo : public GlobalLookupInitInfo {
public: public:
GlobalInitInfo(CPPCompile* c, const ID* g, std::string CPP_name); GlobalInitInfo(CPPCompile* c, const ID* g, std::string CPP_name);
@ -639,6 +653,14 @@ private:
std::vector<int> field_attrs; std::vector<int> field_attrs;
}; };
// Class for initializing a named Zeek type that should be present at startup.
class NamedTypeInfo : public AbstractTypeInfo {
public:
NamedTypeInfo(CPPCompile* c, TypePtr _t);
void AddInitializerVals(std::vector<std::string>& ivs) const override;
};
// Much of the table-driven initialization is based on vectors of indices, // Much of the table-driven initialization is based on vectors of indices,
// which we represent as vectors of int's, where each int is used to index a // which we represent as vectors of int's, where each int is used to index a
// global C++ vector. This class manages such vectors. In particular, it // global C++ vector. This class manages such vectors. In particular, it

View file

@ -90,7 +90,7 @@ and also any compiled-to-C++ bodies present in the `zeek` binary that
The above workflows require the subsequent `zeek` execution to include The above workflows require the subsequent `zeek` execution to include
the `target.zeek` script. You can avoid this by replacing the first step with: the `target.zeek` script. You can avoid this by replacing the first step with:
1. `./src/zeek -O gen-standalone-C++ target.zeek >target-stand-in.zeek` 1. `./src/zeek -O gen-standalone-C++ --optimize-files=target.zeek target.zeek >target-stand-in.zeek`
(and then building as in the 2nd step above). (and then building as in the 2nd step above).
This option prints to _stdout_ a This option prints to _stdout_ a
@ -100,13 +100,9 @@ without needing to include `target.zeek` in the invocation (nor
the `-O use-C++` option). After loading the stand-in script, the `-O use-C++` option). After loading the stand-in script,
you can still access types and functions declared in `target.zeek`. you can still access types and functions declared in `target.zeek`.
Note: the implementation differences between `gen-C++` and `gen-standalone-C++` Note: `gen-standalone-C++` _must_ be used with `--optimize-files`, as the
wound up being modest enough that it might make sense to just always provide compiler needs the latter to determine which global declarations the
the latter functionality, which it turns out does not introduce any standalone code needs to initialize.
additional constraints compared to the current `gen-C++` functionality.
On the other hand, it's possible (not yet established) that code created
using `gen-C++` can be made to compile significantly faster than
standalone code.
There are additional workflows relating to running the test suite: see There are additional workflows relating to running the test suite: see
`src/script_opt/CPP/maint/README`. `src/script_opt/CPP/maint/README`.

View file

@ -235,7 +235,7 @@ void CPP_TypeInits::PreInit(InitsManager* im, int offset, ValElemVec& init_vals)
if ( tag == TYPE_LIST ) if ( tag == TYPE_LIST )
inits_vec[offset] = make_intrusive<TypeList>(); inits_vec[offset] = make_intrusive<TypeList>();
else if ( tag == TYPE_RECORD ) { else if ( tag == TYPE_RECORD && init_vals[1] != NAMED_TYPE_MARKER ) {
auto name = im->Strings(init_vals[1]); auto name = im->Strings(init_vals[1]);
if ( name[0] ) if ( name[0] )
inits_vec[offset] = get_record_type__CPP(name); inits_vec[offset] = get_record_type__CPP(name);
@ -243,7 +243,7 @@ void CPP_TypeInits::PreInit(InitsManager* im, int offset, ValElemVec& init_vals)
inits_vec[offset] = get_record_type__CPP(nullptr); inits_vec[offset] = get_record_type__CPP(nullptr);
} }
else if ( tag == TYPE_TABLE ) else if ( tag == TYPE_TABLE && init_vals[1] != NAMED_TYPE_MARKER )
inits_vec[offset] = make_intrusive<CPPTableType>(); inits_vec[offset] = make_intrusive<CPPTableType>();
// else no pre-initialization needed // else no pre-initialization needed
@ -251,6 +251,13 @@ void CPP_TypeInits::PreInit(InitsManager* im, int offset, ValElemVec& init_vals)
void CPP_TypeInits::Generate(InitsManager* im, vector<TypePtr>& ivec, int offset, ValElemVec& init_vals) const { void CPP_TypeInits::Generate(InitsManager* im, vector<TypePtr>& ivec, int offset, ValElemVec& init_vals) const {
auto tag = static_cast<TypeTag>(init_vals[0]); auto tag = static_cast<TypeTag>(init_vals[0]);
if ( init_vals.size() > 1 && init_vals[1] == NAMED_TYPE_MARKER ) {
auto name = im->Strings(init_vals[2]);
ivec[offset] = find_global__CPP(name)->GetType();
return;
}
TypePtr t; TypePtr t;
switch ( tag ) { switch ( tag ) {
case TYPE_ADDR: case TYPE_ADDR:
@ -406,6 +413,11 @@ int CPP_FieldMapping::ComputeOffset(InitsManager* im) const {
auto fm_offset = r->FieldOffset(field_name.c_str()); auto fm_offset = r->FieldOffset(field_name.c_str());
if ( fm_offset < 0 ) { // field does not exist, create it if ( fm_offset < 0 ) { // field does not exist, create it
if ( field_type == DO_NOT_CONSTRUCT_VALUE_MARKER ) {
reporter->CPPRuntimeError("record field \"%s\" missing in %s", field_name.c_str(), obj_desc(r).c_str());
exit(1);
}
fm_offset = r->NumFields(); fm_offset = r->NumFields();
auto id = util::copy_string(field_name.c_str(), field_name.size()); auto id = util::copy_string(field_name.c_str(), field_name.size());
@ -429,6 +441,11 @@ int CPP_EnumMapping::ComputeOffset(InitsManager* im) const {
auto em_offset = e->Lookup(e_name); auto em_offset = e->Lookup(e_name);
if ( em_offset < 0 ) { // enum constant does not exist, create it if ( em_offset < 0 ) { // enum constant does not exist, create it
if ( ! construct_if_missing ) {
reporter->CPPRuntimeError("enum element \"%s\" missing in %s", e_name.c_str(), obj_desc(e).c_str());
exit(1);
}
em_offset = e->Names().size(); em_offset = e->Names().size();
if ( e->Lookup(em_offset) ) if ( e->Lookup(em_offset) )
reporter->InternalError("enum inconsistency while initializing compiled scripts"); reporter->InternalError("enum inconsistency while initializing compiled scripts");
@ -438,6 +455,10 @@ int CPP_EnumMapping::ComputeOffset(InitsManager* im) const {
return em_offset; return em_offset;
} }
void CPP_GlobalLookupInit::Generate(InitsManager* im, std::vector<void*>& /* inits_vec */, int /* offset */) const {
global = find_global__CPP(name);
}
void CPP_GlobalInit::Generate(InitsManager* im, std::vector<void*>& /* inits_vec */, int /* offset */) const { void CPP_GlobalInit::Generate(InitsManager* im, std::vector<void*>& /* inits_vec */, int /* offset */) const {
auto& t = im->Types(type); auto& t = im->Types(type);
global = lookup_global__CPP(name, t, exported); global = lookup_global__CPP(name, t, exported);

View file

@ -41,6 +41,14 @@ extern std::vector<std::vector<std::vector<int>>> generate_indices_set(int* init
#define END_OF_VEC_VEC -100 #define END_OF_VEC_VEC -100
#define END_OF_VEC_VEC_VEC -200 #define END_OF_VEC_VEC_VEC -200
// A marker value for "named" types (those that are simply looked up by
// name at initialization time).
#define NAMED_TYPE_MARKER -300
// A marker value indicating values that should not be constructed if not
// already present.
#define DO_NOT_CONSTRUCT_VALUE_MARKER -400
// An abstract helper class used to access elements of an initialization vector. // An abstract helper class used to access elements of an initialization vector.
// We need the abstraction because InitsManager below needs to be able to refer // We need the abstraction because InitsManager below needs to be able to refer
// to any of a range of templated classes. // to any of a range of templated classes.
@ -369,8 +377,19 @@ public:
} }
}; };
// Class for initializing a Zeek global. These don't go into an initialization // Classes for initializing Zeek globals. These don't go into an initialization
// vector, so we use void* as the underlying type. // vector, so we use void* as the underlying type.
class CPP_GlobalLookupInit : public CPP_Init<void*> {
public:
CPP_GlobalLookupInit(IDPtr& _global, const char* _name) : CPP_Init<void*>(), global(_global), name(_name) {}
void Generate(InitsManager* im, std::vector<void*>& /* inits_vec */, int /* offset */) const override;
protected:
IDPtr& global;
const char* name;
};
class CPP_GlobalInit : public CPP_Init<void*> { class CPP_GlobalInit : public CPP_Init<void*> {
public: public:
CPP_GlobalInit(IDPtr& _global, const char* _name, int _type, int _attrs, int _val, bool _exported, CPP_GlobalInit(IDPtr& _global, const char* _name, int _type, int _attrs, int _val, bool _exported,
@ -463,7 +482,11 @@ public:
private: private:
int rec; // index to retrieve the record's type int rec; // index to retrieve the record's type
std::string field_name; // which field this offset pertains to std::string field_name; // which field this offset pertains to
int field_type; // the field's type, in case we have to construct it
// The field's type, in case we have to construct it. If
// DO_NOT_CONSTRUCT_VALUE_MARKER then it's instead an error
// if missing.
int field_type;
int field_attrs; // the same for the field's attributes int field_attrs; // the same for the field's attributes
}; };
@ -473,13 +496,15 @@ private:
// the enum). // the enum).
class CPP_EnumMapping { class CPP_EnumMapping {
public: public:
CPP_EnumMapping(int _e_type, std::string _e_name) : e_type(_e_type), e_name(std::move(_e_name)) {} CPP_EnumMapping(int _e_type, std::string _e_name, bool _construct_if_missing)
: e_type(_e_type), e_name(std::move(_e_name)), construct_if_missing(_construct_if_missing) {}
int ComputeOffset(InitsManager* im) const; int ComputeOffset(InitsManager* im) const;
private: private:
int e_type; // index to EnumType int e_type; // index to EnumType
std::string e_name; // which enum constant for that type std::string e_name; // which enum constant for that type
bool construct_if_missing; // if true, construct constant if not present
}; };
// Looks up a BiF of the given name, making it available to compiled // Looks up a BiF of the given name, making it available to compiled

View file

@ -191,6 +191,7 @@ shared_ptr<CPP_InitInfo> CPPCompile::RegisterType(const TypePtr& tp) {
shared_ptr<CPP_InitInfo> gi; shared_ptr<CPP_InitInfo> gi;
if ( standalone || t->GetName().empty() ) {
switch ( t->Tag() ) { switch ( t->Tag() ) {
case TYPE_ADDR: case TYPE_ADDR:
case TYPE_ANY: case TYPE_ANY:
@ -226,6 +227,9 @@ shared_ptr<CPP_InitInfo> CPPCompile::RegisterType(const TypePtr& tp) {
default: reporter->InternalError("bad type in CPPCompile::RegisterType"); default: reporter->InternalError("bad type in CPPCompile::RegisterType");
} }
}
else
gi = make_shared<NamedTypeInfo>(this, tp);
type_info->AddInstance(gi); type_info->AddInstance(gi);
processed_types[t] = gi; processed_types[t] = gi;

View file

@ -11,7 +11,7 @@ void CPPCompile::CreateGlobal(const ID* g) {
auto gn = string(g->Name()); auto gn = string(g->Name());
bool is_bif = pfs->BiFGlobals().count(g) > 0; bool is_bif = pfs->BiFGlobals().count(g) > 0;
if ( pfs->Globals().count(g) == 0 ) { if ( accessed_globals.count(g) == 0 ) {
// Only used in the context of calls. If it's compilable, // Only used in the context of calls. If it's compilable,
// then we'll call it directly. // then we'll call it directly.
if ( compilable_funcs.count(gn) > 0 ) { if ( compilable_funcs.count(gn) > 0 ) {
@ -28,11 +28,16 @@ void CPPCompile::CreateGlobal(const ID* g) {
if ( AddGlobal(gn, "gl") ) { // We'll be creating this global. if ( AddGlobal(gn, "gl") ) { // We'll be creating this global.
Emit("IDPtr %s;", globals[gn]); Emit("IDPtr %s;", globals[gn]);
if ( pfs->Events().count(gn) > 0 ) if ( accessed_events.count(gn) > 0 )
// This is an event that's also used as a variable. // This is an event that's also used as a variable.
Emit("EventHandlerPtr %s_ev;", globals[gn]); Emit("EventHandlerPtr %s_ev;", globals[gn]);
auto gi = make_shared<GlobalInitInfo>(this, g, globals[gn]); shared_ptr<CPP_InitInfo> gi;
if ( standalone )
gi = make_shared<GlobalInitInfo>(this, g, globals[gn]);
else
gi = make_shared<GlobalLookupInitInfo>(this, g, globals[gn]);
global_id_info->AddInstance(gi); global_id_info->AddInstance(gi);
global_gis[g] = gi; global_gis[g] = gi;
} }
@ -64,7 +69,12 @@ std::shared_ptr<CPP_InitInfo> CPPCompile::RegisterGlobal(const ID* g) {
return gg->second; return gg->second;
} }
auto gi = make_shared<GlobalInitInfo>(this, g, globals[gn]); shared_ptr<CPP_InitInfo> gi;
if ( standalone )
gi = make_shared<GlobalInitInfo>(this, g, globals[gn]);
else
gi = make_shared<GlobalLookupInitInfo>(this, g, globals[gn]);
global_id_info->AddInstance(gi); global_id_info->AddInstance(gi);
global_gis[g] = gi; global_gis[g] = gi;

View file

@ -109,13 +109,25 @@ bool should_analyze(const ScriptFuncPtr& f, const StmtPtr& body) {
if ( ofiles.empty() && ofuncs.empty() ) if ( ofiles.empty() && ofuncs.empty() )
return true; return true;
if ( obj_matches_opt_files(body.get()) )
return true;
const auto& fun = f->GetName(); const auto& fun = f->GetName();
for ( auto& o : ofuncs ) for ( auto& o : ofuncs )
if ( std::regex_match(fun, o) ) if ( std::regex_match(fun, o) )
return true; return true;
auto fin = util::detail::normalize_path(body->GetLocationInfo()->filename); return false;
}
bool obj_matches_opt_files(const Obj* obj) {
auto& ofiles = analysis_options.only_files;
if ( ofiles.empty() )
return false;
auto fin = util::detail::normalize_path(obj->GetLocationInfo()->filename);
for ( auto& o : ofiles ) for ( auto& o : ofiles )
if ( std::regex_match(fin, o) ) if ( std::regex_match(fin, o) )
@ -285,8 +297,12 @@ static void init_options() {
check_env_opt("ZEEK_USE_CPP", analysis_options.use_CPP); check_env_opt("ZEEK_USE_CPP", analysis_options.use_CPP);
check_env_opt("ZEEK_ALLOW_COND", analysis_options.allow_cond); check_env_opt("ZEEK_ALLOW_COND", analysis_options.allow_cond);
if ( analysis_options.gen_standalone_CPP ) if ( analysis_options.gen_standalone_CPP ) {
if ( analysis_options.only_files.empty() )
reporter->FatalError("-O gen-standalone-C++ requires use of --optimize-files");
analysis_options.gen_CPP = true; analysis_options.gen_CPP = true;
}
if ( analysis_options.gen_CPP ) if ( analysis_options.gen_CPP )
generating_CPP = true; generating_CPP = true;

View file

@ -247,6 +247,10 @@ extern void add_file_analysis_pattern(AnalyOpt& opts, const char* pat);
// it should be skipped. // it should be skipped.
extern bool should_analyze(const ScriptFuncPtr& f, const StmtPtr& body); extern bool should_analyze(const ScriptFuncPtr& f, const StmtPtr& body);
// True if the given object's location matches one specified by
// --optimize-files=...
extern bool obj_matches_opt_files(const Obj* obj);
// Analyze all of the parsed scripts collectively for usage issues (unless // Analyze all of the parsed scripts collectively for usage issues (unless
// suppressed by the flag) and optimization. // suppressed by the flag) and optimization.
extern void analyze_scripts(bool no_unused_warnings); extern void analyze_scripts(bool no_unused_warnings);