From 2e69a8870ae7686aadef8600810f7131c97d1285 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 6 Dec 2024 16:25:22 -0800 Subject: [PATCH] introduced simplified initialization for non-standalone -O gen-C++ code tied -O gen-standalone-C++ to use of --optimize-files --- src/script_opt/CPP/Driver.h | 5 +++ src/script_opt/CPP/Inits.cc | 23 ++++++++++--- src/script_opt/CPP/InitsInfo.cc | 21 ++++++++++-- src/script_opt/CPP/InitsInfo.h | 26 ++++++++++++-- src/script_opt/CPP/README.md | 12 +++---- src/script_opt/CPP/RuntimeInits.cc | 25 ++++++++++++-- src/script_opt/CPP/RuntimeInits.h | 37 ++++++++++++++++---- src/script_opt/CPP/Types.cc | 54 ++++++++++++++++-------------- src/script_opt/CPP/Vars.cc | 18 +++++++--- src/script_opt/ScriptOpt.cc | 20 +++++++++-- src/script_opt/ScriptOpt.h | 4 +++ 11 files changed, 189 insertions(+), 56 deletions(-) diff --git a/src/script_opt/CPP/Driver.h b/src/script_opt/CPP/Driver.h index e2a097ed2e..f4f8e123d8 100644 --- a/src/script_opt/CPP/Driver.h +++ b/src/script_opt/CPP/Driver.h @@ -60,6 +60,11 @@ void GenFinishInit(); // Generate the function that registers compiled script bodies. void GenRegisterBodies(); +public: +// Whether we're generating "standalone" code. +bool TargetingStandalone() const { return standalone; } + +private: // True if the given function (plus body and profile) is one that should be // compiled. If non-nil, sets reason to the the reason why, if there's a // fundamental problem. If however the function should be skipped for other diff --git a/src/script_opt/CPP/Inits.cc b/src/script_opt/CPP/Inits.cc index 0906d1c55b..54b40b161c 100644 --- a/src/script_opt/CPP/Inits.cc +++ b/src/script_opt/CPP/Inits.cc @@ -104,11 +104,18 @@ void CPPCompile::InitializeFieldMappings() { StartBlock(); + string type_arg, attrs_arg; + if ( ! standalone ) + type_arg = attrs_arg = "DO_NOT_CONSTRUCT_VALUE_MARKER"; + for ( const auto& mapping : field_decls ) { auto rt_arg = Fmt(mapping.first); auto td = mapping.second; - auto type_arg = Fmt(TypeOffset(td->type)); - auto attrs_arg = Fmt(AttributesOffset(td->attrs)); + + if ( standalone ) { + type_arg = Fmt(TypeOffset(td->type)); + attrs_arg = Fmt(AttributesOffset(td->attrs)); + } Emit("CPP_FieldMapping(%s, \"%s\", %s, %s),", rt_arg, td->id, type_arg, attrs_arg); } @@ -121,8 +128,10 @@ void CPPCompile::InitializeEnumMappings() { StartBlock(); + auto create_if_missing = standalone ? "true" : "false"; + for ( const auto& mapping : enum_names ) - Emit("CPP_EnumMapping(%s, \"%s\"),", Fmt(mapping.first), mapping.second); + Emit("CPP_EnumMapping(%s, \"%s\", %s),", Fmt(mapping.first), mapping.second, create_if_missing); EndBlock(true); } @@ -178,9 +187,15 @@ void CPPCompile::InitializeGlobals() { Emit("Frame* f__CPP = nullptr;"); NL(); + auto& ofiles = analysis_options.only_files; + for ( const auto& ginit : IDOptInfo::GetGlobalInitExprs() ) { auto g = ginit.Id(); - if ( pfs->Globals().count(g) == 0 ) + + if ( ! ofiles.empty() && ! obj_matches_opt_files(g) ) + continue; + + if ( accessed_globals.count(g) == 0 ) continue; auto ic = ginit.IC(); diff --git a/src/script_opt/CPP/InitsInfo.cc b/src/script_opt/CPP/InitsInfo.cc index 6006b7bda0..ba8e3cd674 100644 --- a/src/script_opt/CPP/InitsInfo.cc +++ b/src/script_opt/CPP/InitsInfo.cc @@ -355,10 +355,18 @@ AttrsInfo::AttrsInfo(CPPCompile* _c, const AttributesPtr& _attrs) : CompoundItem } } -GlobalInitInfo::GlobalInitInfo(CPPCompile* c, const ID* g, string _CPP_name) +GlobalLookupInitInfo::GlobalLookupInitInfo(CPPCompile* c, const ID* g, string _CPP_name) : CPP_InitInfo(g), CPP_name(std::move(_CPP_name)) { Zeek_name = g->Name(); +} +void GlobalLookupInitInfo::InitializerVals(std::vector& ivs) const { + ivs.push_back(CPP_name); + ivs.push_back(string("\"") + Zeek_name + "\""); +} + +GlobalInitInfo::GlobalInitInfo(CPPCompile* c, const ID* g, string _CPP_name) + : GlobalLookupInitInfo(c, g, std::move(_CPP_name)) { auto& gt = g->GetType(); auto gi = c->RegisterType(gt); init_cohort = max(init_cohort, gi->InitCohort() + 1); @@ -375,7 +383,7 @@ GlobalInitInfo::GlobalInitInfo(CPPCompile* c, const ID* g, string _CPP_name) exported = g->IsExport(); val = ValElem(c, nullptr); // empty because we initialize dynamically - if ( gt->Tag() == TYPE_FUNC && ! g->GetVal() ) + if ( gt->Tag() == TYPE_FUNC && (! g->GetVal() || g->GetVal()->AsFunc()->GetKind() == Func::BUILTIN_FUNC) ) // Remember this peculiarity so we can recreate it for // error-behavior-compatibility. func_with_no_val = true; @@ -549,7 +557,7 @@ RecordTypeInfo::RecordTypeInfo(CPPCompile* _c, TypePtr _t) : AbstractTypeInfo(_c field_types.push_back(r_i->type); - if ( r_i->attrs ) { + if ( c->TargetingStandalone() && r_i->attrs ) { gi = c->RegisterAttributes(r_i->attrs); final_init_cohort = max(final_init_cohort, gi->InitCohort() + 1); field_attrs.push_back(gi->Offset()); @@ -576,6 +584,13 @@ void RecordTypeInfo::AddInitializerVals(std::vector& ivs) const { } } +NamedTypeInfo::NamedTypeInfo(CPPCompile* _c, TypePtr _t) : AbstractTypeInfo(_c, std::move(_t)) {} + +void NamedTypeInfo::AddInitializerVals(std::vector& ivs) const { + ivs.emplace_back(Fmt(NAMED_TYPE_MARKER)); + ivs.emplace_back(Fmt(c->TrackString(t->GetName()))); +} + void IndicesManager::Generate(CPPCompile* c) { c->Emit("int CPP__Indices__init[] ="); c->StartBlock(); diff --git a/src/script_opt/CPP/InitsInfo.h b/src/script_opt/CPP/InitsInfo.h index a9ae9ed37b..3432871d67 100644 --- a/src/script_opt/CPP/InitsInfo.h +++ b/src/script_opt/CPP/InitsInfo.h @@ -478,8 +478,22 @@ public: AttrsInfo(CPPCompile* c, const AttributesPtr& attrs); }; -// Information for initialization a Zeek global. -class GlobalInitInfo : public CPP_InitInfo { +// A lightweight initializer for a Zeek global that will look it up at +// initialization time but not create it if missing. +class GlobalLookupInitInfo : public CPP_InitInfo { +public: + GlobalLookupInitInfo(CPPCompile* c, const ID* g, std::string CPP_name); + + std::string InitializerType() const override { return "CPP_GlobalLookupInit"; } + void InitializerVals(std::vector& ivs) const override; + +protected: + std::string Zeek_name; + std::string CPP_name; +}; + +// Information for initializing a Zeek global. +class GlobalInitInfo : public GlobalLookupInitInfo { public: GlobalInitInfo(CPPCompile* c, const ID* g, std::string CPP_name); @@ -639,6 +653,14 @@ private: std::vector field_attrs; }; +// Class for initializing a named Zeek type that should be present at startup. +class NamedTypeInfo : public AbstractTypeInfo { +public: + NamedTypeInfo(CPPCompile* c, TypePtr _t); + + void AddInitializerVals(std::vector& ivs) const override; +}; + // Much of the table-driven initialization is based on vectors of indices, // which we represent as vectors of int's, where each int is used to index a // global C++ vector. This class manages such vectors. In particular, it diff --git a/src/script_opt/CPP/README.md b/src/script_opt/CPP/README.md index 4b3ed6b10f..ba95fc6980 100644 --- a/src/script_opt/CPP/README.md +++ b/src/script_opt/CPP/README.md @@ -90,7 +90,7 @@ and also any compiled-to-C++ bodies present in the `zeek` binary that The above workflows require the subsequent `zeek` execution to include the `target.zeek` script. You can avoid this by replacing the first step with: -1. `./src/zeek -O gen-standalone-C++ target.zeek >target-stand-in.zeek` +1. `./src/zeek -O gen-standalone-C++ --optimize-files=target.zeek target.zeek >target-stand-in.zeek` (and then building as in the 2nd step above). This option prints to _stdout_ a @@ -100,13 +100,9 @@ without needing to include `target.zeek` in the invocation (nor the `-O use-C++` option). After loading the stand-in script, you can still access types and functions declared in `target.zeek`. -Note: the implementation differences between `gen-C++` and `gen-standalone-C++` -wound up being modest enough that it might make sense to just always provide -the latter functionality, which it turns out does not introduce any -additional constraints compared to the current `gen-C++` functionality. -On the other hand, it's possible (not yet established) that code created -using `gen-C++` can be made to compile significantly faster than -standalone code. +Note: `gen-standalone-C++` _must_ be used with `--optimize-files`, as the +compiler needs the latter to determine which global declarations the +standalone code needs to initialize. There are additional workflows relating to running the test suite: see `src/script_opt/CPP/maint/README`. diff --git a/src/script_opt/CPP/RuntimeInits.cc b/src/script_opt/CPP/RuntimeInits.cc index d8b73626c5..2918a8b30a 100644 --- a/src/script_opt/CPP/RuntimeInits.cc +++ b/src/script_opt/CPP/RuntimeInits.cc @@ -235,7 +235,7 @@ void CPP_TypeInits::PreInit(InitsManager* im, int offset, ValElemVec& init_vals) if ( tag == TYPE_LIST ) inits_vec[offset] = make_intrusive(); - else if ( tag == TYPE_RECORD ) { + else if ( tag == TYPE_RECORD && init_vals[1] != NAMED_TYPE_MARKER ) { auto name = im->Strings(init_vals[1]); if ( name[0] ) inits_vec[offset] = get_record_type__CPP(name); @@ -243,7 +243,7 @@ void CPP_TypeInits::PreInit(InitsManager* im, int offset, ValElemVec& init_vals) inits_vec[offset] = get_record_type__CPP(nullptr); } - else if ( tag == TYPE_TABLE ) + else if ( tag == TYPE_TABLE && init_vals[1] != NAMED_TYPE_MARKER ) inits_vec[offset] = make_intrusive(); // else no pre-initialization needed @@ -251,6 +251,13 @@ void CPP_TypeInits::PreInit(InitsManager* im, int offset, ValElemVec& init_vals) void CPP_TypeInits::Generate(InitsManager* im, vector& ivec, int offset, ValElemVec& init_vals) const { auto tag = static_cast(init_vals[0]); + + if ( init_vals.size() > 1 && init_vals[1] == NAMED_TYPE_MARKER ) { + auto name = im->Strings(init_vals[2]); + ivec[offset] = find_global__CPP(name)->GetType(); + return; + } + TypePtr t; switch ( tag ) { case TYPE_ADDR: @@ -406,6 +413,11 @@ int CPP_FieldMapping::ComputeOffset(InitsManager* im) const { auto fm_offset = r->FieldOffset(field_name.c_str()); if ( fm_offset < 0 ) { // field does not exist, create it + if ( field_type == DO_NOT_CONSTRUCT_VALUE_MARKER ) { + reporter->CPPRuntimeError("record field \"%s\" missing in %s", field_name.c_str(), obj_desc(r).c_str()); + exit(1); + } + fm_offset = r->NumFields(); auto id = util::copy_string(field_name.c_str(), field_name.size()); @@ -429,6 +441,11 @@ int CPP_EnumMapping::ComputeOffset(InitsManager* im) const { auto em_offset = e->Lookup(e_name); if ( em_offset < 0 ) { // enum constant does not exist, create it + if ( ! construct_if_missing ) { + reporter->CPPRuntimeError("enum element \"%s\" missing in %s", e_name.c_str(), obj_desc(e).c_str()); + exit(1); + } + em_offset = e->Names().size(); if ( e->Lookup(em_offset) ) reporter->InternalError("enum inconsistency while initializing compiled scripts"); @@ -438,6 +455,10 @@ int CPP_EnumMapping::ComputeOffset(InitsManager* im) const { return em_offset; } +void CPP_GlobalLookupInit::Generate(InitsManager* im, std::vector& /* inits_vec */, int /* offset */) const { + global = find_global__CPP(name); +} + void CPP_GlobalInit::Generate(InitsManager* im, std::vector& /* inits_vec */, int /* offset */) const { auto& t = im->Types(type); global = lookup_global__CPP(name, t, exported); diff --git a/src/script_opt/CPP/RuntimeInits.h b/src/script_opt/CPP/RuntimeInits.h index 268d2ca250..ddd8cf578e 100644 --- a/src/script_opt/CPP/RuntimeInits.h +++ b/src/script_opt/CPP/RuntimeInits.h @@ -41,6 +41,14 @@ extern std::vector>> generate_indices_set(int* init #define END_OF_VEC_VEC -100 #define END_OF_VEC_VEC_VEC -200 +// A marker value for "named" types (those that are simply looked up by +// name at initialization time). +#define NAMED_TYPE_MARKER -300 + +// A marker value indicating values that should not be constructed if not +// already present. +#define DO_NOT_CONSTRUCT_VALUE_MARKER -400 + // An abstract helper class used to access elements of an initialization vector. // We need the abstraction because InitsManager below needs to be able to refer // to any of a range of templated classes. @@ -369,8 +377,19 @@ public: } }; -// Class for initializing a Zeek global. These don't go into an initialization +// Classes for initializing Zeek globals. These don't go into an initialization // vector, so we use void* as the underlying type. +class CPP_GlobalLookupInit : public CPP_Init { +public: + CPP_GlobalLookupInit(IDPtr& _global, const char* _name) : CPP_Init(), global(_global), name(_name) {} + + void Generate(InitsManager* im, std::vector& /* inits_vec */, int /* offset */) const override; + +protected: + IDPtr& global; + const char* name; +}; + class CPP_GlobalInit : public CPP_Init { public: CPP_GlobalInit(IDPtr& _global, const char* _name, int _type, int _attrs, int _val, bool _exported, @@ -463,8 +482,12 @@ public: private: int rec; // index to retrieve the record's type std::string field_name; // which field this offset pertains to - int field_type; // the field's type, in case we have to construct it - int field_attrs; // the same for the field's attributes + + // The field's type, in case we have to construct it. If + // DO_NOT_CONSTRUCT_VALUE_MARKER then it's instead an error + // if missing. + int field_type; + int field_attrs; // the same for the field's attributes }; // Constructs at run-time a mapping between abstract enum values used when @@ -473,13 +496,15 @@ private: // the enum). class CPP_EnumMapping { public: - CPP_EnumMapping(int _e_type, std::string _e_name) : e_type(_e_type), e_name(std::move(_e_name)) {} + CPP_EnumMapping(int _e_type, std::string _e_name, bool _construct_if_missing) + : e_type(_e_type), e_name(std::move(_e_name)), construct_if_missing(_construct_if_missing) {} int ComputeOffset(InitsManager* im) const; private: - int e_type; // index to EnumType - std::string e_name; // which enum constant for that type + int e_type; // index to EnumType + std::string e_name; // which enum constant for that type + bool construct_if_missing; // if true, construct constant if not present }; // Looks up a BiF of the given name, making it available to compiled diff --git a/src/script_opt/CPP/Types.cc b/src/script_opt/CPP/Types.cc index f97987a9e9..59aab07a04 100644 --- a/src/script_opt/CPP/Types.cc +++ b/src/script_opt/CPP/Types.cc @@ -191,41 +191,45 @@ shared_ptr CPPCompile::RegisterType(const TypePtr& tp) { shared_ptr gi; - switch ( t->Tag() ) { - case TYPE_ADDR: - case TYPE_ANY: - case TYPE_BOOL: - case TYPE_COUNT: - case TYPE_DOUBLE: - case TYPE_ERROR: - case TYPE_INT: - case TYPE_INTERVAL: - case TYPE_PATTERN: - case TYPE_PORT: - case TYPE_STRING: - case TYPE_TIME: - case TYPE_VOID: - case TYPE_SUBNET: - case TYPE_FILE: gi = make_shared(this, tp); break; + if ( standalone || t->GetName().empty() ) { + switch ( t->Tag() ) { + case TYPE_ADDR: + case TYPE_ANY: + case TYPE_BOOL: + case TYPE_COUNT: + case TYPE_DOUBLE: + case TYPE_ERROR: + case TYPE_INT: + case TYPE_INTERVAL: + case TYPE_PATTERN: + case TYPE_PORT: + case TYPE_STRING: + case TYPE_TIME: + case TYPE_VOID: + case TYPE_SUBNET: + case TYPE_FILE: gi = make_shared(this, tp); break; - case TYPE_ENUM: gi = make_shared(this, tp); break; + case TYPE_ENUM: gi = make_shared(this, tp); break; - case TYPE_OPAQUE: gi = make_shared(this, tp); break; + case TYPE_OPAQUE: gi = make_shared(this, tp); break; - case TYPE_TYPE: gi = make_shared(this, tp); break; + case TYPE_TYPE: gi = make_shared(this, tp); break; - case TYPE_VECTOR: gi = make_shared(this, tp); break; + case TYPE_VECTOR: gi = make_shared(this, tp); break; - case TYPE_LIST: gi = make_shared(this, tp); break; + case TYPE_LIST: gi = make_shared(this, tp); break; - case TYPE_TABLE: gi = make_shared(this, tp); break; + case TYPE_TABLE: gi = make_shared(this, tp); break; - case TYPE_RECORD: gi = make_shared(this, tp); break; + case TYPE_RECORD: gi = make_shared(this, tp); break; - case TYPE_FUNC: gi = make_shared(this, tp); break; + case TYPE_FUNC: gi = make_shared(this, tp); break; - default: reporter->InternalError("bad type in CPPCompile::RegisterType"); + default: reporter->InternalError("bad type in CPPCompile::RegisterType"); + } } + else + gi = make_shared(this, tp); type_info->AddInstance(gi); processed_types[t] = gi; diff --git a/src/script_opt/CPP/Vars.cc b/src/script_opt/CPP/Vars.cc index 38ec046aa9..34a037c056 100644 --- a/src/script_opt/CPP/Vars.cc +++ b/src/script_opt/CPP/Vars.cc @@ -11,7 +11,7 @@ void CPPCompile::CreateGlobal(const ID* g) { auto gn = string(g->Name()); bool is_bif = pfs->BiFGlobals().count(g) > 0; - if ( pfs->Globals().count(g) == 0 ) { + if ( accessed_globals.count(g) == 0 ) { // Only used in the context of calls. If it's compilable, // then we'll call it directly. if ( compilable_funcs.count(gn) > 0 ) { @@ -28,11 +28,16 @@ void CPPCompile::CreateGlobal(const ID* g) { if ( AddGlobal(gn, "gl") ) { // We'll be creating this global. Emit("IDPtr %s;", globals[gn]); - if ( pfs->Events().count(gn) > 0 ) + if ( accessed_events.count(gn) > 0 ) // This is an event that's also used as a variable. Emit("EventHandlerPtr %s_ev;", globals[gn]); - auto gi = make_shared(this, g, globals[gn]); + shared_ptr gi; + if ( standalone ) + gi = make_shared(this, g, globals[gn]); + else + gi = make_shared(this, g, globals[gn]); + global_id_info->AddInstance(gi); global_gis[g] = gi; } @@ -64,7 +69,12 @@ std::shared_ptr CPPCompile::RegisterGlobal(const ID* g) { return gg->second; } - auto gi = make_shared(this, g, globals[gn]); + shared_ptr gi; + if ( standalone ) + gi = make_shared(this, g, globals[gn]); + else + gi = make_shared(this, g, globals[gn]); + global_id_info->AddInstance(gi); global_gis[g] = gi; diff --git a/src/script_opt/ScriptOpt.cc b/src/script_opt/ScriptOpt.cc index 36f041fe62..08b32b71e1 100644 --- a/src/script_opt/ScriptOpt.cc +++ b/src/script_opt/ScriptOpt.cc @@ -109,13 +109,25 @@ bool should_analyze(const ScriptFuncPtr& f, const StmtPtr& body) { if ( ofiles.empty() && ofuncs.empty() ) return true; + if ( obj_matches_opt_files(body.get()) ) + return true; + const auto& fun = f->GetName(); for ( auto& o : ofuncs ) if ( std::regex_match(fun, o) ) return true; - auto fin = util::detail::normalize_path(body->GetLocationInfo()->filename); + return false; +} + +bool obj_matches_opt_files(const Obj* obj) { + auto& ofiles = analysis_options.only_files; + + if ( ofiles.empty() ) + return false; + + auto fin = util::detail::normalize_path(obj->GetLocationInfo()->filename); for ( auto& o : ofiles ) if ( std::regex_match(fin, o) ) @@ -285,8 +297,12 @@ static void init_options() { check_env_opt("ZEEK_USE_CPP", analysis_options.use_CPP); check_env_opt("ZEEK_ALLOW_COND", analysis_options.allow_cond); - if ( analysis_options.gen_standalone_CPP ) + if ( analysis_options.gen_standalone_CPP ) { + if ( analysis_options.only_files.empty() ) + reporter->FatalError("-O gen-standalone-C++ requires use of --optimize-files"); + analysis_options.gen_CPP = true; + } if ( analysis_options.gen_CPP ) generating_CPP = true; diff --git a/src/script_opt/ScriptOpt.h b/src/script_opt/ScriptOpt.h index 441c8d6ec2..c595a71cc6 100644 --- a/src/script_opt/ScriptOpt.h +++ b/src/script_opt/ScriptOpt.h @@ -247,6 +247,10 @@ extern void add_file_analysis_pattern(AnalyOpt& opts, const char* pat); // it should be skipped. extern bool should_analyze(const ScriptFuncPtr& f, const StmtPtr& body); +// True if the given object's location matches one specified by +// --optimize-files=... +extern bool obj_matches_opt_files(const Obj* obj); + // Analyze all of the parsed scripts collectively for usage issues (unless // suppressed by the flag) and optimization. extern void analyze_scripts(bool no_unused_warnings);