From 18f4fcb5a441271804a5270f7ed100c81b67f917 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 12 Jan 2023 14:08:45 -0800 Subject: [PATCH] Maintenance updates for -O gen-C++ / -O gen-standalone-C++ fixes for using BiFs in standalone global initializations avoiding redundant global initializations updates to maintenance scripts and notes removal of an unused member variable --- src/script_opt/CPP/Compile.h | 5 ---- src/script_opt/CPP/Driver.cc | 13 ++++----- src/script_opt/CPP/Inits.cc | 1 - src/script_opt/CPP/InitsInfo.cc | 6 ++++ src/script_opt/CPP/InitsInfo.h | 1 + src/script_opt/CPP/RuntimeInitSupport.cc | 2 +- src/script_opt/CPP/RuntimeInits.cc | 28 +++++++++++++++++-- src/script_opt/CPP/RuntimeInits.h | 15 ++++++++-- src/script_opt/CPP/RuntimeOps.h | 3 +- src/script_opt/CPP/Vars.cc | 35 ++++++++++++++---------- src/script_opt/CPP/maint/README | 4 ++- src/script_opt/CPP/maint/do-CPP-btest.sh | 2 +- 12 files changed, 77 insertions(+), 38 deletions(-) diff --git a/src/script_opt/CPP/Compile.h b/src/script_opt/CPP/Compile.h index 6deccf7f10..ceba063daf 100644 --- a/src/script_opt/CPP/Compile.h +++ b/src/script_opt/CPP/Compile.h @@ -345,11 +345,6 @@ private: // Maps functions (not hooks or events) to upstream compiled names. std::unordered_map hashed_funcs; - // Tracks all of the module names used in activate_bodies__CPP() - // calls, to ensure all of the global names of compiled-to-standalone - // functions are available to subsequent scripts. - std::unordered_set module_names; - // If non-zero, provides a tag used for auxiliary/additional // compilation units. int addl_tag = 0; diff --git a/src/script_opt/CPP/Driver.cc b/src/script_opt/CPP/Driver.cc index 3c57192f4e..a6dd17ab7c 100644 --- a/src/script_opt/CPP/Driver.cc +++ b/src/script_opt/CPP/Driver.cc @@ -495,17 +495,14 @@ void CPPCompile::GenFinishInit() NL(); + Emit("load_BiFs__CPP();"); + if ( standalone ) - // BiFs need to be loaded later, because the main - // initialization finishes upon loading of the activation + // Note, BiFs will also be loaded again later, because the + // main initialization finishes upon loading of the activation // script, rather than after all scripts have been parsed - // and BiFs have been loaded. + // and plugins (with BiFs) have been loaded. Emit("init_globals__CPP();"); - else - // For non-standalone, we're only initializing after all - // scripts have been parsed and BiFs loaded, so it's fine - // to go ahead and do so now. - Emit("load_BiFs__CPP();"); EndBlock(); } diff --git a/src/script_opt/CPP/Inits.cc b/src/script_opt/CPP/Inits.cc index 79228db09f..de7ddd0ff2 100644 --- a/src/script_opt/CPP/Inits.cc +++ b/src/script_opt/CPP/Inits.cc @@ -313,7 +313,6 @@ void CPPCompile::GenStandaloneActivation() auto var = extract_var_name(fn); auto mod = extract_module_name(fn); - module_names.insert(mod); auto fid = lookup_ID(var.c_str(), mod.c_str(), false, true, false); if ( ! fid ) diff --git a/src/script_opt/CPP/InitsInfo.cc b/src/script_opt/CPP/InitsInfo.cc index 16152a0f4b..26d2c4b90a 100644 --- a/src/script_opt/CPP/InitsInfo.cc +++ b/src/script_opt/CPP/InitsInfo.cc @@ -350,6 +350,11 @@ GlobalInitInfo::GlobalInitInfo(CPPCompile* c, const ID* g, string _CPP_name) exported = g->IsExport(); val = ValElem(c, nullptr); // empty because we initialize dynamically + + if ( gt->Tag() == TYPE_FUNC && ! g->GetVal() ) + // Remember this peculiarity so we can recreate it for + // error-behavior-compatibility. + func_with_no_val = true; } void GlobalInitInfo::InitializerVals(std::vector& ivs) const @@ -360,6 +365,7 @@ void GlobalInitInfo::InitializerVals(std::vector& ivs) const ivs.push_back(Fmt(attrs)); ivs.push_back(val); ivs.push_back(Fmt(exported)); + ivs.push_back(Fmt(func_with_no_val)); } CallExprInitInfo::CallExprInitInfo(CPPCompile* c, ExprPtr _e, string _e_name, string _wrapper_class) diff --git a/src/script_opt/CPP/InitsInfo.h b/src/script_opt/CPP/InitsInfo.h index b9c15c1951..5d8b3d5dab 100644 --- a/src/script_opt/CPP/InitsInfo.h +++ b/src/script_opt/CPP/InitsInfo.h @@ -492,6 +492,7 @@ protected: int attrs; std::string val; bool exported; + bool func_with_no_val = false; // needed to handle some error situations }; // Information for initializing an item corresponding to a Zeek function diff --git a/src/script_opt/CPP/RuntimeInitSupport.cc b/src/script_opt/CPP/RuntimeInitSupport.cc index 45db298044..62130f49df 100644 --- a/src/script_opt/CPP/RuntimeInitSupport.cc +++ b/src/script_opt/CPP/RuntimeInitSupport.cc @@ -169,7 +169,7 @@ IDPtr lookup_global__CPP(const char* g, const TypePtr& t, bool exported) Func* lookup_bif__CPP(const char* bif) { auto b = lookup_ID(bif, GLOBAL_MODULE_NAME, false, false, false); - return b ? b->GetVal()->AsFunc() : nullptr; + return (b && b->GetVal()) ? b->GetVal()->AsFunc() : nullptr; } FuncValPtr lookup_func__CPP(string name, int num_bodies, vector hashes, diff --git a/src/script_opt/CPP/RuntimeInits.cc b/src/script_opt/CPP/RuntimeInits.cc index 1e5678d8b0..fe8f07ce9b 100644 --- a/src/script_opt/CPP/RuntimeInits.cc +++ b/src/script_opt/CPP/RuntimeInits.cc @@ -488,10 +488,32 @@ int CPP_EnumMapping::ComputeOffset(InitsManager* im) const void CPP_GlobalInit::Generate(InitsManager* im, std::vector& /* inits_vec */, int /* offset */) const { - global = lookup_global__CPP(name, im->Types(type), exported); + auto& t = im->Types(type); + global = lookup_global__CPP(name, t, exported); - if ( ! global->HasVal() && val >= 0 ) - global->SetVal(im->ConstVals(val)); + if ( ! global->HasVal() ) + { + if ( val >= 0 ) + // Have explicit initialization value. + global->SetVal(im->ConstVals(val)); + + else if ( t->Tag() == TYPE_FUNC && ! func_with_no_val ) + { + // Create a matching value so that this global can + // be used in other initializations. The code here + // mirrors that in activate_bodies__CPP(). + auto fn = global->Name(); + auto ft = cast_intrusive(t); + + vector no_bodies; + vector no_priorities; + + auto sf = make_intrusive(fn, ft, no_bodies, no_priorities); + + auto v = make_intrusive(move(sf)); + global->SetVal(v); + } + } if ( attrs >= 0 ) global->SetAttrs(im->Attributes(attrs)); diff --git a/src/script_opt/CPP/RuntimeInits.h b/src/script_opt/CPP/RuntimeInits.h index 1c6938770d..47519f990d 100644 --- a/src/script_opt/CPP/RuntimeInits.h +++ b/src/script_opt/CPP/RuntimeInits.h @@ -405,9 +405,9 @@ class CPP_GlobalInit : public CPP_Init { public: CPP_GlobalInit(IDPtr& _global, const char* _name, int _type, int _attrs, int _val, - bool _exported) + bool _exported, bool _func_with_no_val) : CPP_Init(), global(_global), name(_name), type(_type), attrs(_attrs), val(_val), - exported(_exported) + exported(_exported), func_with_no_val(_func_with_no_val) { } @@ -421,6 +421,7 @@ protected: int attrs; int val; bool exported; + bool func_with_no_val; }; // Abstract class for constructing a CallExpr to evaluate a Zeek expression. @@ -534,7 +535,15 @@ public: { } - void ResolveBiF() const { bif_func = lookup_bif__CPP(bif_name.c_str()); } + void ResolveBiF() const + { + // We allow a BiF to be resolved multiple times. This is to + // support plugins that might load BiFs that aren't initially + // available, and also global initializations that might + // require (other) BiFs that *are* initially available. + if ( ! bif_func ) + bif_func = lookup_bif__CPP(bif_name.c_str()); + } protected: zeek::Func*& bif_func; // where to store the pointer to the BiF diff --git a/src/script_opt/CPP/RuntimeOps.h b/src/script_opt/CPP/RuntimeOps.h index bf03ef8c0e..a47ce7257e 100644 --- a/src/script_opt/CPP/RuntimeOps.h +++ b/src/script_opt/CPP/RuntimeOps.h @@ -47,7 +47,8 @@ extern ValPtr when_index_slice__CPP(VectorVal* vec, const ListVal* lv); // but (2) needing to have the address of that vector. inline ValPtr invoke__CPP(Func* f, std::vector args, Frame* frame) { - frame->SetOnlyCall(nullptr); + if ( frame ) + frame->SetOnlyCall(nullptr); return f->Invoke(&args, frame); } diff --git a/src/script_opt/CPP/Vars.cc b/src/script_opt/CPP/Vars.cc index b15eceacbd..b6cc33d128 100644 --- a/src/script_opt/CPP/Vars.cc +++ b/src/script_opt/CPP/Vars.cc @@ -55,21 +55,28 @@ std::shared_ptr CPPCompile::RegisterGlobal(const ID* g) { auto gg = global_gis.find(g); - if ( gg == global_gis.end() ) - { - auto gn = string(g->Name()); - - if ( globals.count(gn) == 0 ) - // Create a name for it. - (void)IDNameStr(g); - - auto gi = make_shared(this, g, globals[gn]); - global_id_info->AddInstance(gi); - global_gis[g] = gi; - return gi; - } - else + if ( gg != global_gis.end() ) return gg->second; + + auto gn = string(g->Name()); + + if ( globals.count(gn) == 0 ) + { + // Create a name for it. + (void)IDNameStr(g); + + // That call may have created the initializer, in which + // case no need to repeat it. + gg = global_gis.find(g); + if ( gg != global_gis.end() ) + return gg->second; + } + + auto gi = make_shared(this, g, globals[gn]); + global_id_info->AddInstance(gi); + global_gis[g] = gi; + + return gi; } void CPPCompile::AddBiF(const ID* b, bool is_var) diff --git a/src/script_opt/CPP/maint/README b/src/script_opt/CPP/maint/README index f51d62167b..72360997af 100644 --- a/src/script_opt/CPP/maint/README +++ b/src/script_opt/CPP/maint/README @@ -15,7 +15,7 @@ The maintenance workflow: to check in updates to the list of how the compiler currently fares on various btests (see end of this doc): - Sat Dec 3 13:20:43 PST 2022 + Thu Jan 12 14:05:26 PST 2023 2. Run "find-test-files.sh" to generate a list (to stdout) of all of the possible Zeek source files found in the test suite. @@ -32,6 +32,8 @@ The maintenance workflow: "-O gen-C++" can successfully run on the input. Presently, it should be able to do so for all of them, other than some exceptions noted below. + This step is parallelizable, say using xargs -P 10. + 6. Copy ./src/zeek to ./zeek.HOLD. This is used to speed up recompilation used in the next step. However, it's also a headache to do development to fix a bug and then forget to update zeek.HOLD, which means you wind up diff --git a/src/script_opt/CPP/maint/do-CPP-btest.sh b/src/script_opt/CPP/maint/do-CPP-btest.sh index acd5bc6c1b..71df6dbc38 100755 --- a/src/script_opt/CPP/maint/do-CPP-btest.sh +++ b/src/script_opt/CPP/maint/do-CPP-btest.sh @@ -1,6 +1,6 @@ #! /bin/sh -rm -f CPP-gen.cc +rm -f CPP-gen.cc src/zeek cp zeek.HOLD src/zeek || ( echo Need to create clean zeek.HOLD