diff --git a/CHANGES b/CHANGES index 601bdc5b4f..1a8f9e64f3 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,23 @@ +4.1.0-dev.852 | 2021-07-01 08:46:41 -0700 + + * low-level coding style fixes (Vern Paxson, Corelight) + + * support for standalone compiled scripts to export globals with module qualifiers (Vern Paxson, Corelight) + + * updates for documentation of functionality for compiling scripts to C++ (Vern Paxson, Corelight) + + * fixes for standalone C++ scripts making types & variables/functions available (Vern Paxson, Corelight) + + * fixed bug limiting availability of load_CPP() BiF (Vern Paxson, Corelight) + + * updates to development helper scripts to support new workflow (Vern Paxson, Corelight) + + * simpler workflow for -O gen-C++ ; also some hooks for -O gen-standalone-C++ (Vern Paxson, Corelight) + + * ReplaceBody now deletes a body if the replacement is nil (Vern Paxson, Corelight) + + * removal of can't-actually-be-executed code (Vern Paxson, Corelight) + 4.1.0-dev.842 | 2021-06-30 20:32:37 -0700 * Skip input framework entries with missing but non-optional fields (Christian Kreibich, Corelight) diff --git a/VERSION b/VERSION index b9e08e880e..9fea460515 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -4.1.0-dev.842 +4.1.0-dev.852 diff --git a/src/Func.cc b/src/Func.cc index 4859ab7aad..f06108c983 100644 --- a/src/Func.cc +++ b/src/Func.cc @@ -332,8 +332,11 @@ ScriptFunc::ScriptFunc(std::string _name, FuncTypePtr ft, sort(bodies.begin(), bodies.end()); - current_body = bodies[0].stmts; - current_priority = bodies[0].priority; + if ( ! 
bodies.empty() ) + { + current_body = bodies[0].stmts; + current_priority = bodies[0].priority; + } } ScriptFunc::~ScriptFunc() @@ -579,15 +582,21 @@ void ScriptFunc::ReplaceBody(const StmtPtr& old_body, StmtPtr new_body) { bool found_it = false; - for ( auto& body : bodies ) - if ( body.stmts.get() == old_body.get() ) + for ( auto body = bodies.begin(); body != bodies.end(); ++body ) + if ( body->stmts.get() == old_body.get() ) { - body.stmts = new_body; - current_priority = body.priority; + if ( new_body ) + { + body->stmts = new_body; + current_priority = body->priority; + } + else + bodies.erase(body); + found_it = true; + break; } - ASSERT(found_it); current_body = new_body; } @@ -1049,6 +1058,7 @@ void init_primary_bifs() #include "option.bif.func_init" #include "supervisor.bif.func_init" #include "packet_analysis.bif.func_init" +#include "CPP-load.bif.func_init" init_builtin_types(); did_builtin_init = true; diff --git a/src/Func.h b/src/Func.h index 7f42286b9f..18d539526f 100644 --- a/src/Func.h +++ b/src/Func.h @@ -237,10 +237,16 @@ public: const std::vector& new_inits, size_t new_frame_size, int priority) override; - // Replace the given current instance of a function body with - // a new one. + /** + * Replaces the given current instance of a function body with + * a new one. If new_body is nil then the current instance is + * deleted with no replacement. + * + * @param old_body Body to replace. + * @param new_body New body to use; can be nil. + */ void ReplaceBody(const detail::StmtPtr& old_body, - detail::StmtPtr new_body); + detail::StmtPtr new_body); StmtPtr CurrentBody() const { return current_body; } int CurrentPriority() const { return current_priority; } @@ -316,7 +322,7 @@ private: StmtPtr current_body; // ... and its priority. 
- int current_priority; + int current_priority = 0; }; using built_in_func = BifReturnVal (*)(Frame* frame, const Args* args); diff --git a/src/script_opt/CPP/CPP-load.bif b/src/script_opt/CPP/CPP-load.bif index 8260dfd012..4e6105b0bb 100644 --- a/src/script_opt/CPP/CPP-load.bif +++ b/src/script_opt/CPP/CPP-load.bif @@ -22,7 +22,8 @@ function load_CPP%(h: count%): bool %{ auto cb = detail::standalone_callbacks.find(h); - if ( cb == detail::standalone_callbacks.end() ) + if ( cb == detail::standalone_callbacks.end() || + ! detail::CPP_init_hook ) { reporter->Error("load of non-existing C++ code (%" PRIu64 ")", h); return zeek::val_mgr->False(); @@ -38,5 +39,8 @@ function load_CPP%(h: count%): bool // compiled scripts. detail::standalone_activations.push_back(cb->second); + // Proceed with activation. + (*detail::CPP_init_hook)(); + return zeek::val_mgr->True(); %} diff --git a/src/script_opt/CPP/Compile.h b/src/script_opt/CPP/Compile.h index 0f0f9d5de8..5d818c1756 100644 --- a/src/script_opt/CPP/Compile.h +++ b/src/script_opt/CPP/Compile.h @@ -134,8 +134,8 @@ namespace zeek::detail { class CPPCompile { public: CPPCompile(std::vector& _funcs, ProfileFuncs& pfs, - const char* gen_name, CPPHashManager& hm, - bool update, bool standalone); + const std::string& gen_name, const std::string& addl_name, + CPPHashManager& _hm, bool _update, bool _standalone); ~CPPCompile(); private: @@ -187,6 +187,11 @@ private: // Maps functions (not hooks or events) to upstream compiled names. std::unordered_map hashed_funcs; + // Tracks all of the module names used in activate_bodies__CPP() + // calls, to ensure all of the global names of compiled-to-standalone + // functions are available to subsequent scripts. + std::unordered_set module_names; + // If non-zero, provides a tag used for auxiliary/additional // compilation units. int addl_tag = 0; @@ -390,9 +395,14 @@ private: // function. 
std::string GenArgs(const RecordTypePtr& params, const Expr* e); - // Functions that we've declared/compiled. + // Functions that we've declared/compiled. Indexed by full C++ name. std::unordered_set compiled_funcs; + // "Simple" functions that we've compiled, i.e., those that have + // a single body and thus can be called directly. Indexed by + // function name, and maps to the C++ name. + std::unordered_map compiled_simple_funcs; + // Maps those to their associated files - used to make add-C++ body // hashes distinct. std::unordered_map cf_locs; @@ -858,6 +868,12 @@ private: void AddInit(const IntrusivePtr& o) { AddInit(o.get()); } void AddInit(const Obj* o); + // This is akin to an initialization, but done separately + // (upon "activation") so it can include initializations that + // rely on parsing having finished (in particular, BiFs having + // been registered). Only used when generating standalone code. + void AddActivation(std::string a) { activations.emplace_back(a); } + // Records the fact that the initialization of object o1 depends // on that of object o2. void NoteInitDependency(const IntrusivePtr& o1, @@ -922,6 +938,10 @@ private: // other initializations, and that themselves have no dependencies). std::vector pre_inits; + // A list of "activations" (essentially, post-initializations). + // See AddActivation() above. + std::vector activations; + // Expressions for which we need to generate initialization-time // code. Currently, these are only expressions appearing in // attributes. @@ -992,6 +1012,16 @@ private: NL(); } + void Emit(const std::string& fmt, const std::string& arg1, + const std::string& arg2, const std::string& arg3, + const std::string& arg4, const std::string& arg5) const + { + Indent(); + fprintf(write_file, fmt.c_str(), arg1.c_str(), arg2.c_str(), + arg3.c_str(), arg4.c_str(), arg5.c_str()); + NL(); + } + // Returns an expression for constructing a Zeek String object // corresponding to the given byte array.
std::string GenString(const char* b, int len) const; @@ -1010,6 +1040,9 @@ private: // File to which we're generating code. FILE* write_file; + // Name of file holding potential "additional" code. + std::string addl_name; + // Indentation level. int block_level = 0; diff --git a/src/script_opt/CPP/DeclFunc.cc b/src/script_opt/CPP/DeclFunc.cc index da7b3fa377..1d4017d793 100644 --- a/src/script_opt/CPP/DeclFunc.cc +++ b/src/script_opt/CPP/DeclFunc.cc @@ -24,6 +24,9 @@ void CPPCompile::DeclareFunc(const FuncInfo& func) DeclareSubclass(f->GetType(), pf, fname, body, priority, nullptr, f->Flavor()); + + if ( f->GetBodies().size() == 1 ) + compiled_simple_funcs[f->Name()] = fname; } void CPPCompile::DeclareLambda(const LambdaExpr* l, const ProfileFunc* pf) diff --git a/src/script_opt/CPP/Driver.cc b/src/script_opt/CPP/Driver.cc index 3efe5177a1..7d8d016922 100644 --- a/src/script_opt/CPP/Driver.cc +++ b/src/script_opt/CPP/Driver.cc @@ -13,20 +13,24 @@ using namespace std; CPPCompile::CPPCompile(vector& _funcs, ProfileFuncs& _pfs, - const char* gen_name, CPPHashManager& _hm, - bool _update, bool _standalone) -: funcs(_funcs), pfs(_pfs), hm(_hm), update(_update), standalone(_standalone) + const string& gen_name, const string& _addl_name, + CPPHashManager& _hm, bool _update, bool _standalone) +: funcs(_funcs), pfs(_pfs), hm(_hm), + update(_update), standalone(_standalone) { - auto mode = hm.IsAppend() ? "a" : "w"; + addl_name = _addl_name; + bool is_addl = hm.IsAppend(); + auto target_name = is_addl ? addl_name.c_str() : gen_name.c_str(); + auto mode = is_addl ? "a" : "w"; - write_file = fopen(gen_name, mode); + write_file = fopen(target_name, mode); if ( ! write_file ) { - reporter->Error("can't open C++ target file %s", gen_name); + reporter->Error("can't open C++ target file %s", target_name); exit(1); } - if ( hm.IsAppend() ) + if ( is_addl ) { // We need a unique number to associate with the name // space for the code we're adding. 
A convenient way to @@ -39,7 +43,7 @@ CPPCompile::CPPCompile(vector& _funcs, ProfileFuncs& _pfs, { char buf[256]; util::zeek_strerror_r(errno, buf, sizeof(buf)); - reporter->Error("fstat failed on %s: %s", gen_name, buf); + reporter->Error("fstat failed on %s: %s", target_name, buf); exit(1); } @@ -49,6 +53,20 @@ CPPCompile::CPPCompile(vector& _funcs, ProfileFuncs& _pfs, addl_tag = st.st_size + 1; } + else + { + // Create an empty "additional" file. + auto addl_f = fopen(addl_name.c_str(), "w"); + if ( ! addl_f ) + { + reporter->Error("can't open C++ additional file %s", + addl_name.c_str()); + exit(1); + } + + fclose(addl_f); + } + Compile(); } @@ -285,6 +303,9 @@ void CPPCompile::GenEpilog() CheckInitConsistency(to_do); auto nc = GenDependentInits(to_do); + if ( standalone ) + GenStandaloneActivation(); + NL(); Emit("void init__CPP()"); @@ -301,6 +322,9 @@ void CPPCompile::GenEpilog() NL(); InitializeFieldMappings(); + if ( standalone ) + Emit("standalone_init__CPP();"); + EndBlock(true); GenInitHook(); @@ -313,7 +337,7 @@ void CPPCompile::GenEpilog() if ( addl_tag > 0 ) return; - Emit("#include \"CPP-gen-addl.h\"\n"); + Emit("#include \"" + addl_name + "\"\n"); Emit("} // zeek::detail"); } diff --git a/src/script_opt/CPP/Exprs.cc b/src/script_opt/CPP/Exprs.cc index 5786dfcc62..accdae18ed 100644 --- a/src/script_opt/CPP/Exprs.cc +++ b/src/script_opt/CPP/Exprs.cc @@ -262,15 +262,18 @@ string CPPCompile::GenCallExpr(const CallExpr* c, GenType gt) auto f_id = f->AsNameExpr()->Id(); const auto& params = f_id->GetType()->AsFuncType()->Params(); auto id_name = f_id->Name(); - auto fname = Canonicalize(id_name) + "_zf"; - bool is_compiled = compiled_funcs.count(fname) > 0; + bool is_compiled = compiled_simple_funcs.count(id_name) > 0; bool was_compiled = hashed_funcs.count(id_name) > 0; if ( is_compiled || was_compiled ) { + string fname; + if ( was_compiled ) fname = hashed_funcs[id_name]; + else + fname = compiled_simple_funcs[id_name]; if ( args_l->Exprs().length() > 
0 ) gen = fname + "(" + GenArgs(params, args_l) + @@ -499,12 +502,6 @@ string CPPCompile::GenSizeExpr(const Expr* e, GenType gt) else if ( it == TYPE_INTERNAL_DOUBLE ) gen = string("fabs__CPP(") + gen + ")"; - else if ( it == TYPE_INTERNAL_INT || it == TYPE_INTERNAL_DOUBLE ) - { - auto cast = (it == TYPE_INTERNAL_INT) ? "bro_int_t" : "double"; - gen = string("abs__CPP(") + cast + "(" + gen + "))"; - } - else return GenericValPtrToGT(gen + "->SizeVal()", t, gt); diff --git a/src/script_opt/CPP/Func.cc b/src/script_opt/CPP/Func.cc index 9c7c8c7a36..ff06ca6be3 100644 --- a/src/script_opt/CPP/Func.cc +++ b/src/script_opt/CPP/Func.cc @@ -12,6 +12,7 @@ namespace zeek::detail { using namespace std; unordered_map compiled_scripts; +unordered_map> added_bodies; unordered_map standalone_callbacks; vector standalone_activations; diff --git a/src/script_opt/CPP/Func.h b/src/script_opt/CPP/Func.h index 187e2772df..12973b6b49 100644 --- a/src/script_opt/CPP/Func.h +++ b/src/script_opt/CPP/Func.h @@ -108,6 +108,14 @@ struct CompiledScript { // Maps hashes to compiled information. extern std::unordered_map compiled_scripts; +// When using standalone-code, tracks which function bodies have had +// compiled versions added to them. Needed so that we don't replace +// the body twice, leading to two copies. Indexed first by the name +// of the function, and then via the hash of the body that has been +// added to it. +extern std::unordered_map> + added_bodies; + // Maps hashes to standalone script initialization callbacks. 
extern std::unordered_map standalone_callbacks; diff --git a/src/script_opt/CPP/Inits.cc b/src/script_opt/CPP/Inits.cc index 40f7e9da4e..3e3e9de63c 100644 --- a/src/script_opt/CPP/Inits.cc +++ b/src/script_opt/CPP/Inits.cc @@ -4,6 +4,7 @@ #include #include +#include "zeek/module_util.h" #include "zeek/script_opt/ProfileFunc.h" #include "zeek/script_opt/CPP/Compile.h" @@ -460,9 +461,6 @@ void CPPCompile::GenInitHook() { NL(); - if ( standalone ) - GenStandaloneActivation(); - Emit("int hook_in_init()"); StartBlock(); @@ -482,6 +480,15 @@ void CPPCompile::GenInitHook() void CPPCompile::GenStandaloneActivation() { + NL(); + + Emit("void standalone_activation__CPP()"); + StartBlock(); + for ( auto& a : activations ) + Emit(a); + EndBlock(); + + NL(); Emit("void standalone_init__CPP()"); StartBlock(); @@ -497,11 +504,6 @@ void CPPCompile::GenStandaloneActivation() for ( const auto& func : funcs ) { auto f = func.Func(); - - if ( f->Flavor() == FUNC_FLAVOR_FUNCTION ) - // No need to explicitly add bodies. - continue; - auto fname = BodyName(func); auto bname = Canonicalize(fname.c_str()) + "_zf"; @@ -515,10 +517,6 @@ void CPPCompile::GenStandaloneActivation() for ( auto& fb : func_bodies ) { - auto f = fb.first; - const auto fn = f->Name(); - const auto& ft = f->GetType(); - string hashes; for ( auto h : fb.second ) { @@ -530,12 +528,30 @@ void CPPCompile::GenStandaloneActivation() hashes = "{" + hashes + "}"; - Emit("activate_bodies__CPP(\"%s\", %s, %s);", - fn, GenTypeName(ft), hashes); + auto f = fb.first; + auto fn = f->Name(); + const auto& ft = f->GetType(); + + auto var = extract_var_name(fn); + auto mod = extract_module_name(fn); + module_names.insert(mod); + + auto fid = lookup_ID(var.c_str(), mod.c_str(), + false, true, false); + if ( ! fid ) + reporter->InternalError("can't find identifier %s", fn); + + auto exported = fid->IsExport() ?
"true" : "false"; + + Emit("activate_bodies__CPP(\"%s\", \"%s\", %s, %s, %s);", + var, mod, exported, GenTypeName(ft), hashes); } - EndBlock(); NL(); + Emit("CPP_activation_funcs.push_back(standalone_activation__CPP);"); + Emit("CPP_activation_hook = activate__CPPs;"); + + EndBlock(); } void CPPCompile::GenLoad() @@ -548,7 +564,15 @@ void CPPCompile::GenLoad() Emit("register_scripts__CPP(%s, standalone_init__CPP);", Fmt(total_hash)); - // Spit out the placeholder script. + // Spit out the placeholder script, and any associated module + // definitions. + for ( const auto& m : module_names ) + if ( m != "GLOBAL" ) + printf("module %s;\n", m.c_str()); + + if ( module_names.size() > 0 ) + printf("module GLOBAL;\n\n"); + printf("global init_CPP_%llu = load_CPP(%llu);\n", total_hash, total_hash); } diff --git a/src/script_opt/CPP/README.md b/src/script_opt/CPP/README.md index 1a8d1e4d9d..1ba87ec6ea 100644 --- a/src/script_opt/CPP/README.md +++ b/src/script_opt/CPP/README.md @@ -54,6 +54,13 @@ at the beginning of `Compile.h`. Workflows --------- +_Before building Zeek_, see the first of the [_Known Issues_](#known-issues) +below regarding compilation times. If your aim is exploration of the +functionality rather than production use, you might want to build Zeek +using `./configure --enable-debug`, which can reduce compilation times by +50x (!). Once you've built it, the following sketches how to create +and use compiled scripts. + The main code generated by the compiler is taken from `build/CPP-gen.cc`. An empty version of this is generated when first building Zeek. @@ -66,21 +73,17 @@ The following workflow assumes you are in the `build/` subdirectory: 1. `./src/zeek -O gen-C++ target.zeek` The generated code is written to -`CPP-gen-addl.h`. (This name is a reflection of some more complicated -features and probably should be changed.) The compiler will also produce -a file `CPP-hashes.dat`, for use by an advanced feature. -2. `mv CPP-gen-addl.h CPP-gen.cc` -3.
`touch CPP-gen-addl.h` -(Needed because `CPP-gen.cc` -expects the file to exist, again in support of more complicated features.) -4. `ninja` or `make` to recompile Zeek -5. `./src/zeek -O use-C++ target.zeek` +`CPP-gen.cc`. The compiler will also produce +a file `CPP-hashes.dat`, for use by an advanced feature, and an +empty `CPP-gen-addl.h` file (same). +2. `ninja` or `make` to recompile Zeek +3. `./src/zeek -O use-C++ target.zeek` Executes with each function/hook/ event handler pulled in by `target.zeek` replaced with its compiled version. Instead of the last line above, you can use the following variants: -5. `./src/zeek -O report-C++ target.zeek` +3. `./src/zeek -O report-C++ target.zeek` For each function body in `target.zeek`, reports which ones have compiled-to-C++ bodies available, and also any compiled-to-C++ bodies present in the `zeek` binary that @@ -91,15 +94,21 @@ the `target.zeek` script. You can avoid this by replacing the first step with: 1. `./src/zeek -O gen-standalone-C++ target.zeek >target-stand-in.zeek` -and then continuing the next three steps. This option prints to _stdout_ a +(and then building as in the 2nd step above). +This option prints to _stdout_ a (very short) "stand-in" Zeek script that you can load using -`-O use-C++ target-stand-in.zeek` to activate the compiled `target.zeek` -without needing to include `target.zeek` in the invocation. +`target-stand-in.zeek` to activate the compiled `target.zeek` +without needing to include `target.zeek` in the invocation (nor +the `-O use-C++` option). After loading the stand-in script, +you can still access types and functions declared in `target.zeek`. Note: the implementation differences between `gen-C++` and `gen-standalone-C++` wound up being modest enough that it might make sense to just always provide the latter functionality, which it turns out does not introduce any additional constraints compared to the current `gen-C++` functionality. 
+On the other hand, it's possible (not yet established) that code created +using `gen-C++` can be made to compile significantly faster than +standalone code. There are additional workflows relating to running the test suite, which we document only briefly here as they're likely going to change or go away @@ -128,7 +137,7 @@ Both of these _append_ to any existing `CPP-gen-addl.h` file, providing a means for building it up to reflect a number of compilations. The `update-C++` and `add-C++` options help support different -ways of building the `btest` test suie. They were meant to enable doing so +ways of building the `btest` test suite. They were meant to enable doing so without requiring per-test-suite-element recompilations. However, experiences to date have found that trying to avoid pointwise compilations incurs additional headaches, so it's better to just bite off the cost of a large @@ -174,11 +183,6 @@ Known Issues Here we list various known issues with using the compiler:
-* Run-time error messages generally lack location information and information -about associated expressions/statements, making them hard to puzzle out. -This could be fixed, but would add execution overhead in passing around -the necessary strings / `Location` objects. - * Compilation of compiled code can be noticeably slow (if built using `./configure --enable-debug`) or hugely slow (if not), with the latter taking on the order of an hour on a beefy laptop. This slowness complicates @@ -186,6 +190,11 @@ CI/CD approaches for always running compiled code against the test suite when merging changes. It's not presently clear how feasible it is to speed this up. +* Run-time error messages generally lack location information and information +about associated expressions/statements, making them hard to puzzle out. +This could be fixed, but would add execution overhead in passing around +the necessary strings / `Location` objects. + * Subtle bugs can arise when compiling code that uses `@if` conditional compilation. The compiled code will not directly use the wrong instance of a script body (one that differs due to the `@if` conditional having a diff --git a/src/script_opt/CPP/RuntimeInit.cc b/src/script_opt/CPP/RuntimeInit.cc index 1b338d9243..003d082cd8 100644 --- a/src/script_opt/CPP/RuntimeInit.cc +++ b/src/script_opt/CPP/RuntimeInit.cc @@ -9,12 +9,30 @@ namespace zeek::detail { using namespace std; vector CPP_init_funcs; +vector CPP_activation_funcs; // Calls all of the initialization hooks, in the order they were added. void init_CPPs() { - for ( auto f : CPP_init_funcs ) - f(); + static bool need_init = true; + + if ( need_init ) + for ( auto f : CPP_init_funcs ) + f(); + + need_init = false; + } + +// Calls all of the registered activation hooks for standalone code. 
+void activate__CPPs() + { + static bool need_init = true; + + if ( need_init ) + for ( auto f : CPP_activation_funcs ) + f(); + + need_init = false; } // This is a trick used to register the presence of compiled code. @@ -30,6 +48,19 @@ static int flag_init_CPP() static int dummy = flag_init_CPP(); +void register_type__CPP(TypePtr t, const std::string& name) + { + if ( t->GetName().size() > 0 ) + // Already registered. + return; + + t->SetName(name); + + auto id = install_ID(name.c_str(), GLOBAL_MODULE_NAME, true, false); + id->SetType(t); + id->MakeType(); + } + void register_body__CPP(CPPStmtPtr body, int priority, p_hash_type hash, vector events) { @@ -67,18 +98,31 @@ void register_scripts__CPP(p_hash_type h, void (*callback)()) standalone_callbacks[h] = callback; } -void activate_bodies__CPP(const char* fn, TypePtr t, vector hashes) +void activate_bodies__CPP(const char* fn, const char* module, bool exported, + TypePtr t, vector hashes) { auto ft = cast_intrusive(t); - auto fg = lookup_ID(fn, GLOBAL_MODULE_NAME, false, false, false); + auto fg = lookup_ID(fn, module, false, false, false); if ( ! fg ) { - fg = install_ID(fn, GLOBAL_MODULE_NAME, true, false); + fg = install_ID(fn, module, true, exported); fg->SetType(ft); } - auto f = fg->GetVal()->AsFunc(); + auto v = fg->GetVal(); + if ( ! v ) + { // Create it. + std::vector no_bodies; + std::vector no_priorities; + auto sf = make_intrusive(fn, ft, no_bodies, + no_priorities); + + v = make_intrusive(move(sf)); + fg->SetVal(v); + } + + auto f = v->AsFunc(); const auto& bodies = f->GetBodies(); // Track hashes of compiled bodies already associated with f. 
@@ -115,6 +159,7 @@ void activate_bodies__CPP(const char* fn, TypePtr t, vector hashes) auto cs = compiled_scripts[h]; f->AddBody(cs.body, no_inits, num_params, cs.priority); + added_bodies[fn].insert(h); events.insert(cs.events.begin(), cs.events.end()); } @@ -126,13 +171,13 @@ void activate_bodies__CPP(const char* fn, TypePtr t, vector hashes) } } -IDPtr lookup_global__CPP(const char* g, const TypePtr& t) +IDPtr lookup_global__CPP(const char* g, const TypePtr& t, bool exported) { auto gl = lookup_ID(g, GLOBAL_MODULE_NAME, false, false, false); if ( ! gl ) { - gl = install_ID(g, GLOBAL_MODULE_NAME, true, false); + gl = install_ID(g, GLOBAL_MODULE_NAME, true, exported); gl->SetType(t); } diff --git a/src/script_opt/CPP/RuntimeInit.h b/src/script_opt/CPP/RuntimeInit.h index 11b584e7f1..0e58e9e42a 100644 --- a/src/script_opt/CPP/RuntimeInit.h +++ b/src/script_opt/CPP/RuntimeInit.h @@ -20,6 +20,15 @@ typedef void (*CPP_init_func)(); // Tracks the initialization hooks for different compilation runs. extern std::vector CPP_init_funcs; +// Tracks the activation hooks for different "standalone" compilations. +extern std::vector CPP_activation_funcs; + +// Activates all previously registered standalone code. +extern void activate__CPPs(); + +// Registers the given global type, if not already present. +extern void register_type__CPP(TypePtr t, const std::string& name); + // Registers the given compiled function body as associated with the // given priority and hash. "events" is a list of event handlers // relevant for the function body, which should be registered if the @@ -38,15 +47,17 @@ extern void register_lambda__CPP(CPPStmtPtr body, p_hash_type hash, // the given hash. extern void register_scripts__CPP(p_hash_type h, void (*callback)()); -// Activates the event handler/hook with the given name (which is created -// if it doesn't exist) and type, using (at least) the bodies associated -// with the given hashes. 
-extern void activate_bodies__CPP(const char* fn, TypePtr t, +// Activates the function/event handler/hook with the given name and in +// the given module, using (at least) the bodies associated with the +// given hashes. Creates the identifier using the given module and +// export setting if it doesn't already exist. +extern void activate_bodies__CPP(const char* fn, const char* module, + bool exported, TypePtr t, std::vector hashes); // Looks for a global with the given name. If not present, creates it -// with the given type. -extern IDPtr lookup_global__CPP(const char* g, const TypePtr& t); +// with the given type and export setting. +extern IDPtr lookup_global__CPP(const char* g, const TypePtr& t, bool exported); // Looks for a BiF with the given name. Returns nil if not present. extern Func* lookup_bif__CPP(const char* bif); diff --git a/src/script_opt/CPP/Types.cc b/src/script_opt/CPP/Types.cc index 38511b5c85..b6eaeb2564 100644 --- a/src/script_opt/CPP/Types.cc +++ b/src/script_opt/CPP/Types.cc @@ -134,7 +134,8 @@ void CPPCompile::ExpandTypeVar(const TypePtr& t) auto& script_type_name = t->GetName(); if ( script_type_name.size() > 0 ) - AddInit(t, tn + "->SetName(\"" + script_type_name + "\");"); + AddInit(t, "register_type__CPP(" + tn + ", \"" + + script_type_name + "\");"); AddInit(t); } diff --git a/src/script_opt/CPP/Vars.cc b/src/script_opt/CPP/Vars.cc index 0cf5e668c5..6e1795123b 100644 --- a/src/script_opt/CPP/Vars.cc +++ b/src/script_opt/CPP/Vars.cc @@ -109,9 +109,11 @@ void CPPCompile::CreateGlobal(const ID* g) const auto& t = g->GetType(); NoteInitDependency(g, TypeRep(t)); + auto exported = g->IsExport() ? 
"true" : "false"; + AddInit(g, globals[gn], string("lookup_global__CPP(\"") + gn + "\", " + - GenTypeName(t) + ")"); + GenTypeName(t) + ", " + exported + ")"); } if ( is_bif ) @@ -168,7 +170,12 @@ void CPPCompile::AddBiF(const ID* b, bool is_var) if ( AddGlobal(n, "bif", true) ) Emit("Func* %s;", globals[n]); - AddInit(b, globals[n], string("lookup_bif__CPP(\"") + bn + "\")"); + auto lookup = string("lookup_bif__CPP(\"") + bn + "\")"; + + if ( standalone ) + AddActivation(globals[n] + " = " + lookup + ";"); + else + AddInit(b, globals[n], lookup); } bool CPPCompile::AddGlobal(const string& g, const char* suffix, bool track) diff --git a/src/script_opt/CPP/bare-embedded-build b/src/script_opt/CPP/bare-embedded-build index 5001e9923a..af0cf37a9a 100755 --- a/src/script_opt/CPP/bare-embedded-build +++ b/src/script_opt/CPP/bare-embedded-build @@ -2,11 +2,8 @@ build=../../../build -echo > CPP-gen-addl.h -(cd $build - export -n ZEEK_USE_CPP ZEEK_ADD_CPP - export ZEEK_HASH_DIR=. - echo | src/zeek -b -O gen-C++ -) -mv $build/CPP-gen-addl.h CPP-gen.cc -(cd $build ; ninja || echo Bare embedded build failed) +cd $build +export -n ZEEK_USE_CPP ZEEK_ADD_CPP +export ZEEK_HASH_DIR=. +echo | src/zeek -b -O gen-C++ +ninja || echo Bare embedded build failed diff --git a/src/script_opt/CPP/full-embedded-build b/src/script_opt/CPP/full-embedded-build index b542086bcc..de4ac500cb 100755 --- a/src/script_opt/CPP/full-embedded-build +++ b/src/script_opt/CPP/full-embedded-build @@ -2,11 +2,8 @@ build=../../../build -echo > CPP-gen-addl.h -(cd $build - export -n ZEEK_USE_CPP ZEEK_ADD_CPP - export ZEEK_HASH_DIR=. - echo | src/zeek -O gen-C++ -) -mv $build/CPP-gen-addl.h CPP-gen.cc -(cd $build ; ninja || echo Full embedded build failed) +cd $build +export -n ZEEK_USE_CPP ZEEK_ADD_CPP +export ZEEK_HASH_DIR=. 
+echo | src/zeek -O gen-C++ +ninja || echo Full embedded build failed diff --git a/src/script_opt/CPP/non-embedded-build b/src/script_opt/CPP/non-embedded-build index 7d8e7b50c5..ec925184e4 100755 --- a/src/script_opt/CPP/non-embedded-build +++ b/src/script_opt/CPP/non-embedded-build @@ -1,7 +1,5 @@ #! /bin/sh -base=../../.. -so=$base/src/script_opt/CPP -echo > $so/CPP-gen.cc -cd $base/build +cd ../../../build +echo >CPP-gen.cc ninja || echo Non-embedded build failed diff --git a/src/script_opt/CPP/single-full-test.sh b/src/script_opt/CPP/single-full-test.sh index f4802230ff..84d9b21479 100755 --- a/src/script_opt/CPP/single-full-test.sh +++ b/src/script_opt/CPP/single-full-test.sh @@ -4,11 +4,8 @@ echo $1 base=../../.. test=$base/testing/btest -so=$base/src/script_opt/CPP build=$base/build -gen=CPP-gen-addl.h - -echo >$gen +gen=CPP-gen.cc ./non-embedded-build >$build/errs 2>&1 || echo non-embedded build failed @@ -17,7 +14,7 @@ export ZEEK_HASH_DIR=$test ZEEK_GEN_CPP= cd $test ../../auxil/btest/btest $1 >jbuild-$1.out 2>&1 grep -c '^namespace' $gen -mv $gen $so/CPP-gen.cc +mv $gen $build/ cd $build ninja >& errs || echo build for $1 failed diff --git a/src/script_opt/CPP/single-test.sh b/src/script_opt/CPP/single-test.sh index a6e56eb07c..6d2a117e67 100755 --- a/src/script_opt/CPP/single-test.sh +++ b/src/script_opt/CPP/single-test.sh @@ -4,18 +4,16 @@ echo $1 base=../../.. test=$base/testing/btest -so=$base/src/script_opt/CPP build=$base/build -gen=CPP-gen-addl.h +gen=CPP-gen.cc export -n ZEEK_USE_CPP export ZEEK_HASH_DIR=$test ZEEK_ADD_CPP= cd $test cp $build/CPP-hashes.dat . 
-echo >$gen ../../auxil/btest/btest $1 >cpp-build-$1.out 2>&1 grep -c '^namespace' $gen -mv $gen $so +mv $gen $build cd $build ninja >& errs || echo build for $1 failed diff --git a/src/script_opt/CPP/update-single-test.sh b/src/script_opt/CPP/update-single-test.sh index dcdecfbb70..aa521a053e 100755 --- a/src/script_opt/CPP/update-single-test.sh +++ b/src/script_opt/CPP/update-single-test.sh @@ -2,18 +2,16 @@ base=../../.. test=$base/testing/btest -so=$base/src/script_opt/CPP build=$base/build -gen=CPP-gen-addl.h +gen=CPP-gen.cc export -n ZEEK_USE_CPP export ZEEK_HASH_DIR=$test ZEEK_ADD_CPP= cd $test cp $build/CPP-hashes.dat . -echo >$gen ../../auxil/btest/btest $1 >jbuild-$1.out 2>&1 grep -c '^namespace' $gen -mv $gen $so +mv $gen $build/ cd $build ninja >& errs || echo build for $1 failed diff --git a/src/script_opt/ScriptOpt.cc b/src/script_opt/ScriptOpt.cc index ad74979df8..47f0765b57 100644 --- a/src/script_opt/ScriptOpt.cc +++ b/src/script_opt/ScriptOpt.cc @@ -24,6 +24,7 @@ AnalyOpt analysis_options; std::unordered_set non_recursive_funcs; void (*CPP_init_hook)() = nullptr; +void (*CPP_activation_hook)() = nullptr; // Tracks all of the loaded functions (including event handlers and hooks). static std::vector funcs; @@ -288,9 +289,6 @@ void analyze_scripts() // Avoid profiling overhead. return; - const auto hash_name = hash_dir + "CPP-hashes"; - const auto gen_name = hash_dir + "CPP-gen-addl.h"; - // Now that everything's parsed and BiF's have been initialized, // profile the functions. auto pfs = std::make_unique(funcs, is_CPP_compilable, false); @@ -365,8 +363,21 @@ void analyze_scripts() { auto b = s->second.body; b->SetHash(hash); - f.Func()->ReplaceBody(f.Body(), b); - f.SetBody(b); + + // We may have already updated the body if + // we're using code compiled for standalone. + if ( f.Body()->Tag() != STMT_CPP ) + { + auto func = f.Func(); + if ( added_bodies[func->Name()].count(hash) > 0 ) + // We've already added the + // replacement. Delete orig. 
+ func->ReplaceBody(f.Body(), nullptr); + else + func->ReplaceBody(f.Body(), b); + + f.SetBody(b); + } for ( auto& e : s->second.events ) { @@ -384,6 +395,8 @@ void analyze_scripts() if ( generating_CPP ) { + const auto hash_name = hash_dir + "CPP-hashes"; + auto hm = std::make_unique(hash_name.c_str(), analysis_options.add_CPP); @@ -402,7 +415,10 @@ void analyze_scripts() pfs = std::make_unique(funcs, is_CPP_compilable, false); } - CPPCompile cpp(funcs, *pfs, gen_name.c_str(), *hm, + const auto gen_name = hash_dir + "CPP-gen.cc"; + const auto addl_name = hash_dir + "CPP-gen-addl.h"; + + CPPCompile cpp(funcs, *pfs, gen_name, addl_name, *hm, analysis_options.gen_CPP || analysis_options.update_CPP, analysis_options.gen_standalone_CPP); diff --git a/src/script_opt/ScriptOpt.h b/src/script_opt/ScriptOpt.h index 8531e10f24..ca42f1fdea 100644 --- a/src/script_opt/ScriptOpt.h +++ b/src/script_opt/ScriptOpt.h @@ -152,5 +152,9 @@ extern void analyze_scripts(); // to a non-empty value. extern void (*CPP_init_hook)(); +// Used for "standalone" C++-compiled scripts to complete their activation; +// called after parsing and BiF initialization, but before zeek_init. +extern void (*CPP_activation_hook)(); + } // namespace zeek::detail diff --git a/src/zeek-setup.cc b/src/zeek-setup.cc index a6cbf55d49..240e2f2a37 100644 --- a/src/zeek-setup.cc +++ b/src/zeek-setup.cc @@ -843,6 +843,9 @@ SetupResult setup(int argc, char** argv, Options* zopts) // we don't have any other source for it. run_state::detail::update_network_time(util::current_time()); + if ( CPP_activation_hook ) + (*CPP_activation_hook)(); + if ( zeek_init ) event_mgr.Enqueue(zeek_init, Args{});