Merge remote-tracking branch 'origin/topic/vern/CPP-workflow2'

* origin/topic/vern/CPP-workflow2:
  low-level coding style fixes
  support for standalone compiled scripts to export globals with module qualifiers
  updates for documentation of functionality for compiling scripts to C++
  fixes for standalone C++ scripts making types & variables/functions available
  fixed bug limiting availability of load_CPP() BiF
  updates to development helper scripts to support new workflow
  simpler workflow for -O gen-C++ ; also some hooks for -O gen-standalone-C++
  ReplaceBody now deletes a body if the replacement is nil
  removal of can't-actually-be-executed code
This commit is contained in:
Tim Wojtulewicz 2021-07-01 08:46:41 -07:00
commit 0b342b7bfa
26 changed files with 335 additions and 124 deletions

20
CHANGES
View file

@ -1,3 +1,23 @@
4.1.0-dev.852 | 2021-07-01 08:46:41 -0700
* low-level coding style fixes (Vern Paxson, Corelight)
* support for standalone compiled scripts to export globals with module qualifiers (Vern Paxson, Corelight)
* updates for documentation of functionality for compiling scripts to C++ (Vern Paxson, Corelight)
* fixes for standalone C++ scripts making types & variables/functions available (Vern Paxson, Corelight)
* fixed bug limiting availability of load_CPP() BiF (Vern Paxson, Corelight)
* updates to development helper scripts to support new workflow (Vern Paxson, Corelight)
* simpler workflow for -O gen-C++ ; also some hooks for -O gen-standalone-C++ (Vern Paxson, Corelight)
* ReplaceBody now deletes a body if the replacement is nil (Vern Paxson, Corelight)
* removal of can't-actually-be-executed code (Vern Paxson, Corelight)
4.1.0-dev.842 | 2021-06-30 20:32:37 -0700 4.1.0-dev.842 | 2021-06-30 20:32:37 -0700
* Skip input framework entries with missing but non-optional fields (Christian Kreibich, Corelight) * Skip input framework entries with missing but non-optional fields (Christian Kreibich, Corelight)

View file

@ -1 +1 @@
4.1.0-dev.842 4.1.0-dev.852

View file

@ -332,9 +332,12 @@ ScriptFunc::ScriptFunc(std::string _name, FuncTypePtr ft,
sort(bodies.begin(), bodies.end()); sort(bodies.begin(), bodies.end());
if ( ! bodies.empty() )
{
current_body = bodies[0].stmts; current_body = bodies[0].stmts;
current_priority = bodies[0].priority; current_priority = bodies[0].priority;
} }
}
ScriptFunc::~ScriptFunc() ScriptFunc::~ScriptFunc()
{ {
@ -579,15 +582,21 @@ void ScriptFunc::ReplaceBody(const StmtPtr& old_body, StmtPtr new_body)
{ {
bool found_it = false; bool found_it = false;
for ( auto& body : bodies ) for ( auto body = bodies.begin(); body != bodies.end(); ++body )
if ( body.stmts.get() == old_body.get() ) if ( body->stmts.get() == old_body.get() )
{ {
body.stmts = new_body; if ( new_body )
current_priority = body.priority; {
body->stmts = new_body;
current_priority = body->priority;
}
else
bodies.erase(body);
found_it = true; found_it = true;
break;
} }
ASSERT(found_it);
current_body = new_body; current_body = new_body;
} }
@ -1049,6 +1058,7 @@ void init_primary_bifs()
#include "option.bif.func_init" #include "option.bif.func_init"
#include "supervisor.bif.func_init" #include "supervisor.bif.func_init"
#include "packet_analysis.bif.func_init" #include "packet_analysis.bif.func_init"
#include "CPP-load.bif.func_init"
init_builtin_types(); init_builtin_types();
did_builtin_init = true; did_builtin_init = true;

View file

@ -237,8 +237,14 @@ public:
const std::vector<IDPtr>& new_inits, const std::vector<IDPtr>& new_inits,
size_t new_frame_size, int priority) override; size_t new_frame_size, int priority) override;
// Replace the given current instance of a function body with /**
// a new one. * Replaces the given current instance of a function body with
* a new one. If new_body is nil then the current instance is
* deleted with no replacement.
*
* @param old_body Body to replace.
* @param new_body New body to use; can be nil.
*/
void ReplaceBody(const detail::StmtPtr& old_body, void ReplaceBody(const detail::StmtPtr& old_body,
detail::StmtPtr new_body); detail::StmtPtr new_body);
@ -316,7 +322,7 @@ private:
StmtPtr current_body; StmtPtr current_body;
// ... and its priority. // ... and its priority.
int current_priority; int current_priority = 0;
}; };
using built_in_func = BifReturnVal (*)(Frame* frame, const Args* args); using built_in_func = BifReturnVal (*)(Frame* frame, const Args* args);

View file

@ -22,7 +22,8 @@ function load_CPP%(h: count%): bool
%{ %{
auto cb = detail::standalone_callbacks.find(h); auto cb = detail::standalone_callbacks.find(h);
if ( cb == detail::standalone_callbacks.end() ) if ( cb == detail::standalone_callbacks.end() ||
! detail::CPP_init_hook )
{ {
reporter->Error("load of non-existing C++ code (%" PRIu64 ")", h); reporter->Error("load of non-existing C++ code (%" PRIu64 ")", h);
return zeek::val_mgr->False(); return zeek::val_mgr->False();
@ -38,5 +39,8 @@ function load_CPP%(h: count%): bool
// compiled scripts. // compiled scripts.
detail::standalone_activations.push_back(cb->second); detail::standalone_activations.push_back(cb->second);
// Proceed with activation.
(*detail::CPP_init_hook)();
return zeek::val_mgr->True(); return zeek::val_mgr->True();
%} %}

View file

@ -134,8 +134,8 @@ namespace zeek::detail {
class CPPCompile { class CPPCompile {
public: public:
CPPCompile(std::vector<FuncInfo>& _funcs, ProfileFuncs& pfs, CPPCompile(std::vector<FuncInfo>& _funcs, ProfileFuncs& pfs,
const char* gen_name, CPPHashManager& hm, const std::string& gen_name, const std::string& addl_name,
bool update, bool standalone); CPPHashManager& _hm, bool _update, bool _standalone);
~CPPCompile(); ~CPPCompile();
private: private:
@ -187,6 +187,11 @@ private:
// Maps functions (not hooks or events) to upstream compiled names. // Maps functions (not hooks or events) to upstream compiled names.
std::unordered_map<std::string, std::string> hashed_funcs; std::unordered_map<std::string, std::string> hashed_funcs;
// Tracks all of the module names used in activate_bodies__CPP()
// calls, to ensure all of the global names of compiled-to-standalone
// functions are available to subsequent scripts.
std::unordered_set<std::string> module_names;
// If non-zero, provides a tag used for auxiliary/additional // If non-zero, provides a tag used for auxiliary/additional
// compilation units. // compilation units.
int addl_tag = 0; int addl_tag = 0;
@ -390,9 +395,14 @@ private:
// function. // function.
std::string GenArgs(const RecordTypePtr& params, const Expr* e); std::string GenArgs(const RecordTypePtr& params, const Expr* e);
// Functions that we've declared/compiled. // Functions that we've declared/compiled. Indexed by full C++ name.
std::unordered_set<std::string> compiled_funcs; std::unordered_set<std::string> compiled_funcs;
// "Simple" functions that we've compiled, i.e., those that have
// a single body and thus can be called directly. Indexed by
// function name, and maps to the C++ name.
std::unordered_map<std::string, std::string> compiled_simple_funcs;
// Maps those to their associated files - used to make add-C++ body // Maps those to their associated files - used to make add-C++ body
// hashes distinct. // hashes distinct.
std::unordered_map<std::string, std::string> cf_locs; std::unordered_map<std::string, std::string> cf_locs;
@ -858,6 +868,12 @@ private:
void AddInit(const IntrusivePtr<Obj>& o) { AddInit(o.get()); } void AddInit(const IntrusivePtr<Obj>& o) { AddInit(o.get()); }
void AddInit(const Obj* o); void AddInit(const Obj* o);
// Records a line of "activation" code.  Activations resemble
// initializations, but are emitted separately so that they can hold
// work that depends on parsing having completed (in particular, on
// BiFs having been registered).  Only used when generating standalone
// code.
void AddActivation(std::string a)
	{
	activations.push_back(a);
	}
// Records the fact that the initialization of object o1 depends // Records the fact that the initialization of object o1 depends
// on that of object o2. // on that of object o2.
void NoteInitDependency(const IntrusivePtr<Obj>& o1, void NoteInitDependency(const IntrusivePtr<Obj>& o1,
@ -922,6 +938,10 @@ private:
// other initializations, and that themselves have no dependencies). // other initializations, and that themselves have no dependencies).
std::vector<std::string> pre_inits; std::vector<std::string> pre_inits;
// A list of "activations" (essentially, post-initializations).
// See AddActivation() above.
std::vector<std::string> activations;
// Expressions for which we need to generate initialization-time // Expressions for which we need to generate initialization-time
// code. Currently, these are only expressions appearing in // code. Currently, these are only expressions appearing in
// attributes. // attributes.
@ -992,6 +1012,16 @@ private:
NL(); NL();
} }
// Five-argument form of Emit(): calls Indent(), prints to write_file
// using "fmt" as a printf-style format that consumes the five string
// arguments, and then calls NL().
void Emit(const std::string& fmt, const std::string& arg1,
          const std::string& arg2, const std::string& arg3,
          const std::string& arg4, const std::string& arg5) const
	{
	const char* const pattern = fmt.c_str();
	const char* const a1 = arg1.c_str();
	const char* const a2 = arg2.c_str();
	const char* const a3 = arg3.c_str();
	const char* const a4 = arg4.c_str();
	const char* const a5 = arg5.c_str();

	Indent();
	fprintf(write_file, pattern, a1, a2, a3, a4, a5);
	NL();
	}
// Returns an expression for constructing a Zeek String object // Returns an expression for constructing a Zeek String object
// corresponding to the given byte array. // corresponding to the given byte array.
std::string GenString(const char* b, int len) const; std::string GenString(const char* b, int len) const;
@ -1010,6 +1040,9 @@ private:
// File to which we're generating code. // File to which we're generating code.
FILE* write_file; FILE* write_file;
// Name of file holding potential "additional" code.
std::string addl_name;
// Indentation level. // Indentation level.
int block_level = 0; int block_level = 0;

View file

@ -24,6 +24,9 @@ void CPPCompile::DeclareFunc(const FuncInfo& func)
DeclareSubclass(f->GetType(), pf, fname, body, priority, nullptr, DeclareSubclass(f->GetType(), pf, fname, body, priority, nullptr,
f->Flavor()); f->Flavor());
if ( f->GetBodies().size() == 1 )
compiled_simple_funcs[f->Name()] = fname;
} }
void CPPCompile::DeclareLambda(const LambdaExpr* l, const ProfileFunc* pf) void CPPCompile::DeclareLambda(const LambdaExpr* l, const ProfileFunc* pf)

View file

@ -13,20 +13,24 @@ using namespace std;
CPPCompile::CPPCompile(vector<FuncInfo>& _funcs, ProfileFuncs& _pfs, CPPCompile::CPPCompile(vector<FuncInfo>& _funcs, ProfileFuncs& _pfs,
const char* gen_name, CPPHashManager& _hm, const string& gen_name, const string& _addl_name,
bool _update, bool _standalone) CPPHashManager& _hm, bool _update, bool _standalone)
: funcs(_funcs), pfs(_pfs), hm(_hm), update(_update), standalone(_standalone) : funcs(_funcs), pfs(_pfs), hm(_hm),
update(_update), standalone(_standalone)
{ {
auto mode = hm.IsAppend() ? "a" : "w"; addl_name = _addl_name;
bool is_addl = hm.IsAppend();
auto target_name = is_addl ? addl_name.c_str() : gen_name.c_str();
auto mode = is_addl ? "a" : "w";
write_file = fopen(gen_name, mode); write_file = fopen(target_name, mode);
if ( ! write_file ) if ( ! write_file )
{ {
reporter->Error("can't open C++ target file %s", gen_name); reporter->Error("can't open C++ target file %s", target_name);
exit(1); exit(1);
} }
if ( hm.IsAppend() ) if ( is_addl )
{ {
// We need a unique number to associate with the name // We need a unique number to associate with the name
// space for the code we're adding. A convenient way to // space for the code we're adding. A convenient way to
@ -39,7 +43,7 @@ CPPCompile::CPPCompile(vector<FuncInfo>& _funcs, ProfileFuncs& _pfs,
{ {
char buf[256]; char buf[256];
util::zeek_strerror_r(errno, buf, sizeof(buf)); util::zeek_strerror_r(errno, buf, sizeof(buf));
reporter->Error("fstat failed on %s: %s", gen_name, buf); reporter->Error("fstat failed on %s: %s", target_name, buf);
exit(1); exit(1);
} }
@ -49,6 +53,20 @@ CPPCompile::CPPCompile(vector<FuncInfo>& _funcs, ProfileFuncs& _pfs,
addl_tag = st.st_size + 1; addl_tag = st.st_size + 1;
} }
else
{
// Create an empty "additional" file.
auto addl_f = fopen(addl_name.c_str(), "w");
if ( ! addl_f )
{
reporter->Error("can't open C++ additional file %s",
addl_name.c_str());
exit(1);
}
fclose(addl_f);
}
Compile(); Compile();
} }
@ -285,6 +303,9 @@ void CPPCompile::GenEpilog()
CheckInitConsistency(to_do); CheckInitConsistency(to_do);
auto nc = GenDependentInits(to_do); auto nc = GenDependentInits(to_do);
if ( standalone )
GenStandaloneActivation();
NL(); NL();
Emit("void init__CPP()"); Emit("void init__CPP()");
@ -301,6 +322,9 @@ void CPPCompile::GenEpilog()
NL(); NL();
InitializeFieldMappings(); InitializeFieldMappings();
if ( standalone )
Emit("standalone_init__CPP();");
EndBlock(true); EndBlock(true);
GenInitHook(); GenInitHook();
@ -313,7 +337,7 @@ void CPPCompile::GenEpilog()
if ( addl_tag > 0 ) if ( addl_tag > 0 )
return; return;
Emit("#include \"CPP-gen-addl.h\"\n"); Emit("#include \"" + addl_name + "\"\n");
Emit("} // zeek::detail"); Emit("} // zeek::detail");
} }

View file

@ -262,15 +262,18 @@ string CPPCompile::GenCallExpr(const CallExpr* c, GenType gt)
auto f_id = f->AsNameExpr()->Id(); auto f_id = f->AsNameExpr()->Id();
const auto& params = f_id->GetType()->AsFuncType()->Params(); const auto& params = f_id->GetType()->AsFuncType()->Params();
auto id_name = f_id->Name(); auto id_name = f_id->Name();
auto fname = Canonicalize(id_name) + "_zf";
bool is_compiled = compiled_funcs.count(fname) > 0; bool is_compiled = compiled_simple_funcs.count(id_name) > 0;
bool was_compiled = hashed_funcs.count(id_name) > 0; bool was_compiled = hashed_funcs.count(id_name) > 0;
if ( is_compiled || was_compiled ) if ( is_compiled || was_compiled )
{ {
string fname;
if ( was_compiled ) if ( was_compiled )
fname = hashed_funcs[id_name]; fname = hashed_funcs[id_name];
else
fname = compiled_simple_funcs[id_name];
if ( args_l->Exprs().length() > 0 ) if ( args_l->Exprs().length() > 0 )
gen = fname + "(" + GenArgs(params, args_l) + gen = fname + "(" + GenArgs(params, args_l) +
@ -499,12 +502,6 @@ string CPPCompile::GenSizeExpr(const Expr* e, GenType gt)
else if ( it == TYPE_INTERNAL_DOUBLE ) else if ( it == TYPE_INTERNAL_DOUBLE )
gen = string("fabs__CPP(") + gen + ")"; gen = string("fabs__CPP(") + gen + ")";
else if ( it == TYPE_INTERNAL_INT || it == TYPE_INTERNAL_DOUBLE )
{
auto cast = (it == TYPE_INTERNAL_INT) ? "bro_int_t" : "double";
gen = string("abs__CPP(") + cast + "(" + gen + "))";
}
else else
return GenericValPtrToGT(gen + "->SizeVal()", t, gt); return GenericValPtrToGT(gen + "->SizeVal()", t, gt);

View file

@ -12,6 +12,7 @@ namespace zeek::detail {
using namespace std; using namespace std;
unordered_map<p_hash_type, CompiledScript> compiled_scripts; unordered_map<p_hash_type, CompiledScript> compiled_scripts;
unordered_map<string, unordered_set<p_hash_type>> added_bodies;
unordered_map<p_hash_type, void (*)()> standalone_callbacks; unordered_map<p_hash_type, void (*)()> standalone_callbacks;
vector<void (*)()> standalone_activations; vector<void (*)()> standalone_activations;

View file

@ -108,6 +108,14 @@ struct CompiledScript {
// Maps hashes to compiled information. // Maps hashes to compiled information.
extern std::unordered_map<p_hash_type, CompiledScript> compiled_scripts; extern std::unordered_map<p_hash_type, CompiledScript> compiled_scripts;
// When using standalone-code, tracks which function bodies have had
// compiled versions added to them. Needed so that we don't replace
// the body twice, leading to two copies. Indexed first by the name
// of the function, and then via the hash of the body that has been
// added to it.
extern std::unordered_map<std::string, std::unordered_set<p_hash_type>>
added_bodies;
// Maps hashes to standalone script initialization callbacks. // Maps hashes to standalone script initialization callbacks.
extern std::unordered_map<p_hash_type, void (*)()> standalone_callbacks; extern std::unordered_map<p_hash_type, void (*)()> standalone_callbacks;

View file

@ -4,6 +4,7 @@
#include <unistd.h> #include <unistd.h>
#include <sys/stat.h> #include <sys/stat.h>
#include "zeek/module_util.h"
#include "zeek/script_opt/ProfileFunc.h" #include "zeek/script_opt/ProfileFunc.h"
#include "zeek/script_opt/CPP/Compile.h" #include "zeek/script_opt/CPP/Compile.h"
@ -460,9 +461,6 @@ void CPPCompile::GenInitHook()
{ {
NL(); NL();
if ( standalone )
GenStandaloneActivation();
Emit("int hook_in_init()"); Emit("int hook_in_init()");
StartBlock(); StartBlock();
@ -482,6 +480,15 @@ void CPPCompile::GenInitHook()
void CPPCompile::GenStandaloneActivation() void CPPCompile::GenStandaloneActivation()
{ {
NL();
Emit("void standalone_activation__CPP()");
StartBlock();
for ( auto& a : activations )
Emit(a);
EndBlock();
NL();
Emit("void standalone_init__CPP()"); Emit("void standalone_init__CPP()");
StartBlock(); StartBlock();
@ -497,11 +504,6 @@ void CPPCompile::GenStandaloneActivation()
for ( const auto& func : funcs ) for ( const auto& func : funcs )
{ {
auto f = func.Func(); auto f = func.Func();
if ( f->Flavor() == FUNC_FLAVOR_FUNCTION )
// No need to explicitly add bodies.
continue;
auto fname = BodyName(func); auto fname = BodyName(func);
auto bname = Canonicalize(fname.c_str()) + "_zf"; auto bname = Canonicalize(fname.c_str()) + "_zf";
@ -515,10 +517,6 @@ void CPPCompile::GenStandaloneActivation()
for ( auto& fb : func_bodies ) for ( auto& fb : func_bodies )
{ {
auto f = fb.first;
const auto fn = f->Name();
const auto& ft = f->GetType();
string hashes; string hashes;
for ( auto h : fb.second ) for ( auto h : fb.second )
{ {
@ -530,12 +528,30 @@ void CPPCompile::GenStandaloneActivation()
hashes = "{" + hashes + "}"; hashes = "{" + hashes + "}";
Emit("activate_bodies__CPP(\"%s\", %s, %s);", auto f = fb.first;
fn, GenTypeName(ft), hashes); auto fn = f->Name();
const auto& ft = f->GetType();
auto var = extract_var_name(fn);
auto mod = extract_module_name(fn);
module_names.insert(mod);
auto fid = lookup_ID(var.c_str(), mod.c_str(),
false, true, false);
if ( ! fid )
reporter->InternalError("can't find identifier %s", fn);
auto exported = fid->IsExport() ? "true" : "false";
Emit("activate_bodies__CPP(\"%s\", \"%s\", %s, %s, %s);",
var, mod, exported, GenTypeName(ft), hashes);
} }
EndBlock();
NL(); NL();
Emit("CPP_activation_funcs.push_back(standalone_activation__CPP);");
Emit("CPP_activation_hook = activate__CPPs;");
EndBlock();
} }
void CPPCompile::GenLoad() void CPPCompile::GenLoad()
@ -548,7 +564,15 @@ void CPPCompile::GenLoad()
Emit("register_scripts__CPP(%s, standalone_init__CPP);", Fmt(total_hash)); Emit("register_scripts__CPP(%s, standalone_init__CPP);", Fmt(total_hash));
// Spit out the placeholder script. // Spit out the placeholder script, and any associated module
// definitions.
for ( const auto& m : module_names )
if ( m != "GLOBAL" )
printf("module %s;\n", m.c_str());
if ( module_names.size() > 0 )
printf("module GLOBAL;\n\n");
printf("global init_CPP_%llu = load_CPP(%llu);\n", printf("global init_CPP_%llu = load_CPP(%llu);\n",
total_hash, total_hash); total_hash, total_hash);
} }

View file

@ -54,6 +54,13 @@ at the beginning of `Compile.h`.
Workflows Workflows
--------- ---------
_Before building Zeek_, see the first of the [_Known Issues_](#known-issues)
below regarding compilation times. If your aim is exploration of the
functionality rather than production use, you might want to build Zeek
using `./configure --enable-debug`, which can reduce compilation times by
50x (!). Once you've built it, the following sketches how to create
and use compiled scripts.
The main code generated by the compiler is taken from The main code generated by the compiler is taken from
`build/CPP-gen.cc`. An empty version of this is generated when `build/CPP-gen.cc`. An empty version of this is generated when
first building Zeek. first building Zeek.
@ -66,21 +73,17 @@ The following workflow assumes you are in the `build/` subdirectory:
1. `./src/zeek -O gen-C++ target.zeek` 1. `./src/zeek -O gen-C++ target.zeek`
The generated code is written to The generated code is written to
`CPP-gen-addl.h`. (This name is a reflection of some more complicated `CPP-gen.cc`. The compiler will also produce
features and probably should be changed.) The compiler will also produce a file `CPP-hashes.dat`, for use by an advanced feature, and an
a file `CPP-hashes.dat`, for use by an advanced feature. empty `CPP-gen-addl.h` file (same).
2. `mv CPP-gen-addl.h CPP-gen.cc` 2. `ninja` or `make` to recompile Zeek
3. `touch CPP-gen-addl.h` 3. `./src/zeek -O use-C++ target.zeek`
(Needed because `CPP-gen.cc`
expects the file to exist, again in support of more complicated features.)
4. `ninja` or `make` to recompile Zeek
5. `./src/zeek -O use-C++ target.zeek`
Executes with each function/hook/ Executes with each function/hook/
event handler pulled in by `target.zeek` replaced with its compiled version. event handler pulled in by `target.zeek` replaced with its compiled version.
Instead of the last line above, you can use the following variants: Instead of the last line above, you can use the following variants:
5. `./src/zeek -O report-C++ target.zeek` 3. `./src/zeek -O report-C++ target.zeek`
For each function body in For each function body in
`target.zeek`, reports which ones have compiled-to-C++ bodies available, `target.zeek`, reports which ones have compiled-to-C++ bodies available,
and also any compiled-to-C++ bodies present in the `zeek` binary that and also any compiled-to-C++ bodies present in the `zeek` binary that
@ -91,15 +94,21 @@ the `target.zeek` script. You can avoid this by replacing the first step with:
1. `./src/zeek -O gen-standalone-C++ target.zeek >target-stand-in.zeek` 1. `./src/zeek -O gen-standalone-C++ target.zeek >target-stand-in.zeek`
and then continuing the next three steps. This option prints to _stdout_ a (and then building as in the 2nd step above).
This option prints to _stdout_ a
(very short) "stand-in" Zeek script that you can load using (very short) "stand-in" Zeek script that you can load using
`-O use-C++ target-stand-in.zeek` to activate the compiled `target.zeek` `target-stand-in.zeek` to activate the compiled `target.zeek`
without needing to include `target.zeek` in the invocation. without needing to include `target.zeek` in the invocation (nor
the `-O use-C++` option). After loading the stand-in script,
you can still access types and functions declared in `target.zeek`.
Note: the implementation differences between `gen-C++` and `gen-standalone-C++` Note: the implementation differences between `gen-C++` and `gen-standalone-C++`
wound up being modest enough that it might make sense to just always provide wound up being modest enough that it might make sense to just always provide
the latter functionality, which it turns out does not introduce any the latter functionality, which it turns out does not introduce any
additional constraints compared to the current `gen-C++` functionality. additional constraints compared to the current `gen-C++` functionality.
On the other hand, it's possible (not yet established) that code created
using `gen-C++` can be made to compile significantly faster than
standalone code.
There are additional workflows relating to running the test suite, which There are additional workflows relating to running the test suite, which
we document only briefly here as they're likely going to change or go away we document only briefly here as they're likely going to change or go away
@ -128,7 +137,7 @@ Both of these _append_ to any existing `CPP-gen-addl.h` file, providing
a means for building it up to reflect a number of compilations. a means for building it up to reflect a number of compilations.
The `update-C++` and `add-C++` options help support different The `update-C++` and `add-C++` options help support different
ways of building the `btest` test suie. They were meant to enable doing so ways of building the `btest` test suite. They were meant to enable doing so
without requiring per-test-suite-element recompilations. However, experiences without requiring per-test-suite-element recompilations. However, experiences
to date have found that trying to avoid pointwise compilations incurs to date have found that trying to avoid pointwise compilations incurs
additional headaches, so it's better to just bite off the cost of a large additional headaches, so it's better to just bite off the cost of a large
@ -174,11 +183,6 @@ Known Issues
Here we list various known issues with using the compiler: Here we list various known issues with using the compiler:
<br> <br>
* Run-time error messages generally lack location information and information
about associated expressions/statements, making them hard to puzzle out.
This could be fixed, but would add execution overhead in passing around
the necessary strings / `Location` objects.
* Compilation of compiled code can be noticeably slow (if built using * Compilation of compiled code can be noticeably slow (if built using
`./configure --enable-debug`) or hugely slow (if not), with the latter `./configure --enable-debug`) or hugely slow (if not), with the latter
taking on the order of an hour on a beefy laptop. This slowness complicates taking on the order of an hour on a beefy laptop. This slowness complicates
@ -186,6 +190,11 @@ CI/CD approaches for always running compiled code against the test suite
when merging changes. It's not presently clear how feasible it is to when merging changes. It's not presently clear how feasible it is to
speed this up. speed this up.
* Run-time error messages generally lack location information and information
about associated expressions/statements, making them hard to puzzle out.
This could be fixed, but would add execution overhead in passing around
the necessary strings / `Location` objects.
* Subtle bugs can arise when compiling code that uses `@if` conditional * Subtle bugs can arise when compiling code that uses `@if` conditional
compilation. The compiled code will not directly use the wrong instance compilation. The compiled code will not directly use the wrong instance
of a script body (one that differs due to the `@if` conditional having a of a script body (one that differs due to the `@if` conditional having a

View file

@ -9,12 +9,30 @@ namespace zeek::detail {
using namespace std; using namespace std;
vector<CPP_init_func> CPP_init_funcs; vector<CPP_init_func> CPP_init_funcs;
vector<CPP_init_func> CPP_activation_funcs;
// Calls all of the initialization hooks, in the order they were added. // Calls all of the initialization hooks, in the order they were added.
void init_CPPs() void init_CPPs()
{ {
static bool need_init = true;
if ( need_init )
for ( auto f : CPP_init_funcs ) for ( auto f : CPP_init_funcs )
f(); f();
need_init = false;
}
// Runs each activation hook registered for standalone code, walking
// CPP_activation_funcs from front to back.  Once a call has finished
// running the hooks, later calls do nothing.
void activate__CPPs()
	{
	static bool need_init = true;

	if ( ! need_init )
		return;

	for ( auto hook : CPP_activation_funcs )
		hook();

	// Cleared only after the hooks have run, as before.
	need_init = false;
	}
// This is a trick used to register the presence of compiled code. // This is a trick used to register the presence of compiled code.
@ -30,6 +48,19 @@ static int flag_init_CPP()
static int dummy = flag_init_CPP(); static int dummy = flag_init_CPP();
// Gives type "t" the name "name" and installs a matching type
// identifier in the global module.  Does nothing if "t" already has a
// non-empty name, which is treated as "already registered".
void register_type__CPP(TypePtr t, const std::string& name)
	{
	const bool already_registered = t->GetName().size() > 0;

	if ( already_registered )
		return;

	t->SetName(name);

	auto type_id = install_ID(name.c_str(), GLOBAL_MODULE_NAME, true, false);
	type_id->SetType(t);
	type_id->MakeType();
	}
void register_body__CPP(CPPStmtPtr body, int priority, p_hash_type hash, void register_body__CPP(CPPStmtPtr body, int priority, p_hash_type hash,
vector<string> events) vector<string> events)
{ {
@ -67,18 +98,31 @@ void register_scripts__CPP(p_hash_type h, void (*callback)())
standalone_callbacks[h] = callback; standalone_callbacks[h] = callback;
} }
void activate_bodies__CPP(const char* fn, TypePtr t, vector<p_hash_type> hashes) void activate_bodies__CPP(const char* fn, const char* module, bool exported,
TypePtr t, vector<p_hash_type> hashes)
{ {
auto ft = cast_intrusive<FuncType>(t); auto ft = cast_intrusive<FuncType>(t);
auto fg = lookup_ID(fn, GLOBAL_MODULE_NAME, false, false, false); auto fg = lookup_ID(fn, module, false, false, false);
if ( ! fg ) if ( ! fg )
{ {
fg = install_ID(fn, GLOBAL_MODULE_NAME, true, false); fg = install_ID(fn, module, true, exported);
fg->SetType(ft); fg->SetType(ft);
} }
auto f = fg->GetVal()->AsFunc(); auto v = fg->GetVal();
if ( ! v )
{ // Create it.
std::vector<StmtPtr> no_bodies;
std::vector<int> no_priorities;
auto sf = make_intrusive<ScriptFunc>(fn, ft, no_bodies,
no_priorities);
v = make_intrusive<FuncVal>(move(sf));
fg->SetVal(v);
}
auto f = v->AsFunc();
const auto& bodies = f->GetBodies(); const auto& bodies = f->GetBodies();
// Track hashes of compiled bodies already associated with f. // Track hashes of compiled bodies already associated with f.
@ -115,6 +159,7 @@ void activate_bodies__CPP(const char* fn, TypePtr t, vector<p_hash_type> hashes)
auto cs = compiled_scripts[h]; auto cs = compiled_scripts[h];
f->AddBody(cs.body, no_inits, num_params, cs.priority); f->AddBody(cs.body, no_inits, num_params, cs.priority);
added_bodies[fn].insert(h);
events.insert(cs.events.begin(), cs.events.end()); events.insert(cs.events.begin(), cs.events.end());
} }
@ -126,13 +171,13 @@ void activate_bodies__CPP(const char* fn, TypePtr t, vector<p_hash_type> hashes)
} }
} }
IDPtr lookup_global__CPP(const char* g, const TypePtr& t) IDPtr lookup_global__CPP(const char* g, const TypePtr& t, bool exported)
{ {
auto gl = lookup_ID(g, GLOBAL_MODULE_NAME, false, false, false); auto gl = lookup_ID(g, GLOBAL_MODULE_NAME, false, false, false);
if ( ! gl ) if ( ! gl )
{ {
gl = install_ID(g, GLOBAL_MODULE_NAME, true, false); gl = install_ID(g, GLOBAL_MODULE_NAME, true, exported);
gl->SetType(t); gl->SetType(t);
} }

View file

@ -20,6 +20,15 @@ typedef void (*CPP_init_func)();
// Tracks the initialization hooks for different compilation runs. // Tracks the initialization hooks for different compilation runs.
extern std::vector<CPP_init_func> CPP_init_funcs; extern std::vector<CPP_init_func> CPP_init_funcs;
// Tracks the activation hooks for different "standalone" compilations.
extern std::vector<CPP_init_func> CPP_activation_funcs;
// Activates all previously registered standalone code.
extern void activate__CPPs();
// Registers the given global type, if not already present.
extern void register_type__CPP(TypePtr t, const std::string& name);
// Registers the given compiled function body as associated with the // Registers the given compiled function body as associated with the
// given priority and hash. "events" is a list of event handlers // given priority and hash. "events" is a list of event handlers
// relevant for the function body, which should be registered if the // relevant for the function body, which should be registered if the
@ -38,15 +47,17 @@ extern void register_lambda__CPP(CPPStmtPtr body, p_hash_type hash,
// the given hash. // the given hash.
extern void register_scripts__CPP(p_hash_type h, void (*callback)()); extern void register_scripts__CPP(p_hash_type h, void (*callback)());
// Activates the event handler/hook with the given name (which is created // Activates the function/event handler/hook with the given name and in
// if it doesn't exist) and type, using (at least) the bodies associated // the given module, using (at least) the bodies associated with the
// with the given hashes. // given hashes. Creates the identifier using the given module and
extern void activate_bodies__CPP(const char* fn, TypePtr t, // export setting if it doesn't already exist.
extern void activate_bodies__CPP(const char* fn, const char* module,
bool exported, TypePtr t,
std::vector<p_hash_type> hashes); std::vector<p_hash_type> hashes);
// Looks for a global with the given name. If not present, creates it // Looks for a global with the given name. If not present, creates it
// with the given type. // with the given type and export setting.
extern IDPtr lookup_global__CPP(const char* g, const TypePtr& t); extern IDPtr lookup_global__CPP(const char* g, const TypePtr& t, bool exported);
// Looks for a BiF with the given name. Returns nil if not present. // Looks for a BiF with the given name. Returns nil if not present.
extern Func* lookup_bif__CPP(const char* bif); extern Func* lookup_bif__CPP(const char* bif);

View file

@ -134,7 +134,8 @@ void CPPCompile::ExpandTypeVar(const TypePtr& t)
auto& script_type_name = t->GetName(); auto& script_type_name = t->GetName();
if ( script_type_name.size() > 0 ) if ( script_type_name.size() > 0 )
AddInit(t, tn + "->SetName(\"" + script_type_name + "\");"); AddInit(t, "register_type__CPP(" + tn + ", \"" +
script_type_name + "\");");
AddInit(t); AddInit(t);
} }

View file

@ -109,9 +109,11 @@ void CPPCompile::CreateGlobal(const ID* g)
const auto& t = g->GetType(); const auto& t = g->GetType();
NoteInitDependency(g, TypeRep(t)); NoteInitDependency(g, TypeRep(t));
auto exported = g->IsExport() ? "true" : "false";
AddInit(g, globals[gn], AddInit(g, globals[gn],
string("lookup_global__CPP(\"") + gn + "\", " + string("lookup_global__CPP(\"") + gn + "\", " +
GenTypeName(t) + ")"); GenTypeName(t) + ", " + exported + ")");
} }
if ( is_bif ) if ( is_bif )
@ -168,7 +170,12 @@ void CPPCompile::AddBiF(const ID* b, bool is_var)
if ( AddGlobal(n, "bif", true) ) if ( AddGlobal(n, "bif", true) )
Emit("Func* %s;", globals[n]); Emit("Func* %s;", globals[n]);
AddInit(b, globals[n], string("lookup_bif__CPP(\"") + bn + "\")"); auto lookup = string("lookup_bif__CPP(\"") + bn + "\")";
if ( standalone )
AddActivation(globals[n] + " = " + lookup + ";");
else
AddInit(b, globals[n], lookup);
} }
bool CPPCompile::AddGlobal(const string& g, const char* suffix, bool track) bool CPPCompile::AddGlobal(const string& g, const char* suffix, bool track)

View file

@ -2,11 +2,8 @@
build=../../../build build=../../../build
echo > CPP-gen-addl.h cd $build
(cd $build
export -n ZEEK_USE_CPP ZEEK_ADD_CPP export -n ZEEK_USE_CPP ZEEK_ADD_CPP
export ZEEK_HASH_DIR=. export ZEEK_HASH_DIR=.
echo | src/zeek -b -O gen-C++ echo | src/zeek -b -O gen-C++
) ninja || echo Bare embedded build failed
mv $build/CPP-gen-addl.h CPP-gen.cc
(cd $build ; ninja || echo Bare embedded build failed)

View file

@ -2,11 +2,8 @@
build=../../../build build=../../../build
echo > CPP-gen-addl.h cd $build
(cd $build
export -n ZEEK_USE_CPP ZEEK_ADD_CPP export -n ZEEK_USE_CPP ZEEK_ADD_CPP
export ZEEK_HASH_DIR=. export ZEEK_HASH_DIR=.
echo | src/zeek -O gen-C++ echo | src/zeek -O gen-C++
) ninja || echo Full embedded build failed
mv $build/CPP-gen-addl.h CPP-gen.cc
(cd $build ; ninja || echo Full embedded build failed)

View file

@ -1,7 +1,5 @@
#! /bin/sh #! /bin/sh
base=../../.. cd ../../../build
so=$base/src/script_opt/CPP echo >CPP-gen.cc
echo > $so/CPP-gen.cc
cd $base/build
ninja || echo Non-embedded build failed ninja || echo Non-embedded build failed

View file

@ -4,11 +4,8 @@ echo $1
base=../../.. base=../../..
test=$base/testing/btest test=$base/testing/btest
so=$base/src/script_opt/CPP
build=$base/build build=$base/build
gen=CPP-gen-addl.h gen=CPP-gen.cc
echo >$gen
./non-embedded-build >$build/errs 2>&1 || echo non-embedded build failed ./non-embedded-build >$build/errs 2>&1 || echo non-embedded build failed
@ -17,7 +14,7 @@ export ZEEK_HASH_DIR=$test ZEEK_GEN_CPP=
cd $test cd $test
../../auxil/btest/btest $1 >jbuild-$1.out 2>&1 ../../auxil/btest/btest $1 >jbuild-$1.out 2>&1
grep -c '^namespace' $gen grep -c '^namespace' $gen
mv $gen $so/CPP-gen.cc mv $gen $build/
cd $build cd $build
ninja >& errs || echo build for $1 failed ninja >& errs || echo build for $1 failed

View file

@ -4,18 +4,16 @@ echo $1
base=../../.. base=../../..
test=$base/testing/btest test=$base/testing/btest
so=$base/src/script_opt/CPP
build=$base/build build=$base/build
gen=CPP-gen-addl.h gen=CPP-gen.cc
export -n ZEEK_USE_CPP export -n ZEEK_USE_CPP
export ZEEK_HASH_DIR=$test ZEEK_ADD_CPP= export ZEEK_HASH_DIR=$test ZEEK_ADD_CPP=
cd $test cd $test
cp $build/CPP-hashes.dat . cp $build/CPP-hashes.dat .
echo >$gen
../../auxil/btest/btest $1 >cpp-build-$1.out 2>&1 ../../auxil/btest/btest $1 >cpp-build-$1.out 2>&1
grep -c '^namespace' $gen grep -c '^namespace' $gen
mv $gen $so mv $gen $build
cd $build cd $build
ninja >& errs || echo build for $1 failed ninja >& errs || echo build for $1 failed

View file

@ -2,18 +2,16 @@
base=../../.. base=../../..
test=$base/testing/btest test=$base/testing/btest
so=$base/src/script_opt/CPP
build=$base/build build=$base/build
gen=CPP-gen-addl.h gen=CPP-gen.cc
export -n ZEEK_USE_CPP export -n ZEEK_USE_CPP
export ZEEK_HASH_DIR=$test ZEEK_ADD_CPP= export ZEEK_HASH_DIR=$test ZEEK_ADD_CPP=
cd $test cd $test
cp $build/CPP-hashes.dat . cp $build/CPP-hashes.dat .
echo >$gen
../../auxil/btest/btest $1 >jbuild-$1.out 2>&1 ../../auxil/btest/btest $1 >jbuild-$1.out 2>&1
grep -c '^namespace' $gen grep -c '^namespace' $gen
mv $gen $so mv $gen $build/
cd $build cd $build
ninja >& errs || echo build for $1 failed ninja >& errs || echo build for $1 failed

View file

@ -24,6 +24,7 @@ AnalyOpt analysis_options;
std::unordered_set<const Func*> non_recursive_funcs; std::unordered_set<const Func*> non_recursive_funcs;
void (*CPP_init_hook)() = nullptr; void (*CPP_init_hook)() = nullptr;
void (*CPP_activation_hook)() = nullptr;
// Tracks all of the loaded functions (including event handlers and hooks). // Tracks all of the loaded functions (including event handlers and hooks).
static std::vector<FuncInfo> funcs; static std::vector<FuncInfo> funcs;
@ -288,9 +289,6 @@ void analyze_scripts()
// Avoid profiling overhead. // Avoid profiling overhead.
return; return;
const auto hash_name = hash_dir + "CPP-hashes";
const auto gen_name = hash_dir + "CPP-gen-addl.h";
// Now that everything's parsed and BiF's have been initialized, // Now that everything's parsed and BiF's have been initialized,
// profile the functions. // profile the functions.
auto pfs = std::make_unique<ProfileFuncs>(funcs, is_CPP_compilable, false); auto pfs = std::make_unique<ProfileFuncs>(funcs, is_CPP_compilable, false);
@ -365,8 +363,21 @@ void analyze_scripts()
{ {
auto b = s->second.body; auto b = s->second.body;
b->SetHash(hash); b->SetHash(hash);
f.Func()->ReplaceBody(f.Body(), b);
// We may have already updated the body if
// we're using code compiled for standalone.
if ( f.Body()->Tag() != STMT_CPP )
{
auto func = f.Func();
if ( added_bodies[func->Name()].count(hash) > 0 )
// We've already added the
// replacement. Delete orig.
func->ReplaceBody(f.Body(), nullptr);
else
func->ReplaceBody(f.Body(), b);
f.SetBody(b); f.SetBody(b);
}
for ( auto& e : s->second.events ) for ( auto& e : s->second.events )
{ {
@ -384,6 +395,8 @@ void analyze_scripts()
if ( generating_CPP ) if ( generating_CPP )
{ {
const auto hash_name = hash_dir + "CPP-hashes";
auto hm = std::make_unique<CPPHashManager>(hash_name.c_str(), auto hm = std::make_unique<CPPHashManager>(hash_name.c_str(),
analysis_options.add_CPP); analysis_options.add_CPP);
@ -402,7 +415,10 @@ void analyze_scripts()
pfs = std::make_unique<ProfileFuncs>(funcs, is_CPP_compilable, false); pfs = std::make_unique<ProfileFuncs>(funcs, is_CPP_compilable, false);
} }
CPPCompile cpp(funcs, *pfs, gen_name.c_str(), *hm, const auto gen_name = hash_dir + "CPP-gen.cc";
const auto addl_name = hash_dir + "CPP-gen-addl.h";
CPPCompile cpp(funcs, *pfs, gen_name, addl_name, *hm,
analysis_options.gen_CPP || analysis_options.gen_CPP ||
analysis_options.update_CPP, analysis_options.update_CPP,
analysis_options.gen_standalone_CPP); analysis_options.gen_standalone_CPP);

View file

@ -152,5 +152,9 @@ extern void analyze_scripts();
// to a non-empty value. // to a non-empty value.
extern void (*CPP_init_hook)(); extern void (*CPP_init_hook)();
// Used for "standalone" C++-compiled scripts to complete their activation;
// called after parsing and BiF initialization, but before zeek_init.
extern void (*CPP_activation_hook)();
} // namespace zeek::detail } // namespace zeek::detail

View file

@ -843,6 +843,9 @@ SetupResult setup(int argc, char** argv, Options* zopts)
// we don't have any other source for it. // we don't have any other source for it.
run_state::detail::update_network_time(util::current_time()); run_state::detail::update_network_time(util::current_time());
if ( CPP_activation_hook )
(*CPP_activation_hook)();
if ( zeek_init ) if ( zeek_init )
event_mgr.Enqueue(zeek_init, Args{}); event_mgr.Enqueue(zeek_init, Args{});