diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 70e8c3e07a..3c101c3305 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -327,6 +327,27 @@ set(MAIN_SRCS plugin/Manager.cc plugin/Plugin.cc + script_opt/CPP/Attrs.cc + script_opt/CPP/Consts.cc + script_opt/CPP/DeclFunc.cc + script_opt/CPP/Driver.cc + script_opt/CPP/Emit.cc + script_opt/CPP/Exprs.cc + script_opt/CPP/Func.cc + script_opt/CPP/GenFunc.cc + script_opt/CPP/HashMgr.cc + script_opt/CPP/Inits.cc + script_opt/CPP/RuntimeInit.cc + script_opt/CPP/RuntimeOps.cc + script_opt/CPP/RuntimeVec.cc + script_opt/CPP/Stmts.cc + script_opt/CPP/Tracker.cc + script_opt/CPP/Types.cc + script_opt/CPP/Util.cc + script_opt/CPP/Vars.cc + + script_opt/CPP/CPP-gen.cc + script_opt/DefItem.cc script_opt/DefSetsMgr.cc script_opt/Expr.cc diff --git a/src/script_opt/CPP/Attrs.cc b/src/script_opt/CPP/Attrs.cc new file mode 100644 index 0000000000..9ba0a8ea3a --- /dev/null +++ b/src/script_opt/CPP/Attrs.cc @@ -0,0 +1,191 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +#include "zeek/script_opt/CPP/Compile.h" + + +namespace zeek::detail { + +// Registers "attrs" for run-time initialization (a no-op if nil or already +// registered), along with any of its expressions that are not simple +// initializers, noting the initialization dependencies involved. +void CPPCompile::RegisterAttributes(const AttributesPtr& attrs) + { + if ( ! attrs || attributes.HasKey(attrs) ) + return; + + attributes.AddKey(attrs); + AddInit(attrs); + + auto a_rep = attributes.GetRep(attrs); + if ( a_rep != attrs.get() ) + { + NoteInitDependency(attrs.get(), a_rep); + return; + } + + for ( const auto& a : attrs->GetAttrs() ) + { + const auto& e = a->GetExpr(); + if ( e ) + { + if ( IsSimpleInitExpr(e) ) + { + // Make sure any dependencies it has get noted. 
+ (void) GenExpr(e, GEN_VAL_PTR); + continue; + } + + init_exprs.AddKey(e); + AddInit(e); + NoteInitDependency(attrs, e); + + auto e_rep = init_exprs.GetRep(e); + if ( e_rep != e.get() ) + NoteInitDependency(e.get(), e_rep); + } + } + } + +// Appends the tags and values of "attrs" (if non-nil) to attr_tags and +// attr_vals as comma-separated lists, using "nullptr" for attributes that +// lack an expression, and then encloses each of the two strings in {}'s. +void CPPCompile::BuildAttrs(const AttributesPtr& attrs, std::string& attr_tags, + std::string& attr_vals) + { + if ( attrs ) + { + for ( const auto& a : attrs->GetAttrs() ) + { + if ( ! attr_tags.empty() ) + { + attr_tags += ", "; + attr_vals += ", "; + } + + attr_tags += Fmt(int(a->Tag())); + + const auto& e = a->GetExpr(); + + if ( e ) + attr_vals += GenExpr(e, GEN_VAL_PTR, false); + else + attr_vals += "nullptr"; + } + } + + attr_tags = std::string("{") + attr_tags + "}"; + attr_vals = std::string("{") + attr_vals + "}"; + } + +// Emits the C++ function that constructs "attrs" at run-time. Note that +// "attrs" is dereferenced unconditionally, so it must not be nil. +void CPPCompile::GenAttrs(const AttributesPtr& attrs) + { + NL(); + + Emit("AttributesPtr %s", AttrsName(attrs)); + + StartBlock(); + + const auto& avec = attrs->GetAttrs(); + Emit("auto attrs = std::vector();"); + + AddInit(attrs); + + for ( const auto& attr : avec ) + { + const auto& e = attr->GetExpr(); + + if ( ! 
e ) + { + Emit("attrs.emplace_back(make_intrusive(%s));", + AttrName(attr)); + continue; + } + + NoteInitDependency(attrs, e); + AddInit(e); + + std::string e_arg; + if ( IsSimpleInitExpr(e) ) + e_arg = GenAttrExpr(e); + else + e_arg = InitExprName(e); + + Emit("attrs.emplace_back(make_intrusive(%s, %s));", + AttrName(attr), e_arg); + } + + Emit("return make_intrusive(attrs, nullptr, true, false);"); + + EndBlock(); + } + +// Returns C++ code for constructing the expression "e" associated with an +// attribute. Only constants, names and record coercions are supported; +// any other expression tag leads to an internal error. +std::string CPPCompile::GenAttrExpr(const ExprPtr& e) + { + switch ( e->Tag() ) { + case EXPR_CONST: + return std::string("make_intrusive(") + + GenExpr(e, GEN_VAL_PTR) + ")"; + + case EXPR_NAME: + NoteInitDependency(e, e->AsNameExpr()->IdPtr()); + return std::string("make_intrusive(") + + globals[e->AsNameExpr()->Id()->Name()] + ")"; + + case EXPR_RECORD_COERCE: + NoteInitDependency(e, TypeRep(e->GetType())); + return std::string("make_intrusive(make_intrusive(make_intrusive()), cast_intrusive(") + + GenTypeName(e->GetType()) + "))"; + + default: + reporter->InternalError("bad expr tag in CPPCompile::GenAttrExpr"); + return "###"; + } + } + +// Returns the C++ code (a function call) used to refer to attributes "a". +std::string CPPCompile::AttrsName(const AttributesPtr& a) + { + return attributes.KeyName(a) + "()"; + } + +// Returns, as a string, the name of the enum constant for the tag of "attr". +const char* CPPCompile::AttrName(const AttrPtr& attr) + { + switch ( attr->Tag() ) { + case ATTR_OPTIONAL: return "ATTR_OPTIONAL"; + case ATTR_DEFAULT: return "ATTR_DEFAULT"; + case ATTR_REDEF: return "ATTR_REDEF"; + case ATTR_ADD_FUNC: return "ATTR_ADD_FUNC"; + case ATTR_DEL_FUNC: return "ATTR_DEL_FUNC"; + case ATTR_EXPIRE_FUNC: return "ATTR_EXPIRE_FUNC"; + case ATTR_EXPIRE_READ: return "ATTR_EXPIRE_READ"; + case ATTR_EXPIRE_WRITE: return "ATTR_EXPIRE_WRITE"; + case ATTR_EXPIRE_CREATE: return "ATTR_EXPIRE_CREATE"; + case ATTR_RAW_OUTPUT: return "ATTR_RAW_OUTPUT"; + case ATTR_PRIORITY: return "ATTR_PRIORITY"; + case ATTR_GROUP: return "ATTR_GROUP"; + case ATTR_LOG: return "ATTR_LOG"; + case ATTR_ERROR_HANDLER: return "ATTR_ERROR_HANDLER"; + case ATTR_TYPE_COLUMN: return "ATTR_TYPE_COLUMN"; + case 
ATTR_TRACKED: return "ATTR_TRACKED"; + case ATTR_ON_CHANGE: return "ATTR_ON_CHANGE"; + case ATTR_BROKER_STORE: return "ATTR_BROKER_STORE"; + case ATTR_BROKER_STORE_ALLOW_COMPLEX: return "ATTR_BROKER_STORE_ALLOW_COMPLEX"; + case ATTR_BACKEND: return "ATTR_BACKEND"; + case ATTR_DEPRECATED: return "ATTR_DEPRECATED"; + case ATTR_IS_ASSIGNED: return "ATTR_IS_ASSIGNED"; + case ATTR_IS_USED: return "ATTR_IS_USED"; + + case NUM_ATTRS: return ""; + } + + // Not reached for a valid tag; ensures that every path returns a value. + return ""; + } + +} // zeek::detail diff --git a/src/script_opt/CPP/CPP-load.bif b/src/script_opt/CPP/CPP-load.bif new file mode 100644 index 0000000000..eafdcf232b --- /dev/null +++ b/src/script_opt/CPP/CPP-load.bif @@ -0,0 +1,40 @@ +##! Definitions of built-in functions related to loading compiled-to-C++ +##! scripts. + + +%%{ // C segment + +#include "zeek/Reporter.h" +#include "zeek/script_opt/ScriptOpt.h" +#include "zeek/script_opt/CPP/Func.h" + +%%} + +## Activates the compile-to-C++ scripts associated with the given hash. +## +## h: Hash of the set of C++ scripts. +## +## Returns: True if it was present and loaded, false if not. +## +function load_CPP%(h: count%): bool + %{ + auto cb = detail::standalone_callbacks.find(h); + + if ( cb == detail::standalone_callbacks.end() ) + { + reporter->Error("load of non-existing C++ code (%llu)", static_cast<unsigned long long>(h)); + return zeek::val_mgr->False(); + } + + // Ensure that any compiled scripts are used. If instead + // the AST is used, then when we activate the standalone + // scripts, they won't be able to avoid installing redundant + // event handlers. + detail::analysis_options.use_CPP = true; + + // Mark this script as one we should activate after loading + // compiled scripts. 
+ detail::standalone_activations.push_back(cb->second); + + return zeek::val_mgr->True(); + %} diff --git a/src/script_opt/CPP/Compile.h b/src/script_opt/CPP/Compile.h new file mode 100644 index 0000000000..c4ed8d23e5 --- /dev/null +++ b/src/script_opt/CPP/Compile.h @@ -0,0 +1,1022 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +#pragma once + +#include "zeek/Desc.h" +#include "zeek/script_opt/ScriptOpt.h" +#include "zeek/script_opt/CPP/Func.h" +#include "zeek/script_opt/CPP/Util.h" +#include "zeek/script_opt/CPP/Tracker.h" +#include "zeek/script_opt/CPP/HashMgr.h" + +// We structure the compiler for generating C++ versions of Zeek script +// bodies as a single large class. While we divide the compiler's +// functionality into a number of groups (see below), these interact with +// one another, and in particular with various member variables, enough +// so that it's not clear there's benefit to further splitting the +// functionality into multiple classes. (Some splitting has already been +// done for more self-contained functionality, resulting in the CPPTracker +// and CPPHashManager classes.) +// +// Most aspects of translating to C++ have a straightforward nature. +// We can turn many Zeek script statements directly into the C++ that's +// used by the interpreter for the corresponding Exec()/DoExec() methods. +// This often holds for Zeek expressions, too, though some of them require +// considerations (e.g., error handling) that require introducing helper +// functions to maintain the property that a Zeek script expression translates +// to a C++ expression. That property (i.e., not needing to turn Zeek +// expressions into multiple C++ statements) simplifies code generation +// considerably. It also means that the compiler should *not* run on +// transformed ASTs such as produced by the Reducer class. 
We instead +// seek to let the C++ compiler (meaning clang or g++, for example) +// find optimization opportunities, including inlining. +// +// For some Zeek scripting types, we use their natural C++ counterparts, +// such as "bro_uint_t" for "count" values. In the source code these +// are referred to as "native" types. Other types, like tables, keep +// their interpreter-equivalent type (e.g., TableVal). These are dealt +// with almost entirely using IntrusivePtr's. The few exceptions (i.e., +// direct uses of "new") are in contexts where the memory management +// is clearly already addressed. +// +// The user specifies generation of C++ using "-O gen-C++", which produces +// C++ code for all of the loaded functions/hooks/event handlers. Thus, +// for example, "zeek -b -O gen-C++ foo.zeek" will generate C++ code for +// all of the scripts loaded in "bare" mode, plus those for foo.zeek; and +// without the "-b" for all of the default scripts plus those in foo.zeek. +// +// One of the design goals employed is to support "incremental" compilation, +// i.e., compiling *additional* Zeek scripts at a later point after an +// initial compilation. This comes in two forms. +// +// "-O update-C++" produces C++ code that extends that already compiled, +// in a manner where subsequent compilations can leverage both the original +// and the newly added. Such compilations *must* be done in a consistent +// context (for example, any types extended in the original are extended in +// the same manner - plus then perhaps further extensions - in the updated +// code). +// +// "-O add-C++" instead produces C++ code that (1) will not be leveraged in +// any subsequent compilations, and (2) can be inconsistent with other +// "-O add-C++" code added in the future. The main use of this feature is +// to support compiling polyglot versions of Zeek scripts used to run +// the test suite. 
+// +// Zeek invocations specifying "-O use-C++" will activate any code compiled +// into the zeek binary; otherwise, the code lies dormant. "-O force-C++" +// does the same but generates warnings for script functions not found in the +// compiled-in code. This is useful for debugging the compiled code, to ensure +// that it's indeed being run. +// +// "-O report-C++" reports on which compiled functions will/won't be used +// (including ones that are available but not relevant to the scripts loaded +// on the command line). +// +// We partition the methods of the compiler into a number of groups, +// the definitions of each having their own source file: +// +// Driver Drives the overall compilation process. +// +// Vars Management of C++ variables relating to local/global +// script variables. +// +// DeclFunc Generating declarations of C++ subclasses and +// functions. +// +// GenFunc Generating the bodies of script functions. +// +// Consts Dealing with Zeek script constants. Depending +// on their type, these are represented either +// directly in C++, or using C++ variables that +// are constructed at run-time. +// +// Stmts Generating code for Zeek statements. +// +// Exprs Generating code for Zeek expressions. +// +// Types Management of (1) C++ types used in generated code, +// and (2) C++ variables that hold Zeek script types, +// generated at run-time. +// +// Attrs Management of Zeek type attributes, some of which +// must be generated at run-time. +// +// Inits Management of initializing the run-time +// variables needed by the compiled code. +// +// Emit Low-level code generation. +// +// Of these, Inits is probably the most subtle. It turns out to be +// very tricky ensuring that we create run-time variables in the +// proper order. 
For example, a global might need a record type to be +// defined; one of the record's fields is a table; that table contains +// another record; one of that other record's fields is the original +// record (recursion); another field has an &default expression that +// requires the compiler to generate a helper function to construct +// the expression dynamically; and that helper function might in turn +// refer to other types that require initialization. +// +// To deal with these dependencies, for every run-time object the compiler +// maintains (1) all of the other run-time objects on which its initialization +// depends, and (2) the C++ statements needed to initialize it, once those +// other objects have been initialized. It then begins initialization with +// objects that have no dependencies, marks those as done (essentially), finds +// objects that now can be initialized and emits their initializations, +// marks those as done, etc. +// +// Below in declaring the CPPCompile class, we group methods in accordance +// with those listed above. We also locate member variables with the group +// most relevant for their usage. However, keep in mind that many member +// variables are used by multiple groups, which is why we haven't created +// distinct per-group classes. + + +namespace zeek::detail { + +class CPPCompile { +public: + CPPCompile(std::vector& _funcs, ProfileFuncs& pfs, + const char* gen_name, CPPHashManager& hm, + bool update, bool standalone); + ~CPPCompile(); + +private: + // Start of methods related to driving the overall compilation + // process. + // See Driver.cc for definitions. + // + + // Main driver, invoked by constructor. + void Compile(); + + // Generate the beginning of the compiled code: run-time functions, + // namespace, auxiliary globals. 
+ void GenProlog(); + + // Given the name of a function body that's been compiled, generate + // code to register it at run-time, and track its associated hash + // so subsequent compilations can reuse it. + void RegisterCompiledBody(const std::string& f); + + // After compilation, generate the final code. Most of this is + // run-time initialization of various dynamic values. + void GenEpilog(); + + // True if the given function (plus body and profile) is one + // that should be compiled. + bool IsCompilable(const FuncInfo& func); + + // The set of functions/bodies we're compiling. + std::vector& funcs; + + // The global profile of all of the functions. + ProfileFuncs& pfs; + + // Hash-indexed information about previously compiled code (and used + // to update it from this compilation run). + CPPHashManager& hm; + + // Script functions that we are able to compile. We compute + // these ahead of time so that when compiling script function A + // which makes a call to script function B, we know whether + // B will indeed be compiled, or if it'll be interpreted due to + // it including some functionality we don't currently support + // for compilation. + // + // Indexed by the name of the function. + std::unordered_set compilable_funcs; + + // Maps functions (not hooks or events) to upstream compiled names. + std::unordered_map hashed_funcs; + + // If non-zero, provides a tag used for auxiliary/additional + // compilation units. + int addl_tag = 0; + + // If true, then we're updating the C++ base (i.e., generating + // code meant for use by subsequently generated code). + bool update = false; + + // If true, the generated code should run "standalone". + bool standalone = false; + + // Hash over the functions in this compilation. This is only + // needed for "seatbelts", to ensure that we can produce a + // unique hash relating to this compilation (*and* its + // compilation time, which is why these are "seatbelts" and + // likely not important to make distinct). 
+ p_hash_type total_hash = 0; + + // Working directory in which we're compiling. Used to quasi-locate + // error messages when doing test-suite "add-C++" crunches. + std::string working_dir; + + // + // End of methods related to driving the overall compilation process. + + + // Start of methods related to script variables and their C++ + // counterparts. + // See Vars.cc for definitions. + // + + // Returns true if the current compilation context has collisions + // with previously generated code (globals with conflicting types + // or initialization values, or types with differing elements). + bool CheckForCollisions(); + + // Generate declarations associated with the given global, and, if + // it's used as a variable (not just as a function being called), + // track it as such. + void CreateGlobal(const ID* g); + + // For the globals used in the compilation, if new then append + // them to the hash file to make the information available + // to subsequent compilation runs. + void UpdateGlobalHashes(); + + // Register the given identifier as a BiF. If is_var is true + // then the BiF is also used in a non-call context. + void AddBiF(const ID* b, bool is_var); + + // Register the given global name. "suffix" distinguishes particular + // types of globals, such as the names of bifs, global (non-function) + // variables, or compiled Zeek functions. If "track" is true then + // if we're compiling incrementally, and this is a new global not + // previously compiled, then we track its hash for future compilations. + bool AddGlobal(const std::string& g, const char* suffix, bool track); + + // Tracks that the body we're currently compiling refers to the + // given event. + void RegisterEvent(std::string ev_name); + + // The following match various forms of identifiers to the + // name used for their C++ equivalent. 
+ const char* IDName(const ID& id) { return IDName(&id); } + const char* IDName(const IDPtr& id) { return IDName(id.get()); } + const char* IDName(const ID* id) { return IDNameStr(id).c_str(); } + const std::string& IDNameStr(const ID* id) const; + + // Returns a canonicalized version of a variant of a global made + // distinct by the given suffix. + std::string GlobalName(const std::string& g, const char* suffix) + { + return Canonicalize(g.c_str()) + "_" + suffix; + } + + // Returns a canonicalized form of a local identifier's name, + // expanding its module prefix if needed. + std::string LocalName(const ID* l) const; + std::string LocalName(const IDPtr& l) const + { return LocalName(l.get()); } + + // Returns a canonicalized name, with various non-alphanumeric + // characters stripped or transformed, and guaranteed not to + // conflict with C++ keywords. + std::string Canonicalize(const char* name) const; + + // Maps global names (not identifiers) to the names we use for them. + std::unordered_map globals; + + // Similar for locals, for the function currently being compiled. + std::unordered_map locals; + + // Maps event names to the names we use for them. + std::unordered_map events; + + // Globals that correspond to variables, not functions. + std::unordered_set global_vars; + + // + // End of methods related to script/C++ variables. + + + // Start of methods related to declaring compiled script functions, + // including related classes. + // See DeclFunc.cc for definitions. + // + + // Generates declarations (class, forward reference to C++ function) + // for the given script function. + void DeclareFunc(const FuncInfo& func); + + // Similar, but for lambdas. + void DeclareLambda(const LambdaExpr* l, const ProfileFunc* pf); + + // Declares the CPPStmt subclass used for compiling the given + // function. 
"ft" gives the function's type, "pf" its profile, + // "fname" its C++ name, "body" its AST, "l" if non-nil its + // corresponding lambda expression, and "flavor" whether it's + // a hook/event/function. + void DeclareSubclass(const FuncTypePtr& ft, const ProfileFunc* pf, + const std::string& fname, + const StmtPtr& body, int priority, + const LambdaExpr* l, FunctionFlavor flavor); + + // Generates the declarations (and in-line definitions) associated + // with compiling a lambda. + void BuildLambda(const FuncTypePtr& ft, const ProfileFunc* pf, + const std::string& fname, const StmtPtr& body, + const LambdaExpr* l, const IDPList* lambda_ids); + + // For a call to the C++ version of a function of type "ft" and + // with lambda captures lambda_ids (nil if not applicable), generates + // code that binds the Interpreter arguments (i.e., Frame offsets) + // to C++ function arguments, as well as passing in the captures. + std::string BindArgs(const FuncTypePtr& ft, const IDPList* lambda_ids); + + // Generates the declaration for the parameters for a function with + // the given type, lambda captures (if non-nil), and profile. + std::string ParamDecl(const FuncTypePtr& ft, const IDPList* lambda_ids, + const ProfileFunc* pf); + + // Inspects the given profile to find the i'th parameter (starting + // at 0). Returns nil if the profile indicates that that parameter + // is not used by the function. + const ID* FindParam(int i, const ProfileFunc* pf); + + // Names for lambda capture ID's. These require a separate space + // that incorporates the lambda's name, to deal with nested lambdas + // that refer to the identifiers with the same name. + std::unordered_map lambda_names; + + // The function's parameters. Tracked so we don't re-declare them. + std::unordered_set params; + + // Whether we're parsing a hook. + bool in_hook = false; + + // + // End of methods related to declaring compiled script functions. 
+ + + // Start of methods related to generating the bodies of compiled + // script functions. Note that some of this sort of functionality is + // instead in DeclFunc.cc, due to the presence of inlined methods. + // See GenFunc.cc for definitions. + // + + // Driver functions for compiling the body of the given function + // or lambda. + void CompileFunc(const FuncInfo& func); + void CompileLambda(const LambdaExpr* l, const ProfileFunc* pf); + + // Generates the body of the Invoke() method (which supplies the + // "glue" for calling the C++-generated code). + void GenInvokeBody(const std::string& fname, const TypePtr& t, + const std::string& args); + + // Generates the code for the body of a script function with + // the given type, profile, C++ name, AST, lambda captures + // (if non-nil), and hook/event/function "flavor". + void DefineBody(const FuncTypePtr& ft, const ProfileFunc* pf, + const std::string& fname, const StmtPtr& body, + const IDPList* lambda_ids, FunctionFlavor flavor); + + // Declare parameters that originate from a type signature of + // "any" but were concretized in this declaration. + void TranslateAnyParams(const FuncTypePtr& ft, const ProfileFunc* pf); + + // Generates code to dynamically initialize any events referred to + // in the function. + void InitializeEvents(const ProfileFunc* pf); + + // Declare local variables (which are non-globals that aren't + // parameters or lambda captures). + void DeclareLocals(const ProfileFunc* func, const IDPList* lambda_ids); + + // Returns the C++ name to use for a given function body. + std::string BodyName(const FuncInfo& func); + + // Generate the arguments to be used when calling a C++-generated + // function. + std::string GenArgs(const RecordTypePtr& params, const Expr* e); + + // Functions that we've declared/compiled. + std::unordered_set compiled_funcs; + + // Maps those to their associated files - used to make add-C++ body + // hashes distinct. 
+ std::unordered_map cf_locs; + + // Maps function bodies to the names we use for them. + std::unordered_map body_names; + + // Reverse mapping. + std::unordered_map names_to_bodies; + + // Maps function names to hashes of bodies. + std::unordered_map body_hashes; + + // Maps function names to priorities, for hooks & event handlers. + std::unordered_map body_priorities; + + // Maps function names to events relevant to them. + std::unordered_map> body_events; + + // Return type of the function we're currently compiling. + TypePtr ret_type = nullptr; + + // Internal name of the function we're currently compiling. + std::string body_name; + + // + // End of methods related to generating compiled script bodies. + + + // Start of methods related to generating code for representing + // script constants as run-time values. + // See Consts.cc for definitions. + // + + // Returns an instantiation of a constant - either as a native + // C++ constant, or as a C++ variable that will be bound to + // a Zeek value at run-time initialization - that is needed + // by the given "parent" object (which acquires an initialization + // dependency, if a C++ variable is needed). + std::string BuildConstant(IntrusivePtr parent, const ValPtr& vp) + { return BuildConstant(parent.get(), vp); } + std::string BuildConstant(const Obj* parent, const ValPtr& vp); + + // Called to create a constant appropriate for the given expression + // or, more directly, the given value. The second method returns + // "true" if a C++ variable needed to be created to construct the + // constant at run-time initialization, false if can be instantiated + // directly as a C++ constant. + void AddConstant(const ConstExpr* c); + bool AddConstant(const ValPtr& v); + + // Build particular types of C++ variables (with the given name) + // to hold constants initialized at run-time. 
+ void AddStringConstant(const ValPtr& v, std::string& const_name); + void AddPatternConstant(const ValPtr& v, std::string& const_name); + void AddListConstant(const ValPtr& v, std::string& const_name); + void AddRecordConstant(const ValPtr& v, std::string& const_name); + void AddTableConstant(const ValPtr& v, std::string& const_name); + void AddVectorConstant(const ValPtr& v, std::string& const_name); + + // Maps (non-native) constants to associated C++ globals. + std::unordered_map const_exprs; + + // Maps the values of (non-native) constants to associated C++ globals. + std::unordered_map const_vals; + + // Used for memory management associated with const_vals's index. + std::vector cv_indices; + + // Maps string representations of (non-native) constants to + // associated C++ globals. + std::unordered_map constants; + + // Maps the same representations to the Val* associated with their + // original creation. This enables us to construct initialization + // dependencies for later Val*'s that are able to reuse the same + // constant. + std::unordered_map constants_to_vals; + + // Function variables that we need to create dynamically for + // initializing globals, coupled with the name of their associated + // constant. + std::unordered_map func_vars; + + // + // End of methods related to generating code for script constants. + + + // Start of methods related to generating code for AST Stmt's. + // For the most part, code generation is straightforward as + // it matches the Exec/DoExec methods of the corresponding + // Stmt subclasses. + // See Stmts.cc for definitions. 
+ // + + void GenStmt(const StmtPtr& s) { GenStmt(s.get()); } + void GenStmt(const Stmt* s); + void GenInitStmt(const InitStmt* init); + void GenIfStmt(const IfStmt* i); + void GenWhileStmt(const WhileStmt* w); + void GenReturnStmt(const ReturnStmt* r); + void GenAddStmt(const ExprStmt* es); + void GenDeleteStmt(const ExprStmt* es); + void GenEventStmt(const EventStmt* ev); + void GenSwitchStmt(const SwitchStmt* sw); + + void GenForStmt(const ForStmt* f); + void GenForOverTable(const ExprPtr& tbl, const IDPtr& value_var, + const IDPList* loop_vars); + void GenForOverVector(const ExprPtr& tbl, const IDPList* loop_vars); + void GenForOverString(const ExprPtr& str, const IDPList* loop_vars); + + // Nested level of loops/switches for which "break"'s should be + // C++ breaks rather than a "hook" break. + int break_level = 0; + + // + // End of methods related to generating code for AST Stmt's. + + + // Start of methods related to generating code for AST Expr's. + // See Exprs.cc for definitions. + // + + // These methods are all oriented around returning strings + // of C++ code; they do not directly emit the code, since often + // the caller will be embedding the result in some surrounding + // context. No effort is made to reduce string copying; this + // isn't worth the hassle, as it takes just a few seconds for + // the compiler to generate 100K+ LOC that clang will then need + // 10s of seconds to compile, so speeding up the compiler has + // little practical advantage. + + // The following enum's represent whether, for expressions yielding + // native values, the end goal is to have the value in (1) native + // form, (2) instead in ValPtr form, or (3) whichever is more + // convenient to generate (sometimes used when the caller knows + // that the value is non-native). 
+ enum GenType { + GEN_NATIVE, + GEN_VAL_PTR, + GEN_DONT_CARE, + }; + + // Generate an expression for which we want the result embedded + // in {} initializers (generally to be used in calling a function + // where we want those values to be translated to a vector). + std::string GenExprs(const Expr* e); + + // Generate the value(s) associated with a ListExpr. If true, + // the "nested" parameter indicates that this list is embedded + // within an outer list, in which case it's expanded to include + // {}'s. It's false if the ListExpr is at the top level, such + // as when expanding the arguments in a CallExpr. + std::string GenListExpr(const Expr* e, GenType gt, bool nested); + + // Per-Expr-subclass code generation. The resulting code generally + // reflects the corresponding Eval() or Fold() methods. + std::string GenExpr(const ExprPtr& e, GenType gt, bool top_level = false) + { return GenExpr(e.get(), gt, top_level); } + std::string GenExpr(const Expr* e, GenType gt, bool top_level = false); + + std::string GenNameExpr(const NameExpr* ne, GenType gt); + std::string GenConstExpr(const ConstExpr* c, GenType gt); + std::string GenIncrExpr(const Expr* e, GenType gt, bool is_incr, bool top_level); + std::string GenCondExpr(const Expr* e, GenType gt); + std::string GenCallExpr(const CallExpr* c, GenType gt); + std::string GenInExpr(const Expr* e, GenType gt); + std::string GenFieldExpr(const FieldExpr* fe, GenType gt); + std::string GenHasFieldExpr(const HasFieldExpr* hfe, GenType gt); + std::string GenIndexExpr(const Expr* e, GenType gt); + std::string GenAssignExpr(const Expr* e, GenType gt, bool top_level); + std::string GenAddToExpr(const Expr* e, GenType gt, bool top_level); + std::string GenSizeExpr(const Expr* e, GenType gt); + std::string GenScheduleExpr(const Expr* e); + std::string GenLambdaExpr(const Expr* e); + std::string GenIsExpr(const Expr* e, GenType gt); + + std::string GenArithCoerceExpr(const Expr* e, GenType gt); + std::string 
GenRecordCoerceExpr(const Expr* e); + std::string GenTableCoerceExpr(const Expr* e); + std::string GenVectorCoerceExpr(const Expr* e); + + std::string GenRecordConstructorExpr(const Expr* e); + std::string GenSetConstructorExpr(const Expr* e); + std::string GenTableConstructorExpr(const Expr* e); + std::string GenVectorConstructorExpr(const Expr* e); + + // Generate code for constants that can be expressed directly + // as C++ constants. + std::string GenVal(const ValPtr& v); + + // Helper functions for particular Expr subclasses / flavors. + std::string GenUnary(const Expr* e, GenType gt, + const char* op, const char* vec_op = nullptr); + std::string GenBinary(const Expr* e, GenType gt, + const char* op, const char* vec_op = nullptr); + std::string GenBinarySet(const Expr* e, GenType gt, const char* op); + std::string GenBinaryString(const Expr* e, GenType gt, const char* op); + std::string GenBinaryPattern(const Expr* e, GenType gt, const char* op); + std::string GenBinaryAddr(const Expr* e, GenType gt, const char* op); + std::string GenBinarySubNet(const Expr* e, GenType gt, const char* op); + std::string GenEQ(const Expr* e, GenType gt, + const char* op, const char* vec_op); + + std::string GenAssign(const ExprPtr& lhs, const ExprPtr& rhs, + const std::string& rhs_native, + const std::string& rhs_val_ptr, + GenType gt, bool top_level); + std::string GenDirectAssign(const ExprPtr& lhs, + const std::string& rhs_native, + const std::string& rhs_val_ptr, + GenType gt, bool top_level); + std::string GenIndexAssign(const ExprPtr& lhs, const ExprPtr& rhs, + const std::string& rhs_val_ptr, + GenType gt, bool top_level); + std::string GenFieldAssign(const ExprPtr& lhs, const ExprPtr& rhs, + const std::string& rhs_val_ptr, + GenType gt, bool top_level); + std::string GenListAssign(const ExprPtr& lhs, const ExprPtr& rhs); + + // Support for element-by-element vector operations. 
+ std::string GenVectorOp(const Expr* e, std::string op, + const char* vec_op); + std::string GenVectorOp(const Expr* e, std::string op1, + std::string op2, const char* vec_op); + + // If "all_deep" is true, it means make all of the captures + // deep copies, not just the ones that were explicitly marked + // as deep copies. That functionality is used to support + // Clone() methods; it's not needed when creating a new lambda + // instance. + std::string GenLambdaClone(const LambdaExpr* l, bool all_deep); + + // Returns an initializer list for a vector of integers. + std::string GenIntVector(const std::vector& vec); + + // The following are used to generate accesses to elements of + // extensible types. They first check whether the type has + // been extended (for records, beyond the field of interest); + // if not, then the access is done directly. If the access + // is however to an extended element, then they indirect the + // access through a map that is generated dynamically when + // the compiled code is initialized. Doing so allows the compiled code to + // work in contexts where other extensions occur that would + // otherwise conflict with hardwired offsets/values. + std::string GenField(const ExprPtr& rec, int field); + std::string GenEnum(const TypePtr& et, const ValPtr& ev); + + // For records that are extended via redef's, maps fields + // beyond the original definition to locations in the + // global (in the compiled code) "field_mapping" array. + // + // So for each such record, there's a second map of + // field-in-the-record to offset-in-field_mapping. + std::unordered_map> + record_field_mappings; + + // Total number of such mappings (i.e., entries in the inner maps, + // not the outer map). + int num_rf_mappings = 0; + + // For each entry in "field_mapping", the record and TypeDecl + // associated with the mapping.
+ std::vector> field_decls; + + // For enums that are extended via redef's, maps each distinct + // value (that the compiled scripts refer to) to locations in the + // global (in the compiled code) "enum_mapping" array. + // + // So for each such enum, there's a second map of + // value-during-compilation to offset-in-enum_mapping. + std::unordered_map> + enum_val_mappings; + + // Total number of such mappings (i.e., entries in the inner maps, + // not the outer map). + int num_ev_mappings = 0; + + // For each entry in "enum_mapping", the enum type and name + // associated with the mapping. + std::vector> enum_names; + + // + // End of methods related to generating code for AST Expr's. + + + // Start of methods related to managing script types. + // See Types.cc for definitions. + // + + // "Native" types are those Zeek scripting types that we support + // using low-level C++ types (like "bro_uint_t" for "count"). + // Types that we instead support using some form of ValPtr + // representation are "non-native". + bool IsNativeType(const TypePtr& t) const; + + // Given an expression corresponding to a native type (and with + // the given script type 't'), converts it to the given GenType. + std::string NativeToGT(const std::string& expr, const TypePtr& t, + GenType gt); + + // Given an expression with a C++ type of generic "ValPtr", of the + // given script type 't', converts it as needed to the given GenType. + std::string GenericValPtrToGT(const std::string& expr, const TypePtr& t, + GenType gt); + + // For a given type, generates the code necessary to initialize + // it at run time. The term "expand" in the method's name refers + // to the fact that the type has already been previously declared + // (necessary to facilitate defining recursive types), so this method + // generates the "meat" of the type but not its original declaration. + void ExpandTypeVar(const TypePtr& t); + + // Methods for expanding specific such types.
"tn" is the name + // of the C++ variable used for the particular type. + void ExpandListTypeVar(const TypePtr& t, std::string& tn); + void ExpandRecordTypeVar(const TypePtr& t, std::string& tn); + void ExpandEnumTypeVar(const TypePtr& t, std::string& tn); + void ExpandTableTypeVar(const TypePtr& t, std::string& tn); + void ExpandFuncTypeVar(const TypePtr& t, std::string& tn); + + // The following assumes we're populating a type_decl_list called "tl". + std::string GenTypeDecl(const TypeDecl* td); + + // Returns the name of a C++ variable that will hold a TypePtr + // of the appropriate flavor. 't' does not need to be a type + // representative. + std::string GenTypeName(const Type* t); + std::string GenTypeName(const TypePtr& t) + { return GenTypeName(t.get()); } + + // Returns the "representative" for a given type, used to ensure + // that we re-use the C++ variable corresponding to a type and + // don't instantiate redundant instances. + const Type* TypeRep(const Type* t) { return pfs.TypeRep(t); } + const Type* TypeRep(const TypePtr& t) { return TypeRep(t.get()); } + + // Low-level C++ representations for types, of various flavors. + const char* TypeTagName(TypeTag tag) const; + const char* TypeName(const TypePtr& t); + const char* FullTypeName(const TypePtr& t); + const char* TypeType(const TypePtr& t); + + // Track the given type (with support methods for ones that + // are complicated), recursively including its sub-types, and + // creating initializations (and dependencies) for constructing + // C++ variables representing the types. + void RegisterType(const TypePtr& t); + void RegisterListType(const TypePtr& t); + void RegisterTableType(const TypePtr& t); + void RegisterRecordType(const TypePtr& t); + void RegisterFuncType(const TypePtr& t); + + // Access to a type's underlying values. + const char* NativeAccessor(const TypePtr& t); + + // The name for a type that should be used in declaring + // an IntrusivePtr to such a type.
+ const char* IntrusiveVal(const TypePtr& t); + + // Maps types to indices in the global "types__CPP" array. + CPPTracker types = {"types", &compiled_items}; + + // Used to prevent analysis of mutually-referring types from + // leading to infinite recursion. + std::unordered_set processed_types; + + // + // End of methods related to managing script types. + + + // Start of methods related to managing script type attributes. + // Attributes arise mainly in the context of constructing types. + // See Attrs.cc for definitions. + // + + // Tracks a use of the given set of attributes, including + // initialization dependencies and the generation of any + // associated expressions. + void RegisterAttributes(const AttributesPtr& attrs); + + // Populates the 2nd and 3rd arguments with C++ representations + // of the tags and (optional) values/expressions associated with + // the set of attributes. Each result is a comma-separated list + // enclosed in {}'s; an attribute with no expression contributes + // "nullptr" to the values. + void BuildAttrs(const AttributesPtr& attrs, std::string& attr_tags, + std::string& attr_vals); + + // Generates code to create the given attributes at run-time. + void GenAttrs(const AttributesPtr& attrs); + std::string GenAttrExpr(const ExprPtr& e); + + // Returns the name of the C++ variable that will hold the given + // attributes at run-time. + std::string AttrsName(const AttributesPtr& attrs); + + // Returns a string representation of the name associated with + // different attributes (e.g., "ATTR_DEFAULT"). + const char* AttrName(const AttrPtr& attr); + + // Like the "types" tracker, but for sets of attributes, so we can reconstruct record types. + CPPTracker attributes = {"attrs", &compiled_items}; + + // + // End of methods related to managing script type attributes. + + + // Start of methods related to run-time initialization. + // See Inits.cc for definitions. + // + + // Generates code to construct a CallExpr that can be used to + // evaluate the expression 'e' as an initializer (typically + // for a record &default attribute).
+ void GenInitExpr(const ExprPtr& e); + + // True if the given expression is simple enough that we can + // generate code to evaluate it directly, and don't need to + // create a separate function per GenInitExpr(). + bool IsSimpleInitExpr(const ExprPtr& e) const; + + // Returns the name of a function used to evaluate an + // initialization expression. + std::string InitExprName(const ExprPtr& e); + + // Generates code to initialize the global 'g' (with C++ name "gl") + // to the given value *if* on start-up it doesn't already have a value. + void GenGlobalInit(const ID* g, std::string& gl, const ValPtr& v); + + // Generates code to initialize all of the function-valued globals + // (i.e., those pointing to lambdas). + void GenFuncVarInits(); + + // Generates the "pre-initialization" for a given type. For + // extensible types (records, enums, lists), these are empty + // versions that we'll later populate. + void GenPreInit(const Type* t); + + // Generates a function that executes the pre-initializations. + void GenPreInits(); + + // The following all track, for a given object, the code associated + // with initializing it. Multiple calls for the same object append + // additional lines of code (the order of the calls is preserved). + // + // Versions with "lhs" and "rhs" arguments provide an initialization + // of the form "lhs = rhs;", as a convenience. + void AddInit(const IntrusivePtr& o, + const std::string& lhs, const std::string& rhs) + { AddInit(o.get(), lhs + " = " + rhs + ";"); } + void AddInit(const Obj* o, + const std::string& lhs, const std::string& rhs) + { AddInit(o, lhs + " = " + rhs + ";"); } + void AddInit(const IntrusivePtr& o, const std::string& init) + { AddInit(o.get(), init); } + void AddInit(const Obj* o, const std::string& init); + + // We do consistency checking of initialization dependencies by + // checking that depended-on objects have initializations.
Sometimes + // it's unclear whether the object will actually require + // initialization, in which case we add an empty initialization + // for it so that the consistency-checking is happy. + void AddInit(const IntrusivePtr& o) { AddInit(o.get()); } + void AddInit(const Obj* o); + + // Records the fact that the initialization of object o1 depends + // on that of object o2. + void NoteInitDependency(const IntrusivePtr& o1, + const IntrusivePtr& o2) + { NoteInitDependency(o1.get(), o2.get()); } + void NoteInitDependency(const IntrusivePtr& o1, const Obj* o2) + { NoteInitDependency(o1.get(), o2); } + void NoteInitDependency(const Obj* o1, const IntrusivePtr& o2) + { NoteInitDependency(o1, o2.get()); } + void NoteInitDependency(const Obj* o1, const Obj* o2); + + // Records an initialization dependency of the given object + // on the given type, unless the type is a record. We need + // this notion to protect against circular dependencies in + // the face of recursive records. A null type is ignored. + void NoteNonRecordInitDependency(const Obj* o, const TypePtr& t) + { + if ( t && t->Tag() != TYPE_RECORD ) + NoteInitDependency(o, TypeRep(t)); + } + void NoteNonRecordInitDependency(const IntrusivePtr& o, const TypePtr& t) + { NoteNonRecordInitDependency(o.get(), t); } + + // Analyzes the initialization dependencies to ensure that they're + // consistent, i.e., every object that either depends on another, + // or is itself depended on, appears in the "to_do" set. + void CheckInitConsistency(std::unordered_set& to_do); + + // Generate initializations for the items in the "to_do" set, + // in accordance with their dependencies. Returns 'n', the + // number of initialization functions generated. They should + // be called in order, from 1 to n. + int GenDependentInits(std::unordered_set& to_do); + + // Generates a function for initializing the nc'th cohort.
+ void GenInitCohort(int nc, std::unordered_set& cohort); + + // Initialize the mappings for record field offsets for field + // accesses into regions of records that can be extensible (and + // thus can vary at run-time from the offsets encountered during + // compilation). + void InitializeFieldMappings(); + + // Same, but for enum types. The second form does a single + // initialization corresponding to the given index in the mapping. + void InitializeEnumMappings(); + void InitializeEnumMappings(const EnumType* et, + const std::string& e_name, int index); + + // Generate the initialization hook for this set of compiled code. + void GenInitHook(); + + // Generates code to activate standalone code. + void GenStandaloneActivation(); + + // Generates code to register the initialization for standalone + // use, and prints to stdout a Zeek script that can load all of + // what we compiled. + void GenLoad(); + + // A list of pre-initializations (those potentially required by + // other initializations, and that themselves have no dependencies). + std::vector pre_inits; + + // Expressions for which we need to generate initialization-time + // code. Currently, these are only expressions appearing in + // attributes. + CPPTracker init_exprs = {"gen_init_expr", &compiled_items}; + + // Maps an object requiring initialization to its initializers. + std::unordered_map> obj_inits; + + // Maps an object requiring initializations to its dependencies + // on other such objects. + std::unordered_map> obj_deps; + + // + // End of methods related to run-time initialization. + + + // Start of methods related to low-level code generation. + // See Emit.cc for definitions. + // + + // Used to create (indented) C++ {...} code blocks. "needs_semi" + // controls whether to terminate the block with a ';' (such as + // for class definitions). + void StartBlock(); + void EndBlock(bool needs_semi = false); + + // Various ways of generating code.
The multi-argument methods + // assume that the first argument is a printf-style format + // (but one that can only have %s specifiers). The format is + // handed to fprintf() as-is; the single-argument form instead + // writes its string verbatim. Every form indents first and + // ends the line afterwards. + void Emit(const std::string& str) const + { + Indent(); + fprintf(write_file, "%s", str.c_str()); + NL(); + } + + void Emit(const std::string& fmt, const std::string& arg) const + { + Indent(); + fprintf(write_file, fmt.c_str(), arg.c_str()); + NL(); + } + + void Emit(const std::string& fmt, const std::string& arg1, + const std::string& arg2) const + { + Indent(); + fprintf(write_file, fmt.c_str(), arg1.c_str(), arg2.c_str()); + NL(); + } + + void Emit(const std::string& fmt, const std::string& arg1, + const std::string& arg2, const std::string& arg3) const + { + Indent(); + fprintf(write_file, fmt.c_str(), arg1.c_str(), arg2.c_str(), + arg3.c_str()); + NL(); + } + + void Emit(const std::string& fmt, const std::string& arg1, + const std::string& arg2, const std::string& arg3, + const std::string& arg4) const + { + Indent(); + fprintf(write_file, fmt.c_str(), arg1.c_str(), arg2.c_str(), + arg3.c_str(), arg4.c_str()); + NL(); + } + + // Returns an expression for constructing a Zeek String object + // corresponding to the given byte array. + std::string GenString(const char* b, int len) const; + + // For the given byte array / string, returns a version expanded + // with escape sequences in order to represent it as a C++ string. + std::string CPPEscape(const char* b, int len) const; + std::string CPPEscape(const char* s) const + { return CPPEscape(s, strlen(s)); } + + void NL() const { fputc('\n', write_file); } + + // Indents to the current indentation level. + void Indent() const; + + // File to which we're generating code. + FILE* write_file; + + // Indentation level. + int block_level = 0; + + // + // End of methods related to low-level code generation.
+}; + +} // zeek::detail diff --git a/src/script_opt/CPP/Consts.cc b/src/script_opt/CPP/Consts.cc new file mode 100644 index 0000000000..27b41eee9f --- /dev/null +++ b/src/script_opt/CPP/Consts.cc @@ -0,0 +1,292 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +#include "zeek/File.h" +#include "zeek/RE.h" +#include "zeek/script_opt/CPP/Compile.h" + + +namespace zeek::detail { + +std::string CPPCompile::BuildConstant(const Obj* parent, const ValPtr& vp) + { + if ( ! vp ) + return "nullptr"; + + if ( AddConstant(vp) ) + { + auto v = vp.get(); + AddInit(parent); + NoteInitDependency(parent, v); + + // Make sure the value pointer, which might be transient + // in construction, sticks around so we can track its + // value. + cv_indices.push_back(vp); + + return const_vals[v]; + } + else + return NativeToGT(GenVal(vp), vp->GetType(), GEN_VAL_PTR); + } + +void CPPCompile::AddConstant(const ConstExpr* c) + { + auto v = c->ValuePtr(); + + if ( AddConstant(v) ) + { + AddInit(c); + NoteInitDependency(c, v.get()); + } + } + +bool CPPCompile::AddConstant(const ValPtr& vp) + { + auto v = vp.get(); + + if ( IsNativeType(v->GetType()) ) + // These we instantiate directly. + return false; + + if ( const_vals.count(v) > 0 ) + // Already did this one. + return true; + + // Formulate a key that's unique per distinct constant. + + const auto& t = v->GetType(); + std::string c_desc; + + if ( t->Tag() == TYPE_STRING ) + { + // We can't rely on these to render with consistent + // escaping, sigh. Just use the raw string. + auto s = v->AsString(); + auto b = (const char*)(s->Bytes()); + c_desc = std::string(b, s->Len()) + "string"; + } + else + { + ODesc d; + v->Describe(&d); + + // Don't confuse constants of different types that happen to + // render the same. 
+ t->Describe(&d); + + c_desc = d.Description(); + } + + if ( constants.count(c_desc) > 0 ) + { + const_vals[v] = constants[c_desc]; + + auto orig_v = constants_to_vals[c_desc]; + ASSERT(v != orig_v); + AddInit(v); + NoteInitDependency(v, orig_v); + + return true; + } + + // Need a C++ global for this constant. + auto const_name = std::string("CPP__const__") + + Fmt(int(constants.size())); + + const_vals[v] = constants[c_desc] = const_name; + constants_to_vals[c_desc] = v; + + auto tag = t->Tag(); + + switch ( tag ) { + case TYPE_STRING: + AddStringConstant(vp, const_name); + break; + + case TYPE_PATTERN: + AddPatternConstant(vp, const_name); + break; + + case TYPE_LIST: + AddListConstant(vp, const_name); + break; + + case TYPE_RECORD: + AddRecordConstant(vp, const_name); + break; + + case TYPE_TABLE: + AddTableConstant(vp, const_name); + break; + + case TYPE_VECTOR: + AddVectorConstant(vp, const_name); + break; + + case TYPE_ADDR: + case TYPE_SUBNET: + { + auto prefix = (tag == TYPE_ADDR) ? "Addr" : "SubNet"; + + Emit("%sValPtr %s;", prefix, const_name); + + ODesc d; + v->Describe(&d); + + AddInit(v, const_name, + std::string("make_intrusive<") + prefix + + "Val>(\"" + d.Description() + "\")"); + } + break; + + case TYPE_FUNC: + Emit("FuncValPtr %s;", const_name); + + // We can't generate the initialization now because it + // depends on first having compiled the associated body, + // so we know its hash. So for now we just note it + // to deal with later. 
+ func_vars[v->AsFuncVal()] = const_name; + break; + + case TYPE_FILE: + { + Emit("FileValPtr %s;", const_name); + + auto f = cast_intrusive(vp)->Get(); + + AddInit(v, const_name, + std::string("make_intrusive(") + + "make_intrusive(\"" + f->Name() + "\", \"w\"))"); + } + break; + + default: + reporter->InternalError("bad constant type in CPPCompile::AddConstant"); + } + + return true; + } + +void CPPCompile::AddStringConstant(const ValPtr& v, std::string& const_name) + { + Emit("StringValPtr %s;", const_name); + + auto s = v->AsString(); + const char* b = (const char*)(s->Bytes()); + auto len = s->Len(); + + AddInit(v, const_name, GenString(b, len)); + } + +void CPPCompile::AddPatternConstant(const ValPtr& v, std::string& const_name) + { + Emit("PatternValPtr %s;", const_name); + + auto re = v->AsPatternVal()->Get(); + + AddInit(v, std::string("{ auto re = new RE_Matcher(") + + CPPEscape(re->OrigText()) + ");"); + + if ( re->IsCaseInsensitive() ) + AddInit(v, "re->MakeCaseInsensitive();"); + + AddInit(v, "re->Compile();"); + AddInit(v, const_name, "make_intrusive(re)"); + AddInit(v, "}"); + } + +void CPPCompile::AddListConstant(const ValPtr& v, std::string& const_name) + { + Emit("ListValPtr %s;", const_name); + + // No initialization dependency on the main type since we don't + // use the underlying TypeList. However, we *do* use the types of + // the elements. 
+ + AddInit(v, const_name, std::string("make_intrusive(TYPE_ANY)")); + + auto lv = cast_intrusive(v); + auto n = lv->Length(); + + for ( auto i = 0; i < n; ++i ) + { + const auto& l_i = lv->Idx(i); + auto l_i_c = BuildConstant(v, l_i); + AddInit(v, const_name + "->Append(" + l_i_c + ");"); + NoteInitDependency(v, TypeRep(l_i->GetType())); + } + } + +void CPPCompile::AddRecordConstant(const ValPtr& v, std::string& const_name) + { + const auto& t = v->GetType(); + + Emit("RecordValPtr %s;", const_name); + + NoteInitDependency(v, TypeRep(t)); + + AddInit(v, const_name, std::string("make_intrusive(") + + "cast_intrusive(" + GenTypeName(t) + "))"); + + auto r = cast_intrusive(v); + auto n = r->NumFields(); + + for ( auto i = 0; i < n; ++i ) + { + const auto& r_i = r->GetField(i); + + if ( r_i ) + { + auto r_i_c = BuildConstant(v, r_i); + AddInit(v, const_name + "->Assign(" + Fmt(i) + + ", " + r_i_c + ");"); + } + } + } + +void CPPCompile::AddTableConstant(const ValPtr& v, std::string& const_name) + { + const auto& t = v->GetType(); + + Emit("TableValPtr %s;", const_name); + + NoteInitDependency(v, TypeRep(t)); + + AddInit(v, const_name, std::string("make_intrusive(") + + "cast_intrusive(" + GenTypeName(t) + "))"); + + auto tv = cast_intrusive(v); + auto tv_map = tv->ToMap(); + + for ( auto& tv_i : tv_map ) + { + auto ind = BuildConstant(v, tv_i.first); + auto val = BuildConstant(v, tv_i.second); + AddInit(v, const_name + "->Assign(" + ind + ", " + val + ");"); + } + } + +void CPPCompile::AddVectorConstant(const ValPtr& v, std::string& const_name) + { + const auto& t = v->GetType(); + + Emit("VectorValPtr %s;", const_name); + + NoteInitDependency(v, TypeRep(t)); + + AddInit(v, const_name, std::string("make_intrusive(") + + "cast_intrusive(" + GenTypeName(t) + "))"); + + auto vv = cast_intrusive(v); + auto n = vv->Size(); + + for ( auto i = 0; i < n; ++i ) + { + const auto& v_i = vv->ValAt(i); + auto v_i_c = BuildConstant(v, v_i); + AddInit(v, const_name + "->Append(" + 
v_i_c + ");"); + } + } + +} // zeek::detail diff --git a/src/script_opt/CPP/DeclFunc.cc b/src/script_opt/CPP/DeclFunc.cc new file mode 100644 index 0000000000..0682eef3b3 --- /dev/null +++ b/src/script_opt/CPP/DeclFunc.cc @@ -0,0 +1,320 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +#include +#include +#include + +#include "zeek/script_opt/CPP/Compile.h" + + +namespace zeek::detail { + +void CPPCompile::DeclareFunc(const FuncInfo& func) + { + if ( ! IsCompilable(func) ) + return; + + auto fname = Canonicalize(BodyName(func).c_str()) + "_zf"; + auto pf = func.Profile(); + auto f = func.Func(); + auto body = func.Body(); + auto priority = func.Priority(); + + DeclareSubclass(f->GetType(), pf, fname, body, priority, nullptr, + f->Flavor()); + } + +void CPPCompile::DeclareLambda(const LambdaExpr* l, const ProfileFunc* pf) + { + ASSERT(is_CPP_compilable(pf)); + + auto lname = Canonicalize(l->Name().c_str()) + "_lb"; + auto body = l->Ingredients().body; + auto l_id = l->Ingredients().id; + auto& ids = l->OuterIDs(); + + for ( auto id : ids ) + lambda_names[id] = LocalName(id); + + DeclareSubclass(l_id->GetType(), pf, lname, body, 0, l, + FUNC_FLAVOR_FUNCTION); + } + +void CPPCompile::DeclareSubclass(const FuncTypePtr& ft, const ProfileFunc* pf, + const std::string& fname, + const StmtPtr& body, int priority, + const LambdaExpr* l, FunctionFlavor flavor) + { + const auto& yt = ft->Yield(); + in_hook = flavor == FUNC_FLAVOR_HOOK; + const IDPList* lambda_ids = l ? &l->OuterIDs() : nullptr; + + auto yt_decl = in_hook ? 
"bool" : FullTypeName(yt); + + NL(); + Emit("static %s %s(%s);", yt_decl, fname, ParamDecl(ft, lambda_ids, pf)); + + Emit("class %s_cl : public CPPStmt", fname); + StartBlock(); + + Emit("public:"); + + std::string addl_args; // captures passed in on construction + std::string inits; // initializers for corresponding member vars + + if ( lambda_ids ) + { + for ( auto& id : *lambda_ids ) + { + auto name = lambda_names[id]; + auto tn = FullTypeName(id->GetType()); + addl_args = addl_args + ", " + tn + " _" + name; + + inits = inits + ", " + name + "(_" + name + ")"; + } + } + + Emit("%s_cl(const char* name%s) : CPPStmt(name)%s { }", + fname, addl_args.c_str(), inits.c_str()); + + // An additional constructor just used to generate place-holder + // instances, due to the mis-design that lambdas are identified + // by their Func objects rather than their FuncVal objects. + if ( lambda_ids && lambda_ids->length() > 0 ) + Emit("%s_cl(const char* name) : CPPStmt(name) { }", fname); + + Emit("ValPtr Exec(Frame* f, StmtFlowType& flow) override final"); + StartBlock(); + + Emit("flow = FLOW_RETURN;"); + + if ( in_hook ) + { + Emit("if ( ! %s(%s) )", fname, BindArgs(ft, lambda_ids)); + StartBlock(); + Emit("flow = FLOW_BREAK;"); + EndBlock(); + Emit("return nullptr;"); + } + + else if ( IsNativeType(yt) ) + GenInvokeBody(fname, yt, BindArgs(ft, lambda_ids)); + + else + Emit("return %s(%s);", fname, BindArgs(ft, lambda_ids)); + + EndBlock(); + + if ( lambda_ids ) + BuildLambda(ft, pf, fname, body, l, lambda_ids); + else + { + // Track this function as known to have been compiled. + // We don't track lambda bodies as compiled because they + // can't be instantiated directly without also supplying + // the captures. In principle we could make an exception + // for lambdas that don't take any arguments, but that + // seems potentially more confusing than beneficial. 
+ compiled_funcs.emplace(fname); + + auto loc_f = script_specific_filename(body); + cf_locs[fname] = loc_f; + + // Some guidance for those looking through the generated code. + Emit("// compiled body for: %s", loc_f); + } + + EndBlock(true); + + auto h = pf->HashVal(); + + body_hashes[fname] = h; + body_priorities[fname] = priority; + body_names.emplace(body.get(), fname); + names_to_bodies.emplace(std::move(fname), body.get()); + + total_hash = merge_p_hashes(total_hash, h); + } + +void CPPCompile::BuildLambda(const FuncTypePtr& ft, const ProfileFunc* pf, + const std::string& fname, const StmtPtr& body, + const LambdaExpr* l, const IDPList* lambda_ids) + { + // Declare the member variables for holding the captures. + for ( auto& id : *lambda_ids ) + { + auto name = lambda_names[id]; + auto tn = FullTypeName(id->GetType()); + Emit("%s %s;", tn, name.c_str()); + } + + // Generate initialization to create and register the lambda. + auto literal_name = std::string("\"") + l->Name() + "\""; + auto instantiate = std::string("make_intrusive<") + fname + "_cl>(" + + literal_name + ")"; + + int nl = lambda_ids->length(); + auto h = Fmt(pf->HashVal()); + auto has_captures = nl > 0 ? "true" : "false"; + auto l_init = std::string("register_lambda__CPP(") + instantiate + + ", " + h + ", \"" + l->Name() + "\", " + + GenTypeName(ft) + ", " + has_captures + ");"; + + AddInit(l, l_init); + NoteInitDependency(l, TypeRep(ft)); + + // Make the lambda's body's initialization depend on the lambda's + // initialization. That way GenFuncVarInits() can generate + // initializations with the assurance that the associated body + // hashes will have been registered. + AddInit(body.get()); + NoteInitDependency(body.get(), l); + + // Generate method to extract the lambda captures from a deserialized + // Frame object. 
+ Emit("void SetLambdaCaptures(Frame* f) override"); + StartBlock(); + for ( int i = 0; i < nl; ++i ) + { + auto l_i = (*lambda_ids)[i]; + const auto& t_i = l_i->GetType(); + auto cap_i = std::string("f->GetElement(") + Fmt(i) + ")"; + Emit("%s = %s;", lambda_names[l_i], + GenericValPtrToGT(cap_i, t_i, GEN_NATIVE)); + } + EndBlock(); + + // Generate the method for serializing the captures. + Emit("std::vector SerializeLambdaCaptures() const override"); + StartBlock(); + Emit("std::vector vals;"); + for ( int i = 0; i < nl; ++i ) + { + auto l_i = (*lambda_ids)[i]; + const auto& t_i = l_i->GetType(); + Emit("vals.emplace_back(%s);", + NativeToGT(lambda_names[l_i], t_i, GEN_VAL_PTR)); + } + Emit("return vals;"); + EndBlock(); + + // Generate the Clone() method. + Emit("CPPStmtPtr Clone() override"); + StartBlock(); + auto arg_clones = GenLambdaClone(l, true); + Emit("return make_intrusive<%s_cl>(name.c_str()%s);", fname, arg_clones); + EndBlock(); + } + +std::string CPPCompile::BindArgs(const FuncTypePtr& ft, const IDPList* lambda_ids) + { + const auto& params = ft->Params(); + auto t = params->Types(); + + std::string res; + + int n = t ? t->size() : 0; + for ( auto i = 0; i < n; ++i ) + { + auto arg_i = std::string("f->GetElement(") + Fmt(i) + ")"; + const auto& ft = params->GetFieldType(i); + + if ( IsNativeType(ft) ) + res += arg_i + NativeAccessor(ft); + else + res += GenericValPtrToGT(arg_i, ft, GEN_VAL_PTR); + + res += ", "; + } + + if ( lambda_ids ) + { + for ( auto& id : *lambda_ids ) + res += lambda_names[id] + ", "; + } + + // Add the final frame argument. 
+ return res + "f"; + } + +std::string CPPCompile::ParamDecl(const FuncTypePtr& ft, + const IDPList* lambda_ids, + const ProfileFunc* pf) + { + const auto& params = ft->Params(); + int n = params->NumFields(); + + std::string decl; + + for ( auto i = 0; i < n; ++i ) + { + const auto& t = params->GetFieldType(i); + auto tn = FullTypeName(t); + auto param_id = FindParam(i, pf); + std::string fn; + + if ( param_id ) + { + if ( t->Tag() == TYPE_ANY && + param_id->GetType()->Tag() != TYPE_ANY ) + // We'll need to translate the parameter + // from its current representation to + // type "any". + fn = std::string("any_param__CPP_") + Fmt(i); + else + fn = LocalName(param_id); + } + else + // Parameters that are unused don't wind up + // in the ProfileFunc. Rather than dig their + // name out of the function's declaration, we + // explicitly name them to reflect that they're + // unused. + fn = std::string("unused_param__CPP_") + Fmt(i); + + if ( IsNativeType(t) ) + // Native types are always pass-by-value. + decl = decl + tn + " " + fn; + else + { + if ( param_id && pf->Assignees().count(param_id) > 0 ) + // We modify the parameter. + decl = decl + tn + " " + fn; + else + // Not modified, so pass by const reference. + decl = decl + "const " + tn + "& " + fn; + } + + decl += ", "; + } + + if ( lambda_ids ) + { + // Add the captures as additional parameters. + for ( auto& id : *lambda_ids ) + { + auto name = lambda_names[id]; + const auto& t = id->GetType(); + auto tn = FullTypeName(t); + + // Allow the captures to be modified. + decl = decl + tn + "& " + name + ", "; + } + } + + // Add in the declaration of the frame. 
+	return decl + "Frame* f__CPP";
+	}
+
+// Returns the identifier in pf->Params() whose Offset() equals "i",
+// or nil if there is none.
+const ID* CPPCompile::FindParam(int i, const ProfileFunc* pf)
+	{
+	const auto& params = pf->Params();
+
+	for ( const auto& p : params )
+		if ( p->Offset() == i )
+			return p;
+
+	return nullptr;
+	}
+
+} // zeek::detail
diff --git a/src/script_opt/CPP/Driver.cc b/src/script_opt/CPP/Driver.cc
new file mode 100644
index 0000000000..e59deed114
--- /dev/null
+++ b/src/script_opt/CPP/Driver.cc
@@ -0,0 +1,329 @@
+// See the file "COPYING" in the main distribution directory for copyright.
+
+// NOTE(review): the targets of the three #include lines below (and some
+// template arguments further down, e.g. "std::vector&") appear to have
+// been stripped from this extract - restore them from upstream.
+#include
+#include
+#include
+
+#include "zeek/script_opt/CPP/Compile.h"
+
+
+namespace zeek::detail {
+
+
+// Opens "gen_name" for writing (or appending, if hm.IsAppend()), derives
+// "addl_tag" from the target file's current size when appending, and then
+// runs the compilation.  Reports an error and exits if the file can't be
+// opened or examined.
+CPPCompile::CPPCompile(std::vector& _funcs, ProfileFuncs& _pfs,
+			const char* gen_name, CPPHashManager& _hm,
+			bool _update, bool _standalone)
+: funcs(_funcs), pfs(_pfs), hm(_hm), update(_update), standalone(_standalone)
+	{
+	auto mode = hm.IsAppend() ? "a" : "w";
+
+	write_file = fopen(gen_name, mode);
+	if ( ! write_file )
+		{
+		reporter->Error("can't open C++ target file %s", gen_name);
+		exit(1);
+		}
+
+	if ( hm.IsAppend() )
+		{
+		// We need a unique number to associate with the name
+		// space for the code we're adding.  A convenient way to
+		// generate this safely is to use the present size of the
+		// file we're appending to.  That guarantees that every
+		// incremental compilation will wind up with a different
+		// number.
+		struct stat st;
+		if ( fstat(fileno(write_file), &st) != 0 )
+			{
+			char buf[256];
+			util::zeek_strerror_r(errno, buf, sizeof(buf));
+			reporter->Error("fstat failed on %s: %s", gen_name, buf);
+			exit(1);
+			}
+
+		// We use a value of "0" to mean "we're not appending,
+		// we're generating from scratch", so make sure we're
+		// distinct from that.
+		addl_tag = st.st_size + 1;
+		}
+
+	Compile();
+	}
+
+// Closes the generated-code file opened by the constructor.
+CPPCompile::~CPPCompile()
+	{
+	fclose(write_file);
+	}
+
+// Drives the whole code generation: prolog, declarations of types,
+// constants, globals and events, then function/lambda declarations and
+// bodies, and finally the epilog.
+void CPPCompile::Compile()
+	{
+	// Get the working directory so we can use it in diagnostic messages
+	// as a way to identify this compilation.  Only germane when doing
+	// incremental compilation (particularly of the test suite).
+	char buf[8192];
+	if ( ! getcwd(buf, sizeof buf) )
+		{
+		// On failure the contents of "buf" are unspecified, so
+		// don't go on to use them.  Report the error the same way
+		// the constructor does for its system-call failures.
+		char err_buf[256];
+		util::zeek_strerror_r(errno, err_buf, sizeof(err_buf));
+		reporter->Error("getcwd failed: %s", err_buf);
+		exit(1);
+		}
+	working_dir = buf;
+
+	if ( update && addl_tag > 0 && CheckForCollisions() )
+		// Inconsistent compilation environment.
+		exit(1);
+
+	GenProlog();
+
+	// Determine which functions we can call directly, and reuse
+	// previously compiled instances of those if present.
+	for ( const auto& func : funcs )
+		{
+		if ( func.Func()->Flavor() != FUNC_FLAVOR_FUNCTION )
+			// Can't be called directly.
+			continue;
+
+		if ( IsCompilable(func) )
+			compilable_funcs.insert(BodyName(func));
+
+		auto h = func.Profile()->HashVal();
+		if ( hm.HasHash(h) )
+			{
+			// Track the previously compiled instance
+			// of this function.
+			auto n = func.Func()->Name();
+			hashed_funcs[n] = hm.FuncBodyName(h);
+			}
+		}
+
+	// Track all of the types we'll be using.
+	for ( const auto& t : pfs.RepTypes() )
+		{
+		TypePtr tp{NewRef{}, (Type*)(t)};
+		types.AddKey(tp, pfs.HashType(t));
+		}
+
+	for ( const auto& t : types.DistinctKeys() )
+		if ( ! types.IsInherited(t) )
+			// Type is new to this compilation, so we'll
+			// be generating it.
+			Emit("TypePtr %s;", types.KeyName(t));
+
+	NL();
+
+	for ( const auto& c : pfs.Constants() )
+		AddConstant(c);
+
+	NL();
+
+	for ( auto& g : pfs.AllGlobals() )
+		CreateGlobal(g);
+
+	// Now that the globals are created, register their attributes,
+	// if any, and generate their initialization for use in standalone
+	// scripts.  We can't do these in CreateGlobal() because at that
+	// point it's possible that some of the globals refer to other
+	// globals not-yet-created.
+	for ( auto& g : pfs.AllGlobals() )
+		{
+		RegisterAttributes(g->GetAttrs());
+		if ( g->HasVal() )
+			{
+			auto gn = std::string(g->Name());
+			GenGlobalInit(g, globals[gn], g->GetVal());
+			}
+		}
+
+	// Declare an event handler variable for each event for which
+	// AddGlobal() returns true.
+	for ( const auto& e : pfs.Events() )
+		if ( AddGlobal(e, "gl", false) )
+			Emit("EventHandlerPtr %s_ev;", globals[std::string(e)]);
+
+	// Register every representative type; each is expected to already
+	// be present in "types".
+	for ( const auto& t : pfs.RepTypes() )
+		{
+		ASSERT(types.HasKey(t));
+		TypePtr tp{NewRef{}, (Type*)(t)};
+		RegisterType(tp);
+		}
+
+	// The scaffolding is now in place to go ahead and generate
+	// the functions & lambdas.  First declare them ...
+	for ( const auto& func : funcs )
+		DeclareFunc(func);
+
+	// We track lambdas by their internal names, because two different
+	// LambdaExpr's can wind up referring to the same underlying lambda
+	// if the bodies happen to be identical.  In that case, we don't
+	// want to generate the lambda twice.
+	// NOTE(review): the element type of this set appears to have been
+	// stripped from this extract - restore from upstream.
+	std::unordered_set lambda_names;
+	for ( const auto& l : pfs.Lambdas() )
+		{
+		const auto& n = l->Name();
+		if ( lambda_names.count(n) > 0 )
+			// Skip it.
+			continue;
+
+		DeclareLambda(l, pfs.ExprProf(l).get());
+		lambda_names.insert(n);
+		}
+
+	NL();
+
+	// ... and now generate their bodies.
+	for ( const auto& func : funcs )
+		CompileFunc(func);
+
+	// Reuse the same set to de-duplicate lambdas for the second pass.
+	lambda_names.clear();
+	for ( const auto& l : pfs.Lambdas() )
+		{
+		const auto& n = l->Name();
+		if ( lambda_names.count(n) > 0 )
+			continue;
+
+		CompileLambda(l, pfs.ExprProf(l).get());
+		lambda_names.insert(n);
+		}
+
+	for ( const auto& f : compiled_funcs )
+		RegisterCompiledBody(f);
+
+	GenFuncVarInits();
+
+	GenEpilog();
+	}
+
+// Emits the opening of the generated file: for a from-scratch compilation
+// (addl_tag of 0) the runtime #include and the enclosing zeek::detail
+// namespace, and in all cases the per-compilation CPP_<tag> namespace,
+// annotated with the working directory.
+void CPPCompile::GenProlog()
+	{
+	if ( addl_tag == 0 )
+		{
+		Emit("#include \"zeek/script_opt/CPP/Runtime.h\"\n");
+		Emit("namespace zeek::detail { //\n");
+		}
+
+	Emit("namespace CPP_%s { // %s\n", Fmt(addl_tag), working_dir.c_str());
+
+	// The following might-or-might-not wind up being populated/used.
+	// NOTE(review): the vector element types in the two emitted
+	// declarations below appear to have been stripped from this
+	// extract - restore from upstream.
+	Emit("std::vector field_mapping;");
+	Emit("std::vector enum_mapping;");
+	NL();
+	}
+
+// Adds an initialization that registers the compiled body named "f",
+// passing its priority, its hash and the list of associated events.
+// When updating, also records the body's scoped name and hash in the
+// hash file.
+void CPPCompile::RegisterCompiledBody(const std::string& f)
+	{
+	auto h = body_hashes[f];
+	auto p = body_priorities[f];
+
+	// Build up an initializer of the events relevant to the function.
+	std::string events;
+	if ( body_events.count(f) > 0 )
+		for ( auto e : body_events[f] )
+			{
+			if ( events.size() > 0 )
+				events += ", ";
+			events = events + "\"" + e + "\"";
+			}
+
+	events = std::string("{") + events + "}";
+
+	if ( addl_tag > 0 )
+		// Hash in the location associated with this compilation
+		// pass, to get a final hash that avoids conflicts with
+		// identical-but-in-a-different-context function bodies
+		// when compiling potentially conflicting additional code
+		// (which we want to support to enable quicker test suite
+		// runs by enabling multiple tests to be compiled into the
+		// same binary).
+		h = merge_p_hashes(h, p_hash(cf_locs[f]));
+
+	auto init = std::string("register_body__CPP(make_intrusive<") +
+			f + "_cl>(\"" + f + "\"), " + Fmt(p) + ", " +
+			Fmt(h) + ", " + events + ");";
+
+	AddInit(names_to_bodies[f], init);
+
+	if ( update )
+		{
+		// Hash-file record: a "func" line, the scoped body name,
+		// then the hash.
+		fprintf(hm.HashFile(), "func\n%s%s\n",
+			scope_prefix(addl_tag).c_str(), f.c_str());
+		fprintf(hm.HashFile(), "%llu\n", h);
+		}
+	}
+
+// Emits everything that follows the function bodies: initialization
+// expressions, attributes, type expansions, the init__CPP() function and
+// the closing of the namespaces opened by GenProlog().
+void CPPCompile::GenEpilog()
+	{
+	NL();
+
+	for ( const auto& e : init_exprs.DistinctKeys() )
+		{
+		GenInitExpr(e);
+		if ( update )
+			init_exprs.LogIfNew(e, addl_tag, hm.HashFile());
+		}
+
+	for ( const auto& a : attributes.DistinctKeys() )
+		{
+		GenAttrs(a);
+		if ( update )
+			attributes.LogIfNew(a, addl_tag, hm.HashFile());
+		}
+
+	// Generate the guts of compound types, and preserve type names
+	// if present.
+	for ( const auto& t : types.DistinctKeys() )
+		{
+		ExpandTypeVar(t);
+		if ( update )
+			types.LogIfNew(t, addl_tag, hm.HashFile());
+		}
+
+	InitializeEnumMappings();
+
+	GenPreInits();
+
+	// Collect every object that has an initialization pending.
+	// NOTE(review): the element type of this set appears to have been
+	// stripped from this extract - restore from upstream.
+	std::unordered_set to_do;
+	for ( const auto& oi : obj_inits )
+		to_do.insert(oi.first);
+
+	CheckInitConsistency(to_do);
+	auto nc = GenDependentInits(to_do);
+
+	NL();
+	Emit("void init__CPP()");
+
+	StartBlock();
+
+	Emit("enum_mapping.resize(%s);\n", Fmt(int(enum_names.size())));
+	Emit("pre_init__CPP();");
+
+	// Call the generated init_<i>__CPP() functions in order, 1..nc.
+	NL();
+	for ( auto i = 1; i <= nc; ++i )
+		Emit("init_%s__CPP();", Fmt(i));
+
+	// Populate mappings for dynamic offsets.
+	NL();
+	InitializeFieldMappings();
+
+	EndBlock(true);
+
+	GenInitHook();
+
+	Emit("} // %s\n\n", scope_prefix(addl_tag).c_str());
+
+	if ( update )
+		UpdateGlobalHashes();
+
+	// Only a from-scratch compilation (addl_tag of 0) emits the trailer
+	// below, matching the header emitted by GenProlog().
+	if ( addl_tag > 0 )
+		return;
+
+	Emit("#include \"zeek/script_opt/CPP/CPP-gen-addl.h\"\n");
+	Emit("} // zeek::detail");
+	}
+
+// Returns true if "func" should be compiled in this pass: it isn't marked
+// to be skipped, its hash isn't already known to the hash manager, and
+// is_CPP_compilable() accepts its profile.
+bool CPPCompile::IsCompilable(const FuncInfo& func)
+	{
+	if ( func.ShouldSkip() )
+		// Caller marked this function as one to skip.
+		return false;
+
+	if ( hm.HasHash(func.Profile()->HashVal()) )
+		// We've already compiled it.
+		return false;
+
+	return is_CPP_compilable(func.Profile());
+	}
+
+} // zeek::detail
diff --git a/src/script_opt/CPP/Emit.cc b/src/script_opt/CPP/Emit.cc
new file mode 100644
index 0000000000..f01edae985
--- /dev/null
+++ b/src/script_opt/CPP/Emit.cc
@@ -0,0 +1,73 @@
+// See the file "COPYING" in the main distribution directory for copyright.
+
+// NOTE(review): the targets of the three #include lines below appear to
+// have been stripped from this extract - restore them from upstream.
+#include
+#include
+#include
+
+#include "zeek/script_opt/CPP/Compile.h"
+
+
+namespace zeek::detail {
+
+// Opens a generated-code block.  The level is raised before the brace is
+// emitted.
+void CPPCompile::StartBlock()
+	{
+	++block_level;
+	Emit("{");
+	}
+
+// Closes a generated-code block, optionally followed by a semicolon.  The
+// level is lowered after the brace is emitted.
+void CPPCompile::EndBlock(bool needs_semi)
+	{
+	Emit("}%s", needs_semi ? ";" : "");
+	--block_level;
+	}
+
+// Returns generated code that constructs a string value from the "len"
+// bytes at "b".
+// NOTE(review): the template argument of the emitted make_intrusive
+// appears to have been stripped from this extract.
+std::string CPPCompile::GenString(const char* b, int len) const
+	{
+	return std::string("make_intrusive(") + Fmt(len) +
+		", " + CPPEscape(b, len) + ")";
+	}
+
+// Returns the "len" bytes at "b" as a double-quoted C++ string literal.
+// Common control characters, backslash and double-quote use their
+// symbolic escapes; other non-printable bytes become 3-digit octal
+// escapes.
+std::string CPPCompile::CPPEscape(const char* b, int len) const
+	{
+	std::string res = "\"";
+
+	for ( int i = 0; i < len; ++i )
+		{
+		unsigned char c = b[i];
+
+		switch ( c ) {
+		case '\a':	res += "\\a"; break;
+		case '\b':	res += "\\b"; break;
+		case '\f':	res += "\\f"; break;
+		case '\n':	res += "\\n"; break;
+		case '\r':	res += "\\r"; break;
+		case '\t':	res += "\\t"; break;
+		case '\v':	res += "\\v"; break;
+
+		case '\\':	res += "\\\\"; break;
+		case '"':	res += "\\\""; break;
+
+		default:
+			if ( isprint(c) )
+				res += c;
+			else
+				{
+				// Always three octal digits, so a digit that
+				// follows in the data can't extend the escape.
+				char buf[8192];
+				snprintf(buf, sizeof buf, "%03o", c);
+				res += "\\";
+				res += buf;
+				}
+			break;
+		}
+		}
+
+	return res + "\"";
+	}
+
+// Writes one tab per current block level to the output file.
+void CPPCompile::Indent() const
+	{
+	for ( auto i = 0; i < block_level; ++i )
+		fprintf(write_file, "%s", "\t");
+	}
+
+} // zeek::detail
diff --git a/src/script_opt/CPP/Exprs.cc b/src/script_opt/CPP/Exprs.cc
new file mode 100644
index 0000000000..625f953928
--- /dev/null
+++ b/src/script_opt/CPP/Exprs.cc
@@ -0,0 +1,1226 @@
+// See the file "COPYING" in the main distribution directory for copyright.
+
+// NOTE(review): the targets of the three #include lines below appear to
+// have been stripped from this extract - restore them from upstream.
+#include
+#include
+#include
+
+#include "zeek/RE.h"
+#include "zeek/script_opt/ProfileFunc.h"
+#include "zeek/script_opt/CPP/Compile.h"
+
+
+namespace zeek::detail {
+
+// Generates "e" as a brace-enclosed initializer of value pointers.  A list
+// expression is expanded element by element, with nested lists wrapped as
+// index values.
+std::string CPPCompile::GenExprs(const Expr* e)
+	{
+	std::string gen;
+	if ( e->Tag() == EXPR_LIST )
+		gen = GenListExpr(e, GEN_VAL_PTR, true);
+	else
+		gen = GenExpr(e, GEN_VAL_PTR);
+
+	return std::string("{ ") + gen + " }";
+	}
+
+// Generates the elements of list expression "e", separated by ", ", each
+// in representation "gt".  If "nested" is true, elements that are
+// themselves lists are wrapped in index_val__CPP({...}).
+std::string CPPCompile::GenListExpr(const Expr* e, GenType gt, bool nested)
+	{
+	const auto& exprs = e->AsListExpr()->Exprs();
+	std::string gen;
+
+	int n = exprs.size();
+
+	for ( auto i = 0; i < n; ++i )
+		{
+		auto e_i = exprs[i];
+		auto gen_i = GenExpr(e_i, gt);
+
+		if ( nested && e_i->Tag() == EXPR_LIST )
+			// These are table or set indices.
+			gen_i = std::string("index_val__CPP({") + gen_i + "})";
+
+		gen += gen_i;
+
+		if ( i < n - 1 )
+			gen += ", ";
+		}
+
+	return gen;
+	}
+
+// Generates C++ code for expression "e" in representation "gt",
+// dispatching on the expression's tag.  "top_level" is passed on to the
+// generators for increment/decrement, assignment and add-to expressions.
+std::string CPPCompile::GenExpr(const Expr* e, GenType gt, bool top_level)
+	{
+	std::string gen;
+
+	switch ( e->Tag() ) {
+	case EXPR_NAME:		return GenNameExpr(e->AsNameExpr(), gt);
+	case EXPR_CONST:	return GenConstExpr(e->AsConstExpr(), gt);
+
+	case EXPR_CLONE:
+		gen = GenExpr(e->GetOp1(), GEN_VAL_PTR) + "->Clone()";
+		return GenericValPtrToGT(gen, e->GetType(), gt);
+
+	case EXPR_INCR:
+	case EXPR_DECR:
+		return GenIncrExpr(e, gt, e->Tag() == EXPR_INCR, top_level);
+
+	// For unary and binary operators, the final argument names the
+	// vector flavor of the operation.
+	case EXPR_NOT:		return GenUnary(e, gt, "!", "not");
+	case EXPR_COMPLEMENT:	return GenUnary(e, gt, "~", "comp");
+	case EXPR_POSITIVE:	return GenUnary(e, gt, "+", "pos");
+	case EXPR_NEGATE:	return GenUnary(e, gt, "-", "neg");
+
+	case EXPR_ADD:		return GenBinary(e, gt, "+", "add");
+	case EXPR_SUB:		return GenBinary(e, gt, "-", "sub");
+	case EXPR_REMOVE_FROM:	return GenBinary(e, gt, "-=");
+	case EXPR_TIMES:	return GenBinary(e, gt, "*", "mul");
+	case EXPR_DIVIDE:	return GenBinary(e, gt, "/", "div");
+	case EXPR_MOD:		return GenBinary(e, gt, "%", "mod");
+	case EXPR_AND:		return GenBinary(e, gt, "&", "and");
+	case EXPR_OR:		return GenBinary(e, gt, "|", "or");
+	case EXPR_XOR:		return GenBinary(e, gt, "^", "xor");
+	case EXPR_AND_AND:	return GenBinary(e, gt, "&&", "andand");
+	case EXPR_OR_OR:	return GenBinary(e, gt, "||", "oror");
+	case EXPR_LT:		return GenBinary(e, gt, "<", "lt");
+	case EXPR_LE:		return GenBinary(e, gt, "<=", "le");
+	case EXPR_GE:		return GenBinary(e, gt, ">=","ge");
+	case EXPR_GT:		return GenBinary(e, gt, ">", "gt");
+
+	case EXPR_EQ:		return GenEQ(e, gt, "==", "eq");
+	case EXPR_NE:		return GenEQ(e, gt, "!=", "ne");
+
+	case EXPR_COND:		return GenCondExpr(e, gt);
+	case EXPR_CALL:		return GenCallExpr(e->AsCallExpr(), gt);
+	case EXPR_LIST:		return GenListExpr(e, gt, false);
+	case EXPR_IN:		return GenInExpr(e, gt);
+	case EXPR_FIELD:	return GenFieldExpr(e->AsFieldExpr(), gt);
+	case EXPR_HAS_FIELD:	return GenHasFieldExpr(e->AsHasFieldExpr(), gt);
+	case EXPR_INDEX:	return GenIndexExpr(e, gt);
+	case EXPR_ASSIGN:	return GenAssignExpr(e, gt, top_level);
+	case EXPR_ADD_TO:	return GenAddToExpr(e, gt, top_level);
+	case EXPR_REF:		return GenExpr(e->GetOp1(), gt);
+	case EXPR_SIZE:		return GenSizeExpr(e, gt);
+	case EXPR_SCHEDULE:	return GenScheduleExpr(e);
+	case EXPR_LAMBDA:	return GenLambdaExpr(e);
+	case EXPR_IS:		return GenIsExpr(e, gt);
+
+	case EXPR_ARITH_COERCE:	return GenArithCoerceExpr(e, gt);
+	case EXPR_RECORD_COERCE:	return GenRecordCoerceExpr(e);
+	case EXPR_TABLE_COERCE:	return GenTableCoerceExpr(e);
+	case EXPR_VECTOR_COERCE:	return GenVectorCoerceExpr(e);
+
+	case EXPR_RECORD_CONSTRUCTOR:	return GenRecordConstructorExpr(e);
+	case EXPR_SET_CONSTRUCTOR:	return GenSetConstructorExpr(e);
+	case EXPR_TABLE_CONSTRUCTOR:	return GenTableConstructorExpr(e);
+	case EXPR_VECTOR_CONSTRUCTOR:	return GenVectorConstructorExpr(e);
+
+	case EXPR_EVENT:
+		// These should not wind up being directly generated,
+		// but instead deconstructed in the context of either
+		// a "schedule" expression or an "event" statement.
+		ASSERT(0);
+		// If assertions are compiled out, don't fall through into
+		// the cast generation below; return the same marker that
+		// the default case uses for unexpected expressions.
+		return std::string("EXPR");
+
+	case EXPR_CAST:
+		gen = std::string("cast_value_to_type__CPP(") +
+			GenExpr(e->GetOp1(), GEN_VAL_PTR) + ", " +
+			GenTypeName(e->GetType()) + ")";
+		return GenericValPtrToGT(gen, e->GetType(), gt);
+
+	case EXPR_FIELD_ASSIGN:
+	case EXPR_INDEX_SLICE_ASSIGN:
+	case EXPR_INLINE:
+		// These are only generated for reduced ASTs, which
+		// we shouldn't be compiling.
+		ASSERT(0);
+		// Falls through to the default case, which returns the
+		// marker string.
+
+	default:
+		// Intended to catch errors in overlooking the possible
+		// expressions that might appear.
+		return std::string("EXPR");
+	}
+	}
+
+// Generates code for a reference to the identifier of "ne".  Global
+// variables are accessed through their entry in "globals"; non-BiF
+// function names present in "globals" and all remaining identifiers are
+// referenced via IDNameStr().
+std::string CPPCompile::GenNameExpr(const NameExpr* ne, GenType gt)
+	{
+	const auto& t = ne->GetType();
+	auto n = ne->Id();
+	bool is_global_var = global_vars.count(n) > 0;
+
+	if ( t->Tag() == TYPE_FUNC && ! is_global_var )
+		{
+		auto func = n->Name();
+		if ( globals.count(func) > 0 &&
+		     pfs.BiFGlobals().count(n) == 0 )
+			return GenericValPtrToGT(IDNameStr(n), t, gt);
+		}
+
+	if ( is_global_var )
+		{
+		std::string gen;
+
+		if ( n->IsType() )
+			// NOTE(review): the template argument of the emitted
+			// make_intrusive appears to have been stripped from
+			// this extract.
+			gen = std::string("make_intrusive(") +
+				globals[n->Name()] +
+				"->GetType(), true)";
+
+		else
+			gen = globals[n->Name()] + "->GetVal()";
+
+		return GenericValPtrToGT(gen, t, gt);
+		}
+
+	return NativeToGT(IDNameStr(n), t, gt);
+	}
+
+// Generates code for constant "c": non-native constants refer to their
+// entry in "const_vals", native ones are generated in place by GenVal().
+std::string CPPCompile::GenConstExpr(const ConstExpr* c, GenType gt)
+	{
+	const auto& t = c->GetType();
+
+	if ( ! IsNativeType(t) )
+		return NativeToGT(const_vals[c->Value()], t, gt);
+
+	return NativeToGT(GenVal(c->ValuePtr()), t, gt);
+	}
+
+std::string CPPCompile::GenIncrExpr(const Expr* e, GenType gt, bool is_incr, bool top_level)
+	{
+	// For compound operands (table indexing, record fields),
+	// Zeek's interpreter will actually evaluate the operand
+	// twice, so easiest is to just transform this node
+	// into the expanded equivalent.
+	auto op = e->GetOp1();
+	auto one = e->GetType()->InternalType() == TYPE_INTERNAL_INT ?
+			val_mgr->Int(1) : val_mgr->Count(1);
+	auto one_e = make_intrusive(one);
+
+	ExprPtr rhs;
+	if ( is_incr )
+		rhs = make_intrusive(op, one_e);
+	else
+		rhs = make_intrusive(op, one_e);
+
+	auto assign = make_intrusive(op, rhs, false,
+					nullptr, nullptr, false);
+
+	// Make sure any newly created types are known to
+	// the profiler.
+	(void) pfs.HashType(one_e->GetType());
+	(void) pfs.HashType(rhs->GetType());
+	(void) pfs.HashType(assign->GetType());
+
+	auto gen = GenExpr(assign, GEN_DONT_CARE, top_level);
+
+	if ( ! top_level )
+		gen = "(" + gen + ", " + GenExpr(op, gt) + ")";
+
+	return gen;
+	}
+
+// Generates code for the conditional expression "e".  The condition is
+// generated natively and the two alternatives in representation "gt".
+// A vector-valued condition maps to vector_select__CPP(); otherwise the
+// result is a parenthesized C++ "?:" expression.
+std::string CPPCompile::GenCondExpr(const Expr* e, GenType gt)
+	{
+	auto op1 = e->GetOp1();
+	auto op2 = e->GetOp2();
+	auto op3 = e->GetOp3();
+
+	auto gen1 = GenExpr(op1, GEN_NATIVE);
+	auto gen2 = GenExpr(op2, gt);
+	auto gen3 = GenExpr(op3, gt);
+
+	if ( op1->GetType()->Tag() == TYPE_VECTOR )
+		return std::string("vector_select__CPP(") +
+			gen1 + ", " + gen2 + ", " + gen3 + ")";
+
+	// Parenthesize the conditional as a whole.  Other generators append
+	// text such as "->AsString()" directly to the result of GenExpr(),
+	// and without the outer parentheses that suffix would bind only to
+	// the third operand.
+	return std::string("((") + gen1 + ") ? (" +
+		gen2 + ") : (" + gen3 + "))";
+	}
+
+std::string CPPCompile::GenCallExpr(const CallExpr* c, GenType gt)
+	{
+	const auto& t = c->GetType();
+	auto f = c->Func();
+	auto args_l = c->Args();
+
+	auto gen = GenExpr(f, GEN_DONT_CARE);
+
+	if ( f->Tag() == EXPR_NAME )
+		{
+		auto f_id = f->AsNameExpr()->Id();
+		const auto& params = f_id->GetType()->AsFuncType()->Params();
+		auto id_name = f_id->Name();
+		auto fname = Canonicalize(id_name) + "_zf";
+
+		bool is_compiled = compiled_funcs.count(fname) > 0;
+		bool was_compiled = hashed_funcs.count(id_name) > 0;
+
+		if ( is_compiled || was_compiled )
+			{
+			if ( was_compiled )
+				fname = hashed_funcs[id_name];
+
+			if ( args_l->Exprs().length() > 0 )
+				gen = fname + "(" + GenArgs(params, args_l) +
+					", f__CPP)";
+			else
+				gen = fname + "(f__CPP)";
+
+			return NativeToGT(gen, t, gt);
+			}
+
+		// If the function isn't a BiF, then it will have been
+		// declared as a ValPtr (or a FuncValPtr, if a local),
+		// and we need to convert it to a Func*.
+		//
+		// If it is a BiF *that's also a global variable*, then
+		// we need to look up the BiF version of the global.
+		if ( pfs.BiFGlobals().count(f_id) == 0 )
+			gen += + "->AsFunc()";
+
+		else if ( pfs.Globals().count(f_id) > 0 )
+			// The BiF version has an extra "_", per
+			// AddBiF(..., true).
+			gen = globals[std::string(id_name) + "_"];
+		}
+
+	else
+		// Indirect call.
+		gen = std::string("(") + gen + ")->AsFunc()";
+
+	auto args_list = std::string(", {") +
+				GenExpr(args_l, GEN_VAL_PTR) + "}";
+	auto invoker = std::string("invoke__CPP(") +
+				gen + args_list + ", f__CPP)";
+
+	if ( IsNativeType(t) && gt != GEN_VAL_PTR )
+		return invoker + NativeAccessor(t);
+
+	return GenericValPtrToGT(invoker, t, gt);
+	}
+
+// Generates code for an "in" expression, choosing the form based on the
+// operand types: pattern in string, string in string, address in subnet,
+// index in vector, and otherwise a table/set lookup.
+std::string CPPCompile::GenInExpr(const Expr* e, GenType gt)
+	{
+	auto op1 = e->GetOp1();
+	auto op2 = e->GetOp2();
+
+	auto t1 = op1->GetType();
+	auto t2 = op2->GetType();
+
+	std::string gen;
+
+	if ( t1->Tag() == TYPE_PATTERN )
+		gen = std::string("(") + GenExpr(op1, GEN_DONT_CARE) +
+			")->MatchAnywhere(" +
+			GenExpr(op2, GEN_DONT_CARE) + "->AsString())";
+
+	else if ( t2->Tag() == TYPE_STRING )
+		gen = std::string("str_in__CPP(") +
+			GenExpr(op1, GEN_DONT_CARE) + "->AsString(), " +
+			GenExpr(op2, GEN_DONT_CARE) + "->AsString())";
+
+	else if ( t1->Tag() == TYPE_ADDR && t2->Tag() == TYPE_SUBNET )
+		gen = std::string("(") + GenExpr(op2, GEN_DONT_CARE) +
+			")->Contains(" + GenExpr(op1, GEN_VAL_PTR) + "->Get())";
+
+	else if ( t2->Tag() == TYPE_VECTOR )
+		gen = GenExpr(op2, GEN_DONT_CARE) + "->Has(" +
+			GenExpr(op1, GEN_NATIVE) + ")";
+
+	else
+		// The lookup's result is reduced to an explicit true/false.
+		gen = std::string("(") + GenExpr(op2, GEN_DONT_CARE) +
+			"->Find(index_val__CPP({" + GenExpr(op1, GEN_VAL_PTR) +
+			"})) ? true : false)";
+
+	return NativeToGT(gen, e->GetType(), gt);
+	}
+
+// Generates code for accessing a record field via field_access__CPP().
+std::string CPPCompile::GenFieldExpr(const FieldExpr* fe, GenType gt)
+	{
+	auto r = fe->GetOp1();
+	auto f = fe->Field();
+	auto f_s = GenField(r, f);
+
+	auto gen = std::string("field_access__CPP(") +
+			GenExpr(r, GEN_VAL_PTR) + ", " + f_s + ")";
+
+	return GenericValPtrToGT(gen, fe->GetType(), gt);
+	}
+
+std::string CPPCompile::GenHasFieldExpr(const HasFieldExpr* hfe, GenType gt)
+	{
+	auto r = hfe->GetOp1();
+	auto f = hfe->Field();
+	auto f_s = GenField(r, f);
+
+	// Need to use accessors for native types.
+	auto gen = std::string("(") + GenExpr(r, GEN_DONT_CARE) +
+			"->GetField(" + f_s + ") != nullptr)";
+
+	return NativeToGT(gen, hfe->GetType(), gt);
+	}
+
+// Generates code for indexing a table, vector (single element or a
+// two-index slice) or string.
+// NOTE(review): for any other aggregate type "gen" is left empty - confirm
+// that no other types can reach here.
+std::string CPPCompile::GenIndexExpr(const Expr* e, GenType gt)
+	{
+	auto aggr = e->GetOp1();
+	const auto& aggr_t = aggr->GetType();
+
+	std::string gen;
+
+	if ( aggr_t->Tag() == TYPE_TABLE )
+		gen = std::string("index_table__CPP(") +
+			GenExpr(aggr, GEN_NATIVE) + ", {" +
+			GenExpr(e->GetOp2(), GEN_VAL_PTR) + "})";
+
+	else if ( aggr_t->Tag() == TYPE_VECTOR )
+		{
+		const auto& op2 = e->GetOp2();
+		const auto& t2 = op2->GetType();
+		ASSERT(t2->Tag() == TYPE_LIST);
+
+		if ( t2->Tag() == TYPE_LIST &&
+		     t2->AsTypeList()->GetTypes().size() == 2 )
+			{
+			// Two indices: a slice from "first" to "last".
+			auto& inds = op2->AsListExpr()->Exprs();
+			auto first = inds[0];
+			auto last = inds[1];
+			gen = std::string("index_slice(") +
+				GenExpr(aggr, GEN_VAL_PTR) + ".get(), " +
+				GenExpr(first, GEN_NATIVE) + ", " +
+				GenExpr(last, GEN_NATIVE) + ")";
+			}
+		else
+			gen = std::string("index_vec__CPP(") +
+				GenExpr(aggr, GEN_NATIVE) + ", " +
+				GenExpr(e->GetOp2(), GEN_NATIVE) + ")";
+		}
+
+	else if ( aggr_t->Tag() == TYPE_STRING )
+		gen = std::string("index_string__CPP(") +
+			GenExpr(aggr, GEN_NATIVE) + ", {" +
+			GenExpr(e->GetOp2(), GEN_VAL_PTR) + "})";
+
+	return GenericValPtrToGT(gen, e->GetType(), gt);
+	}
+
+// Generates code for an assignment expression.  The RHS is generated both
+// natively and as a value pointer, adjusted when exactly one side is of
+// type "any", and then handed to GenAssign().
+std::string CPPCompile::GenAssignExpr(const Expr* e, GenType gt, bool top_level)
+	{
+	auto op1 = e->GetOp1()->AsRefExprPtr()->GetOp1();
+	auto op2 = e->GetOp2();
+
+	const auto& t1 = op1->GetType();
+	const auto& t2 = op2->GetType();
+
+	auto rhs_native = GenExpr(op2, GEN_NATIVE);
+	auto rhs_val_ptr = GenExpr(op2, GEN_VAL_PTR);
+
+	auto lhs_is_any = t1->Tag() == TYPE_ANY;
+	auto rhs_is_any = t2->Tag() == TYPE_ANY;
+
+	if ( lhs_is_any && ! rhs_is_any )
+		rhs_native = rhs_val_ptr;
+
+	if ( rhs_is_any && ! lhs_is_any && t1->Tag() != TYPE_LIST )
+		rhs_native = rhs_val_ptr =
+			GenericValPtrToGT(rhs_val_ptr, t1, GEN_NATIVE);
+
+	return GenAssign(op1, op2, rhs_native, rhs_val_ptr, gt, top_level);
+	}
+
+// Generates code for a "+=" expression: vector append, string
+// concatenation, an expansion to "x = x + y" for compound or global
+// LHS's, or a direct C++ "+=" otherwise.
+// NOTE(review): the template arguments of the make_intrusive calls below
+// (and of similar calls later in this hunk) appear to have been stripped
+// from this extract - restore from upstream.
+std::string CPPCompile::GenAddToExpr(const Expr* e, GenType gt, bool top_level)
+	{
+	const auto& t = e->GetType();
+
+	if ( t->Tag() == TYPE_VECTOR )
+		{
+		auto gen = std::string("vector_append__CPP(") +
+				GenExpr(e->GetOp1(), GEN_VAL_PTR) +
+				", " + GenExpr(e->GetOp2(), GEN_VAL_PTR) + ")";
+		return GenericValPtrToGT(gen, t, gt);
+		}
+
+	// Second GetOp1 is because for non-vectors, LHS will be a RefExpr.
+	auto lhs = e->GetOp1()->GetOp1();
+
+	if ( t->Tag() == TYPE_STRING )
+		{
+		auto rhs_native = GenBinaryString(e, GEN_NATIVE, "+=");
+		auto rhs_val_ptr = GenBinaryString(e, GEN_VAL_PTR, "+=");
+
+		return GenAssign(lhs, nullptr, rhs_native, rhs_val_ptr, gt, top_level);
+		}
+
+	if ( lhs->Tag() != EXPR_NAME || lhs->AsNameExpr()->Id()->IsGlobal() )
+		{
+		// LHS is a compound, or a global (and thus doesn't
+		// equate to a C++ variable); expand x += y to x = x + y
+		auto rhs = make_intrusive(lhs, e->GetOp2());
+		auto assign = make_intrusive(lhs, rhs, false, nullptr, nullptr, false);
+
+		// Make sure any newly created types are known to
+		// the profiler.
+		(void) pfs.HashType(rhs->GetType());
+		(void) pfs.HashType(assign->GetType());
+
+		return GenExpr(assign, gt, top_level);
+		}
+
+	return GenBinary(e, gt, "+=");
+	}
+
+// Generates code for the size operator applied to the operand of "e":
+// 1/0 for booleans, the value itself for unsigned values, an absolute
+// value for signed integers and doubles, and a SizeVal() call for
+// everything else.
+std::string CPPCompile::GenSizeExpr(const Expr* e, GenType gt)
+	{
+	const auto& t = e->GetType();
+	const auto& t1 = e->GetOp1()->GetType();
+	auto it = t1->InternalType();
+
+	auto gen = GenExpr(e->GetOp1(), GEN_NATIVE);
+
+	// The boolean test comes first, ahead of the checks on the
+	// internal type.
+	if ( t1->Tag() == TYPE_BOOL )
+		gen = std::string("((") + gen + ") ? 1 : 0)";
+
+	else if ( it == TYPE_INTERNAL_UNSIGNED )
+		// no-op
+		;
+
+	else if ( it == TYPE_INTERNAL_INT )
+		gen = std::string("iabs__CPP(") + gen + ")";
+
+	else if ( it == TYPE_INTERNAL_DOUBLE )
+		gen = std::string("fabs__CPP(") + gen + ")";
+
+	else
+		return GenericValPtrToGT(gen + "->SizeVal()", t, gt);
+
+	return NativeToGT(gen, t, gt);
+	}
+
+// Generates code for a "schedule" expression, registering the associated
+// event.  An interval-valued "when" is made absolute by adding the
+// network time.
+std::string CPPCompile::GenScheduleExpr(const Expr* e)
+	{
+	auto s = static_cast(e);
+	auto when = s->When();
+	auto event = s->Event();
+	std::string event_name(event->Handler()->Name());
+
+	RegisterEvent(event_name);
+
+	std::string when_s = GenExpr(when, GEN_NATIVE);
+	if ( when->GetType()->Tag() == TYPE_INTERVAL )
+		when_s += " + run_state::network_time";
+
+	return std::string("schedule__CPP(") + when_s +
+		", " + globals[event_name] + "_ev, { " +
+		GenExpr(event->Args(), GEN_VAL_PTR) + " })";
+	}
+
+// Generates code that constructs a function value for lambda "e", using
+// the generated "<name>_lb_cl" class for its body and passing along the
+// captures, if any.
+std::string CPPCompile::GenLambdaExpr(const Expr* e)
+	{
+	auto l = static_cast(e);
+	auto name = Canonicalize(l->Name().c_str()) + "_lb_cl";
+	auto cl_args = std::string("\"") + name + "\"";
+
+	if ( l->OuterIDs().size() > 0 )
+		cl_args = cl_args + GenLambdaClone(l, false);
+
+	auto body = std::string("make_intrusive<") + name + ">(" + cl_args + ")";
+	auto func = std::string("make_intrusive(\"") +
+			l->Name() + "\", cast_intrusive(" +
+			GenTypeName(l->GetType()) + "), " + body + ")";
+	return std::string("make_intrusive(") + func + ")";
+	}
+
+// Generates code for an "is" expression via can_cast_value_to_type().
+std::string CPPCompile::GenIsExpr(const Expr* e, GenType gt)
+	{
+	auto ie = static_cast(e);
+	auto gen = std::string("can_cast_value_to_type(") +
+			GenExpr(ie->GetOp1(), GEN_VAL_PTR) + ".get(), " +
+			GenTypeName(ie->TestType()) + ".get())";
+
+	return NativeToGT(gen, ie->GetType(), gt);
+	}
+
+// Generates code for an arithmetic coercion.  Coercions to the same type
+// are elided; vectors use the vec_coerce_<type>__CPP() helpers, scalars a
+// C++ functional cast.
+std::string CPPCompile::GenArithCoerceExpr(const Expr* e, GenType gt)
+	{
+	const auto& t = e->GetType();
+	auto op = e->GetOp1();
+
+	if ( same_type(t, op->GetType()) )
+		return GenExpr(op, gt);
+
+	bool is_vec = t->Tag() == TYPE_VECTOR;
+
+	// For vectors, what gets coerced is the element type.
+	auto coerce_t = is_vec ? t->Yield() : t;
+
+	std::string cast_name;
+
+	switch ( coerce_t->InternalType() ) {
+	case TYPE_INTERNAL_INT:	cast_name = "bro_int_t"; break;
+	case TYPE_INTERNAL_UNSIGNED:	cast_name = "bro_uint_t"; break;
+	case TYPE_INTERNAL_DOUBLE:	cast_name = "double"; break;
+
+	default:
+		reporter->InternalError("bad type in arithmetic coercion");
+	}
+
+	if ( is_vec )
+		return std::string("vec_coerce_") + cast_name +
+			"__CPP(" + GenExpr(op, GEN_NATIVE) +
+			", " + GenTypeName(t) + ")";
+
+	return NativeToGT(cast_name + "(" + GenExpr(op, GEN_NATIVE) + ")", t, gt);
+	}
+
+std::string CPPCompile::GenRecordCoerceExpr(const Expr* e)
+	{
+	auto rc = static_cast(e);
+	auto op1 = rc->GetOp1();
+	const auto& from_type = op1->GetType();
+	const auto& to_type = rc->GetType();
+
+	if ( same_type(from_type, to_type) )
+		// Elide coercion.
+		return GenExpr(op1, GEN_VAL_PTR);
+
+	const auto& map = rc->Map();
+	auto type_var = GenTypeName(to_type);
+
+	return std::string("coerce_to_record(cast_intrusive(") +
+		type_var + "), " + GenExpr(op1, GEN_VAL_PTR) + ".get(), " +
+		GenIntVector(map) + ")";
+	}
+
+// Generates code for coercing a table to the type of "e".
+// NOTE(review): the template arguments of the static_cast's and of the
+// emitted cast_intrusive/make_intrusive calls in this hunk appear to have
+// been stripped from this extract - restore from upstream.
+std::string CPPCompile::GenTableCoerceExpr(const Expr* e)
+	{
+	auto tc = static_cast(e);
+	const auto& t = tc->GetType();
+	auto op1 = tc->GetOp1();
+
+	return std::string("table_coerce__CPP(") + GenExpr(op1, GEN_VAL_PTR) +
+		", " + GenTypeName(t) + ")";
+	}
+
+// Generates code for coercing a vector to the type of "e".
+std::string CPPCompile::GenVectorCoerceExpr(const Expr* e)
+	{
+	auto vc = static_cast(e);
+	const auto& op = vc->GetOp1();
+	const auto& t = vc->GetType();
+
+	return std::string("vector_coerce__CPP(" + GenExpr(op, GEN_VAL_PTR) +
+				", " + GenTypeName(t) + ")");
+	}
+
+// Generates code for a record constructor.  Each element is expected to
+// be a field assignment, whose value is passed along in order.
+std::string CPPCompile::GenRecordConstructorExpr(const Expr* e)
+	{
+	auto rc = static_cast(e);
+	const auto& t = rc->GetType();
+	const auto& exprs = rc->Op()->AsListExpr()->Exprs();
+	auto n = exprs.length();
+
+	std::string vals;
+
+	for ( auto i = 0; i < n; ++i )
+		{
+		const auto& e = exprs[i];
+
+		ASSERT(e->Tag() == EXPR_FIELD_ASSIGN);
+
+		vals += GenExpr(e->GetOp1(), GEN_VAL_PTR);
+
+		if ( i < n - 1 )
+			vals += ", ";
+		}
+
+	return std::string("record_constructor__CPP({") + vals + "}, " +
+		"cast_intrusive(" + GenTypeName(t) + "))";
+	}
+
+// Generates code for a set constructor, including its attributes as
+// parallel lists of tags and values.
+std::string CPPCompile::GenSetConstructorExpr(const Expr* e)
+	{
+	auto sc = static_cast(e);
+	const auto& t = sc->GetType();
+	auto attrs = sc->GetAttrs();
+
+	std::string attr_tags;
+	std::string attr_vals;
+	BuildAttrs(attrs, attr_tags, attr_vals);
+
+	return std::string("set_constructor__CPP(") +
+		GenExprs(sc->GetOp1().get()) + ", " +
+		"cast_intrusive(" + GenTypeName(t) + "), " +
+		attr_tags + ", " + attr_vals + ")";
+	}
+
+// Generates code for a table constructor.  Indices and values are passed
+// as two parallel lists, followed by the table's type and its attributes.
+std::string CPPCompile::GenTableConstructorExpr(const Expr* e)
+	{
+	auto tc = static_cast(e);
+	const auto& t = tc->GetType();
+	auto attrs = tc->GetAttrs();
+
+	std::string attr_tags;
+	std::string attr_vals;
+	BuildAttrs(attrs, attr_tags, attr_vals);
+
+	std::string indices;
+	std::string vals;
+
+	const auto& exprs = tc->GetOp1()->AsListExpr()->Exprs();
+	auto n = exprs.length();
+
+	for ( auto i = 0; i < n; ++i )
+		{
+		const auto& e = exprs[i];
+
+		ASSERT(e->Tag() == EXPR_ASSIGN);
+
+		auto index = e->GetOp1();
+		auto v = e->GetOp2();
+
+		if ( index->Tag() == EXPR_LIST )
+			// Multiple indices.
+			indices += "index_val__CPP({" +
+				GenExpr(index, GEN_VAL_PTR) + "})";
+		else
+			indices += GenExpr(index, GEN_VAL_PTR);
+
+		vals += GenExpr(v, GEN_VAL_PTR);
+
+		if ( i < n - 1 )
+			{
+			indices += ", ";
+			vals += ", ";
+			}
+		}
+
+	return std::string("table_constructor__CPP({") +
+		indices + "}, {" + vals + "}, " +
+		"cast_intrusive(" + GenTypeName(t) + "), " +
+		attr_tags + ", " + attr_vals + ")";
+	}
+
+// Generates code for a vector constructor.
+std::string CPPCompile::GenVectorConstructorExpr(const Expr* e)
+	{
+	auto vc = static_cast(e);
+	const auto& t = vc->GetType();
+
+	return std::string("vector_constructor__CPP({") +
+		GenExpr(vc->GetOp1(), GEN_VAL_PTR) + "}, " +
+		"cast_intrusive(" + GenTypeName(t) + "))";
+	}
+
+// Generates the C++ text for value "v".  Booleans, enums, ports and
+// doubles are handled specially; everything else uses the value's own
+// description, with quoting enabled.
+std::string CPPCompile::GenVal(const ValPtr& v)
+	{
+	const auto& t = v->GetType();
+	auto tag = t->Tag();
+	auto it = t->InternalType();
+
+	if ( tag == TYPE_BOOL )
+		return std::string(v->IsZero() ? "false" : "true");
+
+	if ( tag == TYPE_ENUM )
+		return GenEnum(t, v);
+
+	if ( tag == TYPE_PORT )
+		return Fmt(v->AsCount());
+
+	if ( it == TYPE_INTERNAL_DOUBLE )
+		return Fmt(v->AsDouble());
+
+	ODesc d;
+	d.SetQuotes(true);
+	v->Describe(&d);
+	return d.Description();
+	}
+
+// Generates code for unary operator "op" applied to the operand of "e".
+// Vector-valued expressions use the vector operation named by "vec_op".
+std::string CPPCompile::GenUnary(const Expr* e, GenType gt,
+					const char* op, const char* vec_op)
+	{
+	if ( e->GetType()->Tag() == TYPE_VECTOR )
+		return GenVectorOp(e, GenExpr(e->GetOp1(), GEN_NATIVE), vec_op);
+
+	return NativeToGT(std::string(op) + "(" +
+				GenExpr(e->GetOp1(), GEN_NATIVE) + ")",
+				e->GetType(), gt);
+	}
+
+std::string CPPCompile::GenBinary(const Expr* e, GenType gt,
+					const char* op, const char* vec_op)
+	{
+	const auto& op1 = e->GetOp1();
+	const auto& op2 = e->GetOp2();
+	auto t = op1->GetType();
+
+	if ( e->GetType()->Tag() == TYPE_VECTOR )
+		{
+		auto gen1 = GenExpr(op1, GEN_NATIVE);
+		auto gen2 = GenExpr(op2, GEN_NATIVE);
+
+		if ( t->Tag() == TYPE_VECTOR &&
+		     t->Yield()->Tag() == TYPE_STRING &&
+		     op2->GetType()->Tag() == TYPE_VECTOR )
+			return std::string("vec_str_op_") + vec_op + "__CPP(" +
+				gen1 + ", " + gen2 + ")";
+
+		return GenVectorOp(e, gen1, gen2, vec_op);
+		}
+
+	if ( t->IsSet() )
+		return GenBinarySet(e, gt, op);
+
+	// The following is only used for internal int/uint/double
+	// operations.  For those, it holds the prefix we use to
+	// distinguish different instances of inlined functions
+	// employed to support an operation.
+	std::string flavor;
+
+	switch ( t->InternalType() ) {
+	case TYPE_INTERNAL_INT:		flavor = "i"; break;
+	case TYPE_INTERNAL_UNSIGNED:	flavor = "u"; break;
+	case TYPE_INTERNAL_DOUBLE:	flavor = "f"; break;
+
+	case TYPE_INTERNAL_STRING:	return GenBinaryString(e, gt, op);
+	case TYPE_INTERNAL_ADDR:	return GenBinaryAddr(e, gt, op);
+	case TYPE_INTERNAL_SUBNET:	return GenBinarySubNet(e, gt, op);
+
+	default:
+		if ( t->Tag() == TYPE_PATTERN )
+			return GenBinaryPattern(e, gt, op);
+		break;
+	}
+
+	auto g1 = GenExpr(e->GetOp1(), GEN_NATIVE);
+	auto g2 = GenExpr(e->GetOp2(), GEN_NATIVE);
+
+	std::string gen;
+
+	if ( e->Tag() == EXPR_DIVIDE )
+		gen = flavor + "div__CPP(" + g1 + ", " + g2 + ")";
+
+	else if ( e->Tag() == EXPR_MOD )
+		gen = flavor + "mod__CPP(" + g1 + ", " + g2 + ")";
+
+	else
+		gen = std::string("(") + g1 + ")" + op + "(" + g2 + ")";
+
+	return NativeToGT(gen, e->GetType(), gt);
+	}
+
+// Generates code for a binary operation on two sets, mapping the
+// expression's tag to the corresponding table method.
+std::string CPPCompile::GenBinarySet(const Expr* e, GenType gt, const char* op)
+	{
+	auto v1 = GenExpr(e->GetOp1(), GEN_DONT_CARE) + "->AsTableVal()";
+	auto v2 = GenExpr(e->GetOp2(), GEN_DONT_CARE) + "->AsTableVal()";
+
+	std::string res;
+
+	switch ( e->Tag() ) {
+	case EXPR_AND:
+		res = v1 + "->Intersection(*" + v2 + ")";
+		break;
+
+	case EXPR_OR:
+		res = v1 + "->Union(" + v2 + ")";
+		break;
+
+	case EXPR_SUB:
+		res = v1 + "->TakeOut(" + v2 + ")";
+		break;
+
+	case EXPR_EQ:
+		res = v1 + "->EqualTo(*" + v2 + ")";
+		break;
+
+	case EXPR_NE:
+		res = std::string("! ") + v1 + "->EqualTo(*" + v2 + ")";
+		break;
+
+	case EXPR_LE:
+		res = v1 + "->IsSubsetOf(*" + v2 + ")";
+		break;
+
+	case EXPR_LT:
+		// A subset that is also strictly smaller.
+		res = std::string("(") + v1 + "->IsSubsetOf(*" + v2 + ") &&" +
+			v1 + "->Size() < " + v2 + "->Size())";
+		break;
+
+	default:
+		reporter->InternalError("bad type in CPPCompile::GenBinarySet");
+	}
+
+	return NativeToGT(res, e->GetType(), gt);
+	}
+
+// Generates code for a binary operation on two strings: concatenation for
+// "+" and "+=", and otherwise a comparison of the Bstr_cmp() result
+// against 0 using "op".
+std::string CPPCompile::GenBinaryString(const Expr* e, GenType gt,
+					const char* op)
+	{
+	auto v1 = GenExpr(e->GetOp1(), GEN_DONT_CARE) + "->AsString()";
+	auto v2 = GenExpr(e->GetOp2(), GEN_DONT_CARE) + "->AsString()";
+
+	std::string res;
+
+	if ( e->Tag() == EXPR_ADD || e->Tag() == EXPR_ADD_TO )
+		res = std::string("str_concat__CPP(") + v1 + ", " + v2 + ")";
+	else
+		res = std::string("(Bstr_cmp(") + v1 + ", " + v2 + ") " + op + " 0)";
+
+	return NativeToGT(res, e->GetType(), gt);
+	}
+
+// Generates code for combining two patterns: conjunction for EXPR_AND,
+// disjunction for any other tag.
+// NOTE(review): the template argument of the emitted make_intrusive
+// appears to have been stripped from this extract.
+std::string CPPCompile::GenBinaryPattern(const Expr* e, GenType gt,
+						const char* op)
+	{
+	auto v1 = GenExpr(e->GetOp1(), GEN_DONT_CARE) + "->AsPattern()";
+	auto v2 = GenExpr(e->GetOp2(), GEN_DONT_CARE) + "->AsPattern()";
+
+	auto func = e->Tag() == EXPR_AND ?
+			"RE_Matcher_conjunction" : "RE_Matcher_disjunction";
+
+	return NativeToGT(std::string("make_intrusive(") +
+		func + "(" + v1 + ", " + v2 + "))", e->GetType(), gt);
+	}
+
+// Generates code for a binary operation whose first operand is an
+// address: masking for EXPR_DIVIDE, otherwise "op" applied directly to
+// the two addresses.
+std::string CPPCompile::GenBinaryAddr(const Expr* e, GenType gt, const char* op)
+	{
+	auto v1 = GenExpr(e->GetOp1(), GEN_DONT_CARE) + "->AsAddr()";
+
+	if ( e->Tag() == EXPR_DIVIDE )
+		{
+		auto gen = std::string("addr_mask__CPP(") + v1 + ", " +
+				GenExpr(e->GetOp2(), GEN_NATIVE) + ")";
+
+		return NativeToGT(gen, e->GetType(), gt);
+		}
+
+	auto v2 = GenExpr(e->GetOp2(), GEN_DONT_CARE) + "->AsAddr()";
+
+	return NativeToGT(v1 + op + v2, e->GetType(), gt);
+	}
+
+// Generates code applying "op" directly to two subnets.
+std::string CPPCompile::GenBinarySubNet(const Expr* e, GenType gt,
+					const char* op)
+	{
+	auto v1 = GenExpr(e->GetOp1(), GEN_DONT_CARE) + "->AsSubNet()";
+	auto v2 = GenExpr(e->GetOp2(), GEN_DONT_CARE) + "->AsSubNet()";
+
+	return NativeToGT(v1 + op + v2, e->GetType(), gt);
+	}
+
+// Generates code for an equality or inequality comparison.  Vectors,
+// patterns and functions are handled here; everything else is deferred
+// to GenBinary().
+std::string CPPCompile::GenEQ(const Expr* e, GenType gt,
+				const char* op, const char* vec_op)
+	{
+	auto op1 = e->GetOp1();
+	auto op2 = e->GetOp2();
+
+	if ( e->GetType()->Tag() == TYPE_VECTOR )
+		{
+		auto gen1 = GenExpr(op1, GEN_NATIVE);
+		auto gen2 = GenExpr(op2, GEN_NATIVE);
+		return GenVectorOp(e, gen1, gen2, vec_op);
+		}
+
+	auto tag = op1->GetType()->Tag();
+	// Prefix that turns the "equal" form into "not equal".
+	std::string negated(e->Tag() == EXPR_EQ ? "" : "! ");
+
+	if ( tag == TYPE_PATTERN )
+		return NativeToGT(negated + GenExpr(op1, GEN_DONT_CARE) +
+					"->MatchExactly(" +
+					GenExpr(op2, GEN_DONT_CARE) + "->AsString())",
+					e->GetType(), gt);
+
+	if ( tag == TYPE_FUNC )
+		{
+		auto gen_f1 = GenExpr(op1, GEN_DONT_CARE);
+		auto gen_f2 = GenExpr(op2, GEN_DONT_CARE);
+
+		gen_f1 += "->AsFunc()";
+		gen_f2 += "->AsFunc()";
+
+		auto gen = std::string("(") + gen_f1 + "==" + gen_f2 + ")";
+
+		return NativeToGT(negated + gen, e->GetType(), gt);
+		}
+
+	return GenBinary(e, gt, op, vec_op);
+	}
+
+// Generates code for assigning to "lhs", dispatching on the kind of LHS
+// expression (name, index, field or list).  "rhs_native" and
+// "rhs_val_ptr" are the already-generated RHS in its two representations.
+std::string CPPCompile::GenAssign(const ExprPtr& lhs, const ExprPtr& rhs,
+					const std::string& rhs_native,
+					const std::string& rhs_val_ptr,
+					GenType gt, bool top_level)
+	{
+	switch ( lhs->Tag() ) {
+	case EXPR_NAME:
+		return GenDirectAssign(lhs, rhs_native, rhs_val_ptr, gt, top_level);
+
+	case EXPR_INDEX:
+		return GenIndexAssign(lhs, rhs, rhs_val_ptr, gt, top_level);
+
+	case EXPR_FIELD:
+		return GenFieldAssign(lhs, rhs, rhs_val_ptr, gt, top_level);
+
+	case EXPR_LIST:
+		return GenListAssign(lhs, rhs);
+
+	default:
+		reporter->InternalError("bad assignment node in CPPCompile::GenAssign");
+		return "XXX";
+	}
+	}
+
+std::string CPPCompile::GenDirectAssign(const ExprPtr& lhs,
+					const std::string& rhs_native,
+					const std::string& rhs_val_ptr,
+					GenType gt, bool top_level)
+	{
+	auto n = lhs->AsNameExpr()->Id();
+	auto name = IDNameStr(n);
+
+	std::string gen;
+
+	if ( n->IsGlobal() )
+		{
+		const auto& t = n->GetType();
+		auto gn = globals[n->Name()];
+
+		if ( t->Tag() == TYPE_FUNC &&
+		     t->AsFuncType()->Flavor() == FUNC_FLAVOR_EVENT )
+			{
+			gen = std::string("set_event__CPP(") + gn + ", " +
+				rhs_val_ptr + ", " + gn + "_ev)";
+
+			if ( !
top_level ) + gen = GenericValPtrToGT(gen, n->GetType(), gt); + } + + else if ( top_level ) + gen = gn + "->SetVal(" + rhs_val_ptr + ")"; + + else + { + gen = std::string("set_global__CPP(") + + gn + ", " + rhs_val_ptr + ")"; + gen = GenericValPtrToGT(gen, n->GetType(), gt); + } + } + else + gen = name + " = " + rhs_native; + + return gen; + } + +std::string CPPCompile::GenIndexAssign(const ExprPtr& lhs, const ExprPtr& rhs, + const std::string& rhs_val_ptr, + GenType gt, bool top_level) + { + auto gen = std::string("assign_to_index__CPP("); + + gen += GenExpr(lhs->GetOp1(), GEN_VAL_PTR) + ", " + "index_val__CPP({" + + GenExpr(lhs->GetOp2(), GEN_VAL_PTR) + "}), " + rhs_val_ptr + ")"; + + if ( ! top_level ) + gen = GenericValPtrToGT(gen, rhs->GetType(), gt); + + return gen; + } + +std::string CPPCompile::GenFieldAssign(const ExprPtr& lhs, const ExprPtr& rhs, + const std::string& rhs_val_ptr, + GenType gt, bool top_level) + { + auto rec = lhs->GetOp1(); + auto rec_gen = GenExpr(rec, GEN_VAL_PTR); + auto field = GenField(rec, lhs->AsFieldExpr()->Field()); + + if ( top_level ) + return rec_gen + "->Assign(" + field + ", " + rhs_val_ptr + ")"; + else + { + auto gen = std::string("assign_field__CPP(") + rec_gen + + ", " + field + ", " + rhs_val_ptr + ")"; + return GenericValPtrToGT(gen, rhs->GetType(), gt); + } + } + +std::string CPPCompile::GenListAssign(const ExprPtr& lhs, const ExprPtr& rhs) + { + if ( rhs->Tag() != EXPR_NAME ) + reporter->InternalError("compound RHS expression in multi-assignment"); + + std::string gen; + const auto& vars = lhs->AsListExpr()->Exprs(); + + auto n = vars.length(); + for ( auto i = 0; i < n; ++i ) + { + const auto& var_i = vars[i]; + if ( var_i->Tag() != EXPR_NAME ) + reporter->InternalError("compound LHS expression in multi-assignment"); + const auto& t_i = var_i->GetType(); + auto var = var_i->AsNameExpr(); + + auto rhs_i_base = GenExpr(rhs, GEN_DONT_CARE); + rhs_i_base += "->AsListVal()->Idx(" + Fmt(i) + ")"; + + auto rhs_i = 
GenericValPtrToGT(rhs_i_base, t_i, GEN_NATIVE); + + gen += IDNameStr(var->Id()) + " = " + rhs_i; + + if ( i < n - 1 ) + gen += ", "; + } + + return "(" + gen + ")"; + } + +std::string CPPCompile::GenVectorOp(const Expr* e, std::string op, + const char* vec_op) + { + auto gen = std::string("vec_op_") + vec_op + "__CPP(" + op + ")"; + + if ( ! IsArithmetic(e->GetType()->Yield()->Tag()) ) + gen = std::string("vector_coerce_to__CPP(") + gen + ", " + + GenTypeName(e->GetType()) + ")"; + + return gen; + } + +std::string CPPCompile::GenVectorOp(const Expr* e, std::string op1, + std::string op2, const char* vec_op) + { + auto invoke = std::string(vec_op) + "__CPP(" + op1 + ", " + op2 + ")"; + + if ( e->GetOp1()->GetType()->Yield()->Tag() == TYPE_STRING ) + return std::string("str_vec_op_") + invoke; + + auto gen = std::string("vec_op_") + invoke; + + auto yt = e->GetType()->Yield()->Tag(); + if ( ! IsArithmetic(yt) && yt != TYPE_STRING ) + gen = std::string("vector_coerce_to__CPP(") + gen + ", " + + GenTypeName(e->GetType()) + ")"; + + return gen; + } + +std::string CPPCompile::GenLambdaClone(const LambdaExpr* l, bool all_deep) + { + auto& ids = l->OuterIDs(); + const auto& captures = l->GetType()->GetCaptures(); + + std::string cl_args; + + for ( const auto& id : ids ) + { + const auto& id_t = id->GetType(); + auto arg = LocalName(id); + + if ( captures && ! 
IsNativeType(id_t) ) + { + for ( const auto& c : *captures ) + if ( id == c.id && (c.deep_copy || all_deep) ) + arg = std::string("cast_intrusive<") + TypeName(id_t) + ">(" + arg + "->Clone())"; + } + + cl_args = cl_args + ", " + arg; + } + + return cl_args; + } + +std::string CPPCompile::GenIntVector(const std::vector& vec) + { + std::string res("{ "); + + for ( auto i = 0; i < vec.size(); ++i ) + { + res += Fmt(vec[i]); + + if ( i < vec.size() - 1 ) + res += ", "; + } + + return res + " }"; + } + +std::string CPPCompile::GenField(const ExprPtr& rec, int field) + { + auto t = TypeRep(rec->GetType()); + auto rt = t->AsRecordType(); + + if ( field < rt->NumOrigFields() ) + // Can use direct access. + return Fmt(field); + + // Need to dynamically map the field. + int mapping_slot; + + if ( record_field_mappings.count(rt) > 0 && + record_field_mappings[rt].count(field) > 0 ) + // We're already tracking this field. + mapping_slot = record_field_mappings[rt][field]; + + else + { + // New mapping. + mapping_slot = num_rf_mappings++; + + std::string field_name = rt->FieldName(field); + field_decls.emplace_back(std::pair(rt, rt->FieldDecl(field))); + + if ( record_field_mappings.count(rt) > 0 ) + // We're already tracking this record. + record_field_mappings[rt][field] = mapping_slot; + else + { + // Need to start tracking this record. + std::unordered_map rt_mapping; + rt_mapping[field] = mapping_slot; + record_field_mappings[rt] = rt_mapping; + } + } + + return std::string("field_mapping[") + Fmt(mapping_slot) + "]"; + } + +std::string CPPCompile::GenEnum(const TypePtr& t, const ValPtr& ev) + { + auto et = TypeRep(t)->AsEnumType(); + auto v = ev->AsEnum(); + + if ( ! et->HasRedefs() ) + // Can use direct access. + return Fmt(v); + + // Need to dynamically map the access. + int mapping_slot; + + if ( enum_val_mappings.count(et) > 0 && + enum_val_mappings[et].count(v) > 0 ) + // We're already tracking this value. 
+ mapping_slot = enum_val_mappings[et][v]; + + else + { + // New mapping. + mapping_slot = num_ev_mappings++; + + std::string enum_name = et->Lookup(v); + enum_names.emplace_back(std::pair(et, std::move(enum_name))); + + if ( enum_val_mappings.count(et) > 0 ) + { + // We're already tracking this enum. + enum_val_mappings[et][v] = mapping_slot; + } + else + { + // Need to start tracking this enum. + std::unordered_map et_mapping; + et_mapping[v] = mapping_slot; + enum_val_mappings[et] = et_mapping; + } + } + + return std::string("enum_mapping[") + Fmt(mapping_slot) + "]"; + } + +} // zeek::detail diff --git a/src/script_opt/CPP/Func.cc b/src/script_opt/CPP/Func.cc new file mode 100644 index 0000000000..337f6d3825 --- /dev/null +++ b/src/script_opt/CPP/Func.cc @@ -0,0 +1,66 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +#include + +#include "zeek/Desc.h" +#include "zeek/broker/Data.h" +#include "zeek/script_opt/CPP/Func.h" + + +namespace zeek::detail { + +std::unordered_map compiled_scripts; +std::unordered_map standalone_callbacks; +std::vector standalone_activations; + +void CPPFunc::Describe(ODesc* d) const + { + d->AddSP("compiled function"); + d->Add(name); + } + +CPPLambdaFunc::CPPLambdaFunc(std::string _name, FuncTypePtr ft, + CPPStmtPtr _l_body) +: ScriptFunc(std::move(_name), std::move(ft), {_l_body}, {0}) + { + l_body = std::move(_l_body); + } + +broker::expected CPPLambdaFunc::SerializeClosure() const + { + auto vals = l_body->SerializeLambdaCaptures(); + + broker::vector rval; + rval.emplace_back(std::string("CopyFrame")); + + broker::vector body; + + for ( int i = 0; i < vals.size(); ++i ) + { + const auto& val = vals[i]; + auto expected = Broker::detail::val_to_data(val.get()); + if ( ! 
expected ) + return broker::ec::invalid_data; + + TypeTag tag = val->GetType()->Tag(); + broker::vector val_tuple {std::move(*expected), + static_cast(tag)}; + body.emplace_back(std::move(val_tuple)); + } + + rval.emplace_back(std::move(body)); + + return {std::move(rval)}; + } + +void CPPLambdaFunc::SetCaptures(Frame* f) + { + l_body->SetLambdaCaptures(f); + } + +FuncPtr CPPLambdaFunc::DoClone() + { + return make_intrusive(name, type, l_body->Clone()); + } + +} // zeek::detail diff --git a/src/script_opt/CPP/Func.h b/src/script_opt/CPP/Func.h new file mode 100644 index 0000000000..187e2772df --- /dev/null +++ b/src/script_opt/CPP/Func.h @@ -0,0 +1,120 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +// Subclasses of Func and Stmt to support C++-generated code, along +// with tracking of that code to enable hooking into it at run-time. + +#pragma once + +#include "zeek/Func.h" +#include "zeek/script_opt/ProfileFunc.h" + +namespace zeek { + +namespace detail { + +// A subclass of Func used for lambdas that the compiler creates for +// complex initializations (expressions used in type attributes). +// The usage is via derivation from this class, rather than direct +// use of it. + +class CPPFunc : public Func { +public: + bool IsPure() const override { return is_pure; } + + void Describe(ODesc* d) const override; + +protected: + // Constructor used when deriving subclasses. + CPPFunc(const char* _name, bool _is_pure) + { + name = _name; + is_pure = _is_pure; + } + + std::string name; + bool is_pure; +}; + + +// A subclass of Stmt used to replace a function/event handler/hook body. + +class CPPStmt : public Stmt { +public: + CPPStmt(const char* _name) : Stmt(STMT_CPP), name(_name) { } + + const std::string& Name() { return name; } + + // Sets/returns a hash associated with this statement. A value + // of 0 means "not set". 
+ p_hash_type GetHash() const { return hash; } + void SetHash(p_hash_type h) { hash = h; } + + // The following only get defined by lambda bodies. + virtual void SetLambdaCaptures(Frame* f) { } + virtual std::vector SerializeLambdaCaptures() const + { return std::vector{}; } + + virtual IntrusivePtr Clone() + { + return {NewRef{}, this}; + } + +protected: + // This method being called means that the inliner is running + // on compiled code, which shouldn't happen. + StmtPtr Duplicate() override { ASSERT(0); return ThisPtr(); } + + TraversalCode Traverse(TraversalCallback* cb) const override + { return TC_CONTINUE; } + + std::string name; + p_hash_type hash = 0ULL; +}; + +using CPPStmtPtr = IntrusivePtr; + + +// For script-level lambdas, a ScriptFunc subclass that knows how to +// deal with its captures for serialization. Different from CPPFunc in +// that CPPFunc is for lambdas generated directly by the compiler, +// rather than those explicitly present in scripts. + +class CPPLambdaFunc : public ScriptFunc { +public: + CPPLambdaFunc(std::string name, FuncTypePtr ft, CPPStmtPtr l_body); + + bool HasCopySemantics() const override { return true; } + +protected: + // Methods related to sending lambdas via Broker. + broker::expected SerializeClosure() const override; + void SetCaptures(Frame* f) override; + + FuncPtr DoClone() override; + + CPPStmtPtr l_body; +}; + + +// Information associated with a given compiled script body: its +// Stmt subclass, priority, and any events that should be registered +// upon instantiating the body. +struct CompiledScript { + CPPStmtPtr body; + int priority; + std::vector events; +}; + +// Maps hashes to compiled information. +extern std::unordered_map compiled_scripts; + +// Maps hashes to standalone script initialization callbacks. +extern std::unordered_map standalone_callbacks; + +// Standalone callbacks marked for activation by calls to the +// load_CPP() BiF. 
+extern std::vector standalone_activations; + +} // namespace detail + +} // namespace zeek diff --git a/src/script_opt/CPP/GenFunc.cc b/src/script_opt/CPP/GenFunc.cc new file mode 100644 index 0000000000..c8968720ab --- /dev/null +++ b/src/script_opt/CPP/GenFunc.cc @@ -0,0 +1,247 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +#include +#include +#include + +#include "zeek/script_opt/CPP/Compile.h" + + +namespace zeek::detail { + +void CPPCompile::CompileFunc(const FuncInfo& func) + { + if ( ! IsCompilable(func) ) + return; + + auto fname = Canonicalize(BodyName(func).c_str()) + "_zf"; + auto pf = func.Profile(); + auto f = func.Func(); + auto body = func.Body(); + + DefineBody(f->GetType(), pf, fname, body, nullptr, f->Flavor()); + } + +void CPPCompile::CompileLambda(const LambdaExpr* l, const ProfileFunc* pf) + { + auto lname = Canonicalize(l->Name().c_str()) + "_lb"; + auto body = l->Ingredients().body; + auto l_id = l->Ingredients().id; + auto& ids = l->OuterIDs(); + + DefineBody(l_id->GetType(), pf, lname, body, &ids, + FUNC_FLAVOR_FUNCTION); + } + +void CPPCompile::GenInvokeBody(const std::string& fname, const TypePtr& t, + const std::string& args) + { + auto call = fname + "(" + args + ")"; + + if ( ! t || t->Tag() == TYPE_VOID ) + { + Emit("%s;", call); + Emit("return nullptr;"); + } + else + Emit("return %s;", NativeToGT(call, t, GEN_VAL_PTR)); + } + +void CPPCompile::DefineBody(const FuncTypePtr& ft, const ProfileFunc* pf, + const std::string& fname, const StmtPtr& body, + const IDPList* lambda_ids, FunctionFlavor flavor) + { + locals.clear(); + params.clear(); + + body_name = fname; + + ret_type = ft->Yield(); + in_hook = flavor == FUNC_FLAVOR_HOOK; + auto ret_type_str = in_hook ? "bool" : FullTypeName(ret_type); + + for ( const auto& p : pf->Params() ) + params.emplace(p); + + NL(); + + Emit("%s %s(%s)", ret_type_str, fname, ParamDecl(ft, lambda_ids, pf)); + + StartBlock(); + + // Deal with "any" parameters, if any. 
+ TranslateAnyParams(ft, pf); + + // Make sure that any events referred to in this function have + // been initialized. + InitializeEvents(pf); + + // Create the local variables. + DeclareLocals(pf, lambda_ids); + + GenStmt(body); + + if ( in_hook ) + { + Emit("return true;"); + in_hook = false; + } + + // Seatbelts for running off the end of a function that's supposed + // to return a non-native type. + if ( ! IsNativeType(ret_type) ) + Emit("return nullptr;"); + + EndBlock(); + } + +void CPPCompile::TranslateAnyParams(const FuncTypePtr& ft, const ProfileFunc* pf) + { + const auto& formals = ft->Params(); + int n = formals->NumFields(); + + for ( auto i = 0; i < n; ++i ) + { + const auto& t = formals->GetFieldType(i); + if ( t->Tag() != TYPE_ANY ) + // Not a relevant parameter. + continue; + + auto param_id = FindParam(i, pf); + if ( ! param_id ) + // Parameter isn't used, skip it. + continue; + + const auto& pt = param_id->GetType(); + if ( pt->Tag() == TYPE_ANY ) + // It's already "any", nothing more to do. + continue; + + auto any_i = std::string("any_param__CPP_") + Fmt(i); + + Emit("%s %s = %s;", FullTypeName(pt), LocalName(param_id), + GenericValPtrToGT(any_i, pt, GEN_NATIVE)); + } + } + +void CPPCompile::InitializeEvents(const ProfileFunc* pf) + { + // Make sure that any events referred to in this function have + // been initialized. We have to do this dynamically because it + // depends on whether the final script using the compiled code + // happens to load the associated event handler + for ( const auto& e : pf->Events() ) + { + auto ev_name = globals[e] + "_ev"; + + // Create a scope so we don't have to individualize the + // variables. + Emit("{"); + Emit("static bool did_init = false;"); + Emit("if ( ! did_init )"); + StartBlock(); + + // We do both a Lookup and a Register because only the latter + // returns an EventHandlerPtr, sigh. 
+ Emit("if ( event_registry->Lookup(\"%s\") )", e); + StartBlock(); + Emit("%s = event_registry->Register(\"%s\");", ev_name.c_str(), e); + EndBlock(); + Emit("did_init = true;"); + EndBlock(); + Emit("}"); + } + } + +void CPPCompile::DeclareLocals(const ProfileFunc* pf, const IDPList* lambda_ids) + { + // It's handy to have a set of the lambda captures rather than a list. + std::unordered_set lambda_set; + if ( lambda_ids ) + for ( auto li : *lambda_ids ) + lambda_set.insert(li); + + const auto& ls = pf->Locals(); + + // Track whether we generated a declaration. This is just for + // tidiness in the output. + bool did_decl = false; + + for ( const auto& l : ls ) + { + auto ln = LocalName(l); + + if ( lambda_set.count(l) > 0 ) + // No need to declare these, they're passed in as + // parameters. + ln = lambda_names[l]; + + else if ( params.count(l) == 0 ) + { // Not a parameter, so must be a local. + Emit("%s %s;", FullTypeName(l->GetType()), ln); + did_decl = true; + } + + locals.emplace(l, ln); + } + + if ( did_decl ) + NL(); + } + +std::string CPPCompile::BodyName(const FuncInfo& func) + { + const auto& f = func.Func(); + const auto& bodies = f->GetBodies(); + std::string fname = f->Name(); + + if ( bodies.size() == 1 ) + return fname; + + // Make the name distinct-per-body. + const auto& body = func.Body(); + + int i; + for ( i = 0; i < bodies.size(); ++i ) + if ( bodies[i].stmts == body ) + break; + + if ( i >= bodies.size() ) + reporter->InternalError("can't find body in CPPCompile::BodyName"); + + return fname + "__" + Fmt(i); + } + +std::string CPPCompile::GenArgs(const RecordTypePtr& params, const Expr* e) + { + const auto& exprs = e->AsListExpr()->Exprs(); + std::string gen; + + int n = exprs.size(); + + for ( auto i = 0; i < n; ++i ) + { + auto e_i = exprs[i]; + auto gt = GEN_NATIVE; + + const auto& param_t = params->GetFieldType(i); + bool param_any = param_t->Tag() == TYPE_ANY; + bool arg_any = e_i->GetType()->Tag() == TYPE_ANY; + + if ( param_any && ! 
arg_any ) + gt = GEN_VAL_PTR; + + auto expr_gen = GenExpr(e_i, gt); + + if ( ! param_any && arg_any ) + expr_gen = GenericValPtrToGT(expr_gen, param_t, GEN_NATIVE); + + gen = gen + expr_gen; + if ( i < n - 1 ) + gen += ", "; + } + + return gen; + } + +} // zeek::detail diff --git a/src/script_opt/CPP/HashMgr.cc b/src/script_opt/CPP/HashMgr.cc new file mode 100644 index 0000000000..f594f57f50 --- /dev/null +++ b/src/script_opt/CPP/HashMgr.cc @@ -0,0 +1,166 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +#include "zeek/script_opt/CPP/HashMgr.h" +#include "zeek/script_opt/CPP/Func.h" +#include "zeek/script_opt/CPP/Util.h" + +namespace zeek::detail { + +VarMapper compiled_items; + +CPPHashManager::CPPHashManager(const char* hash_name_base, bool _append) + { + append = _append; + + hash_name = std::string(hash_name_base) + ".dat"; + + if ( append ) + { + hf_r = fopen(hash_name.c_str(), "r"); + if ( ! hf_r ) + { + reporter->Error("can't open auxiliary C++ hash file %s for reading", + hash_name.c_str()); + exit(1); + } + + lock_file(hash_name, hf_r); + LoadHashes(hf_r); + } + + auto mode = append ? "a" : "w"; + + hf_w = fopen(hash_name.c_str(), mode); + if ( ! hf_w ) + { + reporter->Error("can't open auxiliary C++ hash file %s for writing", + hash_name.c_str()); + exit(1); + } + } + +CPPHashManager::~CPPHashManager() + { + fclose(hf_w); + + if ( hf_r ) + { + unlock_file(hash_name, hf_r); + fclose(hf_r); + } + } + +void CPPHashManager::LoadHashes(FILE* f) + { + std::string key; + + // The hash file format is inefficient but simple to scan. + // It doesn't appear to pose a bottleneck, so until it does + // it makes sense for maintainability to keep it dead simple. 
+ + while ( GetLine(f, key) ) + { + std::string line; + + RequireLine(f, line); + + p_hash_type hash; + + if ( key == "func" ) + { + auto func = line; + + RequireLine(f, line); + + if ( sscanf(line.c_str(), "%llu", &hash) != 1 || hash == 0 ) + BadLine(line); + + previously_compiled[hash] = func; + } + + else if ( key == "global" ) + { + auto gl = line; + + RequireLine(f, line); + + p_hash_type gl_t_h, gl_v_h; + if ( sscanf(line.c_str(), "%llu %llu", + &gl_t_h, &gl_v_h) != 2 ) + BadLine(line); + + gl_type_hashes[gl] = gl_t_h; + gl_val_hashes[gl] = gl_v_h; + + // Eat the location info. It's there just for + // maintainers to be able to track down peculiarities + // in the hash file. + (void) RequireLine(f, line); + } + + else if ( key == "global-var" ) + { + auto gl = line; + + RequireLine(f, line); + + int scope; + if ( sscanf(line.c_str(), "%d", &scope) != 1 ) + BadLine(line); + + gv_scopes[gl] = scope; + } + + else if ( key == "hash" ) + { + int index; + int scope; + + if ( sscanf(line.c_str(), "%llu %d %d", &hash, &index, + &scope) != 3 || hash == 0 ) + BadLine(line); + + compiled_items[hash] = CompiledItemPair{index, scope}; + } + + else if ( key == "record" ) + record_type_globals.insert(line); + else if ( key == "enum" ) + enum_type_globals.insert(line); + + else + BadLine(line); + } + } + +void CPPHashManager::RequireLine(FILE* f, std::string& line) + { + if ( ! GetLine(f, line) ) + { + reporter->Error("missing final %s hash file entry", hash_name.c_str()); + exit(1); + } + } + +bool CPPHashManager::GetLine(FILE* f, std::string& line) + { + char buf[8192]; + if ( ! 
fgets(buf, sizeof buf, f) ) + return false; + + int n = strlen(buf); + if ( n > 0 && buf[n-1] == '\n' ) + buf[n-1] = '\0'; + + line = buf; + return true; + } + +void CPPHashManager::BadLine(std::string& line) + { + reporter->Error("bad %s hash file entry: %s", + hash_name.c_str(), line.c_str()); + exit(1); + } + +} // zeek::detail diff --git a/src/script_opt/CPP/HashMgr.h b/src/script_opt/CPP/HashMgr.h new file mode 100644 index 0000000000..a6705eadf4 --- /dev/null +++ b/src/script_opt/CPP/HashMgr.h @@ -0,0 +1,122 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +// C++ compiler support class for managing information about compiled +// objects across compilations. The objects are identified via hashes, +// hence the term "hash manager". Objects can exist in different scopes. +// The information mapping hashes to objects and scopes is tracked +// across multiple compilations using intermediary file(s). + +#pragma once + +#include +#include "zeek/script_opt/ProfileFunc.h" + +namespace zeek::detail { + +class CPPHashManager { +public: + // Create a hash manager that uses the given name for + // referring to hash file(s). It's a "base" rather than + // a full name in case the manager winds up managing multiple + // distinct files (not currently the case). + // + // If "append" is true then new hashes will be added to the + // end of the file (and the hash file will be locked, to prevent + // overlapping updates from concurrent compilation/appends). + // Otherwise, the file will be generated afresh. + CPPHashManager(const char* hash_name_base, bool append); + ~CPPHashManager(); + + bool IsAppend() const { return append; } + + // True if the given hash has already been generated. + bool HasHash(p_hash_type h) const + { return previously_compiled.count(h) > 0; } + + // The internal (C++) name of a previously compiled function, + // as identified by its hash. 
+ const std::string& FuncBodyName(p_hash_type h) + { return previously_compiled[h]; } + + // Whether the given global has already been generated; + // and, if so, the hashes of its type and initialization + // value (used for consistency checking). Here the name + // is that used at the script level. + bool HasGlobal(const std::string& gl) const + { return gl_type_hashes.count(gl) > 0; } + p_hash_type GlobalTypeHash(const std::string& gl) + { return gl_type_hashes[gl]; } + p_hash_type GlobalValHash(const std::string& gl) + { return gl_val_hashes[gl]; } + + // Whether the given C++ global already exists, and, if so, + // in what scope. + bool HasGlobalVar(const std::string& gv) const + { return gv_scopes.count(gv) > 0; } + int GlobalVarScope(const std::string& gv) + { return gv_scopes[gv]; } + + // True if the given global corresponds to a record type + // or an enum type. Used to suppress complaints about + // definitional inconsistencies for extensible types. + bool HasRecordTypeGlobal(const std::string& rt) const + { return record_type_globals.count(rt) > 0; } + bool HasEnumTypeGlobal(const std::string& et) const + { return enum_type_globals.count(et) > 0; } + + // Access to the file we're writing hashes to, so that the + // compiler can add new entries to it. + FILE* HashFile() const { return hf_w; } + +protected: + // Parses an existing file with hash information. + void LoadHashes(FILE* f); + + // Helper routines to load lines from hash file. + // The first complains if the line isn't present; + // the second merely indicates whether it was. + void RequireLine(FILE* f, std::string& line); + bool GetLine(FILE* f, std::string& line); + + // Generates an error message for a ill-formatted hash file line. + void BadLine(std::string& line); + + // Tracks previously compiled bodies based on hashes, mapping them + // to fully qualified (in terms of scoping) C++ names. 
+ std::unordered_map previously_compiled; + + // Tracks globals that are record or enum types, indexed using + // script-level names. + std::unordered_set record_type_globals; + std::unordered_set enum_type_globals; + + // Tracks globals seen in previously compiled bodies, mapping + // script-level names to hashes of their types and their values. + std::unordered_map gl_type_hashes; + std::unordered_map gl_val_hashes; + + // Information about globals in terms of their internal variable + // names, rather than their script-level names. + std::unordered_map gv_scopes; + + // Whether we're appending to existing hash file(s), or starting + // afresh. + bool append; + + // Base for file names. + std::string hash_name; + + // Handles for reading from and writing to the hash file. + // We lock on the first + FILE* hf_r = nullptr; + FILE* hf_w = nullptr; +}; + +// Maps hashes to indices into C++ globals (like "types_N__CPP"), and +// namespace scopes. +struct CompiledItemPair { int index; int scope; }; +using VarMapper = std::unordered_map; + +extern VarMapper compiled_items; + +} // zeek::detail diff --git a/src/script_opt/CPP/ISSUES b/src/script_opt/CPP/ISSUES new file mode 100644 index 0000000000..13e337d392 --- /dev/null +++ b/src/script_opt/CPP/ISSUES @@ -0,0 +1,33 @@ +conditional code: + - top-level conditionals okay due to hash protection + - but lower-level, directly called, won't translate + + possible approaches: + - warn when compiling such functions + - an option to always do Invoke's rather than direct calls + - rewrite scripts to use run-time conditionals + (in base scripts, it's almost all regarding clusters) + +lambdas: not supported if they refer to events that are otherwise not registered + (not all that hard to fix) + +standalone code won't execute global statements + +standalone code needs to deal with field_mapping initializations the +same as enum_mapping + +type switches: + - easy to support by some sort of hash on the type + +when's: + - need to 
understand "return when" semantics + +slow compilation: + - analyze whether there's a bunch of unneeded stuff (e.g. orphan types) + +efficiency: + - leverage ZVal's directly + - directly calling BiFs + - best done by supplanting bifcl + - event handlers directly called, using vector arguments + - import custom BiFs (e.g. network_time()) from ZAM diff --git a/src/script_opt/CPP/Inits.cc b/src/script_opt/CPP/Inits.cc new file mode 100644 index 0000000000..89b0dd417f --- /dev/null +++ b/src/script_opt/CPP/Inits.cc @@ -0,0 +1,554 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +#include +#include +#include + +#include "zeek/script_opt/ProfileFunc.h" +#include "zeek/script_opt/CPP/Compile.h" + + +namespace zeek::detail { + +void CPPCompile::GenInitExpr(const ExprPtr& e) + { + NL(); + + const auto& t = e->GetType(); + auto ename = InitExprName(e); + + // First, create a CPPFunc that we can compile to compute 'e'. + auto name = std::string("wrapper_") + ename; + + // Forward declaration of the function that computes 'e'. + Emit("static %s %s(Frame* f__CPP);", FullTypeName(t), name); + + // Create the Func subclass that can be used in a CallExpr to + // evaluate 'e'. + Emit("class %s_cl : public CPPFunc", name); + StartBlock(); + + Emit("public:"); + Emit("%s_cl() : CPPFunc(\"%s\", %s)", name, name, e->IsPure() ? "true" : "false"); + + StartBlock(); + Emit("type = make_intrusive(make_intrusive(new type_decl_list()), %s, FUNC_FLAVOR_FUNCTION);", GenTypeName(t)); + + NoteInitDependency(e, TypeRep(t)); + EndBlock(); + + Emit("ValPtr Invoke(zeek::Args* args, Frame* parent) const override final"); + StartBlock(); + + if ( IsNativeType(t) ) + GenInvokeBody(name, t, "parent"); + else + Emit("return %s(parent);", name); + + EndBlock(); + EndBlock(true); + + // Now the implementation of computing 'e'. 
+ Emit("static %s %s(Frame* f__CPP)", FullTypeName(t), name); + StartBlock(); + + Emit("return %s;", GenExpr(e, GEN_NATIVE)); + EndBlock(); + + Emit("CallExprPtr %s;", ename); + + NoteInitDependency(e, TypeRep(t)); + AddInit(e, ename, std::string("make_intrusive(make_intrusive(make_intrusive(make_intrusive<") + + name + "_cl>())), make_intrusive(), false)"); + } + +bool CPPCompile::IsSimpleInitExpr(const ExprPtr& e) const + { + switch ( e->Tag() ) { + case EXPR_CONST: + case EXPR_NAME: + return true; + + case EXPR_RECORD_COERCE: + { // look for coercion of empty record + auto op = e->GetOp1(); + + if ( op->Tag() != EXPR_RECORD_CONSTRUCTOR ) + return false; + + auto rc = static_cast(op.get()); + const auto& exprs = rc->Op()->AsListExpr()->Exprs(); + + return exprs.length() == 0; + } + + default: + return false; + } + } + +std::string CPPCompile::InitExprName(const ExprPtr& e) + { + return init_exprs.KeyName(e); + } + +void CPPCompile::GenGlobalInit(const ID* g, std::string& gl, const ValPtr& v) + { + const auto& t = v->GetType(); + auto tag = t->Tag(); + + if ( tag == TYPE_FUNC ) + // This should get initialized by recognizing hash of + // the function's body. + return; + + std::string init_val; + if ( tag == TYPE_OPAQUE ) + { + // We can only generate these by reproducing the expression + // (presumably a function call) used to create the value. + // That isn't fully sound, since if the global's value + // was redef'd in terms of its original value (e.g., + // "redef x = f(x)"), then we'll wind up with a broken + // expression. It's difficult to detect that in full + // generality, so um Don't Do That. (Note that this + // only affects execution of standalone compiled code, + // where the original scripts are replaced by load-stubs. + // If the scripts are available, then the HasVal() test + // we generate will mean we don't wind up using this + // expression anyway.) + + // Use the final initialization expression. 
+ auto& init_exprs = g->GetInitExprs(); + init_val = GenExpr(init_exprs.back(), GEN_VAL_PTR, false); + } + else + init_val = BuildConstant(g, v); + + auto& attrs = g->GetAttrs(); + + AddInit(g, std::string("if ( ! ") + gl + "->HasVal() )"); + + if ( attrs ) + { + RegisterAttributes(attrs); + + AddInit(g, "\t{"); + AddInit(g, "\t" + gl + "->SetVal(" + init_val + ");"); + AddInit(g, "\t" + gl + "->SetAttrs(" + AttrsName(attrs) + ");"); + AddInit(g, "\t}"); + } + else + AddInit(g, "\t" + gl + "->SetVal(" + init_val + ");"); + } + +void CPPCompile::GenFuncVarInits() + { + for ( const auto& fv_init : func_vars ) + { + auto& fv = fv_init.first; + auto& const_name = fv_init.second; + + auto f = fv->AsFunc(); + const auto& fn = f->Name(); + const auto& ft = f->GetType(); + + NoteInitDependency(fv, TypeRep(ft)); + + const auto& bodies = f->GetBodies(); + + std::string hashes = "{"; + + for ( auto b : bodies ) + { + auto body = b.stmts.get(); + + ASSERT(body_names.count(body) > 0); + + auto& body_name = body_names[body]; + ASSERT(body_hashes.count(body_name) > 0); + + NoteInitDependency(fv, body); + + if ( hashes.size() > 1 ) + hashes += ", "; + + hashes += Fmt(body_hashes[body_name]); + } + + hashes += "}"; + + auto init = std::string("lookup_func__CPP(\"") + fn + + "\", " + hashes + ", " + GenTypeName(ft) + ")"; + + AddInit(fv, const_name, init); + } + } + +void CPPCompile::GenPreInit(const Type* t) + { + std::string pre_init; + + switch ( t->Tag() ) { + case TYPE_ADDR: + case TYPE_ANY: + case TYPE_BOOL: + case TYPE_COUNT: + case TYPE_DOUBLE: + case TYPE_ERROR: + case TYPE_INT: + case TYPE_INTERVAL: + case TYPE_PATTERN: + case TYPE_PORT: + case TYPE_STRING: + case TYPE_TIME: + case TYPE_TIMER: + case TYPE_VOID: + pre_init = std::string("base_type(") + TypeTagName(t->Tag()) + ")"; + break; + + case TYPE_ENUM: + pre_init = std::string("get_enum_type__CPP(\"") + + t->GetName() + "\")"; + break; + + case TYPE_SUBNET: + pre_init = std::string("make_intrusive()"); + break; + + 
case TYPE_FILE: + pre_init = std::string("make_intrusive(") + + GenTypeName(t->AsFileType()->Yield()) + ")"; + break; + + case TYPE_OPAQUE: + pre_init = std::string("make_intrusive(\"") + + t->AsOpaqueType()->Name() + "\")"; + break; + + case TYPE_RECORD: + { + std::string name; + + if ( t->GetName() != "" ) + name = std::string("\"") + t->GetName() + std::string("\""); + else + name = "nullptr"; + + pre_init = std::string("get_record_type__CPP(") + name + ")"; + } + break; + + case TYPE_LIST: + pre_init = std::string("make_intrusive()"); + break; + + case TYPE_TYPE: + case TYPE_VECTOR: + case TYPE_TABLE: + case TYPE_FUNC: + // Nothing to do for these, pre-initialization-wise. + return; + + default: + reporter->InternalError("bad type in CPPCompile::GenType"); + } + + pre_inits.emplace_back(GenTypeName(t) + " = " + pre_init + ";"); + } + +void CPPCompile::GenPreInits() + { + NL(); + Emit("void pre_init__CPP()"); + + StartBlock(); + for ( const auto& i : pre_inits ) + Emit(i); + EndBlock(); + } + +void CPPCompile::AddInit(const Obj* o, const std::string& init) + { + obj_inits[o].emplace_back(init); + } + +void CPPCompile::AddInit(const Obj* o) + { + if ( obj_inits.count(o) == 0 ) + { + std::vector empty; + obj_inits[o] = empty; + } + } + +void CPPCompile::NoteInitDependency(const Obj* o1, const Obj* o2) + { + obj_deps[o1].emplace(o2); + } + +void CPPCompile::CheckInitConsistency(std::unordered_set& to_do) + { + for ( const auto& od : obj_deps ) + { + const auto& o = od.first; + + if ( to_do.count(o) == 0 ) + { + fprintf(stderr, "object not in to_do: %s\n", + obj_desc(o).c_str()); + exit(1); + } + + for ( const auto& d : od.second ) + { + if ( to_do.count(d) == 0 ) + { + fprintf(stderr, "dep object for %s not in to_do: %s\n", + obj_desc(o).c_str(), obj_desc(d).c_str()); + exit(1); + } + } + } + } + +int CPPCompile::GenDependentInits(std::unordered_set& to_do) + { + int n = 0; + + // The basic approach is fairly brute force: find elements of + // to_do that don't have 
any pending dependencies; generate those; + // and remove them from the to_do list, freeing up other to_do entries + // to now not having any pending dependencies. Iterate until there + // are no more to-do items. + while ( to_do.size() > 0 ) + { + std::unordered_set cohort; + + for ( const auto& o : to_do ) + { + const auto& od = obj_deps.find(o); + + bool has_pending_dep = false; + + if ( od != obj_deps.end() ) + { + for ( const auto& d : od->second ) + if ( to_do.count(d) > 0 ) + { + has_pending_dep = true; + break; + } + } + + if ( has_pending_dep ) + continue; + + cohort.insert(o); + } + + ASSERT(cohort.size() > 0); + + GenInitCohort(++n, cohort); + + for ( const auto& o : cohort ) + { + ASSERT(to_do.count(o) > 0); + to_do.erase(o); + } + } + + return n; + } + +void CPPCompile::GenInitCohort(int nc, std::unordered_set& cohort) + { + NL(); + Emit("void init_%s__CPP()", Fmt(nc)); + StartBlock(); + + // If any script/BiF functions are used for initializing globals, + // the code generated from that will expect the presence of a + // frame pointer, even if nil. + Emit("Frame* f__CPP = nullptr;"); + + // The following is just for making the output readable/pretty: + // add space between initializations for distinct objects, taking + // into account that some objects have empty initializations. 
+ bool did_an_init = false; + + for ( auto o : cohort ) + { + if ( did_an_init ) + { + NL(); + did_an_init = false; + } + + for ( const auto& i : obj_inits.find(o)->second ) + { + Emit("%s", i); + did_an_init = true; + } + } + + EndBlock(); + } + +void CPPCompile::InitializeFieldMappings() + { + Emit("int fm_offset;"); + + for ( const auto& mapping : field_decls ) + { + auto rt = mapping.first; + auto td = mapping.second; + auto fn = td->id; + auto rt_name = GenTypeName(rt) + "->AsRecordType()"; + + Emit("fm_offset = %s->FieldOffset(\"%s\");", rt_name, fn); + Emit("if ( fm_offset < 0 )"); + + StartBlock(); + Emit("// field does not exist, create it"); + Emit("fm_offset = %s->NumFields();", rt_name); + Emit("type_decl_list tl;"); + Emit(GenTypeDecl(td)); + Emit("%s->AddFieldsDirectly(tl);", rt_name); + EndBlock(); + + Emit("field_mapping.push_back(fm_offset);"); + } + } + +void CPPCompile::InitializeEnumMappings() + { + int n = 0; + + for ( const auto& mapping : enum_names ) + InitializeEnumMappings(mapping.first, mapping.second, n++); + } + +void CPPCompile::InitializeEnumMappings(const EnumType* et, + const std::string& e_name, int index) + { + AddInit(et, "{"); + + auto et_name = GenTypeName(et) + "->AsEnumType()"; + AddInit(et, "int em_offset = " + et_name + + "->Lookup(\"" + e_name + "\");"); + AddInit(et, "if ( em_offset < 0 )"); + + AddInit(et, "\t{"); + AddInit(et, "\tem_offset = " + et_name + "->Names().size();"); + // The following is to catch the case where the offset is already + // in use due to it being specified explicitly for an existing enum. 
// Emits the code that hooks the generated initialization into Zeek's
// start-up: a function "hook_in_init()" that appends init__CPP to
// CPP_init_funcs, plus a static variable whose initializer calls that
// function.  When compiling in standalone mode, this additionally emits
// the standalone activation function beforehand (GenStandaloneActivation)
// and, inside hook_in_init(), the registration produced by GenLoad().
void CPPCompile::GenInitHook()
	{
	NL();

	if ( standalone )
		GenStandaloneActivation();

	Emit("int hook_in_init()");

	StartBlock();

	Emit("CPP_init_funcs.push_back(init__CPP);");

	if ( standalone )
		GenLoad();

	// hook_in_init() returns an int only so that it can be used as the
	// initializer of the static variable emitted below.
	Emit("return 0;");
	EndBlock();

	// Trigger the activation of the hook at run-time.
	NL();
	Emit("static int dummy = hook_in_init();\n");
	}
+ continue; + + ASSERT(body_hashes.count(bname) > 0); + func_bodies[f].push_back(body_hashes[bname]); + } + + for ( auto& fb : func_bodies ) + { + auto f = fb.first; + const auto fn = f->Name(); + const auto& ft = f->GetType(); + + std::string hashes; + for ( auto h : fb.second ) + { + if ( hashes.size() > 0 ) + hashes += ", "; + + hashes += Fmt(h); + } + + hashes = "{" + hashes + "}"; + + Emit("activate_bodies__CPP(\"%s\", %s, %s);", + fn, GenTypeName(ft), hashes); + } + + EndBlock(); + NL(); + } + +void CPPCompile::GenLoad() + { + // First, generate a hash unique to this compilation. + auto t = util::current_time(); + auto th = std::hash{}(t); + + total_hash = merge_p_hashes(total_hash, th); + + Emit("register_scripts__CPP(%s, standalone_init__CPP);", Fmt(total_hash)); + + // Spit out the placeholder script. + printf("global init_CPP_%llu = load_CPP(%llu);\n", + total_hash, total_hash); + } + +} // zeek::detail diff --git a/src/script_opt/CPP/Runtime.h b/src/script_opt/CPP/Runtime.h new file mode 100644 index 0000000000..59a4ed0dbe --- /dev/null +++ b/src/script_opt/CPP/Runtime.h @@ -0,0 +1,37 @@ +// See the file "COPYING" in the main distribution directory for copyright. 
+ +#pragma once + +#include "zeek/module_util.h" +#include "zeek/ZeekString.h" +#include "zeek/Func.h" +#include "zeek/File.h" +#include "zeek/Frame.h" +#include "zeek/Scope.h" +#include "zeek/RE.h" +#include "zeek/IPAddr.h" +#include "zeek/Val.h" +#include "zeek/OpaqueVal.h" +#include "zeek/Expr.h" +#include "zeek/Event.h" +#include "zeek/EventRegistry.h" +#include "zeek/RunState.h" +#include "zeek/script_opt/ScriptOpt.h" +#include "zeek/script_opt/CPP/Func.h" +#include "zeek/script_opt/CPP/RuntimeInit.h" +#include "zeek/script_opt/CPP/RuntimeOps.h" +#include "zeek/script_opt/CPP/RuntimeVec.h" + +namespace zeek { + +using BoolValPtr = IntrusivePtr; +using CountValPtr = IntrusivePtr; +using DoubleValPtr = IntrusivePtr; +using StringValPtr = IntrusivePtr; +using IntervalValPtr = IntrusivePtr; +using PatternValPtr = IntrusivePtr; +using FuncValPtr = IntrusivePtr; +using FileValPtr = IntrusivePtr; +using SubNetValPtr = IntrusivePtr; + +} diff --git a/src/script_opt/CPP/RuntimeInit.cc b/src/script_opt/CPP/RuntimeInit.cc new file mode 100644 index 0000000000..bb48152ee3 --- /dev/null +++ b/src/script_opt/CPP/RuntimeInit.cc @@ -0,0 +1,209 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +#include "zeek/module_util.h" +#include "zeek/EventRegistry.h" +#include "zeek/script_opt/CPP/RuntimeInit.h" + +namespace zeek::detail { + +std::vector CPP_init_funcs; + +// Calls all of the initialization hooks, in the order they were added. +void init_CPPs() + { + for ( auto f : CPP_init_funcs ) + f(); + } + +// This is a trick used to register the presence of compiled code. +// The initialization of the static variable will make CPP_init_hook +// non-null, which the main part of Zeek uses to tell that there's +// CPP code available. 
+static int flag_init_CPP() + { + CPP_init_hook = init_CPPs; + return 0; + } + +static int dummy = flag_init_CPP(); + + +void register_body__CPP(CPPStmtPtr body, int priority, p_hash_type hash, + std::vector events) + { + compiled_scripts[hash] = { std::move(body), priority, std::move(events) }; + } + +void register_lambda__CPP(CPPStmtPtr body, p_hash_type hash, const char* name, + TypePtr t, bool has_captures) + { + auto ft = cast_intrusive(t); + + // Create the quasi-global. + auto id = install_ID(name, GLOBAL_MODULE_NAME, true, false); + auto func = make_intrusive(name, ft, body); + func->SetName(name); + + auto v = make_intrusive(std::move(func)); + id->SetVal(std::move(v)); + id->SetType(ft); + + // Lambdas used in initializing global functions need to + // be registered, so that the initialization can find them. + // We do not, however, want to register *all* lambdas, because + // the ones that use captures cannot be used as regular + // function bodies. + if ( ! has_captures ) + // Note, no support for lambdas that themselves refer + // to events. + register_body__CPP(body, 0, hash, {}); + } + +void register_scripts__CPP(p_hash_type h, void (*callback)()) + { + ASSERT(standalone_callbacks.count(h) == 0); + standalone_callbacks[h] = callback; + } + +void activate_bodies__CPP(const char* fn, TypePtr t, + std::vector hashes) + { + auto ft = cast_intrusive(t); + auto fg = lookup_ID(fn, GLOBAL_MODULE_NAME, false, false, false); + + if ( ! fg ) + { + fg = install_ID(fn, GLOBAL_MODULE_NAME, true, false); + fg->SetType(ft); + } + + auto f = fg->GetVal()->AsFunc(); + const auto& bodies = f->GetBodies(); + + // Track hashes of compiled bodies already associated with f. + std::unordered_set existing_CPP_bodies; + for ( auto& b : bodies ) + { + auto s = b.stmts; + if ( s->Tag() != STMT_CPP ) + continue; + + const auto& cpp_s = cast_intrusive(s); + existing_CPP_bodies.insert(cpp_s->GetHash()); + } + + // Events we need to register. 
+ std::unordered_set events; + + if ( ft->Flavor() == FUNC_FLAVOR_EVENT ) + events.insert(fn); + + std::vector no_inits; // empty initialization vector + int num_params = ft->Params()->NumFields(); + + for ( auto h : hashes ) + { + if ( existing_CPP_bodies.count(h) > 0 ) + // We're presumably running with the original script, + // and have already incorporated this compiled body + // into f. + continue; + + // Add in the new body. + ASSERT(compiled_scripts.count(h) > 0); + auto cs = compiled_scripts[h]; + + f->AddBody(cs.body, no_inits, num_params, cs.priority); + + events.insert(cs.events.begin(), cs.events.end()); + } + + for ( const auto& e : events ) + { + auto eh = event_registry->Register(e); + eh->SetUsed(); + } + } + +IDPtr lookup_global__CPP(const char* g, const TypePtr& t) + { + auto gl = lookup_ID(g, GLOBAL_MODULE_NAME, false, false, false); + + if ( ! gl ) + { + gl = install_ID(g, GLOBAL_MODULE_NAME, true, false); + gl->SetType(t); + } + + return gl; + } + +Func* lookup_bif__CPP(const char* bif) + { + auto b = lookup_ID(bif, GLOBAL_MODULE_NAME, false, false, false); + return b ? b->GetVal()->AsFunc() : nullptr; + } + +FuncValPtr lookup_func__CPP(std::string name, std::vector hashes, + const TypePtr& t) + { + auto ft = cast_intrusive(t); + + std::vector bodies; + std::vector priorities; + + for ( auto h : hashes ) + { + ASSERT(compiled_scripts.count(h) > 0); + + const auto& f = compiled_scripts[h]; + bodies.push_back(f.body); + priorities.push_back(f.priority); + + // This might register the same event more than once, + // if it's used in multiple bodies, but that's okay as + // the semantics for Register explicitly allow it. 
+ for ( auto& e : f.events ) + { + auto eh = event_registry->Register(e); + eh->SetUsed(); + } + } + + auto sf = make_intrusive(std::move(name), std::move(ft), + std::move(bodies), + std::move(priorities)); + + return make_intrusive(std::move(sf)); + } + + +RecordTypePtr get_record_type__CPP(const char* record_type_name) + { + IDPtr existing_type; + + if ( record_type_name && + (existing_type = global_scope()->Find(record_type_name)) && + existing_type->GetType()->Tag() == TYPE_RECORD ) + return cast_intrusive(existing_type->GetType()); + + return make_intrusive(new type_decl_list()); + } + +EnumTypePtr get_enum_type__CPP(const std::string& enum_type_name) + { + auto existing_type = global_scope()->Find(enum_type_name); + + if ( existing_type && existing_type->GetType()->Tag() == TYPE_ENUM ) + return cast_intrusive(existing_type->GetType()); + else + return make_intrusive(enum_type_name); + } + +EnumValPtr make_enum__CPP(TypePtr t, int i) + { + auto et = cast_intrusive(std::move(t)); + return make_intrusive(et, i); + } + +} // namespace zeek::detail diff --git a/src/script_opt/CPP/RuntimeInit.h b/src/script_opt/CPP/RuntimeInit.h new file mode 100644 index 0000000000..11b584e7f1 --- /dev/null +++ b/src/script_opt/CPP/RuntimeInit.h @@ -0,0 +1,76 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +// Run-time support for initializing C++-compiled scripts. + +#pragma once + +#include "zeek/Val.h" +#include "zeek/script_opt/CPP/Func.h" + +namespace zeek { + +using FuncValPtr = IntrusivePtr; + +namespace detail { + +// An initialization hook for a collection of compiled-to-C++ functions +// (the result of a single invocation of the compiler on a set of scripts). +typedef void (*CPP_init_func)(); + +// Tracks the initialization hooks for different compilation runs. +extern std::vector CPP_init_funcs; + +// Registers the given compiled function body as associated with the +// given priority and hash. 
"events" is a list of event handlers +// relevant for the function body, which should be registered if the +// function body is going to be used. +extern void register_body__CPP(CPPStmtPtr body, int priority, p_hash_type hash, + std::vector events); + +// Registers a lambda body as associated with the given hash. Includes +// the name of the lambda (so it can be made available as a quasi-global +// identifier), its type, and whether it needs captures. +extern void register_lambda__CPP(CPPStmtPtr body, p_hash_type hash, + const char* name, TypePtr t, + bool has_captures); + +// Registers a callback for activating a set of scripts associated with +// the given hash. +extern void register_scripts__CPP(p_hash_type h, void (*callback)()); + +// Activates the event handler/hook with the given name (which is created +// if it doesn't exist) and type, using (at least) the bodies associated +// with the given hashes. +extern void activate_bodies__CPP(const char* fn, TypePtr t, + std::vector hashes); + +// Looks for a global with the given name. If not present, creates it +// with the given type. +extern IDPtr lookup_global__CPP(const char* g, const TypePtr& t); + +// Looks for a BiF with the given name. Returns nil if not present. +extern Func* lookup_bif__CPP(const char* bif); + +// For the function body associated with the given hash, creates and +// returns an associated FuncVal. It's a fatal error for the hash +// not to exist, because this function should only be called by compiled +// code that has ensured its existence. +extern FuncValPtr lookup_func__CPP(std::string name, std::vector h, + const TypePtr& t); + +// Returns the record corresponding to the given name, as long as the +// name is indeed a record type. Otherwise (or if the name is nil) +// creates a new empty record. +extern RecordTypePtr get_record_type__CPP(const char* record_type_name); + +// Returns the "enum" type corresponding to the given name, as long as +// the name is indeed an enum type. 
Otherwise, creates a new enum +// type with the given name. +extern EnumTypePtr get_enum_type__CPP(const std::string& enum_type_name); + +// Returns an enum value corresponding to the given low-level value 'i' +// in the context of the given enum type 't'. +extern EnumValPtr make_enum__CPP(TypePtr t, int i); + +} // namespace zeek::detail +} // namespace zeek diff --git a/src/script_opt/CPP/RuntimeOps.cc b/src/script_opt/CPP/RuntimeOps.cc new file mode 100644 index 0000000000..ffcf4cfecb --- /dev/null +++ b/src/script_opt/CPP/RuntimeOps.cc @@ -0,0 +1,232 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +#include "zeek/ZeekString.h" +#include "zeek/RunState.h" +#include "zeek/EventRegistry.h" +#include "zeek/IPAddr.h" +#include "zeek/script_opt/CPP/RuntimeOps.h" + +namespace zeek::detail { + +StringValPtr str_concat__CPP(const String* s1, const String* s2) + { + std::vector strings(2); + strings[0] = s1; + strings[1] = s2; + + return make_intrusive(concatenate(strings)); + } + +bool str_in__CPP(const String* s1, const String* s2) + { + auto s = reinterpret_cast(s1->CheckString()); + return util::strstr_n(s2->Len(), s2->Bytes(), s1->Len(), s) != -1; + } + +ListValPtr index_val__CPP(std::vector indices) + { + auto ind_v = make_intrusive(TYPE_ANY); + + // In the future, we could provide N versions of this that + // unroll the loop. + for ( auto i : indices ) + ind_v->Append(i); + + return ind_v; + } + +ValPtr index_table__CPP(const TableValPtr& t, std::vector indices) + { + auto v = t->FindOrDefault(index_val__CPP(std::move(indices))); + if ( ! v ) + reporter->CPPRuntimeError("no such index"); + return v; + } + +ValPtr index_vec__CPP(const VectorValPtr& vec, int index) + { + auto v = vec->ValAt(index); + if ( ! 
v ) + reporter->CPPRuntimeError("no such index"); + return v; + } + +ValPtr index_string__CPP(const StringValPtr& svp, std::vector indices) + { + return index_string(svp->AsString(), + index_val__CPP(std::move(indices)).get()); + } + +ValPtr set_event__CPP(IDPtr g, ValPtr v, EventHandlerPtr& gh) + { + g->SetVal(std::move(v)); + gh = event_registry->Register(g->Name()); + return v; + } + +ValPtr cast_value_to_type__CPP(const ValPtr& v, const TypePtr& t) + { + auto result = cast_value_to_type(v.get(), t.get()); + if ( ! result ) + reporter->CPPRuntimeError("invalid cast of value with type '%s' to type '%s'", + type_name(v->GetType()->Tag()), type_name(t->Tag())); + return result; + } + +SubNetValPtr addr_mask__CPP(const IPAddr& a, uint32_t mask) + { + if ( a.GetFamily() == IPv4 ) + { + if ( mask > 32 ) + reporter->CPPRuntimeError("bad IPv4 subnet prefix length: %d", int(mask)); + } + else + { + if ( mask > 128 ) + reporter->CPPRuntimeError("bad IPv6 subnet prefix length: %d", int(mask)); + } + + return make_intrusive(a, mask); + } + + +// Helper function for reporting invalidation of interators. +static void check_iterators__CPP(bool invalid) + { + if ( invalid ) + reporter->Warning("possible loop/iterator invalidation in compiled code"); + } + +// Template for aggregate assignments of the form "v1[v2] = v3". 
+template +ValPtr assign_to_index__CPP(T v1, ValPtr v2, ValPtr v3) + { + bool iterators_invalidated = false; + auto err_msg = assign_to_index(std::move(v1), std::move(v2), v3, iterators_invalidated); + + check_iterators__CPP(iterators_invalidated); + + if ( err_msg ) + reporter->CPPRuntimeError("%s", err_msg); + + return v3; + } + +ValPtr assign_to_index__CPP(TableValPtr v1, ValPtr v2, ValPtr v3) + { + return assign_to_index__CPP(v1, v2, v3); + } +ValPtr assign_to_index__CPP(VectorValPtr v1, ValPtr v2, ValPtr v3) + { + return assign_to_index__CPP(v1, v2, v3); + } +ValPtr assign_to_index__CPP(StringValPtr v1, ValPtr v2, ValPtr v3) + { + return assign_to_index__CPP(v1, v2, v3); + } + +void add_element__CPP(TableValPtr aggr, ListValPtr indices) + { + bool iterators_invalidated = false; + aggr->Assign(indices, nullptr, true, &iterators_invalidated); + check_iterators__CPP(iterators_invalidated); + } + +void remove_element__CPP(TableValPtr aggr, ListValPtr indices) + { + bool iterators_invalidated = false; + aggr->Remove(*indices.get(), true, &iterators_invalidated); + check_iterators__CPP(iterators_invalidated); + } + +// A helper function that takes a parallel vectors of attribute tags +// and values and returns a collective AttributesPtr corresponding to +// those instantiated attributes. For attributes that don't have +// associated expressions, the correspoinding value should be nil. 
+static AttributesPtr build_attrs__CPP(std::vector attr_tags, + std::vector attr_vals) + { + std::vector attrs; + int nattrs = attr_tags.size(); + for ( auto i = 0; i < nattrs; ++i ) + { + auto t_i = AttrTag(attr_tags[i]); + const auto& v_i = attr_vals[i]; + ExprPtr e; + + if ( v_i ) + e = make_intrusive(v_i); + + attrs.emplace_back(make_intrusive(t_i, e)); + } + + return make_intrusive(std::move(attrs), nullptr, false, false); + } + +TableValPtr set_constructor__CPP(std::vector elements, TableTypePtr t, + std::vector attr_tags, + std::vector attr_vals) + { + auto attrs = build_attrs__CPP(std::move(attr_tags), std::move(attr_vals)); + auto aggr = make_intrusive(std::move(t), std::move(attrs)); + + for ( const auto& elem : elements ) + aggr->Assign(std::move(elem), nullptr); + + return aggr; + } + +TableValPtr table_constructor__CPP(std::vector indices, + std::vector vals, TableTypePtr t, + std::vector attr_tags, + std::vector attr_vals) + { + const auto& yt = t->Yield().get(); + auto n = indices.size(); + + auto attrs = build_attrs__CPP(std::move(attr_tags), std::move(attr_vals)); + auto aggr = make_intrusive(std::move(t), std::move(attrs)); + + for ( auto i = 0; i < n; ++i ) + { + auto v = check_and_promote(vals[i], yt, true); + if ( v ) + aggr->Assign(std::move(indices[i]), std::move(v)); + } + + return aggr; + } + +RecordValPtr record_constructor__CPP(std::vector vals, RecordTypePtr t) + { + auto rv = make_intrusive(std::move(t)); + auto n = vals.size(); + + rv->Reserve(n); + + for ( auto i = 0; i < n; ++i ) + rv->Assign(i, vals[i]); + + return rv; + } + +VectorValPtr vector_constructor__CPP(std::vector vals, VectorTypePtr t) + { + auto vv = make_intrusive(std::move(t)); + auto n = vals.size(); + + for ( auto i = 0; i < n; ++i ) + vv->Assign(i, vals[i]); + + return vv; + } + +ValPtr schedule__CPP(double dt, EventHandlerPtr event, std::vector args) + { + if ( ! 
// Returns true if string "s1" occurs within string "s2".  (The definition
// searches the bytes of "s2" for the bytes of "s1".)
extern bool str_in__CPP(const String* s1, const String* s2);
A separate function because +// we also need to return the value, for use in assignment cascades. +inline ValPtr set_global__CPP(IDPtr g, ValPtr v) + { + g->SetVal(v); + return v; + } + +// Assigns the given global to the given value, which corresponds to an +// event handler. +extern ValPtr set_event__CPP(IDPtr g, ValPtr v, EventHandlerPtr& gh); + +// Convert (in terms of the Zeek language) the given value to the given type. +// A separate function in order to package up the error handling. +extern ValPtr cast_value_to_type__CPP(const ValPtr& v, const TypePtr& t); + +// Returns the subnet corresponding to the given mask of the given address. +// A separate function in order to package up the error handling. +extern SubNetValPtr addr_mask__CPP(const IPAddr& a, uint32_t mask); + +// Assigns the given field in the given record to the given value. A +// separate function to allow for assignment cascades. +inline ValPtr assign_field__CPP(RecordValPtr rec, int field, ValPtr v) + { + rec->Assign(field, v); + return v; + } + +// Returns the given field in the given record. A separate function to +// support error handling. +inline ValPtr field_access__CPP(const RecordValPtr& rec, int field) + { + auto v = rec->GetFieldOrDefault(field); + if ( ! v ) + reporter->CPPRuntimeError("field value missing"); + + return v; + } + +// Each of the following executes the assignment "v1[v2] = v3" for +// tables/vectors/strings. +extern ValPtr assign_to_index__CPP(TableValPtr v1, ValPtr v2, ValPtr v3); +extern ValPtr assign_to_index__CPP(VectorValPtr v1, ValPtr v2, ValPtr v3); +extern ValPtr assign_to_index__CPP(StringValPtr v1, ValPtr v2, ValPtr v3); + +// Executes an "add" statement for the given set. +extern void add_element__CPP(TableValPtr aggr, ListValPtr indices); + +// Executes a "delete" statement for the given set. +extern void remove_element__CPP(TableValPtr aggr, ListValPtr indices); + +// Returns the given table/set (which should be empty) coerced to +// the given Zeek type. 
A separate function in order to deal with +// error handling. Inlined because this gets invoked a lot. +inline TableValPtr table_coerce__CPP(const ValPtr& v, const TypePtr& t) + { + TableVal* tv = v->AsTableVal(); + + if ( tv->Size() > 0 ) + reporter->CPPRuntimeError("coercion of non-empty table/set"); + + return make_intrusive(cast_intrusive(t), + tv->GetAttrs()); + } + +// The same, for an empty record. +inline VectorValPtr vector_coerce__CPP(const ValPtr& v, const TypePtr& t) + { + VectorVal* vv = v->AsVectorVal(); + + if ( vv->Size() > 0 ) + reporter->CPPRuntimeError("coercion of non-empty vector"); + + return make_intrusive(cast_intrusive(t)); + } + +// Constructs a set of the given type, containing the given elements, and +// with the associated attributes. +extern TableValPtr set_constructor__CPP(std::vector elements, + TableTypePtr t, + std::vector attr_tags, + std::vector attr_vals); + +// Constructs a table of the given type, containing the given elements +// (specified as parallel index/value vectors), and with the associated +// attributes. +extern TableValPtr table_constructor__CPP(std::vector indices, + std::vector vals, + TableTypePtr t, + std::vector attr_tags, + std::vector attr_vals); + +// Constructs a record of the given type, whose (ordered) fields are +// assigned to the corresponding elements of the given vector of values. +extern RecordValPtr record_constructor__CPP(std::vector vals, + RecordTypePtr t); + +// Constructs a vector of the given type, populated with the given values. +extern VectorValPtr vector_constructor__CPP(std::vector vals, + VectorTypePtr t); + +// Schedules an event to occur at the given absolute time, parameterized +// with the given set of values. A separate function to facilitate avoiding +// the scheduling if Zeek is terminating. +extern ValPtr schedule__CPP(double dt, EventHandlerPtr event, + std::vector args); + +// Simple helper functions for supporting absolute value. 
+inline bro_uint_t iabs__CPP(bro_int_t v) + { + return v < 0 ? -v : v; + } + +inline double fabs__CPP(double v) + { + return v < 0.0 ? -v : v; + } + +// The following operations are provided using functions to support +// error checking/reporting. +inline bro_int_t idiv__CPP(bro_int_t v1, bro_int_t v2) + { + if ( v2 == 0 ) + reporter->CPPRuntimeError("division by zero"); + return v1 / v2; + } + +inline bro_int_t imod__CPP(bro_int_t v1, bro_int_t v2) + { + if ( v2 == 0 ) + reporter->CPPRuntimeError("modulo by zero"); + return v1 % v2; + } + +inline bro_uint_t udiv__CPP(bro_uint_t v1, bro_uint_t v2) + { + if ( v2 == 0 ) + reporter->CPPRuntimeError("division by zero"); + return v1 / v2; + } + +inline bro_uint_t umod__CPP(bro_uint_t v1, bro_uint_t v2) + { + if ( v2 == 0 ) + reporter->CPPRuntimeError("modulo by zero"); + return v1 % v2; + } + +inline double fdiv__CPP(double v1, double v2) + { + if ( v2 == 0.0 ) + reporter->CPPRuntimeError("division by zero"); + return v1 / v2; + } + +} // namespace zeek::detail +} // namespace zeek diff --git a/src/script_opt/CPP/RuntimeVec.cc b/src/script_opt/CPP/RuntimeVec.cc new file mode 100644 index 0000000000..cbc9aa8e54 --- /dev/null +++ b/src/script_opt/CPP/RuntimeVec.cc @@ -0,0 +1,442 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +#include "zeek/ZeekString.h" +#include "zeek/script_opt/CPP/RuntimeVec.h" + +namespace zeek::detail { + +// Helper function for ensuring that two vectors have matching sizes. +static bool check_vec_sizes__CPP(const VectorValPtr& v1, const VectorValPtr& v2) + { + if ( v1->Size() == v2->Size() ) + return true; + + reporter->CPPRuntimeError("vector operands are of different sizes"); + return false; + } + +// Helper function that returns a VectorTypePtr apt for use with the +// the given yield type. 
We don't just use the yield type directly
+// because here we're supporting low-level arithmetic operations
+// (for example, adding one vector of "interval" to another), which
+// we want to do using the low-level representations.  We'll later
+// convert the vector to the high-level representation if needed.
+static VectorTypePtr base_vector_type__CPP(const VectorTypePtr& vt)
+	{
+	switch ( vt->Yield()->InternalType() ) {
+	case TYPE_INTERNAL_INT:
+		return make_intrusive<VectorType>(base_type(TYPE_INT));
+
+	case TYPE_INTERNAL_UNSIGNED:
+		return make_intrusive<VectorType>(base_type(TYPE_COUNT));
+
+	case TYPE_INTERNAL_DOUBLE:
+		return make_intrusive<VectorType>(base_type(TYPE_DOUBLE));
+
+	default:
+		return nullptr;
+	}
+	}
+
+// The kernel used for unary vector operations.
+#define VEC_OP1_KERNEL(accessor, type, op) \
+	for ( unsigned int i = 0; i < v->Size(); ++i ) \
+		{ \
+		auto v_i = v->ValAt(i)->accessor(); \
+		v_result->Assign(i, make_intrusive<type>(op v_i)); \
+		}
+
+// A macro (since it's beyond my templating skillz to deal with the
+// "op" operator) for unary vector operations, invoking the kernel
+// per the underlying representation used by the vector.  "double_kernel"
+// is an optional kernel to use for vectors whose underlying type
+// is "double".  It needs to be optional because C++ will (rightfully)
+// complain about applying certain C++ unary operations to doubles.
+#define VEC_OP1(name, op, double_kernel) \
+VectorValPtr vec_op_ ## name ## __CPP(const VectorValPtr& v) \
+	{ \
+	auto vt = base_vector_type__CPP(v->GetType<VectorType>()); \
+	auto v_result = make_intrusive<VectorVal>(vt); \
+	\
+	switch ( vt->Yield()->InternalType() ) { \
+	case TYPE_INTERNAL_INT: \
+		{ \
+		VEC_OP1_KERNEL(AsInt, IntVal, op) \
+		break; \
+		} \
+	\
+	case TYPE_INTERNAL_UNSIGNED: \
+		{ \
+		VEC_OP1_KERNEL(AsCount, CountVal, op) \
+		break; \
+		} \
+	\
+	double_kernel \
+	\
+	default: \
+		break; \
+	} \
+	\
+	return v_result; \
+	}
+
+// Instantiates a double_kernel for a given operation.
+#define VEC_OP1_WITH_DOUBLE(name, op) \
+	VEC_OP1(name, op, case TYPE_INTERNAL_DOUBLE: { VEC_OP1_KERNEL(AsDouble, DoubleVal, op) break; })
+
+// The unary operations supported for vectors.
+VEC_OP1_WITH_DOUBLE(pos, +)
+VEC_OP1_WITH_DOUBLE(neg, -)
+VEC_OP1(not, !,)
+VEC_OP1(comp, ~,)
+
+// A kernel for applying a binary operation element-by-element to two
+// vectors of a given low-level type.
+#define VEC_OP2_KERNEL(accessor, type, op) \
+	for ( unsigned int i = 0; i < v1->Size(); ++i ) \
+		{ \
+		auto v1_i = v1->ValAt(i)->accessor(); \
+		auto v2_i = v2->ValAt(i)->accessor(); \
+		v_result->Assign(i, make_intrusive<type>(v1_i op v2_i)); \
+		}
+
+// Analogous to VEC_OP1, instantiates a function for a given binary operation,
+// which might-or-might-not be supported for low-level "double" types.
+// This version is for operations whose result type is the same as the
+// operand type.
+#define VEC_OP2(name, op, double_kernel) \
+VectorValPtr vec_op_ ## name ## __CPP(const VectorValPtr& v1, const VectorValPtr& v2) \
+	{ \
+	if ( ! check_vec_sizes__CPP(v1, v2) ) \
+		return nullptr; \
+	\
+	auto vt = base_vector_type__CPP(v1->GetType<VectorType>()); \
+	auto v_result = make_intrusive<VectorVal>(vt); \
+	\
+	switch ( vt->Yield()->InternalType() ) { \
+	case TYPE_INTERNAL_INT: \
+		{ \
+		if ( vt->Yield()->Tag() == TYPE_BOOL ) \
+			VEC_OP2_KERNEL(AsBool, BoolVal, op) \
+		else \
+			VEC_OP2_KERNEL(AsInt, IntVal, op) \
+		break; \
+		} \
+	\
+	case TYPE_INTERNAL_UNSIGNED: \
+		{ \
+		VEC_OP2_KERNEL(AsCount, CountVal, op) \
+		break; \
+		} \
+	\
+	double_kernel \
+	\
+	default: \
+		break; \
+	} \
+	\
+	return v_result; \
+	}
+
+// Instantiates a double_kernel for a binary operation.
+#define VEC_OP2_WITH_DOUBLE(name, op) \
+	VEC_OP2(name, op, case TYPE_INTERNAL_DOUBLE: { VEC_OP2_KERNEL(AsDouble, DoubleVal, op) break; })
+
+// The binary operations supported for vectors.
+VEC_OP2_WITH_DOUBLE(add, +)
+VEC_OP2_WITH_DOUBLE(sub, -)
+VEC_OP2_WITH_DOUBLE(mul, *)
+VEC_OP2_WITH_DOUBLE(div, /)
+VEC_OP2(mod, %,)
+VEC_OP2(and, &,)
+VEC_OP2(or, |,)
+VEC_OP2(xor, ^,)
+VEC_OP2(andand, &&,)
+VEC_OP2(oror, ||,)
+
+// A version of VEC_OP2 that instead supports relational operations, so
+// the result type is always vector-of-bool.
+#define VEC_REL_OP(name, op) \
+VectorValPtr vec_op_ ## name ## __CPP(const VectorValPtr& v1, const VectorValPtr& v2) \
+	{ \
+	if ( ! check_vec_sizes__CPP(v1, v2) ) \
+		return nullptr; \
+	\
+	auto vt = v1->GetType(); \
+	auto res_type = make_intrusive<VectorType>(base_type(TYPE_BOOL)); \
+	auto v_result = make_intrusive<VectorVal>(res_type); \
+	\
+	switch ( vt->Yield()->InternalType() ) { \
+	case TYPE_INTERNAL_INT: \
+		{ \
+		VEC_OP2_KERNEL(AsInt, BoolVal, op) \
+		break; \
+		} \
+	\
+	case TYPE_INTERNAL_UNSIGNED: \
+		{ \
+		VEC_OP2_KERNEL(AsCount, BoolVal, op) \
+		break; \
+		} \
+	\
+	case TYPE_INTERNAL_DOUBLE: \
+		{ \
+		VEC_OP2_KERNEL(AsDouble, BoolVal, op) \
+		break; \
+		} \
+	\
+	default: \
+		break; \
+	} \
+	\
+	return v_result; \
+	}
+
+// The relational operations supported for vectors.
+VEC_REL_OP(lt, <)
+VEC_REL_OP(gt, >)
+VEC_REL_OP(eq, ==)
+VEC_REL_OP(ne, !=)
+VEC_REL_OP(le, <=)
+VEC_REL_OP(ge, >=)
+
+VectorValPtr vec_op_add__CPP(VectorValPtr v, int incr)
+	{
+	const auto& yt = v->GetType()->Yield();
+	auto is_signed = yt->InternalType() == TYPE_INTERNAL_INT;
+	auto n = v->Size();
+
+	for ( unsigned int i = 0; i < n; ++i )
+		{
+		auto v_i = v->ValAt(i);
+		ValPtr new_v_i;
+
+		if ( is_signed )
+			new_v_i = val_mgr->Int(v_i->AsInt() + incr);
+		else
+			new_v_i = val_mgr->Count(v_i->AsCount() + incr);
+
+		v->Assign(i, new_v_i);
+		}
+
+	return v;
+	}
+
+VectorValPtr vec_op_sub__CPP(VectorValPtr v, int i)
+	{
+	return vec_op_add__CPP(std::move(v), -i);
+	}
+
+// This function provides the core functionality.  The arguments
+// are applied as though they appeared left-to-right in a statement
+// "s1 + v2 + v3 + s4".
For any invocation, v2 will always be
+// non-nil, and one-and-only-one of s1, v3, or s4 will be non-nil.
+static VectorValPtr str_vec_op_str_vec_add__CPP(const StringValPtr& s1,
+			const VectorValPtr& v2, const VectorValPtr& v3,
+			const StringValPtr& s4)
+	{
+	auto vt = v2->GetType<VectorType>();
+	auto v_result = make_intrusive<VectorVal>(vt);
+	auto n = v2->Size();
+
+	for ( unsigned int i = 0; i < n; ++i )
+		{
+		std::vector<const String*> strings;
+
+		auto v2_i = v2->ValAt(i);
+		if ( ! v2_i )
+			continue;
+
+		auto s2 = v2_i->AsString();
+		const String* s3 = nullptr;
+
+		if ( v3 )
+			{
+			auto v3_i = v3->ValAt(i);
+			if ( ! v3_i )
+				continue;
+			s3 = v3_i->AsString();
+			}
+
+		if ( s1 ) strings.push_back(s1->AsString());
+		strings.push_back(s2);
+		if ( s3 ) strings.push_back(s3);
+		if ( s4 ) strings.push_back(s4->AsString());
+
+		auto res = make_intrusive<StringVal>(concatenate(strings));
+		v_result->Assign(i, res);
+		}
+
+	return v_result;
+	}
+
+VectorValPtr str_vec_op_add__CPP(const VectorValPtr& v1, const VectorValPtr& v2)
+	{
+	return str_vec_op_str_vec_add__CPP(nullptr, v1, v2, nullptr);
+	}
+
+VectorValPtr str_vec_op_add__CPP(const VectorValPtr& v1, const StringValPtr& s2)
+	{
+	return str_vec_op_str_vec_add__CPP(nullptr, v1, nullptr, s2);
+	}
+
+VectorValPtr str_vec_op_add__CPP(const StringValPtr& s1, const VectorValPtr& v2)
+	{
+	return str_vec_op_str_vec_add__CPP(s1, v2, nullptr, nullptr);
+	}
+
+// Kernel for element-by-element string relationals.  "rel1" and "rel2"
+// codify which relational (</<=/==/!=/>=/>) we're aiming to support,
+// in terms of how a Bstr_cmp() comparison should be assessed.
+static VectorValPtr str_vec_op_kernel__CPP(const VectorValPtr& v1,
+						const VectorValPtr& v2,
+						int rel1, int rel2)
+	{
+	auto res_type = make_intrusive<VectorType>(base_type(TYPE_BOOL));
+	auto v_result = make_intrusive<VectorVal>(res_type);
+	auto n = v1->Size();
+
+	for ( unsigned int i = 0; i < n; ++i )
+		{
+		auto v1_i = v1->ValAt(i);
+		auto v2_i = v2->ValAt(i);
+		if ( ! v1_i || ! v2_i )
+			continue;
+
+		auto s1 = v1_i->AsString();
+		auto s2 = v2_i->AsString();
+
+		auto cmp = Bstr_cmp(s1, s2);
+		auto rel = (cmp == rel1) || (cmp == rel2);
+
+		v_result->Assign(i, val_mgr->Bool(rel));
+		}
+
+	return v_result;
+	}
+
+VectorValPtr str_vec_op_lt__CPP(const VectorValPtr& v1, const VectorValPtr& v2)
+	{
+	return str_vec_op_kernel__CPP(v1, v2, -1, -1);
+	}
+VectorValPtr str_vec_op_le__CPP(const VectorValPtr& v1, const VectorValPtr& v2)
+	{
+	return str_vec_op_kernel__CPP(v1, v2, -1, 0);
+	}
+VectorValPtr str_vec_op_eq__CPP(const VectorValPtr& v1, const VectorValPtr& v2)
+	{
+	return str_vec_op_kernel__CPP(v1, v2, 0, 0);
+	}
+VectorValPtr str_vec_op_ne__CPP(const VectorValPtr& v1, const VectorValPtr& v2)
+	{
+	return str_vec_op_kernel__CPP(v1, v2, -1, 1);
+	}
+VectorValPtr str_vec_op_gt__CPP(const VectorValPtr& v1, const VectorValPtr& v2)
+	{
+	return str_vec_op_kernel__CPP(v1, v2, 1, 1);
+	}
+VectorValPtr str_vec_op_ge__CPP(const VectorValPtr& v1, const VectorValPtr& v2)
+	{
+	return str_vec_op_kernel__CPP(v1, v2, 0, 1);
+	}
+
+VectorValPtr vector_select__CPP(const VectorValPtr& v1, VectorValPtr v2,
+				VectorValPtr v3)
+	{
+	if ( ! check_vec_sizes__CPP(v1, v2) || ! check_vec_sizes__CPP(v1, v3) )
+		return nullptr;
+
+	auto vt = v2->GetType<VectorType>(); // only build the result once sizes agree
+	auto v_result = make_intrusive<VectorVal>(vt);
+
+	auto n = v1->Size();
+
+	for ( unsigned int i = 0; i < n; ++i )
+		{
+		auto vr_i = v1->BoolAt(i) ? v2->ValAt(i) : v3->ValAt(i);
+		v_result->Assign(i, std::move(vr_i));
+		}
+
+	return v_result;
+	}
+
+VectorValPtr vector_coerce_to__CPP(const VectorValPtr& v, const TypePtr& targ)
+	{
+	auto res_t = cast_intrusive<VectorType>(targ);
+	auto v_result = make_intrusive<VectorVal>(std::move(res_t));
+	auto n = v->Size();
+	auto yt = targ->Yield();
+	auto ytag = yt->Tag();
+
+	for ( unsigned int i = 0; i < n; ++i )
+		{
+		ValPtr v_i = v->ValAt(i);
+		ValPtr r_i;
+		switch ( ytag ) {
+		case TYPE_BOOL:
+			r_i = val_mgr->Bool(v_i->AsBool());
+			break;
+
+		case TYPE_ENUM:
+			r_i = yt->AsEnumType()->GetEnumVal(v_i->AsInt());
+			break;
+
+		case TYPE_PORT:
+			r_i = make_intrusive<PortVal>(v_i->AsCount());
+			break;
+
+		case TYPE_INTERVAL:
+			r_i = make_intrusive<IntervalVal>(v_i->AsDouble());
+			break;
+
+		case TYPE_TIME:
+			r_i = make_intrusive<TimeVal>(v_i->AsDouble());
+			break;
+
+		default:
+			reporter->InternalError("bad vector type in vector_coerce_to__CPP");
+		}
+
+		v_result->Assign(i, std::move(r_i));
+		}
+
+	return v_result;
+	}
+
+VectorValPtr vec_coerce_to_bro_int_t__CPP(const VectorValPtr& v, TypePtr targ)
+	{
+	auto res_t = cast_intrusive<VectorType>(targ);
+	auto v_result = make_intrusive<VectorVal>(std::move(res_t));
+	auto n = v->Size();
+
+	for ( unsigned int i = 0; i < n; ++i )
+		v_result->Assign(i, val_mgr->Int(v->IntAt(i)));
+
+	return v_result;
+	}
+
+VectorValPtr vec_coerce_to_bro_uint_t__CPP(const VectorValPtr& v, TypePtr targ)
+	{
+	auto res_t = cast_intrusive<VectorType>(targ);
+	auto v_result = make_intrusive<VectorVal>(std::move(res_t));
+	auto n = v->Size();
+
+	for ( unsigned int i = 0; i < n; ++i )
+		v_result->Assign(i, val_mgr->Count(v->CountAt(i)));
+
+	return v_result;
+	}
+
+VectorValPtr vec_coerce_to_double__CPP(const VectorValPtr& v, TypePtr targ)
+	{
+	auto res_t = cast_intrusive<VectorType>(targ);
+	auto v_result = make_intrusive<VectorVal>(std::move(res_t));
+	auto n = v->Size();
+
+	for ( unsigned int i = 0; i < n; ++i )
+		v_result->Assign(i, make_intrusive<DoubleVal>(v->DoubleAt(i)));
+
+	return v_result;
+	}
+
+} // namespace zeek::detail
diff --git
a/src/script_opt/CPP/RuntimeVec.h b/src/script_opt/CPP/RuntimeVec.h new file mode 100644 index 0000000000..d1a9734996 --- /dev/null +++ b/src/script_opt/CPP/RuntimeVec.h @@ -0,0 +1,96 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +// Run-time support for vector-oriented operations in C++-compiled scripts. +// The scope is unary (including appending), binary, and conditional +// operations. It does not include operations common to other aggregates, +// such as indexing and explicit coercion (but does include low-level +// coercion needed to support unary and binary operations). + +#pragma once + +#include "zeek/Val.h" + +namespace zeek::detail { + +// Appends v2 to the vector v1. A separate function because of the +// need to support assignment cascades. +inline ValPtr vector_append__CPP(VectorValPtr v1, ValPtr v2) + { + v1->Assign(v1->Size(), v2); + return v2; + } + +// Unary vector operations. +extern VectorValPtr vec_op_pos__CPP(const VectorValPtr& v); +extern VectorValPtr vec_op_neg__CPP(const VectorValPtr& v); +extern VectorValPtr vec_op_not__CPP(const VectorValPtr& v); +extern VectorValPtr vec_op_comp__CPP(const VectorValPtr& v); + +// Binary vector operations. 
+extern VectorValPtr vec_op_add__CPP(const VectorValPtr& v1, const VectorValPtr& v2); +extern VectorValPtr vec_op_sub__CPP(const VectorValPtr& v1, const VectorValPtr& v2); +extern VectorValPtr vec_op_mul__CPP(const VectorValPtr& v1, const VectorValPtr& v2); +extern VectorValPtr vec_op_div__CPP(const VectorValPtr& v1, const VectorValPtr& v2); +extern VectorValPtr vec_op_mod__CPP(const VectorValPtr& v1, const VectorValPtr& v2); +extern VectorValPtr vec_op_and__CPP(const VectorValPtr& v1, const VectorValPtr& v2); +extern VectorValPtr vec_op_or__CPP(const VectorValPtr& v1, const VectorValPtr& v2); +extern VectorValPtr vec_op_xor__CPP(const VectorValPtr& v1, const VectorValPtr& v2); +extern VectorValPtr vec_op_andand__CPP(const VectorValPtr& v1, const VectorValPtr& v2); +extern VectorValPtr vec_op_oror__CPP(const VectorValPtr& v1, const VectorValPtr& v2); + +// Vector relational operations. +extern VectorValPtr vec_op_lt__CPP(const VectorValPtr& v1, const VectorValPtr& v2); +extern VectorValPtr vec_op_gt__CPP(const VectorValPtr& v1, const VectorValPtr& v2); +extern VectorValPtr vec_op_eq__CPP(const VectorValPtr& v1, const VectorValPtr& v2); +extern VectorValPtr vec_op_ne__CPP(const VectorValPtr& v1, const VectorValPtr& v2); +extern VectorValPtr vec_op_le__CPP(const VectorValPtr& v1, const VectorValPtr& v2); +extern VectorValPtr vec_op_ge__CPP(const VectorValPtr& v1, const VectorValPtr& v2); + +// The following are to support ++/-- operations on vectors ... +extern VectorValPtr vec_op_add__CPP(VectorValPtr v, int incr); +extern VectorValPtr vec_op_sub__CPP(VectorValPtr v, int i); + +// ... and these for vector-plus-scalar and vector-plus-vector string +// operations. +extern VectorValPtr str_vec_op_add__CPP(const VectorValPtr& v1, + const VectorValPtr& v2); +extern VectorValPtr str_vec_op_add__CPP(const VectorValPtr& v1, + const StringValPtr& v2); +extern VectorValPtr str_vec_op_add__CPP(const StringValPtr& v1, + const VectorValPtr& v2); + +// String vector relationals. 
+extern VectorValPtr str_vec_op_lt__CPP(const VectorValPtr& v1, + const VectorValPtr& v2); +extern VectorValPtr str_vec_op_le__CPP(const VectorValPtr& v1, + const VectorValPtr& v2); +extern VectorValPtr str_vec_op_eq__CPP(const VectorValPtr& v1, + const VectorValPtr& v2); +extern VectorValPtr str_vec_op_ne__CPP(const VectorValPtr& v1, + const VectorValPtr& v2); +extern VectorValPtr str_vec_op_gt__CPP(const VectorValPtr& v1, + const VectorValPtr& v2); +extern VectorValPtr str_vec_op_ge__CPP(const VectorValPtr& v1, + const VectorValPtr& v2); + +// Support for vector conditional ('?:') expressions. Using the boolean +// vector v1 as a selector, returns a new vector populated with the +// elements selected out of v2 and v3. +extern VectorValPtr vector_select__CPP(const VectorValPtr& v1, VectorValPtr v2, + VectorValPtr v3); + +// Returns a new vector reflecting the given vector coerced to the given +// type. Assumes v already has the correct internal type. This can go +// away after we finish migrating to ZVal's. +extern VectorValPtr vector_coerce_to__CPP(const VectorValPtr& v, + const TypePtr& targ); + +// Similar coercion, but works for v having perhaps not the correct type. +extern VectorValPtr vec_coerce_to_bro_int_t__CPP(const VectorValPtr& v, + TypePtr targ); +extern VectorValPtr vec_coerce_to_bro_uint_t__CPP(const VectorValPtr& v, + TypePtr targ); +extern VectorValPtr vec_coerce_to_double__CPP(const VectorValPtr& v, + TypePtr targ); + +} // namespace zeek::detail diff --git a/src/script_opt/CPP/Stmts.cc b/src/script_opt/CPP/Stmts.cc new file mode 100644 index 0000000000..6c530a47e1 --- /dev/null +++ b/src/script_opt/CPP/Stmts.cc @@ -0,0 +1,384 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +// C++ compiler methods relating to generating code for Stmt's. 
+ +#include "zeek/script_opt/CPP/Compile.h" + +namespace zeek::detail { + +void CPPCompile::GenStmt(const Stmt* s) + { + switch ( s->Tag() ) { + case STMT_INIT: + GenInitStmt(s->AsInitStmt()); + break; + + case STMT_LIST: + { + // These always occur in contexts surrounded by {}'s, + // so no need to add them explicitly. + auto sl = s->AsStmtList(); + const auto& stmts = sl->Stmts(); + + for ( const auto& stmt : stmts ) + GenStmt(stmt); + } + break; + + case STMT_EXPR: + if ( auto e = s->AsExprStmt()->StmtExpr() ) + Emit("%s;", GenExpr(e, GEN_DONT_CARE, true)); + break; + + case STMT_IF: + GenIfStmt(s->AsIfStmt()); + break; + + case STMT_WHILE: + GenWhileStmt(s->AsWhileStmt()); + break; + + case STMT_NULL: + Emit(";"); + break; + + case STMT_RETURN: + GenReturnStmt(s->AsReturnStmt()); + break; + + case STMT_ADD: + GenAddStmt(static_cast(s)); + break; + + case STMT_DELETE: + GenDeleteStmt(static_cast(s)); + break; + + case STMT_EVENT: + GenEventStmt(static_cast(s)); + break; + + case STMT_SWITCH: + GenSwitchStmt(static_cast(s)); + break; + + case STMT_FOR: + GenForStmt(s->AsForStmt()); + break; + + case STMT_NEXT: + Emit("continue;"); + break; + + case STMT_BREAK: + if ( break_level > 0 ) + Emit("break;"); + else + Emit("return false;"); + break; + + case STMT_PRINT: + { + auto el = static_cast(s)->ExprList(); + Emit("do_print_stmt({%s});", GenExpr(el, GEN_VAL_PTR)); + } + break; + + case STMT_FALLTHROUGH: + break; + + case STMT_WHEN: + ASSERT(0); + break; + + default: + reporter->InternalError("bad statement type in CPPCompile::GenStmt"); + } + } + +void CPPCompile::GenInitStmt(const InitStmt* init) + { + auto inits = init->Inits(); + + for ( const auto& aggr : inits ) + { + const auto& t = aggr->GetType(); + + if ( ! 
IsAggr(t->Tag()) ) + continue; + + auto type_name = IntrusiveVal(t); + auto type_type = TypeType(t); + auto type_ind = GenTypeName(t); + + if ( locals.count(aggr.get()) == 0 ) + { + // fprintf(stderr, "aggregate %s unused\n", obj_desc(aggr.get()).c_str()); + continue; + } + + Emit("%s = make_intrusive<%s>(cast_intrusive<%s>(%s));", + IDName(aggr), type_name, + type_type, type_ind); + } + } + +void CPPCompile::GenIfStmt(const IfStmt* i) + { + auto cond = i->StmtExpr(); + + Emit("if ( %s )", GenExpr(cond, GEN_NATIVE)); + StartBlock(); + GenStmt(i->TrueBranch()); + EndBlock(); + + const auto& fb = i->FalseBranch(); + + if ( fb->Tag() != STMT_NULL ) + { + Emit("else"); + StartBlock(); + GenStmt(i->FalseBranch()); + EndBlock(); + } + } + +void CPPCompile::GenWhileStmt(const WhileStmt* w) + { + Emit("while ( %s )", + GenExpr(w->Condition(), GEN_NATIVE)); + + StartBlock(); + + ++break_level; + GenStmt(w->Body()); + --break_level; + + EndBlock(); + } + +void CPPCompile::GenReturnStmt(const ReturnStmt* r) + { + auto e = r->StmtExpr(); + + if ( ! ret_type || ! e || e->GetType()->Tag() == TYPE_VOID || in_hook ) + { + if ( in_hook ) + Emit("return true;"); + else + Emit("return;"); + + return; + } + + auto gt = ret_type->Tag() == TYPE_ANY ? 
GEN_VAL_PTR : GEN_NATIVE; + auto ret = GenExpr(e, gt); + + if ( e->GetType()->Tag() == TYPE_ANY ) + ret = GenericValPtrToGT(ret, ret_type, gt); + + Emit("return %s;", ret); + } + +void CPPCompile::GenAddStmt(const ExprStmt* es) + { + auto op = es->StmtExpr(); + auto aggr = GenExpr(op->GetOp1(), GEN_DONT_CARE); + auto indices = op->GetOp2(); + + Emit("add_element__CPP(%s, index_val__CPP({%s}));", + aggr, GenExpr(indices, GEN_VAL_PTR)); + } + +void CPPCompile::GenDeleteStmt(const ExprStmt* es) + { + auto op = es->StmtExpr(); + auto aggr = op->GetOp1(); + auto aggr_gen = GenExpr(aggr, GEN_VAL_PTR); + + if ( op->Tag() == EXPR_INDEX ) + { + auto indices = op->GetOp2(); + + Emit("remove_element__CPP(%s, index_val__CPP({%s}));", + aggr_gen, GenExpr(indices, GEN_VAL_PTR)); + } + + else + { + ASSERT(op->Tag() == EXPR_FIELD); + auto field = GenField(aggr, op->AsFieldExpr()->Field()); + Emit("%s->Remove(%s);", aggr_gen, field); + } + } + +void CPPCompile::GenEventStmt(const EventStmt* ev) + { + auto ev_s = ev->StmtExprPtr(); + auto ev_e = cast_intrusive(ev_s); + auto ev_n = ev_e->Name(); + + RegisterEvent(ev_n); + + if ( ev_e->Args()->Exprs().length() > 0 ) + Emit("event_mgr.Enqueue(%s_ev, %s);", + globals[std::string(ev_n)], + GenExpr(ev_e->Args(), GEN_VAL_PTR)); + else + Emit("event_mgr.Enqueue(%s_ev, Args{});", + globals[std::string(ev_n)]); + } + +void CPPCompile::GenSwitchStmt(const SwitchStmt* sw) + { + auto e = sw->StmtExpr(); + auto cases = sw->Cases(); + + auto e_it = e->GetType()->InternalType(); + bool is_int = e_it == TYPE_INTERNAL_INT; + bool is_uint = e_it == TYPE_INTERNAL_UNSIGNED; + bool organic = is_int || is_uint; + + std::string sw_val; + + if ( organic ) + sw_val = GenExpr(e, GEN_NATIVE); + else + sw_val = std::string("p_hash(") + GenExpr(e, GEN_VAL_PTR) + ")"; + + Emit("switch ( %s ) {", sw_val.c_str()); + + ++break_level; + + for ( const auto& c : *cases ) + { + if ( c->ExprCases() ) + { + const auto& c_e_s = + c->ExprCases()->AsListExpr()->Exprs(); + + 
for ( const auto& c_e : c_e_s ) + { + auto c_v = c_e->Eval(nullptr); + ASSERT(c_v); + + std::string c_v_rep; + + if ( is_int ) + c_v_rep = Fmt(int(c_v->AsInt())); + else if ( is_uint ) + c_v_rep = Fmt(c_v->AsCount()); + else + c_v_rep = Fmt(p_hash(c_v)); + + Emit("case %s:", c_v_rep); + } + } + + else + Emit("default:"); + + StartBlock(); + GenStmt(c->Body()); + EndBlock(); + } + + --break_level; + + Emit("}"); + } + +void CPPCompile::GenForStmt(const ForStmt* f) + { + Emit("{ // begin a new scope for the internal loop vars"); + + ++break_level; + + auto v = f->StmtExprPtr(); + auto t = v->GetType()->Tag(); + auto loop_vars = f->LoopVars(); + + if ( t == TYPE_TABLE ) + GenForOverTable(v, f->ValueVar(), loop_vars); + + else if ( t == TYPE_VECTOR ) + GenForOverVector(v, loop_vars); + + else if ( t == TYPE_STRING ) + GenForOverString(v, loop_vars); + + else + reporter->InternalError("bad for statement in CPPCompile::GenStmt"); + + GenStmt(f->LoopBody()); + EndBlock(); + + if ( t == TYPE_TABLE ) + EndBlock(); + + --break_level; + + Emit("} // end of for scope"); + } + +void CPPCompile::GenForOverTable(const ExprPtr& tbl, const IDPtr& value_var, + const IDPList* loop_vars) + { + Emit("auto tv__CPP = %s;", GenExpr(tbl, GEN_DONT_CARE)); + Emit("const PDict* loop_vals__CPP = tv__CPP->AsTable();"); + + Emit("if ( loop_vals__CPP->Length() > 0 )"); + StartBlock(); + + Emit("for ( const auto& lve__CPP : *loop_vals__CPP )"); + StartBlock(); + + Emit("auto k__CPP = lve__CPP.GetHashKey();"); + Emit("auto* current_tev__CPP = lve__CPP.GetValue();"); + Emit("auto ind_lv__CPP = tv__CPP->RecreateIndex(*k__CPP);"); + + if ( value_var ) + Emit("%s = %s;", + IDName(value_var), + GenericValPtrToGT("current_tev__CPP->GetVal()", + value_var->GetType(), + GEN_NATIVE)); + + for ( int i = 0; i < loop_vars->length(); ++i ) + { + auto var = (*loop_vars)[i]; + const auto& v_t = var->GetType(); + auto acc = NativeAccessor(v_t); + + if ( IsNativeType(v_t) ) + Emit("%s = ind_lv__CPP->Idx(%s)%s;", + 
IDName(var), Fmt(i), acc); + else + Emit("%s = {NewRef{}, ind_lv__CPP->Idx(%s)%s};", + IDName(var), Fmt(i), acc); + } + } + +void CPPCompile::GenForOverVector(const ExprPtr& vec, const IDPList* loop_vars) + { + Emit("auto vv__CPP = %s;", GenExpr(vec, GEN_DONT_CARE)); + + Emit("for ( auto i__CPP = 0u; i__CPP < vv__CPP->Size(); ++i__CPP )"); + StartBlock(); + + Emit("if ( ! vv__CPP->Has(i__CPP) ) continue;"); + Emit("%s = i__CPP;", IDName((*loop_vars)[0])); + } + +void CPPCompile::GenForOverString(const ExprPtr& str, const IDPList* loop_vars) + { + Emit("auto sval__CPP = %s;", GenExpr(str, GEN_DONT_CARE)); + + Emit("for ( auto i__CPP = 0u; i__CPP < sval__CPP->Len(); ++i__CPP )"); + StartBlock(); + + Emit("auto sv__CPP = make_intrusive(1, (const char*) sval__CPP->Bytes() + i__CPP);"); + Emit("%s = std::move(sv__CPP);", IDName((*loop_vars)[0])); + } + +} // zeek::detail diff --git a/src/script_opt/CPP/Tracker.cc b/src/script_opt/CPP/Tracker.cc new file mode 100644 index 0000000000..0b786749e4 --- /dev/null +++ b/src/script_opt/CPP/Tracker.cc @@ -0,0 +1,91 @@ +// See the file "COPYING" in the main distribution directory for copyright. 
+
+#include "zeek/Desc.h"
+#include "zeek/script_opt/CPP/Tracker.h"
+#include "zeek/script_opt/CPP/Util.h"
+#include "zeek/script_opt/ProfileFunc.h"
+
+
+namespace zeek::detail {
+
+template <class T>
+void CPPTracker<T>::AddKey(IntrusivePtr<T> key, p_hash_type h)
+	{
+	if ( HasKey(key) )
+		return;
+
+	if ( h == 0 )
+		h = Hash(key);
+
+	if ( map2.count(h) == 0 )
+		{
+		int index;
+		if ( mapper && mapper->count(h) > 0 )
+			{
+			const auto& pair = (*mapper)[h];
+			index = pair.index;
+			scope2[h] = Fmt(pair.scope);
+			inherited.insert(h);
+			}
+		else
+			{
+			index = num_non_inherited++;
+			keys.push_back(key);
+			}
+
+		map2[h] = index;
+		reps[h] = key.get();
+		}
+
+	ASSERT(h != 0);	// check for hash botches
+
+	map[key.get()] = h;
+	}
+
+template <class T>
+std::string CPPTracker<T>::KeyName(const T* key)
+	{
+	ASSERT(HasKey(key));
+
+	auto hash = map[key];
+	ASSERT(hash != 0);
+
+	auto index = map2[hash];
+
+	std::string scope;
+	if ( IsInherited(hash) )
+		scope = scope_prefix(scope2[hash]);
+
+	return scope + std::string(base_name) + "_" + Fmt(index) + "__CPP";
+	}
+
+template <class T>
+void CPPTracker<T>::LogIfNew(IntrusivePtr<T> key, int scope, FILE* log_file)
+	{
+	if ( IsInherited(key) )
+		return;
+
+	auto hash = map[key.get()];
+	auto index = map2[hash];
+
+	fprintf(log_file, "hash\n%llu %d %d\n", hash, index, scope);
+	}
+
+template <class T>
+p_hash_type CPPTracker<T>::Hash(IntrusivePtr<T> key) const
+	{
+	ODesc d;
+	d.SetDeterminism(true);
+	key->Describe(&d);
+	std::string desc = d.Description();
+	auto h = std::hash<std::string>{}(base_name + desc);
+	return p_hash_type(h);
+	}
+
+
+// Instantiate the templates we'll need.
+template class CPPTracker<Type>;
+template class CPPTracker<Attributes>;
+template class CPPTracker<Expr>;
+
+} // zeek::detail
diff --git a/src/script_opt/CPP/Tracker.h b/src/script_opt/CPP/Tracker.h
new file mode 100644
index 0000000000..2140b88098
--- /dev/null
+++ b/src/script_opt/CPP/Tracker.h
@@ -0,0 +1,97 @@
+// See the file "COPYING" in the main distribution directory for copyright.
+ +// C++ compiler helper class that tracks distinct instances of a given key, +// where the key can have any IntrusivePtr type. The properties of a +// tracker are that it (1) supports a notion that two technically distinct +// keys in fact reflect the same underlying object, (2) provides an +// instance of such keys to consistently serve as their "representative", +// (3) provides names (suitable for use as C++ variables) for representative +// keys, and (4) has a notion of "inheritance" (the underlying object is +// already available from a previously generated namespace). +// +// Notions of "same" are taken from hash values ala those provided by +// ProfileFunc. + +#pragma once + +#include "zeek/script_opt/CPP/HashMgr.h" + +namespace zeek::detail { + +// T is a type that has an IntrusivePtr instantiation. + +template +class CPPTracker { +public: + // The base name is used to construct key names. The mapper, + // if present, maps hash values to information about the previously + // generated scope in which the value appears. + CPPTracker(const char* _base_name, VarMapper* _mapper = nullptr) + : base_name(_base_name), mapper(_mapper) + { + } + + // True if the given key has already been entered. + bool HasKey(const T* key) const { return map.count(key) > 0; } + bool HasKey(IntrusivePtr key) const { return HasKey(key.get()); } + + // Only adds the key if it's not already present. If a hash + // is provided, then refrains from computing it. + void AddKey(IntrusivePtr key, p_hash_type h = 0); + + // Returns the (C++ variable) name associated with the given key. + std::string KeyName(const T* key); + std::string KeyName(IntrusivePtr key) + { return KeyName(key.get()); } + + // Returns all of the distinct keys entered into the tracker. + // A key is "distinct" if it's both (1) a representative and + // (2) not inherited. + const std::vector>& DistinctKeys() const + { return keys; } + + // For a given key, get its representative. 
+ const T* GetRep(const T* key) + { ASSERT(HasKey(key)); return reps[map[key]]; } + const T* GetRep(IntrusivePtr key) { return GetRep(key.get()); } + + // True if the given key is represented by an inherited value. + bool IsInherited(const T* key) + { ASSERT(HasKey(key)); return IsInherited(map[key]); } + bool IsInherited(const IntrusivePtr& key) + { ASSERT(HasKey(key)); return IsInherited(map[key.get()]); } + bool IsInherited(p_hash_type h) { return inherited.count(h) > 0; } + + // If the given key is not inherited, logs it and its associated + // scope to the given file. + void LogIfNew(IntrusivePtr key, int scope, FILE* log_file); + +private: + // Compute a hash for the given key. + p_hash_type Hash(IntrusivePtr key) const; + + // Maps keys to internal representations (i.e., hashes). + std::unordered_map map; + + // Maps internal representations to distinct values. These + // may-or-may-not be indices into an "inherited" namespace scope. + std::unordered_map map2; + std::unordered_map scope2; // only if inherited + std::unordered_set inherited; // which are inherited + int num_non_inherited = 0; // distinct non-inherited map2 entries + + // Tracks the set of distinct keys, to facilitate iterating over them. + // Each such key also has an entry in map2. + std::vector> keys; + + // Maps internal representations back to keys. + std::unordered_map reps; + + // Used to construct key names. + std::string base_name; + + // If non-nil, the mapper to consult for previous names. + VarMapper* mapper; +}; + +} // zeek::detail diff --git a/src/script_opt/CPP/Types.cc b/src/script_opt/CPP/Types.cc new file mode 100644 index 0000000000..2499e780c3 --- /dev/null +++ b/src/script_opt/CPP/Types.cc @@ -0,0 +1,570 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +#include "zeek/script_opt/CPP/Compile.h" + + +namespace zeek::detail { + +bool CPPCompile::IsNativeType(const TypePtr& t) const + { + if ( ! 
t ) + return true; + + switch ( t->Tag() ) { + case TYPE_BOOL: + case TYPE_COUNT: + case TYPE_DOUBLE: + case TYPE_ENUM: + case TYPE_INT: + case TYPE_INTERVAL: + case TYPE_PORT: + case TYPE_TIME: + case TYPE_VOID: + return true; + + case TYPE_ADDR: + case TYPE_ANY: + case TYPE_FILE: + case TYPE_FUNC: + case TYPE_OPAQUE: + case TYPE_PATTERN: + case TYPE_RECORD: + case TYPE_STRING: + case TYPE_SUBNET: + case TYPE_TABLE: + case TYPE_TYPE: + case TYPE_VECTOR: + return false; + + case TYPE_LIST: + // These occur when initializing tables. + return false; + + default: + reporter->InternalError("bad type in CPPCompile::IsNativeType"); + } + } + +std::string CPPCompile::NativeToGT(const std::string& expr, const TypePtr& t, + GenType gt) + { + if ( gt == GEN_DONT_CARE ) + return expr; + + if ( gt == GEN_NATIVE || ! IsNativeType(t) ) + return expr; + + // Need to convert to a ValPtr. + switch ( t->Tag() ) { + case TYPE_VOID: + return expr; + + case TYPE_BOOL: + return std::string("val_mgr->Bool(") + expr + ")"; + + case TYPE_INT: + return std::string("val_mgr->Int(") + expr + ")"; + + case TYPE_COUNT: + return std::string("val_mgr->Count(") + expr + ")"; + + case TYPE_PORT: + return std::string("val_mgr->Port(") + expr + ")"; + + case TYPE_ENUM: + return std::string("make_enum__CPP(") + GenTypeName(t) + ", " + + expr + ")"; + + default: + return std::string("make_intrusive<") + IntrusiveVal(t) + + ">(" + expr + ")"; + } + } + +std::string CPPCompile::GenericValPtrToGT(const std::string& expr, + const TypePtr& t, GenType gt) + { + if ( gt != GEN_VAL_PTR && IsNativeType(t) ) + return expr + NativeAccessor(t); + else + return std::string("cast_intrusive<") + IntrusiveVal(t) + + ">(" + expr + ")"; + } + +void CPPCompile::ExpandTypeVar(const TypePtr& t) + { + auto tn = GenTypeName(t); + + switch ( t->Tag() ) { + case TYPE_LIST: + ExpandListTypeVar(t, tn); + break; + + case TYPE_RECORD: + ExpandRecordTypeVar(t, tn); + break; + + case TYPE_ENUM: + ExpandEnumTypeVar(t, tn); + break; + 
+ case TYPE_TABLE: + ExpandTableTypeVar(t, tn); + break; + + case TYPE_FUNC: + ExpandFuncTypeVar(t, tn); + break; + + case TYPE_TYPE: + AddInit(t, tn, std::string("make_intrusive(") + + GenTypeName(t->AsTypeType()->GetType()) + ")"); + break; + + case TYPE_VECTOR: + AddInit(t, tn, std::string("make_intrusive(") + + GenTypeName(t->AsVectorType()->Yield()) + ")"); + break; + + default: + break; + } + + auto& script_type_name = t->GetName(); + if ( script_type_name.size() > 0 ) + AddInit(t, tn + "->SetName(\"" + script_type_name + "\");"); + + AddInit(t); + } + +void CPPCompile::ExpandListTypeVar(const TypePtr& t, std::string& tn) + { + auto tl = t->AsTypeList()->GetTypes(); + auto t_name = tn + "->AsTypeList()"; + + for ( auto i = 0; i < tl.size(); ++i ) + AddInit(t, t_name + "->Append(" + + GenTypeName(tl[i]) + ");"); + } + +void CPPCompile::ExpandRecordTypeVar(const TypePtr& t, std::string& tn) + { + auto r = t->AsRecordType()->Types(); + auto t_name = tn + "->AsRecordType()"; + + AddInit(t, std::string("if ( ") + t_name + "->NumFields() == 0 )"); + + AddInit(t, "{"); + AddInit(t, "type_decl_list tl;"); + + for ( auto i = 0; i < r->length(); ++i ) + { + const auto& td = (*r)[i]; + AddInit(t, GenTypeDecl(td)); + } + + AddInit(t, t_name + "->AddFieldsDirectly(tl);"); + AddInit(t, "}"); + } + +void CPPCompile::ExpandEnumTypeVar(const TypePtr& t, std::string& tn) + { + auto e_name = tn + "->AsEnumType()"; + auto et = t->AsEnumType(); + auto names = et->Names(); + + AddInit(t, "{ auto et = " + e_name + ";"); + AddInit(t, "if ( et->Names().size() == 0 ) {"); + + for ( const auto& name_pair : et->Names() ) + AddInit(t, std::string("\tet->AddNameInternal(\"") + + name_pair.first + "\", " + + Fmt(int(name_pair.second)) + ");"); + + AddInit(t, "}}"); + } + +void CPPCompile::ExpandTableTypeVar(const TypePtr& t, std::string& tn) + { + auto tbl = t->AsTableType(); + + const auto& indices = tbl->GetIndices(); + const auto& yield = tbl->Yield(); + + if ( tbl->IsSet() ) + 
AddInit(t, tn, + std::string("make_intrusive(cast_intrusive(") + + GenTypeName(indices) + " ), nullptr)"); + else + AddInit(t, tn, + std::string("make_intrusive(cast_intrusive(") + + GenTypeName(indices) + "), " + + GenTypeName(yield) + ")"); + } + +void CPPCompile::ExpandFuncTypeVar(const TypePtr& t, std::string& tn) + { + auto f = t->AsFuncType(); + + auto args_type_accessor = GenTypeName(f->Params()); + auto yt = f->Yield(); + + std::string yield_type_accessor; + + if ( yt ) + yield_type_accessor += GenTypeName(yt); + else + yield_type_accessor += "nullptr"; + + auto fl = f->Flavor(); + + std::string fl_name; + if ( fl == FUNC_FLAVOR_FUNCTION ) + fl_name = "FUNC_FLAVOR_FUNCTION"; + else if ( fl == FUNC_FLAVOR_EVENT ) + fl_name = "FUNC_FLAVOR_EVENT"; + else if ( fl == FUNC_FLAVOR_HOOK ) + fl_name = "FUNC_FLAVOR_HOOK"; + + auto type_init = std::string("make_intrusive(cast_intrusive(") + + args_type_accessor + "), " + + yield_type_accessor + ", " + fl_name + ")"; + + AddInit(t, tn, type_init); + } + +std::string CPPCompile::GenTypeDecl(const TypeDecl* td) + { + auto type_accessor = GenTypeName(td->type); + + auto td_name = std::string("util::copy_string(\"") + td->id + "\")"; + + if ( td->attrs ) + return std::string("tl.append(new TypeDecl(") + + td_name + ", " + type_accessor + + ", " + AttrsName(td->attrs) +"));"; + + return std::string("tl.append(new TypeDecl(") + td_name + ", " + + type_accessor +"));"; + } + +std::string CPPCompile::GenTypeName(const Type* t) + { + return types.KeyName(TypeRep(t)); + } + +const char* CPPCompile::TypeTagName(TypeTag tag) const + { + switch ( tag ) { + case TYPE_ADDR: return "TYPE_ADDR"; + case TYPE_ANY: return "TYPE_ANY"; + case TYPE_BOOL: return "TYPE_BOOL"; + case TYPE_COUNT: return "TYPE_COUNT"; + case TYPE_DOUBLE: return "TYPE_DOUBLE"; + case TYPE_ENUM: return "TYPE_ENUM"; + case TYPE_ERROR: return "TYPE_ERROR"; + case TYPE_FILE: return "TYPE_FILE"; + case TYPE_FUNC: return "TYPE_FUNC"; + case TYPE_INT: return "TYPE_INT"; 
+ case TYPE_INTERVAL: return "TYPE_INTERVAL"; + case TYPE_OPAQUE: return "TYPE_OPAQUE"; + case TYPE_PATTERN: return "TYPE_PATTERN"; + case TYPE_PORT: return "TYPE_PORT"; + case TYPE_RECORD: return "TYPE_RECORD"; + case TYPE_STRING: return "TYPE_STRING"; + case TYPE_SUBNET: return "TYPE_SUBNET"; + case TYPE_TABLE: return "TYPE_TABLE"; + case TYPE_TIME: return "TYPE_TIME"; + case TYPE_TIMER: return "TYPE_TIMER"; + case TYPE_TYPE: return "TYPE_TYPE"; + case TYPE_VECTOR: return "TYPE_VECTOR"; + case TYPE_VOID: return "TYPE_VOID"; + + default: + reporter->InternalError("bad type in CPPCompile::TypeTagName"); + } + } + +const char* CPPCompile::TypeName(const TypePtr& t) + { + switch ( t->Tag() ) { + case TYPE_BOOL: return "bool"; + case TYPE_COUNT: return "bro_uint_t"; + case TYPE_DOUBLE: return "double"; + case TYPE_ENUM: return "int"; + case TYPE_INT: return "bro_int_t"; + case TYPE_INTERVAL: return "double"; + case TYPE_PORT: return "bro_uint_t"; + case TYPE_TIME: return "double"; + case TYPE_VOID: return "void"; + + case TYPE_ADDR: return "AddrVal"; + case TYPE_ANY: return "Val"; + case TYPE_FILE: return "FileVal"; + case TYPE_FUNC: return "FuncVal"; + case TYPE_OPAQUE: return "OpaqueVal"; + case TYPE_PATTERN: return "PatternVal"; + case TYPE_RECORD: return "RecordVal"; + case TYPE_STRING: return "StringVal"; + case TYPE_SUBNET: return "SubNetVal"; + case TYPE_TABLE: return "TableVal"; + case TYPE_TYPE: return "TypeVal"; + case TYPE_VECTOR: return "VectorVal"; + + default: + reporter->InternalError("bad type in CPPCompile::TypeName"); + } + } + +const char* CPPCompile::FullTypeName(const TypePtr& t) + { + if ( ! 
t ) + return "void"; + + switch ( t->Tag() ) { + case TYPE_BOOL: + case TYPE_COUNT: + case TYPE_DOUBLE: + case TYPE_ENUM: + case TYPE_INT: + case TYPE_INTERVAL: + case TYPE_PORT: + case TYPE_TIME: + case TYPE_VOID: + return TypeName(t); + + case TYPE_ADDR: return "AddrValPtr"; + case TYPE_ANY: return "ValPtr"; + case TYPE_FILE: return "FileValPtr"; + case TYPE_FUNC: return "FuncValPtr"; + case TYPE_OPAQUE: return "OpaqueValPtr"; + case TYPE_PATTERN: return "PatternValPtr"; + case TYPE_RECORD: return "RecordValPtr"; + case TYPE_STRING: return "StringValPtr"; + case TYPE_SUBNET: return "SubNetValPtr"; + case TYPE_TABLE: return "TableValPtr"; + case TYPE_TYPE: return "TypeValPtr"; + case TYPE_VECTOR: return "VectorValPtr"; + + default: + reporter->InternalError("bad type in CPPCompile::FullTypeName"); + } + } + +const char* CPPCompile::TypeType(const TypePtr& t) + { + switch ( t->Tag() ) { + case TYPE_RECORD: return "RecordType"; + case TYPE_TABLE: return "TableType"; + case TYPE_VECTOR: return "VectorType"; + + default: + reporter->InternalError("bad type in CPPCompile::TypeType"); + } + } + +void CPPCompile::RegisterType(const TypePtr& tp) + { + auto t = TypeRep(tp); + + if ( processed_types.count(t) > 0 ) + return; + + // Add the type before going further, to avoid loops due to types + // that reference each other. + processed_types.insert(t); + + switch ( t->Tag() ) { + case TYPE_ADDR: + case TYPE_ANY: + case TYPE_BOOL: + case TYPE_COUNT: + case TYPE_DOUBLE: + case TYPE_ENUM: + case TYPE_ERROR: + case TYPE_INT: + case TYPE_INTERVAL: + case TYPE_PATTERN: + case TYPE_PORT: + case TYPE_STRING: + case TYPE_TIME: + case TYPE_TIMER: + case TYPE_VOID: + case TYPE_OPAQUE: + case TYPE_SUBNET: + case TYPE_FILE: + // Nothing to do. 
+ break; + + case TYPE_TYPE: + { + const auto& tt = t->AsTypeType()->GetType(); + NoteNonRecordInitDependency(t, tt); + RegisterType(tt); + } + break; + + case TYPE_VECTOR: + { + const auto& yield = t->AsVectorType()->Yield(); + NoteNonRecordInitDependency(t, yield); + RegisterType(yield); + } + break; + + case TYPE_LIST: + RegisterListType(tp); + break; + + case TYPE_TABLE: + RegisterTableType(tp); + break; + + case TYPE_RECORD: + RegisterRecordType(tp); + break; + + case TYPE_FUNC: + RegisterFuncType(tp); + break; + + default: + reporter->InternalError("bad type in CPPCompile::RegisterType"); + } + + AddInit(t); + + if ( ! types.IsInherited(t) ) + { + auto t_rep = types.GetRep(t); + if ( t_rep == t ) + GenPreInit(t); + else + NoteInitDependency(t, t_rep); + } + } + +void CPPCompile::RegisterListType(const TypePtr& t) + { + auto tl = t->AsTypeList()->GetTypes(); + + for ( auto i = 0; i < tl.size(); ++i ) + { + NoteNonRecordInitDependency(t, tl[i]); + RegisterType(tl[i]); + } + } + +void CPPCompile::RegisterTableType(const TypePtr& t) + { + auto tbl = t->AsTableType(); + const auto& indices = tbl->GetIndices(); + const auto& yield = tbl->Yield(); + + NoteNonRecordInitDependency(t, indices); + RegisterType(indices); + + if ( yield ) + { + NoteNonRecordInitDependency(t, yield); + RegisterType(yield); + } + } + +void CPPCompile::RegisterRecordType(const TypePtr& t) + { + auto r = t->AsRecordType()->Types(); + + for ( auto i = 0; i < r->length(); ++i ) + { + const auto& r_i = (*r)[i]; + + NoteNonRecordInitDependency(t, r_i->type); + RegisterType(r_i->type); + + if ( r_i->attrs ) + { + NoteInitDependency(t, r_i->attrs); + RegisterAttributes(r_i->attrs); + } + } + } + +void CPPCompile::RegisterFuncType(const TypePtr& t) + { + auto f = t->AsFuncType(); + + NoteInitDependency(t, TypeRep(f->Params())); + RegisterType(f->Params()); + + if ( f->Yield() ) + { + NoteNonRecordInitDependency(t, f->Yield()); + RegisterType(f->Yield()); + } + } + +const char* 
CPPCompile::NativeAccessor(const TypePtr& t) + { + switch ( t->Tag() ) { + case TYPE_BOOL: return "->AsBool()"; + case TYPE_COUNT: return "->AsCount()"; + case TYPE_DOUBLE: return "->AsDouble()"; + case TYPE_ENUM: return "->AsEnum()"; + case TYPE_INT: return "->AsInt()"; + case TYPE_INTERVAL: return "->AsDouble()"; + case TYPE_PORT: return "->AsCount()"; + case TYPE_TIME: return "->AsDouble()"; + + case TYPE_ADDR: return "->AsAddrVal()"; + case TYPE_FILE: return "->AsFileVal()"; + case TYPE_FUNC: return "->AsFuncVal()"; + case TYPE_OPAQUE: return "->AsOpaqueVal()"; + case TYPE_PATTERN: return "->AsPatternVal()"; + case TYPE_RECORD: return "->AsRecordVal()"; + case TYPE_STRING: return "->AsStringVal()"; + case TYPE_SUBNET: return "->AsSubNetVal()"; + case TYPE_TABLE: return "->AsTableVal()"; + case TYPE_TYPE: return "->AsTypeVal()"; + case TYPE_VECTOR: return "->AsVectorVal()"; + + case TYPE_ANY: return ".get()"; + + case TYPE_VOID: return ""; + + default: + reporter->InternalError("bad type in CPPCompile::NativeAccessor"); + } + } + +const char* CPPCompile::IntrusiveVal(const TypePtr& t) + { + switch ( t->Tag() ) { + case TYPE_BOOL: return "BoolVal"; + case TYPE_COUNT: return "CountVal"; + case TYPE_DOUBLE: return "DoubleVal"; + case TYPE_ENUM: return "EnumVal"; + case TYPE_INT: return "IntVal"; + case TYPE_INTERVAL: return "IntervalVal"; + case TYPE_PORT: return "PortVal"; + case TYPE_TIME: return "TimeVal"; + + case TYPE_ADDR: return "AddrVal"; + case TYPE_ANY: return "Val"; + case TYPE_FILE: return "FileVal"; + case TYPE_FUNC: return "FuncVal"; + case TYPE_OPAQUE: return "OpaqueVal"; + case TYPE_PATTERN: return "PatternVal"; + case TYPE_RECORD: return "RecordVal"; + case TYPE_STRING: return "StringVal"; + case TYPE_SUBNET: return "SubNetVal"; + case TYPE_TABLE: return "TableVal"; + case TYPE_TYPE: return "TypeVal"; + case TYPE_VECTOR: return "VectorVal"; + + default: + reporter->InternalError("bad type in CPPCompile::IntrusiveVal"); + } + } + +} // zeek::detail 
diff --git a/src/script_opt/CPP/Util.cc b/src/script_opt/CPP/Util.cc new file mode 100644 index 0000000000..2660fb78b7 --- /dev/null +++ b/src/script_opt/CPP/Util.cc @@ -0,0 +1,67 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +#include +#include +#include + +#include "zeek/script_opt/CPP/Util.h" + +namespace zeek::detail { + +std::string Fmt(double d) + { + // Special hack to preserve the signed-ness of the magic -0.0. + if ( d == 0.0 && signbit(d) ) + return "-0.0"; + + // Unfortunately, to_string(double) is hardwired to use %f with + // default of 6 digits precision. + char buf[8192]; + snprintf(buf, sizeof buf, "%.17g", d); + return buf; + } + +std::string scope_prefix(const std::string& scope) + { + return std::string("zeek::detail::CPP_") + scope + "::"; + } + +std::string scope_prefix(int scope) + { + return scope_prefix(std::to_string(scope)); + } + +bool is_CPP_compilable(const ProfileFunc* pf) + { + if ( pf->NumWhenStmts() > 0 ) + return false; + + if ( pf->TypeSwitches().size() > 0 ) + return false; + + return true; + } + +void lock_file(const std::string& fname, FILE* f) + { + if ( flock(fileno(f), LOCK_EX) < 0 ) + { + char buf[256]; + util::zeek_strerror_r(errno, buf, sizeof(buf)); + reporter->Error("flock failed on %s: %s", fname.c_str(), buf); + exit(1); + } + } + +void unlock_file(const std::string& fname, FILE* f) + { + if ( flock(fileno(f), LOCK_UN) < 0 ) + { + char buf[256]; + util::zeek_strerror_r(errno, buf, sizeof(buf)); + reporter->Error("un-flock failed on %s: %s", fname.c_str(), buf); + exit(1); + } + } + +} // zeek::detail diff --git a/src/script_opt/CPP/Util.h b/src/script_opt/CPP/Util.h new file mode 100644 index 0000000000..25b7a0abc6 --- /dev/null +++ b/src/script_opt/CPP/Util.h @@ -0,0 +1,30 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +// Utility functions for compile-to-C++ compiler. 
+ +#pragma once + +#include "zeek/script_opt/ProfileFunc.h" + +namespace zeek::detail { + +// Conversions to strings. +inline std::string Fmt(int i) { return std::to_string(i); } +inline std::string Fmt(p_hash_type u) { return std::to_string(u) + "ULL"; } +extern std::string Fmt(double d); + +// Returns the prefix for the scoping used by the compiler. +extern std::string scope_prefix(const std::string& scope); + +// Same, but for scopes identified with numbers. +extern std::string scope_prefix(int scope); + +// True if the given function is compilable to C++. +extern bool is_CPP_compilable(const ProfileFunc* pf); + +// Helper utilities for file locking, to ensure that hash files +// don't receive conflicting writes due to concurrent compilations. +extern void lock_file(const std::string& fname, FILE* f); +extern void unlock_file(const std::string& fname, FILE* f); + +} // zeek::detail diff --git a/src/script_opt/CPP/Vars.cc b/src/script_opt/CPP/Vars.cc new file mode 100644 index 0000000000..de28f120f9 --- /dev/null +++ b/src/script_opt/CPP/Vars.cc @@ -0,0 +1,249 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +#include +#include +#include + +#include "zeek/script_opt/ProfileFunc.h" +#include "zeek/script_opt/CPP/Compile.h" + + +namespace zeek::detail { + +bool CPPCompile::CheckForCollisions() + { + for ( auto& g : pfs.AllGlobals() ) + { + auto gn = std::string(g->Name()); + + if ( hm.HasGlobal(gn) ) + { + // Make sure the previous compilation used the + // same type and initialization value for the global. + auto ht_orig = hm.GlobalTypeHash(gn); + auto hv_orig = hm.GlobalValHash(gn); + + auto ht = pfs.HashType(g->GetType()); + p_hash_type hv = 0; + if ( g->GetVal() ) + hv = p_hash(g->GetVal()); + + if ( ht != ht_orig || hv != hv_orig ) + { + fprintf(stderr, "%s: hash clash for global %s (%llu/%llu vs. %llu/%llu)\n", + working_dir.c_str(), gn.c_str(), + ht, hv, ht_orig, hv_orig); + fprintf(stderr, "val: %s\n", g->GetVal() ? 
obj_desc(g->GetVal().get()).c_str() : ""); + return true; + } + } + } + + for ( auto& t : pfs.RepTypes() ) + { + auto tag = t->Tag(); + + if ( tag != TYPE_ENUM && tag != TYPE_RECORD ) + // Other types, if inconsistent, will just not reuse + // the previously compiled version of the type. + continue; + + // We identify enums and records by name. Make sure that + // the name either (1) wasn't previously used, or (2) if it + // was, it was likewise for an enum or a record. + const auto& tn = t->GetName(); + if ( tn.size() == 0 || ! hm.HasGlobal(tn) ) + // No concern of collision since the type name + // wasn't previously compiled. + continue; + + if ( tag == TYPE_ENUM && hm.HasEnumTypeGlobal(tn) ) + // No inconsistency. + continue; + + if ( tag == TYPE_RECORD && hm.HasRecordTypeGlobal(tn) ) + // No inconsistency. + continue; + + fprintf(stderr, "%s: type \"%s\" collides with compiled global\n", + working_dir.c_str(), tn.c_str()); + return true; + } + + return false; + } + +void CPPCompile::CreateGlobal(const ID* g) + { + auto gn = std::string(g->Name()); + bool is_bif = pfs.BiFGlobals().count(g) > 0; + + if ( pfs.Globals().count(g) == 0 ) + { + // Only used in the context of calls. If it's compilable, + // then we'll call it directly. + if ( compilable_funcs.count(gn) > 0 ) + { + AddGlobal(gn, "zf", true); + return; + } + + if ( is_bif ) + { + AddBiF(g, false); + return; + } + } + + if ( AddGlobal(gn, "gl", true) ) + { // We'll be creating this global. + Emit("IDPtr %s;", globals[gn]); + + if ( pfs.Events().count(gn) > 0 ) + // This is an event that's also used as + // a variable. + Emit("EventHandlerPtr %s_ev;", globals[gn]); + + const auto& t = g->GetType(); + NoteInitDependency(g, TypeRep(t)); + + AddInit(g, globals[gn], + std::string("lookup_global__CPP(\"") + gn + "\", " + + GenTypeName(t) + ")"); + } + + if ( is_bif ) + // This is a BiF that's referred to in a non-call context, + // so we didn't already add it above. 
+ AddBiF(g, true); + + global_vars.emplace(g); + } + +void CPPCompile::UpdateGlobalHashes() + { + for ( auto& g : pfs.AllGlobals() ) + { + auto gn = g->Name(); + + if ( hm.HasGlobal(gn) ) + // Not new to this compilation run. + continue; + + auto ht = pfs.HashType(g->GetType()); + + p_hash_type hv = 0; + if ( g->GetVal() ) + hv = p_hash(g->GetVal()); + + fprintf(hm.HashFile(), "global\n%s\n", gn); + fprintf(hm.HashFile(), "%llu %llu\n", ht, hv); + + // Record location information in the hash file for + // diagnostic purposes. + auto loc = g->GetLocationInfo(); + fprintf(hm.HashFile(), "%s %d\n", loc->filename, loc->first_line); + + // Flag any named record/enum types. + if ( g->IsType() ) + { + const auto& t = g->GetType(); + if ( t->Tag() == TYPE_RECORD ) + fprintf(hm.HashFile(), "record\n%s\n", gn); + else if ( t->Tag() == TYPE_ENUM ) + fprintf(hm.HashFile(), "enum\n%s\n", gn); + } + } + } + +void CPPCompile::AddBiF(const ID* b, bool is_var) + { + auto bn = b->Name(); + auto n = std::string(bn); + if ( is_var ) + n = n + "_"; // make the name distinct + + if ( AddGlobal(n, "bif", true) ) + Emit("Func* %s;", globals[n]); + + AddInit(b, globals[n], std::string("lookup_bif__CPP(\"") + bn + "\")"); + } + +bool CPPCompile::AddGlobal(const std::string& g, const char* suffix, bool track) + { + bool new_var = false; + + if ( globals.count(g) == 0 ) + { + auto gn = GlobalName(g, suffix); + + if ( hm.HasGlobalVar(gn) ) + gn = scope_prefix(hm.GlobalVarScope(gn)) + gn; + else + { + new_var = true; + + if ( track && update ) + fprintf(hm.HashFile(), "global-var\n%s\n%d\n", + gn.c_str(), addl_tag); + } + + globals.emplace(g, gn); + } + + return new_var; + } + +void CPPCompile::RegisterEvent(std::string ev_name) + { + body_events[body_name].emplace_back(std::move(ev_name)); + } + +const std::string& CPPCompile::IDNameStr(const ID* id) const + { + if ( id->IsGlobal() ) + { + auto g = std::string(id->Name()); + ASSERT(globals.count(g) > 0); + return 
((CPPCompile*)(this))->globals[g]; + } + + ASSERT(locals.count(id) > 0); + + return ((CPPCompile*)(this))->locals[id]; + } + +std::string CPPCompile::LocalName(const ID* l) const + { + auto n = l->Name(); + auto without_module = strstr(n, "::"); + + if ( without_module ) + return Canonicalize(without_module + 2); + else + return Canonicalize(n); + } + +std::string CPPCompile::Canonicalize(const char* name) const + { + std::string cname; + + for ( int i = 0; name[i]; ++i ) + { + auto c = name[i]; + + // Strip <>'s - these get introduced for lambdas. + if ( c == '<' || c == '>' ) + continue; + + if ( c == ':' || c == '-' ) + c = '_'; + + cname = cname + c; + } + + // Add a trailing '_' to avoid conflicts with C++ keywords. + return cname + "_"; + } + +} // zeek::detail diff --git a/src/script_opt/CPP/bare-embedded-build b/src/script_opt/CPP/bare-embedded-build new file mode 100755 index 0000000000..ade7b3d9db --- /dev/null +++ b/src/script_opt/CPP/bare-embedded-build @@ -0,0 +1,13 @@ +#! /bin/sh + +base=../../.. +so=$base/src/script_opt/CPP +build=$base/build + +echo > $so/CPP-gen-addl.h +export -n ZEEK_USE_CPP ZEEK_ADD_CPP +export ZEEK_HASH_DIR=$build +cd $build +echo | src/zeek -b -O gen-C++ +mv ./CPP-gen-addl.h $so/CPP-gen.cc +ninja || echo Bare embedded build failed diff --git a/src/script_opt/CPP/eval-test-suite b/src/script_opt/CPP/eval-test-suite new file mode 100755 index 0000000000..a9e55ffc69 --- /dev/null +++ b/src/script_opt/CPP/eval-test-suite @@ -0,0 +1,12 @@ +#! /bin/sh + +base=../../.. +test=$base/testing/btest +so=$base/src/script_opt/CPP +build=$base/build +gen=CPP-gen-addl.h + +export -n ZEEK_ADD_CPP +cd $test +rm -rf .tmp +../../auxil/btest/btest -j -a cpp -f cpp-test.diag core diff --git a/src/script_opt/CPP/full-embedded-build b/src/script_opt/CPP/full-embedded-build new file mode 100755 index 0000000000..492791c675 --- /dev/null +++ b/src/script_opt/CPP/full-embedded-build @@ -0,0 +1,13 @@ +#! /bin/sh + +base=../../.. 
+so=$base/src/script_opt/CPP +build=$base/build + +echo > $so/CPP-gen-addl.h +export -n ZEEK_USE_CPP ZEEK_ADD_CPP +export ZEEK_HASH_DIR=$build +cd $build +echo | src/zeek -O gen-C++ +mv ./CPP-gen-addl.h $so/CPP-gen.cc +ninja || echo Full embedded build failed diff --git a/src/script_opt/CPP/non-embedded-build b/src/script_opt/CPP/non-embedded-build new file mode 100755 index 0000000000..7d8e7b50c5 --- /dev/null +++ b/src/script_opt/CPP/non-embedded-build @@ -0,0 +1,7 @@ +#! /bin/sh + +base=../../.. +so=$base/src/script_opt/CPP +echo > $so/CPP-gen.cc +cd $base/build +ninja || echo Non-embedded build failed diff --git a/src/script_opt/CPP/single-full-test.sh b/src/script_opt/CPP/single-full-test.sh new file mode 100755 index 0000000000..f4802230ff --- /dev/null +++ b/src/script_opt/CPP/single-full-test.sh @@ -0,0 +1,27 @@ +#! /bin/sh + +echo $1 + +base=../../.. +test=$base/testing/btest +so=$base/src/script_opt/CPP +build=$base/build +gen=CPP-gen-addl.h + +echo >$gen + +./non-embedded-build >$build/errs 2>&1 || echo non-embedded build failed + +export -n ZEEK_USE_CPP +export ZEEK_HASH_DIR=$test ZEEK_GEN_CPP= +cd $test +../../auxil/btest/btest $1 >jbuild-$1.out 2>&1 +grep -c '^namespace' $gen +mv $gen $so/CPP-gen.cc +cd $build +ninja >& errs || echo build for $1 failed + +export -n ZEEK_GEN_CPP +cd $test +rm -rf .tmp +../../auxil/btest/btest -a cpp -f cpp-test.$1.diag $1 diff --git a/src/script_opt/CPP/single-test.sh b/src/script_opt/CPP/single-test.sh new file mode 100755 index 0000000000..07a2f3622c --- /dev/null +++ b/src/script_opt/CPP/single-test.sh @@ -0,0 +1,25 @@ +#! /bin/sh + +echo $1 + +base=../../.. +test=$base/testing/btest +so=$base/src/script_opt/CPP +build=$base/build +gen=CPP-gen-addl.h + +export -n ZEEK_USE_CPP +export ZEEK_HASH_DIR=$test ZEEK_ADD_CPP= +cd $test +cp $build/CPP-hashes.dat . 
+echo >$gen +../../auxil/btest/btest $1 >cpp-build-$1.out 2>&1 +grep -c '^namespace' $gen +mv $gen $so +cd $build +ninja >& errs || echo build for $1 failed + +export -n ZEEK_ADD_CPP +cd $test +rm -rf .tmp +../../auxil/btest/btest -j -a cpp -f cpp-jtest.$1.diag $1 diff --git a/src/script_opt/CPP/test-suite-build b/src/script_opt/CPP/test-suite-build new file mode 100755 index 0000000000..a79ffd88bb --- /dev/null +++ b/src/script_opt/CPP/test-suite-build @@ -0,0 +1,18 @@ +#! /bin/sh + +base=../../.. +test=$base/testing/btest +so=$base/src/script_opt/CPP +build=$base/build +gen=CPP-gen-addl.h + +export -n ZEEK_USE_CPP +export ZEEK_HASH_DIR=$test ZEEK_ADD_CPP= +cd $test +cp $build/CPP-hashes.dat . +echo >$gen +../../auxil/btest/btest $1 >test-suite-build.out 2>&1 +grep -c '^namespace' $gen +mv $gen $so +cd $build +ninja >& errs || echo test suite build failed diff --git a/src/script_opt/CPP/update-single-test.sh b/src/script_opt/CPP/update-single-test.sh new file mode 100755 index 0000000000..dcdecfbb70 --- /dev/null +++ b/src/script_opt/CPP/update-single-test.sh @@ -0,0 +1,23 @@ +#! /bin/sh + +base=../../.. +test=$base/testing/btest +so=$base/src/script_opt/CPP +build=$base/build +gen=CPP-gen-addl.h + +export -n ZEEK_USE_CPP +export ZEEK_HASH_DIR=$test ZEEK_ADD_CPP= +cd $test +cp $build/CPP-hashes.dat . +echo >$gen +../../auxil/btest/btest $1 >jbuild-$1.out 2>&1 +grep -c '^namespace' $gen +mv $gen $so +cd $build +ninja >& errs || echo build for $1 failed + +export -n ZEEK_ADD_CPP +cd $test +rm -rf .tmp +../../auxil/btest/btest -U -a cpp -f cpp-test.$1.diag.update $1 diff --git a/src/script_opt/ScriptOpt.cc b/src/script_opt/ScriptOpt.cc index 457d1b8534..1995c0c7b8 100644 --- a/src/script_opt/ScriptOpt.cc +++ b/src/script_opt/ScriptOpt.cc @@ -19,6 +19,8 @@ AnalyOpt analysis_options; std::unordered_set non_recursive_funcs; +void (*CPP_init_hook)() = nullptr; + // Tracks all of the loaded functions (including event handlers and hooks). 
static std::vector funcs; diff --git a/src/script_opt/ScriptOpt.h b/src/script_opt/ScriptOpt.h index c887580a8d..0e5cfc98a8 100644 --- a/src/script_opt/ScriptOpt.h +++ b/src/script_opt/ScriptOpt.h @@ -118,4 +118,9 @@ extern const FuncInfo* analyze_global_stmts(Stmt* stmts); extern void analyze_scripts(); +// Used for C++-compiled scripts to signal their presence, by setting this +// to a non-null value. +extern void (*CPP_init_hook)(); + + } // namespace zeek::detail