diff --git a/CHANGES b/CHANGES index 119126825d..22f85cc368 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,9 @@ +4.2.0-dev.385 | 2021-11-23 19:43:48 -0700 + + * Changes to speed up compilation of Compiled-to-C++ Zeek Scripts (Vern Paxson, Corelight) + + * removing unused SubNetType class (Vern Paxson, Corelight) + 4.2.0-dev.371 | 2021-11-23 19:41:10 -0700 * Add new tunnel packet analyzers, remove old ones (Tim Wojtulewicz, Corelight) diff --git a/VERSION b/VERSION index 199abb35c0..bf06f15b1f 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -4.2.0-dev.371 +4.2.0-dev.385 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a14e1cfb78..a0e2e8124b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -392,7 +392,9 @@ set(MAIN_SRCS script_opt/CPP/GenFunc.cc script_opt/CPP/HashMgr.cc script_opt/CPP/Inits.cc - script_opt/CPP/RuntimeInit.cc + script_opt/CPP/InitsInfo.cc + script_opt/CPP/RuntimeInits.cc + script_opt/CPP/RuntimeInitSupport.cc script_opt/CPP/RuntimeOps.cc script_opt/CPP/RuntimeVec.cc script_opt/CPP/Stmts.cc diff --git a/src/Options.cc b/src/Options.cc index 3b7224a212..2fa98a3b3a 100644 --- a/src/Options.cc +++ b/src/Options.cc @@ -204,8 +204,6 @@ static void print_analysis_help() fprintf(stderr, " report-uncompilable print names of functions that can't be compiled\n"); fprintf(stderr, " use-C++ use available C++ script bodies\n"); fprintf(stderr, "\n experimental options for incremental compilation:\n"); - fprintf(stderr, " add-C++ generate private C++ for any missing script bodies\n"); - fprintf(stderr, " update-C++ generate reusable C++ for any missing script bodies\n"); } static void set_analysis_option(const char* opt, Options& opts) @@ -225,9 +223,7 @@ static void set_analysis_option(const char* opt, Options& opts) exit(0); } - if ( util::streq(opt, "add-C++") ) - a_o.add_CPP = true; - else if ( util::streq(opt, "dump-uds") ) + if ( util::streq(opt, "dump-uds") ) a_o.activate = a_o.dump_uds = true; else if ( util::streq(opt, "dump-xform") ) 
a_o.activate = a_o.dump_xform = true; @@ -255,8 +251,6 @@ static void set_analysis_option(const char* opt, Options& opts) a_o.inliner = a_o.report_recursive = true; else if ( util::streq(opt, "report-uncompilable") ) a_o.report_uncompilable = true; - else if ( util::streq(opt, "update-C++") ) - a_o.update_CPP = true; else if ( util::streq(opt, "use-C++") ) a_o.use_CPP = true; else if ( util::streq(opt, "xform") ) diff --git a/src/Type.cc b/src/Type.cc index 9c0f22d868..4fc56f3fb5 100644 --- a/src/Type.cc +++ b/src/Type.cc @@ -123,18 +123,6 @@ RecordType* Type::AsRecordType() return (RecordType*)this; } -const SubNetType* Type::AsSubNetType() const - { - CHECK_TYPE_TAG(TYPE_SUBNET, "Type::AsSubNetType"); - return (const SubNetType*)this; - } - -SubNetType* Type::AsSubNetType() - { - CHECK_TYPE_TAG(TYPE_SUBNET, "Type::AsSubNetType"); - return (SubNetType*)this; - } - const FuncType* Type::AsFuncType() const { CHECK_TYPE_TAG(TYPE_FUNC, "Type::AsFuncType"); @@ -1447,16 +1435,6 @@ string RecordType::GetFieldDeprecationWarning(int field, bool has_check) const return ""; } -SubNetType::SubNetType() : Type(TYPE_SUBNET) { } - -void SubNetType::Describe(ODesc* d) const - { - if ( d->IsReadable() ) - d->Add("subnet"); - else - d->Add(int(Tag())); - } - FileType::FileType(TypePtr yield_type) : Type(TYPE_FILE), yield(std::move(yield_type)) { } FileType::~FileType() = default; diff --git a/src/Type.h b/src/Type.h index e81ed9eec9..cc6b6c9106 100644 --- a/src/Type.h +++ b/src/Type.h @@ -152,7 +152,6 @@ class TypeList; class TableType; class SetType; class RecordType; -class SubNetType; class FuncType; class EnumType; class VectorType; @@ -165,7 +164,6 @@ using TypeListPtr = IntrusivePtr; using TableTypePtr = IntrusivePtr; using SetTypePtr = IntrusivePtr; using RecordTypePtr = IntrusivePtr; -using SubNetTypePtr = IntrusivePtr; using FuncTypePtr = IntrusivePtr; using EnumTypePtr = IntrusivePtr; using VectorTypePtr = IntrusivePtr; @@ -226,9 +224,6 @@ public: const RecordType* 
AsRecordType() const; RecordType* AsRecordType(); - const SubNetType* AsSubNetType() const; - SubNetType* AsSubNetType(); - const FuncType* AsFuncType() const; FuncType* AsFuncType(); @@ -700,13 +695,6 @@ protected: type_decl_list* types; }; -class SubNetType final : public Type - { -public: - SubNetType(); - void Describe(ODesc* d) const override; - }; - class FileType final : public Type { public: diff --git a/src/script_opt/CPP/Attrs.cc b/src/script_opt/CPP/Attrs.cc index b1d12978ac..437270a7d4 100644 --- a/src/script_opt/CPP/Attrs.cc +++ b/src/script_opt/CPP/Attrs.cc @@ -7,42 +7,55 @@ namespace zeek::detail using namespace std; -void CPPCompile::RegisterAttributes(const AttributesPtr& attrs) +shared_ptr CPPCompile::RegisterAttributes(const AttributesPtr& attrs) { - if ( ! attrs || attributes.HasKey(attrs) ) - return; + if ( ! attrs ) + return nullptr; + + auto a = attrs.get(); + auto pa = processed_attrs.find(a); + + if ( pa != processed_attrs.end() ) + return pa->second; attributes.AddKey(attrs); - AddInit(attrs); - auto a_rep = attributes.GetRep(attrs); - if ( a_rep != attrs.get() ) + // The cast is just so we can make an IntrusivePtr. + auto a_rep = const_cast(attributes.GetRep(attrs)); + if ( a_rep != a ) { - NoteInitDependency(attrs.get(), a_rep); - return; + AttributesPtr a_rep_ptr = {NewRef{}, a_rep}; + processed_attrs[a] = RegisterAttributes(a_rep_ptr); + return processed_attrs[a]; } for ( const auto& a : attrs->GetAttrs() ) - { - const auto& e = a->GetExpr(); - if ( e ) - { - if ( IsSimpleInitExpr(e) ) - { - // Make sure any dependencies it has get noted. 
- (void)GenExpr(e, GEN_VAL_PTR); - continue; - } + (void)RegisterAttr(a); - init_exprs.AddKey(e); - AddInit(e); - NoteInitDependency(attrs, e); + shared_ptr gi = make_shared(this, attrs); + attrs_info->AddInstance(gi); + processed_attrs[a] = gi; - auto e_rep = init_exprs.GetRep(e); - if ( e_rep != e.get() ) - NoteInitDependency(e.get(), e_rep); - } - } + return gi; + } + +shared_ptr CPPCompile::RegisterAttr(const AttrPtr& attr) + { + auto a = attr.get(); + auto pa = processed_attr.find(a); + + if ( pa != processed_attr.end() ) + return pa->second; + + const auto& e = a->GetExpr(); + if ( e && ! IsSimpleInitExpr(e) ) + init_exprs.AddKey(e); + + auto gi = make_shared(this, attr); + attr_info->AddInstance(gi); + processed_attr[a] = gi; + + return gi; } void CPPCompile::BuildAttrs(const AttributesPtr& attrs, string& attr_tags, string& attr_vals) @@ -72,78 +85,9 @@ void CPPCompile::BuildAttrs(const AttributesPtr& attrs, string& attr_tags, strin attr_vals = string("{") + attr_vals + "}"; } -void CPPCompile::GenAttrs(const AttributesPtr& attrs) +const char* CPPCompile::AttrName(AttrTag t) { - NL(); - - Emit("AttributesPtr %s", AttrsName(attrs)); - - StartBlock(); - - const auto& avec = attrs->GetAttrs(); - Emit("auto attrs = std::vector();"); - - AddInit(attrs); - - for ( const auto& attr : avec ) - { - const auto& e = attr->GetExpr(); - - if ( ! 
e ) - { - Emit("attrs.emplace_back(make_intrusive(%s));", AttrName(attr)); - continue; - } - - NoteInitDependency(attrs, e); - AddInit(e); - - string e_arg; - if ( IsSimpleInitExpr(e) ) - e_arg = GenAttrExpr(e); - else - e_arg = InitExprName(e); - - Emit("attrs.emplace_back(make_intrusive(%s, %s));", AttrName(attr), e_arg); - } - - Emit("return make_intrusive(attrs, nullptr, true, false);"); - - EndBlock(); - } - -string CPPCompile::GenAttrExpr(const ExprPtr& e) - { - switch ( e->Tag() ) - { - case EXPR_CONST: - return string("make_intrusive(") + GenExpr(e, GEN_VAL_PTR) + ")"; - - case EXPR_NAME: - NoteInitDependency(e, e->AsNameExpr()->IdPtr()); - return string("make_intrusive(") + globals[e->AsNameExpr()->Id()->Name()] + - ")"; - - case EXPR_RECORD_COERCE: - NoteInitDependency(e, TypeRep(e->GetType())); - return string("make_intrusive(make_intrusive(" - "make_intrusive()), cast_intrusive(") + - GenTypeName(e->GetType()) + "))"; - - default: - reporter->InternalError("bad expr tag in CPPCompile::GenAttrs"); - return "###"; - } - } - -string CPPCompile::AttrsName(const AttributesPtr& a) - { - return attributes.KeyName(a) + "()"; - } - -const char* CPPCompile::AttrName(const AttrPtr& attr) - { - switch ( attr->Tag() ) + switch ( t ) { case ATTR_OPTIONAL: return "ATTR_OPTIONAL"; diff --git a/src/script_opt/CPP/Attrs.h b/src/script_opt/CPP/Attrs.h new file mode 100644 index 0000000000..8a399341e7 --- /dev/null +++ b/src/script_opt/CPP/Attrs.h @@ -0,0 +1,19 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +// Definitions associated with type attributes. 
+ +#pragma once + +namespace zeek::detail + { + +enum AttrExprType + { + AE_NONE, // attribute doesn't have an expression + AE_CONST, // easy expression - a constant (ConstExpr) + AE_NAME, // easy - a global (NameExpr) + AE_RECORD, // an empty record cast to a given type + AE_CALL, // everything else - requires a lambda, essentially + }; + + } // zeek::detail diff --git a/src/script_opt/CPP/Compile.h b/src/script_opt/CPP/Compile.h index 5331d2a9a6..dbac78469b 100644 --- a/src/script_opt/CPP/Compile.h +++ b/src/script_opt/CPP/Compile.h @@ -5,18 +5,20 @@ #include "zeek/Desc.h" #include "zeek/script_opt/CPP/Func.h" #include "zeek/script_opt/CPP/HashMgr.h" +#include "zeek/script_opt/CPP/InitsInfo.h" #include "zeek/script_opt/CPP/Tracker.h" #include "zeek/script_opt/CPP/Util.h" #include "zeek/script_opt/ScriptOpt.h" // We structure the compiler for generating C++ versions of Zeek script -// bodies as a single large class. While we divide the compiler's +// bodies mainly as a single large class. While we divide the compiler's // functionality into a number of groups (see below), these interact with // one another, and in particular with various member variables, enough // so that it's not clear there's benefit to further splitting the // functionality into multiple classes. (Some splitting has already been // done for more self-contained functionality, resulting in the CPPTracker -// and CPPHashManager classes.) +// and CPPHashManager classes, and initialization information in +// InitsInfo.{h,cc} and RuntimeInits.{h,cc}.) // // Most aspects of translating to C++ have a straightforward nature. // We can turn many Zeek script statements directly into the C++ that's @@ -45,26 +47,6 @@ // all of the scripts loaded in "bare" mode, plus those for foo.zeek; and // without the "-b" for all of the default scripts plus those in foo.zeek. 
// -// One of the design goals employed is to support "incremental" compilation, -// i.e., compiling *additional* Zeek scripts at a later point after an -// initial compilation. This comes in two forms. -// -// "-O update-C++" produces C++ code that extends that already compiled, -// in a manner where subsequent compilations can leverage both the original -// and the newly added. Such compilations *must* be done in a consistent -// context (for example, any types extended in the original are extended in -// the same manner - plus then perhaps further extensions - in the updated -// code). -// -// "-O add-C++" instead produces C++ code that (1) will not be leveraged in -// any subsequent compilations, and (2) can be inconsistent with other -// "-O add-C++" code added in the future. The main use of this feature is -// to support compiling polyglot versions of Zeek scripts used to run -// the test suite. -// -// Zeek invocations specifying "-O use-C++" will activate any code compiled -// into the zeek binary; otherwise, the code lies dormant. -// // "-O report-C++" reports on which compiled functions will/won't be used // (including ones that are available but not relevant to the scripts loaded // on the command line). This can be useful when debugging to make sure @@ -104,29 +86,41 @@ // // Emit Low-level code generation. // -// Of these, Inits is probably the most subtle. It turns out to be -// very tricky ensuring that we create run-time variables in the -// proper order. For example, a global might need a record type to be -// defined; one of the record's fields is a table; that table contains -// another record; one of that other record's fields is the original -// record (recursion); another field has an &default expression that -// requires the compiler to generate a helper function to construct -// the expression dynamically; and that helper function might in turn -// refer to other types that require initialization. 
+// Of these, Inits is the most subtle and complex. There are two major +// challenges in creating run-time values (such as Zeek types and constants). // -// To deal with these dependencies, for every run-time object the compiler -// maintains (1) all of the other run-time objects on which its initialization -// depends, and (2) the C++ statements needed to initialize it, once those -// other objects have been initialized. It then beings initialization with -// objects that have no dependencies, marks those as done (essentially), finds -// objects that now can be initialized and emits their initializations, -// marks those as done, etc. +// First, generating individual code for creating each of these winds up +// incurring unacceptable compile times (for example, clang compiling all +// of the base scripts with optimization takes many hours on a high-end +// laptop). As a result, we employ a table-driven approach that compiles +// much faster (though still taking many minutes on the same high-end laptop, +// running about 40x faster however). // -// Below in declaring the CPPCompiler class, we group methods in accordance -// with those listed above. We also locate member variables with the group -// most relevant for their usage. However, keep in mind that many member -// variables are used by multiple groups, which is why we haven't created -// distinct per-group classes. +// Second, initializations frequently rely upon *other* initializations +// having occurred first. For example, a global might need a record type +// to be defined; one of the record's fields is a table; that table contains +// another record; one of that other record's fields is the original record +// (recursion); another field has an &default expression that requires the +// compiler to generate a helper function to construct the expression +// dynamically; and that helper function might in turn refer to other types +// that require initialization. 
What's required is a framework for ensuring +// that everything occurs in the proper order. +// +// The logic for dealing with these complexities is isolated into several +// sets of classes. InitsInfo.{h,cc} provides the classes related to tracking +// how to generate initializations in the proper order. RuntimeInits.{h,cc} +// provides the classes used when initializing generated code in order +// to instantiate all of the necessary values. See those files for discussions +// on how they address the points framed above. +// +// In declaring the CPPCompile class, we group methods in accordance with +// those listed above, locating member variables with the group most relevant +// for their usage. However, keep in mind that many member variables are +// used by multiple groups, which is why we haven't created distinct +// per-group classes. In addition, we make a number of methods public +// in order to avoid the need for numerous "friend" declarations to allow +// associated classes (like those for initialization) access to the +// necessary compiler methods. namespace zeek::detail { @@ -135,10 +129,128 @@ class CPPCompile { public: CPPCompile(std::vector& _funcs, ProfileFuncs& pfs, const std::string& gen_name, - const std::string& addl_name, CPPHashManager& _hm, bool _update, bool _standalone, + const std::string& addl_name, CPPHashManager& _hm, bool _standalone, bool report_uncompilable); ~CPPCompile(); + // Constructing a CPPCompile object does all of the compilation. + // The public methods here are for use by helper classes. + + // Tracks the given type (with support methods for ones that + // are complicated), recursively including its sub-types, and + // creating initializations for constructing C++ variables + // representing the types. + // + // Returns the initialization info associated with the type. 
+ std::shared_ptr RegisterType(const TypePtr& t); + + // Easy access to the global offset and the initialization + // cohort associated with a given type. + int TypeOffset(const TypePtr& t) { return GI_Offset(RegisterType(t)); } + int TypeCohort(const TypePtr& t) { return GI_Cohort(RegisterType(t)); } + + // Tracks a Zeek ValPtr used as a constant value. These occur + // in two contexts: directly as constant expressions, and indirectly + // as elements within aggregate constants (such as in vector + // initializers). + // + // Returns the associated initialization info. In addition, + // consts_offset returns an offset into an initialization-time + // global that tracks all constructed globals, providing + // general access to them for aggregate constants. + std::shared_ptr RegisterConstant(const ValPtr& vp, int& consts_offset); + + // Tracks a global to generate the necessary initialization. + // Returns the associated initialization info. + std::shared_ptr RegisterGlobal(const ID* g); + + // Tracks a use of the given set of attributes, including + // initialization dependencies and the generation of any + // associated expressions. + // + // Returns the initialization info associated with the set of + // attributes. + std::shared_ptr RegisterAttributes(const AttributesPtr& attrs); + + // Convenient access to the global offset associated with + // a set of Attributes. + int AttributesOffset(const AttributesPtr& attrs) + { + return GI_Offset(RegisterAttributes(attrs)); + } + + // The same, for a single attribute. + std::shared_ptr RegisterAttr(const AttrPtr& attr); + int AttrOffset(const AttrPtr& attr) { return GI_Offset(RegisterAttr(attr)); } + + // Returns a mapping from Attr objects to their associated + // initialization information. The Attr must have previously + // been registered. 
+ auto& ProcessedAttr() const { return processed_attr; } + + // True if the given expression is simple enough that we can + // generate code to evaluate it directly, and don't need to + // create a separate function per RegisterInitExpr() to track it. + static bool IsSimpleInitExpr(const ExprPtr& e); + + // Tracks expressions used in attributes (such as &default=). + // + // We need to generate code to evaluate these, via CallExpr's + // that invoke functions that return the value of the expression. + // However, we can't generate that code when first encountering + // the attribute, because doing so will need to refer to the names + // of types, and initially those are unavailable (because the type's + // representatives, per pfs.RepTypes(), might not have yet been + // tracked). So instead we track the associated CallExprInitInfo + // objects, and after all types have been tracked, then spin + // through them to generate the code. + // + // Returns the associated initialization information. + std::shared_ptr RegisterInitExpr(const ExprPtr& e); + + // Tracks a C++ string value needed for initialization. Returns + // an offset into the global vector that will hold these. + int TrackString(std::string s) + { + auto ts = tracked_strings.find(s); + if ( ts != tracked_strings.end() ) + return ts->second; + + int offset = ordered_tracked_strings.size(); + tracked_strings[s] = offset; + ordered_tracked_strings.emplace_back(s); + + return offset; + } + + // Tracks a profile hash value needed for initialization. Returns + // an offset into the global vector that will hold these. + int TrackHash(p_hash_type h) + { + auto th = tracked_hashes.find(h); + if ( th != tracked_hashes.end() ) + return th->second; + + int offset = ordered_tracked_hashes.size(); + tracked_hashes[h] = offset; + ordered_tracked_hashes.emplace_back(h); + + return offset; + } + + // Returns the hash associated with a given function body. 
+ // It's a fatal error to call this for a body that hasn't + // been compiled. + p_hash_type BodyHash(const Stmt* body); + + // Returns true if at least one of the function bodies associated + // with the function/hook/event handler of the given fname is + // not compilable. + bool NotFullyCompilable(const std::string& fname) const + { + return not_fully_compilable.count(fname) > 0; + } + private: // Start of methods related to driving the overall compilation // process. @@ -148,6 +260,37 @@ private: // Main driver, invoked by constructor. void Compile(bool report_uncompilable); + // The following methods all create objects that track the + // initializations of a given type of value. In each, "tag" + // is the name used to identify the initializer global + // associated with the given type of value, and "type" is + // its C++ representation. Often "tag" is concatenated with + // "type" to designate a specific C++ type. For example, + // "tag" might be "Double" and "type" might be "ValPtr"; + // the resulting global's type is "DoubleValPtr". + + // Creates an object for tracking values associated with Zeek + // constants. "c_type" is the C++ type used in the initializer + // for each object; or, if empty, it specifies that we represent + // the value using an index into a separate vector that holds + // the constant. + std::shared_ptr CreateConstInitInfo(const char* tag, const char* type, + const char* c_type); + + // Creates an object for tracking compound initializers, which + // are those whose initialization uses indexes into other vectors. + std::shared_ptr CreateCompoundInitInfo(const char* tag, const char* type); + + // Creates an object for tracking initializers that have custom + // C++ objects to hold their initialization information. 
+ std::shared_ptr CreateCustomInitInfo(const char* tag, const char* type); + + // Generates the declaration associated with a set of initializations + // and tracks the object to facilitate looping over all such + // initializations. As a convenience, returns the object. + std::shared_ptr RegisterInitInfo(const char* tag, const char* type, + std::shared_ptr gi); + // Generate the beginning of the compiled code: run-time functions, // namespace, auxiliary globals. void GenProlog(); @@ -158,7 +301,7 @@ private: void RegisterCompiledBody(const std::string& f); // After compilation, generate the final code. Most of this is - // run-time initialization of various dynamic values. + // in support of run-time initialization of various dynamic values. void GenEpilog(); // True if the given function (plus body and profile) is one @@ -185,9 +328,13 @@ private: // it including some functionality we don't currently support // for compilation. // - // Indexed by the name of the function. + // Indexed by the C++ name of the function. std::unordered_set compilable_funcs; + // Tracks which functions/hooks/events have at least one non-compilable + // body. Indexed by the Zeek name of the function. + std::unordered_set not_fully_compilable; + // Maps functions (not hooks or events) to upstream compiled names. std::unordered_map hashed_funcs; @@ -200,10 +347,6 @@ private: // compilation units. int addl_tag = 0; - // If true, then we're updating the C++ base (i.e., generating - // code meant for use by subsequently generated code). - bool update = false; - // If true, the generated code should run "standalone". bool standalone = false; @@ -211,7 +354,7 @@ private: // needed for "seatbelts", to ensure that we can produce a // unique hash relating to this compilation (*and* its // compilation time, which is why these are "seatbelts" and - // likely not important to make distinct. + // likely not important to make distinct). 
p_hash_type total_hash = 0; // Working directory in which we're compiling. Used to quasi-locate @@ -236,11 +379,6 @@ private: // track it as such. void CreateGlobal(const ID* g); - // For the globals used in the compilation, if new then append - // them to the hash file to make the information available - // to subsequent compilation runs. - void UpdateGlobalHashes(); - // Register the given identifier as a BiF. If is_var is true // then the BiF is also used in a non-call context. void AddBiF(const ID* b, bool is_var); @@ -258,10 +396,9 @@ private: // The following match various forms of identifiers to the // name used for their C++ equivalent. - const char* IDName(const ID& id) { return IDName(&id); } const char* IDName(const IDPtr& id) { return IDName(id.get()); } const char* IDName(const ID* id) { return IDNameStr(id).c_str(); } - const std::string& IDNameStr(const ID* id) const; + const std::string& IDNameStr(const ID* id); // Returns a canonicalized version of a variant of a global made // distinct by the given suffix. @@ -280,12 +417,20 @@ private: // conflict with C++ keywords. std::string Canonicalize(const char* name) const; + // Returns the name of the global corresponding to an expression + // (which must be a EXPR_NAME). + std::string GlobalName(const ExprPtr& e) { return globals[e->AsNameExpr()->Id()->Name()]; } + // Maps global names (not identifiers) to the names we use for them. std::unordered_map globals; // Similar for locals, for the function currently being compiled. std::unordered_map locals; + // Retrieves the initialization information associated with the + // given global. + std::unordered_map> global_gis; + // Maps event names to the names we use for them. std::unordered_map events; @@ -307,14 +452,37 @@ private: // Similar, but for lambdas. 
void DeclareLambda(const LambdaExpr* l, const ProfileFunc* pf); - // Declares the CPPStmt subclass used for compiling the given + // Generates code to declare the compiled version of a script // function. "ft" gives the functions type, "pf" its profile, // "fname" its C++ name, "body" its AST, "l" if non-nil its // corresponding lambda expression, and "flavor" whether it's // a hook/event/function. + // + // We use two basic approaches. Most functions are represented + // by a "CPPDynStmt" object that's parameterized by a void* pointer + // to the underlying C++ function and an index used to dynamically + // cast the pointer to having the correct type for then calling it. + // Lambdas, however (including "implicit" lambdas used to associate + // complex expressions with &attributes), each have a unique + // subclass derived from CPPStmt that calls the underlying C++ + // function without requiring a cast, and that holds the values + // of the lambda's captures. + // + // It would be cleanest to use the latter approach for all functions, + // but the hundreds/thousands of additional classes required for + // doing so significantly slows down C++ compilation, so we instead + // opt for the uglier dynamic casting approach, which only requires + // one additional class. + void CreateFunction(const FuncTypePtr& ft, const ProfileFunc* pf, const std::string& fname, + const StmtPtr& body, int priority, const LambdaExpr* l, + FunctionFlavor flavor); + + // Used for the case of creating a custom subclass of CPPStmt. void DeclareSubclass(const FuncTypePtr& ft, const ProfileFunc* pf, const std::string& fname, - const StmtPtr& body, int priority, const LambdaExpr* l, - FunctionFlavor flavor); + const std::string& args, const IDPList* lambda_ids); + + // Used for the case of employing an instance of a CPPDynStmt object. + void DeclareDynCPPStmt(); // Generates the declarations (and in-line definitions) associated // with compiling a lambda. 
@@ -331,11 +499,40 @@ private: // the given type, lambda captures (if non-nil), and profile. std::string ParamDecl(const FuncTypePtr& ft, const IDPList* lambda_ids, const ProfileFunc* pf); + // Returns in p_types the types associated with the parameters for a function + // of the given type, set of lambda captures (if any), and profile. + void GatherParamTypes(std::vector& p_types, const FuncTypePtr& ft, + const IDPList* lambda_ids, const ProfileFunc* pf); + + // Same, but instead returns the parameter's names. + void GatherParamNames(std::vector& p_names, const FuncTypePtr& ft, + const IDPList* lambda_ids, const ProfileFunc* pf); + // Inspects the given profile to find the i'th parameter (starting // at 0). Returns nil if the profile indicates that that parameter // is not used by the function. const ID* FindParam(int i, const ProfileFunc* pf); + // Information associated with a CPPDynStmt dynamic dispatch. + struct DispatchInfo + { + std::string cast; // C++ cast to use for function pointer + std::string args; // arguments to pass to the function + bool is_hook; // whether the function is a hook + TypePtr yield; // what type the function returns, if any + }; + + // An array of cast/invocation pairs used to generate the CPPDynStmt + // Exec method. + std::vector func_casting_glue; + + // Maps casting strings to indices into func_casting_glue. The index + // is what's used to dynamically switch to the right dispatch. + std::unordered_map casting_index; + + // Maps functions (using their C++ name) to their casting strings. + std::unordered_map func_index; + // Names for lambda capture ID's. These require a separate space // that incorporates the lambda's name, to deal with nested lambda's // that refer to the identifiers with the same name. @@ -344,7 +541,7 @@ private: // The function's parameters. Tracked so we don't re-declare them. std::unordered_set params; - // Whether we're parsing a hook. + // Whether we're compiling a hook. 
bool in_hook = false; // @@ -362,8 +559,12 @@ private: void CompileLambda(const LambdaExpr* l, const ProfileFunc* pf); // Generates the body of the Invoke() method (which supplies the - // "glue" between for calling the C++-generated code). - void GenInvokeBody(const std::string& fname, const TypePtr& t, const std::string& args); + // "glue" for calling the C++-generated code, for CPPStmt subclasses). + void GenInvokeBody(const std::string& fname, const TypePtr& t, const std::string& args) + { + GenInvokeBody(fname + "(" + args + ")", t); + } + void GenInvokeBody(const std::string& call, const TypePtr& t); // Generates the code for the body of a script function with // the given type, profile, C++ name, AST, lambda captures @@ -405,9 +606,6 @@ private: // Maps function bodies to the names we use for them. std::unordered_map body_names; - // Reverse mapping. - std::unordered_map names_to_bodies; - // Maps function names to hashes of bodies. std::unordered_map body_hashes; @@ -426,62 +624,84 @@ private: // // End of methods related to generating compiled script bodies. - // Start of methods related to generating code for representing - // script constants as run-time values. - // See Consts.cc for definitions. - // + // Methods related to generating code for representing script constants + // as run-time values. There's only one nontrivial one of these, + // RegisterConstant() (declared above, as it's public). All the other + // work is done by secondary objects - see InitsInfo.{h,cc} for those. - // Returns an instantiation of a constant - either as a native - // C++ constant, or as a C++ variable that will be bound to - // a Zeek value at run-time initialization - that is needed - // by the given "parent" object (which acquires an initialization - // dependency, if a C++ variable is needed). 
- std::string BuildConstant(IntrusivePtr parent, const ValPtr& vp) - { - return BuildConstant(parent.get(), vp); - } - std::string BuildConstant(const Obj* parent, const ValPtr& vp); + // Returns the object used to track indices (vectors of integers + // that are used to index various other vectors, including other + // indices). Only used by CPP_InitsInfo objects, but stored + // in the CPPCompile object to make it available across different + // CPP_InitsInfo objects. - // Called to create a constant appropriate for the given expression - // or, more directly, the given value. The second method returns - // "true" if a C++ variable needed to be created to construct the - // constant at run-time initialization, false if can be instantiated - // directly as a C++ constant. - void AddConstant(const ConstExpr* c); - bool AddConstant(const ValPtr& v); - - // Build particular types of C++ variables (with the given name) - // to hold constants initialized at run-time. - void AddStringConstant(const ValPtr& v, std::string& const_name); - void AddPatternConstant(const ValPtr& v, std::string& const_name); - void AddListConstant(const ValPtr& v, std::string& const_name); - void AddRecordConstant(const ValPtr& v, std::string& const_name); - void AddTableConstant(const ValPtr& v, std::string& const_name); - void AddVectorConstant(const ValPtr& v, std::string& const_name); + friend class CPP_InitsInfo; + IndicesManager& IndMgr() { return indices_mgr; } // Maps (non-native) constants to associated C++ globals. std::unordered_map const_exprs; - // Maps the values of (non-native) constants to associated C++ globals. - std::unordered_map const_vals; + // Maps the values of (non-native) constants to associated initializer + // information. + std::unordered_map> const_vals; + + // Same, but for the offset into the vector that tracks all constants + // collectively (to support initialization of compound constants). 
+ std::unordered_map const_offsets; + + // The same as the above pair, but indexed by the string representation + // rather than the Val*. The reason for having both is to enable + // reusing common constants even though their Val*'s differ. + std::unordered_map> constants; + std::unordered_map constants_offsets; // Used for memory management associated with const_vals's index. std::vector cv_indices; - // Maps string representations of (non-native) constants to - // associated C++ globals. - std::unordered_map constants; + // For different types of constants (as indicated by TypeTag), + // provides the associated object that manages the initializers + // for those constants. + std::unordered_map> const_info; - // Maps the same representations to the Val* associated with their - // original creation. This enables us to construct initialization - // dependencies for later Val*'s that are able to reuse the same - // constant. - std::unordered_map constants_to_vals; + // Tracks entries for constructing the vector of all constants + // (regardless of type). Each entry provides a TypeTag, used + // to identify the type-specific vector for a given constant, + // and the offset into that vector. + std::vector> consts; - // Function variables that we need to create dynamically for - // initializing globals, coupled with the name of their associated - // constant. - std::unordered_map func_vars; + // The following objects track initialization information for + // different types of initializers: Zeek types, individual + // attributes, sets of attributes, expressions that call script + // functions (for attribute expressions), registering lambda + // bodies, and registering Zeek globals. 
+ std::shared_ptr type_info; + std::shared_ptr attr_info; + std::shared_ptr attrs_info; + std::shared_ptr call_exprs_info; + std::shared_ptr lambda_reg_info; + std::shared_ptr global_id_info; + + // Tracks all of the above objects (as well as each entry in + // const_info), to facilitate easy iterating over them. + std::set> all_global_info; + + // Tracks the attribute expressions for which we need to generate + // function calls to evaluate them. + std::unordered_map> init_infos; + + // See IndMgr() above for the role of this variable. + IndicesManager indices_mgr; + + // Maps strings to associated offsets. + std::unordered_map tracked_strings; + + // Tracks strings we've registered in order (corresponding to + // their offsets). + std::vector ordered_tracked_strings; + + // The same as the previous two, but for profile hashes. + std::vector ordered_tracked_hashes; + std::unordered_map tracked_hashes; // // End of methods related to generating code for script constants. @@ -649,9 +869,9 @@ private: // not the outer map). int num_rf_mappings = 0; - // For each entry in "field_mapping", the record and TypeDecl - // associated with the mapping. - std::vector> field_decls; + // For each entry in "field_mapping", the record (as a global + // offset) and TypeDecl associated with the mapping. + std::vector> field_decls; // For enums that are extended via redef's, maps each distinct // value (that the compiled scripts refer to) to locations in the @@ -665,9 +885,9 @@ private: // not the outer map). int num_ev_mappings = 0; - // For each entry in "enum_mapping", the record and name - // associated with the mapping. - std::vector> enum_names; + // For each entry in "enum_mapping", the EnumType (as a global + // offset) and name associated with the mapping. + std::vector> enum_names; // // End of methods related to generating code for AST Expr's. @@ -690,24 +910,6 @@ private: // given script type 't', converts it as needed to the given GenType. 
std::string GenericValPtrToGT(const std::string& expr, const TypePtr& t, GenType gt); - // For a given type, generates the code necessary to initialize - // it at run time. The term "expand" in the method's name refers - // to the fact that the type has already been previously declared - // (necessary to facilitate defining recursive types), so this method - // generates the "meat" of the type but not its original declaration. - void ExpandTypeVar(const TypePtr& t); - - // Methods for expanding specific such types. "tn" is the name - // of the C++ variable used for the particular type. - void ExpandListTypeVar(const TypePtr& t, std::string& tn); - void ExpandRecordTypeVar(const TypePtr& t, std::string& tn); - void ExpandEnumTypeVar(const TypePtr& t, std::string& tn); - void ExpandTableTypeVar(const TypePtr& t, std::string& tn); - void ExpandFuncTypeVar(const TypePtr& t, std::string& tn); - - // The following assumes we're populating a type_decl_list called "tl". - std::string GenTypeDecl(const TypeDecl* td); - // Returns the name of a C++ variable that will hold a TypePtr // of the appropriate flavor. 't' does not need to be a type // representative. @@ -721,21 +923,11 @@ private: const Type* TypeRep(const TypePtr& t) { return TypeRep(t.get()); } // Low-level C++ representations for types, of various flavors. - const char* TypeTagName(TypeTag tag) const; + static const char* TypeTagName(TypeTag tag); const char* TypeName(const TypePtr& t); const char* FullTypeName(const TypePtr& t); const char* TypeType(const TypePtr& t); - // Track the given type (with support methods for onces that - // are complicated), recursively including its sub-types, and - // creating initializations (and dependencies) for constructing - // C++ variables representing the types. 
- void RegisterType(const TypePtr& t); - void RegisterListType(const TypePtr& t); - void RegisterTableType(const TypePtr& t); - void RegisterRecordType(const TypePtr& t); - void RegisterFuncType(const TypePtr& t); - // Access to a type's underlying values. const char* NativeAccessor(const TypePtr& t); @@ -744,11 +936,13 @@ private: const char* IntrusiveVal(const TypePtr& t); // Maps types to indices in the global "types__CPP" array. - CPPTracker types = {"types", &compiled_items}; + CPPTracker types = {"types", true, &compiled_items}; // Used to prevent analysis of mutually-referring types from - // leading to infinite recursion. - std::unordered_set processed_types; + // leading to infinite recursion. Maps types to their global + // initialization information (or, initially, to nullptr, if + // they're in the process of being registered). + std::unordered_map> processed_types; // // End of methods related to managing script types. @@ -758,11 +952,6 @@ private: // See Attrs.cc for definitions. // - // Tracks a use of the given set of attributes, including - // initialization dependencies and the generation of any - // associated expressions. - void RegisterAttributes(const AttributesPtr& attrs); - // Populates the 2nd and 3rd arguments with C++ representations // of the tags and (optional) values/expressions associated with // the set of attributes. @@ -772,16 +961,17 @@ private: void GenAttrs(const AttributesPtr& attrs); std::string GenAttrExpr(const ExprPtr& e); - // Returns the name of the C++ variable that will hold the given - // attributes at run-time. - std::string AttrsName(const AttributesPtr& attrs); - // Returns a string representation of the name associated with - // different attributes (e.g., "ATTR_DEFAULT"). - const char* AttrName(const AttrPtr& attr); + // different attribute tags (e.g., "ATTR_DEFAULT"). + static const char* AttrName(AttrTag t); // Similar for attributes, so we can reconstruct record types. 
- CPPTracker attributes = {"attrs", &compiled_items}; + CPPTracker attributes = {"attrs", false, &compiled_items}; + + // Maps Attributes and Attr's to their global initialization + // information. + std::unordered_map> processed_attrs; + std::unordered_map> processed_attr; // // End of methods related to managing script type attributes. @@ -790,121 +980,42 @@ private: // See Inits.cc for definitions. // - // Generates code to construct a CallExpr that can be used to - // evaluate the expression 'e' as an initializer (typically - // for a record &default attribute). - void GenInitExpr(const ExprPtr& e); - - // True if the given expression is simple enough that we can - // generate code to evaluate it directly, and don't need to - // create a separate function per GenInitExpr(). - bool IsSimpleInitExpr(const ExprPtr& e) const; + // Generates code for dynamically generating an expression + // associated with an attribute, via a function call. + void GenInitExpr(std::shared_ptr ce_init); // Returns the name of a function used to evaluate an // initialization expression. std::string InitExprName(const ExprPtr& e); - // Generates code to initializes the global 'g' (with C++ name "gl") - // to the given value *if* on start-up it doesn't already have a value. - void GenGlobalInit(const ID* g, std::string& gl, const ValPtr& v); - - // Generates code to initialize all of the function-valued globals - // (i.e., those pointing to lambdas). - void GenFuncVarInits(); - - // Generates the "pre-initialization" for a given type. For - // extensible types (records, enums, lists), these are empty - // versions that we'll later populate. - void GenPreInit(const Type* t); - - // Generates a function that executes the pre-initializations. - void GenPreInits(); - - // The following all track that for a given object, code associated - // with initializing it. Multiple calls for the same object append - // additional lines of code (the order of the calls is preserved). 
- // - // Versions with "lhs" and "rhs" arguments provide an initialization - // of the form "lhs = rhs;", as a convenience. - void AddInit(const IntrusivePtr& o, const std::string& lhs, const std::string& rhs) + // Convenience functions for returning the offset or initialization cohort + // associated with an initialization. + int GI_Offset(const std::shared_ptr& gi) const { return gi ? gi->Offset() : -1; } + int GI_Cohort(const std::shared_ptr& gi) const { - AddInit(o.get(), lhs + " = " + rhs + ";"); - } - void AddInit(const Obj* o, const std::string& lhs, const std::string& rhs) - { - AddInit(o, lhs + " = " + rhs + ";"); - } - void AddInit(const IntrusivePtr& o, const std::string& init) { AddInit(o.get(), init); } - void AddInit(const Obj* o, const std::string& init); - - // We do consistency checking of initialization dependencies by - // looking for depended-on objects have initializations. Sometimes - // it's unclear whether the object will actually require - // initialization, in which case we add an empty initialization - // for it so that the consistency-checking is happy. - void AddInit(const IntrusivePtr& o) { AddInit(o.get()); } - void AddInit(const Obj* o); - - // This is akin to an initialization, but done separately - // (upon "activation") so it can include initializations that - // rely on parsing having finished (in particular, BiFs having - // been registered). Only used when generating standalone code. - void AddActivation(std::string a) { activations.emplace_back(a); } - - // Records the fact that the initialization of object o1 depends - // on that of object o2.
- void NoteInitDependency(const IntrusivePtr& o1, const IntrusivePtr& o2) - { - NoteInitDependency(o1.get(), o2.get()); - } - void NoteInitDependency(const IntrusivePtr& o1, const Obj* o2) - { - NoteInitDependency(o1.get(), o2); - } - void NoteInitDependency(const Obj* o1, const IntrusivePtr& o2) - { - NoteInitDependency(o1, o2.get()); - } - void NoteInitDependency(const Obj* o1, const Obj* o2); - - // Records an initialization dependency of the given object - // on the given type, unless the type is a record. We need - // this notion to protect against circular dependencies in - // the face of recursive records. - void NoteNonRecordInitDependency(const Obj* o, const TypePtr& t) - { - if ( t && t->Tag() != TYPE_RECORD ) - NoteInitDependency(o, TypeRep(t)); - } - void NoteNonRecordInitDependency(const IntrusivePtr o, const TypePtr& t) - { - NoteNonRecordInitDependency(o.get(), t); + return gi ? gi->InitCohort() : 0; } - // Analyzes the initialization dependencies to ensure that they're - // consistent, i.e., every object that either depends on another, - // or is itself depended on, appears in the "to_do" set. - void CheckInitConsistency(std::unordered_set& to_do); - - // Generate initializations for the items in the "to_do" set, - // in accordance with their dependencies. Returns 'n', the - // number of initialization functions generated. They should - // be called in order, from 1 to n. - int GenDependentInits(std::unordered_set& to_do); - - // Generates a function for initializing the nc'th cohort. - void GenInitCohort(int nc, std::unordered_set& cohort); - - // Initialize the mappings for record field offsets for field - // accesses into regions of records that can be extensible (and - // thus can vary at run-time to the offsets encountered during - // compilation). 
+ // Generate code to initialize the mappings for record field + // offsets for field accesses into regions of records that + // can be extensible (and thus can vary at run-time to the + // offsets encountered during compilation). void InitializeFieldMappings(); - // Same, but for enum types. The second form does a single - // initialization corresponding to the given index in the mapping. + // Same, but for enum types. void InitializeEnumMappings(); - void InitializeEnumMappings(const EnumType* et, const std::string& e_name, int index); + + // Generate code to initialize BiFs. + void InitializeBiFs(); + + // Generate code to initialize strings that we track. + void InitializeStrings(); + + // Generate code to initialize hashes that we track. + void InitializeHashes(); + + // Generate code to initialize indirect references to constants. + void InitializeConsts(); // Generate the initialization hook for this set of compiled code. void GenInitHook(); @@ -917,25 +1028,15 @@ private: // what we compiled. void GenLoad(); - // A list of pre-initializations (those potentially required by - // other initializations, and that themselves have no dependencies). - std::vector pre_inits; - - // A list of "activations" (essentially, post-initializations). - // See AddActivation() above. - std::vector activations; + // A list of BiFs to look up during initialization. First + // string is the name of the C++ global holding the BiF, the + // second is its name as known to Zeek. + std::unordered_map BiFs; // Expressions for which we need to generate initialization-time // code. Currently, these are only expressions appearing in // attributes. - CPPTracker init_exprs = {"gen_init_expr", &compiled_items}; - - // Maps an object requiring initialization to its initializers. - std::unordered_map> obj_inits; - - // Maps an object requiring initializations to its dependencies - // on other such objects. 
- std::unordered_map> obj_deps; + CPPTracker init_exprs = {"gen_init_expr", false, &compiled_items}; // // End of methods related to run-time initialization. @@ -944,12 +1045,20 @@ private: // See Emit.cc for definitions. // + // The following all need to be able to emit code. + friend class CPP_BasicConstInitsInfo; + friend class CPP_CompoundInitsInfo; + friend class IndicesManager; + // Used to create (indented) C++ {...} code blocks. "needs_semi" // controls whether to terminate the block with a ';' (such as // for class definitions. void StartBlock(); void EndBlock(bool needs_semi = false); + void IndentUp() { ++block_level; } + void IndentDown() { --block_level; } + // Various ways of generating code. The multi-argument methods // assume that the first argument is a printf-style format // (but one that can only have %s specifiers). @@ -960,11 +1069,12 @@ private: NL(); } - void Emit(const std::string& fmt, const std::string& arg) const + void Emit(const std::string& fmt, const std::string& arg, bool do_NL = true) const { Indent(); fprintf(write_file, fmt.c_str(), arg.c_str()); - NL(); + if ( do_NL ) + NL(); } void Emit(const std::string& fmt, const std::string& arg1, const std::string& arg2) const @@ -999,14 +1109,15 @@ private: NL(); } - // Returns an expression for constructing a Zeek String object - // corresponding to the given byte array. - std::string GenString(const char* b, int len) const; - - // For the given byte array / string, returns a version expanded - // with escape sequences in order to represent it as a C++ string. 
- std::string CPPEscape(const char* b, int len) const; - std::string CPPEscape(const char* s) const { return CPPEscape(s, strlen(s)); } + void Emit(const std::string& fmt, const std::string& arg1, const std::string& arg2, + const std::string& arg3, const std::string& arg4, const std::string& arg5, + const std::string& arg6) const + { + Indent(); + fprintf(write_file, fmt.c_str(), arg1.c_str(), arg2.c_str(), arg3.c_str(), arg4.c_str(), + arg5.c_str(), arg6.c_str()); + NL(); + } void NL() const { fputc('\n', write_file); } diff --git a/src/script_opt/CPP/Consts.cc b/src/script_opt/CPP/Consts.cc index c21a1db9b8..c53f5b0395 100644 --- a/src/script_opt/CPP/Consts.cc +++ b/src/script_opt/CPP/Consts.cc @@ -4,55 +4,27 @@ #include "zeek/RE.h" #include "zeek/script_opt/CPP/Compile.h" +using namespace std; + namespace zeek::detail { -using namespace std; - -string CPPCompile::BuildConstant(const Obj* parent, const ValPtr& vp) +shared_ptr CPPCompile::RegisterConstant(const ValPtr& vp, int& consts_offset) { - if ( ! vp ) - return "nullptr"; + // Make sure the value pointer, which might be transient + // in construction, sticks around so we can track its + // value. + cv_indices.push_back(vp); - if ( AddConstant(vp) ) - { - auto v = vp.get(); - AddInit(parent); - NoteInitDependency(parent, v); - - // Make sure the value pointer, which might be transient - // in construction, sticks around so we can track its - // value. - cv_indices.push_back(vp); - - return const_vals[v]; - } - else - return NativeToGT(GenVal(vp), vp->GetType(), GEN_VAL_PTR); - } - -void CPPCompile::AddConstant(const ConstExpr* c) - { - auto v = c->ValuePtr(); - - if ( AddConstant(v) ) - { - AddInit(c); - NoteInitDependency(c, v.get()); - } - } - -bool CPPCompile::AddConstant(const ValPtr& vp) - { auto v = vp.get(); + auto cv = const_vals.find(v); - if ( IsNativeType(v->GetType()) ) - // These we instantiate directly. 
- return false; - - if ( const_vals.count(v) > 0 ) + if ( cv != const_vals.end() ) + { // Already did this one. - return true; + consts_offset = const_offsets[v]; + return cv->second; + } // Formulate a key that's unique per distinct constant. @@ -79,216 +51,104 @@ bool CPPCompile::AddConstant(const ValPtr& vp) c_desc = d.Description(); } - if ( constants.count(c_desc) > 0 ) + auto c = constants.find(c_desc); + if ( c != constants.end() ) { - const_vals[v] = constants[c_desc]; - - auto orig_v = constants_to_vals[c_desc]; - ASSERT(v != orig_v); - AddInit(v); - NoteInitDependency(v, orig_v); - - return true; + const_vals[v] = c->second; + consts_offset = const_offsets[v] = constants_offsets[c_desc]; + return c->second; } - // Need a C++ global for this constant. - auto const_name = string("CPP__const__") + Fmt(int(constants.size())); - - const_vals[v] = constants[c_desc] = const_name; - constants_to_vals[c_desc] = v; - auto tag = t->Tag(); + auto const_name = const_info[tag]->NextName(); + shared_ptr gi; switch ( tag ) { - case TYPE_STRING: - AddStringConstant(vp, const_name); + case TYPE_BOOL: + gi = make_shared(vp->AsBool() ? "true" : "false"); break; - case TYPE_PATTERN: - AddPatternConstant(vp, const_name); + case TYPE_INT: + gi = make_shared(to_string(vp->AsInt())); break; - case TYPE_LIST: - AddListConstant(vp, const_name); + case TYPE_COUNT: + gi = make_shared(to_string(vp->AsCount()) + "ULL"); break; - case TYPE_RECORD: - AddRecordConstant(vp, const_name); + case TYPE_DOUBLE: + gi = make_shared(to_string(vp->AsDouble())); break; - case TYPE_TABLE: - AddTableConstant(vp, const_name); + case TYPE_TIME: + gi = make_shared(to_string(vp->AsDouble())); break; - case TYPE_VECTOR: - AddVectorConstant(vp, const_name); + case TYPE_INTERVAL: + gi = make_shared(to_string(vp->AsDouble())); break; case TYPE_ADDR: - case TYPE_SUBNET: - { - auto prefix = (tag == TYPE_ADDR) ? 
"Addr" : "SubNet"; - - Emit("%sValPtr %s;", prefix, const_name); - - ODesc d; - v->Describe(&d); - - AddInit(v, const_name, - string("make_intrusive<") + prefix + "Val>(\"" + d.Description() + "\")"); - } + gi = make_shared(this, vp); break; - case TYPE_FUNC: - Emit("FuncValPtr %s;", const_name); + case TYPE_SUBNET: + gi = make_shared(this, vp); + break; - // We can't generate the initialization now because it - // depends on first having compiled the associated body, - // so we know its hash. So for now we just note it - // to deal with later. - func_vars[v->AsFuncVal()] = const_name; + case TYPE_ENUM: + gi = make_shared(this, vp); + break; + + case TYPE_STRING: + gi = make_shared(this, vp); + break; + + case TYPE_PATTERN: + gi = make_shared(this, vp); + break; + + case TYPE_PORT: + gi = make_shared(vp); + break; + + case TYPE_LIST: + gi = make_shared(this, vp); + break; + + case TYPE_VECTOR: + gi = make_shared(this, vp); + break; + + case TYPE_RECORD: + gi = make_shared(this, vp); + break; + + case TYPE_TABLE: + gi = make_shared(this, vp); break; case TYPE_FILE: - { - Emit("FileValPtr %s;", const_name); + gi = make_shared(this, vp); + break; - auto f = cast_intrusive(vp)->Get(); - - AddInit(v, const_name, - string("make_intrusive(") + "make_intrusive(\"" + f->Name() + - "\", \"w\"))"); - } + case TYPE_FUNC: + gi = make_shared(this, vp); break; default: reporter->InternalError("bad constant type in CPPCompile::AddConstant"); + break; } - return true; - } + const_info[tag]->AddInstance(gi); + const_vals[v] = constants[c_desc] = gi; -void CPPCompile::AddStringConstant(const ValPtr& v, string& const_name) - { - Emit("StringValPtr %s;", const_name); + consts_offset = const_offsets[v] = constants_offsets[c_desc] = consts.size(); + consts.emplace_back(pair(tag, gi->Offset())); - auto s = v->AsString(); - const char* b = (const char*)(s->Bytes()); - auto len = s->Len(); - - AddInit(v, const_name, GenString(b, len)); - } - -void CPPCompile::AddPatternConstant(const 
ValPtr& v, string& const_name) - { - Emit("PatternValPtr %s;", const_name); - - auto re = v->AsPatternVal()->Get(); - - AddInit(v, string("{ auto re = new RE_Matcher(") + CPPEscape(re->OrigText()) + ");"); - - if ( re->IsCaseInsensitive() ) - AddInit(v, "re->MakeCaseInsensitive();"); - - AddInit(v, "re->Compile();"); - AddInit(v, const_name, "make_intrusive(re)"); - AddInit(v, "}"); - } - -void CPPCompile::AddListConstant(const ValPtr& v, string& const_name) - { - Emit("ListValPtr %s;", const_name); - - // No initialization dependency on the main type since we don't - // use the underlying TypeList. However, we *do* use the types of - // the elements. - - AddInit(v, const_name, string("make_intrusive(TYPE_ANY)")); - - auto lv = cast_intrusive(v); - auto n = lv->Length(); - - for ( auto i = 0; i < n; ++i ) - { - const auto& l_i = lv->Idx(i); - auto l_i_c = BuildConstant(v, l_i); - AddInit(v, const_name + "->Append(" + l_i_c + ");"); - NoteInitDependency(v, TypeRep(l_i->GetType())); - } - } - -void CPPCompile::AddRecordConstant(const ValPtr& v, string& const_name) - { - const auto& t = v->GetType(); - - Emit("RecordValPtr %s;", const_name); - - NoteInitDependency(v, TypeRep(t)); - - AddInit(v, const_name, - string("make_intrusive(") + "cast_intrusive(" + GenTypeName(t) + - "))"); - - auto r = cast_intrusive(v); - auto n = r->NumFields(); - - for ( auto i = 0u; i < n; ++i ) - { - const auto& r_i = r->GetField(i); - - if ( r_i ) - { - auto r_i_c = BuildConstant(v, r_i); - AddInit(v, const_name + "->Assign(" + Fmt(static_cast(i)) + ", " + r_i_c + ");"); - } - } - } - -void CPPCompile::AddTableConstant(const ValPtr& v, string& const_name) - { - const auto& t = v->GetType(); - - Emit("TableValPtr %s;", const_name); - - NoteInitDependency(v, TypeRep(t)); - - AddInit(v, const_name, - string("make_intrusive(") + "cast_intrusive(" + GenTypeName(t) + - "))"); - - auto tv = cast_intrusive(v); - auto tv_map = tv->ToMap(); - - for ( auto& tv_i : tv_map ) - { - auto ind = 
BuildConstant(v, tv_i.first); - auto val = BuildConstant(v, tv_i.second); - AddInit(v, const_name + "->Assign(" + ind + ", " + val + ");"); - } - } - -void CPPCompile::AddVectorConstant(const ValPtr& v, string& const_name) - { - const auto& t = v->GetType(); - - Emit("VectorValPtr %s;", const_name); - - NoteInitDependency(v, TypeRep(t)); - - AddInit(v, const_name, - string("make_intrusive(") + "cast_intrusive(" + GenTypeName(t) + - "))"); - - auto vv = cast_intrusive(v); - auto n = vv->Size(); - - for ( auto i = 0u; i < n; ++i ) - { - const auto& v_i = vv->ValAt(i); - auto v_i_c = BuildConstant(v, v_i); - AddInit(v, const_name + "->Append(" + v_i_c + ");"); - } + return gi; } } // zeek::detail diff --git a/src/script_opt/CPP/DeclFunc.cc b/src/script_opt/CPP/DeclFunc.cc index dbca009052..0d9117d2d8 100644 --- a/src/script_opt/CPP/DeclFunc.cc +++ b/src/script_opt/CPP/DeclFunc.cc @@ -22,7 +22,7 @@ void CPPCompile::DeclareFunc(const FuncInfo& func) const auto& body = func.Body(); auto priority = func.Priority(); - DeclareSubclass(f->GetType(), pf, fname, body, priority, nullptr, f->Flavor()); + CreateFunction(f->GetType(), pf, fname, body, priority, nullptr, f->Flavor()); if ( f->GetBodies().size() == 1 ) compiled_simple_funcs[f->Name()] = fname; @@ -40,17 +40,88 @@ void CPPCompile::DeclareLambda(const LambdaExpr* l, const ProfileFunc* pf) for ( auto id : ids ) lambda_names[id] = LocalName(id); - DeclareSubclass(l_id->GetType(), pf, lname, body, 0, l, FUNC_FLAVOR_FUNCTION); + CreateFunction(l_id->GetType(), pf, lname, body, 0, l, FUNC_FLAVOR_FUNCTION); } -void CPPCompile::DeclareSubclass(const FuncTypePtr& ft, const ProfileFunc* pf, const string& fname, - const StmtPtr& body, int priority, const LambdaExpr* l, - FunctionFlavor flavor) +void CPPCompile::CreateFunction(const FuncTypePtr& ft, const ProfileFunc* pf, const string& fname, + const StmtPtr& body, int priority, const LambdaExpr* l, + FunctionFlavor flavor) { const auto& yt = ft->Yield(); in_hook = flavor == 
FUNC_FLAVOR_HOOK; const IDPList* lambda_ids = l ? &l->OuterIDs() : nullptr; + string args = BindArgs(ft, lambda_ids); + + auto yt_decl = in_hook ? "bool" : FullTypeName(yt); + + vector p_types; + GatherParamTypes(p_types, ft, lambda_ids, pf); + + string cast = string(yt_decl) + "(*)("; + for ( auto& pt : p_types ) + cast += pt + ", "; + cast += string("Frame*)"); + + // We need to distinguish between hooks and non-hooks that happen + // to have matching type signatures. They'll be equivalent if they + // have identical casts. To keep them separate, we cheat and + // make hook casts different, string-wise, without altering their + // semantics. + if ( in_hook ) + cast += " "; + + func_index[fname] = cast; + + if ( casting_index.count(cast) == 0 ) + { + casting_index[cast] = func_casting_glue.size(); + + DispatchInfo di; + di.cast = cast; + di.args = args; + di.is_hook = in_hook; + di.yield = yt; + + func_casting_glue.emplace_back(di); + } + + if ( lambda_ids ) + { + DeclareSubclass(ft, pf, fname, args, lambda_ids); + BuildLambda(ft, pf, fname, body, l, lambda_ids); + EndBlock(true); + } + else + { + Emit("static %s %s(%s);", yt_decl, fname, ParamDecl(ft, lambda_ids, pf)); + + // Track this function as known to have been compiled. + // We don't track lambda bodies as compiled because they + // can't be instantiated directly without also supplying + // the captures. In principle we could make an exception + // for lambdas that don't take any arguments, but that + // seems potentially more confusing than beneficial.
+ compiled_funcs.emplace(fname); + + auto loc_f = script_specific_filename(body); + cf_locs[fname] = loc_f; + } + + auto h = pf->HashVal(); + + body_hashes[fname] = h; + body_priorities[fname] = priority; + body_names.emplace(body.get(), fname); + + total_hash = merge_p_hashes(total_hash, h); + } + +void CPPCompile::DeclareSubclass(const FuncTypePtr& ft, const ProfileFunc* pf, const string& fname, + const string& args, const IDPList* lambda_ids) + { + const auto& yt = ft->Yield(); + auto yt_decl = in_hook ? "bool" : FullTypeName(yt); NL(); @@ -76,8 +147,7 @@ void CPPCompile::DeclareSubclass(const FuncTypePtr& ft, const ProfileFunc* pf, c } } - Emit("%s_cl(const char* name%s) : CPPStmt(name)%s { }", fname, addl_args.c_str(), - inits.c_str()); + Emit("%s_cl(const char* name%s) : CPPStmt(name)%s { }", fname, addl_args, inits); // An additional constructor just used to generate place-holder // instances, due to the mis-design that lambdas are identified @@ -92,7 +162,7 @@ void CPPCompile::DeclareSubclass(const FuncTypePtr& ft, const ProfileFunc* pf, c if ( in_hook ) { - Emit("if ( ! %s(%s) )", fname, BindArgs(ft, lambda_ids)); + Emit("if ( ! %s(%s) )", fname, args); StartBlock(); Emit("flow = FLOW_BREAK;"); EndBlock(); @@ -100,42 +170,36 @@ void CPPCompile::DeclareSubclass(const FuncTypePtr& ft, const ProfileFunc* pf, c } else if ( IsNativeType(yt) ) - GenInvokeBody(fname, yt, BindArgs(ft, lambda_ids)); + GenInvokeBody(fname, yt, args); else - Emit("return %s(%s);", fname, BindArgs(ft, lambda_ids)); + Emit("return %s(%s);", fname, args); EndBlock(); + } - if ( lambda_ids ) - BuildLambda(ft, pf, fname, body, l, lambda_ids); - else - { - // Track this function as known to have been compiled. - // We don't track lambda bodies as compiled because they - // can't be instantiated directly without also supplying - // the captures. 
In principle we could make an exception - // for lambdas that don't take any arguments, but that - // seems potentially more confusing than beneficial. - compiled_funcs.emplace(fname); - - auto loc_f = script_specific_filename(body); - cf_locs[fname] = loc_f; - - // Some guidance for those looking through the generated code. - Emit("// compiled body for: %s", loc_f); - } - - EndBlock(true); - - auto h = pf->HashVal(); - - body_hashes[fname] = h; - body_priorities[fname] = priority; - body_names.emplace(body.get(), fname); - names_to_bodies.emplace(fname, body.get()); - - total_hash = merge_p_hashes(total_hash, h); +void CPPCompile::DeclareDynCPPStmt() + { + Emit("// A version of CPPStmt that manages a function pointer and"); + Emit("// dynamically casts it to a given type to call it via Exec()."); + Emit("// We will later generate a custom Exec method to support this"); + Emit("// dispatch. All of this is ugly, and only needed because clang"); + Emit("// goes nuts (super slow) in the face of thousands of templates"); + Emit("// in a given context (initializers, or a function body)."); + Emit("class CPPDynStmt : public CPPStmt"); + Emit("\t{"); + Emit("public:"); + Emit("\tCPPDynStmt(const char* _name, void* _func, int _type_signature) : CPPStmt(_name), " + "func(_func), type_signature(_type_signature) { }"); + Emit("\tValPtr Exec(Frame* f, StmtFlowType& flow) override final;"); + Emit("private:"); + Emit("\t// The function to call in Exec()."); + Emit("\tvoid* func;"); + Emit("\t// Used via a switch in the dynamically-generated Exec() method"); + Emit("\t// to cast func to the write type, and to call it with the"); + Emit("\t// right arguments pulled out of the frame."); + Emit("\tint type_signature;"); + Emit("\t};"); } void CPPCompile::BuildLambda(const FuncTypePtr& ft, const ProfileFunc* pf, const string& fname, @@ -146,28 +210,17 @@ void CPPCompile::BuildLambda(const FuncTypePtr& ft, const ProfileFunc* pf, const { auto name = lambda_names[id]; auto tn = 
FullTypeName(id->GetType()); - Emit("%s %s;", tn, name.c_str()); + Emit("%s %s;", tn, name); } // Generate initialization to create and register the lambda. - auto literal_name = string("\"") + l->Name() + "\""; - auto instantiate = string("make_intrusive<") + fname + "_cl>(" + literal_name + ")"; + auto h = pf->HashVal(); + auto nl = lambda_ids->length(); + bool has_captures = nl > 0; - int nl = lambda_ids->length(); - auto h = Fmt(pf->HashVal()); - auto has_captures = nl > 0 ? "true" : "false"; - auto l_init = string("register_lambda__CPP(") + instantiate + ", " + h + ", \"" + l->Name() + - "\", " + GenTypeName(ft) + ", " + has_captures + ");"; - - AddInit(l, l_init); - NoteInitDependency(l, TypeRep(ft)); - - // Make the lambda's body's initialization depend on the lambda's - // initialization. That way GenFuncVarInits() can generate - // initializations with the assurance that the associated body - // hashes will have been registered. - AddInit(body.get()); - NoteInitDependency(body.get(), l); + auto gi = make_shared(this, l->Name(), ft, fname + "_cl", h, + has_captures); + lambda_reg_info->AddInstance(gi); // Generate method to extract the lambda captures from a deserialized // Frame object. @@ -237,17 +290,71 @@ string CPPCompile::BindArgs(const FuncTypePtr& ft, const IDPList* lambda_ids) string CPPCompile::ParamDecl(const FuncTypePtr& ft, const IDPList* lambda_ids, const ProfileFunc* pf) { - const auto& params = ft->Params(); - int n = params->NumFields(); + vector p_types; + vector p_names; + + GatherParamTypes(p_types, ft, lambda_ids, pf); + GatherParamNames(p_names, ft, lambda_ids, pf); + + ASSERT(p_types.size() == p_names.size()); string decl; + for ( auto i = 0U; i < p_types.size(); ++i ) + decl += p_types[i] + " " + p_names[i] + ", "; + + // Add in the declaration of the frame. 
+ return decl + "Frame* f__CPP"; + } + +void CPPCompile::GatherParamTypes(vector& p_types, const FuncTypePtr& ft, + const IDPList* lambda_ids, const ProfileFunc* pf) + { + const auto& params = ft->Params(); + int n = params->NumFields(); + for ( auto i = 0; i < n; ++i ) { const auto& t = params->GetFieldType(i); auto tn = FullTypeName(t); auto param_id = FindParam(i, pf); - string fn; + + if ( IsNativeType(t) ) + // Native types are always pass-by-value. + p_types.emplace_back(tn); + else + { + if ( param_id && pf->Assignees().count(param_id) > 0 ) + // We modify the parameter. + p_types.emplace_back(tn); + else + // Not modified, so pass by const reference. + p_types.emplace_back(string("const ") + tn + "&"); + } + } + + if ( lambda_ids ) + // Add the captures as additional parameters. + for ( auto& id : *lambda_ids ) + { + const auto& t = id->GetType(); + auto tn = FullTypeName(t); + + // Allow the captures to be modified. + p_types.emplace_back(string(tn) + "& "); + } + } + +void CPPCompile::GatherParamNames(vector& p_names, const FuncTypePtr& ft, + const IDPList* lambda_ids, const ProfileFunc* pf) + { + const auto& params = ft->Params(); + int n = params->NumFields(); + + for ( auto i = 0; i < n; ++i ) + { + const auto& t = params->GetFieldType(i); + auto param_id = FindParam(i, pf); if ( param_id ) { @@ -255,50 +362,22 @@ string CPPCompile::ParamDecl(const FuncTypePtr& ft, const IDPList* lambda_ids, // We'll need to translate the parameter // from its current representation to // type "any". - fn = string("any_param__CPP_") + Fmt(i); + p_names.emplace_back(string("any_param__CPP_") + Fmt(i)); else - fn = LocalName(param_id); + p_names.emplace_back(LocalName(param_id)); } else - // Parameters that are unused don't wind up - // in the ProfileFunc. Rather than dig their - // name out of the function's declaration, we - // explicitly name them to reflect that they're - // unused. 
- fn = string("unused_param__CPP_") + Fmt(i); - - if ( IsNativeType(t) ) - // Native types are always pass-by-value. - decl = decl + tn + " " + fn; - else - { - if ( param_id && pf->Assignees().count(param_id) > 0 ) - // We modify the parameter. - decl = decl + tn + " " + fn; - else - // Not modified, so pass by const reference. - decl = decl + "const " + tn + "& " + fn; - } - - decl += ", "; + // Parameters that are unused don't wind up in the + // ProfileFunc. Rather than dig their name out of + // the function's declaration, we explicitly name + // them to reflect that they're unused. + p_names.emplace_back(string("unused_param__CPP_") + Fmt(i)); } if ( lambda_ids ) - { // Add the captures as additional parameters. for ( auto& id : *lambda_ids ) - { - auto name = lambda_names[id]; - const auto& t = id->GetType(); - auto tn = FullTypeName(t); - - // Allow the captures to be modified. - decl = decl + tn + "& " + name + ", "; - } - } - - // Add in the declaration of the frame. - return decl + "Frame* f__CPP"; + p_names.emplace_back(lambda_names[id]); } const ID* CPPCompile::FindParam(int i, const ProfileFunc* pf) diff --git a/src/script_opt/CPP/Driver.cc b/src/script_opt/CPP/Driver.cc index ccaa0a0190..dd7348ddca 100644 --- a/src/script_opt/CPP/Driver.cc +++ b/src/script_opt/CPP/Driver.cc @@ -12,14 +12,13 @@ namespace zeek::detail using namespace std; CPPCompile::CPPCompile(vector& _funcs, ProfileFuncs& _pfs, const string& gen_name, - const string& _addl_name, CPPHashManager& _hm, bool _update, - bool _standalone, bool report_uncompilable) - : funcs(_funcs), pfs(_pfs), hm(_hm), update(_update), standalone(_standalone) + const string& _addl_name, CPPHashManager& _hm, bool _standalone, + bool report_uncompilable) + : funcs(_funcs), pfs(_pfs), hm(_hm), standalone(_standalone) { addl_name = _addl_name; - bool is_addl = hm.IsAppend(); - auto target_name = is_addl ? addl_name.c_str() : gen_name.c_str(); - auto mode = is_addl ? 
"a" : "w"; + auto target_name = gen_name.c_str(); + auto mode = "w"; write_file = fopen(target_name, mode); if ( ! write_file ) @@ -27,30 +26,6 @@ CPPCompile::CPPCompile(vector& _funcs, ProfileFuncs& _pfs, const strin reporter->Error("can't open C++ target file %s", target_name); exit(1); } - - if ( is_addl ) - { - // We need a unique number to associate with the name - // space for the code we're adding. A convenient way to - // generate this safely is to use the present size of the - // file we're appending to. That guarantees that every - // incremental compilation will wind up with a different - // number. - struct stat st; - if ( fstat(fileno(write_file), &st) != 0 ) - { - char buf[256]; - util::zeek_strerror_r(errno, buf, sizeof(buf)); - reporter->Error("fstat failed on %s: %s", target_name, buf); - exit(1); - } - - // We use a value of "0" to mean "we're not appending, - // we're generating from scratch", so make sure we're - // distinct from that. - addl_tag = st.st_size + 1; - } - else { // Create an empty "additional" file. @@ -83,10 +58,6 @@ void CPPCompile::Compile(bool report_uncompilable) working_dir = buf; - if ( update && addl_tag > 0 && CheckForCollisions() ) - // Inconsistent compilation environment. 
- exit(1); - GenProlog(); // Determine which functions we can call directly, and reuse @@ -100,9 +71,13 @@ void CPPCompile::Compile(bool report_uncompilable) const char* reason; if ( IsCompilable(func, &reason) ) compilable_funcs.insert(BodyName(func)); - else if ( reason && report_uncompilable ) - fprintf(stderr, "%s cannot be compiled to C++ due to %s\n", func.Func()->Name(), - reason); + else + { + if ( reason && report_uncompilable ) + fprintf(stderr, "%s cannot be compiled to C++ due to %s\n", func.Func()->Name(), + reason); + not_fully_compilable.insert(func.Func()->Name()); + } auto h = func.Profile()->HashVal(); if ( hm.HasHash(h) ) @@ -119,39 +94,24 @@ void CPPCompile::Compile(bool report_uncompilable) { TypePtr tp{NewRef{}, (Type*)(t)}; types.AddKey(tp, pfs.HashType(t)); + (void)RegisterType(tp); } - for ( const auto& t : types.DistinctKeys() ) - if ( ! types.IsInherited(t) ) - // Type is new to this compilation, so we'll - // be generating it. - Emit("TypePtr %s;", types.KeyName(t)); + // ### This doesn't work for -O add-C++ + Emit("TypePtr types__CPP[%s];", Fmt(static_cast(types.DistinctKeys().size()))); NL(); - for ( const auto& c : pfs.Constants() ) - AddConstant(c); +#if 0 + for ( auto gi : all_global_info ) + Emit(gi->Declare()); NL(); +#endif for ( auto& g : pfs.AllGlobals() ) CreateGlobal(g); - // Now that the globals are created, register their attributes, - // if any, and generate their initialization for use in standalone - // scripts. We can't do these in CreateGlobal() because at that - // point it's possible that some of the globals refer to other - // globals not-yet-created. 
- for ( auto& g : pfs.AllGlobals() ) - { - RegisterAttributes(g->GetAttrs()); - if ( g->HasVal() ) - { - auto gn = string(g->Name()); - GenGlobalInit(g, globals[gn], g->GetVal()); - } - } - for ( const auto& e : pfs.Events() ) if ( AddGlobal(e, "gl", false) ) Emit("EventHandlerPtr %s_ev;", globals[string(e)]); @@ -201,10 +161,13 @@ void CPPCompile::Compile(bool report_uncompilable) lambda_names.insert(n); } + NL(); + Emit("std::vector CPP__bodies_to_register = {"); + for ( const auto& f : compiled_funcs ) RegisterCompiledBody(f); - GenFuncVarInits(); + Emit("};"); GenEpilog(); } @@ -217,12 +180,75 @@ void CPPCompile::GenProlog() Emit("namespace zeek::detail { //\n"); } - Emit("namespace CPP_%s { // %s\n", Fmt(addl_tag), working_dir.c_str()); + Emit("namespace CPP_%s { // %s\n", Fmt(addl_tag), working_dir); // The following might-or-might-not wind up being populated/used. Emit("std::vector field_mapping;"); Emit("std::vector enum_mapping;"); NL(); + + const_info[TYPE_BOOL] = CreateConstInitInfo("Bool", "ValPtr", "bool"); + const_info[TYPE_INT] = CreateConstInitInfo("Int", "ValPtr", "bro_int_t"); + const_info[TYPE_COUNT] = CreateConstInitInfo("Count", "ValPtr", "bro_uint_t"); + const_info[TYPE_DOUBLE] = CreateConstInitInfo("Double", "ValPtr", "double"); + const_info[TYPE_TIME] = CreateConstInitInfo("Time", "ValPtr", "double"); + const_info[TYPE_INTERVAL] = CreateConstInitInfo("Interval", "ValPtr", "double"); + const_info[TYPE_ADDR] = CreateConstInitInfo("Addr", "ValPtr", ""); + const_info[TYPE_SUBNET] = CreateConstInitInfo("SubNet", "ValPtr", ""); + const_info[TYPE_PORT] = CreateConstInitInfo("Port", "ValPtr", "uint32_t"); + + const_info[TYPE_ENUM] = CreateCompoundInitInfo("Enum", "ValPtr"); + const_info[TYPE_STRING] = CreateCompoundInitInfo("String", "ValPtr"); + const_info[TYPE_LIST] = CreateCompoundInitInfo("List", "ValPtr"); + const_info[TYPE_PATTERN] = CreateCompoundInitInfo("Pattern", "ValPtr"); + const_info[TYPE_VECTOR] = CreateCompoundInitInfo("Vector", 
"ValPtr"); + const_info[TYPE_RECORD] = CreateCompoundInitInfo("Record", "ValPtr"); + const_info[TYPE_TABLE] = CreateCompoundInitInfo("Table", "ValPtr"); + const_info[TYPE_FUNC] = CreateCompoundInitInfo("Func", "ValPtr"); + const_info[TYPE_FILE] = CreateCompoundInitInfo("File", "ValPtr"); + + type_info = CreateCompoundInitInfo("Type", "Ptr"); + attr_info = CreateCompoundInitInfo("Attr", "Ptr"); + attrs_info = CreateCompoundInitInfo("Attributes", "Ptr"); + + call_exprs_info = CreateCustomInitInfo("CallExpr", "Ptr"); + lambda_reg_info = CreateCustomInitInfo("LambdaRegistration", ""); + global_id_info = CreateCustomInitInfo("GlobalID", ""); + + NL(); + DeclareDynCPPStmt(); + NL(); + } + +shared_ptr CPPCompile::CreateConstInitInfo(const char* tag, const char* type, + const char* c_type) + { + auto gi = make_shared(tag, type, c_type); + return RegisterInitInfo(tag, type, gi); + } + +shared_ptr CPPCompile::CreateCompoundInitInfo(const char* tag, const char* type) + { + auto gi = make_shared(tag, type); + return RegisterInitInfo(tag, type, gi); + } + +shared_ptr CPPCompile::CreateCustomInitInfo(const char* tag, const char* type) + { + auto gi = make_shared(tag, type); + if ( type[0] == '\0' ) + gi->SetCPPType("void*"); + + return RegisterInitInfo(tag, type, gi); + } + +shared_ptr CPPCompile::RegisterInitInfo(const char* tag, const char* type, + shared_ptr gi) + { + string v_type = type[0] ? (string(tag) + type) : "void*"; + Emit("std::vector<%s> CPP__%s__;", v_type, string(tag)); + all_global_info.insert(gi); + return gi; } void CPPCompile::RegisterCompiledBody(const string& f) @@ -232,8 +258,9 @@ void CPPCompile::RegisterCompiledBody(const string& f) // Build up an initializer of the events relevant to the function. 
string events; - if ( body_events.count(f) > 0 ) - for ( const auto& e : body_events[f] ) + auto be = body_events.find(f); + if ( be != body_events.end() ) + for ( const auto& e : be->second ) { if ( events.size() > 0 ) events += ", "; @@ -252,74 +279,136 @@ void CPPCompile::RegisterCompiledBody(const string& f) // same binary). h = merge_p_hashes(h, p_hash(cf_locs[f])); - auto init = string("register_body__CPP(make_intrusive<") + f + "_cl>(\"" + f + "\"), " + - Fmt(p) + ", " + Fmt(h) + ", " + events + ");"; - - AddInit(names_to_bodies[f], init); - - if ( update ) - { - fprintf(hm.HashFile(), "func\n%s%s\n", scope_prefix(addl_tag).c_str(), f.c_str()); - fprintf(hm.HashFile(), "%llu\n", h); - } + auto fi = func_index.find(f); + ASSERT(fi != func_index.end()); + auto type_signature = casting_index[fi->second]; + Emit("\tCPP_RegisterBody(\"%s\", (void*) %s, %s, %s, %s, std::vector(%s)),", f, f, + Fmt(type_signature), Fmt(p), Fmt(h), events); } void CPPCompile::GenEpilog() { NL(); + for ( const auto& ii : init_infos ) + GenInitExpr(ii.second); - for ( const auto& e : init_exprs.DistinctKeys() ) + NL(); + Emit("ValPtr CPPDynStmt::Exec(Frame* f, StmtFlowType& flow)"); + StartBlock(); + Emit("flow = FLOW_RETURN;"); + Emit("switch ( type_signature )"); + StartBlock(); + for ( auto i = 0U; i < func_casting_glue.size(); ++i ) { - GenInitExpr(e); - if ( update ) - init_exprs.LogIfNew(e, addl_tag, hm.HashFile()); + Emit("case %s:", to_string(i)); + StartBlock(); + auto& glue = func_casting_glue[i]; + + auto invoke = string("(*(") + glue.cast + ")(func))(" + glue.args + ")"; + + if ( glue.is_hook ) + { + Emit("if ( ! 
%s )", invoke); + StartBlock(); + Emit("flow = FLOW_BREAK;"); + EndBlock(); + Emit("return nullptr;"); + } + + else if ( IsNativeType(glue.yield) ) + GenInvokeBody(invoke, glue.yield); + + else + Emit("return %s;", invoke); + + EndBlock(); } - for ( const auto& a : attributes.DistinctKeys() ) - { - GenAttrs(a); - if ( update ) - attributes.LogIfNew(a, addl_tag, hm.HashFile()); - } + Emit("default:"); + Emit("\treporter->InternalError(\"invalid type in CPPDynStmt::Exec\");"); + Emit("\treturn nullptr;"); - // Generate the guts of compound types, and preserve type names - // if present. - for ( const auto& t : types.DistinctKeys() ) - { - ExpandTypeVar(t); - if ( update ) - types.LogIfNew(t, addl_tag, hm.HashFile()); - } + EndBlock(); + EndBlock(); - InitializeEnumMappings(); + NL(); - GenPreInits(); - - unordered_set to_do; - for ( const auto& oi : obj_inits ) - to_do.insert(oi.first); - - CheckInitConsistency(to_do); - auto nc = GenDependentInits(to_do); + for ( auto gi : all_global_info ) + gi->GenerateInitializers(this); if ( standalone ) GenStandaloneActivation(); + NL(); + InitializeEnumMappings(); + + NL(); + InitializeFieldMappings(); + + NL(); + InitializeBiFs(); + + NL(); + indices_mgr.Generate(this); + + NL(); + InitializeStrings(); + + NL(); + InitializeHashes(); + + NL(); + InitializeConsts(); + NL(); Emit("void init__CPP()"); StartBlock(); - Emit("enum_mapping.resize(%s);\n", Fmt(int(enum_names.size()))); - Emit("pre_init__CPP();"); + Emit("std::vector> InitIndices;"); + Emit("generate_indices_set(CPP__Indices__init, InitIndices);"); + + Emit("std::map> InitConsts;"); NL(); - for ( auto i = 1; i <= nc; ++i ) - Emit("init_%s__CPP();", Fmt(i)); + for ( const auto& ci : const_info ) + { + auto& gi = ci.second; + Emit("InitConsts.emplace(%s, std::make_shared>(%s));", + TypeTagName(ci.first), gi->CPPType(), gi->InitsName()); + } + + Emit("InitsManager im(CPP__ConstVals, InitConsts, InitIndices, CPP__Strings, CPP__Hashes, " + "CPP__Type__, CPP__Attributes__, 
CPP__Attr__, CPP__CallExpr__);"); + + NL(); + Emit("for ( auto& b : CPP__bodies_to_register )"); + StartBlock(); + Emit("auto f = make_intrusive(b.func_name.c_str(), b.func, b.type_signature);"); + Emit("register_body__CPP(f, b.priority, b.h, b.events);"); + EndBlock(); + + NL(); + int max_cohort = 0; + for ( auto gi : all_global_info ) + max_cohort = std::max(max_cohort, gi->MaxCohort()); + + for ( auto c = 0; c <= max_cohort; ++c ) + for ( auto gi : all_global_info ) + if ( gi->CohortSize(c) > 0 ) + Emit("%s.InitializeCohort(&im, %s);", gi->InitializersName(), Fmt(c)); + + NL(); + Emit("for ( auto& b : CPP__BiF_lookups__ )"); + Emit("\tb.ResolveBiF();"); // Populate mappings for dynamic offsets. NL(); - InitializeFieldMappings(); + Emit("for ( auto& em : CPP__enum_mappings__ )"); + Emit("\tenum_mapping.push_back(em.ComputeOffset(&im));"); + NL(); + Emit("for ( auto& fm : CPP__field_mappings__ )"); + Emit("\tfield_mapping.push_back(fm.ComputeOffset(&im));"); if ( standalone ) Emit("standalone_init__CPP();"); @@ -328,10 +417,7 @@ void CPPCompile::GenEpilog() GenInitHook(); - Emit("} // %s\n\n", scope_prefix(addl_tag).c_str()); - - if ( update ) - UpdateGlobalHashes(); + Emit("} // %s\n\n", scope_prefix(addl_tag)); if ( addl_tag > 0 ) return; diff --git a/src/script_opt/CPP/Emit.cc b/src/script_opt/CPP/Emit.cc index 91a79b2a2f..84e122f9c0 100644 --- a/src/script_opt/CPP/Emit.cc +++ b/src/script_opt/CPP/Emit.cc @@ -13,75 +13,14 @@ using namespace std; void CPPCompile::StartBlock() { - ++block_level; + IndentUp(); Emit("{"); } void CPPCompile::EndBlock(bool needs_semi) { Emit("}%s", needs_semi ? 
";" : ""); - --block_level; - } - -string CPPCompile::GenString(const char* b, int len) const - { - return string("make_intrusive(") + Fmt(len) + ", " + CPPEscape(b, len) + ")"; - } - -string CPPCompile::CPPEscape(const char* b, int len) const - { - string res = "\""; - - for ( int i = 0; i < len; ++i ) - { - unsigned char c = b[i]; - - switch ( c ) - { - case '\a': - res += "\\a"; - break; - case '\b': - res += "\\b"; - break; - case '\f': - res += "\\f"; - break; - case '\n': - res += "\\n"; - break; - case '\r': - res += "\\r"; - break; - case '\t': - res += "\\t"; - break; - case '\v': - res += "\\v"; - break; - - case '\\': - res += "\\\\"; - break; - case '"': - res += "\\\""; - break; - - default: - if ( isprint(c) ) - res += c; - else - { - char buf[8192]; - snprintf(buf, sizeof buf, "%03o", c); - res += "\\"; - res += buf; - } - break; - } - } - - return res + "\""; + IndentDown(); } void CPPCompile::Indent() const diff --git a/src/script_opt/CPP/Exprs.cc b/src/script_opt/CPP/Exprs.cc index c2a9cc2753..dd3baa995c 100644 --- a/src/script_opt/CPP/Exprs.cc +++ b/src/script_opt/CPP/Exprs.cc @@ -232,7 +232,12 @@ string CPPCompile::GenConstExpr(const ConstExpr* c, GenType gt) const auto& t = c->GetType(); if ( ! IsNativeType(t) ) - return NativeToGT(const_vals[c->Value()], t, gt); + { + auto v = c->ValuePtr(); + int consts_offset; // ignored + (void)RegisterConstant(v, consts_offset); + return NativeToGT(const_vals[v.get()]->Name(), t, gt); + } return NativeToGT(GenVal(c->ValuePtr()), t, gt); } @@ -1168,21 +1173,25 @@ string CPPCompile::GenField(const ExprPtr& rec, int field) // Need to dynamically map the field. int mapping_slot; - if ( record_field_mappings.count(rt) > 0 && record_field_mappings[rt].count(field) > 0 ) + auto rfm = record_field_mappings.find(rt); + if ( rfm != record_field_mappings.end() && rfm->second.count(field) > 0 ) // We're already tracking this field. 
- mapping_slot = record_field_mappings[rt][field]; + mapping_slot = rfm->second[field]; else { // New mapping. mapping_slot = num_rf_mappings++; + auto pt = processed_types.find(rt); + ASSERT(pt != processed_types.end()); + auto rt_offset = pt->second->Offset(); string field_name = rt->FieldName(field); - field_decls.emplace_back(pair(rt, rt->FieldDecl(field))); + field_decls.emplace_back(pair(rt_offset, rt->FieldDecl(field))); - if ( record_field_mappings.count(rt) > 0 ) + if ( rfm != record_field_mappings.end() ) // We're already tracking this record. - record_field_mappings[rt][field] = mapping_slot; + rfm->second[field] = mapping_slot; else { // Need to start tracking this record. @@ -1207,9 +1216,10 @@ string CPPCompile::GenEnum(const TypePtr& t, const ValPtr& ev) // Need to dynamically map the access. int mapping_slot; - if ( enum_val_mappings.count(et) > 0 && enum_val_mappings[et].count(v) > 0 ) + auto evm = enum_val_mappings.find(et); + if ( evm != enum_val_mappings.end() && evm->second.count(v) > 0 ) // We're already tracking this value. - mapping_slot = enum_val_mappings[et][v]; + mapping_slot = evm->second[v]; else { @@ -1217,12 +1227,12 @@ string CPPCompile::GenEnum(const TypePtr& t, const ValPtr& ev) mapping_slot = num_ev_mappings++; string enum_name = et->Lookup(v); - enum_names.emplace_back(pair(et, move(enum_name))); + enum_names.emplace_back(pair(TypeOffset(t), move(enum_name))); - if ( enum_val_mappings.count(et) > 0 ) + if ( evm != enum_val_mappings.end() ) { // We're already tracking this enum. 
- enum_val_mappings[et][v] = mapping_slot; + evm->second[v] = mapping_slot; } else { diff --git a/src/script_opt/CPP/GenFunc.cc b/src/script_opt/CPP/GenFunc.cc index d0cb328f87..47a9ec33bc 100644 --- a/src/script_opt/CPP/GenFunc.cc +++ b/src/script_opt/CPP/GenFunc.cc @@ -34,10 +34,8 @@ void CPPCompile::CompileLambda(const LambdaExpr* l, const ProfileFunc* pf) DefineBody(l_id->GetType(), pf, lname, body, &ids, FUNC_FLAVOR_FUNCTION); } -void CPPCompile::GenInvokeBody(const string& fname, const TypePtr& t, const string& args) +void CPPCompile::GenInvokeBody(const string& call, const TypePtr& t) { - auto call = fname + "(" + args + ")"; - if ( ! t || t->Tag() == TYPE_VOID ) { Emit("%s;", call); @@ -144,7 +142,7 @@ void CPPCompile::InitializeEvents(const ProfileFunc* pf) // returns an EventHandlerPtr, sigh. Emit("if ( event_registry->Lookup(\"%s\") )", e); StartBlock(); - Emit("%s = event_registry->Register(\"%s\");", ev_name.c_str(), e); + Emit("%s = event_registry->Register(\"%s\");", ev_name, e); EndBlock(); Emit("did_init = true;"); EndBlock(); @@ -233,6 +231,18 @@ string CPPCompile::BodyName(const FuncInfo& func) return fname + "__" + Fmt(static_cast(i)); } +p_hash_type CPPCompile::BodyHash(const Stmt* body) + { + auto bn = body_names.find(body); + ASSERT(bn != body_names.end()); + + auto& body_name = bn->second; + auto bh = body_hashes.find(body_name); + ASSERT(bh != body_hashes.end()); + + return bh->second; + } + string CPPCompile::GenArgs(const RecordTypePtr& params, const Expr* e) { const auto& exprs = e->AsListExpr()->Exprs(); diff --git a/src/script_opt/CPP/HashMgr.cc b/src/script_opt/CPP/HashMgr.cc index 4a6625391a..402b9dc012 100644 --- a/src/script_opt/CPP/HashMgr.cc +++ b/src/script_opt/CPP/HashMgr.cc @@ -12,28 +12,11 @@ using namespace std; VarMapper compiled_items; -CPPHashManager::CPPHashManager(const char* hash_name_base, bool _append) +CPPHashManager::CPPHashManager(const char* hash_name_base) { - append = _append; - hash_name = 
string(hash_name_base) + ".dat"; - if ( append ) - { - hf_r = fopen(hash_name.c_str(), "r"); - if ( ! hf_r ) - { - reporter->Error("can't open auxiliary C++ hash file %s for reading", hash_name.c_str()); - exit(1); - } - - lock_file(hash_name, hf_r); - LoadHashes(hf_r); - } - - auto mode = append ? "a" : "w"; - - hf_w = fopen(hash_name.c_str(), mode); + hf_w = fopen(hash_name.c_str(), "w"); if ( ! hf_w ) { reporter->Error("can't open auxiliary C++ hash file %s for writing", hash_name.c_str()); diff --git a/src/script_opt/CPP/HashMgr.h b/src/script_opt/CPP/HashMgr.h index 6a495b597e..2ae2e65ace 100644 --- a/src/script_opt/CPP/HashMgr.h +++ b/src/script_opt/CPP/HashMgr.h @@ -27,11 +27,9 @@ public: // end of the file (and the hash file will be locked, to prevent // overlapping updates from concurrent compilation/appends). // Otherwise, the file will be generated afresh. - CPPHashManager(const char* hash_name_base, bool append); + CPPHashManager(const char* hash_name_base); ~CPPHashManager(); - bool IsAppend() const { return append; } - // True if the given hash has already been generated. bool HasHash(p_hash_type h) const { return previously_compiled.count(h) > 0; } @@ -96,10 +94,6 @@ protected: // names, rather than their script-level names. std::unordered_map gv_scopes; - // Whether we're appending to existing hash file(s), or starting - // afresh. - bool append; - // Base for file names. 
std::string hash_name; diff --git a/src/script_opt/CPP/Inits.cc b/src/script_opt/CPP/Inits.cc index 48f3dd1cc1..2e51b95412 100644 --- a/src/script_opt/CPP/Inits.cc +++ b/src/script_opt/CPP/Inits.cc @@ -14,12 +14,31 @@ namespace zeek::detail using namespace std; -void CPPCompile::GenInitExpr(const ExprPtr& e) +std::shared_ptr CPPCompile::RegisterInitExpr(const ExprPtr& ep) + { + auto ename = InitExprName(ep); + + auto ii = init_infos.find(ename); + if ( ii != init_infos.end() ) + return ii->second; + + auto wrapper_cl = string("wrapper_") + ename + "_cl"; + + auto gi = make_shared(this, ep, ename, wrapper_cl); + call_exprs_info->AddInstance(gi); + init_infos[ename] = gi; + + return gi; + } + +void CPPCompile::GenInitExpr(std::shared_ptr ce_init) { NL(); + const auto& e = ce_init->GetExpr(); const auto& t = e->GetType(); - auto ename = InitExprName(e); + const auto& ename = ce_init->Name(); + const auto& wc = ce_init->WrapperClass(); // First, create a CPPFunc that we can compile to compute 'e'. auto name = string("wrapper_") + ename; @@ -29,18 +48,17 @@ void CPPCompile::GenInitExpr(const ExprPtr& e) // Create the Func subclass that can be used in a CallExpr to // evaluate 'e'. - Emit("class %s_cl : public CPPFunc", name); + Emit("class %s : public CPPFunc", wc); StartBlock(); Emit("public:"); - Emit("%s_cl() : CPPFunc(\"%s\", %s)", name, name, e->IsPure() ? "true" : "false"); + Emit("%s() : CPPFunc(\"%s\", %s)", wc, name, e->IsPure() ? 
"true" : "false"); StartBlock(); Emit("type = make_intrusive(make_intrusive(new type_decl_list()), %s, " "FUNC_FLAVOR_FUNCTION);", GenTypeName(t)); - NoteInitDependency(e, TypeRep(t)); EndBlock(); Emit("ValPtr Invoke(zeek::Args* args, Frame* parent) const override final"); @@ -62,15 +80,9 @@ void CPPCompile::GenInitExpr(const ExprPtr& e) EndBlock(); Emit("CallExprPtr %s;", ename); - - NoteInitDependency(e, TypeRep(t)); - AddInit(e, ename, - string("make_intrusive(make_intrusive(make_intrusive(" - "make_intrusive<") + - name + "_cl>())), make_intrusive(), false)"); } -bool CPPCompile::IsSimpleInitExpr(const ExprPtr& e) const +bool CPPCompile::IsSimpleInitExpr(const ExprPtr& e) { switch ( e->Tag() ) { @@ -101,360 +113,83 @@ string CPPCompile::InitExprName(const ExprPtr& e) return init_exprs.KeyName(e); } -void CPPCompile::GenGlobalInit(const ID* g, string& gl, const ValPtr& v) - { - const auto& t = v->GetType(); - auto tag = t->Tag(); - - if ( tag == TYPE_FUNC ) - // This should get initialized by recognizing hash of - // the function's body. - return; - - string init_val; - if ( tag == TYPE_OPAQUE ) - { - // We can only generate these by reproducing the expression - // (presumably a function call) used to create the value. - // That isn't fully sound, since if the global's value - // was redef'd in terms of its original value (e.g., - // "redef x = f(x)"), then we'll wind up with a broken - // expression. It's difficult to detect that in full - // generality, so um Don't Do That. (Note that this - // only affects execution of standalone compiled code, - // where the original scripts are replaced by load-stubs. - // If the scripts are available, then the HasVal() test - // we generate will mean we don't wind up using this - // expression anyway.) - - // Use the final initialization expression. 
- auto& init_exprs = g->GetOptInfo()->GetInitExprs(); - init_val = GenExpr(init_exprs.back(), GEN_VAL_PTR, false); - } - else - init_val = BuildConstant(g, v); - - auto& attrs = g->GetAttrs(); - - AddInit(g, string("if ( ! ") + gl + "->HasVal() )"); - - if ( attrs ) - { - RegisterAttributes(attrs); - - AddInit(g, "\t{"); - AddInit(g, "\t" + gl + "->SetVal(" + init_val + ");"); - AddInit(g, "\t" + gl + "->SetAttrs(" + AttrsName(attrs) + ");"); - AddInit(g, "\t}"); - } - else - AddInit(g, "\t" + gl + "->SetVal(" + init_val + ");"); - } - -void CPPCompile::GenFuncVarInits() - { - for ( const auto& fv_init : func_vars ) - { - auto& fv = fv_init.first; - auto& const_name = fv_init.second; - - auto f = fv->AsFunc(); - const auto& fn = f->Name(); - const auto& ft = f->GetType(); - - NoteInitDependency(fv, TypeRep(ft)); - - const auto& bodies = f->GetBodies(); - - string hashes = "{"; - - for ( const auto& b : bodies ) - { - auto body = b.stmts.get(); - - ASSERT(body_names.count(body) > 0); - - auto& body_name = body_names[body]; - ASSERT(body_hashes.count(body_name) > 0); - - NoteInitDependency(fv, body); - - if ( hashes.size() > 1 ) - hashes += ", "; - - hashes += Fmt(body_hashes[body_name]); - } - - hashes += "}"; - - auto init = string("lookup_func__CPP(\"") + fn + "\", " + hashes + ", " + GenTypeName(ft) + - ")"; - - AddInit(fv, const_name, init); - } - } - -void CPPCompile::GenPreInit(const Type* t) - { - string pre_init; - - switch ( t->Tag() ) - { - case TYPE_ADDR: - case TYPE_ANY: - case TYPE_BOOL: - case TYPE_COUNT: - case TYPE_DOUBLE: - case TYPE_ERROR: - case TYPE_INT: - case TYPE_INTERVAL: - case TYPE_PATTERN: - case TYPE_PORT: - case TYPE_STRING: - case TYPE_TIME: - case TYPE_TIMER: - case TYPE_VOID: - pre_init = string("base_type(") + TypeTagName(t->Tag()) + ")"; - break; - - case TYPE_ENUM: - pre_init = string("get_enum_type__CPP(\"") + t->GetName() + "\")"; - break; - - case TYPE_SUBNET: - pre_init = string("make_intrusive()"); - break; - - case TYPE_FILE: 
- pre_init = string("make_intrusive(") + GenTypeName(t->AsFileType()->Yield()) + - ")"; - break; - - case TYPE_OPAQUE: - pre_init = string("make_intrusive(\"") + t->AsOpaqueType()->Name() + "\")"; - break; - - case TYPE_RECORD: - { - string name; - - if ( t->GetName() != "" ) - name = string("\"") + t->GetName() + string("\""); - else - name = "nullptr"; - - pre_init = string("get_record_type__CPP(") + name + ")"; - } - break; - - case TYPE_LIST: - pre_init = string("make_intrusive()"); - break; - - case TYPE_TYPE: - case TYPE_VECTOR: - case TYPE_TABLE: - case TYPE_FUNC: - // Nothing to do for these, pre-initialization-wise. - return; - - default: - reporter->InternalError("bad type in CPPCompile::GenType"); - } - - pre_inits.emplace_back(GenTypeName(t) + " = " + pre_init + ";"); - } - -void CPPCompile::GenPreInits() - { - NL(); - Emit("void pre_init__CPP()"); - - StartBlock(); - for ( const auto& i : pre_inits ) - Emit(i); - EndBlock(); - } - -void CPPCompile::AddInit(const Obj* o, const string& init) - { - obj_inits[o].emplace_back(init); - } - -void CPPCompile::AddInit(const Obj* o) - { - if ( obj_inits.count(o) == 0 ) - obj_inits[o] = {}; - } - -void CPPCompile::NoteInitDependency(const Obj* o1, const Obj* o2) - { - obj_deps[o1].emplace(o2); - } - -void CPPCompile::CheckInitConsistency(unordered_set& to_do) - { - for ( const auto& od : obj_deps ) - { - const auto& o = od.first; - - if ( to_do.count(o) == 0 ) - { - fprintf(stderr, "object not in to_do: %s\n", obj_desc(o).c_str()); - exit(1); - } - - for ( const auto& d : od.second ) - { - if ( to_do.count(d) == 0 ) - { - fprintf(stderr, "dep object for %s not in to_do: %s\n", obj_desc(o).c_str(), - obj_desc(d).c_str()); - exit(1); - } - } - } - } - -int CPPCompile::GenDependentInits(unordered_set& to_do) - { - int n = 0; - - // The basic approach is fairly brute force: find elements of - // to_do that don't have any pending dependencies; generate those; - // and remove them from the to_do list, freeing up other 
to_do entries - // to now not having any pending dependencies. Iterate until there - // are no more to-do items. - while ( to_do.size() > 0 ) - { - unordered_set cohort; - - for ( const auto& o : to_do ) - { - const auto& od = obj_deps.find(o); - - bool has_pending_dep = false; - - if ( od != obj_deps.end() ) - { - for ( const auto& d : od->second ) - if ( to_do.count(d) > 0 ) - { - has_pending_dep = true; - break; - } - } - - if ( has_pending_dep ) - continue; - - cohort.insert(o); - } - - ASSERT(cohort.size() > 0); - - GenInitCohort(++n, cohort); - - for ( const auto& o : cohort ) - { - ASSERT(to_do.count(o) > 0); - to_do.erase(o); - } - } - - return n; - } - -void CPPCompile::GenInitCohort(int nc, unordered_set& cohort) - { - NL(); - Emit("void init_%s__CPP()", Fmt(nc)); - StartBlock(); - - // If any script/BiF functions are used for initializing globals, - // the code generated from that will expect the presence of a - // frame pointer, even if nil. - Emit("Frame* f__CPP = nullptr;"); - - // The following is just for making the output readable/pretty: - // add space between initializations for distinct objects, taking - // into account that some objects have empty initializations. 
- bool did_an_init = false; - - for ( auto o : cohort ) - { - if ( did_an_init ) - { - NL(); - did_an_init = false; - } - - for ( const auto& i : obj_inits.find(o)->second ) - { - Emit("%s", i); - did_an_init = true; - } - } - - EndBlock(); - } - void CPPCompile::InitializeFieldMappings() { - Emit("int fm_offset;"); + Emit("std::vector CPP__field_mappings__ = "); + + StartBlock(); for ( const auto& mapping : field_decls ) { - auto rt = mapping.first; + auto rt_arg = Fmt(mapping.first); auto td = mapping.second; - auto fn = td->id; - auto rt_name = GenTypeName(rt) + "->AsRecordType()"; + auto type_arg = Fmt(TypeOffset(td->type)); + auto attrs_arg = Fmt(AttributesOffset(td->attrs)); - Emit("fm_offset = %s->FieldOffset(\"%s\");", rt_name, fn); - Emit("if ( fm_offset < 0 )"); - - StartBlock(); - Emit("// field does not exist, create it"); - Emit("fm_offset = %s->NumFields();", rt_name); - Emit("type_decl_list tl;"); - Emit(GenTypeDecl(td)); - Emit("%s->AddFieldsDirectly(tl);", rt_name); - EndBlock(); - - Emit("field_mapping.push_back(fm_offset);"); + Emit("CPP_FieldMapping(%s, \"%s\", %s, %s),", rt_arg, td->id, type_arg, attrs_arg); } + + EndBlock(true); } void CPPCompile::InitializeEnumMappings() { - int n = 0; + Emit("std::vector CPP__enum_mappings__ = "); + + StartBlock(); for ( const auto& mapping : enum_names ) - InitializeEnumMappings(mapping.first, mapping.second, n++); + Emit("CPP_EnumMapping(%s, \"%s\"),", Fmt(mapping.first), mapping.second); + + EndBlock(true); } -void CPPCompile::InitializeEnumMappings(const EnumType* et, const string& e_name, int index) +void CPPCompile::InitializeBiFs() { - AddInit(et, "{"); + Emit("std::vector CPP__BiF_lookups__ = "); - auto et_name = GenTypeName(et) + "->AsEnumType()"; - AddInit(et, "int em_offset = " + et_name + "->Lookup(\"" + e_name + "\");"); - AddInit(et, "if ( em_offset < 0 )"); + StartBlock(); - AddInit(et, "\t{"); - AddInit(et, "\tem_offset = " + et_name + "->Names().size();"); - // The following is to catch the 
case where the offset is already - // in use due to it being specified explicitly for an existing enum. - AddInit(et, "\tif ( " + et_name + "->Lookup(em_offset) )"); - AddInit( - et, - "\t\treporter->InternalError(\"enum inconsistency while initializing compiled scripts\");"); - AddInit(et, "\t" + et_name + "->AddNameInternal(\"" + e_name + "\", em_offset);"); - AddInit(et, "\t}"); + for ( const auto& b : BiFs ) + Emit("CPP_LookupBiF(%s, \"%s\"),", b.first, b.second); - AddInit(et, "enum_mapping[" + Fmt(index) + "] = em_offset;"); + EndBlock(true); + } - AddInit(et, "}"); +void CPPCompile::InitializeStrings() + { + Emit("std::vector CPP__Strings ="); + + StartBlock(); + + for ( const auto& s : ordered_tracked_strings ) + Emit("\"%s\",", s); + + EndBlock(true); + } + +void CPPCompile::InitializeHashes() + { + Emit("std::vector CPP__Hashes ="); + + StartBlock(); + + for ( const auto& h : ordered_tracked_hashes ) + Emit(Fmt(h) + ","); + + EndBlock(true); + } + +void CPPCompile::InitializeConsts() + { + Emit("std::vector CPP__ConstVals ="); + + StartBlock(); + + for ( const auto& c : consts ) + Emit("CPP_ValElem(%s, %s),", TypeTagName(c.first), Fmt(c.second)); + + EndBlock(true); } void CPPCompile::GenInitHook() @@ -482,11 +217,13 @@ void CPPCompile::GenStandaloneActivation() { NL(); +#if 0 Emit("void standalone_activation__CPP()"); StartBlock(); for ( auto& a : activations ) Emit(a); EndBlock(); +#endif NL(); Emit("void standalone_init__CPP()"); @@ -511,8 +248,9 @@ void CPPCompile::GenStandaloneActivation() // We didn't wind up compiling it. 
continue; - ASSERT(body_hashes.count(bname) > 0); - func_bodies[f].push_back(body_hashes[bname]); + auto bh = body_hashes.find(bname); + ASSERT(bh != body_hashes.end()); + func_bodies[f].push_back(bh->second); } for ( auto& fb : func_bodies ) diff --git a/src/script_opt/CPP/InitsInfo.cc b/src/script_opt/CPP/InitsInfo.cc new file mode 100644 index 0000000000..c83b78f5ed --- /dev/null +++ b/src/script_opt/CPP/InitsInfo.cc @@ -0,0 +1,575 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +#include "zeek/Desc.h" +#include "zeek/RE.h" +#include "zeek/ZeekString.h" +#include "zeek/script_opt/CPP/Attrs.h" +#include "zeek/script_opt/CPP/Compile.h" + +using namespace std; + +namespace zeek::detail + { + +string CPP_InitsInfo::Name(int index) const + { + return base_name + "[" + Fmt(index) + "]"; + } + +void CPP_InitsInfo::AddInstance(shared_ptr g) + { + auto init_cohort = g->InitCohort(); + + if ( static_cast(instances.size()) <= init_cohort ) + instances.resize(init_cohort + 1); + + g->SetOffset(this, size++); + + instances[init_cohort].push_back(move(g)); + } + +string CPP_InitsInfo::Declare() const + { + return string("std::vector<") + CPPType() + "> " + base_name + ";"; + } + +void CPP_InitsInfo::GenerateInitializers(CPPCompile* c) + { + BuildOffsetSet(c); + + c->NL(); + + auto gt = InitsType(); + + // Declare the initializer. + c->Emit("%s %s = %s(%s, %s,", gt, InitializersName(), gt, base_name, Fmt(offset_set)); + + c->IndentUp(); + c->Emit("{"); + + // Add each cohort as a vector element. + for ( auto& cohort : instances ) + { + c->Emit("{"); + BuildCohort(c, cohort); + c->Emit("},"); + } + + c->Emit("}"); + c->IndentDown(); + c->Emit(");"); + } + +void CPP_InitsInfo::BuildOffsetSet(CPPCompile* c) + { + vector offsets_vec; + + for ( auto& cohort : instances ) + { + // Reduce the offsets used by this cohort to an + // offset into the managed vector-of-indices global. 
+ vector offsets; + offsets.reserve(cohort.size()); + for ( auto& co : cohort ) + offsets.push_back(co->Offset()); + + offsets_vec.push_back(c->IndMgr().AddIndices(offsets)); + } + + // Now that we have all the offsets in a vector, reduce them, too, + // to an offset into the managed vector-of-indices global, + offset_set = c->IndMgr().AddIndices(offsets_vec); + } + +void CPP_InitsInfo::BuildCohort(CPPCompile* c, std::vector>& cohort) + { + for ( auto& co : cohort ) + { + vector ivs; + co->InitializerVals(ivs); + BuildCohortElement(c, co->InitializerType(), ivs); + } + } + +void CPP_InitsInfo::BuildCohortElement(CPPCompile* c, string init_type, vector& ivs) + { + string full_init; + bool did_one = false; + for ( auto& iv : ivs ) + { + if ( did_one ) + full_init += ", "; + else + did_one = true; + + full_init += iv; + } + + c->Emit("std::make_shared<%s>(%s),", init_type, full_init); + } + +void CPP_CompoundInitsInfo::BuildCohortElement(CPPCompile* c, string init_type, vector& ivs) + { + string init_line; + for ( auto& iv : ivs ) + init_line += iv + ", "; + + c->Emit("{ %s},", init_line); + } + +void CPP_BasicConstInitsInfo::BuildCohortElement(CPPCompile* c, string init_type, + vector& ivs) + { + ASSERT(ivs.size() == 1); + c->Emit(ivs[0] + ","); + } + +string CPP_InitInfo::ValElem(CPPCompile* c, ValPtr v) + { + if ( v ) + { + int consts_offset; + auto gi = c->RegisterConstant(v, consts_offset); + init_cohort = max(init_cohort, gi->InitCohort() + 1); + return Fmt(consts_offset); + } + else + return Fmt(-1); + } + +DescConstInfo::DescConstInfo(CPPCompile* c, ValPtr v) : CPP_InitInfo() + { + ODesc d; + v->Describe(&d); + auto s = c->TrackString(d.Description()); + init = Fmt(s); + } + +EnumConstInfo::EnumConstInfo(CPPCompile* c, ValPtr v) + { + auto ev = v->AsEnumVal(); + auto& ev_t = ev->GetType(); + e_type = c->TypeOffset(ev_t); + init_cohort = c->TypeCohort(ev_t) + 1; + e_val = v->AsEnum(); + } + +StringConstInfo::StringConstInfo(CPPCompile* c, ValPtr v) : 
CPP_InitInfo() + { + auto s = v->AsString(); + const char* b = (const char*)(s->Bytes()); + + len = s->Len(); + chars = c->TrackString(CPPEscape(b, len)); + } + +PatternConstInfo::PatternConstInfo(CPPCompile* c, ValPtr v) : CPP_InitInfo() + { + auto re = v->AsPatternVal()->Get(); + pattern = c->TrackString(CPPEscape(re->OrigText())); + is_case_insensitive = re->IsCaseInsensitive(); + } + +CompoundItemInfo::CompoundItemInfo(CPPCompile* _c, ValPtr v) : CPP_InitInfo(), c(_c) + { + auto& t = v->GetType(); + type = c->TypeOffset(t); + init_cohort = c->TypeCohort(t) + 1; + } + +ListConstInfo::ListConstInfo(CPPCompile* _c, ValPtr v) : CompoundItemInfo(_c) + { + auto lv = cast_intrusive(v); + auto n = lv->Length(); + + for ( auto i = 0; i < n; ++i ) + vals.emplace_back(ValElem(c, lv->Idx(i))); + } + +VectorConstInfo::VectorConstInfo(CPPCompile* c, ValPtr v) : CompoundItemInfo(c, v) + { + auto vv = cast_intrusive(v); + auto n = vv->Size(); + + for ( auto i = 0U; i < n; ++i ) + vals.emplace_back(ValElem(c, vv->ValAt(i))); + } + +RecordConstInfo::RecordConstInfo(CPPCompile* c, ValPtr v) : CompoundItemInfo(c, v) + { + auto r = cast_intrusive(v); + auto n = r->NumFields(); + + type = c->TypeOffset(r->GetType()); + + for ( auto i = 0U; i < n; ++i ) + vals.emplace_back(ValElem(c, r->GetField(i))); + } + +TableConstInfo::TableConstInfo(CPPCompile* c, ValPtr v) : CompoundItemInfo(c, v) + { + auto tv = cast_intrusive(v); + + for ( auto& tv_i : tv->ToMap() ) + { + vals.emplace_back(ValElem(c, tv_i.first)); // index + vals.emplace_back(ValElem(c, tv_i.second)); // value + } + } + +FileConstInfo::FileConstInfo(CPPCompile* c, ValPtr v) : CompoundItemInfo(c, v) + { + auto fv = cast_intrusive(v); + auto fname = c->TrackString(fv->Get()->Name()); + vals.emplace_back(Fmt(fname)); + } + +FuncConstInfo::FuncConstInfo(CPPCompile* _c, ValPtr v) : CompoundItemInfo(_c, v), fv(v->AsFuncVal()) + { + // This is slightly hacky. 
There's a chance that this constant + // depends on a lambda being registered. Here we use the knowledge + // that LambdaRegistrationInfo sets its cohort to 1 more than + // the function type, so we can ensure any possible lambda has + // been registered by setting ours to 2 more. CompoundItemInfo + // has already set our cohort to 1 more. + ++init_cohort; + } + +void FuncConstInfo::InitializerVals(std::vector& ivs) const + { + auto f = fv->AsFunc(); + const auto& fn = f->Name(); + + ivs.emplace_back(Fmt(type)); + ivs.emplace_back(Fmt(c->TrackString(fn))); + + if ( ! c->NotFullyCompilable(fn) ) + { + const auto& bodies = f->GetBodies(); + + for ( const auto& b : bodies ) + { + auto h = c->BodyHash(b.stmts.get()); + auto h_o = c->TrackHash(h); + ivs.emplace_back(Fmt(h_o)); + } + } + } + +AttrInfo::AttrInfo(CPPCompile* _c, const AttrPtr& attr) : CompoundItemInfo(_c) + { + vals.emplace_back(Fmt(static_cast(attr->Tag()))); + auto a_e = attr->GetExpr(); + + if ( a_e ) + { + auto gi = c->RegisterType(a_e->GetType()); + init_cohort = max(init_cohort, gi->InitCohort() + 1); + + if ( ! 
CPPCompile::IsSimpleInitExpr(a_e) ) + { + gi = c->RegisterInitExpr(a_e); + init_cohort = max(init_cohort, gi->InitCohort() + 1); + + vals.emplace_back(Fmt(static_cast(AE_CALL))); + vals.emplace_back(Fmt(gi->Offset())); + } + + else if ( a_e->Tag() == EXPR_CONST ) + { + auto v = a_e->AsConstExpr()->ValuePtr(); + vals.emplace_back(Fmt(static_cast(AE_CONST))); + vals.emplace_back(ValElem(c, v)); + } + + else if ( a_e->Tag() == EXPR_NAME ) + { + auto g = a_e->AsNameExpr()->Id(); + auto gi = c->RegisterGlobal(g); + init_cohort = max(init_cohort, gi->InitCohort() + 1); + + vals.emplace_back(Fmt(static_cast(AE_NAME))); + vals.emplace_back(Fmt(c->TrackString(g->Name()))); + } + + else + { + ASSERT(a_e->Tag() == EXPR_RECORD_COERCE); + vals.emplace_back(Fmt(static_cast(AE_RECORD))); + vals.emplace_back(Fmt(gi->Offset())); + } + } + + else + vals.emplace_back(Fmt(static_cast(AE_NONE))); + } + +AttrsInfo::AttrsInfo(CPPCompile* _c, const AttributesPtr& _attrs) : CompoundItemInfo(_c) + { + const auto& pas = c->ProcessedAttr(); + + for ( const auto& a : _attrs->GetAttrs() ) + { + auto pa = pas.find(a.get()); + ASSERT(pa != pas.end()); + const auto& gi = pa->second; + init_cohort = max(init_cohort, gi->InitCohort() + 1); + vals.emplace_back(Fmt(gi->Offset())); + } + } + +GlobalInitInfo::GlobalInitInfo(CPPCompile* c, const ID* g, string _CPP_name) + : CPP_InitInfo(), CPP_name(move(_CPP_name)) + { + Zeek_name = g->Name(); + + auto gi = c->RegisterType(g->GetType()); + init_cohort = max(init_cohort, gi->InitCohort() + 1); + type = gi->Offset(); + + gi = c->RegisterAttributes(g->GetAttrs()); + if ( gi ) + { + init_cohort = max(init_cohort, gi->InitCohort() + 1); + attrs = gi->Offset(); + } + else + attrs = -1; + + exported = g->IsExport(); + + val = ValElem(c, g->GetVal()); + } + +void GlobalInitInfo::InitializerVals(std::vector& ivs) const + { + ivs.push_back(CPP_name); + ivs.push_back(string("\"") + Zeek_name + "\""); + ivs.push_back(Fmt(type)); + ivs.push_back(Fmt(attrs)); + 
ivs.push_back(val); + ivs.push_back(Fmt(exported)); + } + +CallExprInitInfo::CallExprInitInfo(CPPCompile* c, ExprPtr _e, string _e_name, string _wrapper_class) + : e(move(_e)), e_name(move(_e_name)), wrapper_class(move(_wrapper_class)) + { + auto gi = c->RegisterType(e->GetType()); + init_cohort = max(init_cohort, gi->InitCohort() + 1); + } + +LambdaRegistrationInfo::LambdaRegistrationInfo(CPPCompile* c, string _name, FuncTypePtr ft, + string _wrapper_class, p_hash_type _h, + bool _has_captures) + : name(move(_name)), wrapper_class(move(_wrapper_class)), h(_h), has_captures(_has_captures) + { + auto gi = c->RegisterType(ft); + init_cohort = max(init_cohort, gi->InitCohort() + 1); + func_type = gi->Offset(); + } + +void LambdaRegistrationInfo::InitializerVals(std::vector& ivs) const + { + ivs.emplace_back(string("\"") + name + "\""); + ivs.emplace_back(Fmt(func_type)); + ivs.emplace_back(Fmt(h)); + ivs.emplace_back(has_captures ? "true" : "false"); + } + +void EnumTypeInfo::AddInitializerVals(std::vector& ivs) const + { + ivs.emplace_back(Fmt(c->TrackString(t->GetName()))); + + auto et = t->AsEnumType(); + + for ( const auto& name_pair : et->Names() ) + { + ivs.emplace_back(Fmt(c->TrackString(name_pair.first))); + ivs.emplace_back(Fmt(int(name_pair.second))); + } + } + +void OpaqueTypeInfo::AddInitializerVals(std::vector& ivs) const + { + ivs.emplace_back(Fmt(c->TrackString(t->GetName()))); + } + +TypeTypeInfo::TypeTypeInfo(CPPCompile* _c, TypePtr _t) : AbstractTypeInfo(_c, move(_t)) + { + tt = t->AsTypeType()->GetType(); + auto gi = c->RegisterType(tt); + if ( gi ) + init_cohort = gi->InitCohort(); + } + +void TypeTypeInfo::AddInitializerVals(std::vector& ivs) const + { + ivs.emplace_back(to_string(c->TypeOffset(tt))); + } + +VectorTypeInfo::VectorTypeInfo(CPPCompile* _c, TypePtr _t) : AbstractTypeInfo(_c, move(_t)) + { + yield = t->Yield(); + auto gi = c->RegisterType(yield); + if ( gi ) + init_cohort = gi->InitCohort(); + } + +void 
VectorTypeInfo::AddInitializerVals(std::vector& ivs) const + { + ivs.emplace_back(to_string(c->TypeOffset(yield))); + } + +ListTypeInfo::ListTypeInfo(CPPCompile* _c, TypePtr _t) + : AbstractTypeInfo(_c, move(_t)), types(t->AsTypeList()->GetTypes()) + { + for ( auto& tl_i : types ) + { + auto gi = c->RegisterType(tl_i); + if ( gi ) + init_cohort = max(init_cohort, gi->InitCohort()); + } + } + +void ListTypeInfo::AddInitializerVals(std::vector& ivs) const + { + string type_list; + for ( auto& t : types ) + ivs.emplace_back(Fmt(c->TypeOffset(t))); + } + +TableTypeInfo::TableTypeInfo(CPPCompile* _c, TypePtr _t) : AbstractTypeInfo(_c, move(_t)) + { + auto tbl = t->AsTableType(); + + auto gi = c->RegisterType(tbl->GetIndices()); + ASSERT(gi); + indices = gi->Offset(); + init_cohort = gi->InitCohort(); + + yield = tbl->Yield(); + + if ( yield ) + { + gi = c->RegisterType(yield); + if ( gi ) + init_cohort = max(init_cohort, gi->InitCohort()); + } + } + +void TableTypeInfo::AddInitializerVals(std::vector& ivs) const + { + ivs.emplace_back(Fmt(indices)); + ivs.emplace_back(Fmt(yield ? c->TypeOffset(yield) : -1)); + } + +FuncTypeInfo::FuncTypeInfo(CPPCompile* _c, TypePtr _t) : AbstractTypeInfo(_c, move(_t)) + { + auto f = t->AsFuncType(); + + flavor = f->Flavor(); + params = f->Params(); + yield = f->Yield(); + + auto gi = c->RegisterType(f->Params()); + if ( gi ) + init_cohort = gi->InitCohort(); + + if ( yield ) + { + gi = c->RegisterType(f->Yield()); + if ( gi ) + init_cohort = max(init_cohort, gi->InitCohort()); + } + } + +void FuncTypeInfo::AddInitializerVals(std::vector& ivs) const + { + ivs.emplace_back(Fmt(c->TypeOffset(params))); + ivs.emplace_back(Fmt(yield ? c->TypeOffset(yield) : -1)); + ivs.emplace_back(Fmt(static_cast(flavor))); + } + +RecordTypeInfo::RecordTypeInfo(CPPCompile* _c, TypePtr _t) : AbstractTypeInfo(_c, move(_t)) + { + auto r = t->AsRecordType()->Types(); + + if ( ! 
r ) + return; + + for ( const auto& r_i : *r ) + { + field_names.emplace_back(r_i->id); + + auto gi = c->RegisterType(r_i->type); + if ( gi ) + init_cohort = max(init_cohort, gi->InitCohort()); + // else it's a recursive type, no need to adjust cohort here + + field_types.push_back(r_i->type); + + if ( r_i->attrs ) + { + gi = c->RegisterAttributes(r_i->attrs); + init_cohort = max(init_cohort, gi->InitCohort() + 1); + field_attrs.push_back(gi->Offset()); + } + else + field_attrs.push_back(-1); + } + } + +void RecordTypeInfo::AddInitializerVals(std::vector& ivs) const + { + ivs.emplace_back(Fmt(c->TrackString(t->GetName()))); + + auto n = field_names.size(); + + for ( auto i = 0U; i < n; ++i ) + { + ivs.emplace_back(Fmt(c->TrackString(field_names[i]))); + + // Because RecordType's can be recursively defined, + // during construction we couldn't reliably access + // the field type's offsets. At this point, though, + // they should all be available. + ivs.emplace_back(Fmt(c->TypeOffset(field_types[i]))); + ivs.emplace_back(Fmt(field_attrs[i])); + } + } + +void IndicesManager::Generate(CPPCompile* c) + { + c->Emit("int CPP__Indices__init[] ="); + c->StartBlock(); + + int nset = 0; + for ( auto& is : indices_set ) + { + // Track the offsets into the raw vector, to make it + // easier to debug problems. + auto line = string("/* ") + to_string(nset++) + " */ "; + + // We first record the size, then the values. + line += to_string(is.size()) + ", "; + + auto n = 1; + for ( auto i : is ) + { + line += to_string(i) + ", "; + if ( ++n % 10 == 0 ) + { + c->Emit(line); + line.clear(); + } + } + + if ( line.size() > 0 ) + c->Emit(line); + } + + c->Emit("-1"); + c->EndBlock(true); + } + + } // zeek::detail diff --git a/src/script_opt/CPP/InitsInfo.h b/src/script_opt/CPP/InitsInfo.h new file mode 100644 index 0000000000..b8453b8135 --- /dev/null +++ b/src/script_opt/CPP/InitsInfo.h @@ -0,0 +1,693 @@ +// See the file "COPYING" in the main distribution directory for copyright. 
+ +// Classes for tracking information for initializing C++ values used by the +// generated code. + +// Initialization is probably the most complex part of the entire compiler, +// as there are a lot of considerations. There are two basic parts: (1) the +// generation of C++ code for doing run-time initialization, which is covered +// by the classes in this file, and (2) the execution of that code to do the +// actual initialization, which is covered by the classes in RuntimeInits.h. +// +// There are two fundamental types of initialization, those that create values +// (such as Zeek Type and Val objects) that will be used during the execution +// of compiled scripts, and those that perform actions such as registering +// the presence of a global or a lambda. In addition, for the former (values +// used at run-time), some are grouped together into vectors, with the compiled +// code using a hardwired index to get to a particular value; and some have +// standalone globals (for example, one for each BiF that a compiled script +// may call). +// +// For each of these types of initialization, our general approach is to have a +// class that manages a single instance of that type, and an object that +// manages all of those instances collectively. The latter object will, for +// example, attend to determining the offset into the run-time vector associated +// with a particular initialized value. +// +// An additional complexity is that often the initialization of a particular +// value will depend on *other* values having already been initialized. For +// example, a record type might have a field that is a table, and thus the +// type corresponding to the table needs to be available before we can create +// the record type. However, the table might have a set of attributes +// associated with it, which have to be initialized before we can create the +// table type, those in turn requiring the initialization of each of the +// individual attributes in the set. 
One of those attributes might specify +// a &default function for the table, requiring initializing *that* value +// (not just the type, but also a way to refer to the particular instance of +// the function) before initializing the attribute, etc. Worse, record types +// can be *indirectly recursive*, which requires first initializing a "stub" +// for the record type before doing the final initialization. +// +// The general strategy for dealing with all of these dependencies is to +// compute for each initialization its "cohort". An initialization that +// doesn't depend on any others is in cohort 0. An initialization X that +// depends on an initialization Y will have cohort(X) = cohort(Y) + 1; or, +// in general, one more than the highest cohort of any initialization it +// depends on. (We cut a corner in that, due to how initialization information +// is constructed, if X and Y are for the same type of object then we can +// safely use cohort(X) = cohort(Y).) We then execute run-time initialization +// in waves, one cohort at a time. +// +// Because C++ compilers can struggle when trying to optimize large quantities +// of code - clang in particular could take many CPU *hours* back when our +// compiler just generated C++ code snippets for each initialization - rather +// than producing code that directly executes each given initialization, we +// instead employ a table-driven approach. The C++ initializers for the +// tables contain simple values - often just vectors of integers - that compile +// quickly. At run-time we then spin through the elements of the tables (one +// cohort at a time) to obtain the information needed to initialize any given +// item. +// +// Many forms of initialization are specified in terms of indices into globals +// that hold items of various types. Thus, the most common initialization +// information is a vector of integers/indices. 
These data structures can +// be recursive, too, namely we sometimes associate an index with a vector +// of integers/indices and then we can track multiple such vectors using +// another vector of integers/indices. + +#include "zeek/File.h" +#include "zeek/Val.h" +#include "zeek/script_opt/ProfileFunc.h" + +#pragma once + +namespace zeek::detail + { + +class CPPCompile; + +// Abstract class for tracking information about a single initialization item. +class CPP_InitInfo; + +// Abstract class for tracking information about a collection of initialization +// items. +class CPP_InitsInfo + { +public: + CPP_InitsInfo(std::string _tag, std::string type) : tag(std::move(_tag)) + { + base_name = std::string("CPP__") + tag + "__"; + CPP_type = tag + type; + } + + virtual ~CPP_InitsInfo() { } + + // Returns the name of the C++ global that will hold the items' values + // at run-time, once initialized. These are all vectors, for which + // the generated code accesses a particular item by indexing the vector. + const std::string& InitsName() const { return base_name; } + + // Returns the name of the C++ global used to hold the table we employ + // for table-driven initialization. + std::string InitializersName() const { return base_name + "init"; } + + // Returns the "name" of the given element in the run-time vector + // associated with this collection of initialization items. It's not + // really a name but rather a vector index, so for example Name(12) + // might return "CPP__Pattern__[12]", but we use the term Name because + // the representation used to be individualized globals, such as + // "CPP__Pattern__12". + std::string Name(int index) const; + + // Returns the name that will correspond to the next item added to + // this set. + std::string NextName() const { return Name(size); } + + // The largest initialization cohort of any item in this collection. 
+ int MaxCohort() const { return static_cast(instances.size()) - 1; } + + // Returns the number of initializations in this collection that belong + // to the given cohort c. + int CohortSize(int c) const { return c > MaxCohort() ? 0 : instances[c].size(); } + + // Returns the C++ type associated with this collection's run-time vector. + // This might be, for example, "PatternVal". + const std::string& CPPType() const { return CPP_type; } + + // Sets the associated C++ type. + virtual void SetCPPType(std::string ct) { CPP_type = std::move(ct); } + + // Returns the type associated with the table used for initialization + // (i.e., this is the type of the global returned by InitializersName()). + std::string InitsType() const { return inits_type; } + + // Add a new initialization instance to the collection. + void AddInstance(std::shared_ptr g); + + // Emit code to populate the table used to initialize this collection. + void GenerateInitializers(CPPCompile* c); + +protected: + // Computes offset_set - see below. + void BuildOffsetSet(CPPCompile* c); + + // Returns a declaration suitable for the run-time vector that holds + // the initialized items in the collection. + std::string Declare() const; + + // For a given cohort, generates the associated table elements for + // creating it. + void BuildCohort(CPPCompile* c, std::vector>& cohort); + + // Given the initialization type and initializers for a given + // cohort element, build the associated table element. + virtual void BuildCohortElement(CPPCompile* c, std::string init_type, + std::vector& ivs); + + // Total number of initializers. + int size = 0; + + // Each cohort is represented by a vector whose elements correspond + // to the initialization information for a single item. This variable + // holds a vector of cohorts, indexed by the number of the cohort. + // (Note, some cohorts may be empty.) 
+ std::vector>> instances; + + // Each cohort has associated with it a vector of offsets, specifying + // positions in the run-time vector of the items in the cohort. + // + // We reduce each such vector to an index into the collection of + // such vectors (as managed by an IndicesManager - see below). + // + // Once we've done that reduction, we can represent each cohort + // using a single index, and thus all of the cohorts using a vector + // of indices. We then reduce *that* vector to a single index, + // again using the IndicesManager. We store that single index + // in the "offset_set" variable. + int offset_set = 0; + + // Tag used to distinguish a particular collection of constants. + std::string tag; + + // C++ name for this collection of constants. + std::string base_name; + + // C++ type associated with a single instance of these constants. + std::string CPP_type; + + // C++ type associated with the collection of initializers. + std::string inits_type; + }; + +// A class for a collection of initialization items for which each item +// has a "custom" initializer (that is, a bespoke C++ object, rather than +// a simple C++ type or a vector of indices). +class CPP_CustomInitsInfo : public CPP_InitsInfo + { +public: + CPP_CustomInitsInfo(std::string _tag, std::string _type) + : CPP_InitsInfo(std::move(_tag), std::move(_type)) + { + BuildInitType(); + } + + void SetCPPType(std::string ct) override + { + CPP_InitsInfo::SetCPPType(std::move(ct)); + BuildInitType(); + } + +private: + void BuildInitType() { inits_type = std::string("CPP_CustomInits<") + CPPType() + ">"; } + }; + +// A class for a collection of initialization items corresponding to "basic" +// constants, i.e., those that can be represented either directly as C++ +// constants, or as indices into a vector of C++ objects. 
+class CPP_BasicConstInitsInfo : public CPP_CustomInitsInfo + { +public: + // In the following, if "c_type" is non-empty then it specifies the + // C++ type used to directly represent the constant. If empty, it + // indicates that we instead use an index into a separate vector. + CPP_BasicConstInitsInfo(std::string _tag, std::string type, std::string c_type) + : CPP_CustomInitsInfo(std::move(_tag), std::move(type)) + { + if ( c_type.empty() ) + inits_type = std::string("CPP_") + tag + "Consts"; + else + inits_type = std::string("CPP_BasicConsts<") + CPP_type + ", " + c_type + ", " + tag + + "Val>"; + } + + void BuildCohortElement(CPPCompile* c, std::string init_type, + std::vector& ivs) override; + }; + +// A class for a collection of initialization items that are defined using +// other initialization items. +class CPP_CompoundInitsInfo : public CPP_InitsInfo + { +public: + CPP_CompoundInitsInfo(std::string _tag, std::string type) + : CPP_InitsInfo(std::move(_tag), std::move(type)) + { + if ( tag == "Type" ) + // These need a refined version of CPP_IndexedInits + // in order to build different types dynamically. + inits_type = "CPP_TypeInits"; + else + inits_type = std::string("CPP_IndexedInits<") + CPPType() + ">"; + } + + void BuildCohortElement(CPPCompile* c, std::string init_type, + std::vector& ivs) override; + }; + +// Abstract class for tracking information about a single initialization item. +class CPP_InitInfo + { +public: + // No constructor - basic initialization happens when the object is + // added via AddInstance() to a CPP_InitsInfo object, which in turn + // will lead to invocation of this object's SetOffset() method. + + virtual ~CPP_InitInfo() { } + + // Associates this item with an initialization collection and run-time + // vector offset. 
+ void SetOffset(const CPP_InitsInfo* _inits_collection, int _offset) + { + inits_collection = _inits_collection; + offset = _offset; + } + + // Returns the offset for this item into the associated run-time vector. + int Offset() const { return offset; } + + // Returns the name that should be used for referring to this + // value in the generated code. + std::string Name() const { return inits_collection->Name(offset); } + + // Returns this item's initialization cohort. + int InitCohort() const { return init_cohort; } + + // Returns the type used for this initializer. + virtual std::string InitializerType() const { return ""; } + + // Returns values used for creating this value, one element per + // constructor parameter. + virtual void InitializerVals(std::vector& ivs) const = 0; + +protected: + // Returns an offset (into the run-time vector holding all Zeek + // constant values) corresponding to the given value. Registers + // the constant if needed. + std::string ValElem(CPPCompile* c, ValPtr v); + + // By default, values have no dependencies on other values + // being first initialized. Those that do must increase this + // value in their constructors. + int init_cohort = 0; + + // Tracks the collection to which this item belongs. + const CPP_InitsInfo* inits_collection = nullptr; + + // Offset of this item in the collection, or -1 if no association. + int offset = -1; + }; + +// Information associated with initializing a basic (non-compound) constant. +class BasicConstInfo : public CPP_InitInfo + { +public: + BasicConstInfo(std::string _val) : val(std::move(_val)) { } + + void InitializerVals(std::vector& ivs) const override { ivs.emplace_back(val); } + +private: + // All we need to track is the C++ representation of the constant. + std::string val; + }; + +// Information associated with initializing a constant whose Val constructor +// takes a string. 
+class DescConstInfo : public CPP_InitInfo + { +public: + DescConstInfo(CPPCompile* c, ValPtr v); + + void InitializerVals(std::vector& ivs) const override { ivs.emplace_back(init); } + +private: + std::string init; + }; + +class EnumConstInfo : public CPP_InitInfo + { +public: + EnumConstInfo(CPPCompile* c, ValPtr v); + + void InitializerVals(std::vector& ivs) const override + { + ivs.emplace_back(std::to_string(e_type)); + ivs.emplace_back(std::to_string(e_val)); + } + +private: + int e_type; // an index into the enum's Zeek type + int e_val; // integer value of the enum + }; + +class StringConstInfo : public CPP_InitInfo + { +public: + StringConstInfo(CPPCompile* c, ValPtr v); + + void InitializerVals(std::vector& ivs) const override + { + ivs.emplace_back(std::to_string(chars)); + ivs.emplace_back(std::to_string(len)); + } + +private: + int chars; // index into vector of char*'s + int len; // length of the string + }; + +class PatternConstInfo : public CPP_InitInfo + { +public: + PatternConstInfo(CPPCompile* c, ValPtr v); + + void InitializerVals(std::vector& ivs) const override + { + ivs.emplace_back(std::to_string(pattern)); + ivs.emplace_back(std::to_string(is_case_insensitive)); + } + +private: + int pattern; // index into string representation of pattern + int is_case_insensitive; // case-insensitivity flag, 0 or 1 + }; + +class PortConstInfo : public CPP_InitInfo + { +public: + PortConstInfo(ValPtr v) : p(static_cast(v->AsPortVal())->Get()) { } + + void InitializerVals(std::vector& ivs) const override + { + ivs.emplace_back(std::to_string(p) + "U"); + } + +private: + bro_uint_t p; + }; + +// Abstract class for compound items (those defined in terms of other items). +class CompoundItemInfo : public CPP_InitInfo + { +public: + // The first of these is used for items with custom Zeek types, + // the second when the type is generic/inapplicable. 
+ CompoundItemInfo(CPPCompile* c, ValPtr v); + CompoundItemInfo(CPPCompile* _c) : c(_c) { type = -1; } + + void InitializerVals(std::vector& ivs) const override + { + if ( type >= 0 ) + ivs.emplace_back(std::to_string(type)); + + for ( auto& v : vals ) + ivs.push_back(v); + } + +protected: + CPPCompile* c; + int type; + std::vector vals; // initialization values + }; + +// This next set corresponds to compound Zeek constants of various types. +class ListConstInfo : public CompoundItemInfo + { +public: + ListConstInfo(CPPCompile* c, ValPtr v); + }; + +class VectorConstInfo : public CompoundItemInfo + { +public: + VectorConstInfo(CPPCompile* c, ValPtr v); + }; + +class RecordConstInfo : public CompoundItemInfo + { +public: + RecordConstInfo(CPPCompile* c, ValPtr v); + }; + +class TableConstInfo : public CompoundItemInfo + { +public: + TableConstInfo(CPPCompile* c, ValPtr v); + }; + +class FileConstInfo : public CompoundItemInfo + { +public: + FileConstInfo(CPPCompile* c, ValPtr v); + }; + +class FuncConstInfo : public CompoundItemInfo + { +public: + FuncConstInfo(CPPCompile* _c, ValPtr v); + + void InitializerVals(std::vector& ivs) const override; + +private: + FuncVal* fv; + }; + +// Initialization information for single attributes and sets of attributes. +class AttrInfo : public CompoundItemInfo + { +public: + AttrInfo(CPPCompile* c, const AttrPtr& attr); + }; + +class AttrsInfo : public CompoundItemInfo + { +public: + AttrsInfo(CPPCompile* c, const AttributesPtr& attrs); + }; + +// Information for initializing a Zeek global.
+class GlobalInitInfo : public CPP_InitInfo + { +public: + GlobalInitInfo(CPPCompile* c, const ID* g, std::string CPP_name); + + std::string InitializerType() const override { return "CPP_GlobalInit"; } + void InitializerVals(std::vector& ivs) const override; + +protected: + std::string Zeek_name; + std::string CPP_name; + int type; + int attrs; + std::string val; + bool exported; + }; + +// Information for initializing an item corresponding to a Zeek function +// call, needed to associate complex expressions with attributes. +class CallExprInitInfo : public CPP_InitInfo + { +public: + CallExprInitInfo(CPPCompile* c, ExprPtr e, std::string e_name, std::string wrapper_class); + + std::string InitializerType() const override + { + return std::string("CPP_CallExprInit<") + wrapper_class + ">"; + } + void InitializerVals(std::vector& ivs) const override { ivs.emplace_back(e_name); } + + // Accessors, since code to initialize these is generated separately + // from that of most initialization collections. + const ExprPtr& GetExpr() const { return e; } + const std::string& Name() const { return e_name; } + const std::string& WrapperClass() const { return wrapper_class; } + +protected: + ExprPtr e; + std::string e_name; + std::string wrapper_class; + }; + +// Information for registering the class/function associated with a lambda. +class LambdaRegistrationInfo : public CPP_InitInfo + { +public: + LambdaRegistrationInfo(CPPCompile* c, std::string name, FuncTypePtr ft, + std::string wrapper_class, p_hash_type h, bool has_captures); + + std::string InitializerType() const override + { + return std::string("CPP_LambdaRegistration<") + wrapper_class + ">"; + } + void InitializerVals(std::vector& ivs) const override; + +protected: + std::string name; + int func_type; + std::string wrapper_class; + p_hash_type h; + bool has_captures; + }; + +// Abstract class for representing information for initializing a Zeek type.
+class AbstractTypeInfo : public CPP_InitInfo + { +public: + AbstractTypeInfo(CPPCompile* _c, TypePtr _t) : c(_c), t(std::move(_t)) { } + + void InitializerVals(std::vector& ivs) const override + { + ivs.emplace_back(std::to_string(static_cast(t->Tag()))); + AddInitializerVals(ivs); + } + + virtual void AddInitializerVals(std::vector& ivs) const { } + +protected: + CPPCompile* c; + TypePtr t; // the type we're initializing + }; + +// The following capture information for different Zeek types. +class BaseTypeInfo : public AbstractTypeInfo + { +public: + BaseTypeInfo(CPPCompile* _c, TypePtr _t) : AbstractTypeInfo(_c, std::move(_t)) { } + }; + +class EnumTypeInfo : public AbstractTypeInfo + { +public: + EnumTypeInfo(CPPCompile* _c, TypePtr _t) : AbstractTypeInfo(_c, std::move(_t)) { } + + void AddInitializerVals(std::vector& ivs) const override; + }; + +class OpaqueTypeInfo : public AbstractTypeInfo + { +public: + OpaqueTypeInfo(CPPCompile* _c, TypePtr _t) : AbstractTypeInfo(_c, std::move(_t)) { } + + void AddInitializerVals(std::vector& ivs) const override; + }; + +class TypeTypeInfo : public AbstractTypeInfo + { +public: + TypeTypeInfo(CPPCompile* c, TypePtr _t); + + void AddInitializerVals(std::vector& ivs) const override; + +private: + TypePtr tt; // the type referred to by t + }; + +class VectorTypeInfo : public AbstractTypeInfo + { +public: + VectorTypeInfo(CPPCompile* c, TypePtr _t); + + void AddInitializerVals(std::vector& ivs) const override; + +private: + TypePtr yield; + }; + +class ListTypeInfo : public AbstractTypeInfo + { +public: + ListTypeInfo(CPPCompile* c, TypePtr _t); + + void AddInitializerVals(std::vector& ivs) const override; + +private: + const std::vector& types; + }; + +class TableTypeInfo : public AbstractTypeInfo + { +public: + TableTypeInfo(CPPCompile* c, TypePtr _t); + + void AddInitializerVals(std::vector& ivs) const override; + +private: + int indices; + TypePtr yield; + }; + +class FuncTypeInfo : public AbstractTypeInfo + { +public: + 
FuncTypeInfo(CPPCompile* c, TypePtr _t); + + void AddInitializerVals(std::vector& ivs) const override; + +private: + FunctionFlavor flavor; + TypePtr params; + TypePtr yield; + }; + +class RecordTypeInfo : public AbstractTypeInfo + { +public: + RecordTypeInfo(CPPCompile* c, TypePtr _t); + + void AddInitializerVals(std::vector& ivs) const override; + +private: + std::vector field_names; + std::vector field_types; + std::vector field_attrs; + }; + +// Much of the table-driven initialization is based on vectors of indices, +// which we represent as vectors of int's, where each int is used to index a +// global C++ vector. This class manages such vectors. In particular, it +// reduces a given vector-of-indices to a single value, itself an index, that +// can be used at run-time to retrieve a reference to the original vector. +// +// Note that the notion recurses: if we have several vector-of-indices, we can +// reduce each to an index, and then take the resulting vector-of-meta-indices +// and reduce it further to an index. Doing so allows us to concisely refer +// to a potentially large, deep set of indices using a single value - such as +// for CPP_InitsInfo's "offset_set" member variable. + +class IndicesManager + { +public: + IndicesManager() { } + + // Adds a new vector-of-indices to the collection we're tracking, + // returning the offset that will be associated with it at run-time. + int AddIndices(std::vector indices) + { + int n = indices_set.size(); + indices_set.emplace_back(std::move(indices)); + return n; + } + + // Generates the initializations used to construct the managed + // vectors at run-time. + void Generate(CPPCompile* c); + +private: + // Each vector-of-indices being tracked. We could obtain some + // space and time savings by recognizing duplicate vectors + // (for example, empty vectors are very common), but as long + // as the code compiles and executes without undue overhead, + // this doesn't appear necessary. 
+ std::vector> indices_set; + }; + + } // zeek::detail diff --git a/src/script_opt/CPP/README.md b/src/script_opt/CPP/README.md index e2790320fd..b9153cd9a5 100644 --- a/src/script_opt/CPP/README.md +++ b/src/script_opt/CPP/README.md @@ -114,40 +114,6 @@ There are additional workflows relating to running the test suite, which we document only briefly here as they're likely going to change or go away , as it's not clear they're actually needed. -First, `-O update-C++` will run using a Zeek instance that already includes -compiled scripts and, for any functions pulled in by the command-line scripts, -if they're not already compiled, will generate additional C++ code for -those that can be combined with the already-compiled code. The -additionally compiled code leverages the existing compiled-in functions -(and globals), which it learns about via the `CPP-hashes.dat` file mentioned -above. Any code compiled in this fashion must be _consistent_ with the -previously compiled code, meaning that globals and extensible types (enums, -records) have definitions that align with those previously used, and any -other code later compiled must also be consistent. - -In a similar vein, `-O add-C++` likewise uses a Zeek instance that already -includes compiled scripts. It generates additional C++ code that leverages -that existing compilation. However, this code is _not_ meant for use with -subsequently compiled code; later code also build with `add-C++` can have -inconsistencies with this code. (The utility of this mode is to support -compiling the entire test suite as one large incremental compilation, -rather than as hundreds of pointwise compilations.) - -Both of these _append_ to any existing `CPP-gen-addl.h` file, providing -a means for building it up to reflect a number of compilations. - -The `update-C++` and `add-C++` options help support different -ways of building the `btest` test suite. 
They were meant to enable doing so -without requiring per-test-suite-element recompilations. However, experiences -to date have found that trying to avoid pointwise compilations incurs -additional headaches, so it's better to just bite off the cost of a large -number of recompilations. Given that, it might make sense to remove these -options. - -Finally, with respect to workflow there are number of simple scripts in -`src/script_opt/CPP/` (which should ultimately be replaced) in support of -compiler maintenance: - * `non-embedded-build` Builds `zeek` without any embedded compiled-to-C++ scripts. * `bare-embedded-build` @@ -183,12 +149,11 @@ Known Issues Here we list various known issues with using the compiler:
-* Compilation of compiled code can be noticeably slow (if built using -`./configure --enable-debug`) or hugely slow (if not), with the latter -taking on the order of an hour on a beefy laptop. This slowness complicates +* Compilation of compiled code can be quite slow when the C++ compilation +includes optimization, +taking many minutes on a beefy laptop. This slowness complicates CI/CD approaches for always running compiled code against the test suite -when merging changes. It's not presently clear how feasible it is to -speed this up. +when merging changes. * Run-time error messages generally lack location information and information about associated expressions/statements, making them hard to puzzle out. diff --git a/src/script_opt/CPP/Runtime.h b/src/script_opt/CPP/Runtime.h index 258d24d673..d872c1d9e3 100644 --- a/src/script_opt/CPP/Runtime.h +++ b/src/script_opt/CPP/Runtime.h @@ -17,18 +17,21 @@ #include "zeek/ZeekString.h" #include "zeek/module_util.h" #include "zeek/script_opt/CPP/Func.h" -#include "zeek/script_opt/CPP/RuntimeInit.h" +#include "zeek/script_opt/CPP/RuntimeInitSupport.h" +#include "zeek/script_opt/CPP/RuntimeInits.h" #include "zeek/script_opt/CPP/RuntimeOps.h" #include "zeek/script_opt/CPP/RuntimeVec.h" #include "zeek/script_opt/ScriptOpt.h" -namespace zeek +namespace zeek::detail { using BoolValPtr = IntrusivePtr; +using IntValPtr = IntrusivePtr; using CountValPtr = IntrusivePtr; using DoubleValPtr = IntrusivePtr; using StringValPtr = IntrusivePtr; +using TimeValPtr = IntrusivePtr; using IntervalValPtr = IntrusivePtr; using PatternValPtr = IntrusivePtr; using FuncValPtr = IntrusivePtr; diff --git a/src/script_opt/CPP/RuntimeInit.cc b/src/script_opt/CPP/RuntimeInitSupport.cc similarity index 86% rename from src/script_opt/CPP/RuntimeInit.cc rename to src/script_opt/CPP/RuntimeInitSupport.cc index f98718dce0..6a657d0713 100644 --- a/src/script_opt/CPP/RuntimeInit.cc +++ b/src/script_opt/CPP/RuntimeInitSupport.cc @@ -1,6 +1,6 @@ // See the 
file "COPYING" in the main distribution directory for copyright. -#include "zeek/script_opt/CPP/RuntimeInit.h" +#include "zeek/script_opt/CPP/RuntimeInitSupport.h" #include "zeek/EventRegistry.h" #include "zeek/module_util.h" @@ -49,7 +49,7 @@ static int flag_init_CPP() static int dummy = flag_init_CPP(); -void register_type__CPP(TypePtr t, const std::string& name) +void register_type__CPP(TypePtr t, const string& name) { if ( t->GetName().size() > 0 ) // Already registered. @@ -113,8 +113,8 @@ void activate_bodies__CPP(const char* fn, const char* module, bool exported, Typ auto v = fg->GetVal(); if ( ! v ) { // Create it. - std::vector no_bodies; - std::vector no_priorities; + vector no_bodies; + vector no_priorities; auto sf = make_intrusive(fn, ft, no_bodies, no_priorities); v = make_intrusive(move(sf)); @@ -154,8 +154,9 @@ void activate_bodies__CPP(const char* fn, const char* module, bool exported, Typ continue; // Add in the new body. - ASSERT(compiled_scripts.count(h) > 0); - auto cs = compiled_scripts[h]; + auto csi = compiled_scripts.find(h); + ASSERT(csi != compiled_scripts.end()); + auto cs = csi->second; f->AddBody(cs.body, no_inits, num_params, cs.priority); added_bodies[fn].insert(h); @@ -193,14 +194,37 @@ FuncValPtr lookup_func__CPP(string name, vector hashes, const TypeP { auto ft = cast_intrusive(t); + if ( hashes.empty() ) + { + // This happens for functions that have at least one + // uncompilable body. + auto gl = lookup_ID(name.c_str(), GLOBAL_MODULE_NAME, false, false, false); + if ( ! gl ) + { + reporter->CPPRuntimeError("non-compiled function %s missing", name.c_str()); + exit(1); + } + + auto v = gl->GetVal(); + if ( ! 
v || v->GetType()->Tag() != TYPE_FUNC ) + { + reporter->CPPRuntimeError("non-compiled function %s has an invalid value", + name.c_str()); + exit(1); + } + + return cast_intrusive(v); + } + vector bodies; vector priorities; for ( auto h : hashes ) { - ASSERT(compiled_scripts.count(h) > 0); + auto cs = compiled_scripts.find(h); + ASSERT(cs != compiled_scripts.end()); - const auto& f = compiled_scripts[h]; + const auto& f = cs->second; bodies.push_back(f.body); priorities.push_back(f.priority); diff --git a/src/script_opt/CPP/RuntimeInit.h b/src/script_opt/CPP/RuntimeInitSupport.h similarity index 98% rename from src/script_opt/CPP/RuntimeInit.h rename to src/script_opt/CPP/RuntimeInitSupport.h index fbc44a32dc..284290c95b 100644 --- a/src/script_opt/CPP/RuntimeInit.h +++ b/src/script_opt/CPP/RuntimeInitSupport.h @@ -5,6 +5,7 @@ #pragma once #include "zeek/Val.h" +#include "zeek/script_opt/CPP/Attrs.h" #include "zeek/script_opt/CPP/Func.h" namespace zeek diff --git a/src/script_opt/CPP/RuntimeInits.cc b/src/script_opt/CPP/RuntimeInits.cc new file mode 100644 index 0000000000..4f52171ffe --- /dev/null +++ b/src/script_opt/CPP/RuntimeInits.cc @@ -0,0 +1,523 @@ +// See the file "COPYING" in the main distribution directory for copyright. 
+ +#include "zeek/script_opt/CPP/RuntimeInits.h" + +#include "zeek/Desc.h" +#include "zeek/File.h" +#include "zeek/RE.h" +#include "zeek/ZeekString.h" +#include "zeek/script_opt/CPP/RuntimeInitSupport.h" + +using namespace std; + +namespace zeek::detail + { + +template +void CPP_IndexedInits::InitializeCohortWithOffsets(InitsManager* im, int cohort, + const std::vector& cohort_offsets) + { + auto& co = this->inits[cohort]; + for ( auto i = 0U; i < co.size(); ++i ) + Generate(im, this->inits_vec, cohort_offsets[i], co[i]); + } + +template +void CPP_IndexedInits::Generate(InitsManager* im, std::vector& ivec, int offset, + ValElemVec& init_vals) + { + auto& e_type = im->Types(init_vals[0]); + int val = init_vals[1]; + ivec[offset] = make_enum__CPP(e_type, val); + } + +template +void CPP_IndexedInits::Generate(InitsManager* im, std::vector& ivec, int offset, + ValElemVec& init_vals) + { + auto chars = im->Strings(init_vals[0]); + int len = init_vals[1]; + ivec[offset] = make_intrusive(len, chars); + } + +template +void CPP_IndexedInits::Generate(InitsManager* im, std::vector& ivec, int offset, + ValElemVec& init_vals) + { + auto re = new RE_Matcher(im->Strings(init_vals[0])); + if ( init_vals[1] ) + re->MakeCaseInsensitive(); + + re->Compile(); + + ivec[offset] = make_intrusive(re); + } + +template +void CPP_IndexedInits::Generate(InitsManager* im, std::vector& ivec, int offset, + ValElemVec& init_vals) const + { + auto l = make_intrusive(TYPE_ANY); + + for ( auto& iv : init_vals ) + l->Append(im->ConstVals(iv)); + + ivec[offset] = l; + } + +template +void CPP_IndexedInits::Generate(InitsManager* im, std::vector& ivec, int offset, + ValElemVec& init_vals) const + { + auto iv_it = init_vals.begin(); + auto iv_end = init_vals.end(); + auto t = *(iv_it++); + + auto vt = cast_intrusive(im->Types(t)); + auto vv = make_intrusive(vt); + + while ( iv_it != iv_end ) + vv->Append(im->ConstVals(*(iv_it++))); + + ivec[offset] = vv; + } + +template +void 
CPP_IndexedInits::Generate(InitsManager* im, std::vector& ivec, int offset, + ValElemVec& init_vals) const + { + auto iv_it = init_vals.begin(); + auto iv_end = init_vals.end(); + auto t = *(iv_it++); + + auto rt = cast_intrusive(im->Types(t)); + auto rv = make_intrusive(rt); + + auto field = 0; + while ( iv_it != iv_end ) + { + auto v = *(iv_it++); + if ( v >= 0 ) + rv->Assign(field, im->ConstVals(v)); + ++field; + } + + ivec[offset] = rv; + } + +template +void CPP_IndexedInits::Generate(InitsManager* im, std::vector& ivec, int offset, + ValElemVec& init_vals) const + { + auto iv_it = init_vals.begin(); + auto iv_end = init_vals.end(); + auto t = *(iv_it++); + + auto tt = cast_intrusive(im->Types(t)); + auto tv = make_intrusive(tt); + + while ( iv_it != iv_end ) + { + auto index = im->ConstVals(*(iv_it++)); + auto v = *(iv_it++); + auto value = v >= 0 ? im->ConstVals(v) : nullptr; + tv->Assign(index, value); + } + + ivec[offset] = tv; + } + +template +void CPP_IndexedInits::Generate(InitsManager* im, std::vector& ivec, int offset, + ValElemVec& init_vals) const + { + // Note, in the following we use element 1, not 0, because we + // don't need the "type" value in element 0. 
+ auto fn = im->Strings(init_vals[1]); + auto fv = make_intrusive(make_intrusive(fn, "w")); + + ivec[offset] = fv; + } + +template +void CPP_IndexedInits::Generate(InitsManager* im, std::vector& ivec, int offset, + ValElemVec& init_vals) const + { + auto iv_it = init_vals.begin(); + auto iv_end = init_vals.end(); + auto t = *(iv_it++); + auto fn = im->Strings(*(iv_it++)); + + std::vector hashes; + + while ( iv_it != iv_end ) + hashes.push_back(im->Hashes(*(iv_it++))); + + ivec[offset] = lookup_func__CPP(fn, hashes, im->Types(t)); + } + +template +void CPP_IndexedInits::Generate(InitsManager* im, std::vector& ivec, int offset, + ValElemVec& init_vals) const + { + auto tag = static_cast(init_vals[0]); + auto ae_tag = static_cast(init_vals[1]); + + ExprPtr e; + auto e_arg = init_vals[2]; + + switch ( ae_tag ) + { + case AE_NONE: + break; + + case AE_CONST: + e = make_intrusive(im->ConstVals(e_arg)); + break; + + case AE_NAME: + { + auto name = im->Strings(e_arg); + auto gl = lookup_ID(name, GLOBAL_MODULE_NAME, false, false, false); + ASSERT(gl); + e = make_intrusive(gl); + break; + } + + case AE_RECORD: + { + auto t = im->Types(e_arg); + auto rt = cast_intrusive(t); + auto empty_vals = make_intrusive(); + auto construct = make_intrusive(empty_vals); + e = make_intrusive(construct, rt); + break; + } + + case AE_CALL: + e = im->CallExprs(e_arg); + break; + } + + ivec[offset] = make_intrusive(tag, e); + } + +template +void CPP_IndexedInits::Generate(InitsManager* im, std::vector& ivec, int offset, + ValElemVec& init_vals) const + { + std::vector a_list; + + for ( auto& iv : init_vals ) + a_list.emplace_back(im->Attrs(iv)); + + ivec[offset] = make_intrusive(a_list, nullptr, false, false); + } + +// Instantiate the templates we'll need. 
+ +template class CPP_IndexedInits; +template class CPP_IndexedInits; +template class CPP_IndexedInits; +template class CPP_IndexedInits; +template class CPP_IndexedInits; +template class CPP_IndexedInits; +template class CPP_IndexedInits; +template class CPP_IndexedInits; +template class CPP_IndexedInits; +template class CPP_IndexedInits; +template class CPP_IndexedInits; +template class CPP_IndexedInits; + +void CPP_TypeInits::DoPreInits(InitsManager* im, const std::vector& offsets_vec) + { + for ( auto cohort = 0U; cohort < offsets_vec.size(); ++cohort ) + { + auto& co = inits[cohort]; + auto& cohort_offsets = im->Indices(offsets_vec[cohort]); + for ( auto i = 0U; i < co.size(); ++i ) + PreInit(im, cohort_offsets[i], co[i]); + } + } + +void CPP_TypeInits::PreInit(InitsManager* im, int offset, ValElemVec& init_vals) + { + auto tag = static_cast(init_vals[0]); + + if ( tag == TYPE_LIST ) + inits_vec[offset] = make_intrusive(); + + else if ( tag == TYPE_RECORD ) + { + auto name = im->Strings(init_vals[1]); + if ( name[0] ) + inits_vec[offset] = get_record_type__CPP(name); + else + inits_vec[offset] = get_record_type__CPP(nullptr); + } + + // else no pre-initialization needed + } + +void CPP_TypeInits::Generate(InitsManager* im, vector& ivec, int offset, + ValElemVec& init_vals) const + { + auto tag = static_cast(init_vals[0]); + TypePtr t; + switch ( tag ) + { + case TYPE_ADDR: + case TYPE_ANY: + case TYPE_BOOL: + case TYPE_COUNT: + case TYPE_DOUBLE: + case TYPE_ERROR: + case TYPE_INT: + case TYPE_INTERVAL: + case TYPE_PATTERN: + case TYPE_PORT: + case TYPE_STRING: + case TYPE_TIME: + case TYPE_TIMER: + case TYPE_VOID: + case TYPE_SUBNET: + case TYPE_FILE: + t = base_type(tag); + break; + + case TYPE_ENUM: + t = BuildEnumType(im, init_vals); + break; + + case TYPE_OPAQUE: + t = BuildOpaqueType(im, init_vals); + break; + + case TYPE_TYPE: + t = BuildTypeType(im, init_vals); + break; + + case TYPE_VECTOR: + t = BuildVectorType(im, init_vals); + break; + + case 
TYPE_LIST: + t = BuildTypeList(im, init_vals, offset); + break; + + case TYPE_TABLE: + t = BuildTableType(im, init_vals); + break; + + case TYPE_FUNC: + t = BuildFuncType(im, init_vals); + break; + + case TYPE_RECORD: + t = BuildRecordType(im, init_vals, offset); + break; + + default: + ASSERT(0); + } + + ivec[offset] = t; + } + +TypePtr CPP_TypeInits::BuildEnumType(InitsManager* im, ValElemVec& init_vals) const + { + auto iv_it = init_vals.begin(); + auto iv_end = init_vals.end(); + auto name = im->Strings(*++iv_it); // skip element [0] + auto et = get_enum_type__CPP(name); + + if ( et->Names().empty() ) + { + ++iv_it; + while ( iv_it != iv_end ) + { + auto e_name = im->Strings(*(iv_it++)); + auto e_val = *(iv_it++); + et->AddNameInternal(e_name, e_val); + } + } + + return et; + } + +TypePtr CPP_TypeInits::BuildOpaqueType(InitsManager* im, ValElemVec& init_vals) const + { + auto name = im->Strings(init_vals[1]); + return make_intrusive(name); + } + +TypePtr CPP_TypeInits::BuildTypeType(InitsManager* im, ValElemVec& init_vals) const + { + auto& t = im->Types(init_vals[1]); + return make_intrusive(t); + } + +TypePtr CPP_TypeInits::BuildVectorType(InitsManager* im, ValElemVec& init_vals) const + { + auto& t = im->Types(init_vals[1]); + return make_intrusive(t); + } + +TypePtr CPP_TypeInits::BuildTypeList(InitsManager* im, ValElemVec& init_vals, int offset) const + { + const auto& tl = cast_intrusive(inits_vec[offset]); + + auto iv_it = init_vals.begin(); + auto iv_end = init_vals.end(); + + ++iv_it; + + while ( iv_it != iv_end ) + tl->Append(im->Types(*(iv_it++))); + + return tl; + } + +TypePtr CPP_TypeInits::BuildTableType(InitsManager* im, ValElemVec& init_vals) const + { + auto index = cast_intrusive(im->Types(init_vals[1])); + auto yield_i = init_vals[2]; + auto yield = yield_i >= 0 ? 
im->Types(yield_i) : nullptr; + + return make_intrusive(index, yield); + } + +TypePtr CPP_TypeInits::BuildFuncType(InitsManager* im, ValElemVec& init_vals) const + { + auto p = cast_intrusive(im->Types(init_vals[1])); + auto yield_i = init_vals[2]; + auto flavor = static_cast(init_vals[3]); + + TypePtr y; + + if ( yield_i >= 0 ) + y = im->Types(yield_i); + + else if ( flavor == FUNC_FLAVOR_FUNCTION || flavor == FUNC_FLAVOR_HOOK ) + y = base_type(TYPE_VOID); + + return make_intrusive(p, y, flavor); + } + +TypePtr CPP_TypeInits::BuildRecordType(InitsManager* im, ValElemVec& init_vals, int offset) const + { + auto r = cast_intrusive(inits_vec[offset]); + ASSERT(r); + + if ( r->NumFields() == 0 ) + { + type_decl_list tl; + + auto n = init_vals.size(); + auto i = 2U; + + while ( i < n ) + { + auto s = im->Strings(init_vals[i++]); + auto id = util::copy_string(s); + auto type = im->Types(init_vals[i++]); + auto attrs_i = init_vals[i++]; + + AttributesPtr attrs; + if ( attrs_i >= 0 ) + attrs = im->Attributes(attrs_i); + + tl.append(new TypeDecl(id, type, attrs)); + } + + r->AddFieldsDirectly(tl); + } + + return r; + } + +int CPP_FieldMapping::ComputeOffset(InitsManager* im) const + { + auto r = im->Types(rec)->AsRecordType(); + auto fm_offset = r->FieldOffset(field_name.c_str()); + + if ( fm_offset < 0 ) + { // field does not exist, create it + fm_offset = r->NumFields(); + + auto id = util::copy_string(field_name.c_str()); + auto type = im->Types(field_type); + + AttributesPtr attrs; + if ( field_attrs >= 0 ) + attrs = im->Attributes(field_attrs); + + type_decl_list tl; + tl.append(new TypeDecl(id, type, attrs)); + + r->AddFieldsDirectly(tl); + } + + return fm_offset; + } + +int CPP_EnumMapping::ComputeOffset(InitsManager* im) const + { + auto e = im->Types(e_type)->AsEnumType(); + + auto em_offset = e->Lookup(e_name); + if ( em_offset < 0 ) + { // enum constant does not exist, create it + em_offset = e->Names().size(); + if ( e->Lookup(em_offset) ) + 
reporter->InternalError("enum inconsistency while initializing compiled scripts"); + e->AddNameInternal(e_name, em_offset); + } + + return em_offset; + } + +void CPP_GlobalInit::Generate(InitsManager* im, std::vector& /* inits_vec */, + int /* offset */) const + { + global = lookup_global__CPP(name, im->Types(type), exported); + + if ( ! global->HasVal() && val >= 0 ) + { + global->SetVal(im->ConstVals(val)); + if ( attrs >= 0 ) + global->SetAttrs(im->Attributes(attrs)); + } + } + +void generate_indices_set(int* inits, std::vector>& indices_set) + { + // First figure out how many groups of indices there are, so we + // can pre-allocate the outer vector. + auto i_ptr = inits; + int num_inits = 0; + while ( *i_ptr >= 0 ) + { + ++num_inits; + int n = *i_ptr; + i_ptr += n + 1; // skip over vector elements + } + + indices_set.reserve(num_inits); + + i_ptr = inits; + while ( *i_ptr >= 0 ) + { + int n = *i_ptr; + ++i_ptr; + std::vector indices; + indices.reserve(n); + for ( int i = 0; i < n; ++i ) + indices.push_back(i_ptr[i]); + i_ptr += n; + + indices_set.emplace_back(move(indices)); + } + } + + } // zeek::detail diff --git a/src/script_opt/CPP/RuntimeInits.h b/src/script_opt/CPP/RuntimeInits.h new file mode 100644 index 0000000000..031208a1ce --- /dev/null +++ b/src/script_opt/CPP/RuntimeInits.h @@ -0,0 +1,542 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +// Classes for run-time initialization and management of C++ values used +// by the generated code. + +// See InitsInfo.h for a discussion of initialization issues and the +// associated strategies for dealing with them. + +#include "zeek/Expr.h" +#include "zeek/module_util.h" +#include "zeek/script_opt/CPP/RuntimeInitSupport.h" + +#pragma once + +namespace zeek::detail + { + +using FileValPtr = IntrusivePtr; +using FuncValPtr = IntrusivePtr; + +class InitsManager; + +// An abstract helper class used to access elements of an initialization vector. 
+// We need the abstraction because InitsManager below needs to be able to refer +// to any of a range of templated classes. +class CPP_AbstractInitAccessor + { +public: + virtual ~CPP_AbstractInitAccessor() { } + virtual ValPtr Get(int index) const { return nullptr; } + }; + +// Convenient way to refer to an offset associated with a particular Zeek type. +using CPP_ValElem = std::pair; + +// This class groups together all of the vectors needed for run-time +// initialization. We gather them together into a single object so as +// to avoid wiring in a set of globals that the various initialization +// methods have to know about. +class InitsManager + { +public: + InitsManager(std::vector& _const_vals, + std::map>& _consts, + std::vector>& _indices, std::vector& _strings, + std::vector& _hashes, std::vector& _types, + std::vector& _attributes, std::vector& _attrs, + std::vector& _call_exprs) + : const_vals(_const_vals), consts(_consts), indices(_indices), strings(_strings), + hashes(_hashes), types(_types), attributes(_attributes), attrs(_attrs), + call_exprs(_call_exprs) + { + } + + // Provides generic access to Zeek constant values based on a single + // index. + ValPtr ConstVals(int offset) const + { + auto& cv = const_vals[offset]; + return Consts(cv.first, cv.second); + } + + // Retrieves the Zeek constant value for a particular Zeek type. + ValPtr Consts(TypeTag tag, int index) const { return consts[tag]->Get(index); } + + // Accessors for the sundry initialization vectors, each retrieving + // a specific element identified by an index/offset.
+ const std::vector& Indices(int offset) const { return indices[offset]; } + const char* Strings(int offset) const { return strings[offset]; } + const p_hash_type Hashes(int offset) const { return hashes[offset]; } + const TypePtr& Types(int offset) const { return types[offset]; } + const AttributesPtr& Attributes(int offset) const { return attributes[offset]; } + const AttrPtr& Attrs(int offset) const { return attrs[offset]; } + const CallExprPtr& CallExprs(int offset) const { return call_exprs[offset]; } + +private: + std::vector& const_vals; + std::map>& consts; + std::vector>& indices; + std::vector& strings; + std::vector& hashes; + std::vector& types; + std::vector& attributes; + std::vector& attrs; + std::vector& call_exprs; + }; + +// Manages an initialization vector of the given type. +template class CPP_Init + { +public: + virtual ~CPP_Init() { } + + // Pre-initializes the given element of the vector, if necessary. + virtual void PreInit(InitsManager* im, std::vector& inits_vec, int offset) const { } + + // Initializes the given element of the vector. + virtual void Generate(InitsManager* im, std::vector& inits_vec, int offset) const { } + }; + +// Abstract class for creating a collection of initializers. T1 is +// the type of the generated vector, T2 the type of its initializers. +template class CPP_AbstractInits + { +public: + CPP_AbstractInits(std::vector& _inits_vec, int _offsets_set, std::vector _inits) + : inits_vec(_inits_vec), offsets_set(_offsets_set), inits(std::move(_inits)) + { + // Compute how big to make the vector. + int num_inits = 0; + + for ( const auto& cohort : inits ) + num_inits += cohort.size(); + + inits_vec.resize(num_inits); + } + + // Initialize the given cohort of elements. + void InitializeCohort(InitsManager* im, int cohort) + { + // Get this object's vector-of-vector-of-indices. 
+ auto& offsets_vec = im->Indices(offsets_set); + + if ( cohort == 0 ) + DoPreInits(im, offsets_vec); + + // Get the vector-of-indices for this cohort. + auto& cohort_offsets = im->Indices(offsets_vec[cohort]); + + InitializeCohortWithOffsets(im, cohort, cohort_offsets); + } + +protected: + virtual void InitializeCohortWithOffsets(InitsManager* im, int cohort, + const std::vector& cohort_offsets) + { + } + + // Pre-initialize all elements requiring it. + virtual void DoPreInits(InitsManager* im, const std::vector& offsets_vec) { } + + // Generate a single element. + virtual void GenerateElement(InitsManager* im, T2& init, int offset) { } + + // The initialization vector in its entirety. + std::vector& inits_vec; + + // A meta-index for retrieving the vector-of-vector-of-indices. + int offsets_set; + + // Indexed by cohort. + std::vector inits; + }; + +// Manages an initialization vector that uses "custom" initializers +// (tailored ones rather than initializers based on indexing). +template using CPP_InitVec = std::vector>>; +template class CPP_CustomInits : public CPP_AbstractInits> + { +public: + CPP_CustomInits(std::vector& _inits_vec, int _offsets_set, + std::vector> _inits) + : CPP_AbstractInits>(_inits_vec, _offsets_set, std::move(_inits)) + { + } + +private: + void DoPreInits(InitsManager* im, const std::vector& offsets_vec) override + { + int cohort = 0; + for ( const auto& co : this->inits ) + { + auto& cohort_offsets = im->Indices(offsets_vec[cohort]); + for ( auto i = 0U; i < co.size(); ++i ) + co[i]->PreInit(im, this->inits_vec, cohort_offsets[i]); + ++cohort; + } + } + + void InitializeCohortWithOffsets(InitsManager* im, int cohort, + const std::vector& cohort_offsets) override + { + // Loop over the cohort's elements to initialize them. 
+ auto& co = this->inits[cohort]; + for ( auto i = 0U; i < co.size(); ++i ) + co[i]->Generate(im, this->inits_vec, cohort_offsets[i]); + } + }; + +// Provides access to elements of an initialization vector of the given type. +template class CPP_InitAccessor : public CPP_AbstractInitAccessor + { +public: + CPP_InitAccessor(std::vector& _inits_vec) : inits_vec(_inits_vec) { } + + ValPtr Get(int index) const override { return inits_vec[index]; } + +private: + std::vector& inits_vec; + }; + +// A type used for initializations that are based on indices into +// initialization vectors. +using ValElemVec = std::vector; +using ValElemVecVec = std::vector; + +// Manages an initialization vector of the given type whose elements are +// built up from previously constructed values in other initialization vectors. +template class CPP_IndexedInits : public CPP_AbstractInits + { +public: + CPP_IndexedInits(std::vector& _inits_vec, int _offsets_set, + std::vector _inits) + : CPP_AbstractInits(_inits_vec, _offsets_set, std::move(_inits)) + { + } + +protected: + void InitializeCohortWithOffsets(InitsManager* im, int cohort, + const std::vector& cohort_offsets) override; + + // Note, in the following we pass in the inits_vec, even though + // the method will have direct access to it, because we want to + // use overloading to dispatch to custom generation for different + // types of values. 
+ void Generate(InitsManager* im, std::vector& ivec, int offset, + ValElemVec& init_vals); + void Generate(InitsManager* im, std::vector& ivec, int offset, + ValElemVec& init_vals); + void Generate(InitsManager* im, std::vector& ivec, int offset, + ValElemVec& init_vals); + void Generate(InitsManager* im, std::vector& ivec, int offset, + ValElemVec& init_vals) const; + void Generate(InitsManager* im, std::vector& ivec, int offset, + ValElemVec& init_vals) const; + void Generate(InitsManager* im, std::vector& ivec, int offset, + ValElemVec& init_vals) const; + void Generate(InitsManager* im, std::vector& ivec, int offset, + ValElemVec& init_vals) const; + void Generate(InitsManager* im, std::vector& ivec, int offset, + ValElemVec& init_vals) const; + void Generate(InitsManager* im, std::vector& ivec, int offset, + ValElemVec& init_vals) const; + void Generate(InitsManager* im, std::vector& ivec, int offset, + ValElemVec& init_vals) const; + void Generate(InitsManager* im, std::vector& ivec, int offset, + ValElemVec& init_vals) const; + + // The TypePtr initialization vector requires special treatment, since + // it has to dispatch on subclasses of TypePtr. + virtual void Generate(InitsManager* im, std::vector& ivec, int offset, + ValElemVec& init_vals) const + { + ASSERT(0); + } + }; + +// A specialization of CPP_IndexedInits that supports initializing based +// on subclasses of TypePtr. 
+class CPP_TypeInits : public CPP_IndexedInits + { +public: + CPP_TypeInits(std::vector& _inits_vec, int _offsets_set, + std::vector> _inits) + : CPP_IndexedInits(_inits_vec, _offsets_set, _inits) + { + } + +protected: + void DoPreInits(InitsManager* im, const std::vector& offsets_vec) override; + void PreInit(InitsManager* im, int offset, ValElemVec& init_vals); + + void Generate(InitsManager* im, std::vector& ivec, int offset, + ValElemVec& init_vals) const override; + + TypePtr BuildEnumType(InitsManager* im, ValElemVec& init_vals) const; + TypePtr BuildOpaqueType(InitsManager* im, ValElemVec& init_vals) const; + TypePtr BuildTypeType(InitsManager* im, ValElemVec& init_vals) const; + TypePtr BuildVectorType(InitsManager* im, ValElemVec& init_vals) const; + TypePtr BuildTypeList(InitsManager* im, ValElemVec& init_vals, int offset) const; + TypePtr BuildTableType(InitsManager* im, ValElemVec& init_vals) const; + TypePtr BuildFuncType(InitsManager* im, ValElemVec& init_vals) const; + TypePtr BuildRecordType(InitsManager* im, ValElemVec& init_vals, int offset) const; + }; + +// Abstract class for initializing basic (non-compound) constants. T1 is +// the Zeek type for the constructed constant, T2 is the C++ type of its +// initializer. +// +// In principle we could derive this from CPP_AbstractInits, though to do so +// we'd need to convert the initializers to a vector-of-vector-of-T2, which +// would trade complexity here for complexity in InitsInfo. So we instead +// keep this class distinct, since at heart it's a simpler set of methods +// and that way we can keep them as such here. 
+template class CPP_AbstractBasicConsts + { +public: + CPP_AbstractBasicConsts(std::vector& _inits_vec, int _offsets_set, std::vector _inits) + : inits_vec(_inits_vec), offsets_set(_offsets_set), inits(std::move(_inits)) + { + inits_vec.resize(inits.size()); + } + + void InitializeCohort(InitsManager* im, int cohort) + { + ASSERT(cohort == 0); + auto& offsets_vec = im->Indices(offsets_set); + auto& cohort_offsets = im->Indices(offsets_vec[cohort]); + for ( auto i = 0U; i < inits.size(); ++i ) + InitElem(im, cohort_offsets[i], i); + } + +protected: + virtual void InitElem(InitsManager* im, int offset, int index) { ASSERT(0); } + +protected: + // See CPP_AbstractInits for the nature of these. + std::vector& inits_vec; + int offsets_set; + std::vector inits; + }; + +// Class for initializing a basic constant of Zeek type T1, using initializers +// of C++ type T2. T1 is an intrusive pointer to a T3 type; for example, if +// T1 is a BoolValPtr then T3 will be BoolVal. +template +class CPP_BasicConsts : public CPP_AbstractBasicConsts + { +public: + CPP_BasicConsts(std::vector& _inits_vec, int _offsets_set, std::vector _inits) + : CPP_AbstractBasicConsts(_inits_vec, _offsets_set, std::move(_inits)) + { + } + + void InitElem(InitsManager* /* im */, int offset, int index) override + { + this->inits_vec[offset] = make_intrusive(this->inits[index]); + } + }; + +// Specific classes for basic constants that use string-based constructors. 
+class CPP_AddrConsts : public CPP_AbstractBasicConsts + { +public: + CPP_AddrConsts(std::vector& _inits_vec, int _offsets_set, std::vector _inits) + : CPP_AbstractBasicConsts(_inits_vec, _offsets_set, std::move(_inits)) + { + } + + void InitElem(InitsManager* im, int offset, int index) override + { + auto s = im->Strings(this->inits[index]); + this->inits_vec[offset] = make_intrusive(s); + } + }; + +class CPP_SubNetConsts : public CPP_AbstractBasicConsts + { +public: + CPP_SubNetConsts(std::vector& _inits_vec, int _offsets_set, + std::vector _inits) + : CPP_AbstractBasicConsts(_inits_vec, _offsets_set, std::move(_inits)) + { + } + + void InitElem(InitsManager* im, int offset, int index) override + { + auto s = im->Strings(this->inits[index]); + this->inits_vec[offset] = make_intrusive(s); + } + }; + +// Class for initializing a Zeek global. These don't go into an initialization +// vector, so we use void* as the underlying type. +class CPP_GlobalInit : public CPP_Init + { +public: + CPP_GlobalInit(IDPtr& _global, const char* _name, int _type, int _attrs, int _val, + bool _exported) + : CPP_Init(), global(_global), name(_name), type(_type), attrs(_attrs), val(_val), + exported(_exported) + { + } + + void Generate(InitsManager* im, std::vector& /* inits_vec */, + int /* offset */) const override; + +protected: + IDPtr& global; + const char* name; + int type; + int attrs; + int val; + bool exported; + }; + +// Abstract class for constructing a CallExpr to evaluate a Zeek expression. +class CPP_AbstractCallExprInit : public CPP_Init + { +public: + CPP_AbstractCallExprInit() : CPP_Init() { } + }; + +// Constructs a CallExpr that calls a given CPPFunc subclass. 
+template class CPP_CallExprInit : public CPP_AbstractCallExprInit + { +public: + CPP_CallExprInit(CallExprPtr& _e_var) : CPP_AbstractCallExprInit(), e_var(_e_var) { } + + void Generate(InitsManager* /* im */, std::vector& inits_vec, + int offset) const override + { + auto wrapper_class = make_intrusive(); + auto func_val = make_intrusive(wrapper_class); + auto func_expr = make_intrusive(func_val); + auto empty_args = make_intrusive(); + + e_var = make_intrusive(func_expr, empty_args); + inits_vec[offset] = e_var; + } + +private: + // Where to store the expression once we've built it. + CallExprPtr& e_var; + }; + +// Abstract class for registering a lambda defined in terms of a CPPStmt. +class CPP_AbstractLambdaRegistration : public CPP_Init + { +public: + CPP_AbstractLambdaRegistration() : CPP_Init() { } + }; + +// Registers a lambda defined in terms of a given CPPStmt subclass. +template class CPP_LambdaRegistration : public CPP_AbstractLambdaRegistration + { +public: + CPP_LambdaRegistration(const char* _name, int _func_type, p_hash_type _h, bool _has_captures) + : CPP_AbstractLambdaRegistration(), name(_name), func_type(_func_type), h(_h), + has_captures(_has_captures) + { + } + + void Generate(InitsManager* im, std::vector& inits_vec, int offset) const override + { + auto l = make_intrusive(name); + auto& ft = im->Types(func_type); + register_lambda__CPP(l, h, name, ft, has_captures); + } + +protected: + const char* name; + int func_type; + p_hash_type h; + bool has_captures; + }; + +// Constructs at run-time a mapping between abstract record field offsets used +// when compiling a set of scripts to their concrete offsets (which might differ +// from those during compilation due to loading of other scripts that extend +// various records). 
+class CPP_FieldMapping + { +public: + CPP_FieldMapping(int _rec, std::string _field_name, int _field_type, int _field_attrs) + : rec(_rec), field_name(std::move(_field_name)), field_type(_field_type), + field_attrs(_field_attrs) + { + } + + int ComputeOffset(InitsManager* im) const; + +private: + int rec; // index to retrieve the record's type + std::string field_name; // which field this offset pertains to + int field_type; // the field's type, in case we have to construct it + int field_attrs; // the same for the field's attributes + }; + +// Constructs at run-time a mapping between abstract enum values used when +// compiling a set of scripts to their concrete values (which might differ +// from those during compilation due to loading of other scripts that extend +// the enum). +class CPP_EnumMapping + { +public: + CPP_EnumMapping(int _e_type, std::string _e_name) : e_type(_e_type), e_name(std::move(_e_name)) + { + } + + int ComputeOffset(InitsManager* im) const; + +private: + int e_type; // index to EnumType + std::string e_name; // which enum constant for that type + }; + +// Looks up a BiF of the given name, making it available to compiled +// code via a C++ global. +class CPP_LookupBiF + { +public: + CPP_LookupBiF(zeek::Func*& _bif_func, std::string _bif_name) + : bif_func(_bif_func), bif_name(std::move(_bif_name)) + { + } + + void ResolveBiF() const { bif_func = lookup_bif__CPP(bif_name.c_str()); } + +protected: + zeek::Func*& bif_func; // where to store the pointer to the BiF + std::string bif_name; // the BiF's name + }; + +// Information needed to register a compiled function body (which makes it +// available to substitute for the body's AST). The compiler generates +// code that loops over a vector of these to perform the registrations. 
+struct CPP_RegisterBody + { + CPP_RegisterBody(std::string _func_name, void* _func, int _type_signature, int _priority, + p_hash_type _h, std::vector _events) + : func_name(std::move(_func_name)), func(_func), type_signature(_type_signature), + priority(_priority), h(_h), events(std::move(_events)) + { + } + + std::string func_name; // name of the function + void* func; // pointer to C++ + int type_signature; + int priority; + p_hash_type h; + std::vector events; + }; + +// Helper function that takes a (large) array of int's and from them +// constructs the corresponding vector-of-vector-of-indices. Each +// vector-of-indices is represented first by an int specifying its +// size, and then that many int's for its values. We recognize the +// end of the array upon encountering a "size" entry of -1. +extern void generate_indices_set(int* inits, std::vector>& indices_set); + + } // zeek::detail diff --git a/src/script_opt/CPP/Stmts.cc b/src/script_opt/CPP/Stmts.cc index 47d8f713a9..f62e72b4cb 100644 --- a/src/script_opt/CPP/Stmts.cc +++ b/src/script_opt/CPP/Stmts.cc @@ -245,7 +245,7 @@ void CPPCompile::GenSwitchStmt(const SwitchStmt* sw) else sw_val = string("p_hash(") + GenExpr(e, GEN_VAL_PTR) + ")"; - Emit("switch ( %s ) {", sw_val.c_str()); + Emit("switch ( %s ) {", sw_val); ++break_level; diff --git a/src/script_opt/CPP/Tracker.cc b/src/script_opt/CPP/Tracker.cc index e82e5878b2..d4491274bd 100644 --- a/src/script_opt/CPP/Tracker.cc +++ b/src/script_opt/CPP/Tracker.cc @@ -51,13 +51,26 @@ template string CPPTracker::KeyName(const T* key) auto hash = map[key]; ASSERT(hash != 0); + auto rep = reps[hash]; + auto gi = gi_s.find(rep); + if ( gi != gi_s.end() ) + return gi->second->Name(); + auto index = map2[hash]; string scope; if ( IsInherited(hash) ) scope = scope_prefix(scope2[hash]); - return scope + string(base_name) + "_" + Fmt(index) + "__CPP"; + string ind = Fmt(index); + string full_name; + + if ( single_global ) + full_name = base_name + "__CPP[" + ind + 
"]"; + else + full_name = base_name + "_" + ind + "__CPP"; + + return scope + full_name; } template void CPPTracker::LogIfNew(IntrusivePtr key, int scope, FILE* log_file) diff --git a/src/script_opt/CPP/Tracker.h b/src/script_opt/CPP/Tracker.h index 175d48192e..6005247058 100644 --- a/src/script_opt/CPP/Tracker.h +++ b/src/script_opt/CPP/Tracker.h @@ -15,6 +15,7 @@ #pragma once #include "zeek/script_opt/CPP/HashMgr.h" +#include "zeek/script_opt/CPP/InitsInfo.h" namespace zeek::detail { @@ -24,11 +25,13 @@ namespace zeek::detail template class CPPTracker { public: - // The base name is used to construct key names. The mapper, - // if present, maps hash values to information about the previously - // generated scope in which the value appears. - CPPTracker(const char* _base_name, VarMapper* _mapper = nullptr) - : base_name(_base_name), mapper(_mapper) + // The base name is used to construct key names. "single_global", + // if true, specifies that the names should be constructed as + // indexes into a single global, rather than as distinct globals. + // The mapper, if present, maps hash values to information about + // the previously generated scope in which the value appears. + CPPTracker(const char* _base_name, bool _single_global, VarMapper* _mapper = nullptr) + : base_name(_base_name), single_global(_single_global), mapper(_mapper) { } @@ -40,6 +43,8 @@ public: // is provided, then refrains from computing it. void AddKey(IntrusivePtr key, p_hash_type h = 0); + void AddInitInfo(const T* rep, std::shared_ptr gi) { gi_s[rep] = std::move(gi); } + // Returns the (C++ variable) name associated with the given key. std::string KeyName(const T* key); std::string KeyName(IntrusivePtr key) { return KeyName(key.get()); } @@ -81,6 +86,8 @@ private: // Maps keys to internal representations (i.e., hashes). std::unordered_map map; + std::unordered_map> gi_s; + // Maps internal representations to distinct values. 
These // may-or-may-not be indices into an "inherited" namespace scope. std::unordered_map map2; @@ -98,6 +105,10 @@ private: // Used to construct key names. std::string base_name; + // Whether to base the names out of a single global, or distinct + // globals. + bool single_global; + // If non-nil, the mapper to consult for previous names. VarMapper* mapper; }; diff --git a/src/script_opt/CPP/Types.cc b/src/script_opt/CPP/Types.cc index 1e04a33206..9a482d072e 100644 --- a/src/script_opt/CPP/Types.cc +++ b/src/script_opt/CPP/Types.cc @@ -91,170 +91,13 @@ string CPPCompile::GenericValPtrToGT(const string& expr, const TypePtr& t, GenTy return string("cast_intrusive<") + IntrusiveVal(t) + ">(" + expr + ")"; } -void CPPCompile::ExpandTypeVar(const TypePtr& t) - { - auto tn = GenTypeName(t); - - switch ( t->Tag() ) - { - case TYPE_LIST: - ExpandListTypeVar(t, tn); - break; - - case TYPE_RECORD: - ExpandRecordTypeVar(t, tn); - break; - - case TYPE_ENUM: - ExpandEnumTypeVar(t, tn); - break; - - case TYPE_TABLE: - ExpandTableTypeVar(t, tn); - break; - - case TYPE_FUNC: - ExpandFuncTypeVar(t, tn); - break; - - case TYPE_TYPE: - AddInit(t, tn, - string("make_intrusive(") + GenTypeName(t->AsTypeType()->GetType()) + - ")"); - break; - - case TYPE_VECTOR: - AddInit(t, tn, - string("make_intrusive(") + - GenTypeName(t->AsVectorType()->Yield()) + ")"); - break; - - default: - break; - } - - auto& script_type_name = t->GetName(); - if ( ! script_type_name.empty() ) - AddInit(t, "register_type__CPP(" + tn + ", \"" + script_type_name + "\");"); - - AddInit(t); - } - -void CPPCompile::ExpandListTypeVar(const TypePtr& t, string& tn) - { - const auto& tl = t->AsTypeList()->GetTypes(); - auto t_name = tn + "->AsTypeList()"; - - for ( const auto& tl_i : tl ) - AddInit(t, t_name + "->Append(" + GenTypeName(tl_i) + ");"); - } - -void CPPCompile::ExpandRecordTypeVar(const TypePtr& t, string& tn) - { - auto r = t->AsRecordType()->Types(); - - if ( ! 
r ) - return; - - auto t_name = tn + "->AsRecordType()"; - - AddInit(t, string("if ( ") + t_name + "->NumFields() == 0 )"); - - AddInit(t, "{"); - AddInit(t, "type_decl_list tl;"); - - for ( auto i = 0; i < r->length(); ++i ) - { - const auto& td = (*r)[i]; - AddInit(t, GenTypeDecl(td)); - } - - AddInit(t, t_name + "->AddFieldsDirectly(tl);"); - AddInit(t, "}"); - } - -void CPPCompile::ExpandEnumTypeVar(const TypePtr& t, string& tn) - { - auto e_name = tn + "->AsEnumType()"; - auto et = t->AsEnumType(); - auto names = et->Names(); - - AddInit(t, "{ auto et = " + e_name + ";"); - AddInit(t, "if ( et->Names().empty() ) {"); - - for ( const auto& name_pair : et->Names() ) - AddInit(t, string("\tet->AddNameInternal(\"") + name_pair.first + "\", " + - Fmt(int(name_pair.second)) + ");"); - - AddInit(t, "}}"); - } - -void CPPCompile::ExpandTableTypeVar(const TypePtr& t, string& tn) - { - auto tbl = t->AsTableType(); - - const auto& indices = tbl->GetIndices(); - const auto& yield = tbl->Yield(); - - if ( tbl->IsSet() ) - AddInit(t, tn, - string("make_intrusive(cast_intrusive(") + GenTypeName(indices) + - " ), nullptr)"); - else - AddInit(t, tn, - string("make_intrusive(cast_intrusive(") + - GenTypeName(indices) + "), " + GenTypeName(yield) + ")"); - } - -void CPPCompile::ExpandFuncTypeVar(const TypePtr& t, string& tn) - { - auto f = t->AsFuncType(); - - auto args_type_accessor = GenTypeName(f->Params()); - const auto& yt = f->Yield(); - - string yield_type_accessor; - - if ( yt ) - yield_type_accessor += GenTypeName(yt); - else - yield_type_accessor += "nullptr"; - - auto fl = f->Flavor(); - - string fl_name; - if ( fl == FUNC_FLAVOR_FUNCTION ) - fl_name = "FUNC_FLAVOR_FUNCTION"; - else if ( fl == FUNC_FLAVOR_EVENT ) - fl_name = "FUNC_FLAVOR_EVENT"; - else if ( fl == FUNC_FLAVOR_HOOK ) - fl_name = "FUNC_FLAVOR_HOOK"; - - auto type_init = string("make_intrusive(cast_intrusive(") + - args_type_accessor + "), " + yield_type_accessor + ", " + fl_name + ")"; - - AddInit(t, tn, 
type_init); - } - -string CPPCompile::GenTypeDecl(const TypeDecl* td) - { - auto type_accessor = GenTypeName(td->type); - - auto td_name = string("util::copy_string(\"") + td->id + "\")"; - - if ( td->attrs ) - return string("tl.append(new TypeDecl(") + td_name + ", " + type_accessor + ", " + - AttrsName(td->attrs) + "));"; - - return string("tl.append(new TypeDecl(") + td_name + ", " + type_accessor + "));"; - } - string CPPCompile::GenTypeName(const Type* t) { + ASSERT(processed_types.count(TypeRep(t)) > 0); return types.KeyName(TypeRep(t)); } -const char* CPPCompile::TypeTagName(TypeTag tag) const +const char* CPPCompile::TypeTagName(TypeTag tag) { switch ( tag ) { @@ -280,6 +123,8 @@ const char* CPPCompile::TypeTagName(TypeTag tag) const return "TYPE_INT"; case TYPE_INTERVAL: return "TYPE_INTERVAL"; + case TYPE_LIST: + return "TYPE_LIST"; case TYPE_OPAQUE: return "TYPE_OPAQUE"; case TYPE_PATTERN: @@ -431,16 +276,17 @@ const char* CPPCompile::TypeType(const TypePtr& t) } } -void CPPCompile::RegisterType(const TypePtr& tp) +shared_ptr CPPCompile::RegisterType(const TypePtr& tp) { auto t = TypeRep(tp); - if ( processed_types.count(t) > 0 ) - return; + auto pt = processed_types.find(t); + if ( pt != processed_types.end() ) + return pt->second; - // Add the type before going further, to avoid loops due to types - // that reference each other. - processed_types.insert(t); + processed_types[t] = nullptr; + + shared_ptr gi; switch ( t->Tag() ) { @@ -449,7 +295,6 @@ void CPPCompile::RegisterType(const TypePtr& tp) case TYPE_BOOL: case TYPE_COUNT: case TYPE_DOUBLE: - case TYPE_ENUM: case TYPE_ERROR: case TYPE_INT: case TYPE_INTERVAL: @@ -459,119 +304,53 @@ void CPPCompile::RegisterType(const TypePtr& tp) case TYPE_TIME: case TYPE_TIMER: case TYPE_VOID: - case TYPE_OPAQUE: case TYPE_SUBNET: case TYPE_FILE: - // Nothing to do. 
+ gi = make_shared(this, tp); + break; + + case TYPE_ENUM: + gi = make_shared(this, tp); + break; + + case TYPE_OPAQUE: + gi = make_shared(this, tp); break; case TYPE_TYPE: - { - const auto& tt = t->AsTypeType()->GetType(); - NoteNonRecordInitDependency(t, tt); - RegisterType(tt); - } + gi = make_shared(this, tp); break; case TYPE_VECTOR: - { - const auto& yield = t->AsVectorType()->Yield(); - NoteNonRecordInitDependency(t, yield); - RegisterType(yield); - } + gi = make_shared(this, tp); break; case TYPE_LIST: - RegisterListType(tp); + gi = make_shared(this, tp); break; case TYPE_TABLE: - RegisterTableType(tp); + gi = make_shared(this, tp); break; case TYPE_RECORD: - RegisterRecordType(tp); + gi = make_shared(this, tp); break; case TYPE_FUNC: - RegisterFuncType(tp); + gi = make_shared(this, tp); break; default: reporter->InternalError("bad type in CPPCompile::RegisterType"); } - AddInit(t); + type_info->AddInstance(gi); + processed_types[t] = gi; - if ( ! types.IsInherited(t) ) - { - auto t_rep = types.GetRep(t); - if ( t_rep == t ) - GenPreInit(t); - else - NoteInitDependency(t, t_rep); - } - } + types.AddInitInfo(t, gi); -void CPPCompile::RegisterListType(const TypePtr& t) - { - const auto& tl = t->AsTypeList()->GetTypes(); - - for ( auto& tl_i : tl ) - { - NoteNonRecordInitDependency(t, tl_i); - RegisterType(tl_i); - } - } - -void CPPCompile::RegisterTableType(const TypePtr& t) - { - auto tbl = t->AsTableType(); - const auto& indices = tbl->GetIndices(); - const auto& yield = tbl->Yield(); - - NoteNonRecordInitDependency(t, indices); - RegisterType(indices); - - if ( yield ) - { - NoteNonRecordInitDependency(t, yield); - RegisterType(yield); - } - } - -void CPPCompile::RegisterRecordType(const TypePtr& t) - { - auto r = t->AsRecordType()->Types(); - - if ( ! 
r ) - return; - - for ( const auto& r_i : *r ) - { - NoteNonRecordInitDependency(t, r_i->type); - RegisterType(r_i->type); - - if ( r_i->attrs ) - { - NoteInitDependency(t, r_i->attrs); - RegisterAttributes(r_i->attrs); - } - } - } - -void CPPCompile::RegisterFuncType(const TypePtr& t) - { - auto f = t->AsFuncType(); - - NoteInitDependency(t, TypeRep(f->Params())); - RegisterType(f->Params()); - - if ( f->Yield() ) - { - NoteNonRecordInitDependency(t, f->Yield()); - RegisterType(f->Yield()); - } + return gi; } const char* CPPCompile::NativeAccessor(const TypePtr& t) diff --git a/src/script_opt/CPP/Util.cc b/src/script_opt/CPP/Util.cc index c2a6fed195..1c64c9af21 100644 --- a/src/script_opt/CPP/Util.cc +++ b/src/script_opt/CPP/Util.cc @@ -75,4 +75,60 @@ void unlock_file(const string& fname, FILE* f) } } +string CPPEscape(const char* b, int len) + { + string res; + + for ( int i = 0; i < len; ++i ) + { + unsigned char c = b[i]; + + switch ( c ) + { + case '\a': + res += "\\a"; + break; + case '\b': + res += "\\b"; + break; + case '\f': + res += "\\f"; + break; + case '\n': + res += "\\n"; + break; + case '\r': + res += "\\r"; + break; + case '\t': + res += "\\t"; + break; + case '\v': + res += "\\v"; + break; + + case '\\': + res += "\\\\"; + break; + case '"': + res += "\\\""; + break; + + default: + if ( isprint(c) ) + res += c; + else + { + char buf[8192]; + snprintf(buf, sizeof buf, "%03o", c); + res += "\\"; + res += buf; + } + break; + } + } + + return res; + } + } // zeek::detail diff --git a/src/script_opt/CPP/Util.h b/src/script_opt/CPP/Util.h index 6ea06e6752..23e2533922 100644 --- a/src/script_opt/CPP/Util.h +++ b/src/script_opt/CPP/Util.h @@ -36,4 +36,12 @@ extern bool is_CPP_compilable(const ProfileFunc* pf, const char** reason = nullp extern void lock_file(const std::string& fname, FILE* f); extern void unlock_file(const std::string& fname, FILE* f); +// For the given byte array / string, returns a version expanded +// with escape sequences in order to 
represent it as a C++ string. +extern std::string CPPEscape(const char* b, int len); +inline std::string CPPEscape(const char* s) + { + return CPPEscape(s, strlen(s)); + } + } // zeek::detail diff --git a/src/script_opt/CPP/Vars.cc b/src/script_opt/CPP/Vars.cc index 6d06ffd08d..c62aae0869 100644 --- a/src/script_opt/CPP/Vars.cc +++ b/src/script_opt/CPP/Vars.cc @@ -83,7 +83,7 @@ void CPPCompile::CreateGlobal(const ID* g) if ( pfs.Globals().count(g) == 0 ) { // Only used in the context of calls. If it's compilable, - // the we'll call it directly. + // then we'll call it directly. if ( compilable_funcs.count(gn) > 0 ) { AddGlobal(gn, "zf", true); @@ -102,18 +102,12 @@ void CPPCompile::CreateGlobal(const ID* g) Emit("IDPtr %s;", globals[gn]); if ( pfs.Events().count(gn) > 0 ) - // This is an event that's also used as - // a variable. + // This is an event that's also used as a variable. Emit("EventHandlerPtr %s_ev;", globals[gn]); - const auto& t = g->GetType(); - NoteInitDependency(g, TypeRep(t)); - - auto exported = g->IsExport() ? "true" : "false"; - - AddInit(g, globals[gn], - string("lookup_global__CPP(\"") + gn + "\", " + GenTypeName(t) + ", " + exported + - ")"); + auto gi = make_shared(this, g, globals[gn]); + global_id_info->AddInstance(gi); + global_gis[g] = gi; } if ( is_bif ) @@ -124,40 +118,25 @@ void CPPCompile::CreateGlobal(const ID* g) global_vars.emplace(g); } -void CPPCompile::UpdateGlobalHashes() +std::shared_ptr CPPCompile::RegisterGlobal(const ID* g) { - for ( auto& g : pfs.AllGlobals() ) + auto gg = global_gis.find(g); + + if ( gg == global_gis.end() ) { - auto gn = g->Name(); + auto gn = string(g->Name()); - if ( hm.HasGlobal(gn) ) - // Not new to this compilation run. - continue; + if ( globals.count(gn) == 0 ) + // Create a name for it. 
+ (void)IDNameStr(g); - auto ht = pfs.HashType(g->GetType()); - - p_hash_type hv = 0; - if ( g->GetVal() ) - hv = p_hash(g->GetVal()); - - fprintf(hm.HashFile(), "global\n%s\n", gn); - fprintf(hm.HashFile(), "%llu %llu\n", ht, hv); - - // Record location information in the hash file for - // diagnostic purposes. - auto loc = g->GetLocationInfo(); - fprintf(hm.HashFile(), "%s %d\n", loc->filename, loc->first_line); - - // Flag any named record/enum types. - if ( g->IsType() ) - { - const auto& t = g->GetType(); - if ( t->Tag() == TYPE_RECORD ) - fprintf(hm.HashFile(), "record\n%s\n", gn); - else if ( t->Tag() == TYPE_ENUM ) - fprintf(hm.HashFile(), "enum\n%s\n", gn); - } + auto gi = make_shared(this, g, globals[gn]); + global_id_info->AddInstance(gi); + global_gis[g] = gi; + return gi; } + else + return gg->second; } void CPPCompile::AddBiF(const ID* b, bool is_var) @@ -170,12 +149,8 @@ void CPPCompile::AddBiF(const ID* b, bool is_var) if ( AddGlobal(n, "bif", true) ) Emit("Func* %s;", globals[n]); - auto lookup = string("lookup_bif__CPP(\"") + bn + "\")"; - - if ( standalone ) - AddActivation(globals[n] + " = " + lookup + ";"); - else - AddInit(b, globals[n], lookup); + ASSERT(BiFs.count(globals[n]) == 0); + BiFs[globals[n]] = bn; } bool CPPCompile::AddGlobal(const string& g, const char* suffix, bool track) @@ -189,13 +164,8 @@ bool CPPCompile::AddGlobal(const string& g, const char* suffix, bool track) if ( hm.HasGlobalVar(gn) ) gn = scope_prefix(hm.GlobalVarScope(gn)) + gn; else - { new_var = true; - if ( track && update ) - fprintf(hm.HashFile(), "global-var\n%s\n%d\n", gn.c_str(), addl_tag); - } - globals.emplace(g, gn); } @@ -207,18 +177,19 @@ void CPPCompile::RegisterEvent(string ev_name) body_events[body_name].emplace_back(move(ev_name)); } -const string& CPPCompile::IDNameStr(const ID* id) const +const string& CPPCompile::IDNameStr(const ID* id) { if ( id->IsGlobal() ) { auto g = string(id->Name()); - ASSERT(globals.count(g) > 0); - return 
((CPPCompile*)(this))->globals[g]; + if ( globals.count(g) == 0 ) + CreateGlobal(id); + return globals[g]; } - ASSERT(locals.count(id) > 0); - - return ((CPPCompile*)(this))->locals[id]; + auto l = locals.find(id); + ASSERT(l != locals.end()); + return l->second; } string CPPCompile::LocalName(const ID* l) const diff --git a/src/script_opt/ScriptOpt.cc b/src/script_opt/ScriptOpt.cc index 9f12373ce9..2940b1b5ec 100644 --- a/src/script_opt/ScriptOpt.cc +++ b/src/script_opt/ScriptOpt.cc @@ -221,8 +221,6 @@ static void init_options() check_env_opt("ZEEK_PROFILE", analysis_options.profile_ZAM); // Compile-to-C++-related options. - check_env_opt("ZEEK_ADD_CPP", analysis_options.add_CPP); - check_env_opt("ZEEK_UPDATE_CPP", analysis_options.update_CPP); check_env_opt("ZEEK_GEN_CPP", analysis_options.gen_CPP); check_env_opt("ZEEK_GEN_STANDALONE_CPP", analysis_options.gen_standalone_CPP); check_env_opt("ZEEK_COMPILE_ALL", analysis_options.compile_all); @@ -233,23 +231,6 @@ static void init_options() analysis_options.gen_CPP = true; if ( analysis_options.gen_CPP ) - { - if ( analysis_options.add_CPP ) - { - reporter->Warning("gen-C++ incompatible with add-C++"); - analysis_options.add_CPP = false; - } - - if ( analysis_options.update_CPP ) - { - reporter->Warning("gen-C++ incompatible with update-C++"); - analysis_options.update_CPP = false; - } - - generating_CPP = true; - } - - if ( analysis_options.update_CPP || analysis_options.add_CPP ) generating_CPP = true; if ( analysis_options.use_CPP && generating_CPP ) @@ -399,7 +380,7 @@ static void generate_CPP(std::unique_ptr& pfs) { const auto hash_name = hash_dir + "CPP-hashes"; - auto hm = std::make_unique(hash_name.c_str(), analysis_options.add_CPP); + auto hm = std::make_unique(hash_name.c_str()); if ( analysis_options.gen_CPP ) { @@ -413,26 +394,12 @@ static void generate_CPP(std::unique_ptr& pfs) } } } - else - { // doing add-C++ instead, so look for previous compilations - for ( auto& func : funcs ) - { - auto hash = 
func.Profile()->HashVal(); - if ( compiled_scripts.count(hash) > 0 || hm->HasHash(hash) ) - func.SetSkip(true); - } - - // Now that we've presumably marked a lot of functions - // as skippable, recompute the global profile. - pfs = std::make_unique(funcs, is_CPP_compilable, false); - } const auto gen_name = hash_dir + "CPP-gen.cc"; const auto addl_name = hash_dir + "CPP-gen-addl.h"; - CPPCompile cpp(funcs, *pfs, gen_name, addl_name, *hm, - analysis_options.gen_CPP || analysis_options.update_CPP, - analysis_options.gen_standalone_CPP, analysis_options.report_uncompilable); + CPPCompile cpp(funcs, *pfs, gen_name, addl_name, *hm, analysis_options.gen_standalone_CPP, + analysis_options.report_uncompilable); } static void find_when_funcs(std::unique_ptr& pfs, diff --git a/src/script_opt/ScriptOpt.h b/src/script_opt/ScriptOpt.h index 065a9bb2c9..8d15eadb23 100644 --- a/src/script_opt/ScriptOpt.h +++ b/src/script_opt/ScriptOpt.h @@ -96,18 +96,6 @@ struct AnalyOpt // of the corresponding script, and not activated by default). bool gen_standalone_CPP = false; - // If true, generate C++ for those script bodies that don't already - // have generated code, in a form that enables later compiles to - // take advantage of the newly-added elements. Only use for generating - // a zeek that will always include the associated scripts. - bool update_CPP = false; - - // If true, generate C++ for those script bodies that don't already - // have generated code. The added C++ is not made available for - // later generated code, and will work for a generated zeek that - // runs without including the associated scripts. - bool add_CPP = false; - // If true, use C++ bodies if available. bool use_CPP = false;