extensive rewrite of generation & execution of run-time initialization

This commit is contained in:
Vern Paxson 2021-11-07 17:00:19 -08:00
parent bc3bf4ea6c
commit e1a760e674
26 changed files with 3459 additions and 1580 deletions

View file

@ -401,7 +401,9 @@ set(MAIN_SRCS
script_opt/CPP/GenFunc.cc script_opt/CPP/GenFunc.cc
script_opt/CPP/HashMgr.cc script_opt/CPP/HashMgr.cc
script_opt/CPP/Inits.cc script_opt/CPP/Inits.cc
script_opt/CPP/RuntimeInit.cc script_opt/CPP/InitsInfo.cc
script_opt/CPP/RuntimeInits.cc
script_opt/CPP/RuntimeInitSupport.cc
script_opt/CPP/RuntimeOps.cc script_opt/CPP/RuntimeOps.cc
script_opt/CPP/RuntimeVec.cc script_opt/CPP/RuntimeVec.cc
script_opt/CPP/Stmts.cc script_opt/CPP/Stmts.cc

View file

@ -7,42 +7,53 @@ namespace zeek::detail
using namespace std; using namespace std;
void CPPCompile::RegisterAttributes(const AttributesPtr& attrs) shared_ptr<CPP_InitInfo> CPPCompile::RegisterAttributes(const AttributesPtr& attrs)
{ {
if ( ! attrs || attributes.HasKey(attrs) ) if ( ! attrs )
return; return nullptr;
auto a = attrs.get();
if ( processed_attrs.count(a) > 0 )
return processed_attrs[a];
attributes.AddKey(attrs); attributes.AddKey(attrs);
AddInit(attrs);
auto a_rep = attributes.GetRep(attrs); // The cast is just so we can make an IntrusivePtr.
if ( a_rep != attrs.get() ) auto a_rep = const_cast<Attributes*>(attributes.GetRep(attrs));
if ( a_rep != a )
{ {
NoteInitDependency(attrs.get(), a_rep); AttributesPtr a_rep_ptr = {NewRef{}, a_rep};
return; processed_attrs[a] = RegisterAttributes(a_rep_ptr);
return processed_attrs[a];
} }
for ( const auto& a : attrs->GetAttrs() ) for ( const auto& a : attrs->GetAttrs() )
{ (void)RegisterAttr(a);
const auto& e = a->GetExpr();
if ( e )
{
if ( IsSimpleInitExpr(e) )
{
// Make sure any dependencies it has get noted.
(void)GenExpr(e, GEN_VAL_PTR);
continue;
}
init_exprs.AddKey(e); shared_ptr<CPP_InitInfo> gi = make_shared<AttrsInfo>(this, attrs);
AddInit(e); attrs_info->AddInstance(gi);
NoteInitDependency(attrs, e); processed_attrs[a] = gi;
auto e_rep = init_exprs.GetRep(e); return gi;
if ( e_rep != e.get() ) }
NoteInitDependency(e.get(), e_rep);
} shared_ptr<CPP_InitInfo> CPPCompile::RegisterAttr(const AttrPtr& attr)
} {
auto a = attr.get();
if ( processed_attr.count(a) > 0 )
return processed_attr[a];
const auto& e = a->GetExpr();
if ( e && ! IsSimpleInitExpr(e) )
init_exprs.AddKey(e);
auto gi = make_shared<AttrInfo>(this, attr);
attr_info->AddInstance(gi);
processed_attr[a] = gi;
return gi;
} }
void CPPCompile::BuildAttrs(const AttributesPtr& attrs, string& attr_tags, string& attr_vals) void CPPCompile::BuildAttrs(const AttributesPtr& attrs, string& attr_tags, string& attr_vals)
@ -72,78 +83,9 @@ void CPPCompile::BuildAttrs(const AttributesPtr& attrs, string& attr_tags, strin
attr_vals = string("{") + attr_vals + "}"; attr_vals = string("{") + attr_vals + "}";
} }
void CPPCompile::GenAttrs(const AttributesPtr& attrs) const char* CPPCompile::AttrName(AttrTag t)
{ {
NL(); switch ( t )
Emit("AttributesPtr %s", AttrsName(attrs));
StartBlock();
const auto& avec = attrs->GetAttrs();
Emit("auto attrs = std::vector<AttrPtr>();");
AddInit(attrs);
for ( const auto& attr : avec )
{
const auto& e = attr->GetExpr();
if ( ! e )
{
Emit("attrs.emplace_back(make_intrusive<Attr>(%s));", AttrName(attr));
continue;
}
NoteInitDependency(attrs, e);
AddInit(e);
string e_arg;
if ( IsSimpleInitExpr(e) )
e_arg = GenAttrExpr(e);
else
e_arg = InitExprName(e);
Emit("attrs.emplace_back(make_intrusive<Attr>(%s, %s));", AttrName(attr), e_arg);
}
Emit("return make_intrusive<Attributes>(attrs, nullptr, true, false);");
EndBlock();
}
string CPPCompile::GenAttrExpr(const ExprPtr& e)
{
switch ( e->Tag() )
{
case EXPR_CONST:
return string("make_intrusive<ConstExpr>(") + GenExpr(e, GEN_VAL_PTR) + ")";
case EXPR_NAME:
NoteInitDependency(e, e->AsNameExpr()->IdPtr());
return string("make_intrusive<NameExpr>(") + globals[e->AsNameExpr()->Id()->Name()] +
")";
case EXPR_RECORD_COERCE:
NoteInitDependency(e, TypeRep(e->GetType()));
return string("make_intrusive<RecordCoerceExpr>(make_intrusive<RecordConstructorExpr>("
"make_intrusive<ListExpr>()), cast_intrusive<RecordType>(") +
GenTypeName(e->GetType()) + "))";
default:
reporter->InternalError("bad expr tag in CPPCompile::GenAttrs");
return "###";
}
}
string CPPCompile::AttrsName(const AttributesPtr& a)
{
return attributes.KeyName(a) + "()";
}
const char* CPPCompile::AttrName(const AttrPtr& attr)
{
switch ( attr->Tag() )
{ {
case ATTR_OPTIONAL: case ATTR_OPTIONAL:
return "ATTR_OPTIONAL"; return "ATTR_OPTIONAL";

View file

@ -0,0 +1,19 @@
// See the file "COPYING" in the main distribution directory for copyright.
// Definitions associated with type attributes.
#pragma once
namespace zeek::detail
{
// Classifies the kind of expression (if any) associated with an attribute.
// Each enumerator's comment describes the form of expression it covers.
enum AttrExprType
{
AE_NONE, // attribute doesn't have an expression
AE_CONST, // easy expression - a constant (ConstExpr)
AE_NAME, // easy - a global (NameExpr)
AE_RECORD, // an empty record cast to a given type
AE_CALL, // everything else - requires a lambda, essentially
};
} // zeek::detail

View file

@ -5,18 +5,20 @@
#include "zeek/Desc.h" #include "zeek/Desc.h"
#include "zeek/script_opt/CPP/Func.h" #include "zeek/script_opt/CPP/Func.h"
#include "zeek/script_opt/CPP/HashMgr.h" #include "zeek/script_opt/CPP/HashMgr.h"
#include "zeek/script_opt/CPP/InitsInfo.h"
#include "zeek/script_opt/CPP/Tracker.h" #include "zeek/script_opt/CPP/Tracker.h"
#include "zeek/script_opt/CPP/Util.h" #include "zeek/script_opt/CPP/Util.h"
#include "zeek/script_opt/ScriptOpt.h" #include "zeek/script_opt/ScriptOpt.h"
// We structure the compiler for generating C++ versions of Zeek script // We structure the compiler for generating C++ versions of Zeek script
// bodies as a single large class. While we divide the compiler's // bodies mainly as a single large class. While we divide the compiler's
// functionality into a number of groups (see below), these interact with // functionality into a number of groups (see below), these interact with
// one another, and in particular with various member variables, enough // one another, and in particular with various member variables, enough
// so that it's not clear there's benefit to further splitting the // so that it's not clear there's benefit to further splitting the
// functionality into multiple classes. (Some splitting has already been // functionality into multiple classes. (Some splitting has already been
// done for more self-contained functionality, resulting in the CPPTracker // done for more self-contained functionality, resulting in the CPPTracker
// and CPPHashManager classes.) // and CPPHashManager classes, and initialization information in
// InitsInfo.{h,cc} and RuntimeInits.{h,cc}.)
// //
// Most aspects of translating to C++ have a straightforward nature. // Most aspects of translating to C++ have a straightforward nature.
// We can turn many Zeek script statements directly into the C++ that's // We can turn many Zeek script statements directly into the C++ that's
@ -45,26 +47,6 @@
// all of the scripts loaded in "bare" mode, plus those for foo.zeek; and // all of the scripts loaded in "bare" mode, plus those for foo.zeek; and
// without the "-b" for all of the default scripts plus those in foo.zeek. // without the "-b" for all of the default scripts plus those in foo.zeek.
// //
// One of the design goals employed is to support "incremental" compilation,
// i.e., compiling *additional* Zeek scripts at a later point after an
// initial compilation. This comes in two forms.
//
// "-O update-C++" produces C++ code that extends that already compiled,
// in a manner where subsequent compilations can leverage both the original
// and the newly added. Such compilations *must* be done in a consistent
// context (for example, any types extended in the original are extended in
// the same manner - plus then perhaps further extensions - in the updated
// code).
//
// "-O add-C++" instead produces C++ code that (1) will not be leveraged in
// any subsequent compilations, and (2) can be inconsistent with other
// "-O add-C++" code added in the future. The main use of this feature is
// to support compiling polyglot versions of Zeek scripts used to run
// the test suite.
//
// Zeek invocations specifying "-O use-C++" will activate any code compiled
// into the zeek binary; otherwise, the code lies dormant.
//
// "-O report-C++" reports on which compiled functions will/won't be used // "-O report-C++" reports on which compiled functions will/won't be used
// (including ones that are available but not relevant to the scripts loaded // (including ones that are available but not relevant to the scripts loaded
// on the command line). This can be useful when debugging to make sure // on the command line). This can be useful when debugging to make sure
@ -104,29 +86,41 @@
// //
// Emit Low-level code generation. // Emit Low-level code generation.
// //
// Of these, Inits is probably the most subtle. It turns out to be // Of these, Inits is the most subtle and complex. There are two major
// very tricky ensuring that we create run-time variables in the // challenges in creating run-time values (such as Zeek types and constants).
// proper order. For example, a global might need a record type to be
// defined; one of the record's fields is a table; that table contains
// another record; one of that other record's fields is the original
// record (recursion); another field has an &default expression that
// requires the compiler to generate a helper function to construct
// the expression dynamically; and that helper function might in turn
// refer to other types that require initialization.
// //
// To deal with these dependencies, for every run-time object the compiler // First, generating individual code for creating each of these winds up
// maintains (1) all of the other run-time objects on which its initialization // incurring unacceptable compile times (for example, clang compiling all
// depends, and (2) the C++ statements needed to initialize it, once those // of the base scripts with optimization takes many hours on a high-end
// other objects have been initialized. It then beings initialization with // laptop). As a result, we employ a table-driven approach that compiles
// objects that have no dependencies, marks those as done (essentially), finds // much faster (though still taking many minutes on the same high-end laptop,
// objects that now can be initialized and emits their initializations, // running about 40x faster however).
// marks those as done, etc.
// //
// Below in declaring the CPPCompiler class, we group methods in accordance // Second, initializations frequently rely upon *other* initializations
// with those listed above. We also locate member variables with the group // having occurred first. For example, a global might need a record type
// most relevant for their usage. However, keep in mind that many member // to be defined; one of the record's fields is a table; that table contains
// variables are used by multiple groups, which is why we haven't created // another record; one of that other record's fields is the original record
// distinct per-group classes. // (recursion); another field has an &default expression that requires the
// compiler to generate a helper function to construct the expression
// dynamically; and that helper function might in turn refer to other types
// that require initialization. What's required is a framework for ensuring
// that everything occurs in the proper order.
//
// The logic for dealing with these complexities is isolated into several
// sets of classes. InitsInfo.{h,cc} provides the classes related to tracking
// how to generate initializations in the proper order. RuntimeInits.{h,cc}
// provides the classes used when initializing generated code in order
// to instantiate all of the necessary values. See those files for discussions
// on how they address the points framed above.
//
// In declaring the CPPCompiler class, we group methods in accordance with
// those listed above, locating member variables with the group most relevant
// for their usage. However, keep in mind that many member variables are
// used by multiple groups, which is why we haven't created distinct
// per-group classes. In addition, we make a number of methods public
// in order to avoid the need for numerous "friend" declarations to allow
// associated classes (like those for initialization) access to the
// necessary compiler methods.
namespace zeek::detail namespace zeek::detail
{ {
@ -135,10 +129,124 @@ class CPPCompile
{ {
public: public:
CPPCompile(std::vector<FuncInfo>& _funcs, ProfileFuncs& pfs, const std::string& gen_name, CPPCompile(std::vector<FuncInfo>& _funcs, ProfileFuncs& pfs, const std::string& gen_name,
const std::string& addl_name, CPPHashManager& _hm, bool _update, bool _standalone, const std::string& addl_name, CPPHashManager& _hm, bool _standalone,
bool report_uncompilable); bool report_uncompilable);
~CPPCompile(); ~CPPCompile();
// Constructing a CPPCompile object does all of the compilation.
// The public methods here are for use by helper classes.
// Tracks the given type (with support methods for ones that
// are complicated), recursively including its sub-types, and
// creating initializations for constructing C++ variables
// representing the types.
//
// Returns the initialization info associated with the type.
std::shared_ptr<CPP_InitInfo> RegisterType(const TypePtr& t);
// Easy access to the global offset and the initialization
// cohort associated with a given type.
int TypeOffset(const TypePtr& t) { return GI_Offset(RegisterType(t)); }
int TypeCohort(const TypePtr& t) { return GI_Cohort(RegisterType(t)); }
// Tracks a Zeek ValPtr used as a constant value. These occur
// in two contexts: directly as constant expressions, and indirectly
// as elements within aggregate constants (such as in vector
// initializers).
//
// Returns the associated initialization info. In addition,
// consts_offset returns an offset into an initialization-time
// global that tracks all constructed globals, providing
// general access to them for aggregate constants.
std::shared_ptr<CPP_InitInfo> RegisterConstant(const ValPtr& vp, int& consts_offset);
// Tracks a global to generate the necessary initialization.
// Returns the associated initialization info.
std::shared_ptr<CPP_InitInfo> RegisterGlobal(const ID* g);
// Tracks a use of the given set of attributes, including
// initialization dependencies and the generation of any
// associated expressions.
//
// Returns the initialization info associated with the set of
// attributes.
std::shared_ptr<CPP_InitInfo> RegisterAttributes(const AttributesPtr& attrs);
// Convenient access to the global offset associated with
// a set of Attributes.
int AttributesOffset(const AttributesPtr& attrs)
{
return GI_Offset(RegisterAttributes(attrs));
}
// The same, for a single attribute.
std::shared_ptr<CPP_InitInfo> RegisterAttr(const AttrPtr& attr);
int AttrOffset(const AttrPtr& attr) { return GI_Offset(RegisterAttr(attr)); }
// Returns a mapping from Attr objects to their associated
// initialization information. The Attr must have previously
// been registered.
auto ProcessedAttr() { return processed_attr; }
// True if the given expression is simple enough that we can
// generate code to evaluate it directly, and don't need to
// create a separate function per RegisterInitExpr() to track it.
static bool IsSimpleInitExpr(const ExprPtr& e);
// Tracks expressions used in attributes (such as &default=<expr>).
//
// We need to generate code to evaluate these, via CallExpr's
// that invoke functions that return the value of the expression.
// However, we can't generate that code when first encountering
// the attribute, because doing so will need to refer to the names
// of types, and initially those are unavailable (because the type's
// representatives, per pfs.RepTypes(), might not have yet been
// tracked). So instead we track the associated CallExprInitInfo
// objects, and after all types have been tracked, then spin
// through them to generate the code.
//
// Returns the associated initialization information.
std::shared_ptr<CPP_InitInfo> RegisterInitExpr(const ExprPtr& e);
// Tracks a C++ string value needed for initialization. Returns
// an offset into the global vector that will hold these.
//
// The first time a given string is seen it is appended to
// ordered_tracked_strings and its position is recorded in
// tracked_strings; later calls with an equal string return the
// previously assigned offset.
int TrackString(std::string s)
	{
	// A single lookup serves both the "already tracked" test and
	// retrieval of the existing offset.
	auto it = tracked_strings.find(s);
	if ( it != tracked_strings.end() )
		return it->second;

	// New string: its offset is the next free slot in the ordered vector.
	int offset = static_cast<int>(ordered_tracked_strings.size());
	tracked_strings.emplace(s, offset);
	ordered_tracked_strings.emplace_back(s);
	return offset;
	}
// Tracks a profile hash value needed for initialization. Returns
// an offset into the global vector that will hold these.
//
// The first time a given hash is seen it is appended to
// ordered_tracked_hashes and its position is recorded in
// tracked_hashes; later calls with the same hash return the
// previously assigned offset.
int TrackHash(p_hash_type h)
	{
	// A single lookup serves both the "already tracked" test and
	// retrieval of the existing offset.
	auto it = tracked_hashes.find(h);
	if ( it != tracked_hashes.end() )
		return it->second;

	// New hash: its offset is the next free slot in the ordered vector.
	int offset = static_cast<int>(ordered_tracked_hashes.size());
	tracked_hashes.emplace(h, offset);
	ordered_tracked_hashes.emplace_back(h);
	return offset;
	}
// Returns the hash associated with a given function body.
// It's a fatal error to call this for a body that hasn't
// been compiled.
p_hash_type BodyHash(const Stmt* body);
// Returns true if at least one of the function bodies associated
// with the function/hook/event handler of the given fname is
// not compilable.
bool NotFullyCompilable(const std::string& fname) const
{
return not_fully_compilable.count(fname) > 0;
}
private: private:
// Start of methods related to driving the overall compilation // Start of methods related to driving the overall compilation
// process. // process.
@ -148,6 +256,37 @@ private:
// Main driver, invoked by constructor. // Main driver, invoked by constructor.
void Compile(bool report_uncompilable); void Compile(bool report_uncompilable);
// The following methods all create objects that track the
// initializations of a given type of value. In each, "tag"
// is the name used to identify the initializer global
// associated with the given type of value, and "type" is
// its C++ representation. Often "tag" is concatenated with
// "type" to designate a specific C++ type. For example,
// "tag" might be "Double" and "type" might be "ValPtr";
// the resulting global's type is "DoubleValPtr".
// Creates an object for tracking values associated with Zeek
// constants. "c_type" is the C++ type used in the initializer
// for each object; or, if empty, it specifies that we represent
// the value using an index into a separate vector that holds
// the constant.
std::shared_ptr<CPP_InitsInfo> CreateConstInitInfo(const char* tag, const char* type,
const char* c_type);
// Creates an object for tracking compound initializers, which
// are those whose initialization uses indexes into other vectors.
std::shared_ptr<CPP_InitsInfo> CreateCompoundInitInfo(const char* tag, const char* type);
// Creates an object for tracking initializers that have custom
// C++ objects to hold their initialization information.
std::shared_ptr<CPP_InitsInfo> CreateCustomInitInfo(const char* tag, const char* type);
// Generates the declaration associated with a set of initializations
// and tracks the object to facilitate looping over all such
// initializations. As a convenience, returns the object.
std::shared_ptr<CPP_InitsInfo> RegisterInitInfo(const char* tag, const char* type,
std::shared_ptr<CPP_InitsInfo> gi);
// Generate the beginning of the compiled code: run-time functions, // Generate the beginning of the compiled code: run-time functions,
// namespace, auxiliary globals. // namespace, auxiliary globals.
void GenProlog(); void GenProlog();
@ -158,7 +297,7 @@ private:
void RegisterCompiledBody(const std::string& f); void RegisterCompiledBody(const std::string& f);
// After compilation, generate the final code. Most of this is // After compilation, generate the final code. Most of this is
// run-time initialization of various dynamic values. // in support of run-time initialization of various dynamic values.
void GenEpilog(); void GenEpilog();
// True if the given function (plus body and profile) is one // True if the given function (plus body and profile) is one
@ -185,9 +324,13 @@ private:
// it including some functionality we don't currently support // it including some functionality we don't currently support
// for compilation. // for compilation.
// //
// Indexed by the name of the function. // Indexed by the C++ name of the function.
std::unordered_set<std::string> compilable_funcs; std::unordered_set<std::string> compilable_funcs;
// Tracks which functions/hooks/events have at least one non-compilable
// body. Indexed by the Zeek name of the function.
std::unordered_set<std::string> not_fully_compilable;
// Maps functions (not hooks or events) to upstream compiled names. // Maps functions (not hooks or events) to upstream compiled names.
std::unordered_map<std::string, std::string> hashed_funcs; std::unordered_map<std::string, std::string> hashed_funcs;
@ -200,10 +343,6 @@ private:
// compilation units. // compilation units.
int addl_tag = 0; int addl_tag = 0;
// If true, then we're updating the C++ base (i.e., generating
// code meant for use by subsequently generated code).
bool update = false;
// If true, the generated code should run "standalone". // If true, the generated code should run "standalone".
bool standalone = false; bool standalone = false;
@ -211,7 +350,7 @@ private:
// needed for "seatbelts", to ensure that we can produce a // needed for "seatbelts", to ensure that we can produce a
// unique hash relating to this compilation (*and* its // unique hash relating to this compilation (*and* its
// compilation time, which is why these are "seatbelts" and // compilation time, which is why these are "seatbelts" and
// likely not important to make distinct. // likely not important to make distinct).
p_hash_type total_hash = 0; p_hash_type total_hash = 0;
// Working directory in which we're compiling. Used to quasi-locate // Working directory in which we're compiling. Used to quasi-locate
@ -236,11 +375,6 @@ private:
// track it as such. // track it as such.
void CreateGlobal(const ID* g); void CreateGlobal(const ID* g);
// For the globals used in the compilation, if new then append
// them to the hash file to make the information available
// to subsequent compilation runs.
void UpdateGlobalHashes();
// Register the given identifier as a BiF. If is_var is true // Register the given identifier as a BiF. If is_var is true
// then the BiF is also used in a non-call context. // then the BiF is also used in a non-call context.
void AddBiF(const ID* b, bool is_var); void AddBiF(const ID* b, bool is_var);
@ -258,10 +392,9 @@ private:
// The following match various forms of identifiers to the // The following match various forms of identifiers to the
// name used for their C++ equivalent. // name used for their C++ equivalent.
const char* IDName(const ID& id) { return IDName(&id); }
const char* IDName(const IDPtr& id) { return IDName(id.get()); } const char* IDName(const IDPtr& id) { return IDName(id.get()); }
const char* IDName(const ID* id) { return IDNameStr(id).c_str(); } const char* IDName(const ID* id) { return IDNameStr(id).c_str(); }
const std::string& IDNameStr(const ID* id) const; const std::string& IDNameStr(const ID* id);
// Returns a canonicalized version of a variant of a global made // Returns a canonicalized version of a variant of a global made
// distinct by the given suffix. // distinct by the given suffix.
@ -280,12 +413,20 @@ private:
// conflict with C++ keywords. // conflict with C++ keywords.
std::string Canonicalize(const char* name) const; std::string Canonicalize(const char* name) const;
// Returns the name of the global corresponding to an expression
// (which must be an EXPR_NAME).
std::string GlobalName(const ExprPtr& e) { return globals[e->AsNameExpr()->Id()->Name()]; }
// Maps global names (not identifiers) to the names we use for them. // Maps global names (not identifiers) to the names we use for them.
std::unordered_map<std::string, std::string> globals; std::unordered_map<std::string, std::string> globals;
// Similar for locals, for the function currently being compiled. // Similar for locals, for the function currently being compiled.
std::unordered_map<const ID*, std::string> locals; std::unordered_map<const ID*, std::string> locals;
// Retrieves the initialization information associated with the
// given global.
std::unordered_map<const ID*, std::shared_ptr<CPP_InitInfo>> global_gis;
// Maps event names to the names we use for them. // Maps event names to the names we use for them.
std::unordered_map<std::string, std::string> events; std::unordered_map<std::string, std::string> events;
@ -307,14 +448,37 @@ private:
// Similar, but for lambdas. // Similar, but for lambdas.
void DeclareLambda(const LambdaExpr* l, const ProfileFunc* pf); void DeclareLambda(const LambdaExpr* l, const ProfileFunc* pf);
// Declares the CPPStmt subclass used for compiling the given // Generates code to declare the compiled version of a script
// function. "ft" gives the functions type, "pf" its profile, // function. "ft" gives the functions type, "pf" its profile,
// "fname" its C++ name, "body" its AST, "l" if non-nil its // "fname" its C++ name, "body" its AST, "l" if non-nil its
// corresponding lambda expression, and "flavor" whether it's // corresponding lambda expression, and "flavor" whether it's
// a hook/event/function. // a hook/event/function.
//
// We use two basic approaches. Most functions are represented
// by a "CPPDynStmt" object that's parameterized by a void* pointer
// to the underlying C++ function and an index used to dynamically
// cast the pointer to having the correct type for then calling it.
// Lambdas, however (including "implicit" lambdas used to associate
// complex expressions with &attributes), each have a unique
// subclass derived from CPPStmt that calls the underlying C++
// function without requiring a cast, and that holds the values
// of the lambda's captures.
//
// It would be cleanest to use the latter approach for all functions,
// but the hundreds/thousands of additional classes required for
// doing so significantly slows down C++ compilation, so we instead
// opt for the uglier dynamic casting approach, which only requires
// one additional class.
void CreateFunction(const FuncTypePtr& ft, const ProfileFunc* pf, const std::string& fname,
const StmtPtr& body, int priority, const LambdaExpr* l,
FunctionFlavor flavor);
// Used for the case of creating a custom subclass of CPPStmt.
void DeclareSubclass(const FuncTypePtr& ft, const ProfileFunc* pf, const std::string& fname, void DeclareSubclass(const FuncTypePtr& ft, const ProfileFunc* pf, const std::string& fname,
const StmtPtr& body, int priority, const LambdaExpr* l, const std::string& args, const IDPList* lambda_ids);
FunctionFlavor flavor);
// Used for the case of employing an instance of a CPPDynStmt object.
void DeclareDynCPPStmt();
// Generates the declarations (and in-line definitions) associated // Generates the declarations (and in-line definitions) associated
// with compiling a lambda. // with compiling a lambda.
@ -331,11 +495,40 @@ private:
// the given type, lambda captures (if non-nil), and profile. // the given type, lambda captures (if non-nil), and profile.
std::string ParamDecl(const FuncTypePtr& ft, const IDPList* lambda_ids, const ProfileFunc* pf); std::string ParamDecl(const FuncTypePtr& ft, const IDPList* lambda_ids, const ProfileFunc* pf);
// Returns in p_types the types associated with the parameters for a function
// of the given type, set of lambda captures (if any), and profile.
void GatherParamTypes(std::vector<std::string>& p_types, const FuncTypePtr& ft,
const IDPList* lambda_ids, const ProfileFunc* pf);
// Same, but instead returns the parameter's names.
void GatherParamNames(std::vector<std::string>& p_names, const FuncTypePtr& ft,
const IDPList* lambda_ids, const ProfileFunc* pf);
// Inspects the given profile to find the i'th parameter (starting // Inspects the given profile to find the i'th parameter (starting
// at 0). Returns nil if the profile indicates that that parameter // at 0). Returns nil if the profile indicates that that parameter
// is not used by the function. // is not used by the function.
const ID* FindParam(int i, const ProfileFunc* pf); const ID* FindParam(int i, const ProfileFunc* pf);
// Information associated with a CPPDynStmt dynamic dispatch.
struct DispatchInfo
{
std::string cast; // C++ cast to use for function pointer
std::string args; // arguments to pass to the function
bool is_hook; // whether the function is a hook
TypePtr yield; // what type the function returns, if any
};
// An array of cast/invocation pairs used to generate the CPPDynStmt
// Exec method.
std::vector<DispatchInfo> func_casting_glue;
// Maps casting strings to indices into func_casting_glue. The index
// is what's used to dynamically switch to the right dispatch.
std::unordered_map<std::string, int> casting_index;
// Maps functions (using their C++ name) to their casting strings.
std::unordered_map<std::string, std::string> func_index;
// Names for lambda capture ID's. These require a separate space // Names for lambda capture ID's. These require a separate space
// that incorporates the lambda's name, to deal with nested lambda's // that incorporates the lambda's name, to deal with nested lambda's
// that refer to the identifiers with the same name. // that refer to the identifiers with the same name.
@ -344,7 +537,7 @@ private:
// The function's parameters. Tracked so we don't re-declare them. // The function's parameters. Tracked so we don't re-declare them.
std::unordered_set<const ID*> params; std::unordered_set<const ID*> params;
// Whether we're parsing a hook. // Whether we're compiling a hook.
bool in_hook = false; bool in_hook = false;
// //
@ -362,8 +555,12 @@ private:
void CompileLambda(const LambdaExpr* l, const ProfileFunc* pf); void CompileLambda(const LambdaExpr* l, const ProfileFunc* pf);
// Generates the body of the Invoke() method (which supplies the // Generates the body of the Invoke() method (which supplies the
// "glue" between for calling the C++-generated code). // "glue" for calling the C++-generated code, for CPPStmt subclasses).
void GenInvokeBody(const std::string& fname, const TypePtr& t, const std::string& args); void GenInvokeBody(const std::string& fname, const TypePtr& t, const std::string& args)
{
GenInvokeBody(fname + "(" + args + ")", t);
}
void GenInvokeBody(const std::string& call, const TypePtr& t);
// Generates the code for the body of a script function with // Generates the code for the body of a script function with
// the given type, profile, C++ name, AST, lambda captures // the given type, profile, C++ name, AST, lambda captures
@ -405,9 +602,6 @@ private:
// Maps function bodies to the names we use for them. // Maps function bodies to the names we use for them.
std::unordered_map<const Stmt*, std::string> body_names; std::unordered_map<const Stmt*, std::string> body_names;
// Reverse mapping.
std::unordered_map<std::string, const Stmt*> names_to_bodies;
// Maps function names to hashes of bodies. // Maps function names to hashes of bodies.
std::unordered_map<std::string, p_hash_type> body_hashes; std::unordered_map<std::string, p_hash_type> body_hashes;
@ -426,62 +620,84 @@ private:
// //
// End of methods related to generating compiled script bodies. // End of methods related to generating compiled script bodies.
// Start of methods related to generating code for representing // Methods related to generating code for representing script constants
// script constants as run-time values. // as run-time values. There's only one nontrivial one of these,
// See Consts.cc for definitions. // RegisterConstant() (declared above, as it's public). All the other
// // work is done by secondary objects - see InitsInfo.{h,cc} for those.
// Returns an instantiation of a constant - either as a native // Returns the object used to track indices (vectors of integers
// C++ constant, or as a C++ variable that will be bound to // that are used to index various other vectors, including other
// a Zeek value at run-time initialization - that is needed // indices). Only used by CPP_InitsInfo objects, but stored
// by the given "parent" object (which acquires an initialization // in the CPPCompile object to make it available across different
// dependency, if a C++ variable is needed). // CPP_InitsInfo objects.
std::string BuildConstant(IntrusivePtr<Obj> parent, const ValPtr& vp)
{
return BuildConstant(parent.get(), vp);
}
std::string BuildConstant(const Obj* parent, const ValPtr& vp);
// Called to create a constant appropriate for the given expression friend class CPP_InitsInfo;
// or, more directly, the given value. The second method returns IndicesManager& IndMgr() { return indices_mgr; }
// "true" if a C++ variable needed to be created to construct the
// constant at run-time initialization, false if can be instantiated
// directly as a C++ constant.
void AddConstant(const ConstExpr* c);
bool AddConstant(const ValPtr& v);
// Build particular types of C++ variables (with the given name)
// to hold constants initialized at run-time.
void AddStringConstant(const ValPtr& v, std::string& const_name);
void AddPatternConstant(const ValPtr& v, std::string& const_name);
void AddListConstant(const ValPtr& v, std::string& const_name);
void AddRecordConstant(const ValPtr& v, std::string& const_name);
void AddTableConstant(const ValPtr& v, std::string& const_name);
void AddVectorConstant(const ValPtr& v, std::string& const_name);
// Maps (non-native) constants to associated C++ globals. // Maps (non-native) constants to associated C++ globals.
std::unordered_map<const ConstExpr*, std::string> const_exprs; std::unordered_map<const ConstExpr*, std::string> const_exprs;
// Maps the values of (non-native) constants to associated C++ globals. // Maps the values of (non-native) constants to associated initializer
std::unordered_map<const Val*, std::string> const_vals; // information.
std::unordered_map<const Val*, std::shared_ptr<CPP_InitInfo>> const_vals;
// Same, but for the offset into the vector that tracks all constants
// collectively (to support initialization of compound constants).
std::unordered_map<const Val*, int> const_offsets;
// The same as the above pair, but indexed by the string representation
// rather than the Val*. The reason for having both is to enable
// reusing common constants even though their Val*'s differ.
std::unordered_map<std::string, std::shared_ptr<CPP_InitInfo>> constants;
std::unordered_map<std::string, int> constants_offsets;
// Used for memory management associated with const_vals's index. // Used for memory management associated with const_vals's index.
std::vector<ValPtr> cv_indices; std::vector<ValPtr> cv_indices;
// Maps string representations of (non-native) constants to // For different types of constants (as indicated by TypeTag),
// associated C++ globals. // provides the associated object that manages the initializers
std::unordered_map<std::string, std::string> constants; // for those constants.
std::unordered_map<TypeTag, std::shared_ptr<CPP_InitsInfo>> const_info;
// Maps the same representations to the Val* associated with their // Tracks entries for constructing the vector of all constants
// original creation. This enables us to construct initialization // (regardless of type). Each entry provides a TypeTag, used
// dependencies for later Val*'s that are able to reuse the same // to identify the type-specific vector for a given constant,
// constant. // and the offset into that vector.
std::unordered_map<std::string, const Val*> constants_to_vals; std::vector<std::pair<TypeTag, int>> consts;
// Function variables that we need to create dynamically for // The following objects track initialization information for
// initializing globals, coupled with the name of their associated // different types of initializers: Zeek types, individual
// constant. // attributes, sets of attributes, expressions that call script
std::unordered_map<FuncVal*, std::string> func_vars; // functions (for attribute expressions), registering lambda
// bodies, and registering Zeek globals.
std::shared_ptr<CPP_InitsInfo> type_info;
std::shared_ptr<CPP_InitsInfo> attr_info;
std::shared_ptr<CPP_InitsInfo> attrs_info;
std::shared_ptr<CPP_InitsInfo> call_exprs_info;
std::shared_ptr<CPP_InitsInfo> lambda_reg_info;
std::shared_ptr<CPP_InitsInfo> global_id_info;
// Tracks all of the above objects (as well as each entry in
// const_info), to facilitate easy iterating over them.
std::set<std::shared_ptr<CPP_InitsInfo>> all_global_info;
// Tracks the attribute expressions for which we need to generate
// function calls to evaluate them.
std::unordered_map<std::string, std::shared_ptr<CallExprInitInfo>> init_infos;
// See IndMgr() above for the role of this variable.
IndicesManager indices_mgr;
// Maps strings to associated offsets.
std::unordered_map<std::string, int> tracked_strings;
// Tracks strings we've registered in order (corresponding to
// their offsets).
std::vector<std::string> ordered_tracked_strings;
// The same as the previous two, but for profile hashes.
std::vector<p_hash_type> ordered_tracked_hashes;
std::unordered_map<p_hash_type, int> tracked_hashes;
// //
// End of methods related to generating code for script constants. // End of methods related to generating code for script constants.
@ -649,9 +865,9 @@ private:
// not the outer map). // not the outer map).
int num_rf_mappings = 0; int num_rf_mappings = 0;
// For each entry in "field_mapping", the record and TypeDecl // For each entry in "field_mapping", the record (as a global
// associated with the mapping. // offset) and TypeDecl associated with the mapping.
std::vector<std::pair<const RecordType*, const TypeDecl*>> field_decls; std::vector<std::pair<int, const TypeDecl*>> field_decls;
// For enums that are extended via redef's, maps each distinct // For enums that are extended via redef's, maps each distinct
// value (that the compiled scripts refer to) to locations in the // value (that the compiled scripts refer to) to locations in the
@ -665,9 +881,9 @@ private:
// not the outer map). // not the outer map).
int num_ev_mappings = 0; int num_ev_mappings = 0;
// For each entry in "enum_mapping", the record and name // For each entry in "enum_mapping", the EnumType (as a global
// associated with the mapping. // offset) and name associated with the mapping.
std::vector<std::pair<const EnumType*, std::string>> enum_names; std::vector<std::pair<int, std::string>> enum_names;
// //
// End of methods related to generating code for AST Expr's. // End of methods related to generating code for AST Expr's.
@ -690,24 +906,6 @@ private:
// given script type 't', converts it as needed to the given GenType. // given script type 't', converts it as needed to the given GenType.
std::string GenericValPtrToGT(const std::string& expr, const TypePtr& t, GenType gt); std::string GenericValPtrToGT(const std::string& expr, const TypePtr& t, GenType gt);
// For a given type, generates the code necessary to initialize
// it at run time. The term "expand" in the method's name refers
// to the fact that the type has already been previously declared
// (necessary to facilitate defining recursive types), so this method
// generates the "meat" of the type but not its original declaration.
void ExpandTypeVar(const TypePtr& t);
// Methods for expanding specific such types. "tn" is the name
// of the C++ variable used for the particular type.
void ExpandListTypeVar(const TypePtr& t, std::string& tn);
void ExpandRecordTypeVar(const TypePtr& t, std::string& tn);
void ExpandEnumTypeVar(const TypePtr& t, std::string& tn);
void ExpandTableTypeVar(const TypePtr& t, std::string& tn);
void ExpandFuncTypeVar(const TypePtr& t, std::string& tn);
// The following assumes we're populating a type_decl_list called "tl".
std::string GenTypeDecl(const TypeDecl* td);
// Returns the name of a C++ variable that will hold a TypePtr // Returns the name of a C++ variable that will hold a TypePtr
// of the appropriate flavor. 't' does not need to be a type // of the appropriate flavor. 't' does not need to be a type
// representative. // representative.
@ -721,21 +919,11 @@ private:
const Type* TypeRep(const TypePtr& t) { return TypeRep(t.get()); } const Type* TypeRep(const TypePtr& t) { return TypeRep(t.get()); }
// Low-level C++ representations for types, of various flavors. // Low-level C++ representations for types, of various flavors.
const char* TypeTagName(TypeTag tag) const; static const char* TypeTagName(TypeTag tag);
const char* TypeName(const TypePtr& t); const char* TypeName(const TypePtr& t);
const char* FullTypeName(const TypePtr& t); const char* FullTypeName(const TypePtr& t);
const char* TypeType(const TypePtr& t); const char* TypeType(const TypePtr& t);
// Track the given type (with support methods for ones that
// are complicated), recursively including its sub-types, and
// creating initializations (and dependencies) for constructing
// C++ variables representing the types.
void RegisterType(const TypePtr& t);
void RegisterListType(const TypePtr& t);
void RegisterTableType(const TypePtr& t);
void RegisterRecordType(const TypePtr& t);
void RegisterFuncType(const TypePtr& t);
// Access to a type's underlying values. // Access to a type's underlying values.
const char* NativeAccessor(const TypePtr& t); const char* NativeAccessor(const TypePtr& t);
@ -744,11 +932,13 @@ private:
const char* IntrusiveVal(const TypePtr& t); const char* IntrusiveVal(const TypePtr& t);
// Maps types to indices in the global "types__CPP" array. // Maps types to indices in the global "types__CPP" array.
CPPTracker<Type> types = {"types", &compiled_items}; CPPTracker<Type> types = {"types", true, &compiled_items};
// Used to prevent analysis of mutually-referring types from // Used to prevent analysis of mutually-referring types from
// leading to infinite recursion. // leading to infinite recursion. Maps types to their global
std::unordered_set<const Type*> processed_types; // initialization information (or, initially, to nullptr, if
// they're in the process of being registered).
std::unordered_map<const Type*, std::shared_ptr<CPP_InitInfo>> processed_types;
// //
// End of methods related to managing script types. // End of methods related to managing script types.
@ -758,11 +948,6 @@ private:
// See Attrs.cc for definitions. // See Attrs.cc for definitions.
// //
// Tracks a use of the given set of attributes, including
// initialization dependencies and the generation of any
// associated expressions.
void RegisterAttributes(const AttributesPtr& attrs);
// Populates the 2nd and 3rd arguments with C++ representations // Populates the 2nd and 3rd arguments with C++ representations
// of the tags and (optional) values/expressions associated with // of the tags and (optional) values/expressions associated with
// the set of attributes. // the set of attributes.
@ -772,16 +957,17 @@ private:
void GenAttrs(const AttributesPtr& attrs); void GenAttrs(const AttributesPtr& attrs);
std::string GenAttrExpr(const ExprPtr& e); std::string GenAttrExpr(const ExprPtr& e);
// Returns the name of the C++ variable that will hold the given
// attributes at run-time.
std::string AttrsName(const AttributesPtr& attrs);
// Returns a string representation of the name associated with // Returns a string representation of the name associated with
// different attributes (e.g., "ATTR_DEFAULT"). // different attribute tags (e.g., "ATTR_DEFAULT").
const char* AttrName(const AttrPtr& attr); static const char* AttrName(AttrTag t);
// Similar for attributes, so we can reconstruct record types. // Similar for attributes, so we can reconstruct record types.
CPPTracker<Attributes> attributes = {"attrs", &compiled_items}; CPPTracker<Attributes> attributes = {"attrs", false, &compiled_items};
// Maps Attributes and Attr's to their global initialization
// information.
std::unordered_map<const Attributes*, std::shared_ptr<CPP_InitInfo>> processed_attrs;
std::unordered_map<const Attr*, std::shared_ptr<CPP_InitInfo>> processed_attr;
// //
// End of methods related to managing script type attributes. // End of methods related to managing script type attributes.
@ -790,121 +976,42 @@ private:
// See Inits.cc for definitions. // See Inits.cc for definitions.
// //
// Generates code to construct a CallExpr that can be used to // Generates code for dynamically generating an expression
// evaluate the expression 'e' as an initializer (typically // associated with an attribute, via a function call.
// for a record &default attribute). void GenInitExpr(std::shared_ptr<CallExprInitInfo> ce_init);
void GenInitExpr(const ExprPtr& e);
// True if the given expression is simple enough that we can
// generate code to evaluate it directly, and don't need to
// create a separate function per GenInitExpr().
bool IsSimpleInitExpr(const ExprPtr& e) const;
// Returns the name of a function used to evaluate an // Returns the name of a function used to evaluate an
// initialization expression. // initialization expression.
std::string InitExprName(const ExprPtr& e); std::string InitExprName(const ExprPtr& e);
// Generates code to initialize the global 'g' (with C++ name "gl") // Convenience functions for returning the offset or initialization cohort
// to the given value *if* on start-up it doesn't already have a value. // associated with an initialization.
void GenGlobalInit(const ID* g, std::string& gl, const ValPtr& v); int GI_Offset(const std::shared_ptr<CPP_InitInfo>& gi) const { return gi ? gi->Offset() : -1; }
int GI_Cohort(const std::shared_ptr<CPP_InitInfo>& gi) const
// Generates code to initialize all of the function-valued globals
// (i.e., those pointing to lambdas).
void GenFuncVarInits();
// Generates the "pre-initialization" for a given type. For
// extensible types (records, enums, lists), these are empty
// versions that we'll later populate.
void GenPreInit(const Type* t);
// Generates a function that executes the pre-initializations.
void GenPreInits();
// The following all track that for a given object, code associated
// with initializing it. Multiple calls for the same object append
// additional lines of code (the order of the calls is preserved).
//
// Versions with "lhs" and "rhs" arguments provide an initialization
// of the form "lhs = rhs;", as a convenience.
void AddInit(const IntrusivePtr<Obj>& o, const std::string& lhs, const std::string& rhs)
{ {
AddInit(o.get(), lhs + " = " + rhs + ";"); return gi ? gi->InitCohort() : 0;
}
void AddInit(const Obj* o, const std::string& lhs, const std::string& rhs)
{
AddInit(o, lhs + " = " + rhs + ";");
}
void AddInit(const IntrusivePtr<Obj>& o, const std::string& init) { AddInit(o.get(), init); }
void AddInit(const Obj* o, const std::string& init);
// We do consistency checking of initialization dependencies by
// verifying that depended-on objects have initializations. Sometimes
// it's unclear whether the object will actually require
// initialization, in which case we add an empty initialization
// for it so that the consistency-checking is happy.
void AddInit(const IntrusivePtr<Obj>& o) { AddInit(o.get()); }
void AddInit(const Obj* o);
// This is akin to an initialization, but done separately
// (upon "activation") so it can include initializations that
// rely on parsing having finished (in particular, BiFs having
// been registered). Only used when generating standalone code.
void AddActivation(std::string a) { activations.emplace_back(a); }
// Records the fact that the initialization of object o1 depends
// on that of object o2.
void NoteInitDependency(const IntrusivePtr<Obj>& o1, const IntrusivePtr<Obj>& o2)
{
NoteInitDependency(o1.get(), o2.get());
}
void NoteInitDependency(const IntrusivePtr<Obj>& o1, const Obj* o2)
{
NoteInitDependency(o1.get(), o2);
}
void NoteInitDependency(const Obj* o1, const IntrusivePtr<Obj>& o2)
{
NoteInitDependency(o1, o2.get());
}
void NoteInitDependency(const Obj* o1, const Obj* o2);
// Records an initialization dependency of the given object
// on the given type, unless the type is a record. We need
// this notion to protect against circular dependencies in
// the face of recursive records.
void NoteNonRecordInitDependency(const Obj* o, const TypePtr& t)
{
if ( t && t->Tag() != TYPE_RECORD )
NoteInitDependency(o, TypeRep(t));
}
void NoteNonRecordInitDependency(const IntrusivePtr<Obj> o, const TypePtr& t)
{
NoteNonRecordInitDependency(o.get(), t);
} }
// Analyzes the initialization dependencies to ensure that they're // Generate code to initialize the mappings for record field
// consistent, i.e., every object that either depends on another, // offsets for field accesses into regions of records that
// or is itself depended on, appears in the "to_do" set. // can be extensible (and thus can vary at run-time to the
void CheckInitConsistency(std::unordered_set<const Obj*>& to_do); // offsets encountered during compilation).
// Generate initializations for the items in the "to_do" set,
// in accordance with their dependencies. Returns 'n', the
// number of initialization functions generated. They should
// be called in order, from 1 to n.
int GenDependentInits(std::unordered_set<const Obj*>& to_do);
// Generates a function for initializing the nc'th cohort.
void GenInitCohort(int nc, std::unordered_set<const Obj*>& cohort);
// Initialize the mappings for record field offsets for field
// accesses into regions of records that can be extensible (and
// thus can vary at run-time to the offsets encountered during
// compilation).
void InitializeFieldMappings(); void InitializeFieldMappings();
// Same, but for enum types. The second form does a single // Same, but for enum types.
// initialization corresponding to the given index in the mapping.
void InitializeEnumMappings(); void InitializeEnumMappings();
void InitializeEnumMappings(const EnumType* et, const std::string& e_name, int index);
// Generate code to initialize BiFs.
void InitializeBiFs();
// Generate code to initialize strings that we track.
void InitializeStrings();
// Generate code to initialize hashes that we track.
void InitializeHashes();
// Generate code to initialize indirect references to constants.
void InitializeConsts();
// Generate the initialization hook for this set of compiled code. // Generate the initialization hook for this set of compiled code.
void GenInitHook(); void GenInitHook();
@ -917,25 +1024,15 @@ private:
// what we compiled. // what we compiled.
void GenLoad(); void GenLoad();
// A list of pre-initializations (those potentially required by // A list of BiFs to look up during initialization. First
// other initializations, and that themselves have no dependencies). // string is the name of the C++ global holding the BiF, the
std::vector<std::string> pre_inits; // second is its name as known to Zeek.
std::unordered_map<std::string, std::string> BiFs;
// A list of "activations" (essentially, post-initializations).
// See AddActivation() above.
std::vector<std::string> activations;
// Expressions for which we need to generate initialization-time // Expressions for which we need to generate initialization-time
// code. Currently, these are only expressions appearing in // code. Currently, these are only expressions appearing in
// attributes. // attributes.
CPPTracker<Expr> init_exprs = {"gen_init_expr", &compiled_items}; CPPTracker<Expr> init_exprs = {"gen_init_expr", false, &compiled_items};
// Maps an object requiring initialization to its initializers.
std::unordered_map<const Obj*, std::vector<std::string>> obj_inits;
// Maps an object requiring initializations to its dependencies
// on other such objects.
std::unordered_map<const Obj*, std::unordered_set<const Obj*>> obj_deps;
// //
// End of methods related to run-time initialization. // End of methods related to run-time initialization.
@ -944,12 +1041,20 @@ private:
// See Emit.cc for definitions. // See Emit.cc for definitions.
// //
// The following all need to be able to emit code.
friend class CPP_BasicConstInitsInfo;
friend class CPP_CompoundInitsInfo;
friend class IndicesManager;
// Used to create (indented) C++ {...} code blocks. "needs_semi" // Used to create (indented) C++ {...} code blocks. "needs_semi"
// controls whether to terminate the block with a ';' (such as // controls whether to terminate the block with a ';' (such as
// for class definitions). // for class definitions).
void StartBlock(); void StartBlock();
void EndBlock(bool needs_semi = false); void EndBlock(bool needs_semi = false);
void IndentUp() { ++block_level; }
void IndentDown() { --block_level; }
// Various ways of generating code. The multi-argument methods // Various ways of generating code. The multi-argument methods
// assume that the first argument is a printf-style format // assume that the first argument is a printf-style format
// (but one that can only have %s specifiers). // (but one that can only have %s specifiers).
@ -960,11 +1065,12 @@ private:
NL(); NL();
} }
void Emit(const std::string& fmt, const std::string& arg) const void Emit(const std::string& fmt, const std::string& arg, bool do_NL = true) const
{ {
Indent(); Indent();
fprintf(write_file, fmt.c_str(), arg.c_str()); fprintf(write_file, fmt.c_str(), arg.c_str());
NL(); if ( do_NL )
NL();
} }
void Emit(const std::string& fmt, const std::string& arg1, const std::string& arg2) const void Emit(const std::string& fmt, const std::string& arg1, const std::string& arg2) const
@ -999,14 +1105,15 @@ private:
NL(); NL();
} }
// Returns an expression for constructing a Zeek String object void Emit(const std::string& fmt, const std::string& arg1, const std::string& arg2,
// corresponding to the given byte array. const std::string& arg3, const std::string& arg4, const std::string& arg5,
std::string GenString(const char* b, int len) const; const std::string& arg6) const
{
// For the given byte array / string, returns a version expanded Indent();
// with escape sequences in order to represent it as a C++ string. fprintf(write_file, fmt.c_str(), arg1.c_str(), arg2.c_str(), arg3.c_str(), arg4.c_str(),
std::string CPPEscape(const char* b, int len) const; arg5.c_str(), arg6.c_str());
std::string CPPEscape(const char* s) const { return CPPEscape(s, strlen(s)); } NL();
}
void NL() const { fputc('\n', write_file); } void NL() const { fputc('\n', write_file); }

View file

@ -4,55 +4,26 @@
#include "zeek/RE.h" #include "zeek/RE.h"
#include "zeek/script_opt/CPP/Compile.h" #include "zeek/script_opt/CPP/Compile.h"
using namespace std;
namespace zeek::detail namespace zeek::detail
{ {
using namespace std; shared_ptr<CPP_InitInfo> CPPCompile::RegisterConstant(const ValPtr& vp, int& consts_offset)
string CPPCompile::BuildConstant(const Obj* parent, const ValPtr& vp)
{ {
if ( ! vp ) // Make sure the value pointer, which might be transient
return "nullptr"; // in construction, sticks around so we can track its
// value.
cv_indices.push_back(vp);
if ( AddConstant(vp) )
{
auto v = vp.get();
AddInit(parent);
NoteInitDependency(parent, v);
// Make sure the value pointer, which might be transient
// in construction, sticks around so we can track its
// value.
cv_indices.push_back(vp);
return const_vals[v];
}
else
return NativeToGT(GenVal(vp), vp->GetType(), GEN_VAL_PTR);
}
void CPPCompile::AddConstant(const ConstExpr* c)
{
auto v = c->ValuePtr();
if ( AddConstant(v) )
{
AddInit(c);
NoteInitDependency(c, v.get());
}
}
bool CPPCompile::AddConstant(const ValPtr& vp)
{
auto v = vp.get(); auto v = vp.get();
if ( IsNativeType(v->GetType()) )
// These we instantiate directly.
return false;
if ( const_vals.count(v) > 0 ) if ( const_vals.count(v) > 0 )
{
// Already did this one. // Already did this one.
return true; consts_offset = const_offsets[v];
return const_vals[v];
}
// Formulate a key that's unique per distinct constant. // Formulate a key that's unique per distinct constant.
@ -82,213 +53,100 @@ bool CPPCompile::AddConstant(const ValPtr& vp)
if ( constants.count(c_desc) > 0 ) if ( constants.count(c_desc) > 0 )
{ {
const_vals[v] = constants[c_desc]; const_vals[v] = constants[c_desc];
consts_offset = const_offsets[v] = constants_offsets[c_desc];
auto orig_v = constants_to_vals[c_desc]; return const_vals[v];
ASSERT(v != orig_v);
AddInit(v);
NoteInitDependency(v, orig_v);
return true;
} }
// Need a C++ global for this constant.
auto const_name = string("CPP__const__") + Fmt(int(constants.size()));
const_vals[v] = constants[c_desc] = const_name;
constants_to_vals[c_desc] = v;
auto tag = t->Tag(); auto tag = t->Tag();
auto const_name = const_info[tag]->NextName();
shared_ptr<CPP_InitInfo> gi;
switch ( tag ) switch ( tag )
{ {
case TYPE_STRING: case TYPE_BOOL:
AddStringConstant(vp, const_name); gi = make_shared<BasicConstInfo>(vp->AsBool() ? "true" : "false");
break; break;
case TYPE_PATTERN: case TYPE_INT:
AddPatternConstant(vp, const_name); gi = make_shared<BasicConstInfo>(to_string(vp->AsInt()));
break; break;
case TYPE_LIST: case TYPE_COUNT:
AddListConstant(vp, const_name); gi = make_shared<BasicConstInfo>(to_string(vp->AsCount()) + "ULL");
break; break;
case TYPE_RECORD: case TYPE_DOUBLE:
AddRecordConstant(vp, const_name); gi = make_shared<BasicConstInfo>(to_string(vp->AsDouble()));
break; break;
case TYPE_TABLE: case TYPE_TIME:
AddTableConstant(vp, const_name); gi = make_shared<BasicConstInfo>(to_string(vp->AsDouble()));
break; break;
case TYPE_VECTOR: case TYPE_INTERVAL:
AddVectorConstant(vp, const_name); gi = make_shared<BasicConstInfo>(to_string(vp->AsDouble()));
break; break;
case TYPE_ADDR: case TYPE_ADDR:
case TYPE_SUBNET: gi = make_shared<DescConstInfo>(this, vp);
{
auto prefix = (tag == TYPE_ADDR) ? "Addr" : "SubNet";
Emit("%sValPtr %s;", prefix, const_name);
ODesc d;
v->Describe(&d);
AddInit(v, const_name,
string("make_intrusive<") + prefix + "Val>(\"" + d.Description() + "\")");
}
break; break;
case TYPE_FUNC: case TYPE_SUBNET:
Emit("FuncValPtr %s;", const_name); gi = make_shared<DescConstInfo>(this, vp);
break;
// We can't generate the initialization now because it case TYPE_ENUM:
// depends on first having compiled the associated body, gi = make_shared<EnumConstInfo>(this, vp);
// so we know its hash. So for now we just note it break;
// to deal with later.
func_vars[v->AsFuncVal()] = const_name; case TYPE_STRING:
gi = make_shared<StringConstInfo>(this, vp);
break;
case TYPE_PATTERN:
gi = make_shared<PatternConstInfo>(this, vp);
break;
case TYPE_PORT:
gi = make_shared<PortConstInfo>(vp);
break;
case TYPE_LIST:
gi = make_shared<ListConstInfo>(this, vp);
break;
case TYPE_VECTOR:
gi = make_shared<VectorConstInfo>(this, vp);
break;
case TYPE_RECORD:
gi = make_shared<RecordConstInfo>(this, vp);
break;
case TYPE_TABLE:
gi = make_shared<TableConstInfo>(this, vp);
break; break;
case TYPE_FILE: case TYPE_FILE:
{ gi = make_shared<FileConstInfo>(this, vp);
Emit("FileValPtr %s;", const_name); break;
auto f = cast_intrusive<FileVal>(vp)->Get(); case TYPE_FUNC:
gi = make_shared<FuncConstInfo>(this, vp);
AddInit(v, const_name,
string("make_intrusive<FileVal>(") + "make_intrusive<File>(\"" + f->Name() +
"\", \"w\"))");
}
break; break;
default: default:
reporter->InternalError("bad constant type in CPPCompile::AddConstant"); reporter->InternalError("bad constant type in CPPCompile::AddConstant");
break;
} }
return true; const_info[tag]->AddInstance(gi);
} const_vals[v] = constants[c_desc] = gi;
void CPPCompile::AddStringConstant(const ValPtr& v, string& const_name) consts_offset = const_offsets[v] = constants_offsets[c_desc] = consts.size();
{ consts.emplace_back(pair(tag, gi->Offset()));
Emit("StringValPtr %s;", const_name);
auto s = v->AsString(); return gi;
const char* b = (const char*)(s->Bytes());
auto len = s->Len();
AddInit(v, const_name, GenString(b, len));
}
void CPPCompile::AddPatternConstant(const ValPtr& v, string& const_name)
{
Emit("PatternValPtr %s;", const_name);
auto re = v->AsPatternVal()->Get();
AddInit(v, string("{ auto re = new RE_Matcher(") + CPPEscape(re->OrigText()) + ");");
if ( re->IsCaseInsensitive() )
AddInit(v, "re->MakeCaseInsensitive();");
AddInit(v, "re->Compile();");
AddInit(v, const_name, "make_intrusive<PatternVal>(re)");
AddInit(v, "}");
}
void CPPCompile::AddListConstant(const ValPtr& v, string& const_name)
{
Emit("ListValPtr %s;", const_name);
// No initialization dependency on the main type since we don't
// use the underlying TypeList. However, we *do* use the types of
// the elements.
AddInit(v, const_name, string("make_intrusive<ListVal>(TYPE_ANY)"));
auto lv = cast_intrusive<ListVal>(v);
auto n = lv->Length();
for ( auto i = 0; i < n; ++i )
{
const auto& l_i = lv->Idx(i);
auto l_i_c = BuildConstant(v, l_i);
AddInit(v, const_name + "->Append(" + l_i_c + ");");
NoteInitDependency(v, TypeRep(l_i->GetType()));
}
}
void CPPCompile::AddRecordConstant(const ValPtr& v, string& const_name)
{
const auto& t = v->GetType();
Emit("RecordValPtr %s;", const_name);
NoteInitDependency(v, TypeRep(t));
AddInit(v, const_name,
string("make_intrusive<RecordVal>(") + "cast_intrusive<RecordType>(" + GenTypeName(t) +
"))");
auto r = cast_intrusive<RecordVal>(v);
auto n = r->NumFields();
for ( auto i = 0u; i < n; ++i )
{
const auto& r_i = r->GetField(i);
if ( r_i )
{
auto r_i_c = BuildConstant(v, r_i);
AddInit(v, const_name + "->Assign(" + Fmt(static_cast<int>(i)) + ", " + r_i_c + ");");
}
}
}
void CPPCompile::AddTableConstant(const ValPtr& v, string& const_name)
{
const auto& t = v->GetType();
Emit("TableValPtr %s;", const_name);
NoteInitDependency(v, TypeRep(t));
AddInit(v, const_name,
string("make_intrusive<TableVal>(") + "cast_intrusive<TableType>(" + GenTypeName(t) +
"))");
auto tv = cast_intrusive<TableVal>(v);
auto tv_map = tv->ToMap();
for ( auto& tv_i : tv_map )
{
auto ind = BuildConstant(v, tv_i.first);
auto val = BuildConstant(v, tv_i.second);
AddInit(v, const_name + "->Assign(" + ind + ", " + val + ");");
}
}
void CPPCompile::AddVectorConstant(const ValPtr& v, string& const_name)
{
const auto& t = v->GetType();
Emit("VectorValPtr %s;", const_name);
NoteInitDependency(v, TypeRep(t));
AddInit(v, const_name,
string("make_intrusive<VectorVal>(") + "cast_intrusive<VectorType>(" + GenTypeName(t) +
"))");
auto vv = cast_intrusive<VectorVal>(v);
auto n = vv->Size();
for ( auto i = 0u; i < n; ++i )
{
const auto& v_i = vv->ValAt(i);
auto v_i_c = BuildConstant(v, v_i);
AddInit(v, const_name + "->Append(" + v_i_c + ");");
}
} }
} // zeek::detail } // zeek::detail

View file

@ -22,7 +22,7 @@ void CPPCompile::DeclareFunc(const FuncInfo& func)
const auto& body = func.Body(); const auto& body = func.Body();
auto priority = func.Priority(); auto priority = func.Priority();
DeclareSubclass(f->GetType(), pf, fname, body, priority, nullptr, f->Flavor()); CreateFunction(f->GetType(), pf, fname, body, priority, nullptr, f->Flavor());
if ( f->GetBodies().size() == 1 ) if ( f->GetBodies().size() == 1 )
compiled_simple_funcs[f->Name()] = fname; compiled_simple_funcs[f->Name()] = fname;
@ -40,17 +40,88 @@ void CPPCompile::DeclareLambda(const LambdaExpr* l, const ProfileFunc* pf)
for ( auto id : ids ) for ( auto id : ids )
lambda_names[id] = LocalName(id); lambda_names[id] = LocalName(id);
DeclareSubclass(l_id->GetType<FuncType>(), pf, lname, body, 0, l, FUNC_FLAVOR_FUNCTION); CreateFunction(l_id->GetType<FuncType>(), pf, lname, body, 0, l, FUNC_FLAVOR_FUNCTION);
} }
void CPPCompile::DeclareSubclass(const FuncTypePtr& ft, const ProfileFunc* pf, const string& fname, void CPPCompile::CreateFunction(const FuncTypePtr& ft, const ProfileFunc* pf, const string& fname,
const StmtPtr& body, int priority, const LambdaExpr* l, const StmtPtr& body, int priority, const LambdaExpr* l,
FunctionFlavor flavor) FunctionFlavor flavor)
{ {
const auto& yt = ft->Yield(); const auto& yt = ft->Yield();
in_hook = flavor == FUNC_FLAVOR_HOOK; in_hook = flavor == FUNC_FLAVOR_HOOK;
const IDPList* lambda_ids = l ? &l->OuterIDs() : nullptr; const IDPList* lambda_ids = l ? &l->OuterIDs() : nullptr;
string args = BindArgs(ft, lambda_ids);
auto yt_decl = in_hook ? "bool" : FullTypeName(yt);
vector<string> p_types;
GatherParamTypes(p_types, ft, lambda_ids, pf);
string cast = string(yt_decl) + "(*)(";
for ( auto& pt : p_types )
cast += pt + ", ";
cast += string("Frame*)");
// We need to distinguish between hooks and non-hooks that happen
// to have matching type signatures. They'll be equivalent if they
// have identical casts. To keep them separate, we cheat and
// make hook casts different, string-wise, without altering their
// semantics.
if ( in_hook )
cast += " ";
func_index[fname] = cast;
if ( casting_index.count(cast) == 0 )
{
casting_index[cast] = func_casting_glue.size();
DispatchInfo di;
di.cast = cast;
di.args = args;
di.is_hook = in_hook;
di.yield = yt;
func_casting_glue.emplace_back(di);
}
if ( lambda_ids )
{
DeclareSubclass(ft, pf, fname, args, lambda_ids);
BuildLambda(ft, pf, fname, body, l, lambda_ids);
EndBlock(true);
}
else
{
Emit("static %s %s(%s);", yt_decl, fname, ParamDecl(ft, lambda_ids, pf));
// Track this function as known to have been compiled.
// We don't track lambda bodies as compiled because they
// can't be instantiated directly without also supplying
// the captures. In principle we could make an exception
// for lambdas that don't take any arguments, but that
// seems potentially more confusing than beneficial.
compiled_funcs.emplace(fname);
auto loc_f = script_specific_filename(body);
cf_locs[fname] = loc_f;
}
auto h = pf->HashVal();
body_hashes[fname] = h;
body_priorities[fname] = priority;
body_names.emplace(body.get(), fname);
total_hash = merge_p_hashes(total_hash, h);
}
void CPPCompile::DeclareSubclass(const FuncTypePtr& ft, const ProfileFunc* pf, const string& fname,
const string& args, const IDPList* lambda_ids)
{
const auto& yt = ft->Yield();
auto yt_decl = in_hook ? "bool" : FullTypeName(yt); auto yt_decl = in_hook ? "bool" : FullTypeName(yt);
NL(); NL();
@ -76,8 +147,7 @@ void CPPCompile::DeclareSubclass(const FuncTypePtr& ft, const ProfileFunc* pf, c
} }
} }
Emit("%s_cl(const char* name%s) : CPPStmt(name)%s { }", fname, addl_args.c_str(), Emit("%s_cl(const char* name%s) : CPPStmt(name)%s { }", fname, addl_args, inits);
inits.c_str());
// An additional constructor just used to generate place-holder // An additional constructor just used to generate place-holder
// instances, due to the mis-design that lambdas are identified // instances, due to the mis-design that lambdas are identified
@ -92,7 +162,7 @@ void CPPCompile::DeclareSubclass(const FuncTypePtr& ft, const ProfileFunc* pf, c
if ( in_hook ) if ( in_hook )
{ {
Emit("if ( ! %s(%s) )", fname, BindArgs(ft, lambda_ids)); Emit("if ( ! %s(%s) )", fname, args);
StartBlock(); StartBlock();
Emit("flow = FLOW_BREAK;"); Emit("flow = FLOW_BREAK;");
EndBlock(); EndBlock();
@ -100,42 +170,36 @@ void CPPCompile::DeclareSubclass(const FuncTypePtr& ft, const ProfileFunc* pf, c
} }
else if ( IsNativeType(yt) ) else if ( IsNativeType(yt) )
GenInvokeBody(fname, yt, BindArgs(ft, lambda_ids)); GenInvokeBody(fname, yt, args);
else else
Emit("return %s(%s);", fname, BindArgs(ft, lambda_ids)); Emit("return %s(%s);", fname, args);
EndBlock(); EndBlock();
}
if ( lambda_ids ) void CPPCompile::DeclareDynCPPStmt()
BuildLambda(ft, pf, fname, body, l, lambda_ids); {
else Emit("// A version of CPPStmt that manages a function pointer and");
{ Emit("// dynamically casts it to a given type to call it via Exec().");
// Track this function as known to have been compiled. Emit("// We will later generate a custom Exec method to support this");
// We don't track lambda bodies as compiled because they Emit("// dispatch. All of this is ugly, and only needed because clang");
// can't be instantiated directly without also supplying Emit("// goes nuts (super slow) in the face of thousands of templates");
// the captures. In principle we could make an exception Emit("// in a given context (initializers, or a function body).");
// for lambdas that don't take any arguments, but that Emit("class CPPDynStmt : public CPPStmt");
// seems potentially more confusing than beneficial. Emit("\t{");
compiled_funcs.emplace(fname); Emit("public:");
Emit("\tCPPDynStmt(const char* _name, void* _func, int _type_signature) : CPPStmt(_name), "
auto loc_f = script_specific_filename(body); "func(_func), type_signature(_type_signature) { }");
cf_locs[fname] = loc_f; Emit("\tValPtr Exec(Frame* f, StmtFlowType& flow) override final;");
Emit("private:");
// Some guidance for those looking through the generated code. Emit("\t// The function to call in Exec().");
Emit("// compiled body for: %s", loc_f); Emit("\tvoid* func;");
} Emit("\t// Used via a switch in the dynamically-generated Exec() method");
Emit("\t// to cast func to the write type, and to call it with the");
EndBlock(true); Emit("\t// right arguments pulled out of the frame.");
Emit("\tint type_signature;");
auto h = pf->HashVal(); Emit("\t};");
body_hashes[fname] = h;
body_priorities[fname] = priority;
body_names.emplace(body.get(), fname);
names_to_bodies.emplace(fname, body.get());
total_hash = merge_p_hashes(total_hash, h);
} }
void CPPCompile::BuildLambda(const FuncTypePtr& ft, const ProfileFunc* pf, const string& fname, void CPPCompile::BuildLambda(const FuncTypePtr& ft, const ProfileFunc* pf, const string& fname,
@ -146,28 +210,17 @@ void CPPCompile::BuildLambda(const FuncTypePtr& ft, const ProfileFunc* pf, const
{ {
auto name = lambda_names[id]; auto name = lambda_names[id];
auto tn = FullTypeName(id->GetType()); auto tn = FullTypeName(id->GetType());
Emit("%s %s;", tn, name.c_str()); Emit("%s %s;", tn, name);
} }
// Generate initialization to create and register the lambda. // Generate initialization to create and register the lambda.
auto literal_name = string("\"") + l->Name() + "\""; auto h = pf->HashVal();
auto instantiate = string("make_intrusive<") + fname + "_cl>(" + literal_name + ")"; auto nl = lambda_ids->length();
bool has_captures = nl > 0;
int nl = lambda_ids->length(); auto gi = make_shared<LambdaRegistrationInfo>(this, l->Name(), ft, fname + "_cl", h,
auto h = Fmt(pf->HashVal()); has_captures);
auto has_captures = nl > 0 ? "true" : "false"; lambda_reg_info->AddInstance(gi);
auto l_init = string("register_lambda__CPP(") + instantiate + ", " + h + ", \"" + l->Name() +
"\", " + GenTypeName(ft) + ", " + has_captures + ");";
AddInit(l, l_init);
NoteInitDependency(l, TypeRep(ft));
// Make the lambda's body's initialization depend on the lambda's
// initialization. That way GenFuncVarInits() can generate
// initializations with the assurance that the associated body
// hashes will have been registered.
AddInit(body.get());
NoteInitDependency(body.get(), l);
// Generate method to extract the lambda captures from a deserialized // Generate method to extract the lambda captures from a deserialized
// Frame object. // Frame object.
@ -237,17 +290,71 @@ string CPPCompile::BindArgs(const FuncTypePtr& ft, const IDPList* lambda_ids)
string CPPCompile::ParamDecl(const FuncTypePtr& ft, const IDPList* lambda_ids, string CPPCompile::ParamDecl(const FuncTypePtr& ft, const IDPList* lambda_ids,
const ProfileFunc* pf) const ProfileFunc* pf)
{ {
const auto& params = ft->Params(); vector<string> p_types;
int n = params->NumFields(); vector<string> p_names;
GatherParamTypes(p_types, ft, lambda_ids, pf);
GatherParamNames(p_names, ft, lambda_ids, pf);
ASSERT(p_types.size() == p_names.size());
string decl; string decl;
for ( auto i = 0U; i < p_types.size(); ++i )
decl += p_types[i] + " " + p_names[i] + ", ";
// Add in the declaration of the frame.
return decl + "Frame* f__CPP";
}
void CPPCompile::GatherParamTypes(vector<string>& p_types, const FuncTypePtr& ft,
const IDPList* lambda_ids, const ProfileFunc* pf)
{
const auto& params = ft->Params();
int n = params->NumFields();
for ( auto i = 0; i < n; ++i ) for ( auto i = 0; i < n; ++i )
{ {
const auto& t = params->GetFieldType(i); const auto& t = params->GetFieldType(i);
auto tn = FullTypeName(t); auto tn = FullTypeName(t);
auto param_id = FindParam(i, pf); auto param_id = FindParam(i, pf);
string fn;
if ( IsNativeType(t) )
// Native types are always pass-by-value.
p_types.emplace_back(tn);
else
{
if ( param_id && pf->Assignees().count(param_id) > 0 )
// We modify the parameter.
p_types.emplace_back(tn);
else
// Not modified, so pass by const reference.
p_types.emplace_back(string("const ") + tn + "&");
}
}
if ( lambda_ids )
// Add the captures as additional parameters.
for ( auto& id : *lambda_ids )
{
const auto& t = id->GetType();
auto tn = FullTypeName(t);
// Allow the captures to be modified.
p_types.emplace_back(string(tn) + "& ");
}
}
void CPPCompile::GatherParamNames(vector<string>& p_names, const FuncTypePtr& ft,
const IDPList* lambda_ids, const ProfileFunc* pf)
{
const auto& params = ft->Params();
int n = params->NumFields();
for ( auto i = 0; i < n; ++i )
{
const auto& t = params->GetFieldType(i);
auto param_id = FindParam(i, pf);
if ( param_id ) if ( param_id )
{ {
@ -255,50 +362,22 @@ string CPPCompile::ParamDecl(const FuncTypePtr& ft, const IDPList* lambda_ids,
// We'll need to translate the parameter // We'll need to translate the parameter
// from its current representation to // from its current representation to
// type "any". // type "any".
fn = string("any_param__CPP_") + Fmt(i); p_names.emplace_back(string("any_param__CPP_") + Fmt(i));
else else
fn = LocalName(param_id); p_names.emplace_back(LocalName(param_id));
} }
else else
// Parameters that are unused don't wind up // Parameters that are unused don't wind up in the
// in the ProfileFunc. Rather than dig their // ProfileFunc. Rather than dig their name out of
// name out of the function's declaration, we // the function's declaration, we explicitly name
// explicitly name them to reflect that they're // them to reflect that they're unused.
// unused. p_names.emplace_back(string("unused_param__CPP_") + Fmt(i));
fn = string("unused_param__CPP_") + Fmt(i);
if ( IsNativeType(t) )
// Native types are always pass-by-value.
decl = decl + tn + " " + fn;
else
{
if ( param_id && pf->Assignees().count(param_id) > 0 )
// We modify the parameter.
decl = decl + tn + " " + fn;
else
// Not modified, so pass by const reference.
decl = decl + "const " + tn + "& " + fn;
}
decl += ", ";
} }
if ( lambda_ids ) if ( lambda_ids )
{
// Add the captures as additional parameters. // Add the captures as additional parameters.
for ( auto& id : *lambda_ids ) for ( auto& id : *lambda_ids )
{ p_names.emplace_back(lambda_names[id]);
auto name = lambda_names[id];
const auto& t = id->GetType();
auto tn = FullTypeName(t);
// Allow the captures to be modified.
decl = decl + tn + "& " + name + ", ";
}
}
// Add in the declaration of the frame.
return decl + "Frame* f__CPP";
} }
const ID* CPPCompile::FindParam(int i, const ProfileFunc* pf) const ID* CPPCompile::FindParam(int i, const ProfileFunc* pf)

View file

@ -12,14 +12,13 @@ namespace zeek::detail
using namespace std; using namespace std;
CPPCompile::CPPCompile(vector<FuncInfo>& _funcs, ProfileFuncs& _pfs, const string& gen_name, CPPCompile::CPPCompile(vector<FuncInfo>& _funcs, ProfileFuncs& _pfs, const string& gen_name,
const string& _addl_name, CPPHashManager& _hm, bool _update, const string& _addl_name, CPPHashManager& _hm, bool _standalone,
bool _standalone, bool report_uncompilable) bool report_uncompilable)
: funcs(_funcs), pfs(_pfs), hm(_hm), update(_update), standalone(_standalone) : funcs(_funcs), pfs(_pfs), hm(_hm), standalone(_standalone)
{ {
addl_name = _addl_name; addl_name = _addl_name;
bool is_addl = hm.IsAppend(); auto target_name = gen_name.c_str();
auto target_name = is_addl ? addl_name.c_str() : gen_name.c_str(); auto mode = "w";
auto mode = is_addl ? "a" : "w";
write_file = fopen(target_name, mode); write_file = fopen(target_name, mode);
if ( ! write_file ) if ( ! write_file )
@ -27,30 +26,6 @@ CPPCompile::CPPCompile(vector<FuncInfo>& _funcs, ProfileFuncs& _pfs, const strin
reporter->Error("can't open C++ target file %s", target_name); reporter->Error("can't open C++ target file %s", target_name);
exit(1); exit(1);
} }
if ( is_addl )
{
// We need a unique number to associate with the name
// space for the code we're adding. A convenient way to
// generate this safely is to use the present size of the
// file we're appending to. That guarantees that every
// incremental compilation will wind up with a different
// number.
struct stat st;
if ( fstat(fileno(write_file), &st) != 0 )
{
char buf[256];
util::zeek_strerror_r(errno, buf, sizeof(buf));
reporter->Error("fstat failed on %s: %s", target_name, buf);
exit(1);
}
// We use a value of "0" to mean "we're not appending,
// we're generating from scratch", so make sure we're
// distinct from that.
addl_tag = st.st_size + 1;
}
else else
{ {
// Create an empty "additional" file. // Create an empty "additional" file.
@ -83,10 +58,6 @@ void CPPCompile::Compile(bool report_uncompilable)
working_dir = buf; working_dir = buf;
if ( update && addl_tag > 0 && CheckForCollisions() )
// Inconsistent compilation environment.
exit(1);
GenProlog(); GenProlog();
// Determine which functions we can call directly, and reuse // Determine which functions we can call directly, and reuse
@ -100,9 +71,13 @@ void CPPCompile::Compile(bool report_uncompilable)
const char* reason; const char* reason;
if ( IsCompilable(func, &reason) ) if ( IsCompilable(func, &reason) )
compilable_funcs.insert(BodyName(func)); compilable_funcs.insert(BodyName(func));
else if ( reason && report_uncompilable ) else
fprintf(stderr, "%s cannot be compiled to C++ due to %s\n", func.Func()->Name(), {
reason); if ( reason && report_uncompilable )
fprintf(stderr, "%s cannot be compiled to C++ due to %s\n", func.Func()->Name(),
reason);
not_fully_compilable.insert(func.Func()->Name());
}
auto h = func.Profile()->HashVal(); auto h = func.Profile()->HashVal();
if ( hm.HasHash(h) ) if ( hm.HasHash(h) )
@ -119,39 +94,24 @@ void CPPCompile::Compile(bool report_uncompilable)
{ {
TypePtr tp{NewRef{}, (Type*)(t)}; TypePtr tp{NewRef{}, (Type*)(t)};
types.AddKey(tp, pfs.HashType(t)); types.AddKey(tp, pfs.HashType(t));
(void)RegisterType(tp);
} }
for ( const auto& t : types.DistinctKeys() ) // ### This doesn't work for -O add-C++
if ( ! types.IsInherited(t) ) Emit("TypePtr types__CPP[%s];", Fmt(static_cast<int>(types.DistinctKeys().size())));
// Type is new to this compilation, so we'll
// be generating it.
Emit("TypePtr %s;", types.KeyName(t));
NL(); NL();
for ( const auto& c : pfs.Constants() ) #if 0
AddConstant(c); for ( auto gi : all_global_info )
Emit(gi->Declare());
NL(); NL();
#endif
for ( auto& g : pfs.AllGlobals() ) for ( auto& g : pfs.AllGlobals() )
CreateGlobal(g); CreateGlobal(g);
// Now that the globals are created, register their attributes,
// if any, and generate their initialization for use in standalone
// scripts. We can't do these in CreateGlobal() because at that
// point it's possible that some of the globals refer to other
// globals not-yet-created.
for ( auto& g : pfs.AllGlobals() )
{
RegisterAttributes(g->GetAttrs());
if ( g->HasVal() )
{
auto gn = string(g->Name());
GenGlobalInit(g, globals[gn], g->GetVal());
}
}
for ( const auto& e : pfs.Events() ) for ( const auto& e : pfs.Events() )
if ( AddGlobal(e, "gl", false) ) if ( AddGlobal(e, "gl", false) )
Emit("EventHandlerPtr %s_ev;", globals[string(e)]); Emit("EventHandlerPtr %s_ev;", globals[string(e)]);
@ -201,10 +161,13 @@ void CPPCompile::Compile(bool report_uncompilable)
lambda_names.insert(n); lambda_names.insert(n);
} }
NL();
Emit("std::vector<CPP_RegisterBody> CPP__bodies_to_register = {");
for ( const auto& f : compiled_funcs ) for ( const auto& f : compiled_funcs )
RegisterCompiledBody(f); RegisterCompiledBody(f);
GenFuncVarInits(); Emit("};");
GenEpilog(); GenEpilog();
} }
@ -217,12 +180,75 @@ void CPPCompile::GenProlog()
Emit("namespace zeek::detail { //\n"); Emit("namespace zeek::detail { //\n");
} }
Emit("namespace CPP_%s { // %s\n", Fmt(addl_tag), working_dir.c_str()); Emit("namespace CPP_%s { // %s\n", Fmt(addl_tag), working_dir);
// The following might-or-might-not wind up being populated/used. // The following might-or-might-not wind up being populated/used.
Emit("std::vector<int> field_mapping;"); Emit("std::vector<int> field_mapping;");
Emit("std::vector<int> enum_mapping;"); Emit("std::vector<int> enum_mapping;");
NL(); NL();
const_info[TYPE_BOOL] = CreateConstInitInfo("Bool", "ValPtr", "bool");
const_info[TYPE_INT] = CreateConstInitInfo("Int", "ValPtr", "bro_int_t");
const_info[TYPE_COUNT] = CreateConstInitInfo("Count", "ValPtr", "bro_uint_t");
const_info[TYPE_DOUBLE] = CreateConstInitInfo("Double", "ValPtr", "double");
const_info[TYPE_TIME] = CreateConstInitInfo("Time", "ValPtr", "double");
const_info[TYPE_INTERVAL] = CreateConstInitInfo("Interval", "ValPtr", "double");
const_info[TYPE_ADDR] = CreateConstInitInfo("Addr", "ValPtr", "");
const_info[TYPE_SUBNET] = CreateConstInitInfo("SubNet", "ValPtr", "");
const_info[TYPE_PORT] = CreateConstInitInfo("Port", "ValPtr", "uint32_t");
const_info[TYPE_ENUM] = CreateCompoundInitInfo("Enum", "ValPtr");
const_info[TYPE_STRING] = CreateCompoundInitInfo("String", "ValPtr");
const_info[TYPE_LIST] = CreateCompoundInitInfo("List", "ValPtr");
const_info[TYPE_PATTERN] = CreateCompoundInitInfo("Pattern", "ValPtr");
const_info[TYPE_VECTOR] = CreateCompoundInitInfo("Vector", "ValPtr");
const_info[TYPE_RECORD] = CreateCompoundInitInfo("Record", "ValPtr");
const_info[TYPE_TABLE] = CreateCompoundInitInfo("Table", "ValPtr");
const_info[TYPE_FUNC] = CreateCompoundInitInfo("Func", "ValPtr");
const_info[TYPE_FILE] = CreateCompoundInitInfo("File", "ValPtr");
type_info = CreateCompoundInitInfo("Type", "Ptr");
attr_info = CreateCompoundInitInfo("Attr", "Ptr");
attrs_info = CreateCompoundInitInfo("Attributes", "Ptr");
call_exprs_info = CreateCustomInitInfo("CallExpr", "Ptr");
lambda_reg_info = CreateCustomInitInfo("LambdaRegistration", "");
global_id_info = CreateCustomInitInfo("GlobalID", "");
NL();
DeclareDynCPPStmt();
NL();
}
// Builds the initialization-info object for a basic constant type and
// hands it to RegisterInitInfo(), returning whatever that call returns.
//
// tag:    short name identifying the kind of constant (e.g. "Bool").
// type:   suffix passed along with the tag (e.g. "ValPtr").
// c_type: name of the underlying C++ type (may be empty, as for "Addr").
shared_ptr<CPP_InitsInfo> CPPCompile::CreateConstInitInfo(const char* tag, const char* type,
                                                          const char* c_type)
	{
	return RegisterInitInfo(tag, type, make_shared<CPP_BasicConstInitsInfo>(tag, type, c_type));
	}
// Builds the initialization-info object for a compound kind of value
// and hands it to RegisterInitInfo(), returning whatever that call
// returns.
//
// tag:  short name identifying the kind of object (e.g. "Vector").
// type: suffix passed along with the tag (e.g. "ValPtr" or "Ptr").
shared_ptr<CPP_InitsInfo> CPPCompile::CreateCompoundInitInfo(const char* tag, const char* type)
	{
	return RegisterInitInfo(tag, type, make_shared<CPP_CompoundInitsInfo>(tag, type));
	}
// Builds the initialization-info object for a "custom" kind of
// initialization and hands it to RegisterInitInfo(), returning whatever
// that call returns.
//
// tag:  short name identifying the kind of object (e.g. "GlobalID").
// type: suffix passed along with the tag; may be the empty string.
shared_ptr<CPP_InitsInfo> CPPCompile::CreateCustomInitInfo(const char* tag, const char* type)
	{
	auto custom_info = make_shared<CPP_CustomInitsInfo>(tag, type);

	// With an empty type suffix, the C++ type is explicitly set
	// to a plain "void*".
	const bool has_no_type = *type == '\0';
	if ( has_no_type )
		custom_info->SetCPPType("void*");

	return RegisterInitInfo(tag, type, custom_info);
	}
shared_ptr<CPP_InitsInfo> CPPCompile::RegisterInitInfo(const char* tag, const char* type,
shared_ptr<CPP_InitsInfo> gi)
{
string v_type = type[0] ? (string(tag) + type) : "void*";
Emit("std::vector<%s> CPP__%s__;", v_type, string(tag));
all_global_info.insert(gi);
return gi;
} }
void CPPCompile::RegisterCompiledBody(const string& f) void CPPCompile::RegisterCompiledBody(const string& f)
@ -252,74 +278,135 @@ void CPPCompile::RegisterCompiledBody(const string& f)
// same binary). // same binary).
h = merge_p_hashes(h, p_hash(cf_locs[f])); h = merge_p_hashes(h, p_hash(cf_locs[f]));
auto init = string("register_body__CPP(make_intrusive<") + f + "_cl>(\"" + f + "\"), " + ASSERT(func_index.count(f) > 0);
Fmt(p) + ", " + Fmt(h) + ", " + events + ");"; auto type_signature = casting_index[func_index[f]];
Emit("\tCPP_RegisterBody(\"%s\", (void*) %s, %s, %s, %s, std::vector<std::string>(%s)),", f, f,
AddInit(names_to_bodies[f], init); Fmt(type_signature), Fmt(p), Fmt(h), events);
if ( update )
{
fprintf(hm.HashFile(), "func\n%s%s\n", scope_prefix(addl_tag).c_str(), f.c_str());
fprintf(hm.HashFile(), "%llu\n", h);
}
} }
void CPPCompile::GenEpilog() void CPPCompile::GenEpilog()
{ {
NL(); NL();
for ( const auto& ii : init_infos )
GenInitExpr(ii.second);
for ( const auto& e : init_exprs.DistinctKeys() ) NL();
Emit("ValPtr CPPDynStmt::Exec(Frame* f, StmtFlowType& flow)");
StartBlock();
Emit("flow = FLOW_RETURN;");
Emit("switch ( type_signature )");
StartBlock();
for ( auto i = 0U; i < func_casting_glue.size(); ++i )
{ {
GenInitExpr(e); Emit("case %s:", to_string(i));
if ( update ) StartBlock();
init_exprs.LogIfNew(e, addl_tag, hm.HashFile()); auto& glue = func_casting_glue[i];
auto invoke = string("(*(") + glue.cast + ")(func))(" + glue.args + ")";
if ( glue.is_hook )
{
Emit("if ( ! %s )", invoke);
StartBlock();
Emit("flow = FLOW_BREAK;");
EndBlock();
Emit("return nullptr;");
}
else if ( IsNativeType(glue.yield) )
GenInvokeBody(invoke, glue.yield);
else
Emit("return %s;", invoke);
EndBlock();
} }
for ( const auto& a : attributes.DistinctKeys() ) Emit("default:");
{ Emit("\treporter->InternalError(\"invalid type in CPPDynStmt::Exec\");");
GenAttrs(a); Emit("\treturn nullptr;");
if ( update )
attributes.LogIfNew(a, addl_tag, hm.HashFile());
}
// Generate the guts of compound types, and preserve type names EndBlock();
// if present. EndBlock();
for ( const auto& t : types.DistinctKeys() )
{
ExpandTypeVar(t);
if ( update )
types.LogIfNew(t, addl_tag, hm.HashFile());
}
InitializeEnumMappings(); NL();
GenPreInits(); for ( auto gi : all_global_info )
gi->GenerateInitializers(this);
unordered_set<const Obj*> to_do;
for ( const auto& oi : obj_inits )
to_do.insert(oi.first);
CheckInitConsistency(to_do);
auto nc = GenDependentInits(to_do);
if ( standalone ) if ( standalone )
GenStandaloneActivation(); GenStandaloneActivation();
NL();
InitializeEnumMappings();
NL();
InitializeFieldMappings();
NL();
InitializeBiFs();
NL();
indices_mgr.Generate(this);
NL();
InitializeStrings();
NL();
InitializeHashes();
NL();
InitializeConsts();
NL(); NL();
Emit("void init__CPP()"); Emit("void init__CPP()");
StartBlock(); StartBlock();
Emit("enum_mapping.resize(%s);\n", Fmt(int(enum_names.size()))); Emit("std::vector<std::vector<int>> InitIndices;");
Emit("pre_init__CPP();"); Emit("generate_indices_set(CPP__Indices__init, InitIndices);");
Emit("std::map<TypeTag, std::shared_ptr<CPP_AbstractInitAccessor>> InitConsts;");
NL(); NL();
for ( auto i = 1; i <= nc; ++i ) for ( const auto& ci : const_info )
Emit("init_%s__CPP();", Fmt(i)); {
auto& gi = ci.second;
Emit("InitConsts.emplace(%s, std::make_shared<CPP_InitAccessor<%s>>(%s));",
TypeTagName(ci.first), gi->CPPType(), gi->InitsName());
}
Emit("InitsManager im(CPP__ConstVals, InitConsts, InitIndices, CPP__Strings, CPP__Hashes, "
"CPP__Type__, CPP__Attributes__, CPP__Attr__, CPP__CallExpr__);");
NL();
Emit("for ( auto& b : CPP__bodies_to_register )");
StartBlock();
Emit("auto f = make_intrusive<CPPDynStmt>(b.func_name.c_str(), b.func, b.type_signature);");
Emit("register_body__CPP(f, b.priority, b.h, b.events);");
EndBlock();
NL();
int max_cohort = 0;
for ( auto gi : all_global_info )
max_cohort = std::max(max_cohort, gi->MaxCohort());
for ( auto c = 0; c <= max_cohort; ++c )
for ( auto gi : all_global_info )
if ( gi->CohortSize(c) > 0 )
Emit("%s.InitializeCohort(&im, %s);", gi->InitializersName(), Fmt(c));
NL();
Emit("for ( auto& b : CPP__BiF_lookups__ )");
Emit("\tb.ResolveBiF();");
// Populate mappings for dynamic offsets. // Populate mappings for dynamic offsets.
NL(); NL();
InitializeFieldMappings(); Emit("for ( auto& em : CPP__enum_mappings__ )");
Emit("\tenum_mapping.push_back(em.ComputeOffset(&im));");
NL();
Emit("for ( auto& fm : CPP__field_mappings__ )");
Emit("\tfield_mapping.push_back(fm.ComputeOffset(&im));");
if ( standalone ) if ( standalone )
Emit("standalone_init__CPP();"); Emit("standalone_init__CPP();");
@ -328,10 +415,7 @@ void CPPCompile::GenEpilog()
GenInitHook(); GenInitHook();
Emit("} // %s\n\n", scope_prefix(addl_tag).c_str()); Emit("} // %s\n\n", scope_prefix(addl_tag));
if ( update )
UpdateGlobalHashes();
if ( addl_tag > 0 ) if ( addl_tag > 0 )
return; return;

View file

@ -13,75 +13,14 @@ using namespace std;
void CPPCompile::StartBlock() void CPPCompile::StartBlock()
{ {
++block_level; IndentUp();
Emit("{"); Emit("{");
} }
void CPPCompile::EndBlock(bool needs_semi) void CPPCompile::EndBlock(bool needs_semi)
{ {
Emit("}%s", needs_semi ? ";" : ""); Emit("}%s", needs_semi ? ";" : "");
--block_level; IndentDown();
}
// Returns the text of a C++ expression that constructs a StringVal
// from the "len" bytes starting at "b":
//
//	make_intrusive<StringVal>(<len>, <escaped literal>)
//
// The literal is produced by CPPEscape().
string CPPCompile::GenString(const char* b, int len) const
	{
	string expr = "make_intrusive<StringVal>(";
	expr += Fmt(len);
	expr += ", ";
	expr += CPPEscape(b, len);
	expr += ")";

	return expr;
	}
string CPPCompile::CPPEscape(const char* b, int len) const
{
string res = "\"";
for ( int i = 0; i < len; ++i )
{
unsigned char c = b[i];
switch ( c )
{
case '\a':
res += "\\a";
break;
case '\b':
res += "\\b";
break;
case '\f':
res += "\\f";
break;
case '\n':
res += "\\n";
break;
case '\r':
res += "\\r";
break;
case '\t':
res += "\\t";
break;
case '\v':
res += "\\v";
break;
case '\\':
res += "\\\\";
break;
case '"':
res += "\\\"";
break;
default:
if ( isprint(c) )
res += c;
else
{
char buf[8192];
snprintf(buf, sizeof buf, "%03o", c);
res += "\\";
res += buf;
}
break;
}
}
return res + "\"";
} }
void CPPCompile::Indent() const void CPPCompile::Indent() const

View file

@ -232,7 +232,12 @@ string CPPCompile::GenConstExpr(const ConstExpr* c, GenType gt)
const auto& t = c->GetType(); const auto& t = c->GetType();
if ( ! IsNativeType(t) ) if ( ! IsNativeType(t) )
return NativeToGT(const_vals[c->Value()], t, gt); {
auto v = c->ValuePtr();
int consts_offset; // ignored
(void)RegisterConstant(v, consts_offset);
return NativeToGT(const_vals[v.get()]->Name(), t, gt);
}
return NativeToGT(GenVal(c->ValuePtr()), t, gt); return NativeToGT(GenVal(c->ValuePtr()), t, gt);
} }
@ -1177,8 +1182,10 @@ string CPPCompile::GenField(const ExprPtr& rec, int field)
// New mapping. // New mapping.
mapping_slot = num_rf_mappings++; mapping_slot = num_rf_mappings++;
ASSERT(processed_types.count(rt) > 0);
auto rt_offset = processed_types[rt]->Offset();
string field_name = rt->FieldName(field); string field_name = rt->FieldName(field);
field_decls.emplace_back(pair(rt, rt->FieldDecl(field))); field_decls.emplace_back(pair(rt_offset, rt->FieldDecl(field)));
if ( record_field_mappings.count(rt) > 0 ) if ( record_field_mappings.count(rt) > 0 )
// We're already tracking this record. // We're already tracking this record.
@ -1217,7 +1224,7 @@ string CPPCompile::GenEnum(const TypePtr& t, const ValPtr& ev)
mapping_slot = num_ev_mappings++; mapping_slot = num_ev_mappings++;
string enum_name = et->Lookup(v); string enum_name = et->Lookup(v);
enum_names.emplace_back(pair(et, move(enum_name))); enum_names.emplace_back(pair(TypeOffset(t), move(enum_name)));
if ( enum_val_mappings.count(et) > 0 ) if ( enum_val_mappings.count(et) > 0 )
{ {

View file

@ -34,10 +34,8 @@ void CPPCompile::CompileLambda(const LambdaExpr* l, const ProfileFunc* pf)
DefineBody(l_id->GetType<FuncType>(), pf, lname, body, &ids, FUNC_FLAVOR_FUNCTION); DefineBody(l_id->GetType<FuncType>(), pf, lname, body, &ids, FUNC_FLAVOR_FUNCTION);
} }
void CPPCompile::GenInvokeBody(const string& fname, const TypePtr& t, const string& args) void CPPCompile::GenInvokeBody(const string& call, const TypePtr& t)
{ {
auto call = fname + "(" + args + ")";
if ( ! t || t->Tag() == TYPE_VOID ) if ( ! t || t->Tag() == TYPE_VOID )
{ {
Emit("%s;", call); Emit("%s;", call);
@ -144,7 +142,7 @@ void CPPCompile::InitializeEvents(const ProfileFunc* pf)
// returns an EventHandlerPtr, sigh. // returns an EventHandlerPtr, sigh.
Emit("if ( event_registry->Lookup(\"%s\") )", e); Emit("if ( event_registry->Lookup(\"%s\") )", e);
StartBlock(); StartBlock();
Emit("%s = event_registry->Register(\"%s\");", ev_name.c_str(), e); Emit("%s = event_registry->Register(\"%s\");", ev_name, e);
EndBlock(); EndBlock();
Emit("did_init = true;"); Emit("did_init = true;");
EndBlock(); EndBlock();
@ -233,6 +231,16 @@ string CPPCompile::BodyName(const FuncInfo& func)
return fname + "__" + Fmt(static_cast<int>(i)); return fname + "__" + Fmt(static_cast<int>(i));
} }
// Returns the hash recorded for the given function body.
//
// The lookup goes in two steps: body -> name (via body_names), then
// name -> hash (via body_hashes). Both entries are asserted to exist.
p_hash_type CPPCompile::BodyHash(const Stmt* body)
	{
	ASSERT(body_names.count(body) > 0);
	const auto& name_of_body = body_names[body];

	ASSERT(body_hashes.count(name_of_body) > 0);
	return body_hashes[name_of_body];
	}
string CPPCompile::GenArgs(const RecordTypePtr& params, const Expr* e) string CPPCompile::GenArgs(const RecordTypePtr& params, const Expr* e)
{ {
const auto& exprs = e->AsListExpr()->Exprs(); const auto& exprs = e->AsListExpr()->Exprs();

View file

@ -12,28 +12,11 @@ using namespace std;
VarMapper compiled_items; VarMapper compiled_items;
CPPHashManager::CPPHashManager(const char* hash_name_base, bool _append) CPPHashManager::CPPHashManager(const char* hash_name_base)
{ {
append = _append;
hash_name = string(hash_name_base) + ".dat"; hash_name = string(hash_name_base) + ".dat";
if ( append ) hf_w = fopen(hash_name.c_str(), "w");
{
hf_r = fopen(hash_name.c_str(), "r");
if ( ! hf_r )
{
reporter->Error("can't open auxiliary C++ hash file %s for reading", hash_name.c_str());
exit(1);
}
lock_file(hash_name, hf_r);
LoadHashes(hf_r);
}
auto mode = append ? "a" : "w";
hf_w = fopen(hash_name.c_str(), mode);
if ( ! hf_w ) if ( ! hf_w )
{ {
reporter->Error("can't open auxiliary C++ hash file %s for writing", hash_name.c_str()); reporter->Error("can't open auxiliary C++ hash file %s for writing", hash_name.c_str());

View file

@ -27,11 +27,9 @@ public:
// end of the file (and the hash file will be locked, to prevent // end of the file (and the hash file will be locked, to prevent
// overlapping updates from concurrent compilation/appends). // overlapping updates from concurrent compilation/appends).
// Otherwise, the file will be generated afresh. // Otherwise, the file will be generated afresh.
CPPHashManager(const char* hash_name_base, bool append); CPPHashManager(const char* hash_name_base);
~CPPHashManager(); ~CPPHashManager();
bool IsAppend() const { return append; }
// True if the given hash has already been generated. // True if the given hash has already been generated.
bool HasHash(p_hash_type h) const { return previously_compiled.count(h) > 0; } bool HasHash(p_hash_type h) const { return previously_compiled.count(h) > 0; }
@ -96,10 +94,6 @@ protected:
// names, rather than their script-level names. // names, rather than their script-level names.
std::unordered_map<std::string, int> gv_scopes; std::unordered_map<std::string, int> gv_scopes;
// Whether we're appending to existing hash file(s), or starting
// afresh.
bool append;
// Base for file names. // Base for file names.
std::string hash_name; std::string hash_name;

View file

@ -14,12 +14,30 @@ namespace zeek::detail
using namespace std; using namespace std;
void CPPCompile::GenInitExpr(const ExprPtr& e) std::shared_ptr<CPP_InitInfo> CPPCompile::RegisterInitExpr(const ExprPtr& ep)
{
auto ename = InitExprName(ep);
if ( init_infos.count(ename) )
return init_infos[ename];
auto wrapper_cl = string("wrapper_") + ename + "_cl";
auto gi = make_shared<CallExprInitInfo>(this, ep, ename, wrapper_cl);
call_exprs_info->AddInstance(gi);
init_infos[ename] = gi;
return gi;
}
void CPPCompile::GenInitExpr(std::shared_ptr<CallExprInitInfo> ce_init)
{ {
NL(); NL();
const auto& e = ce_init->GetExpr();
const auto& t = e->GetType(); const auto& t = e->GetType();
auto ename = InitExprName(e); const auto& ename = ce_init->Name();
const auto& wc = ce_init->WrapperClass();
// First, create a CPPFunc that we can compile to compute 'e'. // First, create a CPPFunc that we can compile to compute 'e'.
auto name = string("wrapper_") + ename; auto name = string("wrapper_") + ename;
@ -29,18 +47,17 @@ void CPPCompile::GenInitExpr(const ExprPtr& e)
// Create the Func subclass that can be used in a CallExpr to // Create the Func subclass that can be used in a CallExpr to
// evaluate 'e'. // evaluate 'e'.
Emit("class %s_cl : public CPPFunc", name); Emit("class %s : public CPPFunc", wc);
StartBlock(); StartBlock();
Emit("public:"); Emit("public:");
Emit("%s_cl() : CPPFunc(\"%s\", %s)", name, name, e->IsPure() ? "true" : "false"); Emit("%s() : CPPFunc(\"%s\", %s)", wc, name, e->IsPure() ? "true" : "false");
StartBlock(); StartBlock();
Emit("type = make_intrusive<FuncType>(make_intrusive<RecordType>(new type_decl_list()), %s, " Emit("type = make_intrusive<FuncType>(make_intrusive<RecordType>(new type_decl_list()), %s, "
"FUNC_FLAVOR_FUNCTION);", "FUNC_FLAVOR_FUNCTION);",
GenTypeName(t)); GenTypeName(t));
NoteInitDependency(e, TypeRep(t));
EndBlock(); EndBlock();
Emit("ValPtr Invoke(zeek::Args* args, Frame* parent) const override final"); Emit("ValPtr Invoke(zeek::Args* args, Frame* parent) const override final");
@ -62,15 +79,9 @@ void CPPCompile::GenInitExpr(const ExprPtr& e)
EndBlock(); EndBlock();
Emit("CallExprPtr %s;", ename); Emit("CallExprPtr %s;", ename);
NoteInitDependency(e, TypeRep(t));
AddInit(e, ename,
string("make_intrusive<CallExpr>(make_intrusive<ConstExpr>(make_intrusive<FuncVal>("
"make_intrusive<") +
name + "_cl>())), make_intrusive<ListExpr>(), false)");
} }
bool CPPCompile::IsSimpleInitExpr(const ExprPtr& e) const bool CPPCompile::IsSimpleInitExpr(const ExprPtr& e)
{ {
switch ( e->Tag() ) switch ( e->Tag() )
{ {
@ -101,360 +112,83 @@ string CPPCompile::InitExprName(const ExprPtr& e)
return init_exprs.KeyName(e); return init_exprs.KeyName(e);
} }
void CPPCompile::GenGlobalInit(const ID* g, string& gl, const ValPtr& v)
{
const auto& t = v->GetType();
auto tag = t->Tag();
if ( tag == TYPE_FUNC )
// This should get initialized by recognizing hash of
// the function's body.
return;
string init_val;
if ( tag == TYPE_OPAQUE )
{
// We can only generate these by reproducing the expression
// (presumably a function call) used to create the value.
// That isn't fully sound, since if the global's value
// was redef'd in terms of its original value (e.g.,
// "redef x = f(x)"), then we'll wind up with a broken
// expression. It's difficult to detect that in full
// generality, so um Don't Do That. (Note that this
// only affects execution of standalone compiled code,
// where the original scripts are replaced by load-stubs.
// If the scripts are available, then the HasVal() test
// we generate will mean we don't wind up using this
// expression anyway.)
// Use the final initialization expression.
auto& init_exprs = g->GetOptInfo()->GetInitExprs();
init_val = GenExpr(init_exprs.back(), GEN_VAL_PTR, false);
}
else
init_val = BuildConstant(g, v);
auto& attrs = g->GetAttrs();
AddInit(g, string("if ( ! ") + gl + "->HasVal() )");
if ( attrs )
{
RegisterAttributes(attrs);
AddInit(g, "\t{");
AddInit(g, "\t" + gl + "->SetVal(" + init_val + ");");
AddInit(g, "\t" + gl + "->SetAttrs(" + AttrsName(attrs) + ");");
AddInit(g, "\t}");
}
else
AddInit(g, "\t" + gl + "->SetVal(" + init_val + ");");
}
// For each tracked function value, records an initialization that looks
// the function up at run-time by name, body hashes and type.
void CPPCompile::GenFuncVarInits()
	{
	for ( const auto& [fv, const_name] : func_vars )
		{
		auto f = fv->AsFunc();
		const auto& fn = f->Name();
		const auto& ft = f->GetType();

		NoteInitDependency(fv, TypeRep(ft));

		// Build a brace-enclosed, comma-separated list of the
		// hashes of the function's bodies.
		string hashes = "{";
		bool first_hash = true;

		for ( const auto& b : f->GetBodies() )
			{
			auto body = b.stmts.get();

			ASSERT(body_names.count(body) > 0);

			auto& body_name = body_names[body];
			ASSERT(body_hashes.count(body_name) > 0);

			NoteInitDependency(fv, body);

			if ( ! first_hash )
				hashes += ", ";

			first_hash = false;
			hashes += Fmt(body_hashes[body_name]);
			}

		hashes += "}";

		auto init = string("lookup_func__CPP(\"") + fn + "\", " + hashes + ", " + GenTypeName(ft) +
		            ")";

		AddInit(fv, const_name, init);
		}
	}
void CPPCompile::GenPreInit(const Type* t)
{
string pre_init;
switch ( t->Tag() )
{
case TYPE_ADDR:
case TYPE_ANY:
case TYPE_BOOL:
case TYPE_COUNT:
case TYPE_DOUBLE:
case TYPE_ERROR:
case TYPE_INT:
case TYPE_INTERVAL:
case TYPE_PATTERN:
case TYPE_PORT:
case TYPE_STRING:
case TYPE_TIME:
case TYPE_TIMER:
case TYPE_VOID:
pre_init = string("base_type(") + TypeTagName(t->Tag()) + ")";
break;
case TYPE_ENUM:
pre_init = string("get_enum_type__CPP(\"") + t->GetName() + "\")";
break;
case TYPE_SUBNET:
pre_init = string("make_intrusive<SubNetType>()");
break;
case TYPE_FILE:
pre_init = string("make_intrusive<FileType>(") + GenTypeName(t->AsFileType()->Yield()) +
")";
break;
case TYPE_OPAQUE:
pre_init = string("make_intrusive<OpaqueType>(\"") + t->AsOpaqueType()->Name() + "\")";
break;
case TYPE_RECORD:
{
string name;
if ( t->GetName() != "" )
name = string("\"") + t->GetName() + string("\"");
else
name = "nullptr";
pre_init = string("get_record_type__CPP(") + name + ")";
}
break;
case TYPE_LIST:
pre_init = string("make_intrusive<TypeList>()");
break;
case TYPE_TYPE:
case TYPE_VECTOR:
case TYPE_TABLE:
case TYPE_FUNC:
// Nothing to do for these, pre-initialization-wise.
return;
default:
reporter->InternalError("bad type in CPPCompile::GenType");
}
pre_inits.emplace_back(GenTypeName(t) + " = " + pre_init + ";");
}
void CPPCompile::GenPreInits()
{
NL();
Emit("void pre_init__CPP()");
StartBlock();
for ( const auto& i : pre_inits )
Emit(i);
EndBlock();
}
// Appends the statement 'init' to the initializations recorded for 'o',
// creating the entry for 'o' if it doesn't exist yet.
void CPPCompile::AddInit(const Obj* o, const string& init)
	{
	auto& inits_for_o = obj_inits[o];
	inits_for_o.emplace_back(init);
	}
// Ensures that 'o' has an entry in obj_inits, giving it an empty set of
// initializations if it wasn't already present.  An existing entry is
// left as is.
void CPPCompile::AddInit(const Obj* o)
	{
	if ( obj_inits.find(o) != obj_inits.end() )
		return;

	obj_inits[o] = {};
	}
// Records that the initialization of 'o1' depends on that of 'o2'.
void CPPCompile::NoteInitDependency(const Obj* o1, const Obj* o2)
	{
	auto& deps_of_o1 = obj_deps[o1];
	deps_of_o1.emplace(o2);
	}
// Verifies that every object with recorded dependencies, and each of the
// objects it depends on, is present in 'to_do'.  On the first violation,
// prints a diagnostic to stderr and exits with status 1.
void CPPCompile::CheckInitConsistency(unordered_set<const Obj*>& to_do)
	{
	for ( const auto& [obj, deps] : obj_deps )
		{
		if ( to_do.count(obj) == 0 )
			{
			fprintf(stderr, "object not in to_do: %s\n", obj_desc(obj).c_str());
			exit(1);
			}

		for ( const auto& dep : deps )
			{
			if ( to_do.count(dep) > 0 )
				continue;

			fprintf(stderr, "dep object for %s not in to_do: %s\n", obj_desc(obj).c_str(),
			        obj_desc(dep).c_str());
			exit(1);
			}
		}
	}
// Generates the initialization cohorts for the objects in 'to_do',
// emptying it in the process.  Returns the number of cohorts generated.
int CPPCompile::GenDependentInits(unordered_set<const Obj*>& to_do)
	{
	int num_cohorts = 0;

	// The basic approach is fairly brute force: collect the elements
	// of to_do that have no dependency still in to_do; generate those
	// as one cohort; and remove them, which frees up other entries to
	// now have no pending dependencies.  Repeat until nothing is left.
	while ( ! to_do.empty() )
		{
		unordered_set<const Obj*> ready;

		for ( const auto& candidate : to_do )
			{
			bool blocked = false;
			const auto& deps = obj_deps.find(candidate);

			if ( deps != obj_deps.end() )
				for ( const auto& d : deps->second )
					if ( to_do.count(d) > 0 )
						{
						blocked = true;
						break;
						}

			if ( ! blocked )
				ready.insert(candidate);
			}

		ASSERT(ready.size() > 0);

		GenInitCohort(++num_cohorts, ready);

		for ( const auto& done : ready )
			{
			ASSERT(to_do.count(done) > 0);
			to_do.erase(done);
			}
		}

	return num_cohorts;
	}
void CPPCompile::GenInitCohort(int nc, unordered_set<const Obj*>& cohort)
{
NL();
Emit("void init_%s__CPP()", Fmt(nc));
StartBlock();
// If any script/BiF functions are used for initializing globals,
// the code generated from that will expect the presence of a
// frame pointer, even if nil.
Emit("Frame* f__CPP = nullptr;");
// The following is just for making the output readable/pretty:
// add space between initializations for distinct objects, taking
// into account that some objects have empty initializations.
bool did_an_init = false;
for ( auto o : cohort )
{
if ( did_an_init )
{
NL();
did_an_init = false;
}
for ( const auto& i : obj_inits.find(o)->second )
{
Emit("%s", i);
did_an_init = true;
}
}
EndBlock();
}
void CPPCompile::InitializeFieldMappings() void CPPCompile::InitializeFieldMappings()
{ {
Emit("int fm_offset;"); Emit("std::vector<CPP_FieldMapping> CPP__field_mappings__ = ");
StartBlock();
for ( const auto& mapping : field_decls ) for ( const auto& mapping : field_decls )
{ {
auto rt = mapping.first; auto rt_arg = Fmt(mapping.first);
auto td = mapping.second; auto td = mapping.second;
auto fn = td->id; auto type_arg = Fmt(TypeOffset(td->type));
auto rt_name = GenTypeName(rt) + "->AsRecordType()"; auto attrs_arg = Fmt(AttributesOffset(td->attrs));
Emit("fm_offset = %s->FieldOffset(\"%s\");", rt_name, fn); Emit("CPP_FieldMapping(%s, \"%s\", %s, %s),", rt_arg, td->id, type_arg, attrs_arg);
Emit("if ( fm_offset < 0 )");
StartBlock();
Emit("// field does not exist, create it");
Emit("fm_offset = %s->NumFields();", rt_name);
Emit("type_decl_list tl;");
Emit(GenTypeDecl(td));
Emit("%s->AddFieldsDirectly(tl);", rt_name);
EndBlock();
Emit("field_mapping.push_back(fm_offset);");
} }
EndBlock(true);
} }
void CPPCompile::InitializeEnumMappings() void CPPCompile::InitializeEnumMappings()
{ {
int n = 0; Emit("std::vector<CPP_EnumMapping> CPP__enum_mappings__ = ");
StartBlock();
for ( const auto& mapping : enum_names ) for ( const auto& mapping : enum_names )
InitializeEnumMappings(mapping.first, mapping.second, n++); Emit("CPP_EnumMapping(%s, \"%s\"),", Fmt(mapping.first), mapping.second);
EndBlock(true);
} }
void CPPCompile::InitializeEnumMappings(const EnumType* et, const string& e_name, int index) void CPPCompile::InitializeBiFs()
{ {
AddInit(et, "{"); Emit("std::vector<CPP_LookupBiF> CPP__BiF_lookups__ = ");
auto et_name = GenTypeName(et) + "->AsEnumType()"; StartBlock();
AddInit(et, "int em_offset = " + et_name + "->Lookup(\"" + e_name + "\");");
AddInit(et, "if ( em_offset < 0 )");
AddInit(et, "\t{"); for ( const auto& b : BiFs )
AddInit(et, "\tem_offset = " + et_name + "->Names().size();"); Emit("CPP_LookupBiF(%s, \"%s\"),", b.first, b.second);
// The following is to catch the case where the offset is already
// in use due to it being specified explicitly for an existing enum.
AddInit(et, "\tif ( " + et_name + "->Lookup(em_offset) )");
AddInit(
et,
"\t\treporter->InternalError(\"enum inconsistency while initializing compiled scripts\");");
AddInit(et, "\t" + et_name + "->AddNameInternal(\"" + e_name + "\", em_offset);");
AddInit(et, "\t}");
AddInit(et, "enum_mapping[" + Fmt(index) + "] = em_offset;"); EndBlock(true);
}
AddInit(et, "}"); void CPPCompile::InitializeStrings()
{
Emit("std::vector<const char*> CPP__Strings =");
StartBlock();
for ( const auto& s : ordered_tracked_strings )
Emit("\"%s\",", s);
EndBlock(true);
}
// Emits the definition of the CPP__Hashes vector, with one element per
// tracked hash, in tracking order.
void CPPCompile::InitializeHashes()
	{
	Emit("std::vector<p_hash_type> CPP__Hashes =");
	StartBlock();

	for ( const auto& tracked_hash : ordered_tracked_hashes )
		{
		auto elem = Fmt(tracked_hash) + ",";
		Emit(elem);
		}

	EndBlock(true);
	}
void CPPCompile::InitializeConsts()
{
Emit("std::vector<CPP_ValElem> CPP__ConstVals =");
StartBlock();
for ( const auto& c : consts )
Emit("CPP_ValElem(%s, %s),", TypeTagName(c.first), Fmt(c.second));
EndBlock(true);
} }
void CPPCompile::GenInitHook() void CPPCompile::GenInitHook()
@ -482,11 +216,13 @@ void CPPCompile::GenStandaloneActivation()
{ {
NL(); NL();
#if 0
Emit("void standalone_activation__CPP()"); Emit("void standalone_activation__CPP()");
StartBlock(); StartBlock();
for ( auto& a : activations ) for ( auto& a : activations )
Emit(a); Emit(a);
EndBlock(); EndBlock();
#endif
NL(); NL();
Emit("void standalone_init__CPP()"); Emit("void standalone_init__CPP()");

View file

@ -0,0 +1,577 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include "zeek/Desc.h"
#include "zeek/RE.h"
#include "zeek/ZeekString.h"
#include "zeek/script_opt/CPP/Attrs.h"
#include "zeek/script_opt/CPP/Compile.h"
using namespace std;
namespace zeek::detail
{
// Returns the C++ expression for accessing element 'index' of this
// collection's run-time vector, i.e., "<base_name>[<index>]".
string CPP_InitsInfo::Name(int index) const
	{
	string elem_name = base_name;
	elem_name += "[";
	elem_name += Fmt(index);
	elem_name += "]";
	return elem_name;
	}
// Adds 'g' to the collection: assigns it the next available offset and
// files it under its initialization cohort, growing the vector of
// cohorts if needed.
void CPP_InitsInfo::AddInstance(shared_ptr<CPP_InitInfo> g)
	{
	const auto cohort = g->InitCohort();
	const auto num_cohorts = static_cast<int>(instances.size());

	if ( cohort >= num_cohorts )
		instances.resize(cohort + 1);

	g->SetOffset(this, size);
	++size;

	instances[cohort].push_back(move(g));
	}
// Returns the C++ declaration of the run-time vector that holds this
// collection's items.
string CPP_InitsInfo::Declare() const
	{
	string decl = "std::vector<";
	decl += CPPType();
	decl += "> ";
	decl += base_name;
	decl += ";";
	return decl;
	}
// Emits the definition of this collection's initialization table: its
// constructor call, whose last argument is a brace-enclosed list with
// one brace-enclosed element per cohort.
void CPP_InitsInfo::GenerateInitializers(CPPCompile* c)
	{
	// offset_set gets emitted below, so compute it first.
	BuildOffsetSet(c);

	c->NL();

	auto table_type = InitsType();

	// Declare the initializer.
	c->Emit("%s %s = %s(%s, %s,", table_type, InitializersName(), table_type, base_name,
	        Fmt(offset_set));

	c->IndentUp();
	c->Emit("{");

	// Each cohort becomes one element of the outer vector.
	for ( auto& cohort_items : instances )
		{
		c->Emit("{");
		BuildCohort(c, cohort_items);
		c->Emit("},");
		}

	c->Emit("}");
	c->IndentDown();
	c->Emit(");");
	}
// Computes offset_set: each cohort's vector of item offsets is reduced
// to an index via the indices manager, and the vector of those
// per-cohort indices is then itself reduced to a single index.
void CPP_InitsInfo::BuildOffsetSet(CPPCompile* c)
	{
	vector<int> cohort_indices;

	for ( auto& cohort_items : instances )
		{
		// Gather the offsets used by this cohort ...
		vector<int> item_offsets;
		item_offsets.reserve(cohort_items.size());

		for ( auto& item : cohort_items )
			item_offsets.push_back(item->Offset());

		// ... and reduce them to an offset into the managed
		// vector-of-indices global.
		auto cohort_index = c->IndMgr().AddIndices(item_offsets);
		cohort_indices.push_back(cohort_index);
		}

	// Now reduce the per-cohort indices, too, to an offset into the
	// managed vector-of-indices global.
	offset_set = c->IndMgr().AddIndices(cohort_indices);
	}
// Emits one table element for each item in 'cohort', built from the
// item's initializer type and initializer values.
void CPP_InitsInfo::BuildCohort(CPPCompile* c, std::vector<std::shared_ptr<CPP_InitInfo>>& cohort)
	{
	for ( auto& item : cohort )
		{
		vector<string> init_vals;
		item->InitializerVals(init_vals);

		BuildCohortElement(c, item->InitializerType(), init_vals);
		}
	}
// Emits a table element that constructs a shared 'init_type' object,
// passing the values in 'ivs' as comma-separated constructor arguments.
void CPP_InitsInfo::BuildCohortElement(CPPCompile* c, string init_type, vector<string>& ivs)
	{
	string args;

	for ( size_t i = 0; i < ivs.size(); ++i )
		{
		if ( i > 0 )
			args += ", ";

		args += ivs[i];
		}

	c->Emit("std::make_shared<%s>(%s),", init_type, args);
	}
// Emits a table element consisting of a brace-enclosed list of the
// values in 'ivs', each followed by ", ".  'init_type' is not used.
void CPP_CompoundInitsInfo::BuildCohortElement(CPPCompile* c, string init_type, vector<string>& ivs)
	{
	string elems;

	for ( auto& iv : ivs )
		{
		elems += iv;
		elems += ", ";
		}

	c->Emit("{ %s},", elems);
	}
// Emits a table element that's just the single value in 'ivs' followed
// by a comma.  'init_type' is not used.
void CPP_BasicConstInitsInfo::BuildCohortElement(CPPCompile* c, string init_type,
                                                 vector<string>& ivs)
	{
	ASSERT(ivs.size() == 1);

	auto elem = ivs[0] + ",";
	c->Emit(elem);
	}
// Returns the string form of the constants-table offset for 'v', or of
// -1 if 'v' is nil.  For a non-nil 'v', registers the constant and raises
// this item's cohort to at least one more than the constant's cohort.
string CPP_InitInfo::ValElem(CPPCompile* c, ValPtr v)
	{
	if ( ! v )
		return Fmt(-1);

	// Filled in by RegisterConstant(); start it at the "no value" offset
	// rather than leaving it indeterminate.
	int consts_offset = -1;
	auto gi = c->RegisterConstant(v, consts_offset);

	init_cohort = max(init_cohort, gi->InitCohort() + 1);

	return Fmt(consts_offset);
	}
// Tracks the textual description of 'v' as a string and records the
// string form of the resulting tracking value as the initializer.
DescConstInfo::DescConstInfo(CPPCompile* c, ValPtr v) : CPP_InitInfo()
	{
	ODesc desc;
	v->Describe(&desc);

	auto tracked = c->TrackString(desc.Description());
	init = Fmt(tracked);
	}
// Records the offset of the enum's type and the enum's value.  The
// cohort is one more than that of the enum's type.
EnumConstInfo::EnumConstInfo(CPPCompile* c, ValPtr v)
	{
	auto& enum_type = v->AsEnumVal()->GetType();

	e_type = c->TypeOffset(enum_type);
	init_cohort = c->TypeCohort(enum_type) + 1;

	e_val = v->AsEnum();
	}
// Records the string's length along with the tracking value for the
// C++-escaped form of its bytes.
StringConstInfo::StringConstInfo(CPPCompile* c, ValPtr v) : CPP_InitInfo()
	{
	auto str = v->AsString();
	auto str_bytes = reinterpret_cast<const char*>(str->Bytes());

	len = str->Len();
	chars = c->TrackString(CPPEscape(str_bytes, len));
	}
// Records the tracking value for the C++-escaped original text of the
// pattern, and whether the pattern is case-insensitive.
PatternConstInfo::PatternConstInfo(CPPCompile* c, ValPtr v) : CPP_InitInfo()
	{
	auto matcher = v->AsPatternVal()->Get();

	pattern = c->TrackString(CPPEscape(matcher->OrigText()));
	is_case_insensitive = matcher->IsCaseInsensitive();
	}
// Records the offset of the value's type.  The cohort is one more than
// that of the type.
CompoundItemInfo::CompoundItemInfo(CPPCompile* _c, ValPtr v) : CPP_InitInfo(), c(_c)
	{
	auto& val_type = v->GetType();

	type = c->TypeOffset(val_type);
	init_cohort = c->TypeCohort(val_type) + 1;
	}
// Records, in order, the constant-table element for each value in the
// list.
ListConstInfo::ListConstInfo(CPPCompile* _c, ValPtr v) : CompoundItemInfo(_c)
	{
	auto list_val = cast_intrusive<ListVal>(v);
	auto num_elems = list_val->Length();

	for ( auto idx = 0U; idx < num_elems; ++idx )
		{
		auto elem = ValElem(c, list_val->Idx(idx));
		vals.emplace_back(elem);
		}
	}
// Records, in order, the constant-table element for each value in the
// vector.
VectorConstInfo::VectorConstInfo(CPPCompile* c, ValPtr v) : CompoundItemInfo(c, v)
	{
	auto vec_val = cast_intrusive<VectorVal>(v);
	auto num_elems = vec_val->Size();

	for ( auto idx = 0; idx < num_elems; ++idx )
		{
		auto elem = ValElem(c, vec_val->ValAt(idx));
		vals.emplace_back(elem);
		}
	}
// Records the offset of the record's type followed, in field order, by
// the constant-table element for each of the record's fields.
RecordConstInfo::RecordConstInfo(CPPCompile* c, ValPtr v) : CompoundItemInfo(c, v)
	{
	auto rec_val = cast_intrusive<RecordVal>(v);
	auto num_fields = rec_val->NumFields();

	type = c->TypeOffset(rec_val->GetType());

	for ( auto field = 0; field < num_fields; ++field )
		{
		auto elem = ValElem(c, rec_val->GetField(field));
		vals.emplace_back(elem);
		}
	}
// Records, for each entry of the table, the constant-table element for
// its index immediately followed by that for its value.
TableConstInfo::TableConstInfo(CPPCompile* c, ValPtr v) : CompoundItemInfo(c, v)
	{
	auto tbl_val = cast_intrusive<TableVal>(v);
	auto&& entries = tbl_val->ToMap();

	for ( auto& [index, value] : entries )
		{
		vals.emplace_back(ValElem(c, index));
		vals.emplace_back(ValElem(c, value));
		}
	}
// Records the tracking value for the name of the underlying file.
FileConstInfo::FileConstInfo(CPPCompile* c, ValPtr v) : CompoundItemInfo(c, v)
	{
	auto file_val = cast_intrusive<FileVal>(v);
	auto tracked_name = c->TrackString(file_val->Get()->Name());

	vals.emplace_back(Fmt(tracked_name));
	}
FuncConstInfo::FuncConstInfo(CPPCompile* _c, ValPtr v) : CompoundItemInfo(_c, v), fv(v->AsFuncVal())
	{
	// A bit of a hack: this constant might depend on a lambda having
	// been registered.  LambdaRegistrationInfo sets its cohort to 1
	// more than that of the function's type, so using 2 more for ours
	// ensures that any such lambda is registered first.  The
	// CompoundItemInfo constructor already made ours 1 more, hence
	// the single additional bump here.
	init_cohort += 1;
	}
// Appends to 'ivs' the function's type offset and the tracking value for
// its name, followed - unless the function is flagged as not fully
// compilable - by the tracking value for the hash of each of its bodies.
void FuncConstInfo::InitializerVals(std::vector<std::string>& ivs) const
	{
	auto f = fv->AsFunc();
	const auto& fn = f->Name();

	ivs.emplace_back(Fmt(type));
	ivs.emplace_back(Fmt(c->TrackString(fn)));

	if ( c->NotFullyCompilable(fn) )
		return;

	for ( const auto& b : f->GetBodies() )
		{
		auto h = c->BodyHash(b.stmts.get());
		auto h_o = c->TrackHash(h);
		ivs.emplace_back(Fmt(h_o));
		}
	}
// Records the attribute's tag, then a code for the kind of expression
// associated with it (AE_NONE if there isn't one) and, for the other
// kinds, one further value identifying the expression.
AttrInfo::AttrInfo(CPPCompile* _c, const AttrPtr& attr) : CompoundItemInfo(_c)
	{
	vals.emplace_back(Fmt(static_cast<int>(attr->Tag())));

	auto a_e = attr->GetExpr();

	if ( ! a_e )
		{
		vals.emplace_back(Fmt(static_cast<int>(AE_NONE)));
		return;
		}

	// Whatever the kind of expression, we depend on its type.
	auto type_info = c->RegisterType(a_e->GetType());
	init_cohort = max(init_cohort, type_info->InitCohort() + 1);

	if ( ! CPPCompile::IsSimpleInitExpr(a_e) )
		{
		// Identified by the offset of its registered init expression.
		auto call_info = c->RegisterInitExpr(a_e);
		init_cohort = max(init_cohort, call_info->InitCohort() + 1);

		vals.emplace_back(Fmt(static_cast<int>(AE_CALL)));
		vals.emplace_back(Fmt(call_info->Offset()));
		return;
		}

	switch ( a_e->Tag() )
		{
		case EXPR_CONST:
			{
			auto v = a_e->AsConstExpr()->ValuePtr();
			vals.emplace_back(Fmt(static_cast<int>(AE_CONST)));
			vals.emplace_back(ValElem(c, v));
			break;
			}

		case EXPR_NAME:
			{
			// Identified by the tracking value for the global's name.
			auto g = a_e->AsNameExpr()->Id();
			auto global_info = c->RegisterGlobal(g);
			init_cohort = max(init_cohort, global_info->InitCohort() + 1);

			vals.emplace_back(Fmt(static_cast<int>(AE_NAME)));
			vals.emplace_back(Fmt(c->TrackString(g->Name())));
			break;
			}

		default:
			// Identified by the offset of the expression's type.
			ASSERT(a_e->Tag() == EXPR_RECORD_COERCE);
			vals.emplace_back(Fmt(static_cast<int>(AE_RECORD)));
			vals.emplace_back(Fmt(type_info->Offset()));
			break;
		}
	}
// Records the offset of each attribute in the set.  Each attribute must
// already have been processed; the cohort is raised to at least one
// more than that of any of them.
AttrsInfo::AttrsInfo(CPPCompile* _c, const AttributesPtr& _attrs) : CompoundItemInfo(_c)
	{
	for ( const auto& attr : _attrs->GetAttrs() )
		{
		auto attr_key = attr.get();

		ASSERT(c->ProcessedAttr().count(attr_key) > 0);

		auto attr_info = c->ProcessedAttr()[attr_key];
		init_cohort = max(init_cohort, attr_info->InitCohort() + 1);

		vals.emplace_back(Fmt(attr_info->Offset()));
		}
	}
// Gathers what's needed to initialize global 'g': its Zeek name, the
// offsets of its type and attributes (-1 if no attributes are
// registered), whether it's exported, and the constant-table element
// for its value.
GlobalInitInfo::GlobalInitInfo(CPPCompile* c, const ID* g, string _CPP_name)
	: CPP_InitInfo(), CPP_name(move(_CPP_name))
	{
	Zeek_name = g->Name();

	auto type_info = c->RegisterType(g->GetType());
	init_cohort = max(init_cohort, type_info->InitCohort() + 1);
	type = type_info->Offset();

	auto attrs_info = c->RegisterAttributes(g->GetAttrs());

	if ( ! attrs_info )
		attrs = -1;
	else
		{
		init_cohort = max(init_cohort, attrs_info->InitCohort() + 1);
		attrs = attrs_info->Offset();
		}

	exported = g->IsExport();

	// Note, this can further raise our cohort.
	val = ValElem(c, g->GetVal());
	}
void GlobalInitInfo::InitializerVals(std::vector<std::string>& ivs) const
{
ivs.push_back(CPP_name);
ivs.push_back(string("\"") + Zeek_name + "\"");
ivs.push_back(Fmt(type));
ivs.push_back(Fmt(attrs));
ivs.push_back(val);
ivs.push_back(Fmt(exported));
}
// Records the expression, its name and its wrapper class.  The cohort
// is raised to at least one more than that of the expression's type.
CallExprInitInfo::CallExprInitInfo(CPPCompile* c, ExprPtr _e, string _e_name, string _wrapper_class)
	: e(move(_e)), e_name(move(_e_name)), wrapper_class(move(_wrapper_class))
	{
	auto type_info = c->RegisterType(e->GetType());
	auto min_cohort = type_info->InitCohort() + 1;

	init_cohort = max(init_cohort, min_cohort);
	}
// Records the lambda's name, wrapper class, hash, whether it has
// captures, and the offset of its function type.  The cohort is raised
// to at least one more than that of the function type.
LambdaRegistrationInfo::LambdaRegistrationInfo(CPPCompile* c, string _name, FuncTypePtr ft,
                                               string _wrapper_class, p_hash_type _h,
                                               bool _has_captures)
	: name(move(_name)), wrapper_class(move(_wrapper_class)), h(_h), has_captures(_has_captures)
	{
	auto ft_info = c->RegisterType(ft);

	init_cohort = max(init_cohort, ft_info->InitCohort() + 1);
	func_type = ft_info->Offset();
	}
void LambdaRegistrationInfo::InitializerVals(std::vector<std::string>& ivs) const
{
ivs.emplace_back(string("\"") + name + "\"");
ivs.emplace_back(Fmt(func_type));
ivs.emplace_back(Fmt(h));
ivs.emplace_back(has_captures ? "true" : "false");
}
// Appends to 'ivs' the tracking value for the enum type's name followed,
// for each of its enumerators, by the tracking value for the
// enumerator's name and then its integer value.
void EnumTypeInfo::AddInitializerVals(std::vector<std::string>& ivs) const
	{
	ivs.emplace_back(Fmt(c->TrackString(t->GetName())));

	for ( const auto& [enum_name, enum_val] : t->AsEnumType()->Names() )
		{
		ivs.emplace_back(Fmt(c->TrackString(enum_name)));
		ivs.emplace_back(Fmt(int(enum_val)));
		}
	}
// Appends to 'ivs' the tracking value for the opaque type's name.
void OpaqueTypeInfo::AddInitializerVals(std::vector<std::string>& ivs) const
	{
	auto tracked_name = c->TrackString(t->GetName());
	ivs.emplace_back(Fmt(tracked_name));
	}
// Registers the type that this type-of-a-type refers to, adopting its
// cohort if registration yields an item.
TypeTypeInfo::TypeTypeInfo(CPPCompile* _c, TypePtr _t) : AbstractTypeInfo(_c, move(_t))
	{
	tt = t->AsTypeType()->GetType();

	if ( auto tt_info = c->RegisterType(tt) )
		init_cohort = tt_info->InitCohort();
	}
// Appends to 'ivs' the offset of the referred-to type.
void TypeTypeInfo::AddInitializerVals(std::vector<std::string>& ivs) const
	{
	auto tt_offset = c->TypeOffset(tt);
	ivs.emplace_back(to_string(tt_offset));
	}
// Registers the vector's yield type, adopting its cohort if
// registration yields an item.
VectorTypeInfo::VectorTypeInfo(CPPCompile* _c, TypePtr _t) : AbstractTypeInfo(_c, move(_t))
	{
	yield = t->Yield();

	if ( auto yield_info = c->RegisterType(yield) )
		init_cohort = yield_info->InitCohort();
	}
// Appends to 'ivs' the offset of the vector's yield type.
void VectorTypeInfo::AddInitializerVals(std::vector<std::string>& ivs) const
	{
	auto yield_offset = c->TypeOffset(yield);
	ivs.emplace_back(to_string(yield_offset));
	}
// Registers each of the list's element types, raising the cohort to at
// least that of any element type for which registration yields an item.
ListTypeInfo::ListTypeInfo(CPPCompile* _c, TypePtr _t)
	: AbstractTypeInfo(_c, move(_t)), types(t->AsTypeList()->GetTypes())
	{
	for ( auto& elem_type : types )
		{
		auto elem_info = c->RegisterType(elem_type);

		if ( ! elem_info )
			continue;

		init_cohort = max(init_cohort, elem_info->InitCohort());
		}
	}
// Appends to 'ivs', in order, the offset of each of the list's element
// types.
void ListTypeInfo::AddInitializerVals(std::vector<std::string>& ivs) const
	{
	// The loop variable deliberately isn't named 't', which is the
	// name used for this object's own type.
	for ( auto& elem_type : types )
		ivs.emplace_back(Fmt(c->TypeOffset(elem_type)));
	}
// Registers the table's index types, recording their offset and
// adopting their cohort, and then its yield type (if any), raising the
// cohort to at least the yield's if registration yields an item.
TableTypeInfo::TableTypeInfo(CPPCompile* _c, TypePtr _t) : AbstractTypeInfo(_c, move(_t))
	{
	auto tbl = t->AsTableType();

	auto indices_info = c->RegisterType(tbl->GetIndices());
	ASSERT(indices_info);

	indices = indices_info->Offset();
	init_cohort = indices_info->InitCohort();

	yield = tbl->Yield();

	if ( ! yield )
		return;

	if ( auto yield_info = c->RegisterType(yield) )
		init_cohort = max(init_cohort, yield_info->InitCohort());
	}
// Appends to 'ivs' the offset of the table's index types, then that of
// its yield type, or -1 if it has no yield.
void TableTypeInfo::AddInitializerVals(std::vector<std::string>& ivs) const
	{
	ivs.emplace_back(Fmt(indices));

	if ( yield )
		ivs.emplace_back(Fmt(c->TypeOffset(yield)));
	else
		ivs.emplace_back(Fmt(-1));
	}
// Records the function type's flavor, parameters and yield, registering
// the parameters' type and then (if present) the yield type; the cohort
// is the larger of those of the two, for whichever registrations yield
// an item.
FuncTypeInfo::FuncTypeInfo(CPPCompile* _c, TypePtr _t) : AbstractTypeInfo(_c, move(_t))
	{
	auto f = t->AsFuncType();

	flavor = f->Flavor();
	params = f->Params();
	yield = f->Yield();

	if ( auto params_info = c->RegisterType(f->Params()) )
		init_cohort = params_info->InitCohort();

	if ( ! yield )
		return;

	if ( auto yield_info = c->RegisterType(f->Yield()) )
		init_cohort = max(init_cohort, yield_info->InitCohort());
	}
// Appends to 'ivs' the offset of the parameters' type, then that of the
// yield type (-1 if there's no yield), then the flavor as an integer.
void FuncTypeInfo::AddInitializerVals(std::vector<std::string>& ivs) const
	{
	ivs.emplace_back(Fmt(c->TypeOffset(params)));

	if ( yield )
		ivs.emplace_back(Fmt(c->TypeOffset(yield)));
	else
		ivs.emplace_back(Fmt(-1));

	ivs.emplace_back(Fmt(static_cast<int>(flavor)));
	}
// Records the name, type and attributes offset (-1 if none) of each of
// the record's fields, registering the field types and attributes along
// the way and raising the cohort accordingly.
RecordTypeInfo::RecordTypeInfo(CPPCompile* _c, TypePtr _t) : AbstractTypeInfo(_c, move(_t))
	{
	auto fields = t->AsRecordType()->Types();

	if ( ! fields )
		return;

	for ( const auto& field : *fields )
		{
		field_names.emplace_back(field->id);

		// If registration doesn't yield an item, it's a recursive
		// type, and there's no need to adjust the cohort here.
		if ( auto type_info = c->RegisterType(field->type) )
			init_cohort = max(init_cohort, type_info->InitCohort());

		field_types.push_back(field->type);

		if ( ! field->attrs )
			{
			field_attrs.push_back(-1);
			continue;
			}

		auto attrs_info = c->RegisterAttributes(field->attrs);
		init_cohort = max(init_cohort, attrs_info->InitCohort() + 1);
		field_attrs.push_back(attrs_info->Offset());
		}
	}
// Appends to 'ivs' the tracking value for the record type's name
// followed, for each field, by the tracking value for the field's name,
// the offset of its type, and the offset of its attributes.
void RecordTypeInfo::AddInitializerVals(std::vector<std::string>& ivs) const
	{
	ivs.emplace_back(Fmt(c->TrackString(t->GetName())));

	const auto num_fields = field_names.size();

	for ( auto field = 0U; field < num_fields; ++field )
		{
		auto tracked_name = c->TrackString(field_names[field]);
		ivs.emplace_back(Fmt(tracked_name));

		// RecordType's can be recursively defined, so the offsets
		// of the field types couldn't be reliably accessed during
		// construction.  By this point, though, they should all
		// be available.
		auto type_offset = c->TypeOffset(field_types[field]);
		ivs.emplace_back(Fmt(type_offset));

		ivs.emplace_back(Fmt(field_attrs[field]));
		}
	}
// Emits the definition of the CPP__Indices__init array: for each tracked
// set of indices, its size followed by its values, with a final -1
// after the last set.
void IndicesManager::Generate(CPPCompile* c)
	{
	c->Emit("int CPP__Indices__init[] =");
	c->StartBlock();

	int set_num = 0;

	for ( auto& index_set : indices_set )
		{
		// Annotate each set with its ordinal position, to make
		// it easier to debug problems.
		string line = "/* ";
		line += to_string(set_num++);
		line += " */ ";

		// The size comes first, then the values.
		line += to_string(index_set.size());
		line += ", ";

		// Counts the entries written for this set (starting with
		// the size) so that long sets get broken across lines.
		auto num_entries = 1;

		for ( auto index : index_set )
			{
			line += to_string(index);
			line += ", ";

			if ( ++num_entries % 10 != 0 )
				continue;

			c->Emit(line);
			line.clear();
			}

		if ( ! line.empty() )
			c->Emit(line);
		}

	c->Emit("-1");
	c->EndBlock(true);
	}
} // zeek::detail

View file

@ -0,0 +1,693 @@
// See the file "COPYING" in the main distribution directory for copyright.
// Classes for tracking information for initializing C++ values used by the
// generated code.
// Initialization is probably the most complex part of the entire compiler,
// as there are a lot of considerations. There are two basic parts: (1) the
// generation of C++ code for doing run-time initialization, which is covered
// by the classes in this file, and (2) the execution of that code to do the
// actual initialization, which is covered by the classes in RuntimeInits.h.
//
// There are two fundamental types of initialization, those that create values
// (such as Zeek Type and Val objects) that will be used during the execution
// of compiled scripts, and those that perform actions such as registering
// the presence of a global or a lambda. In addition, for the former (values
// used at run-time), some are grouped together into vectors, with the compiled
// code using a hardwired index to get to a particular value; and some have
// standalone globals (for example, one for each BiF that a compiled script
// may call).
//
// For each of these types of initialization, our general approach is to have
// a class that manages a single instance of that type, and an object that
// manages all of those instances collectively. The latter object will, for
// example, attend to determining the offset into the run-time vector associated
// with a particular initialized value.
//
// An additional complexity is that often the initialization of a particular
// value will depend on *other* values having already been initialized. For
// example, a record type might have a field that is a table, and thus the
// type corresponding to the table needs to be available before we can create
// the record type. However, the table might have a set of attributes
// associated with it, which have to be initialized before we can create the
// table type, those in turn requiring the initialization of each of the
// individual attributes in the set. One of those attributes might specify
// a &default function for the table, requiring initializing *that* value
// (not just the type, but also a way to refer to the particular instance of
// the function) before initializing the attribute, etc. Worse, record types
// can be *indirectly recursive*, which requires first initializing a "stub"
// for the record type before doing the final initialization.
//
// The general strategy for dealing with all of these dependencies is to
// compute for each initialization its "cohort". An initialization that
// doesn't depend on any others is in cohort 0. An initialization X that
// depends on an initialization Y will have cohort(X) = cohort(Y) + 1; or,
// in general, one more than the highest cohort of any initialization it
// depends on. (We cut a corner in that, due to how initialization information
// is constructed, if X and Y are for the same type of object then we can
// safely use cohort(X) = cohort(Y).) We then execute run-time initialization
// in waves, one cohort at a time.
//
// Because C++ compilers can struggle when trying to optimize large quantities
// of code - clang in particular could take many CPU *hours* back when our
// compiler just generated C++ code snippets for each initialization - rather
// than producing code that directly executes each given initialization, we
// instead employ a table-driven approach. The C++ initializers for the
// tables contain simple values - often just vectors of integers - that compile
// quickly. At run-time we then spin through the elements of the tables (one
// cohort at a time) to obtain the information needed to initialize any given
// item.
//
// Many forms of initialization are specified in terms of indices into globals
// that hold items of various types. Thus, the most common initialization
// information is a vector of integers/indices. These data structures can
// be recursive, too, namely we sometimes associate an index with a vector
// of integers/indices and then we can track multiple such vectors using
// another vector of integers/indices.
#include "zeek/File.h"
#include "zeek/Val.h"
#include "zeek/script_opt/ProfileFunc.h"
#pragma once
namespace zeek::detail
{
class CPPCompile;
// Abstract class for tracking information about a single initialization item.
class CPP_InitInfo;
// Abstract class for tracking information about a collection of initialization
// items.
class CPP_InitsInfo
	{
public:
	// Note, the initializers below rely on "tag" being declared before
	// "base_name" and "CPP_type".
	CPP_InitsInfo(std::string _tag, std::string type)
		: tag(std::move(_tag)), base_name(std::string("CPP__") + tag + "__"), CPP_type(tag + type)
		{
		}

	virtual ~CPP_InitsInfo() = default;

	// Name of the C++ global that holds the items' values at run-time,
	// once initialized.  These are all vectors; the generated code
	// gets to a particular item by indexing the vector.
	const std::string& InitsName() const { return base_name; }

	// Name of the C++ global holding the table we employ for
	// table-driven initialization.
	std::string InitializersName() const { return base_name + "init"; }

	// The "name" of the given element in the run-time vector associated
	// with this collection of initialization items.  It's really a
	// vector index rather than a name - for example, Name(12) might
	// return "CPP__Pattern__[12]" - but we use the term Name because
	// the representation used to be individualized globals, such as
	// "CPP__Pattern__12".
	std::string Name(int index) const;

	// The name that will correspond to the next item added to this
	// collection.
	std::string NextName() const { return Name(size); }

	// The largest initialization cohort of any item in this collection.
	int MaxCohort() const { return static_cast<int>(instances.size()) - 1; }

	// The number of initializations in this collection that belong
	// to the given cohort c.
	int CohortSize(int c) const
		{
		if ( c > MaxCohort() )
			return 0;

		return instances[c].size();
		}

	// The C++ type associated with this collection's run-time vector.
	// This might be, for example, "PatternVal".
	const std::string& CPPType() const { return CPP_type; }

	// Sets the associated C++ type.
	virtual void SetCPPType(std::string ct) { CPP_type = std::move(ct); }

	// The type associated with the table used for initialization
	// (i.e., the type of the global named by InitializersName()).
	std::string InitsType() const { return inits_type; }

	// Adds a new initialization instance to the collection.
	void AddInstance(std::shared_ptr<CPP_InitInfo> g);

	// Emits code to populate the table used to initialize this
	// collection.
	void GenerateInitializers(CPPCompile* c);

protected:
	// Computes offset_set - see below.
	void BuildOffsetSet(CPPCompile* c);

	// Returns a declaration suitable for the run-time vector that
	// holds the initialized items in the collection.
	std::string Declare() const;

	// For a given cohort, generates the associated table elements
	// for creating it.
	void BuildCohort(CPPCompile* c, std::vector<std::shared_ptr<CPP_InitInfo>>& cohort);

	// Given the initialization type and the initializers for a given
	// cohort element, builds the associated table element.
	virtual void BuildCohortElement(CPPCompile* c, std::string init_type,
	                                std::vector<std::string>& ivs);

	// Total number of initializers.
	int size = 0;

	// A vector of cohorts, indexed by cohort number.  Each cohort is
	// in turn a vector whose elements hold the initialization
	// information for a single item.  (Note, some cohorts may be
	// empty.)
	std::vector<std::vector<std::shared_ptr<CPP_InitInfo>>> instances;

	// Each cohort has associated with it a vector of offsets, giving
	// the positions in the run-time vector of the cohort's items.
	//
	// We reduce each such vector to an index into the collection of
	// such vectors (as managed by an IndicesManager - see below).
	//
	// With that reduction, each cohort is represented by a single
	// index, and thus all of the cohorts by a vector of indices.
	// We then reduce *that* vector to a single index, again using
	// the IndicesManager, and store it in "offset_set".
	int offset_set = 0;

	// Tag used to distinguish a particular collection of constants.
	std::string tag;

	// C++ name for this collection of constants.
	std::string base_name;

	// C++ type associated with a single instance of these constants.
	std::string CPP_type;

	// C++ type associated with the collection of initializers.
	std::string inits_type;
	};
// Collection of initialization items in which every item has a "custom"
// initializer, i.e., a bespoke C++ object rather than a simple C++ type
// or a vector of indices.
class CPP_CustomInitsInfo : public CPP_InitsInfo
	{
public:
	CPP_CustomInitsInfo(std::string _tag, std::string _type)
		: CPP_InitsInfo(std::move(_tag), std::move(_type))
		{
		BuildInitType();
		}

	// Forwards the new C++ type to the base class and then rebuilds the
	// initializer type, which is derived from CPPType().
	void SetCPPType(std::string ct) override
		{
		CPP_InitsInfo::SetCPPType(std::move(ct));
		BuildInitType();
		}

private:
	// Recomputes "inits_type" as CPP_CustomInits<...> wrapped around
	// whatever CPPType() currently returns.
	void BuildInitType()
		{
		std::string custom_type("CPP_CustomInits<");
		custom_type += CPPType();
		custom_type += ">";
		inits_type = std::move(custom_type);
		}
	};
// Collection of initialization items for "basic" constants: those that can
// be represented either directly as C++ constants, or as indices into a
// vector of C++ objects.
class CPP_BasicConstInitsInfo : public CPP_CustomInitsInfo
	{
public:
	// A non-empty "c_type" specifies the C++ type used to directly
	// represent the constant.  An empty one indicates that an index
	// into a separate vector is used instead.
	CPP_BasicConstInitsInfo(std::string _tag, std::string type, std::string c_type)
		: CPP_CustomInitsInfo(std::move(_tag), std::move(type))
		{
		const bool uses_index = c_type.empty();

		// Either way this overrides the initializer type that the
		// base-class constructor set up.
		inits_type = uses_index
		                 ? "CPP_" + tag + "Consts"
		                 : "CPP_BasicConsts<" + CPP_type + ", " + c_type + ", " + tag + "Val>";
		}

	void BuildCohortElement(CPPCompile* c, std::string init_type,
	                        std::vector<std::string>& ivs) override;
	};
// Collection of initialization items that are themselves defined in terms
// of other initialization items.
class CPP_CompoundInitsInfo : public CPP_InitsInfo
	{
public:
	CPP_CompoundInitsInfo(std::string _tag, std::string type)
		: CPP_InitsInfo(std::move(_tag), std::move(type))
		{
		if ( tag != "Type" )
			{
			std::string indexed_type("CPP_IndexedInits<");
			indexed_type += CPPType();
			indexed_type += ">";
			inits_type = std::move(indexed_type);
			return;
			}

		// Types need a refined version of CPP_IndexedInits in order
		// to build different types dynamically.
		inits_type = "CPP_TypeInits";
		}

	void BuildCohortElement(CPPCompile* c, std::string init_type,
	                        std::vector<std::string>& ivs) override;
	};
// Abstract base class holding the information needed to initialize one item.
class CPP_InitInfo
	{
public:
	// There is deliberately no constructor: basic setup happens when
	// the object is handed to a CPP_InitsInfo via AddInstance(), which
	// in turn leads to a call to this object's SetOffset().
	virtual ~CPP_InitInfo() = default;

	// Records the collection this item belongs to, along with its
	// offset in the associated run-time vector.
	void SetOffset(const CPP_InitsInfo* _inits_collection, int _offset)
		{
		offset = _offset;
		inits_collection = _inits_collection;
		}

	// The item's offset into the associated run-time vector.
	int Offset() const { return offset; }

	// The name generated code should use to refer to this value, as
	// supplied by the owning collection.
	std::string Name() const { return inits_collection->Name(offset); }

	// The initialization cohort this item belongs to.
	int InitCohort() const { return init_cohort; }

	// The type used for this item's initializer.  The default is a
	// placeholder that isn't meant to appear in generated code.
	virtual std::string InitializerType() const { return "<shouldn't-be-used>"; }

	// Appends to "ivs" the values used to create this item, one
	// element per constructor parameter.
	virtual void InitializerVals(std::vector<std::string>& ivs) const = 0;

protected:
	// Returns an offset (into the run-time vector holding all Zeek
	// constant values) corresponding to the given value, registering
	// the constant if needed.
	std::string ValElem(CPPCompile* c, ValPtr v);

	// Items by default do not depend on other values being initialized
	// first.  Those that do must raise this in their constructors.
	int init_cohort = 0;

	// The collection to which this item belongs.
	const CPP_InitsInfo* inits_collection = nullptr;

	// Offset of this item within the collection; -1 means no
	// association has been made yet.
	int offset = -1;
	};
// Initialization information for a basic (non-compound) constant.
class BasicConstInfo : public CPP_InitInfo
	{
public:
	BasicConstInfo(std::string _val) : val(std::move(_val)) { }

	// The sole initializer value is the constant's C++ representation.
	void InitializerVals(std::vector<std::string>& ivs) const override
		{
		ivs.push_back(val);
		}

private:
	// The C++ representation of the constant - the only state needed.
	std::string val;
	};
// Initialization information for a constant whose Val constructor takes
// a string.
class DescConstInfo : public CPP_InitInfo
	{
public:
	DescConstInfo(CPPCompile* c, ValPtr v);

	// Contributes the single stored initializer string.
	void InitializerVals(std::vector<std::string>& ivs) const override
		{
		ivs.push_back(init);
		}

private:
	// The initializer string; set by the out-of-line constructor.
	std::string init;
	};
// Initialization information for an enum constant.
class EnumConstInfo : public CPP_InitInfo
	{
public:
	EnumConstInfo(CPPCompile* c, ValPtr v);

	// Contributes two values, in order: the index of the enum's type,
	// and the enum's integer value.
	void InitializerVals(std::vector<std::string>& ivs) const override
		{
		ivs.push_back(std::to_string(e_type));
		ivs.push_back(std::to_string(e_val));
		}

private:
	int e_type; // an index into the enum's Zeek type
	int e_val; // integer value of the enum
	};
// Initialization information for a string constant.
class StringConstInfo : public CPP_InitInfo
	{
public:
	StringConstInfo(CPPCompile* c, ValPtr v);

	// Contributes two values, in order: the index of the string's
	// characters, and the string's length.
	void InitializerVals(std::vector<std::string>& ivs) const override
		{
		ivs.push_back(std::to_string(chars));
		ivs.push_back(std::to_string(len));
		}

private:
	int chars; // index into vector of char*'s
	int len; // length of the string
	};
// Initialization information for a pattern constant.
class PatternConstInfo : public CPP_InitInfo
	{
public:
	PatternConstInfo(CPPCompile* c, ValPtr v);

	// Contributes two values, in order: the index of the pattern's
	// string form, and its case-insensitivity flag.
	void InitializerVals(std::vector<std::string>& ivs) const override
		{
		ivs.push_back(std::to_string(pattern));
		ivs.push_back(std::to_string(is_case_insensitive));
		}

private:
	int pattern; // index into string representation of pattern
	int is_case_insensitive; // case-insensitivity flag, 0 or 1
	};
// Initialization information for a port constant.
class PortConstInfo : public CPP_InitInfo
	{
public:
	// Captures the port's underlying unsigned value.
	PortConstInfo(ValPtr v) : p(static_cast<UnsignedValImplementation*>(v->AsPortVal())->Get()) { }

	// Contributes the port's numeric value, rendered as an unsigned
	// C++ literal (hence the "U" suffix).
	void InitializerVals(std::vector<std::string>& ivs) const override
		{
		std::string literal = std::to_string(p);
		literal += "U";
		ivs.push_back(std::move(literal));
		}

private:
	bro_uint_t p;
	};
// Abstract class for compound items, i.e., those defined in terms of
// other items.
class CompoundItemInfo : public CPP_InitInfo
	{
public:
	// The first constructor is for items that have custom Zeek types,
	// the second for when the type is generic/inapplicable (which is
	// flagged using a type of -1).
	CompoundItemInfo(CPPCompile* c, ValPtr v);
	CompoundItemInfo(CPPCompile* _c) : c(_c), type(-1) { }

	// Contributes the type (only if there is one, i.e., it is
	// non-negative) followed by every element of "vals".
	void InitializerVals(std::vector<std::string>& ivs) const override
		{
		if ( type >= 0 )
			ivs.push_back(std::to_string(type));

		ivs.insert(ivs.end(), vals.begin(), vals.end());
		}

protected:
	CPPCompile* c;
	int type;
	std::vector<std::string> vals; // initialization values
	};
// This next set corresponds to compound Zeek constants of various types.
// Compound-item information for a list constant.  All behavior comes from
// CompoundItemInfo; the constructor is defined out-of-line.
// NOTE(review): presumably the constructor fills in the inherited "vals"
// from the elements of "v" - confirm against the implementation.
class ListConstInfo : public CompoundItemInfo
{
public:
ListConstInfo(CPPCompile* c, ValPtr v);
};
// Compound-item information for a vector constant.  All behavior comes
// from CompoundItemInfo; the constructor is defined out-of-line.
// NOTE(review): presumably the constructor fills in the inherited "type"
// and "vals" from "v" - confirm against the implementation.
class VectorConstInfo : public CompoundItemInfo
{
public:
VectorConstInfo(CPPCompile* c, ValPtr v);
};
// Compound-item information for a record constant.  All behavior comes
// from CompoundItemInfo; the constructor is defined out-of-line.
// NOTE(review): presumably the constructor fills in the inherited "type"
// and "vals" from the fields of "v" - confirm against the implementation.
class RecordConstInfo : public CompoundItemInfo
{
public:
RecordConstInfo(CPPCompile* c, ValPtr v);
};
// Compound-item information for a table constant.  All behavior comes
// from CompoundItemInfo; the constructor is defined out-of-line.
// NOTE(review): presumably the constructor fills in the inherited "type"
// and "vals" from the entries of "v" - confirm against the implementation.
class TableConstInfo : public CompoundItemInfo
{
public:
TableConstInfo(CPPCompile* c, ValPtr v);
};
// Compound-item information for a file constant.  All behavior comes from
// CompoundItemInfo; the constructor is defined out-of-line.
// NOTE(review): presumably the constructor records what is needed to
// re-create the file for "v" at run-time - confirm against the
// implementation.
class FileConstInfo : public CompoundItemInfo
{
public:
FileConstInfo(CPPCompile* c, ValPtr v);
};
// Compound-item information for a function constant.  Unlike the other
// compound constants, this one supplies its own InitializerVals() (defined
// out-of-line) rather than using the CompoundItemInfo version.
class FuncConstInfo : public CompoundItemInfo
{
public:
FuncConstInfo(CPPCompile* _c, ValPtr v);
void InitializerVals(std::vector<std::string>& ivs) const override;
private:
// The function value being described, held as a raw pointer.
// NOTE(review): presumably whoever supplied the ValPtr keeps the value
// alive for this object's lifetime - confirm.
FuncVal* fv;
};
// Initialization information for single attributes and sets of attributes.
// Compound-item information for a single attribute.  All behavior comes
// from CompoundItemInfo; the constructor is defined out-of-line.
class AttrInfo : public CompoundItemInfo
{
public:
AttrInfo(CPPCompile* c, const AttrPtr& attr);
};
// Compound-item information for a set of attributes.  All behavior comes
// from CompoundItemInfo; the constructor is defined out-of-line.
class AttrsInfo : public CompoundItemInfo
{
public:
AttrsInfo(CPPCompile* c, const AttributesPtr& attrs);
};
// Information for initializing a Zeek global.
class GlobalInitInfo : public CPP_InitInfo
{
public:
// "g" is the global's identifier and "CPP_name" the name of its C++
// counterpart.  The constructor is defined out-of-line.
GlobalInitInfo(CPPCompile* c, const ID* g, std::string CPP_name);
// Globals are initialized via the run-time CPP_GlobalInit class.
std::string InitializerType() const override { return "CPP_GlobalInit"; }
void InitializerVals(std::vector<std::string>& ivs) const override;
protected:
// None of the following have in-class initializers, so they hold
// whatever the out-of-line constructor assigns.
// NOTE(review): "type", "attrs" and "val" look like they refer to
// entries in other initialization collections - confirm against the
// constructor.
std::string Zeek_name;
std::string CPP_name;
int type;
int attrs;
std::string val;
bool exported;
};
// Information for initializing an item that corresponds to a Zeek function
// call, which is needed to associate complex expressions with attributes.
class CallExprInitInfo : public CPP_InitInfo
	{
public:
	CallExprInitInfo(CPPCompile* c, ExprPtr e, std::string e_name, std::string wrapper_class);

	// The initializer is the CPP_CallExprInit template instantiated
	// with the wrapper class.
	std::string InitializerType() const override
		{
		return "CPP_CallExprInit<" + wrapper_class + ">";
		}

	// The only initializer value is the expression's name.
	void InitializerVals(std::vector<std::string>& ivs) const override
		{
		ivs.push_back(e_name);
		}

	// Accessors, provided because the code that initializes these is
	// generated separately from that of most initialization collections.
	// Note that Name() here is a distinct, non-virtual method that
	// hides CPP_InitInfo::Name().
	const ExprPtr& GetExpr() const { return e; }
	const std::string& Name() const { return e_name; }
	const std::string& WrapperClass() const { return wrapper_class; }

protected:
	ExprPtr e;
	std::string e_name;
	std::string wrapper_class;
	};
// Information for registering the class/function associated with a lambda.
class LambdaRegistrationInfo : public CPP_InitInfo
	{
public:
	LambdaRegistrationInfo(CPPCompile* c, std::string name, FuncTypePtr ft,
	                       std::string wrapper_class, p_hash_type h, bool has_captures);

	// The initializer is the CPP_LambdaRegistration template
	// instantiated with the wrapper class.
	std::string InitializerType() const override
		{
		return "CPP_LambdaRegistration<" + wrapper_class + ">";
		}

	void InitializerVals(std::vector<std::string>& ivs) const override;

protected:
	// State captured by the out-of-line constructor.
	std::string name;
	int func_type;
	std::string wrapper_class;
	p_hash_type h;
	bool has_captures;
	};
// Abstract class representing the information needed to initialize a
// Zeek type.
class AbstractTypeInfo : public CPP_InitInfo
	{
public:
	AbstractTypeInfo(CPPCompile* _c, TypePtr _t) : c(_c), t(std::move(_t)) { }

	// Every type's initializer begins with the type's tag (as an
	// integer); subclasses append anything further they need via
	// AddInitializerVals().
	void InitializerVals(std::vector<std::string>& ivs) const override
		{
		const int tag_val = static_cast<int>(t->Tag());
		ivs.push_back(std::to_string(tag_val));
		AddInitializerVals(ivs);
		}

	// Hook for type-specific initializer values; by default there are
	// none.
	virtual void AddInitializerVals(std::vector<std::string>& ivs) const { }

protected:
	CPPCompile* c;
	TypePtr t; // the type we're initializing
	};
// The following capture information for different Zeek types.
// Type information that adds nothing to AbstractTypeInfo: the constructor
// simply forwards its arguments, and AddInitializerVals() is not
// overridden, so the initializer consists of just the type's tag.
class BaseTypeInfo : public AbstractTypeInfo
{
public:
BaseTypeInfo(CPPCompile* _c, TypePtr _t) : AbstractTypeInfo(_c, std::move(_t)) { }
};
// Type information for an enum type.  The constructor just forwards to
// AbstractTypeInfo; the enum-specific initializer values come from
// AddInitializerVals(), which is defined out-of-line.
class EnumTypeInfo : public AbstractTypeInfo
{
public:
EnumTypeInfo(CPPCompile* _c, TypePtr _t) : AbstractTypeInfo(_c, std::move(_t)) { }
void AddInitializerVals(std::vector<std::string>& ivs) const override;
};
// Type information for an opaque type.  The constructor just forwards to
// AbstractTypeInfo; the type-specific initializer values come from
// AddInitializerVals(), which is defined out-of-line.
class OpaqueTypeInfo : public AbstractTypeInfo
{
public:
OpaqueTypeInfo(CPPCompile* _c, TypePtr _t) : AbstractTypeInfo(_c, std::move(_t)) { }
void AddInitializerVals(std::vector<std::string>& ivs) const override;
};
// Type information for a "type" type, i.e., a type that refers to another
// type.  Both the constructor and AddInitializerVals() are defined
// out-of-line.
class TypeTypeInfo : public AbstractTypeInfo
{
public:
TypeTypeInfo(CPPCompile* c, TypePtr _t);
void AddInitializerVals(std::vector<std::string>& ivs) const override;
private:
TypePtr tt; // the type referred to by t
};
// Type information for a vector type.  Both the constructor and
// AddInitializerVals() are defined out-of-line.
class VectorTypeInfo : public AbstractTypeInfo
{
public:
VectorTypeInfo(CPPCompile* c, TypePtr _t);
void AddInitializerVals(std::vector<std::string>& ivs) const override;
private:
// NOTE(review): presumably the vector's element ("yield") type -
// confirm against the constructor.
TypePtr yield;
};
// Type information for a type list.  Both the constructor and
// AddInitializerVals() are defined out-of-line.
class ListTypeInfo : public AbstractTypeInfo
{
public:
ListTypeInfo(CPPCompile* c, TypePtr _t);
void AddInitializerVals(std::vector<std::string>& ivs) const override;
private:
// A reference, not a copy, so whatever it is bound to must outlive
// this object.
// NOTE(review): presumably bound to the type vector owned by the
// inherited "t", which would keep it alive - confirm against the
// constructor.
const std::vector<TypePtr>& types;
};
// Type information for a table type.  Both the constructor and
// AddInitializerVals() are defined out-of-line.
class TableTypeInfo : public AbstractTypeInfo
{
public:
TableTypeInfo(CPPCompile* c, TypePtr _t);
void AddInitializerVals(std::vector<std::string>& ivs) const override;
private:
// NOTE(review): "indices" is an integer rather than a TypePtr, so it
// presumably identifies the table's index type-list by offset, with
// "yield" being the table's yield type (if any) - confirm against the
// constructor.
int indices;
TypePtr yield;
};
// Type information for a function type.  Both the constructor and
// AddInitializerVals() are defined out-of-line.
class FuncTypeInfo : public AbstractTypeInfo
{
public:
FuncTypeInfo(CPPCompile* c, TypePtr _t);
void AddInitializerVals(std::vector<std::string>& ivs) const override;
private:
// NOTE(review): presumably the function's flavor, parameter type and
// yield type respectively - confirm against the constructor.
FunctionFlavor flavor;
TypePtr params;
TypePtr yield;
};
// Type information for a record type.  Both the constructor and
// AddInitializerVals() are defined out-of-line.
class RecordTypeInfo : public AbstractTypeInfo
{
public:
RecordTypeInfo(CPPCompile* c, TypePtr _t);
void AddInitializerVals(std::vector<std::string>& ivs) const override;
private:
// Per-field information, kept as three separate vectors.
// NOTE(review): presumably these are parallel (one entry per field in
// each), with "field_attrs" holding offsets of attribute sets -
// confirm against the constructor.
std::vector<std::string> field_names;
std::vector<TypePtr> field_types;
std::vector<int> field_attrs;
};
// A large part of the table-driven initialization relies on vectors of
// indices, represented as vectors of int's in which each int indexes a
// global C++ vector.  This class manages such vectors.  In particular, it
// reduces a given vector-of-indices to a single value - itself an index -
// that can be used at run-time to retrieve a reference to the original
// vector.
//
// The notion recurses: given several vectors-of-indices, each can be reduced
// to an index, and the resulting vector-of-meta-indices can then be reduced
// further to a single index.  This allows concisely referring to a
// potentially large, deep set of indices using one value - as is done for
// CPP_InitsInfo's "offset_set" member variable.
class IndicesManager
	{
public:
	IndicesManager() = default;

	// Starts tracking a new vector-of-indices, returning the offset
	// that will be associated with it at run-time (its position in
	// the order of addition).
	int AddIndices(std::vector<int> indices)
		{
		indices_set.push_back(std::move(indices));
		return static_cast<int>(indices_set.size()) - 1;
		}

	// Generates the initializations used to construct the managed
	// vectors at run-time.
	void Generate(CPPCompile* c);

private:
	// All of the vectors-of-indices being tracked.  Recognizing
	// duplicates (empty vectors, for example, are very common) would
	// save some space and time, but as long as the code compiles and
	// executes without undue overhead that doesn't appear necessary.
	std::vector<std::vector<int>> indices_set;
	};
} // zeek::detail

View file

@ -17,18 +17,21 @@
#include "zeek/ZeekString.h" #include "zeek/ZeekString.h"
#include "zeek/module_util.h" #include "zeek/module_util.h"
#include "zeek/script_opt/CPP/Func.h" #include "zeek/script_opt/CPP/Func.h"
#include "zeek/script_opt/CPP/RuntimeInit.h" #include "zeek/script_opt/CPP/RuntimeInitSupport.h"
#include "zeek/script_opt/CPP/RuntimeInits.h"
#include "zeek/script_opt/CPP/RuntimeOps.h" #include "zeek/script_opt/CPP/RuntimeOps.h"
#include "zeek/script_opt/CPP/RuntimeVec.h" #include "zeek/script_opt/CPP/RuntimeVec.h"
#include "zeek/script_opt/ScriptOpt.h" #include "zeek/script_opt/ScriptOpt.h"
namespace zeek namespace zeek::detail
{ {
using BoolValPtr = IntrusivePtr<zeek::BoolVal>; using BoolValPtr = IntrusivePtr<zeek::BoolVal>;
using IntValPtr = IntrusivePtr<zeek::IntVal>;
using CountValPtr = IntrusivePtr<zeek::CountVal>; using CountValPtr = IntrusivePtr<zeek::CountVal>;
using DoubleValPtr = IntrusivePtr<zeek::DoubleVal>; using DoubleValPtr = IntrusivePtr<zeek::DoubleVal>;
using StringValPtr = IntrusivePtr<zeek::StringVal>; using StringValPtr = IntrusivePtr<zeek::StringVal>;
using TimeValPtr = IntrusivePtr<zeek::TimeVal>;
using IntervalValPtr = IntrusivePtr<zeek::IntervalVal>; using IntervalValPtr = IntrusivePtr<zeek::IntervalVal>;
using PatternValPtr = IntrusivePtr<zeek::PatternVal>; using PatternValPtr = IntrusivePtr<zeek::PatternVal>;
using FuncValPtr = IntrusivePtr<zeek::FuncVal>; using FuncValPtr = IntrusivePtr<zeek::FuncVal>;

View file

@ -1,6 +1,6 @@
// See the file "COPYING" in the main distribution directory for copyright. // See the file "COPYING" in the main distribution directory for copyright.
#include "zeek/script_opt/CPP/RuntimeInit.h" #include "zeek/script_opt/CPP/RuntimeInitSupport.h"
#include "zeek/EventRegistry.h" #include "zeek/EventRegistry.h"
#include "zeek/module_util.h" #include "zeek/module_util.h"
@ -49,7 +49,7 @@ static int flag_init_CPP()
static int dummy = flag_init_CPP(); static int dummy = flag_init_CPP();
void register_type__CPP(TypePtr t, const std::string& name) void register_type__CPP(TypePtr t, const string& name)
{ {
if ( t->GetName().size() > 0 ) if ( t->GetName().size() > 0 )
// Already registered. // Already registered.
@ -113,8 +113,8 @@ void activate_bodies__CPP(const char* fn, const char* module, bool exported, Typ
auto v = fg->GetVal(); auto v = fg->GetVal();
if ( ! v ) if ( ! v )
{ // Create it. { // Create it.
std::vector<StmtPtr> no_bodies; vector<StmtPtr> no_bodies;
std::vector<int> no_priorities; vector<int> no_priorities;
auto sf = make_intrusive<ScriptFunc>(fn, ft, no_bodies, no_priorities); auto sf = make_intrusive<ScriptFunc>(fn, ft, no_bodies, no_priorities);
v = make_intrusive<FuncVal>(move(sf)); v = make_intrusive<FuncVal>(move(sf));
@ -193,6 +193,28 @@ FuncValPtr lookup_func__CPP(string name, vector<p_hash_type> hashes, const TypeP
{ {
auto ft = cast_intrusive<FuncType>(t); auto ft = cast_intrusive<FuncType>(t);
if ( hashes.empty() )
{
// This happens for functions that have at least one
// uncompilable body.
auto gl = lookup_ID(name.c_str(), GLOBAL_MODULE_NAME, false, false, false);
if ( ! gl )
{
reporter->CPPRuntimeError("non-compiled function %s missing", name.c_str());
exit(1);
}
auto v = gl->GetVal();
if ( ! v || v->GetType()->Tag() != TYPE_FUNC )
{
reporter->CPPRuntimeError("non-compiled function %s has an invalid value",
name.c_str());
exit(1);
}
return cast_intrusive<FuncVal>(v);
}
vector<StmtPtr> bodies; vector<StmtPtr> bodies;
vector<int> priorities; vector<int> priorities;

View file

@ -5,6 +5,7 @@
#pragma once #pragma once
#include "zeek/Val.h" #include "zeek/Val.h"
#include "zeek/script_opt/CPP/Attrs.h"
#include "zeek/script_opt/CPP/Func.h" #include "zeek/script_opt/CPP/Func.h"
namespace zeek namespace zeek

View file

@ -0,0 +1,528 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include "zeek/script_opt/CPP/RunTimeInits.h"
#include "zeek/Desc.h"
#include "zeek/File.h"
#include "zeek/RE.h"
#include "zeek/ZeekString.h"
#include "zeek/script_opt/CPP/RunTimeInitSupport.h"
using namespace std;
namespace zeek::detail
{
// Generates every element of the given cohort.  The i'th initializer in
// the cohort is paired with the i'th entry of "cohort_offsets", which gives
// the element's position in the run-time vector.
template <class T>
void CPP_IndexedInits<T>::InitializeCohortWithOffsets(InitsManager* im, int cohort,
                                                      const std::vector<int>& cohort_offsets)
	{
	auto next_offset = cohort_offsets.begin();

	for ( auto& item_init : this->inits[cohort] )
		Generate(im, this->inits_vec, *next_offset++, item_init);
	}
// Builds an enum value.  init_vals[0] is the offset of the enum's type and
// init_vals[1] the enum's integer value.
template <class T>
void CPP_IndexedInits<T>::Generate(InitsManager* im, std::vector<EnumValPtr>& ivec, int offset,
                                   ValElemVec& init_vals)
	{
	const int enum_val = init_vals[1];
	const auto& enum_type = im->Types(init_vals[0]);

	ivec[offset] = make_enum__CPP(enum_type, enum_val);
	}
// Builds a string value.  init_vals[0] is the offset of the string's
// characters and init_vals[1] the string's length.
template <class T>
void CPP_IndexedInits<T>::Generate(InitsManager* im, std::vector<StringValPtr>& ivec, int offset,
                                   ValElemVec& init_vals)
	{
	int str_len = init_vals[1];
	auto str_chars = im->Strings(init_vals[0]);

	ivec[offset] = make_intrusive<StringVal>(str_len, str_chars);
	}
// Builds a pattern value.  init_vals[0] is the offset of the pattern's text
// and init_vals[1] is non-zero if the pattern is case-insensitive.
template <class T>
void CPP_IndexedInits<T>::Generate(InitsManager* im, std::vector<PatternValPtr>& ivec, int offset,
                                   ValElemVec& init_vals)
	{
	auto pattern_text = im->Strings(init_vals[0]);
	auto matcher = new RE_Matcher(pattern_text);

	// Case-insensitivity has to be set prior to compiling.
	if ( init_vals[1] )
		matcher->MakeCaseInsensitive();

	matcher->Compile();

	// The matcher is handed over to the new pattern value.
	ivec[offset] = make_intrusive<PatternVal>(matcher);
	}
// Builds a list value.  Every element of init_vals is the offset of a
// constant to append to the list, in order.
template <class T>
void CPP_IndexedInits<T>::Generate(InitsManager* im, std::vector<ListValPtr>& ivec, int offset,
                                   ValElemVec& init_vals) const
	{
	auto list_val = make_intrusive<ListVal>(TYPE_ANY);

	const auto num_vals = init_vals.size();
	for ( auto i = 0U; i < num_vals; ++i )
		list_val->Append(im->ConstVals(init_vals[i]));

	ivec[offset] = list_val;
	}
// Builds a vector value.  init_vals[0] is the offset of the vector's type;
// the remaining elements are offsets of the constants to append, in order.
template <class T>
void CPP_IndexedInits<T>::Generate(InitsManager* im, std::vector<VectorValPtr>& ivec, int offset,
                                   ValElemVec& init_vals) const
	{
	const auto type_offset = init_vals[0];
	auto vec_type = cast_intrusive<VectorType>(im->Types(type_offset));
	auto vec_val = make_intrusive<VectorVal>(vec_type);

	const auto num_vals = init_vals.size();
	for ( auto i = 1U; i < num_vals; ++i )
		vec_val->Append(im->ConstVals(init_vals[i]));

	ivec[offset] = vec_val;
	}
// Builds a record value.  init_vals[0] is the offset of the record's type.
// Each subsequent element corresponds to the record's fields in order, and
// is either the offset of the constant to assign to the field, or negative
// if the field is to be left unassigned.
template <class T>
void CPP_IndexedInits<T>::Generate(InitsManager* im, std::vector<RecordValPtr>& ivec, int offset,
                                   ValElemVec& init_vals) const
	{
	const auto type_offset = init_vals[0];
	auto rec_type = cast_intrusive<RecordType>(im->Types(type_offset));
	auto rec_val = make_intrusive<RecordVal>(rec_type);

	// Field N's initializer resides at init_vals[N + 1].
	const auto num_vals = init_vals.size();
	for ( auto field = 0U; field + 1 < num_vals; ++field )
		{
		const auto val_offset = init_vals[field + 1];
		if ( val_offset < 0 )
			continue;

		rec_val->Assign(field, im->ConstVals(val_offset));
		}

	ivec[offset] = rec_val;
	}
// Builds a table value.  init_vals[0] is the offset of the table's type.
// The remaining elements come in pairs: the offset of the constant for an
// entry's index, followed by the offset of the constant for the entry's
// value (negative if there's no value).
template <class T>
void CPP_IndexedInits<T>::Generate(InitsManager* im, std::vector<TableValPtr>& ivec, int offset,
                                   ValElemVec& init_vals) const
	{
	const auto type_offset = init_vals[0];
	auto tbl_type = cast_intrusive<TableType>(im->Types(type_offset));
	auto tbl_val = make_intrusive<TableVal>(tbl_type);

	const auto num_vals = init_vals.size();
	for ( auto i = 1U; i < num_vals; i += 2 )
		{
		auto entry_index = im->ConstVals(init_vals[i]);

		ValPtr entry_value;
		const auto value_offset = init_vals[i + 1];
		if ( value_offset >= 0 )
			entry_value = im->ConstVals(value_offset);

		tbl_val->Assign(entry_index, entry_value);
		}

	ivec[offset] = tbl_val;
	}
// Builds a file value.  init_vals[0] holds a type offset that isn't needed
// here; init_vals[1] is the offset of the file's name.  The file is opened
// using mode "w".
template <class T>
void CPP_IndexedInits<T>::Generate(InitsManager* im, std::vector<FileValPtr>& ivec, int offset,
                                   ValElemVec& init_vals) const
	{
	// Skip over the (unused) type in init_vals[0].
	auto fn = im->Strings(init_vals[1]);
	auto fv = make_intrusive<FileVal>(make_intrusive<File>(fn, "w"));

	ivec[offset] = fv;
	}
// Builds a function value.  init_vals[0] is the offset of the function's
// type and init_vals[1] that of its name.  Any remaining elements are
// offsets of hashes, which are passed along to lookup_func__CPP().
template <class T>
void CPP_IndexedInits<T>::Generate(InitsManager* im, std::vector<FuncValPtr>& ivec, int offset,
                                   ValElemVec& init_vals) const
	{
	const auto type_offset = init_vals[0];
	auto func_name = im->Strings(init_vals[1]);

	std::vector<p_hash_type> body_hashes;
	const auto num_vals = init_vals.size();
	for ( auto i = 2U; i < num_vals; ++i )
		body_hashes.push_back(im->Hashes(init_vals[i]));

	ivec[offset] = lookup_func__CPP(func_name, body_hashes, im->Types(type_offset));
	}
// Builds a single attribute.  init_vals[0] is the attribute's tag and
// init_vals[1] specifies what kind of expression (if any) is associated
// with it.  For kinds that have an expression, init_vals[2] provides the
// argument needed to construct it; it is only consulted for those kinds, so
// initializers for expression-less attributes need not supply it.
template <class T>
void CPP_IndexedInits<T>::Generate(InitsManager* im, std::vector<AttrPtr>& ivec, int offset,
                                   ValElemVec& init_vals) const
	{
	auto tag = static_cast<AttrTag>(init_vals[0]);
	auto ae_tag = static_cast<AttrExprType>(init_vals[1]);

	// Remains nil if the attribute has no associated expression.
	ExprPtr e;

	switch ( ae_tag )
		{
		case AE_NONE:
			break;

		case AE_CONST:
			// The argument is the offset of a constant.
			e = make_intrusive<ConstExpr>(im->ConstVals(init_vals[2]));
			break;

		case AE_NAME:
			{
			// The argument is the offset of a global's name.
			auto name = im->Strings(init_vals[2]);
			auto gl = lookup_ID(name, GLOBAL_MODULE_NAME, false, false, false);
			ASSERT(gl);
			e = make_intrusive<NameExpr>(gl);
			break;
			}

		case AE_RECORD:
			{
			// The argument is the offset of a record type; the
			// expression is an empty record constructor coerced
			// to that type.
			auto t = im->Types(init_vals[2]);
			auto rt = cast_intrusive<RecordType>(t);
			auto empty_vals = make_intrusive<ListExpr>();
			auto construct = make_intrusive<RecordConstructorExpr>(empty_vals);
			e = make_intrusive<RecordCoerceExpr>(construct, rt);
			break;
			}

		case AE_CALL:
			// The argument is the offset of a call expression.
			e = im->CallExprs(init_vals[2]);
			break;
		}

	ivec[offset] = make_intrusive<Attr>(tag, e);
	}
// Builds a set of attributes.  Every element of init_vals is the offset of
// a single attribute belonging to the set.
template <class T>
void CPP_IndexedInits<T>::Generate(InitsManager* im, std::vector<AttributesPtr>& ivec, int offset,
                                   ValElemVec& init_vals) const
	{
	std::vector<AttrPtr> attr_list;

	const auto num_vals = init_vals.size();
	for ( auto i = 0U; i < num_vals; ++i )
		attr_list.push_back(im->Attrs(init_vals[i]));

	ivec[offset] = make_intrusive<Attributes>(attr_list, nullptr, false, false);
	}
// Instantiate the templates we'll need.  The member definitions above live
// in this source file, so each element type that's used gets an explicit
// instantiation here.
template class CPP_IndexedInits<EnumValPtr>;
template class CPP_IndexedInits<StringValPtr>;
template class CPP_IndexedInits<PatternValPtr>;
template class CPP_IndexedInits<ListValPtr>;
template class CPP_IndexedInits<VectorValPtr>;
template class CPP_IndexedInits<RecordValPtr>;
template class CPP_IndexedInits<TableValPtr>;
template class CPP_IndexedInits<FileValPtr>;
template class CPP_IndexedInits<FuncValPtr>;
template class CPP_IndexedInits<AttrPtr>;
template class CPP_IndexedInits<AttributesPtr>;
template class CPP_IndexedInits<TypePtr>;
// Pre-initializes the types in every cohort.  "offsets_vec" has one entry
// per cohort, each identifying the vector of run-time offsets for that
// cohort's elements.
void CPP_TypeInits::DoPreInits(InitsManager* im, const std::vector<int>& offsets_vec)
	{
	const auto num_cohorts = offsets_vec.size();

	for ( auto c = 0U; c < num_cohorts; ++c )
		{
		const auto& item_offsets = im->Indices(offsets_vec[c]);
		auto next_offset = item_offsets.begin();

		// The i'th initializer goes with the i'th offset.
		for ( auto& item_init : inits[c] )
			PreInit(im, *next_offset++, item_init);
		}
	}
// Pre-initializes a single type, if its kind calls for it.  Type lists get
// an empty TypeList, and records get whatever get_record_type__CPP()
// returns for their name (passing nil if the name is empty).  Other kinds
// of types are left alone.
void CPP_TypeInits::PreInit(InitsManager* im, int offset, ValElemVec& init_vals)
	{
	switch ( static_cast<TypeTag>(init_vals[0]) )
		{
		case TYPE_LIST:
			inits_vec[offset] = make_intrusive<TypeList>();
			return;

		case TYPE_RECORD:
			{
			auto rec_name = im->Strings(init_vals[1]);
			inits_vec[offset] = get_record_type__CPP(rec_name[0] ? rec_name : nullptr);
			return;
			}

		default:
			// No pre-initialization needed.
			return;
		}
	}
// Builds the type described by "init_vals" and stores it at the given
// offset in "ivec".  init_vals[0] holds the type's tag, which determines
// how the remaining elements are interpreted (see the per-type builders).
void CPP_TypeInits::Generate(InitsManager* im, vector<TypePtr>& ivec, int offset,
                             ValElemVec& init_vals) const
	{
	const auto type_tag = static_cast<TypeTag>(init_vals[0]);
	TypePtr built;

	switch ( type_tag )
		{
		// Types that need to be constructed.
		case TYPE_ENUM:
			built = BuildEnumType(im, init_vals);
			break;

		case TYPE_OPAQUE:
			built = BuildOpaqueType(im, init_vals);
			break;

		case TYPE_TYPE:
			built = BuildTypeType(im, init_vals);
			break;

		case TYPE_VECTOR:
			built = BuildVectorType(im, init_vals);
			break;

		case TYPE_LIST:
			built = BuildTypeList(im, init_vals, offset);
			break;

		case TYPE_TABLE:
			built = BuildTableType(im, init_vals);
			break;

		case TYPE_FUNC:
			built = BuildFuncType(im, init_vals);
			break;

		case TYPE_RECORD:
			built = BuildRecordType(im, init_vals, offset);
			break;

		// Types available directly via base_type().
		case TYPE_ADDR:
		case TYPE_ANY:
		case TYPE_BOOL:
		case TYPE_COUNT:
		case TYPE_DOUBLE:
		case TYPE_ERROR:
		case TYPE_FILE:
		case TYPE_INT:
		case TYPE_INTERVAL:
		case TYPE_PATTERN:
		case TYPE_PORT:
		case TYPE_STRING:
		case TYPE_SUBNET:
		case TYPE_TIME:
		case TYPE_TIMER:
		case TYPE_VOID:
			built = base_type(type_tag);
			break;

		default:
			ASSERT(0);
		}

	ivec[offset] = built;
	}
// Returns the enum type whose name is at offset init_vals[1].  If the type
// as obtained from get_enum_type__CPP() has no names yet, populates it
// using the remaining elements, which come in pairs: the offset of an
// enum constant's name followed by the constant's value.
TypePtr CPP_TypeInits::BuildEnumType(InitsManager* im, ValElemVec& init_vals) const
	{
	auto enum_type = get_enum_type__CPP(im->Strings(init_vals[1]));

	if ( ! enum_type->Names().empty() )
		// Already populated.
		return enum_type;

	const auto num_vals = init_vals.size();
	for ( auto i = 2U; i < num_vals; i += 2 )
		{
		auto const_name = im->Strings(init_vals[i]);
		auto const_val = init_vals[i + 1];
		enum_type->AddNameInternal(const_name, const_val);
		}

	return enum_type;
	}
// Creates an opaque type whose name is at offset init_vals[1].
TypePtr CPP_TypeInits::BuildOpaqueType(InitsManager* im, ValElemVec& init_vals) const
	{
	const auto name_offset = init_vals[1];
	return make_intrusive<OpaqueType>(im->Strings(name_offset));
	}
// Creates a type-type referring to the type at offset init_vals[1].
TypePtr CPP_TypeInits::BuildTypeType(InitsManager* im, ValElemVec& init_vals) const
	{
	const auto& referred_type = im->Types(init_vals[1]);
	return make_intrusive<TypeType>(referred_type);
	}
// Creates a vector type whose element type is at offset init_vals[1].
TypePtr CPP_TypeInits::BuildVectorType(InitsManager* im, ValElemVec& init_vals) const
	{
	const auto& elem_type = im->Types(init_vals[1]);
	return make_intrusive<VectorType>(elem_type);
	}
// Fills in the type list already residing at the given offset (created
// during pre-initialization) by appending the types whose offsets are
// given by the elements of init_vals following the tag.
TypePtr CPP_TypeInits::BuildTypeList(InitsManager* im, ValElemVec& init_vals, int offset) const
	{
	auto type_list = cast_intrusive<TypeList>(inits_vec[offset]);

	const auto num_vals = init_vals.size();
	for ( auto i = 1U; i < num_vals; ++i )
		type_list->Append(im->Types(init_vals[i]));

	return type_list;
	}
// Creates a table type.  init_vals[1] is the offset of the type list for
// the table's index, and init_vals[2] the offset of its yield type, or
// negative if it has none.
TypePtr CPP_TypeInits::BuildTableType(InitsManager* im, ValElemVec& init_vals) const
	{
	auto index_types = cast_intrusive<TypeList>(im->Types(init_vals[1]));

	TypePtr yield_type;
	const auto yield_offset = init_vals[2];
	if ( yield_offset >= 0 )
		yield_type = im->Types(yield_offset);

	return make_intrusive<TableType>(index_types, yield_type);
	}
// Creates a function type.  init_vals[1] is the offset of the record type
// for the parameters, init_vals[2] the offset of the yield type (negative
// if none), and init_vals[3] the function's flavor.
TypePtr CPP_TypeInits::BuildFuncType(InitsManager* im, ValElemVec& init_vals) const
	{
	auto params = cast_intrusive<RecordType>(im->Types(init_vals[1]));
	const auto yield_offset = init_vals[2];
	const auto flavor = static_cast<FunctionFlavor>(init_vals[3]);

	TypePtr yield_type;

	if ( yield_offset < 0 )
		{
		// With no explicit yield, functions and hooks get a "void"
		// yield, while other flavors are left with none at all.
		const bool use_void = flavor == FUNC_FLAVOR_FUNCTION || flavor == FUNC_FLAVOR_HOOK;
		if ( use_void )
			yield_type = base_type(TYPE_VOID);
		}
	else
		yield_type = im->Types(yield_offset);

	return make_intrusive<FuncType>(params, yield_type, flavor);
	}
// Returns the record type residing at the given offset (put there during
// pre-initialization).  If it has no fields yet, adds them based on the
// elements of init_vals starting at position 2, which come in triples:
// offset of the field's name, offset of its type, and offset of its
// attributes (negative if none).
TypePtr CPP_TypeInits::BuildRecordType(InitsManager* im, ValElemVec& init_vals, int offset) const
	{
	auto rec_type = cast_intrusive<RecordType>(inits_vec[offset]);
	ASSERT(rec_type);

	if ( rec_type->NumFields() != 0 )
		// Already has its fields.
		return rec_type;

	type_decl_list fields;

	const auto num_vals = init_vals.size();
	for ( auto i = 2U; i < num_vals; i += 3 )
		{
		auto fname = util::copy_string(im->Strings(init_vals[i]));
		auto ftype = im->Types(init_vals[i + 1]);

		AttributesPtr fattrs;
		const auto attrs_offset = init_vals[i + 2];
		if ( attrs_offset >= 0 )
			fattrs = im->Attributes(attrs_offset);

		fields.append(new TypeDecl(fname, ftype, fattrs));
		}

	rec_type->AddFieldsDirectly(fields);

	return rec_type;
	}
// Returns the offset of this mapping's field within its record type.  If
// the record doesn't have the field, it is added at the end of the record
// (using this mapping's field type and, if present, attributes) and the
// offset of the new field is returned.
int CPP_FieldMapping::ComputeOffset(InitsManager* im) const
	{
	auto rec_type = im->Types(rec)->AsRecordType();

	auto existing_offset = rec_type->FieldOffset(field_name.c_str());
	if ( existing_offset >= 0 )
		return existing_offset;

	// The field doesn't exist, so create it.  It will wind up in the
	// position just past the current last field.
	auto new_offset = rec_type->NumFields();

	auto id = util::copy_string(field_name.c_str());
	auto type = im->Types(field_type);

	AttributesPtr attrs;
	if ( field_attrs >= 0 )
		attrs = im->Attributes(field_attrs);

	type_decl_list new_fields;
	new_fields.append(new TypeDecl(id, type, attrs));
	rec_type->AddFieldsDirectly(new_fields);

	return new_offset;
	}
// Returns the value of this mapping's enum constant within its enum type.
// If the type doesn't have the constant, it is added, using a value equal
// to the type's current number of names.  It's an internal error if that
// value is already in use.
int CPP_EnumMapping::ComputeOffset(InitsManager* im) const
	{
	auto enum_type = im->Types(e_type)->AsEnumType();

	auto em_offset = enum_type->Lookup(e_name);
	if ( em_offset >= 0 )
		return em_offset;

	// The enum constant doesn't exist, so create it.
	em_offset = enum_type->Names().size();

	if ( enum_type->Lookup(em_offset) )
		reporter->InternalError("enum inconsistency while initializing compiled scripts");

	enum_type->AddNameInternal(e_name, em_offset);

	return em_offset;
	}
// Looks up the global and, if it doesn't already have a value and this
// initializer provides one, sets the value - and, in that case only, also
// any attributes the initializer provides.
void CPP_GlobalInit::Generate(InitsManager* im, std::vector<void*>& /* inits_vec */,
                              int /* offset */) const
	{
	global = lookup_global__CPP(name, im->Types(type), exported);

	if ( global->HasVal() || val < 0 )
		// Either it's already set, or there's nothing to set it to.
		return;

	global->SetVal(im->ConstVals(val));

	if ( attrs >= 0 )
		global->SetAttrs(im->Attributes(attrs));
	}
// Unpacks a flat array of ints into a collection of vectors-of-indices,
// appending them to "indices_set".  The array consists of a series of
// groups, each a count N followed by N values, with a negative value in
// place of a count marking the end of the array.
void generate_indices_set(int* inits, std::vector<std::vector<int>>& indices_set)
	{
	// An initial pass counts the groups, so that the outer vector can
	// be pre-allocated.
	int num_groups = 0;
	for ( const int* p = inits; *p >= 0; p += *p + 1 )
		++num_groups;

	indices_set.reserve(num_groups);

	// A second pass copies each group's values into its own vector.
	for ( const int* p = inits; *p >= 0; )
		{
		const int group_len = *p++;
		indices_set.emplace_back(p, p + group_len);
		p += group_len;
		}
	}
} // zeek::detail

View file

@ -0,0 +1,542 @@
// See the file "COPYING" in the main distribution directory for copyright.
// Classes for run-time initialization and management of C++ values used
// by the generated code.
// See InitsInfo.h for a discussion of initialization issues and the
// associated strategies for dealing with them.
#include "zeek/Expr.h"
#include "zeek/module_util.h"
#include "zeek/script_opt/CPP/RuntimeInitSupport.h"
#pragma once
namespace zeek::detail
{
using FileValPtr = IntrusivePtr<FileVal>;
using FuncValPtr = IntrusivePtr<FuncVal>;
class InitsManager;
// Abstract helper class for accessing the elements of an initialization
// vector.  The abstraction is needed because InitsManager (below) has to be
// able to refer to any of a range of templated classes.
class CPP_AbstractInitAccessor
	{
public:
	virtual ~CPP_AbstractInitAccessor() = default;

	// Returns the value at the given index.  This base version yields
	// nil regardless of the index.
	virtual ValPtr Get(int index) const { return nullptr; }
	};
// Convenient way to refer to an offset associated with a particular Zeek type.
using CPP_ValElem = std::pair<TypeTag, int>;
// This class groups together all of the vectors needed for run-time
// initialization.  We gather them together into a single object so as
// to avoid wiring in a set of globals that the various initialization
// methods have to know about.
//
// The object only holds references to the vectors (and map) passed to its
// constructor, so those must remain valid for as long as it's in use.
class InitsManager
	{
public:
	InitsManager(std::vector<CPP_ValElem>& _const_vals,
	             std::map<TypeTag, std::shared_ptr<CPP_AbstractInitAccessor>>& _consts,
	             std::vector<std::vector<int>>& _indices, std::vector<const char*>& _strings,
	             std::vector<p_hash_type>& _hashes, std::vector<TypePtr>& _types,
	             std::vector<AttributesPtr>& _attributes, std::vector<AttrPtr>& _attrs,
	             std::vector<CallExprPtr>& _call_exprs)
		: const_vals(_const_vals), consts(_consts), indices(_indices), strings(_strings),
		  hashes(_hashes), types(_types), attributes(_attributes), attrs(_attrs),
		  call_exprs(_call_exprs)
		{
		}

	// Provides generic access to Zeek constant values based on a single
	// index.  The index selects a (type tag, per-type index) pair, which
	// is then resolved using Consts().
	ValPtr ConstVals(int offset) const
		{
		auto& cv = const_vals[offset];
		return Consts(cv.first, cv.second);
		}

	// Retrieves the Zeek constant value for a particular Zeek type.
	//
	// The lookup uses at() rather than operator[] so that it can't
	// modify the map: operator[] would quietly add a nil accessor for
	// an unknown tag, which would then be dereferenced.  With at(), an
	// unknown tag instead raises std::out_of_range.
	ValPtr Consts(TypeTag tag, int index) const { return consts.at(tag)->Get(index); }

	// Accessors for the sundry initialization vectors, each retrieving
	// a specific element identified by an index/offset.  None of these
	// do any bounds-checking.
	const std::vector<int>& Indices(int offset) const { return indices[offset]; }
	const char* Strings(int offset) const { return strings[offset]; }
	const p_hash_type Hashes(int offset) const { return hashes[offset]; }
	const TypePtr& Types(int offset) const { return types[offset]; }
	const AttributesPtr& Attributes(int offset) const { return attributes[offset]; }
	const AttrPtr& Attrs(int offset) const { return attrs[offset]; }
	const CallExprPtr& CallExprs(int offset) const { return call_exprs[offset]; }

private:
	std::vector<CPP_ValElem>& const_vals;
	std::map<TypeTag, std::shared_ptr<CPP_AbstractInitAccessor>>& consts;
	std::vector<std::vector<int>>& indices;
	std::vector<const char*>& strings;
	std::vector<p_hash_type>& hashes;
	std::vector<TypePtr>& types;
	std::vector<AttributesPtr>& attributes;
	std::vector<AttrPtr>& attrs;
	std::vector<CallExprPtr>& call_exprs;
	};
// Base class for an initializer of one element of an initialization
// vector holding values of type T. Both hooks do nothing by default.
template <class T> class CPP_Init
	{
public:
	virtual ~CPP_Init() = default;

	// Hook for pre-initializing the element at the given offset of the
	// vector, for those initializers that need it.
	virtual void PreInit(InitsManager* /* im */, std::vector<T>& /* inits_vec */,
	                     int /* offset */) const
		{
		}

	// Hook for initializing the element at the given offset of the vector.
	virtual void Generate(InitsManager* /* im */, std::vector<T>& /* inits_vec */,
	                      int /* offset */) const
		{
		}
	};
// Abstract base for a collection of initializers, organized into cohorts.
// T1 is the element type of the vector being generated; T2 is the type
// holding one cohort's worth of initializers.
template <class T1, class T2> class CPP_AbstractInits
	{
public:
	CPP_AbstractInits(std::vector<T1>& _inits_vec, int _offsets_set, std::vector<T2> _inits)
		: inits_vec(_inits_vec), offsets_set(_offsets_set), inits(std::move(_inits))
		{
		// The vector needs one slot per initializer, summed over
		// all of the cohorts.
		int total_elems = 0;

		for ( const auto& c : inits )
			total_elems += c.size();

		inits_vec.resize(total_elems);
		}

	// Initializes the elements belonging to the given cohort. When
	// called for cohort 0, first runs the pre-initializations.
	void InitializeCohort(InitsManager* im, int cohort)
		{
		// This object's vector-of-vector-of-indices: one entry per
		// cohort, each locating that cohort's vector of offsets.
		const auto& cohort_index_locs = im->Indices(offsets_set);

		if ( cohort == 0 )
			DoPreInits(im, cohort_index_locs);

		InitializeCohortWithOffsets(im, cohort, im->Indices(cohort_index_locs[cohort]));
		}

protected:
	// Does the actual work of initializing a cohort, given the offsets
	// of its elements. A no-op unless overridden.
	virtual void InitializeCohortWithOffsets(InitsManager* /* im */, int /* cohort */,
	                                         const std::vector<int>& /* cohort_offsets */)
		{
		}

	// Pre-initializes every element that requires it. A no-op unless
	// overridden.
	virtual void DoPreInits(InitsManager* /* im */, const std::vector<int>& /* offsets_vec */)
		{
		}

	// Generates a single element. A no-op unless overridden.
	virtual void GenerateElement(InitsManager* /* im */, T2& /* init */, int /* offset */) { }

	// The complete initialization vector.
	std::vector<T1>& inits_vec;

	// Meta-index used to look up the vector-of-vector-of-indices.
	int offsets_set;

	// The initializers, indexed by cohort.
	std::vector<T2> inits;
	};
// Manages an initialization vector whose elements are built by "custom"
// initializers (tailored CPP_Init objects, as opposed to initializers
// based on indexing). A CPP_InitVec holds the initializers of one cohort.
template <class T> using CPP_InitVec = std::vector<std::shared_ptr<CPP_Init<T>>>;

template <class T> class CPP_CustomInits : public CPP_AbstractInits<T, CPP_InitVec<T>>
	{
public:
	CPP_CustomInits(std::vector<T>& _inits_vec, int _offsets_set,
	                std::vector<CPP_InitVec<T>> _inits)
		: CPP_AbstractInits<T, CPP_InitVec<T>>(_inits_vec, _offsets_set, std::move(_inits))
		{
		}

private:
	// Walks every cohort and gives each of its initializers the chance
	// to pre-initialize its element.
	void DoPreInits(InitsManager* im, const std::vector<int>& offsets_vec) override
		{
		const auto num_cohorts = this->inits.size();

		for ( auto c = 0U; c < num_cohorts; ++c )
			{
			const auto& pre_inits = this->inits[c];
			const auto& offsets = im->Indices(offsets_vec[c]);

			// The i'th initializer pairs with the i'th offset.
			auto next_offset = offsets.begin();
			for ( const auto& pi : pre_inits )
				{
				pi->PreInit(im, this->inits_vec, *next_offset);
				++next_offset;
				}
			}
		}

	// Runs each initializer of the given cohort against its element's
	// offset.
	void InitializeCohortWithOffsets(InitsManager* im, int cohort,
	                                 const std::vector<int>& cohort_offsets) override
		{
		const auto& initializers = this->inits[cohort];

		// The i'th initializer pairs with the i'th offset.
		auto next_offset = cohort_offsets.begin();
		for ( const auto& init : initializers )
			{
			init->Generate(im, this->inits_vec, *next_offset);
			++next_offset;
			}
		}
	};
// Accessor that exposes the elements of an initialization vector of
// type T through the generic CPP_AbstractInitAccessor interface.
template <class T> class CPP_InitAccessor : public CPP_AbstractInitAccessor
	{
public:
	CPP_InitAccessor(std::vector<T>& _inits_vec) : inits_vec(_inits_vec) { }

	// Returns the index'th element of the vector, converted to a ValPtr.
	ValPtr Get(int index) const override
		{
		return inits_vec[index];
		}

private:
	// The vector being accessed; held by reference, not copied.
	std::vector<T>& inits_vec;
	};
// A type used for initializations that are based on indices into
// initialization vectors. A ValElemVec holds the integer values describing
// a single initialization; a ValElemVecVec is a collection of those.
using ValElemVec = std::vector<int>;
using ValElemVecVec = std::vector<ValElemVec>;
// Manages an initialization vector of the given type whose elements are
// built up from previously constructed values in other initialization vectors.
// Each element's initializer is a ValElemVec (a vector of ints); these are
// handed to the base class grouped into one ValElemVecVec per cohort.
template <class T> class CPP_IndexedInits : public CPP_AbstractInits<T, ValElemVecVec>
{
public:
// _inits_vec is the vector to populate, _offsets_set the meta-index that
// is forwarded to CPP_AbstractInits, and _inits the per-cohort initializers.
CPP_IndexedInits(std::vector<T>& _inits_vec, int _offsets_set,
std::vector<ValElemVecVec> _inits)
: CPP_AbstractInits<T, ValElemVecVec>(_inits_vec, _offsets_set, std::move(_inits))
{
}
protected:
// Initializes the elements of the given cohort. Only declared here; the
// definition lives outside of this class body.
void InitializeCohortWithOffsets(InitsManager* im, int cohort,
const std::vector<int>& cohort_offsets) override;
// Note, in the following we pass in the inits_vec, even though
// the method will have direct access to it, because we want to
// use overloading to dispatch to custom generation for different
// types of values.
//
// NOTE(review): the first three overloads are non-const while the
// remaining ones are const. Their definitions are not visible here, so
// confirm against them before harmonizing the qualifiers.
void Generate(InitsManager* im, std::vector<EnumValPtr>& ivec, int offset,
ValElemVec& init_vals);
void Generate(InitsManager* im, std::vector<StringValPtr>& ivec, int offset,
ValElemVec& init_vals);
void Generate(InitsManager* im, std::vector<PatternValPtr>& ivec, int offset,
ValElemVec& init_vals);
void Generate(InitsManager* im, std::vector<ListValPtr>& ivec, int offset,
ValElemVec& init_vals) const;
void Generate(InitsManager* im, std::vector<VectorValPtr>& ivec, int offset,
ValElemVec& init_vals) const;
void Generate(InitsManager* im, std::vector<RecordValPtr>& ivec, int offset,
ValElemVec& init_vals) const;
void Generate(InitsManager* im, std::vector<TableValPtr>& ivec, int offset,
ValElemVec& init_vals) const;
void Generate(InitsManager* im, std::vector<FileValPtr>& ivec, int offset,
ValElemVec& init_vals) const;
void Generate(InitsManager* im, std::vector<FuncValPtr>& ivec, int offset,
ValElemVec& init_vals) const;
void Generate(InitsManager* im, std::vector<AttrPtr>& ivec, int offset,
ValElemVec& init_vals) const;
void Generate(InitsManager* im, std::vector<AttributesPtr>& ivec, int offset,
ValElemVec& init_vals) const;
// The TypePtr initialization vector requires special treatment, since
// it has to dispatch on subclasses of TypePtr.
// This base version asserts if it is ever reached; it is virtual so that a
// subclass can supply the real implementation.
virtual void Generate(InitsManager* im, std::vector<TypePtr>& ivec, int offset,
ValElemVec& init_vals) const
{
ASSERT(0);
}
};
// A specialization of CPP_IndexedInits that supports initializing based
// on subclasses of TypePtr.
class CPP_TypeInits : public CPP_IndexedInits<TypePtr>
	{
public:
	// _inits_vec is the vector of types to populate, _offsets_set the
	// meta-index forwarded to the base class, and _inits the per-cohort
	// initializers. _inits is taken by value and moved into the base
	// class, so that passing it along doesn't incur a deep copy of the
	// nested vectors.
	CPP_TypeInits(std::vector<TypePtr>& _inits_vec, int _offsets_set,
	              std::vector<std::vector<ValElemVec>> _inits)
		: CPP_IndexedInits<TypePtr>(_inits_vec, _offsets_set, std::move(_inits))
		{
		}

protected:
	// Pre-initialization of the types, both collectively and for the
	// single type at the given offset. Defined outside of this class body.
	void DoPreInits(InitsManager* im, const std::vector<int>& offsets_vec) override;
	void PreInit(InitsManager* im, int offset, ValElemVec& init_vals);

	// Supplies the TypePtr-specific generation that the base class
	// leaves unimplemented.
	void Generate(InitsManager* im, std::vector<TypePtr>& ivec, int offset,
	              ValElemVec& init_vals) const override;

	// Helpers that each construct one kind of type from the values in
	// init_vals.
	TypePtr BuildEnumType(InitsManager* im, ValElemVec& init_vals) const;
	TypePtr BuildOpaqueType(InitsManager* im, ValElemVec& init_vals) const;
	TypePtr BuildTypeType(InitsManager* im, ValElemVec& init_vals) const;
	TypePtr BuildVectorType(InitsManager* im, ValElemVec& init_vals) const;
	TypePtr BuildTypeList(InitsManager* im, ValElemVec& init_vals, int offset) const;
	TypePtr BuildTableType(InitsManager* im, ValElemVec& init_vals) const;
	TypePtr BuildFuncType(InitsManager* im, ValElemVec& init_vals) const;
	TypePtr BuildRecordType(InitsManager* im, ValElemVec& init_vals, int offset) const;
	};
// Abstract base for initializing basic (non-compound) constants. T1 is
// the Zeek type of the constant being constructed and T2 is the C++ type
// of the value used to initialize it.
//
// This could in principle derive from CPP_AbstractInits, but that would
// require turning the initializers into a vector-of-vector-of-T2, trading
// complexity here for complexity in InitsInfo. Since the methods needed
// here are inherently simpler, the class is instead kept separate so
// that they can stay that way.
template <class T1, typename T2> class CPP_AbstractBasicConsts
	{
public:
	CPP_AbstractBasicConsts(std::vector<T1>& _inits_vec, int _offsets_set, std::vector<T2> _inits)
		: inits_vec(_inits_vec), offsets_set(_offsets_set), inits(std::move(_inits))
		{
		// One slot in the vector per initializer.
		inits_vec.resize(inits.size());
		}

	// Initializes all of the constants. Basic constants live in a
	// single cohort, so the only valid cohort is 0.
	void InitializeCohort(InitsManager* im, int cohort)
		{
		ASSERT(cohort == 0);

		const auto& cohort_index_locs = im->Indices(offsets_set);
		const auto& elem_offsets = im->Indices(cohort_index_locs[cohort]);

		// The index'th initializer goes to the index'th offset.
		const int num_consts = static_cast<int>(inits.size());
		for ( int index = 0; index < num_consts; ++index )
			InitElem(im, elem_offsets[index], index);
		}

protected:
	// Builds the constant for inits[index] and stores it at the given
	// offset of inits_vec. Subclasses have to override this.
	virtual void InitElem(InitsManager* /* im */, int /* offset */, int /* index */)
		{
		ASSERT(0);
		}

	// See CPP_AbstractInits for the nature of these.
	std::vector<T1>& inits_vec;
	int offsets_set;
	std::vector<T2> inits;
	};
// Initializes basic constants of Zeek type T1 from initializers of C++
// type T2. T1 is an intrusive pointer to T3; for instance, when T1 is
// BoolValPtr, T3 is BoolVal.
template <class T1, typename T2, class T3>
class CPP_BasicConsts : public CPP_AbstractBasicConsts<T1, T2>
	{
public:
	CPP_BasicConsts(std::vector<T1>& _inits_vec, int _offsets_set, std::vector<T2> _inits)
		: CPP_AbstractBasicConsts<T1, T2>(_inits_vec, _offsets_set, std::move(_inits))
		{
		}

	// Constructs a T3 directly from the index'th initializer and places
	// it at the given offset.
	void InitElem(InitsManager* /* im */, int offset, int index) override
		{
		auto& slot = this->inits_vec[offset];
		slot = make_intrusive<T3>(this->inits[index]);
		}
	};
// Specific classes for basic constants that use string-based constructors.

// Initializes address constants. Each initializer is an index into the
// manager's strings, and the string found there is used to construct
// the AddrVal.
class CPP_AddrConsts : public CPP_AbstractBasicConsts<AddrValPtr, int>
	{
public:
	CPP_AddrConsts(std::vector<AddrValPtr>& _inits_vec, int _offsets_set, std::vector<int> _inits)
		: CPP_AbstractBasicConsts<AddrValPtr, int>(_inits_vec, _offsets_set, std::move(_inits))
		{
		}

	void InitElem(InitsManager* im, int offset, int index) override
		{
		const char* addr_text = im->Strings(this->inits[index]);
		this->inits_vec[offset] = make_intrusive<AddrVal>(addr_text);
		}
	};
// Initializes subnet constants. Each initializer is an index into the
// manager's strings, and the string found there is used to construct
// the SubNetVal.
class CPP_SubNetConsts : public CPP_AbstractBasicConsts<SubNetValPtr, int>
	{
public:
	CPP_SubNetConsts(std::vector<SubNetValPtr>& _inits_vec, int _offsets_set,
	                 std::vector<int> _inits)
		: CPP_AbstractBasicConsts<SubNetValPtr, int>(_inits_vec, _offsets_set, std::move(_inits))
		{
		}

	void InitElem(InitsManager* im, int offset, int index) override
		{
		const char* subnet_text = im->Strings(this->inits[index]);
		this->inits_vec[offset] = make_intrusive<SubNetVal>(subnet_text);
		}
	};
// Initializer for a Zeek global. Globals aren't stored in an
// initialization vector, so void* serves as the underlying element type.
class CPP_GlobalInit : public CPP_Init<void*>
	{
public:
	CPP_GlobalInit(IDPtr& _global, const char* _name, int _type, int _attrs, int _val,
	               bool _exported)
		: global(_global), name(_name), type(_type), attrs(_attrs), val(_val),
		  exported(_exported)
		{
		}

	// Defined outside of this class body. The vector and offset
	// arguments go unused, per the commented-out parameter names.
	void Generate(InitsManager* im, std::vector<void*>& /* inits_vec */,
	              int /* offset */) const override;

protected:
	// The values supplied at construction, kept for use by Generate().
	IDPtr& global;
	const char* name;
	int type;
	int attrs;
	int val;
	bool exported;
	};
// Abstract base for initializers that construct a CallExpr used to
// evaluate a Zeek expression.
class CPP_AbstractCallExprInit : public CPP_Init<CallExprPtr>
	{
public:
	CPP_AbstractCallExprInit() = default;
	};
// Constructs a CallExpr that calls a given CPPFunc subclass.
template <class T> class CPP_CallExprInit : public CPP_AbstractCallExprInit
{
public:
CPP_CallExprInit(CallExprPtr& _e_var) : CPP_AbstractCallExprInit(), e_var(_e_var) { }
void Generate(InitsManager* /* im */, std::vector<CallExprPtr>& inits_vec,
int offset) const override
{
auto wrapper_class = make_intrusive<T>();
auto func_val = make_intrusive<FuncVal>(wrapper_class);
auto func_expr = make_intrusive<ConstExpr>(func_val);
auto empty_args = make_intrusive<ListExpr>();
e_var = make_intrusive<CallExpr>(func_expr, empty_args);
inits_vec[offset] = e_var;
}
private:
// Where to store the expression once we've built it.
CallExprPtr& e_var;
};
// Abstract base for initializers that register a lambda defined in
// terms of a CPPStmt.
class CPP_AbstractLambdaRegistration : public CPP_Init<void*>
	{
public:
	CPP_AbstractLambdaRegistration() = default;
	};
// Registers a lambda that is defined in terms of the CPPStmt subclass T.
template <class T> class CPP_LambdaRegistration : public CPP_AbstractLambdaRegistration
	{
public:
	CPP_LambdaRegistration(const char* _name, int _func_type, p_hash_type _h, bool _has_captures)
		: name(_name), func_type(_func_type), h(_h), has_captures(_has_captures)
		{
		}

	// Instantiates T using the lambda's name, looks up the function
	// type via the manager, and registers the lambda. The vector and
	// offset arguments are not used.
	void Generate(InitsManager* im, std::vector<void*>& /* inits_vec */,
	              int /* offset */) const override
		{
		auto lambda = make_intrusive<T>(name);
		const auto& lambda_type = im->Types(func_type);
		register_lambda__CPP(lambda, h, name, lambda_type, has_captures);
		}

protected:
	const char* name; // passed to T's constructor and to the registration
	int func_type; // index used to retrieve the function's type
	p_hash_type h; // hash passed along to the registration
	bool has_captures; // passed along to the registration
	};
// Builds, at run-time, the mapping from the abstract record field offsets
// used when a set of scripts was compiled to the corresponding concrete
// offsets. The two can differ because other scripts that get loaded
// might extend various records.
class CPP_FieldMapping
	{
public:
	CPP_FieldMapping(int _rec, std::string _field_name, int _field_type, int _field_attrs)
		: rec(_rec), field_name(std::move(_field_name)), field_type(_field_type),
		  field_attrs(_field_attrs)
		{
		}

	// Defined outside of this class body.
	int ComputeOffset(InitsManager* im) const;

private:
	// Index for retrieving the record's type.
	int rec;

	// The field that this offset pertains to.
	std::string field_name;

	// The type of the field, should it need to be constructed.
	int field_type;

	// Likewise for the attributes of the field.
	int field_attrs;
	};
// Builds, at run-time, the mapping from the abstract enum values used
// when a set of scripts was compiled to the corresponding concrete
// values. The two can differ because other scripts that get loaded
// might extend the enum.
class CPP_EnumMapping
	{
public:
	CPP_EnumMapping(int _e_type, std::string _e_name)
		: e_type(_e_type), e_name(std::move(_e_name))
		{
		}

	// Defined outside of this class body.
	int ComputeOffset(InitsManager* im) const;

private:
	// Index to the EnumType.
	int e_type;

	// Which of that type's enum constants this concerns.
	std::string e_name;
	};
// Resolves a BiF by name and makes it available to compiled code by
// storing it in a C++ global.
class CPP_LookupBiF
	{
public:
	CPP_LookupBiF(zeek::Func*& _bif_func, std::string _bif_name)
		: bif_func(_bif_func), bif_name(std::move(_bif_name))
		{
		}

	// Performs the lookup and stores the result through bif_func.
	void ResolveBiF() const
		{
		const char* name = bif_name.c_str();
		bif_func = lookup_bif__CPP(name);
		}

protected:
	// Where the pointer to the BiF gets stored.
	zeek::Func*& bif_func;

	// The name of the BiF.
	std::string bif_name;
	};
// Everything needed to register a compiled function body, which makes
// the body available as a substitute for the corresponding AST. The
// compiler generates code that iterates over a vector of these in order
// to do the registrations.
struct CPP_RegisterBody
	{
	CPP_RegisterBody(std::string _func_name, void* _func, int _type_signature, int _priority,
	                 p_hash_type _h, std::vector<std::string> _events)
		: func_name(std::move(_func_name)), func(_func), type_signature(_type_signature),
		  priority(_priority), h(_h), events(std::move(_events))
		{
		}

	// Name of the function.
	std::string func_name;

	// Pointer to C++.
	void* func;

	int type_signature;
	int priority;
	p_hash_type h;
	std::vector<std::string> events;
	};
// Helper function that takes a (large) array of int's and from them
// constructs the corresponding vector-of-vector-of-indices. Each
// vector-of-indices is represented first by an int specifying its
// size, and then that many int's for its values. We recognize the
// end of the array upon encountering a "size" entry of -1.
//
// "inits" is the flattened array to read from; the resulting
// vectors-of-indices are delivered via "indices_set".
extern void generate_indices_set(int* inits, std::vector<std::vector<int>>& indices_set);
} // zeek::detail

View file

@ -245,7 +245,7 @@ void CPPCompile::GenSwitchStmt(const SwitchStmt* sw)
else else
sw_val = string("p_hash(") + GenExpr(e, GEN_VAL_PTR) + ")"; sw_val = string("p_hash(") + GenExpr(e, GEN_VAL_PTR) + ")";
Emit("switch ( %s ) {", sw_val.c_str()); Emit("switch ( %s ) {", sw_val);
++break_level; ++break_level;

View file

@ -51,13 +51,25 @@ template <class T> string CPPTracker<T>::KeyName(const T* key)
auto hash = map[key]; auto hash = map[key];
ASSERT(hash != 0); ASSERT(hash != 0);
auto rep = reps[hash];
if ( gi_s.count(rep) > 0 )
return gi_s[rep]->Name();
auto index = map2[hash]; auto index = map2[hash];
string scope; string scope;
if ( IsInherited(hash) ) if ( IsInherited(hash) )
scope = scope_prefix(scope2[hash]); scope = scope_prefix(scope2[hash]);
return scope + string(base_name) + "_" + Fmt(index) + "__CPP"; string ind = Fmt(index);
string full_name;
if ( single_global )
full_name = base_name + "__CPP[" + ind + "]";
else
full_name = base_name + "_" + ind + "__CPP";
return scope + full_name;
} }
template <class T> void CPPTracker<T>::LogIfNew(IntrusivePtr<T> key, int scope, FILE* log_file) template <class T> void CPPTracker<T>::LogIfNew(IntrusivePtr<T> key, int scope, FILE* log_file)

View file

@ -15,6 +15,7 @@
#pragma once #pragma once
#include "zeek/script_opt/CPP/HashMgr.h" #include "zeek/script_opt/CPP/HashMgr.h"
#include "zeek/script_opt/CPP/InitsInfo.h"
namespace zeek::detail namespace zeek::detail
{ {
@ -24,11 +25,13 @@ namespace zeek::detail
template <class T> class CPPTracker template <class T> class CPPTracker
{ {
public: public:
// The base name is used to construct key names. The mapper, // The base name is used to construct key names. "single_global",
// if present, maps hash values to information about the previously // if true, specifies that the names should be constructed as
// generated scope in which the value appears. // indexes into a single global, rather than as distinct globals.
CPPTracker(const char* _base_name, VarMapper* _mapper = nullptr) // The mapper, if present, maps hash values to information about
: base_name(_base_name), mapper(_mapper) // the previously generated scope in which the value appears.
CPPTracker(const char* _base_name, bool _single_global, VarMapper* _mapper = nullptr)
: base_name(_base_name), single_global(_single_global), mapper(_mapper)
{ {
} }
@ -40,6 +43,8 @@ public:
// is provided, then refrains from computing it. // is provided, then refrains from computing it.
void AddKey(IntrusivePtr<T> key, p_hash_type h = 0); void AddKey(IntrusivePtr<T> key, p_hash_type h = 0);
void AddInitInfo(const T* rep, std::shared_ptr<CPP_InitInfo> gi) { gi_s[rep] = std::move(gi); }
// Returns the (C++ variable) name associated with the given key. // Returns the (C++ variable) name associated with the given key.
std::string KeyName(const T* key); std::string KeyName(const T* key);
std::string KeyName(IntrusivePtr<T> key) { return KeyName(key.get()); } std::string KeyName(IntrusivePtr<T> key) { return KeyName(key.get()); }
@ -81,6 +86,8 @@ private:
// Maps keys to internal representations (i.e., hashes). // Maps keys to internal representations (i.e., hashes).
std::unordered_map<const T*, p_hash_type> map; std::unordered_map<const T*, p_hash_type> map;
std::unordered_map<const T*, std::shared_ptr<CPP_InitInfo>> gi_s;
// Maps internal representations to distinct values. These // Maps internal representations to distinct values. These
// may-or-may-not be indices into an "inherited" namespace scope. // may-or-may-not be indices into an "inherited" namespace scope.
std::unordered_map<p_hash_type, int> map2; std::unordered_map<p_hash_type, int> map2;
@ -98,6 +105,10 @@ private:
// Used to construct key names. // Used to construct key names.
std::string base_name; std::string base_name;
// Whether to base the names out of a single global, or distinct
// globals.
bool single_global;
// If non-nil, the mapper to consult for previous names. // If non-nil, the mapper to consult for previous names.
VarMapper* mapper; VarMapper* mapper;
}; };

View file

@ -91,170 +91,13 @@ string CPPCompile::GenericValPtrToGT(const string& expr, const TypePtr& t, GenTy
return string("cast_intrusive<") + IntrusiveVal(t) + ">(" + expr + ")"; return string("cast_intrusive<") + IntrusiveVal(t) + ">(" + expr + ")";
} }
void CPPCompile::ExpandTypeVar(const TypePtr& t)
{
auto tn = GenTypeName(t);
switch ( t->Tag() )
{
case TYPE_LIST:
ExpandListTypeVar(t, tn);
break;
case TYPE_RECORD:
ExpandRecordTypeVar(t, tn);
break;
case TYPE_ENUM:
ExpandEnumTypeVar(t, tn);
break;
case TYPE_TABLE:
ExpandTableTypeVar(t, tn);
break;
case TYPE_FUNC:
ExpandFuncTypeVar(t, tn);
break;
case TYPE_TYPE:
AddInit(t, tn,
string("make_intrusive<TypeType>(") + GenTypeName(t->AsTypeType()->GetType()) +
")");
break;
case TYPE_VECTOR:
AddInit(t, tn,
string("make_intrusive<VectorType>(") +
GenTypeName(t->AsVectorType()->Yield()) + ")");
break;
default:
break;
}
auto& script_type_name = t->GetName();
if ( ! script_type_name.empty() )
AddInit(t, "register_type__CPP(" + tn + ", \"" + script_type_name + "\");");
AddInit(t);
}
void CPPCompile::ExpandListTypeVar(const TypePtr& t, string& tn)
{
const auto& tl = t->AsTypeList()->GetTypes();
auto t_name = tn + "->AsTypeList()";
for ( const auto& tl_i : tl )
AddInit(t, t_name + "->Append(" + GenTypeName(tl_i) + ");");
}
void CPPCompile::ExpandRecordTypeVar(const TypePtr& t, string& tn)
{
auto r = t->AsRecordType()->Types();
if ( ! r )
return;
auto t_name = tn + "->AsRecordType()";
AddInit(t, string("if ( ") + t_name + "->NumFields() == 0 )");
AddInit(t, "{");
AddInit(t, "type_decl_list tl;");
for ( auto i = 0; i < r->length(); ++i )
{
const auto& td = (*r)[i];
AddInit(t, GenTypeDecl(td));
}
AddInit(t, t_name + "->AddFieldsDirectly(tl);");
AddInit(t, "}");
}
void CPPCompile::ExpandEnumTypeVar(const TypePtr& t, string& tn)
{
auto e_name = tn + "->AsEnumType()";
auto et = t->AsEnumType();
auto names = et->Names();
AddInit(t, "{ auto et = " + e_name + ";");
AddInit(t, "if ( et->Names().empty() ) {");
for ( const auto& name_pair : et->Names() )
AddInit(t, string("\tet->AddNameInternal(\"") + name_pair.first + "\", " +
Fmt(int(name_pair.second)) + ");");
AddInit(t, "}}");
}
void CPPCompile::ExpandTableTypeVar(const TypePtr& t, string& tn)
{
auto tbl = t->AsTableType();
const auto& indices = tbl->GetIndices();
const auto& yield = tbl->Yield();
if ( tbl->IsSet() )
AddInit(t, tn,
string("make_intrusive<SetType>(cast_intrusive<TypeList>(") + GenTypeName(indices) +
" ), nullptr)");
else
AddInit(t, tn,
string("make_intrusive<TableType>(cast_intrusive<TypeList>(") +
GenTypeName(indices) + "), " + GenTypeName(yield) + ")");
}
void CPPCompile::ExpandFuncTypeVar(const TypePtr& t, string& tn)
{
auto f = t->AsFuncType();
auto args_type_accessor = GenTypeName(f->Params());
const auto& yt = f->Yield();
string yield_type_accessor;
if ( yt )
yield_type_accessor += GenTypeName(yt);
else
yield_type_accessor += "nullptr";
auto fl = f->Flavor();
string fl_name;
if ( fl == FUNC_FLAVOR_FUNCTION )
fl_name = "FUNC_FLAVOR_FUNCTION";
else if ( fl == FUNC_FLAVOR_EVENT )
fl_name = "FUNC_FLAVOR_EVENT";
else if ( fl == FUNC_FLAVOR_HOOK )
fl_name = "FUNC_FLAVOR_HOOK";
auto type_init = string("make_intrusive<FuncType>(cast_intrusive<RecordType>(") +
args_type_accessor + "), " + yield_type_accessor + ", " + fl_name + ")";
AddInit(t, tn, type_init);
}
string CPPCompile::GenTypeDecl(const TypeDecl* td)
{
auto type_accessor = GenTypeName(td->type);
auto td_name = string("util::copy_string(\"") + td->id + "\")";
if ( td->attrs )
return string("tl.append(new TypeDecl(") + td_name + ", " + type_accessor + ", " +
AttrsName(td->attrs) + "));";
return string("tl.append(new TypeDecl(") + td_name + ", " + type_accessor + "));";
}
string CPPCompile::GenTypeName(const Type* t) string CPPCompile::GenTypeName(const Type* t)
{ {
ASSERT(processed_types.count(TypeRep(t)) > 0);
return types.KeyName(TypeRep(t)); return types.KeyName(TypeRep(t));
} }
const char* CPPCompile::TypeTagName(TypeTag tag) const const char* CPPCompile::TypeTagName(TypeTag tag)
{ {
switch ( tag ) switch ( tag )
{ {
@ -280,6 +123,8 @@ const char* CPPCompile::TypeTagName(TypeTag tag) const
return "TYPE_INT"; return "TYPE_INT";
case TYPE_INTERVAL: case TYPE_INTERVAL:
return "TYPE_INTERVAL"; return "TYPE_INTERVAL";
case TYPE_LIST:
return "TYPE_LIST";
case TYPE_OPAQUE: case TYPE_OPAQUE:
return "TYPE_OPAQUE"; return "TYPE_OPAQUE";
case TYPE_PATTERN: case TYPE_PATTERN:
@ -431,16 +276,16 @@ const char* CPPCompile::TypeType(const TypePtr& t)
} }
} }
void CPPCompile::RegisterType(const TypePtr& tp) shared_ptr<CPP_InitInfo> CPPCompile::RegisterType(const TypePtr& tp)
{ {
auto t = TypeRep(tp); auto t = TypeRep(tp);
if ( processed_types.count(t) > 0 ) if ( processed_types.count(t) > 0 )
return; return processed_types[t];
// Add the type before going further, to avoid loops due to types processed_types[t] = nullptr;
// that reference each other.
processed_types.insert(t); shared_ptr<CPP_InitInfo> gi;
switch ( t->Tag() ) switch ( t->Tag() )
{ {
@ -449,7 +294,6 @@ void CPPCompile::RegisterType(const TypePtr& tp)
case TYPE_BOOL: case TYPE_BOOL:
case TYPE_COUNT: case TYPE_COUNT:
case TYPE_DOUBLE: case TYPE_DOUBLE:
case TYPE_ENUM:
case TYPE_ERROR: case TYPE_ERROR:
case TYPE_INT: case TYPE_INT:
case TYPE_INTERVAL: case TYPE_INTERVAL:
@ -459,119 +303,53 @@ void CPPCompile::RegisterType(const TypePtr& tp)
case TYPE_TIME: case TYPE_TIME:
case TYPE_TIMER: case TYPE_TIMER:
case TYPE_VOID: case TYPE_VOID:
case TYPE_OPAQUE:
case TYPE_SUBNET: case TYPE_SUBNET:
case TYPE_FILE: case TYPE_FILE:
// Nothing to do. gi = make_shared<BaseTypeInfo>(this, tp);
break;
case TYPE_ENUM:
gi = make_shared<EnumTypeInfo>(this, tp);
break;
case TYPE_OPAQUE:
gi = make_shared<OpaqueTypeInfo>(this, tp);
break; break;
case TYPE_TYPE: case TYPE_TYPE:
{ gi = make_shared<TypeTypeInfo>(this, tp);
const auto& tt = t->AsTypeType()->GetType();
NoteNonRecordInitDependency(t, tt);
RegisterType(tt);
}
break; break;
case TYPE_VECTOR: case TYPE_VECTOR:
{ gi = make_shared<VectorTypeInfo>(this, tp);
const auto& yield = t->AsVectorType()->Yield();
NoteNonRecordInitDependency(t, yield);
RegisterType(yield);
}
break; break;
case TYPE_LIST: case TYPE_LIST:
RegisterListType(tp); gi = make_shared<ListTypeInfo>(this, tp);
break; break;
case TYPE_TABLE: case TYPE_TABLE:
RegisterTableType(tp); gi = make_shared<TableTypeInfo>(this, tp);
break; break;
case TYPE_RECORD: case TYPE_RECORD:
RegisterRecordType(tp); gi = make_shared<RecordTypeInfo>(this, tp);
break; break;
case TYPE_FUNC: case TYPE_FUNC:
RegisterFuncType(tp); gi = make_shared<FuncTypeInfo>(this, tp);
break; break;
default: default:
reporter->InternalError("bad type in CPPCompile::RegisterType"); reporter->InternalError("bad type in CPPCompile::RegisterType");
} }
AddInit(t); type_info->AddInstance(gi);
processed_types[t] = gi;
if ( ! types.IsInherited(t) ) types.AddInitInfo(t, gi);
{
auto t_rep = types.GetRep(t);
if ( t_rep == t )
GenPreInit(t);
else
NoteInitDependency(t, t_rep);
}
}
void CPPCompile::RegisterListType(const TypePtr& t) return gi;
{
const auto& tl = t->AsTypeList()->GetTypes();
for ( auto& tl_i : tl )
{
NoteNonRecordInitDependency(t, tl_i);
RegisterType(tl_i);
}
}
void CPPCompile::RegisterTableType(const TypePtr& t)
{
auto tbl = t->AsTableType();
const auto& indices = tbl->GetIndices();
const auto& yield = tbl->Yield();
NoteNonRecordInitDependency(t, indices);
RegisterType(indices);
if ( yield )
{
NoteNonRecordInitDependency(t, yield);
RegisterType(yield);
}
}
void CPPCompile::RegisterRecordType(const TypePtr& t)
{
auto r = t->AsRecordType()->Types();
if ( ! r )
return;
for ( const auto& r_i : *r )
{
NoteNonRecordInitDependency(t, r_i->type);
RegisterType(r_i->type);
if ( r_i->attrs )
{
NoteInitDependency(t, r_i->attrs);
RegisterAttributes(r_i->attrs);
}
}
}
void CPPCompile::RegisterFuncType(const TypePtr& t)
{
auto f = t->AsFuncType();
NoteInitDependency(t, TypeRep(f->Params()));
RegisterType(f->Params());
if ( f->Yield() )
{
NoteNonRecordInitDependency(t, f->Yield());
RegisterType(f->Yield());
}
} }
const char* CPPCompile::NativeAccessor(const TypePtr& t) const char* CPPCompile::NativeAccessor(const TypePtr& t)

View file

@ -83,7 +83,7 @@ void CPPCompile::CreateGlobal(const ID* g)
if ( pfs.Globals().count(g) == 0 ) if ( pfs.Globals().count(g) == 0 )
{ {
// Only used in the context of calls. If it's compilable, // Only used in the context of calls. If it's compilable,
// the we'll call it directly. // then we'll call it directly.
if ( compilable_funcs.count(gn) > 0 ) if ( compilable_funcs.count(gn) > 0 )
{ {
AddGlobal(gn, "zf", true); AddGlobal(gn, "zf", true);
@ -102,18 +102,12 @@ void CPPCompile::CreateGlobal(const ID* g)
Emit("IDPtr %s;", globals[gn]); Emit("IDPtr %s;", globals[gn]);
if ( pfs.Events().count(gn) > 0 ) if ( pfs.Events().count(gn) > 0 )
// This is an event that's also used as // This is an event that's also used as a variable.
// a variable.
Emit("EventHandlerPtr %s_ev;", globals[gn]); Emit("EventHandlerPtr %s_ev;", globals[gn]);
const auto& t = g->GetType(); auto gi = make_shared<GlobalInitInfo>(this, g, globals[gn]);
NoteInitDependency(g, TypeRep(t)); global_id_info->AddInstance(gi);
global_gis[g] = gi;
auto exported = g->IsExport() ? "true" : "false";
AddInit(g, globals[gn],
string("lookup_global__CPP(\"") + gn + "\", " + GenTypeName(t) + ", " + exported +
")");
} }
if ( is_bif ) if ( is_bif )
@ -124,40 +118,22 @@ void CPPCompile::CreateGlobal(const ID* g)
global_vars.emplace(g); global_vars.emplace(g);
} }
void CPPCompile::UpdateGlobalHashes() std::shared_ptr<CPP_InitInfo> CPPCompile::RegisterGlobal(const ID* g)
{ {
for ( auto& g : pfs.AllGlobals() ) if ( global_gis.count(g) == 0 )
{ {
auto gn = g->Name(); auto gn = string(g->Name());
if ( hm.HasGlobal(gn) ) if ( globals.count(gn) == 0 )
// Not new to this compilation run. // Create a name for it.
continue; (void)IDNameStr(g);
auto ht = pfs.HashType(g->GetType()); auto gi = make_shared<GlobalInitInfo>(this, g, globals[gn]);
global_id_info->AddInstance(gi);
p_hash_type hv = 0; global_gis[g] = gi;
if ( g->GetVal() )
hv = p_hash(g->GetVal());
fprintf(hm.HashFile(), "global\n%s\n", gn);
fprintf(hm.HashFile(), "%llu %llu\n", ht, hv);
// Record location information in the hash file for
// diagnostic purposes.
auto loc = g->GetLocationInfo();
fprintf(hm.HashFile(), "%s %d\n", loc->filename, loc->first_line);
// Flag any named record/enum types.
if ( g->IsType() )
{
const auto& t = g->GetType();
if ( t->Tag() == TYPE_RECORD )
fprintf(hm.HashFile(), "record\n%s\n", gn);
else if ( t->Tag() == TYPE_ENUM )
fprintf(hm.HashFile(), "enum\n%s\n", gn);
}
} }
return global_gis[g];
} }
void CPPCompile::AddBiF(const ID* b, bool is_var) void CPPCompile::AddBiF(const ID* b, bool is_var)
@ -170,12 +146,8 @@ void CPPCompile::AddBiF(const ID* b, bool is_var)
if ( AddGlobal(n, "bif", true) ) if ( AddGlobal(n, "bif", true) )
Emit("Func* %s;", globals[n]); Emit("Func* %s;", globals[n]);
auto lookup = string("lookup_bif__CPP(\"") + bn + "\")"; ASSERT(BiFs.count(globals[n]) == 0);
BiFs[globals[n]] = bn;
if ( standalone )
AddActivation(globals[n] + " = " + lookup + ";");
else
AddInit(b, globals[n], lookup);
} }
bool CPPCompile::AddGlobal(const string& g, const char* suffix, bool track) bool CPPCompile::AddGlobal(const string& g, const char* suffix, bool track)
@ -189,13 +161,8 @@ bool CPPCompile::AddGlobal(const string& g, const char* suffix, bool track)
if ( hm.HasGlobalVar(gn) ) if ( hm.HasGlobalVar(gn) )
gn = scope_prefix(hm.GlobalVarScope(gn)) + gn; gn = scope_prefix(hm.GlobalVarScope(gn)) + gn;
else else
{
new_var = true; new_var = true;
if ( track && update )
fprintf(hm.HashFile(), "global-var\n%s\n%d\n", gn.c_str(), addl_tag);
}
globals.emplace(g, gn); globals.emplace(g, gn);
} }
@ -207,18 +174,19 @@ void CPPCompile::RegisterEvent(string ev_name)
body_events[body_name].emplace_back(move(ev_name)); body_events[body_name].emplace_back(move(ev_name));
} }
const string& CPPCompile::IDNameStr(const ID* id) const const string& CPPCompile::IDNameStr(const ID* id)
{ {
if ( id->IsGlobal() ) if ( id->IsGlobal() )
{ {
auto g = string(id->Name()); auto g = string(id->Name());
ASSERT(globals.count(g) > 0); if ( globals.count(g) == 0 )
return ((CPPCompile*)(this))->globals[g]; CreateGlobal(id);
return globals[g];
} }
ASSERT(locals.count(id) > 0); ASSERT(locals.count(id) > 0);
return ((CPPCompile*)(this))->locals[id]; return locals[id];
} }
string CPPCompile::LocalName(const ID* l) const string CPPCompile::LocalName(const ID* l) const

View file

@ -399,7 +399,7 @@ static void generate_CPP(std::unique_ptr<ProfileFuncs>& pfs)
{ {
const auto hash_name = hash_dir + "CPP-hashes"; const auto hash_name = hash_dir + "CPP-hashes";
auto hm = std::make_unique<CPPHashManager>(hash_name.c_str(), analysis_options.add_CPP); auto hm = std::make_unique<CPPHashManager>(hash_name.c_str());
if ( analysis_options.gen_CPP ) if ( analysis_options.gen_CPP )
{ {
@ -413,26 +413,12 @@ static void generate_CPP(std::unique_ptr<ProfileFuncs>& pfs)
} }
} }
} }
else
{ // doing add-C++ instead, so look for previous compilations
for ( auto& func : funcs )
{
auto hash = func.Profile()->HashVal();
if ( compiled_scripts.count(hash) > 0 || hm->HasHash(hash) )
func.SetSkip(true);
}
// Now that we've presumably marked a lot of functions
// as skippable, recompute the global profile.
pfs = std::make_unique<ProfileFuncs>(funcs, is_CPP_compilable, false);
}
const auto gen_name = hash_dir + "CPP-gen.cc"; const auto gen_name = hash_dir + "CPP-gen.cc";
const auto addl_name = hash_dir + "CPP-gen-addl.h"; const auto addl_name = hash_dir + "CPP-gen-addl.h";
CPPCompile cpp(funcs, *pfs, gen_name, addl_name, *hm, CPPCompile cpp(funcs, *pfs, gen_name, addl_name, *hm, analysis_options.gen_standalone_CPP,
analysis_options.gen_CPP || analysis_options.update_CPP, analysis_options.report_uncompilable);
analysis_options.gen_standalone_CPP, analysis_options.report_uncompilable);
} }
static void find_when_funcs(std::unique_ptr<ProfileFuncs>& pfs, static void find_when_funcs(std::unique_ptr<ProfileFuncs>& pfs,