Merge remote-tracking branch 'origin/topic/vern/CPP-speedup'

* origin/topic/vern/CPP-speedup:
  use iterator-based idiom for check-if-present-then-access
  const-ify member function
  switch looping over vectors to use iterators
  remove unused local variables
  efficiency fix - return a reference rather than a copy of a map
  fix for case-sensitive file systems
  geez sometimes it's signed sometimes it's not
  documentation updates
  remove -O add-C++ and -O update-C++ options
  extensive rewrite of generation & execution of run-time initialization
  factoring out CPPEscape to be a standalone function
  removing unused SubNetType class
This commit is contained in:
Tim Wojtulewicz 2021-11-23 19:43:48 -07:00
commit e0b4659488
35 changed files with 3574 additions and 1714 deletions

View file

@ -1,3 +1,9 @@
4.2.0-dev.385 | 2021-11-23 19:43:48 -0700
* Changes to speed up compilation of Compiled-to-C++ Zeek Scripts (Vern Paxson, Corelight)
* removing unused SubNetType class (Vern Paxson, Corelight)
4.2.0-dev.371 | 2021-11-23 19:41:10 -0700 4.2.0-dev.371 | 2021-11-23 19:41:10 -0700
* Add new tunnel packet analyzers, remove old ones (Tim Wojtulewicz, Corelight) * Add new tunnel packet analyzers, remove old ones (Tim Wojtulewicz, Corelight)

View file

@ -1 +1 @@
4.2.0-dev.371 4.2.0-dev.385

View file

@ -392,7 +392,9 @@ set(MAIN_SRCS
script_opt/CPP/GenFunc.cc script_opt/CPP/GenFunc.cc
script_opt/CPP/HashMgr.cc script_opt/CPP/HashMgr.cc
script_opt/CPP/Inits.cc script_opt/CPP/Inits.cc
script_opt/CPP/RuntimeInit.cc script_opt/CPP/InitsInfo.cc
script_opt/CPP/RuntimeInits.cc
script_opt/CPP/RuntimeInitSupport.cc
script_opt/CPP/RuntimeOps.cc script_opt/CPP/RuntimeOps.cc
script_opt/CPP/RuntimeVec.cc script_opt/CPP/RuntimeVec.cc
script_opt/CPP/Stmts.cc script_opt/CPP/Stmts.cc

View file

@ -204,8 +204,6 @@ static void print_analysis_help()
fprintf(stderr, " report-uncompilable print names of functions that can't be compiled\n"); fprintf(stderr, " report-uncompilable print names of functions that can't be compiled\n");
fprintf(stderr, " use-C++ use available C++ script bodies\n"); fprintf(stderr, " use-C++ use available C++ script bodies\n");
fprintf(stderr, "\n experimental options for incremental compilation:\n"); fprintf(stderr, "\n experimental options for incremental compilation:\n");
fprintf(stderr, " add-C++ generate private C++ for any missing script bodies\n");
fprintf(stderr, " update-C++ generate reusable C++ for any missing script bodies\n");
} }
static void set_analysis_option(const char* opt, Options& opts) static void set_analysis_option(const char* opt, Options& opts)
@ -225,9 +223,7 @@ static void set_analysis_option(const char* opt, Options& opts)
exit(0); exit(0);
} }
if ( util::streq(opt, "add-C++") ) if ( util::streq(opt, "dump-uds") )
a_o.add_CPP = true;
else if ( util::streq(opt, "dump-uds") )
a_o.activate = a_o.dump_uds = true; a_o.activate = a_o.dump_uds = true;
else if ( util::streq(opt, "dump-xform") ) else if ( util::streq(opt, "dump-xform") )
a_o.activate = a_o.dump_xform = true; a_o.activate = a_o.dump_xform = true;
@ -255,8 +251,6 @@ static void set_analysis_option(const char* opt, Options& opts)
a_o.inliner = a_o.report_recursive = true; a_o.inliner = a_o.report_recursive = true;
else if ( util::streq(opt, "report-uncompilable") ) else if ( util::streq(opt, "report-uncompilable") )
a_o.report_uncompilable = true; a_o.report_uncompilable = true;
else if ( util::streq(opt, "update-C++") )
a_o.update_CPP = true;
else if ( util::streq(opt, "use-C++") ) else if ( util::streq(opt, "use-C++") )
a_o.use_CPP = true; a_o.use_CPP = true;
else if ( util::streq(opt, "xform") ) else if ( util::streq(opt, "xform") )

View file

@ -123,18 +123,6 @@ RecordType* Type::AsRecordType()
return (RecordType*)this; return (RecordType*)this;
} }
const SubNetType* Type::AsSubNetType() const
{
CHECK_TYPE_TAG(TYPE_SUBNET, "Type::AsSubNetType");
return (const SubNetType*)this;
}
SubNetType* Type::AsSubNetType()
{
CHECK_TYPE_TAG(TYPE_SUBNET, "Type::AsSubNetType");
return (SubNetType*)this;
}
const FuncType* Type::AsFuncType() const const FuncType* Type::AsFuncType() const
{ {
CHECK_TYPE_TAG(TYPE_FUNC, "Type::AsFuncType"); CHECK_TYPE_TAG(TYPE_FUNC, "Type::AsFuncType");
@ -1447,16 +1435,6 @@ string RecordType::GetFieldDeprecationWarning(int field, bool has_check) const
return ""; return "";
} }
SubNetType::SubNetType() : Type(TYPE_SUBNET) { }
void SubNetType::Describe(ODesc* d) const
{
if ( d->IsReadable() )
d->Add("subnet");
else
d->Add(int(Tag()));
}
FileType::FileType(TypePtr yield_type) : Type(TYPE_FILE), yield(std::move(yield_type)) { } FileType::FileType(TypePtr yield_type) : Type(TYPE_FILE), yield(std::move(yield_type)) { }
FileType::~FileType() = default; FileType::~FileType() = default;

View file

@ -152,7 +152,6 @@ class TypeList;
class TableType; class TableType;
class SetType; class SetType;
class RecordType; class RecordType;
class SubNetType;
class FuncType; class FuncType;
class EnumType; class EnumType;
class VectorType; class VectorType;
@ -165,7 +164,6 @@ using TypeListPtr = IntrusivePtr<TypeList>;
using TableTypePtr = IntrusivePtr<TableType>; using TableTypePtr = IntrusivePtr<TableType>;
using SetTypePtr = IntrusivePtr<SetType>; using SetTypePtr = IntrusivePtr<SetType>;
using RecordTypePtr = IntrusivePtr<RecordType>; using RecordTypePtr = IntrusivePtr<RecordType>;
using SubNetTypePtr = IntrusivePtr<SubNetType>;
using FuncTypePtr = IntrusivePtr<FuncType>; using FuncTypePtr = IntrusivePtr<FuncType>;
using EnumTypePtr = IntrusivePtr<EnumType>; using EnumTypePtr = IntrusivePtr<EnumType>;
using VectorTypePtr = IntrusivePtr<VectorType>; using VectorTypePtr = IntrusivePtr<VectorType>;
@ -226,9 +224,6 @@ public:
const RecordType* AsRecordType() const; const RecordType* AsRecordType() const;
RecordType* AsRecordType(); RecordType* AsRecordType();
const SubNetType* AsSubNetType() const;
SubNetType* AsSubNetType();
const FuncType* AsFuncType() const; const FuncType* AsFuncType() const;
FuncType* AsFuncType(); FuncType* AsFuncType();
@ -700,13 +695,6 @@ protected:
type_decl_list* types; type_decl_list* types;
}; };
class SubNetType final : public Type
{
public:
SubNetType();
void Describe(ODesc* d) const override;
};
class FileType final : public Type class FileType final : public Type
{ {
public: public:

View file

@ -7,42 +7,55 @@ namespace zeek::detail
using namespace std; using namespace std;
void CPPCompile::RegisterAttributes(const AttributesPtr& attrs) shared_ptr<CPP_InitInfo> CPPCompile::RegisterAttributes(const AttributesPtr& attrs)
{ {
if ( ! attrs || attributes.HasKey(attrs) ) if ( ! attrs )
return; return nullptr;
auto a = attrs.get();
auto pa = processed_attrs.find(a);
if ( pa != processed_attrs.end() )
return pa->second;
attributes.AddKey(attrs); attributes.AddKey(attrs);
AddInit(attrs);
auto a_rep = attributes.GetRep(attrs); // The cast is just so we can make an IntrusivePtr.
if ( a_rep != attrs.get() ) auto a_rep = const_cast<Attributes*>(attributes.GetRep(attrs));
if ( a_rep != a )
{ {
NoteInitDependency(attrs.get(), a_rep); AttributesPtr a_rep_ptr = {NewRef{}, a_rep};
return; processed_attrs[a] = RegisterAttributes(a_rep_ptr);
return processed_attrs[a];
} }
for ( const auto& a : attrs->GetAttrs() ) for ( const auto& a : attrs->GetAttrs() )
{ (void)RegisterAttr(a);
const auto& e = a->GetExpr();
if ( e )
{
if ( IsSimpleInitExpr(e) )
{
// Make sure any dependencies it has get noted.
(void)GenExpr(e, GEN_VAL_PTR);
continue;
}
init_exprs.AddKey(e); shared_ptr<CPP_InitInfo> gi = make_shared<AttrsInfo>(this, attrs);
AddInit(e); attrs_info->AddInstance(gi);
NoteInitDependency(attrs, e); processed_attrs[a] = gi;
auto e_rep = init_exprs.GetRep(e); return gi;
if ( e_rep != e.get() ) }
NoteInitDependency(e.get(), e_rep);
} shared_ptr<CPP_InitInfo> CPPCompile::RegisterAttr(const AttrPtr& attr)
} {
auto a = attr.get();
auto pa = processed_attr.find(a);
if ( pa != processed_attr.end() )
return pa->second;
const auto& e = a->GetExpr();
if ( e && ! IsSimpleInitExpr(e) )
init_exprs.AddKey(e);
auto gi = make_shared<AttrInfo>(this, attr);
attr_info->AddInstance(gi);
processed_attr[a] = gi;
return gi;
} }
void CPPCompile::BuildAttrs(const AttributesPtr& attrs, string& attr_tags, string& attr_vals) void CPPCompile::BuildAttrs(const AttributesPtr& attrs, string& attr_tags, string& attr_vals)
@ -72,78 +85,9 @@ void CPPCompile::BuildAttrs(const AttributesPtr& attrs, string& attr_tags, strin
attr_vals = string("{") + attr_vals + "}"; attr_vals = string("{") + attr_vals + "}";
} }
void CPPCompile::GenAttrs(const AttributesPtr& attrs) const char* CPPCompile::AttrName(AttrTag t)
{ {
NL(); switch ( t )
Emit("AttributesPtr %s", AttrsName(attrs));
StartBlock();
const auto& avec = attrs->GetAttrs();
Emit("auto attrs = std::vector<AttrPtr>();");
AddInit(attrs);
for ( const auto& attr : avec )
{
const auto& e = attr->GetExpr();
if ( ! e )
{
Emit("attrs.emplace_back(make_intrusive<Attr>(%s));", AttrName(attr));
continue;
}
NoteInitDependency(attrs, e);
AddInit(e);
string e_arg;
if ( IsSimpleInitExpr(e) )
e_arg = GenAttrExpr(e);
else
e_arg = InitExprName(e);
Emit("attrs.emplace_back(make_intrusive<Attr>(%s, %s));", AttrName(attr), e_arg);
}
Emit("return make_intrusive<Attributes>(attrs, nullptr, true, false);");
EndBlock();
}
string CPPCompile::GenAttrExpr(const ExprPtr& e)
{
switch ( e->Tag() )
{
case EXPR_CONST:
return string("make_intrusive<ConstExpr>(") + GenExpr(e, GEN_VAL_PTR) + ")";
case EXPR_NAME:
NoteInitDependency(e, e->AsNameExpr()->IdPtr());
return string("make_intrusive<NameExpr>(") + globals[e->AsNameExpr()->Id()->Name()] +
")";
case EXPR_RECORD_COERCE:
NoteInitDependency(e, TypeRep(e->GetType()));
return string("make_intrusive<RecordCoerceExpr>(make_intrusive<RecordConstructorExpr>("
"make_intrusive<ListExpr>()), cast_intrusive<RecordType>(") +
GenTypeName(e->GetType()) + "))";
default:
reporter->InternalError("bad expr tag in CPPCompile::GenAttrs");
return "###";
}
}
string CPPCompile::AttrsName(const AttributesPtr& a)
{
return attributes.KeyName(a) + "()";
}
const char* CPPCompile::AttrName(const AttrPtr& attr)
{
switch ( attr->Tag() )
{ {
case ATTR_OPTIONAL: case ATTR_OPTIONAL:
return "ATTR_OPTIONAL"; return "ATTR_OPTIONAL";

View file

@ -0,0 +1,19 @@
// See the file "COPYING" in the main distribution directory for copyright.
// Definitions associated with type attributes.
#pragma once
namespace zeek::detail
{
// Classifies the expression (if any) attached to an attribute, ranging
// from no expression at all up to ones that need general evaluation.
enum AttrExprType
{
AE_NONE, // the attribute doesn't have an expression
AE_CONST, // easy expression - a constant (ConstExpr)
AE_NAME, // easy expression - a global (NameExpr)
AE_RECORD, // an empty record cast to a given type
AE_CALL, // everything else - requires a lambda, essentially
};
} // zeek::detail

View file

@ -5,18 +5,20 @@
#include "zeek/Desc.h" #include "zeek/Desc.h"
#include "zeek/script_opt/CPP/Func.h" #include "zeek/script_opt/CPP/Func.h"
#include "zeek/script_opt/CPP/HashMgr.h" #include "zeek/script_opt/CPP/HashMgr.h"
#include "zeek/script_opt/CPP/InitsInfo.h"
#include "zeek/script_opt/CPP/Tracker.h" #include "zeek/script_opt/CPP/Tracker.h"
#include "zeek/script_opt/CPP/Util.h" #include "zeek/script_opt/CPP/Util.h"
#include "zeek/script_opt/ScriptOpt.h" #include "zeek/script_opt/ScriptOpt.h"
// We structure the compiler for generating C++ versions of Zeek script // We structure the compiler for generating C++ versions of Zeek script
// bodies as a single large class. While we divide the compiler's // bodies mainly as a single large class. While we divide the compiler's
// functionality into a number of groups (see below), these interact with // functionality into a number of groups (see below), these interact with
// one another, and in particular with various member variables, enough // one another, and in particular with various member variables, enough
// so that it's not clear there's benefit to further splitting the // so that it's not clear there's benefit to further splitting the
// functionality into multiple classes. (Some splitting has already been // functionality into multiple classes. (Some splitting has already been
// done for more self-contained functionality, resulting in the CPPTracker // done for more self-contained functionality, resulting in the CPPTracker
// and CPPHashManager classes.) // and CPPHashManager classes, and initialization information in
// InitsInfo.{h,cc} and RuntimeInits.{h,cc}.)
// //
// Most aspects of translating to C++ have a straightforward nature. // Most aspects of translating to C++ have a straightforward nature.
// We can turn many Zeek script statements directly into the C++ that's // We can turn many Zeek script statements directly into the C++ that's
@ -45,26 +47,6 @@
// all of the scripts loaded in "bare" mode, plus those for foo.zeek; and // all of the scripts loaded in "bare" mode, plus those for foo.zeek; and
// without the "-b" for all of the default scripts plus those in foo.zeek. // without the "-b" for all of the default scripts plus those in foo.zeek.
// //
// One of the design goals employed is to support "incremental" compilation,
// i.e., compiling *additional* Zeek scripts at a later point after an
// initial compilation. This comes in two forms.
//
// "-O update-C++" produces C++ code that extends that already compiled,
// in a manner where subsequent compilations can leverage both the original
// and the newly added. Such compilations *must* be done in a consistent
// context (for example, any types extended in the original are extended in
// the same manner - plus then perhaps further extensions - in the updated
// code).
//
// "-O add-C++" instead produces C++ code that (1) will not be leveraged in
// any subsequent compilations, and (2) can be inconsistent with other
// "-O add-C++" code added in the future. The main use of this feature is
// to support compiling polyglot versions of Zeek scripts used to run
// the test suite.
//
// Zeek invocations specifying "-O use-C++" will activate any code compiled
// into the zeek binary; otherwise, the code lies dormant.
//
// "-O report-C++" reports on which compiled functions will/won't be used // "-O report-C++" reports on which compiled functions will/won't be used
// (including ones that are available but not relevant to the scripts loaded // (including ones that are available but not relevant to the scripts loaded
// on the command line). This can be useful when debugging to make sure // on the command line). This can be useful when debugging to make sure
@ -104,29 +86,41 @@
// //
// Emit Low-level code generation. // Emit Low-level code generation.
// //
// Of these, Inits is probably the most subtle. It turns out to be // Of these, Inits is the most subtle and complex. There are two major
// very tricky ensuring that we create run-time variables in the // challenges in creating run-time values (such as Zeek types and constants).
// proper order. For example, a global might need a record type to be
// defined; one of the record's fields is a table; that table contains
// another record; one of that other record's fields is the original
// record (recursion); another field has an &default expression that
// requires the compiler to generate a helper function to construct
// the expression dynamically; and that helper function might in turn
// refer to other types that require initialization.
// //
// To deal with these dependencies, for every run-time object the compiler // First, generating individual code for creating each of these winds up
// maintains (1) all of the other run-time objects on which its initialization // incurring unacceptable compile times (for example, clang compiling all
// depends, and (2) the C++ statements needed to initialize it, once those // of the base scripts with optimization takes many hours on a high-end
// other objects have been initialized. It then beings initialization with // laptop). As a result, we employ a table-driven approach that compiles
// objects that have no dependencies, marks those as done (essentially), finds // much faster (though still taking many minutes on the same high-end laptop,
// objects that now can be initialized and emits their initializations, // running about 40x faster however).
// marks those as done, etc.
// //
// Below in declaring the CPPCompiler class, we group methods in accordance // Second, initializations frequently rely upon *other* initializations
// with those listed above. We also locate member variables with the group // having occurred first. For example, a global might need a record type
// most relevant for their usage. However, keep in mind that many member // to be defined; one of the record's fields is a table; that table contains
// variables are used by multiple groups, which is why we haven't created // another record; one of that other record's fields is the original record
// distinct per-group classes. // (recursion); another field has an &default expression that requires the
// compiler to generate a helper function to construct the expression
// dynamically; and that helper function might in turn refer to other types
// that require initialization. What's required is a framework for ensuring
// that everything occurs in the proper order.
//
// The logic for dealing with these complexities is isolated into several
// sets of classes. InitsInfo.{h,cc} provides the classes related to tracking
// how to generate initializations in the proper order. RuntimeInits.{h,cc}
// provides the classes used when initializing generated code in order
// to instantiate all of the necessary values. See those files for discussions
// on how they address the points framed above.
//
// In declaring the CPPCompiler class, we group methods in accordance with
// those listed above, locating member variables with the group most relevant
// for their usage. However, keep in mind that many member variables are
// used by multiple groups, which is why we haven't created distinct
// per-group classes. In addition, we make a number of methods public
// in order to avoid the need for numerous "friend" declarations to allow
// associated classes (like those for initialization) access to the
// necessary compiler methods.
namespace zeek::detail namespace zeek::detail
{ {
@ -135,10 +129,128 @@ class CPPCompile
{ {
public: public:
CPPCompile(std::vector<FuncInfo>& _funcs, ProfileFuncs& pfs, const std::string& gen_name, CPPCompile(std::vector<FuncInfo>& _funcs, ProfileFuncs& pfs, const std::string& gen_name,
const std::string& addl_name, CPPHashManager& _hm, bool _update, bool _standalone, const std::string& addl_name, CPPHashManager& _hm, bool _standalone,
bool report_uncompilable); bool report_uncompilable);
~CPPCompile(); ~CPPCompile();
// Constructing a CPPCompile object does all of the compilation.
// The public methods here are for use by helper classes.
// Tracks the given type (with support methods for ones that
// are complicated), recursively including its sub-types, and
// creating initializations for constructing C++ variables
// representing the types.
//
// Returns the initialization info associated with the type.
std::shared_ptr<CPP_InitInfo> RegisterType(const TypePtr& t);
// Easy access to the global offset and the initialization
// cohort associated with a given type.
int TypeOffset(const TypePtr& t) { return GI_Offset(RegisterType(t)); }
int TypeCohort(const TypePtr& t) { return GI_Cohort(RegisterType(t)); }
// Tracks a Zeek ValPtr used as a constant value. These occur
// in two contexts: directly as constant expressions, and indirectly
// as elements within aggregate constants (such as in vector
// initializers).
//
// Returns the associated initialization info. In addition,
// consts_offset returns an offset into an initialization-time
// global that tracks all constructed globals, providing
// general access to them for aggregate constants.
std::shared_ptr<CPP_InitInfo> RegisterConstant(const ValPtr& vp, int& consts_offset);
// Tracks a global to generate the necessary initialization.
// Returns the associated initialization info.
std::shared_ptr<CPP_InitInfo> RegisterGlobal(const ID* g);
// Tracks a use of the given set of attributes, including
// initialization dependencies and the generation of any
// associated expressions.
//
// Returns the initialization info associated with the set of
// attributes.
std::shared_ptr<CPP_InitInfo> RegisterAttributes(const AttributesPtr& attrs);
// Convenient access to the global offset associated with
// a set of Attributes.
int AttributesOffset(const AttributesPtr& attrs)
{
return GI_Offset(RegisterAttributes(attrs));
}
// The same, for a single attribute.
std::shared_ptr<CPP_InitInfo> RegisterAttr(const AttrPtr& attr);
int AttrOffset(const AttrPtr& attr) { return GI_Offset(RegisterAttr(attr)); }
// Returns a mapping from Attr objects to their associated
// initialization information. The Attr must have previously
// been registered.
auto& ProcessedAttr() const { return processed_attr; }
// True if the given expression is simple enough that we can
// generate code to evaluate it directly, and don't need to
// create a separate function per RegisterInitExpr() to track it.
static bool IsSimpleInitExpr(const ExprPtr& e);
// Tracks expressions used in attributes (such as &default=<expr>).
//
// We need to generate code to evaluate these, via CallExpr's
// that invoke functions that return the value of the expression.
// However, we can't generate that code when first encountering
// the attribute, because doing so will need to refer to the names
// of types, and initially those are unavailable (because the type's
// representatives, per pfs.RepTypes(), might not have yet been
// tracked). So instead we track the associated CallExprInitInfo
// objects, and after all types have been tracked, then spin
// through them to generate the code.
//
// Returns the associated initialization information.
std::shared_ptr<CPP_InitInfo> RegisterInitExpr(const ExprPtr& e);
// Records a C++ string value that initialization will need, returning
// its offset into the global vector that will hold such strings.
// A string seen before yields the offset it was originally assigned.
int TrackString(std::string s)
{
	const auto known = tracked_strings.find(s);

	if ( known == tracked_strings.end() )
		{
		// First sighting: the next free slot is the current vector size.
		int new_offset = ordered_tracked_strings.size();
		tracked_strings[s] = new_offset;
		ordered_tracked_strings.emplace_back(s);
		return new_offset;
		}

	return known->second;
}
// Records a profile hash value that initialization will need, returning
// its offset into the global vector that will hold such hashes.
// A hash seen before yields the offset it was originally assigned.
int TrackHash(p_hash_type h)
{
	const auto known = tracked_hashes.find(h);

	if ( known == tracked_hashes.end() )
		{
		// First sighting: the next free slot is the current vector size.
		int new_offset = ordered_tracked_hashes.size();
		tracked_hashes[h] = new_offset;
		ordered_tracked_hashes.emplace_back(h);
		return new_offset;
		}

	return known->second;
}
// Returns the hash associated with a given function body.
// It's a fatal error to call this for a body that hasn't
// been compiled.
p_hash_type BodyHash(const Stmt* body);
// Reports whether the function/hook/event handler named by fname has
// at least one associated body that could not be compiled.
bool NotFullyCompilable(const std::string& fname) const
{
	return not_fully_compilable.find(fname) != not_fully_compilable.end();
}
private: private:
// Start of methods related to driving the overall compilation // Start of methods related to driving the overall compilation
// process. // process.
@ -148,6 +260,37 @@ private:
// Main driver, invoked by constructor. // Main driver, invoked by constructor.
void Compile(bool report_uncompilable); void Compile(bool report_uncompilable);
// The following methods all create objects that track the
// initializations of a given type of value. In each, "tag"
// is the name used to identify the initializer global
// associated with the given type of value, and "type" is
// its C++ representation. Often "tag" is concatenated with
// "type" to designate a specific C++ type. For example,
// "tag" might be "Double" and "type" might be "ValPtr";
// the resulting global's type is "DoubleValPtr".
// Creates an object for tracking values associated with Zeek
// constants. "c_type" is the C++ type used in the initializer
// for each object; or, if empty, it specifies that we represent
// the value using an index into a separate vector that holds
// the constant.
std::shared_ptr<CPP_InitsInfo> CreateConstInitInfo(const char* tag, const char* type,
const char* c_type);
// Creates an object for tracking compound initializers, which
// are those whose initialization uses indexes into other vectors.
std::shared_ptr<CPP_InitsInfo> CreateCompoundInitInfo(const char* tag, const char* type);
// Creates an object for tracking initializers that have custom
// C++ objects to hold their initialization information.
std::shared_ptr<CPP_InitsInfo> CreateCustomInitInfo(const char* tag, const char* type);
// Generates the declaration associated with a set of initializations
// and tracks the object to facilitate looping over all such
// initializations. As a convenience, returns the object.
std::shared_ptr<CPP_InitsInfo> RegisterInitInfo(const char* tag, const char* type,
std::shared_ptr<CPP_InitsInfo> gi);
// Generate the beginning of the compiled code: run-time functions, // Generate the beginning of the compiled code: run-time functions,
// namespace, auxiliary globals. // namespace, auxiliary globals.
void GenProlog(); void GenProlog();
@ -158,7 +301,7 @@ private:
void RegisterCompiledBody(const std::string& f); void RegisterCompiledBody(const std::string& f);
// After compilation, generate the final code. Most of this is // After compilation, generate the final code. Most of this is
// run-time initialization of various dynamic values. // in support of run-time initialization of various dynamic values.
void GenEpilog(); void GenEpilog();
// True if the given function (plus body and profile) is one // True if the given function (plus body and profile) is one
@ -185,9 +328,13 @@ private:
// it including some functionality we don't currently support // it including some functionality we don't currently support
// for compilation. // for compilation.
// //
// Indexed by the name of the function. // Indexed by the C++ name of the function.
std::unordered_set<std::string> compilable_funcs; std::unordered_set<std::string> compilable_funcs;
// Tracks which functions/hooks/events have at least one non-compilable
// body. Indexed by the Zeek name of the function.
std::unordered_set<std::string> not_fully_compilable;
// Maps functions (not hooks or events) to upstream compiled names. // Maps functions (not hooks or events) to upstream compiled names.
std::unordered_map<std::string, std::string> hashed_funcs; std::unordered_map<std::string, std::string> hashed_funcs;
@ -200,10 +347,6 @@ private:
// compilation units. // compilation units.
int addl_tag = 0; int addl_tag = 0;
// If true, then we're updating the C++ base (i.e., generating
// code meant for use by subsequently generated code).
bool update = false;
// If true, the generated code should run "standalone". // If true, the generated code should run "standalone".
bool standalone = false; bool standalone = false;
@ -211,7 +354,7 @@ private:
// needed for "seatbelts", to ensure that we can produce a // needed for "seatbelts", to ensure that we can produce a
// unique hash relating to this compilation (*and* its // unique hash relating to this compilation (*and* its
// compilation time, which is why these are "seatbelts" and // compilation time, which is why these are "seatbelts" and
// likely not important to make distinct. // likely not important to make distinct).
p_hash_type total_hash = 0; p_hash_type total_hash = 0;
// Working directory in which we're compiling. Used to quasi-locate // Working directory in which we're compiling. Used to quasi-locate
@ -236,11 +379,6 @@ private:
// track it as such. // track it as such.
void CreateGlobal(const ID* g); void CreateGlobal(const ID* g);
// For the globals used in the compilation, if new then append
// them to the hash file to make the information available
// to subsequent compilation runs.
void UpdateGlobalHashes();
// Register the given identifier as a BiF. If is_var is true // Register the given identifier as a BiF. If is_var is true
// then the BiF is also used in a non-call context. // then the BiF is also used in a non-call context.
void AddBiF(const ID* b, bool is_var); void AddBiF(const ID* b, bool is_var);
@ -258,10 +396,9 @@ private:
// The following match various forms of identifiers to the // The following match various forms of identifiers to the
// name used for their C++ equivalent. // name used for their C++ equivalent.
const char* IDName(const ID& id) { return IDName(&id); }
const char* IDName(const IDPtr& id) { return IDName(id.get()); } const char* IDName(const IDPtr& id) { return IDName(id.get()); }
const char* IDName(const ID* id) { return IDNameStr(id).c_str(); } const char* IDName(const ID* id) { return IDNameStr(id).c_str(); }
const std::string& IDNameStr(const ID* id) const; const std::string& IDNameStr(const ID* id);
// Returns a canonicalized version of a variant of a global made // Returns a canonicalized version of a variant of a global made
// distinct by the given suffix. // distinct by the given suffix.
@ -280,12 +417,20 @@ private:
// conflict with C++ keywords. // conflict with C++ keywords.
std::string Canonicalize(const char* name) const; std::string Canonicalize(const char* name) const;
// Returns the name of the global corresponding to an expression
// (which must be an EXPR_NAME).
std::string GlobalName(const ExprPtr& e) { return globals[e->AsNameExpr()->Id()->Name()]; }
// Maps global names (not identifiers) to the names we use for them. // Maps global names (not identifiers) to the names we use for them.
std::unordered_map<std::string, std::string> globals; std::unordered_map<std::string, std::string> globals;
// Similar for locals, for the function currently being compiled. // Similar for locals, for the function currently being compiled.
std::unordered_map<const ID*, std::string> locals; std::unordered_map<const ID*, std::string> locals;
// Retrieves the initialization information associated with the
// given global.
std::unordered_map<const ID*, std::shared_ptr<CPP_InitInfo>> global_gis;
// Maps event names to the names we use for them. // Maps event names to the names we use for them.
std::unordered_map<std::string, std::string> events; std::unordered_map<std::string, std::string> events;
@ -307,14 +452,37 @@ private:
// Similar, but for lambdas. // Similar, but for lambdas.
void DeclareLambda(const LambdaExpr* l, const ProfileFunc* pf); void DeclareLambda(const LambdaExpr* l, const ProfileFunc* pf);
// Declares the CPPStmt subclass used for compiling the given // Generates code to declare the compiled version of a script
// function. "ft" gives the function's type, "pf" its profile,
// "fname" its C++ name, "body" its AST, "l" if non-nil its // "fname" its C++ name, "body" its AST, "l" if non-nil its
// corresponding lambda expression, and "flavor" whether it's // corresponding lambda expression, and "flavor" whether it's
// a hook/event/function. // a hook/event/function.
//
// We use two basic approaches. Most functions are represented
// by a "CPPDynStmt" object that's parameterized by a void* pointer
// to the underlying C++ function and an index used to dynamically
// cast the pointer to having the correct type for then calling it.
// Lambdas, however (including "implicit" lambdas used to associate
// complex expressions with &attributes), each have a unique
// subclass derived from CPPStmt that calls the underlying C++
// function without requiring a cast, and that holds the values
// of the lambda's captures.
//
// It would be cleanest to use the latter approach for all functions,
// but the hundreds/thousands of additional classes required for
// doing so significantly slows down C++ compilation, so we instead
// opt for the uglier dynamic casting approach, which only requires
// one additional class.
void CreateFunction(const FuncTypePtr& ft, const ProfileFunc* pf, const std::string& fname,
const StmtPtr& body, int priority, const LambdaExpr* l,
FunctionFlavor flavor);
// Used for the case of creating a custom subclass of CPPStmt.
void DeclareSubclass(const FuncTypePtr& ft, const ProfileFunc* pf, const std::string& fname, void DeclareSubclass(const FuncTypePtr& ft, const ProfileFunc* pf, const std::string& fname,
const StmtPtr& body, int priority, const LambdaExpr* l, const std::string& args, const IDPList* lambda_ids);
FunctionFlavor flavor);
// Used for the case of employing an instance of a CPPDynStmt object.
void DeclareDynCPPStmt();
// Generates the declarations (and in-line definitions) associated // Generates the declarations (and in-line definitions) associated
// with compiling a lambda. // with compiling a lambda.
@ -331,11 +499,40 @@ private:
// the given type, lambda captures (if non-nil), and profile. // the given type, lambda captures (if non-nil), and profile.
std::string ParamDecl(const FuncTypePtr& ft, const IDPList* lambda_ids, const ProfileFunc* pf); std::string ParamDecl(const FuncTypePtr& ft, const IDPList* lambda_ids, const ProfileFunc* pf);
// Returns in p_types the types associated with the parameters for a function
// of the given type, set of lambda captures (if any), and profile.
void GatherParamTypes(std::vector<std::string>& p_types, const FuncTypePtr& ft,
const IDPList* lambda_ids, const ProfileFunc* pf);
// Same, but instead returns the parameters' names.
void GatherParamNames(std::vector<std::string>& p_names, const FuncTypePtr& ft,
const IDPList* lambda_ids, const ProfileFunc* pf);
// Inspects the given profile to find the i'th parameter (starting // Inspects the given profile to find the i'th parameter (starting
// at 0). Returns nil if the profile indicates that that parameter // at 0). Returns nil if the profile indicates that that parameter
// is not used by the function. // is not used by the function.
const ID* FindParam(int i, const ProfileFunc* pf); const ID* FindParam(int i, const ProfileFunc* pf);
// Information associated with a CPPDynStmt dynamic dispatch.
struct DispatchInfo
    {
    // C++ cast to use for the function pointer.
    std::string cast;

    // Arguments to pass to the function.
    std::string args;

    // Whether the function is a hook. Explicitly defaulted so that a
    // default-constructed entry never holds an indeterminate value.
    bool is_hook = false;

    // What type the function returns, if any.
    TypePtr yield;
    };
// An array of cast/invocation pairs used to generate the CPPDynStmt
// Exec method.
std::vector<DispatchInfo> func_casting_glue;
// Maps casting strings to indices into func_casting_glue. The index
// is what's used to dynamically switch to the right dispatch.
std::unordered_map<std::string, int> casting_index;
// Maps functions (using their C++ name) to their casting strings.
std::unordered_map<std::string, std::string> func_index;
// Names for lambda capture ID's. These require a separate space // Names for lambda capture ID's. These require a separate space
// that incorporates the lambda's name, to deal with nested lambda's // that incorporates the lambda's name, to deal with nested lambda's
// that refer to the identifiers with the same name. // that refer to the identifiers with the same name.
@ -344,7 +541,7 @@ private:
// The function's parameters. Tracked so we don't re-declare them. // The function's parameters. Tracked so we don't re-declare them.
std::unordered_set<const ID*> params; std::unordered_set<const ID*> params;
// Whether we're parsing a hook. // Whether we're compiling a hook.
bool in_hook = false; bool in_hook = false;
// //
@ -362,8 +559,12 @@ private:
void CompileLambda(const LambdaExpr* l, const ProfileFunc* pf); void CompileLambda(const LambdaExpr* l, const ProfileFunc* pf);
// Generates the body of the Invoke() method (which supplies the // Generates the body of the Invoke() method (which supplies the
// "glue" between for calling the C++-generated code). // "glue" for calling the C++-generated code, for CPPStmt subclasses).
void GenInvokeBody(const std::string& fname, const TypePtr& t, const std::string& args); void GenInvokeBody(const std::string& fname, const TypePtr& t, const std::string& args)
{
GenInvokeBody(fname + "(" + args + ")", t);
}
void GenInvokeBody(const std::string& call, const TypePtr& t);
// Generates the code for the body of a script function with // Generates the code for the body of a script function with
// the given type, profile, C++ name, AST, lambda captures // the given type, profile, C++ name, AST, lambda captures
@ -405,9 +606,6 @@ private:
// Maps function bodies to the names we use for them. // Maps function bodies to the names we use for them.
std::unordered_map<const Stmt*, std::string> body_names; std::unordered_map<const Stmt*, std::string> body_names;
// Reverse mapping.
std::unordered_map<std::string, const Stmt*> names_to_bodies;
// Maps function names to hashes of bodies. // Maps function names to hashes of bodies.
std::unordered_map<std::string, p_hash_type> body_hashes; std::unordered_map<std::string, p_hash_type> body_hashes;
@ -426,62 +624,84 @@ private:
// //
// End of methods related to generating compiled script bodies. // End of methods related to generating compiled script bodies.
// Start of methods related to generating code for representing // Methods related to generating code for representing script constants
// script constants as run-time values. // as run-time values. There's only one nontrivial one of these,
// See Consts.cc for definitions. // RegisterConstant() (declared above, as it's public). All the other
// // work is done by secondary objects - see InitsInfo.{h,cc} for those.
// Returns an instantiation of a constant - either as a native // Returns the object used to track indices (vectors of integers
// C++ constant, or as a C++ variable that will be bound to // that are used to index various other vectors, including other
// a Zeek value at run-time initialization - that is needed // indices). Only used by CPP_InitsInfo objects, but stored
// by the given "parent" object (which acquires an initialization // in the CPPCompile object to make it available across different
// dependency, if a C++ variable is needed). // CPP_InitsInfo objects.
std::string BuildConstant(IntrusivePtr<Obj> parent, const ValPtr& vp)
{
return BuildConstant(parent.get(), vp);
}
std::string BuildConstant(const Obj* parent, const ValPtr& vp);
// Called to create a constant appropriate for the given expression friend class CPP_InitsInfo;
// or, more directly, the given value. The second method returns IndicesManager& IndMgr() { return indices_mgr; }
// "true" if a C++ variable needed to be created to construct the
// constant at run-time initialization, false if can be instantiated
// directly as a C++ constant.
void AddConstant(const ConstExpr* c);
bool AddConstant(const ValPtr& v);
// Build particular types of C++ variables (with the given name)
// to hold constants initialized at run-time.
void AddStringConstant(const ValPtr& v, std::string& const_name);
void AddPatternConstant(const ValPtr& v, std::string& const_name);
void AddListConstant(const ValPtr& v, std::string& const_name);
void AddRecordConstant(const ValPtr& v, std::string& const_name);
void AddTableConstant(const ValPtr& v, std::string& const_name);
void AddVectorConstant(const ValPtr& v, std::string& const_name);
// Maps (non-native) constants to associated C++ globals. // Maps (non-native) constants to associated C++ globals.
std::unordered_map<const ConstExpr*, std::string> const_exprs; std::unordered_map<const ConstExpr*, std::string> const_exprs;
// Maps the values of (non-native) constants to associated C++ globals. // Maps the values of (non-native) constants to associated initializer
std::unordered_map<const Val*, std::string> const_vals; // information.
std::unordered_map<const Val*, std::shared_ptr<CPP_InitInfo>> const_vals;
// Same, but for the offset into the vector that tracks all constants
// collectively (to support initialization of compound constants).
std::unordered_map<const Val*, int> const_offsets;
// The same as the above pair, but indexed by the string representation
// rather than the Val*. The reason for having both is to enable
// reusing common constants even though their Val*'s differ.
std::unordered_map<std::string, std::shared_ptr<CPP_InitInfo>> constants;
std::unordered_map<std::string, int> constants_offsets;
// Used for memory management associated with const_vals's index. // Used for memory management associated with const_vals's index.
std::vector<ValPtr> cv_indices; std::vector<ValPtr> cv_indices;
// Maps string representations of (non-native) constants to // For different types of constants (as indicated by TypeTag),
// associated C++ globals. // provides the associated object that manages the initializers
std::unordered_map<std::string, std::string> constants; // for those constants.
std::unordered_map<TypeTag, std::shared_ptr<CPP_InitsInfo>> const_info;
// Maps the same representations to the Val* associated with their // Tracks entries for constructing the vector of all constants
// original creation. This enables us to construct initialization // (regardless of type). Each entry provides a TypeTag, used
// dependencies for later Val*'s that are able to reuse the same // to identify the type-specific vector for a given constant,
// constant. // and the offset into that vector.
std::unordered_map<std::string, const Val*> constants_to_vals; std::vector<std::pair<TypeTag, int>> consts;
// Function variables that we need to create dynamically for // The following objects track initialization information for
// initializing globals, coupled with the name of their associated // different types of initializers: Zeek types, individual
// constant. // attributes, sets of attributes, expressions that call script
std::unordered_map<FuncVal*, std::string> func_vars; // functions (for attribute expressions), registering lambda
// bodies, and registering Zeek globals.
std::shared_ptr<CPP_InitsInfo> type_info;
std::shared_ptr<CPP_InitsInfo> attr_info;
std::shared_ptr<CPP_InitsInfo> attrs_info;
std::shared_ptr<CPP_InitsInfo> call_exprs_info;
std::shared_ptr<CPP_InitsInfo> lambda_reg_info;
std::shared_ptr<CPP_InitsInfo> global_id_info;
// Tracks all of the above objects (as well as each entry in
// const_info), to facilitate easy iterating over them.
std::set<std::shared_ptr<CPP_InitsInfo>> all_global_info;
// Tracks the attribute expressions for which we need to generate
// function calls to evaluate them.
std::unordered_map<std::string, std::shared_ptr<CallExprInitInfo>> init_infos;
// See IndMgr() above for the role of this variable.
IndicesManager indices_mgr;
// Maps strings to associated offsets.
std::unordered_map<std::string, int> tracked_strings;
// Tracks strings we've registered in order (corresponding to
// their offsets).
std::vector<std::string> ordered_tracked_strings;
// The same as the previous two, but for profile hashes.
std::vector<p_hash_type> ordered_tracked_hashes;
std::unordered_map<p_hash_type, int> tracked_hashes;
// //
// End of methods related to generating code for script constants. // End of methods related to generating code for script constants.
@ -649,9 +869,9 @@ private:
// not the outer map). // not the outer map).
int num_rf_mappings = 0; int num_rf_mappings = 0;
// For each entry in "field_mapping", the record and TypeDecl // For each entry in "field_mapping", the record (as a global
// associated with the mapping. // offset) and TypeDecl associated with the mapping.
std::vector<std::pair<const RecordType*, const TypeDecl*>> field_decls; std::vector<std::pair<int, const TypeDecl*>> field_decls;
// For enums that are extended via redef's, maps each distinct // For enums that are extended via redef's, maps each distinct
// value (that the compiled scripts refer to) to locations in the // value (that the compiled scripts refer to) to locations in the
@ -665,9 +885,9 @@ private:
// not the outer map). // not the outer map).
int num_ev_mappings = 0; int num_ev_mappings = 0;
// For each entry in "enum_mapping", the record and name // For each entry in "enum_mapping", the EnumType (as a global
// associated with the mapping. // offset) and name associated with the mapping.
std::vector<std::pair<const EnumType*, std::string>> enum_names; std::vector<std::pair<int, std::string>> enum_names;
// //
// End of methods related to generating code for AST Expr's. // End of methods related to generating code for AST Expr's.
@ -690,24 +910,6 @@ private:
// given script type 't', converts it as needed to the given GenType. // given script type 't', converts it as needed to the given GenType.
std::string GenericValPtrToGT(const std::string& expr, const TypePtr& t, GenType gt); std::string GenericValPtrToGT(const std::string& expr, const TypePtr& t, GenType gt);
// For a given type, generates the code necessary to initialize
// it at run time. The term "expand" in the method's name refers
// to the fact that the type has already been previously declared
// (necessary to facilitate defining recursive types), so this method
// generates the "meat" of the type but not its original declaration.
void ExpandTypeVar(const TypePtr& t);
// Methods for expanding specific such types. "tn" is the name
// of the C++ variable used for the particular type.
void ExpandListTypeVar(const TypePtr& t, std::string& tn);
void ExpandRecordTypeVar(const TypePtr& t, std::string& tn);
void ExpandEnumTypeVar(const TypePtr& t, std::string& tn);
void ExpandTableTypeVar(const TypePtr& t, std::string& tn);
void ExpandFuncTypeVar(const TypePtr& t, std::string& tn);
// The following assumes we're populating a type_decl_list called "tl".
std::string GenTypeDecl(const TypeDecl* td);
// Returns the name of a C++ variable that will hold a TypePtr // Returns the name of a C++ variable that will hold a TypePtr
// of the appropriate flavor. 't' does not need to be a type // of the appropriate flavor. 't' does not need to be a type
// representative. // representative.
@ -721,21 +923,11 @@ private:
const Type* TypeRep(const TypePtr& t) { return TypeRep(t.get()); } const Type* TypeRep(const TypePtr& t) { return TypeRep(t.get()); }
// Low-level C++ representations for types, of various flavors. // Low-level C++ representations for types, of various flavors.
const char* TypeTagName(TypeTag tag) const; static const char* TypeTagName(TypeTag tag);
const char* TypeName(const TypePtr& t); const char* TypeName(const TypePtr& t);
const char* FullTypeName(const TypePtr& t); const char* FullTypeName(const TypePtr& t);
const char* TypeType(const TypePtr& t); const char* TypeType(const TypePtr& t);
// Track the given type (with support methods for onces that
// are complicated), recursively including its sub-types, and
// creating initializations (and dependencies) for constructing
// C++ variables representing the types.
void RegisterType(const TypePtr& t);
void RegisterListType(const TypePtr& t);
void RegisterTableType(const TypePtr& t);
void RegisterRecordType(const TypePtr& t);
void RegisterFuncType(const TypePtr& t);
// Access to a type's underlying values. // Access to a type's underlying values.
const char* NativeAccessor(const TypePtr& t); const char* NativeAccessor(const TypePtr& t);
@ -744,11 +936,13 @@ private:
const char* IntrusiveVal(const TypePtr& t); const char* IntrusiveVal(const TypePtr& t);
// Maps types to indices in the global "types__CPP" array. // Maps types to indices in the global "types__CPP" array.
CPPTracker<Type> types = {"types", &compiled_items}; CPPTracker<Type> types = {"types", true, &compiled_items};
// Used to prevent analysis of mutually-referring types from // Used to prevent analysis of mutually-referring types from
// leading to infinite recursion. // leading to infinite recursion. Maps types to their global
std::unordered_set<const Type*> processed_types; // initialization information (or, initially, to nullptr, if
// they're in the process of being registered).
std::unordered_map<const Type*, std::shared_ptr<CPP_InitInfo>> processed_types;
// //
// End of methods related to managing script types. // End of methods related to managing script types.
@ -758,11 +952,6 @@ private:
// See Attrs.cc for definitions. // See Attrs.cc for definitions.
// //
// Tracks a use of the given set of attributes, including
// initialization dependencies and the generation of any
// associated expressions.
void RegisterAttributes(const AttributesPtr& attrs);
// Populates the 2nd and 3rd arguments with C++ representations // Populates the 2nd and 3rd arguments with C++ representations
// of the tags and (optional) values/expressions associated with // of the tags and (optional) values/expressions associated with
// the set of attributes. // the set of attributes.
@ -772,16 +961,17 @@ private:
void GenAttrs(const AttributesPtr& attrs); void GenAttrs(const AttributesPtr& attrs);
std::string GenAttrExpr(const ExprPtr& e); std::string GenAttrExpr(const ExprPtr& e);
// Returns the name of the C++ variable that will hold the given
// attributes at run-time.
std::string AttrsName(const AttributesPtr& attrs);
// Returns a string representation of the name associated with // Returns a string representation of the name associated with
// different attributes (e.g., "ATTR_DEFAULT"). // different attribute tags (e.g., "ATTR_DEFAULT").
const char* AttrName(const AttrPtr& attr); static const char* AttrName(AttrTag t);
// Similar for attributes, so we can reconstruct record types. // Similar for attributes, so we can reconstruct record types.
CPPTracker<Attributes> attributes = {"attrs", &compiled_items}; CPPTracker<Attributes> attributes = {"attrs", false, &compiled_items};
// Maps Attributes and Attr's to their global initialization
// information.
std::unordered_map<const Attributes*, std::shared_ptr<CPP_InitInfo>> processed_attrs;
std::unordered_map<const Attr*, std::shared_ptr<CPP_InitInfo>> processed_attr;
// //
// End of methods related to managing script type attributes. // End of methods related to managing script type attributes.
@ -790,121 +980,42 @@ private:
// See Inits.cc for definitions. // See Inits.cc for definitions.
// //
// Generates code to construct a CallExpr that can be used to // Generates code for dynamically generating an expression
// evaluate the expression 'e' as an initializer (typically // associated with an attribute, via a function call.
// for a record &default attribute). void GenInitExpr(std::shared_ptr<CallExprInitInfo> ce_init);
void GenInitExpr(const ExprPtr& e);
// True if the given expression is simple enough that we can
// generate code to evaluate it directly, and don't need to
// create a separate function per GenInitExpr().
bool IsSimpleInitExpr(const ExprPtr& e) const;
// Returns the name of a function used to evaluate an // Returns the name of a function used to evaluate an
// initialization expression. // initialization expression.
std::string InitExprName(const ExprPtr& e); std::string InitExprName(const ExprPtr& e);
// Generates code to initialize the global 'g' (with C++ name "gl") // Convenience functions for returning the offset or initialization cohort
// to the given value *if* on start-up it doesn't already have a value. // associated with an initialization.
void GenGlobalInit(const ID* g, std::string& gl, const ValPtr& v); int GI_Offset(const std::shared_ptr<CPP_InitInfo>& gi) const { return gi ? gi->Offset() : -1; }
int GI_Cohort(const std::shared_ptr<CPP_InitInfo>& gi) const
// Generates code to initialize all of the function-valued globals
// (i.e., those pointing to lambdas).
void GenFuncVarInits();
// Generates the "pre-initialization" for a given type. For
// extensible types (records, enums, lists), these are empty
// versions that we'll later populate.
void GenPreInit(const Type* t);
// Generates a function that executes the pre-initializations.
void GenPreInits();
// The following all track that for a given object, code associated
// with initializing it. Multiple calls for the same object append
// additional lines of code (the order of the calls is preserved).
//
// Versions with "lhs" and "rhs" arguments provide an initialization
// of the form "lhs = rhs;", as a convenience.
void AddInit(const IntrusivePtr<Obj>& o, const std::string& lhs, const std::string& rhs)
{ {
AddInit(o.get(), lhs + " = " + rhs + ";"); return gi ? gi->InitCohort() : 0;
}
void AddInit(const Obj* o, const std::string& lhs, const std::string& rhs)
{
AddInit(o, lhs + " = " + rhs + ";");
}
void AddInit(const IntrusivePtr<Obj>& o, const std::string& init) { AddInit(o.get(), init); }
void AddInit(const Obj* o, const std::string& init);
// We do consistency checking of initialization dependencies by
// looking for depended-on objects have initializations. Sometimes
// it's unclear whether the object will actually require
// initialization, in which case we add an empty initialization
// for it so that the consistency-checking is happy.
void AddInit(const IntrusivePtr<Obj>& o) { AddInit(o.get()); }
void AddInit(const Obj* o);
// This is akin to an initialization, but done separately
// (upon "activation") so it can include initializations that
// rely on parsing having finished (in particular, BiFs having
// been registered). Only used when generating standalone code.
void AddActivation(std::string a) { activations.emplace_back(a); }
// Records the fact that the initialization of object o1 depends
// on that of object o2.
void NoteInitDependency(const IntrusivePtr<Obj>& o1, const IntrusivePtr<Obj>& o2)
{
NoteInitDependency(o1.get(), o2.get());
}
void NoteInitDependency(const IntrusivePtr<Obj>& o1, const Obj* o2)
{
NoteInitDependency(o1.get(), o2);
}
void NoteInitDependency(const Obj* o1, const IntrusivePtr<Obj>& o2)
{
NoteInitDependency(o1, o2.get());
}
void NoteInitDependency(const Obj* o1, const Obj* o2);
// Records an initialization dependency of the given object
// on the given type, unless the type is a record. We need
// this notion to protect against circular dependencies in
// the face of recursive records.
void NoteNonRecordInitDependency(const Obj* o, const TypePtr& t)
{
if ( t && t->Tag() != TYPE_RECORD )
NoteInitDependency(o, TypeRep(t));
}
void NoteNonRecordInitDependency(const IntrusivePtr<Obj> o, const TypePtr& t)
{
NoteNonRecordInitDependency(o.get(), t);
} }
// Analyzes the initialization dependencies to ensure that they're // Generate code to initialize the mappings for record field
// consistent, i.e., every object that either depends on another, // offsets for field accesses into regions of records that
// or is itself depended on, appears in the "to_do" set. // can be extensible (and thus can vary at run-time to the
void CheckInitConsistency(std::unordered_set<const Obj*>& to_do); // offsets encountered during compilation).
// Generate initializations for the items in the "to_do" set,
// in accordance with their dependencies. Returns 'n', the
// number of initialization functions generated. They should
// be called in order, from 1 to n.
int GenDependentInits(std::unordered_set<const Obj*>& to_do);
// Generates a function for initializing the nc'th cohort.
void GenInitCohort(int nc, std::unordered_set<const Obj*>& cohort);
// Initialize the mappings for record field offsets for field
// accesses into regions of records that can be extensible (and
// thus can vary at run-time to the offsets encountered during
// compilation).
void InitializeFieldMappings(); void InitializeFieldMappings();
// Same, but for enum types. The second form does a single // Same, but for enum types.
// initialization corresponding to the given index in the mapping.
void InitializeEnumMappings(); void InitializeEnumMappings();
void InitializeEnumMappings(const EnumType* et, const std::string& e_name, int index);
// Generate code to initialize BiFs.
void InitializeBiFs();
// Generate code to initialize strings that we track.
void InitializeStrings();
// Generate code to initialize hashes that we track.
void InitializeHashes();
// Generate code to initialize indirect references to constants.
void InitializeConsts();
// Generate the initialization hook for this set of compiled code. // Generate the initialization hook for this set of compiled code.
void GenInitHook(); void GenInitHook();
@ -917,25 +1028,15 @@ private:
// what we compiled. // what we compiled.
void GenLoad(); void GenLoad();
// A list of pre-initializations (those potentially required by // A list of BiFs to look up during initialization. First
// other initializations, and that themselves have no dependencies). // string is the name of the C++ global holding the BiF, the
std::vector<std::string> pre_inits; // second is its name as known to Zeek.
std::unordered_map<std::string, std::string> BiFs;
// A list of "activations" (essentially, post-initializations).
// See AddActivation() above.
std::vector<std::string> activations;
// Expressions for which we need to generate initialization-time // Expressions for which we need to generate initialization-time
// code. Currently, these are only expressions appearing in // code. Currently, these are only expressions appearing in
// attributes. // attributes.
CPPTracker<Expr> init_exprs = {"gen_init_expr", &compiled_items}; CPPTracker<Expr> init_exprs = {"gen_init_expr", false, &compiled_items};
// Maps an object requiring initialization to its initializers.
std::unordered_map<const Obj*, std::vector<std::string>> obj_inits;
// Maps an object requiring initializations to its dependencies
// on other such objects.
std::unordered_map<const Obj*, std::unordered_set<const Obj*>> obj_deps;
// //
// End of methods related to run-time initialization. // End of methods related to run-time initialization.
@ -944,12 +1045,20 @@ private:
// See Emit.cc for definitions. // See Emit.cc for definitions.
// //
// The following all need to be able to emit code.
friend class CPP_BasicConstInitsInfo;
friend class CPP_CompoundInitsInfo;
friend class IndicesManager;
// Used to create (indented) C++ {...} code blocks. "needs_semi" // Used to create (indented) C++ {...} code blocks. "needs_semi"
// controls whether to terminate the block with a ';' (such as // controls whether to terminate the block with a ';' (such as
// for class definitions). // for class definitions).
void StartBlock(); void StartBlock();
void EndBlock(bool needs_semi = false); void EndBlock(bool needs_semi = false);
// Increments block_level by one without emitting any code.
// NOTE(review): presumably block_level is the nesting depth that
// drives Indent() - confirm against Emit.cc.
void IndentUp() { ++block_level; }
// Decrements block_level by one without emitting any code. No
// check is made here that the level stays non-negative.
void IndentDown() { --block_level; }
// Various ways of generating code. The multi-argument methods // Various ways of generating code. The multi-argument methods
// assume that the first argument is a printf-style format // assume that the first argument is a printf-style format
// (but one that can only have %s specifiers). // (but one that can only have %s specifiers).
@ -960,11 +1069,12 @@ private:
NL(); NL();
} }
void Emit(const std::string& fmt, const std::string& arg) const void Emit(const std::string& fmt, const std::string& arg, bool do_NL = true) const
{ {
Indent(); Indent();
fprintf(write_file, fmt.c_str(), arg.c_str()); fprintf(write_file, fmt.c_str(), arg.c_str());
NL(); if ( do_NL )
NL();
} }
void Emit(const std::string& fmt, const std::string& arg1, const std::string& arg2) const void Emit(const std::string& fmt, const std::string& arg1, const std::string& arg2) const
@ -999,14 +1109,15 @@ private:
NL(); NL();
} }
// Returns an expression for constructing a Zeek String object void Emit(const std::string& fmt, const std::string& arg1, const std::string& arg2,
// corresponding to the given byte array. const std::string& arg3, const std::string& arg4, const std::string& arg5,
std::string GenString(const char* b, int len) const; const std::string& arg6) const
{
// For the given byte array / string, returns a version expanded Indent();
// with escape sequences in order to represent it as a C++ string. fprintf(write_file, fmt.c_str(), arg1.c_str(), arg2.c_str(), arg3.c_str(), arg4.c_str(),
std::string CPPEscape(const char* b, int len) const; arg5.c_str(), arg6.c_str());
std::string CPPEscape(const char* s) const { return CPPEscape(s, strlen(s)); } NL();
}
void NL() const { fputc('\n', write_file); } void NL() const { fputc('\n', write_file); }

View file

@ -4,55 +4,27 @@
#include "zeek/RE.h" #include "zeek/RE.h"
#include "zeek/script_opt/CPP/Compile.h" #include "zeek/script_opt/CPP/Compile.h"
using namespace std;
namespace zeek::detail namespace zeek::detail
{ {
using namespace std; shared_ptr<CPP_InitInfo> CPPCompile::RegisterConstant(const ValPtr& vp, int& consts_offset)
string CPPCompile::BuildConstant(const Obj* parent, const ValPtr& vp)
{ {
if ( ! vp ) // Make sure the value pointer, which might be transient
return "nullptr"; // in construction, sticks around so we can track its
// value.
cv_indices.push_back(vp);
if ( AddConstant(vp) )
{
auto v = vp.get();
AddInit(parent);
NoteInitDependency(parent, v);
// Make sure the value pointer, which might be transient
// in construction, sticks around so we can track its
// value.
cv_indices.push_back(vp);
return const_vals[v];
}
else
return NativeToGT(GenVal(vp), vp->GetType(), GEN_VAL_PTR);
}
void CPPCompile::AddConstant(const ConstExpr* c)
{
auto v = c->ValuePtr();
if ( AddConstant(v) )
{
AddInit(c);
NoteInitDependency(c, v.get());
}
}
bool CPPCompile::AddConstant(const ValPtr& vp)
{
auto v = vp.get(); auto v = vp.get();
auto cv = const_vals.find(v);
if ( IsNativeType(v->GetType()) ) if ( cv != const_vals.end() )
// These we instantiate directly. {
return false;
if ( const_vals.count(v) > 0 )
// Already did this one. // Already did this one.
return true; consts_offset = const_offsets[v];
return cv->second;
}
// Formulate a key that's unique per distinct constant. // Formulate a key that's unique per distinct constant.
@ -79,216 +51,104 @@ bool CPPCompile::AddConstant(const ValPtr& vp)
c_desc = d.Description(); c_desc = d.Description();
} }
if ( constants.count(c_desc) > 0 ) auto c = constants.find(c_desc);
if ( c != constants.end() )
{ {
const_vals[v] = constants[c_desc]; const_vals[v] = c->second;
consts_offset = const_offsets[v] = constants_offsets[c_desc];
auto orig_v = constants_to_vals[c_desc]; return c->second;
ASSERT(v != orig_v);
AddInit(v);
NoteInitDependency(v, orig_v);
return true;
} }
// Need a C++ global for this constant.
auto const_name = string("CPP__const__") + Fmt(int(constants.size()));
const_vals[v] = constants[c_desc] = const_name;
constants_to_vals[c_desc] = v;
auto tag = t->Tag(); auto tag = t->Tag();
auto const_name = const_info[tag]->NextName();
shared_ptr<CPP_InitInfo> gi;
switch ( tag ) switch ( tag )
{ {
case TYPE_STRING: case TYPE_BOOL:
AddStringConstant(vp, const_name); gi = make_shared<BasicConstInfo>(vp->AsBool() ? "true" : "false");
break; break;
case TYPE_PATTERN: case TYPE_INT:
AddPatternConstant(vp, const_name); gi = make_shared<BasicConstInfo>(to_string(vp->AsInt()));
break; break;
case TYPE_LIST: case TYPE_COUNT:
AddListConstant(vp, const_name); gi = make_shared<BasicConstInfo>(to_string(vp->AsCount()) + "ULL");
break; break;
case TYPE_RECORD: case TYPE_DOUBLE:
AddRecordConstant(vp, const_name); gi = make_shared<BasicConstInfo>(to_string(vp->AsDouble()));
break; break;
case TYPE_TABLE: case TYPE_TIME:
AddTableConstant(vp, const_name); gi = make_shared<BasicConstInfo>(to_string(vp->AsDouble()));
break; break;
case TYPE_VECTOR: case TYPE_INTERVAL:
AddVectorConstant(vp, const_name); gi = make_shared<BasicConstInfo>(to_string(vp->AsDouble()));
break; break;
case TYPE_ADDR: case TYPE_ADDR:
case TYPE_SUBNET: gi = make_shared<DescConstInfo>(this, vp);
{
auto prefix = (tag == TYPE_ADDR) ? "Addr" : "SubNet";
Emit("%sValPtr %s;", prefix, const_name);
ODesc d;
v->Describe(&d);
AddInit(v, const_name,
string("make_intrusive<") + prefix + "Val>(\"" + d.Description() + "\")");
}
break; break;
case TYPE_FUNC: case TYPE_SUBNET:
Emit("FuncValPtr %s;", const_name); gi = make_shared<DescConstInfo>(this, vp);
break;
// We can't generate the initialization now because it case TYPE_ENUM:
// depends on first having compiled the associated body, gi = make_shared<EnumConstInfo>(this, vp);
// so we know its hash. So for now we just note it break;
// to deal with later.
func_vars[v->AsFuncVal()] = const_name; case TYPE_STRING:
gi = make_shared<StringConstInfo>(this, vp);
break;
case TYPE_PATTERN:
gi = make_shared<PatternConstInfo>(this, vp);
break;
case TYPE_PORT:
gi = make_shared<PortConstInfo>(vp);
break;
case TYPE_LIST:
gi = make_shared<ListConstInfo>(this, vp);
break;
case TYPE_VECTOR:
gi = make_shared<VectorConstInfo>(this, vp);
break;
case TYPE_RECORD:
gi = make_shared<RecordConstInfo>(this, vp);
break;
case TYPE_TABLE:
gi = make_shared<TableConstInfo>(this, vp);
break; break;
case TYPE_FILE: case TYPE_FILE:
{ gi = make_shared<FileConstInfo>(this, vp);
Emit("FileValPtr %s;", const_name); break;
auto f = cast_intrusive<FileVal>(vp)->Get(); case TYPE_FUNC:
gi = make_shared<FuncConstInfo>(this, vp);
AddInit(v, const_name,
string("make_intrusive<FileVal>(") + "make_intrusive<File>(\"" + f->Name() +
"\", \"w\"))");
}
break; break;
default: default:
reporter->InternalError("bad constant type in CPPCompile::AddConstant"); reporter->InternalError("bad constant type in CPPCompile::AddConstant");
break;
} }
return true; const_info[tag]->AddInstance(gi);
} const_vals[v] = constants[c_desc] = gi;
void CPPCompile::AddStringConstant(const ValPtr& v, string& const_name) consts_offset = const_offsets[v] = constants_offsets[c_desc] = consts.size();
{ consts.emplace_back(pair(tag, gi->Offset()));
Emit("StringValPtr %s;", const_name);
auto s = v->AsString(); return gi;
const char* b = (const char*)(s->Bytes());
auto len = s->Len();
AddInit(v, const_name, GenString(b, len));
}
void CPPCompile::AddPatternConstant(const ValPtr& v, string& const_name)
{
Emit("PatternValPtr %s;", const_name);
auto re = v->AsPatternVal()->Get();
AddInit(v, string("{ auto re = new RE_Matcher(") + CPPEscape(re->OrigText()) + ");");
if ( re->IsCaseInsensitive() )
AddInit(v, "re->MakeCaseInsensitive();");
AddInit(v, "re->Compile();");
AddInit(v, const_name, "make_intrusive<PatternVal>(re)");
AddInit(v, "}");
}
void CPPCompile::AddListConstant(const ValPtr& v, string& const_name)
{
Emit("ListValPtr %s;", const_name);
// No initialization dependency on the main type since we don't
// use the underlying TypeList. However, we *do* use the types of
// the elements.
AddInit(v, const_name, string("make_intrusive<ListVal>(TYPE_ANY)"));
auto lv = cast_intrusive<ListVal>(v);
auto n = lv->Length();
for ( auto i = 0; i < n; ++i )
{
const auto& l_i = lv->Idx(i);
auto l_i_c = BuildConstant(v, l_i);
AddInit(v, const_name + "->Append(" + l_i_c + ");");
NoteInitDependency(v, TypeRep(l_i->GetType()));
}
}
void CPPCompile::AddRecordConstant(const ValPtr& v, string& const_name)
{
const auto& t = v->GetType();
Emit("RecordValPtr %s;", const_name);
NoteInitDependency(v, TypeRep(t));
AddInit(v, const_name,
string("make_intrusive<RecordVal>(") + "cast_intrusive<RecordType>(" + GenTypeName(t) +
"))");
auto r = cast_intrusive<RecordVal>(v);
auto n = r->NumFields();
for ( auto i = 0u; i < n; ++i )
{
const auto& r_i = r->GetField(i);
if ( r_i )
{
auto r_i_c = BuildConstant(v, r_i);
AddInit(v, const_name + "->Assign(" + Fmt(static_cast<int>(i)) + ", " + r_i_c + ");");
}
}
}
void CPPCompile::AddTableConstant(const ValPtr& v, string& const_name)
{
const auto& t = v->GetType();
Emit("TableValPtr %s;", const_name);
NoteInitDependency(v, TypeRep(t));
AddInit(v, const_name,
string("make_intrusive<TableVal>(") + "cast_intrusive<TableType>(" + GenTypeName(t) +
"))");
auto tv = cast_intrusive<TableVal>(v);
auto tv_map = tv->ToMap();
for ( auto& tv_i : tv_map )
{
auto ind = BuildConstant(v, tv_i.first);
auto val = BuildConstant(v, tv_i.second);
AddInit(v, const_name + "->Assign(" + ind + ", " + val + ");");
}
}
void CPPCompile::AddVectorConstant(const ValPtr& v, string& const_name)
{
const auto& t = v->GetType();
Emit("VectorValPtr %s;", const_name);
NoteInitDependency(v, TypeRep(t));
AddInit(v, const_name,
string("make_intrusive<VectorVal>(") + "cast_intrusive<VectorType>(" + GenTypeName(t) +
"))");
auto vv = cast_intrusive<VectorVal>(v);
auto n = vv->Size();
for ( auto i = 0u; i < n; ++i )
{
const auto& v_i = vv->ValAt(i);
auto v_i_c = BuildConstant(v, v_i);
AddInit(v, const_name + "->Append(" + v_i_c + ");");
}
} }
} // zeek::detail } // zeek::detail

View file

@ -22,7 +22,7 @@ void CPPCompile::DeclareFunc(const FuncInfo& func)
const auto& body = func.Body(); const auto& body = func.Body();
auto priority = func.Priority(); auto priority = func.Priority();
DeclareSubclass(f->GetType(), pf, fname, body, priority, nullptr, f->Flavor()); CreateFunction(f->GetType(), pf, fname, body, priority, nullptr, f->Flavor());
if ( f->GetBodies().size() == 1 ) if ( f->GetBodies().size() == 1 )
compiled_simple_funcs[f->Name()] = fname; compiled_simple_funcs[f->Name()] = fname;
@ -40,17 +40,88 @@ void CPPCompile::DeclareLambda(const LambdaExpr* l, const ProfileFunc* pf)
for ( auto id : ids ) for ( auto id : ids )
lambda_names[id] = LocalName(id); lambda_names[id] = LocalName(id);
DeclareSubclass(l_id->GetType<FuncType>(), pf, lname, body, 0, l, FUNC_FLAVOR_FUNCTION); CreateFunction(l_id->GetType<FuncType>(), pf, lname, body, 0, l, FUNC_FLAVOR_FUNCTION);
} }
void CPPCompile::DeclareSubclass(const FuncTypePtr& ft, const ProfileFunc* pf, const string& fname, void CPPCompile::CreateFunction(const FuncTypePtr& ft, const ProfileFunc* pf, const string& fname,
const StmtPtr& body, int priority, const LambdaExpr* l, const StmtPtr& body, int priority, const LambdaExpr* l,
FunctionFlavor flavor) FunctionFlavor flavor)
{ {
const auto& yt = ft->Yield(); const auto& yt = ft->Yield();
in_hook = flavor == FUNC_FLAVOR_HOOK; in_hook = flavor == FUNC_FLAVOR_HOOK;
const IDPList* lambda_ids = l ? &l->OuterIDs() : nullptr; const IDPList* lambda_ids = l ? &l->OuterIDs() : nullptr;
string args = BindArgs(ft, lambda_ids);
auto yt_decl = in_hook ? "bool" : FullTypeName(yt);
vector<string> p_types;
GatherParamTypes(p_types, ft, lambda_ids, pf);
string cast = string(yt_decl) + "(*)(";
for ( auto& pt : p_types )
cast += pt + ", ";
cast += string("Frame*)");
// We need to distinguish between hooks and non-hooks that happen
// to have matching type signatures. They'll be equivalent if they
// have identical cast's. To keep them separate, we cheat and
// make hook casts different, string-wise, without altering their
// semantics.
if ( in_hook )
cast += " ";
func_index[fname] = cast;
if ( casting_index.count(cast) == 0 )
{
casting_index[cast] = func_casting_glue.size();
DispatchInfo di;
di.cast = cast;
di.args = args;
di.is_hook = in_hook;
di.yield = yt;
func_casting_glue.emplace_back(di);
}
if ( lambda_ids )
{
DeclareSubclass(ft, pf, fname, args, lambda_ids);
BuildLambda(ft, pf, fname, body, l, lambda_ids);
EndBlock(true);
}
else
{
Emit("static %s %s(%s);", yt_decl, fname, ParamDecl(ft, lambda_ids, pf));
// Track this function as known to have been compiled.
// We don't track lambda bodies as compiled because they
// can't be instantiated directly without also supplying
// the captures. In principle we could make an exception
// for lambdas that don't take any arguments, but that
// seems potentially more confusing than beneficial.
compiled_funcs.emplace(fname);
auto loc_f = script_specific_filename(body);
cf_locs[fname] = loc_f;
}
auto h = pf->HashVal();
body_hashes[fname] = h;
body_priorities[fname] = priority;
body_names.emplace(body.get(), fname);
total_hash = merge_p_hashes(total_hash, h);
}
void CPPCompile::DeclareSubclass(const FuncTypePtr& ft, const ProfileFunc* pf, const string& fname,
const string& args, const IDPList* lambda_ids)
{
const auto& yt = ft->Yield();
auto yt_decl = in_hook ? "bool" : FullTypeName(yt); auto yt_decl = in_hook ? "bool" : FullTypeName(yt);
NL(); NL();
@ -76,8 +147,7 @@ void CPPCompile::DeclareSubclass(const FuncTypePtr& ft, const ProfileFunc* pf, c
} }
} }
Emit("%s_cl(const char* name%s) : CPPStmt(name)%s { }", fname, addl_args.c_str(), Emit("%s_cl(const char* name%s) : CPPStmt(name)%s { }", fname, addl_args, inits);
inits.c_str());
// An additional constructor just used to generate place-holder // An additional constructor just used to generate place-holder
// instances, due to the mis-design that lambdas are identified // instances, due to the mis-design that lambdas are identified
@ -92,7 +162,7 @@ void CPPCompile::DeclareSubclass(const FuncTypePtr& ft, const ProfileFunc* pf, c
if ( in_hook ) if ( in_hook )
{ {
Emit("if ( ! %s(%s) )", fname, BindArgs(ft, lambda_ids)); Emit("if ( ! %s(%s) )", fname, args);
StartBlock(); StartBlock();
Emit("flow = FLOW_BREAK;"); Emit("flow = FLOW_BREAK;");
EndBlock(); EndBlock();
@ -100,42 +170,36 @@ void CPPCompile::DeclareSubclass(const FuncTypePtr& ft, const ProfileFunc* pf, c
} }
else if ( IsNativeType(yt) ) else if ( IsNativeType(yt) )
GenInvokeBody(fname, yt, BindArgs(ft, lambda_ids)); GenInvokeBody(fname, yt, args);
else else
Emit("return %s(%s);", fname, BindArgs(ft, lambda_ids)); Emit("return %s(%s);", fname, args);
EndBlock(); EndBlock();
}
if ( lambda_ids ) void CPPCompile::DeclareDynCPPStmt()
BuildLambda(ft, pf, fname, body, l, lambda_ids); {
else Emit("// A version of CPPStmt that manages a function pointer and");
{ Emit("// dynamically casts it to a given type to call it via Exec().");
// Track this function as known to have been compiled. Emit("// We will later generate a custom Exec method to support this");
// We don't track lambda bodies as compiled because they Emit("// dispatch. All of this is ugly, and only needed because clang");
// can't be instantiated directly without also supplying Emit("// goes nuts (super slow) in the face of thousands of templates");
// the captures. In principle we could make an exception Emit("// in a given context (initializers, or a function body).");
// for lambdas that don't take any arguments, but that Emit("class CPPDynStmt : public CPPStmt");
// seems potentially more confusing than beneficial. Emit("\t{");
compiled_funcs.emplace(fname); Emit("public:");
Emit("\tCPPDynStmt(const char* _name, void* _func, int _type_signature) : CPPStmt(_name), "
auto loc_f = script_specific_filename(body); "func(_func), type_signature(_type_signature) { }");
cf_locs[fname] = loc_f; Emit("\tValPtr Exec(Frame* f, StmtFlowType& flow) override final;");
Emit("private:");
// Some guidance for those looking through the generated code. Emit("\t// The function to call in Exec().");
Emit("// compiled body for: %s", loc_f); Emit("\tvoid* func;");
} Emit("\t// Used via a switch in the dynamically-generated Exec() method");
Emit("\t// to cast func to the write type, and to call it with the");
EndBlock(true); Emit("\t// right arguments pulled out of the frame.");
Emit("\tint type_signature;");
auto h = pf->HashVal(); Emit("\t};");
body_hashes[fname] = h;
body_priorities[fname] = priority;
body_names.emplace(body.get(), fname);
names_to_bodies.emplace(fname, body.get());
total_hash = merge_p_hashes(total_hash, h);
} }
void CPPCompile::BuildLambda(const FuncTypePtr& ft, const ProfileFunc* pf, const string& fname, void CPPCompile::BuildLambda(const FuncTypePtr& ft, const ProfileFunc* pf, const string& fname,
@ -146,28 +210,17 @@ void CPPCompile::BuildLambda(const FuncTypePtr& ft, const ProfileFunc* pf, const
{ {
auto name = lambda_names[id]; auto name = lambda_names[id];
auto tn = FullTypeName(id->GetType()); auto tn = FullTypeName(id->GetType());
Emit("%s %s;", tn, name.c_str()); Emit("%s %s;", tn, name);
} }
// Generate initialization to create and register the lambda. // Generate initialization to create and register the lambda.
auto literal_name = string("\"") + l->Name() + "\""; auto h = pf->HashVal();
auto instantiate = string("make_intrusive<") + fname + "_cl>(" + literal_name + ")"; auto nl = lambda_ids->length();
bool has_captures = nl > 0;
int nl = lambda_ids->length(); auto gi = make_shared<LambdaRegistrationInfo>(this, l->Name(), ft, fname + "_cl", h,
auto h = Fmt(pf->HashVal()); has_captures);
auto has_captures = nl > 0 ? "true" : "false"; lambda_reg_info->AddInstance(gi);
auto l_init = string("register_lambda__CPP(") + instantiate + ", " + h + ", \"" + l->Name() +
"\", " + GenTypeName(ft) + ", " + has_captures + ");";
AddInit(l, l_init);
NoteInitDependency(l, TypeRep(ft));
// Make the lambda's body's initialization depend on the lambda's
// initialization. That way GenFuncVarInits() can generate
// initializations with the assurance that the associated body
// hashes will have been registered.
AddInit(body.get());
NoteInitDependency(body.get(), l);
// Generate method to extract the lambda captures from a deserialized // Generate method to extract the lambda captures from a deserialized
// Frame object. // Frame object.
@ -237,17 +290,71 @@ string CPPCompile::BindArgs(const FuncTypePtr& ft, const IDPList* lambda_ids)
string CPPCompile::ParamDecl(const FuncTypePtr& ft, const IDPList* lambda_ids, string CPPCompile::ParamDecl(const FuncTypePtr& ft, const IDPList* lambda_ids,
const ProfileFunc* pf) const ProfileFunc* pf)
{ {
const auto& params = ft->Params(); vector<string> p_types;
int n = params->NumFields(); vector<string> p_names;
GatherParamTypes(p_types, ft, lambda_ids, pf);
GatherParamNames(p_names, ft, lambda_ids, pf);
ASSERT(p_types.size() == p_names.size());
string decl; string decl;
for ( auto i = 0U; i < p_types.size(); ++i )
decl += p_types[i] + " " + p_names[i] + ", ";
// Add in the declaration of the frame.
return decl + "Frame* f__CPP";
}
void CPPCompile::GatherParamTypes(vector<string>& p_types, const FuncTypePtr& ft,
const IDPList* lambda_ids, const ProfileFunc* pf)
{
const auto& params = ft->Params();
int n = params->NumFields();
for ( auto i = 0; i < n; ++i ) for ( auto i = 0; i < n; ++i )
{ {
const auto& t = params->GetFieldType(i); const auto& t = params->GetFieldType(i);
auto tn = FullTypeName(t); auto tn = FullTypeName(t);
auto param_id = FindParam(i, pf); auto param_id = FindParam(i, pf);
string fn;
if ( IsNativeType(t) )
// Native types are always pass-by-value.
p_types.emplace_back(tn);
else
{
if ( param_id && pf->Assignees().count(param_id) > 0 )
// We modify the parameter.
p_types.emplace_back(tn);
else
// Not modified, so pass by const reference.
p_types.emplace_back(string("const ") + tn + "&");
}
}
if ( lambda_ids )
// Add the captures as additional parameters.
for ( auto& id : *lambda_ids )
{
const auto& t = id->GetType();
auto tn = FullTypeName(t);
// Allow the captures to be modified.
p_types.emplace_back(string(tn) + "& ");
}
}
void CPPCompile::GatherParamNames(vector<string>& p_names, const FuncTypePtr& ft,
const IDPList* lambda_ids, const ProfileFunc* pf)
{
const auto& params = ft->Params();
int n = params->NumFields();
for ( auto i = 0; i < n; ++i )
{
const auto& t = params->GetFieldType(i);
auto param_id = FindParam(i, pf);
if ( param_id ) if ( param_id )
{ {
@ -255,50 +362,22 @@ string CPPCompile::ParamDecl(const FuncTypePtr& ft, const IDPList* lambda_ids,
// We'll need to translate the parameter // We'll need to translate the parameter
// from its current representation to // from its current representation to
// type "any". // type "any".
fn = string("any_param__CPP_") + Fmt(i); p_names.emplace_back(string("any_param__CPP_") + Fmt(i));
else else
fn = LocalName(param_id); p_names.emplace_back(LocalName(param_id));
} }
else else
// Parameters that are unused don't wind up // Parameters that are unused don't wind up in the
// in the ProfileFunc. Rather than dig their // ProfileFunc. Rather than dig their name out of
// name out of the function's declaration, we // the function's declaration, we explicitly name
// explicitly name them to reflect that they're // them to reflect that they're unused.
// unused. p_names.emplace_back(string("unused_param__CPP_") + Fmt(i));
fn = string("unused_param__CPP_") + Fmt(i);
if ( IsNativeType(t) )
// Native types are always pass-by-value.
decl = decl + tn + " " + fn;
else
{
if ( param_id && pf->Assignees().count(param_id) > 0 )
// We modify the parameter.
decl = decl + tn + " " + fn;
else
// Not modified, so pass by const reference.
decl = decl + "const " + tn + "& " + fn;
}
decl += ", ";
} }
if ( lambda_ids ) if ( lambda_ids )
{
// Add the captures as additional parameters. // Add the captures as additional parameters.
for ( auto& id : *lambda_ids ) for ( auto& id : *lambda_ids )
{ p_names.emplace_back(lambda_names[id]);
auto name = lambda_names[id];
const auto& t = id->GetType();
auto tn = FullTypeName(t);
// Allow the captures to be modified.
decl = decl + tn + "& " + name + ", ";
}
}
// Add in the declaration of the frame.
return decl + "Frame* f__CPP";
} }
const ID* CPPCompile::FindParam(int i, const ProfileFunc* pf) const ID* CPPCompile::FindParam(int i, const ProfileFunc* pf)

View file

@ -12,14 +12,13 @@ namespace zeek::detail
using namespace std; using namespace std;
CPPCompile::CPPCompile(vector<FuncInfo>& _funcs, ProfileFuncs& _pfs, const string& gen_name, CPPCompile::CPPCompile(vector<FuncInfo>& _funcs, ProfileFuncs& _pfs, const string& gen_name,
const string& _addl_name, CPPHashManager& _hm, bool _update, const string& _addl_name, CPPHashManager& _hm, bool _standalone,
bool _standalone, bool report_uncompilable) bool report_uncompilable)
: funcs(_funcs), pfs(_pfs), hm(_hm), update(_update), standalone(_standalone) : funcs(_funcs), pfs(_pfs), hm(_hm), standalone(_standalone)
{ {
addl_name = _addl_name; addl_name = _addl_name;
bool is_addl = hm.IsAppend(); auto target_name = gen_name.c_str();
auto target_name = is_addl ? addl_name.c_str() : gen_name.c_str(); auto mode = "w";
auto mode = is_addl ? "a" : "w";
write_file = fopen(target_name, mode); write_file = fopen(target_name, mode);
if ( ! write_file ) if ( ! write_file )
@ -27,30 +26,6 @@ CPPCompile::CPPCompile(vector<FuncInfo>& _funcs, ProfileFuncs& _pfs, const strin
reporter->Error("can't open C++ target file %s", target_name); reporter->Error("can't open C++ target file %s", target_name);
exit(1); exit(1);
} }
if ( is_addl )
{
// We need a unique number to associate with the name
// space for the code we're adding. A convenient way to
// generate this safely is to use the present size of the
// file we're appending to. That guarantees that every
// incremental compilation will wind up with a different
// number.
struct stat st;
if ( fstat(fileno(write_file), &st) != 0 )
{
char buf[256];
util::zeek_strerror_r(errno, buf, sizeof(buf));
reporter->Error("fstat failed on %s: %s", target_name, buf);
exit(1);
}
// We use a value of "0" to mean "we're not appending,
// we're generating from scratch", so make sure we're
// distinct from that.
addl_tag = st.st_size + 1;
}
else else
{ {
// Create an empty "additional" file. // Create an empty "additional" file.
@ -83,10 +58,6 @@ void CPPCompile::Compile(bool report_uncompilable)
working_dir = buf; working_dir = buf;
if ( update && addl_tag > 0 && CheckForCollisions() )
// Inconsistent compilation environment.
exit(1);
GenProlog(); GenProlog();
// Determine which functions we can call directly, and reuse // Determine which functions we can call directly, and reuse
@ -100,9 +71,13 @@ void CPPCompile::Compile(bool report_uncompilable)
const char* reason; const char* reason;
if ( IsCompilable(func, &reason) ) if ( IsCompilable(func, &reason) )
compilable_funcs.insert(BodyName(func)); compilable_funcs.insert(BodyName(func));
else if ( reason && report_uncompilable ) else
fprintf(stderr, "%s cannot be compiled to C++ due to %s\n", func.Func()->Name(), {
reason); if ( reason && report_uncompilable )
fprintf(stderr, "%s cannot be compiled to C++ due to %s\n", func.Func()->Name(),
reason);
not_fully_compilable.insert(func.Func()->Name());
}
auto h = func.Profile()->HashVal(); auto h = func.Profile()->HashVal();
if ( hm.HasHash(h) ) if ( hm.HasHash(h) )
@ -119,39 +94,24 @@ void CPPCompile::Compile(bool report_uncompilable)
{ {
TypePtr tp{NewRef{}, (Type*)(t)}; TypePtr tp{NewRef{}, (Type*)(t)};
types.AddKey(tp, pfs.HashType(t)); types.AddKey(tp, pfs.HashType(t));
(void)RegisterType(tp);
} }
for ( const auto& t : types.DistinctKeys() ) // ### This doesn't work for -O add-C++
if ( ! types.IsInherited(t) ) Emit("TypePtr types__CPP[%s];", Fmt(static_cast<int>(types.DistinctKeys().size())));
// Type is new to this compilation, so we'll
// be generating it.
Emit("TypePtr %s;", types.KeyName(t));
NL(); NL();
for ( const auto& c : pfs.Constants() ) #if 0
AddConstant(c); for ( auto gi : all_global_info )
Emit(gi->Declare());
NL(); NL();
#endif
for ( auto& g : pfs.AllGlobals() ) for ( auto& g : pfs.AllGlobals() )
CreateGlobal(g); CreateGlobal(g);
// Now that the globals are created, register their attributes,
// if any, and generate their initialization for use in standalone
// scripts. We can't do these in CreateGlobal() because at that
// point it's possible that some of the globals refer to other
// globals not-yet-created.
for ( auto& g : pfs.AllGlobals() )
{
RegisterAttributes(g->GetAttrs());
if ( g->HasVal() )
{
auto gn = string(g->Name());
GenGlobalInit(g, globals[gn], g->GetVal());
}
}
for ( const auto& e : pfs.Events() ) for ( const auto& e : pfs.Events() )
if ( AddGlobal(e, "gl", false) ) if ( AddGlobal(e, "gl", false) )
Emit("EventHandlerPtr %s_ev;", globals[string(e)]); Emit("EventHandlerPtr %s_ev;", globals[string(e)]);
@ -201,10 +161,13 @@ void CPPCompile::Compile(bool report_uncompilable)
lambda_names.insert(n); lambda_names.insert(n);
} }
NL();
Emit("std::vector<CPP_RegisterBody> CPP__bodies_to_register = {");
for ( const auto& f : compiled_funcs ) for ( const auto& f : compiled_funcs )
RegisterCompiledBody(f); RegisterCompiledBody(f);
GenFuncVarInits(); Emit("};");
GenEpilog(); GenEpilog();
} }
@ -217,12 +180,75 @@ void CPPCompile::GenProlog()
Emit("namespace zeek::detail { //\n"); Emit("namespace zeek::detail { //\n");
} }
Emit("namespace CPP_%s { // %s\n", Fmt(addl_tag), working_dir.c_str()); Emit("namespace CPP_%s { // %s\n", Fmt(addl_tag), working_dir);
// The following might-or-might-not wind up being populated/used. // The following might-or-might-not wind up being populated/used.
Emit("std::vector<int> field_mapping;"); Emit("std::vector<int> field_mapping;");
Emit("std::vector<int> enum_mapping;"); Emit("std::vector<int> enum_mapping;");
NL(); NL();
const_info[TYPE_BOOL] = CreateConstInitInfo("Bool", "ValPtr", "bool");
const_info[TYPE_INT] = CreateConstInitInfo("Int", "ValPtr", "bro_int_t");
const_info[TYPE_COUNT] = CreateConstInitInfo("Count", "ValPtr", "bro_uint_t");
const_info[TYPE_DOUBLE] = CreateConstInitInfo("Double", "ValPtr", "double");
const_info[TYPE_TIME] = CreateConstInitInfo("Time", "ValPtr", "double");
const_info[TYPE_INTERVAL] = CreateConstInitInfo("Interval", "ValPtr", "double");
const_info[TYPE_ADDR] = CreateConstInitInfo("Addr", "ValPtr", "");
const_info[TYPE_SUBNET] = CreateConstInitInfo("SubNet", "ValPtr", "");
const_info[TYPE_PORT] = CreateConstInitInfo("Port", "ValPtr", "uint32_t");
const_info[TYPE_ENUM] = CreateCompoundInitInfo("Enum", "ValPtr");
const_info[TYPE_STRING] = CreateCompoundInitInfo("String", "ValPtr");
const_info[TYPE_LIST] = CreateCompoundInitInfo("List", "ValPtr");
const_info[TYPE_PATTERN] = CreateCompoundInitInfo("Pattern", "ValPtr");
const_info[TYPE_VECTOR] = CreateCompoundInitInfo("Vector", "ValPtr");
const_info[TYPE_RECORD] = CreateCompoundInitInfo("Record", "ValPtr");
const_info[TYPE_TABLE] = CreateCompoundInitInfo("Table", "ValPtr");
const_info[TYPE_FUNC] = CreateCompoundInitInfo("Func", "ValPtr");
const_info[TYPE_FILE] = CreateCompoundInitInfo("File", "ValPtr");
type_info = CreateCompoundInitInfo("Type", "Ptr");
attr_info = CreateCompoundInitInfo("Attr", "Ptr");
attrs_info = CreateCompoundInitInfo("Attributes", "Ptr");
call_exprs_info = CreateCustomInitInfo("CallExpr", "Ptr");
lambda_reg_info = CreateCustomInitInfo("LambdaRegistration", "");
global_id_info = CreateCustomInitInfo("GlobalID", "");
NL();
DeclareDynCPPStmt();
NL();
}
shared_ptr<CPP_InitsInfo> CPPCompile::CreateConstInitInfo(const char* tag, const char* type,
const char* c_type)
{
auto gi = make_shared<CPP_BasicConstInitsInfo>(tag, type, c_type);
return RegisterInitInfo(tag, type, gi);
}
shared_ptr<CPP_InitsInfo> CPPCompile::CreateCompoundInitInfo(const char* tag, const char* type)
{
auto gi = make_shared<CPP_CompoundInitsInfo>(tag, type);
return RegisterInitInfo(tag, type, gi);
}
shared_ptr<CPP_InitsInfo> CPPCompile::CreateCustomInitInfo(const char* tag, const char* type)
{
auto gi = make_shared<CPP_CustomInitsInfo>(tag, type);
if ( type[0] == '\0' )
gi->SetCPPType("void*");
return RegisterInitInfo(tag, type, gi);
}
shared_ptr<CPP_InitsInfo> CPPCompile::RegisterInitInfo(const char* tag, const char* type,
shared_ptr<CPP_InitsInfo> gi)
{
string v_type = type[0] ? (string(tag) + type) : "void*";
Emit("std::vector<%s> CPP__%s__;", v_type, string(tag));
all_global_info.insert(gi);
return gi;
} }
void CPPCompile::RegisterCompiledBody(const string& f) void CPPCompile::RegisterCompiledBody(const string& f)
@ -232,8 +258,9 @@ void CPPCompile::RegisterCompiledBody(const string& f)
// Build up an initializer of the events relevant to the function. // Build up an initializer of the events relevant to the function.
string events; string events;
if ( body_events.count(f) > 0 ) auto be = body_events.find(f);
for ( const auto& e : body_events[f] ) if ( be != body_events.end() )
for ( const auto& e : be->second )
{ {
if ( events.size() > 0 ) if ( events.size() > 0 )
events += ", "; events += ", ";
@ -252,74 +279,136 @@ void CPPCompile::RegisterCompiledBody(const string& f)
// same binary). // same binary).
h = merge_p_hashes(h, p_hash(cf_locs[f])); h = merge_p_hashes(h, p_hash(cf_locs[f]));
auto init = string("register_body__CPP(make_intrusive<") + f + "_cl>(\"" + f + "\"), " + auto fi = func_index.find(f);
Fmt(p) + ", " + Fmt(h) + ", " + events + ");"; ASSERT(fi != func_index.end());
auto type_signature = casting_index[fi->second];
AddInit(names_to_bodies[f], init); Emit("\tCPP_RegisterBody(\"%s\", (void*) %s, %s, %s, %s, std::vector<std::string>(%s)),", f, f,
Fmt(type_signature), Fmt(p), Fmt(h), events);
if ( update )
{
fprintf(hm.HashFile(), "func\n%s%s\n", scope_prefix(addl_tag).c_str(), f.c_str());
fprintf(hm.HashFile(), "%llu\n", h);
}
} }
void CPPCompile::GenEpilog() void CPPCompile::GenEpilog()
{ {
NL(); NL();
for ( const auto& ii : init_infos )
GenInitExpr(ii.second);
for ( const auto& e : init_exprs.DistinctKeys() ) NL();
Emit("ValPtr CPPDynStmt::Exec(Frame* f, StmtFlowType& flow)");
StartBlock();
Emit("flow = FLOW_RETURN;");
Emit("switch ( type_signature )");
StartBlock();
for ( auto i = 0U; i < func_casting_glue.size(); ++i )
{ {
GenInitExpr(e); Emit("case %s:", to_string(i));
if ( update ) StartBlock();
init_exprs.LogIfNew(e, addl_tag, hm.HashFile()); auto& glue = func_casting_glue[i];
auto invoke = string("(*(") + glue.cast + ")(func))(" + glue.args + ")";
if ( glue.is_hook )
{
Emit("if ( ! %s )", invoke);
StartBlock();
Emit("flow = FLOW_BREAK;");
EndBlock();
Emit("return nullptr;");
}
else if ( IsNativeType(glue.yield) )
GenInvokeBody(invoke, glue.yield);
else
Emit("return %s;", invoke);
EndBlock();
} }
for ( const auto& a : attributes.DistinctKeys() ) Emit("default:");
{ Emit("\treporter->InternalError(\"invalid type in CPPDynStmt::Exec\");");
GenAttrs(a); Emit("\treturn nullptr;");
if ( update )
attributes.LogIfNew(a, addl_tag, hm.HashFile());
}
// Generate the guts of compound types, and preserve type names EndBlock();
// if present. EndBlock();
for ( const auto& t : types.DistinctKeys() )
{
ExpandTypeVar(t);
if ( update )
types.LogIfNew(t, addl_tag, hm.HashFile());
}
InitializeEnumMappings(); NL();
GenPreInits(); for ( auto gi : all_global_info )
gi->GenerateInitializers(this);
unordered_set<const Obj*> to_do;
for ( const auto& oi : obj_inits )
to_do.insert(oi.first);
CheckInitConsistency(to_do);
auto nc = GenDependentInits(to_do);
if ( standalone ) if ( standalone )
GenStandaloneActivation(); GenStandaloneActivation();
NL();
InitializeEnumMappings();
NL();
InitializeFieldMappings();
NL();
InitializeBiFs();
NL();
indices_mgr.Generate(this);
NL();
InitializeStrings();
NL();
InitializeHashes();
NL();
InitializeConsts();
NL(); NL();
Emit("void init__CPP()"); Emit("void init__CPP()");
StartBlock(); StartBlock();
Emit("enum_mapping.resize(%s);\n", Fmt(int(enum_names.size()))); Emit("std::vector<std::vector<int>> InitIndices;");
Emit("pre_init__CPP();"); Emit("generate_indices_set(CPP__Indices__init, InitIndices);");
Emit("std::map<TypeTag, std::shared_ptr<CPP_AbstractInitAccessor>> InitConsts;");
NL(); NL();
for ( auto i = 1; i <= nc; ++i ) for ( const auto& ci : const_info )
Emit("init_%s__CPP();", Fmt(i)); {
auto& gi = ci.second;
Emit("InitConsts.emplace(%s, std::make_shared<CPP_InitAccessor<%s>>(%s));",
TypeTagName(ci.first), gi->CPPType(), gi->InitsName());
}
Emit("InitsManager im(CPP__ConstVals, InitConsts, InitIndices, CPP__Strings, CPP__Hashes, "
"CPP__Type__, CPP__Attributes__, CPP__Attr__, CPP__CallExpr__);");
NL();
Emit("for ( auto& b : CPP__bodies_to_register )");
StartBlock();
Emit("auto f = make_intrusive<CPPDynStmt>(b.func_name.c_str(), b.func, b.type_signature);");
Emit("register_body__CPP(f, b.priority, b.h, b.events);");
EndBlock();
NL();
int max_cohort = 0;
for ( auto gi : all_global_info )
max_cohort = std::max(max_cohort, gi->MaxCohort());
for ( auto c = 0; c <= max_cohort; ++c )
for ( auto gi : all_global_info )
if ( gi->CohortSize(c) > 0 )
Emit("%s.InitializeCohort(&im, %s);", gi->InitializersName(), Fmt(c));
NL();
Emit("for ( auto& b : CPP__BiF_lookups__ )");
Emit("\tb.ResolveBiF();");
// Populate mappings for dynamic offsets. // Populate mappings for dynamic offsets.
NL(); NL();
InitializeFieldMappings(); Emit("for ( auto& em : CPP__enum_mappings__ )");
Emit("\tenum_mapping.push_back(em.ComputeOffset(&im));");
NL();
Emit("for ( auto& fm : CPP__field_mappings__ )");
Emit("\tfield_mapping.push_back(fm.ComputeOffset(&im));");
if ( standalone ) if ( standalone )
Emit("standalone_init__CPP();"); Emit("standalone_init__CPP();");
@ -328,10 +417,7 @@ void CPPCompile::GenEpilog()
GenInitHook(); GenInitHook();
Emit("} // %s\n\n", scope_prefix(addl_tag).c_str()); Emit("} // %s\n\n", scope_prefix(addl_tag));
if ( update )
UpdateGlobalHashes();
if ( addl_tag > 0 ) if ( addl_tag > 0 )
return; return;

View file

@ -13,75 +13,14 @@ using namespace std;
void CPPCompile::StartBlock() void CPPCompile::StartBlock()
{ {
++block_level; IndentUp();
Emit("{"); Emit("{");
} }
void CPPCompile::EndBlock(bool needs_semi) void CPPCompile::EndBlock(bool needs_semi)
{ {
Emit("}%s", needs_semi ? ";" : ""); Emit("}%s", needs_semi ? ";" : "");
--block_level; IndentDown();
}
string CPPCompile::GenString(const char* b, int len) const
{
return string("make_intrusive<StringVal>(") + Fmt(len) + ", " + CPPEscape(b, len) + ")";
}
string CPPCompile::CPPEscape(const char* b, int len) const
{
string res = "\"";
for ( int i = 0; i < len; ++i )
{
unsigned char c = b[i];
switch ( c )
{
case '\a':
res += "\\a";
break;
case '\b':
res += "\\b";
break;
case '\f':
res += "\\f";
break;
case '\n':
res += "\\n";
break;
case '\r':
res += "\\r";
break;
case '\t':
res += "\\t";
break;
case '\v':
res += "\\v";
break;
case '\\':
res += "\\\\";
break;
case '"':
res += "\\\"";
break;
default:
if ( isprint(c) )
res += c;
else
{
char buf[8192];
snprintf(buf, sizeof buf, "%03o", c);
res += "\\";
res += buf;
}
break;
}
}
return res + "\"";
} }
void CPPCompile::Indent() const void CPPCompile::Indent() const

View file

@ -232,7 +232,12 @@ string CPPCompile::GenConstExpr(const ConstExpr* c, GenType gt)
const auto& t = c->GetType(); const auto& t = c->GetType();
if ( ! IsNativeType(t) ) if ( ! IsNativeType(t) )
return NativeToGT(const_vals[c->Value()], t, gt); {
auto v = c->ValuePtr();
int consts_offset; // ignored
(void)RegisterConstant(v, consts_offset);
return NativeToGT(const_vals[v.get()]->Name(), t, gt);
}
return NativeToGT(GenVal(c->ValuePtr()), t, gt); return NativeToGT(GenVal(c->ValuePtr()), t, gt);
} }
@ -1168,21 +1173,25 @@ string CPPCompile::GenField(const ExprPtr& rec, int field)
// Need to dynamically map the field. // Need to dynamically map the field.
int mapping_slot; int mapping_slot;
if ( record_field_mappings.count(rt) > 0 && record_field_mappings[rt].count(field) > 0 ) auto rfm = record_field_mappings.find(rt);
if ( rfm != record_field_mappings.end() && rfm->second.count(field) > 0 )
// We're already tracking this field. // We're already tracking this field.
mapping_slot = record_field_mappings[rt][field]; mapping_slot = rfm->second[field];
else else
{ {
// New mapping. // New mapping.
mapping_slot = num_rf_mappings++; mapping_slot = num_rf_mappings++;
auto pt = processed_types.find(rt);
ASSERT(pt != processed_types.end());
auto rt_offset = pt->second->Offset();
string field_name = rt->FieldName(field); string field_name = rt->FieldName(field);
field_decls.emplace_back(pair(rt, rt->FieldDecl(field))); field_decls.emplace_back(pair(rt_offset, rt->FieldDecl(field)));
if ( record_field_mappings.count(rt) > 0 ) if ( rfm != record_field_mappings.end() )
// We're already tracking this record. // We're already tracking this record.
record_field_mappings[rt][field] = mapping_slot; rfm->second[field] = mapping_slot;
else else
{ {
// Need to start tracking this record. // Need to start tracking this record.
@ -1207,9 +1216,10 @@ string CPPCompile::GenEnum(const TypePtr& t, const ValPtr& ev)
// Need to dynamically map the access. // Need to dynamically map the access.
int mapping_slot; int mapping_slot;
if ( enum_val_mappings.count(et) > 0 && enum_val_mappings[et].count(v) > 0 ) auto evm = enum_val_mappings.find(et);
if ( evm != enum_val_mappings.end() && evm->second.count(v) > 0 )
// We're already tracking this value. // We're already tracking this value.
mapping_slot = enum_val_mappings[et][v]; mapping_slot = evm->second[v];
else else
{ {
@ -1217,12 +1227,12 @@ string CPPCompile::GenEnum(const TypePtr& t, const ValPtr& ev)
mapping_slot = num_ev_mappings++; mapping_slot = num_ev_mappings++;
string enum_name = et->Lookup(v); string enum_name = et->Lookup(v);
enum_names.emplace_back(pair(et, move(enum_name))); enum_names.emplace_back(pair(TypeOffset(t), move(enum_name)));
if ( enum_val_mappings.count(et) > 0 ) if ( evm != enum_val_mappings.end() )
{ {
// We're already tracking this enum. // We're already tracking this enum.
enum_val_mappings[et][v] = mapping_slot; evm->second[v] = mapping_slot;
} }
else else
{ {

View file

@ -34,10 +34,8 @@ void CPPCompile::CompileLambda(const LambdaExpr* l, const ProfileFunc* pf)
DefineBody(l_id->GetType<FuncType>(), pf, lname, body, &ids, FUNC_FLAVOR_FUNCTION); DefineBody(l_id->GetType<FuncType>(), pf, lname, body, &ids, FUNC_FLAVOR_FUNCTION);
} }
void CPPCompile::GenInvokeBody(const string& fname, const TypePtr& t, const string& args) void CPPCompile::GenInvokeBody(const string& call, const TypePtr& t)
{ {
auto call = fname + "(" + args + ")";
if ( ! t || t->Tag() == TYPE_VOID ) if ( ! t || t->Tag() == TYPE_VOID )
{ {
Emit("%s;", call); Emit("%s;", call);
@ -144,7 +142,7 @@ void CPPCompile::InitializeEvents(const ProfileFunc* pf)
// returns an EventHandlerPtr, sigh. // returns an EventHandlerPtr, sigh.
Emit("if ( event_registry->Lookup(\"%s\") )", e); Emit("if ( event_registry->Lookup(\"%s\") )", e);
StartBlock(); StartBlock();
Emit("%s = event_registry->Register(\"%s\");", ev_name.c_str(), e); Emit("%s = event_registry->Register(\"%s\");", ev_name, e);
EndBlock(); EndBlock();
Emit("did_init = true;"); Emit("did_init = true;");
EndBlock(); EndBlock();
@ -233,6 +231,18 @@ string CPPCompile::BodyName(const FuncInfo& func)
return fname + "__" + Fmt(static_cast<int>(i)); return fname + "__" + Fmt(static_cast<int>(i));
} }
p_hash_type CPPCompile::BodyHash(const Stmt* body)
{
auto bn = body_names.find(body);
ASSERT(bn != body_names.end());
auto& body_name = bn->second;
auto bh = body_hashes.find(body_name);
ASSERT(bh != body_hashes.end());
return bh->second;
}
string CPPCompile::GenArgs(const RecordTypePtr& params, const Expr* e) string CPPCompile::GenArgs(const RecordTypePtr& params, const Expr* e)
{ {
const auto& exprs = e->AsListExpr()->Exprs(); const auto& exprs = e->AsListExpr()->Exprs();

View file

@ -12,28 +12,11 @@ using namespace std;
VarMapper compiled_items; VarMapper compiled_items;
CPPHashManager::CPPHashManager(const char* hash_name_base, bool _append) CPPHashManager::CPPHashManager(const char* hash_name_base)
{ {
append = _append;
hash_name = string(hash_name_base) + ".dat"; hash_name = string(hash_name_base) + ".dat";
if ( append ) hf_w = fopen(hash_name.c_str(), "w");
{
hf_r = fopen(hash_name.c_str(), "r");
if ( ! hf_r )
{
reporter->Error("can't open auxiliary C++ hash file %s for reading", hash_name.c_str());
exit(1);
}
lock_file(hash_name, hf_r);
LoadHashes(hf_r);
}
auto mode = append ? "a" : "w";
hf_w = fopen(hash_name.c_str(), mode);
if ( ! hf_w ) if ( ! hf_w )
{ {
reporter->Error("can't open auxiliary C++ hash file %s for writing", hash_name.c_str()); reporter->Error("can't open auxiliary C++ hash file %s for writing", hash_name.c_str());

View file

@ -27,11 +27,9 @@ public:
// end of the file (and the hash file will be locked, to prevent // end of the file (and the hash file will be locked, to prevent
// overlapping updates from concurrent compilation/appends). // overlapping updates from concurrent compilation/appends).
// Otherwise, the file will be generated afresh. // Otherwise, the file will be generated afresh.
CPPHashManager(const char* hash_name_base, bool append); CPPHashManager(const char* hash_name_base);
~CPPHashManager(); ~CPPHashManager();
bool IsAppend() const { return append; }
// True if the given hash has already been generated. // True if the given hash has already been generated.
bool HasHash(p_hash_type h) const { return previously_compiled.count(h) > 0; } bool HasHash(p_hash_type h) const { return previously_compiled.count(h) > 0; }
@ -96,10 +94,6 @@ protected:
// names, rather than their script-level names. // names, rather than their script-level names.
std::unordered_map<std::string, int> gv_scopes; std::unordered_map<std::string, int> gv_scopes;
// Whether we're appending to existing hash file(s), or starting
// afresh.
bool append;
// Base for file names. // Base for file names.
std::string hash_name; std::string hash_name;

View file

@ -14,12 +14,31 @@ namespace zeek::detail
using namespace std; using namespace std;
void CPPCompile::GenInitExpr(const ExprPtr& e) std::shared_ptr<CPP_InitInfo> CPPCompile::RegisterInitExpr(const ExprPtr& ep)
{
auto ename = InitExprName(ep);
auto ii = init_infos.find(ename);
if ( ii != init_infos.end() )
return ii->second;
auto wrapper_cl = string("wrapper_") + ename + "_cl";
auto gi = make_shared<CallExprInitInfo>(this, ep, ename, wrapper_cl);
call_exprs_info->AddInstance(gi);
init_infos[ename] = gi;
return gi;
}
void CPPCompile::GenInitExpr(std::shared_ptr<CallExprInitInfo> ce_init)
{ {
NL(); NL();
const auto& e = ce_init->GetExpr();
const auto& t = e->GetType(); const auto& t = e->GetType();
auto ename = InitExprName(e); const auto& ename = ce_init->Name();
const auto& wc = ce_init->WrapperClass();
// First, create a CPPFunc that we can compile to compute 'e'. // First, create a CPPFunc that we can compile to compute 'e'.
auto name = string("wrapper_") + ename; auto name = string("wrapper_") + ename;
@ -29,18 +48,17 @@ void CPPCompile::GenInitExpr(const ExprPtr& e)
// Create the Func subclass that can be used in a CallExpr to // Create the Func subclass that can be used in a CallExpr to
// evaluate 'e'. // evaluate 'e'.
Emit("class %s_cl : public CPPFunc", name); Emit("class %s : public CPPFunc", wc);
StartBlock(); StartBlock();
Emit("public:"); Emit("public:");
Emit("%s_cl() : CPPFunc(\"%s\", %s)", name, name, e->IsPure() ? "true" : "false"); Emit("%s() : CPPFunc(\"%s\", %s)", wc, name, e->IsPure() ? "true" : "false");
StartBlock(); StartBlock();
Emit("type = make_intrusive<FuncType>(make_intrusive<RecordType>(new type_decl_list()), %s, " Emit("type = make_intrusive<FuncType>(make_intrusive<RecordType>(new type_decl_list()), %s, "
"FUNC_FLAVOR_FUNCTION);", "FUNC_FLAVOR_FUNCTION);",
GenTypeName(t)); GenTypeName(t));
NoteInitDependency(e, TypeRep(t));
EndBlock(); EndBlock();
Emit("ValPtr Invoke(zeek::Args* args, Frame* parent) const override final"); Emit("ValPtr Invoke(zeek::Args* args, Frame* parent) const override final");
@ -62,15 +80,9 @@ void CPPCompile::GenInitExpr(const ExprPtr& e)
EndBlock(); EndBlock();
Emit("CallExprPtr %s;", ename); Emit("CallExprPtr %s;", ename);
NoteInitDependency(e, TypeRep(t));
AddInit(e, ename,
string("make_intrusive<CallExpr>(make_intrusive<ConstExpr>(make_intrusive<FuncVal>("
"make_intrusive<") +
name + "_cl>())), make_intrusive<ListExpr>(), false)");
} }
bool CPPCompile::IsSimpleInitExpr(const ExprPtr& e) const bool CPPCompile::IsSimpleInitExpr(const ExprPtr& e)
{ {
switch ( e->Tag() ) switch ( e->Tag() )
{ {
@ -101,360 +113,83 @@ string CPPCompile::InitExprName(const ExprPtr& e)
return init_exprs.KeyName(e); return init_exprs.KeyName(e);
} }
void CPPCompile::GenGlobalInit(const ID* g, string& gl, const ValPtr& v)
{
const auto& t = v->GetType();
auto tag = t->Tag();
if ( tag == TYPE_FUNC )
// This should get initialized by recognizing hash of
// the function's body.
return;
string init_val;
if ( tag == TYPE_OPAQUE )
{
// We can only generate these by reproducing the expression
// (presumably a function call) used to create the value.
// That isn't fully sound, since if the global's value
// was redef'd in terms of its original value (e.g.,
// "redef x = f(x)"), then we'll wind up with a broken
// expression. It's difficult to detect that in full
// generality, so um Don't Do That. (Note that this
// only affects execution of standalone compiled code,
// where the original scripts are replaced by load-stubs.
// If the scripts are available, then the HasVal() test
// we generate will mean we don't wind up using this
// expression anyway.)
// Use the final initialization expression.
auto& init_exprs = g->GetOptInfo()->GetInitExprs();
init_val = GenExpr(init_exprs.back(), GEN_VAL_PTR, false);
}
else
init_val = BuildConstant(g, v);
auto& attrs = g->GetAttrs();
AddInit(g, string("if ( ! ") + gl + "->HasVal() )");
if ( attrs )
{
RegisterAttributes(attrs);
AddInit(g, "\t{");
AddInit(g, "\t" + gl + "->SetVal(" + init_val + ");");
AddInit(g, "\t" + gl + "->SetAttrs(" + AttrsName(attrs) + ");");
AddInit(g, "\t}");
}
else
AddInit(g, "\t" + gl + "->SetVal(" + init_val + ");");
}
void CPPCompile::GenFuncVarInits()
{
for ( const auto& fv_init : func_vars )
{
auto& fv = fv_init.first;
auto& const_name = fv_init.second;
auto f = fv->AsFunc();
const auto& fn = f->Name();
const auto& ft = f->GetType();
NoteInitDependency(fv, TypeRep(ft));
const auto& bodies = f->GetBodies();
string hashes = "{";
for ( const auto& b : bodies )
{
auto body = b.stmts.get();
ASSERT(body_names.count(body) > 0);
auto& body_name = body_names[body];
ASSERT(body_hashes.count(body_name) > 0);
NoteInitDependency(fv, body);
if ( hashes.size() > 1 )
hashes += ", ";
hashes += Fmt(body_hashes[body_name]);
}
hashes += "}";
auto init = string("lookup_func__CPP(\"") + fn + "\", " + hashes + ", " + GenTypeName(ft) +
")";
AddInit(fv, const_name, init);
}
}
void CPPCompile::GenPreInit(const Type* t)
{
string pre_init;
switch ( t->Tag() )
{
case TYPE_ADDR:
case TYPE_ANY:
case TYPE_BOOL:
case TYPE_COUNT:
case TYPE_DOUBLE:
case TYPE_ERROR:
case TYPE_INT:
case TYPE_INTERVAL:
case TYPE_PATTERN:
case TYPE_PORT:
case TYPE_STRING:
case TYPE_TIME:
case TYPE_TIMER:
case TYPE_VOID:
pre_init = string("base_type(") + TypeTagName(t->Tag()) + ")";
break;
case TYPE_ENUM:
pre_init = string("get_enum_type__CPP(\"") + t->GetName() + "\")";
break;
case TYPE_SUBNET:
pre_init = string("make_intrusive<SubNetType>()");
break;
case TYPE_FILE:
pre_init = string("make_intrusive<FileType>(") + GenTypeName(t->AsFileType()->Yield()) +
")";
break;
case TYPE_OPAQUE:
pre_init = string("make_intrusive<OpaqueType>(\"") + t->AsOpaqueType()->Name() + "\")";
break;
case TYPE_RECORD:
{
string name;
if ( t->GetName() != "" )
name = string("\"") + t->GetName() + string("\"");
else
name = "nullptr";
pre_init = string("get_record_type__CPP(") + name + ")";
}
break;
case TYPE_LIST:
pre_init = string("make_intrusive<TypeList>()");
break;
case TYPE_TYPE:
case TYPE_VECTOR:
case TYPE_TABLE:
case TYPE_FUNC:
// Nothing to do for these, pre-initialization-wise.
return;
default:
reporter->InternalError("bad type in CPPCompile::GenType");
}
pre_inits.emplace_back(GenTypeName(t) + " = " + pre_init + ";");
}
void CPPCompile::GenPreInits()
{
NL();
Emit("void pre_init__CPP()");
StartBlock();
for ( const auto& i : pre_inits )
Emit(i);
EndBlock();
}
void CPPCompile::AddInit(const Obj* o, const string& init)
{
obj_inits[o].emplace_back(init);
}
void CPPCompile::AddInit(const Obj* o)
{
if ( obj_inits.count(o) == 0 )
obj_inits[o] = {};
}
void CPPCompile::NoteInitDependency(const Obj* o1, const Obj* o2)
{
obj_deps[o1].emplace(o2);
}
void CPPCompile::CheckInitConsistency(unordered_set<const Obj*>& to_do)
{
for ( const auto& od : obj_deps )
{
const auto& o = od.first;
if ( to_do.count(o) == 0 )
{
fprintf(stderr, "object not in to_do: %s\n", obj_desc(o).c_str());
exit(1);
}
for ( const auto& d : od.second )
{
if ( to_do.count(d) == 0 )
{
fprintf(stderr, "dep object for %s not in to_do: %s\n", obj_desc(o).c_str(),
obj_desc(d).c_str());
exit(1);
}
}
}
}
int CPPCompile::GenDependentInits(unordered_set<const Obj*>& to_do)
{
int n = 0;
// The basic approach is fairly brute force: find elements of
// to_do that don't have any pending dependencies; generate those;
// and remove them from the to_do list, freeing up other to_do entries
// to now not having any pending dependencies. Iterate until there
// are no more to-do items.
while ( to_do.size() > 0 )
{
unordered_set<const Obj*> cohort;
for ( const auto& o : to_do )
{
const auto& od = obj_deps.find(o);
bool has_pending_dep = false;
if ( od != obj_deps.end() )
{
for ( const auto& d : od->second )
if ( to_do.count(d) > 0 )
{
has_pending_dep = true;
break;
}
}
if ( has_pending_dep )
continue;
cohort.insert(o);
}
ASSERT(cohort.size() > 0);
GenInitCohort(++n, cohort);
for ( const auto& o : cohort )
{
ASSERT(to_do.count(o) > 0);
to_do.erase(o);
}
}
return n;
}
void CPPCompile::GenInitCohort(int nc, unordered_set<const Obj*>& cohort)
{
NL();
Emit("void init_%s__CPP()", Fmt(nc));
StartBlock();
// If any script/BiF functions are used for initializing globals,
// the code generated from that will expect the presence of a
// frame pointer, even if nil.
Emit("Frame* f__CPP = nullptr;");
// The following is just for making the output readable/pretty:
// add space between initializations for distinct objects, taking
// into account that some objects have empty initializations.
bool did_an_init = false;
for ( auto o : cohort )
{
if ( did_an_init )
{
NL();
did_an_init = false;
}
for ( const auto& i : obj_inits.find(o)->second )
{
Emit("%s", i);
did_an_init = true;
}
}
EndBlock();
}
void CPPCompile::InitializeFieldMappings() void CPPCompile::InitializeFieldMappings()
{ {
Emit("int fm_offset;"); Emit("std::vector<CPP_FieldMapping> CPP__field_mappings__ = ");
StartBlock();
for ( const auto& mapping : field_decls ) for ( const auto& mapping : field_decls )
{ {
auto rt = mapping.first; auto rt_arg = Fmt(mapping.first);
auto td = mapping.second; auto td = mapping.second;
auto fn = td->id; auto type_arg = Fmt(TypeOffset(td->type));
auto rt_name = GenTypeName(rt) + "->AsRecordType()"; auto attrs_arg = Fmt(AttributesOffset(td->attrs));
Emit("fm_offset = %s->FieldOffset(\"%s\");", rt_name, fn); Emit("CPP_FieldMapping(%s, \"%s\", %s, %s),", rt_arg, td->id, type_arg, attrs_arg);
Emit("if ( fm_offset < 0 )");
StartBlock();
Emit("// field does not exist, create it");
Emit("fm_offset = %s->NumFields();", rt_name);
Emit("type_decl_list tl;");
Emit(GenTypeDecl(td));
Emit("%s->AddFieldsDirectly(tl);", rt_name);
EndBlock();
Emit("field_mapping.push_back(fm_offset);");
} }
EndBlock(true);
} }
void CPPCompile::InitializeEnumMappings() void CPPCompile::InitializeEnumMappings()
{ {
int n = 0; Emit("std::vector<CPP_EnumMapping> CPP__enum_mappings__ = ");
StartBlock();
for ( const auto& mapping : enum_names ) for ( const auto& mapping : enum_names )
InitializeEnumMappings(mapping.first, mapping.second, n++); Emit("CPP_EnumMapping(%s, \"%s\"),", Fmt(mapping.first), mapping.second);
EndBlock(true);
} }
void CPPCompile::InitializeEnumMappings(const EnumType* et, const string& e_name, int index) void CPPCompile::InitializeBiFs()
{ {
AddInit(et, "{"); Emit("std::vector<CPP_LookupBiF> CPP__BiF_lookups__ = ");
auto et_name = GenTypeName(et) + "->AsEnumType()"; StartBlock();
AddInit(et, "int em_offset = " + et_name + "->Lookup(\"" + e_name + "\");");
AddInit(et, "if ( em_offset < 0 )");
AddInit(et, "\t{"); for ( const auto& b : BiFs )
AddInit(et, "\tem_offset = " + et_name + "->Names().size();"); Emit("CPP_LookupBiF(%s, \"%s\"),", b.first, b.second);
// The following is to catch the case where the offset is already
// in use due to it being specified explicitly for an existing enum.
AddInit(et, "\tif ( " + et_name + "->Lookup(em_offset) )");
AddInit(
et,
"\t\treporter->InternalError(\"enum inconsistency while initializing compiled scripts\");");
AddInit(et, "\t" + et_name + "->AddNameInternal(\"" + e_name + "\", em_offset);");
AddInit(et, "\t}");
AddInit(et, "enum_mapping[" + Fmt(index) + "] = em_offset;"); EndBlock(true);
}
AddInit(et, "}"); void CPPCompile::InitializeStrings()
{
Emit("std::vector<const char*> CPP__Strings =");
StartBlock();
for ( const auto& s : ordered_tracked_strings )
Emit("\"%s\",", s);
EndBlock(true);
}
void CPPCompile::InitializeHashes()
{
Emit("std::vector<p_hash_type> CPP__Hashes =");
StartBlock();
for ( const auto& h : ordered_tracked_hashes )
Emit(Fmt(h) + ",");
EndBlock(true);
}
void CPPCompile::InitializeConsts()
{
Emit("std::vector<CPP_ValElem> CPP__ConstVals =");
StartBlock();
for ( const auto& c : consts )
Emit("CPP_ValElem(%s, %s),", TypeTagName(c.first), Fmt(c.second));
EndBlock(true);
} }
void CPPCompile::GenInitHook() void CPPCompile::GenInitHook()
@ -482,11 +217,13 @@ void CPPCompile::GenStandaloneActivation()
{ {
NL(); NL();
#if 0
Emit("void standalone_activation__CPP()"); Emit("void standalone_activation__CPP()");
StartBlock(); StartBlock();
for ( auto& a : activations ) for ( auto& a : activations )
Emit(a); Emit(a);
EndBlock(); EndBlock();
#endif
NL(); NL();
Emit("void standalone_init__CPP()"); Emit("void standalone_init__CPP()");
@ -511,8 +248,9 @@ void CPPCompile::GenStandaloneActivation()
// We didn't wind up compiling it. // We didn't wind up compiling it.
continue; continue;
ASSERT(body_hashes.count(bname) > 0); auto bh = body_hashes.find(bname);
func_bodies[f].push_back(body_hashes[bname]); ASSERT(bh != body_hashes.end());
func_bodies[f].push_back(bh->second);
} }
for ( auto& fb : func_bodies ) for ( auto& fb : func_bodies )

View file

@ -0,0 +1,575 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include "zeek/Desc.h"
#include "zeek/RE.h"
#include "zeek/ZeekString.h"
#include "zeek/script_opt/CPP/Attrs.h"
#include "zeek/script_opt/CPP/Compile.h"
using namespace std;
namespace zeek::detail
{
// Builds the text "<base_name>[<index>]", i.e. a subscript into the
// vector named by base_name.
string CPP_InitsInfo::Name(int index) const
	{
	string elem_name = base_name;
	elem_name += "[";
	elem_name += Fmt(index);
	elem_name += "]";
	return elem_name;
	}
// Adds an initialization instance to this collection: grows the
// per-cohort table if needed, assigns the instance the next offset,
// and files it under its cohort.
void CPP_InitsInfo::AddInstance(shared_ptr<CPP_InitInfo> g)
	{
	const auto cohort = g->InitCohort();
	const auto cohorts_needed = cohort + 1;

	// Make sure there's a slot for this cohort.
	if ( static_cast<int>(instances.size()) < cohorts_needed )
		instances.resize(cohorts_needed);

	// Offsets are handed out in order of registration.
	g->SetOffset(this, size++);

	instances[cohort].push_back(move(g));
	}
// Returns the C++ declaration "std::vector<CPPType> base_name;" for the
// vector associated with this collection.
string CPP_InitsInfo::Declare() const
	{
	string decl("std::vector<");
	decl += CPPType();
	decl += "> ";
	decl += base_name;
	decl += ";";
	return decl;
	}
// Emits the C++ definition of this collection's initializer object:
// a constructor call taking the vector's name, the offset-set index,
// and a brace-enclosed list with one nested list per cohort.
void CPP_InitsInfo::GenerateInitializers(CPPCompile* c)
	{
	// Must come first: it computes offset_set, which is emitted below.
	BuildOffsetSet(c);

	c->NL();

	const auto inits_type = InitsType();

	// Opening of the definition, up through the first two arguments.
	c->Emit("%s %s = %s(%s, %s,", inits_type, InitializersName(), inits_type, base_name,
	        Fmt(offset_set));

	c->IndentUp();
	c->Emit("{");

	// Each cohort becomes its own brace-enclosed element.
	for ( auto& cohort_members : instances )
		{
		c->Emit("{");
		BuildCohort(c, cohort_members);
		c->Emit("},");
		}

	c->Emit("}");
	c->IndentDown();
	c->Emit(");");
	}
// Computes offset_set: registers each cohort's member offsets with the
// compiler's indices manager, then registers the resulting per-cohort
// indices as one more set, whose index is stored in offset_set.
void CPP_InitsInfo::BuildOffsetSet(CPPCompile* c)
	{
	// One entry per cohort, in cohort order.
	vector<int> cohort_slots;

	for ( auto& cohort_members : instances )
		{
		// Collect the offsets of this cohort's members ...
		vector<int> member_offsets;
		member_offsets.reserve(cohort_members.size());

		for ( auto& member : cohort_members )
			member_offsets.push_back(member->Offset());

		// ... and reduce them to a single index via the manager.
		cohort_slots.push_back(c->IndMgr().AddIndices(member_offsets));
		}

	// The per-cohort indices are themselves reduced to a single index.
	offset_set = c->IndMgr().AddIndices(cohort_slots);
	}
// Emits one initializer element for every member of the given cohort,
// in the order the members appear.
void CPP_InitsInfo::BuildCohort(CPPCompile* c, std::vector<std::shared_ptr<CPP_InitInfo>>& cohort)
	{
	for ( auto& member : cohort )
		{
		// Gather the member's initializer values, then hand them
		// (along with its initializer type) to the element builder.
		vector<string> init_vals;
		member->InitializerVals(init_vals);
		BuildCohortElement(c, member->InitializerType(), init_vals);
		}
	}
// Emits a single cohort element of the form
// "std::make_shared<init_type>(v1, v2, ...),", where the v's are the
// entries of ivs joined by ", ".
void CPP_InitsInfo::BuildCohortElement(CPPCompile* c, string init_type, vector<string>& ivs)
	{
	string joined;

	for ( auto iv = ivs.begin(); iv != ivs.end(); ++iv )
		{
		// Separator goes before every value except the first.
		if ( iv != ivs.begin() )
			joined += ", ";

		joined += *iv;
		}

	c->Emit("std::make_shared<%s>(%s),", init_type, joined);
	}
// Emits a cohort element as a brace-enclosed list, "{ v1, v2, ... },".
// Every value, including the last, is followed by ", "; init_type is
// not used here.
void CPP_CompoundInitsInfo::BuildCohortElement(CPPCompile* c, string init_type, vector<string>& ivs)
	{
	string elems;

	for ( auto& iv : ivs )
		{
		elems += iv;
		elems += ", ";
		}

	c->Emit("{ %s},", elems);
	}
// Emits a cohort element consisting of the single value in ivs followed
// by a comma. Exactly one value is expected; init_type is not used.
void CPP_BasicConstInitsInfo::BuildCohortElement(CPPCompile* c, string init_type,
                                                 vector<string>& ivs)
	{
	ASSERT(ivs.size() == 1);

	const string elem = ivs.front() + ",";
	c->Emit(elem);
	}
// Returns, as text, the constants offset for the given value, or "-1"
// (via Fmt) if the value is nil. For a non-nil value, also raises this
// object's cohort to be later than that of the registered constant.
string CPP_InitInfo::ValElem(CPPCompile* c, ValPtr v)
	{
	if ( ! v )
		return Fmt(-1);

	// Filled in by RegisterConstant().
	int consts_offset;
	auto const_info = c->RegisterConstant(v, consts_offset);

	init_cohort = max(init_cohort, const_info->InitCohort() + 1);

	return Fmt(consts_offset);
	}
// Sets init to the (formatted) tracked-string index of the value's
// textual description.
DescConstInfo::DescConstInfo(CPPCompile* c, ValPtr v) : CPP_InitInfo()
	{
	ODesc desc;
	v->Describe(&desc);

	const auto desc_index = c->TrackString(desc.Description());
	init = Fmt(desc_index);
	}
// Records an enum constant's numeric value and the offset of its type,
// placing this object one cohort after the type's cohort.
EnumConstInfo::EnumConstInfo(CPPCompile* c, ValPtr v)
	{
	e_val = v->AsEnum();

	auto enum_val = v->AsEnumVal();
	auto& enum_type = enum_val->GetType();

	e_type = c->TypeOffset(enum_type);
	init_cohort = c->TypeCohort(enum_type) + 1;
	}
// Records a string constant as its length plus the tracked-string index
// of its escaped form (as produced by CPPEscape()).
StringConstInfo::StringConstInfo(CPPCompile* c, ValPtr v) : CPP_InitInfo()
	{
	auto s = v->AsString();

	// CPPEscape() takes char data; use a named cast rather than a
	// C-style one so the reinterpretation is explicit and greppable.
	const char* b = reinterpret_cast<const char*>(s->Bytes());

	len = s->Len();
	chars = c->TrackString(CPPEscape(b, len));
	}
// Records a pattern constant: the tracked-string index of its escaped
// original text, and whether it is case-insensitive.
PatternConstInfo::PatternConstInfo(CPPCompile* c, ValPtr v) : CPP_InitInfo()
	{
	auto matcher = v->AsPatternVal()->Get();

	const auto escaped_text = CPPEscape(matcher->OrigText());
	pattern = c->TrackString(escaped_text);

	is_case_insensitive = matcher->IsCaseInsensitive();
	}
// Remembers the compiler, records the offset of the value's type, and
// places this object one cohort after that type's cohort.
CompoundItemInfo::CompoundItemInfo(CPPCompile* _c, ValPtr v) : CPP_InitInfo(), c(_c)
	{
	auto& val_type = v->GetType();

	type = c->TypeOffset(val_type);
	init_cohort = c->TypeCohort(val_type) + 1;
	}
// Records, in order, the constant offset of each element of a list
// value.
ListConstInfo::ListConstInfo(CPPCompile* _c, ValPtr v) : CompoundItemInfo(_c)
	{
	auto list_val = cast_intrusive<ListVal>(v);
	auto num_elems = list_val->Length();

	for ( int idx = 0; idx < num_elems; ++idx )
		{
		auto elem = ValElem(c, list_val->Idx(idx));
		vals.emplace_back(elem);
		}
	}
// Records, in order, the constant offset of each element of a vector
// value.
VectorConstInfo::VectorConstInfo(CPPCompile* c, ValPtr v) : CompoundItemInfo(c, v)
	{
	auto vec_val = cast_intrusive<VectorVal>(v);
	auto num_elems = vec_val->Size();

	for ( unsigned int idx = 0; idx < num_elems; ++idx )
		{
		auto elem = ValElem(c, vec_val->ValAt(idx));
		vals.emplace_back(elem);
		}
	}
// Records the offset of a record value's type and, in field order, the
// constant offset of each of its fields.
RecordConstInfo::RecordConstInfo(CPPCompile* c, ValPtr v) : CompoundItemInfo(c, v)
	{
	auto rec_val = cast_intrusive<RecordVal>(v);
	auto num_fields = rec_val->NumFields();

	type = c->TypeOffset(rec_val->GetType());

	for ( unsigned int field = 0; field < num_fields; ++field )
		{
		auto elem = ValElem(c, rec_val->GetField(field));
		vals.emplace_back(elem);
		}
	}
// Records a table value as a flat sequence of constant offsets: for
// each entry, first its index and then its value.
TableConstInfo::TableConstInfo(CPPCompile* c, ValPtr v) : CompoundItemInfo(c, v)
	{
	auto tbl_val = cast_intrusive<TableVal>(v);
	auto entries = tbl_val->ToMap();

	for ( auto& entry : entries )
		{
		auto index_elem = ValElem(c, entry.first);
		vals.emplace_back(index_elem);

		auto value_elem = ValElem(c, entry.second);
		vals.emplace_back(value_elem);
		}
	}
// Records a file value by the tracked-string index of its name.
FileConstInfo::FileConstInfo(CPPCompile* c, ValPtr v) : CompoundItemInfo(c, v)
	{
	auto file_val = cast_intrusive<FileVal>(v);

	auto name_index = c->TrackString(file_val->Get()->Name());
	vals.emplace_back(Fmt(name_index));
	}
// Remembers the function value and pushes this object's cohort one
// further than what CompoundItemInfo assigned.
FuncConstInfo::FuncConstInfo(CPPCompile* _c, ValPtr v) : CompoundItemInfo(_c, v), fv(v->AsFuncVal())
	{
	// Somewhat of a hack: this constant might depend on a lambda
	// having been registered. LambdaRegistrationInfo puts itself 1
	// cohort past the function type, and CompoundItemInfo has already
	// put us 1 past our type, so going 1 further (2 past the type)
	// ensures any such lambda registration precedes us.
	init_cohort += 1;
	}
void FuncConstInfo::InitializerVals(std::vector<std::string>& ivs) const
{
auto f = fv->AsFunc();
const auto& fn = f->Name();
ivs.emplace_back(Fmt(type));
ivs.emplace_back(Fmt(c->TrackString(fn)));
if ( ! c->NotFullyCompilable(fn) )
{
const auto& bodies = f->GetBodies();
for ( const auto& b : bodies )
{
auto h = c->BodyHash(b.stmts.get());
auto h_o = c->TrackHash(h);
ivs.emplace_back(Fmt(h_o));
}
}
}
// Builds the initialization values for a single attribute. The first
// value is the attribute's tag; the second describes what kind of
// expression (if any) is associated with it (one of the AE_* values);
// and, when there is an expression, a third value identifies it.
AttrInfo::AttrInfo(CPPCompile* _c, const AttrPtr& attr) : CompoundItemInfo(_c)
	{
	vals.emplace_back(Fmt(static_cast<int>(attr->Tag())));

	auto attr_expr = attr->GetExpr();

	if ( ! attr_expr )
		{
		// No associated expression.
		vals.emplace_back(Fmt(static_cast<int>(AE_NONE)));
		return;
		}

	// The expression's type needs to be initialized before we are.
	auto type_gi = c->RegisterType(attr_expr->GetType());
	init_cohort = max(init_cohort, type_gi->InitCohort() + 1);

	if ( ! CPPCompile::IsSimpleInitExpr(attr_expr) )
		{
		// Not a simple expression: register it as an init expression
		// and refer to it by that registration's offset.
		auto call_gi = c->RegisterInitExpr(attr_expr);
		init_cohort = max(init_cohort, call_gi->InitCohort() + 1);

		vals.emplace_back(Fmt(static_cast<int>(AE_CALL)));
		vals.emplace_back(Fmt(call_gi->Offset()));
		return;
		}

	switch ( attr_expr->Tag() )
		{
		case EXPR_CONST:
			{
			auto const_val = attr_expr->AsConstExpr()->ValuePtr();
			vals.emplace_back(Fmt(static_cast<int>(AE_CONST)));
			vals.emplace_back(ValElem(c, const_val));
			break;
			}

		case EXPR_NAME:
			{
			// Refer to the global by the tracked-string index of
			// its name.
			auto global_id = attr_expr->AsNameExpr()->Id();
			auto global_gi = c->RegisterGlobal(global_id);
			init_cohort = max(init_cohort, global_gi->InitCohort() + 1);

			vals.emplace_back(Fmt(static_cast<int>(AE_NAME)));
			vals.emplace_back(Fmt(c->TrackString(global_id->Name())));
			break;
			}

		default:
			// The only remaining possibility is a record coercion,
			// which is identified by the offset of its type.
			ASSERT(attr_expr->Tag() == EXPR_RECORD_COERCE);
			vals.emplace_back(Fmt(static_cast<int>(AE_RECORD)));
			vals.emplace_back(Fmt(type_gi->Offset()));
			break;
		}
	}
// Builds the initialization values for a set of attributes: the offset
// of each individual attribute, which must already be present in the
// compiler's processed attributes. This object's cohort is placed after
// that of every attribute it refers to.
AttrsInfo::AttrsInfo(CPPCompile* _c, const AttributesPtr& _attrs) : CompoundItemInfo(_c)
	{
	const auto& processed = c->ProcessedAttr();

	for ( const auto& attr : _attrs->GetAttrs() )
		{
		auto entry = processed.find(attr.get());
		ASSERT(entry != processed.end());

		const auto& attr_gi = entry->second;
		init_cohort = max(init_cohort, attr_gi->InitCohort() + 1);
		vals.emplace_back(Fmt(attr_gi->Offset()));
		}
	}
// Gathers what's needed to initialize a global: its Zeek-level name,
// the offsets of its type and (if any) attributes, whether it's
// exported, and the constant offset of its value. The cohort is placed
// after those of the type and the attributes.
GlobalInitInfo::GlobalInitInfo(CPPCompile* c, const ID* g, string _CPP_name)
	: CPP_InitInfo(), CPP_name(move(_CPP_name))
	{
	Zeek_name = g->Name();

	auto type_gi = c->RegisterType(g->GetType());
	init_cohort = max(init_cohort, type_gi->InitCohort() + 1);
	type = type_gi->Offset();

	auto attrs_gi = c->RegisterAttributes(g->GetAttrs());

	if ( ! attrs_gi )
		// -1 marks the absence of attributes.
		attrs = -1;
	else
		{
		init_cohort = max(init_cohort, attrs_gi->InitCohort() + 1);
		attrs = attrs_gi->Offset();
		}

	exported = g->IsExport();

	val = ValElem(c, g->GetVal());
	}
void GlobalInitInfo::InitializerVals(std::vector<std::string>& ivs) const
{
ivs.push_back(CPP_name);
ivs.push_back(string("\"") + Zeek_name + "\"");
ivs.push_back(Fmt(type));
ivs.push_back(Fmt(attrs));
ivs.push_back(val);
ivs.push_back(Fmt(exported));
}
// Remembers the expression along with its name and wrapper class, and
// places this object's cohort after that of the expression's type.
CallExprInitInfo::CallExprInitInfo(CPPCompile* c, ExprPtr _e, string _e_name, string _wrapper_class)
	: e(move(_e)), e_name(move(_e_name)), wrapper_class(move(_wrapper_class))
	{
	auto type_gi = c->RegisterType(e->GetType());
	const auto after_type = type_gi->InitCohort() + 1;

	init_cohort = max(init_cohort, after_type);
	}
// Remembers the lambda's name, wrapper class, hash and whether it has
// captures; records the offset of its function type, and places this
// object's cohort after that of the type.
LambdaRegistrationInfo::LambdaRegistrationInfo(CPPCompile* c, string _name, FuncTypePtr ft,
                                               string _wrapper_class, p_hash_type _h,
                                               bool _has_captures)
	: name(move(_name)), wrapper_class(move(_wrapper_class)), h(_h), has_captures(_has_captures)
	{
	auto type_gi = c->RegisterType(ft);
	const auto after_type = type_gi->InitCohort() + 1;

	init_cohort = max(init_cohort, after_type);
	func_type = type_gi->Offset();
	}
void LambdaRegistrationInfo::InitializerVals(std::vector<std::string>& ivs) const
{
ivs.emplace_back(string("\"") + name + "\"");
ivs.emplace_back(Fmt(func_type));
ivs.emplace_back(Fmt(h));
ivs.emplace_back(has_captures ? "true" : "false");
}
// Appends the initializer values for an enum type: the tracked-string
// index of the type's name, followed by a (name index, integer value)
// pair for each of the enum's names.
void EnumTypeInfo::AddInitializerVals(std::vector<std::string>& ivs) const
	{
	ivs.emplace_back(Fmt(c->TrackString(t->GetName())));

	auto enum_type = t->AsEnumType();

	for ( const auto& entry : enum_type->Names() )
		{
		auto name_index = c->TrackString(entry.first);
		ivs.emplace_back(Fmt(name_index));
		ivs.emplace_back(Fmt(static_cast<int>(entry.second)));
		}
	}
// Appends the sole initializer value for an opaque type: the
// tracked-string index of its name.
void OpaqueTypeInfo::AddInitializerVals(std::vector<std::string>& ivs) const
	{
	auto name_index = c->TrackString(t->GetName());
	ivs.emplace_back(Fmt(name_index));
	}
// Registers the type that this type-of-type refers to and, if the
// registration yields an info object, adopts its cohort.
TypeTypeInfo::TypeTypeInfo(CPPCompile* _c, TypePtr _t) : AbstractTypeInfo(_c, move(_t))
	{
	tt = t->AsTypeType()->GetType();

	auto inner_gi = c->RegisterType(tt);

	if ( ! inner_gi )
		return;

	init_cohort = inner_gi->InitCohort();
	}
// Appends the sole initializer value: the offset of the referred-to
// type.
void TypeTypeInfo::AddInitializerVals(std::vector<std::string>& ivs) const
	{
	auto inner_offset = c->TypeOffset(tt);
	ivs.emplace_back(to_string(inner_offset));
	}
// Registers the vector's yield type and, if the registration yields an
// info object, adopts its cohort.
VectorTypeInfo::VectorTypeInfo(CPPCompile* _c, TypePtr _t) : AbstractTypeInfo(_c, move(_t))
	{
	yield = t->Yield();

	auto yield_gi = c->RegisterType(yield);

	if ( ! yield_gi )
		return;

	init_cohort = yield_gi->InitCohort();
	}
// Appends the sole initializer value: the offset of the yield type.
void VectorTypeInfo::AddInitializerVals(std::vector<std::string>& ivs) const
	{
	auto yield_offset = c->TypeOffset(yield);
	ivs.emplace_back(to_string(yield_offset));
	}
// Registers each of the list's element types, raising this object's
// cohort to at least that of every element type for which registration
// yields an info object.
ListTypeInfo::ListTypeInfo(CPPCompile* _c, TypePtr _t)
	: AbstractTypeInfo(_c, move(_t)), types(t->AsTypeList()->GetTypes())
	{
	for ( auto& elem_type : types )
		{
		auto elem_gi = c->RegisterType(elem_type);

		if ( ! elem_gi )
			continue;

		init_cohort = max(init_cohort, elem_gi->InitCohort());
		}
	}
// Appends, in order, the type offset of each of the list's element
// types.
void ListTypeInfo::AddInitializerVals(std::vector<std::string>& ivs) const
	{
	// The loop variable is deliberately not called "t", to avoid
	// hiding the member of that name.
	for ( const auto& elem_type : types )
		ivs.emplace_back(Fmt(c->TypeOffset(elem_type)));
	}
// Registers the table's index types (recording their offset) and, if
// present, its yield type. The cohort starts as that of the indices and
// is raised to that of the yield type when registration of the latter
// yields an info object.
TableTypeInfo::TableTypeInfo(CPPCompile* _c, TypePtr _t) : AbstractTypeInfo(_c, move(_t))
	{
	auto tbl_type = t->AsTableType();

	auto indices_gi = c->RegisterType(tbl_type->GetIndices());
	ASSERT(indices_gi);

	indices = indices_gi->Offset();
	init_cohort = indices_gi->InitCohort();

	yield = tbl_type->Yield();

	if ( ! yield )
		// No yield type to register.
		return;

	auto yield_gi = c->RegisterType(yield);

	if ( yield_gi )
		init_cohort = max(init_cohort, yield_gi->InitCohort());
	}
// Appends the initializer values for a table type: the offset of its
// indices, then the offset of its yield type, or -1 if it has none.
void TableTypeInfo::AddInitializerVals(std::vector<std::string>& ivs) const
	{
	ivs.emplace_back(Fmt(indices));

	auto yield_offset = yield ? c->TypeOffset(yield) : -1;
	ivs.emplace_back(Fmt(yield_offset));
	}
// Records a function type's flavor, parameters and yield, registering
// the parameter type and (if present) the yield type. The cohort is
// taken from whichever registrations yield an info object.
FuncTypeInfo::FuncTypeInfo(CPPCompile* _c, TypePtr _t) : AbstractTypeInfo(_c, move(_t))
	{
	auto func_type = t->AsFuncType();

	flavor = func_type->Flavor();
	params = func_type->Params();
	yield = func_type->Yield();

	auto params_gi = c->RegisterType(func_type->Params());

	if ( params_gi )
		init_cohort = params_gi->InitCohort();

	if ( ! yield )
		// No yield type to register.
		return;

	auto yield_gi = c->RegisterType(func_type->Yield());

	if ( yield_gi )
		init_cohort = max(init_cohort, yield_gi->InitCohort());
	}
// Appends the initializer values for a function type: the offset of its
// parameters' type, the offset of its yield type (-1 if none), and its
// flavor as an integer.
void FuncTypeInfo::AddInitializerVals(std::vector<std::string>& ivs) const
	{
	auto params_offset = c->TypeOffset(params);
	ivs.emplace_back(Fmt(params_offset));

	auto yield_offset = yield ? c->TypeOffset(yield) : -1;
	ivs.emplace_back(Fmt(yield_offset));

	ivs.emplace_back(Fmt(static_cast<int>(flavor)));
	}
// Walks the record's fields (if it has any), recording each field's
// name, type, and attributes offset (-1 if none), and registering the
// field types and attributes along the way.
RecordTypeInfo::RecordTypeInfo(CPPCompile* _c, TypePtr _t) : AbstractTypeInfo(_c, move(_t))
	{
	auto fields = t->AsRecordType()->Types();

	if ( ! fields )
		return;

	for ( const auto& field : *fields )
		{
		field_names.emplace_back(field->id);

		// Registration can come back empty, which is how a recursive
		// type shows up; in that case the cohort is left as is.
		auto type_gi = c->RegisterType(field->type);

		if ( type_gi )
			init_cohort = max(init_cohort, type_gi->InitCohort());

		field_types.push_back(field->type);

		if ( ! field->attrs )
			{
			field_attrs.push_back(-1);
			continue;
			}

		// Attributes need to be initialized before this type.
		auto attrs_gi = c->RegisterAttributes(field->attrs);
		init_cohort = max(init_cohort, attrs_gi->InitCohort() + 1);
		field_attrs.push_back(attrs_gi->Offset());
		}
	}
// Appends the initializer values for a record type: the tracked-string
// index of its name, followed by a (name index, type offset, attributes
// offset) triple for each field.
void RecordTypeInfo::AddInitializerVals(std::vector<std::string>& ivs) const
	{
	ivs.emplace_back(Fmt(c->TrackString(t->GetName())));

	const auto num_fields = field_names.size();

	for ( size_t field = 0; field < num_fields; ++field )
		{
		auto name_index = c->TrackString(field_names[field]);
		ivs.emplace_back(Fmt(name_index));

		// Field type offsets are looked up here rather than in the
		// constructor: since record types can be defined
		// recursively, the offsets weren't reliably available
		// then, but should be by this point.
		auto type_offset = c->TypeOffset(field_types[field]);
		ivs.emplace_back(Fmt(type_offset));

		ivs.emplace_back(Fmt(field_attrs[field]));
		}
	}
// Emits the definition of the CPP__Indices__init array. Each set of
// indices is written as its size followed by its values, and the array
// ends with a final -1 entry.
void IndicesManager::Generate(CPPCompile* c)
	{
	c->Emit("int CPP__Indices__init[] =");
	c->StartBlock();

	int set_num = 0;

	for ( auto& ind_set : indices_set )
		{
		// Begin with a comment giving the set's ordinal, as a
		// debugging aid, followed by the set's size.
		string text("/* ");
		text += to_string(set_num++);
		text += " */ ";
		text += to_string(ind_set.size());
		text += ", ";

		// Counts the size entry plus the values added so far; the
		// pending text is flushed whenever this hits a multiple of 10.
		int num_entries = 1;

		for ( auto ind : ind_set )
			{
			text += to_string(ind);
			text += ", ";

			++num_entries;

			if ( num_entries % 10 == 0 )
				{
				c->Emit(text);
				text.clear();
				}
			}

		// Flush whatever remains for this set.
		if ( ! text.empty() )
			c->Emit(text);
		}

	c->Emit("-1");
	c->EndBlock(true);
	}
} // zeek::detail

View file

@ -0,0 +1,693 @@
// See the file "COPYING" in the main distribution directory for copyright.
// Classes for tracking information for initializing C++ values used by the
// generated code.
// Initialization is probably the most complex part of the entire compiler,
// as there are a lot of considerations. There are two basic parts: (1) the
// generation of C++ code for doing run-time initialization, which is covered
// by the classes in this file, and (2) the execution of that code to do the
// actual initialization, which is covered by the classes in RuntimeInits.h.
//
// There are two fundamental types of initialization, those that create values
// (such as Zeek Type and Val objects) that will be used during the execution
// of compiled scripts, and those that perform actions such as registering
// the presence of a global or a lambda. In addition, for the former (values
// used at run-time), some are grouped together into vectors, with the compiled
// code using a hardwired index to get to a particular value; and some have
// standalone globals (for example, one for each BiF that a compiled script
// may call).
//
// For each of these types of initialization, our general approach is to have
// a class that manages a single instance of that type, and an object that
// manages all of those instances collectively. The latter object will, for
// example, attend to determining the offset into the run-time vector associated
// with a particular initialized value.
//
// An additional complexity is that often the initialization of a particular
// value will depend on *other* values having already been initialized. For
// example, a record type might have a field that is a table, and thus the
// type corresponding to the table needs to be available before we can create
// the record type. However, the table might have a set of attributes
// associated with it, which have to be initialized before we can create the
// table type, those in turn requiring the initialization of each of the
// individual attributes in the set. One of those attributes might specify
// a &default function for the table, requiring initializing *that* value
// (not just the type, but also a way to refer to the particular instance of
// the function) before initializing the attribute, etc. Worse, record types
// can be *indirectly recursive*, which requires first initializing a "stub"
// for the record type before doing the final initialization.
//
// The general strategy for dealing with all of these dependencies is to
// compute for each initialization its "cohort". An initialization that
// doesn't depend on any others is in cohort 0. An initialization X that
// depends on an initialization Y will have cohort(X) = cohort(Y) + 1; or,
// in general, one more than the highest cohort of any initialization it
// depends on. (We cut a corner in that, due to how initialization information
// is constructed, if X and Y are for the same type of object then we can
// safely use cohort(X) = cohort(Y).) We then execute run-time initialization
// in waves, one cohort at a time.
//
// Because C++ compilers can struggle when trying to optimize large quantities
// of code - clang in particular could take many CPU *hours* back when our
// compiler just generated C++ code snippets for each initialization - rather
// than producing code that directly executes each given initialization, we
// instead employ a table-driven approach. The C++ initializers for the
// tables contain simple values - often just vectors of integers - that compile
// quickly. At run-time we then spin through the elements of the tables (one
// cohort at a time) to obtain the information needed to initialize any given
// item.
//
// Many forms of initialization are specified in terms of indices into globals
// that hold items of various types. Thus, the most common initialization
// information is a vector of integers/indices. These data structures can
// be recursive, too, namely we sometimes associate an index with a vector
// of integers/indices and then we can track multiple such vectors using
// another vector of integers/indices.
#include "zeek/File.h"
#include "zeek/Val.h"
#include "zeek/script_opt/ProfileFunc.h"
#pragma once
namespace zeek::detail
{
class CPPCompile;
// Abstract class for tracking information about a single initialization item.
class CPP_InitInfo;
// Abstract class for tracking information about a collection of initialization
// items.
class CPP_InitsInfo
	{
public:
	// "_tag" names the collection; "type" is appended to the tag to form
	// the default C++ type of the collection's items.
	CPP_InitsInfo(std::string _tag, std::string type) : tag(std::move(_tag))
		{
		base_name = std::string("CPP__") + tag + "__";
		CPP_type = tag + type;
		}

	virtual ~CPP_InitsInfo() { }

	// Returns the name of the C++ global that will hold the items' values
	// at run-time, once initialized. These are all vectors, for which
	// the generated code accesses a particular item by indexing the vector.
	const std::string& InitsName() const { return base_name; }

	// Returns the name of the C++ global used to hold the table we employ
	// for table-driven initialization.
	std::string InitializersName() const { return base_name + "init"; }

	// Returns the "name" of the given element in the run-time vector
	// associated with this collection of initialization items. It's not
	// really a name but rather a vector index, so for example Name(12)
	// might return "CPP__Pattern__[12]", but we use the term Name because
	// the representation used to be individualized globals, such as
	// "CPP__Pattern__12".
	std::string Name(int index) const;

	// Returns the name that will correspond to the next item added to
	// this set.
	std::string NextName() const { return Name(size); }

	// The largest initialization cohort of any item in this collection
	// (-1 if no cohort has been created yet).
	int MaxCohort() const { return static_cast<int>(instances.size()) - 1; }

	// Returns the number of initializations in this collection that belong
	// to the given cohort c. Cohorts outside the range tracked by this
	// collection (including negative values, which would otherwise index
	// out of bounds) have size 0.
	int CohortSize(int c) const
		{
		if ( c < 0 || c > MaxCohort() )
			return 0;

		return static_cast<int>(instances[c].size());
		}

	// Returns the C++ type associated with this collection's run-time vector.
	// This might be, for example, "PatternVal"
	const std::string& CPPType() const { return CPP_type; }

	// Sets the associated C++ type.
	virtual void SetCPPType(std::string ct) { CPP_type = std::move(ct); }

	// Returns the type associated with the table used for initialization
	// (i.e., this is the type of the global returned by InitializersName()).
	std::string InitsType() const { return inits_type; }

	// Add a new initialization instance to the collection.
	void AddInstance(std::shared_ptr<CPP_InitInfo> g);

	// Emit code to populate the table used to initialize this collection.
	void GenerateInitializers(CPPCompile* c);

protected:
	// Computes offset_set - see below.
	void BuildOffsetSet(CPPCompile* c);

	// Returns a declaration suitable for the run-time vector that holds
	// the initialized items in the collection.
	std::string Declare() const;

	// For a given cohort, generates the associated table elements for
	// creating it.
	void BuildCohort(CPPCompile* c, std::vector<std::shared_ptr<CPP_InitInfo>>& cohort);

	// Given the initialization type and initializers for a given
	// cohort element, build the associated table element.
	virtual void BuildCohortElement(CPPCompile* c, std::string init_type,
	                                std::vector<std::string>& ivs);

	// Total number of initializers.
	int size = 0;

	// Each cohort is represented by a vector whose elements correspond
	// to the initialization information for a single item. This variable
	// holds a vector of cohorts, indexed by the number of the cohort.
	// (Note, some cohorts may be empty.)
	std::vector<std::vector<std::shared_ptr<CPP_InitInfo>>> instances;

	// Each cohort has associated with it a vector of offsets, specifying
	// positions in the run-time vector of the items in the cohort.
	//
	// We reduce each such vector to an index into the collection of
	// such vectors (as managed by an IndicesManager - see below).
	//
	// Once we've done that reduction, we can represent each cohort
	// using a single index, and thus all of the cohorts using a vector
	// of indices. We then reduce *that* vector to a single index,
	// again using the IndicesManager. We store that single index
	// in the "offset_set" variable.
	int offset_set = 0;

	// Tag used to distinguish a particular collection of constants.
	std::string tag;

	// C++ name for this collection of constants.
	std::string base_name;

	// C++ type associated with a single instance of these constants.
	std::string CPP_type;

	// C++ type associated with the collection of initializers.
	std::string inits_type;
	};
// Collection of initialization items where every item is set up by its
// own "custom" initializer, i.e. a bespoke C++ object as opposed to a
// simple C++ type or a vector of indices.
class CPP_CustomInitsInfo : public CPP_InitsInfo
	{
public:
	CPP_CustomInitsInfo(std::string _tag, std::string _type)
		: CPP_InitsInfo(std::move(_tag), std::move(_type))
		{
		BuildInitType();
		}

	// Changing the C++ type also changes the type of the initializers
	// table, so that gets rebuilt here too.
	void SetCPPType(std::string ct) override
		{
		CPP_InitsInfo::SetCPPType(std::move(ct));
		BuildInitType();
		}

private:
	// Derives the initializers table type from the current C++ type.
	void BuildInitType()
		{
		inits_type = "CPP_CustomInits<" + CPPType() + ">";
		}
	};
// Collection of initialization items for "basic" constants: ones that
// can be expressed either directly as C++ constants or as indices into a
// vector of C++ objects.
class CPP_BasicConstInitsInfo : public CPP_CustomInitsInfo
	{
public:
	// A non-empty "c_type" specifies the C++ type used to directly
	// represent the constant. An empty one indicates that an index into
	// a separate vector is used instead.
	CPP_BasicConstInitsInfo(std::string _tag, std::string type, std::string c_type)
		: CPP_CustomInitsInfo(std::move(_tag), std::move(type))
		{
		if ( ! c_type.empty() )
			inits_type = "CPP_BasicConsts<" + CPP_type + ", " + c_type + ", " + tag + "Val>";
		else
			inits_type = "CPP_" + tag + "Consts";
		}

	void BuildCohortElement(CPPCompile* c, std::string init_type,
	                        std::vector<std::string>& ivs) override;
	};
// Collection of initialization items that are themselves defined in terms
// of other initialization items.
class CPP_CompoundInitsInfo : public CPP_InitsInfo
	{
public:
	CPP_CompoundInitsInfo(std::string _tag, std::string type)
		: CPP_InitsInfo(std::move(_tag), std::move(type))
		{
		if ( tag != "Type" )
			inits_type = "CPP_IndexedInits<" + CPPType() + ">";
		else
			// Types use a refined version of CPP_IndexedInits so
			// that different types can be built dynamically.
			inits_type = "CPP_TypeInits";
		}

	void BuildCohortElement(CPPCompile* c, std::string init_type,
	                        std::vector<std::string>& ivs) override;
	};
// Abstract base for the information tracked about one initialization item.
class CPP_InitInfo
	{
public:
	// There is deliberately no constructor: basic set-up happens once the
	// object is handed to a CPP_InitsInfo via AddInstance(), which in
	// turn leads to a call to SetOffset() below.
	virtual ~CPP_InitInfo() = default;

	// Records the collection this item belongs to along with the item's
	// offset into the associated run-time vector.
	void SetOffset(const CPP_InitsInfo* _inits_collection, int _offset)
		{
		offset = _offset;
		inits_collection = _inits_collection;
		}

	// The item's offset into the associated run-time vector.
	int Offset() const { return offset; }

	// The name generated code should use to refer to this value.
	std::string Name() const { return inits_collection->Name(offset); }

	// The item's initialization cohort.
	int InitCohort() const { return init_cohort; }

	// The type used for this initializer. The default is a placeholder
	// for items that never have it consulted.
	virtual std::string InitializerType() const { return "<shouldn't-be-used>"; }

	// Appends the values used for creating this item, one element per
	// constructor parameter.
	virtual void InitializerVals(std::vector<std::string>& ivs) const = 0;

protected:
	// Returns an offset (into the run-time vector holding all Zeek
	// constant values) for the given value, registering the constant
	// first if necessary.
	std::string ValElem(CPPCompile* c, ValPtr v);

	// Items start out with no dependencies on other items being
	// initialized first. Those that do have some must raise this in
	// their constructors.
	int init_cohort = 0;

	// The collection this item belongs to.
	const CPP_InitsInfo* inits_collection = nullptr;

	// This item's offset within the collection; -1 means "not associated".
	int offset = -1;
	};
// Initialization information for a basic (non-compound) constant.
class BasicConstInfo : public CPP_InitInfo
	{
public:
	BasicConstInfo(std::string _val) : val(std::move(_val)) { }

	// The sole initializer value is the constant's C++ representation.
	void InitializerVals(std::vector<std::string>& ivs) const override
		{
		ivs.push_back(val);
		}

private:
	// C++ representation of the constant; nothing else needs tracking.
	std::string val;
	};
// Initialization information for a constant whose Val constructor takes
// a string.
class DescConstInfo : public CPP_InitInfo
	{
public:
	DescConstInfo(CPPCompile* c, ValPtr v);

	// The sole initializer value is the string computed by the constructor.
	void InitializerVals(std::vector<std::string>& ivs) const override
		{
		ivs.push_back(init);
		}

private:
	// The single initializer value for this constant.
	std::string init;
	};
// Initialization information for an enum constant.
class EnumConstInfo : public CPP_InitInfo
	{
public:
	EnumConstInfo(CPPCompile* c, ValPtr v);

	// Two initializer values: the enum's type index, then its integer value.
	void InitializerVals(std::vector<std::string>& ivs) const override
		{
		ivs.push_back(std::to_string(e_type));
		ivs.push_back(std::to_string(e_val));
		}

private:
	// Index of the enum's Zeek type.
	int e_type;

	// Integer value of the enum.
	int e_val;
	};
// Initialization information for a string constant.
class StringConstInfo : public CPP_InitInfo
	{
public:
	StringConstInfo(CPPCompile* c, ValPtr v);

	// Two initializer values: the index of the string's characters, then
	// the string's length.
	void InitializerVals(std::vector<std::string>& ivs) const override
		{
		ivs.push_back(std::to_string(chars));
		ivs.push_back(std::to_string(len));
		}

private:
	// Index into the vector of char*'s.
	int chars;

	// Length of the string.
	int len;
	};
// Initialization information for a pattern constant.
class PatternConstInfo : public CPP_InitInfo
	{
public:
	PatternConstInfo(CPPCompile* c, ValPtr v);

	// Two initializer values: the index of the pattern's string form,
	// then the case-insensitivity flag.
	void InitializerVals(std::vector<std::string>& ivs) const override
		{
		ivs.push_back(std::to_string(pattern));
		ivs.push_back(std::to_string(is_case_insensitive));
		}

private:
	// Index of the pattern's string representation.
	int pattern;

	// Case-insensitivity flag: 0 or 1.
	int is_case_insensitive;
	};
// Initialization information for a port constant.
class PortConstInfo : public CPP_InitInfo
	{
public:
	// Captures the unsigned value underlying the port.
	PortConstInfo(ValPtr v) : p(static_cast<UnsignedValImplementation*>(v->AsPortVal())->Get()) { }

	// The sole initializer value is the port's numeric value written as
	// an unsigned C++ literal (note the "U" suffix).
	void InitializerVals(std::vector<std::string>& ivs) const override
		{
		auto literal = std::to_string(p);
		literal += "U";
		ivs.emplace_back(std::move(literal));
		}

private:
	// Unsigned value obtained from the port Val.
	bro_uint_t p;
	};
// Abstract base for compound items, i.e. those defined in terms of other
// items.
class CompoundItemInfo : public CPP_InitInfo
	{
public:
	// Used for items that have a custom Zeek type.
	CompoundItemInfo(CPPCompile* c, ValPtr v);

	// Used when the type is generic or inapplicable, which is marked by
	// a type of -1.
	CompoundItemInfo(CPPCompile* _c) : c(_c), type(-1) { }

	// Emits the type (only if there is one) followed by all of the
	// accumulated initialization values.
	void InitializerVals(std::vector<std::string>& ivs) const override
		{
		if ( type >= 0 )
			ivs.emplace_back(std::to_string(type));

		ivs.insert(ivs.end(), vals.begin(), vals.end());
		}

protected:
	CPPCompile* c;

	// The item's type, or a negative value if it has none to emit.
	int type;

	// Initialization values.
	std::vector<std::string> vals;
	};
// This next set corresponds to compound Zeek constants of various types.
// Initialization information for a compound constant that is a Zeek list.
// Adds only a constructor (not defined in this header) to CompoundItemInfo.
class ListConstInfo : public CompoundItemInfo
{
public:
ListConstInfo(CPPCompile* c, ValPtr v);
};
// Initialization information for a compound constant that is a Zeek vector.
// Adds only a constructor (not defined in this header) to CompoundItemInfo.
class VectorConstInfo : public CompoundItemInfo
{
public:
VectorConstInfo(CPPCompile* c, ValPtr v);
};
// Initialization information for a compound constant that is a Zeek record.
// Adds only a constructor (not defined in this header) to CompoundItemInfo.
class RecordConstInfo : public CompoundItemInfo
{
public:
RecordConstInfo(CPPCompile* c, ValPtr v);
};
// Initialization information for a compound constant that is a Zeek table.
// Adds only a constructor (not defined in this header) to CompoundItemInfo.
class TableConstInfo : public CompoundItemInfo
{
public:
TableConstInfo(CPPCompile* c, ValPtr v);
};
// Initialization information for a compound constant that is a Zeek file.
// Adds only a constructor (not defined in this header) to CompoundItemInfo.
class FileConstInfo : public CompoundItemInfo
{
public:
FileConstInfo(CPPCompile* c, ValPtr v);
};
// Initialization information for a constant that is a Zeek function value.
// Unlike the other compound constants, it supplies its own
// InitializerVals() (not defined in this header) rather than using the one
// inherited from CompoundItemInfo.
class FuncConstInfo : public CompoundItemInfo
{
public:
FuncConstInfo(CPPCompile* _c, ValPtr v);
void InitializerVals(std::vector<std::string>& ivs) const override;
private:
// Raw pointer to the function value.
// NOTE(review): presumably obtained from the ValPtr given to the
// constructor, which is not retained by this class - confirm that
// something else keeps the FuncVal alive.
FuncVal* fv;
};
// Initialization information for single attributes and sets of attributes.
// Initialization information for a single attribute. Adds only a
// constructor (not defined in this header) to CompoundItemInfo.
class AttrInfo : public CompoundItemInfo
{
public:
AttrInfo(CPPCompile* c, const AttrPtr& attr);
};
// Initialization information for a set of attributes. Adds only a
// constructor (not defined in this header) to CompoundItemInfo.
class AttrsInfo : public CompoundItemInfo
{
public:
AttrsInfo(CPPCompile* c, const AttributesPtr& attrs);
};
// Information for initializing a Zeek global.
class GlobalInitInfo : public CPP_InitInfo
{
public:
// "g" is the global's identifier and "CPP_name" the name of its C++
// counterpart. The constructor is not defined in this header.
GlobalInitInfo(CPPCompile* c, const ID* g, std::string CPP_name);
// Globals are initialized using the "CPP_GlobalInit" initializer type.
std::string InitializerType() const override { return "CPP_GlobalInit"; }
void InitializerVals(std::vector<std::string>& ivs) const override;
protected:
// The members below are set by the constructor, which isn't visible here.
// NOTE(review): "type" and "attrs" are presumably offsets into the
// corresponding run-time vectors, and "val" the initializer for the
// global's value - confirm against the constructor.
std::string Zeek_name;
std::string CPP_name;
int type;
int attrs;
std::string val;
bool exported;
};
// Initialization information for an item corresponding to a Zeek function
// call. These are needed in order to associate complex expressions with
// attributes.
class CallExprInitInfo : public CPP_InitInfo
	{
public:
	CallExprInitInfo(CPPCompile* c, ExprPtr e, std::string e_name, std::string wrapper_class);

	// The initializer type is CPP_CallExprInit instantiated with the
	// wrapper class.
	std::string InitializerType() const override
		{
		return "CPP_CallExprInit<" + wrapper_class + ">";
		}

	// The sole initializer value is the expression's name.
	void InitializerVals(std::vector<std::string>& ivs) const override
		{
		ivs.push_back(e_name);
		}

	// Accessors. They exist because the code that initializes these
	// items is generated separately from that of most initialization
	// collections.
	const ExprPtr& GetExpr() const { return e; }
	const std::string& Name() const { return e_name; }
	const std::string& WrapperClass() const { return wrapper_class; }

protected:
	ExprPtr e;
	std::string e_name;
	std::string wrapper_class;
	};
// Information for registering the class/function associated with a lambda.
class LambdaRegistrationInfo : public CPP_InitInfo
	{
public:
	LambdaRegistrationInfo(CPPCompile* c, std::string name, FuncTypePtr ft,
	                       std::string wrapper_class, p_hash_type h, bool has_captures);

	// The initializer type is CPP_LambdaRegistration instantiated with
	// the wrapper class.
	std::string InitializerType() const override
		{
		return "CPP_LambdaRegistration<" + wrapper_class + ">";
		}

	void InitializerVals(std::vector<std::string>& ivs) const override;

protected:
	std::string name;
	int func_type;
	std::string wrapper_class;
	p_hash_type h;
	bool has_captures;
	};
// Abstract base for the information needed to initialize a Zeek type.
class AbstractTypeInfo : public CPP_InitInfo
	{
public:
	AbstractTypeInfo(CPPCompile* _c, TypePtr _t) : c(_c), t(std::move(_t)) { }

	// Every type starts its initializer values with its tag (as an
	// integer). Whatever else a particular type needs is contributed
	// by AddInitializerVals().
	void InitializerVals(std::vector<std::string>& ivs) const override
		{
		const auto tag_as_int = static_cast<int>(t->Tag());
		ivs.emplace_back(std::to_string(tag_as_int));

		AddInitializerVals(ivs);
		}

	// Hook for subclasses to append type-specific values. By default
	// there are none.
	virtual void AddInitializerVals(std::vector<std::string>& ivs) const { }

protected:
	CPPCompile* c;

	// The type being initialized.
	TypePtr t;
	};
// The following capture information for different Zeek types.
// Type information that needs nothing beyond what AbstractTypeInfo already
// records: no members are added and AddInitializerVals() is not overridden.
class BaseTypeInfo : public AbstractTypeInfo
{
public:
BaseTypeInfo(CPPCompile* _c, TypePtr _t) : AbstractTypeInfo(_c, std::move(_t)) { }
};
// Information for initializing an enum type. It holds no state of its own;
// its type-specific values come from AddInitializerVals(), which is not
// defined in this header.
class EnumTypeInfo : public AbstractTypeInfo
{
public:
EnumTypeInfo(CPPCompile* _c, TypePtr _t) : AbstractTypeInfo(_c, std::move(_t)) { }
void AddInitializerVals(std::vector<std::string>& ivs) const override;
};
// Information for initializing an opaque type. It holds no state of its
// own; its type-specific values come from AddInitializerVals(), which is
// not defined in this header.
class OpaqueTypeInfo : public AbstractTypeInfo
{
public:
OpaqueTypeInfo(CPPCompile* _c, TypePtr _t) : AbstractTypeInfo(_c, std::move(_t)) { }
void AddInitializerVals(std::vector<std::string>& ivs) const override;
};
// Information for initializing a type that itself refers to another type.
// The constructor and AddInitializerVals() are not defined in this header.
class TypeTypeInfo : public AbstractTypeInfo
{
public:
TypeTypeInfo(CPPCompile* c, TypePtr _t);
void AddInitializerVals(std::vector<std::string>& ivs) const override;
private:
TypePtr tt; // the type referred to by t
};
// Information for initializing a vector type. The constructor and
// AddInitializerVals() are not defined in this header.
class VectorTypeInfo : public AbstractTypeInfo
{
public:
VectorTypeInfo(CPPCompile* c, TypePtr _t);
void AddInitializerVals(std::vector<std::string>& ivs) const override;
private:
// NOTE(review): presumably the type of the vector's elements - confirm
// against the constructor.
TypePtr yield;
};
// Information for initializing a list type. The constructor and
// AddInitializerVals() are not defined in this header.
class ListTypeInfo : public AbstractTypeInfo
{
public:
ListTypeInfo(CPPCompile* c, TypePtr _t);
void AddInitializerVals(std::vector<std::string>& ivs) const override;
private:
// NOTE(review): this is a reference member, so the vector it is bound
// to must outlive this object. Presumably it belongs to the type held
// in "t" - confirm against the constructor.
const std::vector<TypePtr>& types;
};
// Information for initializing a table type. The constructor and
// AddInitializerVals() are not defined in this header.
class TableTypeInfo : public AbstractTypeInfo
{
public:
TableTypeInfo(CPPCompile* c, TypePtr _t);
void AddInitializerVals(std::vector<std::string>& ivs) const override;
private:
// NOTE(review): presumably the offset of the type describing the
// table's indices, and the table's yield type (if any) - confirm
// against the constructor.
int indices;
TypePtr yield;
};
// Information for initializing a function type. AddInitializerVals()
// contributes the offset of the parameters' type, the offset of the yield
// type (or -1 if there is none), and the flavor as an integer.
class FuncTypeInfo : public AbstractTypeInfo
{
public:
FuncTypeInfo(CPPCompile* c, TypePtr _t);
void AddInitializerVals(std::vector<std::string>& ivs) const override;
private:
// The function's flavor, taken from the function type.
FunctionFlavor flavor;
// The type of the function's parameters.
TypePtr params;
// The function's yield type; null if it has none.
TypePtr yield;
};
// Information for initializing a record type. AddInitializerVals()
// contributes the record's name followed by, for each field, its name,
// the offset of its type, and the offset of its attributes.
class RecordTypeInfo : public AbstractTypeInfo
{
public:
RecordTypeInfo(CPPCompile* c, TypePtr _t);
void AddInitializerVals(std::vector<std::string>& ivs) const override;
private:
// The following are parallel vectors with one entry per record field.
std::vector<std::string> field_names;
std::vector<TypePtr> field_types;
// Offset of each field's attributes, or -1 if the field has none.
std::vector<int> field_attrs;
};
// A large part of table-driven initialization relies on vectors of indices,
// represented as vectors of int's in which each int indexes some global C++
// vector. This class keeps track of such vectors. Its main job is to
// reduce a vector-of-indices to a single value - again an index - from
// which the original vector can be retrieved at run-time.
//
// The idea applies recursively: given several vectors-of-indices, each can
// be reduced to an index, and the resulting vector-of-meta-indices can then
// be reduced to an index as well. That way a single value can concisely
// stand for a potentially large, deep set of indices, as is done for
// CPP_InitsInfo's "offset_set" member variable.
class IndicesManager
	{
public:
	IndicesManager() = default;

	// Starts tracking the given vector-of-indices. Returns the offset
	// that will be associated with it at run-time.
	int AddIndices(std::vector<int> indices)
		{
		const int new_offset = indices_set.size();
		indices_set.push_back(std::move(indices));
		return new_offset;
		}

	// Generates the initializations from which the managed vectors are
	// constructed at run-time.
	void Generate(CPPCompile* c);

private:
	// All of the vectors-of-indices being tracked. Recognizing
	// duplicates (empty vectors, for instance, are very common) would
	// save some space and time, but that doesn't appear necessary as
	// long as the code compiles and executes without undue overhead.
	std::vector<std::vector<int>> indices_set;
	};
} // zeek::detail

View file

@ -114,40 +114,6 @@ There are additional workflows relating to running the test suite, which
we document only briefly here as they're likely going to change or go away we document only briefly here as they're likely going to change or go away
, as it's not clear they're actually needed. , as it's not clear they're actually needed.
First, `-O update-C++` will run using a Zeek instance that already includes
compiled scripts and, for any functions pulled in by the command-line scripts,
if they're not already compiled, will generate additional C++ code for
those that can be combined with the already-compiled code. The
additionally compiled code leverages the existing compiled-in functions
(and globals), which it learns about via the `CPP-hashes.dat` file mentioned
above. Any code compiled in this fashion must be _consistent_ with the
previously compiled code, meaning that globals and extensible types (enums,
records) have definitions that align with those previously used, and any
other code later compiled must also be consistent.
In a similar vein, `-O add-C++` likewise uses a Zeek instance that already
includes compiled scripts. It generates additional C++ code that leverages
that existing compilation. However, this code is _not_ meant for use with
subsequently compiled code; later code also built with `add-C++` can have
inconsistencies with this code. (The utility of this mode is to support
compiling the entire test suite as one large incremental compilation,
rather than as hundreds of pointwise compilations.)
Both of these _append_ to any existing `CPP-gen-addl.h` file, providing
a means for building it up to reflect a number of compilations.
The `update-C++` and `add-C++` options help support different
ways of building the `btest` test suite. They were meant to enable doing so
without requiring per-test-suite-element recompilations. However, experiences
to date have found that trying to avoid pointwise compilations incurs
additional headaches, so it's better to just bite off the cost of a large
number of recompilations. Given that, it might make sense to remove these
options.
Finally, with respect to workflow there are a number of simple scripts in
`src/script_opt/CPP/` (which should ultimately be replaced) in support of
compiler maintenance:
* `non-embedded-build` * `non-embedded-build`
Builds `zeek` without any embedded compiled-to-C++ scripts. Builds `zeek` without any embedded compiled-to-C++ scripts.
* `bare-embedded-build` * `bare-embedded-build`
@ -183,12 +149,11 @@ Known Issues
Here we list various known issues with using the compiler: Here we list various known issues with using the compiler:
<br> <br>
* Compilation of compiled code can be noticeably slow (if built using * Compilation of compiled code can be quite slow when the C++ compilation
`./configure --enable-debug`) or hugely slow (if not), with the latter includes optimization,
taking on the order of an hour on a beefy laptop. This slowness complicates taking many minutes on a beefy laptop. This slowness complicates
CI/CD approaches for always running compiled code against the test suite CI/CD approaches for always running compiled code against the test suite
when merging changes. It's not presently clear how feasible it is to when merging changes.
speed this up.
* Run-time error messages generally lack location information and information * Run-time error messages generally lack location information and information
about associated expressions/statements, making them hard to puzzle out. about associated expressions/statements, making them hard to puzzle out.

View file

@ -17,18 +17,21 @@
#include "zeek/ZeekString.h" #include "zeek/ZeekString.h"
#include "zeek/module_util.h" #include "zeek/module_util.h"
#include "zeek/script_opt/CPP/Func.h" #include "zeek/script_opt/CPP/Func.h"
#include "zeek/script_opt/CPP/RuntimeInit.h" #include "zeek/script_opt/CPP/RuntimeInitSupport.h"
#include "zeek/script_opt/CPP/RuntimeInits.h"
#include "zeek/script_opt/CPP/RuntimeOps.h" #include "zeek/script_opt/CPP/RuntimeOps.h"
#include "zeek/script_opt/CPP/RuntimeVec.h" #include "zeek/script_opt/CPP/RuntimeVec.h"
#include "zeek/script_opt/ScriptOpt.h" #include "zeek/script_opt/ScriptOpt.h"
namespace zeek namespace zeek::detail
{ {
using BoolValPtr = IntrusivePtr<zeek::BoolVal>; using BoolValPtr = IntrusivePtr<zeek::BoolVal>;
using IntValPtr = IntrusivePtr<zeek::IntVal>;
using CountValPtr = IntrusivePtr<zeek::CountVal>; using CountValPtr = IntrusivePtr<zeek::CountVal>;
using DoubleValPtr = IntrusivePtr<zeek::DoubleVal>; using DoubleValPtr = IntrusivePtr<zeek::DoubleVal>;
using StringValPtr = IntrusivePtr<zeek::StringVal>; using StringValPtr = IntrusivePtr<zeek::StringVal>;
using TimeValPtr = IntrusivePtr<zeek::TimeVal>;
using IntervalValPtr = IntrusivePtr<zeek::IntervalVal>; using IntervalValPtr = IntrusivePtr<zeek::IntervalVal>;
using PatternValPtr = IntrusivePtr<zeek::PatternVal>; using PatternValPtr = IntrusivePtr<zeek::PatternVal>;
using FuncValPtr = IntrusivePtr<zeek::FuncVal>; using FuncValPtr = IntrusivePtr<zeek::FuncVal>;

View file

@ -1,6 +1,6 @@
// See the file "COPYING" in the main distribution directory for copyright. // See the file "COPYING" in the main distribution directory for copyright.
#include "zeek/script_opt/CPP/RuntimeInit.h" #include "zeek/script_opt/CPP/RuntimeInitSupport.h"
#include "zeek/EventRegistry.h" #include "zeek/EventRegistry.h"
#include "zeek/module_util.h" #include "zeek/module_util.h"
@ -49,7 +49,7 @@ static int flag_init_CPP()
static int dummy = flag_init_CPP(); static int dummy = flag_init_CPP();
void register_type__CPP(TypePtr t, const std::string& name) void register_type__CPP(TypePtr t, const string& name)
{ {
if ( t->GetName().size() > 0 ) if ( t->GetName().size() > 0 )
// Already registered. // Already registered.
@ -113,8 +113,8 @@ void activate_bodies__CPP(const char* fn, const char* module, bool exported, Typ
auto v = fg->GetVal(); auto v = fg->GetVal();
if ( ! v ) if ( ! v )
{ // Create it. { // Create it.
std::vector<StmtPtr> no_bodies; vector<StmtPtr> no_bodies;
std::vector<int> no_priorities; vector<int> no_priorities;
auto sf = make_intrusive<ScriptFunc>(fn, ft, no_bodies, no_priorities); auto sf = make_intrusive<ScriptFunc>(fn, ft, no_bodies, no_priorities);
v = make_intrusive<FuncVal>(move(sf)); v = make_intrusive<FuncVal>(move(sf));
@ -154,8 +154,9 @@ void activate_bodies__CPP(const char* fn, const char* module, bool exported, Typ
continue; continue;
// Add in the new body. // Add in the new body.
ASSERT(compiled_scripts.count(h) > 0); auto csi = compiled_scripts.find(h);
auto cs = compiled_scripts[h]; ASSERT(csi != compiled_scripts.end());
auto cs = csi->second;
f->AddBody(cs.body, no_inits, num_params, cs.priority); f->AddBody(cs.body, no_inits, num_params, cs.priority);
added_bodies[fn].insert(h); added_bodies[fn].insert(h);
@ -193,14 +194,37 @@ FuncValPtr lookup_func__CPP(string name, vector<p_hash_type> hashes, const TypeP
{ {
auto ft = cast_intrusive<FuncType>(t); auto ft = cast_intrusive<FuncType>(t);
if ( hashes.empty() )
{
// This happens for functions that have at least one
// uncompilable body.
auto gl = lookup_ID(name.c_str(), GLOBAL_MODULE_NAME, false, false, false);
if ( ! gl )
{
reporter->CPPRuntimeError("non-compiled function %s missing", name.c_str());
exit(1);
}
auto v = gl->GetVal();
if ( ! v || v->GetType()->Tag() != TYPE_FUNC )
{
reporter->CPPRuntimeError("non-compiled function %s has an invalid value",
name.c_str());
exit(1);
}
return cast_intrusive<FuncVal>(v);
}
vector<StmtPtr> bodies; vector<StmtPtr> bodies;
vector<int> priorities; vector<int> priorities;
for ( auto h : hashes ) for ( auto h : hashes )
{ {
ASSERT(compiled_scripts.count(h) > 0); auto cs = compiled_scripts.find(h);
ASSERT(cs != compiled_scripts.end());
const auto& f = compiled_scripts[h]; const auto& f = cs->second;
bodies.push_back(f.body); bodies.push_back(f.body);
priorities.push_back(f.priority); priorities.push_back(f.priority);

View file

@ -5,6 +5,7 @@
#pragma once #pragma once
#include "zeek/Val.h" #include "zeek/Val.h"
#include "zeek/script_opt/CPP/Attrs.h"
#include "zeek/script_opt/CPP/Func.h" #include "zeek/script_opt/CPP/Func.h"
namespace zeek namespace zeek

View file

@ -0,0 +1,523 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include "zeek/script_opt/CPP/RuntimeInits.h"
#include "zeek/Desc.h"
#include "zeek/File.h"
#include "zeek/RE.h"
#include "zeek/ZeekString.h"
#include "zeek/script_opt/CPP/RuntimeInitSupport.h"
using namespace std;
namespace zeek::detail
{
// Initializes every item of the given cohort. The i'th item is generated
// at the position given by the i'th entry of "cohort_offsets".
template <class T>
void CPP_IndexedInits<T>::InitializeCohortWithOffsets(InitsManager* im, int cohort,
                                                      const std::vector<int>& cohort_offsets)
	{
	auto& cohort_inits = this->inits[cohort];

	for ( size_t item = 0; item < cohort_inits.size(); ++item )
		Generate(im, this->inits_vec, cohort_offsets[item], cohort_inits[item]);
	}
// Builds an enum value. "init_vals" holds the index of the enum's type
// followed by the enum's integer value.
template <class T>
void CPP_IndexedInits<T>::Generate(InitsManager* im, std::vector<EnumValPtr>& ivec, int offset,
                                   ValElemVec& init_vals)
	{
	int enum_val = init_vals[1];
	auto& enum_type = im->Types(init_vals[0]);

	ivec[offset] = make_enum__CPP(enum_type, enum_val);
	}
// Builds a string value. "init_vals" holds the index of the string's
// characters followed by the string's length.
template <class T>
void CPP_IndexedInits<T>::Generate(InitsManager* im, std::vector<StringValPtr>& ivec, int offset,
                                   ValElemVec& init_vals)
	{
	int str_len = init_vals[1];
	auto str_chars = im->Strings(init_vals[0]);

	ivec[offset] = make_intrusive<StringVal>(str_len, str_chars);
	}
// Builds a pattern value. "init_vals" holds the index of the pattern's
// string form followed by a flag that is non-zero for case-insensitive
// patterns.
template <class T>
void CPP_IndexedInits<T>::Generate(InitsManager* im, std::vector<PatternValPtr>& ivec, int offset,
                                   ValElemVec& init_vals)
	{
	auto matcher = new RE_Matcher(im->Strings(init_vals[0]));

	const bool case_insensitive = init_vals[1] != 0;
	if ( case_insensitive )
		matcher->MakeCaseInsensitive();

	matcher->Compile();

	ivec[offset] = make_intrusive<PatternVal>(matcher);
	}
// Builds a list value (of type "any"). Every entry of "init_vals" is the
// index of a constant to append to the list.
template <class T>
void CPP_IndexedInits<T>::Generate(InitsManager* im, std::vector<ListValPtr>& ivec, int offset,
                                   ValElemVec& init_vals) const
	{
	auto list = make_intrusive<ListVal>(TYPE_ANY);

	for ( size_t i = 0; i < init_vals.size(); ++i )
		list->Append(im->ConstVals(init_vals[i]));

	ivec[offset] = list;
	}
// Builds a vector value.  init_vals[0] indexes the vector's type; each
// remaining entry indexes a constant that is appended in order.
template <class T>
void CPP_IndexedInits<T>::Generate(InitsManager* im, std::vector<VectorValPtr>& ivec, int offset,
                                   ValElemVec& init_vals) const
	{
	auto vec_type = cast_intrusive<VectorType>(im->Types(init_vals[0]));
	auto vec_val = make_intrusive<VectorVal>(vec_type);

	for ( auto i = 1U; i < init_vals.size(); ++i )
		vec_val->Append(im->ConstVals(init_vals[i]));

	ivec[offset] = vec_val;
	}
// Builds a record value.  init_vals[0] indexes the record's type; the
// entries after it correspond positionally to fields 0, 1, 2, ...  A
// negative entry leaves that field unassigned; otherwise the entry
// indexes the constant to assign.
template <class T>
void CPP_IndexedInits<T>::Generate(InitsManager* im, std::vector<RecordValPtr>& ivec, int offset,
                                   ValElemVec& init_vals) const
	{
	auto rec_type = cast_intrusive<RecordType>(im->Types(init_vals[0]));
	auto rec_val = make_intrusive<RecordVal>(rec_type);

	int field_idx = 0;
	for ( auto it = init_vals.begin() + 1; it != init_vals.end(); ++it, ++field_idx )
		{
		const int const_idx = *it;
		if ( const_idx >= 0 )
			rec_val->Assign(field_idx, im->ConstVals(const_idx));
		}

	ivec[offset] = rec_val;
	}
// Builds a table value.  init_vals[0] indexes the table's type; the rest
// of init_vals is a sequence of (index, value) pairs, each an index into
// the manager's constants.  A negative "value" entry assigns a nil value
// for that index.
template <class T>
void CPP_IndexedInits<T>::Generate(InitsManager* im, std::vector<TableValPtr>& ivec, int offset,
                                   ValElemVec& init_vals) const
	{
	auto table_type = cast_intrusive<TableType>(im->Types(init_vals[0]));
	auto table_val = make_intrusive<TableVal>(table_type);

	for ( auto i = 1U; i < init_vals.size(); i += 2 )
		{
		auto key = im->ConstVals(init_vals[i]);
		const int val_idx = init_vals[i + 1];
		auto val = val_idx >= 0 ? im->ConstVals(val_idx) : nullptr;

		table_val->Assign(key, val);
		}

	ivec[offset] = table_val;
	}
// Builds a file value, opening the named file with mode "w".
template <class T>
void CPP_IndexedInits<T>::Generate(InitsManager* im, std::vector<FileValPtr>& ivec, int offset,
                                   ValElemVec& init_vals) const
	{
	// Element 0 holds the "type" value, which isn't needed here; the
	// file's name is element 1.
	auto path = im->Strings(init_vals[1]);

	ivec[offset] = make_intrusive<FileVal>(make_intrusive<File>(path, "w"));
	}
// Builds a function value via lookup_func__CPP.  init_vals[0] indexes the
// function's type, init_vals[1] its name; every remaining entry indexes
// one of the manager's hashes.
template <class T>
void CPP_IndexedInits<T>::Generate(InitsManager* im, std::vector<FuncValPtr>& ivec, int offset,
                                   ValElemVec& init_vals) const
	{
	const auto type_idx = init_vals[0];
	auto func_name = im->Strings(init_vals[1]);

	std::vector<p_hash_type> hashes;
	for ( auto i = 2U; i < init_vals.size(); ++i )
		hashes.push_back(im->Hashes(init_vals[i]));

	ivec[offset] = lookup_func__CPP(func_name, hashes, im->Types(type_idx));
	}
// Builds a single attribute.  init_vals[0] is the attribute's tag,
// init_vals[1] the kind of expression (if any) associated with it, and
// init_vals[2] an argument whose interpretation depends on that kind.
template <class T>
void CPP_IndexedInits<T>::Generate(InitsManager* im, std::vector<AttrPtr>& ivec, int offset,
                                   ValElemVec& init_vals) const
	{
	const auto attr_tag = static_cast<AttrTag>(init_vals[0]);
	const auto expr_kind = static_cast<AttrExprType>(init_vals[1]);
	const auto expr_arg = init_vals[2];

	// Stays nil for AE_NONE.
	ExprPtr attr_expr;

	switch ( expr_kind )
		{
		case AE_NONE:
			break;

		case AE_CONST:
			// The argument indexes a constant.
			attr_expr = make_intrusive<ConstExpr>(im->ConstVals(expr_arg));
			break;

		case AE_NAME:
			{
			// The argument indexes the name of a global, which must exist.
			auto global_name = im->Strings(expr_arg);
			auto global_id = lookup_ID(global_name, GLOBAL_MODULE_NAME, false, false, false);
			ASSERT(global_id);
			attr_expr = make_intrusive<NameExpr>(global_id);
			break;
			}

		case AE_RECORD:
			{
			// The argument indexes a record type; the expression is an
			// empty record constructor coerced to that type.
			auto rec_type = cast_intrusive<RecordType>(im->Types(expr_arg));
			auto no_fields = make_intrusive<ListExpr>();
			auto rec_ctor = make_intrusive<RecordConstructorExpr>(no_fields);
			attr_expr = make_intrusive<RecordCoerceExpr>(rec_ctor, rec_type);
			break;
			}

		case AE_CALL:
			// The argument indexes a previously built call expression.
			attr_expr = im->CallExprs(expr_arg);
			break;
		}

	ivec[offset] = make_intrusive<Attr>(attr_tag, attr_expr);
	}
// Builds a set of attributes.  Every entry of init_vals indexes one of the
// manager's individual attributes.
template <class T>
void CPP_IndexedInits<T>::Generate(InitsManager* im, std::vector<AttributesPtr>& ivec, int offset,
                                   ValElemVec& init_vals) const
	{
	std::vector<AttrPtr> attr_list;

	for ( auto i = 0U; i < init_vals.size(); ++i )
		attr_list.emplace_back(im->Attrs(init_vals[i]));

	ivec[offset] = make_intrusive<Attributes>(attr_list, nullptr, false, false);
	}
// Instantiate the templates we'll need.  The member functions of
// CPP_IndexedInits are defined in this file rather than in the header,
// so each element type has to be explicitly instantiated here.
template class CPP_IndexedInits<EnumValPtr>;
template class CPP_IndexedInits<StringValPtr>;
template class CPP_IndexedInits<PatternValPtr>;
template class CPP_IndexedInits<ListValPtr>;
template class CPP_IndexedInits<VectorValPtr>;
template class CPP_IndexedInits<RecordValPtr>;
template class CPP_IndexedInits<TableValPtr>;
template class CPP_IndexedInits<FileValPtr>;
template class CPP_IndexedInits<FuncValPtr>;
template class CPP_IndexedInits<AttrPtr>;
template class CPP_IndexedInits<AttributesPtr>;
template class CPP_IndexedInits<TypePtr>;
// Pre-initializes the types of every cohort.  "offsets_vec" holds, per
// cohort, the index of that cohort's vector of element offsets.
void CPP_TypeInits::DoPreInits(InitsManager* im, const std::vector<int>& offsets_vec)
	{
	for ( auto c = 0U; c < offsets_vec.size(); ++c )
		{
		auto& cohort_inits = inits[c];
		auto& cohort_offsets = im->Indices(offsets_vec[c]);

		auto offset_it = cohort_offsets.begin();
		for ( auto& elem_inits : cohort_inits )
			PreInit(im, *offset_it++, elem_inits);
		}
	}
// Pre-initializes a single type.  Only type lists and records get a
// placeholder object here; it is filled in later by BuildTypeList() and
// BuildRecordType(), which fetch it back from inits_vec.
void CPP_TypeInits::PreInit(InitsManager* im, int offset, ValElemVec& init_vals)
	{
	switch ( static_cast<TypeTag>(init_vals[0]) )
		{
		case TYPE_LIST:
			inits_vec[offset] = make_intrusive<TypeList>();
			break;

		case TYPE_RECORD:
			{
			// An empty name string is passed along as a nil name.
			auto rec_name = im->Strings(init_vals[1]);
			inits_vec[offset] = get_record_type__CPP(rec_name[0] ? rec_name : nullptr);
			break;
			}

		default:
			// No pre-initialization needed.
			break;
		}
	}
// Builds the type described by init_vals and stores it at ivec[offset].
// init_vals[0] is the type's tag; the meaning of the remaining elements
// depends on the tag and is decoded by the corresponding Build*() helper.
void CPP_TypeInits::Generate(InitsManager* im, vector<TypePtr>& ivec, int offset,
ValElemVec& init_vals) const
{
auto tag = static_cast<TypeTag>(init_vals[0]);
TypePtr t;
switch ( tag )
{
// Types with no further parameters come straight from base_type().
case TYPE_ADDR:
case TYPE_ANY:
case TYPE_BOOL:
case TYPE_COUNT:
case TYPE_DOUBLE:
case TYPE_ERROR:
case TYPE_INT:
case TYPE_INTERVAL:
case TYPE_PATTERN:
case TYPE_PORT:
case TYPE_STRING:
case TYPE_TIME:
case TYPE_TIMER:
case TYPE_VOID:
case TYPE_SUBNET:
case TYPE_FILE:
t = base_type(tag);
break;
case TYPE_ENUM:
t = BuildEnumType(im, init_vals);
break;
case TYPE_OPAQUE:
t = BuildOpaqueType(im, init_vals);
break;
case TYPE_TYPE:
t = BuildTypeType(im, init_vals);
break;
case TYPE_VECTOR:
t = BuildVectorType(im, init_vals);
break;
// Lists and records also need the offset, since they complete the
// object already stored in inits_vec by PreInit().
case TYPE_LIST:
t = BuildTypeList(im, init_vals, offset);
break;
case TYPE_TABLE:
t = BuildTableType(im, init_vals);
break;
case TYPE_FUNC:
t = BuildFuncType(im, init_vals);
break;
case TYPE_RECORD:
t = BuildRecordType(im, init_vals, offset);
break;
default:
// Any other tag is unexpected here; "t" stays nil.
ASSERT(0);
}
ivec[offset] = t;
}
// Builds an enum type.  init_vals[1] indexes the type's name; the rest of
// init_vals is a sequence of (name, value) pairs.  The pairs are only
// added if the type obtained for that name has no names yet.
TypePtr CPP_TypeInits::BuildEnumType(InitsManager* im, ValElemVec& init_vals) const
	{
	// Element [0] is the type tag, so the name lives in element [1].
	auto enum_type = get_enum_type__CPP(im->Strings(init_vals[1]));

	if ( ! enum_type->Names().empty() )
		return enum_type;

	for ( auto i = 2U; i < init_vals.size(); i += 2 )
		{
		auto elem_name = im->Strings(init_vals[i]);
		auto elem_val = init_vals[i + 1];
		enum_type->AddNameInternal(elem_name, elem_val);
		}

	return enum_type;
	}
// Builds an opaque type whose name is the string indexed by init_vals[1].
TypePtr CPP_TypeInits::BuildOpaqueType(InitsManager* im, ValElemVec& init_vals) const
	{
	const char* opaque_name = im->Strings(init_vals[1]);
	return make_intrusive<OpaqueType>(opaque_name);
	}
// Builds a type-of-type wrapping the type indexed by init_vals[1].
TypePtr CPP_TypeInits::BuildTypeType(InitsManager* im, ValElemVec& init_vals) const
	{
	return make_intrusive<TypeType>(im->Types(init_vals[1]));
	}
// Builds a vector type; init_vals[1] indexes the type handed to its
// constructor.
TypePtr CPP_TypeInits::BuildVectorType(InitsManager* im, ValElemVec& init_vals) const
	{
	return make_intrusive<VectorType>(im->Types(init_vals[1]));
	}
// Completes the type list already stored at inits_vec[offset] by
// appending the types indexed by init_vals[1], init_vals[2], ...
TypePtr CPP_TypeInits::BuildTypeList(InitsManager* im, ValElemVec& init_vals, int offset) const
	{
	auto type_list = cast_intrusive<TypeList>(inits_vec[offset]);

	// Element [0] is the type tag; the list's members start at [1].
	for ( auto i = 1U; i < init_vals.size(); ++i )
		type_list->Append(im->Types(init_vals[i]));

	return type_list;
	}
// Builds a table type.  init_vals[1] indexes the type list of its indices;
// init_vals[2] indexes its yield type, or is negative for a nil yield.
TypePtr CPP_TypeInits::BuildTableType(InitsManager* im, ValElemVec& init_vals) const
	{
	auto index_types = cast_intrusive<TypeList>(im->Types(init_vals[1]));

	const auto yield_idx = init_vals[2];
	auto yield_type = yield_idx >= 0 ? im->Types(yield_idx) : nullptr;

	return make_intrusive<TableType>(index_types, yield_type);
	}
// Builds a function type.  init_vals[1] indexes the record type of its
// parameters, init_vals[2] its yield type (negative if none), and
// init_vals[3] is its flavor.
TypePtr CPP_TypeInits::BuildFuncType(InitsManager* im, ValElemVec& init_vals) const
	{
	auto params = cast_intrusive<RecordType>(im->Types(init_vals[1]));
	const auto yield_idx = init_vals[2];
	const auto func_flavor = static_cast<FunctionFlavor>(init_vals[3]);

	TypePtr yield_type;

	if ( yield_idx >= 0 )
		yield_type = im->Types(yield_idx);

	// Functions and hooks with no explicit yield get "void"; for any
	// other flavor the yield stays nil.
	else if ( func_flavor == FUNC_FLAVOR_FUNCTION || func_flavor == FUNC_FLAVOR_HOOK )
		yield_type = base_type(TYPE_VOID);

	return make_intrusive<FuncType>(params, yield_type, func_flavor);
	}
// Completes the record type already stored at inits_vec[offset].  If the
// record has no fields yet, they are added from init_vals, which from
// element [2] on holds (name, type, attributes) triples; a negative
// attributes entry means the field has none.
TypePtr CPP_TypeInits::BuildRecordType(InitsManager* im, ValElemVec& init_vals, int offset) const
	{
	auto rec_type = cast_intrusive<RecordType>(inits_vec[offset]);
	ASSERT(rec_type);

	// A record that already has fields is left as is.
	if ( rec_type->NumFields() != 0 )
		return rec_type;

	type_decl_list field_decls;

	for ( auto i = 2U; i < init_vals.size(); i += 3 )
		{
		// The declaration gets its own copy of the field's name.
		auto field_id = util::copy_string(im->Strings(init_vals[i]));
		auto field_type = im->Types(init_vals[i + 1]);
		const auto attrs_idx = init_vals[i + 2];

		AttributesPtr field_attrs;
		if ( attrs_idx >= 0 )
			field_attrs = im->Attributes(attrs_idx);

		field_decls.append(new TypeDecl(field_id, field_type, field_attrs));
		}

	rec_type->AddFieldsDirectly(field_decls);

	return rec_type;
	}
// Returns the offset of this mapping's field within its record type.  If
// the record has no such field, one is appended (using the mapping's type
// and attributes) and the offset of the new field is returned.
int CPP_FieldMapping::ComputeOffset(InitsManager* im) const
	{
	auto rec_type = im->Types(rec)->AsRecordType();

	auto existing = rec_type->FieldOffset(field_name.c_str());
	if ( existing >= 0 )
		return existing;

	// The field does not exist, so create it.  It will land at the end
	// of the record, i.e., at the current number of fields.
	auto new_offset = rec_type->NumFields();

	auto field_id = util::copy_string(field_name.c_str());
	auto new_type = im->Types(field_type);

	AttributesPtr new_attrs;
	if ( field_attrs >= 0 )
		new_attrs = im->Attributes(field_attrs);

	type_decl_list new_decls;
	new_decls.append(new TypeDecl(field_id, new_type, new_attrs));
	rec_type->AddFieldsDirectly(new_decls);

	return new_offset;
	}
// Returns the value of this mapping's enum constant within its enum type.
// If the type has no such constant, one is added whose value is the
// current number of names, and that value is returned.
int CPP_EnumMapping::ComputeOffset(InitsManager* im) const
	{
	auto enum_type = im->Types(e_type)->AsEnumType();

	auto em_offset = enum_type->Lookup(e_name);
	if ( em_offset >= 0 )
		return em_offset;

	// The enum constant does not exist, so create it.
	em_offset = enum_type->Names().size();

	// The value chosen for the new constant must not already be in use.
	if ( enum_type->Lookup(em_offset) )
		reporter->InternalError("enum inconsistency while initializing compiled scripts");

	enum_type->AddNameInternal(e_name, em_offset);

	return em_offset;
	}
// Looks up the global and stores it in the variable this initializer was
// constructed with.  If the global has no value yet and an initialization
// value was supplied (val >= 0), assigns that value and, when supplied,
// the attributes.
void CPP_GlobalInit::Generate(InitsManager* im, std::vector<void*>& /* inits_vec */,
                              int /* offset */) const
	{
	global = lookup_global__CPP(name, im->Types(type), exported);

	if ( global->HasVal() || val < 0 )
		return;

	global->SetVal(im->ConstVals(val));

	if ( attrs >= 0 )
		global->SetAttrs(im->Attributes(attrs));
	}
// Decodes a flat array of ints into a vector-of-vector-of-indices, appended
// to "indices_set".  The array is a sequence of groups, each a size
// followed by that many values, terminated by a negative "size" entry.
void generate_indices_set(int* inits, std::vector<std::vector<int>>& indices_set)
	{
	// First pass: count the groups so the outer vector can be
	// pre-allocated.  Each step hops over a size entry plus its values.
	int num_groups = 0;
	for ( const int* p = inits; *p >= 0; p += *p + 1 )
		++num_groups;

	indices_set.reserve(num_groups);

	// Second pass: copy out each group's values.
	const int* cursor = inits;
	while ( *cursor >= 0 )
		{
		const int group_size = *cursor++;
		indices_set.emplace_back(cursor, cursor + group_size);
		cursor += group_size;
		}
	}
} // zeek::detail

View file

@ -0,0 +1,542 @@
// See the file "COPYING" in the main distribution directory for copyright.
// Classes for run-time initialization and management of C++ values used
// by the generated code.
// See InitsInfo.h for a discussion of initialization issues and the
// associated strategies for dealing with them.
#include "zeek/Expr.h"
#include "zeek/module_util.h"
#include "zeek/script_opt/CPP/RuntimeInitSupport.h"
#pragma once
namespace zeek::detail
{
using FileValPtr = IntrusivePtr<FileVal>;
using FuncValPtr = IntrusivePtr<FuncVal>;
class InitsManager;
// Type-erased access to the elements of an initialization vector.
// InitsManager (below) has to refer to initialization vectors of many
// different templated element types; this base class gives it a single
// interface for doing so.
class CPP_AbstractInitAccessor
	{
public:
	virtual ~CPP_AbstractInitAccessor() = default;

	// Returns the element at the given index.  The default implementation
	// has no vector to consult and returns nil.
	virtual ValPtr Get(int /* index */) const { return nullptr; }
	};
// Convenient way to refer to an offset associated with a particular Zeek type.
using CPP_ValElem = std::pair<TypeTag, int>;
// This class groups together all of the vectors needed for run-time
// initialization.  We gather them together into a single object so as
// to avoid wiring in a set of globals that the various initialization
// methods have to know about.
//
// The manager only holds references; the vectors themselves are owned
// elsewhere and must outlive it.
class InitsManager
	{
public:
	InitsManager(std::vector<CPP_ValElem>& _const_vals,
	             std::map<TypeTag, std::shared_ptr<CPP_AbstractInitAccessor>>& _consts,
	             std::vector<std::vector<int>>& _indices, std::vector<const char*>& _strings,
	             std::vector<p_hash_type>& _hashes, std::vector<TypePtr>& _types,
	             std::vector<AttributesPtr>& _attributes, std::vector<AttrPtr>& _attrs,
	             std::vector<CallExprPtr>& _call_exprs)
		: const_vals(_const_vals), consts(_consts), indices(_indices), strings(_strings),
		  hashes(_hashes), types(_types), attributes(_attributes), attrs(_attrs),
		  call_exprs(_call_exprs)
		{
		}

	// Provides generic access to Zeek constant values based on a single
	// index.  The index selects a (type tag, per-type index) pair, which
	// is then resolved via Consts().
	ValPtr ConstVals(int offset) const
		{
		auto& cv = const_vals[offset];
		return Consts(cv.first, cv.second);
		}

	// Retrieves the Zeek constant value for a particular Zeek type.
	//
	// The accessor is looked up with find() rather than operator[]:
	// the latter would silently insert a nil accessor for an unknown
	// tag (modifying the shared map from a const method) and then
	// dereference it.
	ValPtr Consts(TypeTag tag, int index) const
		{
		auto accessor = consts.find(tag);
		ASSERT(accessor != consts.end());
		return accessor->second->Get(index);
		}

	// Accessors for the sundry initialization vectors, each retrieving
	// a specific element identified by an index/offset.
	const std::vector<int>& Indices(int offset) const { return indices[offset]; }
	const char* Strings(int offset) const { return strings[offset]; }
	const p_hash_type Hashes(int offset) const { return hashes[offset]; }
	const TypePtr& Types(int offset) const { return types[offset]; }
	const AttributesPtr& Attributes(int offset) const { return attributes[offset]; }
	const AttrPtr& Attrs(int offset) const { return attrs[offset]; }
	const CallExprPtr& CallExprs(int offset) const { return call_exprs[offset]; }

private:
	// Maps a single constant index to its (type tag, per-type index).
	std::vector<CPP_ValElem>& const_vals;

	// Per-type accessors for the vectors holding the constants themselves.
	std::map<TypeTag, std::shared_ptr<CPP_AbstractInitAccessor>>& consts;

	std::vector<std::vector<int>>& indices;
	std::vector<const char*>& strings;
	std::vector<p_hash_type>& hashes;
	std::vector<TypePtr>& types;
	std::vector<AttributesPtr>& attributes;
	std::vector<AttrPtr>& attrs;
	std::vector<CallExprPtr>& call_exprs;
	};
// Base class for a "custom" initializer of a single element of an
// initialization vector whose elements have type T.  Both hooks do
// nothing by default; subclasses override whichever they need.
template <class T> class CPP_Init
	{
public:
	virtual ~CPP_Init() = default;

	// Pre-initializes the given element of the vector, if necessary.
	virtual void PreInit(InitsManager* /* im */, std::vector<T>& /* inits_vec */,
	                     int /* offset */) const
		{
		}

	// Initializes the given element of the vector.
	virtual void Generate(InitsManager* /* im */, std::vector<T>& /* inits_vec */,
	                      int /* offset */) const
		{
		}
	};
// Abstract base for a collection of initializers, organized into cohorts.
// T1 is the element type of the generated vector, T2 the type holding the
// initializers of one cohort.
template <class T1, class T2> class CPP_AbstractInits
	{
public:
	CPP_AbstractInits(std::vector<T1>& _inits_vec, int _offsets_set, std::vector<T2> _inits)
		: inits_vec(_inits_vec), offsets_set(_offsets_set), inits(std::move(_inits))
		{
		// The vector needs one slot per initializer, summed over
		// all of the cohorts.
		int total_elems = 0;
		for ( auto c = inits.begin(); c != inits.end(); ++c )
			total_elems += c->size();

		inits_vec.resize(total_elems);
		}

	// Initialize the given cohort of elements.  Pre-initialization of
	// all cohorts is done as part of initializing cohort 0.
	void InitializeCohort(InitsManager* im, int cohort)
		{
		// This object's vector-of-vector-of-indices: one entry per
		// cohort, each the index of that cohort's offsets.
		const auto& offsets_vec = im->Indices(offsets_set);

		if ( cohort == 0 )
			DoPreInits(im, offsets_vec);

		// Now the vector-of-indices for this particular cohort.
		InitializeCohortWithOffsets(im, cohort, im->Indices(offsets_vec[cohort]));
		}

protected:
	// Initializes the elements of the given cohort, whose slots in
	// inits_vec are given by cohort_offsets.  No-op by default.
	virtual void InitializeCohortWithOffsets(InitsManager* /* im */, int /* cohort */,
	                                         const std::vector<int>& /* cohort_offsets */)
		{
		}

	// Pre-initialize all elements requiring it.
	virtual void DoPreInits(InitsManager* /* im */, const std::vector<int>& /* offsets_vec */)
		{
		}

	// Generate a single element.
	virtual void GenerateElement(InitsManager* /* im */, T2& /* init */, int /* offset */) { }

	// The initialization vector in its entirety.
	std::vector<T1>& inits_vec;

	// A meta-index for retrieving the vector-of-vector-of-indices.
	int offsets_set;

	// Indexed by cohort.
	std::vector<T2> inits;
	};
// One cohort's worth of "custom" initializers for elements of type T.
template <class T> using CPP_InitVec = std::vector<std::shared_ptr<CPP_Init<T>>>;

// Manages an initialization vector whose elements are built by "custom"
// initializers (tailored CPP_Init objects) rather than by initializers
// based on indexing.
template <class T> class CPP_CustomInits : public CPP_AbstractInits<T, CPP_InitVec<T>>
	{
public:
	CPP_CustomInits(std::vector<T>& _inits_vec, int _offsets_set,
	                std::vector<CPP_InitVec<T>> _inits)
		: CPP_AbstractInits<T, CPP_InitVec<T>>(_inits_vec, _offsets_set, std::move(_inits))
		{
		}

private:
	// Runs PreInit() for every initializer of every cohort.
	void DoPreInits(InitsManager* im, const std::vector<int>& offsets_vec) override
		{
		for ( auto cohort = 0U; cohort < this->inits.size(); ++cohort )
			{
			const auto& cohort_inits = this->inits[cohort];
			const auto& cohort_offsets = im->Indices(offsets_vec[cohort]);

			auto offset_it = cohort_offsets.begin();
			for ( const auto& init : cohort_inits )
				init->PreInit(im, this->inits_vec, *offset_it++);
			}
		}

	// Runs Generate() for every initializer of the given cohort.
	void InitializeCohortWithOffsets(InitsManager* im, int cohort,
	                                 const std::vector<int>& cohort_offsets) override
		{
		auto offset_it = cohort_offsets.begin();

		for ( const auto& init : this->inits[cohort] )
			init->Generate(im, this->inits_vec, *offset_it++);
		}
	};
// Provides access to elements of an initialization vector of the given type.
// This is the concrete counterpart to CPP_AbstractInitAccessor: it holds a
// reference to a vector of T's and returns its elements as ValPtr's.
template <class T> class CPP_InitAccessor : public CPP_AbstractInitAccessor
{
public:
// The vector is held by reference, not copied.
CPP_InitAccessor(std::vector<T>& _inits_vec) : inits_vec(_inits_vec) { }
// Returns the index'th element of the vector, converted to a ValPtr.
ValPtr Get(int index) const override { return inits_vec[index]; }
private:
std::vector<T>& inits_vec;
};
// A type used for initializations that are based on indices into
// initialization vectors.
using ValElemVec = std::vector<int>;
using ValElemVecVec = std::vector<ValElemVec>;
// Manages an initialization vector of the given type whose elements are
// built up from previously constructed values in other initialization vectors.
// Each element's initializer is a ValElemVec, i.e., a vector of ints that
// the matching Generate() method interprets (typically as indices into
// the vectors available via the InitsManager).
template <class T> class CPP_IndexedInits : public CPP_AbstractInits<T, ValElemVecVec>
{
public:
CPP_IndexedInits(std::vector<T>& _inits_vec, int _offsets_set,
std::vector<ValElemVecVec> _inits)
: CPP_AbstractInits<T, ValElemVecVec>(_inits_vec, _offsets_set, std::move(_inits))
{
}
protected:
// Generates each element of the given cohort; defined out-of-line.
void InitializeCohortWithOffsets(InitsManager* im, int cohort,
const std::vector<int>& cohort_offsets) override;
// Note, in the following we pass in the inits_vec, even though
// the method will have direct access to it, because we want to
// use overloading to dispatch to custom generation for different
// types of values.
void Generate(InitsManager* im, std::vector<EnumValPtr>& ivec, int offset,
ValElemVec& init_vals);
void Generate(InitsManager* im, std::vector<StringValPtr>& ivec, int offset,
ValElemVec& init_vals);
void Generate(InitsManager* im, std::vector<PatternValPtr>& ivec, int offset,
ValElemVec& init_vals);
void Generate(InitsManager* im, std::vector<ListValPtr>& ivec, int offset,
ValElemVec& init_vals) const;
void Generate(InitsManager* im, std::vector<VectorValPtr>& ivec, int offset,
ValElemVec& init_vals) const;
void Generate(InitsManager* im, std::vector<RecordValPtr>& ivec, int offset,
ValElemVec& init_vals) const;
void Generate(InitsManager* im, std::vector<TableValPtr>& ivec, int offset,
ValElemVec& init_vals) const;
void Generate(InitsManager* im, std::vector<FileValPtr>& ivec, int offset,
ValElemVec& init_vals) const;
void Generate(InitsManager* im, std::vector<FuncValPtr>& ivec, int offset,
ValElemVec& init_vals) const;
void Generate(InitsManager* im, std::vector<AttrPtr>& ivec, int offset,
ValElemVec& init_vals) const;
void Generate(InitsManager* im, std::vector<AttributesPtr>& ivec, int offset,
ValElemVec& init_vals) const;
// The TypePtr initialization vector requires special treatment, since
// it has to dispatch on subclasses of TypePtr.  This version is virtual
// so a subclass can supply the real implementation; reaching the
// default here is an error.
virtual void Generate(InitsManager* im, std::vector<TypePtr>& ivec, int offset,
ValElemVec& init_vals) const
{
ASSERT(0);
}
};
// A specialization of CPP_IndexedInits that supports initializing based
// on subclasses of TypePtr.  Each initializer's first element is the
// type's tag, which selects the Build*() helper used to construct it.
class CPP_TypeInits : public CPP_IndexedInits<TypePtr>
	{
public:
	// "_inits" is taken by value and moved on to the base class, as the
	// other initializer classes do, so that the (potentially large)
	// vector-of-vector-of-vectors isn't deep-copied a second time.
	CPP_TypeInits(std::vector<TypePtr>& _inits_vec, int _offsets_set,
	              std::vector<std::vector<ValElemVec>> _inits)
		: CPP_IndexedInits<TypePtr>(_inits_vec, _offsets_set, std::move(_inits))
		{
		}

protected:
	// Pre-initializes every type of every cohort.
	void DoPreInits(InitsManager* im, const std::vector<int>& offsets_vec) override;

	// Pre-initializes the single type destined for the given offset.
	void PreInit(InitsManager* im, int offset, ValElemVec& init_vals);

	// Builds the type described by init_vals and stores it at ivec[offset].
	void Generate(InitsManager* im, std::vector<TypePtr>& ivec, int offset,
	              ValElemVec& init_vals) const override;

	// Per-tag helpers used by Generate().  The ones taking an offset
	// complete an object already present in the initialization vector.
	TypePtr BuildEnumType(InitsManager* im, ValElemVec& init_vals) const;
	TypePtr BuildOpaqueType(InitsManager* im, ValElemVec& init_vals) const;
	TypePtr BuildTypeType(InitsManager* im, ValElemVec& init_vals) const;
	TypePtr BuildVectorType(InitsManager* im, ValElemVec& init_vals) const;
	TypePtr BuildTypeList(InitsManager* im, ValElemVec& init_vals, int offset) const;
	TypePtr BuildTableType(InitsManager* im, ValElemVec& init_vals) const;
	TypePtr BuildFuncType(InitsManager* im, ValElemVec& init_vals) const;
	TypePtr BuildRecordType(InitsManager* im, ValElemVec& init_vals, int offset) const;
	};
// Abstract class for initializing basic (non-compound) constants.  T1 is
// the Zeek type for the constructed constant, T2 is the C++ type of its
// initializer.
//
// In principle we could derive this from CPP_AbstractInits, though to do so
// we'd need to convert the initializers to a vector-of-vector-of-T2, which
// would trade complexity here for complexity in InitsInfo.  So we instead
// keep this class distinct, since at heart it's a simpler set of methods
// and that way we can keep them as such here.
template <class T1, typename T2> class CPP_AbstractBasicConsts
	{
public:
	CPP_AbstractBasicConsts(std::vector<T1>& _inits_vec, int _offsets_set, std::vector<T2> _inits)
		: inits_vec(_inits_vec), offsets_set(_offsets_set), inits(std::move(_inits))
		{
		// One slot per initializer.
		inits_vec.resize(inits.size());
		}

	// Initializes all of the constants.  There is only a single cohort
	// (0) for these.
	void InitializeCohort(InitsManager* im, int cohort)
		{
		ASSERT(cohort == 0);

		const auto& offsets_vec = im->Indices(offsets_set);
		const auto& cohort_offsets = im->Indices(offsets_vec[cohort]);

		const auto num_inits = inits.size();
		for ( auto idx = 0U; idx < num_inits; ++idx )
			InitElem(im, cohort_offsets[idx], idx);
		}

protected:
	// Constructs the constant for initializer "index" and stores it at
	// "offset" in the vector.  Subclasses must override this.
	virtual void InitElem(InitsManager* /* im */, int /* offset */, int /* index */)
		{
		ASSERT(0);
		}

	// See CPP_AbstractInits for the nature of these.
	std::vector<T1>& inits_vec;
	int offsets_set;
	std::vector<T2> inits;
	};
// Class for initializing a basic constant of Zeek type T1, using initializers
// of C++ type T2.  T1 is an intrusive pointer to a T3 type; for example, if
// T1 is a BoolValPtr then T3 will be BoolVal.
template <class T1, typename T2, class T3>
class CPP_BasicConsts : public CPP_AbstractBasicConsts<T1, T2>
{
public:
CPP_BasicConsts(std::vector<T1>& _inits_vec, int _offsets_set, std::vector<T2> _inits)
: CPP_AbstractBasicConsts<T1, T2>(_inits_vec, _offsets_set, std::move(_inits))
{
}
// Constructs a T3 directly from the index'th initializer and stores
// it at the given offset; the InitsManager isn't needed for this.
void InitElem(InitsManager* /* im */, int offset, int index) override
{
this->inits_vec[offset] = make_intrusive<T3>(this->inits[index]);
}
};
// Specific classes for basic constants that use string-based constructors.
// For these, each initializer is an index into the InitsManager's strings.

// Address constants.
class CPP_AddrConsts : public CPP_AbstractBasicConsts<AddrValPtr, int>
	{
public:
	CPP_AddrConsts(std::vector<AddrValPtr>& _inits_vec, int _offsets_set, std::vector<int> _inits)
		: CPP_AbstractBasicConsts<AddrValPtr, int>(_inits_vec, _offsets_set, std::move(_inits))
		{
		}

	// Builds the address from the string its initializer refers to.
	void InitElem(InitsManager* im, int offset, int index) override
		{
		const char* addr_text = im->Strings(this->inits[index]);
		this->inits_vec[offset] = make_intrusive<AddrVal>(addr_text);
		}
	};
// Subnet constants.  Each initializer is an index into the InitsManager's
// strings.
class CPP_SubNetConsts : public CPP_AbstractBasicConsts<SubNetValPtr, int>
	{
public:
	CPP_SubNetConsts(std::vector<SubNetValPtr>& _inits_vec, int _offsets_set,
	                 std::vector<int> _inits)
		: CPP_AbstractBasicConsts<SubNetValPtr, int>(_inits_vec, _offsets_set, std::move(_inits))
		{
		}

	// Builds the subnet from the string its initializer refers to.
	void InitElem(InitsManager* im, int offset, int index) override
		{
		const char* subnet_text = im->Strings(this->inits[index]);
		this->inits_vec[offset] = make_intrusive<SubNetVal>(subnet_text);
		}
	};
// Class for initializing a Zeek global.  These don't go into an initialization
// vector, so we use void* as the underlying type.
class CPP_GlobalInit : public CPP_Init<void*>
{
public:
// "_global" is where the looked-up identifier gets stored.  "_type",
// "_attrs" and "_val" are indices resolved via the InitsManager when
// Generate() runs; negative "_attrs" or "_val" mean there is none.
CPP_GlobalInit(IDPtr& _global, const char* _name, int _type, int _attrs, int _val,
bool _exported)
: CPP_Init<void*>(), global(_global), name(_name), type(_type), attrs(_attrs), val(_val),
exported(_exported)
{
}
// Looks up the global and, if applicable, sets its value and attributes.
// The vector and offset arguments are unused.
void Generate(InitsManager* im, std::vector<void*>& /* inits_vec */,
int /* offset */) const override;
protected:
IDPtr& global; // variable that receives the global's identifier
const char* name; // the global's name
int type; // index of the global's type
int attrs; // index of its attributes, or negative if none
int val; // index of its initialization value, or negative if none
bool exported; // passed along to the lookup
};
// Abstract class for constructing a CallExpr to evaluate a Zeek expression.
// It adds nothing to CPP_Init<CallExprPtr> itself; it provides a common,
// non-templated base for the CPP_CallExprInit<T> classes.
class CPP_AbstractCallExprInit : public CPP_Init<CallExprPtr>
{
public:
CPP_AbstractCallExprInit() : CPP_Init<CallExprPtr>() { }
};
// Constructs a CallExpr that calls a given CPPFunc subclass.
template <class T> class CPP_CallExprInit : public CPP_AbstractCallExprInit
{
public:
CPP_CallExprInit(CallExprPtr& _e_var) : CPP_AbstractCallExprInit(), e_var(_e_var) { }
void Generate(InitsManager* /* im */, std::vector<CallExprPtr>& inits_vec,
int offset) const override
{
auto wrapper_class = make_intrusive<T>();
auto func_val = make_intrusive<FuncVal>(wrapper_class);
auto func_expr = make_intrusive<ConstExpr>(func_val);
auto empty_args = make_intrusive<ListExpr>();
e_var = make_intrusive<CallExpr>(func_expr, empty_args);
inits_vec[offset] = e_var;
}
private:
// Where to store the expression once we've built it.
CallExprPtr& e_var;
};
// Abstract class for registering a lambda defined in terms of a CPPStmt.
// It adds nothing to CPP_Init<void*> itself; it provides a common,
// non-templated base for the CPP_LambdaRegistration<T> classes.
class CPP_AbstractLambdaRegistration : public CPP_Init<void*>
{
public:
CPP_AbstractLambdaRegistration() : CPP_Init<void*>() { }
};
// Registers a lambda defined in terms of a given CPPStmt subclass (T).
template <class T> class CPP_LambdaRegistration : public CPP_AbstractLambdaRegistration
	{
public:
	CPP_LambdaRegistration(const char* _name, int _func_type, p_hash_type _h, bool _has_captures)
		: CPP_AbstractLambdaRegistration(), name(_name), func_type(_func_type), h(_h),
		  has_captures(_has_captures)
		{
		}

	// Instantiates T for the lambda and registers it.  Nothing is
	// stored in the vector, so it and the offset are unused.
	void Generate(InitsManager* im, std::vector<void*>& /* inits_vec */,
	              int /* offset */) const override
		{
		auto lambda_body = make_intrusive<T>(name);
		auto& lambda_type = im->Types(func_type);
		register_lambda__CPP(lambda_body, h, name, lambda_type, has_captures);
		}

protected:
	const char* name; // the lambda's name
	int func_type; // index of the lambda's type
	p_hash_type h; // hash under which the lambda is registered
	bool has_captures;
	};
// Constructs at run-time a mapping between abstract record field offsets used
// when compiling a set of scripts to their concrete offsets (which might differ
// from those during compilation due to loading of other scripts that extend
// various records).
class CPP_FieldMapping
{
public:
// "_rec", "_field_type" and "_field_attrs" are indices resolved via
// the InitsManager; a negative "_field_attrs" means no attributes.
CPP_FieldMapping(int _rec, std::string _field_name, int _field_type, int _field_attrs)
: rec(_rec), field_name(std::move(_field_name)), field_type(_field_type),
field_attrs(_field_attrs)
{
}
// Returns the field's concrete offset in the record, adding the field
// to the record if it isn't already present.
int ComputeOffset(InitsManager* im) const;
private:
int rec; // index to retrieve the record's type
std::string field_name; // which field this offset pertains to
int field_type; // the field's type, in case we have to construct it
int field_attrs; // the same for the field's attributes
};
// Constructs at run-time a mapping between abstract enum values used when
// compiling a set of scripts to their concrete values (which might differ
// from those during compilation due to loading of other scripts that extend
// the enum).
class CPP_EnumMapping
{
public:
CPP_EnumMapping(int _e_type, std::string _e_name) : e_type(_e_type), e_name(std::move(_e_name))
{
}
// Returns the constant's concrete value in the enum, adding the
// constant to the enum if it isn't already present.
int ComputeOffset(InitsManager* im) const;
private:
int e_type; // index to EnumType
std::string e_name; // which enum constant for that type
};
// Looks up a BiF of the given name, making it available to compiled
// code via a C++ global.
class CPP_LookupBiF
{
public:
// "_bif_func" is held by reference; it is only assigned once
// ResolveBiF() is called.
CPP_LookupBiF(zeek::Func*& _bif_func, std::string _bif_name)
: bif_func(_bif_func), bif_name(std::move(_bif_name))
{
}
// Performs the lookup and stores the result in the referenced variable.
void ResolveBiF() const { bif_func = lookup_bif__CPP(bif_name.c_str()); }
protected:
zeek::Func*& bif_func; // where to store the pointer to the BiF
std::string bif_name; // the BiF's name
};
// Information needed to register a compiled function body (which makes it
// available to substitute for the body's AST).  The compiler generates
// code that loops over a vector of these to perform the registrations.
struct CPP_RegisterBody
{
CPP_RegisterBody(std::string _func_name, void* _func, int _type_signature, int _priority,
p_hash_type _h, std::vector<std::string> _events)
: func_name(std::move(_func_name)), func(_func), type_signature(_type_signature),
priority(_priority), h(_h), events(std::move(_events))
{
}
std::string func_name; // name of the function
void* func; // pointer to the compiled C++ body (NOTE(review): pointee type not visible here)
int type_signature; // presumably an index identifying the function's type - TODO confirm
int priority; // priority associated with the body
p_hash_type h; // hash associated with the body
std::vector<std::string> events; // event names associated with the body; exact use not visible here
};
// Helper function that takes a (large) array of int's and from them
// constructs the corresponding vector-of-vector-of-indices. Each
// vector-of-indices is represented first by an int specifying its
// size, and then that many int's for its values. We recognize the
// end of the array upon encountering a "size" entry of -1.
extern void generate_indices_set(int* inits, std::vector<std::vector<int>>& indices_set);
} // zeek::detail

View file

@ -245,7 +245,7 @@ void CPPCompile::GenSwitchStmt(const SwitchStmt* sw)
else else
sw_val = string("p_hash(") + GenExpr(e, GEN_VAL_PTR) + ")"; sw_val = string("p_hash(") + GenExpr(e, GEN_VAL_PTR) + ")";
Emit("switch ( %s ) {", sw_val.c_str()); Emit("switch ( %s ) {", sw_val);
++break_level; ++break_level;

View file

@ -51,13 +51,26 @@ template <class T> string CPPTracker<T>::KeyName(const T* key)
auto hash = map[key]; auto hash = map[key];
ASSERT(hash != 0); ASSERT(hash != 0);
auto rep = reps[hash];
auto gi = gi_s.find(rep);
if ( gi != gi_s.end() )
return gi->second->Name();
auto index = map2[hash]; auto index = map2[hash];
string scope; string scope;
if ( IsInherited(hash) ) if ( IsInherited(hash) )
scope = scope_prefix(scope2[hash]); scope = scope_prefix(scope2[hash]);
return scope + string(base_name) + "_" + Fmt(index) + "__CPP"; string ind = Fmt(index);
string full_name;
if ( single_global )
full_name = base_name + "__CPP[" + ind + "]";
else
full_name = base_name + "_" + ind + "__CPP";
return scope + full_name;
} }
template <class T> void CPPTracker<T>::LogIfNew(IntrusivePtr<T> key, int scope, FILE* log_file) template <class T> void CPPTracker<T>::LogIfNew(IntrusivePtr<T> key, int scope, FILE* log_file)

View file

@ -15,6 +15,7 @@
#pragma once #pragma once
#include "zeek/script_opt/CPP/HashMgr.h" #include "zeek/script_opt/CPP/HashMgr.h"
#include "zeek/script_opt/CPP/InitsInfo.h"
namespace zeek::detail namespace zeek::detail
{ {
@ -24,11 +25,13 @@ namespace zeek::detail
template <class T> class CPPTracker template <class T> class CPPTracker
{ {
public: public:
// The base name is used to construct key names. The mapper, // The base name is used to construct key names. "single_global",
// if present, maps hash values to information about the previously // if true, specifies that the names should be constructed as
// generated scope in which the value appears. // indexes into a single global, rather than as distinct globals.
CPPTracker(const char* _base_name, VarMapper* _mapper = nullptr) // The mapper, if present, maps hash values to information about
: base_name(_base_name), mapper(_mapper) // the previously generated scope in which the value appears.
CPPTracker(const char* _base_name, bool _single_global, VarMapper* _mapper = nullptr)
: base_name(_base_name), single_global(_single_global), mapper(_mapper)
{ {
} }
@ -40,6 +43,8 @@ public:
// is provided, then refrains from computing it. // is provided, then refrains from computing it.
void AddKey(IntrusivePtr<T> key, p_hash_type h = 0); void AddKey(IntrusivePtr<T> key, p_hash_type h = 0);
void AddInitInfo(const T* rep, std::shared_ptr<CPP_InitInfo> gi) { gi_s[rep] = std::move(gi); }
// Returns the (C++ variable) name associated with the given key. // Returns the (C++ variable) name associated with the given key.
std::string KeyName(const T* key); std::string KeyName(const T* key);
std::string KeyName(IntrusivePtr<T> key) { return KeyName(key.get()); } std::string KeyName(IntrusivePtr<T> key) { return KeyName(key.get()); }
@ -81,6 +86,8 @@ private:
// Maps keys to internal representations (i.e., hashes). // Maps keys to internal representations (i.e., hashes).
std::unordered_map<const T*, p_hash_type> map; std::unordered_map<const T*, p_hash_type> map;
std::unordered_map<const T*, std::shared_ptr<CPP_InitInfo>> gi_s;
// Maps internal representations to distinct values. These // Maps internal representations to distinct values. These
// may-or-may-not be indices into an "inherited" namespace scope. // may-or-may-not be indices into an "inherited" namespace scope.
std::unordered_map<p_hash_type, int> map2; std::unordered_map<p_hash_type, int> map2;
@ -98,6 +105,10 @@ private:
// Used to construct key names. // Used to construct key names.
std::string base_name; std::string base_name;
// Whether to base the names out of a single global, or distinct
// globals.
bool single_global;
// If non-nil, the mapper to consult for previous names. // If non-nil, the mapper to consult for previous names.
VarMapper* mapper; VarMapper* mapper;
}; };

View file

@ -91,170 +91,13 @@ string CPPCompile::GenericValPtrToGT(const string& expr, const TypePtr& t, GenTy
return string("cast_intrusive<") + IntrusiveVal(t) + ">(" + expr + ")"; return string("cast_intrusive<") + IntrusiveVal(t) + ">(" + expr + ")";
} }
// Adds the initializations associated with the C++ variable that
// represents type "t".  List, record, enum, table and function types
// are delegated to their own Expand*TypeVar() helpers; type-types and
// vector types are built inline from a single make_intrusive<>
// expression; every other tag contributes no constructor expression.
// If the type has a script-level name, a register_type__CPP() call is
// added for it as well.
void CPPCompile::ExpandTypeVar(const TypePtr& t)
	{
	auto type_var = GenTypeName(t);
	const auto tag = t->Tag();

	if ( tag == TYPE_LIST )
		ExpandListTypeVar(t, type_var);

	else if ( tag == TYPE_RECORD )
		ExpandRecordTypeVar(t, type_var);

	else if ( tag == TYPE_ENUM )
		ExpandEnumTypeVar(t, type_var);

	else if ( tag == TYPE_TABLE )
		ExpandTableTypeVar(t, type_var);

	else if ( tag == TYPE_FUNC )
		ExpandFuncTypeVar(t, type_var);

	else if ( tag == TYPE_TYPE )
		{
		const auto& inner = t->AsTypeType()->GetType();
		AddInit(t, type_var, string("make_intrusive<TypeType>(") + GenTypeName(inner) + ")");
		}

	else if ( tag == TYPE_VECTOR )
		{
		const auto& elem = t->AsVectorType()->Yield();
		AddInit(t, type_var, string("make_intrusive<VectorType>(") + GenTypeName(elem) + ")");
		}

	// Any other tag: nothing type-specific to generate.

	auto& script_name = t->GetName();

	if ( ! script_name.empty() )
		{
		// Named types additionally get registered under their script name.
		AddInit(t, "register_type__CPP(" + type_var + ", \"" + script_name + "\");");
		}

	AddInit(t);
	}
// Adds one "->Append(<element type>);" initialization statement per
// element of the type list "t".  "tn" is the name of the C++ variable
// holding the list type.
void CPPCompile::ExpandListTypeVar(const TypePtr& t, string& tn)
	{
	const auto list_accessor = tn + "->AsTypeList()";

	for ( const auto& elem_type : t->AsTypeList()->GetTypes() )
		AddInit(t, list_accessor + "->Append(" + GenTypeName(elem_type) + ");");
	}
// Adds the initialization statements that populate the fields of
// record type "t", whose C++ variable is named "tn".  The generated
// code is guarded by a NumFields() == 0 test, builds a local
// type_decl_list with one entry per field, and then hands the list to
// AddFieldsDirectly().  Does nothing if the record has no field list.
void CPPCompile::ExpandRecordTypeVar(const TypePtr& t, string& tn)
	{
	auto fields = t->AsRecordType()->Types();

	if ( ! fields )
		return;

	const auto rec_accessor = tn + "->AsRecordType()";

	AddInit(t, string("if ( ") + rec_accessor + "->NumFields() == 0 )");
	AddInit(t, "{");
	AddInit(t, "type_decl_list tl;");

	for ( const auto& field : *fields )
		AddInit(t, GenTypeDecl(field));

	AddInit(t, rec_accessor + "->AddFieldsDirectly(tl);");
	AddInit(t, "}");
	}
// Adds the initialization statements that populate the names of enum
// type "t", whose C++ variable is named "tn".  The generated code only
// adds the names if the run-time enum has none yet, emitting one
// AddNameInternal() call per (name, value) pair of the compile-time
// enum.
void CPPCompile::ExpandEnumTypeVar(const TypePtr& t, string& tn)
	{
	auto e_name = tn + "->AsEnumType()";
	auto et = t->AsEnumType();

	// Note: the names are iterated directly below; no separate local
	// copy of them is needed.
	AddInit(t, "{ auto et = " + e_name + ";");
	AddInit(t, "if ( et->Names().empty() ) {");

	for ( const auto& name_pair : et->Names() )
		AddInit(t, string("\tet->AddNameInternal(\"") + name_pair.first + "\", " +
		               Fmt(int(name_pair.second)) + ");");

	AddInit(t, "}}");
	}
// Adds the initialization that constructs table (or set) type "t" into
// the C++ variable named "tn".  Sets are built as a SetType from the
// index type list with a nullptr second argument; tables are built as
// a TableType from the index type list and the yield type.
void CPPCompile::ExpandTableTypeVar(const TypePtr& t, string& tn)
	{
	auto table_type = t->AsTableType();

	const auto& index_types = table_type->GetIndices();
	const auto& yield_type = table_type->Yield();

	string ctor;

	if ( table_type->IsSet() )
		{
		ctor = "make_intrusive<SetType>(cast_intrusive<TypeList>(";
		ctor += GenTypeName(index_types);
		ctor += " ), nullptr)";
		}
	else
		{
		ctor = "make_intrusive<TableType>(cast_intrusive<TypeList>(";
		ctor += GenTypeName(index_types);
		ctor += "), ";
		ctor += GenTypeName(yield_type);
		ctor += ")";
		}

	AddInit(t, tn, ctor);
	}
// Adds the initialization that constructs function type "t" into the
// C++ variable named "tn".  The generated FuncType is built from the
// parameter record type, the yield type (or "nullptr" if there is
// none), and the name of the function's flavor.
void CPPCompile::ExpandFuncTypeVar(const TypePtr& t, string& tn)
	{
	auto func_type = t->AsFuncType();

	auto params_var = GenTypeName(func_type->Params());

	const auto& yield = func_type->Yield();
	string yield_var = yield ? GenTypeName(yield) : string("nullptr");

	// Left empty for any flavor other than the three handled below.
	string flavor_name;

	switch ( func_type->Flavor() )
		{
		case FUNC_FLAVOR_FUNCTION:
			flavor_name = "FUNC_FLAVOR_FUNCTION";
			break;

		case FUNC_FLAVOR_EVENT:
			flavor_name = "FUNC_FLAVOR_EVENT";
			break;

		case FUNC_FLAVOR_HOOK:
			flavor_name = "FUNC_FLAVOR_HOOK";
			break;

		default:
			break;
		}

	string ctor = "make_intrusive<FuncType>(cast_intrusive<RecordType>(";
	ctor += params_var;
	ctor += "), ";
	ctor += yield_var;
	ctor += ", ";
	ctor += flavor_name;
	ctor += ")";

	AddInit(t, tn, ctor);
	}
// Returns a C++ statement that appends a new TypeDecl for "td" to a
// list named "tl".  The TypeDecl is constructed from a copy of the
// field's name, the field's type, and - only if the field has
// attributes - the name associated with those attributes.
string CPPCompile::GenTypeDecl(const TypeDecl* td)
	{
	auto type_var = GenTypeName(td->type);

	string stmt = "tl.append(new TypeDecl(";
	stmt += string("util::copy_string(\"") + td->id + "\")";
	stmt += ", ";
	stmt += type_var;

	if ( td->attrs )
		{
		stmt += ", ";
		stmt += AttrsName(td->attrs);
		}

	stmt += "));";

	return stmt;
	}
string CPPCompile::GenTypeName(const Type* t) string CPPCompile::GenTypeName(const Type* t)
{ {
ASSERT(processed_types.count(TypeRep(t)) > 0);
return types.KeyName(TypeRep(t)); return types.KeyName(TypeRep(t));
} }
const char* CPPCompile::TypeTagName(TypeTag tag) const const char* CPPCompile::TypeTagName(TypeTag tag)
{ {
switch ( tag ) switch ( tag )
{ {
@ -280,6 +123,8 @@ const char* CPPCompile::TypeTagName(TypeTag tag) const
return "TYPE_INT"; return "TYPE_INT";
case TYPE_INTERVAL: case TYPE_INTERVAL:
return "TYPE_INTERVAL"; return "TYPE_INTERVAL";
case TYPE_LIST:
return "TYPE_LIST";
case TYPE_OPAQUE: case TYPE_OPAQUE:
return "TYPE_OPAQUE"; return "TYPE_OPAQUE";
case TYPE_PATTERN: case TYPE_PATTERN:
@ -431,16 +276,17 @@ const char* CPPCompile::TypeType(const TypePtr& t)
} }
} }
void CPPCompile::RegisterType(const TypePtr& tp) shared_ptr<CPP_InitInfo> CPPCompile::RegisterType(const TypePtr& tp)
{ {
auto t = TypeRep(tp); auto t = TypeRep(tp);
if ( processed_types.count(t) > 0 ) auto pt = processed_types.find(t);
return; if ( pt != processed_types.end() )
return pt->second;
// Add the type before going further, to avoid loops due to types processed_types[t] = nullptr;
// that reference each other.
processed_types.insert(t); shared_ptr<CPP_InitInfo> gi;
switch ( t->Tag() ) switch ( t->Tag() )
{ {
@ -449,7 +295,6 @@ void CPPCompile::RegisterType(const TypePtr& tp)
case TYPE_BOOL: case TYPE_BOOL:
case TYPE_COUNT: case TYPE_COUNT:
case TYPE_DOUBLE: case TYPE_DOUBLE:
case TYPE_ENUM:
case TYPE_ERROR: case TYPE_ERROR:
case TYPE_INT: case TYPE_INT:
case TYPE_INTERVAL: case TYPE_INTERVAL:
@ -459,119 +304,53 @@ void CPPCompile::RegisterType(const TypePtr& tp)
case TYPE_TIME: case TYPE_TIME:
case TYPE_TIMER: case TYPE_TIMER:
case TYPE_VOID: case TYPE_VOID:
case TYPE_OPAQUE:
case TYPE_SUBNET: case TYPE_SUBNET:
case TYPE_FILE: case TYPE_FILE:
// Nothing to do. gi = make_shared<BaseTypeInfo>(this, tp);
break;
case TYPE_ENUM:
gi = make_shared<EnumTypeInfo>(this, tp);
break;
case TYPE_OPAQUE:
gi = make_shared<OpaqueTypeInfo>(this, tp);
break; break;
case TYPE_TYPE: case TYPE_TYPE:
{ gi = make_shared<TypeTypeInfo>(this, tp);
const auto& tt = t->AsTypeType()->GetType();
NoteNonRecordInitDependency(t, tt);
RegisterType(tt);
}
break; break;
case TYPE_VECTOR: case TYPE_VECTOR:
{ gi = make_shared<VectorTypeInfo>(this, tp);
const auto& yield = t->AsVectorType()->Yield();
NoteNonRecordInitDependency(t, yield);
RegisterType(yield);
}
break; break;
case TYPE_LIST: case TYPE_LIST:
RegisterListType(tp); gi = make_shared<ListTypeInfo>(this, tp);
break; break;
case TYPE_TABLE: case TYPE_TABLE:
RegisterTableType(tp); gi = make_shared<TableTypeInfo>(this, tp);
break; break;
case TYPE_RECORD: case TYPE_RECORD:
RegisterRecordType(tp); gi = make_shared<RecordTypeInfo>(this, tp);
break; break;
case TYPE_FUNC: case TYPE_FUNC:
RegisterFuncType(tp); gi = make_shared<FuncTypeInfo>(this, tp);
break; break;
default: default:
reporter->InternalError("bad type in CPPCompile::RegisterType"); reporter->InternalError("bad type in CPPCompile::RegisterType");
} }
AddInit(t); type_info->AddInstance(gi);
processed_types[t] = gi;
if ( ! types.IsInherited(t) ) types.AddInitInfo(t, gi);
{
auto t_rep = types.GetRep(t);
if ( t_rep == t )
GenPreInit(t);
else
NoteInitDependency(t, t_rep);
}
}
void CPPCompile::RegisterListType(const TypePtr& t) return gi;
{
const auto& tl = t->AsTypeList()->GetTypes();
for ( auto& tl_i : tl )
{
NoteNonRecordInitDependency(t, tl_i);
RegisterType(tl_i);
}
}
// Registers the types that table type "t" is built from: its index
// type list and, if present, its yield type.  Each is first noted as
// an initialization dependency of "t" and then registered.
void CPPCompile::RegisterTableType(const TypePtr& t)
	{
	auto table_type = t->AsTableType();

	const auto& index_types = table_type->GetIndices();
	const auto& yield_type = table_type->Yield();

	NoteNonRecordInitDependency(t, index_types);
	RegisterType(index_types);

	if ( ! yield_type )
		// No yield type, so nothing further to register.
		return;

	NoteNonRecordInitDependency(t, yield_type);
	RegisterType(yield_type);
	}
// Registers everything the fields of record type "t" depend on: each
// field's type, and each field's attributes when it has any.  Does
// nothing if the record has no field list.
void CPPCompile::RegisterRecordType(const TypePtr& t)
	{
	auto fields = t->AsRecordType()->Types();

	if ( ! fields )
		return;

	for ( const auto& field : *fields )
		{
		NoteNonRecordInitDependency(t, field->type);
		RegisterType(field->type);

		if ( ! field->attrs )
			continue;

		NoteInitDependency(t, field->attrs);
		RegisterAttributes(field->attrs);
		}
	}
void CPPCompile::RegisterFuncType(const TypePtr& t)
{
auto f = t->AsFuncType();
NoteInitDependency(t, TypeRep(f->Params()));
RegisterType(f->Params());
if ( f->Yield() )
{
NoteNonRecordInitDependency(t, f->Yield());
RegisterType(f->Yield());
}
} }
const char* CPPCompile::NativeAccessor(const TypePtr& t) const char* CPPCompile::NativeAccessor(const TypePtr& t)

View file

@ -75,4 +75,60 @@ void unlock_file(const string& fname, FILE* f)
} }
} }
// Returns a copy of the "len" bytes starting at "b", expanded with
// escape sequences so the result can be represented as a C++ string.
//
// The control characters \a \b \f \n \r \t \v, the backslash and the
// double quote are written as their two-character escapes.  Any other
// printable byte (per isprint()) is copied through unchanged, and
// every remaining byte - including NUL - becomes a backslash followed
// by exactly three octal digits.  A "len" of zero or less yields an
// empty string.
string CPPEscape(const char* b, int len)
	{
	string res;

	if ( len > 0 )
		// Every input byte produces at least one output character.
		res.reserve(static_cast<string::size_type>(len));

	for ( int i = 0; i < len; ++i )
		{
		// Work with the unsigned value so that bytes >= 0x80 are safe
		// to pass to isprint() and format as 3-digit octal.
		unsigned char c = b[i];

		switch ( c )
			{
			case '\a':
				res += "\\a";
				break;
			case '\b':
				res += "\\b";
				break;
			case '\f':
				res += "\\f";
				break;
			case '\n':
				res += "\\n";
				break;
			case '\r':
				res += "\\r";
				break;
			case '\t':
				res += "\\t";
				break;
			case '\v':
				res += "\\v";
				break;
			case '\\':
				res += "\\\\";
				break;
			case '"':
				res += "\\\"";
				break;

			default:
				if ( isprint(c) )
					res += c;
				else
					{
					// Backslash + 3 octal digits + NUL fits easily.
					// Always emitting three digits keeps a digit that
					// follows in the input from being read as part of
					// the escape.
					char buf[8];
					snprintf(buf, sizeof buf, "\\%03o", static_cast<unsigned int>(c));
					res += buf;
					}
				break;
			}
		}

	return res;
	}
} // zeek::detail } // zeek::detail

View file

@ -36,4 +36,12 @@ extern bool is_CPP_compilable(const ProfileFunc* pf, const char** reason = nullp
extern void lock_file(const std::string& fname, FILE* f); extern void lock_file(const std::string& fname, FILE* f);
extern void unlock_file(const std::string& fname, FILE* f); extern void unlock_file(const std::string& fname, FILE* f);
// Returns a version of the "len" bytes starting at "b" that has been
// expanded with escape sequences, so that it can be represented as a
// C++ string.
extern std::string CPPEscape(const char* b, int len);

// Convenience form for NUL-terminated strings: the length is taken
// from strlen(), so only the bytes before the first NUL are escaped.
inline std::string CPPEscape(const char* s)
	{
	int len = strlen(s);
	return CPPEscape(s, len);
	}
} // zeek::detail } // zeek::detail

View file

@ -83,7 +83,7 @@ void CPPCompile::CreateGlobal(const ID* g)
if ( pfs.Globals().count(g) == 0 ) if ( pfs.Globals().count(g) == 0 )
{ {
// Only used in the context of calls. If it's compilable, // Only used in the context of calls. If it's compilable,
// the we'll call it directly. // then we'll call it directly.
if ( compilable_funcs.count(gn) > 0 ) if ( compilable_funcs.count(gn) > 0 )
{ {
AddGlobal(gn, "zf", true); AddGlobal(gn, "zf", true);
@ -102,18 +102,12 @@ void CPPCompile::CreateGlobal(const ID* g)
Emit("IDPtr %s;", globals[gn]); Emit("IDPtr %s;", globals[gn]);
if ( pfs.Events().count(gn) > 0 ) if ( pfs.Events().count(gn) > 0 )
// This is an event that's also used as // This is an event that's also used as a variable.
// a variable.
Emit("EventHandlerPtr %s_ev;", globals[gn]); Emit("EventHandlerPtr %s_ev;", globals[gn]);
const auto& t = g->GetType(); auto gi = make_shared<GlobalInitInfo>(this, g, globals[gn]);
NoteInitDependency(g, TypeRep(t)); global_id_info->AddInstance(gi);
global_gis[g] = gi;
auto exported = g->IsExport() ? "true" : "false";
AddInit(g, globals[gn],
string("lookup_global__CPP(\"") + gn + "\", " + GenTypeName(t) + ", " + exported +
")");
} }
if ( is_bif ) if ( is_bif )
@ -124,40 +118,25 @@ void CPPCompile::CreateGlobal(const ID* g)
global_vars.emplace(g); global_vars.emplace(g);
} }
void CPPCompile::UpdateGlobalHashes() std::shared_ptr<CPP_InitInfo> CPPCompile::RegisterGlobal(const ID* g)
{ {
for ( auto& g : pfs.AllGlobals() ) auto gg = global_gis.find(g);
if ( gg == global_gis.end() )
{ {
auto gn = g->Name(); auto gn = string(g->Name());
if ( hm.HasGlobal(gn) ) if ( globals.count(gn) == 0 )
// Not new to this compilation run. // Create a name for it.
continue; (void)IDNameStr(g);
auto ht = pfs.HashType(g->GetType()); auto gi = make_shared<GlobalInitInfo>(this, g, globals[gn]);
global_id_info->AddInstance(gi);
p_hash_type hv = 0; global_gis[g] = gi;
if ( g->GetVal() ) return gi;
hv = p_hash(g->GetVal());
fprintf(hm.HashFile(), "global\n%s\n", gn);
fprintf(hm.HashFile(), "%llu %llu\n", ht, hv);
// Record location information in the hash file for
// diagnostic purposes.
auto loc = g->GetLocationInfo();
fprintf(hm.HashFile(), "%s %d\n", loc->filename, loc->first_line);
// Flag any named record/enum types.
if ( g->IsType() )
{
const auto& t = g->GetType();
if ( t->Tag() == TYPE_RECORD )
fprintf(hm.HashFile(), "record\n%s\n", gn);
else if ( t->Tag() == TYPE_ENUM )
fprintf(hm.HashFile(), "enum\n%s\n", gn);
}
} }
else
return gg->second;
} }
void CPPCompile::AddBiF(const ID* b, bool is_var) void CPPCompile::AddBiF(const ID* b, bool is_var)
@ -170,12 +149,8 @@ void CPPCompile::AddBiF(const ID* b, bool is_var)
if ( AddGlobal(n, "bif", true) ) if ( AddGlobal(n, "bif", true) )
Emit("Func* %s;", globals[n]); Emit("Func* %s;", globals[n]);
auto lookup = string("lookup_bif__CPP(\"") + bn + "\")"; ASSERT(BiFs.count(globals[n]) == 0);
BiFs[globals[n]] = bn;
if ( standalone )
AddActivation(globals[n] + " = " + lookup + ";");
else
AddInit(b, globals[n], lookup);
} }
bool CPPCompile::AddGlobal(const string& g, const char* suffix, bool track) bool CPPCompile::AddGlobal(const string& g, const char* suffix, bool track)
@ -189,13 +164,8 @@ bool CPPCompile::AddGlobal(const string& g, const char* suffix, bool track)
if ( hm.HasGlobalVar(gn) ) if ( hm.HasGlobalVar(gn) )
gn = scope_prefix(hm.GlobalVarScope(gn)) + gn; gn = scope_prefix(hm.GlobalVarScope(gn)) + gn;
else else
{
new_var = true; new_var = true;
if ( track && update )
fprintf(hm.HashFile(), "global-var\n%s\n%d\n", gn.c_str(), addl_tag);
}
globals.emplace(g, gn); globals.emplace(g, gn);
} }
@ -207,18 +177,19 @@ void CPPCompile::RegisterEvent(string ev_name)
body_events[body_name].emplace_back(move(ev_name)); body_events[body_name].emplace_back(move(ev_name));
} }
const string& CPPCompile::IDNameStr(const ID* id) const const string& CPPCompile::IDNameStr(const ID* id)
{ {
if ( id->IsGlobal() ) if ( id->IsGlobal() )
{ {
auto g = string(id->Name()); auto g = string(id->Name());
ASSERT(globals.count(g) > 0); if ( globals.count(g) == 0 )
return ((CPPCompile*)(this))->globals[g]; CreateGlobal(id);
return globals[g];
} }
ASSERT(locals.count(id) > 0); auto l = locals.find(id);
ASSERT(l != locals.end());
return ((CPPCompile*)(this))->locals[id]; return l->second;
} }
string CPPCompile::LocalName(const ID* l) const string CPPCompile::LocalName(const ID* l) const

View file

@ -221,8 +221,6 @@ static void init_options()
check_env_opt("ZEEK_PROFILE", analysis_options.profile_ZAM); check_env_opt("ZEEK_PROFILE", analysis_options.profile_ZAM);
// Compile-to-C++-related options. // Compile-to-C++-related options.
check_env_opt("ZEEK_ADD_CPP", analysis_options.add_CPP);
check_env_opt("ZEEK_UPDATE_CPP", analysis_options.update_CPP);
check_env_opt("ZEEK_GEN_CPP", analysis_options.gen_CPP); check_env_opt("ZEEK_GEN_CPP", analysis_options.gen_CPP);
check_env_opt("ZEEK_GEN_STANDALONE_CPP", analysis_options.gen_standalone_CPP); check_env_opt("ZEEK_GEN_STANDALONE_CPP", analysis_options.gen_standalone_CPP);
check_env_opt("ZEEK_COMPILE_ALL", analysis_options.compile_all); check_env_opt("ZEEK_COMPILE_ALL", analysis_options.compile_all);
@ -233,23 +231,6 @@ static void init_options()
analysis_options.gen_CPP = true; analysis_options.gen_CPP = true;
if ( analysis_options.gen_CPP ) if ( analysis_options.gen_CPP )
{
if ( analysis_options.add_CPP )
{
reporter->Warning("gen-C++ incompatible with add-C++");
analysis_options.add_CPP = false;
}
if ( analysis_options.update_CPP )
{
reporter->Warning("gen-C++ incompatible with update-C++");
analysis_options.update_CPP = false;
}
generating_CPP = true;
}
if ( analysis_options.update_CPP || analysis_options.add_CPP )
generating_CPP = true; generating_CPP = true;
if ( analysis_options.use_CPP && generating_CPP ) if ( analysis_options.use_CPP && generating_CPP )
@ -399,7 +380,7 @@ static void generate_CPP(std::unique_ptr<ProfileFuncs>& pfs)
{ {
const auto hash_name = hash_dir + "CPP-hashes"; const auto hash_name = hash_dir + "CPP-hashes";
auto hm = std::make_unique<CPPHashManager>(hash_name.c_str(), analysis_options.add_CPP); auto hm = std::make_unique<CPPHashManager>(hash_name.c_str());
if ( analysis_options.gen_CPP ) if ( analysis_options.gen_CPP )
{ {
@ -413,26 +394,12 @@ static void generate_CPP(std::unique_ptr<ProfileFuncs>& pfs)
} }
} }
} }
else
{ // doing add-C++ instead, so look for previous compilations
for ( auto& func : funcs )
{
auto hash = func.Profile()->HashVal();
if ( compiled_scripts.count(hash) > 0 || hm->HasHash(hash) )
func.SetSkip(true);
}
// Now that we've presumably marked a lot of functions
// as skippable, recompute the global profile.
pfs = std::make_unique<ProfileFuncs>(funcs, is_CPP_compilable, false);
}
const auto gen_name = hash_dir + "CPP-gen.cc"; const auto gen_name = hash_dir + "CPP-gen.cc";
const auto addl_name = hash_dir + "CPP-gen-addl.h"; const auto addl_name = hash_dir + "CPP-gen-addl.h";
CPPCompile cpp(funcs, *pfs, gen_name, addl_name, *hm, CPPCompile cpp(funcs, *pfs, gen_name, addl_name, *hm, analysis_options.gen_standalone_CPP,
analysis_options.gen_CPP || analysis_options.update_CPP, analysis_options.report_uncompilable);
analysis_options.gen_standalone_CPP, analysis_options.report_uncompilable);
} }
static void find_when_funcs(std::unique_ptr<ProfileFuncs>& pfs, static void find_when_funcs(std::unique_ptr<ProfileFuncs>& pfs,

View file

@ -96,18 +96,6 @@ struct AnalyOpt
// of the corresponding script, and not activated by default). // of the corresponding script, and not activated by default).
bool gen_standalone_CPP = false; bool gen_standalone_CPP = false;
// If true, generate C++ for those script bodies that don't already
// have generated code, in a form that enables later compiles to
// take advantage of the newly-added elements. Only use for generating
// a zeek that will always include the associated scripts.
bool update_CPP = false;
// If true, generate C++ for those script bodies that don't already
// have generated code. The added C++ is not made available for
// later generated code, and will work for a generated zeek that
// runs without including the associated scripts.
bool add_CPP = false;
// If true, use C++ bodies if available. // If true, use C++ bodies if available.
bool use_CPP = false; bool use_CPP = false;