Merge remote-tracking branch 'origin/topic/vern/cpp-init'

* origin/topic/vern/cpp-init:
  Func: Add SetCapturesVec()
  marked some recently added BTests as not suitable for -O gen-C++ testing
  robustness improvements for -O gen-C++ generation of lambdas / "when"s
  speedups for compilation of initializers in -O gen-C++ generated code
  fixes for -O gen-C++ generation of floating point constants
  -O gen-C++ fix for dealing with use of more than one module qualifier
  header tweaks to provide gen-C++ script optimization with more flexibility
  fix for script optimization of constants of type "opaque"
  fix for script optimization of "in" operations
  some minor tidying of -O gen-C++ sources
This commit is contained in:
Arne Welzel 2024-08-15 10:30:37 +02:00
commit ac5cbcc43e
26 changed files with 364 additions and 147 deletions

26
CHANGES
View file

@ -1,3 +1,29 @@
7.1.0-dev.164 | 2024-08-15 10:30:37 +0200
* Func: Add SetCapturesVec() (Arne Welzel, Corelight)
Add an API to directly set captures_vec for use by C++ compilation. The
current code keys off or asserts on ZAM stmts, making it difficult to
leverage captures_vec in other contexts.
* marked some recently added BTests as not suitable for -O gen-C++ testing (Vern Paxson, Corelight)
* robustness improvements for -O gen-C++ generation of lambdas / "when"s (Vern Paxson, Corelight)
* speedups for compilation of initializers in -O gen-C++ generated code (Vern Paxson, Corelight)
* fixes for -O gen-C++ generation of floating point constants (Vern Paxson, Corelight)
* -O gen-C++ fix for dealing with use of more than one module qualifier (Vern Paxson, Corelight)
* header tweaks to provide gen-C++ script optimization with more flexibility (Vern Paxson, Corelight)
* fix for script optimization of constants of type "opaque" (Vern Paxson, Corelight)
* fix for script optimization of "in" operations (Vern Paxson, Corelight)
* some minor tidying of -O gen-C++ sources (Vern Paxson, Corelight)
7.1.0-dev.152 | 2024-08-14 20:08:14 +0200
* mysql: Implement and test COM_CHANGE_USER (Arne Welzel, Corelight)

View file

@ -1 +1 @@
7.1.0-dev.152
7.1.0-dev.164

View file

@ -474,7 +474,16 @@ public:
// Optimization-related:
ExprPtr Duplicate() override;
ValPtr FoldVal() const override { return val; }
// Returns the value to use when constant-folding this expression, or
// nil if the constant should not be folded.
ValPtr FoldVal() const override {
    // Aggressive constant propagation can make opaque values show up as
    // "constants". Those are problematic to generate independently, so
    // only non-opaque constants are reported as foldable.
    if ( type->Tag() != TYPE_OPAQUE )
        return val;

    return nullptr;
}
protected:
void ExprDescribe(ODesc* d) const override;

View file

@ -211,6 +211,15 @@ public:
return *captures_vec;
}
/**
 * Installs the vector of ZVal's used to hold captures.
 *
 * Used for script optimization purposes.
 *
 * @param cv The new value for captures_vec. It is moved into place, so
 * the caller's pointer is left empty.
 */
void SetCapturesVec(std::unique_ptr<std::vector<ZVal>> cv) {
    captures_vec = std::move(cv);
}
// Same definition as in Frame.h.
using OffsetMap = std::unordered_map<std::string, int>;

View file

@ -35,6 +35,7 @@ class CompositeHash;
class Expr;
class ListExpr;
class ZAMCompiler;
class CPPRuntime;
using ExprPtr = IntrusivePtr<Expr>;
using ListExprPtr = IntrusivePtr<ListExpr>;
@ -752,6 +753,7 @@ private:
class CreationInitsOptimizer;
friend zeek::RecordVal;
friend zeek::detail::ZAMCompiler;
friend zeek::detail::CPPRuntime;
const auto& DeferredInits() const { return deferred_inits; }
const auto& CreationInits() const { return creation_inits; }

View file

@ -143,6 +143,7 @@ public:
// cohort associated with a given type.
int TypeOffset(const TypePtr& t) { return GI_Offset(RegisterType(t)); }
int TypeCohort(const TypePtr& t) { return GI_Cohort(RegisterType(t)); }
int TypeFinalCohort(const TypePtr& t) { return GI_FinalCohort(RegisterType(t)); }
// Tracks a Zeek ValPtr used as a constant value. These occur
// in two contexts: directly as constant expressions, and indirectly
@ -384,6 +385,10 @@ private:
std::string LocalName(const ID* l) const;
std::string LocalName(const IDPtr& l) const { return LocalName(l.get()); }
// The same, but for a capture.
std::string CaptureName(const ID* l) const;
std::string CaptureName(const IDPtr& l) const { return CaptureName(l.get()); }
// Returns a canonicalized name, with various non-alphanumeric
// characters stripped or transformed, and guaranteed not to
// conflict with C++ keywords.
@ -584,8 +589,11 @@ private:
// Maps function names to events relevant to them.
std::unordered_map<std::string, std::vector<std::string>> body_events;
// Full type of the function we're currently compiling.
FuncTypePtr func_type;
// Return type of the function we're currently compiling.
TypePtr ret_type = nullptr;
TypePtr ret_type;
// Internal name of the function we're currently compiling.
std::string body_name;
@ -696,6 +704,8 @@ private:
void GenValueSwitchStmt(const Expr* e, const case_list* cases);
void GenWhenStmt(const WhenStmt* w);
void GenWhenStmt(const WhenInfo* wi, const std::string& when_lambda, const Location* loc,
std::vector<std::string> local_aggrs);
void GenForStmt(const ForStmt* f);
void GenForOverTable(const ExprPtr& tbl, const IDPtr& value_var, const IDPList* loop_vars);
void GenForOverVector(const ExprPtr& tbl, const IDPtr& value_var, const IDPList* loop_vars);
@ -770,6 +780,7 @@ private:
std::string GenSizeExpr(const Expr* e, GenType gt);
std::string GenScheduleExpr(const Expr* e);
std::string GenLambdaExpr(const Expr* e);
std::string GenLambdaExpr(const Expr* e, std::string capture_args);
std::string GenIsExpr(const Expr* e, GenType gt);
std::string GenArithCoerceExpr(const Expr* e, GenType gt);
@ -963,6 +974,7 @@ private:
// associated with an initialization.
int GI_Offset(const std::shared_ptr<CPP_InitInfo>& gi) const { return gi ? gi->Offset() : -1; }
int GI_Cohort(const std::shared_ptr<CPP_InitInfo>& gi) const { return gi ? gi->InitCohort() : 0; }
int GI_FinalCohort(const std::shared_ptr<CPP_InitInfo>& gi) const { return gi ? gi->FinalInitCohort() : 0; }
// Generate code to initialize the mappings for record field
// offsets for field accesses into regions of records that

View file

@ -31,7 +31,7 @@ void CPPCompile::DeclareLambda(const LambdaExpr* l, const ProfileFunc* pf) {
auto& ids = l->OuterIDs();
for ( auto id : ids )
lambda_names[id] = LocalName(id);
lambda_names[id] = CaptureName(id);
CreateFunction(l_id->GetType<FuncType>(), pf, lname, body, 0, l, FUNC_FLAVOR_FUNCTION);
}
@ -40,7 +40,12 @@ void CPPCompile::CreateFunction(const FuncTypePtr& ft, const ProfileFunc* pf, co
int priority, const LambdaExpr* l, FunctionFlavor flavor) {
const auto& yt = ft->Yield();
in_hook = flavor == FUNC_FLAVOR_HOOK;
const IDPList* lambda_ids = l ? &l->OuterIDs() : nullptr;
IDPList effective_lambda_ids;
if ( l )
effective_lambda_ids = l->OuterIDs();
const IDPList* lambda_ids = l ? &effective_lambda_ids : nullptr;
string args = BindArgs(ft, lambda_ids);
@ -328,18 +333,16 @@ void CPPCompile::GatherParamNames(vector<string>& p_names, const FuncTypePtr& ft
if ( param_id ) {
if ( t->Tag() == TYPE_ANY && param_id->GetType()->Tag() != TYPE_ANY )
// We'll need to translate the parameter
// from its current representation to
// type "any".
// We'll need to translate the parameter from its current
// representation to type "any".
p_names.emplace_back(string("any_param__CPP_") + Fmt(i));
else
p_names.emplace_back(LocalName(param_id));
}
else
// Parameters that are unused don't wind up in the
// ProfileFunc. Rather than dig their name out of
// the function's declaration, we explicitly name
// them to reflect that they're unused.
// Parameters that are unused don't wind up in the ProfileFunc.
// Rather than dig their name out of the function's declaration,
// we explicitly name them to reflect that they're unused.
p_names.emplace_back(string("unused_param__CPP_") + Fmt(i));
}

View file

@ -37,11 +37,12 @@ void CPPCompile::Compile(bool report_uncompilable) {
// previously compiled instances of those if present.
for ( auto& func : funcs ) {
const auto& f = func.Func();
auto& body = func.Body();
auto& ofiles = analysis_options.only_files;
auto allow_cond = analysis_options.allow_cond;
string fn = func.Body()->GetLocationInfo()->filename;
string fn = body->GetLocationInfo()->filename;
if ( ! allow_cond && ! func.ShouldSkip() && ! ofiles.empty() && files_with_conditionals.count(fn) > 0 ) {
if ( report_uncompilable )
@ -184,8 +185,8 @@ void CPPCompile::GenProlog() {
Emit("namespace CPP_%s { // %s\n", Fmt(total_hash), string(working_dir));
// The following might-or-might-not wind up being populated/used.
Emit("std::vector<int> field_mapping;");
Emit("std::vector<int> enum_mapping;");
Emit("std::vector<zeek_int_t> field_mapping;");
Emit("std::vector<zeek_int_t> enum_mapping;");
NL();
const_info[TYPE_BOOL] = CreateConstInitInfo("Bool", "ValPtr", "bool");

View file

@ -639,13 +639,15 @@ string CPPCompile::GenScheduleExpr(const Expr* e) {
}
string CPPCompile::GenLambdaExpr(const Expr* e) {
auto l = static_cast<const LambdaExpr*>(e);
auto& body = l->Ingredients()->Body();
return GenLambdaExpr(e, GenLambdaClone(l, false));
}
string CPPCompile::GenLambdaExpr(const Expr* e, string capture_args) {
auto l = static_cast<const LambdaExpr*>(e);
auto name = Canonicalize(l->Name().c_str()) + "_lb_cl";
auto cl_args = string("\"") + name + "\"";
if ( l->OuterIDs().size() > 0 )
cl_args = cl_args + GenLambdaClone(l, false);
auto cl_args = string("\"") + name + "\"" + std::move(capture_args);
auto body = string("make_intrusive<") + name + ">(" + cl_args + ")";
auto func = string("make_intrusive<CPPLambdaFunc>(\"") + l->Name() + "\", cast_intrusive<FuncType>(" +
GenTypeName(l->GetType()) + "), " + body + ")";
@ -1175,7 +1177,7 @@ string CPPCompile::GenLambdaClone(const LambdaExpr* l, bool all_deep) {
for ( const auto& id : ids ) {
const auto& id_t = id->GetType();
auto arg = LocalName(id);
auto arg = CaptureName(id);
if ( captures && ! IsNativeType(id_t) ) {
for ( const auto& c : *captures )
@ -1183,7 +1185,7 @@ string CPPCompile::GenLambdaClone(const LambdaExpr* l, bool all_deep) {
arg = string("cast_intrusive<") + TypeName(id_t) + ">(" + arg + "->Clone())";
}
cl_args = cl_args + ", " + arg;
cl_args += ", " + arg;
}
return cl_args;
@ -1248,7 +1250,7 @@ string CPPCompile::GenEnum(const TypePtr& t, const ValPtr& ev) {
if ( ! et->HasRedefs() )
// Can use direct access.
return std::to_string(v);
return "zeek_int_t(" + std::to_string(v) + ")";
// Need to dynamically map the access.
int mapping_slot;

View file

@ -8,9 +8,7 @@
#include "zeek/Func.h"
#include "zeek/script_opt/ProfileFunc.h"
namespace zeek {
namespace detail {
namespace zeek::detail {
// A subclass of Func used for lambdas that the compiler creates for
// complex initializations (expressions used in type attributes).
@ -42,11 +40,6 @@ public:
const std::string& Name() { return name; }
// Sets/returns a hash associated with this statement. A value
// of 0 means "not set".
p_hash_type GetHash() const { return hash; }
void SetHash(p_hash_type h) { hash = h; }
// The following only get defined by lambda bodies.
virtual void SetLambdaCaptures(Frame* f) {}
virtual std::vector<ValPtr> SerializeLambdaCaptures() const { return std::vector<ValPtr>{}; }
@ -64,7 +57,6 @@ protected:
TraversalCode Traverse(TraversalCallback* cb) const override { return TC_CONTINUE; }
std::string name;
p_hash_type hash = 0ULL;
// A pseudo AST "call" node, used to support error localization.
CallExprPtr ce;
@ -117,6 +109,4 @@ extern std::unordered_map<p_hash_type, void (*)()> standalone_callbacks;
// Callbacks to finalize initialization of standalone compiled scripts.
extern std::vector<void (*)()> standalone_finalizations;
} // namespace detail
} // namespace zeek
} // namespace zeek::detail

View file

@ -38,12 +38,18 @@ void CPPCompile::GenInvokeBody(const string& call, const TypePtr& t) {
void CPPCompile::DefineBody(const FuncTypePtr& ft, const ProfileFunc* pf, const string& fname, const StmtPtr& body,
const IDPList* lambda_ids, FunctionFlavor flavor) {
IDPList l_ids;
if ( lambda_ids )
l_ids = *lambda_ids;
locals.clear();
params.clear();
body_name = fname;
func_type = ft;
ret_type = ft->Yield();
in_hook = flavor == FUNC_FLAVOR_HOOK;
auto ret_type_str = in_hook ? "bool" : FullTypeName(ret_type);
@ -52,7 +58,7 @@ void CPPCompile::DefineBody(const FuncTypePtr& ft, const ProfileFunc* pf, const
NL();
Emit("%s %s(%s)", ret_type_str, fname, ParamDecl(ft, lambda_ids, pf));
Emit("%s %s(%s)", ret_type_str, fname, ParamDecl(ft, &l_ids, pf));
StartBlock();
@ -64,7 +70,7 @@ void CPPCompile::DefineBody(const FuncTypePtr& ft, const ProfileFunc* pf, const
InitializeEvents(pf);
// Create the local variables.
DeclareLocals(pf, lambda_ids);
DeclareLocals(pf, &l_ids);
GenStmt(body);
@ -135,11 +141,12 @@ void CPPCompile::InitializeEvents(const ProfileFunc* pf) {
}
void CPPCompile::DeclareLocals(const ProfileFunc* pf, const IDPList* lambda_ids) {
// It's handy to have a set of the lambda captures rather than a list.
IDSet lambda_set;
// We track captures by their names rather than their ID*'s because the
// latter can be inconsistent when inlining.
set<string> capture_names;
if ( lambda_ids )
for ( auto li : *lambda_ids )
lambda_set.insert(li);
capture_names.insert(CaptureName(li));
const auto& ls = pf->Locals();
@ -149,11 +156,11 @@ void CPPCompile::DeclareLocals(const ProfileFunc* pf, const IDPList* lambda_ids)
for ( const auto& l : ls ) {
auto ln = LocalName(l);
auto cn = CaptureName(l);
if ( lambda_set.count(l) > 0 )
// No need to declare these, they're passed in as
// parameters.
ln = lambda_names[l];
if ( capture_names.count(cn) > 0 )
// No need to declare these, they're passed in as parameters.
ln = cn;
else if ( params.count(l) == 0 ) { // Not a parameter, so must be a local.
Emit("%s %s;", FullTypeName(l->GetType()), ln);

View file

@ -166,7 +166,7 @@ void CPPCompile::InitializeConsts() {
StartBlock();
for ( const auto& c : consts )
Emit("CPP_ValElem(%s, %s),", TypeTagName(c.first), Fmt(c.second));
Emit("{%s, %s},", TypeTagName(c.first), Fmt(c.second));
EndBlock(true);
}

View file

@ -7,6 +7,7 @@
#include "zeek/ZeekString.h"
#include "zeek/script_opt/CPP/Attrs.h"
#include "zeek/script_opt/CPP/Compile.h"
#include "zeek/script_opt/CPP/RuntimeInits.h"
using namespace std;
@ -38,6 +39,13 @@ void CPP_InitsInfo::GenerateInitializers(CPPCompile* c) {
c->Emit("%s %s = %s(%s, %s,", gt, InitializersName(), gt, base_name, Fmt(offset_set));
c->IndentUp();
GenerateCohorts(c);
c->IndentDown();
c->Emit(");");
}
void CPP_InitsInfo::GenerateCohorts(CPPCompile* c) {
c->Emit("{");
int n = 0;
@ -47,7 +55,7 @@ void CPP_InitsInfo::GenerateInitializers(CPPCompile* c) {
if ( ++n > 1 )
c->Emit("");
if ( cohort.size() == 1 && ! IsCompound() )
if ( cohort.size() == 1 && ! UsesCompoundVectors() )
BuildCohort(c, cohort);
else {
c->Emit("{");
@ -57,8 +65,6 @@ void CPP_InitsInfo::GenerateInitializers(CPPCompile* c) {
}
c->Emit("}");
c->IndentDown();
c->Emit(");");
}
void CPP_InitsInfo::BuildOffsetSet(CPPCompile* c) {
@ -80,25 +86,25 @@ void CPP_InitsInfo::BuildOffsetSet(CPPCompile* c) {
offset_set = c->IndMgr().AddIndices(offsets_vec);
}
void CPP_InitsInfo::BuildCohort(CPPCompile* c, std::vector<std::shared_ptr<CPP_InitInfo>>& cohort) {
int n = 0;
for ( auto& co : cohort ) {
vector<string> ivs;
auto o = co->InitObj();
if ( o ) {
static std::string describe_initializer(const Obj* o) {
auto od = obj_desc(o);
// Escape any embedded comment characters.
od = regex_replace(od, std::regex("/\\*"), "<<SLASH-STAR>>");
od = regex_replace(od, std::regex("\\*/"), "<<STAR-SLASH>>");
c->Emit("/* #%s: Initializing %s: */", Fmt(co->Offset()), od);
return od;
}
void CPP_InitsInfo::BuildCohort(CPPCompile* c, std::vector<std::shared_ptr<CPP_InitInfo>>& cohort) {
for ( auto& co : cohort ) {
vector<string> ivs;
auto o = co->InitObj();
if ( o )
c->Emit("/* #%s: Initializing %s: */", Fmt(co->Offset()), describe_initializer(o));
co->InitializerVals(ivs);
BuildCohortElement(c, co->InitializerType(), ivs);
++n;
}
}
@ -117,12 +123,50 @@ void CPP_InitsInfo::BuildCohortElement(CPPCompile* c, string init_type, vector<s
c->Emit("std::make_shared<%s>(%s),", init_type, full_init);
}
// Emits the initialization values as one flat "static int <tag>_init[]"
// array. Within the array, each element's values are preceded by a count
// of how many there are, each cohort is followed by END_OF_VEC_VEC, and
// the array as a whole ends with END_OF_VEC_VEC_VEC. Once the array has
// been emitted, CPP_InitsInfo::GenerateInitializers() produces the
// initializer table itself.
void CPP_CompoundInitsInfo::GenerateInitializers(CPPCompile* c) {
    static const auto cohort_terminator = Fmt(END_OF_VEC_VEC) + ",";
    static const auto array_terminator = Fmt(END_OF_VEC_VEC_VEC) + ",";

    c->Emit("");
    c->Emit("static int %s_init[] = {", tag);
    c->IndentUp();

    bool first_cohort = true;

    for ( auto& cohort : instances ) {
        // Separate successive cohorts with a blank line.
        if ( ! first_cohort )
            c->Emit("");

        first_cohort = false;

        for ( auto& elem : cohort ) {
            if ( auto obj = elem->InitObj() )
                c->Emit("/* #%s: Initializing %s: */", Fmt(elem->Offset()), describe_initializer(obj));

            vector<string> vals;
            elem->InitializerVals(vals);

            // Lead with the number of values, then the values themselves.
            c->Emit(Fmt(int(vals.size())) + ",");
            BuildCohortElement(c, elem->InitializerType(), vals);
        }

        c->Emit(cohort_terminator);
    }

    c->Emit(array_terminator);
    c->IndentDown();
    c->Emit("};");

    CPP_InitsInfo::GenerateInitializers(c);
}
// Rather than emitting the cohorts inline, emits the name "<tag>_init".
void CPP_CompoundInitsInfo::GenerateCohorts(CPPCompile* c) {
    c->Emit("%s_init", tag);
}
void CPP_CompoundInitsInfo::BuildCohortElement(CPPCompile* c, string init_type, vector<string>& ivs) {
string init_line;
for ( auto& iv : ivs )
init_line += iv + ",";
c->Emit("{ %s},", init_line);
c->Emit("%s", init_line);
}
void CPP_BasicConstInitsInfo::BuildCohortElement(CPPCompile* c, string init_type, vector<string>& ivs) {
@ -174,7 +218,7 @@ PatternConstInfo::PatternConstInfo(CPPCompile* c, ValPtr v) : CPP_InitInfo(v) {
CompoundItemInfo::CompoundItemInfo(CPPCompile* _c, ValPtr v) : CPP_InitInfo(v), c(_c) {
auto& t = v->GetType();
type = c->TypeOffset(t);
init_cohort = c->TypeCohort(t) + 1;
init_cohort = c->TypeFinalCohort(t) + 1;
}
ListConstInfo::ListConstInfo(CPPCompile* _c, ValPtr v) : CompoundItemInfo(_c) {
@ -400,6 +444,10 @@ void TypeTypeInfo::AddInitializerVals(std::vector<std::string>& ivs) const {
}
VectorTypeInfo::VectorTypeInfo(CPPCompile* _c, TypePtr _t) : AbstractTypeInfo(_c, std::move(_t)) {
auto vt = t->AsVectorType();
if ( vt->IsUnspecifiedVector() )
yield = base_type(TYPE_VOID);
else
yield = t->Yield();
auto gi = c->RegisterType(yield);
if ( gi )
@ -552,7 +600,8 @@ void IndicesManager::Generate(CPPCompile* c) {
c->Emit(line);
}
c->Emit("-1");
static const auto end_of_vv = Fmt(END_OF_VEC_VEC);
c->Emit(end_of_vv);
c->EndBlock(true);
}

View file

@ -18,7 +18,7 @@
// standalone globals (for example, one for each BiF that a compiled script
// may call).
//
// For each of these types of initialization, our general approach is to a
// For each of these types of initialization, our general approach is to have a
// class that manages a single instance of that type, and an object that
// manages all of those instances collectively. The latter object will, for
// example, attend to determining the offset into the run-time vector associated
@ -48,8 +48,15 @@
// safely use cohort(X) = cohort(Y).) We then execute run-time initialization
// in waves, one cohort at a time.
//
// Many forms of initialization are specified in terms of indices into globals
// that hold items of various types. Thus, the most common initialization
// information is a vector of integers/indices. These data structures can
// be recursive, too, namely we sometimes associate an index with a vector
// of integers/indices and then we can track multiple such vectors using
// another vector of integers/indices.
//
// Because C++ compilers can struggle when trying to optimize large quantities
// of code - clang in particular could take many CPU *hours* back when our
// of code - clang in particular could take many CPU *hours* back when the
// compiler just generated C++ code snippets for each initialization - rather
// than producing code that directly executes each given initialization, we
// instead employ a table-driven approach. The C++ initializers for the
@ -58,12 +65,14 @@
// cohort at a time) to obtain the information needed to initialize any given
// item.
//
// Many forms of initialization are specified in terms of indices into globals
// that hold items of various types. Thus, the most common initialization
// information is a vector of integers/indices. These data structures can
// be recursive, too, namely we sometimes associate an index with a vector
// of integers/indices and then we can track multiple such vectors using
// another vector of integers/indices.
// Even this has headaches for very large initializations: both clang and g++
// are *much* slower to initialize large vectors of simple template types
// (such as std::pair) than non-template types (such as a struct with two
// fields, which is all std::pair is, at the end of the day). A similar problem
// holds for initializing vectors-of-vectors-of-vectors, so we reduce these
// cases to simpler forms (structs for the first example, a single vector
// with information embedded within it for how to expand its values into
// a vector-of-vector-of-vector for the second).
#include "zeek/File.h"
#include "zeek/Val.h"
@ -124,10 +133,10 @@ public:
// Sets the associated C++ type.
virtual void SetCPPType(std::string ct) { CPP_type = std::move(ct); }
// Whether this initializer is in terms of compound objects. Used
// Whether this initializer is in terms of compound vectors. Used
// for avoiding compiler warnings about singleton initializations in
// braces.
virtual bool IsCompound() const { return false; }
virtual bool UsesCompoundVectors() const { return false; }
// Returns the type associated with the table used for initialization
// (i.e., this is the type of the global returned by InitializersName()).
@ -137,9 +146,11 @@ public:
void AddInstance(std::shared_ptr<CPP_InitInfo> g);
// Emit code to populate the table used to initialize this collection.
void GenerateInitializers(CPPCompile* c);
virtual void GenerateInitializers(CPPCompile* c);
protected:
virtual void GenerateCohorts(CPPCompile* c);
// Computes offset_set - see below.
void BuildOffsetSet(CPPCompile* c);
@ -205,7 +216,7 @@ public:
BuildInitType();
}
bool IsCompound() const override { return true; }
bool UsesCompoundVectors() const override { return true; }
private:
void BuildInitType() { inits_type = std::string("CPP_CustomInits<") + CPPType() + ">"; }
@ -227,7 +238,7 @@ public:
inits_type = std::string("CPP_BasicConsts<") + CPP_type + ", " + c_type + ", " + tag + "Val>";
}
bool IsCompound() const override { return false; }
bool UsesCompoundVectors() const override { return false; }
void BuildCohortElement(CPPCompile* c, std::string init_type, std::vector<std::string>& ivs) override;
};
@ -245,7 +256,12 @@ public:
inits_type = std::string("CPP_IndexedInits<") + CPPType() + ">";
}
bool IsCompound() const override { return true; }
// This isn't true (anymore) because we separately build up the compound
// vectors needed for the initialization.
bool UsesCompoundVectors() const override { return false; }
void GenerateInitializers(CPPCompile* c) override;
void GenerateCohorts(CPPCompile* c) override;
void BuildCohortElement(CPPCompile* c, std::string init_type, std::vector<std::string>& ivs) override;
};

View file

@ -465,12 +465,12 @@ void CPP_GlobalInit::Generate(InitsManager* im, std::vector<void*>& /* inits_vec
global->SetAttrs(im->Attributes(attrs));
}
void generate_indices_set(int* inits, std::vector<std::vector<int>>& indices_set) {
size_t generate_indices_set(int* inits, std::vector<std::vector<int>>& indices_set) {
// First figure out how many groups of indices there are, so we
// can pre-allocate the outer vector.
auto i_ptr = inits;
int num_inits = 0;
while ( *i_ptr >= 0 ) {
while ( *i_ptr != END_OF_VEC_VEC && *i_ptr != END_OF_VEC_VEC_VEC ) {
++num_inits;
int n = *i_ptr;
i_ptr += n + 1; // skip over vector elements
@ -479,7 +479,7 @@ void generate_indices_set(int* inits, std::vector<std::vector<int>>& indices_set
indices_set.reserve(num_inits);
i_ptr = inits;
while ( *i_ptr >= 0 ) {
while ( *i_ptr != END_OF_VEC_VEC ) {
int n = *i_ptr;
++i_ptr;
std::vector<int> indices;
@ -490,6 +490,20 @@ void generate_indices_set(int* inits, std::vector<std::vector<int>>& indices_set
indices_set.emplace_back(std::move(indices));
}
return i_ptr - inits + 1;
}
// Builds a vector-of-vector-of-vector of ints from a flat array. The array
// is a sequence of sub-arrays, each converted by the two-argument
// generate_indices_set(), with END_OF_VEC_VEC_VEC marking the end of the
// whole array.
std::vector<std::vector<std::vector<int>>> generate_indices_set(int* inits) {
    std::vector<std::vector<std::vector<int>>> all_cohorts;

    for ( int* next = inits; *next != END_OF_VEC_VEC_VEC; ) {
        std::vector<std::vector<int>> cohort;

        // Advance by the count that the two-argument version returns.
        next += generate_indices_set(next, cohort);

        all_cohorts.push_back(std::move(cohort));
    }

    return all_cohorts;
}
} // namespace zeek::detail

View file

@ -19,6 +19,28 @@ using FuncValPtr = IntrusivePtr<FuncVal>;
class InitsManager;
// Helper function that takes a (large) array of int's and from them
// constructs the corresponding vector-of-vector-of-indices. Each
// vector-of-indices is represented first by an int specifying its
// size, and then that many int's for its values. We recognize the
// end of the array upon encountering a "size" entry of END_OF_VEC_VEC.
//
// Returns how many elements were processed out of "inits", including its
// terminator.
extern size_t generate_indices_set(int* inits, std::vector<std::vector<int>>& indices_set);
// The same but for one more level of vector construction. The source array
// has sub-arrays terminated with END_OF_VEC_VEC per the above, and the whole
// shebang is terminated with END_OF_VEC_VEC_VEC.
//
// Returns the vector construction.
extern std::vector<std::vector<std::vector<int>>> generate_indices_set(int* inits);
// These need to be distinct from any values that can appear, which means
// they should be negative, and not -1, which is used as a "N/A" value.
#define END_OF_VEC_VEC -100
#define END_OF_VEC_VEC_VEC -200
// An abstract helper class used to access elements of an initialization vector.
// We need the abstraction because InitsManager below needs to be able to refer
// to any of a range of templated classes.
@ -29,7 +51,12 @@ public:
};
// Convenient way to refer to an offset associated with a particular Zeek type.
using CPP_ValElem = std::pair<TypeTag, int>;
// Pairs a Zeek type tag with an integer offset.
//
// A "struct" rather than a std::pair because C++ compilers are terribly slow
// at initializing large numbers of the latter.
struct CPP_ValElem {
// The Zeek type tag of the value being referred to.
TypeTag tag;
// Offset associated with that type; presumably an index into the
// collection of constants of that type (see ConstVals()) - TODO confirm.
int offset;
};
// This class groups together all of the vectors needed for run-time
// initialization. We gather them together into a single object so as
@ -57,7 +84,7 @@ public:
// index.
ValPtr ConstVals(int offset) const {
auto& cv = const_vals[offset];
return Consts(cv.first, cv.second);
return Consts(cv.tag, cv.offset);
}
// Retrieves the Zeek constant value for a particular Zeek type.
@ -157,9 +184,6 @@ protected:
// Pre-initialize all elements requiring it.
virtual void DoPreInits(InitsManager* im, const std::vector<int>& offsets_vec) {}
// Generate a single element.
virtual void GenerateElement(InitsManager* im, T2& init, int offset) {}
// The initialization vector in its entirety.
std::vector<T1>& inits_vec;
@ -221,16 +245,16 @@ using ValElemVecVec = std::vector<ValElemVec>;
template<class T>
class CPP_IndexedInits : public CPP_AbstractInits<T, ValElemVecVec> {
public:
CPP_IndexedInits(std::vector<T>& _inits_vec, int _offsets_set, std::vector<ValElemVecVec> _inits)
: CPP_AbstractInits<T, ValElemVecVec>(_inits_vec, _offsets_set, std::move(_inits)) {}
CPP_IndexedInits(std::vector<T>& _inits_vec, int _offsets_set, int* raw_inits)
: CPP_AbstractInits<T, ValElemVecVec>(_inits_vec, _offsets_set, generate_indices_set(raw_inits)) {}
protected:
void InitializeCohortWithOffsets(InitsManager* im, int cohort, const std::vector<int>& cohort_offsets) override;
// Note, in the following we pass in the inits_vec, even though
// the method will have direct access to it, because we want to
// use overloading to dispatch to custom generation for different
// types of values.
// Note, in the following we pass in the inits_vec ("ivec"), even though
// the method will have direct access to it, because we want to use
// overloading to dispatch to custom generation for different types of
// values.
void Generate(InitsManager* im, std::vector<EnumValPtr>& ivec, int offset, ValElemVec& init_vals);
void Generate(InitsManager* im, std::vector<StringValPtr>& ivec, int offset, ValElemVec& init_vals);
void Generate(InitsManager* im, std::vector<PatternValPtr>& ivec, int offset, ValElemVec& init_vals);
@ -254,8 +278,8 @@ protected:
// on subclasses of TypePtr.
class CPP_TypeInits : public CPP_IndexedInits<TypePtr> {
public:
CPP_TypeInits(std::vector<TypePtr>& _inits_vec, int _offsets_set, std::vector<std::vector<ValElemVec>> _inits)
: CPP_IndexedInits<TypePtr>(_inits_vec, _offsets_set, _inits) {}
CPP_TypeInits(std::vector<TypePtr>& _inits_vec, int _offsets_set, int* raw_inits)
: CPP_IndexedInits<TypePtr>(_inits_vec, _offsets_set, raw_inits) {}
protected:
void DoPreInits(InitsManager* im, const std::vector<int>& offsets_vec) override;
@ -504,11 +528,4 @@ struct CPP_RegisterBody {
std::vector<std::string> events;
};
// Helper function that takes a (large) array of int's and from them
// constructs the corresponding vector-of-vector-of-indices. Each
// vector-of-indices is represented first by an int specifying its
// size, and then that many int's for its values. We recognize the
// end of the array upon encountering a "size" entry of -1.
extern void generate_indices_set(int* inits, std::vector<std::vector<int>>& indices_set);
} // namespace zeek::detail

View file

@ -91,7 +91,7 @@ ValPtr when_index_slice__CPP(VectorVal* vec, const ListVal* lv) {
return v;
}
ValPtr when_invoke__CPP(Func* f, std::vector<ValPtr> args, Frame* frame, void* caller_addr) {
ValPtr when_invoke__CPP(Func* f, ValVec args, Frame* frame, void* caller_addr) {
auto trigger = frame->GetTrigger();
if ( trigger ) {
@ -194,11 +194,7 @@ void remove_element__CPP(TableValPtr aggr, ListValPtr indices) {
check_iterators__CPP(iterators_invalidated);
}
// A helper function that takes a parallel vectors of attribute tags
// and values and returns a collective AttributesPtr corresponding to
// those instantiated attributes. For attributes that don't have
// associated expressions, the corresponding value should be nil.
static AttributesPtr build_attrs__CPP(vector<int> attr_tags, vector<ValPtr> attr_vals) {
AttributesPtr build_attrs__CPP(IntVec attr_tags, vector<ValPtr> attr_vals) {
vector<AttrPtr> attrs;
int nattrs = attr_tags.size();
for ( auto i = 0; i < nattrs; ++i ) {
@ -243,7 +239,7 @@ TableValPtr table_constructor__CPP(vector<ValPtr> indices, vector<ValPtr> vals,
return aggr;
}
void assign_attrs__CPP(IDPtr id, std::vector<int> attr_tags, std::vector<ValPtr> attr_vals) {
void assign_attrs__CPP(IDPtr id, IntVec attr_tags, ValVec attr_vals) {
id->SetAttrs(build_attrs__CPP(std::move(attr_tags), std::move(attr_vals)));
}

View file

@ -10,13 +10,24 @@
namespace zeek {
using IntVec = std::vector<int>;
using ValVec = std::vector<ValPtr>;
using SubNetValPtr = IntrusivePtr<zeek::SubNetVal>;
namespace detail {
class CPPRuntime {
public:
static auto RawOptField(const RecordValPtr& rv, int field) { return rv->RawOptField(field); }
static auto& RawField(const RecordValPtr& rv, int field) { return rv->RawField(field); }
static auto& RawField(RecordVal* rv, int field) { return rv->RawField(field); }
static auto& RawOptField(const RecordValPtr& rv, int field) { return rv->RawOptField(field); }
static auto& RawOptField(RecordVal* rv, int field) { return rv->RawOptField(field); }
static const auto& GetCreationInits(const RecordType* rt) { return rt->CreationInits(); }
static RecordVal* BuildRecordVal(RecordTypePtr t, std::vector<std::optional<ZVal>> init_vals) {
return new RecordVal(std::move(t), std::move(init_vals));
}
};
// Returns the concatenation of the given strings.
@ -27,21 +38,21 @@ extern bool str_in__CPP(const String* s1, const String* s2);
// Converts a vector of individual ValPtr's into a single ListValPtr
// suitable for indexing an aggregate.
extern ListValPtr index_val__CPP(std::vector<ValPtr> indices);
extern ListValPtr index_val__CPP(ValVec indices);
// Returns the value corresponding to indexing the given table/vector/string
// with the given set of indices. These are functions rather than something
// generated directly so that they can package up the error handling for
// the case where there's no such index. "patstr" refers to indexing a
// table[pattern] of X with a string value.
extern ValPtr index_table__CPP(const TableValPtr& t, std::vector<ValPtr> indices);
extern ValPtr index_patstr_table__CPP(const TableValPtr& t, std::vector<ValPtr> indices);
extern ValPtr index_table__CPP(const TableValPtr& t, ValVec indices);
extern ValPtr index_patstr_table__CPP(const TableValPtr& t, ValVec indices);
extern ValPtr index_vec__CPP(const VectorValPtr& vec, int index);
extern ValPtr index_string__CPP(const StringValPtr& svp, std::vector<ValPtr> indices);
extern ValPtr index_string__CPP(const StringValPtr& svp, ValVec indices);
// The same, but for indexing happening inside a "when" clause.
extern ValPtr when_index_table__CPP(const TableValPtr& t, std::vector<ValPtr> indices);
extern ValPtr when_index_patstr__CPP(const TableValPtr& t, std::vector<ValPtr> indices);
extern ValPtr when_index_table__CPP(const TableValPtr& t, ValVec indices);
extern ValPtr when_index_patstr__CPP(const TableValPtr& t, ValVec indices);
extern ValPtr when_index_vec__CPP(const VectorValPtr& vec, int index);
// For vector slices, we use the existing index_slice(), but we need a
@ -50,7 +61,7 @@ extern ValPtr when_index_slice__CPP(VectorVal* vec, const ListVal* lv);
// Calls out to the given script or BiF function, which does not return
// a value.
inline ValPtr invoke_void__CPP(Func* f, std::vector<ValPtr> args, Frame* frame) { return f->Invoke(&args, frame); }
inline ValPtr invoke_void__CPP(Func* f, ValVec args, Frame* frame) {
    // "args" is taken by value so that there's a named vector whose
    // address can be handed to Invoke().
    ValPtr result = f->Invoke(&args, frame);
    return result;
}
// Used for error propagation by failed calls.
class CPPInterpreterException : public InterpreterException {};
@ -58,7 +69,7 @@ class CPPInterpreterException : public InterpreterException {};
// Calls out to the given script or BiF function. A separate function because
// of the need to (1) construct the "args" vector using {} initializers,
// but (2) needing to have the address of that vector.
inline ValPtr invoke__CPP(Func* f, std::vector<ValPtr> args, Frame* frame) {
inline ValPtr invoke__CPP(Func* f, ValVec args, Frame* frame) {
auto v = f->Invoke(&args, frame);
if ( ! v )
throw CPPInterpreterException();
@ -71,7 +82,7 @@ inline ValPtr invoke__CPP(Func* f, std::vector<ValPtr> args, Frame* frame) {
// last argument is the address of the calling function; we just need
// it to be distinct to the call, so we can associate a Trigger cache
// with it.
extern ValPtr when_invoke__CPP(Func* f, std::vector<ValPtr> args, Frame* frame, void* caller_addr);
extern ValPtr when_invoke__CPP(Func* f, ValVec args, Frame* frame, void* caller_addr);
// Thrown when a call inside a "when" delays.
class CPPDelayedCallException : public InterpreterException {};
@ -201,29 +212,35 @@ inline VectorValPtr vector_coerce__CPP(const ValPtr& v, const TypePtr& t) {
return make_intrusive<VectorVal>(cast_intrusive<VectorType>(t));
}
// Takes parallel vectors of attribute tags and values and returns a
// collective AttributesPtr corresponding to those instantiated attributes.
// For attributes that don't have associated expressions, the corresponding
// value should be nil.
extern AttributesPtr build_attrs__CPP(IntVec attr_tags, std::vector<ValPtr> attr_vals);
// Constructs a set of the given type, containing the given elements, and
// with the associated attributes.
extern TableValPtr set_constructor__CPP(std::vector<ValPtr> elements, TableTypePtr t, std::vector<int> attr_tags,
std::vector<ValPtr> attr_vals);
extern TableValPtr set_constructor__CPP(ValVec elements, TableTypePtr t, IntVec attr_tags, ValVec attr_vals);
// Constructs a table of the given type, containing the given elements
// (specified as parallel index/value vectors), and with the associated
// attributes.
extern TableValPtr table_constructor__CPP(std::vector<ValPtr> indices, std::vector<ValPtr> vals, TableTypePtr t,
std::vector<int> attr_tags, std::vector<ValPtr> attr_vals);
extern TableValPtr table_constructor__CPP(ValVec indices, ValVec vals, TableTypePtr t, IntVec attr_tags,
ValVec attr_vals);
// Assigns a set of attributes to an identifier.
extern void assign_attrs__CPP(IDPtr id, std::vector<int> attr_tags, std::vector<ValPtr> attr_vals);
extern void assign_attrs__CPP(IDPtr id, IntVec attr_tags, ValVec attr_vals);
// Constructs a record of the given type, whose (ordered) fields are
// assigned to the corresponding elements of the given vector of values.
extern RecordValPtr record_constructor__CPP(std::vector<ValPtr> vals, RecordTypePtr t);
extern RecordValPtr record_constructor__CPP(ValVec vals, RecordTypePtr t);
// Same, but with a map when using a named constructor.
extern RecordValPtr record_constructor_map__CPP(std::vector<ValPtr> vals, std::vector<int> map, RecordTypePtr t);
extern RecordValPtr record_constructor_map__CPP(ValVec vals, IntVec map, RecordTypePtr t);
// Constructs a vector of the given type, populated with the given values.
extern VectorValPtr vector_constructor__CPP(std::vector<ValPtr> vals, VectorTypePtr t);
extern VectorValPtr vector_constructor__CPP(ValVec vals, VectorTypePtr t);
// For patterns, executes p1 += p2.
inline PatternValPtr re_append__CPP(const PatternValPtr& p1, const PatternValPtr& p2) {
@ -234,7 +251,7 @@ inline PatternValPtr re_append__CPP(const PatternValPtr& p1, const PatternValPtr
// Schedules an event to occur at the given absolute time, parameterized
// with the given set of values. A separate function to facilitate avoiding
// the scheduling if Zeek is terminating.
extern ValPtr schedule__CPP(double dt, EventHandlerPtr event, std::vector<ValPtr> args);
extern ValPtr schedule__CPP(double dt, EventHandlerPtr event, ValVec args);
// Simple helper functions for supporting absolute value.
// Returns the magnitude of v as an unsigned value.
// Assumes zeek_uint_t is the unsigned counterpart of zeek_int_t (same width).
inline zeek_uint_t iabs__CPP(zeek_int_t v) {
    // Convert to unsigned *before* negating. Negating the most negative
    // signed value directly is signed overflow (undefined behavior), whereas
    // unsigned subtraction wraps and yields the correct magnitude.
    const auto magnitude = static_cast<zeek_uint_t>(v);
    return v < 0 ? zeek_uint_t{0} - magnitude : magnitude;
}

View file

@ -109,7 +109,7 @@ VEC_OP1(comp, ~, )
}
// Analogous to VEC_OP1, instantiates a function for a given binary operation,
// with customimzable kernels for "int" and "double" operations.
// with customizable kernels for "int" and "double" operations.
// This version is for operations whose result type is the same as the
// operand type.
#define VEC_OP2(name, op, int_kernel, double_kernel, zero_check, is_bool) \

View file

@ -305,15 +305,22 @@ void CPPCompile::GenValueSwitchStmt(const Expr* e, const case_list* cases) {
// Generates code for a "when" statement given as a WhenStmt. This overload
// only gathers inputs (aggregate-typed locals, the generated lambda, the
// statement's location) and hands them to the WhenInfo-based overload.
void CPPCompile::GenWhenStmt(const WhenStmt* w) {
auto wi = w->Info();
// NOTE(review): "wl" is only used for the nil check below; the lambda is
// fetched again via wi->Lambda() further down. These lines look like the
// pre-change version rendered alongside the new body -- confirm whether
// they are still meant to be here.
auto wl = wi->Lambda();
// No lambda is treated as the deprecated form of "when", which is fatal.
if ( ! wl )
reporter->FatalError("cannot compile deprecated \"when\" statement");
// Collect IDNameStr() for each "when" expression local whose type is
// an aggregate.
vector<string> local_aggrs;
for ( auto& l : wi->WhenExprLocals() )
if ( IsAggr(l->GetType()) )
local_aggrs.push_back(IDNameStr(l.get()));
// Generate the lambda expression (GEN_NATIVE) up front so the overload
// below receives it as a string.
auto when_lambda = GenExpr(wi->Lambda(), GEN_NATIVE);
GenWhenStmt(wi.get(), when_lambda, w->GetLocationInfo(), std::move(local_aggrs));
}
void CPPCompile::GenWhenStmt(const WhenInfo* wi, const std::string& when_lambda, const Location* loc,
vector<string> local_aggrs) {
auto is_return = wi->IsReturn() ? "true" : "false";
auto timeout = wi->TimeoutExpr();
auto timeout_val = timeout ? GenExpr(timeout, GEN_NATIVE) : "-1.0";
auto loc = w->GetLocationInfo();
Emit("{ // begin a new scope for internal variables");
@ -331,17 +338,18 @@ void CPPCompile::GenWhenStmt(const WhenStmt* w) {
NL();
Emit("std::vector<ValPtr> CPP__local_aggrs;");
for ( auto& l : wi->WhenExprLocals() )
if ( IsAggr(l->GetType()) )
Emit("CPP__local_aggrs.emplace_back(%s);", IDNameStr(l.get()));
for ( auto& la : local_aggrs )
Emit("CPP__local_aggrs.emplace_back(%s);", la);
Emit("CPP__wi->Instantiate(%s);", GenExpr(wi->Lambda(), GEN_NATIVE));
Emit("CPP__wi->Instantiate(%s);", when_lambda);
// We need a new frame for the trigger to unambiguously associate
// with, in case we're called multiple times with our existing frame.
Emit("auto new_frame = make_intrusive<Frame>(0, nullptr, nullptr);");
Emit("auto curr_t = f__CPP->GetTrigger();");
Emit("auto curr_assoc = f__CPP->GetTriggerAssoc();");
if ( ! ret_type || ret_type->Tag() == TYPE_VOID )
Emit("// Note, the following works even if curr_t is nil.");
Emit("new_frame->SetTrigger({NewRef{}, curr_t});");
Emit("new_frame->SetTriggerAssoc(curr_assoc);");
@ -352,7 +360,7 @@ void CPPCompile::GenWhenStmt(const WhenStmt* w) {
if ( ret_type && ret_type->Tag() != TYPE_VOID ) {
// Note, ret_type can be active but we *still* don't have
// a return type, due to the faked-up "any" return type
// a return value, due to the faked-up "any" return type
// associated with "when" lambdas, so check for that case.
Emit("if ( curr_t )");
StartBlock();

View file

@ -17,6 +17,16 @@ string Fmt(double d) {
if ( d == 0.0 && signbit(d) )
return "-0.0";
if ( isinf(d) ) {
string infty = "std::numeric_limits<double>::infinity()";
if ( d < 0.0 )
infty = "-" + infty;
return infty;
}
if ( isnan(d) )
return "std::numeric_limits<double>::quiet_NaN()";
// Unfortunately, to_string(double) is hardwired to use %f with
// default of 6 digits precision.
char buf[8192];

View file

@ -111,12 +111,32 @@ string CPPCompile::LocalName(const ID* l) const {
auto n = l->Name();
auto without_module = strstr(n, "::");
if ( without_module )
return Canonicalize(without_module + 2);
else
while ( without_module ) {
n = without_module + 2;
without_module = strstr(n, "::");
}
return Canonicalize(n);
}
// Produces the name used for a capture: the identifier's name with every
// leading "module::" qualifier removed, cut off at the first '.' (the
// inlining appendage, if any), and with a trailing underscore added.
string CPPCompile::CaptureName(const ID* l) const {
    auto base = l->Name();

    // Skip past each "::" in turn so that only the text following the
    // last qualifier remains.
    for ( auto sep = strstr(base, "::"); sep; sep = strstr(base, "::") )
        base = sep + 2;

    // Keep only what precedes the first '.', when there is one.
    auto dot = strchr(base, '.');
    string stripped = dot ? string(base, dot - base) : string(base);

    return stripped + "_";
}
string CPPCompile::Canonicalize(const char* name) const {
string cname;
@ -127,7 +147,7 @@ string CPPCompile::Canonicalize(const char* name) const {
if ( c == '<' || c == '>' )
continue;
if ( c == ':' || c == '-' )
if ( c == ':' || c == '-' || c == '.' )
c = '_';
cname += c;

View file

@ -115,6 +115,9 @@ bool Expr::IsReducedConditional(Reducer* c) const {
return NonReduced(this);
if ( op1->Tag() == EXPR_LIST ) {
if ( ! op1->IsReduced(c) )
return NonReduced(this);
auto l1 = op1->AsListExpr();
auto& l1_e = l1->Exprs();
@ -472,6 +475,7 @@ ExprPtr UnaryExpr::Reduce(Reducer* c, StmtPtr& red_stmt) {
auto op_val = op->FoldVal();
if ( op_val ) {
auto fold = Fold(op_val.get());
if ( fold->GetType()->Tag() != TYPE_OPAQUE )
return TransformMe(make_intrusive<ConstExpr>(fold), c, red_stmt);
}
@ -520,6 +524,7 @@ ExprPtr BinaryExpr::Reduce(Reducer* c, StmtPtr& red_stmt) {
auto op2_fold_val = op2->FoldVal();
if ( op1_fold_val && op2_fold_val ) {
auto fold = Fold(op1_fold_val.get(), op2_fold_val.get());
if ( fold->GetType()->Tag() != TYPE_OPAQUE )
return TransformMe(make_intrusive<ConstExpr>(fold), c, red_stmt);
}

View file

@ -401,7 +401,6 @@ static void use_CPP() {
++num_used;
auto b = s->second.body;
b->SetHash(hash);
// We may have already updated the body if
// we're using code compiled for standalone.
@ -532,6 +531,9 @@ static void analyze_scripts_for_ZAM() {
}
void clear_script_analysis() {
if ( analysis_options.gen_CPP )
return;
IDOptInfo::ClearGlobalInitExprs();
// We need to explicitly clear out the optimization information

View file

@ -1,4 +1,5 @@
# @TEST-DOC: Verify cluster_started() is not rebroadcasted if the manager restarts.
# @TEST-REQUIRES: test "${ZEEK_USE_CPP}" != "1"
# @TEST-PORT: SUPERVISOR_PORT
# @TEST-PORT: MANAGER_PORT
# @TEST-PORT: PROXY_PORT

View file

@ -1,4 +1,5 @@
# @TEST-DOC: Verify cluster_started() is not rebroadcasted if a worker restarts.
# @TEST-REQUIRES: test "${ZEEK_USE_CPP}" != "1"
# @TEST-PORT: SUPERVISOR_PORT
# @TEST-PORT: MANAGER_PORT
# @TEST-PORT: PROXY_PORT