From 0ca2f9a8b27e5add83476d42db12adc135de0c6b Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 13 Aug 2024 14:43:17 -0700 Subject: [PATCH] speedups for compilation of initializers in -O gen-C++ generated code --- src/script_opt/CPP/Compile.h | 2 + src/script_opt/CPP/Inits.cc | 2 +- src/script_opt/CPP/InitsInfo.cc | 89 +++++++++++++++++++++++------- src/script_opt/CPP/InitsInfo.h | 19 +++++-- src/script_opt/CPP/RuntimeInits.cc | 20 ++++++- src/script_opt/CPP/RuntimeInits.h | 57 ++++++++++++------- 6 files changed, 139 insertions(+), 50 deletions(-) diff --git a/src/script_opt/CPP/Compile.h b/src/script_opt/CPP/Compile.h index e825b1a0a9..975956d79d 100644 --- a/src/script_opt/CPP/Compile.h +++ b/src/script_opt/CPP/Compile.h @@ -143,6 +143,7 @@ public: // cohort associated with a given type. int TypeOffset(const TypePtr& t) { return GI_Offset(RegisterType(t)); } int TypeCohort(const TypePtr& t) { return GI_Cohort(RegisterType(t)); } + int TypeFinalCohort(const TypePtr& t) { return GI_FinalCohort(RegisterType(t)); } // Tracks a Zeek ValPtr used as a constant value. These occur // in two contexts: directly as constant expressions, and indirectly @@ -963,6 +964,7 @@ private: // associated with an initialization. int GI_Offset(const std::shared_ptr& gi) const { return gi ? gi->Offset() : -1; } int GI_Cohort(const std::shared_ptr& gi) const { return gi ? gi->InitCohort() : 0; } + int GI_FinalCohort(const std::shared_ptr& gi) const { return gi ? gi->FinalInitCohort() : 0; } // Generate code to initialize the mappings for record field // offsets for field accesses into regions of records that diff --git a/src/script_opt/CPP/Inits.cc b/src/script_opt/CPP/Inits.cc index 4e7a30b145..ba30011745 100644 --- a/src/script_opt/CPP/Inits.cc +++ b/src/script_opt/CPP/Inits.cc @@ -166,7 +166,7 @@ void CPPCompile::InitializeConsts() { StartBlock(); for ( const auto& c : consts ) - Emit("CPP_ValElem(%s, %s),", TypeTagName(c.first), Fmt(c.second)); + Emit("{%s, %s},", TypeTagName(c.first), Fmt(c.second)); EndBlock(true); } diff --git a/src/script_opt/CPP/InitsInfo.cc b/src/script_opt/CPP/InitsInfo.cc index b4c03bf541..848c38e15b 100644 --- a/src/script_opt/CPP/InitsInfo.cc +++ b/src/script_opt/CPP/InitsInfo.cc @@ -7,6 +7,7 @@ #include "zeek/ZeekString.h" #include "zeek/script_opt/CPP/Attrs.h" #include "zeek/script_opt/CPP/Compile.h" +#include "zeek/script_opt/CPP/RuntimeInits.h" using namespace std; @@ -38,6 +39,13 @@ void CPP_InitsInfo::GenerateInitializers(CPPCompile* c) { c->Emit("%s %s = %s(%s, %s,", gt, InitializersName(), gt, base_name, Fmt(offset_set)); c->IndentUp(); + GenerateCohorts(c); + c->IndentDown(); + + c->Emit(");"); +} + +void CPP_InitsInfo::GenerateCohorts(CPPCompile* c) { c->Emit("{"); int n = 0; @@ -47,7 +55,7 @@ void CPP_InitsInfo::GenerateInitializers(CPPCompile* c) { if ( ++n > 1 ) c->Emit(""); - if ( cohort.size() == 1 && ! IsCompound() ) + if ( cohort.size() == 1 && ! UsesCompoundVectors() ) BuildCohort(c, cohort); else { c->Emit("{"); @@ -57,8 +65,6 @@ void CPP_InitsInfo::GenerateInitializers(CPPCompile* c) { } c->Emit("}"); - c->IndentDown(); - c->Emit(");"); } void CPP_InitsInfo::BuildOffsetSet(CPPCompile* c) { @@ -80,25 +86,25 @@ void CPP_InitsInfo::BuildOffsetSet(CPPCompile* c) { offset_set = c->IndMgr().AddIndices(offsets_vec); } -void CPP_InitsInfo::BuildCohort(CPPCompile* c, std::vector>& cohort) { - int n = 0; +static std::string describe_initializer(const Obj* o) { + auto od = obj_desc(o); + // Escape any embedded comment characters. + od = regex_replace(od, std::regex("/\\*"), "<>"); + od = regex_replace(od, std::regex("\\*/"), "<>"); + + return od; +} + +void CPP_InitsInfo::BuildCohort(CPPCompile* c, std::vector>& cohort) { for ( auto& co : cohort ) { vector ivs; auto o = co->InitObj(); - if ( o ) { - auto od = obj_desc(o); - - // Escape any embedded comment characters. - od = regex_replace(od, std::regex("/\\*"), "<>"); - od = regex_replace(od, std::regex("\\*/"), "<>"); - - c->Emit("/* #%s: Initializing %s: */", Fmt(co->Offset()), od); - } + if ( o ) + c->Emit("/* #%s: Initializing %s: */", Fmt(co->Offset()), describe_initializer(o)); co->InitializerVals(ivs); BuildCohortElement(c, co->InitializerType(), ivs); - ++n; } } @@ -117,12 +123,50 @@ void CPP_InitsInfo::BuildCohortElement(CPPCompile* c, string init_type, vectorEmit("std::make_shared<%s>(%s),", init_type, full_init); } +void CPP_CompoundInitsInfo::GenerateInitializers(CPPCompile* c) { + c->Emit(""); + c->Emit("static int %s_init[] = {", tag); + int n = 0; + + c->IndentUp(); + + for ( auto& cohort : instances ) { + if ( ++n > 1 ) + c->Emit(""); + + // Figure out the size of the cohort. + for ( auto& co : cohort ) { + auto o = co->InitObj(); + if ( o ) + c->Emit("/* #%s: Initializing %s: */", Fmt(co->Offset()), describe_initializer(o)); + + vector ivs; + co->InitializerVals(ivs); + c->Emit(Fmt(int(ivs.size())) + ","); + BuildCohortElement(c, co->InitializerType(), ivs); + } + + static const auto end_of_vv = Fmt(END_OF_VEC_VEC) + ","; + c->Emit(end_of_vv); + } + + static const auto end_of_vvv = Fmt(END_OF_VEC_VEC_VEC) + ","; + c->Emit(end_of_vvv); + + c->IndentDown(); + c->Emit("};"); + + CPP_InitsInfo::GenerateInitializers(c); +} + +void CPP_CompoundInitsInfo::GenerateCohorts(CPPCompile* c) { c->Emit("%s_init", tag); } + void CPP_CompoundInitsInfo::BuildCohortElement(CPPCompile* c, string init_type, vector& ivs) { string init_line; for ( auto& iv : ivs ) - init_line += iv + ", "; + init_line += iv + ","; - c->Emit("{ %s},", init_line); + c->Emit("%s", init_line); } void CPP_BasicConstInitsInfo::BuildCohortElement(CPPCompile* c, string init_type, vector& ivs) { @@ -174,7 +218,7 @@ PatternConstInfo::PatternConstInfo(CPPCompile* c, ValPtr v) : CPP_InitInfo(v) { CompoundItemInfo::CompoundItemInfo(CPPCompile* _c, ValPtr v) : CPP_InitInfo(v), c(_c) { auto& t = v->GetType(); type = c->TypeOffset(t); - init_cohort = c->TypeCohort(t) + 1; + init_cohort = c->TypeFinalCohort(t) + 1; } ListConstInfo::ListConstInfo(CPPCompile* _c, ValPtr v) : CompoundItemInfo(_c) { @@ -400,7 +444,11 @@ void TypeTypeInfo::AddInitializerVals(std::vector& ivs) const { } VectorTypeInfo::VectorTypeInfo(CPPCompile* _c, TypePtr _t) : AbstractTypeInfo(_c, std::move(_t)) { - yield = t->Yield(); + auto vt = t->AsVectorType(); + if ( vt->IsUnspecifiedVector() ) + yield = base_type(TYPE_VOID); + else + yield = t->Yield(); auto gi = c->RegisterType(yield); if ( gi ) init_cohort = gi->InitCohort(); @@ -552,7 +600,8 @@ void IndicesManager::Generate(CPPCompile* c) { c->Emit(line); } - c->Emit("-1"); + static const auto end_of_vv = Fmt(END_OF_VEC_VEC); + c->Emit(end_of_vv); c->EndBlock(true); } diff --git a/src/script_opt/CPP/InitsInfo.h b/src/script_opt/CPP/InitsInfo.h index c2bba9fbd6..d2b097a304 100644 --- a/src/script_opt/CPP/InitsInfo.h +++ b/src/script_opt/CPP/InitsInfo.h @@ -133,10 +133,10 @@ public: // Sets the associated C++ type. virtual void SetCPPType(std::string ct) { CPP_type = std::move(ct); } - // Whether this initializer is in terms of compound objects. Used + // Whether this initializer is in terms of compound vectors. Used // for avoiding compiler warnings about singleton initializations in // braces. - virtual bool IsCompound() const { return false; } + virtual bool UsesCompoundVectors() const { return false; } // Returns the type associated with the table used for initialization // (i.e., this is the type of the global returned by InitializersName()). @@ -146,9 +146,11 @@ public: void AddInstance(std::shared_ptr g); // Emit code to populate the table used to initialize this collection. - void GenerateInitializers(CPPCompile* c); + virtual void GenerateInitializers(CPPCompile* c); protected: + virtual void GenerateCohorts(CPPCompile* c); + // Computes offset_set - see below. void BuildOffsetSet(CPPCompile* c); @@ -214,7 +216,7 @@ public: BuildInitType(); } - bool IsCompound() const override { return true; } + bool UsesCompoundVectors() const override { return true; } private: void BuildInitType() { inits_type = std::string("CPP_CustomInits<") + CPPType() + ">"; } @@ -236,7 +238,7 @@ public: inits_type = std::string("CPP_BasicConsts<") + CPP_type + ", " + c_type + ", " + tag + "Val>"; } - bool IsCompound() const override { return false; } + bool UsesCompoundVectors() const override { return false; } void BuildCohortElement(CPPCompile* c, std::string init_type, std::vector& ivs) override; }; @@ -254,7 +256,12 @@ public: inits_type = std::string("CPP_IndexedInits<") + CPPType() + ">"; } - bool IsCompound() const override { return true; } + // This isn't true (anymore) because we separately build up the compound + // vectors needed for the initialization. + bool UsesCompoundVectors() const override { return false; } + + void GenerateInitializers(CPPCompile* c) override; + void GenerateCohorts(CPPCompile* c) override; void BuildCohortElement(CPPCompile* c, std::string init_type, std::vector& ivs) override; }; diff --git a/src/script_opt/CPP/RuntimeInits.cc b/src/script_opt/CPP/RuntimeInits.cc index 80c2337f63..99837d08e4 100644 --- a/src/script_opt/CPP/RuntimeInits.cc +++ b/src/script_opt/CPP/RuntimeInits.cc @@ -465,12 +465,12 @@ void CPP_GlobalInit::Generate(InitsManager* im, std::vector& /* inits_vec global->SetAttrs(im->Attributes(attrs)); } -void generate_indices_set(int* inits, std::vector>& indices_set) { +size_t generate_indices_set(int* inits, std::vector>& indices_set) { // First figure out how many groups of indices there are, so we // can pre-allocate the outer vector. auto i_ptr = inits; int num_inits = 0; - while ( *i_ptr >= 0 ) { + while ( *i_ptr != END_OF_VEC_VEC && *i_ptr != END_OF_VEC_VEC_VEC ) { ++num_inits; int n = *i_ptr; i_ptr += n + 1; // skip over vector elements @@ -479,7 +479,7 @@ void generate_indices_set(int* inits, std::vector>& indices_set indices_set.reserve(num_inits); i_ptr = inits; - while ( *i_ptr >= 0 ) { + while ( *i_ptr != END_OF_VEC_VEC ) { int n = *i_ptr; ++i_ptr; std::vector indices; @@ -490,6 +490,20 @@ void generate_indices_set(int* inits, std::vector>& indices_set indices_set.emplace_back(std::move(indices)); } + + return i_ptr - inits + 1; +} + +std::vector>> generate_indices_set(int* inits) { + std::vector>> indices_set; + + while ( *inits != END_OF_VEC_VEC_VEC ) { + std::vector> cohort_inits; + inits += generate_indices_set(inits, cohort_inits); + indices_set.push_back(std::move(cohort_inits)); + } + + return indices_set; } } // namespace zeek::detail diff --git a/src/script_opt/CPP/RuntimeInits.h b/src/script_opt/CPP/RuntimeInits.h index fe718e8226..268d2ca250 100644 --- a/src/script_opt/CPP/RuntimeInits.h +++ b/src/script_opt/CPP/RuntimeInits.h @@ -19,6 +19,28 @@ using FuncValPtr = IntrusivePtr; class InitsManager; +// Helper function that takes a (large) array of int's and from them +// constructs the corresponding vector-of-vector-of-indices. Each +// vector-of-indices is represented first by an int specifying its +// size, and then that many int's for its values. We recognize the +// end of the array upon encountering a "size" entry of END_OF_VEC_VEC. +// +// Returns how many elements were processed out of "inits", including its +// terminator. +extern size_t generate_indices_set(int* inits, std::vector>& indices_set); + +// The same but for one more level of vector construction. The source array +// has sub-arrays terminated with END_OF_VEC_VEC per the above, and the whole +// shebang is terminated with END_OF_VEC_VEC_VEC. +// +// Returns the vector construction. +extern std::vector>> generate_indices_set(int* inits); + +// These need to be distinct from any values that can appear, which means +// they should be negative, and not -1, which is used as a "N/A" value. +#define END_OF_VEC_VEC -100 +#define END_OF_VEC_VEC_VEC -200 + // An abstract helper class used to access elements of an initialization vector. // We need the abstraction because InitsManager below needs to be able to refer // to any of a range of templated classes. @@ -29,7 +51,12 @@ public: }; // Convenient way to refer to an offset associated with a particular Zeek type. -using CPP_ValElem = std::pair; +// A "struct" rather than a std::pair because C++ compilers are terribly slow +// at initializing large numbers of the latter. +struct CPP_ValElem { + TypeTag tag; + int offset; +}; // This class groups together all of the vectors needed for run-time // initialization. We gather them together into a single object so as @@ -57,7 +84,7 @@ public: // index. ValPtr ConstVals(int offset) const { auto& cv = const_vals[offset]; - return Consts(cv.first, cv.second); + return Consts(cv.tag, cv.offset); } // Retrieves the Zeek constant value for a particular Zeek type. @@ -157,9 +184,6 @@ protected: // Pre-initialize all elements requiring it. virtual void DoPreInits(InitsManager* im, const std::vector& offsets_vec) {} - // Generate a single element. - virtual void GenerateElement(InitsManager* im, T2& init, int offset) {} - // The initialization vector in its entirety. std::vector& inits_vec; @@ -221,16 +245,16 @@ using ValElemVecVec = std::vector; template class CPP_IndexedInits : public CPP_AbstractInits { public: - CPP_IndexedInits(std::vector& _inits_vec, int _offsets_set, std::vector _inits) - : CPP_AbstractInits(_inits_vec, _offsets_set, std::move(_inits)) {} + CPP_IndexedInits(std::vector& _inits_vec, int _offsets_set, int* raw_inits) + : CPP_AbstractInits(_inits_vec, _offsets_set, generate_indices_set(raw_inits)) {} protected: void InitializeCohortWithOffsets(InitsManager* im, int cohort, const std::vector& cohort_offsets) override; - // Note, in the following we pass in the inits_vec, even though - // the method will have direct access to it, because we want to - // use overloading to dispatch to custom generation for different - // types of values. + // Note, in the following we pass in the inits_vec ("ivec"), even though + // the method will have direct access to it, because we want to use + // overloading to dispatch to custom generation for different types of + // values. void Generate(InitsManager* im, std::vector& ivec, int offset, ValElemVec& init_vals); void Generate(InitsManager* im, std::vector& ivec, int offset, ValElemVec& init_vals); void Generate(InitsManager* im, std::vector& ivec, int offset, ValElemVec& init_vals); @@ -254,8 +278,8 @@ protected: // on subclasses of TypePtr. class CPP_TypeInits : public CPP_IndexedInits { public: - CPP_TypeInits(std::vector& _inits_vec, int _offsets_set, std::vector> _inits) - : CPP_IndexedInits(_inits_vec, _offsets_set, _inits) {} + CPP_TypeInits(std::vector& _inits_vec, int _offsets_set, int* raw_inits) + : CPP_IndexedInits(_inits_vec, _offsets_set, raw_inits) {} protected: void DoPreInits(InitsManager* im, const std::vector& offsets_vec) override; @@ -504,11 +528,4 @@ struct CPP_RegisterBody { std::vector events; }; -// Helper function that takes a (large) array of int's and from them -// constructs the corresponding vector-of-vector-of-indices. Each -// vector-of-indices is represented first by an int specifying its -// size, and then that many int's for its values. We recognize the -// end of the array upon encountering a "size" entry of -1. -extern void generate_indices_set(int* inits, std::vector>& indices_set); - } // namespace zeek::detail