From ce7e253dfd7c8a890469879c29c66f659f964a51 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 14 Feb 2023 15:18:10 -0800 Subject: [PATCH 1/3] Fixed bad memory access in compiled-to-C++ scripts when initializing attributes --- src/script_opt/CPP/RuntimeInits.cc | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/script_opt/CPP/RuntimeInits.cc b/src/script_opt/CPP/RuntimeInits.cc index fe8f07ce9b..3d31177a68 100644 --- a/src/script_opt/CPP/RuntimeInits.cc +++ b/src/script_opt/CPP/RuntimeInits.cc @@ -170,12 +170,20 @@ void CPP_IndexedInits::Generate(InitsManager* im, std::vector& ivec, auto tag = static_cast(init_vals[0]); auto ae_tag = static_cast(init_vals[1]); + if ( ae_tag == AE_NONE ) + { + ivec[offset] = make_intrusive(tag); + return; + } + ExprPtr e; auto e_arg = init_vals[2]; switch ( ae_tag ) { case AE_NONE: + // Shouldn't happen, per test above. + ASSERT(0); break; case AE_CONST: From 3d0faa8ceaec63c89e8d20e7c85a0f0862d64107 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 14 Feb 2023 15:19:49 -0800 Subject: [PATCH 2/3] fixes for order-of-initialization in scripts compiled to C++ annotations of such initializations to tie them to the original Zeek script --- src/script_opt/CPP/InitsInfo.cc | 53 ++++++++++++++++++++++----------- src/script_opt/CPP/InitsInfo.h | 33 +++++++++++++++----- 2 files changed, 61 insertions(+), 25 deletions(-) diff --git a/src/script_opt/CPP/InitsInfo.cc b/src/script_opt/CPP/InitsInfo.cc index 26d2c4b90a..73d0c8275e 100644 --- a/src/script_opt/CPP/InitsInfo.cc +++ b/src/script_opt/CPP/InitsInfo.cc @@ -18,14 +18,14 @@ string CPP_InitsInfo::Name(int index) const void CPP_InitsInfo::AddInstance(shared_ptr g) { - auto init_cohort = g->InitCohort(); + auto final_init_cohort = g->FinalInitCohort(); - if ( static_cast(instances.size()) <= init_cohort ) - instances.resize(init_cohort + 1); + if ( static_cast(instances.size()) <= final_init_cohort ) + instances.resize(final_init_cohort + 1); g->SetOffset(this, size++); - instances[init_cohort].push_back(move(g)); + instances[final_init_cohort].push_back(move(g)); } string CPP_InitsInfo::Declare() const @@ -47,9 +47,14 @@ void CPP_InitsInfo::GenerateInitializers(CPPCompile* c) c->IndentUp(); c->Emit("{"); + int n = 0; + // Add each cohort as a vector element. for ( auto& cohort : instances ) { + if ( ++n > 1 ) + c->Emit(""); + c->Emit("{"); BuildCohort(c, cohort); c->Emit("},"); @@ -83,11 +88,17 @@ void CPP_InitsInfo::BuildOffsetSet(CPPCompile* c) void CPP_InitsInfo::BuildCohort(CPPCompile* c, std::vector>& cohort) { + int n = 0; + for ( auto& co : cohort ) { vector ivs; + auto o = co->InitObj(); + if ( o ) + c->Emit("/* #%s: Initializing %s: */", Fmt(co->Offset()), obj_desc(o)); co->InitializerVals(ivs); BuildCohortElement(c, co->InitializerType(), ivs); + ++n; } } @@ -137,7 +148,7 @@ string CPP_InitInfo::ValElem(CPPCompile* c, ValPtr v) return Fmt(-1); } -DescConstInfo::DescConstInfo(CPPCompile* c, ValPtr v) : CPP_InitInfo() +DescConstInfo::DescConstInfo(CPPCompile* c, ValPtr v) : CPP_InitInfo(v) { ODesc d; v->Describe(&d); @@ -145,7 +156,7 @@ DescConstInfo::DescConstInfo(CPPCompile* c, ValPtr v) : CPP_InitInfo() init = Fmt(s); } -EnumConstInfo::EnumConstInfo(CPPCompile* c, ValPtr v) +EnumConstInfo::EnumConstInfo(CPPCompile* c, ValPtr v) : CPP_InitInfo(v) { auto ev = v->AsEnumVal(); auto& ev_t = ev->GetType(); @@ -154,7 +165,7 @@ EnumConstInfo::EnumConstInfo(CPPCompile* c, ValPtr v) e_val = v->AsEnum(); } -StringConstInfo::StringConstInfo(CPPCompile* c, ValPtr v) : CPP_InitInfo() +StringConstInfo::StringConstInfo(CPPCompile* c, ValPtr v) : CPP_InitInfo(v) { auto s = v->AsString(); const char* b = (const char*)(s->Bytes()); @@ -163,7 +174,7 @@ StringConstInfo::StringConstInfo(CPPCompile* c, ValPtr v) : CPP_InitInfo() chars = c->TrackString(CPPEscape(b, len)); } -PatternConstInfo::PatternConstInfo(CPPCompile* c, ValPtr v) : CPP_InitInfo() +PatternConstInfo::PatternConstInfo(CPPCompile* c, ValPtr v) : CPP_InitInfo(v) { auto re = v->AsPatternVal()->Get(); pattern = c->TrackString(CPPEscape(re->OrigText())); @@ -171,7 +182,7 @@ PatternConstInfo::PatternConstInfo(CPPCompile* c, ValPtr v) : CPP_InitInfo() is_single_line = re->IsSingleLine(); } -CompoundItemInfo::CompoundItemInfo(CPPCompile* _c, ValPtr v) : CPP_InitInfo(), c(_c) +CompoundItemInfo::CompoundItemInfo(CPPCompile* _c, ValPtr v) : CPP_InitInfo(v), c(_c) { auto& t = v->GetType(); type = c->TypeOffset(t); @@ -330,7 +341,7 @@ AttrsInfo::AttrsInfo(CPPCompile* _c, const AttributesPtr& _attrs) : CompoundItem } GlobalInitInfo::GlobalInitInfo(CPPCompile* c, const ID* g, string _CPP_name) - : CPP_InitInfo(), CPP_name(move(_CPP_name)) + : CPP_InitInfo(g), CPP_name(move(_CPP_name)) { Zeek_name = g->Name(); @@ -369,7 +380,7 @@ void GlobalInitInfo::InitializerVals(std::vector& ivs) const } CallExprInitInfo::CallExprInitInfo(CPPCompile* c, ExprPtr _e, string _e_name, string _wrapper_class) - : e(move(_e)), e_name(move(_e_name)), wrapper_class(move(_wrapper_class)) + : CPP_InitInfo(_e), e(move(_e)), e_name(move(_e_name)), wrapper_class(move(_wrapper_class)) { auto gi = c->RegisterType(e->GetType()); init_cohort = max(init_cohort, gi->InitCohort() + 1); @@ -378,7 +389,8 @@ CallExprInitInfo::CallExprInitInfo(CPPCompile* c, ExprPtr _e, string _e_name, st LambdaRegistrationInfo::LambdaRegistrationInfo(CPPCompile* c, string _name, FuncTypePtr ft, string _wrapper_class, p_hash_type _h, bool _has_captures) - : name(move(_name)), wrapper_class(move(_wrapper_class)), h(_h), has_captures(_has_captures) + : CPP_InitInfo(ft), name(move(_name)), wrapper_class(move(_wrapper_class)), h(_h), + has_captures(_has_captures) { auto gi = c->RegisterType(ft); init_cohort = max(init_cohort, gi->InitCohort() + 1); @@ -440,22 +452,27 @@ void VectorTypeInfo::AddInitializerVals(std::vector& ivs) const ListTypeInfo::ListTypeInfo(CPPCompile* _c, TypePtr _t) : AbstractTypeInfo(_c, move(_t)), types(t->AsTypeList()->GetTypes()) { + // Note, we leave init_cohort at 0 because the skeleton of this type + // is built in the first cohort. for ( auto& tl_i : types ) { auto gi = c->RegisterType(tl_i); if ( gi ) - init_cohort = max(init_cohort, gi->InitCohort()); + final_init_cohort = max(final_init_cohort, gi->InitCohort()); } if ( ! types.empty() ) - ++init_cohort; + ++final_init_cohort; } void ListTypeInfo::AddInitializerVals(std::vector& ivs) const { string type_list; for ( auto& t : types ) - ivs.emplace_back(Fmt(c->TypeOffset(t))); + { + auto iv = Fmt(c->TypeOffset(t)); + ivs.emplace_back(iv); + } } TableTypeInfo::TableTypeInfo(CPPCompile* _c, TypePtr _t) : AbstractTypeInfo(_c, move(_t)) @@ -512,6 +529,8 @@ void FuncTypeInfo::AddInitializerVals(std::vector& ivs) const RecordTypeInfo::RecordTypeInfo(CPPCompile* _c, TypePtr _t) : AbstractTypeInfo(_c, move(_t)) { + // Note, we leave init_cohort at 0 because the skeleton of this type + // is built in the first cohort. auto r = t->AsRecordType()->Types(); if ( ! r ) @@ -523,7 +542,7 @@ RecordTypeInfo::RecordTypeInfo(CPPCompile* _c, TypePtr _t) : AbstractTypeInfo(_c auto gi = c->RegisterType(r_i->type); if ( gi ) - init_cohort = max(init_cohort, gi->InitCohort()); + final_init_cohort = max(final_init_cohort, gi->InitCohort()); // else it's a recursive type, no need to adjust cohort here field_types.push_back(r_i->type); @@ -531,7 +550,7 @@ RecordTypeInfo::RecordTypeInfo(CPPCompile* _c, TypePtr _t) : AbstractTypeInfo(_c if ( r_i->attrs ) { gi = c->RegisterAttributes(r_i->attrs); - init_cohort = max(init_cohort, gi->InitCohort() + 1); + final_init_cohort = max(final_init_cohort, gi->InitCohort() + 1); field_attrs.push_back(gi->Offset()); } else diff --git a/src/script_opt/CPP/InitsInfo.h b/src/script_opt/CPP/InitsInfo.h index 5d8b3d5dab..d50480fb20 100644 --- a/src/script_opt/CPP/InitsInfo.h +++ b/src/script_opt/CPP/InitsInfo.h @@ -116,7 +116,7 @@ public: // The largest initialization cohort of any item in this collection. int MaxCohort() const { return static_cast(instances.size()) - 1; } - // Returns the number of initializations in this collection that below + // Returns the number of initializations in this collection that belong // to the given cohort c. int CohortSize(int c) const { return c > MaxCohort() ? 0 : instances[c].size(); } @@ -258,9 +258,8 @@ public: class CPP_InitInfo { public: - // No constructor - basic initialization happens when the object is - // added via AddInstance() to a CPP_InitsInfo object, which in turn - // will lead to invocation of this object's SetOffset() method. + CPP_InitInfo(const IntrusivePtr& _o) : o(_o.get()) { } + CPP_InitInfo(const Obj* _o) : o(_o) { } virtual ~CPP_InitInfo() { } @@ -282,6 +281,10 @@ public: // Returns this item's initialization cohort. int InitCohort() const { return init_cohort; } + // Returns this item's "final" initialization cohort. See + // discussion below. + int FinalInitCohort() const { return final_init_cohort ? final_init_cohort : init_cohort; } + // Returns the type used for this initializer. virtual std::string InitializerType() const { return ""; } @@ -289,6 +292,8 @@ public: // constructor parameter. virtual void InitializerVals(std::vector& ivs) const = 0; + const Obj* InitObj() const { return o; } + protected: // Returns an offset (into the run-time vector holding all Zeek // constant values) corresponding to the given value. Registers @@ -300,18 +305,27 @@ protected: // value in their constructors. int init_cohort = 0; + // Some initializers (record and list types, in particular) become + // available for other initializers to use them after the first + // cohort is initialized; however, the final initialization comes + // later. If non-zero, this variable tracks the latter. + int final_init_cohort = 0; + // Tracks the collection to which this item belongs. const CPP_InitsInfo* inits_collection = nullptr; // Offset of this item in the collection, or -1 if no association. int offset = -1; + + // Associated object. Used for annotating output. + const Obj* o; }; // Information associated with initializing a basic (non-compound) constant. class BasicConstInfo : public CPP_InitInfo { public: - BasicConstInfo(std::string _val) : val(std::move(_val)) { } + BasicConstInfo(std::string _val) : CPP_InitInfo(nullptr), val(std::move(_val)) { } void InitializerVals(std::vector& ivs) const override { ivs.emplace_back(val); } @@ -386,7 +400,10 @@ private: class PortConstInfo : public CPP_InitInfo { public: - PortConstInfo(ValPtr v) : p(static_cast(v->AsPortVal())->Get()) { } + PortConstInfo(ValPtr v) + : CPP_InitInfo(v), p(static_cast(v->AsPortVal())->Get()) + { + } void InitializerVals(std::vector& ivs) const override { @@ -404,7 +421,7 @@ public: // The first of these is used for items with custom Zeek types, // the second when the type is generic/inapplicable. CompoundItemInfo(CPPCompile* c, ValPtr v); - CompoundItemInfo(CPPCompile* _c) : c(_c) { type = -1; } + CompoundItemInfo(CPPCompile* _c) : CPP_InitInfo(nullptr), c(_c) { type = -1; } void InitializerVals(std::vector& ivs) const override { @@ -545,7 +562,7 @@ protected: class AbstractTypeInfo : public CPP_InitInfo { public: - AbstractTypeInfo(CPPCompile* _c, TypePtr _t) : c(_c), t(std::move(_t)) { } + AbstractTypeInfo(CPPCompile* _c, TypePtr _t) : CPP_InitInfo(_t), c(_c), t(std::move(_t)) { } void InitializerVals(std::vector& ivs) const override { From 2f347bf7d422373284611d7a62f86584dae95d01 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 14 Feb 2023 15:20:56 -0800 Subject: [PATCH 3/3] added to C++ script compiler maintainer notes utility of starting with full base script compile --- src/script_opt/CPP/maint/README | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/src/script_opt/CPP/maint/README b/src/script_opt/CPP/maint/README index 72360997af..35a7e6e33e 100644 --- a/src/script_opt/CPP/maint/README +++ b/src/script_opt/CPP/maint/README @@ -15,34 +15,44 @@ The maintenance workflow: to check in updates to the list of how the compiler currently fares on various btests (see end of this doc): - Thu Jan 12 14:05:26 PST 2023 + Tue Feb 14 15:15:27 PST 2023 -2. Run "find-test-files.sh" to generate a list (to stdout) of all of the +2. Make sure the compiler can compile and execute the base scripts: + + echo | src/zeek -O gen-C++ + ninja + src/zeek -O use-C++ -r some.pcap + + Do this first because if it can't, you'll be making changes to the + compiler that you'll want to subsequent run against the test suite, + per the following. + +3. Run "find-test-files.sh" to generate a list (to stdout) of all of the possible Zeek source files found in the test suite. -3. For each such Zeek file, run "check-zeek.sh" to see whether Zeek can +4. For each such Zeek file, run "check-zeek.sh" to see whether Zeek can parse it. This helps remove from further consideration difficult tests (like those that have embedded input files, or multiple separate scripts). -4. "mkdir CPP-test" - a directory for holding results relating to C++ testing +5. "mkdir CPP-test" - a directory for holding results relating to C++ testing -5. Run "check-CPP-gen.sh" for each Zeek file that passed "check-zeek.sh". +6. Run "check-CPP-gen.sh" for each Zeek file that passed "check-zeek.sh". This will generate a corresponding file in CPP-test/out* indicating whether "-O gen-C++" can successfully run on the input. Presently, it should be able to do so for all of them, other than some exceptions noted below. This step is parallelizable, say using xargs -P 10. -6. Copy ./src/zeek to ./zeek.HOLD. This is used to speed up recompilation used +7. Copy ./src/zeek to ./zeek.HOLD. This is used to speed up recompilation used in the next step. However, it's also a headache to do development to fix a bug and then forget to update zeek.HOLD, which means you wind up running the old version. You can combat that by removing ./zeek.HOLD every time you start working on fixing a bug. -7. Use the appended database to remove inputs that have known issues. +8. Use the appended database to remove inputs that have known issues. -8. For every input that survives that pruning, run "do-CPP-btest.sh". +9. For every input that survives that pruning, run "do-CPP-btest.sh". This will generate C++ for the BTest, compile it, and run the result to see if it succeeds. It populates CPP-test/diag* with the Btest diagnostic output (empty means success). For non-empty output,