Merge remote-tracking branch 'origin/topic/vern/Feb23-C++-maint'

* origin/topic/vern/Feb23-C++-maint:
  added to C++ script compiler maintainer notes utility of starting with full base script compile
  fixes for order-of-initialization in scripts compiled to C++ annotations of such initializations to tie them to the original Zeek script
  Fixed bad memory access in compiled-to-C++ scripts when initializing attributes
This commit is contained in:
Arne Welzel 2023-02-15 13:30:29 +01:00
commit 8e2d68ffec
6 changed files with 95 additions and 34 deletions

View file

@ -1,3 +1,10 @@
6.0.0-dev.45 | 2023-02-15 13:30:29 +0100
* added to C++ script compiler maintainer notes utility of starting with full base script compile (Vern Paxson, Corelight)
* fixes for order-of-initialization in scripts compiled to C++ (Vern Paxson, Corelight)
annotations of such initializations to tie them to the original Zeek script
6.0.0-dev.41 | 2023-02-14 13:18:11 -0800
* Bump external zeek-testing commit hash to bring it up to master (Christian Kreibich, Corelight)

View file

@ -1 +1 @@
6.0.0-dev.41
6.0.0-dev.45

View file

@ -18,14 +18,14 @@ string CPP_InitsInfo::Name(int index) const
void CPP_InitsInfo::AddInstance(shared_ptr<CPP_InitInfo> g)
{
auto init_cohort = g->InitCohort();
auto final_init_cohort = g->FinalInitCohort();
if ( static_cast<int>(instances.size()) <= init_cohort )
instances.resize(init_cohort + 1);
if ( static_cast<int>(instances.size()) <= final_init_cohort )
instances.resize(final_init_cohort + 1);
g->SetOffset(this, size++);
instances[init_cohort].push_back(move(g));
instances[final_init_cohort].push_back(move(g));
}
string CPP_InitsInfo::Declare() const
@ -47,9 +47,14 @@ void CPP_InitsInfo::GenerateInitializers(CPPCompile* c)
c->IndentUp();
c->Emit("{");
int n = 0;
// Add each cohort as a vector element.
for ( auto& cohort : instances )
{
if ( ++n > 1 )
c->Emit("");
c->Emit("{");
BuildCohort(c, cohort);
c->Emit("},");
@ -83,11 +88,17 @@ void CPP_InitsInfo::BuildOffsetSet(CPPCompile* c)
void CPP_InitsInfo::BuildCohort(CPPCompile* c, std::vector<std::shared_ptr<CPP_InitInfo>>& cohort)
{
int n = 0;
for ( auto& co : cohort )
{
vector<string> ivs;
auto o = co->InitObj();
if ( o )
c->Emit("/* #%s: Initializing %s: */", Fmt(co->Offset()), obj_desc(o));
co->InitializerVals(ivs);
BuildCohortElement(c, co->InitializerType(), ivs);
++n;
}
}
@ -137,7 +148,7 @@ string CPP_InitInfo::ValElem(CPPCompile* c, ValPtr v)
return Fmt(-1);
}
DescConstInfo::DescConstInfo(CPPCompile* c, ValPtr v) : CPP_InitInfo()
DescConstInfo::DescConstInfo(CPPCompile* c, ValPtr v) : CPP_InitInfo(v)
{
ODesc d;
v->Describe(&d);
@ -145,7 +156,7 @@ DescConstInfo::DescConstInfo(CPPCompile* c, ValPtr v) : CPP_InitInfo()
init = Fmt(s);
}
EnumConstInfo::EnumConstInfo(CPPCompile* c, ValPtr v)
EnumConstInfo::EnumConstInfo(CPPCompile* c, ValPtr v) : CPP_InitInfo(v)
{
auto ev = v->AsEnumVal();
auto& ev_t = ev->GetType();
@ -154,7 +165,7 @@ EnumConstInfo::EnumConstInfo(CPPCompile* c, ValPtr v)
e_val = v->AsEnum();
}
StringConstInfo::StringConstInfo(CPPCompile* c, ValPtr v) : CPP_InitInfo()
StringConstInfo::StringConstInfo(CPPCompile* c, ValPtr v) : CPP_InitInfo(v)
{
auto s = v->AsString();
const char* b = (const char*)(s->Bytes());
@ -163,7 +174,7 @@ StringConstInfo::StringConstInfo(CPPCompile* c, ValPtr v) : CPP_InitInfo()
chars = c->TrackString(CPPEscape(b, len));
}
PatternConstInfo::PatternConstInfo(CPPCompile* c, ValPtr v) : CPP_InitInfo()
PatternConstInfo::PatternConstInfo(CPPCompile* c, ValPtr v) : CPP_InitInfo(v)
{
auto re = v->AsPatternVal()->Get();
pattern = c->TrackString(CPPEscape(re->OrigText()));
@ -171,7 +182,7 @@ PatternConstInfo::PatternConstInfo(CPPCompile* c, ValPtr v) : CPP_InitInfo()
is_single_line = re->IsSingleLine();
}
CompoundItemInfo::CompoundItemInfo(CPPCompile* _c, ValPtr v) : CPP_InitInfo(), c(_c)
CompoundItemInfo::CompoundItemInfo(CPPCompile* _c, ValPtr v) : CPP_InitInfo(v), c(_c)
{
auto& t = v->GetType();
type = c->TypeOffset(t);
@ -330,7 +341,7 @@ AttrsInfo::AttrsInfo(CPPCompile* _c, const AttributesPtr& _attrs) : CompoundItem
}
GlobalInitInfo::GlobalInitInfo(CPPCompile* c, const ID* g, string _CPP_name)
: CPP_InitInfo(), CPP_name(move(_CPP_name))
: CPP_InitInfo(g), CPP_name(move(_CPP_name))
{
Zeek_name = g->Name();
@ -369,7 +380,7 @@ void GlobalInitInfo::InitializerVals(std::vector<std::string>& ivs) const
}
CallExprInitInfo::CallExprInitInfo(CPPCompile* c, ExprPtr _e, string _e_name, string _wrapper_class)
: e(move(_e)), e_name(move(_e_name)), wrapper_class(move(_wrapper_class))
: CPP_InitInfo(_e), e(move(_e)), e_name(move(_e_name)), wrapper_class(move(_wrapper_class))
{
auto gi = c->RegisterType(e->GetType());
init_cohort = max(init_cohort, gi->InitCohort() + 1);
@ -378,7 +389,8 @@ CallExprInitInfo::CallExprInitInfo(CPPCompile* c, ExprPtr _e, string _e_name, st
LambdaRegistrationInfo::LambdaRegistrationInfo(CPPCompile* c, string _name, FuncTypePtr ft,
string _wrapper_class, p_hash_type _h,
bool _has_captures)
: name(move(_name)), wrapper_class(move(_wrapper_class)), h(_h), has_captures(_has_captures)
: CPP_InitInfo(ft), name(move(_name)), wrapper_class(move(_wrapper_class)), h(_h),
has_captures(_has_captures)
{
auto gi = c->RegisterType(ft);
init_cohort = max(init_cohort, gi->InitCohort() + 1);
@ -440,22 +452,27 @@ void VectorTypeInfo::AddInitializerVals(std::vector<std::string>& ivs) const
ListTypeInfo::ListTypeInfo(CPPCompile* _c, TypePtr _t)
: AbstractTypeInfo(_c, move(_t)), types(t->AsTypeList()->GetTypes())
{
// Note, we leave init_cohort at 0 because the skeleton of this type
// is built in the first cohort.
for ( auto& tl_i : types )
{
auto gi = c->RegisterType(tl_i);
if ( gi )
init_cohort = max(init_cohort, gi->InitCohort());
final_init_cohort = max(final_init_cohort, gi->InitCohort());
}
if ( ! types.empty() )
++init_cohort;
++final_init_cohort;
}
void ListTypeInfo::AddInitializerVals(std::vector<std::string>& ivs) const
{
string type_list;
for ( auto& t : types )
ivs.emplace_back(Fmt(c->TypeOffset(t)));
{
auto iv = Fmt(c->TypeOffset(t));
ivs.emplace_back(iv);
}
}
TableTypeInfo::TableTypeInfo(CPPCompile* _c, TypePtr _t) : AbstractTypeInfo(_c, move(_t))
@ -512,6 +529,8 @@ void FuncTypeInfo::AddInitializerVals(std::vector<std::string>& ivs) const
RecordTypeInfo::RecordTypeInfo(CPPCompile* _c, TypePtr _t) : AbstractTypeInfo(_c, move(_t))
{
// Note, we leave init_cohort at 0 because the skeleton of this type
// is built in the first cohort.
auto r = t->AsRecordType()->Types();
if ( ! r )
@ -523,7 +542,7 @@ RecordTypeInfo::RecordTypeInfo(CPPCompile* _c, TypePtr _t) : AbstractTypeInfo(_c
auto gi = c->RegisterType(r_i->type);
if ( gi )
init_cohort = max(init_cohort, gi->InitCohort());
final_init_cohort = max(final_init_cohort, gi->InitCohort());
// else it's a recursive type, no need to adjust cohort here
field_types.push_back(r_i->type);
@ -531,7 +550,7 @@ RecordTypeInfo::RecordTypeInfo(CPPCompile* _c, TypePtr _t) : AbstractTypeInfo(_c
if ( r_i->attrs )
{
gi = c->RegisterAttributes(r_i->attrs);
init_cohort = max(init_cohort, gi->InitCohort() + 1);
final_init_cohort = max(final_init_cohort, gi->InitCohort() + 1);
field_attrs.push_back(gi->Offset());
}
else

View file

@ -116,7 +116,7 @@ public:
// The largest initialization cohort of any item in this collection.
int MaxCohort() const { return static_cast<int>(instances.size()) - 1; }
// Returns the number of initializations in this collection that below
// Returns the number of initializations in this collection that belong
// to the given cohort c.
int CohortSize(int c) const { return c > MaxCohort() ? 0 : instances[c].size(); }
@ -258,9 +258,8 @@ public:
class CPP_InitInfo
{
public:
// No constructor - basic initialization happens when the object is
// added via AddInstance() to a CPP_InitsInfo object, which in turn
// will lead to invocation of this object's SetOffset() method.
CPP_InitInfo(const IntrusivePtr<Obj>& _o) : o(_o.get()) { }
CPP_InitInfo(const Obj* _o) : o(_o) { }
virtual ~CPP_InitInfo() { }
@ -282,6 +281,10 @@ public:
// Returns this item's initialization cohort.
int InitCohort() const { return init_cohort; }
// Returns this item's "final" initialization cohort. See
// discussion below.
int FinalInitCohort() const { return final_init_cohort ? final_init_cohort : init_cohort; }
// Returns the type used for this initializer.
virtual std::string InitializerType() const { return "<shouldn't-be-used>"; }
@ -289,6 +292,8 @@ public:
// constructor parameter.
virtual void InitializerVals(std::vector<std::string>& ivs) const = 0;
const Obj* InitObj() const { return o; }
protected:
// Returns an offset (into the run-time vector holding all Zeek
// constant values) corresponding to the given value. Registers
@ -300,18 +305,27 @@ protected:
// value in their constructors.
int init_cohort = 0;
// Some initializers (record and list types, in particular) become
// available for other initializers to use them after the first
// cohort is initialized; however, the final initialization comes
// later. If non-zero, this variable tracks the latter.
int final_init_cohort = 0;
// Tracks the collection to which this item belongs.
const CPP_InitsInfo* inits_collection = nullptr;
// Offset of this item in the collection, or -1 if no association.
int offset = -1;
// Associated object. Used for annotating output.
const Obj* o;
};
// Information associated with initializing a basic (non-compound) constant.
class BasicConstInfo : public CPP_InitInfo
{
public:
BasicConstInfo(std::string _val) : val(std::move(_val)) { }
BasicConstInfo(std::string _val) : CPP_InitInfo(nullptr), val(std::move(_val)) { }
void InitializerVals(std::vector<std::string>& ivs) const override { ivs.emplace_back(val); }
@ -386,7 +400,10 @@ private:
class PortConstInfo : public CPP_InitInfo
{
public:
PortConstInfo(ValPtr v) : p(static_cast<UnsignedValImplementation*>(v->AsPortVal())->Get()) { }
PortConstInfo(ValPtr v)
: CPP_InitInfo(v), p(static_cast<UnsignedValImplementation*>(v->AsPortVal())->Get())
{
}
void InitializerVals(std::vector<std::string>& ivs) const override
{
@ -404,7 +421,7 @@ public:
// The first of these is used for items with custom Zeek types,
// the second when the type is generic/inapplicable.
CompoundItemInfo(CPPCompile* c, ValPtr v);
CompoundItemInfo(CPPCompile* _c) : c(_c) { type = -1; }
CompoundItemInfo(CPPCompile* _c) : CPP_InitInfo(nullptr), c(_c) { type = -1; }
void InitializerVals(std::vector<std::string>& ivs) const override
{
@ -545,7 +562,7 @@ protected:
class AbstractTypeInfo : public CPP_InitInfo
{
public:
AbstractTypeInfo(CPPCompile* _c, TypePtr _t) : c(_c), t(std::move(_t)) { }
AbstractTypeInfo(CPPCompile* _c, TypePtr _t) : CPP_InitInfo(_t), c(_c), t(std::move(_t)) { }
void InitializerVals(std::vector<std::string>& ivs) const override
{

View file

@ -170,12 +170,20 @@ void CPP_IndexedInits<T>::Generate(InitsManager* im, std::vector<AttrPtr>& ivec,
auto tag = static_cast<AttrTag>(init_vals[0]);
auto ae_tag = static_cast<AttrExprType>(init_vals[1]);
if ( ae_tag == AE_NONE )
{
ivec[offset] = make_intrusive<Attr>(tag);
return;
}
ExprPtr e;
auto e_arg = init_vals[2];
switch ( ae_tag )
{
case AE_NONE:
// Shouldn't happen, per test above.
ASSERT(0);
break;
case AE_CONST:

View file

@ -15,34 +15,44 @@ The maintenance workflow:
to check in updates to the list of how the compiler currently fares
on various btests (see end of this doc):
Thu Jan 12 14:05:26 PST 2023
Tue Feb 14 15:15:27 PST 2023
2. Run "find-test-files.sh" to generate a list (to stdout) of all of the
2. Make sure the compiler can compile and execute the base scripts:
echo | src/zeek -O gen-C++
ninja
src/zeek -O use-C++ -r some.pcap
Do this first because if it can't, you'll be making changes to the
compiler that you'll want to subsequent run against the test suite,
per the following.
3. Run "find-test-files.sh" to generate a list (to stdout) of all of the
possible Zeek source files found in the test suite.
3. For each such Zeek file, run "check-zeek.sh" to see whether Zeek can
4. For each such Zeek file, run "check-zeek.sh" to see whether Zeek can
parse it. This helps remove from further consideration difficult
tests (like those that have embedded input files, or multiple separate
scripts).
4. "mkdir CPP-test" - a directory for holding results relating to C++ testing
5. "mkdir CPP-test" - a directory for holding results relating to C++ testing
5. Run "check-CPP-gen.sh" for each Zeek file that passed "check-zeek.sh".
6. Run "check-CPP-gen.sh" for each Zeek file that passed "check-zeek.sh".
This will generate a corresponding file in CPP-test/out* indicating whether
"-O gen-C++" can successfully run on the input. Presently, it should
be able to do so for all of them, other than some exceptions noted below.
This step is parallelizable, say using xargs -P 10.
6. Copy ./src/zeek to ./zeek.HOLD. This is used to speed up recompilation used
7. Copy ./src/zeek to ./zeek.HOLD. This is used to speed up recompilation used
in the next step. However, it's also a headache to do development to
fix a bug and then forget to update zeek.HOLD, which means you wind up
running the old version. You can combat that by removing ./zeek.HOLD
every time you start working on fixing a bug.
7. Use the appended database to remove inputs that have known issues.
8. Use the appended database to remove inputs that have known issues.
8. For every input that survives that pruning, run "do-CPP-btest.sh".
9. For every input that survives that pruning, run "do-CPP-btest.sh".
This will generate C++ for the BTest, compile it, and run the result
to see if it succeeds. It populates CPP-test/diag* with the Btest
diagnostic output (empty means success). For non-empty output,