Merge remote-tracking branch 'origin/topic/vern/CPP-streamlining'

* origin/topic/vern/CPP-streamlining:
  BTest baseline updates for compile-to-C++
  mark ZAM regression BTests as not suitable for compile-to-C++
  fix for -O gen-C++ maintenance helper to skip BTest intermediary files
  introduced simplified initialization for non-standalone -O gen-C++ code tied -O gen-standalone-C++ to use of --optimize-files
  streamline generated -O C++ code by relying on per-function profiles rather than aggregate profile
  when reporting available/unavailble C++ script bodies, flag those that are skipped
  modified AST profiling to mark (and fully skip) non-optimizable functions
  modified merge_types() to skip work if given identical types, which also preserves type names (useful for -O gen-C++)
This commit is contained in:
Tim Wojtulewicz 2024-12-09 12:15:46 -07:00
commit 6e75417032
35 changed files with 334 additions and 143 deletions

View file

@ -29,6 +29,7 @@ CPPCompile::CPPCompile(vector<FuncInfo>& _funcs, std::shared_ptr<ProfileFuncs> _
CPPCompile::~CPPCompile() { fclose(write_file); }
void CPPCompile::Compile(bool report_uncompilable) {
unordered_set<const Type*> rep_types;
unordered_set<string> filenames_reported_as_skipped;
bool had_to_skip = false;
@ -63,6 +64,24 @@ void CPPCompile::Compile(bool report_uncompilable) {
continue;
}
auto pf = func.Profile();
total_hash = merge_p_hashes(total_hash, pf->HashVal());
for ( auto t : pf->UnorderedTypes() )
rep_types.insert(pfs->TypeRep(t));
auto& pf_all_gl = pf->AllGlobals();
all_accessed_globals.insert(pf_all_gl.begin(), pf_all_gl.end());
auto& pf_gl = pf->Globals();
accessed_globals.insert(pf_gl.begin(), pf_gl.end());
auto& pf_events = pf->Events();
accessed_events.insert(pf_events.begin(), pf_events.end());
auto& pf_lambdas = pf->Lambdas();
accessed_lambdas.insert(pf_lambdas.begin(), pf_lambdas.end());
if ( is_lambda(f) || is_when_lambda(f) ) {
// We deal with these separately.
func.SetSkip(true);
@ -85,35 +104,35 @@ void CPPCompile::Compile(bool report_uncompilable) {
}
}
if ( standalone && had_to_skip )
reporter->FatalError("aborting standalone compilation to C++ due to having to skip some functions");
// Generate a hash unique for this compilation.
for ( const auto& func : funcs )
if ( ! func.ShouldSkip() )
total_hash = merge_p_hashes(total_hash, func.Profile()->HashVal());
if ( standalone && had_to_skip )
reporter->FatalError("aborting standalone compilation to C++ due to having to skip some functions");
auto t = util::current_time();
total_hash = merge_p_hashes(total_hash, hash<double>{}(t));
GenProlog();
// Track all of the types we'll be using.
for ( const auto& t : pfs->RepTypes() ) {
for ( const auto& t : rep_types ) {
TypePtr tp{NewRef{}, (Type*)(t)};
types.AddKey(tp, pfs->HashType(t));
}
NL();
for ( auto& g : pfs->AllGlobals() )
for ( auto& g : all_accessed_globals )
CreateGlobal(g);
for ( const auto& e : pfs->Events() )
for ( const auto& e : accessed_events )
if ( AddGlobal(e, "gl") )
Emit("EventHandlerPtr %s_ev;", globals[string(e)]);
for ( const auto& t : pfs->RepTypes() ) {
for ( const auto& t : rep_types ) {
ASSERT(types.HasKey(t));
TypePtr tp{NewRef{}, (Type*)(t)};
RegisterType(tp);
@ -131,7 +150,7 @@ void CPPCompile::Compile(bool report_uncompilable) {
// be identical. In that case, we don't want to generate the lambda
// twice, but we do want to map the second one to the same body name.
unordered_map<string, const Stmt*> lambda_ASTs;
for ( const auto& l : pfs->Lambdas() ) {
for ( const auto& l : accessed_lambdas ) {
const auto& n = l->Name();
const auto body = l->Ingredients()->Body().get();
if ( lambda_ASTs.count(n) > 0 )
@ -151,7 +170,7 @@ void CPPCompile::Compile(bool report_uncompilable) {
CompileFunc(func);
lambda_ASTs.clear();
for ( const auto& l : pfs->Lambdas() ) {
for ( const auto& l : accessed_lambdas ) {
const auto& n = l->Name();
if ( lambda_ASTs.count(n) > 0 )
continue;

View file

@ -60,6 +60,11 @@ void GenFinishInit();
// Generate the function that registers compiled script bodies.
void GenRegisterBodies();
public:
// Whether we're generating "standalone" code.
bool TargetingStandalone() const { return standalone; }
private:
// True if the given function (plus body and profile) is one that should be
// compiled. If non-nil, sets reason to the the reason why, if there's a
// fundamental problem. If however the function should be skipped for other

View file

@ -308,9 +308,8 @@ string CPPCompile::GenCallExpr(const CallExpr* c, GenType gt, bool top_level) {
if ( pfs->BiFGlobals().count(f_id) == 0 )
gen += +"->AsFunc()";
else if ( pfs->Globals().count(f_id) > 0 )
// The BiF version has an extra "_", per
// AddBiF(..., true).
else if ( accessed_globals.count(f_id) > 0 )
// The BiF version has an extra "_", per AddBiF(..., true).
gen = globals[string(id_name) + "_"];
}

View file

@ -104,11 +104,18 @@ void CPPCompile::InitializeFieldMappings() {
StartBlock();
string type_arg, attrs_arg;
if ( ! standalone )
type_arg = attrs_arg = "DO_NOT_CONSTRUCT_VALUE_MARKER";
for ( const auto& mapping : field_decls ) {
auto rt_arg = Fmt(mapping.first);
auto td = mapping.second;
auto type_arg = Fmt(TypeOffset(td->type));
auto attrs_arg = Fmt(AttributesOffset(td->attrs));
if ( standalone ) {
type_arg = Fmt(TypeOffset(td->type));
attrs_arg = Fmt(AttributesOffset(td->attrs));
}
Emit("CPP_FieldMapping(%s, \"%s\", %s, %s),", rt_arg, td->id, type_arg, attrs_arg);
}
@ -121,8 +128,10 @@ void CPPCompile::InitializeEnumMappings() {
StartBlock();
auto create_if_missing = standalone ? "true" : "false";
for ( const auto& mapping : enum_names )
Emit("CPP_EnumMapping(%s, \"%s\"),", Fmt(mapping.first), mapping.second);
Emit("CPP_EnumMapping(%s, \"%s\", %s),", Fmt(mapping.first), mapping.second, create_if_missing);
EndBlock(true);
}
@ -178,9 +187,15 @@ void CPPCompile::InitializeGlobals() {
Emit("Frame* f__CPP = nullptr;");
NL();
auto& ofiles = analysis_options.only_files;
for ( const auto& ginit : IDOptInfo::GetGlobalInitExprs() ) {
auto g = ginit.Id();
if ( pfs->Globals().count(g) == 0 )
if ( ! ofiles.empty() && ! obj_matches_opt_files(g) )
continue;
if ( accessed_globals.count(g) == 0 )
continue;
auto ic = ginit.IC();

View file

@ -355,10 +355,18 @@ AttrsInfo::AttrsInfo(CPPCompile* _c, const AttributesPtr& _attrs) : CompoundItem
}
}
GlobalInitInfo::GlobalInitInfo(CPPCompile* c, const ID* g, string _CPP_name)
GlobalLookupInitInfo::GlobalLookupInitInfo(CPPCompile* c, const ID* g, string _CPP_name)
: CPP_InitInfo(g), CPP_name(std::move(_CPP_name)) {
Zeek_name = g->Name();
}
void GlobalLookupInitInfo::InitializerVals(std::vector<std::string>& ivs) const {
ivs.push_back(CPP_name);
ivs.push_back(string("\"") + Zeek_name + "\"");
}
GlobalInitInfo::GlobalInitInfo(CPPCompile* c, const ID* g, string _CPP_name)
: GlobalLookupInitInfo(c, g, std::move(_CPP_name)) {
auto& gt = g->GetType();
auto gi = c->RegisterType(gt);
init_cohort = max(init_cohort, gi->InitCohort() + 1);
@ -375,7 +383,7 @@ GlobalInitInfo::GlobalInitInfo(CPPCompile* c, const ID* g, string _CPP_name)
exported = g->IsExport();
val = ValElem(c, nullptr); // empty because we initialize dynamically
if ( gt->Tag() == TYPE_FUNC && ! g->GetVal() )
if ( gt->Tag() == TYPE_FUNC && (! g->GetVal() || g->GetVal()->AsFunc()->GetKind() == Func::BUILTIN_FUNC) )
// Remember this peculiarity so we can recreate it for
// error-behavior-compatibility.
func_with_no_val = true;
@ -549,7 +557,7 @@ RecordTypeInfo::RecordTypeInfo(CPPCompile* _c, TypePtr _t) : AbstractTypeInfo(_c
field_types.push_back(r_i->type);
if ( r_i->attrs ) {
if ( c->TargetingStandalone() && r_i->attrs ) {
gi = c->RegisterAttributes(r_i->attrs);
final_init_cohort = max(final_init_cohort, gi->InitCohort() + 1);
field_attrs.push_back(gi->Offset());
@ -576,6 +584,13 @@ void RecordTypeInfo::AddInitializerVals(std::vector<std::string>& ivs) const {
}
}
NamedTypeInfo::NamedTypeInfo(CPPCompile* _c, TypePtr _t) : AbstractTypeInfo(_c, std::move(_t)) {}
void NamedTypeInfo::AddInitializerVals(std::vector<std::string>& ivs) const {
ivs.emplace_back(Fmt(NAMED_TYPE_MARKER));
ivs.emplace_back(Fmt(c->TrackString(t->GetName())));
}
void IndicesManager::Generate(CPPCompile* c) {
c->Emit("int CPP__Indices__init[] =");
c->StartBlock();

View file

@ -478,8 +478,22 @@ public:
AttrsInfo(CPPCompile* c, const AttributesPtr& attrs);
};
// Information for initialization a Zeek global.
class GlobalInitInfo : public CPP_InitInfo {
// A lightweight initializer for a Zeek global that will look it up at
// initialization time but not create it if missing.
class GlobalLookupInitInfo : public CPP_InitInfo {
public:
GlobalLookupInitInfo(CPPCompile* c, const ID* g, std::string CPP_name);
std::string InitializerType() const override { return "CPP_GlobalLookupInit"; }
void InitializerVals(std::vector<std::string>& ivs) const override;
protected:
std::string Zeek_name;
std::string CPP_name;
};
// Information for initializing a Zeek global.
class GlobalInitInfo : public GlobalLookupInitInfo {
public:
GlobalInitInfo(CPPCompile* c, const ID* g, std::string CPP_name);
@ -639,6 +653,14 @@ private:
std::vector<int> field_attrs;
};
// Class for initializing a named Zeek type that should be present at startup.
class NamedTypeInfo : public AbstractTypeInfo {
public:
NamedTypeInfo(CPPCompile* c, TypePtr _t);
void AddInitializerVals(std::vector<std::string>& ivs) const override;
};
// Much of the table-driven initialization is based on vectors of indices,
// which we represent as vectors of int's, where each int is used to index a
// global C++ vector. This class manages such vectors. In particular, it

View file

@ -90,7 +90,7 @@ and also any compiled-to-C++ bodies present in the `zeek` binary that
The above workflows require the subsequent `zeek` execution to include
the `target.zeek` script. You can avoid this by replacing the first step with:
1. `./src/zeek -O gen-standalone-C++ target.zeek >target-stand-in.zeek`
1. `./src/zeek -O gen-standalone-C++ --optimize-files=target.zeek target.zeek >target-stand-in.zeek`
(and then building as in the 2nd step above).
This option prints to _stdout_ a
@ -100,13 +100,9 @@ without needing to include `target.zeek` in the invocation (nor
the `-O use-C++` option). After loading the stand-in script,
you can still access types and functions declared in `target.zeek`.
Note: the implementation differences between `gen-C++` and `gen-standalone-C++`
wound up being modest enough that it might make sense to just always provide
the latter functionality, which it turns out does not introduce any
additional constraints compared to the current `gen-C++` functionality.
On the other hand, it's possible (not yet established) that code created
using `gen-C++` can be made to compile significantly faster than
standalone code.
Note: `gen-standalone-C++` _must_ be used with `--optimize-files`, as the
compiler needs the latter to determine which global declarations the
standalone code needs to initialize.
There are additional workflows relating to running the test suite: see
`src/script_opt/CPP/maint/README`.

View file

@ -235,7 +235,7 @@ void CPP_TypeInits::PreInit(InitsManager* im, int offset, ValElemVec& init_vals)
if ( tag == TYPE_LIST )
inits_vec[offset] = make_intrusive<TypeList>();
else if ( tag == TYPE_RECORD ) {
else if ( tag == TYPE_RECORD && init_vals[1] != NAMED_TYPE_MARKER ) {
auto name = im->Strings(init_vals[1]);
if ( name[0] )
inits_vec[offset] = get_record_type__CPP(name);
@ -243,7 +243,7 @@ void CPP_TypeInits::PreInit(InitsManager* im, int offset, ValElemVec& init_vals)
inits_vec[offset] = get_record_type__CPP(nullptr);
}
else if ( tag == TYPE_TABLE )
else if ( tag == TYPE_TABLE && init_vals[1] != NAMED_TYPE_MARKER )
inits_vec[offset] = make_intrusive<CPPTableType>();
// else no pre-initialization needed
@ -251,6 +251,13 @@ void CPP_TypeInits::PreInit(InitsManager* im, int offset, ValElemVec& init_vals)
void CPP_TypeInits::Generate(InitsManager* im, vector<TypePtr>& ivec, int offset, ValElemVec& init_vals) const {
auto tag = static_cast<TypeTag>(init_vals[0]);
if ( init_vals.size() > 1 && init_vals[1] == NAMED_TYPE_MARKER ) {
auto name = im->Strings(init_vals[2]);
ivec[offset] = find_global__CPP(name)->GetType();
return;
}
TypePtr t;
switch ( tag ) {
case TYPE_ADDR:
@ -406,6 +413,11 @@ int CPP_FieldMapping::ComputeOffset(InitsManager* im) const {
auto fm_offset = r->FieldOffset(field_name.c_str());
if ( fm_offset < 0 ) { // field does not exist, create it
if ( field_type == DO_NOT_CONSTRUCT_VALUE_MARKER ) {
reporter->CPPRuntimeError("record field \"%s\" missing in %s", field_name.c_str(), obj_desc(r).c_str());
exit(1);
}
fm_offset = r->NumFields();
auto id = util::copy_string(field_name.c_str(), field_name.size());
@ -429,6 +441,11 @@ int CPP_EnumMapping::ComputeOffset(InitsManager* im) const {
auto em_offset = e->Lookup(e_name);
if ( em_offset < 0 ) { // enum constant does not exist, create it
if ( ! construct_if_missing ) {
reporter->CPPRuntimeError("enum element \"%s\" missing in %s", e_name.c_str(), obj_desc(e).c_str());
exit(1);
}
em_offset = e->Names().size();
if ( e->Lookup(em_offset) )
reporter->InternalError("enum inconsistency while initializing compiled scripts");
@ -438,6 +455,10 @@ int CPP_EnumMapping::ComputeOffset(InitsManager* im) const {
return em_offset;
}
void CPP_GlobalLookupInit::Generate(InitsManager* im, std::vector<void*>& /* inits_vec */, int /* offset */) const {
global = find_global__CPP(name);
}
void CPP_GlobalInit::Generate(InitsManager* im, std::vector<void*>& /* inits_vec */, int /* offset */) const {
auto& t = im->Types(type);
global = lookup_global__CPP(name, t, exported);

View file

@ -41,6 +41,14 @@ extern std::vector<std::vector<std::vector<int>>> generate_indices_set(int* init
#define END_OF_VEC_VEC -100
#define END_OF_VEC_VEC_VEC -200
// A marker value for "named" types (those that are simply looked up by
// name at initialization time).
#define NAMED_TYPE_MARKER -300
// A marker value indicating values that should not be constructed if not
// already present.
#define DO_NOT_CONSTRUCT_VALUE_MARKER -400
// An abstract helper class used to access elements of an initialization vector.
// We need the abstraction because InitsManager below needs to be able to refer
// to any of a range of templated classes.
@ -369,8 +377,19 @@ public:
}
};
// Class for initializing a Zeek global. These don't go into an initialization
// Classes for initializing Zeek globals. These don't go into an initialization
// vector, so we use void* as the underlying type.
class CPP_GlobalLookupInit : public CPP_Init<void*> {
public:
CPP_GlobalLookupInit(IDPtr& _global, const char* _name) : CPP_Init<void*>(), global(_global), name(_name) {}
void Generate(InitsManager* im, std::vector<void*>& /* inits_vec */, int /* offset */) const override;
protected:
IDPtr& global;
const char* name;
};
class CPP_GlobalInit : public CPP_Init<void*> {
public:
CPP_GlobalInit(IDPtr& _global, const char* _name, int _type, int _attrs, int _val, bool _exported,
@ -463,8 +482,12 @@ public:
private:
int rec; // index to retrieve the record's type
std::string field_name; // which field this offset pertains to
int field_type; // the field's type, in case we have to construct it
int field_attrs; // the same for the field's attributes
// The field's type, in case we have to construct it. If
// DO_NOT_CONSTRUCT_VALUE_MARKER then it's instead an error
// if missing.
int field_type;
int field_attrs; // the same for the field's attributes
};
// Constructs at run-time a mapping between abstract enum values used when
@ -473,13 +496,15 @@ private:
// the enum).
class CPP_EnumMapping {
public:
CPP_EnumMapping(int _e_type, std::string _e_name) : e_type(_e_type), e_name(std::move(_e_name)) {}
CPP_EnumMapping(int _e_type, std::string _e_name, bool _construct_if_missing)
: e_type(_e_type), e_name(std::move(_e_name)), construct_if_missing(_construct_if_missing) {}
int ComputeOffset(InitsManager* im) const;
private:
int e_type; // index to EnumType
std::string e_name; // which enum constant for that type
int e_type; // index to EnumType
std::string e_name; // which enum constant for that type
bool construct_if_missing; // if true, construct constant if not present
};
// Looks up a BiF of the given name, making it available to compiled

View file

@ -191,41 +191,45 @@ shared_ptr<CPP_InitInfo> CPPCompile::RegisterType(const TypePtr& tp) {
shared_ptr<CPP_InitInfo> gi;
switch ( t->Tag() ) {
case TYPE_ADDR:
case TYPE_ANY:
case TYPE_BOOL:
case TYPE_COUNT:
case TYPE_DOUBLE:
case TYPE_ERROR:
case TYPE_INT:
case TYPE_INTERVAL:
case TYPE_PATTERN:
case TYPE_PORT:
case TYPE_STRING:
case TYPE_TIME:
case TYPE_VOID:
case TYPE_SUBNET:
case TYPE_FILE: gi = make_shared<BaseTypeInfo>(this, tp); break;
if ( standalone || t->GetName().empty() ) {
switch ( t->Tag() ) {
case TYPE_ADDR:
case TYPE_ANY:
case TYPE_BOOL:
case TYPE_COUNT:
case TYPE_DOUBLE:
case TYPE_ERROR:
case TYPE_INT:
case TYPE_INTERVAL:
case TYPE_PATTERN:
case TYPE_PORT:
case TYPE_STRING:
case TYPE_TIME:
case TYPE_VOID:
case TYPE_SUBNET:
case TYPE_FILE: gi = make_shared<BaseTypeInfo>(this, tp); break;
case TYPE_ENUM: gi = make_shared<EnumTypeInfo>(this, tp); break;
case TYPE_ENUM: gi = make_shared<EnumTypeInfo>(this, tp); break;
case TYPE_OPAQUE: gi = make_shared<OpaqueTypeInfo>(this, tp); break;
case TYPE_OPAQUE: gi = make_shared<OpaqueTypeInfo>(this, tp); break;
case TYPE_TYPE: gi = make_shared<TypeTypeInfo>(this, tp); break;
case TYPE_TYPE: gi = make_shared<TypeTypeInfo>(this, tp); break;
case TYPE_VECTOR: gi = make_shared<VectorTypeInfo>(this, tp); break;
case TYPE_VECTOR: gi = make_shared<VectorTypeInfo>(this, tp); break;
case TYPE_LIST: gi = make_shared<ListTypeInfo>(this, tp); break;
case TYPE_LIST: gi = make_shared<ListTypeInfo>(this, tp); break;
case TYPE_TABLE: gi = make_shared<TableTypeInfo>(this, tp); break;
case TYPE_TABLE: gi = make_shared<TableTypeInfo>(this, tp); break;
case TYPE_RECORD: gi = make_shared<RecordTypeInfo>(this, tp); break;
case TYPE_RECORD: gi = make_shared<RecordTypeInfo>(this, tp); break;
case TYPE_FUNC: gi = make_shared<FuncTypeInfo>(this, tp); break;
case TYPE_FUNC: gi = make_shared<FuncTypeInfo>(this, tp); break;
default: reporter->InternalError("bad type in CPPCompile::RegisterType");
default: reporter->InternalError("bad type in CPPCompile::RegisterType");
}
}
else
gi = make_shared<NamedTypeInfo>(this, tp);
type_info->AddInstance(gi);
processed_types[t] = gi;

View file

@ -11,7 +11,7 @@ void CPPCompile::CreateGlobal(const ID* g) {
auto gn = string(g->Name());
bool is_bif = pfs->BiFGlobals().count(g) > 0;
if ( pfs->Globals().count(g) == 0 ) {
if ( accessed_globals.count(g) == 0 ) {
// Only used in the context of calls. If it's compilable,
// then we'll call it directly.
if ( compilable_funcs.count(gn) > 0 ) {
@ -28,11 +28,16 @@ void CPPCompile::CreateGlobal(const ID* g) {
if ( AddGlobal(gn, "gl") ) { // We'll be creating this global.
Emit("IDPtr %s;", globals[gn]);
if ( pfs->Events().count(gn) > 0 )
if ( accessed_events.count(gn) > 0 )
// This is an event that's also used as a variable.
Emit("EventHandlerPtr %s_ev;", globals[gn]);
auto gi = make_shared<GlobalInitInfo>(this, g, globals[gn]);
shared_ptr<CPP_InitInfo> gi;
if ( standalone )
gi = make_shared<GlobalInitInfo>(this, g, globals[gn]);
else
gi = make_shared<GlobalLookupInitInfo>(this, g, globals[gn]);
global_id_info->AddInstance(gi);
global_gis[g] = gi;
}
@ -64,7 +69,12 @@ std::shared_ptr<CPP_InitInfo> CPPCompile::RegisterGlobal(const ID* g) {
return gg->second;
}
auto gi = make_shared<GlobalInitInfo>(this, g, globals[gn]);
shared_ptr<CPP_InitInfo> gi;
if ( standalone )
gi = make_shared<GlobalInitInfo>(this, g, globals[gn]);
else
gi = make_shared<GlobalLookupInitInfo>(this, g, globals[gn]);
global_id_info->AddInstance(gi);
global_gis[g] = gi;

View file

@ -53,6 +53,19 @@ std::string Canonicalize(const std::string& name) const;
// be a EXPR_NAME).
std::string GlobalName(const ExprPtr& e) { return globals[e->AsNameExpr()->Id()->Name()]; }
// Globals that are used (appear in the profiles) of the bodies we're
// compiling. Includes globals just used as functions to call.
std::unordered_set<const ID*> all_accessed_globals;
// Same, but just the globals used in contexts beyond function calls.
std::unordered_set<const ID*> accessed_globals;
// Lambdas that are accessed.
std::unordered_set<const LambdaExpr*> accessed_lambdas;
// Events that are accessed.
std::unordered_set<std::string> accessed_events;
// Maps global names (not identifiers) to the names we use for them.
std::unordered_map<std::string, std::string> globals;

View file

@ -1,6 +1,7 @@
#! /bin/sh
find ../testing/btest -type f |
grep -v '\.tmp/' |
xargs grep -E -l '@TEST' |
xargs grep -E -l '^[ ]*(event|print)' |
xargs grep -E -c 'REQUIRES.*CPP.*((!=.*1)|(==.*0))' |