-O gen-standalone-C++ fixes for recent more aggressive AST profiling

This commit is contained in:
Vern Paxson 2024-12-12 14:26:21 -08:00
parent b547c7287d
commit 0a813a53c7
16 changed files with 188 additions and 104 deletions

View file

@ -5,6 +5,7 @@
#include <cerrno>
#include "zeek/script_opt/CPP/Compile.h"
#include "zeek/script_opt/IDOptInfo.h"
extern std::unordered_set<std::string> files_with_conditionals;
@ -31,87 +32,49 @@ CPPCompile::~CPPCompile() { fclose(write_file); }
void CPPCompile::Compile(bool report_uncompilable) {
unordered_set<const Type*> rep_types;
unordered_set<string> filenames_reported_as_skipped;
unordered_set<const Attr*> attrs;
bool had_to_skip = false;
// Determine which functions we can call directly, and reuse
// previously compiled instances of those if present.
for ( auto& func : funcs ) {
const auto& f = func.Func();
auto& body = func.Body();
auto& ofiles = analysis_options.only_files;
auto allow_cond = analysis_options.allow_cond;
string fn = body->GetLocationInfo()->filename;
if ( ! allow_cond && ! func.ShouldSkip() && ! ofiles.empty() && files_with_conditionals.count(fn) > 0 ) {
if ( report_uncompilable )
reporter->Warning("%s cannot be compiled to C++ due to source file %s having conditional code",
f->GetName().c_str(), fn.c_str());
else if ( filenames_reported_as_skipped.count(fn) == 0 ) {
reporter->Warning("skipping compilation of files in %s due to presence of conditional code",
fn.c_str());
filenames_reported_as_skipped.insert(fn);
}
for ( auto& func : funcs )
if ( ! AnalyzeFuncBody(func, filenames_reported_as_skipped, rep_types, report_uncompilable) )
had_to_skip = true;
func.SetSkip(true);
}
if ( func.ShouldSkip() ) {
not_fully_compilable.insert(f->GetName());
continue;
}
if ( standalone ) {
if ( had_to_skip )
reporter->FatalError("aborting standalone compilation to C++ due to having to skip some functions");
auto pf = func.Profile();
total_hash = merge_p_hashes(total_hash, pf->HashVal());
for ( auto& g : global_scope()->OrderedVars() ) {
if ( ! obj_matches_opt_files(g) )
continue;
for ( auto t : pf->UnorderedTypes() )
rep_types.insert(pfs->TypeRep(t));
// We will need to generate this global's definition, including
// its initialization. Make sure we're tracking it and its
// associated types, including those required for initializing.
auto& t = g->GetType();
(void)pfs->HashType(t);
rep_types.insert(TypeRep(t));
auto& pf_all_gl = pf->AllGlobals();
all_accessed_globals.insert(pf_all_gl.begin(), pf_all_gl.end());
all_accessed_globals.insert(g.get());
accessed_globals.insert(g.get());
auto& pf_gl = pf->Globals();
accessed_globals.insert(pf_gl.begin(), pf_gl.end());
auto& pf_events = pf->Events();
accessed_events.insert(pf_events.begin(), pf_events.end());
auto& pf_lambdas = pf->Lambdas();
accessed_lambdas.insert(pf_lambdas.begin(), pf_lambdas.end());
if ( is_lambda(f) || is_when_lambda(f) ) {
// We deal with these separately.
func.SetSkip(true);
continue;
}
const char* reason;
if ( IsCompilable(func, &reason) ) {
if ( f->Flavor() == FUNC_FLAVOR_FUNCTION )
// Note this as a callable compiled function.
compilable_funcs.insert(BodyName(func));
}
else {
if ( reason && report_uncompilable ) {
had_to_skip = true;
reporter->Warning("%s cannot be compiled to C++ due to %s", f->GetName().c_str(), reason);
for ( const auto& i_e : g->GetOptInfo()->GetInitExprs() ) {
auto pf = std::make_shared<ProfileFunc>(i_e.get());
for ( auto& t : pf->OrderedTypes() ) {
(void)pfs->HashType(t);
rep_types.insert(TypeRep(t));
}
}
not_fully_compilable.insert(f->GetName());
}
for ( auto& ea : pfs->ExprAttrs() )
if ( obj_matches_opt_files(ea.first) ) {
auto& attr = ea.first;
attrs.insert(attr);
auto& t = attr->GetExpr()->GetType();
rep_types.insert(TypeRep(t));
}
}
// Generate a hash unique for this compilation.
for ( const auto& func : funcs )
if ( ! func.ShouldSkip() )
total_hash = merge_p_hashes(total_hash, func.Profile()->HashVal());
if ( standalone && had_to_skip )
reporter->FatalError("aborting standalone compilation to C++ due to having to skip some functions");
auto t = util::current_time();
total_hash = merge_p_hashes(total_hash, hash<double>{}(t));
@ -134,10 +97,15 @@ void CPPCompile::Compile(bool report_uncompilable) {
for ( const auto& t : rep_types ) {
ASSERT(types.HasKey(t));
TypePtr tp{NewRef{}, (Type*)(t)};
TypePtr tp{NewRef{}, const_cast<Type*>(t)};
RegisterType(tp);
}
for ( const auto& attr : attrs ) {
AttrPtr attr_p = {NewRef{}, const_cast<Attr*>(attr)};
(void)RegisterAttr(attr_p);
}
// The scaffolding is now in place to go ahead and generate
// the functions & lambdas. First declare them ...
for ( const auto& func : funcs )
@ -187,9 +155,92 @@ void CPPCompile::Compile(bool report_uncompilable) {
Emit("};");
if ( standalone )
// Now that we've identified all of the record fields we might have
// to generate, make sure we track their attributes.
for ( const auto& fd : field_decls ) {
auto td = fd.second;
if ( obj_matches_opt_files(td->type) ) {
TypePtr tp = {NewRef{}, const_cast<Type*>(TypeRep(td->type))};
RegisterType(tp);
}
if ( obj_matches_opt_files(td->attrs) )
RegisterAttributes(td->attrs);
}
GenEpilog();
}
// Assesses whether the function body described by "fi" can be compiled to
// C++, and records the elements it uses.
//
// If conditional code is not allowed (analysis_options.allow_cond is false),
// compilation is restricted to particular files (analysis_options.only_files
// is non-empty) and the body's source file appears in
// files_with_conditionals, then the body is marked as skipped.  In that case
// a per-function warning is generated if report_uncompilable is true;
// otherwise a single warning is generated per file, with the file being
// remembered in filenames_reported_as_skipped.
//
// For bodies that are not skipped, merges the body's profile hash into
// total_hash, adds the representatives of its types to rep_types, and adds
// its globals, events and lambdas to the corresponding "accessed" sets.
//
// Returns false only if IsCompilable() rejects the body.  Returns true in
// all other cases, including for bodies marked as skipped and for lambdas
// (which are marked as skipped here because they're dealt with separately).
bool CPPCompile::AnalyzeFuncBody(FuncInfo& fi, unordered_set<string>& filenames_reported_as_skipped,
                                 unordered_set<const Type*>& rep_types, bool report_uncompilable) {
    const auto& f = fi.Func();
    auto& body = fi.Body();

    string fn = body->GetLocationInfo()->filename;

    if ( ! analysis_options.allow_cond && ! fi.ShouldSkip() ) {
        if ( ! analysis_options.only_files.empty() && files_with_conditionals.count(fn) > 0 ) {
            if ( report_uncompilable )
                reporter->Warning("%s cannot be compiled to C++ due to source file %s having conditional code",
                                  f->GetName().c_str(), fn.c_str());

            else if ( filenames_reported_as_skipped.count(fn) == 0 ) {
                // Only complain once per file.
                reporter->Warning("skipping compilation of files in %s due to presence of conditional code",
                                  fn.c_str());
                filenames_reported_as_skipped.insert(fn);
            }

            fi.SetSkip(true);
        }
    }

    if ( fi.ShouldSkip() ) {
        // Skipped bodies are noted as not fully compilable, but this is not
        // reported to the caller as a failure.
        not_fully_compilable.insert(f->GetName());
        return true;
    }

    auto pf = fi.Profile();
    total_hash = merge_p_hashes(total_hash, pf->HashVal());

    for ( auto t : pf->UnorderedTypes() )
        rep_types.insert(pfs->TypeRep(t));

    auto& pf_all_gl = pf->AllGlobals();
    all_accessed_globals.insert(pf_all_gl.begin(), pf_all_gl.end());

    auto& pf_gl = pf->Globals();
    accessed_globals.insert(pf_gl.begin(), pf_gl.end());

    auto& pf_events = pf->Events();
    accessed_events.insert(pf_events.begin(), pf_events.end());

    auto& pf_lambdas = pf->Lambdas();
    accessed_lambdas.insert(pf_lambdas.begin(), pf_lambdas.end());

    if ( is_lambda(f) || is_when_lambda(f) ) {
        // We deal with these separately.
        fi.SetSkip(true);
        return true;
    }

    // Initialized so that the test below is well-defined even if
    // IsCompilable() returns without assigning a reason.
    const char* reason = nullptr;

    if ( IsCompilable(fi, &reason) ) {
        if ( f->Flavor() == FUNC_FLAVOR_FUNCTION )
            // Note this as a callable compiled function.
            compilable_funcs.insert(BodyName(fi));

        return true;
    }

    // When targeting standalone code we always report the reason, not just
    // when report_uncompilable is set.
    if ( reason && (standalone || report_uncompilable) )
        reporter->Warning("%s cannot be compiled to C++ due to %s", f->GetName().c_str(), reason);

    not_fully_compilable.insert(f->GetName());
    return false;
}
void CPPCompile::GenProlog() {
Emit("#include \"zeek/script_opt/CPP/Runtime.h\"\n");

View file

@ -7,6 +7,14 @@
// Main driver, invoked by constructor.
void Compile(bool report_uncompilable);
// For a given function body, assess its compilability and track its
// elements. Returns false if IsCompilable() rejects the body, in which case
// a warning is generated if a reason is available and either
// report_uncompilable is true or we're compiling standalone. Returns true
// otherwise, including for bodies that wind up skipped (already marked as
// such, skipped due to conditional code in their source file, or lambdas).
// When skipping due to conditional code, either generates a per-function
// warning (if report_uncompilable is true) or warns once for the file and
// updates filenames_reported_as_skipped. Updates rep_types with the type
// representatives seen in the function.
bool AnalyzeFuncBody(FuncInfo& fi, std::unordered_set<std::string>& filenames_reported_as_skipped,
std::unordered_set<const Type*>& rep_types, bool report_uncompilable);
// Generate the beginning of the compiled code: run-time functions,
// namespace, auxiliary globals.
void GenProlog();

View file

@ -1264,7 +1264,8 @@ string CPPCompile::GenEnum(const TypePtr& t, const ValPtr& ev) {
mapping_slot = num_ev_mappings++;
string enum_name = et->Lookup(v);
enum_names.emplace_back(TypeOffset(t), std::move(enum_name));
bool create_if_missing = standalone && obj_matches_opt_files(ev);
enum_names.emplace_back(EnumMappingInfo{TypeOffset(t), std::move(enum_name), create_if_missing});
if ( evm != enum_val_mappings.end() ) {
// We're already tracking this enum.

View file

@ -142,6 +142,13 @@ std::unordered_map<const EnumType*, std::unordered_map<int, int>> enum_val_mappi
// outer map).
int num_ev_mappings = 0;
// Information captured for generating entries in "enum_mapping".
// Remains an aggregate, so it can still be brace-initialized in
// member order; the defaults ensure a default-constructed instance
// never holds indeterminate values.
struct EnumMappingInfo {
    int enum_type = 0; // as a global offset
    std::string enum_name; // name of the enum value being mapped
    bool create_if_missing = false; // emitted as the final CPP_EnumMapping argument
};
// For each entry in "enum_mapping", the EnumType (as a global offset) and
// name associated with the mapping.
std::vector<std::pair<int, std::string>> enum_names;
std::vector<EnumMappingInfo> enum_names;

View file

@ -104,17 +104,21 @@ void CPPCompile::InitializeFieldMappings() {
StartBlock();
string type_arg, attrs_arg;
if ( ! standalone )
type_arg = attrs_arg = "DO_NOT_CONSTRUCT_VALUE_MARKER";
for ( const auto& mapping : field_decls ) {
auto rt_arg = Fmt(mapping.first);
auto td = mapping.second;
string type_arg = "DO_NOT_CONSTRUCT_VALUE_MARKER";
string attrs_arg = "DO_NOT_CONSTRUCT_VALUE_MARKER";
if ( standalone ) {
type_arg = Fmt(TypeOffset(td->type));
attrs_arg = Fmt(AttributesOffset(td->attrs));
// We can assess whether this field is one we need to generate
// because if it is, it will have an &optional attribute that
// is local to one of the compiled source files.
if ( td->attrs && obj_matches_opt_files(td->attrs) ) {
type_arg = Fmt(TypeOffset(td->type));
attrs_arg = Fmt(AttributesOffset(td->attrs));
}
}
Emit("CPP_FieldMapping(%s, \"%s\", %s, %s),", rt_arg, td->id, type_arg, attrs_arg);
@ -128,10 +132,11 @@ void CPPCompile::InitializeEnumMappings() {
StartBlock();
auto create_if_missing = standalone ? "true" : "false";
for ( const auto& mapping : enum_names )
Emit("CPP_EnumMapping(%s, \"%s\", %s),", Fmt(mapping.first), mapping.second, create_if_missing);
for ( const auto& en : enum_names ) {
auto create_if_missing = en.create_if_missing ? "true" : "false";
string init_args = Fmt(en.enum_type) + ", \"" + en.enum_name + "\", " + create_if_missing;
Emit("CPP_EnumMapping(%s),", init_args);
}
EndBlock(true);
}

View file

@ -384,8 +384,9 @@ GlobalInitInfo::GlobalInitInfo(CPPCompile* c, const ID* g, string _CPP_name)
val = ValElem(c, nullptr); // empty because we initialize dynamically
if ( gt->Tag() == TYPE_FUNC && (! g->GetVal() || g->GetVal()->AsFunc()->GetKind() == Func::BUILTIN_FUNC) )
// Remember this peculiarity so we can recreate it for
// error-behavior-compatibility.
// Be sure not to try to create BiFs. In addition, GetVal() can be
// nil in certain error situations, which we'll want to recreate
// for behavior compatibility.
func_with_no_val = true;
}
@ -557,7 +558,7 @@ RecordTypeInfo::RecordTypeInfo(CPPCompile* _c, TypePtr _t) : AbstractTypeInfo(_c
field_types.push_back(r_i->type);
if ( c->TargetingStandalone() && r_i->attrs ) {
if ( r_i->attrs && c->TargetingStandalone() && obj_matches_opt_files(r_i->attrs) ) {
gi = c->RegisterAttributes(r_i->attrs);
final_init_cohort = max(final_init_cohort, gi->InitCohort() + 1);
field_attrs.push_back(gi->Offset());

View file

@ -501,8 +501,6 @@ public:
void InitializerVals(std::vector<std::string>& ivs) const override;
protected:
std::string Zeek_name;
std::string CPP_name;
int type;
int attrs;
std::string val;

View file

@ -38,16 +38,18 @@ extern std::vector<std::vector<std::vector<int>>> generate_indices_set(int* init
// These need to be distinct from any values that can appear, which means
// they should be negative, and not -1, which is used as a "N/A" value.
#define END_OF_VEC_VEC -100
#define END_OF_VEC_VEC_VEC -200
// clang-format off
constexpr int END_OF_VEC_VEC = -100;
constexpr int END_OF_VEC_VEC_VEC = -200;
// A marker value for "named" types (those that are simply looked up by
// name at initialization time).
#define NAMED_TYPE_MARKER -300
constexpr int NAMED_TYPE_MARKER = -300;
// A marker value indicating values that should not be constructed if not
// already present.
#define DO_NOT_CONSTRUCT_VALUE_MARKER -400
constexpr int DO_NOT_CONSTRUCT_VALUE_MARKER = -400;
// clang-format on
// An abstract helper class used to access elements of an initialization vector.
// We need the abstraction because InitsManager below needs to be able to refer

View file

@ -191,7 +191,7 @@ shared_ptr<CPP_InitInfo> CPPCompile::RegisterType(const TypePtr& tp) {
shared_ptr<CPP_InitInfo> gi;
if ( standalone || t->GetName().empty() ) {
if ( (standalone && obj_matches_opt_files(tp)) || t->GetName().empty() ) {
switch ( t->Tag() ) {
case TYPE_ADDR:
case TYPE_ANY:

View file

@ -17,12 +17,14 @@ The maintenance workflow:
ninja
src/zeek -O use-C++ -r some.pcap
and that it can compile them standalone:
and that standalone compilation works:
rm CPP-gen.cc
ninja
src/zeek -O gen-standalone-C++ /dev/null
src/zeek -b -O gen-standalone-C++ --optimize-files=base/protocols/conn base/protocols/conn >my-test.zeek
ninja
src/zeek -b -r some.pcap my-test.zeek
# Confirm that it generates conn.log
rm CPP-gen.cc
ninja