-O gen-standalone-C++ fixes for recent more aggressive AST profiling

This commit is contained in:
Vern Paxson 2024-12-12 14:26:21 -08:00
parent b547c7287d
commit 0a813a53c7
16 changed files with 188 additions and 104 deletions

View file

@ -6,10 +6,9 @@ extend-ignore-re = [
# ALLO is a valid FTP command
"\"ALLO\".*200",
"des-ede3-cbc-Env-OID",
"Remove in v6.1.*SupressWeird",
"max_repititions:.*Remove in v6.1",
"mis-aliasing of",
"mis-indexing",
"compilability",
# On purpose
"\"THE NETBIOS NAM\"",
# NFS stuff.

View file

@ -5,6 +5,7 @@
#include <cerrno>
#include "zeek/script_opt/CPP/Compile.h"
#include "zeek/script_opt/IDOptInfo.h"
extern std::unordered_set<std::string> files_with_conditionals;
@ -31,87 +32,49 @@ CPPCompile::~CPPCompile() { fclose(write_file); }
void CPPCompile::Compile(bool report_uncompilable) {
unordered_set<const Type*> rep_types;
unordered_set<string> filenames_reported_as_skipped;
unordered_set<const Attr*> attrs;
bool had_to_skip = false;
// Determine which functions we can call directly, and reuse
// previously compiled instances of those if present.
for ( auto& func : funcs ) {
const auto& f = func.Func();
auto& body = func.Body();
auto& ofiles = analysis_options.only_files;
auto allow_cond = analysis_options.allow_cond;
string fn = body->GetLocationInfo()->filename;
if ( ! allow_cond && ! func.ShouldSkip() && ! ofiles.empty() && files_with_conditionals.count(fn) > 0 ) {
if ( report_uncompilable )
reporter->Warning("%s cannot be compiled to C++ due to source file %s having conditional code",
f->GetName().c_str(), fn.c_str());
else if ( filenames_reported_as_skipped.count(fn) == 0 ) {
reporter->Warning("skipping compilation of files in %s due to presence of conditional code",
fn.c_str());
filenames_reported_as_skipped.insert(fn);
}
for ( auto& func : funcs )
if ( ! AnalyzeFuncBody(func, filenames_reported_as_skipped, rep_types, report_uncompilable) )
had_to_skip = true;
func.SetSkip(true);
}
if ( func.ShouldSkip() ) {
not_fully_compilable.insert(f->GetName());
continue;
}
auto pf = func.Profile();
total_hash = merge_p_hashes(total_hash, pf->HashVal());
for ( auto t : pf->UnorderedTypes() )
rep_types.insert(pfs->TypeRep(t));
auto& pf_all_gl = pf->AllGlobals();
all_accessed_globals.insert(pf_all_gl.begin(), pf_all_gl.end());
auto& pf_gl = pf->Globals();
accessed_globals.insert(pf_gl.begin(), pf_gl.end());
auto& pf_events = pf->Events();
accessed_events.insert(pf_events.begin(), pf_events.end());
auto& pf_lambdas = pf->Lambdas();
accessed_lambdas.insert(pf_lambdas.begin(), pf_lambdas.end());
if ( is_lambda(f) || is_when_lambda(f) ) {
// We deal with these separately.
func.SetSkip(true);
continue;
}
const char* reason;
if ( IsCompilable(func, &reason) ) {
if ( f->Flavor() == FUNC_FLAVOR_FUNCTION )
// Note this as a callable compiled function.
compilable_funcs.insert(BodyName(func));
}
else {
if ( reason && report_uncompilable ) {
had_to_skip = true;
reporter->Warning("%s cannot be compiled to C++ due to %s", f->GetName().c_str(), reason);
}
not_fully_compilable.insert(f->GetName());
}
}
// Generate a hash unique for this compilation.
for ( const auto& func : funcs )
if ( ! func.ShouldSkip() )
total_hash = merge_p_hashes(total_hash, func.Profile()->HashVal());
if ( standalone && had_to_skip )
if ( standalone ) {
if ( had_to_skip )
reporter->FatalError("aborting standalone compilation to C++ due to having to skip some functions");
for ( auto& g : global_scope()->OrderedVars() ) {
if ( ! obj_matches_opt_files(g) )
continue;
// We will need to generate this global's definition, including
// its initialization. Make sure we're tracking it and its
// associated types, including those required for initializing.
auto& t = g->GetType();
(void)pfs->HashType(t);
rep_types.insert(TypeRep(t));
all_accessed_globals.insert(g.get());
accessed_globals.insert(g.get());
for ( const auto& i_e : g->GetOptInfo()->GetInitExprs() ) {
auto pf = std::make_shared<ProfileFunc>(i_e.get());
for ( auto& t : pf->OrderedTypes() ) {
(void)pfs->HashType(t);
rep_types.insert(TypeRep(t));
}
}
}
for ( auto& ea : pfs->ExprAttrs() )
if ( obj_matches_opt_files(ea.first) ) {
auto& attr = ea.first;
attrs.insert(attr);
auto& t = attr->GetExpr()->GetType();
rep_types.insert(TypeRep(t));
}
}
auto t = util::current_time();
total_hash = merge_p_hashes(total_hash, hash<double>{}(t));
@ -134,10 +97,15 @@ void CPPCompile::Compile(bool report_uncompilable) {
for ( const auto& t : rep_types ) {
ASSERT(types.HasKey(t));
TypePtr tp{NewRef{}, (Type*)(t)};
TypePtr tp{NewRef{}, const_cast<Type*>(t)};
RegisterType(tp);
}
for ( const auto& attr : attrs ) {
AttrPtr attr_p = {NewRef{}, const_cast<Attr*>(attr)};
(void)RegisterAttr(attr_p);
}
// The scaffolding is now in place to go ahead and generate
// the functions & lambdas. First declare them ...
for ( const auto& func : funcs )
@ -187,9 +155,92 @@ void CPPCompile::Compile(bool report_uncompilable) {
Emit("};");
if ( standalone )
// Now that we've identified all of the record fields we might have
// to generate, make sure we track their attributes.
for ( const auto& fd : field_decls ) {
auto td = fd.second;
if ( obj_matches_opt_files(td->type) ) {
TypePtr tp = {NewRef{}, const_cast<Type*>(TypeRep(td->type))};
RegisterType(tp);
}
if ( obj_matches_opt_files(td->attrs) )
RegisterAttributes(td->attrs);
}
GenEpilog();
}
// Assesses a single function body for compilation to C++ and records the
// elements it uses.
//
// If conditional code is not allowed (analysis_options.allow_cond is off),
// analysis_options.only_files is non-empty, and the body's source file is in
// files_with_conditionals, then the body is marked as skipped. In that case
// a warning is generated either per function (if report_uncompilable is
// true) or once per file, using filenames_reported_as_skipped to avoid
// repeating the per-file warning.
//
// For bodies that are not already marked as skipped, merges the profile's
// hash into total_hash, adds the profile's type representatives to rep_types,
// and records the globals, events and lambdas the profile reports.
//
// Returns false only if IsCompilable() rejects the body. Returns true in all
// other cases, including for bodies marked as skipped and for lambdas (which
// are marked as skipped here because they are dealt with separately).
bool CPPCompile::AnalyzeFuncBody(FuncInfo& fi, unordered_set<string>& filenames_reported_as_skipped,
                                 unordered_set<const Type*>& rep_types, bool report_uncompilable) {
    const auto& f = fi.Func();
    auto& body = fi.Body();

    string fn = body->GetLocationInfo()->filename;

    if ( ! analysis_options.allow_cond && ! fi.ShouldSkip() ) {
        if ( ! analysis_options.only_files.empty() && files_with_conditionals.count(fn) > 0 ) {
            if ( report_uncompilable )
                reporter->Warning("%s cannot be compiled to C++ due to source file %s having conditional code",
                                  f->GetName().c_str(), fn.c_str());

            else if ( filenames_reported_as_skipped.count(fn) == 0 ) {
                // Only complain once per file.
                reporter->Warning("skipping compilation of files in %s due to presence of conditional code",
                                  fn.c_str());
                filenames_reported_as_skipped.insert(fn);
            }

            fi.SetSkip(true);
        }
    }

    if ( fi.ShouldSkip() ) {
        not_fully_compilable.insert(f->GetName());
        return true;
    }

    auto pf = fi.Profile();
    total_hash = merge_p_hashes(total_hash, pf->HashVal());

    for ( auto t : pf->UnorderedTypes() )
        rep_types.insert(pfs->TypeRep(t));

    auto& pf_all_gl = pf->AllGlobals();
    all_accessed_globals.insert(pf_all_gl.begin(), pf_all_gl.end());

    auto& pf_gl = pf->Globals();
    accessed_globals.insert(pf_gl.begin(), pf_gl.end());

    auto& pf_events = pf->Events();
    accessed_events.insert(pf_events.begin(), pf_events.end());

    auto& pf_lambdas = pf->Lambdas();
    accessed_lambdas.insert(pf_lambdas.begin(), pf_lambdas.end());

    if ( is_lambda(f) || is_when_lambda(f) ) {
        // We deal with these separately.
        fi.SetSkip(true);
        return true;
    }

    // Initialized so that the check below never reads an indeterminate
    // pointer, even if IsCompilable() fails without assigning a reason.
    const char* reason = nullptr;
    if ( IsCompilable(fi, &reason) ) {
        if ( f->Flavor() == FUNC_FLAVOR_FUNCTION )
            // Note this as a callable compiled function.
            compilable_funcs.insert(BodyName(fi));
    }
    else {
        // Standalone compilation always reports the reason, since the
        // function can't simply be left to the interpreter.
        if ( reason && (standalone || report_uncompilable) ) {
            reporter->Warning("%s cannot be compiled to C++ due to %s", f->GetName().c_str(), reason);
        }

        not_fully_compilable.insert(f->GetName());
        return false;
    }

    return true;
}
void CPPCompile::GenProlog() {
Emit("#include \"zeek/script_opt/CPP/Runtime.h\"\n");

View file

@ -7,6 +7,14 @@
// Main driver, invoked by constructor.
void Compile(bool report_uncompilable);
// For a given function body, assess its compilability and track its elements.
// Returns false only if the body was found to be uncompilable; returns true
// otherwise, including for bodies that are marked as skipped (for example
// due to their source file having conditional code) and for lambdas, which
// are dealt with separately. When a body is skipped due to conditional code,
// either generates a per-function warning (if report_uncompilable is true)
// or a once-per-file warning, updating filenames_reported_as_skipped.
// Updates rep_types with the type representatives seen in the function.
bool AnalyzeFuncBody(FuncInfo& fi, std::unordered_set<std::string>& filenames_reported_as_skipped,
std::unordered_set<const Type*>& rep_types, bool report_uncompilable);
// Generate the beginning of the compiled code: run-time functions,
// namespace, auxiliary globals.
void GenProlog();

View file

@ -1264,7 +1264,8 @@ string CPPCompile::GenEnum(const TypePtr& t, const ValPtr& ev) {
mapping_slot = num_ev_mappings++;
string enum_name = et->Lookup(v);
enum_names.emplace_back(TypeOffset(t), std::move(enum_name));
bool create_if_missing = standalone && obj_matches_opt_files(ev);
enum_names.emplace_back(EnumMappingInfo{TypeOffset(t), std::move(enum_name), create_if_missing});
if ( evm != enum_val_mappings.end() ) {
// We're already tracking this enum.

View file

@ -142,6 +142,13 @@ std::unordered_map<const EnumType*, std::unordered_map<int, int>> enum_val_mappi
// outer map).
int num_ev_mappings = 0;
// Information captured for generating entries in "enum_mapping".
// Members carry default initializers so that a default-constructed instance
// never holds indeterminate values; the struct remains an aggregate, so
// brace-initialization in member order continues to work.
struct EnumMappingInfo {
    int enum_type = 0; // as a global offset
    std::string enum_name;
    bool create_if_missing = false;
};
// For each entry in "enum_mapping", the EnumType (as a global offset) and
// name associated with the mapping.
std::vector<std::pair<int, std::string>> enum_names;
std::vector<EnumMappingInfo> enum_names;

View file

@ -104,18 +104,22 @@ void CPPCompile::InitializeFieldMappings() {
StartBlock();
string type_arg, attrs_arg;
if ( ! standalone )
type_arg = attrs_arg = "DO_NOT_CONSTRUCT_VALUE_MARKER";
for ( const auto& mapping : field_decls ) {
auto rt_arg = Fmt(mapping.first);
auto td = mapping.second;
string type_arg = "DO_NOT_CONSTRUCT_VALUE_MARKER";
string attrs_arg = "DO_NOT_CONSTRUCT_VALUE_MARKER";
if ( standalone ) {
// We can assess whether this field is one we need to generate
// because if it is, it will have an &optional attribute that
// is local to one of the compiled source files.
if ( td->attrs && obj_matches_opt_files(td->attrs) ) {
type_arg = Fmt(TypeOffset(td->type));
attrs_arg = Fmt(AttributesOffset(td->attrs));
}
}
Emit("CPP_FieldMapping(%s, \"%s\", %s, %s),", rt_arg, td->id, type_arg, attrs_arg);
}
@ -128,10 +132,11 @@ void CPPCompile::InitializeEnumMappings() {
StartBlock();
auto create_if_missing = standalone ? "true" : "false";
for ( const auto& mapping : enum_names )
Emit("CPP_EnumMapping(%s, \"%s\", %s),", Fmt(mapping.first), mapping.second, create_if_missing);
for ( const auto& en : enum_names ) {
auto create_if_missing = en.create_if_missing ? "true" : "false";
string init_args = Fmt(en.enum_type) + ", \"" + en.enum_name + "\", " + create_if_missing;
Emit("CPP_EnumMapping(%s),", init_args);
}
EndBlock(true);
}

View file

@ -384,8 +384,9 @@ GlobalInitInfo::GlobalInitInfo(CPPCompile* c, const ID* g, string _CPP_name)
val = ValElem(c, nullptr); // empty because we initialize dynamically
if ( gt->Tag() == TYPE_FUNC && (! g->GetVal() || g->GetVal()->AsFunc()->GetKind() == Func::BUILTIN_FUNC) )
// Remember this peculiarity so we can recreate it for
// error-behavior-compatibility.
// Be sure not to try to create BiFs. In addition, GetVal() can be
// nil in certain error situations, which we'll want to recreate
// for behavior compatibility.
func_with_no_val = true;
}
@ -557,7 +558,7 @@ RecordTypeInfo::RecordTypeInfo(CPPCompile* _c, TypePtr _t) : AbstractTypeInfo(_c
field_types.push_back(r_i->type);
if ( c->TargetingStandalone() && r_i->attrs ) {
if ( r_i->attrs && c->TargetingStandalone() && obj_matches_opt_files(r_i->attrs) ) {
gi = c->RegisterAttributes(r_i->attrs);
final_init_cohort = max(final_init_cohort, gi->InitCohort() + 1);
field_attrs.push_back(gi->Offset());

View file

@ -501,8 +501,6 @@ public:
void InitializerVals(std::vector<std::string>& ivs) const override;
protected:
std::string Zeek_name;
std::string CPP_name;
int type;
int attrs;
std::string val;

View file

@ -38,16 +38,18 @@ extern std::vector<std::vector<std::vector<int>>> generate_indices_set(int* init
// These need to be distinct from any values that can appear, which means
// they should be negative, and not -1, which is used as a "N/A" value.
#define END_OF_VEC_VEC -100
#define END_OF_VEC_VEC_VEC -200
// clang-format off
constexpr int END_OF_VEC_VEC = -100;
constexpr int END_OF_VEC_VEC_VEC = -200;
// A marker value for "named" types (those that are simply looked up by
// name at initialization time).
#define NAMED_TYPE_MARKER -300
constexpr int NAMED_TYPE_MARKER = -300;
// A marker value indicating values that should not be constructed if not
// already present.
#define DO_NOT_CONSTRUCT_VALUE_MARKER -400
constexpr int DO_NOT_CONSTRUCT_VALUE_MARKER = -400;
// clang-format on
// An abstract helper class used to access elements of an initialization vector.
// We need the abstraction because InitsManager below needs to be able to refer

View file

@ -191,7 +191,7 @@ shared_ptr<CPP_InitInfo> CPPCompile::RegisterType(const TypePtr& tp) {
shared_ptr<CPP_InitInfo> gi;
if ( standalone || t->GetName().empty() ) {
if ( (standalone && obj_matches_opt_files(tp)) || t->GetName().empty() ) {
switch ( t->Tag() ) {
case TYPE_ADDR:
case TYPE_ANY:

View file

@ -17,12 +17,14 @@ The maintenance workflow:
ninja
src/zeek -O use-C++ -r some.pcap
and that it can compile them standalone:
and that standalone compilation works:
rm CPP-gen.cc
ninja
src/zeek -O gen-standalone-C++ /dev/null
src/zeek -b -O gen-standalone-C++ --optimize-files=base/protocols/conn base/protocols/conn >my-test.zeek
ninja
src/zeek -b -r some.pcap my-test.zeek
# Confirm that it generates conn.log
rm CPP-gen.cc
ninja

View file

@ -534,6 +534,7 @@ void ProfileFunc::TrackID(const ID* id) {
if ( id->IsGlobal() ) {
globals.insert(id);
all_globals.insert(id);
TrackType(id->GetType());
}
ordered_ids.push_back(id);

View file

@ -379,6 +379,7 @@ public:
const IDSet& BiFGlobals() const { return BiF_globals; }
const std::unordered_set<const LambdaExpr*>& Lambdas() const { return lambdas; }
const std::unordered_set<std::string>& Events() const { return events; }
const auto& ExprAttrs() const { return expr_attrs; }
const auto& FuncProfs() const { return func_profs; }

View file

@ -58,6 +58,10 @@ bool is_lambda(const ScriptFunc* f) { return lambdas.count(f) > 0; }
bool is_when_lambda(const ScriptFunc* f) { return when_lambdas.count(f) > 0; }
void analyze_global_stmts(Stmt* stmts) {
if ( analysis_options.gen_standalone_CPP && obj_matches_opt_files(stmts) )
reporter->FatalError("cannot include global statements with -O gen-standalone-C++: %s",
obj_desc(stmts).c_str());
// We ignore analysis_options.only_{files,funcs} - if they're in use, later
// logic will keep this function from being compiled, but it's handy
// now to enter it into "funcs" so we have a FuncInfo to return.
@ -571,6 +575,10 @@ void clear_script_analysis() {
for ( auto& id : f.Scope()->OrderedVars() )
id->ClearOptInfo();
// Clear out optimization info for global variables, too.
for ( auto& g : global_scope()->OrderedVars() )
g->ClearOptInfo();
// Keep the functions around if we're profiling, so we can loop
// over them to generate the profiles.
if ( ! analysis_options.profile_ZAM )
@ -640,22 +648,16 @@ void analyze_scripts(bool no_unused_warnings) {
if ( analysis_options.use_CPP )
use_CPP();
std::shared_ptr<ProfileFuncs> pfs;
// Note, in the following it's not clear whether the final argument
// for absolute/relative record fields matters any more ...
if ( generating_CPP )
pfs = std::make_shared<ProfileFuncs>(funcs, is_CPP_compilable, true, false);
else
pfs = std::make_shared<ProfileFuncs>(funcs, nullptr, true, true);
if ( generating_CPP ) {
if ( analysis_options.gen_ZAM )
reporter->FatalError("-O ZAM and -O gen-C++ conflict");
auto pfs = std::make_shared<ProfileFuncs>(funcs, is_CPP_compilable, true, false);
generate_CPP(pfs);
exit(0);
}
auto pfs = std::make_shared<ProfileFuncs>(funcs, nullptr, true, true);
analyze_scripts_for_ZAM(pfs);
if ( reporter->Errors() > 0 )

View file

@ -18,6 +18,7 @@ struct Options;
namespace zeek::detail {
using ObjPtr = IntrusivePtr<Obj>;
using TypeSet = std::unordered_set<const Type*>;
// Flags controlling what sorts of analysis to do.
@ -256,6 +257,7 @@ extern bool should_analyze(const ScriptFuncPtr& f, const StmtPtr& body);
// True if the given object's location matches one specified by
// --optimize-files=...
extern bool obj_matches_opt_files(const Obj* obj);
inline bool obj_matches_opt_files(const ObjPtr& obj) { return obj_matches_opt_files(obj.get()); }
// Analyze all of the parsed scripts collectively for usage issues (unless
// suppressed by the flag) and optimization.

View file

@ -967,6 +967,10 @@ SetupResult setup(int argc, char** argv, Options* zopts) {
exit(reporter->Errors() != 0);
}
if ( stmts && (stmts->Tag() == STMT_NULL || (stmts->Tag() == STMT_LIST && stmts->AsStmtList()->Stmts().empty())) )
// There are no actual global statements.
stmts = nullptr;
if ( stmts )
analyze_global_stmts(stmts);