diff --git a/.typos.toml b/.typos.toml index 4d74a52e7f..16ce49b93e 100644 --- a/.typos.toml +++ b/.typos.toml @@ -6,10 +6,9 @@ extend-ignore-re = [ # ALLO is a valid FTP command "\"ALLO\".*200", "des-ede3-cbc-Env-OID", - "Remove in v6.1.*SupressWeird", - "max_repititions:.*Remove in v6.1", "mis-aliasing of", "mis-indexing", + "compilability", # On purpose "\"THE NETBIOS NAM\"", # NFS stuff. diff --git a/src/script_opt/CPP/Driver.cc b/src/script_opt/CPP/Driver.cc index c96a8012cf..45c0ffb7ac 100644 --- a/src/script_opt/CPP/Driver.cc +++ b/src/script_opt/CPP/Driver.cc @@ -5,6 +5,7 @@ #include #include "zeek/script_opt/CPP/Compile.h" +#include "zeek/script_opt/IDOptInfo.h" extern std::unordered_set files_with_conditionals; @@ -31,87 +32,49 @@ CPPCompile::~CPPCompile() { fclose(write_file); } void CPPCompile::Compile(bool report_uncompilable) { unordered_set rep_types; unordered_set filenames_reported_as_skipped; + unordered_set attrs; bool had_to_skip = false; - // Determine which functions we can call directly, and reuse - // previously compiled instances of those if present. - for ( auto& func : funcs ) { - const auto& f = func.Func(); - auto& body = func.Body(); - - auto& ofiles = analysis_options.only_files; - auto allow_cond = analysis_options.allow_cond; - - string fn = body->GetLocationInfo()->filename; - - if ( ! allow_cond && ! func.ShouldSkip() && ! ofiles.empty() && files_with_conditionals.count(fn) > 0 ) { - if ( report_uncompilable ) - reporter->Warning("%s cannot be compiled to C++ due to source file %s having conditional code", - f->GetName().c_str(), fn.c_str()); - - else if ( filenames_reported_as_skipped.count(fn) == 0 ) { - reporter->Warning("skipping compilation of files in %s due to presence of conditional code", - fn.c_str()); - filenames_reported_as_skipped.insert(fn); - } - + for ( auto& func : funcs ) + if ( ! 
AnalyzeFuncBody(func, filenames_reported_as_skipped, rep_types, report_uncompilable) ) had_to_skip = true; - func.SetSkip(true); - } - if ( func.ShouldSkip() ) { - not_fully_compilable.insert(f->GetName()); - continue; - } + if ( standalone ) { + if ( had_to_skip ) + reporter->FatalError("aborting standalone compilation to C++ due to having to skip some functions"); - auto pf = func.Profile(); - total_hash = merge_p_hashes(total_hash, pf->HashVal()); + for ( auto& g : global_scope()->OrderedVars() ) { + if ( ! obj_matches_opt_files(g) ) + continue; - for ( auto t : pf->UnorderedTypes() ) - rep_types.insert(pfs->TypeRep(t)); + // We will need to generate this global's definition, including + // its initialization. Make sure we're tracking it and its + // associated types, including those required for initializing. + auto& t = g->GetType(); + (void)pfs->HashType(t); + rep_types.insert(TypeRep(t)); - auto& pf_all_gl = pf->AllGlobals(); - all_accessed_globals.insert(pf_all_gl.begin(), pf_all_gl.end()); + all_accessed_globals.insert(g.get()); + accessed_globals.insert(g.get()); - auto& pf_gl = pf->Globals(); - accessed_globals.insert(pf_gl.begin(), pf_gl.end()); - - auto& pf_events = pf->Events(); - accessed_events.insert(pf_events.begin(), pf_events.end()); - - auto& pf_lambdas = pf->Lambdas(); - accessed_lambdas.insert(pf_lambdas.begin(), pf_lambdas.end()); - - if ( is_lambda(f) || is_when_lambda(f) ) { - // We deal with these separately. - func.SetSkip(true); - continue; - } - - const char* reason; - if ( IsCompilable(func, &reason) ) { - if ( f->Flavor() == FUNC_FLAVOR_FUNCTION ) - // Note this as a callable compiled function. 
- compilable_funcs.insert(BodyName(func)); - } - else { - if ( reason && report_uncompilable ) { - had_to_skip = true; - reporter->Warning("%s cannot be compiled to C++ due to %s", f->GetName().c_str(), reason); + for ( const auto& i_e : g->GetOptInfo()->GetInitExprs() ) { + auto pf = std::make_shared(i_e.get()); + for ( auto& t : pf->OrderedTypes() ) { + (void)pfs->HashType(t); + rep_types.insert(TypeRep(t)); + } } - - not_fully_compilable.insert(f->GetName()); } + + for ( auto& ea : pfs->ExprAttrs() ) + if ( obj_matches_opt_files(ea.first) ) { + auto& attr = ea.first; + attrs.insert(attr); + auto& t = attr->GetExpr()->GetType(); + rep_types.insert(TypeRep(t)); + } } - // Generate a hash unique for this compilation. - for ( const auto& func : funcs ) - if ( ! func.ShouldSkip() ) - total_hash = merge_p_hashes(total_hash, func.Profile()->HashVal()); - - if ( standalone && had_to_skip ) - reporter->FatalError("aborting standalone compilation to C++ due to having to skip some functions"); - auto t = util::current_time(); total_hash = merge_p_hashes(total_hash, hash{}(t)); @@ -134,10 +97,15 @@ void CPPCompile::Compile(bool report_uncompilable) { for ( const auto& t : rep_types ) { ASSERT(types.HasKey(t)); - TypePtr tp{NewRef{}, (Type*)(t)}; + TypePtr tp{NewRef{}, const_cast(t)}; RegisterType(tp); } + for ( const auto& attr : attrs ) { + AttrPtr attr_p = {NewRef{}, const_cast(attr)}; + (void)RegisterAttr(attr_p); + } + // The scaffolding is now in place to go ahead and generate // the functions & lambdas. First declare them ... for ( const auto& func : funcs ) @@ -187,9 +155,92 @@ void CPPCompile::Compile(bool report_uncompilable) { Emit("};"); + if ( standalone ) + // Now that we've identified all of the record fields we might have + // to generate, make sure we track their attributes. 
+ for ( const auto& fd : field_decls ) { + auto td = fd.second; + if ( obj_matches_opt_files(td->type) ) { + TypePtr tp = {NewRef{}, const_cast(TypeRep(td->type))}; + RegisterType(tp); + } + if ( obj_matches_opt_files(td->attrs) ) + RegisterAttributes(td->attrs); + } + GenEpilog(); } +bool CPPCompile::AnalyzeFuncBody(FuncInfo& fi, unordered_set& filenames_reported_as_skipped, + unordered_set& rep_types, bool report_uncompilable) { + const auto& f = fi.Func(); + auto& body = fi.Body(); + + string fn = body->GetLocationInfo()->filename; + + if ( ! analysis_options.allow_cond && ! fi.ShouldSkip() ) { + if ( ! analysis_options.only_files.empty() && files_with_conditionals.count(fn) > 0 ) { + if ( report_uncompilable ) + reporter->Warning("%s cannot be compiled to C++ due to source file %s having conditional code", + f->GetName().c_str(), fn.c_str()); + + else if ( filenames_reported_as_skipped.count(fn) == 0 ) { + reporter->Warning("skipping compilation of files in %s due to presence of conditional code", + fn.c_str()); + filenames_reported_as_skipped.insert(fn); + } + + fi.SetSkip(true); + } + } + + if ( fi.ShouldSkip() ) { + not_fully_compilable.insert(f->GetName()); + return true; + } + + auto pf = fi.Profile(); + total_hash = merge_p_hashes(total_hash, pf->HashVal()); + + for ( auto t : pf->UnorderedTypes() ) + rep_types.insert(pfs->TypeRep(t)); + + auto& pf_all_gl = pf->AllGlobals(); + all_accessed_globals.insert(pf_all_gl.begin(), pf_all_gl.end()); + + auto& pf_gl = pf->Globals(); + accessed_globals.insert(pf_gl.begin(), pf_gl.end()); + + auto& pf_events = pf->Events(); + accessed_events.insert(pf_events.begin(), pf_events.end()); + + auto& pf_lambdas = pf->Lambdas(); + accessed_lambdas.insert(pf_lambdas.begin(), pf_lambdas.end()); + + if ( is_lambda(f) || is_when_lambda(f) ) { + // We deal with these separately. 
+ fi.SetSkip(true); + return true; + } + + const char* reason; + if ( IsCompilable(fi, &reason) ) { + if ( f->Flavor() == FUNC_FLAVOR_FUNCTION ) + // Note this as a callable compiled function. + compilable_funcs.insert(BodyName(fi)); + } + else { + if ( reason && (standalone || report_uncompilable) ) { + reporter->Warning("%s cannot be compiled to C++ due to %s", f->GetName().c_str(), reason); + } + + not_fully_compilable.insert(f->GetName()); + return false; + } + + return true; +} + void CPPCompile::GenProlog() { Emit("#include \"zeek/script_opt/CPP/Runtime.h\"\n"); diff --git a/src/script_opt/CPP/Driver.h b/src/script_opt/CPP/Driver.h index f4f8e123d8..6b17267aa8 100644 --- a/src/script_opt/CPP/Driver.h +++ b/src/script_opt/CPP/Driver.h @@ -7,6 +7,14 @@ // Main driver, invoked by constructor. void Compile(bool report_uncompilable); +// For a given function body, assess its compilability and track its elements. +// Returns false if the body is uncompilable, true otherwise (including when +// skipped). If skipped for conditional code, warns per function (if +// report_uncompilable is true) or per file (via filenames_reported_as_skipped). +// Updates rep_types with the type representatives seen in the function. +bool AnalyzeFuncBody(FuncInfo& fi, std::unordered_set& filenames_reported_as_skipped, + std::unordered_set& rep_types, bool report_uncompilable); + // Generate the beginning of the compiled code: run-time functions, // namespace, auxiliary globals. 
void GenProlog(); diff --git a/src/script_opt/CPP/Exprs.cc b/src/script_opt/CPP/Exprs.cc index ddef91b5a8..5a39d4d850 100644 --- a/src/script_opt/CPP/Exprs.cc +++ b/src/script_opt/CPP/Exprs.cc @@ -1264,7 +1264,8 @@ string CPPCompile::GenEnum(const TypePtr& t, const ValPtr& ev) { mapping_slot = num_ev_mappings++; string enum_name = et->Lookup(v); - enum_names.emplace_back(TypeOffset(t), std::move(enum_name)); + bool create_if_missing = standalone && obj_matches_opt_files(ev); + enum_names.emplace_back(EnumMappingInfo{TypeOffset(t), std::move(enum_name), create_if_missing}); if ( evm != enum_val_mappings.end() ) { // We're already tracking this enum. diff --git a/src/script_opt/CPP/Exprs.h b/src/script_opt/CPP/Exprs.h index 37ce2946ec..c5830e8ff3 100644 --- a/src/script_opt/CPP/Exprs.h +++ b/src/script_opt/CPP/Exprs.h @@ -142,6 +142,13 @@ std::unordered_map> enum_val_mappi // outer map). int num_ev_mappings = 0; +// Information captured for generating entries in "enum_mapping". +struct EnumMappingInfo { + int enum_type; // as a global offset + std::string enum_name; + bool create_if_missing; +}; + // For each entry in "enum_mapping", the EnumType (as a global offset) and // name associated with the mapping. -std::vector> enum_names; +std::vector enum_names; diff --git a/src/script_opt/CPP/Inits.cc b/src/script_opt/CPP/Inits.cc index 54b40b161c..130bb5d49e 100644 --- a/src/script_opt/CPP/Inits.cc +++ b/src/script_opt/CPP/Inits.cc @@ -104,17 +104,21 @@ void CPPCompile::InitializeFieldMappings() { StartBlock(); - string type_arg, attrs_arg; - if ( ! 
standalone ) - type_arg = attrs_arg = "DO_NOT_CONSTRUCT_VALUE_MARKER"; - for ( const auto& mapping : field_decls ) { auto rt_arg = Fmt(mapping.first); auto td = mapping.second; + string type_arg = "DO_NOT_CONSTRUCT_VALUE_MARKER"; + string attrs_arg = "DO_NOT_CONSTRUCT_VALUE_MARKER"; + if ( standalone ) { - type_arg = Fmt(TypeOffset(td->type)); - attrs_arg = Fmt(AttributesOffset(td->attrs)); + // We can assess whether this field is one we need to generate + // because if it is, it will have an &optional attribute that + // is local to one of the compiled source files. + if ( td->attrs && obj_matches_opt_files(td->attrs) ) { + type_arg = Fmt(TypeOffset(td->type)); + attrs_arg = Fmt(AttributesOffset(td->attrs)); + } } Emit("CPP_FieldMapping(%s, \"%s\", %s, %s),", rt_arg, td->id, type_arg, attrs_arg); @@ -128,10 +132,11 @@ void CPPCompile::InitializeEnumMappings() { StartBlock(); - auto create_if_missing = standalone ? "true" : "false"; - - for ( const auto& mapping : enum_names ) - Emit("CPP_EnumMapping(%s, \"%s\", %s),", Fmt(mapping.first), mapping.second, create_if_missing); + for ( const auto& en : enum_names ) { + auto create_if_missing = en.create_if_missing ? "true" : "false"; + string init_args = Fmt(en.enum_type) + ", \"" + en.enum_name + "\", " + create_if_missing; + Emit("CPP_EnumMapping(%s),", init_args); + } EndBlock(true); } diff --git a/src/script_opt/CPP/InitsInfo.cc b/src/script_opt/CPP/InitsInfo.cc index ba8e3cd674..6d1dcdc438 100644 --- a/src/script_opt/CPP/InitsInfo.cc +++ b/src/script_opt/CPP/InitsInfo.cc @@ -384,8 +384,9 @@ GlobalInitInfo::GlobalInitInfo(CPPCompile* c, const ID* g, string _CPP_name) val = ValElem(c, nullptr); // empty because we initialize dynamically if ( gt->Tag() == TYPE_FUNC && (! g->GetVal() || g->GetVal()->AsFunc()->GetKind() == Func::BUILTIN_FUNC) ) - // Remember this peculiarity so we can recreate it for - // error-behavior-compatibility. + // Be sure not to try to create BiFs. 
In addition, GetVal() can be + // nil in certain error situations, which we'll want to recreate + // for behavior compatibility. func_with_no_val = true; } @@ -557,7 +558,7 @@ RecordTypeInfo::RecordTypeInfo(CPPCompile* _c, TypePtr _t) : AbstractTypeInfo(_c field_types.push_back(r_i->type); - if ( c->TargetingStandalone() && r_i->attrs ) { + if ( r_i->attrs && c->TargetingStandalone() && obj_matches_opt_files(r_i->attrs) ) { gi = c->RegisterAttributes(r_i->attrs); final_init_cohort = max(final_init_cohort, gi->InitCohort() + 1); field_attrs.push_back(gi->Offset()); diff --git a/src/script_opt/CPP/InitsInfo.h b/src/script_opt/CPP/InitsInfo.h index 3432871d67..3d6448ef01 100644 --- a/src/script_opt/CPP/InitsInfo.h +++ b/src/script_opt/CPP/InitsInfo.h @@ -501,8 +501,6 @@ public: void InitializerVals(std::vector& ivs) const override; protected: - std::string Zeek_name; - std::string CPP_name; int type; int attrs; std::string val; diff --git a/src/script_opt/CPP/RuntimeInits.h b/src/script_opt/CPP/RuntimeInits.h index ddd8cf578e..a37acaa720 100644 --- a/src/script_opt/CPP/RuntimeInits.h +++ b/src/script_opt/CPP/RuntimeInits.h @@ -38,16 +38,18 @@ extern std::vector>> generate_indices_set(int* init // These need to be distinct from any values that can appear, which means // they should be negative, and not -1, which is used as a "N/A" value. -#define END_OF_VEC_VEC -100 -#define END_OF_VEC_VEC_VEC -200 +// clang-format off +constexpr int END_OF_VEC_VEC = -100; +constexpr int END_OF_VEC_VEC_VEC = -200; // A marker value for "named" types (those that are simply looked up by // name at initialization time). -#define NAMED_TYPE_MARKER -300 +constexpr int NAMED_TYPE_MARKER = -300; // A marker value indicating values that should not be constructed if not // already present. -#define DO_NOT_CONSTRUCT_VALUE_MARKER -400 +constexpr int DO_NOT_CONSTRUCT_VALUE_MARKER = -400; +// clang-format on // An abstract helper class used to access elements of an initialization vector. 
// We need the abstraction because InitsManager below needs to be able to refer diff --git a/src/script_opt/CPP/Types.cc b/src/script_opt/CPP/Types.cc index 59aab07a04..37215c0d62 100644 --- a/src/script_opt/CPP/Types.cc +++ b/src/script_opt/CPP/Types.cc @@ -191,7 +191,7 @@ shared_ptr CPPCompile::RegisterType(const TypePtr& tp) { shared_ptr gi; - if ( standalone || t->GetName().empty() ) { + if ( (standalone && obj_matches_opt_files(tp)) || t->GetName().empty() ) { switch ( t->Tag() ) { case TYPE_ADDR: case TYPE_ANY: diff --git a/src/script_opt/CPP/maint/README b/src/script_opt/CPP/maint/README index 0913151dbd..4317640219 100644 --- a/src/script_opt/CPP/maint/README +++ b/src/script_opt/CPP/maint/README @@ -17,12 +17,14 @@ The maintenance workflow: ninja src/zeek -O use-C++ -r some.pcap - and that it can compile them standalone: + and that standalone compilation works: rm CPP-gen.cc ninja - src/zeek -O gen-standalone-C++ /dev/null + src/zeek -b -O gen-standalone-C++ --optimize-files=base/protocols/conn base/protocols/conn >my-test.zeek ninja + src/zeek -b -r some.pcap my-test.zeek + # Confirm that it generates conn.log rm CPP-gen.cc ninja diff --git a/src/script_opt/ProfileFunc.cc b/src/script_opt/ProfileFunc.cc index dbccc7f778..64e85480d7 100644 --- a/src/script_opt/ProfileFunc.cc +++ b/src/script_opt/ProfileFunc.cc @@ -534,6 +534,7 @@ void ProfileFunc::TrackID(const ID* id) { if ( id->IsGlobal() ) { globals.insert(id); all_globals.insert(id); + TrackType(id->GetType()); } ordered_ids.push_back(id); diff --git a/src/script_opt/ProfileFunc.h b/src/script_opt/ProfileFunc.h index c8bcd71d9a..feb78ce786 100644 --- a/src/script_opt/ProfileFunc.h +++ b/src/script_opt/ProfileFunc.h @@ -379,6 +379,7 @@ public: const IDSet& BiFGlobals() const { return BiF_globals; } const std::unordered_set& Lambdas() const { return lambdas; } const std::unordered_set& Events() const { return events; } + const auto& ExprAttrs() const { return expr_attrs; } const auto& FuncProfs() const { 
return func_profs; } diff --git a/src/script_opt/ScriptOpt.cc b/src/script_opt/ScriptOpt.cc index b69a61b6fc..2175570085 100644 --- a/src/script_opt/ScriptOpt.cc +++ b/src/script_opt/ScriptOpt.cc @@ -58,6 +58,10 @@ bool is_lambda(const ScriptFunc* f) { return lambdas.count(f) > 0; } bool is_when_lambda(const ScriptFunc* f) { return when_lambdas.count(f) > 0; } void analyze_global_stmts(Stmt* stmts) { + if ( analysis_options.gen_standalone_CPP && obj_matches_opt_files(stmts) ) + reporter->FatalError("cannot include global statements with -O gen-standalone-C++: %s", + obj_desc(stmts).c_str()); + // We ignore analysis_options.only_{files,funcs} - if they're in use, later // logic will keep this function from being compiled, but it's handy // now to enter it into "funcs" so we have a FuncInfo to return. @@ -571,6 +575,10 @@ void clear_script_analysis() { for ( auto& id : f.Scope()->OrderedVars() ) id->ClearOptInfo(); + // Clear out optimization info for global variables, too. + for ( auto& g : global_scope()->OrderedVars() ) + g->ClearOptInfo(); + // Keep the functions around if we're profiling, so we can loop // over them to generate the profiles. if ( ! analysis_options.profile_ZAM ) @@ -640,22 +648,16 @@ void analyze_scripts(bool no_unused_warnings) { if ( analysis_options.use_CPP ) use_CPP(); - std::shared_ptr pfs; - // Note, in the following it's not clear whether the final argument - // for absolute/relative record fields matters any more ... 
- if ( generating_CPP ) - pfs = std::make_shared(funcs, is_CPP_compilable, true, false); - else - pfs = std::make_shared(funcs, nullptr, true, true); - if ( generating_CPP ) { if ( analysis_options.gen_ZAM ) reporter->FatalError("-O ZAM and -O gen-C++ conflict"); + auto pfs = std::make_shared(funcs, is_CPP_compilable, true, false); generate_CPP(pfs); exit(0); } + auto pfs = std::make_shared(funcs, nullptr, true, true); analyze_scripts_for_ZAM(pfs); if ( reporter->Errors() > 0 ) diff --git a/src/script_opt/ScriptOpt.h b/src/script_opt/ScriptOpt.h index 4890987aec..028a93ea54 100644 --- a/src/script_opt/ScriptOpt.h +++ b/src/script_opt/ScriptOpt.h @@ -18,6 +18,7 @@ struct Options; namespace zeek::detail { +using ObjPtr = IntrusivePtr; using TypeSet = std::unordered_set; // Flags controlling what sorts of analysis to do. @@ -256,6 +257,7 @@ extern bool should_analyze(const ScriptFuncPtr& f, const StmtPtr& body); // True if the given object's location matches one specified by // --optimize-files=... extern bool obj_matches_opt_files(const Obj* obj); +inline bool obj_matches_opt_files(const ObjPtr& obj) { return obj_matches_opt_files(obj.get()); } // Analyze all of the parsed scripts collectively for usage issues (unless // suppressed by the flag) and optimization. diff --git a/src/zeek-setup.cc b/src/zeek-setup.cc index e4d8c8e95c..d61dabb6ad 100644 --- a/src/zeek-setup.cc +++ b/src/zeek-setup.cc @@ -967,6 +967,10 @@ SetupResult setup(int argc, char** argv, Options* zopts) { exit(reporter->Errors() != 0); } + if ( stmts && (stmts->Tag() == STMT_NULL || (stmts->Tag() == STMT_LIST && stmts->AsStmtList()->Stmts().empty())) ) + // There are no actual global statements. + stmts = nullptr; + if ( stmts ) analyze_global_stmts(stmts);