diff --git a/CHANGES b/CHANGES index 7ec8fba65c..7aeaa91581 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,13 @@ +5.2.0-dev.46 | 2022-10-03 09:45:05 -0700 + + * use dynamic rather than static initialization of globals for scripts compiled to C++ (Vern Paxson, Corelight) + + * safety checking for initializing scripts compiled to C++ (Vern Paxson, Corelight) + + * fixes for initializing scripts compiled to C++ (Vern Paxson, Corelight) + + * restructured tracking of initializations of globals for script compilation (Vern Paxson, Corelight) + 5.2.0-dev.40 | 2022-10-03 09:44:40 -0700 * http: Prevent script errors when http$current_entity is not set (Arne Welzel, Corelight) diff --git a/VERSION b/VERSION index c864c8b7d9..919c853191 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -5.2.0-dev.40 +5.2.0-dev.46 diff --git a/src/ID.cc b/src/ID.cc index acd4597e80..2df52b2520 100644 --- a/src/ID.cc +++ b/src/ID.cc @@ -680,11 +680,6 @@ std::vector ID::GetOptionHandlers() const return v; } -void IDOptInfo::AddInitExpr(ExprPtr init_expr) - { - init_exprs.emplace_back(std::move(init_expr)); - } - } // namespace detail } // namespace zeek diff --git a/src/Var.cc b/src/Var.cc index 9a3b9bd9ac..c4e13c7d74 100644 --- a/src/Var.cc +++ b/src/Var.cc @@ -17,6 +17,7 @@ #include "zeek/Traverse.h" #include "zeek/Val.h" #include "zeek/module_util.h" +#include "zeek/script_opt/IDOptInfo.h" #include "zeek/script_opt/StmtOptInfo.h" #include "zeek/script_opt/UsageAnalyzer.h" @@ -117,12 +118,12 @@ static bool add_prototype(const IDPtr& id, Type* t, std::vector* attrs, return true; } -static void initialize_var(const IDPtr& id, InitClass c, ExprPtr init) +static ExprPtr initialize_var(const IDPtr& id, InitClass c, ExprPtr init) { if ( ! id->HasVal() ) { if ( c == INIT_REMOVE ) - return; + return nullptr; bool no_init = ! init; @@ -134,7 +135,7 @@ static void initialize_var(const IDPtr& id, InitClass c, ExprPtr init) auto& t = id->GetType(); if ( ! IsAggr(t) ) - return; + return nullptr; ValPtr init_val; @@ -147,7 +148,7 @@ static void initialize_var(const IDPtr& id, InitClass c, ExprPtr init) catch ( InterpreterException& ) { id->Error("initialization failed"); - return; + return nullptr; } } @@ -157,11 +158,11 @@ static void initialize_var(const IDPtr& id, InitClass c, ExprPtr init) else if ( t->Tag() == TYPE_VECTOR ) init_val = make_intrusive(cast_intrusive(t)); - id->SetVal(init_val); - return; + init = make_intrusive(init_val); + c = INIT_FULL; } - if ( c == INIT_EXTRA ) + else if ( c == INIT_EXTRA ) c = INIT_FULL; } @@ -177,19 +178,12 @@ static void initialize_var(const IDPtr& id, InitClass c, ExprPtr init) assignment = make_intrusive(lhs, init); else // This can happen due to error propagation. - return; + return nullptr; if ( assignment->IsError() ) - return; + return nullptr; - try - { - (void)assignment->Eval(nullptr); - } - catch ( InterpreterException& ) - { - id->Error("initialization failed"); - } + return assignment; } static void make_var(const IDPtr& id, TypePtr t, InitClass c, ExprPtr init, @@ -347,11 +341,29 @@ static void make_var(const IDPtr& id, TypePtr t, InitClass c, ExprPtr init, if ( init && ((c == INIT_EXTRA && id->GetAttr(ATTR_ADD_FUNC)) || (c == INIT_REMOVE && id->GetAttr(ATTR_DEL_FUNC))) ) + { // Just apply the function. id->SetVal(init, c); + id->GetOptInfo()->AddInitExpr(init, c); + } else if ( dt != VAR_REDEF || init || ! attr ) - initialize_var(id, c, init); + { + auto init_expr = initialize_var(id, c, init); + if ( init_expr ) + { + id->GetOptInfo()->AddInitExpr(init_expr); + + try + { + (void)init_expr->Eval(nullptr); + } + catch ( InterpreterException& ) + { + id->Error("initialization failed"); + } + } + } } if ( dt == VAR_CONST ) diff --git a/src/parse.y b/src/parse.y index 3e98d60944..5735c7beaf 100644 --- a/src/parse.y +++ b/src/parse.y @@ -102,7 +102,6 @@ #include "zeek/zeekygen/Manager.h" #include "zeek/module_util.h" #include "zeek/IntrusivePtr.h" -#include "zeek/script_opt/IDOptInfo.h" extern const char* filename; // Absolute path of file currently being parsed. extern const char* last_filename; // Absolute path of last file parsed. @@ -321,8 +320,6 @@ static void build_global(ID* id, Type* t, InitClass ic, Expr* e, add_global(id_ptr, std::move(t_ptr), ic, e_ptr, std::move(attrs_ptr), dt); - id->GetOptInfo()->AddInitExpr(e_ptr); - if ( dt == VAR_REDEF ) zeekygen_mgr->Redef(id, ::filename, ic, std::move(e_ptr)); else @@ -342,8 +339,6 @@ static StmtPtr build_local(ID* id, Type* t, InitClass ic, Expr* e, auto init = add_local(std::move(id_ptr), std::move(t_ptr), ic, e_ptr, std::move(attrs_ptr), dt); - id->GetOptInfo()->AddInitExpr(std::move(e_ptr)); - if ( do_coverage ) script_coverage_mgr.AddStmt(init.get()); diff --git a/src/script_opt/CPP/Attrs.cc b/src/script_opt/CPP/Attrs.cc index 437270a7d4..c4ba3345b6 100644 --- a/src/script_opt/CPP/Attrs.cc +++ b/src/script_opt/CPP/Attrs.cc @@ -18,7 +18,7 @@ shared_ptr CPPCompile::RegisterAttributes(const AttributesPtr& att if ( pa != processed_attrs.end() ) return pa->second; - attributes.AddKey(attrs); + attributes.AddKey(attrs, pfs.HashAttrs(attrs)); // The cast is just so we can make an IntrusivePtr. auto a_rep = const_cast(attributes.GetRep(attrs)); @@ -49,7 +49,7 @@ shared_ptr CPPCompile::RegisterAttr(const AttrPtr& attr) const auto& e = a->GetExpr(); if ( e && ! IsSimpleInitExpr(e) ) - init_exprs.AddKey(e); + init_exprs.AddKey(e, p_hash(e)); auto gi = make_shared(this, attr); attr_info->AddInstance(gi); diff --git a/src/script_opt/CPP/Compile.h b/src/script_opt/CPP/Compile.h index bb75a5fba3..afdb636531 100644 --- a/src/script_opt/CPP/Compile.h +++ b/src/script_opt/CPP/Compile.h @@ -1015,6 +1015,10 @@ private: // Generate code to initialize indirect references to constants. void InitializeConsts(); + // Generate code to initialize globals (using dynamic statements + // rather than constants). + void InitializeGlobals(); + // Generate the initialization hook for this set of compiled code. void GenInitHook(); diff --git a/src/script_opt/CPP/Driver.cc b/src/script_opt/CPP/Driver.cc index c470617d8b..aae5ffe321 100644 --- a/src/script_opt/CPP/Driver.cc +++ b/src/script_opt/CPP/Driver.cc @@ -5,6 +5,7 @@ #include #include "zeek/script_opt/CPP/Compile.h" +#include "zeek/script_opt/IDOptInfo.h" extern std::unordered_set files_with_conditionals; @@ -310,6 +311,9 @@ void CPPCompile::RegisterCompiledBody(const string& f) void CPPCompile::GenEpilog() { + NL(); + InitializeGlobals(); + NL(); for ( const auto& ii : init_infos ) GenInitExpr(ii.second); @@ -468,6 +472,9 @@ void CPPCompile::GenFinishInit() NL(); Emit("load_BiFs__CPP();"); + NL(); + Emit("init_globals__CPP();"); + EndBlock(); } diff --git a/src/script_opt/CPP/Inits.cc b/src/script_opt/CPP/Inits.cc index 376fc93ea3..171350b83a 100644 --- a/src/script_opt/CPP/Inits.cc +++ b/src/script_opt/CPP/Inits.cc @@ -188,6 +188,50 @@ void CPPCompile::InitializeConsts() EndBlock(true); } +void CPPCompile::InitializeGlobals() + { + Emit("static void init_globals__CPP()"); + StartBlock(); + + Emit("Frame* f__CPP = nullptr;"); + NL(); + + for ( auto ginit : IDOptInfo::GetGlobalInitExprs() ) + { + auto g = ginit.Id(); + if ( pfs.Globals().count(g) == 0 ) + continue; + + auto ic = ginit.IC(); + auto& init = ginit.Init(); + + if ( ic == INIT_NONE ) + { + IDPtr gid = {NewRef{}, const_cast(g)}; + auto gn = make_intrusive(make_intrusive(gid)); + auto ae = make_intrusive(gn, init, true); + Emit(GenExpr(ae.get(), GEN_NATIVE, true) + ";"); + } + + else + { + // This branch occurs for += or -= initializations that + // use associated functions. + string ics; + if ( ic == INIT_EXTRA ) + ics = "INIT_EXTRA"; + else if ( ic == INIT_REMOVE ) + ics = "INIT_REMOVE"; + else + reporter->FatalError("bad initialization class in CPPCompile::InitializeGlobals()"); + + Emit("%s->SetValue(%s, %s);", globals[g->Name()], GenExpr(init, GEN_NATIVE, true), ics); + } + } + + EndBlock(); + } + void CPPCompile::GenInitHook() { NL(); diff --git a/src/script_opt/CPP/InitsInfo.cc b/src/script_opt/CPP/InitsInfo.cc index ba423e10fc..a4cca821f0 100644 --- a/src/script_opt/CPP/InitsInfo.cc +++ b/src/script_opt/CPP/InitsInfo.cc @@ -339,16 +339,7 @@ GlobalInitInfo::GlobalInitInfo(CPPCompile* c, const ID* g, string _CPP_name) attrs = -1; exported = g->IsExport(); - - auto v = g->GetVal(); - if ( v && gt->Tag() == TYPE_OPAQUE ) - { - reporter->Error("cannot compile to C++ global \"%s\" initialized to opaque value", - g->Name()); - v = nullptr; - } - - val = ValElem(c, v); + val = ValElem(c, nullptr); // empty because we initialize dynamically } void GlobalInitInfo::InitializerVals(std::vector& ivs) const @@ -439,6 +430,9 @@ ListTypeInfo::ListTypeInfo(CPPCompile* _c, TypePtr _t) if ( gi ) init_cohort = max(init_cohort, gi->InitCohort()); } + + if ( ! types.empty() ) + ++init_cohort; } void ListTypeInfo::AddInitializerVals(std::vector& ivs) const diff --git a/src/script_opt/CPP/README.md b/src/script_opt/CPP/README.md index f34dbd079e..9f8d8645cd 100644 --- a/src/script_opt/CPP/README.md +++ b/src/script_opt/CPP/README.md @@ -173,9 +173,6 @@ as currently done, to instead be in a pseudo-event handler. code requires initializing a global variable that specifies extend fields in an extensible record (i.e., fields added using `redef`). -* The compiler will not compile bodies that include "when" statements -This is fairly involved to fix. - * If a lambda generates an event that is not otherwise referred to, that event will not be registered upon instantiating the lambda. This is not particularly difficult to fix. diff --git a/src/script_opt/CPP/RuntimeInits.cc b/src/script_opt/CPP/RuntimeInits.cc index cebfccd373..dc06c12d3e 100644 --- a/src/script_opt/CPP/RuntimeInits.cc +++ b/src/script_opt/CPP/RuntimeInits.cc @@ -487,11 +487,10 @@ void CPP_GlobalInit::Generate(InitsManager* im, std::vector& /* inits_vec global = lookup_global__CPP(name, im->Types(type), exported); if ( ! global->HasVal() && val >= 0 ) - { global->SetVal(im->ConstVals(val)); - if ( attrs >= 0 ) - global->SetAttrs(im->Attributes(attrs)); - } + + if ( attrs >= 0 ) + global->SetAttrs(im->Attributes(attrs)); } void generate_indices_set(int* inits, std::vector>& indices_set) diff --git a/src/script_opt/CPP/RuntimeInits.h b/src/script_opt/CPP/RuntimeInits.h index 031208a1ce..7fd217d922 100644 --- a/src/script_opt/CPP/RuntimeInits.h +++ b/src/script_opt/CPP/RuntimeInits.h @@ -66,12 +66,41 @@ public: // Accessors for the sundry initialization vectors, each retrieving // a specific element identified by an index/offset. const std::vector& Indices(int offset) const { return indices[offset]; } - const char* Strings(int offset) const { return strings[offset]; } - const p_hash_type Hashes(int offset) const { return hashes[offset]; } - const TypePtr& Types(int offset) const { return types[offset]; } - const AttributesPtr& Attributes(int offset) const { return attributes[offset]; } - const AttrPtr& Attrs(int offset) const { return attrs[offset]; } - const CallExprPtr& CallExprs(int offset) const { return call_exprs[offset]; } + const char* Strings(int offset) const + { + ASSERT(offset >= 0 && offset < strings.size()); + ASSERT(strings[offset]); + return strings[offset]; + } + const p_hash_type Hashes(int offset) const + { + ASSERT(offset >= 0 && offset < hashes.size()); + return hashes[offset]; + } + const TypePtr& Types(int offset) const + { + ASSERT(offset >= 0 && offset < types.size()); + ASSERT(types[offset]); + return types[offset]; + } + const AttributesPtr& Attributes(int offset) const + { + ASSERT(offset >= 0 && offset < attributes.size()); + ASSERT(attributes[offset]); + return attributes[offset]; + } + const AttrPtr& Attrs(int offset) const + { + ASSERT(offset >= 0 && offset < attrs.size()); + ASSERT(attrs[offset]); + return attrs[offset]; + } + const CallExprPtr& CallExprs(int offset) const + { + ASSERT(offset >= 0 && offset < call_exprs.size()); + ASSERT(call_exprs[offset]); + return call_exprs[offset]; + } private: std::vector& const_vals; diff --git a/src/script_opt/CPP/maint/README b/src/script_opt/CPP/maint/README index f72aa90a18..e3ec4577fa 100644 --- a/src/script_opt/CPP/maint/README +++ b/src/script_opt/CPP/maint/README @@ -15,7 +15,7 @@ The maintenance workflow: to check in updates to the list of how the compiler currently fares on various btests (see end of this doc): - Fri Sep 16 16:13:49 PDT 2022 + Thu Sep 29 14:49:49 PDT 2022 2. Run "find-test-files.sh" to generate a list (to stdout) of all of the possible Zeek source files found in the test suite. @@ -74,18 +74,11 @@ These BTests won't successfully run due to the indicated issue: Database Of Known Issues (keep sorted) ../testing/btest/bifs/table_values.zeek bad-constructor -../testing/btest/core/global_opaque_val.zeek opaque ../testing/btest/language/alternate-event-hook-prototypes.zeek deprecated -../testing/btest/language/global-init-calls-bif.zeek opaque ../testing/btest/language/redef-same-prefixtable-idx.zeek deprecated ../testing/btest/language/table-redef.zeek deprecated ../testing/btest/language/when-aggregates.zeek bad-when ../testing/btest/scripts/base/protocols/krb/smb2_krb.test skipped ../testing/btest/scripts/base/protocols/krb/smb2_krb_nokeytab.test skipped ../testing/btest/scripts/base/utils/active-http.test test-glitch -../testing/btest/scripts/policy/frameworks/telemetry/log-prefixes.zeek opaque -../testing/btest/scripts/policy/frameworks/telemetry/log.zeek opaque ../testing/btest/scripts/policy/misc/dump-events.zeek skipped -../testing/btest/telemetry/counter.zeek opaque -../testing/btest/telemetry/gauge.zeek opaque -../testing/btest/telemetry/histogram.zeek opaque diff --git a/src/script_opt/IDOptInfo.cc b/src/script_opt/IDOptInfo.cc index 69718683ab..bf100cf784 100644 --- a/src/script_opt/IDOptInfo.cc +++ b/src/script_opt/IDOptInfo.cc @@ -51,6 +51,8 @@ void IDDefRegion::Dump() const printf("\n"); } +std::vector IDOptInfo::global_init_exprs; + void IDOptInfo::Clear() { static bool did_init = false; @@ -69,6 +71,17 @@ void IDOptInfo::Clear() tracing = trace_ID && util::streq(trace_ID, my_id->Name()); } +void IDOptInfo::AddInitExpr(ExprPtr init_expr, InitClass ic) + { + if ( ! init_expr ) + return; + + if ( my_id->IsGlobal() ) + global_init_exprs.emplace_back(IDInitInfo(my_id, init_expr, ic)); + + init_exprs.emplace_back(std::move(init_expr)); + } + void IDOptInfo::DefinedAfter(const Stmt* s, const ExprPtr& e, const std::vector& conf_blocks, zeek_uint_t conf_start) { diff --git a/src/script_opt/IDOptInfo.h b/src/script_opt/IDOptInfo.h index 6bf8718a68..a55f5791a8 100644 --- a/src/script_opt/IDOptInfo.h +++ b/src/script_opt/IDOptInfo.h @@ -105,6 +105,24 @@ protected: ExprPtr def_expr; }; +// Class tracking information associated with a (global) identifier's +// (re-)initialization. + +class IDInitInfo + { +public: + IDInitInfo(const ID* _id, ExprPtr _init, InitClass _ic) : id(_id), init(_init), ic(_ic) { } + + const ID* Id() const { return id; } + const ExprPtr& Init() const { return init; } + InitClass IC() const { return ic; } + +private: + const ID* id; + ExprPtr init; + InitClass ic; + }; + // Class tracking optimization information associated with identifiers. class IDOptInfo @@ -118,11 +136,19 @@ public: void Clear(); // Used to track expressions employed when explicitly initializing - // the identifier. These are needed by compile-to-C++ script - // optimization. They're not used by ZAM optimization. - void AddInitExpr(ExprPtr init_expr); + // the (global) identifier. These are needed by compile-to-C++ script + // optimization, and for tracking variable usage. An initialization + // class other than INIT_NONE indicates that initialization should + // be done with the ExprPtr form of ID::SetVal. + void AddInitExpr(ExprPtr init_expr, InitClass ic = INIT_NONE); + + // Returns the initialization expressions for this identifier. const std::vector& GetInitExprs() const { return init_exprs; } + // Returns a list of the initialization expressions seen for all + // globals, ordered by when they were processed. + static auto& GetGlobalInitExprs() { return global_init_exprs; } + // Associated constant expression, if any. This is only set // for identifiers that are aliases for a constant (i.e., there // are no other assignments to them). @@ -224,6 +250,9 @@ private: // one of the earlier instances rather than the last one. std::vector init_exprs; + // Tracks initializations of globals in the order they're seen. + static std::vector global_init_exprs; + // If non-nil, a constant that this identifier always holds // once initially defined. const ConstExpr* const_expr = nullptr; @@ -256,8 +285,12 @@ private: // Whether the identifier is a temporary variable. bool is_temp = false; - // Only needed for debugging purposes. + // Associated identifier, to enable tracking of initialization + // expressions for globals (for C++ compilation), and for debugging + // output. const ID* my_id; + + // Only needed for debugging purposes. bool tracing = false; // Track whether we've already generated usage errors. diff --git a/src/script_opt/ProfileFunc.cc b/src/script_opt/ProfileFunc.cc index a6057eb4e3..74f2ab28c8 100644 --- a/src/script_opt/ProfileFunc.cc +++ b/src/script_opt/ProfileFunc.cc @@ -917,7 +917,10 @@ p_hash_type ProfileFuncs::HashAttrs(const AttributesPtr& Attrs) // can vary in structure due to compilation of elements. We // do though enforce consistency for their types. if ( e ) + { h = merge_p_hashes(h, HashType(e->GetType())); + h = merge_p_hashes(h, p_hash(e.get())); + } } return h;