Merge remote-tracking branch 'origin/topic/vern/CPP-global-init'

* origin/topic/vern/CPP-global-init:
  updates to -O C++ user and maintenance documentation
  use dynamic rather than static initialization of globals for scripts compiled to C++
  safety checking for initializing scripts compiled to C++
  fixes for initializing scripts compiled to C++
  restructured tracking of initializations of globals for script compilation
This commit is contained in:
Tim Wojtulewicz 2022-10-03 09:45:05 -07:00
commit bf98c1e9c8
17 changed files with 194 additions and 66 deletions

10
CHANGES
View file

@ -1,3 +1,13 @@
5.2.0-dev.46 | 2022-10-03 09:45:05 -0700
* use dynamic rather than static initialization of globals for scripts compiled to C++ (Vern Paxson, Corelight)
* safety checking for initializing scripts compiled to C++ (Vern Paxson, Corelight)
* fixes for initializing scripts compiled to C++ (Vern Paxson, Corelight)
* restructured tracking of initializations of globals for script compilation (Vern Paxson, Corelight)
5.2.0-dev.40 | 2022-10-03 09:44:40 -0700 5.2.0-dev.40 | 2022-10-03 09:44:40 -0700
* http: Prevent script errors when http$current_entity is not set (Arne Welzel, Corelight) * http: Prevent script errors when http$current_entity is not set (Arne Welzel, Corelight)

View file

@ -1 +1 @@
5.2.0-dev.40 5.2.0-dev.46

View file

@ -680,11 +680,6 @@ std::vector<Func*> ID::GetOptionHandlers() const
return v; return v;
} }
void IDOptInfo::AddInitExpr(ExprPtr init_expr)
{
init_exprs.emplace_back(std::move(init_expr));
}
} // namespace detail } // namespace detail
} // namespace zeek } // namespace zeek

View file

@ -17,6 +17,7 @@
#include "zeek/Traverse.h" #include "zeek/Traverse.h"
#include "zeek/Val.h" #include "zeek/Val.h"
#include "zeek/module_util.h" #include "zeek/module_util.h"
#include "zeek/script_opt/IDOptInfo.h"
#include "zeek/script_opt/StmtOptInfo.h" #include "zeek/script_opt/StmtOptInfo.h"
#include "zeek/script_opt/UsageAnalyzer.h" #include "zeek/script_opt/UsageAnalyzer.h"
@ -117,12 +118,12 @@ static bool add_prototype(const IDPtr& id, Type* t, std::vector<AttrPtr>* attrs,
return true; return true;
} }
static void initialize_var(const IDPtr& id, InitClass c, ExprPtr init) static ExprPtr initialize_var(const IDPtr& id, InitClass c, ExprPtr init)
{ {
if ( ! id->HasVal() ) if ( ! id->HasVal() )
{ {
if ( c == INIT_REMOVE ) if ( c == INIT_REMOVE )
return; return nullptr;
bool no_init = ! init; bool no_init = ! init;
@ -134,7 +135,7 @@ static void initialize_var(const IDPtr& id, InitClass c, ExprPtr init)
auto& t = id->GetType(); auto& t = id->GetType();
if ( ! IsAggr(t) ) if ( ! IsAggr(t) )
return; return nullptr;
ValPtr init_val; ValPtr init_val;
@ -147,7 +148,7 @@ static void initialize_var(const IDPtr& id, InitClass c, ExprPtr init)
catch ( InterpreterException& ) catch ( InterpreterException& )
{ {
id->Error("initialization failed"); id->Error("initialization failed");
return; return nullptr;
} }
} }
@ -157,11 +158,11 @@ static void initialize_var(const IDPtr& id, InitClass c, ExprPtr init)
else if ( t->Tag() == TYPE_VECTOR ) else if ( t->Tag() == TYPE_VECTOR )
init_val = make_intrusive<VectorVal>(cast_intrusive<VectorType>(t)); init_val = make_intrusive<VectorVal>(cast_intrusive<VectorType>(t));
id->SetVal(init_val); init = make_intrusive<ConstExpr>(init_val);
return; c = INIT_FULL;
} }
if ( c == INIT_EXTRA ) else if ( c == INIT_EXTRA )
c = INIT_FULL; c = INIT_FULL;
} }
@ -177,19 +178,12 @@ static void initialize_var(const IDPtr& id, InitClass c, ExprPtr init)
assignment = make_intrusive<RemoveFromExpr>(lhs, init); assignment = make_intrusive<RemoveFromExpr>(lhs, init);
else else
// This can happen due to error propagation. // This can happen due to error propagation.
return; return nullptr;
if ( assignment->IsError() ) if ( assignment->IsError() )
return; return nullptr;
try return assignment;
{
(void)assignment->Eval(nullptr);
}
catch ( InterpreterException& )
{
id->Error("initialization failed");
}
} }
static void make_var(const IDPtr& id, TypePtr t, InitClass c, ExprPtr init, static void make_var(const IDPtr& id, TypePtr t, InitClass c, ExprPtr init,
@ -347,11 +341,29 @@ static void make_var(const IDPtr& id, TypePtr t, InitClass c, ExprPtr init,
if ( init && ((c == INIT_EXTRA && id->GetAttr(ATTR_ADD_FUNC)) || if ( init && ((c == INIT_EXTRA && id->GetAttr(ATTR_ADD_FUNC)) ||
(c == INIT_REMOVE && id->GetAttr(ATTR_DEL_FUNC))) ) (c == INIT_REMOVE && id->GetAttr(ATTR_DEL_FUNC))) )
{
// Just apply the function. // Just apply the function.
id->SetVal(init, c); id->SetVal(init, c);
id->GetOptInfo()->AddInitExpr(init, c);
}
else if ( dt != VAR_REDEF || init || ! attr ) else if ( dt != VAR_REDEF || init || ! attr )
initialize_var(id, c, init); {
auto init_expr = initialize_var(id, c, init);
if ( init_expr )
{
id->GetOptInfo()->AddInitExpr(init_expr);
try
{
(void)init_expr->Eval(nullptr);
}
catch ( InterpreterException& )
{
id->Error("initialization failed");
}
}
}
} }
if ( dt == VAR_CONST ) if ( dt == VAR_CONST )

View file

@ -102,7 +102,6 @@
#include "zeek/zeekygen/Manager.h" #include "zeek/zeekygen/Manager.h"
#include "zeek/module_util.h" #include "zeek/module_util.h"
#include "zeek/IntrusivePtr.h" #include "zeek/IntrusivePtr.h"
#include "zeek/script_opt/IDOptInfo.h"
extern const char* filename; // Absolute path of file currently being parsed. extern const char* filename; // Absolute path of file currently being parsed.
extern const char* last_filename; // Absolute path of last file parsed. extern const char* last_filename; // Absolute path of last file parsed.
@ -321,8 +320,6 @@ static void build_global(ID* id, Type* t, InitClass ic, Expr* e,
add_global(id_ptr, std::move(t_ptr), ic, e_ptr, std::move(attrs_ptr), dt); add_global(id_ptr, std::move(t_ptr), ic, e_ptr, std::move(attrs_ptr), dt);
id->GetOptInfo()->AddInitExpr(e_ptr);
if ( dt == VAR_REDEF ) if ( dt == VAR_REDEF )
zeekygen_mgr->Redef(id, ::filename, ic, std::move(e_ptr)); zeekygen_mgr->Redef(id, ::filename, ic, std::move(e_ptr));
else else
@ -342,8 +339,6 @@ static StmtPtr build_local(ID* id, Type* t, InitClass ic, Expr* e,
auto init = add_local(std::move(id_ptr), std::move(t_ptr), ic, auto init = add_local(std::move(id_ptr), std::move(t_ptr), ic,
e_ptr, std::move(attrs_ptr), dt); e_ptr, std::move(attrs_ptr), dt);
id->GetOptInfo()->AddInitExpr(std::move(e_ptr));
if ( do_coverage ) if ( do_coverage )
script_coverage_mgr.AddStmt(init.get()); script_coverage_mgr.AddStmt(init.get());

View file

@ -18,7 +18,7 @@ shared_ptr<CPP_InitInfo> CPPCompile::RegisterAttributes(const AttributesPtr& att
if ( pa != processed_attrs.end() ) if ( pa != processed_attrs.end() )
return pa->second; return pa->second;
attributes.AddKey(attrs); attributes.AddKey(attrs, pfs.HashAttrs(attrs));
// The cast is just so we can make an IntrusivePtr. // The cast is just so we can make an IntrusivePtr.
auto a_rep = const_cast<Attributes*>(attributes.GetRep(attrs)); auto a_rep = const_cast<Attributes*>(attributes.GetRep(attrs));
@ -49,7 +49,7 @@ shared_ptr<CPP_InitInfo> CPPCompile::RegisterAttr(const AttrPtr& attr)
const auto& e = a->GetExpr(); const auto& e = a->GetExpr();
if ( e && ! IsSimpleInitExpr(e) ) if ( e && ! IsSimpleInitExpr(e) )
init_exprs.AddKey(e); init_exprs.AddKey(e, p_hash(e));
auto gi = make_shared<AttrInfo>(this, attr); auto gi = make_shared<AttrInfo>(this, attr);
attr_info->AddInstance(gi); attr_info->AddInstance(gi);

View file

@ -1015,6 +1015,10 @@ private:
// Generate code to initialize indirect references to constants. // Generate code to initialize indirect references to constants.
void InitializeConsts(); void InitializeConsts();
// Generate code to initialize globals (using dynamic statements
// rather than constants).
void InitializeGlobals();
// Generate the initialization hook for this set of compiled code. // Generate the initialization hook for this set of compiled code.
void GenInitHook(); void GenInitHook();

View file

@ -5,6 +5,7 @@
#include <cerrno> #include <cerrno>
#include "zeek/script_opt/CPP/Compile.h" #include "zeek/script_opt/CPP/Compile.h"
#include "zeek/script_opt/IDOptInfo.h"
extern std::unordered_set<std::string> files_with_conditionals; extern std::unordered_set<std::string> files_with_conditionals;
@ -310,6 +311,9 @@ void CPPCompile::RegisterCompiledBody(const string& f)
void CPPCompile::GenEpilog() void CPPCompile::GenEpilog()
{ {
NL();
InitializeGlobals();
NL(); NL();
for ( const auto& ii : init_infos ) for ( const auto& ii : init_infos )
GenInitExpr(ii.second); GenInitExpr(ii.second);
@ -468,6 +472,9 @@ void CPPCompile::GenFinishInit()
NL(); NL();
Emit("load_BiFs__CPP();"); Emit("load_BiFs__CPP();");
NL();
Emit("init_globals__CPP();");
EndBlock(); EndBlock();
} }

View file

@ -188,6 +188,50 @@ void CPPCompile::InitializeConsts()
EndBlock(true); EndBlock(true);
} }
void CPPCompile::InitializeGlobals()
{
Emit("static void init_globals__CPP()");
StartBlock();
Emit("Frame* f__CPP = nullptr;");
NL();
for ( auto ginit : IDOptInfo::GetGlobalInitExprs() )
{
auto g = ginit.Id();
if ( pfs.Globals().count(g) == 0 )
continue;
auto ic = ginit.IC();
auto& init = ginit.Init();
if ( ic == INIT_NONE )
{
IDPtr gid = {NewRef{}, const_cast<ID*>(g)};
auto gn = make_intrusive<RefExpr>(make_intrusive<NameExpr>(gid));
auto ae = make_intrusive<AssignExpr>(gn, init, true);
Emit(GenExpr(ae.get(), GEN_NATIVE, true) + ";");
}
else
{
// This branch occurs for += or -= initializations that
// use associated functions.
string ics;
if ( ic == INIT_EXTRA )
ics = "INIT_EXTRA";
else if ( ic == INIT_REMOVE )
ics = "INIT_REMOVE";
else
reporter->FatalError("bad initialization class in CPPCompile::InitializeGlobals()");
Emit("%s->SetValue(%s, %s);", globals[g->Name()], GenExpr(init, GEN_NATIVE, true), ics);
}
}
EndBlock();
}
void CPPCompile::GenInitHook() void CPPCompile::GenInitHook()
{ {
NL(); NL();

View file

@ -339,16 +339,7 @@ GlobalInitInfo::GlobalInitInfo(CPPCompile* c, const ID* g, string _CPP_name)
attrs = -1; attrs = -1;
exported = g->IsExport(); exported = g->IsExport();
val = ValElem(c, nullptr); // empty because we initialize dynamically
auto v = g->GetVal();
if ( v && gt->Tag() == TYPE_OPAQUE )
{
reporter->Error("cannot compile to C++ global \"%s\" initialized to opaque value",
g->Name());
v = nullptr;
}
val = ValElem(c, v);
} }
void GlobalInitInfo::InitializerVals(std::vector<std::string>& ivs) const void GlobalInitInfo::InitializerVals(std::vector<std::string>& ivs) const
@ -439,6 +430,9 @@ ListTypeInfo::ListTypeInfo(CPPCompile* _c, TypePtr _t)
if ( gi ) if ( gi )
init_cohort = max(init_cohort, gi->InitCohort()); init_cohort = max(init_cohort, gi->InitCohort());
} }
if ( ! types.empty() )
++init_cohort;
} }
void ListTypeInfo::AddInitializerVals(std::vector<std::string>& ivs) const void ListTypeInfo::AddInitializerVals(std::vector<std::string>& ivs) const

View file

@ -173,9 +173,6 @@ as currently done, to instead be in a pseudo-event handler.
code requires initializing a global variable that specifies extend fields in code requires initializing a global variable that specifies extend fields in
an extensible record (i.e., fields added using `redef`). an extensible record (i.e., fields added using `redef`).
* The compiler will not compile bodies that include "when" statements
This is fairly involved to fix.
* If a lambda generates an event that is not otherwise referred to, that * If a lambda generates an event that is not otherwise referred to, that
event will not be registered upon instantiating the lambda. This is not event will not be registered upon instantiating the lambda. This is not
particularly difficult to fix. particularly difficult to fix.

View file

@ -487,11 +487,10 @@ void CPP_GlobalInit::Generate(InitsManager* im, std::vector<void*>& /* inits_vec
global = lookup_global__CPP(name, im->Types(type), exported); global = lookup_global__CPP(name, im->Types(type), exported);
if ( ! global->HasVal() && val >= 0 ) if ( ! global->HasVal() && val >= 0 )
{
global->SetVal(im->ConstVals(val)); global->SetVal(im->ConstVals(val));
if ( attrs >= 0 )
global->SetAttrs(im->Attributes(attrs)); if ( attrs >= 0 )
} global->SetAttrs(im->Attributes(attrs));
} }
void generate_indices_set(int* inits, std::vector<std::vector<int>>& indices_set) void generate_indices_set(int* inits, std::vector<std::vector<int>>& indices_set)

View file

@ -66,12 +66,41 @@ public:
// Accessors for the sundry initialization vectors, each retrieving // Accessors for the sundry initialization vectors, each retrieving
// a specific element identified by an index/offset. // a specific element identified by an index/offset.
const std::vector<int>& Indices(int offset) const { return indices[offset]; } const std::vector<int>& Indices(int offset) const { return indices[offset]; }
const char* Strings(int offset) const { return strings[offset]; } const char* Strings(int offset) const
const p_hash_type Hashes(int offset) const { return hashes[offset]; } {
const TypePtr& Types(int offset) const { return types[offset]; } ASSERT(offset >= 0 && offset < strings.size());
const AttributesPtr& Attributes(int offset) const { return attributes[offset]; } ASSERT(strings[offset]);
const AttrPtr& Attrs(int offset) const { return attrs[offset]; } return strings[offset];
const CallExprPtr& CallExprs(int offset) const { return call_exprs[offset]; } }
const p_hash_type Hashes(int offset) const
{
ASSERT(offset >= 0 && offset < hashes.size());
return hashes[offset];
}
const TypePtr& Types(int offset) const
{
ASSERT(offset >= 0 && offset < types.size());
ASSERT(types[offset]);
return types[offset];
}
const AttributesPtr& Attributes(int offset) const
{
ASSERT(offset >= 0 && offset < attributes.size());
ASSERT(attributes[offset]);
return attributes[offset];
}
const AttrPtr& Attrs(int offset) const
{
ASSERT(offset >= 0 && offset < attrs.size());
ASSERT(attrs[offset]);
return attrs[offset];
}
const CallExprPtr& CallExprs(int offset) const
{
ASSERT(offset >= 0 && offset < call_exprs.size());
ASSERT(call_exprs[offset]);
return call_exprs[offset];
}
private: private:
std::vector<CPP_ValElem>& const_vals; std::vector<CPP_ValElem>& const_vals;

View file

@ -15,7 +15,7 @@ The maintenance workflow:
to check in updates to the list of how the compiler currently fares to check in updates to the list of how the compiler currently fares
on various btests (see end of this doc): on various btests (see end of this doc):
Fri Sep 16 16:13:49 PDT 2022 Thu Sep 29 14:49:49 PDT 2022
2. Run "find-test-files.sh" to generate a list (to stdout) of all of the 2. Run "find-test-files.sh" to generate a list (to stdout) of all of the
possible Zeek source files found in the test suite. possible Zeek source files found in the test suite.
@ -74,18 +74,11 @@ These BTests won't successfully run due to the indicated issue:
Database Of Known Issues (keep sorted) Database Of Known Issues (keep sorted)
../testing/btest/bifs/table_values.zeek bad-constructor ../testing/btest/bifs/table_values.zeek bad-constructor
../testing/btest/core/global_opaque_val.zeek opaque
../testing/btest/language/alternate-event-hook-prototypes.zeek deprecated ../testing/btest/language/alternate-event-hook-prototypes.zeek deprecated
../testing/btest/language/global-init-calls-bif.zeek opaque
../testing/btest/language/redef-same-prefixtable-idx.zeek deprecated ../testing/btest/language/redef-same-prefixtable-idx.zeek deprecated
../testing/btest/language/table-redef.zeek deprecated ../testing/btest/language/table-redef.zeek deprecated
../testing/btest/language/when-aggregates.zeek bad-when ../testing/btest/language/when-aggregates.zeek bad-when
../testing/btest/scripts/base/protocols/krb/smb2_krb.test skipped ../testing/btest/scripts/base/protocols/krb/smb2_krb.test skipped
../testing/btest/scripts/base/protocols/krb/smb2_krb_nokeytab.test skipped ../testing/btest/scripts/base/protocols/krb/smb2_krb_nokeytab.test skipped
../testing/btest/scripts/base/utils/active-http.test test-glitch ../testing/btest/scripts/base/utils/active-http.test test-glitch
../testing/btest/scripts/policy/frameworks/telemetry/log-prefixes.zeek opaque
../testing/btest/scripts/policy/frameworks/telemetry/log.zeek opaque
../testing/btest/scripts/policy/misc/dump-events.zeek skipped ../testing/btest/scripts/policy/misc/dump-events.zeek skipped
../testing/btest/telemetry/counter.zeek opaque
../testing/btest/telemetry/gauge.zeek opaque
../testing/btest/telemetry/histogram.zeek opaque

View file

@ -51,6 +51,8 @@ void IDDefRegion::Dump() const
printf("\n"); printf("\n");
} }
std::vector<IDInitInfo> IDOptInfo::global_init_exprs;
void IDOptInfo::Clear() void IDOptInfo::Clear()
{ {
static bool did_init = false; static bool did_init = false;
@ -69,6 +71,17 @@ void IDOptInfo::Clear()
tracing = trace_ID && util::streq(trace_ID, my_id->Name()); tracing = trace_ID && util::streq(trace_ID, my_id->Name());
} }
void IDOptInfo::AddInitExpr(ExprPtr init_expr, InitClass ic)
{
if ( ! init_expr )
return;
if ( my_id->IsGlobal() )
global_init_exprs.emplace_back(IDInitInfo(my_id, init_expr, ic));
init_exprs.emplace_back(std::move(init_expr));
}
void IDOptInfo::DefinedAfter(const Stmt* s, const ExprPtr& e, void IDOptInfo::DefinedAfter(const Stmt* s, const ExprPtr& e,
const std::vector<const Stmt*>& conf_blocks, zeek_uint_t conf_start) const std::vector<const Stmt*>& conf_blocks, zeek_uint_t conf_start)
{ {

View file

@ -105,6 +105,24 @@ protected:
ExprPtr def_expr; ExprPtr def_expr;
}; };
// Class tracking information associated with a (global) identifier's
// (re-)initialization.
class IDInitInfo
{
public:
IDInitInfo(const ID* _id, ExprPtr _init, InitClass _ic) : id(_id), init(_init), ic(_ic) { }
const ID* Id() const { return id; }
const ExprPtr& Init() const { return init; }
InitClass IC() const { return ic; }
private:
const ID* id;
ExprPtr init;
InitClass ic;
};
// Class tracking optimization information associated with identifiers. // Class tracking optimization information associated with identifiers.
class IDOptInfo class IDOptInfo
@ -118,11 +136,19 @@ public:
void Clear(); void Clear();
// Used to track expressions employed when explicitly initializing // Used to track expressions employed when explicitly initializing
// the identifier. These are needed by compile-to-C++ script // the (global) identifier. These are needed by compile-to-C++ script
// optimization. They're not used by ZAM optimization. // optimization, and for tracking variable usage. An initialization
void AddInitExpr(ExprPtr init_expr); // class other than INIT_NONE indicates that initialization should
// be done with the ExprPtr form of ID::SetVal.
void AddInitExpr(ExprPtr init_expr, InitClass ic = INIT_NONE);
// Returns the initialization expressions for this identifier.
const std::vector<ExprPtr>& GetInitExprs() const { return init_exprs; } const std::vector<ExprPtr>& GetInitExprs() const { return init_exprs; }
// Returns a list of the initialization expressions seen for all
// globals, ordered by when they were processed.
static auto& GetGlobalInitExprs() { return global_init_exprs; }
// Associated constant expression, if any. This is only set // Associated constant expression, if any. This is only set
// for identifiers that are aliases for a constant (i.e., there // for identifiers that are aliases for a constant (i.e., there
// are no other assignments to them). // are no other assignments to them).
@ -224,6 +250,9 @@ private:
// one of the earlier instances rather than the last one. // one of the earlier instances rather than the last one.
std::vector<ExprPtr> init_exprs; std::vector<ExprPtr> init_exprs;
// Tracks initializations of globals in the order they're seen.
static std::vector<IDInitInfo> global_init_exprs;
// If non-nil, a constant that this identifier always holds // If non-nil, a constant that this identifier always holds
// once initially defined. // once initially defined.
const ConstExpr* const_expr = nullptr; const ConstExpr* const_expr = nullptr;
@ -256,8 +285,12 @@ private:
// Whether the identifier is a temporary variable. // Whether the identifier is a temporary variable.
bool is_temp = false; bool is_temp = false;
// Only needed for debugging purposes. // Associated identifier, to enable tracking of initialization
// expressions for globals (for C++ compilation), and for debugging
// output.
const ID* my_id; const ID* my_id;
// Only needed for debugging purposes.
bool tracing = false; bool tracing = false;
// Track whether we've already generated usage errors. // Track whether we've already generated usage errors.

View file

@ -917,7 +917,10 @@ p_hash_type ProfileFuncs::HashAttrs(const AttributesPtr& Attrs)
// can vary in structure due to compilation of elements. We // can vary in structure due to compilation of elements. We
// do though enforce consistency for their types. // do though enforce consistency for their types.
if ( e ) if ( e )
{
h = merge_p_hashes(h, HashType(e->GetType())); h = merge_p_hashes(h, HashType(e->GetType()));
h = merge_p_hashes(h, p_hash(e.get()));
}
} }
return h; return h;