Merge remote-tracking branch 'origin/topic/vern/CPP-global-init'

* origin/topic/vern/CPP-global-init:
  updates to -O C++ user and maintenance documentation
  use dynamic rather than static initialization of globals for scripts compiled to C++
  safety checking for initializing scripts compiled to C++
  fixes for initializing scripts compiled to C++
  restructured tracking of initializations of globals for script compilation
This commit is contained in:
Tim Wojtulewicz 2022-10-03 09:45:05 -07:00
commit bf98c1e9c8
17 changed files with 194 additions and 66 deletions

10
CHANGES
View file

@ -1,3 +1,13 @@
5.2.0-dev.46 | 2022-10-03 09:45:05 -0700
* use dynamic rather than static initialization of globals for scripts compiled to C++ (Vern Paxson, Corelight)
* safety checking for initializing scripts compiled to C++ (Vern Paxson, Corelight)
* fixes for initializing scripts compiled to C++ (Vern Paxson, Corelight)
* restructured tracking of initializations of globals for script compilation (Vern Paxson, Corelight)
5.2.0-dev.40 | 2022-10-03 09:44:40 -0700
* http: Prevent script errors when http$current_entity is not set (Arne Welzel, Corelight)

View file

@ -1 +1 @@
5.2.0-dev.40
5.2.0-dev.46

View file

@ -680,11 +680,6 @@ std::vector<Func*> ID::GetOptionHandlers() const
return v;
}
void IDOptInfo::AddInitExpr(ExprPtr init_expr)
{
init_exprs.emplace_back(std::move(init_expr));
}
} // namespace detail
} // namespace zeek

View file

@ -17,6 +17,7 @@
#include "zeek/Traverse.h"
#include "zeek/Val.h"
#include "zeek/module_util.h"
#include "zeek/script_opt/IDOptInfo.h"
#include "zeek/script_opt/StmtOptInfo.h"
#include "zeek/script_opt/UsageAnalyzer.h"
@ -117,12 +118,12 @@ static bool add_prototype(const IDPtr& id, Type* t, std::vector<AttrPtr>* attrs,
return true;
}
static void initialize_var(const IDPtr& id, InitClass c, ExprPtr init)
static ExprPtr initialize_var(const IDPtr& id, InitClass c, ExprPtr init)
{
if ( ! id->HasVal() )
{
if ( c == INIT_REMOVE )
return;
return nullptr;
bool no_init = ! init;
@ -134,7 +135,7 @@ static void initialize_var(const IDPtr& id, InitClass c, ExprPtr init)
auto& t = id->GetType();
if ( ! IsAggr(t) )
return;
return nullptr;
ValPtr init_val;
@ -147,7 +148,7 @@ static void initialize_var(const IDPtr& id, InitClass c, ExprPtr init)
catch ( InterpreterException& )
{
id->Error("initialization failed");
return;
return nullptr;
}
}
@ -157,11 +158,11 @@ static void initialize_var(const IDPtr& id, InitClass c, ExprPtr init)
else if ( t->Tag() == TYPE_VECTOR )
init_val = make_intrusive<VectorVal>(cast_intrusive<VectorType>(t));
id->SetVal(init_val);
return;
init = make_intrusive<ConstExpr>(init_val);
c = INIT_FULL;
}
if ( c == INIT_EXTRA )
else if ( c == INIT_EXTRA )
c = INIT_FULL;
}
@ -177,19 +178,12 @@ static void initialize_var(const IDPtr& id, InitClass c, ExprPtr init)
assignment = make_intrusive<RemoveFromExpr>(lhs, init);
else
// This can happen due to error propagation.
return;
return nullptr;
if ( assignment->IsError() )
return;
return nullptr;
try
{
(void)assignment->Eval(nullptr);
}
catch ( InterpreterException& )
{
id->Error("initialization failed");
}
return assignment;
}
static void make_var(const IDPtr& id, TypePtr t, InitClass c, ExprPtr init,
@ -347,11 +341,29 @@ static void make_var(const IDPtr& id, TypePtr t, InitClass c, ExprPtr init,
if ( init && ((c == INIT_EXTRA && id->GetAttr(ATTR_ADD_FUNC)) ||
(c == INIT_REMOVE && id->GetAttr(ATTR_DEL_FUNC))) )
{
// Just apply the function.
id->SetVal(init, c);
id->GetOptInfo()->AddInitExpr(init, c);
}
else if ( dt != VAR_REDEF || init || ! attr )
initialize_var(id, c, init);
{
auto init_expr = initialize_var(id, c, init);
if ( init_expr )
{
id->GetOptInfo()->AddInitExpr(init_expr);
try
{
(void)init_expr->Eval(nullptr);
}
catch ( InterpreterException& )
{
id->Error("initialization failed");
}
}
}
}
if ( dt == VAR_CONST )

View file

@ -102,7 +102,6 @@
#include "zeek/zeekygen/Manager.h"
#include "zeek/module_util.h"
#include "zeek/IntrusivePtr.h"
#include "zeek/script_opt/IDOptInfo.h"
extern const char* filename; // Absolute path of file currently being parsed.
extern const char* last_filename; // Absolute path of last file parsed.
@ -321,8 +320,6 @@ static void build_global(ID* id, Type* t, InitClass ic, Expr* e,
add_global(id_ptr, std::move(t_ptr), ic, e_ptr, std::move(attrs_ptr), dt);
id->GetOptInfo()->AddInitExpr(e_ptr);
if ( dt == VAR_REDEF )
zeekygen_mgr->Redef(id, ::filename, ic, std::move(e_ptr));
else
@ -342,8 +339,6 @@ static StmtPtr build_local(ID* id, Type* t, InitClass ic, Expr* e,
auto init = add_local(std::move(id_ptr), std::move(t_ptr), ic,
e_ptr, std::move(attrs_ptr), dt);
id->GetOptInfo()->AddInitExpr(std::move(e_ptr));
if ( do_coverage )
script_coverage_mgr.AddStmt(init.get());

View file

@ -18,7 +18,7 @@ shared_ptr<CPP_InitInfo> CPPCompile::RegisterAttributes(const AttributesPtr& att
if ( pa != processed_attrs.end() )
return pa->second;
attributes.AddKey(attrs);
attributes.AddKey(attrs, pfs.HashAttrs(attrs));
// The cast is just so we can make an IntrusivePtr.
auto a_rep = const_cast<Attributes*>(attributes.GetRep(attrs));
@ -49,7 +49,7 @@ shared_ptr<CPP_InitInfo> CPPCompile::RegisterAttr(const AttrPtr& attr)
const auto& e = a->GetExpr();
if ( e && ! IsSimpleInitExpr(e) )
init_exprs.AddKey(e);
init_exprs.AddKey(e, p_hash(e));
auto gi = make_shared<AttrInfo>(this, attr);
attr_info->AddInstance(gi);

View file

@ -1015,6 +1015,10 @@ private:
// Generate code to initialize indirect references to constants.
void InitializeConsts();
// Generate code to initialize globals (using dynamic statements
// rather than constants).
void InitializeGlobals();
// Generate the initialization hook for this set of compiled code.
void GenInitHook();

View file

@ -5,6 +5,7 @@
#include <cerrno>
#include "zeek/script_opt/CPP/Compile.h"
#include "zeek/script_opt/IDOptInfo.h"
extern std::unordered_set<std::string> files_with_conditionals;
@ -310,6 +311,9 @@ void CPPCompile::RegisterCompiledBody(const string& f)
void CPPCompile::GenEpilog()
{
NL();
InitializeGlobals();
NL();
for ( const auto& ii : init_infos )
GenInitExpr(ii.second);
@ -468,6 +472,9 @@ void CPPCompile::GenFinishInit()
NL();
Emit("load_BiFs__CPP();");
NL();
Emit("init_globals__CPP();");
EndBlock();
}

View file

@ -188,6 +188,50 @@ void CPPCompile::InitializeConsts()
EndBlock(true);
}
void CPPCompile::InitializeGlobals()
{
Emit("static void init_globals__CPP()");
StartBlock();
Emit("Frame* f__CPP = nullptr;");
NL();
for ( auto ginit : IDOptInfo::GetGlobalInitExprs() )
{
auto g = ginit.Id();
if ( pfs.Globals().count(g) == 0 )
continue;
auto ic = ginit.IC();
auto& init = ginit.Init();
if ( ic == INIT_NONE )
{
IDPtr gid = {NewRef{}, const_cast<ID*>(g)};
auto gn = make_intrusive<RefExpr>(make_intrusive<NameExpr>(gid));
auto ae = make_intrusive<AssignExpr>(gn, init, true);
Emit(GenExpr(ae.get(), GEN_NATIVE, true) + ";");
}
else
{
// This branch occurs for += or -= initializations that
// use associated functions.
string ics;
if ( ic == INIT_EXTRA )
ics = "INIT_EXTRA";
else if ( ic == INIT_REMOVE )
ics = "INIT_REMOVE";
else
reporter->FatalError("bad initialization class in CPPCompile::InitializeGlobals()");
Emit("%s->SetValue(%s, %s);", globals[g->Name()], GenExpr(init, GEN_NATIVE, true), ics);
}
}
EndBlock();
}
void CPPCompile::GenInitHook()
{
NL();

View file

@ -339,16 +339,7 @@ GlobalInitInfo::GlobalInitInfo(CPPCompile* c, const ID* g, string _CPP_name)
attrs = -1;
exported = g->IsExport();
auto v = g->GetVal();
if ( v && gt->Tag() == TYPE_OPAQUE )
{
reporter->Error("cannot compile to C++ global \"%s\" initialized to opaque value",
g->Name());
v = nullptr;
}
val = ValElem(c, v);
val = ValElem(c, nullptr); // empty because we initialize dynamically
}
void GlobalInitInfo::InitializerVals(std::vector<std::string>& ivs) const
@ -439,6 +430,9 @@ ListTypeInfo::ListTypeInfo(CPPCompile* _c, TypePtr _t)
if ( gi )
init_cohort = max(init_cohort, gi->InitCohort());
}
if ( ! types.empty() )
++init_cohort;
}
void ListTypeInfo::AddInitializerVals(std::vector<std::string>& ivs) const

View file

@ -173,9 +173,6 @@ as currently done, to instead be in a pseudo-event handler.
code requires initializing a global variable that specifies extend fields in
an extensible record (i.e., fields added using `redef`).
* The compiler will not compile bodies that include "when" statements
This is fairly involved to fix.
* If a lambda generates an event that is not otherwise referred to, that
event will not be registered upon instantiating the lambda. This is not
particularly difficult to fix.

View file

@ -487,12 +487,11 @@ void CPP_GlobalInit::Generate(InitsManager* im, std::vector<void*>& /* inits_vec
global = lookup_global__CPP(name, im->Types(type), exported);
if ( ! global->HasVal() && val >= 0 )
{
global->SetVal(im->ConstVals(val));
if ( attrs >= 0 )
global->SetAttrs(im->Attributes(attrs));
}
}
void generate_indices_set(int* inits, std::vector<std::vector<int>>& indices_set)
{

View file

@ -66,12 +66,41 @@ public:
// Accessors for the sundry initialization vectors, each retrieving
// a specific element identified by an index/offset.
const std::vector<int>& Indices(int offset) const { return indices[offset]; }
const char* Strings(int offset) const { return strings[offset]; }
const p_hash_type Hashes(int offset) const { return hashes[offset]; }
const TypePtr& Types(int offset) const { return types[offset]; }
const AttributesPtr& Attributes(int offset) const { return attributes[offset]; }
const AttrPtr& Attrs(int offset) const { return attrs[offset]; }
const CallExprPtr& CallExprs(int offset) const { return call_exprs[offset]; }
const char* Strings(int offset) const
{
ASSERT(offset >= 0 && offset < strings.size());
ASSERT(strings[offset]);
return strings[offset];
}
const p_hash_type Hashes(int offset) const
{
ASSERT(offset >= 0 && offset < hashes.size());
return hashes[offset];
}
const TypePtr& Types(int offset) const
{
ASSERT(offset >= 0 && offset < types.size());
ASSERT(types[offset]);
return types[offset];
}
const AttributesPtr& Attributes(int offset) const
{
ASSERT(offset >= 0 && offset < attributes.size());
ASSERT(attributes[offset]);
return attributes[offset];
}
const AttrPtr& Attrs(int offset) const
{
ASSERT(offset >= 0 && offset < attrs.size());
ASSERT(attrs[offset]);
return attrs[offset];
}
const CallExprPtr& CallExprs(int offset) const
{
ASSERT(offset >= 0 && offset < call_exprs.size());
ASSERT(call_exprs[offset]);
return call_exprs[offset];
}
private:
std::vector<CPP_ValElem>& const_vals;

View file

@ -15,7 +15,7 @@ The maintenance workflow:
to check in updates to the list of how the compiler currently fares
on various btests (see end of this doc):
Fri Sep 16 16:13:49 PDT 2022
Thu Sep 29 14:49:49 PDT 2022
2. Run "find-test-files.sh" to generate a list (to stdout) of all of the
possible Zeek source files found in the test suite.
@ -74,18 +74,11 @@ These BTests won't successfully run due to the indicated issue:
Database Of Known Issues (keep sorted)
../testing/btest/bifs/table_values.zeek bad-constructor
../testing/btest/core/global_opaque_val.zeek opaque
../testing/btest/language/alternate-event-hook-prototypes.zeek deprecated
../testing/btest/language/global-init-calls-bif.zeek opaque
../testing/btest/language/redef-same-prefixtable-idx.zeek deprecated
../testing/btest/language/table-redef.zeek deprecated
../testing/btest/language/when-aggregates.zeek bad-when
../testing/btest/scripts/base/protocols/krb/smb2_krb.test skipped
../testing/btest/scripts/base/protocols/krb/smb2_krb_nokeytab.test skipped
../testing/btest/scripts/base/utils/active-http.test test-glitch
../testing/btest/scripts/policy/frameworks/telemetry/log-prefixes.zeek opaque
../testing/btest/scripts/policy/frameworks/telemetry/log.zeek opaque
../testing/btest/scripts/policy/misc/dump-events.zeek skipped
../testing/btest/telemetry/counter.zeek opaque
../testing/btest/telemetry/gauge.zeek opaque
../testing/btest/telemetry/histogram.zeek opaque

View file

@ -51,6 +51,8 @@ void IDDefRegion::Dump() const
printf("\n");
}
std::vector<IDInitInfo> IDOptInfo::global_init_exprs;
void IDOptInfo::Clear()
{
static bool did_init = false;
@ -69,6 +71,17 @@ void IDOptInfo::Clear()
tracing = trace_ID && util::streq(trace_ID, my_id->Name());
}
void IDOptInfo::AddInitExpr(ExprPtr init_expr, InitClass ic)
{
if ( ! init_expr )
return;
if ( my_id->IsGlobal() )
global_init_exprs.emplace_back(IDInitInfo(my_id, init_expr, ic));
init_exprs.emplace_back(std::move(init_expr));
}
void IDOptInfo::DefinedAfter(const Stmt* s, const ExprPtr& e,
const std::vector<const Stmt*>& conf_blocks, zeek_uint_t conf_start)
{

View file

@ -105,6 +105,24 @@ protected:
ExprPtr def_expr;
};
// Class tracking information associated with a (global) identifier's
// (re-)initialization.
class IDInitInfo
{
public:
IDInitInfo(const ID* _id, ExprPtr _init, InitClass _ic) : id(_id), init(_init), ic(_ic) { }
const ID* Id() const { return id; }
const ExprPtr& Init() const { return init; }
InitClass IC() const { return ic; }
private:
const ID* id;
ExprPtr init;
InitClass ic;
};
// Class tracking optimization information associated with identifiers.
class IDOptInfo
@ -118,11 +136,19 @@ public:
void Clear();
// Used to track expressions employed when explicitly initializing
// the identifier. These are needed by compile-to-C++ script
// optimization. They're not used by ZAM optimization.
void AddInitExpr(ExprPtr init_expr);
// the (global) identifier. These are needed by compile-to-C++ script
// optimization, and for tracking variable usage. An initialization
// class other than INIT_NONE indicates that initialization should
// be done with the ExprPtr form of ID::SetVal.
void AddInitExpr(ExprPtr init_expr, InitClass ic = INIT_NONE);
// Returns the initialization expressions for this identifier.
const std::vector<ExprPtr>& GetInitExprs() const { return init_exprs; }
// Returns a list of the initialization expressions seen for all
// globals, ordered by when they were processed.
static auto& GetGlobalInitExprs() { return global_init_exprs; }
// Associated constant expression, if any. This is only set
// for identifiers that are aliases for a constant (i.e., there
// are no other assignments to them).
@ -224,6 +250,9 @@ private:
// one of the earlier instances rather than the last one.
std::vector<ExprPtr> init_exprs;
// Tracks initializations of globals in the order they're seen.
static std::vector<IDInitInfo> global_init_exprs;
// If non-nil, a constant that this identifier always holds
// once initially defined.
const ConstExpr* const_expr = nullptr;
@ -256,8 +285,12 @@ private:
// Whether the identifier is a temporary variable.
bool is_temp = false;
// Only needed for debugging purposes.
// Associated identifier, to enable tracking of initialization
// expressions for globals (for C++ compilation), and for debugging
// output.
const ID* my_id;
// Only needed for debugging purposes.
bool tracing = false;
// Track whether we've already generated usage errors.

View file

@ -917,7 +917,10 @@ p_hash_type ProfileFuncs::HashAttrs(const AttributesPtr& Attrs)
// can vary in structure due to compilation of elements. We
// do though enforce consistency for their types.
if ( e )
{
h = merge_p_hashes(h, HashType(e->GetType()));
h = merge_p_hashes(h, p_hash(e.get()));
}
}
return h;