script optimization fixes:

new initialization model for standalone C++ scripts
 type coercion fix
 ZAM fix for compiling using C++ optimizer
 disambiguate empty constructors
This commit is contained in:
Vern Paxson 2022-11-20 12:16:25 -08:00
parent dbb2aa88a6
commit 134f8f2ef5
14 changed files with 80 additions and 96 deletions

View file

@ -49,7 +49,15 @@ shared_ptr<CPP_InitInfo> CPPCompile::RegisterAttr(const AttrPtr& attr)
const auto& e = a->GetExpr(); const auto& e = a->GetExpr();
if ( e && ! IsSimpleInitExpr(e) ) if ( e && ! IsSimpleInitExpr(e) )
init_exprs.AddKey(e, p_hash(e)); {
auto h = p_hash(e);
// Include the type in the hash, otherwise expressions
// like "vector()" are ambiguous.
h = merge_p_hashes(h, p_hash(e->GetType()));
init_exprs.AddKey(e, h);
}
auto gi = make_shared<AttrInfo>(this, attr); auto gi = make_shared<AttrInfo>(this, attr);
attr_info->AddInstance(gi); attr_info->AddInstance(gi);

View file

@ -29,15 +29,7 @@ function load_CPP%(h: count%): bool
return zeek::val_mgr->False(); return zeek::val_mgr->False();
} }
// Ensure that any compiled scripts are used. If instead (*cb->second)();
// the AST is used, then when we activate the standalone
// scripts, they won't be able to avoid installing redundant
// event handlers.
detail::analysis_options.use_CPP = true;
// Mark this script as one we should activate after loading
// compiled scripts.
detail::standalone_activations.push_back(cb->second);
return zeek::val_mgr->True(); return zeek::val_mgr->True();
%} %}

View file

@ -494,13 +494,18 @@ void CPPCompile::GenFinishInit()
Emit("\tfield_mapping.push_back(fm.ComputeOffset(&im));"); Emit("\tfield_mapping.push_back(fm.ComputeOffset(&im));");
NL(); NL();
Emit("load_BiFs__CPP();");
if ( standalone ) if ( standalone )
{ // BiFs need to be loaded later, because the main
NL(); // initialization finishes upon loading of the activation
// script, rather than after all scripts have been parsed
// and BiFs have been loaded.
Emit("init_globals__CPP();"); Emit("init_globals__CPP();");
} else
// For non-standalone, we're only initializing after all
// scripts have been parsed and BiFs loaded, so it's fine
// to go ahead and do so now.
Emit("load_BiFs__CPP();");
EndBlock(); EndBlock();
} }
@ -513,7 +518,9 @@ void CPPCompile::GenRegisterBodies()
Emit("for ( auto& b : CPP__bodies_to_register )"); Emit("for ( auto& b : CPP__bodies_to_register )");
StartBlock(); StartBlock();
Emit("auto f = make_intrusive<CPPDynStmt>(b.func_name.c_str(), b.func, b.type_signature);"); Emit("auto f = make_intrusive<CPPDynStmt>(b.func_name.c_str(), b.func, b.type_signature);");
Emit("register_body__CPP(f, b.priority, b.h, b.events, finish_init__CPP);");
auto reg = standalone ? "register_standalone_body" : "register_body";
Emit("%s__CPP(f, b.priority, b.h, b.events, finish_init__CPP);", reg);
EndBlock(); EndBlock();
EndBlock(); EndBlock();

View file

@ -869,7 +869,13 @@ string CPPCompile::GenUnary(const Expr* e, GenType gt, const char* op, const cha
if ( e->GetType()->Tag() == TYPE_VECTOR ) if ( e->GetType()->Tag() == TYPE_VECTOR )
return GenVectorOp(e, GenExpr(e->GetOp1(), GEN_NATIVE), vec_op); return GenVectorOp(e, GenExpr(e->GetOp1(), GEN_NATIVE), vec_op);
return NativeToGT(string(op) + "(" + GenExpr(e->GetOp1(), GEN_NATIVE) + ")", e->GetType(), gt); // Look for coercions that the interpreter does implicitly.
auto op1 = e->GetOp1();
if ( op1->GetType()->Tag() == TYPE_COUNT &&
(e->Tag() == EXPR_POSITIVE || e->Tag() == EXPR_NEGATE) )
op1 = make_intrusive<ArithCoerceExpr>(op1, TYPE_INT);
return NativeToGT(string(op) + "(" + GenExpr(op1, GEN_NATIVE) + ")", e->GetType(), gt);
} }
string CPPCompile::GenBinary(const Expr* e, GenType gt, const char* op, const char* vec_op) string CPPCompile::GenBinary(const Expr* e, GenType gt, const char* op, const char* vec_op)

View file

@ -15,7 +15,7 @@ using namespace std;
unordered_map<p_hash_type, CompiledScript> compiled_scripts; unordered_map<p_hash_type, CompiledScript> compiled_scripts;
unordered_map<string, unordered_set<p_hash_type>> added_bodies; unordered_map<string, unordered_set<p_hash_type>> added_bodies;
unordered_map<p_hash_type, void (*)()> standalone_callbacks; unordered_map<p_hash_type, void (*)()> standalone_callbacks;
vector<void (*)()> standalone_activations; vector<void (*)()> standalone_finalizations;
void CPPFunc::Describe(ODesc* d) const void CPPFunc::Describe(ODesc* d) const
{ {

View file

@ -119,9 +119,8 @@ extern std::unordered_map<std::string, std::unordered_set<p_hash_type>> added_bo
// Maps hashes to standalone script initialization callbacks. // Maps hashes to standalone script initialization callbacks.
extern std::unordered_map<p_hash_type, void (*)()> standalone_callbacks; extern std::unordered_map<p_hash_type, void (*)()> standalone_callbacks;
// Standalone callbacks marked for activation by calls to the // Callbacks to finalize initialization of standalone compiled scripts.
// load_CPP() BiF. extern std::vector<void (*)()> standalone_finalizations;
extern std::vector<void (*)()> standalone_activations;
} // namespace detail } // namespace detail

View file

@ -331,8 +331,8 @@ void CPPCompile::GenStandaloneActivation()
Emit("void standalone_init__CPP()"); Emit("void standalone_init__CPP()");
StartBlock(); StartBlock();
Emit("init__CPP();"); Emit("init__CPP();");
Emit("CPP_activation_funcs.push_back(standalone_activation__CPP);"); Emit("standalone_activation__CPP();");
Emit("CPP_activation_hook = activate__CPPs;"); Emit("standalone_finalizations.push_back(load_BiFs__CPP);");
EndBlock(); EndBlock();
} }

View file

@ -11,7 +11,6 @@ namespace zeek::detail
using namespace std; using namespace std;
vector<CPP_init_func> CPP_init_funcs; vector<CPP_init_func> CPP_init_funcs;
vector<CPP_init_func> CPP_activation_funcs;
// Calls all of the initialization hooks, in the order they were added. // Calls all of the initialization hooks, in the order they were added.
void init_CPPs() void init_CPPs()
@ -25,18 +24,6 @@ void init_CPPs()
need_init = false; need_init = false;
} }
// Calls all of the registered activation hooks for standalone code.
void activate__CPPs()
{
static bool need_init = true;
if ( need_init )
for ( auto f : CPP_activation_funcs )
f();
need_init = false;
}
// This is a trick used to register the presence of compiled code. // This is a trick used to register the presence of compiled code.
// The initialization of the static variable will make CPP_init_hook // The initialization of the static variable will make CPP_init_hook
// non-null, which the main part of Zeek uses to tell that there's // non-null, which the main part of Zeek uses to tell that there's
@ -68,6 +55,16 @@ void register_body__CPP(CPPStmtPtr body, int priority, p_hash_type hash, vector<
compiled_scripts[hash] = {move(body), priority, move(events), finish_init}; compiled_scripts[hash] = {move(body), priority, move(events), finish_init};
} }
static unordered_map<p_hash_type, CompiledScript> compiled_standalone_scripts;
void register_standalone_body__CPP(CPPStmtPtr body, int priority, p_hash_type hash,
vector<string> events, void (*finish_init)())
{
// For standalone scripts we don't actually need finish_init, but
// we keep it for symmetry with compiled_scripts.
compiled_standalone_scripts[hash] = {move(body), priority, move(events), finish_init};
}
void register_lambda__CPP(CPPStmtPtr body, p_hash_type hash, const char* name, TypePtr t, void register_lambda__CPP(CPPStmtPtr body, p_hash_type hash, const char* name, TypePtr t,
bool has_captures) bool has_captures)
{ {
@ -111,6 +108,13 @@ void activate_bodies__CPP(const char* fn, const char* module, bool exported, Typ
fg->SetType(ft); fg->SetType(ft);
} }
if ( ! fg->GetAttr(ATTR_IS_USED) )
{
vector<AttrPtr> used_attr;
used_attr.emplace_back(make_intrusive<Attr>(ATTR_IS_USED));
fg->AddAttrs(make_intrusive<Attributes>(used_attr, nullptr, false, true));
}
auto v = fg->GetVal(); auto v = fg->GetVal();
if ( ! v ) if ( ! v )
{ // Create it. { // Create it.
@ -123,19 +127,6 @@ void activate_bodies__CPP(const char* fn, const char* module, bool exported, Typ
} }
auto f = v->AsFunc(); auto f = v->AsFunc();
const auto& bodies = f->GetBodies();
// Track hashes of compiled bodies already associated with f.
unordered_set<p_hash_type> existing_CPP_bodies;
for ( auto& b : bodies )
{
auto s = b.stmts;
if ( s->Tag() != STMT_CPP )
continue;
const auto& cpp_s = cast_intrusive<CPPStmt>(s);
existing_CPP_bodies.insert(cpp_s->GetHash());
}
// Events we need to register. // Events we need to register.
unordered_set<string> events; unordered_set<string> events;
@ -148,15 +139,9 @@ void activate_bodies__CPP(const char* fn, const char* module, bool exported, Typ
for ( auto h : hashes ) for ( auto h : hashes )
{ {
if ( existing_CPP_bodies.count(h) > 0 )
// We're presumably running with the original script,
// and have already incorporated this compiled body
// into f.
continue;
// Add in the new body. // Add in the new body.
auto csi = compiled_scripts.find(h); auto csi = compiled_standalone_scripts.find(h);
ASSERT(csi != compiled_scripts.end()); ASSERT(csi != compiled_standalone_scripts.end());
auto cs = csi->second; auto cs = csi->second;
f->AddBody(cs.body, no_inits, num_params, cs.priority); f->AddBody(cs.body, no_inits, num_params, cs.priority);

View file

@ -23,12 +23,6 @@ using CPP_init_func = void (*)();
// Tracks the initialization hooks for different compilation runs. // Tracks the initialization hooks for different compilation runs.
extern std::vector<CPP_init_func> CPP_init_funcs; extern std::vector<CPP_init_func> CPP_init_funcs;
// Tracks the activation hooks for different "standalone" compilations.
extern std::vector<CPP_init_func> CPP_activation_funcs;
// Activates all previously registered standalone code.
extern void activate__CPPs();
// Registers the given global type, if not already present. // Registers the given global type, if not already present.
extern void register_type__CPP(TypePtr t, const std::string& name); extern void register_type__CPP(TypePtr t, const std::string& name);
@ -39,6 +33,10 @@ extern void register_type__CPP(TypePtr t, const std::string& name);
extern void register_body__CPP(CPPStmtPtr body, int priority, p_hash_type hash, extern void register_body__CPP(CPPStmtPtr body, int priority, p_hash_type hash,
std::vector<std::string> events, void (*finish_init)()); std::vector<std::string> events, void (*finish_init)());
// Same but for standalone function bodies.
extern void register_standalone_body__CPP(CPPStmtPtr body, int priority, p_hash_type hash,
std::vector<std::string> events, void (*finish_init)());
// Registers a lambda body as associated with the given hash. Includes // Registers a lambda body as associated with the given hash. Includes
// the name of the lambda (so it can be made available as a quasi-global // the name of the lambda (so it can be made available as a quasi-global
// identifier), its type, and whether it needs captures. // identifier), its type, and whether it needs captures.

View file

@ -11,6 +11,10 @@ using namespace std;
void CPPCompile::GenStmt(const Stmt* s) void CPPCompile::GenStmt(const Stmt* s)
{ {
auto loc = s->GetLocationInfo();
if ( loc != &detail::no_location && s->Tag() != STMT_LIST )
Emit("// %s:%s", loc->filename, to_string(loc->first_line));
switch ( s->Tag() ) switch ( s->Tag() )
{ {
case STMT_INIT: case STMT_INIT:

View file

@ -16,9 +16,6 @@ template <class T> void CPPTracker<T>::AddKey(IntrusivePtr<T> key, p_hash_type h
if ( HasKey(key) ) if ( HasKey(key) )
return; return;
if ( h == 0 )
h = Hash(key);
if ( map2.count(h) == 0 ) if ( map2.count(h) == 0 )
{ {
auto index = keys.size(); auto index = keys.size();
@ -57,16 +54,6 @@ template <class T> string CPPTracker<T>::KeyName(const T* key)
return full_name; return full_name;
} }
template <class T> p_hash_type CPPTracker<T>::Hash(IntrusivePtr<T> key) const
{
ODesc d;
d.SetDeterminism(true);
key->Describe(&d);
string desc = d.Description();
auto h = hash<string>{}(base_name + desc);
return p_hash_type(h);
}
// Instantiate the templates we'll need. // Instantiate the templates we'll need.
template class CPPTracker<Type>; template class CPPTracker<Type>;
template class CPPTracker<Attributes>; template class CPPTracker<Attributes>;

View file

@ -35,9 +35,8 @@ public:
bool HasKey(const T* key) const { return map.count(key) > 0; } bool HasKey(const T* key) const { return map.count(key) > 0; }
bool HasKey(IntrusivePtr<T> key) const { return HasKey(key.get()); } bool HasKey(IntrusivePtr<T> key) const { return HasKey(key.get()); }
// Only adds the key if it's not already present. If a hash // Only adds the key if it's not already present.
// is provided, then refrains from computing it. void AddKey(IntrusivePtr<T> key, p_hash_type h);
void AddKey(IntrusivePtr<T> key, p_hash_type h = 0);
void AddInitInfo(const T* rep, std::shared_ptr<CPP_InitInfo> gi) { gi_s[rep] = std::move(gi); } void AddInitInfo(const T* rep, std::shared_ptr<CPP_InitInfo> gi) { gi_s[rep] = std::move(gi); }
@ -58,9 +57,6 @@ public:
const T* GetRep(IntrusivePtr<T> key) { return GetRep(key.get()); } const T* GetRep(IntrusivePtr<T> key) { return GetRep(key.get()); }
private: private:
// Compute a hash for the given key.
p_hash_type Hash(IntrusivePtr<T> key) const;
// Maps keys to internal representations (i.e., hashes). // Maps keys to internal representations (i.e., hashes).
std::unordered_map<const T*, p_hash_type> map; std::unordered_map<const T*, p_hash_type> map;

View file

@ -418,13 +418,8 @@ static void use_CPP()
} }
} }
if ( num_used == 0 && standalone_activations.empty() ) if ( num_used == 0 )
reporter->FatalError("no C++ functions found to use"); reporter->FatalError("no C++ functions found to use");
// Now that we've loaded all of the compiled scripts
// relevant for the AST, activate standalone ones.
for ( auto cb : standalone_activations )
(*cb)();
} }
static void generate_CPP(std::unique_ptr<ProfileFuncs>& pfs) static void generate_CPP(std::unique_ptr<ProfileFuncs>& pfs)
@ -527,14 +522,13 @@ static void analyze_scripts_for_ZAM(std::unique_ptr<ProfileFuncs>& pfs)
} }
void analyze_scripts(bool no_unused_warnings) void analyze_scripts(bool no_unused_warnings)
{
static bool did_init = false;
if ( ! did_init )
{ {
init_options(); init_options();
did_init = true;
} // Any standalone compiled scripts have already been instantiated
// at this point, but may require post-loading-of-scripts finalization.
for ( auto cb : standalone_finalizations )
(*cb)();
std::unique_ptr<UsageAnalyzer> ua; std::unique_ptr<UsageAnalyzer> ua;
if ( ! no_unused_warnings ) if ( ! no_unused_warnings )

View file

@ -72,11 +72,16 @@ int ZAMCompiler::InternalAddVal(ZInstAux* zi, int i, Expr* e)
auto& indices = e->GetOp1()->AsListExpr()->Exprs(); auto& indices = e->GetOp1()->AsListExpr()->Exprs();
auto val = e->GetOp2(); auto val = e->GetOp2();
int width = indices.length(); int width = indices.length();
int num_vals;
for ( int j = 0; j < width; ++j ) for ( int j = 0; j < width; ++j )
ASSERT(InternalAddVal(zi, i + j, indices[j]) == 1); {
num_vals = InternalAddVal(zi, i + j, indices[j]);
ASSERT(num_vals == 1);
}
ASSERT(InternalAddVal(zi, i + width, val.get()) == 1); num_vals = InternalAddVal(zi, i + width, val.get());
ASSERT(num_vals == 1);
return width + 1; return width + 1;
} }
@ -87,7 +92,10 @@ int ZAMCompiler::InternalAddVal(ZInstAux* zi, int i, Expr* e)
int width = indices.length(); int width = indices.length();
for ( int j = 0; j < width; ++j ) for ( int j = 0; j < width; ++j )
ASSERT(InternalAddVal(zi, i + j, indices[j]) == 1); {
int num_vals = InternalAddVal(zi, i + j, indices[j]);
ASSERT(num_vals == 1);
}
return width; return width;
} }