mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 14:48:21 +00:00
Merge remote-tracking branch 'origin/topic/vern/cpp-prep-profiling'
* origin/topic/vern/cpp-prep-profiling:
  - Add missing errno include to ProfileFunc.cc
  - Adjust GetAttrs() usage in ProfileFunc::PreExpr() to const-reference
  - Fix whitespace in ProfileFunc::PreExpr()
  - Avoid redundant map/set searches in various ProfileFunc methods
  - Improve detail::script_specific_filename()
  - Use std::string_view in p_hash() to avoid string copies
  - function profiling rewritten - more detailed info, supports global profiling
  - track whether a given function/body should be included/skipped for optimization
This commit is contained in:
commit
fe6fd61468
6 changed files with 935 additions and 125 deletions
14
CHANGES
14
CHANGES
|
@ -1,4 +1,18 @@
|
||||||
|
|
||||||
|
4.1.0-dev.461 | 2021-04-01 14:11:44 -0700
|
||||||
|
|
||||||
|
* function profiling rewritten - more detailed info, supports global profiling (Vern Paxson, Corelight)
|
||||||
|
|
||||||
|
Hashes for Zeek script types are now done globally rather than
|
||||||
|
per-function-body, which can save considerable time due to the complexity
|
||||||
|
of some commonly used types (such as connection records).
|
||||||
|
|
||||||
|
Hashing has been expanded to provide more robust distinctness
|
||||||
|
(lack of collisions in practice) and determinism (consistently computing
|
||||||
|
the same hash across compilations).
|
||||||
|
|
||||||
|
* track whether a given function/body should be included/skipped for optimization (Vern Paxson, Corelight)
|
||||||
|
|
||||||
4.1.0-dev.451 | 2021-03-31 11:58:08 -0700
|
4.1.0-dev.451 | 2021-03-31 11:58:08 -0700
|
||||||
|
|
||||||
* Add ssh to Alpine Dockerfile for retrieving external test repos (Jon Siwek, Corelight)
|
* Add ssh to Alpine Dockerfile for retrieving external test repos (Jon Siwek, Corelight)
|
||||||
|
|
2
VERSION
2
VERSION
|
@ -1 +1 @@
|
||||||
4.1.0-dev.451
|
4.1.0-dev.461
|
||||||
|
|
|
@ -1,5 +1,8 @@
|
||||||
// See the file "COPYING" in the main distribution directory for copyright.
|
// See the file "COPYING" in the main distribution directory for copyright.
|
||||||
|
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <cerrno>
|
||||||
|
|
||||||
#include "zeek/script_opt/ProfileFunc.h"
|
#include "zeek/script_opt/ProfileFunc.h"
|
||||||
#include "zeek/Desc.h"
|
#include "zeek/Desc.h"
|
||||||
#include "zeek/Stmt.h"
|
#include "zeek/Stmt.h"
|
||||||
|
@ -9,27 +12,100 @@
|
||||||
namespace zeek::detail {
|
namespace zeek::detail {
|
||||||
|
|
||||||
|
|
||||||
TraversalCode ProfileFunc::PreStmt(const Stmt* s)
|
// Computes the profiling hash of an Obj based on its (deterministic)
|
||||||
|
// description.
|
||||||
|
p_hash_type p_hash(const Obj* o)
|
||||||
{
|
{
|
||||||
++num_stmts;
|
ODesc d;
|
||||||
|
d.SetDeterminism(true);
|
||||||
auto tag = s->Tag();
|
o->Describe(&d);
|
||||||
|
return p_hash(d.Description());
|
||||||
if ( compute_hash )
|
|
||||||
UpdateHash(int(tag));
|
|
||||||
|
|
||||||
if ( tag == STMT_INIT )
|
|
||||||
{
|
|
||||||
for ( const auto& id : s->AsInitStmt()->Inits() )
|
|
||||||
inits.insert(id.get());
|
|
||||||
|
|
||||||
// Don't recurse into these, as we don't want to consider
|
|
||||||
// a local that only appears in an initialization as a
|
|
||||||
// relevant local.
|
|
||||||
return TC_ABORTSTMT;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
switch ( tag ) {
|
std::string script_specific_filename(const StmtPtr& body)
|
||||||
|
{
|
||||||
|
// The specific filename is taken from the location filename, making
|
||||||
|
// it absolute if necessary.
|
||||||
|
auto body_loc = body->GetLocationInfo();
|
||||||
|
auto bl_f = body_loc->filename;
|
||||||
|
ASSERT(bl_f != nullptr);
|
||||||
|
|
||||||
|
if ( (bl_f[0] != '.' && bl_f[0] != '/') ||
|
||||||
|
(bl_f[0] == '.' && (bl_f[1] == '/' ||
|
||||||
|
(bl_f[1] == '.' && bl_f[2] == '/'))) )
|
||||||
|
{
|
||||||
|
// Add working directory to avoid collisions over the
|
||||||
|
// same relative name.
|
||||||
|
static std::string working_dir;
|
||||||
|
if ( working_dir.empty() )
|
||||||
|
{
|
||||||
|
char buf[8192];
|
||||||
|
if ( ! getcwd(buf, sizeof buf) )
|
||||||
|
reporter->InternalError("getcwd failed: %s", strerror(errno));
|
||||||
|
|
||||||
|
working_dir = buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
return working_dir + "/" + bl_f;
|
||||||
|
}
|
||||||
|
|
||||||
|
return bl_f;
|
||||||
|
}
|
||||||
|
|
||||||
|
p_hash_type script_specific_hash(const StmtPtr& body, p_hash_type generic_hash)
|
||||||
|
{
|
||||||
|
auto bl_f = script_specific_filename(body);
|
||||||
|
return merge_p_hashes(generic_hash, p_hash(bl_f));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
ProfileFunc::ProfileFunc(const Func* func, const StmtPtr& body)
|
||||||
|
{
|
||||||
|
Profile(func->GetType().get(), body);
|
||||||
|
}
|
||||||
|
|
||||||
|
ProfileFunc::ProfileFunc(const Expr* e)
|
||||||
|
{
|
||||||
|
if ( e->Tag() == EXPR_LAMBDA )
|
||||||
|
{
|
||||||
|
auto func = e->AsLambdaExpr();
|
||||||
|
|
||||||
|
for ( auto oid : func->OuterIDs() )
|
||||||
|
captures.insert(oid);
|
||||||
|
|
||||||
|
Profile(func->GetType()->AsFuncType(), func->Ingredients().body);
|
||||||
|
}
|
||||||
|
|
||||||
|
else
|
||||||
|
// We don't have a function type, so do the traversal
|
||||||
|
// directly.
|
||||||
|
e->Traverse(this);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ProfileFunc::Profile(const FuncType* ft, const StmtPtr& body)
|
||||||
|
{
|
||||||
|
num_params = ft->Params()->NumFields();
|
||||||
|
TrackType(ft);
|
||||||
|
body->Traverse(this);
|
||||||
|
}
|
||||||
|
|
||||||
|
TraversalCode ProfileFunc::PreStmt(const Stmt* s)
|
||||||
|
{
|
||||||
|
stmts.push_back(s);
|
||||||
|
|
||||||
|
switch ( s->Tag() ) {
|
||||||
|
case STMT_INIT:
|
||||||
|
for ( const auto& id : s->AsInitStmt()->Inits() )
|
||||||
|
{
|
||||||
|
inits.insert(id.get());
|
||||||
|
TrackType(id->GetType());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Don't traverse further into the statement, since we
|
||||||
|
// don't want to view the identifiers as locals unless
|
||||||
|
// they're also used elsewhere.
|
||||||
|
return TC_ABORTSTMT;
|
||||||
|
|
||||||
case STMT_WHEN:
|
case STMT_WHEN:
|
||||||
++num_when_stmts;
|
++num_when_stmts;
|
||||||
|
|
||||||
|
@ -39,7 +115,8 @@ TraversalCode ProfileFunc::PreStmt(const Stmt* s)
|
||||||
|
|
||||||
// It doesn't do any harm for us to re-traverse the
|
// It doesn't do any harm for us to re-traverse the
|
||||||
// conditional, so we don't bother hand-traversing the
|
// conditional, so we don't bother hand-traversing the
|
||||||
// rest of the when but just let the usual processing do it.
|
// rest of the "when", but just let the usual processing
|
||||||
|
// do it.
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case STMT_FOR:
|
case STMT_FOR:
|
||||||
|
@ -67,6 +144,8 @@ TraversalCode ProfileFunc::PreStmt(const Stmt* s)
|
||||||
// incomplete list of locals that need to be tracked.
|
// incomplete list of locals that need to be tracked.
|
||||||
|
|
||||||
auto sw = s->AsSwitchStmt();
|
auto sw = s->AsSwitchStmt();
|
||||||
|
bool is_type_switch = false;
|
||||||
|
|
||||||
for ( auto& c : *sw->Cases() )
|
for ( auto& c : *sw->Cases() )
|
||||||
{
|
{
|
||||||
auto idl = c->TypeCases();
|
auto idl = c->TypeCases();
|
||||||
|
@ -74,8 +153,15 @@ TraversalCode ProfileFunc::PreStmt(const Stmt* s)
|
||||||
{
|
{
|
||||||
for ( auto id : *idl )
|
for ( auto id : *idl )
|
||||||
locals.insert(id);
|
locals.insert(id);
|
||||||
|
|
||||||
|
is_type_switch = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( is_type_switch )
|
||||||
|
type_switches.insert(sw);
|
||||||
|
else
|
||||||
|
expr_switches.insert(sw);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -88,37 +174,74 @@ TraversalCode ProfileFunc::PreStmt(const Stmt* s)
|
||||||
|
|
||||||
TraversalCode ProfileFunc::PreExpr(const Expr* e)
|
TraversalCode ProfileFunc::PreExpr(const Expr* e)
|
||||||
{
|
{
|
||||||
++num_exprs;
|
exprs.push_back(e);
|
||||||
|
|
||||||
auto tag = e->Tag();
|
TrackType(e->GetType());
|
||||||
|
|
||||||
if ( compute_hash )
|
switch ( e->Tag() ) {
|
||||||
UpdateHash(int(tag));
|
|
||||||
|
|
||||||
switch ( tag ) {
|
|
||||||
case EXPR_CONST:
|
case EXPR_CONST:
|
||||||
if ( compute_hash )
|
constants.push_back(e->AsConstExpr());
|
||||||
{
|
|
||||||
CheckType(e->GetType());
|
|
||||||
UpdateHash(e->AsConstExpr()->ValuePtr());
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case EXPR_NAME:
|
case EXPR_NAME:
|
||||||
{
|
{
|
||||||
auto n = e->AsNameExpr();
|
auto n = e->AsNameExpr();
|
||||||
auto id = n->Id();
|
auto id = n->Id();
|
||||||
if ( id->IsGlobal() )
|
|
||||||
globals.insert(id);
|
|
||||||
else
|
|
||||||
locals.insert(id);
|
|
||||||
|
|
||||||
if ( compute_hash )
|
if ( id->IsGlobal() )
|
||||||
{
|
{
|
||||||
UpdateHash({NewRef{}, id});
|
globals.insert(id);
|
||||||
CheckType(e->GetType());
|
all_globals.insert(id);
|
||||||
|
|
||||||
|
const auto& t = id->GetType();
|
||||||
|
if ( t->Tag() == TYPE_FUNC &&
|
||||||
|
t->AsFuncType()->Flavor() == FUNC_FLAVOR_EVENT )
|
||||||
|
events.insert(id->Name());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// This is a tad ugly. Unfortunately due to the
|
||||||
|
// weird way that Zeek function *declarations* work,
|
||||||
|
// there's no reliable way to get the list of
|
||||||
|
// parameters for a function *definition*, since
|
||||||
|
// they can have different names than what's present
|
||||||
|
// in the declaration. So we identify them directly,
|
||||||
|
// by knowing that they come at the beginning of the
|
||||||
|
// frame ... and being careful to avoid misidentifying
|
||||||
|
// a lambda capture with a low frame offset as a
|
||||||
|
// parameter.
|
||||||
|
if ( captures.count(id) == 0 &&
|
||||||
|
id->Offset() < num_params )
|
||||||
|
params.insert(id);
|
||||||
|
|
||||||
|
locals.insert(id);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Turns out that NameExpr's can be constructed using a
|
||||||
|
// different Type* than that of the identifier itself,
|
||||||
|
// so be sure we track the latter too.
|
||||||
|
TrackType(id->GetType());
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case EXPR_FIELD:
|
||||||
|
{
|
||||||
|
auto f = e->AsFieldExpr()->Field();
|
||||||
|
addl_hashes.push_back(p_hash(f));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case EXPR_ASSIGN:
|
||||||
|
{
|
||||||
|
if ( e->GetOp1()->Tag() == EXPR_REF )
|
||||||
|
{
|
||||||
|
auto lhs = e->GetOp1()->GetOp1();
|
||||||
|
if ( lhs->Tag() == EXPR_NAME )
|
||||||
|
assignees.insert(lhs->AsNameExpr()->Id());
|
||||||
|
}
|
||||||
|
// else this isn't a direct assignment.
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -134,7 +257,7 @@ TraversalCode ProfileFunc::PreExpr(const Expr* e)
|
||||||
}
|
}
|
||||||
|
|
||||||
auto n = f->AsNameExpr();
|
auto n = f->AsNameExpr();
|
||||||
IDPtr func = {NewRef{}, n->Id()};
|
auto func = n->Id();
|
||||||
|
|
||||||
if ( ! func->IsGlobal() )
|
if ( ! func->IsGlobal() )
|
||||||
{
|
{
|
||||||
|
@ -142,6 +265,8 @@ TraversalCode ProfileFunc::PreExpr(const Expr* e)
|
||||||
return TC_CONTINUE;
|
return TC_CONTINUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
all_globals.insert(func);
|
||||||
|
|
||||||
auto func_v = func->GetVal();
|
auto func_v = func->GetVal();
|
||||||
if ( func_v )
|
if ( func_v )
|
||||||
{
|
{
|
||||||
|
@ -156,28 +281,84 @@ TraversalCode ProfileFunc::PreExpr(const Expr* e)
|
||||||
when_calls.insert(bf);
|
when_calls.insert(bf);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
BiF_calls.insert(func_vf);
|
BiF_globals.insert(func);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// We could complain, but for now we don't because
|
// We could complain, but for now we don't, because
|
||||||
// if we're invoked prior to full Zeek initialization,
|
// if we're invoked prior to full Zeek initialization,
|
||||||
// the value might indeed not there.
|
// the value might indeed not be there yet.
|
||||||
// printf("no function value for global %s\n", func->Name());
|
// printf("no function value for global %s\n", func->Name());
|
||||||
}
|
}
|
||||||
|
|
||||||
// Recurse into the arguments.
|
// Recurse into the arguments.
|
||||||
auto args = c->Args();
|
auto args = c->Args();
|
||||||
args->Traverse(this);
|
args->Traverse(this);
|
||||||
|
|
||||||
|
// Do the following explicitly, since we won't be recursing
|
||||||
|
// into the LHS global.
|
||||||
|
|
||||||
|
// Note that the type of the expression and the type of the
|
||||||
|
// function can actually be *different* due to the NameExpr
|
||||||
|
// being constructed based on a forward reference and then
|
||||||
|
// the global getting a different (constructed) type when
|
||||||
|
// the function is actually declared. Geez. So hedge our
|
||||||
|
// bets.
|
||||||
|
TrackType(n->GetType());
|
||||||
|
TrackType(func->GetType());
|
||||||
|
|
||||||
|
TrackID(func);
|
||||||
|
|
||||||
return TC_ABORTSTMT;
|
return TC_ABORTSTMT;
|
||||||
}
|
}
|
||||||
|
|
||||||
case EXPR_EVENT:
|
case EXPR_EVENT:
|
||||||
events.insert(e->AsEventExpr()->Name());
|
{
|
||||||
|
auto ev = e->AsEventExpr()->Name();
|
||||||
|
events.insert(ev);
|
||||||
|
addl_hashes.push_back(p_hash(ev));
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case EXPR_LAMBDA:
|
case EXPR_LAMBDA:
|
||||||
++num_lambdas;
|
{
|
||||||
|
auto l = e->AsLambdaExpr();
|
||||||
|
lambdas.push_back(l);
|
||||||
|
|
||||||
|
for ( const auto& i : l->OuterIDs() )
|
||||||
|
{
|
||||||
|
locals.insert(i);
|
||||||
|
TrackID(i);
|
||||||
|
|
||||||
|
// See above re EXPR_NAME regarding the following
|
||||||
|
// logic.
|
||||||
|
if ( captures.count(i) == 0 &&
|
||||||
|
i->Offset() < num_params )
|
||||||
|
params.insert(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Avoid recursing into the body.
|
||||||
|
return TC_ABORTSTMT;
|
||||||
|
}
|
||||||
|
|
||||||
|
case EXPR_SET_CONSTRUCTOR:
|
||||||
|
{
|
||||||
|
auto sc = static_cast<const SetConstructorExpr*>(e);
|
||||||
|
const auto& attrs = sc->GetAttrs();
|
||||||
|
|
||||||
|
if ( attrs )
|
||||||
|
constructor_attrs.insert(attrs.get());
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case EXPR_TABLE_CONSTRUCTOR:
|
||||||
|
{
|
||||||
|
auto tc = static_cast<const TableConstructorExpr*>(e);
|
||||||
|
const auto& attrs = tc->GetAttrs();
|
||||||
|
|
||||||
|
if ( attrs )
|
||||||
|
constructor_attrs.insert(attrs.get());
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
@ -187,32 +368,355 @@ TraversalCode ProfileFunc::PreExpr(const Expr* e)
|
||||||
return TC_CONTINUE;
|
return TC_CONTINUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ProfileFunc::CheckType(const TypePtr& t)
|
TraversalCode ProfileFunc::PreID(const ID* id)
|
||||||
{
|
{
|
||||||
|
TrackID(id);
|
||||||
|
|
||||||
|
// There's no need for any further analysis of this ID.
|
||||||
|
return TC_ABORTSTMT;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ProfileFunc::TrackType(const Type* t)
|
||||||
|
{
|
||||||
|
if ( ! t )
|
||||||
|
return;
|
||||||
|
|
||||||
|
auto [it, inserted] = types.insert(t);
|
||||||
|
|
||||||
|
if ( ! inserted )
|
||||||
|
// We've already tracked it.
|
||||||
|
return;
|
||||||
|
|
||||||
|
ordered_types.push_back(t);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ProfileFunc::TrackID(const ID* id)
|
||||||
|
{
|
||||||
|
if ( ! id )
|
||||||
|
return;
|
||||||
|
|
||||||
|
auto [it, inserted] = ids.insert(id);
|
||||||
|
|
||||||
|
if ( ! inserted )
|
||||||
|
// Already tracked.
|
||||||
|
return;
|
||||||
|
|
||||||
|
ordered_ids.push_back(id);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
ProfileFuncs::ProfileFuncs(std::vector<FuncInfo>& funcs, is_compilable_pred pred)
|
||||||
|
{
|
||||||
|
for ( auto& f : funcs )
|
||||||
|
{
|
||||||
|
if ( f.ShouldSkip() )
|
||||||
|
continue;
|
||||||
|
|
||||||
|
auto pf = std::make_unique<ProfileFunc>(f.Func(), f.Body());
|
||||||
|
|
||||||
|
if ( ! pred || (*pred)(pf.get()) )
|
||||||
|
MergeInProfile(pf.get());
|
||||||
|
else
|
||||||
|
f.SetSkip(true);
|
||||||
|
|
||||||
|
f.SetProfile(std::move(pf));
|
||||||
|
func_profs[f.Func()] = f.Profile();
|
||||||
|
}
|
||||||
|
|
||||||
|
// We now have the main (starting) types used by all of the
|
||||||
|
// functions. Recursively compute their hashes.
|
||||||
|
ComputeTypeHashes(main_types);
|
||||||
|
|
||||||
|
// Computing the hashes can have marked expressions (seen in
|
||||||
|
// record attributes) for further analysis. Likewise, when
|
||||||
|
// doing the profile merges above we may have noted lambda
|
||||||
|
// expressions. Analyze these, and iteratively any further
|
||||||
|
// expressions that that analysis uncovers.
|
||||||
|
DrainPendingExprs();
|
||||||
|
|
||||||
|
// We now have all the information we need to form definitive,
|
||||||
|
// deterministic hashes.
|
||||||
|
ComputeBodyHashes(funcs);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ProfileFuncs::MergeInProfile(ProfileFunc* pf)
|
||||||
|
{
|
||||||
|
all_globals.insert(pf->AllGlobals().begin(), pf->AllGlobals().end());
|
||||||
|
globals.insert(pf->Globals().begin(), pf->Globals().end());
|
||||||
|
constants.insert(pf->Constants().begin(), pf->Constants().end());
|
||||||
|
main_types.insert(main_types.end(),
|
||||||
|
pf->OrderedTypes().begin(), pf->OrderedTypes().end());
|
||||||
|
script_calls.insert(pf->ScriptCalls().begin(), pf->ScriptCalls().end());
|
||||||
|
BiF_globals.insert(pf->BiFGlobals().begin(), pf->BiFGlobals().end());
|
||||||
|
events.insert(pf->Events().begin(), pf->Events().end());
|
||||||
|
|
||||||
|
for ( auto& i : pf->Lambdas() )
|
||||||
|
{
|
||||||
|
lambdas.insert(i);
|
||||||
|
pending_exprs.push_back(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
for ( auto& a : pf->ConstructorAttrs() )
|
||||||
|
AnalyzeAttrs(a);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ProfileFuncs::DrainPendingExprs()
|
||||||
|
{
|
||||||
|
while ( pending_exprs.size() > 0 )
|
||||||
|
{
|
||||||
|
// Copy the pending expressions so we can loop over them
|
||||||
|
// while accruing additions.
|
||||||
|
auto pe = pending_exprs;
|
||||||
|
pending_exprs.clear();
|
||||||
|
|
||||||
|
for ( auto e : pe )
|
||||||
|
{
|
||||||
|
auto pf = std::make_shared<ProfileFunc>(e);
|
||||||
|
|
||||||
|
expr_profs[e] = pf;
|
||||||
|
MergeInProfile(pf.get());
|
||||||
|
|
||||||
|
// It's important to compute the hashes over the
|
||||||
|
// ordered types rather than the unordered. If type
|
||||||
|
// T1 depends on a recursive type T2, then T1's hash
|
||||||
|
// will vary depending on whether we arrive at
|
||||||
|
// T1 via an in-progress traversal of T2 (in which
|
||||||
|
// case T1 will see the "stub" in-progress hash for
|
||||||
|
// T2), or via a separate type T3 (in which case it
|
||||||
|
// will see the full hash).
|
||||||
|
ComputeTypeHashes(pf->OrderedTypes());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void ProfileFuncs::ComputeTypeHashes(const std::vector<const Type*>& types)
|
||||||
|
{
|
||||||
|
for ( auto t : types )
|
||||||
|
(void) HashType(t);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ProfileFuncs::ComputeBodyHashes(std::vector<FuncInfo>& funcs)
|
||||||
|
{
|
||||||
|
for ( auto& f : funcs )
|
||||||
|
if ( ! f.ShouldSkip() )
|
||||||
|
ComputeProfileHash(f.Profile());
|
||||||
|
|
||||||
|
for ( auto& l : lambdas )
|
||||||
|
ComputeProfileHash(ExprProf(l));
|
||||||
|
}
|
||||||
|
|
||||||
|
void ProfileFuncs::ComputeProfileHash(std::shared_ptr<ProfileFunc> pf)
|
||||||
|
{
|
||||||
|
p_hash_type h = 0;
|
||||||
|
|
||||||
|
// We add markers between each class of hash component, to
|
||||||
|
// prevent collisions due to elements with simple hashes
|
||||||
|
// (such as Stmt's or Expr's that are only represented by
|
||||||
|
// the hash of their tag).
|
||||||
|
h = merge_p_hashes(h, p_hash("stmts"));
|
||||||
|
for ( auto i : pf->Stmts() )
|
||||||
|
h = merge_p_hashes(h, p_hash(i->Tag()));
|
||||||
|
|
||||||
|
h = merge_p_hashes(h, p_hash("exprs"));
|
||||||
|
for ( auto i : pf->Exprs() )
|
||||||
|
h = merge_p_hashes(h, p_hash(i->Tag()));
|
||||||
|
|
||||||
|
h = merge_p_hashes(h, p_hash("ids"));
|
||||||
|
for ( auto i : pf->OrderedIdentifiers() )
|
||||||
|
h = merge_p_hashes(h, p_hash(i->Name()));
|
||||||
|
|
||||||
|
h = merge_p_hashes(h, p_hash("constants"));
|
||||||
|
for ( auto i : pf->Constants() )
|
||||||
|
h = merge_p_hashes(h, p_hash(i->Value()));
|
||||||
|
|
||||||
|
h = merge_p_hashes(h, p_hash("types"));
|
||||||
|
for ( auto i : pf->OrderedTypes() )
|
||||||
|
h = merge_p_hashes(h, HashType(i));
|
||||||
|
|
||||||
|
h = merge_p_hashes(h, p_hash("lambdas"));
|
||||||
|
for ( auto i : pf->Lambdas() )
|
||||||
|
h = merge_p_hashes(h, p_hash(i));
|
||||||
|
|
||||||
|
h = merge_p_hashes(h, p_hash("addl"));
|
||||||
|
for ( auto i : pf->AdditionalHashes() )
|
||||||
|
h = merge_p_hashes(h, i);
|
||||||
|
|
||||||
|
pf->SetHashVal(h);
|
||||||
|
}
|
||||||
|
|
||||||
|
p_hash_type ProfileFuncs::HashType(const Type* t)
|
||||||
|
{
|
||||||
|
if ( ! t )
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
auto it = type_hashes.find(t);
|
||||||
|
|
||||||
|
if ( it != type_hashes.end() )
|
||||||
|
// We've already done this Type*.
|
||||||
|
return it->second;
|
||||||
|
|
||||||
auto& tn = t->GetName();
|
auto& tn = t->GetName();
|
||||||
if ( tn.size() > 0 && seen_types.count(tn) > 0 )
|
if ( ! tn.empty() )
|
||||||
// No need to hash this in again, as we've already done so.
|
|
||||||
return;
|
|
||||||
|
|
||||||
if ( seen_type_ptrs.count(t.get()) > 0 )
|
|
||||||
// We've seen the raw pointer, even though it doesn't have
|
|
||||||
// a name.
|
|
||||||
return;
|
|
||||||
|
|
||||||
seen_types.insert(tn);
|
|
||||||
seen_type_ptrs.insert(t.get());
|
|
||||||
|
|
||||||
UpdateHash(t);
|
|
||||||
}
|
|
||||||
|
|
||||||
void ProfileFunc::UpdateHash(const IntrusivePtr<zeek::Obj>& o)
|
|
||||||
{
|
{
|
||||||
ODesc d;
|
auto seen_it = seen_type_names.find(tn);
|
||||||
o->Describe(&d);
|
|
||||||
std::string desc(d.Description());
|
if ( seen_it != seen_type_names.end() )
|
||||||
auto h = std::hash<std::string>{}(desc);
|
{
|
||||||
MergeInHash(h);
|
// We've already done a type with the same name, even
|
||||||
|
// though with a different Type*. Reuse its results.
|
||||||
|
auto seen_t = seen_it->second;
|
||||||
|
auto h = type_hashes[seen_t];
|
||||||
|
|
||||||
|
type_hashes[t] = h;
|
||||||
|
type_to_rep[t] = type_to_rep[seen_t];
|
||||||
|
|
||||||
|
return h;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
auto h = p_hash(t->Tag());
|
||||||
|
|
||||||
|
// Enter an initial value for this type's hash. We'll update it
|
||||||
|
// at the end, but having it here first will prevent recursive
|
||||||
|
// records from leading to infinite recursion as we traverse them.
|
||||||
|
// It's okay that the initial value is degenerate, because if we access
|
||||||
|
// it during the traversal that will only happen due to a recursive
|
||||||
|
// type, in which case the other elements of that type will serve
|
||||||
|
// to differentiate its hash.
|
||||||
|
type_hashes[t] = h;
|
||||||
|
|
||||||
|
switch ( t->Tag() ) {
|
||||||
|
case TYPE_ADDR:
|
||||||
|
case TYPE_ANY:
|
||||||
|
case TYPE_BOOL:
|
||||||
|
case TYPE_COUNT:
|
||||||
|
case TYPE_DOUBLE:
|
||||||
|
case TYPE_ENUM:
|
||||||
|
case TYPE_ERROR:
|
||||||
|
case TYPE_INT:
|
||||||
|
case TYPE_INTERVAL:
|
||||||
|
case TYPE_OPAQUE:
|
||||||
|
case TYPE_PATTERN:
|
||||||
|
case TYPE_PORT:
|
||||||
|
case TYPE_STRING:
|
||||||
|
case TYPE_SUBNET:
|
||||||
|
case TYPE_TIME:
|
||||||
|
case TYPE_TIMER:
|
||||||
|
case TYPE_UNION:
|
||||||
|
case TYPE_VOID:
|
||||||
|
h = merge_p_hashes(h, p_hash(t));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case TYPE_RECORD:
|
||||||
|
{
|
||||||
|
const auto& ft = t->AsRecordType();
|
||||||
|
auto n = ft->NumFields();
|
||||||
|
|
||||||
|
h = merge_p_hashes(h, p_hash("record"));
|
||||||
|
h = merge_p_hashes(h, p_hash(n));
|
||||||
|
|
||||||
|
for ( auto i = 0; i < n; ++i )
|
||||||
|
{
|
||||||
|
const auto& f = ft->FieldDecl(i);
|
||||||
|
h = merge_p_hashes(h, p_hash(f->id));
|
||||||
|
h = merge_p_hashes(h, HashType(f->type));
|
||||||
|
|
||||||
|
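// NOTE(review): f->id *is* hashed above; if that is the field
// name, the following remark is stale - confirm.
// We don't hash the field name, as in some contexts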
|
||||||
|
// those are ignored.
|
||||||
|
|
||||||
|
if ( f->attrs )
|
||||||
|
{
|
||||||
|
h = merge_p_hashes(h, p_hash(f->attrs));
|
||||||
|
AnalyzeAttrs(f->attrs.get());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case TYPE_TABLE:
|
||||||
|
{
|
||||||
|
auto tbl = t->AsTableType();
|
||||||
|
h = merge_p_hashes(h, p_hash("table"));
|
||||||
|
h = merge_p_hashes(h, p_hash("indices"));
|
||||||
|
h = merge_p_hashes(h, HashType(tbl->GetIndices()));
|
||||||
|
h = merge_p_hashes(h, p_hash("tbl-yield"));
|
||||||
|
h = merge_p_hashes(h, HashType(tbl->Yield()));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case TYPE_FUNC:
|
||||||
|
{
|
||||||
|
auto ft = t->AsFuncType();
|
||||||
|
auto flv = ft->FlavorString();
|
||||||
|
h = merge_p_hashes(h, p_hash(flv));
|
||||||
|
h = merge_p_hashes(h, p_hash("params"));
|
||||||
|
h = merge_p_hashes(h, HashType(ft->Params()));
|
||||||
|
h = merge_p_hashes(h, p_hash("func-yield"));
|
||||||
|
h = merge_p_hashes(h, HashType(ft->Yield()));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case TYPE_LIST:
|
||||||
|
{
|
||||||
|
auto& tl = t->AsTypeList()->GetTypes();
|
||||||
|
|
||||||
|
h = merge_p_hashes(h, p_hash("list"));
|
||||||
|
h = merge_p_hashes(h, p_hash(tl.size()));
|
||||||
|
|
||||||
|
for ( const auto& tl_i : tl )
|
||||||
|
h = merge_p_hashes(h, HashType(tl_i));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case TYPE_VECTOR:
|
||||||
|
h = merge_p_hashes(h, p_hash("vec"));
|
||||||
|
h = merge_p_hashes(h, HashType(t->AsVectorType()->Yield()));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case TYPE_FILE:
|
||||||
|
h = merge_p_hashes(h, p_hash("file"));
|
||||||
|
h = merge_p_hashes(h, HashType(t->AsFileType()->Yield()));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case TYPE_TYPE:
|
||||||
|
h = merge_p_hashes(h, p_hash("type"));
|
||||||
|
h = merge_p_hashes(h, HashType(t->AsTypeType()->GetType()));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
type_hashes[t] = h;
|
||||||
|
|
||||||
|
auto [rep_it, rep_inserted] = type_hash_reps.emplace(h, t);
|
||||||
|
|
||||||
|
if ( rep_inserted )
|
||||||
|
{ // No previous rep, so use this Type* for that.
|
||||||
|
type_to_rep[t] = t;
|
||||||
|
rep_types.push_back(t);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
type_to_rep[t] = rep_it->second;
|
||||||
|
|
||||||
|
if ( ! tn.empty() )
|
||||||
|
seen_type_names[tn] = t;
|
||||||
|
|
||||||
|
return h;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ProfileFuncs::AnalyzeAttrs(const Attributes* Attrs)
|
||||||
|
{
|
||||||
|
auto attrs = Attrs->GetAttrs();
|
||||||
|
|
||||||
|
for ( const auto& a : attrs )
|
||||||
|
{
|
||||||
|
const Expr* e = a->GetExpr().get();
|
||||||
|
|
||||||
|
if ( e )
|
||||||
|
{
|
||||||
|
pending_exprs.push_back(e);
|
||||||
|
if ( e->Tag() == EXPR_LAMBDA )
|
||||||
|
lambdas.insert(e->AsLambdaExpr());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace zeek::detail
|
} // namespace zeek::detail
|
||||||
|
|
|
@ -1,49 +1,169 @@
|
||||||
// See the file "COPYING" in the main distribution directory for copyright.
|
// See the file "COPYING" in the main distribution directory for copyright.
|
||||||
|
|
||||||
// Class for traversing a function body's AST to build up a profile
|
// Classes for traversing functions and their body ASTs to build up profiles
|
||||||
// of its various elements.
|
// of the various elements (types, globals, locals, lambdas, etc.) that appear.
|
||||||
|
// These profiles enable script optimization to make decisions regarding
|
||||||
|
// compilability and how to efficiently provide run-time components.
|
||||||
|
// For all of the following, we use the term "function" to refer to a single
|
||||||
|
// ScriptFunc/body pair, so an event handler or hook with multiple bodies
|
||||||
|
// is treated as multiple distinct "function"s.
|
||||||
|
//
|
||||||
|
// One key element of constructing profiles concerns computing hashes over
|
||||||
|
// both the Zeek scripting types present in the functions, and over entire
|
||||||
|
// functions (which means computing hashes over each of the function's
|
||||||
|
// components). Hashes need to be (1) distinct (collision-free in practice)
|
||||||
|
// and (2) deterministic (across Zeek invocations, the same components always
|
||||||
|
// map to the same hashes). We need these properties because we use hashes
|
||||||
|
// to robustly identify identical instances of the same function, for example
|
||||||
|
// so we can recognize that an instance of the function definition seen in
|
||||||
|
// a script matches a previously compiled function body, so we can safely
|
||||||
|
// replace the function's AST with the compiled version.
|
||||||
|
//
|
||||||
|
// We profile functions collectively (via the ProfileFuncs class), rather
|
||||||
|
// than in isolation, because doing so (1) allows us to share expensive
|
||||||
|
// profiling steps (in particular, computing the hashes of types, as some
|
||||||
|
// of the Zeek script records get huge, and occur frequently), and (2) enables
|
||||||
|
// us to develop a global picture of all of the components germane to a set
|
||||||
|
// of functions. The global profile is built up in terms of individual
|
||||||
|
// profiles (via the ProfileFunc class), which identify each function's
|
||||||
|
// basic components, and then using these as starting points to build out
|
||||||
|
// the global profile and compute the hashes of functions and types.
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <string_view>
|
||||||
|
|
||||||
#include "zeek/Expr.h"
|
#include "zeek/Expr.h"
|
||||||
#include "zeek/Stmt.h"
|
#include "zeek/Stmt.h"
|
||||||
#include "zeek/Traverse.h"
|
#include "zeek/Traverse.h"
|
||||||
|
#include "zeek/script_opt/ScriptOpt.h"
|
||||||
|
|
||||||
namespace zeek::detail {
|
namespace zeek::detail {
|
||||||
|
|
||||||
|
// The type used to represent hashes. We use the mnemonic "p_hash" as
|
||||||
|
// short for "profile hash", to avoid confusion with hashes used elsewhere
|
||||||
|
// in Zeek (which are for the most part keyed, a property we explicitly
|
||||||
|
// do not want).
|
||||||
|
using p_hash_type = unsigned long long;
|
||||||
|
|
||||||
|
// Helper functions for computing/managing hashes.
|
||||||
|
|
||||||
|
inline p_hash_type p_hash(int val)
|
||||||
|
{ return std::hash<int>{}(val); }
|
||||||
|
|
||||||
|
inline p_hash_type p_hash(std::string_view val)
|
||||||
|
{ return std::hash<std::string_view>{}(val); }
|
||||||
|
|
||||||
|
extern p_hash_type p_hash(const Obj* o);
|
||||||
|
inline p_hash_type p_hash(const IntrusivePtr<Obj>& o)
|
||||||
|
{ return p_hash(o.get()); }
|
||||||
|
|
||||||
|
inline p_hash_type merge_p_hashes(p_hash_type h1, p_hash_type h2)
|
||||||
|
{
|
||||||
|
// Taken from Boost. See for example
|
||||||
|
// https://www.boost.org/doc/libs/1_35_0/doc/html/boost/hash_combine_id241013.html
|
||||||
|
// or
|
||||||
|
// https://stackoverflow.com/questions/4948780/magic-number-in-boosthash-combine
|
||||||
|
return h1 ^ (h2 + 0x9e3779b9 + (h1 << 6) + (h1 >> 2));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns a filename associated with the given function body. Used to
|
||||||
|
// provide distinctness to identical function bodies seen in separate,
|
||||||
|
// potentially conflicting incremental compilations. This is only germane
|
||||||
|
// for allowing incremental compilation of subsets of the test suite, so
|
||||||
|
// if we decide to forgo that capability, we can remove this.
|
||||||
|
extern std::string script_specific_filename(const StmtPtr& body);
|
||||||
|
|
||||||
|
// Returns an incremental-compilation-specific hash for the given function
|
||||||
|
// body, given its non-specific hash is "generic_hash".
|
||||||
|
extern p_hash_type script_specific_hash(const StmtPtr& body, p_hash_type generic_hash);
|
||||||
|
|
||||||
|
|
||||||
|
// Class for profiling the components of a single function (or expression).
|
||||||
class ProfileFunc : public TraversalCallback {
|
class ProfileFunc : public TraversalCallback {
|
||||||
public:
|
public:
|
||||||
// If the argument is true, then we compute a hash over the function's
|
// Constructor used for the usual case of profiling a script
|
||||||
// AST to (pseudo-)uniquely identify it.
|
// function and one of its bodies.
|
||||||
ProfileFunc(bool _compute_hash = false)
|
ProfileFunc(const Func* func, const StmtPtr& body);
|
||||||
{ compute_hash = _compute_hash; }
|
|
||||||
|
|
||||||
|
// Constructor for profiling an AST expression. This exists
|
||||||
|
// to support (1) profiling lambda expressions, and (2) traversing
|
||||||
|
// attribute expressions (such as &default=expr) to discover what
|
||||||
|
// components they include.
|
||||||
|
ProfileFunc(const Expr* func);
|
||||||
|
|
||||||
|
// See the comments for the associated member variables for each
|
||||||
|
// of these accessors.
|
||||||
const std::unordered_set<const ID*>& Globals() const
|
const std::unordered_set<const ID*>& Globals() const
|
||||||
{ return globals; }
|
{ return globals; }
|
||||||
|
const std::unordered_set<const ID*>& AllGlobals() const
|
||||||
|
{ return all_globals; }
|
||||||
const std::unordered_set<const ID*>& Locals() const
|
const std::unordered_set<const ID*>& Locals() const
|
||||||
{ return locals; }
|
{ return locals; }
|
||||||
|
const std::unordered_set<const ID*>& Params() const
|
||||||
|
{ return params; }
|
||||||
|
const std::unordered_set<const ID*>& Assignees() const
|
||||||
|
{ return assignees; }
|
||||||
const std::unordered_set<const ID*>& Inits() const
|
const std::unordered_set<const ID*>& Inits() const
|
||||||
{ return inits; }
|
{ return inits; }
|
||||||
|
const std::vector<const Stmt*>& Stmts() const
|
||||||
|
{ return stmts; }
|
||||||
|
const std::vector<const Expr*>& Exprs() const
|
||||||
|
{ return exprs; }
|
||||||
|
const std::vector<const LambdaExpr*>& Lambdas() const
|
||||||
|
{ return lambdas; }
|
||||||
|
const std::vector<const ConstExpr*>& Constants() const
|
||||||
|
{ return constants; }
|
||||||
|
const std::unordered_set<const ID*>& UnorderedIdentifiers() const
|
||||||
|
{ return ids; }
|
||||||
|
const std::vector<const ID*>& OrderedIdentifiers() const
|
||||||
|
{ return ordered_ids; }
|
||||||
|
const std::unordered_set<const Type*>& UnorderedTypes() const
|
||||||
|
{ return types; }
|
||||||
|
const std::vector<const Type*>& OrderedTypes() const
|
||||||
|
{ return ordered_types; }
|
||||||
const std::unordered_set<ScriptFunc*>& ScriptCalls() const
|
const std::unordered_set<ScriptFunc*>& ScriptCalls() const
|
||||||
{ return script_calls; }
|
{ return script_calls; }
|
||||||
const std::unordered_set<Func*>& BiFCalls() const
|
const std::unordered_set<const ID*>& BiFGlobals() const
|
||||||
{ return BiF_calls; }
|
{ return BiF_globals; }
|
||||||
const std::unordered_set<ScriptFunc*>& WhenCalls() const
|
const std::unordered_set<ScriptFunc*>& WhenCalls() const
|
||||||
{ return when_calls; }
|
{ return when_calls; }
|
||||||
const std::unordered_set<const char*>& Events() const
|
const std::unordered_set<std::string>& Events() const
|
||||||
{ return events; }
|
{ return events; }
|
||||||
|
const std::unordered_set<const Attributes*>& ConstructorAttrs() const
|
||||||
|
{ return constructor_attrs; }
|
||||||
|
const std::unordered_set<const SwitchStmt*>& ExprSwitches() const
|
||||||
|
{ return expr_switches; }
|
||||||
|
const std::unordered_set<const SwitchStmt*>& TypeSwitches() const
|
||||||
|
{ return type_switches; }
|
||||||
|
|
||||||
bool DoesIndirectCalls() { return does_indirect_calls; }
|
bool DoesIndirectCalls() { return does_indirect_calls; }
|
||||||
|
|
||||||
std::size_t HashVal() { return hash_val; }
|
int NumParams() const { return num_params; }
|
||||||
|
int NumLambdas() const { return lambdas.size(); }
|
||||||
|
int NumWhenStmts() const { return num_when_stmts; }
|
||||||
|
|
||||||
int NumStmts() { return num_stmts; }
|
const std::vector<p_hash_type>& AdditionalHashes() const
|
||||||
int NumWhenStmts() { return num_when_stmts; }
|
{ return addl_hashes; }
|
||||||
int NumExprs() { return num_exprs; }
|
|
||||||
int NumLambdas() { return num_lambdas; }
|
// Set this function's hash to the given value; retrieve that value.
|
||||||
|
void SetHashVal(p_hash_type hash) { hash_val = hash; }
|
||||||
|
p_hash_type HashVal() const { return hash_val; }
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
// Construct the profile for the given function signature and body.
|
||||||
|
void Profile(const FuncType* ft, const StmtPtr& body);
|
||||||
|
|
||||||
TraversalCode PreStmt(const Stmt*) override;
|
TraversalCode PreStmt(const Stmt*) override;
|
||||||
TraversalCode PreExpr(const Expr*) override;
|
TraversalCode PreExpr(const Expr*) override;
|
||||||
|
TraversalCode PreID(const ID*) override;
|
||||||
|
|
||||||
|
// Take note of the presence of a given type.
|
||||||
|
void TrackType(const Type* t);
|
||||||
|
void TrackType(const TypePtr& t) { TrackType(t.get()); }
|
||||||
|
|
||||||
|
// Take note of the presence of an identifier.
|
||||||
|
void TrackID(const ID* id);
|
||||||
|
|
||||||
// Globals seen in the function.
|
// Globals seen in the function.
|
||||||
//
|
//
|
||||||
|
@ -51,79 +171,249 @@ protected:
|
||||||
// called in a call.
|
// called in a call.
|
||||||
std::unordered_set<const ID*> globals;
|
std::unordered_set<const ID*> globals;
|
||||||
|
|
||||||
|
// Same, but also includes globals only seen as called functions.
|
||||||
|
std::unordered_set<const ID*> all_globals;
|
||||||
|
|
||||||
// Locals seen in the function.
|
// Locals seen in the function.
|
||||||
std::unordered_set<const ID*> locals;
|
std::unordered_set<const ID*> locals;
|
||||||
|
|
||||||
// Same for locals seen in initializations, so we can find
|
// The function's parameters. Empty if our starting point was
|
||||||
// unused aggregates.
|
// profiling an expression.
|
||||||
|
std::unordered_set<const ID*> params;
|
||||||
|
|
||||||
|
// How many parameters the function has. The default value flags
|
||||||
|
// that we started the profile with an expression rather than a
|
||||||
|
// function.
|
||||||
|
int num_params = -1;
|
||||||
|
|
||||||
|
// Identifiers (globals, locals, parameters) that are assigned to.
|
||||||
|
// Does not include implicit assignments due to initializations,
|
||||||
|
// which are instead captured in "inits".
|
||||||
|
std::unordered_set<const ID*> assignees;
|
||||||
|
|
||||||
|
// Same for locals seen in initializations, so we can find,
|
||||||
|
// for example, unused aggregates.
|
||||||
std::unordered_set<const ID*> inits;
|
std::unordered_set<const ID*> inits;
|
||||||
|
|
||||||
|
// Statements seen in the function. Does not include indirect
|
||||||
|
// statements, such as those in lambda bodies.
|
||||||
|
std::vector<const Stmt*> stmts;
|
||||||
|
|
||||||
|
// Expressions seen in the function. Does not include indirect
|
||||||
|
// expressions (such as those appearing in attributes of types).
|
||||||
|
std::vector<const Expr*> exprs;
|
||||||
|
|
||||||
|
// Lambdas seen in the function. We don't profile lambda bodies,
|
||||||
|
// but rather make them available for separate profiling if
|
||||||
|
// appropriate.
|
||||||
|
std::vector<const LambdaExpr*> lambdas;
|
||||||
|
|
||||||
|
// If we're profiling a lambda function, this holds the captures.
|
||||||
|
std::unordered_set<const ID*> captures;
|
||||||
|
|
||||||
|
// Constants seen in the function.
|
||||||
|
std::vector<const ConstExpr*> constants;
|
||||||
|
|
||||||
|
// Identifiers seen in the function.
|
||||||
|
std::unordered_set<const ID*> ids;
|
||||||
|
|
||||||
|
// The same, but in a deterministic order.
|
||||||
|
std::vector<const ID*> ordered_ids;
|
||||||
|
|
||||||
|
// Types seen in the function. A set rather than a vector because
|
||||||
|
// the same type can be seen numerous times.
|
||||||
|
std::unordered_set<const Type*> types;
|
||||||
|
|
||||||
|
// The same, but in a deterministic order, with duplicates removed.
|
||||||
|
std::vector<const Type*> ordered_types;
|
||||||
|
|
||||||
// Script functions that this script calls.
|
// Script functions that this script calls.
|
||||||
std::unordered_set<ScriptFunc*> script_calls;
|
std::unordered_set<ScriptFunc*> script_calls;
|
||||||
|
|
||||||
// Same for BiF's.
|
// Same for BiF's, though for them we record the corresponding global
|
||||||
std::unordered_set<Func*> BiF_calls;
|
// rather than the BuiltinFunc*.
|
||||||
|
std::unordered_set<const ID*> BiF_globals;
|
||||||
|
|
||||||
// Script functions appearing in "when" clauses.
|
// Script functions appearing in "when" clauses.
|
||||||
std::unordered_set<ScriptFunc*> when_calls;
|
std::unordered_set<ScriptFunc*> when_calls;
|
||||||
|
|
||||||
// Names of generated events.
|
// Names of generated events.
|
||||||
std::unordered_set<const char*> events;
|
std::unordered_set<std::string> events;
|
||||||
|
|
||||||
|
// Attributes seen in set or table constructors.
|
||||||
|
std::unordered_set<const Attributes*> constructor_attrs;
|
||||||
|
|
||||||
|
// Switch statements with either expression cases or type cases.
|
||||||
|
std::unordered_set<const SwitchStmt*> expr_switches;
|
||||||
|
std::unordered_set<const SwitchStmt*> type_switches;
|
||||||
|
|
||||||
// True if the function makes a call through an expression rather
|
// True if the function makes a call through an expression rather
|
||||||
// than simply a function's (global) name.
|
// than simply a function's (global) name.
|
||||||
bool does_indirect_calls = false;
|
bool does_indirect_calls = false;
|
||||||
|
|
||||||
// Hash value. Only valid if constructor requested it.
|
// Additional values present in the body that should be factored
|
||||||
std::size_t hash_val = 0;
|
// into its hash.
|
||||||
|
std::vector<p_hash_type> addl_hashes;
|
||||||
|
|
||||||
// How many statements / when statements / lambda expressions /
|
// Associated hash value.
|
||||||
// expressions appear in the function body.
|
p_hash_type hash_val = 0;
|
||||||
int num_stmts = 0;
|
|
||||||
|
// How many when statements appear in the function body. We could
|
||||||
|
// track these individually, but to date all that's mattered is
|
||||||
|
// whether a given body contains any.
|
||||||
int num_when_stmts = 0;
|
int num_when_stmts = 0;
|
||||||
int num_lambdas = 0;
|
|
||||||
int num_exprs = 0;
|
|
||||||
|
|
||||||
// Whether we're separately processing a "when" condition to
|
// Whether we're separately processing a "when" condition to
|
||||||
// mine out its script calls.
|
// mine out its script calls.
|
||||||
bool in_when = false;
|
bool in_when = false;
|
||||||
|
};
|
||||||
|
|
||||||
// We only compute a hash over the function if requested, since
|
// Function pointer for a predicate that determines whether a given
|
||||||
// it's somewhat expensive.
|
// profile is compilable. Alternatively we could derive subclasses
|
||||||
bool compute_hash;
|
// from ProfileFuncs and use a virtual method for this, but that seems
|
||||||
|
// heavier-weight for what's really a simple notion.
|
||||||
|
typedef bool (*is_compilable_pred)(const ProfileFunc*);
|
||||||
|
|
||||||
// The following are for computing a consistent hash that isn't
|
// Collectively profile an entire collection of functions.
|
||||||
// too profligate in how much it needs to compute over.
|
class ProfileFuncs {
|
||||||
|
public:
|
||||||
|
// Updates entries in "funcs" to include profiles. If pred is
|
||||||
|
// non-nil, then it is called for each profile to see whether it's
|
||||||
|
// compilable, and, if not, the FuncInfo is marked as ShouldSkip().
|
||||||
|
ProfileFuncs(std::vector<FuncInfo>& funcs,
|
||||||
|
is_compilable_pred pred = nullptr);
|
||||||
|
|
||||||
// Checks whether we've already noted this type, and, if not,
|
// The following accessors provide a global profile across all of
|
||||||
// updates the hash with it.
|
// the (non-skipped) functions in "funcs". See the comments for
|
||||||
void CheckType(const TypePtr& t);
|
// the associated member variables for documentation.
|
||||||
|
const std::unordered_set<const ID*>& Globals() const
|
||||||
|
{ return globals; }
|
||||||
|
const std::unordered_set<const ID*>& AllGlobals() const
|
||||||
|
{ return all_globals; }
|
||||||
|
const std::unordered_set<const ConstExpr*>& Constants() const
|
||||||
|
{ return constants; }
|
||||||
|
const std::vector<const Type*>& MainTypes() const
|
||||||
|
{ return main_types; }
|
||||||
|
const std::vector<const Type*>& RepTypes() const
|
||||||
|
{ return rep_types; }
|
||||||
|
const std::unordered_set<ScriptFunc*>& ScriptCalls() const
|
||||||
|
{ return script_calls; }
|
||||||
|
const std::unordered_set<const ID*>& BiFGlobals() const
|
||||||
|
{ return BiF_globals; }
|
||||||
|
const std::unordered_set<const LambdaExpr*>& Lambdas() const
|
||||||
|
{ return lambdas; }
|
||||||
|
const std::unordered_set<std::string>& Events() const
|
||||||
|
{ return events; }
|
||||||
|
|
||||||
void UpdateHash(int val)
|
std::shared_ptr<ProfileFunc> FuncProf(const ScriptFunc* f)
|
||||||
|
{ return func_profs[f]; }
|
||||||
|
|
||||||
|
// This is only externally germane for LambdaExpr's.
|
||||||
|
std::shared_ptr<ProfileFunc> ExprProf(const Expr* e)
|
||||||
|
{ return expr_profs[e]; }
|
||||||
|
|
||||||
|
// Returns the "representative" Type* for the hash associated with
|
||||||
|
// the parameter (which might be the parameter itself).
|
||||||
|
const Type* TypeRep(const Type* orig)
|
||||||
{
|
{
|
||||||
auto h = std::hash<int>{}(val);
|
auto it = type_to_rep.find(orig);
|
||||||
MergeInHash(h);
|
ASSERT(it != type_to_rep.end());
|
||||||
|
return it->second;
|
||||||
}
|
}
|
||||||
|
|
||||||
void UpdateHash(const IntrusivePtr<Obj>& o);
|
// Returns the hash associated with the given type, computing it
|
||||||
|
// if necessary.
|
||||||
|
p_hash_type HashType(const TypePtr& t) { return HashType(t.get()); }
|
||||||
|
p_hash_type HashType(const Type* t);
|
||||||
|
|
||||||
void MergeInHash(std::size_t h)
|
protected:
|
||||||
{
|
// Incorporate the given function profile into the global profile.
|
||||||
// Taken from Boost. See for example
|
void MergeInProfile(ProfileFunc* pf);
|
||||||
// https://www.boost.org/doc/libs/1_35_0/doc/html/boost/hash_combine_id241013.html
|
|
||||||
// or
|
|
||||||
// https://stackoverflow.com/questions/4948780/magic-number-in-boosthash-combine
|
|
||||||
hash_val ^= h + 0x9e3779b9 + (hash_val << 6) + (hash_val >> 2);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Types that we've already processed. Hashing types can be
|
// When traversing types, Zeek records can have attributes that in
|
||||||
// quite expensive since some of the common Zeek record types
|
// turn have expressions associated with them. The expressions can
|
||||||
// (e.g., notices) are huge, so useful to not do them more than
|
// in turn have types, which might be records with further attribute
|
||||||
// once. We track two forms, one by name (if available) and one
|
// expressions, etc. This method iteratively processes the list of
|
||||||
// by raw pointer (if not). Doing so allows us to track named
|
// expressions we need to analyze until no new ones are added.
|
||||||
// sub-records but also records that have no names.
|
void DrainPendingExprs();
|
||||||
std::unordered_set<std::string> seen_types;
|
|
||||||
std::unordered_set<const Type*> seen_type_ptrs;
|
// Compute hashes for the given set of types. Potentially recursive
|
||||||
|
// upon discovering additional types.
|
||||||
|
void ComputeTypeHashes(const std::vector<const Type*>& types);
|
||||||
|
|
||||||
|
// Compute hashes to associate with each function
|
||||||
|
void ComputeBodyHashes(std::vector<FuncInfo>& funcs);
|
||||||
|
|
||||||
|
// Compute the hash associated with a single function profile.
|
||||||
|
void ComputeProfileHash(std::shared_ptr<ProfileFunc> pf);
|
||||||
|
|
||||||
|
// Analyze the expressions and lambdas appearing in a set of
|
||||||
|
// attributes.
|
||||||
|
void AnalyzeAttrs(const Attributes* Attrs);
|
||||||
|
|
||||||
|
// Globals seen across the functions, other than those solely seen
|
||||||
|
// as the function being called in a call.
|
||||||
|
std::unordered_set<const ID*> globals;
|
||||||
|
|
||||||
|
// Same, but also includes globals only seen as called functions.
|
||||||
|
std::unordered_set<const ID*> all_globals;
|
||||||
|
|
||||||
|
// Constants seen across the functions.
|
||||||
|
std::unordered_set<const ConstExpr*> constants;
|
||||||
|
|
||||||
|
// Types seen across the functions. Does not include subtypes.
|
||||||
|
// Deterministically ordered.
|
||||||
|
std::vector<const Type*> main_types;
|
||||||
|
|
||||||
|
// "Representative" types seen across the functions. Includes
|
||||||
|
// subtypes. These all have unique hashes, and are returned by
|
||||||
|
// calls to TypeRep(). Deterministically ordered.
|
||||||
|
std::vector<const Type*> rep_types;
|
||||||
|
|
||||||
|
// Maps a type to its representative (which might be itself).
|
||||||
|
std::unordered_map<const Type*, const Type*> type_to_rep;
|
||||||
|
|
||||||
|
// Script functions that get called.
|
||||||
|
std::unordered_set<ScriptFunc*> script_calls;
|
||||||
|
|
||||||
|
// Same for BiF's.
|
||||||
|
std::unordered_set<const ID*> BiF_globals;
|
||||||
|
|
||||||
|
// And for lambda's.
|
||||||
|
std::unordered_set<const LambdaExpr*> lambdas;
|
||||||
|
|
||||||
|
// Names of generated events.
|
||||||
|
std::unordered_set<std::string> events;
|
||||||
|
|
||||||
|
// Maps script functions to associated profiles. This isn't
|
||||||
|
// actually well-defined in the case of event handlers and hooks,
|
||||||
|
// which can have multiple bodies. However, this is only used
|
||||||
|
// in the context of analyzing a single-bodied function.
|
||||||
|
std::unordered_map<const ScriptFunc*, std::shared_ptr<ProfileFunc>> func_profs;
|
||||||
|
|
||||||
|
// Maps expressions to their profiles. This is only germane
|
||||||
|
// externally for LambdaExpr's, but internally it abets memory
|
||||||
|
// management.
|
||||||
|
std::unordered_map<const Expr*, std::shared_ptr<ProfileFunc>> expr_profs;
|
||||||
|
|
||||||
|
// These remaining member variables are only used internally,
|
||||||
|
// not provided via accessors:
|
||||||
|
|
||||||
|
// Maps types to their hashes.
|
||||||
|
std::unordered_map<const Type*, p_hash_type> type_hashes;
|
||||||
|
|
||||||
|
// An inverse mapping, to a representative for each distinct hash.
|
||||||
|
std::unordered_map<p_hash_type, const Type*> type_hash_reps;
|
||||||
|
|
||||||
|
// For types with names, tracks the ones we've already hashed,
|
||||||
|
// so we can avoid work for distinct pointers that refer to the
|
||||||
|
// same underlying type.
|
||||||
|
std::unordered_map<std::string, const Type*> seen_type_names;
|
||||||
|
|
||||||
|
// Expressions that we've discovered that we need to further
|
||||||
|
// profile. These can arise for example due to lambdas or
|
||||||
|
// record attributes.
|
||||||
|
std::vector<const Expr*> pending_exprs;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -81,7 +81,7 @@ void optimize_func(ScriptFunc* f, std::shared_ptr<ProfileFunc> pf,
|
||||||
|
|
||||||
if ( analysis_options.optimize_AST )
|
if ( analysis_options.optimize_AST )
|
||||||
{
|
{
|
||||||
pf = std::make_shared<ProfileFunc>(false);
|
pf = std::make_shared<ProfileFunc>(f, body);
|
||||||
body->Traverse(pf.get());
|
body->Traverse(pf.get());
|
||||||
|
|
||||||
RD_Decorate reduced_rds(pf);
|
RD_Decorate reduced_rds(pf);
|
||||||
|
@ -111,7 +111,7 @@ void optimize_func(ScriptFunc* f, std::shared_ptr<ProfileFunc> pf,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Profile the new body.
|
// Profile the new body.
|
||||||
pf = std::make_shared<ProfileFunc>();
|
pf = std::make_shared<ProfileFunc>(f, body);
|
||||||
body->Traverse(pf.get());
|
body->Traverse(pf.get());
|
||||||
|
|
||||||
// Compute its reaching definitions.
|
// Compute its reaching definitions.
|
||||||
|
@ -224,15 +224,7 @@ void analyze_scripts()
|
||||||
|
|
||||||
// Now that everything's parsed and BiF's have been initialized,
|
// Now that everything's parsed and BiF's have been initialized,
|
||||||
// profile the functions.
|
// profile the functions.
|
||||||
std::unordered_map<const ScriptFunc*, std::shared_ptr<ProfileFunc>>
|
auto pfs = std::make_unique<ProfileFuncs>(funcs);
|
||||||
func_profs;
|
|
||||||
|
|
||||||
for ( auto& f : funcs )
|
|
||||||
{
|
|
||||||
f.SetProfile(std::make_shared<ProfileFunc>(true));
|
|
||||||
f.Body()->Traverse(f.Profile().get());
|
|
||||||
func_profs[f.Func()] = f.Profile();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Figure out which functions either directly or indirectly
|
// Figure out which functions either directly or indirectly
|
||||||
// appear in "when" clauses.
|
// appear in "when" clauses.
|
||||||
|
@ -275,7 +267,7 @@ void analyze_scripts()
|
||||||
{
|
{
|
||||||
when_funcs.insert(wf);
|
when_funcs.insert(wf);
|
||||||
|
|
||||||
for ( auto& wff : func_profs[wf]->ScriptCalls() )
|
for ( auto& wff : pfs->FuncProf(wf)->ScriptCalls() )
|
||||||
{
|
{
|
||||||
if ( when_funcs.count(wff) > 0 )
|
if ( when_funcs.count(wff) > 0 )
|
||||||
// We've already processed this
|
// We've already processed this
|
||||||
|
|
|
@ -75,6 +75,13 @@ public:
|
||||||
void SetProfile(std::shared_ptr<ProfileFunc> _pf);
|
void SetProfile(std::shared_ptr<ProfileFunc> _pf);
|
||||||
void SetSaveFile(std::string _sf) { save_file = std::move(_sf); }
|
void SetSaveFile(std::string _sf) { save_file = std::move(_sf); }
|
||||||
|
|
||||||
|
// The following provide a way of marking FuncInfo's as
|
||||||
|
// should-be-skipped for script optimization, generally because
|
||||||
|
// the function body has a property that a given script optimizer
|
||||||
|
// doesn't know how to deal with. Defaults to don't-skip.
|
||||||
|
bool ShouldSkip() const { return skip; }
|
||||||
|
void SetSkip(bool should_skip) { skip = should_skip; }
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
ScriptFuncPtr func;
|
ScriptFuncPtr func;
|
||||||
ScopePtr scope;
|
ScopePtr scope;
|
||||||
|
@ -84,6 +91,9 @@ protected:
|
||||||
// If we're saving this function in a file, this is the name
|
// If we're saving this function in a file, this is the name
|
||||||
// of the file to use.
|
// of the file to use.
|
||||||
std::string save_file;
|
std::string save_file;
|
||||||
|
|
||||||
|
// Whether to skip optimizing this function.
|
||||||
|
bool skip = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue