function profiling rewritten - more detailed info, supports global profiling

This commit is contained in:
Vern Paxson 2021-03-25 16:17:32 -07:00
parent bb3a69ebb3
commit 297adf3486
3 changed files with 896 additions and 124 deletions

View file

@ -1,5 +1,7 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include <unistd.h>
#include "zeek/script_opt/ProfileFunc.h"
#include "zeek/Desc.h"
#include "zeek/Stmt.h"
@ -9,27 +11,97 @@
namespace zeek::detail {
TraversalCode ProfileFunc::PreStmt(const Stmt* s)
// Computes the profiling hash of a Obj based on its (deterministic)
// description.
p_hash_type p_hash(const Obj* o)
{
++num_stmts;
ODesc d;
d.SetDeterminism(true);
o->Describe(&d);
return p_hash(d.Description());
}
auto tag = s->Tag();
std::string script_specific_filename(const StmtPtr& body)
{
// The specific filename is taken from the location filename, making
// it absolute if necessary.
auto body_loc = body->GetLocationInfo();
auto bl_f = body_loc->filename;
ASSERT(bl_f != nullptr);
if ( compute_hash )
UpdateHash(int(tag));
if ( tag == STMT_INIT )
if ( bl_f[0] == '.' &&
(bl_f[1] == '/' || (bl_f[1] == '.' && bl_f[2] == '/')) )
{
for ( const auto& id : s->AsInitStmt()->Inits() )
inits.insert(id.get());
// Add working directory to avoid collisions over the
// same relative name.
static std::string working_dir;
if ( working_dir.size() == 0 )
{
char buf[8192];
getcwd(buf, sizeof buf);
working_dir = buf;
}
// Don't recurse into these, as we don't want to consider
// a local that only appears in an initialization as a
// relevant local.
return TC_ABORTSTMT;
return working_dir + "/" + bl_f;
}
switch ( tag ) {
return bl_f;
}
p_hash_type script_specific_hash(const StmtPtr& body, p_hash_type generic_hash)
{
auto bl_f = script_specific_filename(body);
return merge_p_hashes(generic_hash, p_hash(bl_f));
}
ProfileFunc::ProfileFunc(const Func* func, const StmtPtr& body)
{
Profile(func->GetType().get(), body);
}
ProfileFunc::ProfileFunc(const Expr* e)
{
if ( e->Tag() == EXPR_LAMBDA )
{
auto func = e->AsLambdaExpr();
for ( auto oid : func->OuterIDs() )
captures.insert(oid);
Profile(func->GetType()->AsFuncType(), func->Ingredients().body);
}
else
// We don't have a function type, so do the traversal
// directly.
e->Traverse(this);
}
void ProfileFunc::Profile(const FuncType* ft, const StmtPtr& body)
{
num_params = ft->Params()->NumFields();
TrackType(ft);
body->Traverse(this);
}
TraversalCode ProfileFunc::PreStmt(const Stmt* s)
{
stmts.push_back(s);
switch ( s->Tag() ) {
case STMT_INIT:
for ( const auto& id : s->AsInitStmt()->Inits() )
{
inits.insert(id.get());
TrackType(id->GetType());
}
// Don't traverse further into the statement, since we
// don't want to view the identifiers as locals unless
// they're also used elsewhere.
return TC_ABORTSTMT;
case STMT_WHEN:
++num_when_stmts;
@ -39,7 +111,8 @@ TraversalCode ProfileFunc::PreStmt(const Stmt* s)
// It doesn't do any harm for us to re-traverse the
// conditional, so we don't bother hand-traversing the
// rest of the when but just let the usual processing do it.
// rest of the "when", but just let the usual processing
// do it.
break;
case STMT_FOR:
@ -67,15 +140,24 @@ TraversalCode ProfileFunc::PreStmt(const Stmt* s)
// incomplete list of locals that need to be tracked.
auto sw = s->AsSwitchStmt();
bool is_type_switch = false;
for ( auto& c : *sw->Cases() )
{
auto idl = c->TypeCases();
if ( idl )
if ( idl )
{
for ( auto id : *idl )
locals.insert(id);
is_type_switch = true;
}
}
if ( is_type_switch )
type_switches.insert(sw);
else
expr_switches.insert(sw);
}
break;
@ -88,37 +170,74 @@ TraversalCode ProfileFunc::PreStmt(const Stmt* s)
TraversalCode ProfileFunc::PreExpr(const Expr* e)
{
++num_exprs;
exprs.push_back(e);
auto tag = e->Tag();
TrackType(e->GetType());
if ( compute_hash )
UpdateHash(int(tag));
switch ( tag ) {
switch ( e->Tag() ) {
case EXPR_CONST:
if ( compute_hash )
{
CheckType(e->GetType());
UpdateHash(e->AsConstExpr()->ValuePtr());
}
constants.push_back(e->AsConstExpr());
break;
case EXPR_NAME:
{
auto n = e->AsNameExpr();
auto id = n->Id();
if ( id->IsGlobal() )
globals.insert(id);
else
locals.insert(id);
if ( compute_hash )
if ( id->IsGlobal() )
{
UpdateHash({NewRef{}, id});
CheckType(e->GetType());
globals.insert(id);
all_globals.insert(id);
const auto& t = id->GetType();
if ( t->Tag() == TYPE_FUNC &&
t->AsFuncType()->Flavor() == FUNC_FLAVOR_EVENT )
events.insert(id->Name());
}
else
{
// This is a tad ugly. Unfortunately due to the
// weird way that Zeek function *declarations* work,
// there's no reliable way to get the list of
// parameters for a function *definition*, since
// they can have different names than what's present
// in the declaration. So we identify them directly,
// by knowing that they come at the beginning of the
// frame ... and being careful to avoid misconfusing
// a lambda capture with a low frame offset as a
// parameter.
if ( captures.count(id) == 0 &&
id->Offset() < num_params )
params.insert(id);
locals.insert(id);
}
// Turns out that NameExpr's can be constructed using a
// different Type* than that of the identifier itself,
// so be sure we track the latter too.
TrackType(id->GetType());
break;
}
case EXPR_FIELD:
{
auto f = e->AsFieldExpr()->Field();
addl_hashes.push_back(p_hash(f));
}
break;
case EXPR_ASSIGN:
{
if ( e->GetOp1()->Tag() == EXPR_REF )
{
auto lhs = e->GetOp1()->GetOp1();
if ( lhs->Tag() == EXPR_NAME )
assignees.insert(lhs->AsNameExpr()->Id());
}
// else this isn't a direct assignment.
break;
}
@ -134,7 +253,7 @@ TraversalCode ProfileFunc::PreExpr(const Expr* e)
}
auto n = f->AsNameExpr();
IDPtr func = {NewRef{}, n->Id()};
auto func = n->Id();
if ( ! func->IsGlobal() )
{
@ -142,6 +261,8 @@ TraversalCode ProfileFunc::PreExpr(const Expr* e)
return TC_CONTINUE;
}
all_globals.insert(func);
auto func_v = func->GetVal();
if ( func_v )
{
@ -156,28 +277,84 @@ TraversalCode ProfileFunc::PreExpr(const Expr* e)
when_calls.insert(bf);
}
else
BiF_calls.insert(func_vf);
BiF_globals.insert(func);
}
else
{
// We could complain, but for now we don't because
// We could complain, but for now we don't, because
// if we're invoked prior to full Zeek initialization,
// the value might indeed not there.
// the value might indeed not there yet.
// printf("no function value for global %s\n", func->Name());
}
// Recurse into the arguments.
auto args = c->Args();
args->Traverse(this);
// Do the following explicitly, since we won't be recursing
// into the LHS global.
// Note that the type of the expression and the type of the
// function can actually be *different* due to the NameExpr
// being constructed based on a forward reference and then
// the global getting a different (constructed) type when
// the function is actually declared. Geez. So hedge our
// bets.
TrackType(n->GetType());
TrackType(func->GetType());
TrackID(func);
return TC_ABORTSTMT;
}
case EXPR_EVENT:
events.insert(e->AsEventExpr()->Name());
{
auto ev = e->AsEventExpr()->Name();
events.insert(ev);
addl_hashes.push_back(p_hash(ev));
}
break;
case EXPR_LAMBDA:
++num_lambdas;
{
auto l = e->AsLambdaExpr();
lambdas.push_back(l);
for ( const auto& i : l->OuterIDs() )
{
locals.insert(i);
TrackID(i);
// See above re EXPR_NAME regarding the following
// logic.
if ( captures.count(i) == 0 &&
i->Offset() < num_params )
params.insert(i);
}
// Avoid recursing into the body.
return TC_ABORTSTMT;
}
case EXPR_SET_CONSTRUCTOR:
{
auto sc = static_cast<const SetConstructorExpr*>(e);
auto attrs = sc->GetAttrs();
if ( attrs )
constructor_attrs.insert(attrs.get());
}
break;
case EXPR_TABLE_CONSTRUCTOR:
{
auto tc = static_cast<const TableConstructorExpr*>(e);
auto attrs = tc->GetAttrs();
if ( attrs )
constructor_attrs.insert(attrs.get());
}
break;
default:
@ -187,32 +364,345 @@ TraversalCode ProfileFunc::PreExpr(const Expr* e)
return TC_CONTINUE;
}
void ProfileFunc::CheckType(const TypePtr& t)
TraversalCode ProfileFunc::PreID(const ID* id)
{
TrackID(id);
// There's no need for any further analysis of this ID.
return TC_ABORTSTMT;
}
void ProfileFunc::TrackType(const Type* t)
{
if ( ! t )
return;
if ( types.count(t) > 0 )
// We've already tracke it.
return;
types.insert(t);
ordered_types.push_back(t);
}
void ProfileFunc::TrackID(const ID* id)
{
if ( ! id )
return;
if ( ids.count(id) > 0 )
// Already tracked.
return;
ids.insert(id);
ordered_ids.push_back(id);
}
ProfileFuncs::ProfileFuncs(std::vector<FuncInfo>& funcs, is_compilable_pred pred)
{
for ( auto& f : funcs )
{
if ( f.ShouldSkip() )
continue;
auto pf = std::make_unique<ProfileFunc>(f.Func(), f.Body());
if ( ! pred || (*pred)(pf.get()) )
MergeInProfile(pf.get());
else
f.SetSkip(true);
f.SetProfile(std::move(pf));
func_profs[f.Func()] = f.Profile();
}
// We now have the main (starting) types used by all of the
// functions. Recursively compute their hashes.
ComputeTypeHashes(main_types);
// Computing the hashes can have marked expressions (seen in
// record attributes) for further analysis. Likewise, when
// doing the profile merges above we may have noted lambda
// expressions. Analyze these, and iteratively any further
// expressions that that analysis uncovers.
DrainPendingExprs();
// We now have all the information we need to form definitive,
// deterministic hashes.
ComputeBodyHashes(funcs);
}
void ProfileFuncs::MergeInProfile(ProfileFunc* pf)
{
all_globals.insert(pf->AllGlobals().begin(), pf->AllGlobals().end());
globals.insert(pf->Globals().begin(), pf->Globals().end());
constants.insert(pf->Constants().begin(), pf->Constants().end());
main_types.insert(main_types.end(),
pf->OrderedTypes().begin(), pf->OrderedTypes().end());
script_calls.insert(pf->ScriptCalls().begin(), pf->ScriptCalls().end());
BiF_globals.insert(pf->BiFGlobals().begin(), pf->BiFGlobals().end());
events.insert(pf->Events().begin(), pf->Events().end());
for ( auto& i : pf->Lambdas() )
{
lambdas.insert(i);
pending_exprs.push_back(i);
}
for ( auto& a : pf->ConstructorAttrs() )
AnalyzeAttrs(a);
}
void ProfileFuncs::DrainPendingExprs()
{
while ( pending_exprs.size() > 0 )
{
// Copy the pending expressions so we can loop over them
// while accruing additions.
auto pe = pending_exprs;
pending_exprs.clear();
for ( auto e : pe )
{
auto pf = std::make_shared<ProfileFunc>(e);
expr_profs[e] = pf;
MergeInProfile(pf.get());
// It's important to compute the hashes over the
// ordered types rather than the unordered. If type
// T1 depends on a recursive type T2, then T1's hash
// will vary with depending on whether we arrive at
// T1 via an in-progress traversal of T2 (in which
// case T1 will see the "stub" in-progress hash for
// T2), or via a separate type T3 (in which case it
// will see the full hash).
ComputeTypeHashes(pf->OrderedTypes());
}
}
}
void ProfileFuncs::ComputeTypeHashes(const std::vector<const Type*>& types)
{
for ( auto t : types )
(void) HashType(t);
}
void ProfileFuncs::ComputeBodyHashes(std::vector<FuncInfo>& funcs)
{
for ( auto& f : funcs )
if ( ! f.ShouldSkip() )
ComputeProfileHash(f.Profile());
for ( auto& l : lambdas )
ComputeProfileHash(ExprProf(l));
}
void ProfileFuncs::ComputeProfileHash(std::shared_ptr<ProfileFunc> pf)
{
p_hash_type h = 0;
// We add markers between each class of hash component, to
// prevent collisions due to elements with simple hashes
// (such as Stmt's or Expr's that are only represented by
// the hash of their tag).
h = merge_p_hashes(h, p_hash("stmts"));
for ( auto i : pf->Stmts() )
h = merge_p_hashes(h, p_hash(i->Tag()));
h = merge_p_hashes(h, p_hash("exprs"));
for ( auto i : pf->Exprs() )
h = merge_p_hashes(h, p_hash(i->Tag()));
h = merge_p_hashes(h, p_hash("ids"));
for ( auto i : pf->OrderedIdentifiers() )
h = merge_p_hashes(h, p_hash(i->Name()));
h = merge_p_hashes(h, p_hash("constants"));
for ( auto i : pf->Constants() )
h = merge_p_hashes(h, p_hash(i->Value()));
h = merge_p_hashes(h, p_hash("types"));
for ( auto i : pf->OrderedTypes() )
h = merge_p_hashes(h, HashType(i));
h = merge_p_hashes(h, p_hash("lambdas"));
for ( auto i : pf->Lambdas() )
h = merge_p_hashes(h, p_hash(i));
h = merge_p_hashes(h, p_hash("addl"));
for ( auto i : pf->AdditionalHashes() )
h = merge_p_hashes(h, i);
pf->SetHashVal(h);
}
p_hash_type ProfileFuncs::HashType(const Type* t)
{
if ( ! t )
return 0;
if ( type_hashes.count(t) > 0 )
// We've already done this Type*.
return type_hashes[t];
auto& tn = t->GetName();
if ( tn.size() > 0 && seen_types.count(tn) > 0 )
// No need to hash this in again, as we've already done so.
return;
if ( tn.size() > 0 && seen_type_names.count(tn) > 0 )
{
// We've already done a type with the same name, even
// though with a different Type*. Reuse its results.
auto seen_t = seen_type_names[tn];
auto h = type_hashes[seen_t];
if ( seen_type_ptrs.count(t.get()) > 0 )
// We've seen the raw pointer, even though it doesn't have
// a name.
return;
type_hashes[t] = h;
type_to_rep[t] = type_to_rep[seen_t];
seen_types.insert(tn);
seen_type_ptrs.insert(t.get());
return h;
}
UpdateHash(t);
auto h = p_hash(t->Tag());
// Enter an initial value for this type's hash. We'll update it
// at the end, but having it here first will prevent recursive
// records from leading to infinite recursion as we traverse them.
// It's okay that the initial value is degenerate, because if we access
// it during the traversal that will only happen due to a recursive
// type, in which case the other elements of that type will serve
// to differentiate its hash.
type_hashes[t] = h;
switch ( t->Tag() ) {
case TYPE_ADDR:
case TYPE_ANY:
case TYPE_BOOL:
case TYPE_COUNT:
case TYPE_DOUBLE:
case TYPE_ENUM:
case TYPE_ERROR:
case TYPE_INT:
case TYPE_INTERVAL:
case TYPE_OPAQUE:
case TYPE_PATTERN:
case TYPE_PORT:
case TYPE_STRING:
case TYPE_SUBNET:
case TYPE_TIME:
case TYPE_TIMER:
case TYPE_UNION:
case TYPE_VOID:
h = merge_p_hashes(h, p_hash(t));
break;
case TYPE_RECORD:
{
const auto& ft = t->AsRecordType();
auto n = ft->NumFields();
h = merge_p_hashes(h, p_hash("record"));
h = merge_p_hashes(h, p_hash(n));
for ( auto i = 0; i < n; ++i )
{
const auto& f = ft->FieldDecl(i);
h = merge_p_hashes(h, p_hash(f->id));
h = merge_p_hashes(h, HashType(f->type));
// We don't hash the field name, as in some contexts
// those are ignored.
if ( f->attrs )
{
h = merge_p_hashes(h, p_hash(f->attrs));
AnalyzeAttrs(f->attrs.get());
}
}
}
break;
case TYPE_TABLE:
{
auto tbl = t->AsTableType();
h = merge_p_hashes(h, p_hash("table"));
h = merge_p_hashes(h, p_hash("indices"));
h = merge_p_hashes(h, HashType(tbl->GetIndices()));
h = merge_p_hashes(h, p_hash("tbl-yield"));
h = merge_p_hashes(h, HashType(tbl->Yield()));
}
break;
case TYPE_FUNC:
{
auto ft = t->AsFuncType();
auto flv = ft->FlavorString();
h = merge_p_hashes(h, p_hash(flv));
h = merge_p_hashes(h, p_hash("params"));
h = merge_p_hashes(h, HashType(ft->Params()));
h = merge_p_hashes(h, p_hash("func-yield"));
h = merge_p_hashes(h, HashType(ft->Yield()));
}
break;
case TYPE_LIST:
{
auto& tl = t->AsTypeList()->GetTypes();
h = merge_p_hashes(h, p_hash("list"));
h = merge_p_hashes(h, p_hash(tl.size()));
for ( const auto& tl_i : tl )
h = merge_p_hashes(h, HashType(tl_i));
}
break;
case TYPE_VECTOR:
h = merge_p_hashes(h, p_hash("vec"));
h = merge_p_hashes(h, HashType(t->AsVectorType()->Yield()));
break;
case TYPE_FILE:
h = merge_p_hashes(h, p_hash("file"));
h = merge_p_hashes(h, HashType(t->AsFileType()->Yield()));
break;
case TYPE_TYPE:
h = merge_p_hashes(h, p_hash("type"));
h = merge_p_hashes(h, HashType(t->AsTypeType()->GetType()));
break;
}
void ProfileFunc::UpdateHash(const IntrusivePtr<zeek::Obj>& o)
type_hashes[t] = h;
if ( type_hash_reps.count(h) == 0 )
{ // No previous rep, so use this Type* for that.
type_hash_reps[h] = t;
type_to_rep[t] = t;
rep_types.push_back(t);
}
else
type_to_rep[t] = type_hash_reps[h];
if ( tn.size() > 0 )
seen_type_names[tn] = t;
return h;
}
void ProfileFuncs::AnalyzeAttrs(const Attributes* Attrs)
{
ODesc d;
o->Describe(&d);
std::string desc(d.Description());
auto h = std::hash<std::string>{}(desc);
MergeInHash(h);
}
auto attrs = Attrs->GetAttrs();
for ( const auto& a : attrs )
{
const Expr* e = a->GetExpr().get();
if ( e )
{
pending_exprs.push_back(e);
if ( e->Tag() == EXPR_LAMBDA )
lambdas.insert(e->AsLambdaExpr());
}
}
}
} // namespace zeek::detail