// zeek/src/script_opt/ProfileFunc.h (738 lines, 32 KiB, C++)

// See the file "COPYING" in the main distribution directory for copyright.
// Classes for traversing functions and their body ASTs to build up profiles
// of the various elements (types, globals, locals, lambdas, etc.) that appear.
// These profiles enable script optimization to make decisions regarding
// compatibility and how to efficiently provide run-time components.
// For all of the following, we use the term "function" to refer to a single
// ScriptFunc/body pair, so an event handler or hook with multiple bodies
// is treated as multiple distinct "functions".
//
// One key element of constructing profiles concerns computing hashes over
// both the Zeek scripting types present in the functions, and over entire
// functions (which means computing hashes over each of the function's
// components). Hashes need to be (1) distinct (collision-free in practice)
// and (2) deterministic (across Zeek invocations, the same components always
// map to the same hashes). We need these properties because we use hashes
// to robustly identify identical instances of the same function (for example,
// so we can recognize that an instance of the function definition seen in
// a script matches a previously compiled function body, and can thus safely
// replace the function's AST with the compiled version).
//
// We profile functions collectively (via the ProfileFuncs class), rather
// than in isolation, because doing so (1) allows us to share expensive
// profiling steps (in particular, computing the hashes of types, as some
// of the Zeek script records get huge, and occur frequently), and (2) enables
// us to develop a global picture of all of the components germane to a set
// of functions. The global profile is built up in terms of individual
// profiles (via the ProfileFunc class), which identify each function's
// basic components, and then using these as starting points to build out
// the global profile and compute the hashes of functions and types.
#pragma once
#include <cstdint>
#include <functional>
#include <memory>
#include <optional>
#include <set>
#include <string>
#include <string_view>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

#include "zeek/Expr.h"
#include "zeek/Stmt.h"
#include "zeek/Traverse.h"
#include "zeek/script_opt/ScriptOpt.h"
namespace zeek::detail {
// The integer type that carries every profile hash. "p_hash" is short for
// "profile hash"; the prefix keeps these apart from the hashes used elsewhere
// in Zeek, which are for the most part keyed - a property we explicitly do
// not want here.
using p_hash_type = unsigned long long;

// Helper functions for computing/managing hashes.
//
// NOTE(review): the two overloads below defer to std::hash. The C++ standard
// only requires std::hash to be stable within a single program execution, so
// the cross-invocation determinism this header asks for relies on the
// standard library implementation in use - confirm when porting.

// Hashes an integer value.
inline p_hash_type p_hash(int val) {
    const std::hash<int> hasher{};
    return hasher(val);
}

// Hashes the characters viewed by "val".
inline p_hash_type p_hash(std::string_view val) {
    const std::hash<std::string_view> hasher{};
    return hasher(val);
}
// Hashes an arbitrary Zeek object. Only declared here; the definition lives
// outside this header.
extern p_hash_type p_hash(const Obj* o);

// Convenience overload: hashes the object that the smart pointer refers to
// by forwarding its raw pointer to the Obj* overload.
inline p_hash_type p_hash(const IntrusivePtr<Obj>& o) {
    const Obj* raw = o.get();
    return p_hash(raw);
}
// Folds hash h2 into hash h1 and returns the combined hash. The result
// depends on the order of the arguments.
//
// The mixing step is taken from Boost. See for example
//	https://www.boost.org/doc/libs/1_35_0/doc/html/boost/hash_combine_id241013.html
// or
//	https://stackoverflow.com/questions/4948780/magic-number-in-boosthash-combine
inline p_hash_type merge_p_hashes(p_hash_type h1, p_hash_type h2) {
    const p_hash_type mixed = h2 + 0x9e3779b9 + (h1 << 6) + (h1 >> 2);
    return h1 ^ mixed;
}
// An unordered collection of (non-owned) attribute pointers; iteration
// order is unspecified.
using AttrSet = std::unordered_set<const Attr*>;
// A sequence of (non-owned) attribute pointers.
using AttrVec = std::vector<const Attr*>;
// Forward declaration; the class itself is defined later in this header.
class ProfileFuncs;
// Class for profiling the components of a single function (or expression).
//
// The class is a TraversalCallback: the statement, expression, identifier
// and type hooks it overrides record the elements encountered. The accessors
// below expose what was recorded; see the comments on the associated member
// variables for what each one holds.
class ProfileFunc : public TraversalCallback {
public:
    // Constructor used for the usual case of profiling a script
    // function and one of its bodies.
    ProfileFunc(const Func* func, const StmtPtr& body, bool abs_rec_fields);

    // Constructors for profiling an AST statement or expression. These
    // exist to support (1) profiling lambda expressions and loop bodies,
    // and (2) traversing attribute expressions (such as &default=expr)
    // to discover what components they include.
    ProfileFunc(const Stmt* body, bool abs_rec_fields = false);
    ProfileFunc(const Expr* func, bool abs_rec_fields = false);

    // Returns the function, body, or expression profiled. Each can be
    // null depending on the constructor used.
    const Func* ProfiledFunc() const { return profiled_func; }
    const ScopePtr& ProfiledScope() const { return profiled_scope; }
    const Stmt* ProfiledBody() const { return profiled_body; }
    const Expr* ProfiledExpr() const { return profiled_expr; }

    // See the comments for the associated member variables for each
    // of these accessors.
    const IDSet& Globals() const { return globals; }
    const IDSet& AllGlobals() const { return all_globals; }
    const IDSet& Locals() const { return locals; }
    const IDSet& Captures() const { return captures; }
    const auto& CapturesOffsets() const { return captures_offsets; }
    const IDSet& WhenLocals() const { return when_locals; }
    const IDSet& Params() const { return params; }
    const std::unordered_map<IDPtr, int>& Assignees() const { return assignees; }
    const IDSet& NonLocalAssignees() const { return non_local_assignees; }
    const auto& TableRefs() const { return tbl_refs; }
    const auto& AggrMods() const { return aggr_mods; }
    const IDSet& Inits() const { return inits; }
    const std::vector<StmtPtr>& Stmts() const { return stmts; }
    const std::vector<ExprPtr>& Exprs() const { return exprs; }
    const std::vector<const LambdaExpr*>& Lambdas() const { return lambdas; }
    const std::vector<const ConstExpr*>& Constants() const { return constants; }
    const std::vector<IDPtr>& OrderedIdentifiers() const { return ordered_ids; }
    const TypeSet& UnorderedTypes() const { return types; }
    const std::vector<const Type*>& OrderedTypes() const { return ordered_types; }
    const auto& TypeAliases() const { return type_aliases; }
    const std::unordered_set<ScriptFunc*>& ScriptCalls() const { return script_calls; }
    const IDSet& BiFGlobals() const { return BiF_globals; }
    const std::unordered_set<std::string>& Events() const { return events; }
    const std::unordered_map<const Attributes*, TypePtr>& ConstructorAttrs() const { return constructor_attrs; }
    const std::unordered_map<const Type*, std::set<const Attributes*>>& RecordConstructorAttrs() const {
        return rec_constructor_attrs;
    }
    const std::unordered_set<const SwitchStmt*>& ExprSwitches() const { return expr_switches; }
    const std::unordered_set<const SwitchStmt*>& TypeSwitches() const { return type_switches; }

    bool DoesIndirectCalls() const { return does_indirect_calls; }
    const IDSet& IndirectFuncs() const { return indirect_funcs; }

    int NumParams() const { return num_params; }
    // The container's size is unsigned; the narrowing to the int that
    // callers expect is made explicit.
    int NumLambdas() const { return static_cast<int>(lambdas.size()); }
    int NumWhenStmts() const { return num_when_stmts; }

    const std::vector<p_hash_type>& AdditionalHashes() const { return addl_hashes; }

    // Set this function's hash to the given value; retrieve that value.
    // HashVal() asserts that a value has been set, so callers that cannot
    // be sure should check HasHashVal() first.
    void SetHashVal(p_hash_type hash) { hash_val = hash; }
    p_hash_type HashVal() const {
        ASSERT(hash_val);
        return *hash_val;
    }
    bool HasHashVal() const { return bool(hash_val); }

protected:
    // Construct the profile for the given function signature and body.
    void Profile(const FuncType* ft, const StmtPtr& body);

    TraversalCode PreStmt(const Stmt*) override;
    TraversalCode PreExpr(const Expr*) override;
    TraversalCode PreID(const ID*) override;
    TraversalCode PreType(const Type*) override;

    // Take note of the presence of a given type.
    void TrackType(const Type* t);
    void TrackType(const TypePtr& t) { TrackType(t.get()); }

    // Take note of the presence of an identifier.
    void TrackID(IDPtr id);

    // Take note of an assignment to an identifier.
    void TrackAssignment(IDPtr id);

    // Extracts attributes of a record type used in a constructor (or implicit
    // initialization, or coercion, which does an implicit construction).
    void CheckRecordConstructor(TypePtr t);

    // The function, body, or expression profiled. Can be null
    // depending on which constructor was used.
    const Func* profiled_func = nullptr;
    ScopePtr profiled_scope;     // null when not in a full function context
    FuncTypePtr profiled_func_t; // null when not in a full function context
    const Stmt* profiled_body = nullptr;
    const Expr* profiled_expr = nullptr;

    // Globals seen in the function.
    //
    // Does *not* include globals solely seen as the function being
    // called in a call.
    IDSet globals;

    // Same, but also includes globals only seen as called functions.
    IDSet all_globals;

    // Locals seen in the function.
    IDSet locals;

    // Same, but for those declared in "when" expressions.
    IDSet when_locals;

    // The function's parameters. Empty if our starting point was
    // profiling an expression.
    IDSet params;

    // How many parameters the function has. The default value flags
    // that we started the profile with an expression rather than a
    // function.
    int num_params = -1;

    // Maps identifiers (globals, locals, parameters) to how often
    // they are assigned to (no entry if never). Does not include
    // implicit assignments due to initializations, which are instead
    // captured in "inits".
    std::unordered_map<IDPtr, int> assignees;

    // A subset of assignees reflecting those that are globals or captures.
    IDSet non_local_assignees;

    // TableType's that are used in table references (i.e., index operations).
    TypeSet tbl_refs;

    // Types corresponding to aggregates that are modified.
    TypeSet aggr_mods;

    // Locals seen in initializations, so we can find, for example,
    // unused aggregates.
    IDSet inits;

    // Statements seen in the function. Does not include indirect
    // statements, such as those in lambda bodies.
    std::vector<StmtPtr> stmts;

    // Expressions seen in the function. Does not include indirect
    // expressions (such as those appearing in attributes of types).
    std::vector<ExprPtr> exprs;

    // Lambdas seen in the function. We don't profile lambda bodies,
    // but rather make them available for separate profiling if
    // appropriate.
    std::vector<const LambdaExpr*> lambdas;

    // If we're profiling a lambda function, this holds the captures.
    IDSet captures;

    // This maps capture identifiers to their offsets.
    std::unordered_map<IDPtr, int> captures_offsets;

    // Constants seen in the function.
    std::vector<const ConstExpr*> constants;

    // Identifiers seen in the function.
    IDSet ids;

    // The same, but in a deterministic order.
    std::vector<IDPtr> ordered_ids;

    // Types seen in the function. A set rather than a vector because
    // the same type can be seen numerous times.
    TypeSet types;

    // The same, but in a deterministic order, with duplicates removed.
    std::vector<const Type*> ordered_types;

    // For a given type (seen in an attribute), tracks other types that
    // are effectively aliased with it via coercions.
    std::unordered_map<const Type*, std::set<const Type*>> type_aliases;

    // Script functions that this script calls. Includes calls made
    // by lambdas and when bodies, as the goal is to identify recursion.
    std::unordered_set<ScriptFunc*> script_calls;

    // Same for BiF's, though for them we record the corresponding global
    // rather than the BuiltinFunc*. In addition, we only track BiFs germane
    // to code we're compiling.
    IDSet BiF_globals;

    // Script functions appearing in "when" clauses.
    std::unordered_set<ScriptFunc*> when_calls;

    // Names of generated events.
    std::unordered_set<std::string> events;

    // Attributes seen in set, table, or record constructors, mapped back
    // to the type where they appear.
    std::unordered_map<const Attributes*, TypePtr> constructor_attrs;

    // Attributes associated with record constructors. There can be several,
    // so we use a set.
    std::unordered_map<const Type*, std::set<const Attributes*>> rec_constructor_attrs;

    // Switch statements with either expression cases or type cases.
    std::unordered_set<const SwitchStmt*> expr_switches;
    std::unordered_set<const SwitchStmt*> type_switches;

    // True if the function makes a call through an expression rather
    // than simply a function's (global) name.
    bool does_indirect_calls = false;

    // Functions (not hooks or event handlers) that are referred to in
    // a context other than being called. These might be used elsewhere
    // for indirect calls.
    IDSet indirect_funcs;

    // Additional values present in the body that should be factored
    // into its hash.
    std::vector<p_hash_type> addl_hashes;

    // Associated hash value. Empty until SetHashVal() is called.
    std::optional<p_hash_type> hash_val;

    // How many when statements appear in the function body. We could
    // track these individually, but to date all that's mattered is
    // whether a given body contains any.
    int num_when_stmts = 0;

    // Whether we should treat record field accesses as absolute
    // (integer offset) or relative (name-based). Given a default, like
    // every other scalar member, so that it never holds an indeterminate
    // value; the default matches that of the constructors' parameter.
    bool abs_rec_fields = false;
};
// Describes an operation for which some forms of access can lead to state
// modifications.
class SideEffectsOp {
public:
// Access types correspond to:
// NONE - there are no side effects
// CALL - relevant for function calls
// CONSTRUCTION - relevant for constructing/coercing a record
// READ - relevant for reading a table element
// WRITE - relevant for modifying a table element
enum AccessType : uint8_t { NONE, CALL, CONSTRUCTION, READ, WRITE };
SideEffectsOp(AccessType at = NONE, const Type* t = nullptr) : access(at), type(t) {}
auto GetAccessType() const { return access; }
const Type* GetType() const { return type; }
void SetUnknownChanges() { has_unknown_changes = true; }
bool HasUnknownChanges() const { return has_unknown_changes; }
void AddModNonGlobal(IDSet ids) { mod_non_locals.insert(ids.begin(), ids.end()); }
void AddModAggrs(TypeSet types) { mod_aggrs.insert(types.begin(), types.end()); }
const auto& ModNonLocals() const { return mod_non_locals; }
const auto& ModAggrs() const { return mod_aggrs; }
private:
AccessType access;
const Type* type; // type for which some operations alter state
// Globals and/or captures that the operation potentially modifies.
IDSet mod_non_locals;
// Aggregates (specified by types) that potentially modified.
TypeSet mod_aggrs;
// Sometimes the side effects are not known (such as when making
// indirect function calls, so we can't know statically what function
// will be called). We refer to as Unknown, and their implications are
// presumed to be worst-case - any non-local or aggregate is potentially
// affected.
bool has_unknown_changes = false;
};
// Function pointer for a predicate that determines whether a given
// profile is compilable. Alternatively we could derive subclasses
// from ProfileFuncs and use a virtual method for this, but that seems
// heavier-weight for what's really a simple notion.
//
// The predicate receives the profile to assess plus a "reason"
// out-parameter.
// NOTE(review): presumably "reason" is set to an explanatory string when
// the predicate returns false - confirm against the implementations.
using is_compilable_pred = bool (*)(const ProfileFunc*, const char** reason);
// Collectively profile an entire collection of functions.
class ProfileFuncs {
public:
    // Updates entries in "funcs" to include profiles. If pred is non-nil,
    // then it is called for each profile to see whether it's compilable,
    // and, if not, the FuncInfo is marked as ShouldSkip().
    // "compute_func_hashes" governs whether we compute hashes for the
    // FuncInfo entries, or keep their existing ones. "full_record_hashes"
    // controls whether the hashes for extended records cover their final,
    // full form, or only their original fields.
    ProfileFuncs(std::vector<FuncInfo>& funcs, is_compilable_pred pred, bool compute_func_hashes,
                 bool full_record_hashes);

    // Used to profile additional lambdas that (potentially) weren't part
    // of the overall function profiling.
    void ProfileLambda(const LambdaExpr* l);

    // The following accessors provide a global profile across all of
    // the (non-skipped) functions in "funcs". See the comments for
    // the associated member variables for documentation.
    const IDSet& Globals() const { return globals; }
    const IDSet& AllGlobals() const { return all_globals; }
    const std::unordered_set<const ConstExpr*>& Constants() const { return constants; }
    const std::vector<const Type*>& MainTypes() const { return main_types; }
    const std::vector<const Type*>& RepTypes() const { return rep_types; }
    const std::unordered_set<ScriptFunc*>& ScriptCalls() const { return script_calls; }
    const IDSet& BiFGlobals() const { return BiF_globals; }
    const std::unordered_set<const LambdaExpr*>& Lambdas() const { return lambdas; }
    const std::unordered_set<std::string>& Events() const { return events; }
    const auto& ExprAttrs() const { return expr_attrs; }
    const auto& FuncProfs() const { return func_profs; }

    // Profiles associated with LambdaExpr's and expressions appearing in
    // attributes. Returns nil if no profile is recorded for "e". The lookup
    // is done with find() rather than operator[] so that asking about an
    // unknown expression does not add a nil entry to the map.
    std::shared_ptr<ProfileFunc> ExprProf(const Expr* e) const {
        auto it = expr_profs.find(e);
        if ( it == expr_profs.end() )
            return nullptr;

        return it->second;
    }

    // Returns true if the given type corresponds to a table that has a
    // &default attribute that returns an aggregate value.
    bool IsTableWithDefaultAggr(const Type* t);

    // Returns true if the given operation has non-zero side effects.
    bool HasSideEffects(SideEffectsOp::AccessType access, const TypePtr& t) const;

    // Retrieves the side effects of the given operation, updating non_local_ids
    // and aggrs with identifiers and aggregate types that are modified.
    //
    // A return value of true means the side effects are Unknown. If false,
    // then there are side effects iff either (or both) of non_local_ids
    // or aggrs are non-empty.
    bool GetSideEffects(SideEffectsOp::AccessType access, const Type* t, IDSet& non_local_ids, TypeSet& aggrs) const;

    // Retrieves the side effects of calling the function corresponding to
    // the NameExpr, updating non_local_ids and aggrs with identifiers and
    // aggregate types that are modified. is_unknown is set to true if the
    // call has Unknown side effects (which overrides the relevance of the
    // updates to the sets).
    //
    // A return value of true means that side effects cannot yet be determined,
    // due to dependencies on other side effects. This can happen when
    // constructing a ProfileFuncs, but should not happen once it's constructed.
    bool GetCallSideEffects(const NameExpr* n, IDSet& non_local_ids, TypeSet& aggrs, bool& is_unknown);

    // Returns the "representative" Type* for the hash associated with
    // the parameter (which might be the parameter itself). Asserts that
    // the type has a recorded representative.
    const Type* TypeRep(const Type* orig) {
        auto it = type_to_rep.find(orig);
        ASSERT(it != type_to_rep.end());
        return it->second;
    }

    // Returns the hash associated with the given type, computing it
    // if necessary.
    p_hash_type HashType(const TypePtr& t) { return HashType(t.get()); }
    p_hash_type HashType(const Type* t);

    p_hash_type HashAttrs(const AttributesPtr& attrs);

protected:
    // Incorporate the given function profile into the global profile.
    void MergeInProfile(ProfileFunc* pf);

    // Recursively traverse a (possibly aggregate) value to extract
    // all of the types its elements use.
    void TraverseValue(const ValPtr& v);

    // When traversing types, Zeek records can have attributes that in
    // turn have expressions associated with them. The expressions can
    // in turn have types, which might be records with further attribute
    // expressions, etc. This method iteratively processes the list of
    // expressions we need to analyze until no new ones are added.
    void DrainPendingExprs();

    // Compute hashes for the given set of types. Potentially recursive
    // upon discovering additional types.
    void ComputeTypeHashes(const std::vector<const Type*>& types);

    // Compute hashes to associate with each function
    void ComputeBodyHashes(std::vector<FuncInfo>& funcs);

    // For a given lambda, completes analysis of its profile.
    void AnalyzeLambdaProfile(const LambdaExpr* l);

    // Compute the hash associated with a single function profile.
    void ComputeProfileHash(std::shared_ptr<ProfileFunc> pf);

    // Analyze the expressions and lambdas appearing in a set of
    // attributes, in the context of a given type.
    void AnalyzeAttrs(const Attributes* attrs, const Type* t);

    // In the abstract, computes side-effects associated with operations other
    // than explicit function calls. Currently, this means tables and records
    // that can implicitly call functions that have side effects due to
    // attributes such as &default. The machinery also applies to assessing
    // the side effects of explicit function calls, which is done by
    // (the two versions of) GetCallSideEffects().
    void ComputeSideEffects();

    // True if the given expression for sure has no side effects, which is
    // almost always the case. False if the expression *may* have side effects
    // and requires further analysis.
    bool DefinitelyHasNoSideEffects(const ExprPtr& e) const;

    // Records the side effects associated with the given attribute.
    void SetSideEffects(const Attr* a, IDSet& non_local_ids, TypeSet& aggrs, bool is_unknown);

    // Returns the attributes associated with the given type *and its aliases*.
    AttrVec AssociatedAttrs(const Type* t);

    // For a given set of attributes, assesses which ones are associated with
    // the given type or its aliases and adds them to the given vector.
    void FindAssociatedAttrs(const AttrSet& candidate_attrs, const Type* t, AttrVec& assoc_attrs);

    // Assesses the side effects associated with the given expression. Returns
    // true if a complete assessment was possible, false if not because the
    // results depend on resolving other potential side effects first.
    bool AssessSideEffects(const ExprPtr& e, IDSet& non_local_ids, TypeSet& types, bool& is_unknown);

    // Same, but for the given profile.
    bool AssessSideEffects(const ProfileFunc* pf, IDSet& non_local_ids, TypeSet& types, bool& is_unknown);

    // Same but for the particular case of a relevant access to an aggregate
    // (which can be constructing a record; reading a table element; or
    // modifying a table element).
    bool AssessAggrEffects(SideEffectsOp::AccessType access, const Type* t, IDSet& non_local_ids, TypeSet& aggrs,
                           bool& is_unknown);

    // For a given set of side effects, determines whether the given aggregate
    // access applies. If so, updates non_local_ids and aggrs and returns true
    // if there are Unknown side effects; otherwise returns false.
    bool AssessSideEffects(const SideEffectsOp* se, SideEffectsOp::AccessType access, const Type* t,
                           IDSet& non_local_ids, TypeSet& aggrs) const;

    // Returns nil if side effects are not available. That should never be
    // the case after we've done our initial analysis, but is provided
    // as a signal so that this method can also be used during that analysis.
    std::shared_ptr<SideEffectsOp> GetCallSideEffects(const ScriptFunc* f);

    // Globals seen across the functions, other than those solely seen
    // as the function being called in a call.
    IDSet globals;

    // Same, but also includes globals only seen as called functions.
    IDSet all_globals;

    // Constants seen across the functions.
    std::unordered_set<const ConstExpr*> constants;

    // Types seen across the functions. Does not include subtypes.
    // Deterministically ordered.
    std::vector<const Type*> main_types;

    // "Representative" types seen across the functions. Includes
    // subtypes. These all have unique hashes, and are returned by
    // calls to TypeRep(). Deterministically ordered.
    std::vector<const Type*> rep_types;

    // Maps a type to its representative (which might be itself).
    std::unordered_map<const Type*, const Type*> type_to_rep;

    // For a given type, tracks which other types are aliased to it.
    // Alias occurs via operations that can propagate attributes, which
    // are various forms of aggregate coercions.
    std::unordered_map<const Type*, std::set<const Type*>> type_aliases;

    // Script functions that get called.
    std::unordered_set<ScriptFunc*> script_calls;

    // Same for BiF's.
    IDSet BiF_globals;

    // And for lambda's.
    std::unordered_set<const LambdaExpr*> lambdas;

    // Lambdas that we have already processed. An optimization to avoid
    // unnecessary work.
    std::unordered_set<const LambdaExpr*> processed_lambdas;

    // Names of generated events.
    std::unordered_set<std::string> events;

    // Maps script functions to associated profiles. This isn't actually
    // well-defined in the case of event handlers and hooks, which can have
    // multiple bodies. However, we only use this in the context of calls
    // to regular functions, and for that it suffices.
    std::unordered_map<const ScriptFunc*, std::shared_ptr<ProfileFunc>> func_profs;

    // Map lambda names to their primary functions
    std::unordered_map<std::string, const ScriptFunc*> lambda_primaries;

    // Tracks side effects associated with script functions. If we decide in
    // the future to associate richer side-effect information with BiFs then
    // we could expand this to track Func*'s instead.
    std::unordered_map<const ScriptFunc*, std::shared_ptr<SideEffectsOp>> func_side_effects;

    // Maps expressions to their profiles.
    std::unordered_map<const Expr*, std::shared_ptr<ProfileFunc>> expr_profs;

    // These remaining member variables are only used internally,
    // not provided via accessors:

    // Maps expression-valued attributes to a collection of types in which
    // the attribute appears. Usually there's just one type, but there are
    // some scripting constructs that can result in the same attribute being
    // shared across multiple distinct (though compatible) types.
    std::unordered_map<const Attr*, std::vector<const Type*>> expr_attrs;

    // Tracks whether a given TableType has a &default that returns an
    // aggregate. Expressions involving indexing tables with such types
    // cannot be optimized out using CSE because each returned value is
    // distinct.
    std::unordered_map<const Type*, bool> tbl_has_aggr_default;

    // For a given attribute, maps it to side effects associated with aggregate
    // operations (table reads/writes).
    std::unordered_map<const Attr*, std::vector<std::shared_ptr<SideEffectsOp>>> aggr_side_effects;

    // The same, but for record constructors.
    std::unordered_map<const Attr*, std::vector<std::shared_ptr<SideEffectsOp>>> record_constr_with_side_effects;

    // The set of attributes that may have side effects but we haven't yet
    // resolved if that's the case. Empty after we're done analyzing for
    // side effects.
    AttrSet candidates;

    // The current candidate we're analyzing. We track this to deal with
    // the possibility of the candidate's side effects recursively referring
    // to the candidate itself. Null-initialized so that the pointer never
    // holds an indeterminate value before the analysis assigns it.
    const Attr* curr_candidate = nullptr;

    // The set of attributes that definitely have side effects.
    AttrSet attrs_with_side_effects;

    // The full collection of operations with side effects.
    std::vector<std::shared_ptr<SideEffectsOp>> side_effects_ops;

    // Which function profiles we are currently analyzing. Used to detect
    // recursion and prevent it from leading to non-termination of the analysis.
    std::unordered_set<std::shared_ptr<ProfileFunc>> active_func_profiles;

    // Maps types to their hashes.
    std::unordered_map<const Type*, p_hash_type> type_hashes;

    // An inverse mapping, to a representative for each distinct hash.
    std::unordered_map<p_hash_type, const Type*> type_hash_reps;

    // For types with names, tracks the ones we've already hashed, so we can
    // avoid work for distinct pointers that refer to the same underlying type.
    std::unordered_map<std::string, const Type*> seen_type_names;

    // Expressions that we've discovered that we need to further profile.
    // These can arise for example due to lambdas or record attributes.
    std::vector<const Expr*> pending_exprs;

    // Whether to compute new hashes for the FuncInfo entries. If the FuncInfo
    // doesn't have a hash, it will always be computed.
    bool compute_func_hashes;

    // Whether the hashes for extended records should cover their final,
    // full form, or only their original fields.
    bool full_record_hashes;
};
// Updates the line numbers associated with an AST node to reflect its
// full block (i.e., correct "first" and "last" for multi-line and compound
// statements).
//
// Implemented as a TraversalCallback: the work happens in the statement and
// expression hooks overridden below, which are only declared in this header.
class SetBlockLineNumbers : public TraversalCallback {
public:
// Note, these modify the location information of their "const" arguments.
// It would be cleaner if Obj provided an interface for doing so (by making
// SetLocationInfo be a "const" method), but unfortunately it's virtual
// (unclear why) so we don't know how it might be overridden in user code.
TraversalCode PreStmt(const Stmt*) override;
TraversalCode PostStmt(const Stmt*) override;
TraversalCode PreExpr(const Expr*) override;
private:
// Helper that works on a single (mutable) location; declared here only.
// NOTE(review): presumably folds "loc" into the innermost entry of
// block_line_range - confirm against the definition.
void UpdateLocInfo(Location* loc);
// A stack of block ranges. Each entry in the vector corresponds to a
// statement block, managed in a LIFO manner reflecting statement nesting.
// We start building up a given block's range during pre-traversal and
// finish it during post-traversal, propagating the updates to the
// nesting parent.
// NOTE(review): each pair is presumably (first line, last line) - confirm.
std::vector<std::pair<int, int>> block_line_range;
};
// Goes through all of the functions to associate full location information
// with each AST node.
class ASTBlockAnalyzer : public TraversalCallback {
public:
    ASTBlockAnalyzer(std::vector<FuncInfo>& funcs);

    TraversalCode PreStmt(const Stmt*) override;
    TraversalCode PostStmt(const Stmt*) override;
    TraversalCode PreExpr(const Expr*) override;

    // For a given location, returns its full local description (not
    // including its parent). If no expanded description has been recorded
    // for the location, falls back to the function-prefixed line range.
    std::string GetDesc(const Location* loc) const {
        const auto cached = exp_desc.find(LocKey(loc));
        if ( cached != exp_desc.end() )
            return cached->second;

        return LocWithFunc(loc);
    }

    // Whether we've created a description for the given location. This
    // should always be true other than for certain functions with empty
    // bodies that are created post-parsing. Available for debugging so
    // we can assert we have these.
    bool HaveExpDesc(const Location* loc) const {
        const auto key = LocKey(loc);
        return exp_desc.find(key) != exp_desc.end();
    }

private:
    // Construct the full expanded description associated with the given
    // location (if not already cached) and return it. This is the "static"
    // view; if we reach the location via a non-inlined call, we will
    // prepend that expansion when reporting the corresponding profile.
    std::string BuildExpandedDescription(const Location* loc);

    // Return the key used to associate a Location object with its full
    // description. The key has the form "<file>:<first line>-<last line>".
    std::string LocKey(const Location* loc) const {
        std::string key(loc->FileName());
        key += ":";
        key += std::to_string(loc->FirstLine());
        key += "-";
        key += std::to_string(loc->LastLine());
        return key;
    }

    // Return the description of a location, including the function in
    // which it's embedded: the function prefix followed by the first line
    // and, when the location spans several lines, "-" plus the last line.
    std::string LocWithFunc(const Location* loc) const {
        const auto first = loc->FirstLine();
        const auto last = loc->LastLine();

        std::string desc = func_name_prefix;
        desc += std::to_string(first);

        if ( first != last ) {
            desc += "-";
            desc += std::to_string(last);
        }

        return desc;
    }

    // The function whose body we are analyzing, in a form convenient
    // for adding it as a prefix (i.e., with a trailing ':').
    std::string func_name_prefix;

    // Stack of expanded descriptions of parent blocks. Each entry is
    // a pair of the parent's own description plus the full descriptor
    // up to that point.
    std::vector<std::pair<std::string, std::string>> parents;

    // Maps a statement's location key to its expanded description.
    std::unordered_map<std::string, std::string> exp_desc;
};
// If we're profiling, this provides the analysis of how low-level location
// information relates to higher-level statement blocks.
// NOTE(review): presumably left null when not profiling - confirm before
// dereferencing.
extern std::unique_ptr<ASTBlockAnalyzer> AST_blocks;
// Returns the full name of a function at a given location, including its
// associated module (even for event handlers that don't actually have
// modules in their names), so we can track overall per-module resource
// usage.
// "fname" is the function's name (taken by value) and "loc" the location
// at which it appears; only declared here.
extern std::string func_name_at_loc(std::string fname, const Location* loc);
} // namespace zeek::detail