// See the file "COPYING" in the main distribution directory for copyright.

// Classes for traversing functions and their body ASTs to build up profiles
// of the various elements (types, globals, locals, lambdas, etc.) that appear.
// These profiles enable script optimization to make decisions regarding
// compilability and how to efficiently provide run-time components.
// For all of the following, we use the term "function" to refer to a single
// ScriptFunc/body pair, so an event handler or hook with multiple bodies
// is treated as multiple distinct "functions".
//
// One key element of constructing profiles concerns computing hashes over
// both the Zeek scripting types present in the functions, and over entire
// functions (which means computing hashes over each of the function's
// components).  Hashes need to be (1) distinct (collision-free in practice)
// and (2) deterministic (across Zeek invocations, the same components always
// map to the same hashes).  We need these properties because we use hashes
// to robustly identify identical instances of the same function, for example
// so we can recognize that an instance of the function definition seen in
// a script matches a previously compiled function body, and can therefore
// safely replace the function's AST with the compiled version.
//
// We profile functions collectively (via the ProfileFuncs class), rather
// than in isolation, because doing so (1) allows us to share expensive
// profiling steps (in particular, computing the hashes of types, as some
// of the Zeek script records get huge, and occur frequently), and (2) enables
// us to develop a global picture of all of the components germane to a set
// of functions.  The global profile is built up in terms of individual
// profiles (via the ProfileFunc class), which identify each function's
// basic components, and then using these as starting points to build out
// the global profile and compute the hashes of functions and types.
#pragma once #include "zeek/Expr.h" #include "zeek/Stmt.h" #include "zeek/Traverse.h" #include "zeek/script_opt/ScriptOpt.h" namespace zeek::detail { // The type used to represent hashes. We use the mnemonic "p_hash" as // short for "profile hash", to avoid confusion with hashes used elsehwere // in Zeek (which are for the most part keyed, a property we explicitly // do not want). using p_hash_type = unsigned long long; // Helper functions for computing/managing hashes. inline p_hash_type p_hash(int val) { return std::hash{}(val); } inline p_hash_type p_hash(std::string val) { return std::hash{}(val); } inline p_hash_type p_hash(const char* val) { return p_hash(std::string(val)); } extern p_hash_type p_hash(const Obj* o); inline p_hash_type p_hash(const IntrusivePtr& o) { return p_hash(o.get()); } inline p_hash_type merge_p_hashes(p_hash_type h1, p_hash_type h2) { // Taken from Boost. See for example // https://www.boost.org/doc/libs/1_35_0/doc/html/boost/hash_combine_id241013.html // or // https://stackoverflow.com/questions/4948780/magic-number-in-boosthash-combine return h1 ^ (h2 + 0x9e3779b9 + (h1 << 6) + (h1 >> 2)); } // Returns a filename associated with the given function body. Used to // provide distinctness to identical function bodies seen in separate, // potentially conflicting incremental compilations. This is only germane // for allowing incremental compilation of subsets of the test suite, so // if we decide to forgo that capability, we can remove this. extern std::string script_specific_filename(const StmtPtr& body); // Returns a incremental-compilation-specific hash for the given function // body, given it's non-specific hash is "generic_hash". extern p_hash_type script_specific_hash(const StmtPtr& body, p_hash_type generic_hash); // Class for profiling the components of a single function (or expression). 
class ProfileFunc : public TraversalCallback { public: // Constructor used for the usual case of profiling a script // function and one of its bodies. ProfileFunc(const Func* func, const StmtPtr& body); // Constructor for profiling an AST expression. This exists // to support (1) profiling lambda expressions, and (2) traversing // attribute expressions (such as &default=expr) to discover what // components they include. ProfileFunc(const Expr* func); // See the comments for the associated member variables for each // of these accessors. const std::unordered_set& Globals() const { return globals; } const std::unordered_set& AllGlobals() const { return all_globals; } const std::unordered_set& Locals() const { return locals; } const std::unordered_set& Params() const { return params; } const std::unordered_set& Assignees() const { return assignees; } const std::unordered_set& Inits() const { return inits; } const std::vector& Stmts() const { return stmts; } const std::vector& Exprs() const { return exprs; } const std::vector& Lambdas() const { return lambdas; } const std::vector& Constants() const { return constants; } const std::unordered_set& UnorderedIdentifiers() const { return ids; } const std::vector& OrderedIdentifiers() const { return ordered_ids; } const std::unordered_set& UnorderedTypes() const { return types; } const std::vector& OrderedTypes() const { return ordered_types; } const std::unordered_set& ScriptCalls() const { return script_calls; } const std::unordered_set& BiFGlobals() const { return BiF_globals; } const std::unordered_set& WhenCalls() const { return when_calls; } const std::unordered_set& Events() const { return events; } const std::unordered_set& ConstructorAttrs() const { return constructor_attrs; } const std::unordered_set& ExprSwitches() const { return expr_switches; } const std::unordered_set& TypeSwitches() const { return type_switches; } bool DoesIndirectCalls() { return does_indirect_calls; } int NumParams() const { return 
num_params; } int NumLambdas() const { return lambdas.size(); } int NumWhenStmts() const { return num_when_stmts; } const std::vector& AdditionalHashes() const { return addl_hashes; } // Set this function's hash to the given value; retrieve that value. void SetHashVal(p_hash_type hash) { hash_val = hash; } p_hash_type HashVal() const { return hash_val; } protected: // Construct the profile for the given function signature and body. void Profile(const FuncType* ft, const StmtPtr& body); TraversalCode PreStmt(const Stmt*) override; TraversalCode PreExpr(const Expr*) override; TraversalCode PreID(const ID*) override; // Take note of the presence of a given type. void TrackType(const Type* t); void TrackType(const TypePtr& t) { TrackType(t.get()); } // Take note of the presence of an identifier. void TrackID(const ID* id); // Globals seen in the function. // // Does *not* include globals solely seen as the function being // called in a call. std::unordered_set globals; // Same, but also includes globals only seen as called functions. std::unordered_set all_globals; // Locals seen in the function. std::unordered_set locals; // The function's parameters. Empty if our starting point was // profiling an expression. std::unordered_set params; // How many parameters the function has. The default value flags // that we started the profile with an expression rather than a // function. int num_params = -1; // Identifiers (globals, locals, parameters) that are assigned to. // Does not include implicit assignments due to initializations, // which are instead captured in "inits". std::unordered_set assignees; // Same for locals seen in initializations, so we can find, // for example, unused aggregates. std::unordered_set inits; // Statements seen in the function. Does not include indirect // statements, such as those in lambda bodies. std::vector stmts; // Expressions seen in the function. Does not include indirect // expressions (such as those appearing in attributes of types). 
std::vector exprs; // Lambdas seen in the function. We don't profile lambda bodies, // but rather make them available for separate profiling if // appropriate. std::vector lambdas; // If we're profiling a lambda function, this holds the captures. std::unordered_set captures; // Constants seen in the function. std::vector constants; // Identifiers seen in the function. std::unordered_set ids; // The same, but in a deterministic order. std::vector ordered_ids; // Types seen in the function. A set rather than a vector because // the same type can be seen numerous times. std::unordered_set types; // The same, but in a deterministic order, with duplicates removed. std::vector ordered_types; // Script functions that this script calls. std::unordered_set script_calls; // Same for BiF's, though for them we record the corresponding global // rather than the BuiltinFunc*. std::unordered_set BiF_globals; // Script functions appearing in "when" clauses. std::unordered_set when_calls; // Names of generated events. std::unordered_set events; // Attributes seen in set or table constructors. std::unordered_set constructor_attrs; // Switch statements with either expression cases or type cases. std::unordered_set expr_switches; std::unordered_set type_switches; // True if the function makes a call through an expression rather // than simply a function's (global) name. bool does_indirect_calls = false; // Additional values present in the body that should be factored // into its hash. std::vector addl_hashes; // Associated hash value. p_hash_type hash_val = 0; // How many when statements appear in the function body. We could // track these individually, but to date all that's mattered is // whether a given body contains any. int num_when_stmts = 0; // Whether we're separately processing a "when" condition to // mine out its script calls. bool in_when = false; }; // Function pointer for a predicate that determines whether a given // profile is compilable. 
Alternatively we could derive subclasses // from ProfileFuncs and use a virtual method for this, but that seems // heavier-weight for what's really a simple notion. typedef bool (*is_compilable_pred)(const ProfileFunc*); // Collectively profile an entire collection of functions. class ProfileFuncs { public: // Updates entries in "funcs" to include profiles. If pred is // non-nil, then it is called for each profile to see whether it's // compilable, and, if not, the FuncInfo is marked as ShouldSkip(). ProfileFuncs(std::vector& funcs, is_compilable_pred pred = nullptr); // The following accessors provide a global profile across all of // the (non-skipped) functions in "funcs". See the comments for // the associated member variables for documentation. const std::unordered_set& Globals() const { return globals; } const std::unordered_set& AllGlobals() const { return all_globals; } const std::unordered_set& Constants() const { return constants; } const std::vector& MainTypes() const { return main_types; } const std::vector& RepTypes() const { return rep_types; } const std::unordered_set& ScriptCalls() const { return script_calls; } const std::unordered_set& BiFGlobals() const { return BiF_globals; } const std::unordered_set& Lambdas() const { return lambdas; } const std::unordered_set& Events() const { return events; } std::shared_ptr FuncProf(const ScriptFunc* f) { return func_profs[f]; } // This is only externally germane for LambdaExpr's. std::shared_ptr ExprProf(const Expr* e) { return expr_profs[e]; } // Returns the "representative" Type* for the hash associated with // the parameter (which might be the parameter itself). const Type* TypeRep(const Type* orig) { ASSERT(type_to_rep.count(orig) > 0); return type_to_rep[orig]; } // Returns the hash associated with the given type, computing it // if necessary. 
p_hash_type HashType(const TypePtr& t) { return HashType(t.get()); } p_hash_type HashType(const Type* t); protected: // Incorporate the given function profile into the global profile. void MergeInProfile(ProfileFunc* pf); // When traversing types, Zeek records can have attributes that in // turn have expressions associated with them. The expressions can // in turn have types, which might be records with further attribute // expressions, etc. This method iteratively processes the list // expressions we need to analyze until no new ones are added. void DrainPendingExprs(); // Compute hashes for the given set of types. Potentially recursive // upon discovering additional types. void ComputeTypeHashes(const std::vector& types); // Compute hashes to associate with each function void ComputeBodyHashes(std::vector& funcs); // Compute the hash associated with a single function profile. void ComputeProfileHash(std::shared_ptr pf); // Analyze the expressions and lambdas appearing in a set of // attributes. void AnalyzeAttrs(const Attributes* Attrs); // Globals seen across the functions, other than those solely seen // as the function being called in a call. std::unordered_set globals; // Same, but also includes globals only seen as called functions. std::unordered_set all_globals; // Constants seen across the functions. std::unordered_set constants; // Types seen across the functions. Does not include subtypes. // Deterministically ordered. std::vector main_types; // "Representative" types seen across the functions. Includes // subtypes. These all have unique hashes, and are returned by // calls to TypeRep(). Deterministically ordered. std::vector rep_types; // Maps a type to its representative (which might be itself). std::unordered_map type_to_rep; // Script functions that get called. std::unordered_set script_calls; // Same for BiF's. std::unordered_set BiF_globals; // And for lambda's. std::unordered_set lambdas; // Names of generated events. 
std::unordered_set events; // Maps script functions to associated profiles. This isn't // actually well-defined in the case of event handlers and hooks, // which can have multiple bodies. However, this is only used // in the context of analyzing a single-bodied function. std::unordered_map> func_profs; // Maps expressions to their profiles. This is only germane // externally for LambdaExpr's, but internally it abets memory // management. std::unordered_map> expr_profs; // These remaining member variables are only used internally, // not provided via accessors: // Maps types to their hashes. std::unordered_map type_hashes; // An inverse mapping, to a representative for each distinct hash. std::unordered_map type_hash_reps; // For types with names, tracks the ones we've already hashed, // so we can avoid work for distinct pointers that refer to the // same underlying type. std::unordered_map seen_type_names; // Expressions that we've discovered that we need to further // profile. These can arise for example due to lambdas or // record attributes. std::vector pending_exprs; }; } // namespace zeek::detail