diff --git a/src/script_opt/ProfileFunc.cc b/src/script_opt/ProfileFunc.cc
index 901ca70429..d478446ded 100644
--- a/src/script_opt/ProfileFunc.cc
+++ b/src/script_opt/ProfileFunc.cc
@@ -1401,4 +1401,177 @@ std::shared_ptr ProfileFuncs::GetCallSideEffects(const ScriptFunc
     return seo;
 }
 
+// We associate modules with filenames, and take the first one we see.
+static std::unordered_map<std::string, std::string> filename_module;
+
+// Records "module_name" as the module for the file at the current location,
+// unless that location has no line information or the file already has a
+// module associated with it.
+void switch_to_module(const char* module_name) {
+    auto loc = GetCurrentLocation();
+    if ( loc.first_line != 0 )
+        // try_emplace() leaves an existing entry alone, so the first one wins.
+        filename_module.try_emplace(loc.filename, module_name);
+}
+
+// Returns "fname" prefixed with the module associated with loc's file. The
+// name comes back unchanged if there's no such module, if the module is
+// empty or "GLOBAL", or if the name already starts with the module prefix.
+std::string func_name_at_loc(std::string fname, const Location* loc) {
+    auto find_module = filename_module.find(loc->filename);
+    if ( find_module == filename_module.end() )
+        // No associated module.
+        return fname;
+
+    auto& module = find_module->second;
+    if ( module.empty() || module == "GLOBAL" )
+        // Trivial associated module.
+        return fname;
+
+    auto mod_prefix = module + "::";
+
+    // Anchored prefix test: unlike find(), it doesn't scan the rest of fname.
+    if ( fname.compare(0, mod_prefix.size(), mod_prefix) == 0 )
+        return fname; // it already has the module name
+
+    return mod_prefix + fname;
+}
+
+// Folds the statement's lines into the enclosing block's range, then starts
+// a new range for the statement itself, which PostStmt() completes.
+TraversalCode SetBlockLineNumbers::PreStmt(const Stmt* s) {
+    auto loc = const_cast<Location*>(s->GetLocationInfo());
+    UpdateLocInfo(loc);
+    block_line_range.emplace_back(std::pair{loc->first_line, loc->last_line});
+    return TC_CONTINUE;
+}
+
+// Writes the statement's accumulated range back into its location, and
+// propagates that range to the enclosing block.
+TraversalCode SetBlockLineNumbers::PostStmt(const Stmt* s) {
+    auto loc = const_cast<Location*>(s->GetLocationInfo());
+
+    // Copy the range rather than holding a reference to it: we still use it
+    // after pop_back(), which invalidates references to the popped element.
+    auto r = block_line_range.back();
+    loc->first_line = r.first;
+    loc->last_line = r.second;
+
+    block_line_range.pop_back();
+
+    if ( ! block_line_range.empty() ) {
+        // We may have widened our range, propagate that to our parent.
+        auto& r_p = block_line_range.back();
+        r_p.first = std::min(r_p.first, r.first);
+        r_p.second = std::max(r_p.second, r.second);
+    }
+
+    return TC_CONTINUE;
+}
+
+// Folds the expression's lines into the enclosing statement's range.
+TraversalCode SetBlockLineNumbers::PreExpr(const Expr* e) {
+    ASSERT(! block_line_range.empty());
+    UpdateLocInfo(const_cast<Location*>(e->GetLocationInfo()));
+    return TC_CONTINUE;
+}
+
+// Normalizes loc so that first_line <= last_line, and widens the innermost
+// block's range (if there is one) to cover it.
+void SetBlockLineNumbers::UpdateLocInfo(Location* loc) {
+    // Sometimes locations are generated with inverted line coverage.
+    if ( loc->first_line > loc->last_line )
+        std::swap(loc->first_line, loc->last_line);
+
+    auto first_line = loc->first_line;
+    auto last_line = loc->last_line;
+
+    if ( ! block_line_range.empty() ) {
+        auto& r = block_line_range.back();
+        r.first = std::min(r.first, first_line);
+        r.second = std::max(r.second, last_line);
+    }
+}
+
+// Computes the expanded descriptions for the bodies of all of the functions
+// in funcs that should be analyzed.
+BlockAnalyzer::BlockAnalyzer(std::vector<FuncInfo>& funcs) {
+    for ( auto& f : funcs ) {
+        if ( ! f.ShouldAnalyze() )
+            continue;
+
+        auto func = f.Func();
+        std::string fn = func->Name();
+        auto body = f.Body();
+
+        // First get the line numbers all sorted out.
+        SetBlockLineNumbers sbln;
+        body->Traverse(&sbln);
+
+        auto body_loc = body->GetLocationInfo();
+        fn = func_name_at_loc(fn, body_loc);
+
+        parents.emplace_back(std::pair{fn, fn});
+        func_name_prefix = fn + ":";
+        body->Traverse(this);
+        parents.pop_back();
+    }
+
+    // This should never appear!
+    func_name_prefix = ":";
+}
+
+// Whether the statement is one of the kinds that BlockAnalyzer pushes as a
+// parent for the statements and expressions nested inside it.
+static bool is_compound_stmt(const Stmt* s) {
+    static const std::set compound_stmts = {STMT_FOR, STMT_IF, STMT_LIST, STMT_SWITCH, STMT_WHEN, STMT_WHILE};
+    return compound_stmts.count(s->Tag()) > 0;
+}
+
+// Records the statement's expanded description and, for compound
+// statements, makes it the parent of whatever is nested inside.
+TraversalCode BlockAnalyzer::PreStmt(const Stmt* s) {
+    auto loc = s->GetLocationInfo();
+    auto ls = BuildExpandedDescription(loc);
+
+    if ( is_compound_stmt(s) )
+        parents.push_back(std::pair{LocWithFunc(loc), std::move(ls)});
+
+    return TC_CONTINUE;
+}
+
+// Pops the parent entry that PreStmt() pushed for a compound statement.
+TraversalCode BlockAnalyzer::PostStmt(const Stmt* s) {
+    if ( is_compound_stmt(s) )
+        parents.pop_back();
+
+    return TC_CONTINUE;
+}
+
+// Records the expression's expanded description; the result isn't needed.
+TraversalCode BlockAnalyzer::PreExpr(const Expr* e) {
+    (void)BuildExpandedDescription(e->GetLocationInfo());
+    return TC_CONTINUE;
+}
+
+std::string BlockAnalyzer::BuildExpandedDescription(const Location* loc) {
+    ASSERT(loc && loc->first_line != 0);
+
+    auto ls = LocWithFunc(loc);
+    if ( ! parents.empty() ) {
+        auto& parent_pair = parents.back();
+        if ( parent_pair.first == ls )
+            ls = parent_pair.second;
+        else
+            ls = parent_pair.second + ";" + ls;
+    }
+
+    // Only the first description built for a given key is kept.
+    exp_desc.try_emplace(LocKey(loc), ls);
+
+    return ls;
+}
+
+std::unique_ptr<BlockAnalyzer> blocks;
+
 } // namespace zeek::detail
diff --git a/src/script_opt/ProfileFunc.h b/src/script_opt/ProfileFunc.h
index 55814a9686..905e21411e 100644
--- a/src/script_opt/ProfileFunc.h
+++ b/src/script_opt/ProfileFunc.h
@@ -609,4 +609,102 @@ protected:
     bool full_record_hashes;
 };
 
+// Updates the line numbers associated with an AST node to reflect its
+// full block (i.e., correct "first" and "last" for multi-line and compound
+// statements).
+class SetBlockLineNumbers : public TraversalCallback {
+public:
+    // Note, these modify the location information of their "const" arguments.
+    // It would be cleaner if Obj provided an interface for doing so (by making
+    // SetLocationInfo be a "const" method), but unfortunately it's virtual
+    // (unclear why) so we don't know how it might be overridden in user code.
+    TraversalCode PreStmt(const Stmt*) override;
+    TraversalCode PostStmt(const Stmt*) override;
+    TraversalCode PreExpr(const Expr*) override;
+
+private:
+    void UpdateLocInfo(Location* loc);
+
+    // A stack of block ranges. Each entry in the vector corresponds to a
+    // statement block, managed in a LIFO manner reflecting statement nesting.
+    // We start building up a given block's range during pre-traversal and
+    // finish it during post-traversal, propagating the updates to the
+    // nesting parent.
+    std::vector<std::pair<int, int>> block_line_range;
+};
+
+// Goes through all of the functions to associate full location information
+// with each AST node.
+class BlockAnalyzer : public TraversalCallback {
+public:
+    BlockAnalyzer(std::vector<FuncInfo>& funcs);
+
+    TraversalCode PreStmt(const Stmt*) override;
+    TraversalCode PostStmt(const Stmt*) override;
+    TraversalCode PreExpr(const Expr*) override;
+
+    // For a given location, returns its full local description (not
+    // including its parent).
+    std::string GetDesc(const Location* loc) const {
+        auto e_d = exp_desc.find(LocKey(loc));
+        if ( e_d == exp_desc.end() )
+            return LocWithFunc(loc);
+        else
+            return e_d->second;
+    }
+
+    // Whether we've created a description for the given location. This
+    // should always be true other than for certain functions with empty
+    // bodies that are created post-parsing. Available for debugging so
+    // we can assert we have these.
+    bool HaveExpDesc(const Location* loc) const { return exp_desc.count(LocKey(loc)) > 0; }
+
+private:
+    // Construct the full expanded description associated with the given
+    // location (if not already cached) and return it. This is the "static"
+    // view; if we reach the location via a non-inlined call, we will
+    // prepend that expansion when reporting the corresponding profile.
+    std::string BuildExpandedDescription(const Location* loc);
+
+    // Return the key used to associate a Location object with its full
+    // description.
+    std::string LocKey(const Location* loc) const {
+        return std::string(loc->filename) + ":" + std::to_string(loc->first_line) + "-" +
+               std::to_string(loc->last_line);
+    }
+
+    // Return the description of a location including the function
+    // in which it's embedded.
+    std::string LocWithFunc(const Location* loc) const {
+        auto res = func_name_prefix + std::to_string(loc->first_line);
+
+        if ( loc->first_line != loc->last_line )
+            res += "-" + std::to_string(loc->last_line);
+
+        return res;
+    }
+
+    // The function whose body we are analyzing, in a form convenient
+    // for adding it as a prefix (i.e., with a trailing ':').
+    std::string func_name_prefix;
+
+    // Stack of expanded descriptions of parent blocks. Each entry is
+    // a pair of the parent's own description plus the full descriptor
+    // up to that point.
+    std::vector<std::pair<std::string, std::string>> parents;
+
+    // Maps a statement's location key to its expanded description.
+    std::unordered_map<std::string, std::string> exp_desc;
+};
+
+// If we're profiling, this provides the analysis of how low-level location
+// information relates to higher-level statement blocks.
+extern std::unique_ptr<BlockAnalyzer> blocks;
+
+// Returns the full name of a function at a given location, including its
+// associated module (even for event handlers that don't actually have
+// modules in their names), so we can track overall per-module resource
+// usage.
+extern std::string func_name_at_loc(std::string fname, const Location* loc);
+
 } // namespace zeek::detail