refined ZAM function profiling to include (correct) statement line number blocks

This commit is contained in:
Vern Paxson 2024-03-10 12:41:18 -07:00 committed by Tim Wojtulewicz
parent f88862f6ce
commit 037f76e384
2 changed files with 246 additions and 0 deletions

View file

@ -1401,4 +1401,152 @@ std::shared_ptr<SideEffectsOp> ProfileFuncs::GetCallSideEffects(const ScriptFunc
return seo;
}
// We associate modules with filenames, and take the first one we see.
// Keyed by source filename; the mapped value is the module name recorded
// by switch_to_module(). func_name_at_loc() consults this map to decide
// whether a function name needs a module prefix.
static std::unordered_map<std::string, std::string> filename_module;
// Records `module_name` as the module associated with the file of the
// current location. Only the first module seen for a given file is kept;
// later calls for the same file leave the existing association untouched.
// Locations whose first line is 0 are ignored.
void switch_to_module(const char* module_name) {
    auto cur_loc = GetCurrentLocation();

    if ( cur_loc.first_line == 0 )
        return;

    if ( filename_module.find(cur_loc.filename) != filename_module.end() )
        // This file already has a module; first one wins.
        return;

    filename_module.emplace(cur_loc.filename, module_name);
}
std::string func_name_at_loc(std::string fname, const Location* loc) {
auto find_module = filename_module.find(loc->filename);
if ( find_module == filename_module.end() )
// No associated module.
return fname;
auto& module = find_module->second;
if ( module.empty() || module == "GLOBAL" )
// Trivial associated module.
return fname;
auto mod_prefix = module + "::";
if ( fname.find(mod_prefix) == 0 )
return fname; // it already has the module name
return mod_prefix + fname;
}
// Pre-traversal hook for statements: normalizes the statement's location,
// folds it into the enclosing block's range (if any), and then opens a new
// range entry for this statement, seeded with its own first/last lines.
TraversalCode SetBlockLineNumbers::PreStmt(const Stmt* s) {
    // Location info is reached through a const accessor, but we need to
    // be able to rewrite it.
    auto* stmt_loc = const_cast<Location*>(s->GetLocationInfo());

    UpdateLocInfo(stmt_loc);
    block_line_range.emplace_back(stmt_loc->first_line, stmt_loc->last_line);

    return TC_CONTINUE;
}
// Post-traversal hook for statements: closes the range entry that PreStmt
// opened for `s`, writes the (possibly widened) range back into the
// statement's location, and propagates it to the enclosing block's range.
TraversalCode SetBlockLineNumbers::PostStmt(const Stmt* s) {
    // Every PostStmt is paired with a PreStmt that pushed an entry.
    ASSERT(! block_line_range.empty());

    auto loc = const_cast<Location*>(s->GetLocationInfo());

    // Take a copy rather than a reference: the entry is popped below, and
    // a reference to it would dangle when we later propagate to the parent.
    const auto r = block_line_range.back();
    block_line_range.pop_back();

    loc->first_line = r.first;
    loc->last_line = r.second;

    if ( ! block_line_range.empty() ) {
        // We may have widened our range, propagate that to our parent.
        auto& r_p = block_line_range.back();
        r_p.first = std::min(r_p.first, r.first);
        r_p.second = std::max(r_p.second, r.second);
    }

    return TC_CONTINUE;
}
// Pre-traversal hook for expressions: normalizes the expression's location
// and widens the innermost open statement range to cover it. Expressions
// are expected to be visited only while some statement range is open.
TraversalCode SetBlockLineNumbers::PreExpr(const Expr* e) {
    ASSERT(! block_line_range.empty());

    auto* expr_loc = const_cast<Location*>(e->GetLocationInfo());
    UpdateLocInfo(expr_loc);

    return TC_CONTINUE;
}
// Normalizes `loc` so that first_line <= last_line, then widens the
// innermost open block range (if there is one) to include it.
void SetBlockLineNumbers::UpdateLocInfo(Location* loc) {
    // Locations occasionally arrive with their line bounds reversed.
    if ( loc->last_line < loc->first_line )
        std::swap(loc->first_line, loc->last_line);

    if ( block_line_range.empty() )
        return;

    auto& enclosing = block_line_range.back();
    enclosing.first = std::min(enclosing.first, loc->first_line);
    enclosing.second = std::max(enclosing.second, loc->last_line);
}
// Builds expanded location descriptions for every function in `funcs`
// that is marked for analysis. For each such function the body's line
// numbers are first fixed up via SetBlockLineNumbers, and then the body
// is traversed with this object to populate the description cache.
BlockAnalyzer::BlockAnalyzer(std::vector<FuncInfo>& funcs) {
    for ( auto& finfo : funcs ) {
        if ( finfo.ShouldAnalyze() ) {
            auto script_func = finfo.Func();
            std::string qualified_name = script_func->Name();
            auto func_body = finfo.Body();

            // Line ranges must be corrected before we derive descriptions
            // from them.
            SetBlockLineNumbers line_fixer;
            func_body->Traverse(&line_fixer);

            qualified_name = func_name_at_loc(qualified_name, func_body->GetLocationInfo());

            // The function itself is the outermost "parent" block.
            parents.emplace_back(qualified_name, qualified_name);
            func_name_prefix = qualified_name + ":";

            func_body->Traverse(this);

            parents.pop_back();
        }
    }

    // Sentinel prefix; it should never show up in any description.
    func_name_prefix = "<MISSING>:";
}
// True if `s` is one of the statement kinds treated as a block that can
// contain other statements: for, if, list, switch, when, or while.
static bool is_compound_stmt(const Stmt* s) {
    switch ( s->Tag() ) {
        case STMT_FOR:
        case STMT_IF:
        case STMT_LIST:
        case STMT_SWITCH:
        case STMT_WHEN:
        case STMT_WHILE: return true;

        default: return false;
    }
}
// Pre-traversal hook for statements: records the statement's expanded
// description and, for compound statements, pushes it onto the parent
// stack so that nested nodes are described relative to it.
TraversalCode BlockAnalyzer::PreStmt(const Stmt* s) {
    const auto* stmt_loc = s->GetLocationInfo();
    auto expanded = BuildExpandedDescription(stmt_loc);

    if ( ! is_compound_stmt(s) )
        return TC_CONTINUE;

    parents.emplace_back(LocWithFunc(stmt_loc), std::move(expanded));
    return TC_CONTINUE;
}
// Post-traversal hook for statements: pops the parent-stack entry that
// PreStmt pushed for a compound statement. Non-compound statements never
// pushed one, so nothing is popped for them.
TraversalCode BlockAnalyzer::PostStmt(const Stmt* s) {
    const bool opened_scope = is_compound_stmt(s);

    if ( opened_scope )
        parents.pop_back();

    return TC_CONTINUE;
}
// Pre-traversal hook for expressions: makes sure an expanded description
// is cached for the expression's location. The description itself is not
// needed here, only the caching side effect.
TraversalCode BlockAnalyzer::PreExpr(const Expr* e) {
    const auto* expr_loc = e->GetLocationInfo();
    static_cast<void>(BuildExpandedDescription(expr_loc));
    return TC_CONTINUE;
}
// Computes the expanded description for `loc`: the innermost parent's full
// descriptor followed by ";" and this location's own function-prefixed
// line range, or just the parent's descriptor when the location coincides
// with that parent. The result is cached under the location's key unless
// an entry already exists for that key; the computed value is returned
// either way.
std::string BlockAnalyzer::BuildExpandedDescription(const Location* loc) {
    ASSERT(loc && loc->first_line != 0);

    auto desc = LocWithFunc(loc);

    if ( ! parents.empty() ) {
        const auto& enclosing = parents.back();
        const bool same_as_parent = enclosing.first == desc;

        // A node sharing its parent's location reuses the parent's
        // descriptor rather than repeating itself.
        desc = same_as_parent ? enclosing.second : enclosing.second + ";" + desc;
    }

    // First description recorded for a given key wins.
    exp_desc.emplace(LocKey(loc), desc);

    return desc;
}
// Definition of the global BlockAnalyzer handle. It starts out null (a
// default-constructed unique_ptr); nothing in this section assigns it, so
// presumably the profiling setup elsewhere creates the instance.
std::unique_ptr<BlockAnalyzer> blocks;
} // namespace zeek::detail

View file

@ -609,4 +609,102 @@ protected:
bool full_record_hashes;
};
// Updates the line numbers associated with an AST node to reflect its
// full block (i.e., correct "first" and "last" for multi-line and compound
// statements).
//
// Used as a traversal callback: pass an instance to a statement's
// Traverse() and the locations of the visited statements and expressions
// are rewritten in place.
class SetBlockLineNumbers : public TraversalCallback {
public:
// Note, these modify the location information of their "const" arguments.
// It would be cleaner if Obj provided an interface for doing so (by making
// SetLocationInfo be a "const" method), but unfortunately it's virtual
// (unclear why) so we don't know how it might be overridden in user code.

// Opens a new range for the statement, after folding the statement's own
// location into the enclosing range.
TraversalCode PreStmt(const Stmt*) override;
// Closes the statement's range, stores it in the statement's location,
// and widens the enclosing range to cover it.
TraversalCode PostStmt(const Stmt*) override;
// Widens the innermost open range to cover the expression's location.
TraversalCode PreExpr(const Expr*) override;
private:
// Ensures loc's first_line <= last_line, then widens the innermost open
// range (if any) so that it includes loc.
void UpdateLocInfo(Location* loc);
// A stack of block ranges. Each entry in the vector corresponds to a
// statement block, managed in a LIFO manner reflecting statement nesting.
// We start building up a given block's range during pre-traversal and
// finish it during post-traversal, propagating the updates to the
// nesting parent. Each pair is (first line, last line).
std::vector<std::pair<int, int>> block_line_range;
};
// Walks the bodies of all analyzable functions in order to associate full
// location information with each AST node, in the form of an "expanded
// description" that chains a node's location onto those of its enclosing
// blocks.
class BlockAnalyzer : public TraversalCallback {
public:
    // Analyzes every function in `funcs` that is marked for analysis.
    BlockAnalyzer(std::vector<FuncInfo>& funcs);

    TraversalCode PreStmt(const Stmt*) override;
    TraversalCode PostStmt(const Stmt*) override;
    TraversalCode PreExpr(const Expr*) override;

    // For a given location, returns its full local description (not
    // including its parent). If no description was recorded for the
    // location, falls back to its plain function-prefixed line range.
    std::string GetDesc(const Location* loc) const {
        const auto cached = exp_desc.find(LocKey(loc));
        return cached != exp_desc.end() ? cached->second : LocWithFunc(loc);
    }

    // Whether we've created a description for the given location. This
    // should always be true other than for certain functions with empty
    // bodies that are created post-parsing. Available for debugging so
    // we can assert we have these.
    bool HaveExpDesc(const Location* loc) const { return exp_desc.find(LocKey(loc)) != exp_desc.end(); }

private:
    // Construct the full expanded description associated with the given
    // location (if not already cached) and return it. This is the "static"
    // view; if we reach the location via a non-inlined call, we will
    // prepend that expansion when reporting the corresponding profile.
    std::string BuildExpandedDescription(const Location* loc);

    // Return the key used to associate a Location object with its full
    // description. The key has the form "<filename>:<first>-<last>".
    std::string LocKey(const Location* loc) const {
        std::string key(loc->filename);
        key += ":";
        key += std::to_string(loc->first_line);
        key += "-";
        key += std::to_string(loc->last_line);
        return key;
    }

    // Return the description of a location including the function in
    // which it's embedded: the current function prefix followed by the
    // first line, plus "-<last>" when the location spans multiple lines.
    std::string LocWithFunc(const Location* loc) const {
        std::string desc = func_name_prefix + std::to_string(loc->first_line);

        if ( loc->last_line != loc->first_line ) {
            desc += "-";
            desc += std::to_string(loc->last_line);
        }

        return desc;
    }

    // The function whose body we are analyzing, in a form convenient
    // for adding it as a prefix (i.e., with a trailing ':').
    std::string func_name_prefix;

    // Stack of expanded descriptions of parent blocks. Each entry is
    // a pair of the parent's own description plus the full descriptor
    // up to that point.
    std::vector<std::pair<std::string, std::string>> parents;

    // Maps a statement's location key to its expanded description.
    std::unordered_map<std::string, std::string> exp_desc;
};
// If we're profiling, this provides the analysis of how low-level location
// information relates to higher-level statement blocks.
// Callers should presumably check for null before dereferencing, since a
// default-constructed unique_ptr holds no object — confirm against the
// code that sets this up.
extern std::unique_ptr<BlockAnalyzer> blocks;
// Returns the full name of a function at a given location, including its
// associated module (even for event handlers that don't actually have
// modules in their names), so we can track overall per-module resource
// usage.
//
// The module is looked up by the location's filename. The name is returned
// unchanged if no module is associated with that file, if the module is
// empty or "GLOBAL", or if the name already begins with "<module>::".
extern std::string func_name_at_loc(std::string fname, const Location* loc);
} // namespace zeek::detail