ZAM execution changes to support richer profiling

This commit is contained in:
Vern Paxson 2024-03-10 17:11:04 -04:00 committed by Tim Wojtulewicz
parent c8d15f1eaa
commit c29db63fdd
13 changed files with 284 additions and 131 deletions

View file

@ -1,5 +1,7 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include "zeek/script_opt/ZAM/ZBody.h"
#include "zeek/Desc.h"
#include "zeek/EventHandler.h"
#include "zeek/Frame.h"
@ -28,6 +30,74 @@
namespace zeek::detail {
#ifdef ENABLE_ZAM_PROFILE
static std::vector<const ZAMLocInfo*> caller_locs;
static double compute_CPU_prof_overhead() {
double start = util::curr_CPU_time();
double CPU = 0.0;
const int n = 100000;
for ( int i = 0; i < n; ++i )
CPU = std::max(CPU, util::curr_CPU_time());
return 2.0 * (CPU - start) / n;
}
static double compute_mem_prof_overhead() {
double start = util::curr_CPU_time();
uint64_t m;
util::get_memory_usage(&m, nullptr);
const int n = 20000;
for ( int i = 0; i < n; ++i ) {
uint64_t m2;
util::get_memory_usage(&m2, nullptr);
}
return 2.0 * (util::curr_CPU_time() - start) / n;
}
static double CPU_prof_overhead = compute_CPU_prof_overhead();
static double mem_prof_overhead = compute_mem_prof_overhead();
#define DO_ZAM_PROFILE \
if ( do_profile ) { \
double dt = util::curr_CPU_time() - profile_CPU; \
auto& prof_info = (*curr_prof_vec)[profile_pc]; \
++prof_info.num_samples; \
prof_info.CPU_time += dt; \
ZOP_CPU[z.op] += dt; \
}
// These next two macros appear in code generated by gen-zam.
#define ZAM_PROFILE_PRE_CALL \
if ( do_profile ) { \
caller_locs.push_back(z.loc.get()); \
if ( ! z.aux->is_BiF_call ) { /* For non-BiFs we don't include the callee's execution time as part of our own \
*/ \
DO_ZAM_PROFILE \
} \
}
#define ZAM_PROFILE_POST_CALL \
if ( do_profile ) { \
caller_locs.pop_back(); \
if ( ! z.aux->is_BiF_call ) { /* We already did the profiling, move on to next instruction */ \
++pc; \
continue; \
} \
}
#else
#define DO_ZAM_PROFILE
#define ZAM_PROFILE_PRE_CALL
#define ZAM_PROFILE_POST_CALL
static double CPU_prof_overhead = 0.0;
static double mem_prof_overhead = 0.0;
#endif
using std::vector;
// Thrown when a call inside a "when" delays.
@ -41,9 +111,22 @@ int ZOP_count[OP_NOP + 1];
double ZOP_CPU[OP_NOP + 1];
void report_ZOP_profile() {
static bool did_overhead_report = false;
if ( ! did_overhead_report ) {
fprintf(analysis_options.profile_file, "Profile sampled every %d instructions; all calls profiled\n",
analysis_options.profile_sampling_rate);
fprintf(analysis_options.profile_file,
"Profiling overhead = %.0f nsec/instruction, memory profiling overhead = %.0f nsec/call\n",
CPU_prof_overhead * 1e9, mem_prof_overhead * 1e9);
did_overhead_report = true;
}
for ( int i = 1; i <= OP_NOP; ++i )
if ( ZOP_count[i] > 0 )
printf("%s\t%d\t%.06f\n", ZOP_name(ZOp(i)), ZOP_count[i], ZOP_CPU[i]);
if ( ZOP_count[i] > 0 ) {
auto CPU = std::max(ZOP_CPU[i] - ZOP_count[i] * CPU_prof_overhead, 0.0);
fprintf(analysis_options.profile_file, "%s\t%d\t%.06f\n", ZOP_name(ZOp(i)), ZOP_count[i], CPU);
}
}
// Sets the given element to a copy of an existing (not newly constructed)
@ -120,8 +203,8 @@ VEC_COERCE(IU, TYPE_INT, zeek_int_t, AsCount(), count_to_int_would_overflow, "un
VEC_COERCE(UD, TYPE_COUNT, zeek_uint_t, AsDouble(), double_to_count_would_overflow, "double to unsigned")
VEC_COERCE(UI, TYPE_COUNT, zeek_int_t, AsInt(), int_to_count_would_overflow, "signed to unsigned")
ZBody::ZBody(const char* _func_name, const ZAMCompiler* zc) : Stmt(STMT_ZAM) {
func_name = _func_name;
ZBody::ZBody(std::string _func_name, const ZAMCompiler* zc) : Stmt(STMT_ZAM) {
func_name = std::move(_func_name);
frame_denizens = zc->FrameDenizens();
frame_size = frame_denizens.size();
@ -157,12 +240,9 @@ ZBody::ZBody(const char* _func_name, const ZAMCompiler* zc) : Stmt(STMT_ZAM) {
if ( ! did_init ) {
auto log_ID_type = lookup_ID("ID", "Log");
ASSERT(log_ID_type);
log_ID_enum_type = log_ID_type->GetType<EnumType>();
any_base_type = base_type(TYPE_ANY);
ZAM::log_ID_enum_type = log_ID_type->GetType<EnumType>();
ZAM::any_base_type = base_type(TYPE_ANY);
ZVal::SetZValNilStatusAddr(&ZAM_error);
did_init = false;
}
}
@ -170,9 +250,6 @@ ZBody::ZBody(const char* _func_name, const ZAMCompiler* zc) : Stmt(STMT_ZAM) {
ZBody::~ZBody() {
delete[] fixed_frame;
delete[] insts;
delete inst_count;
delete inst_CPU;
delete CPU_time;
}
void ZBody::SetInsts(vector<ZInst*>& _insts) {
@ -194,12 +271,6 @@ void ZBody::SetInsts(vector<ZInstI*>& instsI) {
for ( auto i = 0U; i < end_pc; ++i ) {
auto& iI = *instsI[i];
insts_copy[i] = iI;
if ( iI.stmt ) {
auto l = iI.stmt->GetLocationInfo();
if ( l != &no_location )
insts_copy[i].loc = std::make_shared<Location>(l->filename, l->first_line, l->last_line,
l->first_column, l->last_column);
}
}
insts = insts_copy;
@ -209,34 +280,22 @@ void ZBody::SetInsts(vector<ZInstI*>& instsI) {
void ZBody::InitProfile() {
if ( analysis_options.profile_ZAM ) {
inst_count = new vector<int>;
inst_CPU = new vector<double>;
for ( auto i = 0U; i < end_pc; ++i ) {
inst_count->push_back(0);
inst_CPU->push_back(0.0);
}
CPU_time = new double;
*CPU_time = 0.0;
default_prof_vec = BuildProfVec();
curr_prof_vec = default_prof_vec.get();
}
}
ValPtr ZBody::Exec(Frame* f, StmtFlowType& flow) {
#ifdef DEBUG
double t = analysis_options.profile_ZAM ? util::curr_CPU_time() : 0.0;
#endif
std::shared_ptr<ProfVec> ZBody::BuildProfVec() const {
auto pv = std::make_shared<ProfVec>();
pv->resize(end_pc);
auto val = DoExec(f, flow);
for ( auto i = 0U; i < end_pc; ++i )
(*pv)[i] = {0, 0.0};
#ifdef DEBUG
if ( analysis_options.profile_ZAM )
*CPU_time += util::curr_CPU_time() - t;
#endif
return val;
return pv;
}
ValPtr ZBody::DoExec(Frame* f, StmtFlowType& flow) {
ValPtr ZBody::Exec(Frame* f, StmtFlowType& flow) {
unsigned int pc = 0;
// Return value, or nil if none.
@ -245,8 +304,27 @@ ValPtr ZBody::DoExec(Frame* f, StmtFlowType& flow) {
// Type of the return value. If nil, then we don't have a value.
TypePtr ret_type;
#ifdef DEBUG
bool do_profile = analysis_options.profile_ZAM;
#ifdef ENABLE_ZAM_PROFILE
static bool profiling_active = analysis_options.profile_ZAM;
static int sampling_rate = analysis_options.profile_sampling_rate;
double start_CPU_time = 0.0;
uint64_t start_mem = 0;
if ( profiling_active ) {
++ncall;
start_CPU_time = util::curr_CPU_time();
util::get_memory_usage(&start_mem, nullptr);
if ( caller_locs.empty() )
curr_prof_vec = default_prof_vec.get();
else {
auto pv = prof_vecs.find(caller_locs);
if ( pv == prof_vecs.end() )
pv = prof_vecs.insert({caller_locs, BuildProfVec()}).first;
curr_prof_vec = pv->second.get();
}
}
#endif
ZVal* frame;
@ -276,16 +354,23 @@ ValPtr ZBody::DoExec(Frame* f, StmtFlowType& flow) {
while ( pc < end_pc && ! ZAM_error ) {
auto& z = insts[pc];
#ifdef DEBUG
#ifdef ENABLE_ZAM_PROFILE
bool do_profile = false;
int profile_pc = 0;
double profile_CPU = 0.0;
if ( do_profile ) {
++ZOP_count[z.op];
++(*inst_count)[pc];
if ( profiling_active ) {
static auto seed = util::detail::random_number();
seed = util::detail::prng(seed);
do_profile = seed % sampling_rate == 0;
profile_pc = pc;
profile_CPU = util::curr_CPU_time();
if ( do_profile ) {
++ZOP_count[z.op];
++ninst;
profile_pc = pc;
profile_CPU = util::curr_CPU_time();
}
}
#endif
@ -302,13 +387,7 @@ ValPtr ZBody::DoExec(Frame* f, StmtFlowType& flow) {
default: reporter->InternalError("bad ZAM opcode");
}
#ifdef DEBUG
if ( do_profile ) {
double dt = util::curr_CPU_time() - profile_CPU;
inst_CPU->at(profile_pc) += dt;
ZOP_CPU[z.op] += dt;
}
#endif
DO_ZAM_PROFILE
++pc;
}
@ -340,30 +419,86 @@ ValPtr ZBody::DoExec(Frame* f, StmtFlowType& flow) {
delete[] frame;
}
#ifdef ENABLE_ZAM_PROFILE
if ( profiling_active ) {
tot_CPU_time += util::curr_CPU_time() - start_CPU_time;
uint64_t final_mem;
util::get_memory_usage(&final_mem, nullptr);
if ( final_mem > start_mem )
tot_mem += final_mem - start_mem;
}
#endif
return result;
}
void ZBody::ProfileExecution() const {
if ( inst_count->empty() ) {
printf("%s has an empty body\n", func_name);
void ZBody::ReportExecutionProfile(ProfMap& pm) {
static bool did_overhead_report = false;
if ( end_pc == 0 ) {
fprintf(analysis_options.profile_file, "%s has an empty body\n", func_name.c_str());
return;
}
if ( (*inst_count)[0] == 0 ) {
printf("%s did not execute\n", func_name);
auto& dpv = *default_prof_vec;
if ( dpv[0].num_samples == 0 && prof_vecs.empty() ) {
fprintf(analysis_options.profile_file, "%s did not execute\n", func_name.c_str());
return;
}
printf("%s CPU time: %.06f\n", func_name, *CPU_time);
int total_samples = ncall + ninst;
double adj_CPU_time = tot_CPU_time;
adj_CPU_time -= ncall * (mem_prof_overhead + CPU_prof_overhead);
adj_CPU_time -= ninst * CPU_prof_overhead;
adj_CPU_time = std::max(adj_CPU_time, 0.0);
for ( auto i = 0U; i < inst_count->size(); ++i ) {
printf("%s %d %d %.06f ", func_name, i, (*inst_count)[i], (*inst_CPU)[i]);
insts[i].Dump(i, &frame_denizens);
fprintf(analysis_options.profile_file, "%s CPU time %.06f, %" PRIu64 " memory, %d calls, %d sampled instructions\n",
func_name.c_str(), adj_CPU_time, tot_mem, ncall, ninst);
if ( dpv[0].num_samples != 0 )
ReportProfile(pm, dpv, "", {});
for ( auto& pv : prof_vecs ) {
std::string prefix;
std::set<std::string> modules;
for ( auto& caller : pv.first ) {
prefix += caller->Describe(true) + ";";
auto& m = caller->GetModules();
modules.insert(m.begin(), m.end());
}
ReportProfile(pm, *pv.second, prefix, std::move(modules));
}
}
void ZBody::ReportProfile(ProfMap& pm, const ProfVec& pv, const std::string& prefix,
std::set<std::string> caller_modules) const {
for ( auto i = 0U; i < pv.size(); ++i ) {
auto ninst = pv[i].num_samples;
auto CPU = pv[i].CPU_time;
CPU = std::max(CPU - ninst * CPU_prof_overhead, 0.0);
fprintf(analysis_options.profile_file, "%s %d %" PRId64 " %.06f ", func_name.c_str(), i, ninst, CPU);
insts[i].Dump(analysis_options.profile_file, i, &frame_denizens, prefix);
auto modules = caller_modules;
auto& m = insts[i].loc->GetModules();
modules.insert(m.begin(), m.end());
for ( auto& m : modules ) {
auto mod_prof = pm.find(m);
if ( mod_prof == pm.end() )
pm[m] = {ninst, CPU};
else {
mod_prof->second.num_samples += ninst;
mod_prof->second.CPU_time += CPU;
}
}
}
}
bool ZBody::CheckAnyType(const TypePtr& any_type, const TypePtr& expected_type,
const std::shared_ptr<Location>& loc) const {
const std::shared_ptr<ZAMLocInfo>& loc) const {
if ( IsAny(expected_type) )
return true;
@ -382,7 +517,7 @@ bool ZBody::CheckAnyType(const TypePtr& any_type, const TypePtr& expected_type,
char buf[8192];
snprintf(buf, sizeof buf, "run-time type clash (%s/%s)", type_name(at), type_name(et));
reporter->RuntimeError(loc.get(), "%s", buf);
reporter->RuntimeError(loc->Loc(), "%s", buf);
return false;
}
@ -411,13 +546,13 @@ void ZBody::Dump() const {
for ( unsigned i = 0; i < end_pc; ++i ) {
auto& inst = insts[i];
printf("%d: ", i);
inst.Dump(i, &frame_denizens);
inst.Dump(stdout, i, &frame_denizens, "");
}
}
void ZBody::StmtDescribe(ODesc* d) const {
d->AddSP("ZAM-code");
d->AddSP(func_name);
d->Add(func_name.c_str());
}
TraversalCode ZBody::Traverse(TraversalCallback* cb) const {