diff --git a/src/script_opt/ProfileFunc.cc b/src/script_opt/ProfileFunc.cc index d478446ded..8c63977b56 100644 --- a/src/script_opt/ProfileFunc.cc +++ b/src/script_opt/ProfileFunc.cc @@ -1475,7 +1475,7 @@ void SetBlockLineNumbers::UpdateLocInfo(Location* loc) { } } -BlockAnalyzer::BlockAnalyzer(std::vector& funcs) { +ASTBlockAnalyzer::ASTBlockAnalyzer(std::vector& funcs) { for ( auto& f : funcs ) { if ( ! f.ShouldAnalyze() ) continue; @@ -1506,7 +1506,7 @@ static bool is_compound_stmt(const Stmt* s) { return compound_stmts.count(s->Tag()) > 0; } -TraversalCode BlockAnalyzer::PreStmt(const Stmt* s) { +TraversalCode ASTBlockAnalyzer::PreStmt(const Stmt* s) { auto loc = s->GetLocationInfo(); auto ls = BuildExpandedDescription(loc); @@ -1516,19 +1516,19 @@ TraversalCode BlockAnalyzer::PreStmt(const Stmt* s) { return TC_CONTINUE; } -TraversalCode BlockAnalyzer::PostStmt(const Stmt* s) { +TraversalCode ASTBlockAnalyzer::PostStmt(const Stmt* s) { if ( is_compound_stmt(s) ) parents.pop_back(); return TC_CONTINUE; } -TraversalCode BlockAnalyzer::PreExpr(const Expr* e) { +TraversalCode ASTBlockAnalyzer::PreExpr(const Expr* e) { (void)BuildExpandedDescription(e->GetLocationInfo()); return TC_CONTINUE; } -std::string BlockAnalyzer::BuildExpandedDescription(const Location* loc) { +std::string ASTBlockAnalyzer::BuildExpandedDescription(const Location* loc) { ASSERT(loc && loc->first_line != 0); auto ls = LocWithFunc(loc); @@ -1547,6 +1547,6 @@ std::string BlockAnalyzer::BuildExpandedDescription(const Location* loc) { return ls; } -std::unique_ptr blocks; +std::unique_ptr AST_blocks; } // namespace zeek::detail diff --git a/src/script_opt/ProfileFunc.h b/src/script_opt/ProfileFunc.h index 905e21411e..e75688adb6 100644 --- a/src/script_opt/ProfileFunc.h +++ b/src/script_opt/ProfileFunc.h @@ -635,9 +635,9 @@ private: // Goes through all of the functions to associate full location information // with each AST node. -class BlockAnalyzer : public TraversalCallback { +class ASTBlockAnalyzer : public TraversalCallback { public: - BlockAnalyzer(std::vector& funcs); + ASTBlockAnalyzer(std::vector& funcs); TraversalCode PreStmt(const Stmt*) override; TraversalCode PostStmt(const Stmt*) override; @@ -699,7 +699,7 @@ private: // If we're profiling, this provides the analysis of how low-level location // information relates to higher-level statement blocks. -extern std::unique_ptr blocks; +extern std::unique_ptr AST_blocks; // Returns the full name of a function at a given location, including its // associated module (even for event handlers that don't actually have diff --git a/src/script_opt/ScriptOpt.cc b/src/script_opt/ScriptOpt.cc index 1dcf500d54..57dbb8b1bd 100644 --- a/src/script_opt/ScriptOpt.cc +++ b/src/script_opt/ScriptOpt.cc @@ -456,7 +456,7 @@ static void analyze_scripts_for_ZAM() { if ( analysis_options.profile_ZAM ) { #ifdef ENABLE_ZAM_PROFILE - blocks = std::make_unique(funcs); + AST_blocks = std::make_unique(funcs); const auto prof_filename = "zprof.out"; analysis_options.profile_file = fopen(prof_filename, "w"); if ( ! analysis_options.profile_file ) @@ -634,14 +634,14 @@ void profile_script_execution() { for ( auto& f : funcs ) { if ( f.Body()->Tag() == STMT_ZAM ) { auto zb = cast_intrusive(f.Body()); - zb->ProfileExecution(module_prof); + zb->ReportExecutionProfile(module_prof); } } for ( auto& mp : module_prof ) - if ( mp.second.first > 0 ) + if ( mp.second.num_samples > 0 ) fprintf(analysis_options.profile_file, "module %s sampled CPU time %.06f, %d sampled instructions\n", - mp.first.c_str(), mp.second.second, static_cast(mp.second.first)); + mp.first.c_str(), mp.second.CPU_time, static_cast(mp.second.num_samples)); } } diff --git a/src/script_opt/ZAM/Driver.cc b/src/script_opt/ZAM/Driver.cc index ef3bac2a9d..b37e033d43 100644 --- a/src/script_opt/ZAM/Driver.cc +++ b/src/script_opt/ZAM/Driver.cc @@ -28,8 +28,8 @@ ZAMCompiler::ZAMCompiler(ScriptFuncPtr f, std::shared_ptr _pfs, st ASSERT(loc->first_line != 0 || body->Tag() == STMT_NULL); auto loc_copy = std::make_shared(loc->filename, loc->first_line, loc->last_line, loc->first_column, loc->last_column); - curr_func = func->Name(); - curr_loc = std::make_shared(curr_func, std::move(loc_copy), nullptr); + ZAM::curr_func = func->Name(); + ZAM::curr_loc = std::make_shared(ZAM::curr_func, std::move(loc_copy), nullptr); Init(); } diff --git a/src/script_opt/ZAM/Ops.in b/src/script_opt/ZAM/Ops.in index af66ad92ba..54ac2c92d3 100644 --- a/src/script_opt/ZAM/Ops.in +++ b/src/script_opt/ZAM/Ops.in @@ -1389,7 +1389,7 @@ eval if ( frame[z.v2].vector_val->Size() > 0 ) unary-expr-op To-Any-Coerce op-type X set-type $1 -eval AssignV1(ZVal(frame[z.v2].ToVal(z.t), any_base_type)) +eval AssignV1(ZVal(frame[z.v2].ToVal(z.t), ZAM::any_base_type)) unary-expr-op From-Any-Coerce op-type X @@ -2376,7 +2376,7 @@ macro LogWriteNoResPost() internal-op Log-Write side-effects OP_LOG_WRITE_VV OP_VV type VVV -eval LogWritePre(frame[z.v2].ToVal(log_ID_enum_type), v3) +eval LogWritePre(frame[z.v2].ToVal(ZAM::log_ID_enum_type), v3) LogWriteResPost() internal-op Log-WriteC @@ -2390,7 +2390,7 @@ internal-op Log-Write side-effects op1-read type VV -eval LogWritePre(frame[z.v1].ToVal(log_ID_enum_type), v2) +eval LogWritePre(frame[z.v1].ToVal(ZAM::log_ID_enum_type), v2) LogWriteNoResPost() internal-op Log-WriteC diff --git a/src/script_opt/ZAM/Profile.cc b/src/script_opt/ZAM/Profile.cc index 074e78a518..65b8f608b2 100644 --- a/src/script_opt/ZAM/Profile.cc +++ b/src/script_opt/ZAM/Profile.cc @@ -19,15 +19,17 @@ ZAMLocInfo::ZAMLocInfo(std::string _func_name, std::shared_ptr _loc, s if ( main_module != std::string::npos ) modules.insert(func_name.substr(0, main_module)); - if ( parent ) - parent->AddInModules(modules); + if ( parent ) { + auto& m = parent->GetModules(); + modules.insert(m.begin(), m.end()); + } } std::string ZAMLocInfo::Describe(bool include_lines) const { std::string desc; - if ( blocks ) { - desc = blocks->GetDesc(loc.get()); + if ( AST_blocks ) { + desc = AST_blocks->GetDesc(loc.get()); if ( parent ) desc = parent->Describe(false) + ";" + desc; } diff --git a/src/script_opt/ZAM/Profile.h b/src/script_opt/ZAM/Profile.h index 031ade736a..fab3c3a278 100644 --- a/src/script_opt/ZAM/Profile.h +++ b/src/script_opt/ZAM/Profile.h @@ -22,9 +22,7 @@ public: const Location* Loc() const { return loc.get(); } std::shared_ptr LocPtr() const { return loc; } std::shared_ptr Parent() { return parent; } - - // Add this location's modules to the target set. - void AddInModules(std::set& target) const { target.insert(modules.begin(), modules.end()); } + const auto& GetModules() const { return modules; } // If include_lines is true, then in the description we include line // number information, otherwise we omit them. diff --git a/src/script_opt/ZAM/Stmt.cc b/src/script_opt/ZAM/Stmt.cc index fad27f1c93..4ee998d87d 100644 --- a/src/script_opt/ZAM/Stmt.cc +++ b/src/script_opt/ZAM/Stmt.cc @@ -14,9 +14,9 @@ const ZAMStmt ZAMCompiler::CompileStmt(const Stmt* s) { ASSERT(loc->first_line != 0 || s->Tag() == STMT_NULL); auto loc_copy = std::make_shared(loc->filename, loc->first_line, loc->last_line, loc->first_column, loc->last_column); - ASSERT(! blocks || s->Tag() == STMT_NULL || blocks->HaveExpDesc(loc_copy.get())); - auto loc_parent = curr_loc->Parent(); - curr_loc = std::make_shared(curr_func, std::move(loc_copy), curr_loc->Parent()); + ASSERT(! AST_blocks || s->Tag() == STMT_NULL || AST_blocks->HaveExpDesc(loc_copy.get())); + auto loc_parent = ZAM::curr_loc->Parent(); + ZAM::curr_loc = std::make_shared(ZAM::curr_func, std::move(loc_copy), ZAM::curr_loc->Parent()); switch ( s->Tag() ) { case STMT_PRINT: return CompilePrint(static_cast(s)); @@ -907,15 +907,15 @@ const ZAMStmt ZAMCompiler::CompileReturn(const ReturnStmt* r) { const ZAMStmt ZAMCompiler::CompileCatchReturn(const CatchReturnStmt* cr) { retvars.push_back(cr->RetVar()); - auto hold_func = curr_func; - auto hold_loc = curr_loc; + auto hold_func = ZAM::curr_func; + auto hold_loc = ZAM::curr_loc; - curr_func = cr->Func()->Name(); + ZAM::curr_func = cr->Func()->Name(); - bool is_event_inline = (hold_func == curr_func); + bool is_event_inline = (hold_func == ZAM::curr_func); if ( ! is_event_inline ) - curr_loc = std::make_shared(curr_func, curr_loc->LocPtr(), hold_loc); + ZAM::curr_loc = std::make_shared(ZAM::curr_func, ZAM::curr_loc->LocPtr(), hold_loc); PushCatchReturns(); @@ -929,8 +929,8 @@ const ZAMStmt ZAMCompiler::CompileCatchReturn(const CatchReturnStmt* cr) { // Strictly speaking, we could do this even if is_event_inline // is true, because the values won't have changed. However, that // just looks weird, so we condition this to match the above. - curr_func = hold_func; - curr_loc = hold_loc; + ZAM::curr_func = hold_func; + ZAM::curr_loc = hold_loc; } return block_end; diff --git a/src/script_opt/ZAM/Support.cc b/src/script_opt/ZAM/Support.cc index 71a518bec1..7e0d9172be 100644 --- a/src/script_opt/ZAM/Support.cc +++ b/src/script_opt/ZAM/Support.cc @@ -12,10 +12,13 @@ namespace zeek::detail { +namespace ZAM { std::string curr_func; std::shared_ptr curr_loc; TypePtr log_ID_enum_type; TypePtr any_base_type; +} // namespace ZAM + bool ZAM_error = false; bool is_ZAM_compilable(const ProfileFunc* pf, const char** reason) { diff --git a/src/script_opt/ZAM/Support.h b/src/script_opt/ZAM/Support.h index 52d9235b85..6e01b7c611 100644 --- a/src/script_opt/ZAM/Support.h +++ b/src/script_opt/ZAM/Support.h @@ -12,6 +12,8 @@ namespace zeek::detail { using ValVec = std::vector; +namespace ZAM { + // The name of the current function being compiled. For inlined functions, // this is the name of the inlinee, not the inliner. extern std::string curr_func; @@ -19,6 +21,15 @@ extern std::string curr_func; // The location corresponding to the current statement being compiled. extern std::shared_ptr curr_loc; +// Needed for the logging built-in. Exported so that ZAM can make sure it's +// defined when compiling. +extern TypePtr log_ID_enum_type; + +// Needed for a slight performance gain when dealing with "any" types. +extern TypePtr any_base_type; + +} // namespace ZAM + // True if a function with the given profile can be compiled to ZAM. // If not, returns the reason in *reason, if non-nil. class ProfileFunc; @@ -31,13 +42,6 @@ extern bool IsAny(const Type* t); inline bool IsAny(const TypePtr& t) { return IsAny(t.get()); } inline bool IsAny(const Expr* e) { return IsAny(e->GetType()); } -// Needed for the logging built-in. Exported so that ZAM can make sure it's -// defined when compiling. -extern TypePtr log_ID_enum_type; - -// Needed for a slight performance gain when dealing with "any" types. -extern TypePtr any_base_type; - extern void ZAM_run_time_error(const char* msg); extern void ZAM_run_time_error(std::shared_ptr loc, const char* msg); extern void ZAM_run_time_error(std::shared_ptr loc, const char* msg, const Obj* o); diff --git a/src/script_opt/ZAM/ZBody.cc b/src/script_opt/ZAM/ZBody.cc index b2f20fa225..123fa4088c 100644 --- a/src/script_opt/ZAM/ZBody.cc +++ b/src/script_opt/ZAM/ZBody.cc @@ -1,5 +1,7 @@ // See the file "COPYING" in the main distribution directory for copyright. +#include "zeek/script_opt/ZAM/ZBody.h" + #include "zeek/Desc.h" #include "zeek/EventHandler.h" #include "zeek/Frame.h" @@ -28,6 +30,74 @@ namespace zeek::detail { +#ifdef ENABLE_ZAM_PROFILE + +static std::vector caller_locs; + +static double compute_CPU_prof_overhead() { + double start = util::curr_CPU_time(); + double CPU = 0.0; + const int n = 100000; + for ( int i = 0; i < n; ++i ) + CPU = std::max(CPU, util::curr_CPU_time()); + + return 2.0 * (CPU - start) / n; +} + +static double compute_mem_prof_overhead() { + double start = util::curr_CPU_time(); + uint64_t m; + util::get_memory_usage(&m, nullptr); + const int n = 20000; + for ( int i = 0; i < n; ++i ) { + uint64_t m2; + util::get_memory_usage(&m2, nullptr); + } + + return 2.0 * (util::curr_CPU_time() - start) / n; +} + +static double CPU_prof_overhead = compute_CPU_prof_overhead(); +static double mem_prof_overhead = compute_mem_prof_overhead(); + +#define DO_ZAM_PROFILE \ + if ( do_profile ) { \ + double dt = util::curr_CPU_time() - profile_CPU; \ + auto& prof_info = (*curr_prof_vec)[profile_pc]; \ + ++prof_info.num_samples; \ + prof_info.CPU_time += dt; \ + ZOP_CPU[z.op] += dt; \ + } + +// These next two macros appear in code generated by gen-zam. +#define ZAM_PROFILE_PRE_CALL \ + if ( do_profile ) { \ + caller_locs.push_back(z.loc.get()); \ + if ( ! z.aux->is_BiF_call ) { /* For non-BiFs we don't include the callee's execution time as part of our own \ + */ \ + DO_ZAM_PROFILE \ + } \ + } + +#define ZAM_PROFILE_POST_CALL \ + if ( do_profile ) { \ + caller_locs.pop_back(); \ + if ( ! z.aux->is_BiF_call ) { /* We already did the profiling, move on to next instruction */ \ + ++pc; \ + continue; \ + } \ + } + +#else + +#define DO_ZAM_PROFILE +#define ZAM_PROFILE_PRE_CALL +#define ZAM_PROFILE_POST_CALL +static double CPU_prof_overhead = 0.0; +static double mem_prof_overhead = 0.0; + +#endif + using std::vector; // Thrown when a call inside a "when" delays. @@ -41,9 +111,22 @@ int ZOP_count[OP_NOP + 1]; double ZOP_CPU[OP_NOP + 1]; void report_ZOP_profile() { + static bool did_overhead_report = false; + + if ( ! did_overhead_report ) { + fprintf(analysis_options.profile_file, "Profile sampled every %d instructions; all calls profiled\n", + analysis_options.profile_sampling_rate); + fprintf(analysis_options.profile_file, + "Profiling overhead = %.0f nsec/instruction, memory profiling overhead = %.0f nsec/call\n", + CPU_prof_overhead * 1e9, mem_prof_overhead * 1e9); + did_overhead_report = true; + } + for ( int i = 1; i <= OP_NOP; ++i ) - if ( ZOP_count[i] > 0 ) - printf("%s\t%d\t%.06f\n", ZOP_name(ZOp(i)), ZOP_count[i], ZOP_CPU[i]); + if ( ZOP_count[i] > 0 ) { + auto CPU = std::max(ZOP_CPU[i] - ZOP_count[i] * CPU_prof_overhead, 0.0); + fprintf(analysis_options.profile_file, "%s\t%d\t%.06f\n", ZOP_name(ZOp(i)), ZOP_count[i], CPU); + } } // Sets the given element to a copy of an existing (not newly constructed) @@ -120,8 +203,8 @@ VEC_COERCE(IU, TYPE_INT, zeek_int_t, AsCount(), count_to_int_would_overflow, "un VEC_COERCE(UD, TYPE_COUNT, zeek_uint_t, AsDouble(), double_to_count_would_overflow, "double to unsigned") VEC_COERCE(UI, TYPE_COUNT, zeek_int_t, AsInt(), int_to_count_would_overflow, "signed to unsigned") -ZBody::ZBody(const char* _func_name, const ZAMCompiler* zc) : Stmt(STMT_ZAM) { - func_name = _func_name; +ZBody::ZBody(std::string _func_name, const ZAMCompiler* zc) : Stmt(STMT_ZAM) { + func_name = std::move(_func_name); frame_denizens = zc->FrameDenizens(); frame_size = frame_denizens.size(); @@ -157,12 +240,9 @@ ZBody::ZBody(const char* _func_name, const ZAMCompiler* zc) : Stmt(STMT_ZAM) { if ( ! did_init ) { auto log_ID_type = lookup_ID("ID", "Log"); ASSERT(log_ID_type); - log_ID_enum_type = log_ID_type->GetType(); - - any_base_type = base_type(TYPE_ANY); - + ZAM::log_ID_enum_type = log_ID_type->GetType(); + ZAM::any_base_type = base_type(TYPE_ANY); ZVal::SetZValNilStatusAddr(&ZAM_error); - did_init = false; } } @@ -170,9 +250,6 @@ ZBody::ZBody(const char* _func_name, const ZAMCompiler* zc) : Stmt(STMT_ZAM) { ZBody::~ZBody() { delete[] fixed_frame; delete[] insts; - delete inst_count; - delete inst_CPU; - delete CPU_time; } void ZBody::SetInsts(vector& _insts) { @@ -194,12 +271,6 @@ void ZBody::SetInsts(vector& instsI) { for ( auto i = 0U; i < end_pc; ++i ) { auto& iI = *instsI[i]; insts_copy[i] = iI; - if ( iI.stmt ) { - auto l = iI.stmt->GetLocationInfo(); - if ( l != &no_location ) - insts_copy[i].loc = std::make_shared(l->filename, l->first_line, l->last_line, - l->first_column, l->last_column); - } } insts = insts_copy; @@ -209,34 +280,22 @@ void ZBody::SetInsts(vector& instsI) { void ZBody::InitProfile() { if ( analysis_options.profile_ZAM ) { - inst_count = new vector; - inst_CPU = new vector; - for ( auto i = 0U; i < end_pc; ++i ) { - inst_count->push_back(0); - inst_CPU->push_back(0.0); - } - - CPU_time = new double; - *CPU_time = 0.0; + default_prof_vec = BuildProfVec(); + curr_prof_vec = default_prof_vec.get(); } } -ValPtr ZBody::Exec(Frame* f, StmtFlowType& flow) { -#ifdef DEBUG - double t = analysis_options.profile_ZAM ? util::curr_CPU_time() : 0.0; -#endif +std::shared_ptr ZBody::BuildProfVec() const { + auto pv = std::make_shared(); + pv->resize(end_pc); - auto val = DoExec(f, flow); + for ( auto i = 0U; i < end_pc; ++i ) + (*pv)[i] = {0, 0.0}; -#ifdef DEBUG - if ( analysis_options.profile_ZAM ) - *CPU_time += util::curr_CPU_time() - t; -#endif - - return val; + return pv; } -ValPtr ZBody::DoExec(Frame* f, StmtFlowType& flow) { +ValPtr ZBody::Exec(Frame* f, StmtFlowType& flow) { unsigned int pc = 0; // Return value, or nil if none. @@ -245,8 +304,27 @@ ValPtr ZBody::DoExec(Frame* f, StmtFlowType& flow) { // Type of the return value. If nil, then we don't have a value. TypePtr ret_type; -#ifdef DEBUG - bool do_profile = analysis_options.profile_ZAM; +#ifdef ENABLE_ZAM_PROFILE + static bool profiling_active = analysis_options.profile_ZAM; + static int sampling_rate = analysis_options.profile_sampling_rate; + + double start_CPU_time = 0.0; + uint64_t start_mem = 0; + + if ( profiling_active ) { + ++ncall; + start_CPU_time = util::curr_CPU_time(); + util::get_memory_usage(&start_mem, nullptr); + + if ( caller_locs.empty() ) + curr_prof_vec = default_prof_vec.get(); + else { + auto pv = prof_vecs.find(caller_locs); + if ( pv == prof_vecs.end() ) + pv = prof_vecs.insert({caller_locs, BuildProfVec()}).first; + curr_prof_vec = pv->second.get(); + } + } #endif ZVal* frame; @@ -276,16 +354,23 @@ ValPtr ZBody::DoExec(Frame* f, StmtFlowType& flow) { while ( pc < end_pc && ! ZAM_error ) { auto& z = insts[pc]; -#ifdef DEBUG +#ifdef ENABLE_ZAM_PROFILE + bool do_profile = false; int profile_pc = 0; double profile_CPU = 0.0; - if ( do_profile ) { - ++ZOP_count[z.op]; - ++(*inst_count)[pc]; + if ( profiling_active ) { + static auto seed = util::detail::random_number(); + seed = util::detail::prng(seed); + do_profile = seed % sampling_rate == 0; - profile_pc = pc; - profile_CPU = util::curr_CPU_time(); + if ( do_profile ) { + ++ZOP_count[z.op]; + ++ninst; + + profile_pc = pc; + profile_CPU = util::curr_CPU_time(); + } } #endif @@ -302,13 +387,7 @@ ValPtr ZBody::DoExec(Frame* f, StmtFlowType& flow) { default: reporter->InternalError("bad ZAM opcode"); } -#ifdef DEBUG - if ( do_profile ) { - double dt = util::curr_CPU_time() - profile_CPU; - inst_CPU->at(profile_pc) += dt; - ZOP_CPU[z.op] += dt; - } -#endif + DO_ZAM_PROFILE ++pc; } @@ -340,30 +419,86 @@ ValPtr ZBody::DoExec(Frame* f, StmtFlowType& flow) { delete[] frame; } +#ifdef ENABLE_ZAM_PROFILE + if ( profiling_active ) { + tot_CPU_time += util::curr_CPU_time() - start_CPU_time; + uint64_t final_mem; + util::get_memory_usage(&final_mem, nullptr); + if ( final_mem > start_mem ) + tot_mem += final_mem - start_mem; + } +#endif + return result; } -void ZBody::ProfileExecution() const { - if ( inst_count->empty() ) { - printf("%s has an empty body\n", func_name); +void ZBody::ReportExecutionProfile(ProfMap& pm) { + static bool did_overhead_report = false; + + if ( end_pc == 0 ) { + fprintf(analysis_options.profile_file, "%s has an empty body\n", func_name.c_str()); return; } - if ( (*inst_count)[0] == 0 ) { - printf("%s did not execute\n", func_name); + auto& dpv = *default_prof_vec; + + if ( dpv[0].num_samples == 0 && prof_vecs.empty() ) { + fprintf(analysis_options.profile_file, "%s did not execute\n", func_name.c_str()); return; } - printf("%s CPU time: %.06f\n", func_name, *CPU_time); + int total_samples = ncall + ninst; + double adj_CPU_time = tot_CPU_time; + adj_CPU_time -= ncall * (mem_prof_overhead + CPU_prof_overhead); + adj_CPU_time -= ninst * CPU_prof_overhead; + adj_CPU_time = std::max(adj_CPU_time, 0.0); - for ( auto i = 0U; i < inst_count->size(); ++i ) { - printf("%s %d %d %.06f ", func_name, i, (*inst_count)[i], (*inst_CPU)[i]); - insts[i].Dump(i, &frame_denizens); + fprintf(analysis_options.profile_file, "%s CPU time %.06f, %" PRIu64 " memory, %d calls, %d sampled instructions\n", + func_name.c_str(), adj_CPU_time, tot_mem, ncall, ninst); + + if ( dpv[0].num_samples != 0 ) + ReportProfile(pm, dpv, "", {}); + + for ( auto& pv : prof_vecs ) { + std::string prefix; + std::set modules; + for ( auto& caller : pv.first ) { + prefix += caller->Describe(true) + ";"; + auto& m = caller->GetModules(); + modules.insert(m.begin(), m.end()); + } + + ReportProfile(pm, *pv.second, prefix, std::move(modules)); + } +} + +void ZBody::ReportProfile(ProfMap& pm, const ProfVec& pv, const std::string& prefix, + std::set caller_modules) const { + for ( auto i = 0U; i < pv.size(); ++i ) { + auto ninst = pv[i].num_samples; + auto CPU = pv[i].CPU_time; + CPU = std::max(CPU - ninst * CPU_prof_overhead, 0.0); + fprintf(analysis_options.profile_file, "%s %d %" PRId64 " %.06f ", func_name.c_str(), i, ninst, CPU); + insts[i].Dump(analysis_options.profile_file, i, &frame_denizens, prefix); + + auto modules = caller_modules; + auto& m = insts[i].loc->GetModules(); + modules.insert(m.begin(), m.end()); + + for ( auto& m : modules ) { + auto mod_prof = pm.find(m); + if ( mod_prof == pm.end() ) + pm[m] = {ninst, CPU}; + else { + mod_prof->second.num_samples += ninst; + mod_prof->second.CPU_time += CPU; + } + } } } bool ZBody::CheckAnyType(const TypePtr& any_type, const TypePtr& expected_type, - const std::shared_ptr& loc) const { + const std::shared_ptr& loc) const { if ( IsAny(expected_type) ) return true; @@ -382,7 +517,7 @@ bool ZBody::CheckAnyType(const TypePtr& any_type, const TypePtr& expected_type, char buf[8192]; snprintf(buf, sizeof buf, "run-time type clash (%s/%s)", type_name(at), type_name(et)); - reporter->RuntimeError(loc.get(), "%s", buf); + reporter->RuntimeError(loc->Loc(), "%s", buf); return false; } @@ -411,13 +546,13 @@ void ZBody::Dump() const { for ( unsigned i = 0; i < end_pc; ++i ) { auto& inst = insts[i]; printf("%d: ", i); - inst.Dump(i, &frame_denizens); + inst.Dump(stdout, i, &frame_denizens, ""); } } void ZBody::StmtDescribe(ODesc* d) const { d->AddSP("ZAM-code"); - d->AddSP(func_name); + d->Add(func_name.c_str()); } TraversalCode ZBody::Traverse(TraversalCallback* cb) const { diff --git a/src/script_opt/ZAM/ZBody.h b/src/script_opt/ZAM/ZBody.h index b00993cad3..db4c5beb7b 100644 --- a/src/script_opt/ZAM/ZBody.h +++ b/src/script_opt/ZAM/ZBody.h @@ -5,6 +5,7 @@ #pragma once #include "zeek/script_opt/ZAM/IterInfo.h" +#include "zeek/script_opt/ZAM/Profile.h" #include "zeek/script_opt/ZAM/Support.h" namespace zeek::detail { @@ -26,9 +27,18 @@ using CaseMaps = std::vector>; using TableIterVec = std::vector; +struct ProfVal { + zeek_uint_t num_samples = 0; + double CPU_time = 0.0; +}; + +using ProfVec = std::vector; +using ProfMap = std::unordered_map; +using CallStack = std::vector; + class ZBody : public Stmt { public: - ZBody(const char* _func_name, const ZAMCompiler* zc); + ZBody(std::string _func_name, const ZAMCompiler* zc); ~ZBody() override; @@ -48,26 +58,29 @@ public: void Dump() const; - void ProfileExecution() const; + void ReportExecutionProfile(ProfMap& pm); -protected: + const std::string& FuncName() const { return func_name; } + +private: // Initializes profiling information, if needed. void InitProfile(); + std::shared_ptr BuildProfVec() const; - ValPtr DoExec(Frame* f, StmtFlowType& flow); + void ReportProfile(ProfMap& pm, const ProfVec& pv, const std::string& prefix, + std::set caller_modules) const; // Run-time checking for "any" type being consistent with // expected typed. Returns true if the type match is okay. bool CheckAnyType(const TypePtr& any_type, const TypePtr& expected_type, - const std::shared_ptr& loc) const; + const std::shared_ptr& loc) const; StmtPtr Duplicate() override { return {NewRef{}, this}; } void StmtDescribe(ODesc* d) const override; TraversalCode Traverse(TraversalCallback* cb) const override; -private: - const char* func_name = nullptr; + std::string func_name; const ZInst* insts = nullptr; unsigned int end_pc = 0; @@ -100,21 +113,19 @@ private: std::vector globals; int num_globals; - // The following are only maintained if we're doing profiling. - // - // These need to be pointers so we can manipulate them in a - // const method. - std::vector* inst_count = nullptr; // for profiling - double* CPU_time = nullptr; // cumulative CPU time for the program - std::vector* inst_CPU = nullptr; // per-instruction CPU time. - CaseMaps int_cases; CaseMaps uint_cases; CaseMaps double_cases; CaseMaps str_cases; + + // The following are only maintained if we're doing profiling. + int ninst = 0; + int ncall = 0; + double tot_CPU_time = 0.0; + uint64_t tot_mem = 0; + std::map> prof_vecs; + std::shared_ptr default_prof_vec; + ProfVec* curr_prof_vec; }; -// Prints the execution profile. -extern void report_ZOP_profile(); - } // namespace zeek::detail diff --git a/src/script_opt/ZAM/ZInst.h b/src/script_opt/ZAM/ZInst.h index 6b0bb890a5..8bde9bc571 100644 --- a/src/script_opt/ZAM/ZInst.h +++ b/src/script_opt/ZAM/ZInst.h @@ -60,14 +60,14 @@ public: ZInst(ZOp _op, ZAMOpType _op_type) { op = _op; op_type = _op_type; - ASSERT(curr_loc); - loc = curr_loc; + ASSERT(ZAM::curr_loc); + loc = ZAM::curr_loc; } // Create a stub instruction that will be populated later. ZInst() { - ASSERT(curr_loc); - loc = curr_loc; + ASSERT(ZAM::curr_loc); + loc = ZAM::curr_loc; } virtual ~ZInst() = default;