diff --git a/src/EventRegistry.cc b/src/EventRegistry.cc index 297819d8dc..482811f139 100644 --- a/src/EventRegistry.cc +++ b/src/EventRegistry.cc @@ -172,11 +172,20 @@ void EventGroup::UpdateFuncBodies() { static auto is_group_disabled = [](const auto& g) { return g->IsDisabled(); }; for ( auto& func : funcs ) { - for ( auto& b : func->bodies ) + bool func_changed = false; + for ( auto& b : func->bodies ) { + auto prev = b.disabled; b.disabled = std::any_of(b.groups.cbegin(), b.groups.cend(), is_group_disabled); + if ( prev != b.disabled ) + func_changed = true; + } + + if ( ! func_changed ) + continue; static auto is_body_enabled = [](const auto& b) { return ! b.disabled; }; func->has_enabled_bodies = std::any_of(func->bodies.cbegin(), func->bodies.cend(), is_body_enabled); + func->all_bodies_enabled = std::all_of(func->bodies.cbegin(), func->bodies.cend(), is_body_enabled); } } diff --git a/src/Func.h b/src/Func.h index 331ce5c61f..d10eedc291 100644 --- a/src/Func.h +++ b/src/Func.h @@ -84,6 +84,13 @@ public: */ bool HasEnabledBodies() const { return ! bodies.empty() && has_enabled_bodies; }; + /** + * Is every body enabled? + * + * @return true if all bodies are enabled. (If no bodies, then true.) + */ + bool HasAllBodiesEnabled() const { return all_bodies_enabled; }; + /** * Calls a Zeek function. * @param args the list of arguments to the function call. @@ -152,6 +159,7 @@ private: // expose accessors in the zeek:: public interface. friend class EventGroup; bool has_enabled_bodies = true; + bool all_bodies_enabled = true; }; namespace detail { diff --git a/src/script_opt/Inline.cc b/src/script_opt/Inline.cc index e3f6e90c9b..a481cbcbfd 100644 --- a/src/script_opt/Inline.cc +++ b/src/script_opt/Inline.cc @@ -3,6 +3,8 @@ #include "zeek/script_opt/Inline.h" #include "zeek/Desc.h" +#include "zeek/EventRegistry.h" +#include "zeek/module_util.h" #include "zeek/script_opt/ProfileFunc.h" #include "zeek/script_opt/ScriptOpt.h" #include "zeek/script_opt/StmtOptInfo.h" @@ -99,13 +101,13 @@ void Inliner::Analyze() { } for ( auto& f : funcs ) { + if ( f.ShouldSkip() ) + continue; + const auto& func_ptr = f.FuncPtr(); const auto& func = func_ptr.get(); const auto& body = f.Body(); - if ( ! should_analyze(func_ptr, body) ) - continue; - // Candidates are non-event, non-hook, non-recursive, // non-compiled functions ... if ( func->Flavor() != FUNC_FLAVOR_FUNCTION ) @@ -120,33 +122,171 @@ void Inliner::Analyze() { inline_ables[func] = f.Profile(); } + CoalesceEventHandlers(); + for ( auto& f : funcs ) - if ( should_analyze(f.FuncPtr(), f.Body()) ) + if ( f.ShouldAnalyze() ) InlineFunction(&f); } +void Inliner::CoalesceEventHandlers() { + std::unordered_map event_handlers; + BodyInfo body_to_info; + for ( size_t i = 0U; i < funcs.size(); ++i ) { + auto& f = funcs[i]; + if ( ! f.ShouldAnalyze() ) + continue; + + auto& func_ptr = f.FuncPtr(); + const auto& func = func_ptr.get(); + const auto& func_type = func->GetType(); + + if ( func_type->AsFuncType()->Flavor() != FUNC_FLAVOR_EVENT ) + continue; + + // Special-case: zeek_init both has tons of event handlers (even + // with -b), such that it inevitably blows out the inlining budget, + // *and* only runs once, such that even if we could inline it, if + // it takes more time to compile it than to just run it via the + // interpreter, it's a lose. + static std::string zeek_init_name = "zeek_init"; + if ( func->Name() == zeek_init_name ) + continue; + + const auto& body = f.Body(); + + if ( func->GetKind() == Func::SCRIPT_FUNC && func->GetBodies().size() > 1 ) { + if ( event_handlers.count(func) == 0 ) + event_handlers[func] = 1; + else + ++event_handlers[func]; + ASSERT(body_to_info.count(body.get()) == 0); + body_to_info[body.get()] = i; + } + } + + for ( auto& e : event_handlers ) { + auto func = e.first; + auto& bodies = func->GetBodies(); + if ( bodies.size() != e.second ) + // It's potentially unsound to inline some-but-not-all event + // handlers, because doing so may violate &priority's. We + // could do the work of identifying such instances and only + // skipping those, but given that ZAM is feature-complete + // the mismatch here should only arise when using restrictions + // like --optimize-file, which likely aren't the common case. + continue; + + CoalesceEventHandlers({NewRef{}, func}, bodies, body_to_info); + } +} + +void Inliner::CoalesceEventHandlers(ScriptFuncPtr func, const std::vector& bodies, + const BodyInfo& body_to_info) { + auto merged_body = make_intrusive(); + auto oi = merged_body->GetOptInfo(); + + auto& params = func->GetType()->Params(); + auto nparams = params->NumFields(); + size_t init_frame_size = static_cast(nparams); + + PreInline(oi, init_frame_size); + + // We pattern the new (alternate) body off of the first body. + auto& b0 = func->GetBodies()[0].stmts; + auto b0_info = body_to_info.find(b0.get()); + ASSERT(b0_info != body_to_info.end()); + auto& info0 = funcs[b0_info->second]; + auto& scope0 = info0.Scope(); + auto& vars = scope0->OrderedVars(); + + // We need to create a new Scope. Otherwise, when inlining the first + // body the analysis of identifiers gets confused regarding whether + // a given identifier represents the outer instance or the inner. + auto empty_attrs = std::make_unique>(); + push_scope(scope0->GetID(), std::move(empty_attrs)); + + std::vector param_ids; + + for ( auto i = 0; i < nparams; ++i ) { + auto& vi = vars[i]; + // We use a special scope name so that when debugging issues we can + // see that a given variable came from coalescing event handlers. + auto p = install_ID(vi->Name(), "", false, false); + p->SetType(vi->GetType()); + param_ids.push_back(std::move(p)); + } + + auto new_scope = pop_scope(); + auto orig_scope = func->GetScope(); + func->SetScope(new_scope); + + // Build up the calling arguments. + auto args = make_intrusive(); + for ( auto& p : param_ids ) + args->Append(make_intrusive(p)); + + for ( auto& b : bodies ) { + auto bp = b.stmts; + auto bi_find = body_to_info.find(bp.get()); + ASSERT(bi_find != body_to_info.end()); + auto& bi = funcs[bi_find->second]; + auto ie = DoInline(func, bp, args, bi.Scope(), bi.Profile()); + + if ( ! ie ) { + // Failure presumably occurred due to hitting the maximum + // AST complexity for inlining. We can give up by simply + // returning, as at this point we haven't made any actual + // changes other than the function's scope. + func->SetScope(orig_scope); + return; + } + + merged_body->Stmts().push_back(make_intrusive(ie)); + } + + auto inlined_func = make_intrusive(merged_body, new_scope, func); + + // Replace the function for that EventHandler with the delegating one. + auto* eh = event_registry->Lookup(func->Name()); + ASSERT(eh); + eh->SetFunc(inlined_func); + + // Likewise, replace the value of the identifier. + auto fid = lookup_ID(func->Name(), GLOBAL_MODULE_NAME, false, false, false); + ASSERT(fid); + fid->SetVal(make_intrusive(inlined_func)); + + PostInline(oi, inlined_func); + + funcs.emplace_back(inlined_func, new_scope, merged_body, 0); + + auto pf = std::make_shared(inlined_func.get(), merged_body, true); + funcs.back().SetProfile(std::move(pf)); +} + void Inliner::InlineFunction(FuncInfo* f) { - max_inlined_frame_size = 0; - - // It's important that we take the current frame size from the - // *scope* and not f->Func(). The latter tracks the maximum required - // across all bodies, but we want to track the size for this - // particular body. - curr_frame_size = f->Scope()->Length(); - auto oi = f->Body()->GetOptInfo(); + PreInline(oi, f->Scope()->Length()); + f->Body()->Inline(this); + PostInline(oi, f->FuncPtr()); +} + +void Inliner::PreInline(StmtOptInfo* oi, size_t frame_size) { + max_inlined_frame_size = 0; + curr_frame_size = frame_size; num_stmts = oi->num_stmts; num_exprs = oi->num_exprs; +} - f->Body()->Inline(this); - +void Inliner::PostInline(StmtOptInfo* oi, ScriptFuncPtr f) { oi->num_stmts = num_stmts; oi->num_exprs = num_exprs; int new_frame_size = curr_frame_size + max_inlined_frame_size; - if ( new_frame_size > f->Func()->FrameSize() ) - f->Func()->SetFrameSize(new_frame_size); + if ( new_frame_size > f->FrameSize() ) + f->SetFrameSize(new_frame_size); } ExprPtr Inliner::CheckForInlining(CallExprPtr c) { @@ -166,21 +306,21 @@ ExprPtr Inliner::CheckForInlining(CallExprPtr c) { if ( ! func_v ) return c; - auto function = func_v->AsFunc(); + auto function = func_v->AsFuncVal()->AsFuncPtr(); if ( function->GetKind() != Func::SCRIPT_FUNC ) return c; - auto func_vf = static_cast(function); + auto func_vf = cast_intrusive(function); - auto ia = inline_ables.find(func_vf); + auto ia = inline_ables.find(func_vf.get()); if ( ia == inline_ables.end() ) return c; if ( c->IsInWhen() ) { // Don't inline these, as doing so requires propagating // the in-when attribute to the inlined function body. - skipped_inlining.insert(func_vf); + skipped_inlining.insert(func_vf.get()); return c; } @@ -189,21 +329,32 @@ ExprPtr Inliner::CheckForInlining(CallExprPtr c) { // BiFs, which won't happen here, but instead to script functions that // are misusing/abusing the loophole.) if ( function->GetType()->Params()->NumFields() == 1 && c->Args()->Exprs().size() != 1 ) { - skipped_inlining.insert(func_vf); + skipped_inlining.insert(func_vf.get()); return c; } // We're going to inline the body, unless it's too large. auto body = func_vf->GetBodies()[0].stmts; // there's only 1 body + auto scope = func_vf->GetScope(); + auto ie = DoInline(func_vf, body, c->ArgsPtr(), scope, ia->second); + + if ( ie ) { + ie->SetOriginal(c); + did_inline.insert(func_vf.get()); + } + + return ie; +} + +ExprPtr Inliner::DoInline(ScriptFuncPtr sf, StmtPtr body, ListExprPtr args, ScopePtr scope, const ProfileFunc* pf) { + // Inline the body, unless it's too large. auto oi = body->GetOptInfo(); if ( num_stmts + oi->num_stmts + num_exprs + oi->num_exprs > MAX_INLINE_SIZE ) { - skipped_inlining.insert(func_vf); + skipped_inlining.insert(sf.get()); return nullptr; // signals "stop inlining" } - did_inline.insert(func_vf); - num_stmts += oi->num_stmts; num_exprs += oi->num_exprs; @@ -220,10 +371,8 @@ ExprPtr Inliner::CheckForInlining(CallExprPtr c) { // with the scope, which gives us all the variables declared in // the function, *using the knowledge that the parameters are // declared first*. - auto scope = func_vf->GetScope(); - auto& pf = ia->second; auto& vars = scope->OrderedVars(); - int nparam = func_vf->GetType()->Params()->NumFields(); + int nparam = sf->GetType()->Params()->NumFields(); std::vector params; std::vector param_is_modified; @@ -237,7 +386,7 @@ ExprPtr Inliner::CheckForInlining(CallExprPtr c) { // Recursively inline the body. This is safe to do because we've // ensured there are no recursive loops ... but we have to be // careful in accounting for the frame sizes. - int frame_size = func_vf->FrameSize(); + int frame_size = sf->FrameSize(); int hold_curr_frame_size = curr_frame_size; curr_frame_size = frame_size; @@ -255,12 +404,10 @@ ExprPtr Inliner::CheckForInlining(CallExprPtr c) { else max_inlined_frame_size = hold_max_inlined_frame_size; - auto t = c->GetType(); - auto ie = make_intrusive(c->ArgsPtr(), std::move(params), std::move(param_is_modified), body_dup, - curr_frame_size, t); - ie->SetOriginal(c); + auto t = scope->GetReturnType(); - return ie; + ASSERT(params.size() == args->Exprs().size()); + return make_intrusive(args, params, param_is_modified, body_dup, curr_frame_size, t); } } // namespace zeek::detail diff --git a/src/script_opt/Inline.h b/src/script_opt/Inline.h index 1f4594535d..eec0bf49c9 100644 --- a/src/script_opt/Inline.h +++ b/src/script_opt/Inline.h @@ -37,9 +37,34 @@ protected: // recursively inlines eligible ones. void Analyze(); + // Maps an event handler body to its corresponding FuncInfo. For the + // latter we use a cursor rather than a direct reference or pointer + // because the collection of FuncInfo's are maintained in a vector and + // can wind up moving around in memory. + using BodyInfo = std::unordered_map; + + // Goes through all the event handlers and coalesces those with + // multiple bodies into a single "alternative" body. + void CoalesceEventHandlers(); + + // For a given event handler, collection of bodies, and associated + // function information, creates a new FuncInfo entry reflecting an + // alternative body for the event handler with all of the bodies + // coalesced into a single new body. + void CoalesceEventHandlers(ScriptFuncPtr sf, const std::vector& bodies, const BodyInfo& body_to_info); + // Recursively inlines any calls associated with the given function. void InlineFunction(FuncInfo* f); + // Performs common functionality prior to inlining a call body. + void PreInline(StmtOptInfo* oi, size_t frame_size); + + // Performs common functionality that comes after inlining a call body. + void PostInline(StmtOptInfo* oi, ScriptFuncPtr f); + + // Inlines the given body using the given arguments. + ExprPtr DoInline(ScriptFuncPtr sf, StmtPtr body, ListExprPtr args, ScopePtr scope, const ProfileFunc* pf); + // Information about all of the functions (and events/hooks) in // the full set of scripts. std::vector& funcs; diff --git a/src/script_opt/ScriptOpt.cc b/src/script_opt/ScriptOpt.cc index d58fe73f25..35de232ce1 100644 --- a/src/script_opt/ScriptOpt.cc +++ b/src/script_opt/ScriptOpt.cc @@ -473,17 +473,15 @@ static void analyze_scripts_for_ZAM(std::unique_ptr& pfs) { bool did_one = false; for ( auto& f : funcs ) { + if ( ! f.ShouldAnalyze() ) + continue; + auto func = f.Func(); auto l = lambdas.find(func); bool is_lambda = l != lambdas.end(); - if ( ! analysis_options.only_funcs.empty() || ! analysis_options.only_files.empty() ) { - if ( ! should_analyze(f.FuncPtr(), f.Body()) ) - continue; - } - - else if ( ! analysis_options.compile_all && ! is_lambda && inl && inl->WasFullyInlined(func) && - func_used_indirectly.count(func) == 0 ) { + if ( ! analysis_options.compile_all && ! is_lambda && inl && inl->WasFullyInlined(func) && + func_used_indirectly.count(func) == 0 ) { // No need to compile as it won't be called directly. // We'd like to zero out the body to recover the // memory, but a *few* such functions do get called, @@ -563,7 +561,7 @@ void analyze_scripts(bool no_unused_warnings) { if ( should_analyze(func.FuncPtr(), func.Body()) ) have_one_to_do = true; else - func.SetSkip(true); + func.SetShouldNotAnalyze(); if ( ! have_one_to_do ) reporter->FatalError("no matching functions/files for C++ compilation"); diff --git a/src/script_opt/ScriptOpt.h b/src/script_opt/ScriptOpt.h index 579da8f754..1176990da2 100644 --- a/src/script_opt/ScriptOpt.h +++ b/src/script_opt/ScriptOpt.h @@ -134,13 +134,18 @@ public: const ProfileFunc* Profile() const { return pf.get(); } std::shared_ptr ProfilePtr() const { return pf; } + void SetScope(ScopePtr new_scope) { scope = std::move(new_scope); } void SetBody(StmtPtr new_body) { body = std::move(new_body); } void SetProfile(std::shared_ptr _pf) { pf = std::move(_pf); } + bool ShouldAnalyze() const { return should_analyze; } + void SetShouldNotAnalyze() { + should_analyze = false; + skip = true; + } + // The following provide a way of marking FuncInfo's as - // should-be-skipped for script optimization, generally because - // the function body has a property that a given script optimizer - // doesn't know how to deal with. Defaults to don't-skip. + // should-be-skipped for a given phase of script optimization. bool ShouldSkip() const { return skip; } void SetSkip(bool should_skip) { skip = should_skip; } @@ -151,10 +156,39 @@ protected: std::shared_ptr pf; int priority; - // Whether to skip optimizing this function. + // Whether to analyze this function at all, per optimization selection + // via --optimize-file/--optimize-func. If those flags aren't used, + // then this will remain true, given that both ZAM and -O gen-C++ are + // feature-complete. + bool should_analyze = true; + + // Whether to skip optimizing this function in a given context. May be + // altered during optimization. bool skip = false; }; +// ScriptFunc subclass that runs a single (coalesced) body if possible, +// otherwise delegates to the original function with multiple bodies. +class CoalescedScriptFunc : public ScriptFunc { +public: + CoalescedScriptFunc(StmtPtr merged_body, ScopePtr scope, ScriptFuncPtr orig_func) + : ScriptFunc(orig_func->Name(), orig_func->GetType(), {merged_body}, {0}), orig_func(orig_func) { + SetScope(scope); + }; + + ValPtr Invoke(zeek::Args* args, Frame* parent) const override { + // If the original function has all bodies enabled, run our + // coalesced one, otherwise delegate. + if ( orig_func->HasAllBodiesEnabled() ) + return ScriptFunc::Invoke(args, parent); + + return orig_func->Invoke(args, parent); + } + +private: + ScriptFuncPtr orig_func; +}; + // We track which functions are definitely not recursive. We do this // as the negative, rather than tracking functions known to be recursive, // so that if we don't do the analysis at all (it's driven by inlining),