Merge remote-tracking branch 'origin/topic/vern/ZAM-inlining'

* origin/topic/vern/ZAM-inlining:
  speed up ZAM compilation by capping function size when inlining
This commit is contained in:
Tim Wojtulewicz 2022-05-19 11:45:38 -07:00
commit 73273fc87b
15 changed files with 85 additions and 15 deletions

View file

@ -1,3 +1,7 @@
5.0.0-dev.462 | 2022-05-19 11:45:38 -0700
* speed up ZAM compilation by capping function size when inlining (Vern Paxson, Corelight)
5.0.0-dev.460 | 2022-05-19 11:24:50 -0700 5.0.0-dev.460 | 2022-05-19 11:24:50 -0700
* Zeekify the scripts.base.utils.paths test (Christian Kreibich, Corelight) * Zeekify the scripts.base.utils.paths test (Christian Kreibich, Corelight)

View file

@ -1 +1 @@
5.0.0-dev.460 5.0.0-dev.462

View file

@ -110,10 +110,13 @@ const char* expr_name(BroExprTag t)
return expr_names[int(t)]; return expr_names[int(t)];
} }
int Expr::num_exprs = 0;
Expr::Expr(BroExprTag arg_tag) : tag(arg_tag), paren(false), type(nullptr) Expr::Expr(BroExprTag arg_tag) : tag(arg_tag), paren(false), type(nullptr)
{ {
SetLocationInfo(&start_location, &end_location); SetLocationInfo(&start_location, &end_location);
opt_info = new ExprOptInfo(); opt_info = new ExprOptInfo();
++num_exprs;
} }
Expr::~Expr() Expr::~Expr()

View file

@ -406,6 +406,12 @@ public:
// this statement. // this statement.
ExprOptInfo* GetOptInfo() const { return opt_info; } ExprOptInfo* GetOptInfo() const { return opt_info; }
// Returns the number of expressions created since the last reset.
static int GetNumExprs() { return num_exprs; }
// Clears the number of expressions created.
static void ResetNumExprs() { num_exprs = 0; }
~Expr() override; ~Expr() override;
protected: protected:
@ -441,6 +447,9 @@ protected:
// Information associated with the Expr for purposes of // Information associated with the Expr for purposes of
// script optimization. // script optimization.
ExprOptInfo* opt_info; ExprOptInfo* opt_info;
// Number of expressions created thus far.
static int num_exprs;
}; };
class NameExpr final : public Expr class NameExpr final : public Expr

View file

@ -59,6 +59,8 @@ const char* stmt_name(StmtTag t)
return stmt_names[int(t)]; return stmt_names[int(t)];
} }
int Stmt::num_stmts = 0;
Stmt::Stmt(StmtTag arg_tag) Stmt::Stmt(StmtTag arg_tag)
{ {
tag = arg_tag; tag = arg_tag;
@ -69,6 +71,8 @@ Stmt::Stmt(StmtTag arg_tag)
opt_info = new StmtOptInfo(); opt_info = new StmtOptInfo();
SetLocationInfo(&start_location, &end_location); SetLocationInfo(&start_location, &end_location);
++num_stmts;
} }
Stmt::~Stmt() Stmt::~Stmt()

View file

@ -177,6 +177,12 @@ public:
// this statement. // this statement.
StmtOptInfo* GetOptInfo() const { return opt_info; } StmtOptInfo* GetOptInfo() const { return opt_info; }
// Returns the number of statements created since the last reset.
static int GetNumStmts() { return num_stmts; }
// Clears the number of statements created.
static void ResetNumStmts() { num_stmts = 0; }
protected: protected:
explicit Stmt(StmtTag arg_tag); explicit Stmt(StmtTag arg_tag);
@ -203,6 +209,9 @@ protected:
// Information associated with the Stmt for purposes of // Information associated with the Stmt for purposes of
// script optimization. // script optimization.
StmtOptInfo* opt_info; StmtOptInfo* opt_info;
// Number of statements created thus far.
static int num_stmts;
}; };
} // namespace detail } // namespace detail

View file

@ -718,6 +718,10 @@ void begin_func(IDPtr id, const char* module_name, FunctionFlavor flavor, bool i
if ( Attr* depr_attr = find_attr(current_scope()->Attrs().get(), ATTR_DEPRECATED) ) if ( Attr* depr_attr = find_attr(current_scope()->Attrs().get(), ATTR_DEPRECATED) )
current_scope()->GetID()->MakeDeprecated(depr_attr->GetExpr()); current_scope()->GetID()->MakeDeprecated(depr_attr->GetExpr());
// Reset the AST node statistics to track afresh for this function.
Stmt::ResetNumStmts();
Expr::ResetNumExprs();
} }
class OuterIDBindingFinder : public TraversalCallback class OuterIDBindingFinder : public TraversalCallback
@ -804,7 +808,10 @@ void end_func(StmtPtr body, bool free_of_conditionals)
// by duplicating can itself be correctly duplicated. // by duplicating can itself be correctly duplicated.
body = body->Duplicate()->Duplicate(); body = body->Duplicate()->Duplicate();
body->GetOptInfo()->is_free_of_conditionals = free_of_conditionals; auto oi = body->GetOptInfo();
oi->is_free_of_conditionals = free_of_conditionals;
oi->num_stmts = Stmt::GetNumStmts();
oi->num_exprs = Expr::GetNumExprs();
auto ingredients = std::make_unique<function_ingredients>(pop_scope(), std::move(body)); auto ingredients = std::make_unique<function_ingredients>(pop_scope(), std::move(body));

View file

@ -2329,6 +2329,10 @@ ExprPtr CallExpr::Inline(Inliner* inl)
{ {
auto new_me = inl->CheckForInlining({NewRef{}, this}); auto new_me = inl->CheckForInlining({NewRef{}, this});
if ( ! new_me )
// All done with inlining.
return ThisPtr();
if ( new_me.get() != this ) if ( new_me.get() != this )
return new_me; return new_me;

View file

@ -5,10 +5,13 @@
#include "zeek/Desc.h" #include "zeek/Desc.h"
#include "zeek/script_opt/ProfileFunc.h" #include "zeek/script_opt/ProfileFunc.h"
#include "zeek/script_opt/ScriptOpt.h" #include "zeek/script_opt/ScriptOpt.h"
#include "zeek/script_opt/StmtOptInfo.h"
namespace zeek::detail namespace zeek::detail
{ {
constexpr int MAX_INLINE_SIZE = 1000;
void Inliner::Analyze() void Inliner::Analyze()
{ {
// Locate self- and indirectly recursive functions. // Locate self- and indirectly recursive functions.
@ -139,8 +142,15 @@ void Inliner::InlineFunction(FuncInfo* f)
// particular body. // particular body.
curr_frame_size = f->Scope()->Length(); curr_frame_size = f->Scope()->Length();
auto oi = f->Body()->GetOptInfo();
num_stmts = oi->num_stmts;
num_exprs = oi->num_exprs;
f->Body()->Inline(this); f->Body()->Inline(this);
oi->num_stmts = num_stmts;
oi->num_exprs = num_exprs;
int new_frame_size = curr_frame_size + max_inlined_frame_size; int new_frame_size = curr_frame_size + max_inlined_frame_size;
if ( new_frame_size > f->Func()->FrameSize() ) if ( new_frame_size > f->Func()->FrameSize() )
@ -175,9 +185,19 @@ ExprPtr Inliner::CheckForInlining(CallExprPtr c)
if ( inline_ables.count(func_vf) == 0 ) if ( inline_ables.count(func_vf) == 0 )
return c; return c;
ListExprPtr args = {NewRef{}, c->Args()}; // We're going to inline the body, unless it's too large.
auto body = func_vf->GetBodies()[0].stmts; // there's only 1 body auto body = func_vf->GetBodies()[0].stmts; // there's only 1 body
auto t = c->GetType(); auto oi = body->GetOptInfo();
if ( num_stmts + oi->num_stmts + num_exprs + oi->num_exprs > MAX_INLINE_SIZE )
return nullptr;
num_stmts += oi->num_stmts;
num_exprs += oi->num_exprs;
auto body_dup = body->Duplicate();
body_dup->GetOptInfo()->num_stmts = oi->num_stmts;
body_dup->GetOptInfo()->num_exprs = oi->num_exprs;
// Getting the names of the parameters is tricky. It's tempting // Getting the names of the parameters is tricky. It's tempting
// to take them from the function's type declaration, but alas // to take them from the function's type declaration, but alas
@ -198,8 +218,6 @@ ExprPtr Inliner::CheckForInlining(CallExprPtr c)
for ( int i = 0; i < nparam; ++i ) for ( int i = 0; i < nparam; ++i )
params.emplace_back(vars[i]); params.emplace_back(vars[i]);
auto body_dup = body->Duplicate();
// Recursively inline the body. This is safe to do because we've // Recursively inline the body. This is safe to do because we've
// ensured there are no recursive loops ... but we have to be // ensured there are no recursive loops ... but we have to be
// careful in accounting for the frame sizes. // careful in accounting for the frame sizes.
@ -221,6 +239,8 @@ ExprPtr Inliner::CheckForInlining(CallExprPtr c)
else else
max_inlined_frame_size = hold_max_inlined_frame_size; max_inlined_frame_size = hold_max_inlined_frame_size;
ListExprPtr args = {NewRef{}, c->Args()};
auto t = c->GetType();
auto ie = make_intrusive<InlineExpr>(args, std::move(params), body_dup, curr_frame_size, t); auto ie = make_intrusive<InlineExpr>(args, std::move(params), body_dup, curr_frame_size, t);
ie->SetOriginal(c); ie->SetOriginal(c);

View file

@ -27,8 +27,8 @@ public:
Analyze(); Analyze();
} }
// Either returns the original CallExpr if it's not inline-able, // Either returns the original CallExpr if it's not inline-able;
// or an InlineExpr if it is. // or an InlineExpr if it is; or nil if further inlining should stop.
ExprPtr CheckForInlining(CallExprPtr c); ExprPtr CheckForInlining(CallExprPtr c);
// True if the given function has been inlined. // True if the given function has been inlined.
@ -57,6 +57,12 @@ protected:
// prior to increasing it to accommodate inlining. // prior to increasing it to accommodate inlining.
int curr_frame_size; int curr_frame_size;
// The number of statements and expressions in the function being
// inlined. Dynamically updated as the inlining proceeds. Used
// to cap inlining complexity.
int num_stmts;
int num_exprs;
// Whether to generate a report about functions either directly and // Whether to generate a report about functions either directly and
// indirectly recursive. // indirectly recursive.
bool report_recursive; bool report_recursive;

View file

@ -26,6 +26,10 @@ public:
// Whether this statement is free of the possible influence // Whether this statement is free of the possible influence
// of conditional code. // of conditional code.
bool is_free_of_conditionals = true; bool is_free_of_conditionals = true;
// Number of statements and expressions in a function body.
int num_stmts = 0;
int num_exprs = 0;
}; };
} // namespace zeek::detail } // namespace zeek::detail

View file

@ -1,6 +1,10 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
0 <...>/coverage-blacklist.zeek, line 13 print cover me; 0 <...>/coverage-blacklist.zeek, line 13 print cover me;
0 <...>/coverage-blacklist.zeek, line 15 if (T) print always executed;
0 <...>/coverage-blacklist.zeek, line 17 print always executed; 0 <...>/coverage-blacklist.zeek, line 17 print always executed;
0 <...>/coverage-blacklist.zeek, line 22 if (F) print impossible;
0 <...>/coverage-blacklist.zeek, line 24 if (F) print also impossible, but included in code coverage analysis;
0 <...>/coverage-blacklist.zeek, line 26 print also impossible, but included in code coverage analysis; 0 <...>/coverage-blacklist.zeek, line 26 print also impossible, but included in code coverage analysis;
0 <...>/coverage-blacklist.zeek, line 29 print success; 0 <...>/coverage-blacklist.zeek, line 29 print success;
0 <...>/coverage-blacklist.zeek, line 5 print first; 0 <...>/coverage-blacklist.zeek, line 5 print first;
0 <...>/coverage-blacklist.zeek, line 7 if (F) { print hello; print world; }

View file

@ -1,4 +1,4 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
runtime error in <...>/type-cast-error-dynamic.zeek, line 12: invalid cast of value with type 'count' to type 'string' runtime error in <...>/type-cast-error-dynamic.zeek, line 12: invalid cast of value with type 'count' to type 'string'
runtime error in <...>/type-cast-error-dynamic.zeek, line 12: invalid cast of value with type 'record { a:addr; b:port; }' to type 'string' runtime error in <...>/type-cast-error-dynamic.zeek, line 12: invalid cast of value with type 'record { a:addr; b:port; }' to type 'string'
runtime error in <...>/type-cast-error-dynamic.zeek, line 12: invalid cast of value with type 'record { data:opaque of Broker::Data &optional; }' to type 'string' (nil $data field) runtime error in <...>/type-cast-error-dynamic.zeek, line 12: invalid cast of value with type 'Broker::Data' to type 'string' (nil $data field)

View file

@ -9,7 +9,7 @@
# @TEST-EXEC: btest-bg-run worker-2 "cp ../cluster-layout.zeek . && CLUSTER_NODE=worker-2 zeek -b --pseudo-realtime -C -r $TRACES/tls/ecdhe.pcap %INPUT" # @TEST-EXEC: btest-bg-run worker-2 "cp ../cluster-layout.zeek . && CLUSTER_NODE=worker-2 zeek -b --pseudo-realtime -C -r $TRACES/tls/ecdhe.pcap %INPUT"
# This timeout needs to be large to accommodate ZAM compilation delays. # This timeout needs to be large to accommodate ZAM compilation delays.
# @TEST-EXEC: btest-bg-wait 90 # @TEST-EXEC: btest-bg-wait 45
# @TEST-EXEC: btest-diff worker-1/.stdout # @TEST-EXEC: btest-diff worker-1/.stdout
# @TEST-EXEC: btest-diff worker-2/.stdout # @TEST-EXEC: btest-diff worker-2/.stdout

View file

@ -1,7 +1,3 @@
# the scripts and triggers the timeout. Ultimately we need to address this
# by capping the size of inlined functions, since the main delay comes from
# traversing enormous AST function bodies.
# @TEST-REQUIRES: test "${ZEEK_ZAM}" != "1"
# @TEST-PORT: BROKER_PORT1 # @TEST-PORT: BROKER_PORT1
# @TEST-PORT: BROKER_PORT2 # @TEST-PORT: BROKER_PORT2
# @TEST-PORT: BROKER_PORT3 # @TEST-PORT: BROKER_PORT3
@ -10,7 +6,7 @@
# @TEST-EXEC: btest-bg-run worker-1 ZEEKPATH=$ZEEKPATH:.. CLUSTER_NODE=worker-1 zeek -b %INPUT # @TEST-EXEC: btest-bg-run worker-1 ZEEKPATH=$ZEEKPATH:.. CLUSTER_NODE=worker-1 zeek -b %INPUT
# @TEST-EXEC: btest-bg-run worker-2 ZEEKPATH=$ZEEKPATH:.. CLUSTER_NODE=worker-2 zeek -b %INPUT # @TEST-EXEC: btest-bg-run worker-2 ZEEKPATH=$ZEEKPATH:.. CLUSTER_NODE=worker-2 zeek -b %INPUT
# This timeout needs to be large to accommodate ZAM compilation delays. # This timeout needs to be large to accommodate ZAM compilation delays.
# @TEST-EXEC: btest-bg-wait 90 # @TEST-EXEC: btest-bg-wait 45
# @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-sort btest-diff manager-1/.stdout # @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-sort btest-diff manager-1/.stdout
@load base/frameworks/sumstats @load base/frameworks/sumstats