switch to ID definition regions; reworked driver functions; more info for reporting uncompilable functions

This commit is contained in:
Vern Paxson 2021-08-16 13:10:12 -07:00
parent 83a0d89caf
commit da6ac0b521
15 changed files with 619 additions and 581 deletions

View file

@ -135,7 +135,8 @@ class CPPCompile {
public: public:
CPPCompile(std::vector<FuncInfo>& _funcs, ProfileFuncs& pfs, CPPCompile(std::vector<FuncInfo>& _funcs, ProfileFuncs& pfs,
const std::string& gen_name, const std::string& addl_name, const std::string& gen_name, const std::string& addl_name,
CPPHashManager& _hm, bool _update, bool _standalone); CPPHashManager& _hm, bool _update, bool _standalone,
bool report_uncompilable);
~CPPCompile(); ~CPPCompile();
private: private:
@ -145,7 +146,7 @@ private:
// //
// Main driver, invoked by constructor. // Main driver, invoked by constructor.
void Compile(); void Compile(bool report_uncompilable);
// Generate the beginning of the compiled code: run-time functions, // Generate the beginning of the compiled code: run-time functions,
// namespace, auxiliary globals. // namespace, auxiliary globals.
@ -161,8 +162,11 @@ private:
void GenEpilog(); void GenEpilog();
// True if the given function (plus body and profile) is one // True if the given function (plus body and profile) is one
// that should be compiled. // that should be compiled. If non-nil, sets reason to the
bool IsCompilable(const FuncInfo& func); // reason why, if there's a fundamental problem. If however
// the function should be skipped for other reasons, then sets
// it to nil.
bool IsCompilable(const FuncInfo& func, const char** reason = nullptr);
// The set of functions/bodies we're compiling. // The set of functions/bodies we're compiling.
std::vector<FuncInfo>& funcs; std::vector<FuncInfo>& funcs;

View file

@ -14,7 +14,8 @@ using namespace std;
CPPCompile::CPPCompile(vector<FuncInfo>& _funcs, ProfileFuncs& _pfs, CPPCompile::CPPCompile(vector<FuncInfo>& _funcs, ProfileFuncs& _pfs,
const string& gen_name, const string& _addl_name, const string& gen_name, const string& _addl_name,
CPPHashManager& _hm, bool _update, bool _standalone) CPPHashManager& _hm, bool _update, bool _standalone,
bool report_uncompilable)
: funcs(_funcs), pfs(_pfs), hm(_hm), : funcs(_funcs), pfs(_pfs), hm(_hm),
update(_update), standalone(_standalone) update(_update), standalone(_standalone)
{ {
@ -67,7 +68,7 @@ CPPCompile::CPPCompile(vector<FuncInfo>& _funcs, ProfileFuncs& _pfs,
fclose(addl_f); fclose(addl_f);
} }
Compile(); Compile(report_uncompilable);
} }
CPPCompile::~CPPCompile() CPPCompile::~CPPCompile()
@ -75,7 +76,7 @@ CPPCompile::~CPPCompile()
fclose(write_file); fclose(write_file);
} }
void CPPCompile::Compile() void CPPCompile::Compile(bool report_uncompilable)
{ {
// Get the working directory so we can use it in diagnostic messages // Get the working directory so we can use it in diagnostic messages
// as a way to identify this compilation. Only germane when doing // as a way to identify this compilation. Only germane when doing
@ -100,8 +101,13 @@ void CPPCompile::Compile()
// Can't be called directly. // Can't be called directly.
continue; continue;
if ( IsCompilable(func) ) const char* reason;
if ( IsCompilable(func, &reason) )
compilable_funcs.insert(BodyName(func)); compilable_funcs.insert(BodyName(func));
else if ( reason && report_uncompilable )
fprintf(stderr,
"%s cannot be compiled to C++ due to %s\n",
func.Func()->Name(), reason);
auto h = func.Profile()->HashVal(); auto h = func.Profile()->HashVal();
if ( hm.HasHash(h) ) if ( hm.HasHash(h) )
@ -341,17 +347,24 @@ void CPPCompile::GenEpilog()
Emit("} // zeek::detail"); Emit("} // zeek::detail");
} }
bool CPPCompile::IsCompilable(const FuncInfo& func) bool CPPCompile::IsCompilable(const FuncInfo& func, const char** reason)
{ {
if ( ! is_CPP_compilable(func.Profile(), reason) )
return false;
if ( reason )
// Indicate that there's no fundamental reason it can't be
// compiled.
*reason = nullptr;
if ( func.ShouldSkip() ) if ( func.ShouldSkip() )
// Caller marked this function as one to skip.
return false; return false;
if ( hm.HasHash(func.Profile()->HashVal()) ) if ( hm.HasHash(func.Profile()->HashVal()) )
// We've already compiled it. // We've already compiled it.
return false; return false;
return is_CPP_compilable(func.Profile()); return true;
} }
} // zeek::detail } // zeek::detail

View file

@ -33,13 +33,21 @@ string scope_prefix(int scope)
return scope_prefix(to_string(scope)); return scope_prefix(to_string(scope));
} }
bool is_CPP_compilable(const ProfileFunc* pf) bool is_CPP_compilable(const ProfileFunc* pf, const char** reason)
{ {
if ( pf->NumWhenStmts() > 0 ) if ( pf->NumWhenStmts() > 0 )
{
if ( reason )
*reason = "use of \"when\"";
return false; return false;
}
if ( pf->TypeSwitches().size() > 0 ) if ( pf->TypeSwitches().size() > 0 )
{
if ( reason )
*reason = "use of type-based \"switch\"";
return false; return false;
}
return true; return true;
} }

View file

@ -19,8 +19,11 @@ extern std::string scope_prefix(const std::string& scope);
// Same, but for scopes identified with numbers. // Same, but for scopes identified with numbers.
extern std::string scope_prefix(int scope); extern std::string scope_prefix(int scope);
// True if the given function is compilable to C++. // True if the given function is compilable to C++. If it isn't, and
extern bool is_CPP_compilable(const ProfileFunc* pf); // the second argument is non-nil, then on return it points to text
// explaining why not.
extern bool is_CPP_compilable(const ProfileFunc* pf,
const char** reason = nullptr);
// Helper utilities for file locking, to ensure that hash files // Helper utilities for file locking, to ensure that hash files
// don't receive conflicting writes due to concurrent compilations. // don't receive conflicting writes due to concurrent compilations.

View file

@ -10,6 +10,13 @@
namespace zeek::detail { namespace zeek::detail {
RD_Decorate::RD_Decorate(std::shared_ptr<ProfileFunc> _pf, const Func* f,
ScopePtr scope, StmtPtr body)
: pf(std::move(_pf))
{
TraverseFunction(f, scope, body);
}
void RD_Decorate::TraverseFunction(const Func* f, ScopePtr scope, StmtPtr body) void RD_Decorate::TraverseFunction(const Func* f, ScopePtr scope, StmtPtr body)
{ {
func_flavor = f->Flavor(); func_flavor = f->Flavor();

View file

@ -50,9 +50,12 @@ private:
class RD_Decorate : public TraversalCallback { class RD_Decorate : public TraversalCallback {
public: public:
RD_Decorate(std::shared_ptr<ProfileFunc> _pf) : pf(std::move(_pf)) RD_Decorate(std::shared_ptr<ProfileFunc> _pf, const Func* f,
{ } ScopePtr scope, StmtPtr body);
const DefSetsMgr* GetDefSetsMgr() const { return &mgr; }
private:
// Traverses the given function body, using the first two // Traverses the given function body, using the first two
// arguments for context. // arguments for context.
void TraverseFunction(const Func* f, ScopePtr scope, StmtPtr body); void TraverseFunction(const Func* f, ScopePtr scope, StmtPtr body);
@ -62,9 +65,6 @@ public:
TraversalCode PreExpr(const Expr*) override; TraversalCode PreExpr(const Expr*) override;
TraversalCode PostExpr(const Expr*) override; TraversalCode PostExpr(const Expr*) override;
const DefSetsMgr* GetDefSetsMgr() const { return &mgr; }
private:
// The following implement various types of "confluence", i.e., // The following implement various types of "confluence", i.e.,
// situations in which control flow merges from multiple possible // situations in which control flow merges from multiple possible
// paths to a given point. // paths to a given point.

View file

@ -459,7 +459,7 @@ ProfileFuncs::ProfileFuncs(std::vector<FuncInfo>& funcs,
auto pf = std::make_unique<ProfileFunc>(f.Func(), f.Body(), auto pf = std::make_unique<ProfileFunc>(f.Func(), f.Body(),
full_record_hashes); full_record_hashes);
if ( ! pred || (*pred)(pf.get()) ) if ( ! pred || (*pred)(pf.get(), nullptr) )
MergeInProfile(pf.get()); MergeInProfile(pf.get());
else else
f.SetSkip(true); f.SetSkip(true);

View file

@ -281,7 +281,7 @@ protected:
// profile is compilable. Alternatively we could derive subclasses // profile is compilable. Alternatively we could derive subclasses
// from ProfileFuncs and use a virtual method for this, but that seems // from ProfileFuncs and use a virtual method for this, but that seems
// heavier-weight for what's really a simple notion. // heavier-weight for what's really a simple notion.
typedef bool (*is_compilable_pred)(const ProfileFunc*); typedef bool (*is_compilable_pred)(const ProfileFunc*, const char** reason);
// Collectively profile an entire collection of functions. // Collectively profile an entire collection of functions.
class ProfileFuncs { class ProfileFuncs {

View file

@ -7,6 +7,8 @@
#include "zeek/Stmt.h" #include "zeek/Stmt.h"
#include "zeek/Desc.h" #include "zeek/Desc.h"
#include "zeek/Reporter.h" #include "zeek/Reporter.h"
#include "zeek/script_opt/ExprOptInfo.h"
#include "zeek/script_opt/StmtOptInfo.h"
#include "zeek/script_opt/ProfileFunc.h" #include "zeek/script_opt/ProfileFunc.h"
#include "zeek/script_opt/Reduce.h" #include "zeek/script_opt/Reduce.h"
#include "zeek/script_opt/TempVar.h" #include "zeek/script_opt/TempVar.h"
@ -33,6 +35,10 @@ ExprPtr Reducer::GenTemporaryExpr(const TypePtr& t, ExprPtr rhs)
{ {
auto e = make_intrusive<NameExpr>(GenTemporary(t, rhs)); auto e = make_intrusive<NameExpr>(GenTemporary(t, rhs));
e->SetLocationInfo(rhs->GetLocationInfo()); e->SetLocationInfo(rhs->GetLocationInfo());
// No need to associate with current statement, since these
// are not generated during optimization.
return e; return e;
} }
@ -41,7 +47,13 @@ NameExprPtr Reducer::UpdateName(NameExprPtr n)
if ( NameIsReduced(n.get()) ) if ( NameIsReduced(n.get()) )
return n; return n;
return make_intrusive<NameExpr>(FindNewLocal(n)); auto ne = make_intrusive<NameExpr>(FindNewLocal(n));
// This name can be used by follow-on optimization analysis,
// so need to associate it with its statement.
BindExprToCurrStmt(ne);
return ne;
} }
bool Reducer::NameIsReduced(const NameExpr* n) const bool Reducer::NameIsReduced(const NameExpr* n) const
@ -106,6 +118,8 @@ bool Reducer::ID_IsReduced(const ID* id) const
NameExprPtr Reducer::GenInlineBlockName(const IDPtr& id) NameExprPtr Reducer::GenInlineBlockName(const IDPtr& id)
{ {
// We do this during reduction, not optimization, so no need
// to associate with curr_stmt.
return make_intrusive<NameExpr>(GenLocal(id)); return make_intrusive<NameExpr>(GenLocal(id));
} }
@ -118,6 +132,7 @@ NameExprPtr Reducer::PushInlineBlock(TypePtr type)
IDPtr ret_id = install_ID("@retvar", "<internal>", false, false); IDPtr ret_id = install_ID("@retvar", "<internal>", false, false);
ret_id->SetType(type); ret_id->SetType(type);
ret_id->GetOptInfo()->SetTemp();
// Track this as a new local *if* we're in the outermost inlining // Track this as a new local *if* we're in the outermost inlining
// block. If we're recursively deeper into inlining, then this // block. If we're recursively deeper into inlining, then this
@ -141,48 +156,22 @@ bool Reducer::SameVal(const Val* v1, const Val* v2) const
return v1 == v2; return v1 == v2;
} }
ExprPtr Reducer::NewVarUsage(IDPtr var, const DefPoints* dps, const Expr* orig) ExprPtr Reducer::NewVarUsage(IDPtr var, const Expr* orig)
{ {
if ( ! dps )
reporter->InternalError("null defpoints in NewVarUsage");
auto var_usage = make_intrusive<NameExpr>(var); auto var_usage = make_intrusive<NameExpr>(var);
SetDefPoints(var_usage.get(), dps); BindExprToCurrStmt(var_usage);
TrackExprReplacement(orig, var_usage.get());
return var_usage; return var_usage;
} }
const DefPoints* Reducer::GetDefPoints(const NameExpr* var) void Reducer::BindExprToCurrStmt(const ExprPtr& e)
{ {
auto dps = FindDefPoints(var); e->GetOptInfo()->stmt_num = curr_stmt->GetOptInfo()->stmt_num;
if ( ! dps )
{
auto id = var->Id();
auto di = mgr->GetConstID_DI(id);
auto rds = mgr->GetPreMaxRDs(GetRDLookupObj(var));
dps = rds->GetDefPoints(di);
SetDefPoints(var, dps);
} }
return dps; void Reducer::BindStmtToCurrStmt(const StmtPtr& s)
}
const DefPoints* Reducer::FindDefPoints(const NameExpr* var) const
{ {
auto dps = var_usage_to_DPs.find(var); s->GetOptInfo()->stmt_num = curr_stmt->GetOptInfo()->stmt_num;
if ( dps == var_usage_to_DPs.end() )
return nullptr;
else
return dps->second;
}
void Reducer::SetDefPoints(const NameExpr* var, const DefPoints* dps)
{
var_usage_to_DPs[var] = dps;
} }
bool Reducer::SameOp(const Expr* op1, const Expr* op2) bool Reducer::SameOp(const Expr* op1, const Expr* op2)
@ -196,7 +185,7 @@ bool Reducer::SameOp(const Expr* op1, const Expr* op2)
if ( op1->Tag() == EXPR_NAME ) if ( op1->Tag() == EXPR_NAME )
{ {
// Needs to be both the same identifier and in contexts // Needs to be both the same identifier and in contexts
// where the identifier has the same definition points. // where the identifier has the same definitions.
auto op1_n = op1->AsNameExpr(); auto op1_n = op1->AsNameExpr();
auto op2_n = op2->AsNameExpr(); auto op2_n = op2->AsNameExpr();
@ -206,10 +195,13 @@ bool Reducer::SameOp(const Expr* op1, const Expr* op2)
if ( op1_id != op2_id ) if ( op1_id != op2_id )
return false; return false;
auto op1_dps = GetDefPoints(op1_n); auto e_stmt_1 = op1->GetOptInfo()->stmt_num;
auto op2_dps = GetDefPoints(op2_n); auto e_stmt_2 = op2->GetOptInfo()->stmt_num;
return same_DPs(op1_dps, op2_dps); auto def_1 = op1_id->GetOptInfo()->DefinitionBefore(e_stmt_1);
auto def_2 = op2_id->GetOptInfo()->DefinitionBefore(e_stmt_2);
return def_1 == def_2 && def_1 != NO_DEF;
} }
else if ( op1->Tag() == EXPR_CONST ) else if ( op1->Tag() == EXPR_CONST )
@ -391,11 +383,10 @@ IDPtr Reducer::FindExprTmp(const Expr* rhs, const Expr* a,
// always makes it here. // always makes it here.
auto id = et_i->Id().get(); auto id = et_i->Id().get();
// We use 'a' in the following rather than rhs auto stmt_num = a->GetOptInfo()->stmt_num;
// because the RHS can get rewritten (for example, auto def = id->GetOptInfo()->DefinitionBefore(stmt_num);
// due to folding) after we generate RDs, and
// thus might not have any. if ( def == NO_DEF )
if ( ! mgr->HasSinglePreMinRD(a, id) )
// The temporary's value isn't guaranteed // The temporary's value isn't guaranteed
// to make it here. // to make it here.
continue; continue;
@ -470,8 +461,6 @@ void Reducer::CheckIDs(const Expr* e, std::vector<const ID*>& ids) const
bool Reducer::IsCSE(const AssignExpr* a, const NameExpr* lhs, const Expr* rhs) bool Reducer::IsCSE(const AssignExpr* a, const NameExpr* lhs, const Expr* rhs)
{ {
auto a_max_rds = mgr->GetPostMaxRDs(GetRDLookupObj(a));
auto lhs_id = lhs->Id(); auto lhs_id = lhs->Id();
auto lhs_tmp = FindTemporary(lhs_id); // nil if LHS not a temporary auto lhs_tmp = FindTemporary(lhs_id); // nil if LHS not a temporary
auto rhs_tmp = FindExprTmp(rhs, a, lhs_tmp); auto rhs_tmp = FindExprTmp(rhs, a, lhs_tmp);
@ -479,9 +468,7 @@ bool Reducer::IsCSE(const AssignExpr* a, const NameExpr* lhs, const Expr* rhs)
ExprPtr new_rhs; ExprPtr new_rhs;
if ( rhs_tmp ) if ( rhs_tmp )
{ // We already have a temporary { // We already have a temporary
auto tmp_di = mgr->GetConstID_DI(rhs_tmp.get()); new_rhs = NewVarUsage(rhs_tmp, rhs);
auto dps = a_max_rds->GetDefPoints(tmp_di);
new_rhs = NewVarUsage(rhs_tmp, dps, rhs);
rhs = new_rhs.get(); rhs = new_rhs.get();
} }
@ -507,104 +494,73 @@ bool Reducer::IsCSE(const AssignExpr* a, const NameExpr* lhs, const Expr* rhs)
// Treat the LHS as either an alias for the RHS, // Treat the LHS as either an alias for the RHS,
// or as a constant if the RHS is a constant in // or as a constant if the RHS is a constant in
// this context. // this context.
auto rhs_di = mgr->GetConstID_DI(rhs_id.get()); auto stmt_num = a->GetOptInfo()->stmt_num;
auto dps = a_max_rds->GetDefPoints(rhs_di); auto rhs_const = CheckForConst(rhs_id, stmt_num);
auto rhs_const = CheckForConst(rhs_id, dps);
if ( rhs_const ) if ( rhs_const )
lhs_tmp->SetConst(rhs_const); lhs_tmp->SetConst(rhs_const);
else else
lhs_tmp->SetAlias(rhs_id, dps); lhs_tmp->SetAlias(rhs_id);
return true; return true;
} }
// Track where we define the temporary.
auto lhs_di = mgr->GetConstID_DI(lhs_id);
auto dps = a_max_rds->GetDefPoints(lhs_di);
if ( lhs_tmp->DPs() && ! same_DPs(lhs_tmp->DPs(), dps) )
reporter->InternalError("double DPs for temporary");
lhs_tmp->SetDPs(dps);
SetDefPoints(lhs, dps);
expr_temps.emplace_back(lhs_tmp); expr_temps.emplace_back(lhs_tmp);
} }
return false; return false;
} }
const ConstExpr* Reducer::CheckForConst(const IDPtr& id, const ConstExpr* Reducer::CheckForConst(const IDPtr& id, int stmt_num) const
const DefPoints* dps) const
{ {
if ( ! dps || dps->length() == 0 ) if ( id->GetType()->Tag() == TYPE_ANY )
// This can happen for access to uninitialized values. // Don't propagate identifiers of type "any" as constants.
// This is because the identifier might be used in some
// context that's dynamically unreachable due to the type
// of its value (such as via a type-switch), but for which
// constant propagation of the constant value to that
// context can result in compile-time errors when folding
// expressions in which the identifier appears (and is
// in that context presumed to have a different type).
return nullptr; return nullptr;
if ( dps->length() != 1 ) auto oi = id->GetOptInfo();
// Multiple definitions of the variable reach to this auto c = oi->Const();
// location. In theory we could check whether they *all*
// provide the same constant, but that hardly seems likely. if ( c )
return c;
auto e = id->GetOptInfo()->DefExprBefore(stmt_num);
if ( e )
{
auto ce = constant_exprs.find(e.get());
if ( ce != constant_exprs.end() )
e = ce->second;
if ( e->Tag() == EXPR_CONST )
return e->AsConstExpr();
// Follow aliases.
if ( e->Tag() != EXPR_NAME )
return nullptr; return nullptr;
// Identifier has a unique definition. return CheckForConst(e->AsNameExpr()->IdPtr(), stmt_num);
auto dp = (*dps)[0]; }
const Expr* e = nullptr;
if ( dp.Tag() == STMT_DEF )
{
auto s = dp.StmtVal();
if ( s->Tag() == STMT_CATCH_RETURN )
{
// Change 's' to refer to the associated assignment
// statement, if any.
auto cr = s->AsCatchReturnStmt();
s = cr->AssignStmt().get();
if ( ! s )
return nullptr; return nullptr;
} }
if ( s->Tag() != STMT_EXPR ) ConstExprPtr Reducer::Fold(ExprPtr e)
// Defined in a statement other than an assignment.
return nullptr;
e = s->AsExprStmt()->StmtExpr();
}
else if ( dp.Tag() == EXPR_DEF )
e = dp.ExprVal();
else
return nullptr;
if ( e->Tag() != EXPR_ASSIGN )
// Not sure why this would happen, other than EXPR_APPEND_TO,
// but in any case not an expression we can mine for a
// constant.
return nullptr;
auto rhs = e->GetOp2();
if ( rhs->Tag() != EXPR_CONST )
return nullptr;
return rhs->AsConstExpr();
}
void Reducer::TrackExprReplacement(const Expr* orig, const Expr* e)
{ {
new_expr_to_orig[e] = orig; auto c = make_intrusive<ConstExpr>(e->Eval(nullptr));
FoldedTo(e, c);
return c;
} }
const Obj* Reducer::GetRDLookupObj(const Expr* e) const void Reducer::FoldedTo(ExprPtr e, ConstExprPtr c)
{ {
auto orig_e = new_expr_to_orig.find(e); constant_exprs[e.get()] = std::move(c);
if ( orig_e == new_expr_to_orig.end() ) folded_exprs.push_back(std::move(e));
return e;
else
return orig_e->second;
} }
ExprPtr Reducer::OptExpr(Expr* e) ExprPtr Reducer::OptExpr(Expr* e)
@ -635,13 +591,10 @@ ExprPtr Reducer::UpdateExpr(ExprPtr e)
auto tmp_var = FindTemporary(id); auto tmp_var = FindTemporary(id);
if ( ! tmp_var ) if ( ! tmp_var )
{ {
auto max_rds = mgr->GetPreMaxRDs(GetRDLookupObj(n));
IDPtr id_ptr = {NewRef{}, id}; IDPtr id_ptr = {NewRef{}, id};
auto di = mgr->GetConstID_DI(id); auto stmt_num = e->GetOptInfo()->stmt_num;
auto dps = max_rds->GetDefPoints(di); auto is_const = CheckForConst(id_ptr, stmt_num);
auto is_const = CheckForConst(id_ptr, dps);
if ( is_const ) if ( is_const )
{ {
// Remember this variable as one whose value // Remember this variable as one whose value
@ -662,36 +615,33 @@ ExprPtr Reducer::UpdateExpr(ExprPtr e)
auto alias = tmp_var->Alias(); auto alias = tmp_var->Alias();
if ( alias ) if ( alias )
{ {
// Make sure that the definition points for the // Make sure that the definitions for the alias here are
// alias here are the same as when the alias // the same as when the alias was created.
// was created.
auto alias_tmp = FindTemporary(alias.get()); auto alias_tmp = FindTemporary(alias.get());
if ( alias_tmp ) // Resolve any alias chains.
while ( alias_tmp && alias_tmp->Alias() )
{ {
while ( alias_tmp->Alias() ) alias = alias_tmp->Alias();
{ alias_tmp = FindTemporary(alias.get());
// Alias chains can occur due to
// re-reduction while optimizing.
auto a_id = alias_tmp->Id();
if ( a_id == id )
return e;
alias_tmp = FindTemporary(alias_tmp->Id().get());
} }
// Temporaries always have only one definition point, if ( alias->GetOptInfo()->IsTemp() )
{
// Temporaries always have only one definition,
// so no need to check for consistency. // so no need to check for consistency.
auto new_usage = NewVarUsage(alias, alias_tmp->DPs(), e.get()); auto new_usage = NewVarUsage(alias, e.get());
return new_usage; return new_usage;
} }
auto e_max_rds = mgr->GetPreMaxRDs(GetRDLookupObj(e.get())); auto e_stmt_1 = e->GetOptInfo()->stmt_num;
auto alias_di = mgr->GetConstID_DI(alias.get()); auto e_stmt_2 = tmp_var->RHS()->GetOptInfo()->stmt_num;
auto alias_dps = e_max_rds->GetDefPoints(alias_di);
if ( same_DPs(alias_dps, tmp_var->DPs()) ) auto def_1 = alias->GetOptInfo()->DefinitionBefore(e_stmt_1);
return NewVarUsage(alias, alias_dps, e.get()); auto def_2 = tmp_var->Id()->GetOptInfo()->DefinitionBefore(e_stmt_2);
if ( def_1 == def_2 && def_1 != NO_DEF )
return NewVarUsage(alias, e.get());
else else
return e; return e;
} }
@ -758,9 +708,17 @@ StmtPtr Reducer::MergeStmts(const NameExpr* lhs, ExprPtr rhs, Stmt* succ_stmt)
lhs_tmp->Deactivate(); lhs_tmp->Deactivate();
auto merge_e = make_intrusive<AssignExpr>(a_lhs_deref, rhs, false, auto merge_e = make_intrusive<AssignExpr>(a_lhs_deref, rhs, false,
nullptr, nullptr, false); nullptr, nullptr, false);
TrackExprReplacement(rhs.get(), merge_e.get()); auto merge_e_stmt = make_intrusive<ExprStmt>(merge_e);
return make_intrusive<ExprStmt>(merge_e); // Update the associated stmt_num's. For strict correctness, we
// want both of these bound to the earlier of the two statements
// we're merging (though in practice, either will work, since
// we're eliding the only difference between the two). Our
// caller ensures this.
BindExprToCurrStmt(merge_e);
BindStmtToCurrStmt(merge_e_stmt);
return merge_e_stmt;
} }
IDPtr Reducer::GenTemporary(const TypePtr& t, ExprPtr rhs) IDPtr Reducer::GenTemporary(const TypePtr& t, ExprPtr rhs)
@ -809,6 +767,9 @@ IDPtr Reducer::GenLocal(const IDPtr& orig)
local_id->SetType(orig->GetType()); local_id->SetType(orig->GetType());
local_id->SetAttrs(orig->GetAttrs()); local_id->SetAttrs(orig->GetAttrs());
if ( orig->GetOptInfo()->IsTemp() )
local_id->GetOptInfo()->SetTemp();
new_locals.insert(local_id.get()); new_locals.insert(local_id.get());
orig_to_new_locals[orig.get()] = local_id; orig_to_new_locals[orig.get()] = local_id;
@ -1040,27 +1001,6 @@ bool CSE_ValidityChecker::CheckAggrMod(const std::vector<const ID*>& ids,
} }
bool same_DPs(const DefPoints* dp1, const DefPoints* dp2)
{
if ( dp1 == dp2 )
return true;
if ( ! dp1 || ! dp2 )
return false;
// Given how we construct DPs, they should be element-by-element
// equivalent; we don't have to worry about reordering.
if ( dp1->length() != dp2->length() )
return false;
for ( auto i = 0; i < dp1->length(); ++i )
if ( ! (*dp1)[i].SameAs((*dp2)[i]) )
return false;
return true;
}
const Expr* non_reduced_perp; const Expr* non_reduced_perp;
bool checking_reduction; bool checking_reduction;

View file

@ -6,13 +6,11 @@
#include "zeek/Expr.h" #include "zeek/Expr.h"
#include "zeek/Stmt.h" #include "zeek/Stmt.h"
#include "zeek/Traverse.h" #include "zeek/Traverse.h"
#include "zeek/script_opt/DefSetsMgr.h"
namespace zeek::detail { namespace zeek::detail {
class Expr; class Expr;
class TempVar; class TempVar;
class ProfileFunc;
class Reducer { class Reducer {
public: public:
@ -20,8 +18,9 @@ public:
StmtPtr Reduce(StmtPtr s); StmtPtr Reduce(StmtPtr s);
const DefSetsMgr* GetDefSetsMgr() const { return mgr; } void SetReadyToOptimize() { opt_ready = true; }
void SetDefSetsMgr(const DefSetsMgr* _mgr) { mgr = _mgr; }
void SetCurrStmt(const Stmt* stmt) { curr_stmt = stmt; }
ExprPtr GenTemporaryExpr(const TypePtr& t, ExprPtr rhs); ExprPtr GenTemporaryExpr(const TypePtr& t, ExprPtr rhs);
@ -76,7 +75,7 @@ public:
// True if the Reducer is being used in the context of a second // True if the Reducer is being used in the context of a second
// pass over for AST optimization. // pass over for AST optimization.
bool Optimizing() const bool Optimizing() const
{ return ! IsPruning() && mgr != nullptr; } { return ! IsPruning() && opt_ready; }
// A predicate that indicates whether a given reduction pass // A predicate that indicates whether a given reduction pass
// is being made to prune unused statements. // is being made to prune unused statements.
@ -126,6 +125,14 @@ public:
// already been applied. // already been applied.
bool IsCSE(const AssignExpr* a, const NameExpr* lhs, const Expr* rhs); bool IsCSE(const AssignExpr* a, const NameExpr* lhs, const Expr* rhs);
// Returns a constant representing folding of the given expression
// (which must have constant operands).
ConstExprPtr Fold(ExprPtr e);
// Notes that the given expression has been folded to the
// given constant.
void FoldedTo(ExprPtr orig, ConstExprPtr c);
// Given an lhs=rhs statement followed by succ_stmt, returns // Given an lhs=rhs statement followed by succ_stmt, returns
// a (new) merge of the two if they're of the form tmp=rhs, var=tmp; // a (new) merge of the two if they're of the form tmp=rhs, var=tmp;
// otherwise, nil. // otherwise, nil.
@ -150,23 +157,13 @@ protected:
// are in fact equivalent.) // are in fact equivalent.)
bool SameVal(const Val* v1, const Val* v2) const; bool SameVal(const Val* v1, const Val* v2) const;
// Track that the variable "var", which has the given set of // Track that the variable "var" will be a replacement for
// definition points, will be a replacement for the "orig" // the "orig" expression. Returns the replacement expression
// expression. Returns the replacement expression (which is // (which is just a NameExpr referring to "var").
// is just a NameExpr referring to "var"). ExprPtr NewVarUsage(IDPtr var, const Expr* orig);
ExprPtr NewVarUsage(IDPtr var, const DefPoints* dps, const Expr* orig);
// Returns the definition points associated with "var". If none void BindExprToCurrStmt(const ExprPtr& e);
// exist in our cache, then populates the cache. void BindStmtToCurrStmt(const StmtPtr& s);
const DefPoints* GetDefPoints(const NameExpr* var);
// Retrieve the definition points associated in our cache with the
// given variable, if any.
const DefPoints* FindDefPoints(const NameExpr* var) const;
// Adds a mapping in our cache of the given variable to the given
// set of definition points.
void SetDefPoints(const NameExpr* var, const DefPoints* dps);
// Returns true if op1 and op2 represent the same operand, given // Returns true if op1 and op2 represent the same operand, given
// the reaching definitions available at their usages (e1 and e2). // the reaching definitions available at their usages (e1 and e2).
@ -216,23 +213,10 @@ protected:
// for the current function. // for the current function.
IDPtr GenLocal(const IDPtr& orig); IDPtr GenLocal(const IDPtr& orig);
// This is the heart of constant propagation. Given an identifier // This is the heart of constant propagation. Given an identifier,
// and a set of definition points for it, if its value is constant // if its value is constant at the given location then returns
// then returns the corresponding ConstExpr with the value. // the corresponding ConstExpr with the value.
const ConstExpr* CheckForConst(const IDPtr& id, const ConstExpr* CheckForConst(const IDPtr& id, int stmt_num) const;
const DefPoints* dps) const;
// Track that we're replacing instances of "orig" with a new
// expression. This allows us to locate the RDs associated
// with "orig" in the context of the new expression, without
// requiring an additional RD propagation pass.
void TrackExprReplacement(const Expr* orig, const Expr* e);
// Returns the object we should use to look up RD's associated
// with 'e'. (This isn't necessarily 'e' itself because we
// may have decided to replace it with a different expression,
// per TrackExprReplacement().)
const Obj* GetRDLookupObj(const Expr* e) const;
// Tracks the temporary variables created during the reduction/ // Tracks the temporary variables created during the reduction/
// optimization process. // optimization process.
@ -253,6 +237,14 @@ protected:
// rename local variables when inlining. // rename local variables when inlining.
std::unordered_map<const ID*, IDPtr> orig_to_new_locals; std::unordered_map<const ID*, IDPtr> orig_to_new_locals;
// Tracks expressions we've folded, so that we can recognize them
// for constant propagation.
std::unordered_map<const Expr*, ConstExprPtr> constant_exprs;
// Holds onto those expressions so they don't become invalid
// due to memory management.
std::vector<ExprPtr> folded_exprs;
// Which statements to elide from the AST (because optimization // Which statements to elide from the AST (because optimization
// has determined they're no longer needed). // has determined they're no longer needed).
std::unordered_set<const Stmt*> omitted_stmts; std::unordered_set<const Stmt*> omitted_stmts;
@ -270,25 +262,17 @@ protected:
// exponentially. // exponentially.
int bifurcation_level = 0; int bifurcation_level = 0;
// For a given usage of a variable's value, return the definition
// points associated with its use at that point. We use this
// both as a cache (populating it every time we do a more laborious
// lookup), and proactively when creating new references to variables.
std::unordered_map<const NameExpr*, const DefPoints*> var_usage_to_DPs;
// Tracks which (non-temporary) variables had constant // Tracks which (non-temporary) variables had constant
// values used for constant propagation. // values used for constant propagation.
std::unordered_set<const ID*> constant_vars; std::unordered_set<const ID*> constant_vars;
// For a new expression we've created, map it to the expression
// it's replacing. This allows us to locate the RDs associated
// with the usage.
std::unordered_map<const Expr*, const Expr*> new_expr_to_orig;
// Statement at which the current reduction started. // Statement at which the current reduction started.
StmtPtr reduction_root = nullptr; StmtPtr reduction_root = nullptr;
const DefSetsMgr* mgr = nullptr; // Statement we're currently working on.
const Stmt* curr_stmt = nullptr;
bool opt_ready = false;
}; };
@ -364,8 +348,6 @@ protected:
}; };
extern bool same_DPs(const DefPoints* dp1, const DefPoints* dp2);
// Used for debugging, to communicate which expression wasn't // Used for debugging, to communicate which expression wasn't
// reduced when we expected them all to be. // reduced when we expected them all to be.
extern const Expr* non_reduced_perp; extern const Expr* non_reduced_perp;

View file

@ -6,15 +6,20 @@
#include "zeek/Desc.h" #include "zeek/Desc.h"
#include "zeek/EventHandler.h" #include "zeek/EventHandler.h"
#include "zeek/EventRegistry.h" #include "zeek/EventRegistry.h"
#include "zeek/script_opt/ScriptOpt.h" #include "zeek/script_opt/ScriptOpt.h"
#include "zeek/script_opt/ProfileFunc.h" #include "zeek/script_opt/ProfileFunc.h"
#include "zeek/script_opt/Inline.h" #include "zeek/script_opt/Inline.h"
#include "zeek/script_opt/Reduce.h" #include "zeek/script_opt/Reduce.h"
#include "zeek/script_opt/GenIDDefs.h"
#include "zeek/script_opt/GenRDs.h" #include "zeek/script_opt/GenRDs.h"
#include "zeek/script_opt/UseDefs.h" #include "zeek/script_opt/UseDefs.h"
#include "zeek/script_opt/CPP/Compile.h" #include "zeek/script_opt/CPP/Compile.h"
#include "zeek/script_opt/CPP/Func.h" #include "zeek/script_opt/CPP/Func.h"
#include "zeek/script_opt/ZAM/Compile.h"
namespace zeek::detail { namespace zeek::detail {
@ -31,21 +36,76 @@ static std::vector<FuncInfo> funcs;
static ZAMCompiler* ZAM = nullptr; static ZAMCompiler* ZAM = nullptr;
static bool generating_CPP = false;
static std::string hash_dir; // for storing hashes of previous compilations
void optimize_func(ScriptFunc* f, std::shared_ptr<ProfileFunc> pf, static ScriptFuncPtr global_stmts;
ScopePtr scope, StmtPtr& body,
AnalyOpt& analysis_options) void analyze_func(ScriptFuncPtr f)
{
// Even if we're doing --optimize-only, we still track all functions
// here because the inliner will need the full list.
funcs.emplace_back(f, f->GetScope(), f->CurrentBody(),
f->CurrentPriority());
}
const FuncInfo* analyze_global_stmts(Stmt* stmts)
{
// We ignore analysis_options.only_func - if it's in use, later
// logic will keep this function from being compiled, but it's handy
// now to enter it into "funcs" so we have a FuncInfo to return.
auto id = install_ID("<global-stmts>", GLOBAL_MODULE_NAME, true, false);
auto empty_args_t = make_intrusive<RecordType>(nullptr);
auto func_t = make_intrusive<FuncType>(empty_args_t, nullptr, FUNC_FLAVOR_FUNCTION);
id->SetType(func_t);
auto sc = current_scope();
std::vector<IDPtr> empty_inits;
StmtPtr stmts_p{NewRef{}, stmts};
global_stmts = make_intrusive<ScriptFunc>(id, stmts_p, empty_inits,
sc->Length(), 0);
funcs.emplace_back(global_stmts, sc, stmts_p, 0);
return &funcs.back();
}
static bool optimize_AST(ScriptFunc* f, std::shared_ptr<ProfileFunc>& pf,
std::shared_ptr<Reducer>& rc, ScopePtr scope,
StmtPtr& body)
{
pf = std::make_shared<ProfileFunc>(f, body, true);
// RD_Decorate reduced_rds(pf, f, scope, body);
GenIDDefs ID_defs(pf, f, scope, body);
if ( reporter->Errors() > 0 )
return false;
rc->SetReadyToOptimize();
auto new_body = rc->Reduce(body);
if ( reporter->Errors() > 0 )
return false;
if ( analysis_options.dump_xform )
printf("Optimized: %s\n", obj_desc(new_body.get()).c_str());
f->ReplaceBody(body, new_body);
body = new_body;
return true;
}
static void optimize_func(ScriptFunc* f, std::shared_ptr<ProfileFunc> pf,
ScopePtr scope, StmtPtr& body)
{ {
if ( reporter->Errors() > 0 ) if ( reporter->Errors() > 0 )
return; return;
if ( ! analysis_options.activate )
return;
if ( analysis_options.only_func &&
*analysis_options.only_func != f->Name() )
return;
if ( analysis_options.only_func ) if ( analysis_options.only_func )
printf("Original: %s\n", obj_desc(body.get()).c_str()); printf("Original: %s\n", obj_desc(body.get()).c_str());
@ -53,10 +113,12 @@ void optimize_func(ScriptFunc* f, std::shared_ptr<ProfileFunc> pf,
// We're not able to optimize this. // We're not able to optimize this.
return; return;
if ( pf->NumWhenStmts() > 0 || pf->NumLambdas() > 0 ) const char* reason;
if ( ! is_ZAM_compilable(pf.get(), &reason) )
{ {
if ( analysis_options.only_func ) if ( analysis_options.report_uncompilable )
printf("Skipping analysis due to \"when\" statement or use of lambdas\n"); printf("Skipping compilation of %s due to %s\n",
f->Name(), reason);
return; return;
} }
@ -77,59 +139,35 @@ void optimize_func(ScriptFunc* f, std::shared_ptr<ProfileFunc> pf,
if ( ! new_body->IsReduced(rc.get()) ) if ( ! new_body->IsReduced(rc.get()) )
{ {
if ( non_reduced_perp ) if ( non_reduced_perp )
printf("Reduction inconsistency for %s: %s\n", f->Name(), reporter->InternalError("Reduction inconsistency for %s: %s\n", f->Name(),
obj_desc(non_reduced_perp).c_str()); obj_desc(non_reduced_perp).c_str());
else else
printf("Reduction inconsistency for %s\n", f->Name()); reporter->InternalError("Reduction inconsistency for %s\n", f->Name());
} }
checking_reduction = false; checking_reduction = false;
if ( analysis_options.only_func || analysis_options.dump_xform ) if ( analysis_options.dump_xform )
printf("Transformed: %s\n", obj_desc(new_body.get()).c_str()); printf("Transformed: %s\n", obj_desc(new_body.get()).c_str());
f->ReplaceBody(body, new_body); f->ReplaceBody(body, new_body);
body = new_body; body = new_body;
if ( analysis_options.optimize_AST ) if ( analysis_options.optimize_AST &&
{ ! optimize_AST(f, pf, rc, scope, body) )
pf = std::make_shared<ProfileFunc>(f, body, true);
RD_Decorate reduced_rds(pf);
reduced_rds.TraverseFunction(f, scope, body);
if ( reporter->Errors() > 0 )
{ {
pop_scope(); pop_scope();
return; return;
} }
rc->SetDefSetsMgr(reduced_rds.GetDefSetsMgr());
new_body = rc->Reduce(body);
if ( reporter->Errors() > 0 )
{
pop_scope();
return;
}
if ( analysis_options.only_func || analysis_options.dump_xform )
printf("Optimized: %s\n", obj_desc(new_body.get()).c_str());
f->ReplaceBody(body, new_body);
body = new_body;
}
// Profile the new body. // Profile the new body.
pf = std::make_shared<ProfileFunc>(f, body, true); pf = std::make_shared<ProfileFunc>(f, body, true);
// Compute its reaching definitions. // Compute its reaching definitions.
RD_Decorate reduced_rds(pf); // RD_Decorate reduced_rds(pf, f, scope, body);
GenIDDefs ID_defs(pf, f, scope, body);
reduced_rds.TraverseFunction(f, scope, body); rc->SetReadyToOptimize();
rc->SetDefSetsMgr(reduced_rds.GetDefSetsMgr());
auto ud = std::make_shared<UseDefs>(body, rc); auto ud = std::make_shared<UseDefs>(body, rc);
ud->Analyze(); ud->Analyze();
@ -145,80 +183,58 @@ void optimize_func(ScriptFunc* f, std::shared_ptr<ProfileFunc> pf,
body = new_body; body = new_body;
} }
int new_frame_size = int new_frame_size = scope->Length() + rc->NumTemps() +
scope->Length() + rc->NumTemps() + rc->NumNewLocals(); rc->NumNewLocals();
if ( new_frame_size > f->FrameSize() ) if ( new_frame_size > f->FrameSize() )
f->SetFrameSize(new_frame_size); f->SetFrameSize(new_frame_size);
if ( analysis_options.gen_ZAM_code )
{
ZAM = new ZAMCompiler(f, pf, scope, new_body, ud, rc);
new_body = ZAM->CompileBody();
if ( reporter->Errors() > 0 )
return;
if ( analysis_options.dump_ZAM )
ZAM->Dump();
f->ReplaceBody(body, new_body);
body = new_body;
}
pop_scope(); pop_scope();
} }
FuncInfo::FuncInfo(ScriptFuncPtr _func, ScopePtr _scope, StmtPtr _body,
int _priority)
: func(std::move(_func)), scope(std::move(_scope)),
body(std::move(_body)), priority(_priority)
{}
void FuncInfo::SetProfile(std::shared_ptr<ProfileFunc> _pf)
{ pf = std::move(_pf); }
void analyze_func(ScriptFuncPtr f)
{
if ( analysis_options.only_func &&
*analysis_options.only_func != f->Name() )
return;
funcs.emplace_back(f, f->GetScope(), f->CurrentBody(),
f->CurrentPriority());
}
const FuncInfo* analyze_global_stmts(Stmt* stmts)
{
// We ignore analysis_options.only_func - if it's in use, later
// logic will keep this function from being compiled, but it's handy
// now to enter it into "funcs" so we have a FuncInfo to return.
auto id = install_ID("<global-stmts>", GLOBAL_MODULE_NAME, true, false);
auto empty_args_t = make_intrusive<RecordType>(nullptr);
auto func_t = make_intrusive<FuncType>(empty_args_t, nullptr, FUNC_FLAVOR_FUNCTION);
id->SetType(func_t);
auto sc = current_scope();
std::vector<IDPtr> empty_inits;
StmtPtr stmts_p{NewRef{}, stmts};
auto sf = make_intrusive<ScriptFunc>(id, stmts_p, empty_inits, sc->Length(), 0);
funcs.emplace_back(sf, sc, stmts_p, 0);
return &funcs.back();
}
static void check_env_opt(const char* opt, bool& opt_flag) static void check_env_opt(const char* opt, bool& opt_flag)
{ {
if ( getenv(opt) ) if ( getenv(opt) )
opt_flag = true; opt_flag = true;
} }
void analyze_scripts() static void init_options()
{
static bool did_init = false;
static std::string hash_dir;
bool generating_CPP = false;
if ( ! did_init )
{ {
auto hd = getenv("ZEEK_HASH_DIR"); auto hd = getenv("ZEEK_HASH_DIR");
if ( hd ) if ( hd )
hash_dir = std::string(hd) + "/"; hash_dir = std::string(hd) + "/";
// ZAM-related options.
check_env_opt("ZEEK_DUMP_XFORM", analysis_options.dump_xform); check_env_opt("ZEEK_DUMP_XFORM", analysis_options.dump_xform);
check_env_opt("ZEEK_DUMP_UDS", analysis_options.dump_uds); check_env_opt("ZEEK_DUMP_UDS", analysis_options.dump_uds);
check_env_opt("ZEEK_INLINE", analysis_options.inliner); check_env_opt("ZEEK_INLINE", analysis_options.inliner);
check_env_opt("ZEEK_OPT", analysis_options.optimize_AST); check_env_opt("ZEEK_OPT", analysis_options.optimize_AST);
check_env_opt("ZEEK_XFORM", analysis_options.activate); check_env_opt("ZEEK_XFORM", analysis_options.activate);
check_env_opt("ZEEK_ZAM", analysis_options.gen_ZAM);
check_env_opt("ZEEK_COMPILE_ALL", analysis_options.compile_all);
check_env_opt("ZEEK_ZAM_CODE", analysis_options.gen_ZAM_code);
check_env_opt("ZEEK_NO_ZAM_OPT", analysis_options.no_ZAM_opt);
check_env_opt("ZEEK_DUMP_ZAM", analysis_options.dump_ZAM);
check_env_opt("ZEEK_PROFILE", analysis_options.profile_ZAM);
// Compile-to-C++-related options.
check_env_opt("ZEEK_ADD_CPP", analysis_options.add_CPP); check_env_opt("ZEEK_ADD_CPP", analysis_options.add_CPP);
check_env_opt("ZEEK_UPDATE_CPP", analysis_options.update_CPP); check_env_opt("ZEEK_UPDATE_CPP", analysis_options.update_CPP);
check_env_opt("ZEEK_GEN_CPP", analysis_options.gen_CPP); check_env_opt("ZEEK_GEN_CPP", analysis_options.gen_CPP);
@ -252,16 +268,10 @@ void analyze_scripts()
generating_CPP = true; generating_CPP = true;
if ( analysis_options.use_CPP && generating_CPP ) if ( analysis_options.use_CPP && generating_CPP )
{ reporter->FatalError("generating C++ incompatible with using C++");
reporter->Error("generating C++ incompatible with using C++");
exit(1);
}
if ( analysis_options.use_CPP && ! CPP_init_hook ) if ( analysis_options.use_CPP && ! CPP_init_hook )
{ reporter->FatalError("no C++ functions available to use");
reporter->Error("no C++ functions available to use");
exit(1);
}
auto usage = getenv("ZEEK_USAGE_ISSUES"); auto usage = getenv("ZEEK_USAGE_ISSUES");
@ -275,28 +285,40 @@ void analyze_scripts()
analysis_options.only_func = zo; analysis_options.only_func = zo;
} }
if ( analysis_options.only_func || if ( analysis_options.gen_ZAM )
analysis_options.optimize_AST || {
analysis_options.usage_issues > 0 ) analysis_options.gen_ZAM_code = true;
analysis_options.activate = true; analysis_options.inliner = true;
analysis_options.optimize_AST = true;
did_init = true;
} }
if ( ! analysis_options.activate && ! analysis_options.inliner && if ( analysis_options.dump_ZAM )
! generating_CPP && ! analysis_options.report_CPP && analysis_options.gen_ZAM_code = true;
! analysis_options.use_CPP )
// Avoid profiling overhead.
return;
// Now that everything's parsed and BiF's have been initialized, if ( analysis_options.only_func )
// profile the functions. {
auto pfs = std::make_unique<ProfileFuncs>(funcs, is_CPP_compilable, false); // Note, this comes after the statement above because for
// --optimize-only we don't necessarily want to go all
// the way to *generating* ZAM code, though we'll want to
// dump it *if* we generate it.
analysis_options.dump_xform = analysis_options.dump_ZAM = true;
if ( CPP_init_hook ) if ( analysis_options.gen_ZAM_code || generating_CPP )
(*CPP_init_hook)(); analysis_options.report_uncompilable = true;
}
if ( analysis_options.report_CPP ) if ( analysis_options.report_uncompilable &&
! analysis_options.gen_ZAM_code && ! generating_CPP )
reporter->FatalError("report-uncompilable requires generation of ZAM or C++");
if ( analysis_options.only_func ||
analysis_options.optimize_AST ||
analysis_options.gen_ZAM_code ||
analysis_options.usage_issues > 0 )
analysis_options.activate = true;
}
static void report_CPP()
{ {
if ( ! CPP_init_hook ) if ( ! CPP_init_hook )
{ {
@ -331,6 +353,7 @@ void analyze_scripts()
} }
printf("\nAdditional C++ script bodies available:\n"); printf("\nAdditional C++ script bodies available:\n");
int addl = 0; int addl = 0;
for ( const auto& s : compiled_scripts ) for ( const auto& s : compiled_scripts )
if ( already_reported.count(s.first) == 0 ) if ( already_reported.count(s.first) == 0 )
@ -342,11 +365,9 @@ void analyze_scripts()
if ( addl == 0 ) if ( addl == 0 )
printf("(none)\n"); printf("(none)\n");
exit(0);
} }
if ( analysis_options.use_CPP ) static void use_CPP()
{ {
for ( auto& f : funcs ) for ( auto& f : funcs )
{ {
@ -393,15 +414,27 @@ void analyze_scripts()
(*cb)(); (*cb)();
} }
if ( generating_CPP ) static void generate_CPP(std::unique_ptr<ProfileFuncs>& pfs)
{ {
const auto hash_name = hash_dir + "CPP-hashes"; const auto hash_name = hash_dir + "CPP-hashes";
auto hm = std::make_unique<CPPHashManager>(hash_name.c_str(), auto hm = std::make_unique<CPPHashManager>(hash_name.c_str(),
analysis_options.add_CPP); analysis_options.add_CPP);
if ( ! analysis_options.gen_CPP ) if ( analysis_options.gen_CPP )
{ {
if ( analysis_options.only_func )
{ // deactivate all functions except the target one
for ( auto& func : funcs )
{
auto fn = func.Func()->Name();
if ( *analysis_options.only_func != fn )
func.SetSkip(true);
}
}
}
else
{ // doing add-C++ instead, so look for previous compilations
for ( auto& func : funcs ) for ( auto& func : funcs )
{ {
auto hash = func.Profile()->HashVal(); auto hash = func.Profile()->HashVal();
@ -419,41 +452,21 @@ void analyze_scripts()
const auto addl_name = hash_dir + "CPP-gen-addl.h"; const auto addl_name = hash_dir + "CPP-gen-addl.h";
CPPCompile cpp(funcs, *pfs, gen_name, addl_name, *hm, CPPCompile cpp(funcs, *pfs, gen_name, addl_name, *hm,
analysis_options.gen_CPP || analysis_options.gen_CPP || analysis_options.update_CPP,
analysis_options.update_CPP, analysis_options.gen_standalone_CPP,
analysis_options.gen_standalone_CPP); analysis_options.report_uncompilable);
exit(0);
} }
if ( analysis_options.usage_issues > 0 && analysis_options.optimize_AST ) static void find_when_funcs(std::unique_ptr<ProfileFuncs>& pfs,
std::unordered_set<const ScriptFunc*>& when_funcs)
{ {
fprintf(stderr, "warning: \"-O optimize-AST\" option is incompatible with -u option, deactivating optimization\n");
analysis_options.optimize_AST = false;
}
// Re-profile the functions, this time without worrying about
// compatibility with compilation to C++. Note that the first
// profiling pass above may have marked some of the functions
// as to-skip, so first clear those markings. Once we have
// full compile-to-C++ and ZAM support for all Zeek language
// features, we can remove the re-profiling here.
for ( auto& f : funcs )
f.SetSkip(false);
pfs = std::make_unique<ProfileFuncs>(funcs, nullptr, true);
// Figure out which functions either directly or indirectly // Figure out which functions either directly or indirectly
// appear in "when" clauses. // appear in "when" clauses.
// Final set of functions involved in "when" clauses.
std::unordered_set<const ScriptFunc*> when_funcs;
// Which functions we still need to analyze. // Which functions we still need to analyze.
std::unordered_set<const ScriptFunc*> when_funcs_to_do; std::unordered_set<const ScriptFunc*> when_funcs_to_do;
for ( auto& f : funcs ) for ( auto& f : funcs )
{
if ( f.Profile()->WhenCalls().size() > 0 ) if ( f.Profile()->WhenCalls().size() > 0 )
{ {
when_funcs.insert(f.Func()); when_funcs.insert(f.Func());
@ -463,17 +476,6 @@ void analyze_scripts()
ASSERT(pfs->FuncProf(bf)); ASSERT(pfs->FuncProf(bf));
when_funcs_to_do.insert(bf); when_funcs_to_do.insert(bf);
} }
#ifdef NOT_YET
if ( analysis_options.report_uncompilable )
{
ODesc d;
f.ScriptFunc()->AddLocation(&d);
printf("%s cannot be compiled due to use of \"when\" statement (%s)\n",
f.ScriptFunc()->Name(), d.Description());
}
#endif // NOT_YET
}
} }
// Set of new functions to put on to-do list. Separate from // Set of new functions to put on to-do list. Separate from
@ -501,12 +503,41 @@ void analyze_scripts()
when_funcs_to_do = new_to_do; when_funcs_to_do = new_to_do;
new_to_do.clear(); new_to_do.clear();
} }
}
static void analyze_scripts_for_ZAM(std::unique_ptr<ProfileFuncs>& pfs)
{
if ( analysis_options.usage_issues > 0 &&
analysis_options.optimize_AST )
{
fprintf(stderr, "warning: \"-O optimize-AST\" option is incompatible with -u option, deactivating optimization\n");
analysis_options.optimize_AST = false;
}
// Re-profile the functions, now without worrying about compatibility
// with compilation to C++. Note that the first profiling pass earlier
// may have marked some of the functions as to-skip, so first clear
// those markings. Once we have full compile-to-C++ and ZAM support
// for all Zeek language features, we can remove the re-profiling here.
for ( auto& f : funcs )
f.SetSkip(false);
pfs = std::make_unique<ProfileFuncs>(funcs, nullptr, true);
// Set of functions involved (directly or indirectly) in "when"
// clauses.
std::unordered_set<const ScriptFunc*> when_funcs;
find_when_funcs(pfs, when_funcs);
bool report_recursive = analysis_options.report_recursive;
std::unique_ptr<Inliner> inl; std::unique_ptr<Inliner> inl;
if ( analysis_options.inliner ) if ( analysis_options.inliner )
inl = std::make_unique<Inliner>(funcs, analysis_options.report_recursive); inl = std::make_unique<Inliner>(funcs, report_recursive);
if ( ! analysis_options.activate ) if ( ! analysis_options.activate )
// Some --optimize options stop short of AST transformations,
// for development/debugging purposes.
return; return;
// The following tracks inlined functions that are also used // The following tracks inlined functions that are also used
@ -515,6 +546,9 @@ void analyze_scripts()
// since it won't be consulted in that case. // since it won't be consulted in that case.
std::unordered_set<Func*> func_used_indirectly; std::unordered_set<Func*> func_used_indirectly;
if ( global_stmts )
func_used_indirectly.insert(global_stmts.get());
if ( inl ) if ( inl )
{ {
for ( auto& f : funcs ) for ( auto& f : funcs )
@ -540,23 +574,67 @@ void analyze_scripts()
{ {
auto func = f.Func(); auto func = f.Func();
if ( ! analysis_options.compile_all && if ( analysis_options.only_func )
{
if ( *analysis_options.only_func != func->Name() )
continue;
}
else if ( ! analysis_options.compile_all &&
inl && inl->WasInlined(func) && inl && inl->WasInlined(func) &&
func_used_indirectly.count(func) == 0 ) func_used_indirectly.count(func) == 0 )
// No need to compile as it won't be // No need to compile as it won't be called directly.
// called directly.
continue;
if ( when_funcs.count(func) > 0 )
// We don't try to compile these.
continue; continue;
auto new_body = f.Body(); auto new_body = f.Body();
optimize_func(func, f.ProfilePtr(), f.Scope(), new_body, optimize_func(func, f.ProfilePtr(), f.Scope(), new_body);
analysis_options);
f.SetBody(new_body); f.SetBody(new_body);
} }
} }
void analyze_scripts()
{
static bool did_init = false;
if ( ! did_init )
{
init_options();
did_init = true;
}
if ( ! analysis_options.activate && ! analysis_options.inliner &&
! generating_CPP && ! analysis_options.report_CPP &&
! analysis_options.use_CPP )
// No work to do, avoid profiling overhead.
return;
// Now that everything's parsed and BiF's have been initialized,
// profile the functions.
auto pfs = std::make_unique<ProfileFuncs>(funcs, is_CPP_compilable,
false);
if ( CPP_init_hook )
(*CPP_init_hook)();
if ( analysis_options.report_CPP )
{
report_CPP();
exit(0);
}
if ( analysis_options.use_CPP )
use_CPP();
if ( generating_CPP )
{
generate_CPP(pfs);
exit(0);
}
// At this point we're done with C++ considerations, so instead
// we are compiling to ZAM.
analyze_scripts_for_ZAM(pfs);
}
} // namespace zeek::detail } // namespace zeek::detail

View file

@ -125,7 +125,11 @@ using ScriptFuncPtr = IntrusivePtr<ScriptFunc>;
// Info we need for tracking an instance of a function. // Info we need for tracking an instance of a function.
class FuncInfo { class FuncInfo {
public: public:
FuncInfo(ScriptFuncPtr func, ScopePtr scope, StmtPtr body, int priority); FuncInfo(ScriptFuncPtr _func, ScopePtr _scope, StmtPtr _body,
int _priority)
: func(std::move(_func)), scope(std::move(_scope)),
body(std::move(_body)), priority(_priority)
{}
ScriptFunc* Func() const { return func.get(); } ScriptFunc* Func() const { return func.get(); }
const ScriptFuncPtr& FuncPtr() const { return func; } const ScriptFuncPtr& FuncPtr() const { return func; }
@ -134,11 +138,11 @@ public:
int Priority() const { return priority; } int Priority() const { return priority; }
const ProfileFunc* Profile() const { return pf.get(); } const ProfileFunc* Profile() const { return pf.get(); }
std::shared_ptr<ProfileFunc> ProfilePtr() const { return pf; } std::shared_ptr<ProfileFunc> ProfilePtr() const { return pf; }
const std::string& SaveFile() const { return save_file; }
void SetBody(StmtPtr new_body) { body = std::move(new_body); } void SetBody(StmtPtr new_body) { body = std::move(new_body); }
void SetProfile(std::shared_ptr<ProfileFunc> _pf); // void SetProfile(std::shared_ptr<ProfileFunc> _pf);
void SetSaveFile(std::string _sf) { save_file = std::move(_sf); } void SetProfile(std::shared_ptr<ProfileFunc> _pf)
{ pf = std::move(_pf); }
// The following provide a way of marking FuncInfo's as // The following provide a way of marking FuncInfo's as
// should-be-skipped for script optimization, generally because // should-be-skipped for script optimization, generally because
@ -156,10 +160,6 @@ protected:
// Whether to skip optimizing this function. // Whether to skip optimizing this function.
bool skip = false; bool skip = false;
// If we're saving this function in a file, this is the name
// of the file to use.
std::string save_file;
}; };

View file

@ -8,6 +8,7 @@
#include "zeek/Reporter.h" #include "zeek/Reporter.h"
#include "zeek/Desc.h" #include "zeek/Desc.h"
#include "zeek/Traverse.h" #include "zeek/Traverse.h"
#include "zeek/script_opt/IDOptInfo.h"
#include "zeek/script_opt/Reduce.h" #include "zeek/script_opt/Reduce.h"
@ -34,6 +35,8 @@ StmtPtr Stmt::Reduce(Reducer* c)
return null; return null;
} }
c->SetCurrStmt(this);
return DoReduce(c); return DoReduce(c);
} }
@ -846,7 +849,9 @@ bool StmtList::ReduceStmt(int& s_i, StmtPList* f_stmts, Reducer* c)
auto& s_i_succ = Stmts()[s_i + 1]; auto& s_i_succ = Stmts()[s_i + 1];
// Don't reduce s_i_succ. If it's what we're // Don't reduce s_i_succ. If it's what we're
// looking for, it's already reduced. // looking for, it's already reduced. Plus
// that's what Reducer::MergeStmts expects (not that
// it really matters, per the comment there).
auto merge = c->MergeStmts(var, rhs, s_i_succ); auto merge = c->MergeStmts(var, rhs, s_i_succ);
if ( merge ) if ( merge )
{ {
@ -1014,10 +1019,19 @@ StmtPtr CatchReturnStmt::DoReduce(Reducer* c)
return make_intrusive<NullStmt>(); return make_intrusive<NullStmt>();
} }
auto assign = make_intrusive<AssignExpr>(ret_var->Duplicate(), auto rv_dup = ret_var->Duplicate();
ret_e->Duplicate(), auto ret_e_dup = ret_e->Duplicate();
auto assign = make_intrusive<AssignExpr>(rv_dup, ret_e_dup,
false); false);
assign_stmt = make_intrusive<ExprStmt>(assign); assign_stmt = make_intrusive<ExprStmt>(assign);
if ( ret_e_dup->Tag() == EXPR_CONST )
{
auto c = ret_e_dup->AsConstExpr();
rv_dup->AsNameExpr()->Id()->GetOptInfo()->SetConst(c);
}
return assign_stmt; return assign_stmt;
} }

View file

@ -14,28 +14,15 @@ TempVar::TempVar(int num, const TypePtr& t, ExprPtr _rhs) : type(t)
rhs = std::move(_rhs); rhs = std::move(_rhs);
} }
void TempVar::SetAlias(IDPtr _alias, const DefPoints* _dps) void TempVar::SetAlias(IDPtr _alias)
{ {
if ( alias ) if ( alias )
reporter->InternalError("Re-aliasing a temporary"); reporter->InternalError("Re-aliasing a temporary");
if ( ! _dps )
{
printf("trying to alias %s to %s\n", name.c_str(), _alias->Name());
reporter->InternalError("Empty dps for alias");
}
if ( alias == id ) if ( alias == id )
reporter->InternalError("Creating alias loop"); reporter->InternalError("Creating alias loop");
alias = std::move(_alias); alias = std::move(_alias);
dps = _dps;
}
void TempVar::SetDPs(const DefPoints* _dps)
{
ASSERT(_dps->length() == 1);
dps = _dps;
} }
} // zeek::detail } // zeek::detail

View file

@ -9,6 +9,7 @@
#include "zeek/ID.h" #include "zeek/ID.h"
#include "zeek/Expr.h" #include "zeek/Expr.h"
#include "zeek/script_opt/IDOptInfo.h"
#include "zeek/script_opt/ReachingDefs.h" #include "zeek/script_opt/ReachingDefs.h"
namespace zeek::detail { namespace zeek::detail {
@ -22,21 +23,24 @@ public:
const Expr* RHS() const { return rhs.get(); } const Expr* RHS() const { return rhs.get(); }
IDPtr Id() const { return id; } IDPtr Id() const { return id; }
void SetID(IDPtr _id) { id = std::move(_id); } void SetID(IDPtr _id)
{
id = std::move(_id);
id->GetOptInfo()->SetTemp();
}
void Deactivate() { active = false; } void Deactivate() { active = false; }
bool IsActive() const { return active; } bool IsActive() const { return active; }
// Associated constant expression, if any. // Associated constant expression, if any.
const ConstExpr* Const() const { return const_expr; } const ConstExpr* Const() const { return id->GetOptInfo()->Const(); }
// The most use of "const" in any single line in the Zeek // The most use of "const" in any single line in the Zeek
// codebase :-P ... though only by one! // codebase :-P ... though only by one!
void SetConst(const ConstExpr* _const) { const_expr = _const; } void SetConst(const ConstExpr* _const)
{ id->GetOptInfo()->SetConst(_const); }
IDPtr Alias() const { return alias; } IDPtr Alias() const { return alias; }
const DefPoints* DPs() const { return dps; } void SetAlias(IDPtr id);
void SetAlias(IDPtr id, const DefPoints* dps);
void SetDPs(const DefPoints* _dps);
const RDPtr& MaxRDs() const { return max_rds; } const RDPtr& MaxRDs() const { return max_rds; }
void SetMaxRDs(RDPtr rds) { max_rds = std::move(rds); } void SetMaxRDs(RDPtr rds) { max_rds = std::move(rds); }
@ -47,9 +51,7 @@ protected:
const TypePtr& type; const TypePtr& type;
ExprPtr rhs; ExprPtr rhs;
bool active = true; bool active = true;
const ConstExpr* const_expr = nullptr;
IDPtr alias; IDPtr alias;
const DefPoints* dps = nullptr;
RDPtr max_rds; RDPtr max_rds;
}; };