From 9a9995bdd129f79e84684b23d76773f908b7e986 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 16 Aug 2021 10:52:41 -0700 Subject: [PATCH] tracking of optimization information associated with identifiers --- src/ID.cc | 15 +- src/ID.h | 21 +- src/parse.y | 5 +- src/script_opt/CPP/Inits.cc | 3 +- src/script_opt/IDOptInfo.cc | 523 ++++++++++++++++++++++++++++++++++ src/script_opt/IDOptInfo.h | 272 ++++++++++++++++++ src/script_opt/ProfileFunc.cc | 3 +- 7 files changed, 822 insertions(+), 20 deletions(-) create mode 100644 src/script_opt/IDOptInfo.cc create mode 100644 src/script_opt/IDOptInfo.h diff --git a/src/ID.cc b/src/ID.cc index 620e35ef11..99b4101aeb 100644 --- a/src/ID.cc +++ b/src/ID.cc @@ -20,6 +20,7 @@ #include "zeek/zeekygen/ScriptInfo.h" #include "zeek/zeekygen/utils.h" #include "zeek/module_util.h" +#include "zeek/script_opt/IDOptInfo.h" namespace zeek { @@ -119,6 +120,8 @@ ID::ID(const char* arg_name, IDScope arg_scope, bool arg_is_export) is_type = false; offset = 0; + opt_info = new IDOptInfo(this); + infer_return_type = false; SetLocationInfo(&start_location, &end_location); @@ -127,6 +130,7 @@ ID::ID(const char* arg_name, IDScope arg_scope, bool arg_is_export) ID::~ID() { delete [] name; + delete opt_info; } std::string ID::ModuleName() const @@ -285,11 +289,6 @@ const AttrPtr& ID::GetAttr(AttrTag t) const return attrs ? 
attrs->Find(t) : Attr::nil; } -void ID::AddInitExpr(ExprPtr init_expr) - { - init_exprs.emplace_back(std::move(init_expr)); - } - bool ID::IsDeprecated() const { return GetAttr(ATTR_DEPRECATED) != nullptr; @@ -676,6 +675,12 @@ std::vector ID::GetOptionHandlers() const return v; } + +void IDOptInfo::AddInitExpr(ExprPtr init_expr) + { + init_exprs.emplace_back(std::move(init_expr)); + } + } // namespace detail } // namespace zeek diff --git a/src/ID.h b/src/ID.h index 0d6e8c4e34..9adfb5664a 100644 --- a/src/ID.h +++ b/src/ID.h @@ -7,7 +7,6 @@ #include #include -#include "zeek/IntrusivePtr.h" #include "zeek/Obj.h" #include "zeek/Attr.h" #include "zeek/Notifier.h" @@ -44,6 +43,8 @@ enum IDScope { SCOPE_FUNCTION, SCOPE_MODULE, SCOPE_GLOBAL }; class ID; using IDPtr = IntrusivePtr; +class IDOptInfo; + class ID final : public Obj, public notifier::detail::Modifiable { public: static inline const IDPtr nil; @@ -112,10 +113,6 @@ public: const AttrPtr& GetAttr(AttrTag t) const; - void AddInitExpr(ExprPtr init_expr); - const std::vector& GetInitExprs() const - { return init_exprs; } - bool IsDeprecated() const; void MakeDeprecated(ExprPtr deprecation); @@ -144,6 +141,8 @@ public: void AddOptionHandler(FuncPtr callback, int priority); std::vector GetOptionHandlers() const; + IDOptInfo* GetOptInfo() const { return opt_info; } + protected: void EvalFunc(ExprPtr ef, ExprPtr ev); @@ -161,15 +160,15 @@ protected: ValPtr val; AttributesPtr attrs; - // Expressions used to initialize the identifier, for use by - // the scripts-to-C++ compiler. We need to track all of them - // because it's possible that a global value gets created using - // one of the earlier instances rather than the last one. - std::vector init_exprs; - // contains list of functions that are called when an option changes std::multimap option_handlers; + // Information managed by script optimization. 
We package this + // up into a separate object for purposes of modularity, and, + // via the associated pointer, to allow it to be modified in + // contexts where the ID is itself "const". + IDOptInfo* opt_info; + }; } // namespace zeek::detail diff --git a/src/parse.y b/src/parse.y index 7ce977d2d4..d3d3710840 100644 --- a/src/parse.y +++ b/src/parse.y @@ -98,6 +98,7 @@ #include "zeek/zeekygen/Manager.h" #include "zeek/module_util.h" #include "zeek/IntrusivePtr.h" +#include "zeek/script_opt/IDOptInfo.h" extern const char* filename; // Absolute path of file currently being parsed. extern const char* last_filename; // Absolute path of last file parsed. @@ -244,7 +245,7 @@ static void build_global(ID* id, Type* t, InitClass ic, Expr* e, add_global(id_ptr, std::move(t_ptr), ic, e_ptr, std::move(attrs_ptr), dt); - id->AddInitExpr(e_ptr); + id->GetOptInfo()->AddInitExpr(e_ptr); if ( dt == VAR_REDEF ) zeekygen_mgr->Redef(id, ::filename, ic, std::move(e_ptr)); @@ -265,7 +266,7 @@ static StmtPtr build_local(ID* id, Type* t, InitClass ic, Expr* e, auto init = add_local(std::move(id_ptr), std::move(t_ptr), ic, e_ptr, std::move(attrs_ptr), dt); - id->AddInitExpr(std::move(e_ptr)); + id->GetOptInfo()->AddInitExpr(std::move(e_ptr)); if ( do_coverage ) script_coverage_mgr.AddStmt(init.get()); diff --git a/src/script_opt/CPP/Inits.cc b/src/script_opt/CPP/Inits.cc index 3e3e9de63c..7f22b349eb 100644 --- a/src/script_opt/CPP/Inits.cc +++ b/src/script_opt/CPP/Inits.cc @@ -6,6 +6,7 @@ #include "zeek/module_util.h" #include "zeek/script_opt/ProfileFunc.h" +#include "zeek/script_opt/IDOptInfo.h" #include "zeek/script_opt/CPP/Compile.h" @@ -122,7 +123,7 @@ void CPPCompile::GenGlobalInit(const ID* g, string& gl, const ValPtr& v) // expression anyway.) // Use the final initialization expression. 
- auto& init_exprs = g->GetInitExprs(); + auto& init_exprs = g->GetOptInfo()->GetInitExprs(); init_val = GenExpr(init_exprs.back(), GEN_VAL_PTR, false); } else diff --git a/src/script_opt/IDOptInfo.cc b/src/script_opt/IDOptInfo.cc new file mode 100644 index 0000000000..a9c1e5d0fd --- /dev/null +++ b/src/script_opt/IDOptInfo.cc @@ -0,0 +1,523 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +#include "zeek/Stmt.h" +#include "zeek/Expr.h" +#include "zeek/Desc.h" +#include "zeek/script_opt/IDOptInfo.h" +#include "zeek/script_opt/StmtOptInfo.h" + + +namespace zeek::detail { + +const char* trace_ID = nullptr; + +IDDefRegion::IDDefRegion(const Stmt* s, bool maybe, int def) + { + start_stmt = s->GetOptInfo()->stmt_num; + block_level = s->GetOptInfo()->block_level; + + Init(maybe, def); + } + +IDDefRegion::IDDefRegion(int stmt_num, int level, bool maybe, int def) + { + start_stmt = stmt_num; + block_level = level; + + Init(maybe, def); + } + +IDDefRegion::IDDefRegion(const Stmt* s, const IDDefRegion& ur) + { + start_stmt = s->GetOptInfo()->stmt_num; + block_level = s->GetOptInfo()->block_level; + + Init(ur.MaybeDefined(), ur.DefinedAfter()); + SetDefExpr(ur.DefExprAfter()); + } + +void IDDefRegion::Dump() const + { + printf("\t%d->%d (%d): ", start_stmt, end_stmt, block_level); + + if ( defined != NO_DEF ) + printf("%d (%s)", defined, def_expr ? obj_desc(def_expr.get()).c_str() : ""); + else if ( maybe_defined ) + printf("?"); + else + printf("N/A"); + + printf("\n"); + } + + +void IDOptInfo::Clear() + { + static bool did_init = false; + + if ( ! 
did_init ) + { + trace_ID = getenv("ZEEK_TRACE_ID"); + did_init = true; + } + + init_exprs.clear(); + usage_regions.clear(); + pending_confluences.clear(); + confluence_stmts.clear(); + + tracing = trace_ID && util::streq(trace_ID, my_id->Name()); + } + +void IDOptInfo::DefinedAfter(const Stmt* s, const ExprPtr& e, + const std::vector& conf_blocks, + int conf_start) + { + if ( tracing ) + printf("ID %s defined at %d: %s\n", trace_ID, s ? s->GetOptInfo()->stmt_num : NO_DEF, s ? obj_desc(s).c_str() : ""); + + if ( ! s ) + { // This is a definition-upon-entry + ASSERT(usage_regions.size() == 0); + usage_regions.emplace_back(0, 0, true, 0); + if ( tracing ) + DumpBlocks(); + return; + } + + auto s_oi = s->GetOptInfo(); + auto stmt_num = s_oi->stmt_num; + + if ( usage_regions.size() == 0 ) + { + // We're seeing this identifier for the first time, + // so we don't have any context or confluence + // information for it. Create its "backstory" region. + ASSERT(confluence_stmts.size() == 0); + usage_regions.emplace_back(0, 0, false, NO_DEF); + } + + // Any pending regions stop prior to this statement. + EndRegionsAfter(stmt_num - 1, s_oi->block_level); + + // Fill in any missing confluence blocks. + int b = 0; // index into our own blocks + int n = confluence_stmts.size(); + + while ( b < n && conf_start < conf_blocks.size() ) + { + auto outer_block = conf_blocks[conf_start]; + + // See if we can find that block. + for ( ; b < n; ++b ) + if ( confluence_stmts[b] == outer_block ) + break; + + if ( b < n ) + { // We found it, look for the next one. + ++conf_start; + ++b; + } + } + + // Add in the remainder. + for ( ; conf_start < conf_blocks.size(); ++conf_start ) + StartConfluenceBlock(conf_blocks[conf_start]); + + // Create a new region corresponding to this definition. + // This needs to come after filling out the confluence + // blocks, since they'll create their own (earlier) regions. 
+ usage_regions.emplace_back(s, true, stmt_num); + usage_regions.back().SetDefExpr(e); + + if ( tracing ) + DumpBlocks(); + } + +void IDOptInfo::ReturnAt(const Stmt* s) + { + if ( tracing ) + printf("ID %s subject to return %d: %s\n", trace_ID, s->GetOptInfo()->stmt_num, obj_desc(s).c_str()); + + // Look for a catch-return that this would branch to. + for ( int i = confluence_stmts.size() - 1; i >= 0; --i ) + if ( confluence_stmts[i]->Tag() == STMT_CATCH_RETURN ) + { + BranchBeyond(s, confluence_stmts[i], false); + if ( tracing ) + DumpBlocks(); + return; + } + + auto s_oi = s->GetOptInfo(); + EndRegionsAfter(s_oi->stmt_num - 1, s_oi->block_level); + + if ( tracing ) + DumpBlocks(); + } + +void IDOptInfo::BranchBackTo(const Stmt* from, const Stmt* to, bool close_all) + { + if ( tracing ) + printf("ID %s branching back from %d->%d: %s\n", trace_ID, + from->GetOptInfo()->stmt_num, + to->GetOptInfo()->stmt_num, obj_desc(from).c_str()); + + // The key notion we need to update is whether the regions + // between from_reg and to_reg still have unique definitions. + // Confluence due to the branch can only take that away, it + // can't instill it. (OTOH, in principle it could update + // "maybe defined", but not in a way we care about, since we + // only draw upon that for diagnosing usage errors, and for + // those the error has already occurred on entry into the loop.) + + auto from_reg = ActiveRegion(); + auto f_oi = from->GetOptInfo(); + auto t_oi = to->GetOptInfo(); + auto t_r_ind = FindRegionBeforeIndex(t_oi->stmt_num); + auto& t_r = usage_regions[t_r_ind]; + + if ( from_reg && from_reg->DefinedAfter() != t_r.DefinedAfter() && + t_r.DefinedAfter() != NO_DEF ) + { + // They disagree on the definition. Move the definition + // point to be the start of the confluence region, and + // update any blocks inside the region that refer to + // a pre-"to" definition to instead reflect the confluence + // region (and remove their definition expressions). 
+ int new_def = t_oi->stmt_num; + + for ( auto i = t_r_ind; i < usage_regions.size(); ++i ) + { + auto& ur = usage_regions[i]; + + if ( ur.DefinedAfter() < new_def ) + { + ASSERT(ur.DefinedAfter() != NO_DEF); + ur.UpdateDefinedAfter(new_def); + ur.SetDefExpr(nullptr); + } + } + } + + int level = close_all ? t_oi->block_level + 1 : f_oi->block_level; + EndRegionsAfter(f_oi->stmt_num, level); + + if ( tracing ) + DumpBlocks(); + } + +void IDOptInfo::BranchBeyond(const Stmt* end_s, const Stmt* block, + bool close_all) + { + if ( tracing ) + printf("ID %s branching forward from %d beyond %d: %s\n", + trace_ID, end_s->GetOptInfo()->stmt_num, + block->GetOptInfo()->stmt_num, obj_desc(end_s).c_str()); + + ASSERT(pending_confluences.count(block) > 0); + + auto ar = ActiveRegionIndex(); + if ( ar != NO_DEF ) + pending_confluences[block].insert(ar); + + auto end_oi = end_s->GetOptInfo(); + int level; + if ( close_all ) + level = block->GetOptInfo()->block_level + 1; + else + level = end_oi->block_level; + + EndRegionsAfter(end_oi->stmt_num, level); + + if ( tracing ) + DumpBlocks(); + } + +void IDOptInfo::StartConfluenceBlock(const Stmt* s) + { + if ( tracing ) + printf("ID %s starting confluence block at %d: %s\n", trace_ID, s->GetOptInfo()->stmt_num, obj_desc(s).c_str()); + + auto s_oi = s->GetOptInfo(); + int block_level = s_oi->block_level; + + // End any confluence blocks at this or inner levels. + for ( auto cs : confluence_stmts ) + { + ASSERT(cs != s); + + auto cs_level = cs->GetOptInfo()->block_level; + + if ( cs_level >= block_level ) + { + ASSERT(cs_level == block_level); + ASSERT(cs == confluence_stmts.back()); + EndRegionsAfter(s_oi->stmt_num - 1, block_level); + } + } + + ConfluenceSet empty_set; + pending_confluences[s] = empty_set; + confluence_stmts.push_back(s); + block_has_orig_flow.push_back(s_oi->contains_branch_beyond); + + // Inherit the closest open, outer region, if necessary. 
+ for ( int i = usage_regions.size() - 1; i >= 0; --i ) + { + auto& ur = usage_regions[i]; + + if ( ur.EndsAfter() == NO_DEF ) + { + if ( ur.BlockLevel() > block_level ) + { + // This can happen for regions left over + // from a previous catch-return, which + // we haven't closed out yet because we + // don't track new identifiers beyond + // outer CRs. Close the region now. + ASSERT(s->Tag() == STMT_CATCH_RETURN); + ur.SetEndsAfter(s_oi->stmt_num - 1); + continue; + } + + if ( ur.BlockLevel() < block_level ) + // Didn't find one at our own level, + // so create one inherited from the + // outer one. + usage_regions.emplace_back(s, ur); + + // We now have one at our level that we can use. + break; + } + } + + if ( tracing ) + DumpBlocks(); + } + +void IDOptInfo::ConfluenceBlockEndsAfter(const Stmt* s, bool no_orig_flow) + { + auto stmt_num = s->GetOptInfo()->stmt_num; + + ASSERT(confluence_stmts.size() > 0); + auto cs = confluence_stmts.back(); + auto& pc = pending_confluences[cs]; + + // End any active regions. Those will all have a level >= that + // of cs, since we're now returning to cs's level. + int cs_stmt_num = cs->GetOptInfo()->stmt_num; + int cs_level = cs->GetOptInfo()->block_level; + + if ( tracing ) + printf("ID %s ending (%d) confluence block (%d, level %d) at %d: %s\n", trace_ID, no_orig_flow, cs_stmt_num, cs_level, stmt_num, obj_desc(s).c_str()); + + if ( block_has_orig_flow.back() ) + no_orig_flow = false; + + // Compute the state of the definition at the point of confluence: + // whether it's at least could-be-defined, whether it's definitely + // defined and if so whether it has a single point of definition. 
+ bool maybe = false; + bool defined = true; + + bool did_single_def = false; + int single_def = NO_DEF; + ExprPtr single_def_expr; + bool have_multi_defs = false; + + int num_regions = 0; + + for ( auto i = 0; i < usage_regions.size(); ++i ) + { + auto& ur = usage_regions[i]; + + if ( ur.BlockLevel() < cs_level ) + // Region is not applicable. + continue; + + if ( ur.EndsAfter() == NO_DEF ) + { // End this region. + ur.SetEndsAfter(stmt_num); + + if ( ur.StartsAfter() <= cs_stmt_num && no_orig_flow && + pc.count(i) == 0 ) + // Don't include this region in our assessment. + continue; + } + + else if ( ur.EndsAfter() < cs_stmt_num ) + // Irrelevant, didn't extend into confluence region. + // We test here just to avoid the set lookup in + // the next test, which presumably will sometimes + // be a tad expensive. + continue; + + else if ( pc.count(i) == 0 ) + // This region isn't active, and we're not + // tracking it for confluence. + continue; + + ++num_regions; + + maybe = maybe || ur.MaybeDefined(); + + if ( ur.DefinedAfter() == NO_DEF ) + { + defined = false; + continue; + } + + if ( did_single_def ) + { + if ( single_def != ur.DefinedAfter() ) + have_multi_defs = true; + } + else + { + single_def = ur.DefinedAfter(); + single_def_expr = ur.DefExprAfter(); + did_single_def = true; + } + } + + if ( num_regions == 0 ) + { // Nothing survives. + ASSERT(maybe == false); + defined = false; + } + + if ( ! defined ) + { + single_def = NO_DEF; + have_multi_defs = false; + } + + if ( have_multi_defs ) + // Definition reflects confluence point, which comes + // just after 's'. + single_def = stmt_num + 1; + + int level = cs->GetOptInfo()->block_level; + usage_regions.emplace_back(stmt_num, level, maybe, single_def); + + if ( single_def != NO_DEF && ! 
have_multi_defs ) + usage_regions.back().SetDefExpr(single_def_expr); + + confluence_stmts.pop_back(); + block_has_orig_flow.pop_back(); + pending_confluences.erase(cs); + + if ( tracing ) + DumpBlocks(); + } + +bool IDOptInfo::IsPossiblyDefinedBefore(const Stmt* s) + { + return IsPossiblyDefinedBefore(s->GetOptInfo()->stmt_num); + } + +bool IDOptInfo::IsDefinedBefore(const Stmt* s) + { + return IsDefinedBefore(s->GetOptInfo()->stmt_num); + } + +int IDOptInfo::DefinitionBefore(const Stmt* s) + { + return DefinitionBefore(s->GetOptInfo()->stmt_num); + } + +ExprPtr IDOptInfo::DefExprBefore(const Stmt* s) + { + return DefExprBefore(s->GetOptInfo()->stmt_num); + } + +bool IDOptInfo::IsPossiblyDefinedBefore(int stmt_num) + { + if ( usage_regions.size() == 0 ) + return false; + + return FindRegionBefore(stmt_num).MaybeDefined(); + } + +bool IDOptInfo::IsDefinedBefore(int stmt_num) + { + if ( usage_regions.size() == 0 ) + return false; + + return FindRegionBefore(stmt_num).DefinedAfter() != NO_DEF; + } + +int IDOptInfo::DefinitionBefore(int stmt_num) + { + if ( usage_regions.size() == 0 ) + return NO_DEF; + + return FindRegionBefore(stmt_num).DefinedAfter(); + } + +ExprPtr IDOptInfo::DefExprBefore(int stmt_num) + { + if ( usage_regions.size() == 0 ) + return nullptr; + + return FindRegionBefore(stmt_num).DefExprAfter(); + } + +void IDOptInfo::EndRegionsAfter(int stmt_num, int level) + { + for ( int i = usage_regions.size() - 1; i >= 0; --i ) + { + auto& ur = usage_regions[i]; + + if ( ur.BlockLevel() < level ) + return; + + if ( ur.EndsAfter() == NO_DEF ) + ur.SetEndsAfter(stmt_num); + } + } + +int IDOptInfo::FindRegionBeforeIndex(int stmt_num) + { + int region_ind = NO_DEF; + for ( auto i = 0; i < usage_regions.size(); ++i ) + { + const auto& ur = usage_regions[i]; + + if ( ur.StartsAfter() >= stmt_num ) + break; + + if ( ur.EndsAfter() == NO_DEF ) + // It's active for everything beyond its start. 
+ region_ind = i; + + else if ( ur.EndsAfter() >= stmt_num - 1 ) + // It's active at the beginning of the statement of + // interest. + region_ind = i; + } + + ASSERT(region_ind != NO_DEF); + return region_ind; + } + +int IDOptInfo::ActiveRegionIndex() + { + int i; + for ( i = usage_regions.size() - 1; i >= 0; --i ) + if ( usage_regions[i].EndsAfter() == NO_DEF ) + return i; + + return NO_DEF; + } + +void IDOptInfo::DumpBlocks() const + { + for ( auto i = 0; i < usage_regions.size(); ++i ) + usage_regions[i].Dump(); + + printf("\n"); + } + +} // zeek::detail diff --git a/src/script_opt/IDOptInfo.h b/src/script_opt/IDOptInfo.h new file mode 100644 index 0000000000..e07ad320c9 --- /dev/null +++ b/src/script_opt/IDOptInfo.h @@ -0,0 +1,272 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +// Auxiliary information associated with identifiers to aid script +// optimization. + +#pragma once + +#include + +#include "zeek/IntrusivePtr.h" + +namespace zeek::detail { + +class Expr; +class Stmt; + +using ExprPtr = IntrusivePtr; + +#define NO_DEF -1 + +// This class tracks a single region during which an identifier has +// a consistent state of definition, meaning either it's (1) defined +// as of its value after a specific statement, (2) might-or-might-not +// be defined, or (3) definitely not defined. + +class IDDefRegion { +public: + IDDefRegion(const Stmt* s, bool maybe, int def); + IDDefRegion(int stmt_num, int level, bool maybe, int def); + IDDefRegion(const Stmt* s, const IDDefRegion& ur); + + void Init(bool maybe, int def) + { + if ( def != NO_DEF ) + maybe_defined = true; + else + maybe_defined = maybe; + + defined = def; + } + + // Returns the starting point of the region, i.e., the number + // of the statement *after* which executing this region begins. 
+ int StartsAfter() const { return start_stmt; } + + // Returns or sets the ending point of the region, i.e., the + // last statement for which this region applies (including executing + // that statement). A value of NO_DEF means that the region + // continues indefinitely, i.e., we haven't yet encountered its end. + int EndsAfter() const { return end_stmt; } + void SetEndsAfter(int _end_stmt) { end_stmt = _end_stmt; } + + // The confluence nesting level associated with the region. Other + // regions that overlap take precedence if they have a higher + // (= more inner) block level. + int BlockLevel() const { return block_level; } + + // True if in the region the identifier could be defined. + bool MaybeDefined() const { return maybe_defined; } + + // Returns (or sets) the statement after which the identifier is + // (definitely) defined, or NO_DEF if it doesn't have a definite + // point of definition. + int DefinedAfter() const { return defined; } + void UpdateDefinedAfter(int _defined) { defined = _defined; } + + // Returns (or sets) the expression used to define the identifier, + // if any. Note that an identifier can be definitely defined + // (i.e., DefinedAfter() returns a statement number, not NO_DEF) + // but not have an associated expression, if the point-of-definition + // is the end of a confluence block. + const ExprPtr& DefExprAfter() const { return def_expr; } + void SetDefExpr(ExprPtr e) { def_expr = e; } + + // Used for debugging. + void Dump() const; + +protected: + // Number of the statement for which this region applies *after* + // its execution. + int start_stmt; + + // Number of the statement that this region applies to, *after* + // its execution. + int end_stmt = NO_DEF; // means the region hasn't ended yet + + // Degree of confluence nesting associated with this region. + int block_level; + + // Identifier could be defined in this region. 
+ bool maybe_defined; + + // If not NO_DEF, then the statement number of either the identifier's + // definition, or its confluence point if multiple, differing + // definitions come together. + int defined; + + // The expression used to define the identifier in this region. + // Nil if either it's ambiguous (due to confluence), or the + // identifier isn't guaranteed to be defined. + ExprPtr def_expr; +}; + + +// Class tracking optimization information associated with identifiers. + +class IDOptInfo { +public: + explicit IDOptInfo(const ID* id) { my_id = id; } + + // Reset all computed information about the identifier. Used + // when making a second pass over an AST after optimizing it, + // to avoid inheriting now-stale information. + void Clear(); + + // Used to track expressions employed when explicitly initializing + // the identifier. These are needed by compile-to-C++ script + // optimization. They're not used by ZAM optimization. + void AddInitExpr(ExprPtr init_expr); + const std::vector& GetInitExprs() const + { return init_exprs; } + + // Associated constant expression, if any. This is only set + // for identifiers that are aliases for a constant (i.e., there + // are no other assignments to them). + const ConstExpr* Const() const { return const_expr; } + + // The most use of "const" in any single line in the Zeek + // codebase :-P ... though only by one! + void SetConst(const ConstExpr* _const) { const_expr = _const; } + + // Whether the identifier is a temporary variable. Temporaries + // are guaranteed to have exactly one point of definition. + bool IsTemp() const { return is_temp; } + void SetTemp() { is_temp = true; } + + // Called when the identifier is defined via execution of the + // given statement, with an assignment to the expression 'e' + // (only non-nil for simple direct assignments). "conf_blocks" + // gives the full set of surrounding confluence statements. 
+ // It should be processed starting at conf_start (note that + // conf_blocks may be empty). + void DefinedAfter(const Stmt* s, const ExprPtr& e, + const std::vector& conf_blocks, + int conf_start); + + // Called upon encountering a "return" statement. + void ReturnAt(const Stmt* s); + + // Called when the current region ends with a backwards branch, + // possibly across multiple block levels, occurring at "from" + // and going into the block "to". If "close_all" is true then + // any pending regions at a level inner to "to" should be + // closed; if not, just those at "from"'s level. + void BranchBackTo(const Stmt* from, const Stmt* to, bool close_all); + + // Called when the current region ends at statement end_s with a + // forwards branch, possibly across multiple block levels, to + // the statement that comes right after the execution of "block". + // See above re "close_all". + void BranchBeyond(const Stmt* end_s, const Stmt* block, bool close_all); + + // Start tracking a confluence block that begins with the body + // of s (not s itself). + void StartConfluenceBlock(const Stmt* s); + + // Finish tracking confluence; s is the last point of execution + // prior to leaving a block. If no_orig_flow is true, then + // the region for 's' itself does not continue to the end of + // the block. + void ConfluenceBlockEndsAfter(const Stmt* s, bool no_orig_flow); + + // All of these regard the identifier's state just *prior* to + // executing the given statement. + bool IsPossiblyDefinedBefore(const Stmt* s); + bool IsDefinedBefore(const Stmt* s); + int DefinitionBefore(const Stmt* s); + ExprPtr DefExprBefore(const Stmt* s); + + // Same, but using statement numbers. + bool IsPossiblyDefinedBefore(int stmt_num); + bool IsDefinedBefore(int stmt_num); + int DefinitionBefore(int stmt_num); + ExprPtr DefExprBefore(int stmt_num); + + // The following are used to avoid multiple error messages + // for use of undefined variables. 
+ bool DidUndefinedWarning() const + { return did_undefined_warning; } + bool DidPossiblyUndefinedWarning() const + { return did_possibly_undefined_warning; } + + void SetDidUndefinedWarning() + { did_undefined_warning = true; } + void SetDidPossiblyUndefinedWarning() + { did_possibly_undefined_warning = true; } + +private: + // End any active regions that are at or inner to the given level. + void EndRegionsAfter(int stmt_num, int level); + + // Find the region that applies *before* executing the given + // statement. There should always be such a region. + IDDefRegion& FindRegionBefore(int stmt_num) + { return usage_regions[FindRegionBeforeIndex(stmt_num)]; } + int FindRegionBeforeIndex(int stmt_num); + + // Return the current "active" region, if any. The active region + // is the innermost region that currently has an end of NO_DEF, + // meaning we have not yet found its end. + IDDefRegion* ActiveRegion() + { + auto ind = ActiveRegionIndex(); + return ind >= 0 ? &usage_regions[ind] : nullptr; + } + int ActiveRegionIndex(); + + // Used for debugging. + void DumpBlocks() const; + + // Expressions used to initialize the identifier, for use by + // the scripts-to-C++ compiler. We need to track all of them + // because it's possible that a global value gets created using + // one of the earlier instances rather than the last one. + std::vector init_exprs; + + // If non-nil, a constant that this identifier always holds + // once initially defined. + const ConstExpr* const_expr = nullptr; + + // The different usage regions associated with the identifier. + // These are constructed such that they're always with non-decreasing + // starting statements. + std::vector usage_regions; + + // A type for collecting the indices of usage_regions that will + // all have confluence together at one point. Used to track + // things like "break" statements that jump out of loops or + // switch confluence regions. 
+ using ConfluenceSet = std::set; + + // Maps loops/switches/catch-returns to their associated + // confluence sets. + std::map pending_confluences; + + // A stack of active confluence statements, so we can always find + // the innermost when ending a confluence block. + std::vector confluence_stmts; + + // Parallel vector that tracks whether, upon creating the + // confluence block, there had already been observed internal flow + // going beyond it. If so, then we can ignore no_orig_flow when + // ending the block, because in fact there *was* original flow. + std::vector block_has_orig_flow; + + // Whether the identifier is a temporary variable. + bool is_temp = false; + + // Only needed for debugging purposes. + const ID* my_id; + bool tracing = false; + + // Track whether we've already generated usage errors. + bool did_undefined_warning = false; + bool did_possibly_undefined_warning = false; +}; + +// If non-nil, then output detailed tracing information when building +// up the usage regions for any identifier with the given name. +extern const char* trace_ID; + +} // namespace zeek::detail diff --git a/src/script_opt/ProfileFunc.cc b/src/script_opt/ProfileFunc.cc index 2d59c8a0d7..1d18037834 100644 --- a/src/script_opt/ProfileFunc.cc +++ b/src/script_opt/ProfileFunc.cc @@ -4,6 +4,7 @@ #include #include "zeek/script_opt/ProfileFunc.h" +#include "zeek/script_opt/IDOptInfo.h" #include "zeek/Desc.h" #include "zeek/Stmt.h" #include "zeek/Func.h" @@ -500,7 +501,7 @@ void ProfileFuncs::MergeInProfile(ProfileFunc* pf) if ( t->Tag() == TYPE_TYPE ) (void) HashType(t->AsTypeType()->GetType()); - auto& init_exprs = g->GetInitExprs(); + auto& init_exprs = g->GetOptInfo()->GetInitExprs(); for ( const auto& i_e : init_exprs ) if ( i_e ) {