From 509428a9dc5e9be8db434a071553212b053ed259 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 22 Sep 2021 11:17:28 -0700 Subject: [PATCH 1/4] removing -uu functionality and associated script analysis now no longer needed --- src/CMakeLists.txt | 4 - src/script_opt/DefItem.cc | 145 -- src/script_opt/DefItem.h | 103 -- src/script_opt/DefPoint.h | 108 -- src/script_opt/DefSetsMgr.cc | 60 - src/script_opt/DefSetsMgr.h | 195 --- src/script_opt/GenRDs.cc | 1295 ----------------- src/script_opt/GenRDs.h | 146 -- src/script_opt/ReachingDefs.cc | 211 --- src/script_opt/ReachingDefs.h | 246 ---- src/script_opt/ScriptOpt.cc | 9 +- src/script_opt/ScriptOpt.h | 8 +- src/script_opt/TempVar.h | 5 - .../language.uninitialized-local3/err | 1 - .../language.uninitialized-local3/out | 6 - .../language.uninitialized-local3/err | 4 - .../language.uninitialized-local3/out | 10 - .../language.uninitialized-local3/err | 3 - .../language.uninitialized-local3/out | 10 - testing/btest/btest.cfg | 7 - .../btest/language/uninitialized-local3.zeek | 44 - 21 files changed, 4 insertions(+), 2616 deletions(-) delete mode 100644 src/script_opt/DefItem.cc delete mode 100644 src/script_opt/DefItem.h delete mode 100644 src/script_opt/DefPoint.h delete mode 100644 src/script_opt/DefSetsMgr.cc delete mode 100644 src/script_opt/DefSetsMgr.h delete mode 100644 src/script_opt/GenRDs.cc delete mode 100644 src/script_opt/GenRDs.h delete mode 100644 src/script_opt/ReachingDefs.cc delete mode 100644 src/script_opt/ReachingDefs.h delete mode 100644 testing/btest/Baseline.cpp/language.uninitialized-local3/err delete mode 100644 testing/btest/Baseline.cpp/language.uninitialized-local3/out delete mode 100644 testing/btest/Baseline.opt/language.uninitialized-local3/err delete mode 100644 testing/btest/Baseline.opt/language.uninitialized-local3/out delete mode 100644 testing/btest/Baseline/language.uninitialized-local3/err delete mode 100644 testing/btest/Baseline/language.uninitialized-local3/out delete mode 100644 testing/btest/language/uninitialized-local3.zeek diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b9b1263812..c0f520fbeb 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -412,15 +412,11 @@ set(MAIN_SRCS ${_gen_zeek_script_cpp} - script_opt/DefItem.cc - script_opt/DefSetsMgr.cc script_opt/Expr.cc - script_opt/GenRDs.cc script_opt/GenIDDefs.cc script_opt/IDOptInfo.cc script_opt/Inline.cc script_opt/ProfileFunc.cc - script_opt/ReachingDefs.cc script_opt/Reduce.cc script_opt/ScriptOpt.cc script_opt/Stmt.cc diff --git a/src/script_opt/DefItem.cc b/src/script_opt/DefItem.cc deleted file mode 100644 index 961444783b..0000000000 --- a/src/script_opt/DefItem.cc +++ /dev/null @@ -1,145 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "zeek/script_opt/DefItem.h" - -#include "zeek/Expr.h" - -namespace zeek::detail - { - -DefinitionItem::DefinitionItem(const ID* _id) : name(_id->Name()) - { - is_id = true; - id = _id; - di = nullptr; - field_name = nullptr; - - t = id->GetType(); - - CheckForRecord(); - } - -DefinitionItem::DefinitionItem(const DefinitionItem* _di, const char* _field_name, TypePtr _t) - { - is_id = false; - id = nullptr; - di = _di; - field_name = _field_name; - - t = std::move(_t); - - name += di->Name(); - name += '$'; - name += field_name; - - CheckForRecord(); - } - -std::shared_ptr DefinitionItem::FindField(const char* field) const - { - if ( ! IsRecord() ) - return nullptr; - - auto offset = rt->FieldOffset(field); - - return FindField(offset); - } - -std::shared_ptr DefinitionItem::FindField(int offset) const - { - if ( ! IsRecord() ) - return nullptr; - - return (*fields)[offset]; - } - -std::shared_ptr DefinitionItem::CreateField(const char* field, TypePtr t) - { - auto offset = rt->FieldOffset(field); - - if ( (*fields)[offset] ) - return (*fields)[offset]; - - (*fields)[offset] = std::make_shared(this, field, std::move(t)); - - return (*fields)[offset]; - } - -std::shared_ptr DefinitionItem::CreateField(int offset, TypePtr t) - { - if ( (*fields)[offset] ) - return (*fields)[offset]; - - auto field = rt->FieldName(offset); - - (*fields)[offset] = std::make_shared(this, field, std::move(t)); - - return (*fields)[offset]; - } - -void DefinitionItem::CheckForRecord() - { - if ( ! IsRecord() ) - { - rt = nullptr; - return; - } - - rt = t->AsRecordType(); - num_fields = rt->NumFields(); - fields = std::vector>(num_fields); - } - -std::shared_ptr DefItemMap::GetExprDI(const Expr* expr) - { - if ( expr->Tag() == EXPR_NAME ) - { - auto id_e = expr->AsNameExpr(); - auto id = id_e->Id(); - return GetID_DI(id); - } - - else if ( expr->Tag() == EXPR_FIELD ) - { - auto f = expr->AsFieldExpr(); - auto r = f->Op(); - - auto r_def = GetExprDI(r); - - if ( ! r_def ) - return nullptr; - - auto field = f->FieldName(); - return r_def->FindField(field); - } - - else - return nullptr; - } - -std::shared_ptr DefItemMap::GetID_DI(const ID* id) - { - auto di = i2d.find(id); - if ( di == i2d.end() ) - { - auto new_entry = std::make_shared(id); - i2d[id] = new_entry; - return new_entry; - } - else - return di->second; - } - -const DefinitionItem* DefItemMap::GetConstID_DI(const ID* id) const - { - auto di = i2d.find(id); - return di == i2d.end() ? nullptr : di->second.get(); - } - -const DefinitionItem* DefItemMap::GetConstID_DI(const DefinitionItem* di, - const char* field_name) const - { - return di->FindField(field_name).get(); - } - - } // zeek::detail diff --git a/src/script_opt/DefItem.h b/src/script_opt/DefItem.h deleted file mode 100644 index 0f4500c8bd..0000000000 --- a/src/script_opt/DefItem.h +++ /dev/null @@ -1,103 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#pragma once - -#include "zeek/ID.h" -#include "zeek/Type.h" -#include "zeek/script_opt/DefPoint.h" - -namespace zeek::detail - { - -// A definition item is a Zeek script entity that can be assigned to. -// Currently, we track variables and record fields; the latter can -// be nested (for example, a field that's in a record that itself is -// a field in another record). In principle we could try to track -// table or vector elements, but that's only going to be feasible for -// constant indices, so presumably not much bang-for-the-buck. -// -// For script optimization, we only need to track variables, and we could -// considerably simplify the code by doing so. However, there's long -// been a desire to be able to statically determine that a record field -// will be used without first having been set, hence we go the more -// complicated route here. - -class DefinitionItem - { -public: - // Constructor for the simple case of tracking assignments to - // a variable. - DefinitionItem(const ID* _id); - - // The more complicated case of assigning to a field in a record - // (which itself might be a field in a record). - DefinitionItem(const DefinitionItem* _di, const char* _field_name, TypePtr _t); - - const char* Name() const { return name.c_str(); } - - TypePtr GetType() const { return t; } - bool IsRecord() const { return t->Tag() == TYPE_RECORD; } - - // The identifier to which this item ultimately belongs. - const ID* RootID() const { return di ? di->RootID() : id; } - - // For this definition item, look for a field corresponding - // to the given name or offset. Nil if the field has not (yet) - // been created. - std::shared_ptr FindField(const char* field) const; - std::shared_ptr FindField(int offset) const; - - // Start tracking a field in this definition item with the - // given name or offset, returning the associated item. - // - // If the field already exists, then it's simply returned. - std::shared_ptr CreateField(const char* field, TypePtr t); - std::shared_ptr CreateField(int offset, TypePtr t); - -protected: - void CheckForRecord(); - - bool is_id; - const ID* id; - const DefinitionItem* di; - const char* field_name; - - TypePtr t; - std::string name; - - const RecordType* rt; - - // If present, tracks definition items for a record's fields as - // these are seen (i.e., as they are entered via CreateField()). - std::optional>> fields; - int num_fields; - }; - -// For a given identifier, locates its associated definition item. -typedef std::unordered_map> ID_to_DI_Map; - -// Class for managing a set of IDs and their associated definition items. -class DefItemMap - { -public: - // Gets the definition for either a name or a record field reference. - // Returns nil if "expr" lacks such a form, or if there isn't - // any such definition. - std::shared_ptr GetExprDI(const Expr* expr); - - // Returns the definition item for a given ID; creates it if - // it doesn't already exist. - std::shared_ptr GetID_DI(const ID* id); - - // Returns the definition item for a given ID, or nil if it - // doesn't exist. - const DefinitionItem* GetConstID_DI(const ID* id) const; - - // The same for a record field for a given definition item. - const DefinitionItem* GetConstID_DI(const DefinitionItem* di, const char* field_name) const; - -protected: - ID_to_DI_Map i2d; - }; - - } // zeek::detail diff --git a/src/script_opt/DefPoint.h b/src/script_opt/DefPoint.h deleted file mode 100644 index 2f78723574..0000000000 --- a/src/script_opt/DefPoint.h +++ /dev/null @@ -1,108 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#pragma once - -#include "zeek/Expr.h" -#include "zeek/Func.h" -#include "zeek/Stmt.h" - -namespace zeek::detail - { - -// A DefinitionPoint is a location where a variable, or possibly a record -// field, is defined (i.e., assigned to). The class tracks the type of -// definition (a statement, inside an expression, an aggregate passed to -// a function or hook, or at the start of a function). - -enum DefPointType - { - // Used to capture the notion "the variable may have no definition - // at this point" (or "has no definition", depending on whether we're - // concerned with minimal or maximal RDs). - NO_DEF_POINT, - - // Assigned at the given statement. - STMT_DEF, - - // The following includes assignments, +=, vec+=, $, ?$ ... - // ... plus names (for implicit creation of records upon - // seeing use) and calls (for aggregates). - // - // Note that ?$ does not in fact create a definition. We include - // it as a heuristic meaning "code after this point can assume - // that the given record field is defined". The heuristic can - // fail if the ?$ predicate is ultimately negated, something that - // we don't try to identify. Basically, the idea is that if the - // script writer is cognizant of needing to check for the existence - // of a field, most likely they got the check correct. Any errors - // we make in this regard only lead to mistakes in identify usage - // problems, not in actual run-time execution. - EXPR_DEF, - - // The variable is assigned when the function begins executing, - // either through an explicit initialization for a local, or because - // it's a function parameter. - FUNC_DEF, - - }; - -class DefinitionPoint - { -public: - DefinitionPoint() - { - o = nullptr; - t = NO_DEF_POINT; - } - - DefinitionPoint(const Stmt* s) - { - o = s; - t = STMT_DEF; - } - - DefinitionPoint(const Expr* e) - { - o = e; - t = EXPR_DEF; - } - - DefinitionPoint(const Func* f) - { - o = f; - t = FUNC_DEF; - } - - DefPointType Tag() const { return t; } - - const Obj* OpaqueVal() const { return o; } - - const Stmt* StmtVal() const - { - ASSERT(t == STMT_DEF); - return (const Stmt*)o; - } - - const Expr* ExprVal() const - { - ASSERT(t == EXPR_DEF); - return (const Expr*)o; - } - - const Func* FuncVal() const - { - ASSERT(t == FUNC_DEF); - return (const Func*)o; - } - - bool SameAs(const DefinitionPoint& dp) const - { - return dp.Tag() == Tag() && dp.OpaqueVal() == OpaqueVal(); - } - -protected: - DefPointType t; - const Obj* o; - }; - - } // zeek::detail diff --git a/src/script_opt/DefSetsMgr.cc b/src/script_opt/DefSetsMgr.cc deleted file mode 100644 index 79839e2a45..0000000000 --- a/src/script_opt/DefSetsMgr.cc +++ /dev/null @@ -1,60 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "zeek/script_opt/DefSetsMgr.h" - -namespace zeek::detail - { - -DefSetsMgr::DefSetsMgr() - { - pre_min_defs = make_intrusive(item_map); - post_min_defs = make_intrusive(item_map); - - pre_max_defs = make_intrusive(item_map); - post_max_defs = make_intrusive(item_map); - } - -void DefSetsMgr::CreatePostDef(const ID* id, DefinitionPoint dp, bool min_only) - { - auto di = item_map.GetID_DI(id); - CreatePostDef(di, dp, min_only); - } - -void DefSetsMgr::CreatePostDef(std::shared_ptr di, DefinitionPoint dp, - bool min_only) - { - auto where = dp.OpaqueVal(); - - if ( ! post_min_defs->HasRDs(where) ) - { - // We haven't yet started creating post RDs for this - // statement/expression, so create them. - auto pre = GetPreMinRDs(where); - SetPostFromPre(where); - } - - if ( ! min_only && ! post_max_defs->HasRDs(where) ) - { - auto pre = GetPreMaxRDs(where); - SetPostFromPre(where); - } - - CreateDef(std::move(di), dp, false, min_only); - } - -void DefSetsMgr::CreateDef(std::shared_ptr di, DefinitionPoint dp, bool is_pre, - bool min_only) - { - auto where = dp.OpaqueVal(); - RDSetPtr min_defs = is_pre ? pre_min_defs : post_min_defs; - - min_defs->AddOrReplace(where, di.get(), dp); - - if ( min_only ) - return; - - RDSetPtr& max_defs = is_pre ? pre_max_defs : post_max_defs; - max_defs->AddOrReplace(where, di.get(), dp); - } - - } // zeek::detail diff --git a/src/script_opt/DefSetsMgr.h b/src/script_opt/DefSetsMgr.h deleted file mode 100644 index c3c8e4b22c..0000000000 --- a/src/script_opt/DefSetsMgr.h +++ /dev/null @@ -1,195 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#pragma once - -#include "zeek/script_opt/DefItem.h" -#include "zeek/script_opt/DefPoint.h" -#include "zeek/script_opt/ReachingDefs.h" - -namespace zeek::detail - { - -// Class for managing collections of reaching definitions associated -// with AST nodes. -// -// Each node has "pre" RDs reflecting the reaching definitions active -// before the node executes, and "post" RDs reflecting the state after -// executing. -// -// In addition, we track both *minimal* RDs (those guaranteed to exist) -// and *maximal* RDs (those that _could_ exist). -// -// To illustrate both of these notions with an example, consider this -// scripting code: -// -// local x = 5; -// if ( predicate() ) -// x = 9; -// foobar(); -// -// The "x = 5" node has empty pre-RDs, and minimal and maximal post-RDs -// of {x = 5 }. The "if" node and its interior call -// to predicate() both inherit those post-RDs as their pre-RDs, and -// these are also the post-RDs for predicate(). -// -// The assignment "x = 9" inherits those post-RDs as its pre-RDs. -// When it executes, has leaves both minimal and maximal post-RDs -// of { x = 9 }. -// -// The post-RDs for the "if" statement (and thus the pre-RDs for the foobar() -// call) have a minimal set of { x = SOMETHING }, and a maximal set of -// { x = 5 , x = 9 }. The minimal set -// here captures the notion that "x is definitely assigned to a value -// at this point, but it's uncertain just what that value is". - -class DefSetsMgr - { -public: - DefSetsMgr(); - - // Returns the minimal or maximal pre-RDs associated with a given node. - RDPtr GetPreMinRDs(const Obj* o) const { return GetRDs(pre_min_defs, o); } - RDPtr GetPreMaxRDs(const Obj* o) const { return GetRDs(pre_max_defs, o); } - - // Same, but for post-RDs. - RDPtr GetPostMinRDs(const Obj* o) const - { - if ( HasPostMinRDs(o) ) - return GetRDs(post_min_defs, o); - else - return GetPreMinRDs(o); - } - RDPtr GetPostMaxRDs(const Obj* o) const - { - if ( HasPostMaxRDs(o) ) - return GetRDs(post_max_defs, o); - else - return GetPreMaxRDs(o); - } - - // Initialize a node's pre-RDs to be empty. - void SetEmptyPre(const Obj* o) - { - auto empty_rds = make_intrusive(); - SetPreMinRDs(o, empty_rds); - SetPreMaxRDs(o, empty_rds); - } - - // Inherit a node's pre-RDs from those of another node. - void SetPreFromPre(const Obj* target, const Obj* source) - { - SetPreMinRDs(target, GetPreMinRDs(source)); - SetPreMaxRDs(target, GetPreMaxRDs(source)); - } - - // Inherit a node's pre-RDs from the post-RDs of another node. - void SetPreFromPost(const Obj* target, const Obj* source) - { - SetPreMinRDs(target, GetPostMinRDs(source)); - SetPreMaxRDs(target, GetPostMaxRDs(source)); - } - - // Set the post-RDs for a given node to the given min/max values. - void SetPostRDs(const Obj* o, RDPtr min_rd, RDPtr max_rd) - { - SetPostMinRDs(o, std::move(min_rd)); - SetPostMaxRDs(o, std::move(max_rd)); - } - - // Propagate the node's pre-RDs to also be its post-RDs. - void SetPostFromPre(const Obj* o) - { - SetPostMinRDs(o, GetPreMinRDs(o)); - SetPostMaxRDs(o, GetPreMaxRDs(o)); - } - - // Inherit a node's post-RDs from another node's pre-RDs. - void SetPostFromPre(const Obj* target, const Obj* source) - { - SetPostMinRDs(target, GetPreMinRDs(source)); - SetPostMaxRDs(target, GetPreMaxRDs(source)); - } - - // Inherit a node's post-RDs from another node's post-RDs. - void SetPostFromPost(const Obj* target, const Obj* source) - { - SetPostMinRDs(target, GetPostMinRDs(source)); - SetPostMaxRDs(target, GetPostMaxRDs(source)); - } - - // Fine-grained control for setting RDs. - void SetPreMinRDs(const Obj* o, RDPtr rd) { pre_min_defs->SetRDs(o, std::move(rd)); } - void SetPreMaxRDs(const Obj* o, RDPtr rd) { pre_max_defs->SetRDs(o, std::move(rd)); } - - void SetPostMinRDs(const Obj* o, RDPtr rd) { post_min_defs->SetRDs(o, std::move(rd)); } - void SetPostMaxRDs(const Obj* o, RDPtr rd) { post_max_defs->SetRDs(o, std::move(rd)); } - - // Used for confluence: add a set of RDs into those already - // associated with a node's pre-RDs / post-RDs. Only applies - // to maximal RDs. - void MergeIntoPre(const Obj* o, const RDPtr& rds) { pre_max_defs->AddRDs(o, rds); } - void MergeIntoPost(const Obj* o, const RDPtr& rds) { post_max_defs->AddRDs(o, rds); } - - // The same, but merging a node's own maximal post-RDs into - // its maximal pre-RDs. - void MergePostIntoPre(const Obj* o) { MergeIntoPre(o, GetPostMaxRDs(o)); } - - // The following predicates look up whether a given node exists - // in the given pre/post minimal/maximal RDs. - bool HasPreMinRDs(const Obj* o) const { return pre_min_defs && pre_min_defs->HasRDs(o); } - bool HasPreMaxRDs(const Obj* o) const { return pre_max_defs && pre_max_defs->HasRDs(o); } - - bool HasPostMinRDs(const Obj* o) const { return post_min_defs && post_min_defs->HasRDs(o); } - bool HasPostMaxRDs(const Obj* o) const { return post_max_defs && post_max_defs->HasRDs(o); } - - // True if the given node has a minimal pre-RD associated - // with the given identifier. - bool HasPreMinRD(const Obj* o, const ID* id) const - { - return pre_min_defs && pre_min_defs->HasRD(o, id); - } - - // True if at the given node, there's a single *unambiguous* - // pre RD for the given identifier. - bool HasSinglePreMinRD(const Obj* o, const ID* id) const - { - return pre_min_defs && pre_min_defs->HasSingleRD(o, id); - } - - // Methods for creating new pre/post RDs. If min_only is true, - // then only done for minimal RDs. - void CreatePreDef(std::shared_ptr di, DefinitionPoint dp, bool min_only) - { - CreateDef(std::move(di), dp, true, min_only); - } - void CreatePostDef(const ID* id, DefinitionPoint dp, bool min_only); - void CreatePostDef(std::shared_ptr di, DefinitionPoint dp, bool min_only); - - std::shared_ptr GetExprDI(const Expr* e) { return item_map.GetExprDI(e); } - std::shared_ptr GetID_DI(const ID* id) { return item_map.GetID_DI(id); } - const DefinitionItem* GetConstID_DI(const ID* id) const { return item_map.GetConstID_DI(id); } - const DefinitionItem* GetConstID_DI(const DefinitionItem* di, const char* field_name) const - { - return item_map.GetConstID_DI(di, field_name); - } - -private: - void CreateDef(std::shared_ptr di, DefinitionPoint dp, bool is_pre, - bool min_only); - - RDPtr GetRDs(const RDSetPtr& defs, const Obj* o) const { return defs->FindRDs(o); } - - // Mappings of minimal reaching defs pre- and post- execution - // of the given node. - RDSetPtr pre_min_defs; - RDSetPtr post_min_defs; - - // Mappings of maximal reaching defs pre- and post- execution - // of the given node. - RDSetPtr pre_max_defs; - RDSetPtr post_max_defs; - - DefItemMap item_map; - }; - - } // zeek::detail diff --git a/src/script_opt/GenRDs.cc b/src/script_opt/GenRDs.cc deleted file mode 100644 index 739a7147e9..0000000000 --- a/src/script_opt/GenRDs.cc +++ /dev/null @@ -1,1295 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "zeek/script_opt/GenRDs.h" - -#include "zeek/Desc.h" -#include "zeek/Reporter.h" -#include "zeek/Scope.h" -#include "zeek/script_opt/ScriptOpt.h" - -namespace zeek::detail - { - -RD_Decorate::RD_Decorate(std::shared_ptr _pf, const Func* f, ScopePtr scope, - StmtPtr body) - : pf(std::move(_pf)) - { - TraverseFunction(f, scope, body); - } - -void RD_Decorate::TraverseFunction(const Func* f, ScopePtr scope, StmtPtr body) - { - func_flavor = f->Flavor(); - - const auto& args = scope->OrderedVars(); - int nparam = f->GetType()->Params()->NumFields(); - - mgr.SetEmptyPre(f); - - for ( const auto& a : args ) - { - if ( --nparam < 0 ) - break; - - CreateInitPostDef(a.get(), DefinitionPoint(f), true, nullptr); - } - - for ( const auto& g : pf->Globals() ) - CreateInitPostDef(g, DefinitionPoint(f), true, nullptr); - - if ( ! mgr.HasPostMinRDs(f) ) - // This happens if we have no arguments or globals. Use the - // empty ones we set up. - mgr.SetPostFromPre(f); - - mgr.SetPreFromPost(body.get(), f); - body->Traverse(this); - } - -TraversalCode RD_Decorate::PreStmt(const Stmt* s) - { - ASSERT(mgr.HasPreMinRDs(s)); - ASSERT(mgr.HasPreMaxRDs(s)); - - switch ( s->Tag() ) - { - case STMT_EXPR: - case STMT_EVENT: - case STMT_ADD: - case STMT_DELETE: - case STMT_RETURN: - case STMT_CHECK_ANY_LEN: - { - // Can't use AsExprStmt() since it doesn't know about - // the tags of its subclasses. - auto e = ((const ExprStmt*)s)->StmtExpr(); - mgr.SetPreFromPre(e, s); - break; - } - - case STMT_PRINT: - { - auto l = s->AsPrintStmt()->ExprList(); - mgr.SetPreFromPre(l, s); - break; - } - - case STMT_CATCH_RETURN: - { - auto cr = s->AsCatchReturnStmt(); - auto block = cr->Block().get(); - - mgr.SetPreFromPre(block, s); - block->Traverse(this); - - // Treat the block as a no-op for analyzing RDs, - // since it shouldn't affect the definition status of - // any of the RDs outside of it. (The one exception is - // for globals, which we can address by synchronizing - // globals after inlined returns.) The only question is - // how to propagate RDs relating to the return value. - mgr.SetPostFromPre(s); - - auto ret_var = cr->RetVar(); - if ( ret_var ) - { - // Ideally for the return variable (if any) we'd track - // whether all of the paths out of the block go through - // a "return ". One way we could do that would - // be to literally assign it for internal returns. - // The trick with that is it could entail some subtle - // debugging of how RDs are propagated across internal - // returns. For now, we punt and just mark it as - // defined. This doesn't lead to any incorrect - // optimization decisions, it just misses out on - // an opportunity to flag a potential return-without- - // value ... but only in the case where we're using - // inlining, too. - CreateInitPostDef(ret_var->Id(), DefinitionPoint(s), true, nullptr); - } - - return TC_ABORTSTMT; - } - - case STMT_LIST: - { - auto sl = s->AsStmtList(); - const auto& stmts = sl->Stmts(); - const Stmt* pred_stmt = s; // current Stmt's predecessor - - for ( const auto& stmt : stmts ) - { - if ( pred_stmt == s ) - mgr.SetPreFromPre(stmt, pred_stmt); - else - mgr.SetPreFromPost(stmt, pred_stmt); - - stmt->Traverse(this); - pred_stmt = stmt; - } - - if ( pred_stmt == s ) - mgr.SetPostFromPre(sl, pred_stmt); - else - mgr.SetPostFromPost(sl, pred_stmt); - - return TC_ABORTSTMT; - } - - case STMT_IF: - { - // While we'd like to think no assignment definitions - // will occur inside conditions (though they could for - // non-reduced code) - but in any case a ?$ operator can - // create pseudo-definitions, so we have to accommodate that - // possibility. - auto i = s->AsIfStmt(); - auto cond = i->StmtExpr(); - - mgr.SetPreFromPre(cond, s); - cond->Traverse(this); - - mgr.SetPreFromPost(i->TrueBranch(), cond); - i->TrueBranch()->Traverse(this); - - mgr.SetPreFromPost(i->FalseBranch(), cond); - i->FalseBranch()->Traverse(this); - - auto true_reached = ! i->TrueBranch()->NoFlowAfter(false); - auto false_reached = ! i->FalseBranch()->NoFlowAfter(false); - - if ( true_reached && false_reached ) - DoIfStmtConfluence(i); - - else - { - if ( true_reached ) - mgr.SetPostFromPost(s, i->TrueBranch()); - - else if ( false_reached ) - mgr.SetPostFromPost(s, i->FalseBranch()); - - else - CreateEmptyPostRDs(s); - } - - return TC_ABORTSTMT; - } - - case STMT_SWITCH: - TraverseSwitch(s->AsSwitchStmt()); - return TC_ABORTSTMT; - - case STMT_FOR: - { - auto f = s->AsForStmt(); - - auto ids = f->LoopVars(); - auto e = f->LoopExpr(); - auto body = f->LoopBody(); - auto val_var = f->ValueVar(); - - mgr.SetPreFromPre(e, s); - e->Traverse(this); - mgr.SetPreFromPost(body, e); - - for ( const auto& id : *ids ) - CreateInitPreDef(id, DefinitionPoint(body)); - - if ( val_var ) - CreateInitPreDef(val_var.get(), DefinitionPoint(body)); - - // If the loop expression's value is uninitialized, that's - // okay, it will just result in an empty loop. In principle, - // for a non-reduced statement it's possible that *getting* - // to the value will touch on something uninitialized. - // For reduced form, however, that will already have been - // hoisted out, so not a concern. - // - // To keep from traversing the loop expression, we just do - // the body manually here. - - block_defs.emplace_back(std::make_unique(false)); - - body->Traverse(this); - - DoLoopConfluence(s, body, body); - - return TC_ABORTSTMT; - } - - case STMT_WHILE: - { - auto w = s->AsWhileStmt(); - auto cond = w->Condition().get(); - - // This is the *predecessor* statement, i.e., what - // gets executed (due to transformation-to-reduced-form) - // prior to evaluating the conditional. - auto cond_stmt = w->CondPredStmt().get(); - - // This is the *conditional itself*, but as a statement. - auto cond_s = w->ConditionAsStmt().get(); - - if ( cond_stmt ) - { - mgr.SetPreFromPre(cond_stmt, w); - cond_stmt->Traverse(this); - mgr.SetPreFromPost(cond, cond_stmt); - } - else - mgr.SetPreFromPre(cond, w); - - cond->Traverse(this); - mgr.SetPreFromPre(cond_s, cond); - mgr.SetPostFromPost(cond_s, cond); - - auto body = w->Body().get(); - mgr.SetPreFromPre(body, cond); - - block_defs.emplace_back(std::make_unique(false)); - - body->Traverse(this); - - auto loop_top = cond_stmt ? cond_stmt : cond_s; - DoLoopConfluence(s, loop_top, body); - - // Make sure the conditional gets its RDs updated. - if ( cond_stmt ) - { - cond_stmt->Traverse(this); - mgr.SetPreFromPost(cond, cond_stmt); - } - else - mgr.SetPreFromPost(cond, cond_s); - - cond->Traverse(this); - - return TC_ABORTSTMT; - } - - case STMT_WHEN: - { - // ### punt on these for now, need to reflect on bindings. - return TC_ABORTSTMT; - } - - default: - break; - } - - return TC_CONTINUE; - } - -void RD_Decorate::TraverseSwitch(const SwitchStmt* sw) - { - DefinitionPoint ds(sw); - - auto e = sw->StmtExpr(); - auto cases = sw->Cases(); - - mgr.SetPreFromPre(e, sw); - auto sw_min_pre = mgr.GetPreMinRDs(sw); - auto sw_max_pre = mgr.GetPreMaxRDs(sw); - - block_defs.emplace_back(std::make_unique(true)); - auto bd = block_defs.back().get(); - - RDPtr sw_post_min_rds = nullptr; - RDPtr sw_post_max_rds = nullptr; - - if ( sw->HasDefault() ) - // Guaranteed that we'll execute one of the switch blocks. - // Start with an empty set of RDs for the post-max and - // build them up via union. - sw_post_max_rds = make_intrusive(); - - else - { - // Entire set of cases is optional, so merge in entering RDs. - mgr.SetPostFromPre(sw); - - sw_post_min_rds = mgr.GetPostMinRDs(sw); - sw_post_max_rds = mgr.GetPostMaxRDs(sw); - } - - // Used to track fall-through. - RDPtr prev_RDs; - - for ( const auto& c : *cases ) - { - auto body = c->Body(); - - mgr.SetPreMinRDs(body, sw_min_pre); - mgr.SetPreMaxRDs(body, sw_max_pre); - - if ( prev_RDs ) - { - mgr.MergeIntoPre(body, prev_RDs); - prev_RDs = nullptr; - } - - auto exprs = c->ExprCases(); - if ( exprs ) - { - mgr.SetPreFromPre(exprs, body); - exprs->Traverse(this); - - // It's perverse to modify a variable in a - // case expression ... and won't happen for - // reduced code, so we just ignore the - // possibility that it occurred. - } - - auto type_ids = c->TypeCases(); - if ( type_ids ) - { - for ( const auto& id : *type_ids ) - if ( id->Name() ) - CreateInitPreDef(id, DefinitionPoint(body)); - } - - auto body_min_pre = mgr.GetPreMinRDs(body); - auto body_max_pre = mgr.GetPreMaxRDs(body); - - // Don't inherit body-def analysis developed for preceding - // switch case. - bd->Clear(); - body->Traverse(this); - - if ( ! bd->PreRDs().empty() ) - reporter->InternalError("mispropagation of switch body defs"); - - if ( body->NoFlowAfter(true) ) - // Post RDs for this block are irrelevant. - continue; - - // Propagate what comes out of the block. - auto case_min_rd = mgr.GetPostMinRDs(body); - auto case_max_rd = mgr.GetPostMaxRDs(body); - - // Look for any definitions reflecting break or fallthrough - // short-circuiting. These only matter for max RDs. - for ( const auto& post : bd->PostRDs() ) - case_max_rd = case_max_rd->Union(post); - - // Scoop up definitions from fallthrough's and remember - // them for the next block. - for ( const auto& future : bd->FutureRDs() ) - { - if ( ! prev_RDs ) - prev_RDs = future; - else - prev_RDs = prev_RDs->Union(future); - } - - // It's possible we haven't set sw_post_min_rds (if the - // switch has a default and thus is guaranteed to execute - // one of the blocks). OTOH, sw_post_max_rds is always set. - sw_post_min_rds = sw_post_min_rds - ? sw_post_min_rds->IntersectWithConsolidation(case_min_rd, ds) - : make_intrusive(case_min_rd); - - sw_post_max_rds = sw_post_max_rds->Union(case_max_rd); - } - - if ( ! sw_post_min_rds ) - // This happens when all of the cases return, including - // a default. In that case, sw_post_max_rds is already - // an empty RD. - sw_post_min_rds = make_intrusive(); - - mgr.SetPostRDs(sw, std::move(sw_post_min_rds), std::move(sw_post_max_rds)); - - block_defs.pop_back(); - } - -void RD_Decorate::DoIfStmtConfluence(const IfStmt* i) - { - auto min_if_branch_rd = mgr.GetPostMinRDs(i->TrueBranch()); - auto min_else_branch_rd = mgr.GetPostMinRDs(i->FalseBranch()); - auto min_post_rds = min_if_branch_rd->Intersect(min_else_branch_rd); - - auto max_if_branch_rd = mgr.GetPostMaxRDs(i->TrueBranch()); - auto max_else_branch_rd = mgr.GetPostMaxRDs(i->FalseBranch()); - auto max_post_rds = max_if_branch_rd->Union(max_else_branch_rd); - - mgr.SetPostRDs(i, std::move(min_post_rds), std::move(max_post_rds)); - } - -void RD_Decorate::DoLoopConfluence(const Stmt* s, const Stmt* top, const Stmt* body) - { - auto bd = std::move(block_defs.back()); - block_defs.pop_back(); - - auto loop_pre = mgr.GetPreMaxRDs(top); - auto loop_post = mgr.GetPostMaxRDs(body); - - for ( const auto& pre : bd->PreRDs() ) - { - mgr.MergeIntoPre(top, pre); - - // Factor in that these definitions also - // essentially make it to the beginning of - // the entire loop. - mgr.MergeIntoPre(s, pre); - } - - for ( const auto& post : bd->PostRDs() ) - mgr.MergeIntoPost(body, post); - - // Freshen due to mergers. - loop_pre = mgr.GetPreMaxRDs(top); - auto loop_min_post = mgr.GetPostMinRDs(body); - auto loop_max_post = mgr.GetPostMaxRDs(body); - - if ( loop_pre != loop_max_post ) - { - // Some body assignments reached the end. Propagate them - // around the loop. - mgr.MergeIntoPre(top, loop_max_post); - - if ( top != body ) - { - // Don't have to worry about block-defs as it's - // simply an expression evaluation, no next/break's. - top->Traverse(this); - mgr.MergeIntoPre(body, mgr.GetPostMaxRDs(top)); - } - - block_defs.emplace_back(std::make_unique(false)); - body->Traverse(this); - block_defs.pop_back(); - - // Ideally we'd check for consistency with the previous - // definitions in bd. This is tricky because the body - // itself might not have RDs if it ends in a "break" or - // such. - } - - DefinitionPoint ds(s); - - // Factor in that the loop might not execute at all. - auto s_min_pre = mgr.GetPreMinRDs(s); - auto s_max_pre = mgr.GetPreMaxRDs(s); - - // For min RDs, we want to compute them directly regardless - // of whether the loop body has flow reaching the end of it, - // since an internal "next" can still cause definitions to - // propagate to the beginning. - auto min_post_rds = s_min_pre->IntersectWithConsolidation(loop_min_post, ds); - mgr.SetPostMinRDs(s, std::move(min_post_rds)); - - // Note, we use ignore_break=true because what we care about is not - // whether flow goes just beyond the last statement of the body, - // but rather whether flow can start at the next statement *after* - // the body, and a "break" will do that. - if ( body->NoFlowAfter(true) ) - mgr.SetPostMaxRDs(s, s_max_pre); - else - { - auto max_post_rds = s_max_pre->Union(loop_max_post); - mgr.SetPostMaxRDs(s, std::move(max_post_rds)); - } - } - -TraversalCode RD_Decorate::PostStmt(const Stmt* s) - { - DefinitionPoint ds(s); - - switch ( s->Tag() ) - { - case STMT_EXPR: - { - auto e = s->AsExprStmt()->StmtExpr(); - mgr.SetPostFromPost(s, e); - break; - } - - case STMT_INIT: - { - mgr.SetPostFromPre(s); - - auto init = s->AsInitStmt(); - auto& inits = init->Inits(); - - for ( const auto& id : inits ) - { - auto id_t = id->GetType(); - - // Only aggregates get initialized. - if ( ! zeek::IsAggr(id_t->Tag()) ) - continue; - - CreateInitPostDef(id.get(), DefinitionPoint(s), false, 0); - } - - break; - } - - case STMT_RETURN: - // No RDs make it past a return. It's tempting to alter - // this for inlined "caught" returns, since changes to - // globals *do* make it past them. However, doing so - // is inconsistent with NoFlowAfter() treating such returns - // as not having control flow go beyond them; and changing - // NoFlowAfter() would be incorrect since it's about - // *immediate* control flow, not broader control flow. - CreateEmptyPostRDs(s); - break; - - case STMT_NEXT: - AddBlockDefs(s, true, false, false); - CreateEmptyPostRDs(s); - break; - - case STMT_BREAK: - if ( block_defs.empty() ) - { - if ( func_flavor == FUNC_FLAVOR_HOOK ) - // Treat as a return. - CreateEmptyPostRDs(s); - else - s->Error("\"break\" in a non-break context"); - break; - } - - AddBlockDefs(s, false, false, block_defs.back()->IsCase()); - - if ( block_defs.back()->IsCase() ) - // The following propagates min RDs so they can - // be intersected across switch cases. - mgr.SetPostFromPre(s); - else - CreateEmptyPostRDs(s); - - break; - - case STMT_FALLTHROUGH: - AddBlockDefs(s, false, true, true); - mgr.SetPostFromPre(s); - break; - - default: - mgr.SetPostFromPre(s); - break; - } - - return TC_CONTINUE; - } - -void RD_Decorate::CreateEmptyPostRDs(const Stmt* s) - { - auto empty_rds = make_intrusive(); - mgr.SetPostRDs(s, empty_rds, empty_rds); - } - -void RD_Decorate::AddBlockDefs(const Stmt* s, bool is_pre, bool is_future, bool is_case) - { - auto rds = mgr.GetPreMaxRDs(s); - - // Walk backward through the block defs finding the appropriate - // match to this one. - for ( int i = block_defs.size() - 1; i >= 0; --i ) - { - auto& bd = block_defs[i]; - - if ( bd->IsCase() == is_case ) - { // This one matches what we're looking for. - if ( is_pre ) - bd->AddPreRDs(rds); - else - { - bd->AddPostRDs(rds); - if ( is_future ) - bd->AddFutureRDs(rds); - } - return; - } - } - - reporter->InternalError("didn't find matching block defs"); - } - -bool RD_Decorate::CheckLHS(const Expr* lhs, const Expr* e) - { - // e can be an EXPR_ASSIGN or an EXPR_APPEND_TO. - auto rhs = e->GetOp2(); - - switch ( lhs->Tag() ) - { - case EXPR_REF: - { - auto r = lhs->AsRefExpr(); - mgr.SetPreFromPre(r->Op(), lhs); - return CheckLHS(r->Op(), e); - } - - case EXPR_NAME: - { - auto n = lhs->AsNameExpr(); - auto id = n->Id(); - - CreateInitPostDef(id, DefinitionPoint(e), false, rhs.get()); - - return true; - } - - case EXPR_LIST: - { // look for [a, b, c] = any_val - auto l = lhs->AsListExpr(); - for ( const auto& expr : l->Exprs() ) - { - if ( expr->Tag() != EXPR_NAME ) - // This will happen for table initializers, - // for example. - return false; - - auto n = expr->AsNameExpr(); - auto id = n->Id(); - - // Since the typing on the RHS may be dynamic, - // we don't try to do any inference of possible - // missing fields, hence "true" in the following. - CreateInitPostDef(id, DefinitionPoint(e), true, 0); - } - - return true; - } - - case EXPR_FIELD: - { - auto f = lhs->AsFieldExpr(); - auto r = f->Op(); - - if ( r->Tag() != EXPR_NAME && r->Tag() != EXPR_FIELD ) - // This is a more complicated expression that we're - // not able to concretely track. - return false; - - // Recurse to traverse LHS so as to install its definitions. - mgr.SetPreFromPre(r, lhs); - r->Traverse(this); - - auto r_def = mgr.GetExprDI(r); - - if ( ! r_def ) - // This should have already generated a complaint. - // Avoid cascade. - return true; - - auto fn = f->FieldName(); - - auto field_rd = r_def->FindField(fn); - if ( ! field_rd ) - field_rd = r_def->CreateField(fn, f->GetType()); - - CreateInitPostDef(field_rd, DefinitionPoint(e), false, rhs.get()); - - return true; - } - - case EXPR_INDEX: - { - auto i_e = lhs->AsIndexExpr(); - auto aggr = i_e->Op1(); - auto index = i_e->Op2(); - - if ( aggr->Tag() == EXPR_NAME ) - { - // Count this as an initialization of the aggregate. - auto id = aggr->AsNameExpr()->Id(); - mgr.CreatePostDef(id, DefinitionPoint(e), false); - - // Don't recurse into assessing the aggregate itself, - // since it's okay in this context. However, we do - // need to recurse into the index, which could have - // problems (references to possibly uninitialized - // values). - mgr.SetPreFromPre(index, lhs); - index->Traverse(this); - return true; - } - - return false; - } - - default: - reporter->InternalError("bad tag in RD_Decorate::CheckLHS"); - } - } - -bool RD_Decorate::IsAggr(const Expr* e) const - { - if ( e->Tag() != EXPR_NAME ) - return false; - - auto n = e->AsNameExpr(); - auto id = n->Id(); - auto tag = id->GetType()->Tag(); - - return zeek::IsAggr(tag); - } - -void RD_Decorate::CheckVar(const Expr* e, const ID* id, bool check_fields) - { - if ( id->IsGlobal() ) - return; - - if ( analysis_options.usage_issues > 0 && ! mgr.HasPreMinRD(e, id) && - ! id->GetAttr(ATTR_IS_ASSIGNED) ) - e->Warn("possibly used without definition"); - - if ( check_fields && id->GetType()->Tag() == TYPE_RECORD ) - { - auto di = mgr.GetID_DI(id); - auto e_pre = mgr.GetPreMinRDs(e); - CheckRecordRDs(di, DefinitionPoint(e), e_pre, e); - } - } - -TraversalCode RD_Decorate::PreExpr(const Expr* e) - { - ASSERT(mgr.HasPreMinRDs(e)); - ASSERT(mgr.HasPreMaxRDs(e)); - - // There are no control flow or confluence issues - the latter - // holds when working on reduced expressions; perverse assignments - // inside &&/|| introduce confluence issues, but that won't lead - // to optimization issues, just imprecision in tracking uninitialized - // values. - mgr.SetPostFromPre(e); - - switch ( e->Tag() ) - { - case EXPR_NAME: - CheckVar(e, e->AsNameExpr()->Id(), true); - break; - - case EXPR_LIST: - { - auto l = e->AsListExpr(); - for ( const auto& expr : l->Exprs() ) - mgr.SetPreFromPre(expr, e); - - break; - } - - case EXPR_INCR: - case EXPR_DECR: - { - auto lval = e->GetOp1(); - auto lhs = lval->AsRefExprPtr()->Op(); - - mgr.SetPreFromPre(lval.get(), e); - - if ( lhs->Tag() == EXPR_NAME ) - (void)CheckLHS(lhs, e); - break; - } - - case EXPR_ADD_TO: - { - auto a_t = e->AsAddToExpr(); - auto lhs = a_t->Op1(); - auto rhs = a_t->Op2(); - - mgr.SetPreFromPre(lhs, e); - mgr.SetPreFromPre(rhs, e); - - if ( IsAggr(lhs) ) - { - auto lhs_n = lhs->AsNameExpr(); - auto lhs_id = lhs_n->Id(); - - // Treat this as an initalization of the set. - mgr.CreatePostDef(lhs_id, DefinitionPoint(a_t), false); - - mgr.SetPreFromPre(rhs, e); - rhs->Traverse(this); - - return TC_ABORTSTMT; - } - - break; - } - - case EXPR_ASSIGN: - { - auto a = e->AsAssignExpr(); - auto lhs = a->Op1(); - auto rhs = a->Op2(); - - if ( lhs->Tag() == EXPR_LIST && rhs->GetType()->Tag() != TYPE_ANY ) - { - // This combination occurs only for assignments used - // to initialize table entries. Treat it as references - // to both the lhs and the rhs, not as an assignment. - mgr.SetPreFromPre(a->GetOp1().get(), a); - mgr.SetPreFromPre(a->GetOp2().get(), a); - return TC_CONTINUE; - } - - bool rhs_aggr = IsAggr(rhs); - - mgr.SetPreFromPre(lhs, a); - mgr.SetPreFromPre(rhs, a); - - if ( ! rhs_aggr ) - { - rhs->Traverse(this); - - // The RHS could have established a pseudo-RD - // due to a ?$ operation. - mgr.SetPostFromPost(e, rhs); - } - - if ( CheckLHS(lhs, a) ) - return TC_ABORTSTMT; - - if ( rhs_aggr ) - { - // No need to analyze the RHS. - lhs->Traverse(this); - return TC_ABORTSTMT; - } - - // Too hard to figure out what's going on with the assignment. - // Just analyze it in terms of values it accesses. - break; - } - - case EXPR_INDEX_ASSIGN: - { - auto a = e->AsIndexAssignExpr(); - auto aggr = a->Op1(); - auto index = a->Op2(); - auto rhs = a->GetOp3().get(); - - bool rhs_aggr = IsAggr(rhs); - - mgr.SetPreFromPre(aggr, a); - mgr.SetPreFromPre(index, a); - mgr.SetPreFromPre(rhs, a); - - if ( aggr->Tag() == EXPR_NAME ) - { - // Don't treat this as an initialization of the - // aggregate, since what's changing is instead - // an element of it. - } - else - aggr->Traverse(this); - - index->Traverse(this); - rhs->Traverse(this); - - return TC_ABORTSTMT; - } - - case EXPR_FIELD_LHS_ASSIGN: - { - auto f = e->AsFieldLHSAssignExpr(); - auto aggr = f->Op1(); - auto r = f->Op2(); - - mgr.SetPreFromPre(aggr, e); - mgr.SetPreFromPre(r, e); - - if ( aggr->Tag() == EXPR_NAME ) - { - // Don't treat as an initialization of the aggregate. - } - else - aggr->Traverse(this); - - r->Traverse(this); - - auto r_def = mgr.GetExprDI(aggr); - if ( ! r_def ) - // This should have already generated a complaint. - // Avoid cascade. - break; - - auto offset = f->Field(); - auto field_rd = r_def->FindField(offset); - - if ( ! field_rd ) - field_rd = r_def->CreateField(offset, f->GetType()); - - CreateInitPostDef(field_rd, DefinitionPoint(e), false, r); - - return TC_ABORTSTMT; - } - - case EXPR_FIELD: - { - auto f = e->AsFieldExpr(); - auto r = f->Op(); - - mgr.SetPreFromPre(r, e); - - if ( r->Tag() != EXPR_NAME && r->Tag() != EXPR_FIELD ) - break; - - if ( analysis_options.usage_issues > 1 ) - { - auto r_def = mgr.GetExprDI(r); - - if ( r_def && ! r_def->RootID()->GetAttr(ATTR_IS_ASSIGNED) ) - { - auto fn = f->FieldName(); - auto field_rd = mgr.GetConstID_DI(r_def.get(), fn); - - auto e_pre = mgr.GetPreMinRDs(e); - if ( ! field_rd || ! e_pre->HasDI(field_rd) ) - printf("record field possibly used without being set: %s\n", - obj_desc(e).c_str()); - } - } - - if ( r->Tag() == EXPR_NAME ) - { - auto r_id = r->AsNameExpr()->Id(); - if ( r_id->IsGlobal() ) - // Don't worry about record fields in globals. - return TC_ABORTSTMT; - - // For names, we care about checking the name - // itself, but if it's a record we don't want to - // complain about missing fields, as they're - // irrelevant other than the one specifically - // being referenced. So we do the CheckVar here - // and don't descend recursively. - CheckVar(r, r_id, false); - } - - else - // Recursively check the subexpression. - r->Traverse(this); - - return TC_ABORTSTMT; - } - - case EXPR_HAS_FIELD: - { - auto hf = e->AsHasFieldExpr(); - auto r = hf->Op(); - - mgr.SetPreFromPre(r, e); - - // Treat this as a definition of r$fn, since it's - // ensuring that that field exists. That's not quite - // right, since this expression's parent could be a - // negation, but at least we know that the script - // writer is thinking about whether it's defined. - - if ( r->Tag() == EXPR_NAME ) - { - auto id_e = r->AsNameExpr(); - auto id = id_e->Id(); - auto id_rt = id_e->GetType()->AsRecordType(); - auto id_di = mgr.GetID_DI(id); - - if ( ! id_di ) - { - printf("%s possibly used without definition\n", id->Name()); - break; - } - - auto fn = hf->FieldName(); - auto ft = id_rt->GetFieldType(fn); - auto field_rd = id_di->CreateField(fn, std::move(ft)); - - CreateInitPostDef(field_rd, DefinitionPoint(hf), true, 0); - - // Don't analyze r itself, since it's not expected - // to be defined here. - return TC_ABORTSTMT; - } - - break; - } - - case EXPR_CALL: - { - auto c = e->AsCallExpr(); - auto f = c->Func(); - auto args_l = c->Args(); - - // If one of the arguments is an aggregate, then - // it's actually passed by reference, and we shouldn't - // ding it for not being initialized. In addition, - // we should treat this as a definition of the - // aggregate, because while it can't be actually - // reassigned, all of its dynamic properties can change - // due to the call. (In the future, we could consider - // analyzing the call to see whether this is in fact - // the case.) - // - // We handle all of this by just doing the traversal - // ourselves. - - mgr.SetPreFromPre(f, e); - f->Traverse(this); - - mgr.SetPreFromPre(args_l, e); - - for ( const auto& expr : args_l->Exprs() ) - { - mgr.SetPreFromPre(expr, e); - - if ( IsAggr(expr) ) - // Not only do we skip analyzing it, but - // we consider it initialized post-return. - mgr.CreatePostDef(expr->AsNameExpr()->Id(), DefinitionPoint(c), false); - else - expr->Traverse(this); - } - - // Kill definitions dependent on globals that might have - // been modified by the call. In the future, we can - // aim to comprehensively understand which globals could - // possibly be altered, but for now we just assume they - // all could. - for ( const auto& g : pf->Globals() ) - if ( ! g->IsConst() ) - mgr.CreatePostDef(g, DefinitionPoint(c), false); - - return TC_ABORTSTMT; - } - - case EXPR_INLINE: - { - auto inl = e->AsInlineExpr(); - mgr.SetPreFromPre(inl->Args().get(), inl); - mgr.SetPreFromPre(inl->Body().get(), inl); - break; - } - - case EXPR_COND: - // Special hack. We don't bother traversing the operands - // of conditionals. This is because we use them heavily - // to deconstruct logical expressions for which the - // actual operand access is safe (guaranteed not to - // access a value that hasn't been undefined), but the - // flow analysis has trouble determining that. In principle - // we could do a bit better here and only traverse operands - // that aren't temporaries, but that's a bit of a pain - // to discern. - mgr.SetPreFromPre(e->GetOp1().get(), e); - mgr.SetPreFromPre(e->GetOp2().get(), e); - mgr.SetPreFromPre(e->GetOp3().get(), e); - - e->GetOp1()->Traverse(this); - - return TC_ABORTSTMT; - - case EXPR_RECORD_CONSTRUCTOR: - { - auto r = static_cast(e); - auto l = r->Op(); - mgr.SetPreFromPre(l.get(), e); - break; - } - - case EXPR_LAMBDA: - { - auto l = static_cast(e); - const auto& ids = l->OuterIDs(); - - for ( auto& id : ids ) - CheckVar(e, id, false); - - // Don't descend into the lambda body - we analyze and - // optimize it separately, as its own function. - return TC_ABORTSTMT; - } - - default: - if ( e->GetOp1() ) - mgr.SetPreFromPre(e->GetOp1().get(), e); - if ( e->GetOp2() ) - mgr.SetPreFromPre(e->GetOp2().get(), e); - if ( e->GetOp3() ) - mgr.SetPreFromPre(e->GetOp3().get(), e); - - break; - } - - return TC_CONTINUE; - } - -TraversalCode RD_Decorate::PostExpr(const Expr* e) - { - if ( e->Tag() == EXPR_APPEND_TO ) - { - // We don't treat the expression as an initialization - // in the PreExpr phase, because we want to catch a - // possible uninitialized LHS. But now we can since - // it's definitely initialized after executing. - auto lhs = e->GetOp1(); - - (void)CheckLHS(lhs.get(), e); - } - - return TC_CONTINUE; - } - -void RD_Decorate::CreateInitPreDef(const ID* id, DefinitionPoint dp) - { - auto di = mgr.GetID_DI(id); - if ( ! di ) - return; - - CreateInitDef(di, dp, true, true, nullptr); - } - -void RD_Decorate::CreateInitPostDef(const ID* id, DefinitionPoint dp, bool assume_full, - const Expr* rhs) - { - auto di = mgr.GetID_DI(id); - if ( ! di ) - return; - - CreateInitDef(di, dp, false, assume_full, rhs); - } - -void RD_Decorate::CreateInitPostDef(std::shared_ptr di, DefinitionPoint dp, - bool assume_full, const Expr* rhs) - { - CreateInitDef(std::move(di), dp, false, assume_full, rhs); - } - -void RD_Decorate::CreateInitDef(std::shared_ptr di, DefinitionPoint dp, bool is_pre, - bool assume_full, const Expr* rhs) - { - if ( is_pre ) - mgr.CreatePreDef(di, dp, false); - else - mgr.CreatePostDef(di, dp, false); - - if ( di->GetType()->Tag() != TYPE_RECORD ) - return; - - std::shared_ptr rhs_di; - - if ( rhs ) - { - if ( rhs->GetType()->Tag() == TYPE_ANY ) - // All bets are off. - assume_full = true; - - else - { - rhs_di = mgr.GetExprDI(rhs); - - if ( ! rhs_di ) - // This happens because the RHS is an - // expression more complicated than just a - // variable or a field reference. Just assume - // it's fully initialized. - assume_full = true; - } - } - - CreateRecordRDs(std::move(di), dp, is_pre, assume_full, rhs_di.get()); - } - -void RD_Decorate::CreateRecordRDs(std::shared_ptr di, DefinitionPoint dp, - bool is_pre, bool assume_full, const DefinitionItem* rhs_di) - { - auto rt = di->GetType()->AsRecordType(); - auto n = rt->NumFields(); - - for ( auto i = 0; i < n; ++i ) - { - auto n_i = rt->FieldName(i); - const auto& t_i = rt->GetFieldType(i); - auto rhs_di_i = rhs_di ? rhs_di->FindField(n_i) : nullptr; - - bool field_is_defined = false; - - if ( assume_full ) - field_is_defined = true; - - else if ( rhs_di_i ) - field_is_defined = true; - - else if ( rt->FieldHasAttr(i, ATTR_DEFAULT) ) - field_is_defined = true; - - else if ( ! rt->FieldHasAttr(i, ATTR_OPTIONAL) && ! is_atomic_type(t_i) ) - // Non-optional aggregates within records will be - // initialized. - field_is_defined = true; - - if ( ! field_is_defined ) - continue; - - auto di_i = di->CreateField(n_i, t_i); - - if ( is_pre ) - mgr.CreatePreDef(di_i, dp, true); - else - mgr.CreatePostDef(di_i, dp, true); - - // Only track RDs associated with record fields if we're - // looking to report associated usage issues, because - // it's quite expensive to do so. - if ( analysis_options.usage_issues > 1 ) - if ( t_i->Tag() == TYPE_RECORD ) - CreateRecordRDs(di_i, dp, is_pre, assume_full, rhs_di_i.get()); - } - } - -void RD_Decorate::CheckRecordRDs(std::shared_ptr di, DefinitionPoint dp, - const RDPtr& pre_rds, const Obj* o) - { - CreateRecordRDs(di, dp, false, nullptr); - - auto root_id = di->RootID(); - if ( root_id->GetAttr(ATTR_IS_ASSIGNED) ) - // No point checking for unset fields. - return; - - auto rt = di->GetType()->AsRecordType(); - auto num_fields = rt->NumFields(); - - for ( auto i = 0; i < num_fields; ++i ) - { - if ( rt->FieldHasAttr(i, ATTR_DEFAULT) || rt->FieldHasAttr(i, ATTR_OPTIONAL) || - rt->FieldHasAttr(i, ATTR_IS_ASSIGNED) ) - continue; - - auto n_i = rt->FieldName(i); - auto field_di = di->FindField(n_i); - - if ( analysis_options.usage_issues <= 1 ) - continue; - - // The following works correctly, but finds a number - // of places in the base scripts where indeed non-optional - // record elements are not initialized. - if ( ! field_di || ! pre_rds->HasDI(field_di.get()) ) - { - printf("%s$%s (%s) possibly used without being set\n", di->Name(), n_i, - obj_desc(o).c_str()); - } - - else - { - // The following allows us to comprehensively track - // nested records to see if any uninitialized elements - // might be used. However, it is also computationally - // very heavy if run on the full code base because - // there are some massive records (in some places - // nested 5 deep). - const auto& t_i = rt->GetFieldType(i); - if ( t_i->Tag() == TYPE_RECORD ) - CheckRecordRDs(field_di, dp, pre_rds, o); - } - } - } - - } // zeek::detail diff --git a/src/script_opt/GenRDs.h b/src/script_opt/GenRDs.h deleted file mode 100644 index 7c3f74d289..0000000000 --- a/src/script_opt/GenRDs.h +++ /dev/null @@ -1,146 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -// Class for generating Reaching Definitions by traversing a function -// body's AST. - -#pragma once - -#include - -#include "zeek/script_opt/DefSetsMgr.h" -#include "zeek/script_opt/ProfileFunc.h" -#include "zeek/script_opt/ReachingDefs.h" - -namespace zeek::detail - { - -// Helper class that tracks definitions gathered in a block that either -// need to be propagated to the beginning of the block or to the end. -// Used for RD propagation due to altered control flow (next/break/fallthrough). -// Managed as a stack (vector) to deal with nested loops, switches, etc. -// Only applies to gathering maximum RDs. -class BlockDefs - { -public: - BlockDefs(bool _is_case) { is_case = _is_case; } - - void AddPreRDs(RDPtr RDs) { pre_RDs.push_back(std::move(RDs)); } - void AddPostRDs(RDPtr RDs) { post_RDs.push_back(std::move(RDs)); } - void AddFutureRDs(RDPtr RDs) { future_RDs.push_back(std::move(RDs)); } - - const std::vector& PreRDs() const { return pre_RDs; } - const std::vector& PostRDs() const { return post_RDs; } - const std::vector& FutureRDs() const { return future_RDs; } - - void Clear() - { - pre_RDs.clear(); - post_RDs.clear(); - future_RDs.clear(); - } - - bool IsCase() const { return is_case; } - -private: - std::vector pre_RDs; - std::vector post_RDs; - std::vector future_RDs; // RDs for next case block - - // Whether this block is for a switch case. If not, - // it's for a loop body. - bool is_case; - }; - -class RD_Decorate : public TraversalCallback - { -public: - RD_Decorate(std::shared_ptr _pf, const Func* f, ScopePtr scope, StmtPtr body); - - const DefSetsMgr* GetDefSetsMgr() const { return &mgr; } - -private: - // Traverses the given function body, using the first two - // arguments for context. - void TraverseFunction(const Func* f, ScopePtr scope, StmtPtr body); - - TraversalCode PreStmt(const Stmt*) override; - TraversalCode PostStmt(const Stmt*) override; - TraversalCode PreExpr(const Expr*) override; - TraversalCode PostExpr(const Expr*) override; - - // The following implement various types of "confluence", i.e., - // situations in which control flow merges from multiple possible - // paths to a given point. - void TraverseSwitch(const SwitchStmt* sw); - void DoIfStmtConfluence(const IfStmt* i); - void DoLoopConfluence(const Stmt* s, const Stmt* top, const Stmt* body); - - // Analyzes the target of an assignment. Returns true if the LHS - // was an expression for which we can track it as a definition - // (e.g., assignments to variables or record fields, but not to - // table or vector elements). - bool CheckLHS(const Expr* lhs, const Expr* a); - - // True if the given expression directly represents an aggregate. - bool IsAggr(const Expr* e) const; - - // Checks for whether the given identifier present in the given - // expression is undefined at that point, per the associated RDs. - // If check_fields is true, then we check the fields of records - // in addition to the record itself. - void CheckVar(const Expr* e, const ID* id, bool check_fields); - - // The following enable tracking of either identifiers or - // record fields before/after the given definition point. - void CreateInitPreDef(const ID* id, DefinitionPoint dp); - void CreateInitPostDef(const ID* id, DefinitionPoint dp, bool assume_full, const Expr* rhs); - void CreateInitPostDef(std::shared_ptr di, DefinitionPoint dp, bool assume_full, - const Expr* rhs); - void CreateInitDef(std::shared_ptr di, DefinitionPoint dp, bool is_pre, - bool assume_full, const Expr* rhs); - - // Helper functions for generating RDs associated with record - // fields. - void CreateRecordRDs(std::shared_ptr di, DefinitionPoint dp, bool assume_full, - const DefinitionItem* rhs_di) - { - CreateRecordRDs(std::move(di), dp, false, assume_full, rhs_di); - } - void CreateRecordRDs(std::shared_ptr di, DefinitionPoint dp, bool is_pre, - bool assume_full, const DefinitionItem* rhs_di); - void CheckRecordRDs(std::shared_ptr di, DefinitionPoint dp, - const RDPtr& pre_rds, const Obj* o); - - void CreateEmptyPostRDs(const Stmt* s); - - // Helper function for tracking block definitions, i.e., those - // associated with loops or switches. We always track the - // maximal "pre" RDs for the given statement. If is_pre is - // true, then we track them as RDs to propagate to the beginning - // of the block. Otherwise, they are to propagate to the end - // of the block; and, if is_future is true, then also to the - // beginning of the next block (used for "fallthrough" switch - // blocks). - // - // is_case specifies whether we are adding definitions associated - // with a switch case. - void AddBlockDefs(const Stmt* s, bool is_pre, bool is_future, bool is_case); - - // Profile for the function. Currently, all we actually need from - // this is the list of globals. - std::shared_ptr pf; - - // Whether the Func is an event/hook/function. We currently only - // need to know whether it's a hook, so we correctly interpret an - // outer "break" in that context. - FunctionFlavor func_flavor; - - // Manager for the associated pre/post minimal/maximal RDs. - DefSetsMgr mgr; - - // A stack of definitions associated with (potentially nested) loop - // and switch blocks. - std::vector> block_defs; - }; - - } // zeek::detail diff --git a/src/script_opt/ReachingDefs.cc b/src/script_opt/ReachingDefs.cc deleted file mode 100644 index 8ff262de92..0000000000 --- a/src/script_opt/ReachingDefs.cc +++ /dev/null @@ -1,211 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "zeek/script_opt/ReachingDefs.h" - -#include "zeek/Desc.h" - -namespace zeek::detail - { - -ReachingDefs::ReachingDefs() - { - my_rd_map = std::make_shared(); - const_rd_map = nullptr; - } - -ReachingDefs::ReachingDefs(RDPtr rd) - { - const_rd_map = rd->RDMap(); - my_rd_map = nullptr; - } - -void ReachingDefs::AddRD(const DefinitionItem* di, const DefinitionPoint& dp) - { - auto points = FindItem(di); - - if ( points && HasPoint(dp, *points) ) - return; - - if ( ! my_rd_map ) - { - CopyMap(); - points = FindItem(di); - } - - if ( points ) - points->push_back(dp); - else - my_rd_map->emplace(di, std::make_unique(&dp, 1)); - } - -void ReachingDefs::AddOrFullyReplace(const DefinitionItem* di, const DefinitionPoint& dp) - { - CopyMapIfNeeded(); - - auto curr_defs = my_rd_map->find(di); - - if ( curr_defs != my_rd_map->end() ) - my_rd_map->erase(curr_defs); - - AddRD(di, dp); - } - -RDPtr ReachingDefs::Intersect(const RDPtr& r) const - { - // The following is used when there are different definitions for - // the same item in the intersection, as a way to capture "it will - // be defined", but not providing a specific point-of-definition - // (since it's ambiguous which one in particular to use). - static DefinitionPoint multi_dps; - - auto res = make_intrusive(); - - for ( const auto& i : *RDMap() ) - for ( const auto& dp : *i.second ) - { - if ( r->HasPair(i.first, dp) ) - // Same definition present in both. - res->AddRD(i.first, dp); - - else if ( r->HasDI(i.first) ) - // There's a definition in r, just not the same - // one. Mark as present-but-not-specific. - res->AddRD(i.first, multi_dps); - } - - return res; - } - -RDPtr ReachingDefs::Union(const RDPtr& r) const - { - auto res = make_intrusive(); - - res->AddRDs(r); - - for ( const auto& i : *RDMap() ) - for ( const auto& dp : *i.second ) - res->AddRD(i.first, dp); - - return res; - } - -RDPtr ReachingDefs::IntersectWithConsolidation(const RDPtr& r, const DefinitionPoint& di) const - { - // Same notion as for the Intersect method. - static DefinitionPoint multi_dps; - - auto res = make_intrusive(); - - for ( const auto& i : *RDMap() ) - for ( const auto& dp : *i.second ) - { - if ( r->HasPair(i.first, dp) ) - // Item and definition point are shared, - // include in result. - res->AddRD(i.first, dp); - else - // Regardless of whether r has the item, - // treat it as such and capture this as - // a multi-definition-point definition. - res->AddRD(i.first, multi_dps); - } - - return res; - } - -bool ReachingDefs::HasPair(const DefinitionItem* di, const DefinitionPoint& dp) const - { - auto points = FindItem(di); - return points && HasPoint(dp, *points); - } - -DefPoints* ReachingDefs::FindItem(const DefinitionItem* di) const - { - const auto& map = RDMap(); - auto it = map->find(di); - - if ( it == map->end() ) - return nullptr; - - return it->second.get(); - } - -bool ReachingDefs::HasPoint(const DefinitionPoint& dp, const DefPoints& dps) const - { - for ( const auto& l_dp : dps ) - if ( l_dp.SameAs(dp) ) - return true; - - return false; - } - -void ReachingDefs::AddRDs(const std::shared_ptr& rd_m) - { - for ( const auto& one_rd : *rd_m ) - for ( const auto& dp : *one_rd.second ) - AddRD(one_rd.first, dp); - } - -void ReachingDefs::CopyMap() - { - my_rd_map = std::make_shared(); - auto old_const_rd_map = std::move(const_rd_map); - const_rd_map = nullptr; - AddRDs(old_const_rd_map); - } - -void ReachingDefs::Dump() const - { - DumpMap(RDMap()); - } - -void ReachingDefs::DumpMap(const std::shared_ptr& map) const - { - printf("%d RD element%s: ", Size(), Size() == 1 ? "" : "s"); - - int n = 0; - for ( auto r = map->begin(); r != map->end(); ++r ) - { - if ( ++n > 1 ) - printf(", "); - - PrintRD(r->first, r->second.get()); - } - - printf("\n"); - } - -void ReachingDefs::PrintRD(const DefinitionItem* di, const DefPoints* dps) const - { - printf("%s (", di->Name()); - - loop_over_list(*dps, i) - { - if ( i > 0 ) - printf(","); - printf("%lx", (unsigned long)(*dps)[i].OpaqueVal()); - } - - printf(")"); - } - -const RDPtr& ReachingDefSet::FindRDs(const Obj* o) const - { - auto rd = a_i.find(o); - if ( rd == a_i.end() ) - { - static RDPtr empty_rds; - return empty_rds; - } - - return rd->second; - } - -void ReachingDefSet::AddOrReplace(const Obj* o, const DefinitionItem* di, const DefinitionPoint& dp) - { - auto rd = a_i.find(o); - ASSERT(rd != a_i.end()); - rd->second->AddOrFullyReplace(di, dp); - } - - } // zeek::detail diff --git a/src/script_opt/ReachingDefs.h b/src/script_opt/ReachingDefs.h deleted file mode 100644 index 9d879e518a..0000000000 --- a/src/script_opt/ReachingDefs.h +++ /dev/null @@ -1,246 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#pragma once - -#include - -#include "zeek/script_opt/DefItem.h" - -namespace zeek::detail - { - -// Maps a DefinitionItem (i.e., a variable or a record field within a -// DefinitionItem) to a list of all the points where the item is defined. -// -// Those points are specific to a given location in a script (see -// AnalyInfo below). For example, right after an assignment to a variable, -// it will have exactly one associated point (the assignment). But at -// other points there can be more than one reaching definition; for example, -// a variable defined in both branches of an if-else will cause the location -// in the script after the if-else statement to have both of those definitions -// as (maximally) reaching. - -typedef List DefPoints; -typedef std::unordered_map> ReachingDefsMap; - -// The ReachingDefs class tracks all of the RDs associated with a given -// AST node. Often these are the same as for the node's predecessor, so -// the class allows for either have a distinct set of RDs or instead -// pointing to the predecessor's RDs. - -class ReachingDefs; -class ReachingDefSet; -using RDPtr = IntrusivePtr; -using RDSetPtr = IntrusivePtr; - -class ReachingDefs : public Obj - { -public: - // Create a new object from scratch. - ReachingDefs(); - - // Create a new object, using the RDs from another object. - ReachingDefs(RDPtr rd); - - // Add in all the definition points from rd into our set, if - // we don't already have them. - void AddRDs(const RDPtr& rd) { AddRDs(rd->RDMap()); } - - // Add in a single definition pair, creating the entry for - // the item if necessary. - void AddRD(const DefinitionItem* di, const DefinitionPoint& dp); - - // Add a single definition pair, if missing. If present, - // replace everything currently associated with new definition. - void AddOrFullyReplace(const DefinitionItem* di, const DefinitionPoint& dp); - - // True if the given definition item is present in our RDs. - bool HasDI(const DefinitionItem* di) const - { - const auto& map = RDMap(); - return map->find(di) != map->end(); - } - - // For the given definition item, returns all of its definition - // points at our location in the AST. - DefPoints* GetDefPoints(const DefinitionItem* di) - { - const auto& map = RDMap(); - auto dps = map->find(di); - return dps == map->end() ? nullptr : dps->second.get(); - } - - // Returns true if two sets of definition points are equivalent, - // *including ordering*. - bool SameDefPoints(const DefPoints* dp1, const DefPoints* dp2) const - { - if ( ! dp1 || ! dp2 ) - return ! dp1 && ! dp2; - - if ( dp1->length() != dp2->length() ) - return false; - - for ( auto i = 0; i < dp1->length(); ++i ) - if ( ! (*dp1)[i].SameAs((*dp2)[i]) ) - return false; - - return true; - } - - // Return a new object representing the intersection/union of - // this object's RDs and those of another. - RDPtr Intersect(const RDPtr& r) const; - RDPtr Union(const RDPtr& r) const; - - // The following intersects this RD with another, but for - // DefinitionItem's that have different DefPoints, rather than - // just fully omitting them (which is what Intersect() will do), - // creates a joint entry with a special DefinitionPoint - // corresponding to "multiple definitions". This allows - // minimal RDs to capture the notions (1) "yes, that value will - // be defined at this point", but also (2) "however, we can't - // rely on which definition reaches". - // - // We also do this for items *not* present in r. The reason is - // that this method is only called (1) when we know that the items - // in r have control flow to di, and (2) for "this" being minimal - // RDs that were present going into the block that resulted in r. - // Thus, those minimal values will always be present at di; they - // might not be in r due to the way that r is computed. (For - // example, computing them correctly for "for" loop bodies is - // messy, and irrevant in terms of actually having the correct - // values for them.) - RDPtr IntersectWithConsolidation(const RDPtr& r, const DefinitionPoint& di) const; - - int Size() const { return RDMap()->size(); } - - // Print out the RDs, for debugging purposes. - void Dump() const; - void DumpMap(const std::shared_ptr& map) const; - -protected: - // True if our RDs include the given definition item, defined at - // the given definition point. - bool HasPair(const DefinitionItem* di, const DefinitionPoint& dp) const; - - DefPoints* FindItem(const DefinitionItem* di) const; - - bool HasPoint(const DefinitionPoint&, const DefPoints& dps) const; - - // Adds in the given RDs if we don't already have them. - void AddRDs(const std::shared_ptr& rd_m); - - const std::shared_ptr& RDMap() const - { - return my_rd_map ? my_rd_map : const_rd_map; - } - - // If we don't already have our own map, copy the one we're using - // so that we then do. - void CopyMapIfNeeded() - { - if ( ! my_rd_map ) - CopyMap(); - } - void CopyMap(); - - void PrintRD(const DefinitionItem* di, const DefPoints* dp) const; - void PrintRD(const DefinitionItem* di, const DefinitionPoint& dp) const; - - // If my_rd_map is non-nil, then we use that map. Otherwise, - // we use the map that const_rd_map points to. The "const" in - // the latter's name is a reminder to not make any changes to - // that map. - std::shared_ptr my_rd_map; - std::shared_ptr const_rd_map; - }; - -// Maps script locations (which are represented by their underlying Obj -// pointers) to the reaching definitions for that particular point. -// -// In spirit, the inverse of ReachingDefsMap. -typedef std::unordered_map AnalyInfo; - -// Reaching definitions associated with a collection of Obj's. -class ReachingDefSet : public Obj - { -public: - ReachingDefSet(DefItemMap& _item_map) : item_map(_item_map) { } - - // Whether in our collection we have RDs associated with the - // given AST node. - bool HasRDs(const Obj* o) const { return a_i.count(o) > 0; } - - // Whether in our collection we have RDs associated with the - // given variable. - bool HasRD(const Obj* o, const ID* id) const { return HasRD(o, item_map.GetConstID_DI(id)); } - - // Whether the given variable has a single = unambiguous RD - // at the given AST node. - bool HasSingleRD(const Obj* o, const ID* id) const - { - auto RDs = a_i.find(o); - if ( RDs == a_i.end() ) - return false; - - auto di = item_map.GetConstID_DI(id); - auto dps = RDs->second->GetDefPoints(di); - - if ( ! dps || dps->length() != 1 ) - return false; - - return (*dps)[0].Tag() != NO_DEF_POINT; - } - - // Whether the given definition item has an RD at the given - // AST node. - bool HasRD(const Obj* o, const DefinitionItem* di) const - { - auto RDs = a_i.find(o); - if ( RDs == a_i.end() ) - return false; - - return RDs->second->HasDI(di); - } - - // Returns the RDs associated with a given AST node, if any. - // If none are, returns an empty ReachingDef object. - const RDPtr& FindRDs(const Obj* o) const; - - // Associates the given RDs with the given AST node. - void SetRDs(const Obj* o, RDPtr rd) - { - auto new_rd = make_intrusive(std::move(rd)); - a_i[o] = new_rd; - } - - // If the given di is new, add this definition. If it - // already exists, replace *all* of its reaching definitions - // with this new one. - void AddOrReplace(const Obj* o, const DefinitionItem* di, const DefinitionPoint& dp); - - // Add the given RDs to those associated with the AST node 'o'. - void AddRDs(const Obj* o, const RDPtr& rd) - { - if ( HasRDs(o) ) - MergeRDs(o, rd); - else - a_i[o] = rd; - } - -protected: - // Merge in the given RDs with those associated with o's. - // - // The caller needs to ensure that we're already tracking the - // RDs of 'o'. - void MergeRDs(const Obj* o, const RDPtr& rd) - { - auto curr_rds = a_i.find(o)->second; - curr_rds->AddRDs(rd); - } - - AnalyInfo a_i; - DefItemMap& item_map; - }; - - } // zeek::detail diff --git a/src/script_opt/ScriptOpt.cc b/src/script_opt/ScriptOpt.cc index 0c950284bc..9f12373ce9 100644 --- a/src/script_opt/ScriptOpt.cc +++ b/src/script_opt/ScriptOpt.cc @@ -11,7 +11,6 @@ #include "zeek/script_opt/CPP/Compile.h" #include "zeek/script_opt/CPP/Func.h" #include "zeek/script_opt/GenIDDefs.h" -#include "zeek/script_opt/GenRDs.h" #include "zeek/script_opt/Inline.h" #include "zeek/script_opt/ProfileFunc.h" #include "zeek/script_opt/Reduce.h" @@ -144,12 +143,6 @@ static void optimize_func(ScriptFunc* f, std::shared_ptr pf, ScopeP f->ReplaceBody(body, new_body); body = new_body; - if ( analysis_options.usage_issues > 1 ) - { - // Use the old-school approach for this. - RD_Decorate reduced_rds(pf, f, scope, body); - } - if ( analysis_options.optimize_AST && ! optimize_AST(f, pf, rc, scope, body) ) { pop_scope(); @@ -268,7 +261,7 @@ static void init_options() auto usage = getenv("ZEEK_USAGE_ISSUES"); if ( usage ) - analysis_options.usage_issues = atoi(usage) > 1 ? 2 : 1; + analysis_options.usage_issues = 1; if ( ! analysis_options.only_func ) { diff --git a/src/script_opt/ScriptOpt.h b/src/script_opt/ScriptOpt.h index b2a4431a2d..065a9bb2c9 100644 --- a/src/script_opt/ScriptOpt.h +++ b/src/script_opt/ScriptOpt.h @@ -78,11 +78,9 @@ struct AnalyOpt bool dump_ZAM = false; // If non-zero, looks for variables that are used-but-possibly-not-set, - // or set-but-not-used. - // - // If > 1, also reports on uses of uninitialized record fields and - // analyzes nested records in depth. Warning: with the current - // data structures this greatly increases analysis time. + // or set-but-not-used. We store this as an int rather than a bool + // because we might at some point extend the analysis to deeper forms + // of usage issues, such as those present in record fields. // // Included here with other ZAM-related options since conducting // the analysis requires activating some of the machinery used diff --git a/src/script_opt/TempVar.h b/src/script_opt/TempVar.h index 9e530fe1d8..13d79a447e 100644 --- a/src/script_opt/TempVar.h +++ b/src/script_opt/TempVar.h @@ -10,7 +10,6 @@ #include "zeek/Expr.h" #include "zeek/ID.h" #include "zeek/script_opt/IDOptInfo.h" -#include "zeek/script_opt/ReachingDefs.h" namespace zeek::detail { @@ -43,9 +42,6 @@ public: IDPtr Alias() const { return alias; } void SetAlias(IDPtr id); - const RDPtr& MaxRDs() const { return max_rds; } - void SetMaxRDs(RDPtr rds) { max_rds = std::move(rds); } - protected: std::string name; IDPtr id; @@ -53,7 +49,6 @@ protected: ExprPtr rhs; bool active = true; IDPtr alias; - RDPtr max_rds; }; } // zeek::detail diff --git a/testing/btest/Baseline.cpp/language.uninitialized-local3/err b/testing/btest/Baseline.cpp/language.uninitialized-local3/err deleted file mode 100644 index 49d861c74c..0000000000 --- a/testing/btest/Baseline.cpp/language.uninitialized-local3/err +++ /dev/null @@ -1 +0,0 @@ -### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. diff --git a/testing/btest/Baseline.cpp/language.uninitialized-local3/out b/testing/btest/Baseline.cpp/language.uninitialized-local3/out deleted file mode 100644 index 1008aa9c80..0000000000 --- a/testing/btest/Baseline.cpp/language.uninitialized-local3/out +++ /dev/null @@ -1,6 +0,0 @@ -### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. -[a=, b=, c=9, d=, e=] -[a=, b=, c=9, d=, e=] -[no_worries=[a=, b=, c=9, d=, e=], worries=[a=, b=, c=9, d=, e=]] -[no_worries=[a=, b=, c=9, d=, e=], worries=[a=, b=, c=9, d=, e=]] -T diff --git a/testing/btest/Baseline.opt/language.uninitialized-local3/err b/testing/btest/Baseline.opt/language.uninitialized-local3/err deleted file mode 100644 index c48e1becc6..0000000000 --- a/testing/btest/Baseline.opt/language.uninitialized-local3/err +++ /dev/null @@ -1,4 +0,0 @@ -### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. -warning: "-O optimize-AST" option is incompatible with -u option, deactivating optimization -warning in <...>/uninitialized-local3.zeek, line 40: possibly used without definition (x4) -expression error in <...>/uninitialized-local3.zeek, line 40: value used but not set (x4) diff --git a/testing/btest/Baseline.opt/language.uninitialized-local3/out b/testing/btest/Baseline.opt/language.uninitialized-local3/out deleted file mode 100644 index 4e25b837a0..0000000000 --- a/testing/btest/Baseline.opt/language.uninitialized-local3/out +++ /dev/null @@ -1,10 +0,0 @@ -### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. -x$a (x <...>/uninitialized-local3.zeek, line 22) possibly used without being set -x$e (x <...>/uninitialized-local3.zeek, line 22) possibly used without being set -x$e (x <...>/uninitialized-local3.zeek, line 26) possibly used without being set -x2$worries$a (x2 <...>/uninitialized-local3.zeek, line 29) possibly used without being set -x2$worries$e (x2 <...>/uninitialized-local3.zeek, line 29) possibly used without being set -[a=, b=, c=9, d=, e=] -[a=, b=, c=9, d=, e=] -[no_worries=[a=, b=, c=9, d=, e=], worries=[a=, b=, c=9, d=, e=]] -[no_worries=[a=, b=, c=9, d=, e=], worries=[a=, b=, c=9, d=, e=]] diff --git a/testing/btest/Baseline/language.uninitialized-local3/err b/testing/btest/Baseline/language.uninitialized-local3/err deleted file mode 100644 index b5d175b9b9..0000000000 --- a/testing/btest/Baseline/language.uninitialized-local3/err +++ /dev/null @@ -1,3 +0,0 @@ -### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. -warning in <...>/uninitialized-local3.zeek, line 40: possibly used without definition (x4) -expression error in <...>/uninitialized-local3.zeek, line 40: value used but not set (x4) diff --git a/testing/btest/Baseline/language.uninitialized-local3/out b/testing/btest/Baseline/language.uninitialized-local3/out deleted file mode 100644 index 4e25b837a0..0000000000 --- a/testing/btest/Baseline/language.uninitialized-local3/out +++ /dev/null @@ -1,10 +0,0 @@ -### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. -x$a (x <...>/uninitialized-local3.zeek, line 22) possibly used without being set -x$e (x <...>/uninitialized-local3.zeek, line 22) possibly used without being set -x$e (x <...>/uninitialized-local3.zeek, line 26) possibly used without being set -x2$worries$a (x2 <...>/uninitialized-local3.zeek, line 29) possibly used without being set -x2$worries$e (x2 <...>/uninitialized-local3.zeek, line 29) possibly used without being set -[a=, b=, c=9, d=, e=] -[a=, b=, c=9, d=, e=] -[no_worries=[a=, b=, c=9, d=, e=], worries=[a=, b=, c=9, d=, e=]] -[no_worries=[a=, b=, c=9, d=, e=], worries=[a=, b=, c=9, d=, e=]] diff --git a/testing/btest/btest.cfg b/testing/btest/btest.cfg index 1e16baa790..e4de72f6c2 100644 --- a/testing/btest/btest.cfg +++ b/testing/btest/btest.cfg @@ -79,13 +79,6 @@ ZEEK_XFORM=1 ZEEK_USAGE_ISSUES=1 BTEST_BASELINE_DIR=%(testbase)s/Baseline.usage:%(testbase)s/Baseline.xform:%(testbase)s/Baseline -# The same, but for -uu (which supercedes -u functionality). We keep them -# separate because -uu is a lot slower. -[environment-usage2] -ZEEK_XFORM=1 -ZEEK_USAGE_ISSUES=2 -BTEST_BASELINE_DIR=%(testbase)s/Baseline.usage:%(testbase)s/Baseline.xform:%(testbase)s/Baseline - [environment-cpp] ZEEK_USE_CPP=1 BTEST_BASELINE_DIR=%(testbase)s/Baseline.cpp:%(testbase)s/Baseline diff --git a/testing/btest/language/uninitialized-local3.zeek b/testing/btest/language/uninitialized-local3.zeek deleted file mode 100644 index 6f6261e470..0000000000 --- a/testing/btest/language/uninitialized-local3.zeek +++ /dev/null @@ -1,44 +0,0 @@ -# @TEST-REQUIRES: test "${ZEEK_ZAM}" != "1" -# @TEST-EXEC: ZEEK_USAGE_ISSUES=2 zeek -b %INPUT >out 2>err -# @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-remove-abspath btest-diff out -# @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-remove-abspath btest-diff err - -type r: record { - a: count; - b: count &optional; - c: count &default = 9; - d: string &is_assigned; - e: string; -}; - -type r2: record { - no_worries: r &is_assigned; - worries: r; -}; - -event zeek_init() - { - local x: r; - print x; - - if ( x?$a ) - x$e = "I'm set"; - print x; # should complain about $e, but not about $a - - local x2: r2; - print x2; - - local x3: r2 &is_assigned; - print x3; - - local x4: count; - # note, no execution after this point due to error - - # We use this slightly baroque expression because compiled code - # may have x4 genuinely uninitialized, and we want deterministic - # output in that case. - if ( x4 > 5 ) - print T; - else - print T; - } From 385e49491be9ecd57a40fa9cf313ac55422433b4 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 22 Sep 2021 11:18:52 -0700 Subject: [PATCH 2/4] script simplification that removes an unnecessary &is_assigned --- scripts/base/protocols/conn/main.zeek | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/scripts/base/protocols/conn/main.zeek b/scripts/base/protocols/conn/main.zeek index bd8a8d1261..5f2999b206 100644 --- a/scripts/base/protocols/conn/main.zeek +++ b/scripts/base/protocols/conn/main.zeek @@ -239,10 +239,7 @@ function determine_service(c: connection): string function set_conn(c: connection, eoc: bool) { if ( ! c?$conn ) - { - local tmp: Info &is_assigned; - c$conn = tmp; - } + c$conn = Info(); c$conn$ts=c$start_time; c$conn$uid=c$uid; From 1ff7ff06a8efe53e3738381b0032317d0f247532 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 22 Sep 2021 11:21:46 -0700 Subject: [PATCH 3/4] documentation update --- doc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc b/doc index 09b610c72c..354c8e9bef 160000 --- a/doc +++ b/doc @@ -1 +1 @@ -Subproject commit 09b610c72cdc8f460d87b689eaa8487cecc84e0c +Subproject commit 354c8e9beff8159ff5de55026302065db47d27c3 From e2a5101d9d52ba92ada7d67f35d6d487e933c8eb Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 22 Sep 2021 12:13:18 -0700 Subject: [PATCH 4/4] fix up for linking w/ doc update --- doc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc b/doc index 354c8e9bef..fffda1623d 160000 --- a/doc +++ b/doc @@ -1 +1 @@ -Subproject commit 354c8e9beff8159ff5de55026302065db47d27c3 +Subproject commit fffda1623d5940eb91f1cae7ea2f931417ae4c1e