zeek/src/script_opt/IDOptInfo.h

314 lines
12 KiB
C++

// See the file "COPYING" in the main distribution directory for copyright.
// Auxiliary information associated with identifiers to aid script
// optimization.
#pragma once
#include <map>
#include <set>
#include <utility>
#include <vector>

#include "zeek/Expr.h"
#include "zeek/ID.h"
#include "zeek/IntrusivePtr.h"
namespace zeek::detail {
// Forward declarations of the expression and statement classes that the
// declarations below refer to.
class Expr;
class Stmt;
// Shorthand for an IntrusivePtr handle to an expression.
using ExprPtr = IntrusivePtr<Expr>;
// Sentinel statement number. The classes below use it to mean "no definite
// point of definition" (for a definition statement) and "region has not
// ended yet" (for a region's ending statement).
//
// Declared "inline" so that every translation unit including this header
// shares a single definition, rather than each getting its own
// internal-linkage copy.
inline constexpr int NO_DEF = -1;
// Describes one span of statements over which an identifier's definition
// status stays the same. Throughout such a span the identifier is in
// exactly one of three states: (1) defined, holding the value it had after
// a specific statement; (2) possibly-but-not-certainly defined; or
// (3) definitely not defined.
class IDDefRegion {
public:
    IDDefRegion(const Stmt* s, bool maybe, int def);
    IDDefRegion(int stmt_num, int level, bool maybe, int def);
    IDDefRegion(const Stmt* s, const IDDefRegion& ur);

    // Sets the region's definition state. 'def' is the statement number
    // of the definition, or NO_DEF if there isn't a definite one. Having
    // a definite definition implies "maybe defined", so 'maybe' only
    // matters when 'def' is NO_DEF.
    void Init(bool maybe, int def) {
        defined = def;
        maybe_defined = maybe || def != NO_DEF;
    }

    // The number of the statement *after* whose execution this region
    // begins.
    int StartsAfter() const { return start_stmt; }

    // Accessor/mutator for the region's end: the last statement the
    // region covers, inclusive of executing that statement. NO_DEF
    // indicates an open-ended region whose end hasn't been seen yet.
    int EndsAfter() const { return end_stmt; }
    void SetEndsAfter(int _end_stmt) { end_stmt = _end_stmt; }

    // Accessor/mutator for why the region ended: true if a new assignment
    // to the identifier closed it, false (the default) if it was closed
    // by confluence, i.e., the end of a scope block.
    // IDOptInfo::FindRegionBeforeIndex() uses this for an optimization.
    bool EndedDueToAssignment() const { return ended_due_to_assignment; }
    void SetEndedDueToAssignment() { ended_due_to_assignment = true; }

    // Confluence nesting level of the region. When regions overlap, the
    // one with the higher (= more deeply nested) level wins.
    int BlockLevel() const { return block_level; }

    // Whether the identifier might be defined within the region.
    bool MaybeDefined() const { return maybe_defined; }

    // Accessor/mutator for the statement after which the identifier is
    // definitely defined; NO_DEF if there's no definite point of
    // definition.
    int DefinedAfter() const { return defined; }
    void UpdateDefinedAfter(int _defined) { defined = _defined; }

    // Accessor/mutator for the expression that defines the identifier,
    // if there is one. A definitely-defined identifier (DefinedAfter()
    // isn't NO_DEF) can still lack an expression, namely when its point
    // of definition is the end of a confluence block.
    const ExprPtr& DefExprAfter() const { return def_expr; }
    void SetDefExpr(ExprPtr e) { def_expr = std::move(e); }

    // Debugging aid.
    void Dump() const;

protected:
    // The region takes effect once the statement with this number has
    // executed.
    int start_stmt;

    // The last statement (inclusive of its execution) that the region
    // covers. NO_DEF = the region hasn't ended yet.
    int end_stmt = NO_DEF;

    // Set if an immediately following assignment is what ended the region.
    bool ended_due_to_assignment = false;

    // How deeply nested in confluence blocks the region is.
    int block_level;

    // Whether the identifier might be defined in the region.
    bool maybe_defined;

    // Unless NO_DEF, the number of the statement that defines the
    // identifier, or of the confluence point where several differing
    // definitions merge.
    int defined;

    // Defining expression for the identifier within the region. Nil if
    // the definition is ambiguous (because of confluence) or if the
    // identifier isn't guaranteed to be defined.
    ExprPtr def_expr;
};
// Records a single (re-)initialization of a (global) identifier: which
// identifier, the initializing expression, and the initialization class.
class IDInitInfo {
public:
    IDInitInfo(const ID* _id, ExprPtr _init, InitClass _ic)
        : id(_id),
          init(std::move(_init)),
          ic(_ic) {}

    // The identifier being initialized.
    const ID* Id() const { return id; }

    // The expression used for the initialization.
    const ExprPtr& Init() const { return init; }

    // The class of initialization.
    InitClass IC() const { return ic; }

private:
    const ID* id;
    ExprPtr init;
    InitClass ic;
};
// Holds the script-optimization bookkeeping kept for one identifier:
// its initialization expressions, constant aliasing, and the regions
// describing where it is (or may be) defined.
class IDOptInfo {
public:
    IDOptInfo(const ID* id) : my_id(id) {}

    // Discards everything computed about the identifier. Needed when
    // re-traversing an AST after it has been optimized, so that stale
    // information doesn't carry over.
    void Clear();

    // Records an expression used to explicitly initialize the (global)
    // identifier. Compile-to-C++ script optimization and variable-usage
    // tracking both need these. If the initialization class is anything
    // other than INIT_NONE, the initialization should be done using the
    // ExprPtr form of ID::SetVal.
    void AddInitExpr(ExprPtr init_expr, InitClass ic = INIT_NONE);

    // The identifier's initialization expressions, and the matching
    // initialization classes.
    const std::vector<ExprPtr>& GetInitExprs() const { return init_exprs; }
    const std::vector<InitClass>& GetInitClasses() const { return init_classes; }

    // The initialization expressions seen across all globals, in the
    // order in which they were processed.
    static auto& GetGlobalInitExprs() { return global_init_exprs; }
    static void ClearGlobalInitExprs() { global_init_exprs.clear(); }

    // The constant expression the identifier stands for, if any. Only
    // set when the identifier is an alias for a constant, i.e., nothing
    // else is ever assigned to it.
    //
    // (Tongue-in-cheek note carried over from the original author: this
    // declaration holds the record for the most uses of "const" on a
    // single line in the Zeek codebase ... though only by one!)
    const ConstExpr* Const() const { return const_expr; }
    void SetConst(const ConstExpr* _const) { const_expr = _const; }

    // Whether this is a temporary variable. A temporary always has
    // exactly one point of definition.
    bool IsTemp() const { return is_temp; }
    void SetTemp() { is_temp = true; }

    // Notes that executing statement 's' defines the identifier. 'e' is
    // the assigned expression, non-nil only for simple direct
    // assignments. 'conf_blocks' lists all of the enclosing confluence
    // statements (possibly none), and processing of it should begin at
    // index 'conf_start'.
    void SetDefinedAfter(const Stmt* s, const ExprPtr& e, const std::vector<const Stmt*>& conf_blocks,
                         zeek_uint_t conf_start);

    // Notes that a "return" statement was encountered.
    void ReturnAt(const Stmt* s);

    // Notes that the current region ends with a backwards branch made
    // at 'from' into the block 'to', which may cross several block
    // levels. With 'close_all' set, pending regions at any level inner
    // to 'to' should be closed; otherwise only those at the level of
    // 'from'.
    void BranchBackTo(const Stmt* from, const Stmt* to, bool close_all);

    // Notes that the current region ends at 'end_s' with a forwards
    // branch, which may cross several block levels, to the statement
    // executed right after 'block'. 'close_all' has the same meaning as
    // for BranchBackTo().
    void BranchBeyond(const Stmt* end_s, const Stmt* block, bool close_all);

    // Begins tracking a confluence block. The block starts with the
    // body of 's', not with 's' itself.
    void StartConfluenceBlock(const Stmt* s);

    // Ends confluence tracking, where 's' is the last point of execution
    // before leaving the block. A true 'no_orig_flow' means the region
    // for 's' itself doesn't extend to the end of the block.
    void ConfluenceBlockEndsAfter(const Stmt* s, bool no_orig_flow);

    // Queries about the identifier's state immediately *before* the
    // given statement executes.
    bool IsPossiblyDefinedBefore(const Stmt* s);
    bool IsDefinedBefore(const Stmt* s);
    int DefinitionBefore(const Stmt* s);
    ExprPtr DefExprBefore(const Stmt* s);

    // The same queries, expressed in terms of statement numbers.
    bool IsPossiblyDefinedBefore(int stmt_num);
    bool IsDefinedBefore(int stmt_num);
    int DefinitionBefore(int stmt_num);
    ExprPtr DefExprBefore(int stmt_num);

    // Flags that keep use-of-undefined-variable messages from being
    // issued more than once.
    bool DidUndefinedWarning() const { return did_undefined_warning; }
    bool DidPossiblyUndefinedWarning() const { return did_possibly_undefined_warning; }
    void SetDidUndefinedWarning() { did_undefined_warning = true; }
    void SetDidPossiblyUndefinedWarning() { did_possibly_undefined_warning = true; }

private:
    // Closes every active region whose level is the given one or
    // inner to it.
    void EndRegionsAfter(int stmt_num, int level);

    // Looks up the region in effect *before* the given statement
    // executes. Such a region is expected to always exist.
    IDDefRegion& FindRegionBefore(int stmt_num) {
        const int region_idx = FindRegionBeforeIndex(stmt_num);
        return usage_regions[region_idx];
    }
    int FindRegionBeforeIndex(int stmt_num);

    // The current "active" region, or nil if there isn't one. That's
    // the innermost region whose end is still NO_DEF, i.e., whose end
    // hasn't been found yet.
    IDDefRegion* ActiveRegion() {
        const int region_idx = ActiveRegionIndex();
        if ( region_idx < 0 )
            return nullptr;

        return &usage_regions[region_idx];
    }
    int ActiveRegionIndex();

    // Debugging aid.
    void DumpBlocks() const;

    // Every expression used to initialize the identifier, for the
    // scripts-to-C++ compiler. All of them are kept, not just the last,
    // since a global's value might be created from one of the earlier
    // ones.
    std::vector<ExprPtr> init_exprs;

    // Initialization classes, index-aligned with init_exprs. Kept as a
    // separate vector instead of a vector of std::pair's because the
    // usual access pattern is a loop over just the expressions.
    std::vector<InitClass> init_classes;

    // Initializations of globals, in the order encountered.
    static std::vector<IDInitInfo> global_init_exprs;

    // When non-nil, the constant that the identifier always holds after
    // its initial definition.
    const ConstExpr* const_expr = nullptr;

    // The identifier's usage regions. Built so that their starting
    // statements are always in non-decreasing order.
    std::vector<IDDefRegion> usage_regions;

    // A collection of usage_regions indices that all come together in
    // confluence at a single point. Used for things such as "break"
    // statements that jump out of loop or switch confluence regions.
    using ConfluenceSet = std::set<int>;

    // Associates each loop/switch/catch-return with its confluence set.
    std::map<const Stmt*, ConfluenceSet> pending_confluences;

    // Stack of the currently active confluence statements, which makes
    // the innermost one available whenever a confluence block ends.
    std::vector<const Stmt*> confluence_stmts;

    // Index-aligned with confluence_stmts. Records whether, at the time
    // the confluence block was created, internal flow going beyond it had
    // already been seen. In that case no_orig_flow can be disregarded
    // when the block ends, since original flow did in fact exist.
    std::vector<bool> block_has_orig_flow;

    // Set if the identifier is a temporary variable.
    bool is_temp = false;

    // The identifier this information belongs to. Used for tracking
    // initialization expressions of globals (for C++ compilation) and
    // for debugging output.
    const ID* my_id;

    // Debugging use only.
    bool tracing = false;

    // Remember whether usage errors have already been reported.
    bool did_undefined_warning = false;
    bool did_possibly_undefined_warning = false;
};
// Name of an identifier to trace, or nil for no tracing. If non-nil, then
// output detailed tracing information when building up the usage regions
// for any identifier with the given name. Only declared here; the
// definition lives outside this header.
extern const char* trace_ID;
} // namespace zeek::detail