zeek/src/script_opt/IDOptInfo.cc
Arne Welzel a0d35d6e28 Merge remote-tracking branch 'origin/topic/vern/ZAM-const-prop-fix'
* origin/topic/vern/ZAM-const-prop-fix:
  fix for error in ZAM's constant propagation logic

(cherry picked from commit 869bd181b2)
2025-07-14 14:16:09 -07:00

498 lines
15 KiB
C++

// See the file "COPYING" in the main distribution directory for copyright.
#include "zeek/script_opt/IDOptInfo.h"
#include "zeek/Desc.h"
#include "zeek/Expr.h"
#include "zeek/Stmt.h"
#include "zeek/script_opt/StmtOptInfo.h"
namespace zeek::detail {
// Name of the identifier whose analysis should be traced, or nullptr for
// no tracing. IDOptInfo::Clear() fills this in once from the ZEEK_TRACE_ID
// environment variable and compares it against each identifier's name.
const char* trace_ID = nullptr;
// Builds a region that starts at statement 's'. The starting statement
// number and the block level both come from the statement's optimization
// info; 'maybe' and 'def' are forwarded unchanged to Init().
IDDefRegion::IDDefRegion(const Stmt* s, bool maybe, int def) {
    auto s_info = s->GetOptInfo();

    block_level = s_info->block_level;
    start_stmt = s_info->stmt_num;

    Init(maybe, def);
}
// Builds a region from an explicitly supplied starting statement number
// and block level rather than from a statement object; 'maybe' and 'def'
// are forwarded unchanged to Init().
IDDefRegion::IDDefRegion(int stmt_num, int level, bool maybe, int def) {
    block_level = level;
    start_stmt = stmt_num;

    Init(maybe, def);
}
// Builds a region that starts at statement 's' but carries over the
// definition state of an existing region 'ur': its maybe-defined flag,
// its point of definition, and its definition expression.
IDDefRegion::IDDefRegion(const Stmt* s, const IDDefRegion& ur) {
    auto s_info = s->GetOptInfo();

    block_level = s_info->block_level;
    start_stmt = s_info->stmt_num;

    const auto inherited_maybe = ur.MaybeDefined();
    const auto inherited_def = ur.DefinedAfter();

    Init(inherited_maybe, inherited_def);
    SetDefExpr(ur.DefExprAfter());
}
// Prints a one-line description of this region to stdout: its statement
// range and block level, followed by either the point of definition (with
// the defining expression, if any), "?" for maybe-defined, or "N/A".
void IDDefRegion::Dump() const {
    printf("\t%d->%d (%d): ", start_stmt, end_stmt, block_level);

    if ( defined == NO_DEF ) {
        // No definite definition; distinguish "possibly" from "not at all".
        if ( maybe_defined )
            printf("?");
        else
            printf("N/A");
    }
    else
        printf("%d (%s)", defined, def_expr ? obj_desc(def_expr.get()).c_str() : "<no expr>");

    printf("\n");
}
// Storage for the static member that accumulates initialization
// expressions of global identifiers; AddInitExpr() appends to it whenever
// the identifier it is called for is a global.
std::vector<IDInitInfo> IDOptInfo::global_init_exprs;
void IDOptInfo::Clear() {
static bool did_init = false;
if ( ! did_init ) {
trace_ID = getenv("ZEEK_TRACE_ID");
did_init = true;
}
init_exprs.clear();
usage_regions.clear();
pending_confluences.clear();
confluence_stmts.clear();
tracing = trace_ID && util::streq(trace_ID, my_id->Name());
}
// Records an initialization expression for this identifier. A null
// expression is ignored. For globals, the expression is additionally
// logged (with the identifier and init class 'ic') in the shared
// global_init_exprs list.
void IDOptInfo::AddInitExpr(ExprPtr init_expr, InitClass ic) {
    if ( init_expr ) {
        if ( my_id->IsGlobal() )
            global_init_exprs.emplace_back(my_id, init_expr, ic);

        // Last use of the pointer, so hand it over rather than copy it.
        init_exprs.emplace_back(std::move(init_expr));
    }
}
// Records that this identifier is (re)defined after statement 's'.
//
// s:           the defining statement, or nullptr for a definition that
//              holds upon entry (in which case no regions may exist yet).
// e:           the expression assigned by the definition; may be null.
// conf_blocks: confluence blocks to reconcile against this identifier's own
//              open confluence statements; any that are missing get started.
// conf_start:  index of the first entry of conf_blocks to consider.
void IDOptInfo::SetDefinedAfter(const Stmt* s, const ExprPtr& e, const std::vector<const Stmt*>& conf_blocks,
                                zeek_uint_t conf_start) {
    if ( tracing )
        printf("ID %s defined at %d: %s\n", trace_ID, s ? s->GetOptInfo()->stmt_num : NO_DEF,
               s ? obj_desc(s).c_str() : "<entry>");

    if ( ! s ) { // This is a definition-upon-entry
        ASSERT(usage_regions.empty());
        usage_regions.emplace_back(0, 0, true, 0);

        if ( tracing )
            DumpBlocks();

        return;
    }

    auto s_oi = s->GetOptInfo();
    auto stmt_num = s_oi->stmt_num;

    if ( usage_regions.empty() ) {
        // We're seeing this identifier for the first time,
        // so we don't have any context or confluence
        // information for it. Create its "backstory" region.
        ASSERT(confluence_stmts.empty());
        usage_regions.emplace_back(0, 0, false, NO_DEF);
    }

    // Any pending regions stop prior to this statement.
    EndRegionsAfter(stmt_num - 1, s_oi->block_level);

    // Fill in any missing confluence blocks.
    int b = 0; // index into our own blocks
    int n = confluence_stmts.size();

    while ( b < n && conf_start < conf_blocks.size() ) {
        auto outer_block = conf_blocks[conf_start];

        // See if we can find that block.
        for ( ; b < n; ++b )
            if ( confluence_stmts[b] == outer_block )
                break;

        if ( b < n ) { // We found it, look for the next one.
            ++conf_start;
            ++b;
        }
    }

    // Add in the remainder.
    for ( ; conf_start < conf_blocks.size(); ++conf_start )
        StartConfluenceBlock(conf_blocks[conf_start]);

    if ( e ) {
        // If we just ended a region that's (1) at the same block level,
        // (2) definitive in terms of having assigned to the identifier,
        // and (3) adjacent to the one we're about to start (no intervening
        // confluence), then mark it as ended-due-to-assignment (as opposed
        // to ended-due-to-confluence). Doing so enables us to propagate that
        // assignment value to the beginning of this block in
        // FindRegionBeforeIndex() so we can collapse assignment cascades;
        // see the comment in that method.
        auto& ub = usage_regions.back();
        if ( ub.BlockLevel() == s_oi->block_level && ub.EndsAfter() == stmt_num - 1 && ub.DefExprAfter() )
            ub.SetEndedDueToAssignment();
    }

    // Create a new region corresponding to this definition.
    // This needs to come after filling out the confluence
    // blocks, since they'll create their own (earlier) regions.
    usage_regions.emplace_back(s, true, stmt_num);

    // 'e' is a const reference owned by the caller, so it can only be
    // copied here; wrapping it in std::move() would not change that.
    usage_regions.back().SetDefExpr(e);

    if ( tracing )
        DumpBlocks();
}
// Accounts for a "return" at statement 's'. If the return sits inside a
// catch-return confluence block, it is treated as a branch beyond the
// innermost such block; otherwise all regions at the statement's block
// level (or deeper) end just before the statement.
void IDOptInfo::ReturnAt(const Stmt* s) {
    if ( tracing )
        printf("ID %s subject to return %d: %s\n", trace_ID, s->GetOptInfo()->stmt_num, obj_desc(s).c_str());

    // Walk the confluence stack from innermost to outermost, stopping at
    // the first catch-return. Ends at -1 if there is none.
    int cr_ind = confluence_stmts.size();
    while ( --cr_ind >= 0 )
        if ( confluence_stmts[cr_ind]->Tag() == STMT_CATCH_RETURN )
            break;

    if ( cr_ind >= 0 )
        BranchBeyond(s, confluence_stmts[cr_ind], false);
    else {
        auto s_info = s->GetOptInfo();
        EndRegionsAfter(s_info->stmt_num - 1, s_info->block_level);
    }

    if ( tracing )
        DumpBlocks();
}
void IDOptInfo::BranchBackTo(const Stmt* from, const Stmt* to, bool close_all) {
if ( tracing )
printf("ID %s branching back from %d->%d: %s\n", trace_ID, from->GetOptInfo()->stmt_num,
to->GetOptInfo()->stmt_num, obj_desc(from).c_str());
// The key notion we need to update is whether the regions
// between from_reg and to_reg still have unique definitions.
// Confluence due to the branch can only take that away, it
// can't instill it. (OTOH, in principle it could update
// "maybe defined", but not in a way we care about, since we
// only draw upon that for diagnosing usage errors, and for
// those the error has already occurred on entry into the loop.)
auto from_reg = ActiveRegion();
auto f_oi = from->GetOptInfo();
auto t_oi = to->GetOptInfo();
zeek_uint_t t_r_ind = FindRegionBeforeIndex(t_oi->stmt_num);
auto& t_r = usage_regions[t_r_ind];
if ( from_reg && from_reg->DefinedAfter() != t_r.DefinedAfter() && t_r.DefinedAfter() != NO_DEF ) {
// They disagree on the definition. Move the definition
// point to be the start of the confluence region, and
// update any blocks inside the region that refer to
// a pre-"to" definition to instead reflect the confluence
// region (and remove their definition expressions).
int new_def = t_oi->stmt_num;
for ( auto i = t_r_ind; i < usage_regions.size(); ++i ) {
auto& ur = usage_regions[i];
if ( ur.DefinedAfter() < new_def ) {
ur.UpdateDefinedAfter(new_def);
ur.SetDefExpr(nullptr);
}
}
}
int level = close_all ? t_oi->block_level + 1 : f_oi->block_level;
EndRegionsAfter(f_oi->stmt_num, level);
if ( tracing )
DumpBlocks();
}
// Accounts for a forward branch at statement 'end_s' that leaves the
// confluence block 'block' (which must already have a pending-confluence
// entry). The currently active region, if any, is remembered as flowing
// to the end of 'block', and then regions are closed after 'end_s': down
// to one level inside 'block' if 'close_all' is set, otherwise down to
// the level of 'end_s'.
void IDOptInfo::BranchBeyond(const Stmt* end_s, const Stmt* block, bool close_all) {
    if ( tracing )
        printf("ID %s branching forward from %d beyond %d: %s\n", trace_ID, end_s->GetOptInfo()->stmt_num,
               block->GetOptInfo()->stmt_num, obj_desc(end_s).c_str());

    ASSERT(pending_confluences.count(block) > 0);

    auto active_ind = ActiveRegionIndex();
    if ( active_ind != NO_DEF )
        pending_confluences[block].insert(active_ind);

    auto end_info = end_s->GetOptInfo();
    int level = close_all ? block->GetOptInfo()->block_level + 1 : end_info->block_level;

    EndRegionsAfter(end_info->stmt_num, level);

    if ( tracing )
        DumpBlocks();
}
// Opens a new confluence block for this identifier at statement 's'.
//
// Steps, in order:
//   1. Regions belonging to a confluence statement at the same block level
//      are ended just before 's'.
//   2. 's' gets an empty pending-confluence set and is pushed onto
//      confluence_stmts, with its contains_branch_beyond flag pushed onto
//      the parallel block_has_orig_flow stack.
//   3. The most recent still-open region is examined: if it sits at an
//      outer (lower) level, a new region at the level of 's' is created
//      that inherits its definition state.
void IDOptInfo::StartConfluenceBlock(const Stmt* s) {
if ( tracing )
printf("ID %s starting confluence block at %d: %s\n", trace_ID, s->GetOptInfo()->stmt_num, obj_desc(s).c_str());
auto s_oi = s->GetOptInfo();
int block_level = s_oi->block_level;
// End any confluence blocks at this or inner levels.
for ( auto cs : confluence_stmts ) {
// A statement must not be started twice.
ASSERT(cs != s);
auto cs_level = cs->GetOptInfo()->block_level;
if ( cs_level >= block_level ) {
// The only open confluence statement allowed at this level or deeper
// is the innermost one, and it must be at exactly this level.
ASSERT(cs_level == block_level);
ASSERT(cs == confluence_stmts.back());
EndRegionsAfter(s_oi->stmt_num - 1, block_level);
}
}
// Start with no regions recorded as branching to the end of this block.
pending_confluences[s] = {};
// These two pushes must stay paired; ConfluenceBlockEndsAfter() pops both.
confluence_stmts.push_back(s);
block_has_orig_flow.push_back(s_oi->contains_branch_beyond);
// Inherit the closest open, outer region, if necessary.
// The search runs from the most recent region backwards.
for ( int i = usage_regions.size() - 1; i >= 0; --i ) {
auto& ur = usage_regions[i];
if ( ur.EndsAfter() == NO_DEF ) {
if ( ur.BlockLevel() > block_level ) {
// This can happen for regions left over
// from a previous catch-return, which
// we haven't closed out yet because we
// don't track new identifiers beyond
// outer CRs. Close the region now.
ASSERT(s->Tag() == STMT_CATCH_RETURN);
ur.SetEndsAfter(s_oi->stmt_num - 1);
continue;
}
if ( ur.BlockLevel() < block_level )
// Didn't find one at our own level,
// so create one inherited from the
// outer one.
// NOTE(review): 'ur' refers to an element of usage_regions while
// emplace_back() may grow that same container - confirm this
// aliasing is acceptable for the container type in use.
usage_regions.emplace_back(s, ur);
// We now have one at our level that we can use.
break;
}
}
if ( tracing )
DumpBlocks();
}
// Ends the innermost confluence block (the last entry of confluence_stmts)
// after statement 's'.
//
// Every still-open region at or inside the block's level is closed at 's'.
// The definition states of the regions that reach the end of the block are
// then merged into a single new region, at the block's level and starting
// after 's', which is appended to usage_regions. Finally the block is
// popped from the confluence bookkeeping.
//
// s:            the last statement of the confluence block.
// no_orig_flow: if true, open regions that started at or before the
//               block's own statement and are not in the block's
//               pending-confluence set are excluded from the merge. This
//               is forced to false when the block's block_has_orig_flow
//               entry is set.
void IDOptInfo::ConfluenceBlockEndsAfter(const Stmt* s, bool no_orig_flow) {
auto stmt_num = s->GetOptInfo()->stmt_num;
ASSERT(! confluence_stmts.empty());
auto cs = confluence_stmts.back();
// Indices of regions recorded (by BranchBeyond()) as flowing to the end
// of this block.
auto& pc = pending_confluences[cs];
// End any active regions. Those will all have a level >= that
// of cs, since we're now returning to cs's level.
int cs_stmt_num = cs->GetOptInfo()->stmt_num;
int cs_level = cs->GetOptInfo()->block_level;
if ( tracing )
printf("ID %s ending (%d) confluence block (%d, level %d) at %d: %s\n", trace_ID, no_orig_flow, cs_stmt_num,
cs_level, stmt_num, obj_desc(s).c_str());
// The flag captured when the block was started overrides the caller's
// claim that there is no original flow.
if ( block_has_orig_flow.back() )
no_orig_flow = false;
// Compute the state of the definition at the point of confluence:
// whether it's at least could-be-defined, whether it's definitely
// defined and if so whether it has a single point of definition.
bool maybe = false;
bool defined = true;
bool did_single_def = false;
int single_def = NO_DEF;
ExprPtr single_def_expr;
bool have_multi_defs = false;
// Number of regions that actually contribute to the merge.
int num_regions = 0;
for ( auto i = 0U; i < usage_regions.size(); ++i ) {
auto& ur = usage_regions[i];
if ( ur.BlockLevel() < cs_level )
// Region is not applicable.
continue;
if ( ur.EndsAfter() == NO_DEF ) { // End this region.
ur.SetEndsAfter(stmt_num);
if ( ur.StartsAfter() <= cs_stmt_num && no_orig_flow && pc.count(i) == 0 )
// Don't include this region in our assessment.
continue;
}
else if ( ur.EndsAfter() < cs_stmt_num )
// Irrelevant, didn't extend into confluence region.
// We test here just to avoid the set lookup in
// the next test, which presumably will sometimes
// be a tad expensive.
continue;
else if ( pc.count(i) == 0 )
// This region isn't active, and we're not
// tracking it for confluence.
continue;
++num_regions;
maybe = maybe || ur.MaybeDefined();
if ( ur.DefinedAfter() == NO_DEF ) {
// One contributing region without a definition is enough to make the
// merged state not definitely defined.
defined = false;
continue;
}
if ( did_single_def ) {
if ( single_def != ur.DefinedAfter() )
have_multi_defs = true;
}
else {
// First defined region seen: it is the candidate single definition.
single_def = ur.DefinedAfter();
single_def_expr = ur.DefExprAfter();
did_single_def = true;
}
}
if ( num_regions == 0 ) { // Nothing survives.
ASSERT(maybe == false);
defined = false;
}
if ( ! defined ) {
single_def = NO_DEF;
have_multi_defs = false;
}
if ( have_multi_defs )
// Definition reflects confluence point, which comes
// just after 's'.
single_def = stmt_num + 1;
// Same value as cs_level computed above.
int level = cs->GetOptInfo()->block_level;
usage_regions.emplace_back(stmt_num, level, maybe, single_def);
// Only a unique surviving definition keeps its defining expression.
if ( single_def != NO_DEF && ! have_multi_defs )
usage_regions.back().SetDefExpr(single_def_expr);
// Pop the block from all three bookkeeping structures. 'pc' refers into
// pending_confluences and must not be used past the erase below.
confluence_stmts.pop_back();
block_has_orig_flow.pop_back();
pending_confluences.erase(cs);
if ( tracing )
DumpBlocks();
}
// Statement-based form: looks up the statement's number and defers to the
// statement-number overload.
bool IDOptInfo::IsPossiblyDefinedBefore(const Stmt* s) {
    const auto stmt_num = s->GetOptInfo()->stmt_num;
    return IsPossiblyDefinedBefore(stmt_num);
}
// Statement-based form: looks up the statement's number and defers to the
// statement-number overload.
bool IDOptInfo::IsDefinedBefore(const Stmt* s) {
    const auto stmt_num = s->GetOptInfo()->stmt_num;
    return IsDefinedBefore(stmt_num);
}
// Statement-based form: looks up the statement's number and defers to the
// statement-number overload.
int IDOptInfo::DefinitionBefore(const Stmt* s) {
    const auto stmt_num = s->GetOptInfo()->stmt_num;
    return DefinitionBefore(stmt_num);
}
// Statement-based form: looks up the statement's number and defers to the
// statement-number overload.
ExprPtr IDOptInfo::DefExprBefore(const Stmt* s) {
    const auto stmt_num = s->GetOptInfo()->stmt_num;
    return DefExprBefore(stmt_num);
}
// Returns whether the identifier might be defined just before the given
// statement number. With no usage regions recorded, the answer is false.
bool IDOptInfo::IsPossiblyDefinedBefore(int stmt_num) {
    return ! usage_regions.empty() && FindRegionBefore(stmt_num).MaybeDefined();
}
// Returns whether the identifier has a definite point of definition just
// before the given statement number. With no usage regions recorded, the
// answer is false.
bool IDOptInfo::IsDefinedBefore(int stmt_num) {
    return ! usage_regions.empty() && FindRegionBefore(stmt_num).DefinedAfter() != NO_DEF;
}
// Returns the point of definition in effect just before the given
// statement number, or NO_DEF if there is none (including when no usage
// regions have been recorded).
int IDOptInfo::DefinitionBefore(int stmt_num) {
    if ( ! usage_regions.empty() )
        return FindRegionBefore(stmt_num).DefinedAfter();

    return NO_DEF;
}
// Returns the defining expression in effect just before the given
// statement number, or a null pointer when no usage regions have been
// recorded.
ExprPtr IDOptInfo::DefExprBefore(int stmt_num) {
    if ( ! usage_regions.empty() )
        return FindRegionBefore(stmt_num).DefExprAfter();

    return nullptr;
}
// Closes, at 'stmt_num', every still-open region found while scanning
// backwards from the most recent region. The scan stops at the first
// region whose block level is below 'level'.
void IDOptInfo::EndRegionsAfter(int stmt_num, int level) {
    auto remaining = usage_regions.size();

    while ( remaining > 0 ) {
        auto& region = usage_regions[--remaining];

        if ( region.BlockLevel() < level )
            break;

        if ( region.EndsAfter() == NO_DEF )
            region.SetEndsAfter(stmt_num);
    }
}
// Returns the index of the usage region in effect just before statement
// 'stmt_num'. Regions are scanned in order and the last qualifying one
// wins; the scan stops at the first region starting at or after
// 'stmt_num'. Asserts that a region was found.
int IDOptInfo::FindRegionBeforeIndex(int stmt_num) {
    int found = NO_DEF;

    auto r = 0U;
    while ( r < usage_regions.size() ) {
        auto& region = usage_regions[r];

        if ( region.StartsAfter() >= stmt_num )
            break;

        // A region qualifies if it is still open (active for everything
        // past its start) or if it is active at the beginning of the
        // statement of interest.
        //
        // It also qualifies in one further case, arising from sequences
        // such as:
        //
        //	a = some_val;
        //	a = a + 1;
        //
        // The region for "a = some_val" ends right after that statement
        // because of the new assignment to 'a' on the second line, yet
        // it is still the right region for the 'a' on the RHS.
        if ( region.EndsAfter() == NO_DEF || region.EndsAfter() >= stmt_num ||
             (region.EndsAfter() == stmt_num - 1 && region.EndedDueToAssignment()) )
            found = r;

        ++r;
    }

    ASSERT(found != NO_DEF);
    return found;
}
// Returns the index of the most recent region that has not been ended
// yet, or NO_DEF if every region is closed (or there are none).
int IDOptInfo::ActiveRegionIndex() {
    int ind = usage_regions.size();

    while ( --ind >= 0 )
        if ( usage_regions[ind].EndsAfter() == NO_DEF )
            return ind;

    return NO_DEF;
}
// Prints every usage region, in order, followed by an "<end>" marker line.
void IDOptInfo::DumpBlocks() const {
    for ( auto r = 0U; r < usage_regions.size(); ++r )
        usage_regions[r].Dump();

    printf("<end>\n");
}
} // namespace zeek::detail