diff --git a/CHANGES b/CHANGES index 99fdcd3597..15f372e3ff 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,9 @@ +7.0.8-5 | 2025-07-14 14:15:46 -0700 + + * fix for error in ZAM's constant propagation logic (Vern Paxson, Corelight) + + (cherry picked from commit 869bd181b20fc9840472e35e87249a946177273b) + 7.0.8-4 | 2025-07-14 14:12:46 -0700 * btest/logging: Fly-by cleanup (Arne Welzel, Corelight) diff --git a/VERSION b/VERSION index 8bf56dd624..c7f44231d5 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -7.0.8-4 +7.0.8-5 diff --git a/src/script_opt/GenIDDefs.cc b/src/script_opt/GenIDDefs.cc index fde76e9c95..8722af7424 100644 --- a/src/script_opt/GenIDDefs.cc +++ b/src/script_opt/GenIDDefs.cc @@ -490,7 +490,7 @@ void GenIDDefs::TrackID(const ID* id, const ExprPtr& e) { // here to set the lowest limit for definitions. For now we leave // DefinedAfter as capable of supporting that distinction in case we // find need to revive it in the future. - oi->DefinedAfter(last_stmt_traversed, e, confluence_blocks, 0); + oi->SetDefinedAfter(last_stmt_traversed, e, confluence_blocks, 0); // Ensure we track this identifier across all relevant // confluence regions. diff --git a/src/script_opt/IDOptInfo.cc b/src/script_opt/IDOptInfo.cc index 9e170a2191..e3b8fd34dc 100644 --- a/src/script_opt/IDOptInfo.cc +++ b/src/script_opt/IDOptInfo.cc @@ -74,8 +74,8 @@ void IDOptInfo::AddInitExpr(ExprPtr init_expr, InitClass ic) { init_exprs.emplace_back(std::move(init_expr)); } -void IDOptInfo::DefinedAfter(const Stmt* s, const ExprPtr& e, const std::vector& conf_blocks, - zeek_uint_t conf_start) { +void IDOptInfo::SetDefinedAfter(const Stmt* s, const ExprPtr& e, const std::vector& conf_blocks, + zeek_uint_t conf_start) { if ( tracing ) printf("ID %s defined at %d: %s\n", trace_ID, s ? s->GetOptInfo()->stmt_num : NO_DEF, s ? obj_desc(s).c_str() : ""); @@ -124,6 +124,20 @@ void IDOptInfo::DefinedAfter(const Stmt* s, const ExprPtr& e, const std::vector< for ( ; conf_start < conf_blocks.size(); ++conf_start ) StartConfluenceBlock(conf_blocks[conf_start]); + if ( e ) { + // If we just ended a region that's (1) at the same block level, + // (2) definitive in terms of having assigned to the identifier, + // and (3) adjacent to the one we're about to start (no intervening + // confluence), then mark it as ended-due-to-assignment (as opposed + // to ended-due-to-confluence). Doing so enables us to propagate that + // assignment value to the beginning of this block in + // FindRegionBeforeIndex() so we can collapse assignment cascades; + // see the comment in that method. + auto& ub = usage_regions.back(); + if ( ub.BlockLevel() == s->GetOptInfo()->block_level && ub.EndsAfter() == stmt_num - 1 && ub.DefExprAfter() ) + ub.SetEndedDueToAssignment(); + } + // Create a new region corresponding to this definition. // This needs to come after filling out the confluence // blocks, since they'll create their own (earlier) regions. @@ -436,19 +450,29 @@ void IDOptInfo::EndRegionsAfter(int stmt_num, int level) { int IDOptInfo::FindRegionBeforeIndex(int stmt_num) { int region_ind = NO_DEF; for ( auto i = 0U; i < usage_regions.size(); ++i ) { - auto ur = usage_regions[i]; + auto& ur = usage_regions[i]; if ( ur.StartsAfter() >= stmt_num ) break; - if ( ur.EndsAfter() == NO_DEF ) - // It's active for everything beyond its start. + // It's active for everything beyond its start. + // or + // It's active at the beginning of the statement of interest. + if ( ur.EndsAfter() == NO_DEF || ur.EndsAfter() >= stmt_num ) region_ind = i; - else if ( ur.EndsAfter() >= stmt_num - 1 ) - // It's active at the beginning of the statement of - // interest. + else if ( ur.EndsAfter() == stmt_num - 1 && ur.EndedDueToAssignment() ) { + // There's one other possibility, which occurs for a series of + // statements like: + // + // a = some_val; + // a = a + 1; + // + // Here, the assignment for "a = some_val" ends right after + // that statement due to new assignment to 'a' on the second line. + // However, it's okay to use the first region on the RHS. region_ind = i; + } } ASSERT(region_ind != NO_DEF); diff --git a/src/script_opt/IDOptInfo.h b/src/script_opt/IDOptInfo.h index 5dd07c85e2..81d37ea37c 100644 --- a/src/script_opt/IDOptInfo.h +++ b/src/script_opt/IDOptInfo.h @@ -51,6 +51,13 @@ public: int EndsAfter() const { return end_stmt; } void SetEndsAfter(int _end_stmt) { end_stmt = _end_stmt; } + // Returns or sets whether the region ended due to a new assignment to the + // identifier, or confluence (ending of a scope block). This information + // is used for an optimization in IDOptInfo::FindRegionBeforeIndex(). + // The value defaults to false. + bool EndedDueToAssignment() const { return ended_due_to_assignment; } + void SetEndedDueToAssignment() { ended_due_to_assignment = true; } + // The confluence nesting level associated with the region. Other // regions that overlap take precedence if they have a higher // (= more inner) block level. @@ -85,6 +92,10 @@ protected: // its execution. int end_stmt = NO_DEF; // means the region hasn't ended yet + // Whether the region ended because of an immediately following + // assignment. + bool ended_due_to_assignment = false; + // Degree of confluence nesting associated with this region. int block_level; @@ -165,8 +176,8 @@ public: // gives the full set of surrounding confluence statements. // It should be processed starting at conf_start (note that // conf_blocks may be empty). - void DefinedAfter(const Stmt* s, const ExprPtr& e, const std::vector& conf_blocks, - zeek_uint_t conf_start); + void SetDefinedAfter(const Stmt* s, const ExprPtr& e, const std::vector& conf_blocks, + zeek_uint_t conf_start); // Called upon encountering a "return" statement. void ReturnAt(const Stmt* s); diff --git a/testing/btest/Baseline/opt.regress-constant-prop/output b/testing/btest/Baseline/opt.regress-constant-prop/output new file mode 100644 index 0000000000..845708e279 --- /dev/null +++ b/testing/btest/Baseline/opt.regress-constant-prop/output @@ -0,0 +1,2 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +xyz diff --git a/testing/btest/opt/regress-constant-prop.zeek b/testing/btest/opt/regress-constant-prop.zeek new file mode 100644 index 0000000000..3101b68fc5 --- /dev/null +++ b/testing/btest/opt/regress-constant-prop.zeek @@ -0,0 +1,19 @@ +# @TEST-DOC: Regression test for incorrect constant propagation +# @TEST-REQUIRES: test "${ZEEK_USE_CPP}" != "1" +# @TEST-EXEC: zeek -b -O ZAM %INPUT >output +# @TEST-EXEC: btest-diff output + +function foo(s: string) + { + if ( s == "foo" ) + s = "bar"; + else if ( s == "bar" ) + s = "bletch"; + + print s; + } + +event zeek_init() + { + foo("xyz"); + }