Merge remote-tracking branch 'origin/topic/vern/zam-aggr-change-in-loop'

* origin/topic/vern/zam-aggr-change-in-loop:
  fix for ZAM optimization when an aggregate is modified inside of a loop

(cherry picked from commit 2255fa23b8)
This commit is contained in:
Arne Welzel 2025-05-13 19:50:47 +02:00 committed by Tim Wojtulewicz
parent c700efc3c8
commit 94700130ed
6 changed files with 92 additions and 4 deletions

View file

@ -1,3 +1,9 @@
7.0.7-3 | 2025-05-19 09:15:54 -0700
* fix for ZAM optimization when an aggregate is modified inside of a loop (Vern Paxson, Corelight)
(cherry picked from commit 2255fa23b8a3f7bde1345f2847764412c90487c8)
7.0.7-2 | 2025-05-19 09:09:07 -0700
* Bump `auxil/spicy` to v1.11.5 (Benjamin Bannier, Corelight)

View file

@ -1 +1 @@
7.0.7-2
7.0.7-3

View file

@ -12,6 +12,19 @@ CSE_ValidityChecker::CSE_ValidityChecker(std::shared_ptr<ProfileFuncs> _pfs, con
start_e = _start_e;
end_e = _end_e;
// For validity checking, if end_e is inside a loop and start_e is
// outside that loop, then we need to extend the checking beyond end_e
// to the end of the loop, to account for correctness after iterating
// through the loop. We do that as follows. Upon entering an outer
// loop, we set end_s to that loop. (We can tell it's an outer loop if,
// upon entering, end_s is nil.) (1) If we encounter end_e while inside
// that loop (which we can tell because end_s is non-nil), then we clear
// end_e to signal that we're now using end_s to terminate the traversal.
// (2) If we complete the loop without encountering end_e (which we can
// tell because after traversal end_e is non-nil), then we clear end_s
// to mark that the traversal is now not inside a loop.
end_s = nullptr;
// Track whether this is a record assignment, in which case
// we're attuned to assignments to the same field for the
// same type of record.
@ -38,6 +51,23 @@ TraversalCode CSE_ValidityChecker::PreStmt(const Stmt* s) {
return TC_ABORTALL;
}
if ( (t == STMT_WHILE || t == STMT_FOR) && have_start_e && ! end_s )
// We've started the traversal and are entering an outer loop.
end_s = s;
return TC_CONTINUE;
}
TraversalCode CSE_ValidityChecker::PostStmt(const Stmt* s) {
    // Only the statement recorded as the current outer loop matters here;
    // leaving any other statement does not affect the traversal.
    if ( s != end_s )
        return TC_CONTINUE;

    // end_e having been cleared means the end expression was encountered
    // inside this loop, so finishing the loop finishes the analysis.
    if ( end_e == nullptr )
        return TC_ABORTALL;

    // The loop completed without containing the end expression, so we
    // are no longer tracking an outer loop.
    end_s = nullptr;
    return TC_CONTINUE;
}
@ -59,8 +89,13 @@ TraversalCode CSE_ValidityChecker::PreExpr(const Expr* e) {
ASSERT(! have_end_e);
have_end_e = true;
// ... and we're now done.
return TC_ABORTALL;
if ( ! end_s )
// We're now done.
return TC_ABORTALL;
// Need to finish the loop before we mark things as done.
// Signal to the statement traversal that we're in that state.
end_e = nullptr;
}
if ( ! have_start_e )

View file

@ -21,6 +21,7 @@ public:
const Expr* end_e);
TraversalCode PreStmt(const Stmt*) override;
TraversalCode PostStmt(const Stmt*) override;
TraversalCode PreExpr(const Expr*) override;
TraversalCode PostExpr(const Expr*) override;
@ -81,9 +82,13 @@ protected:
// assignment expression.
const Expr* start_e;
// Where in the AST to end our analysis.
// Expression in the AST where we should end our analysis. See discussion
// in the constructor for the interplay between this and end_s.
const Expr* end_e;
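// Outer loop currently being traversed, or nil if none. If end_e is
// encountered inside this loop, the analysis ends when the loop
// completes rather than at end_e.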
const Stmt* end_s;
// If what we're analyzing is a record element, then its offset.
// -1 if not.
int field;

View file

@ -0,0 +1,4 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
[hash=bletch]
[hash=xyzzy]
done

View file

@ -0,0 +1,38 @@
# @TEST-DOC: Regression test for an aggregate in a CSE changing inside a loop
# @TEST-REQUIRES: test "${ZEEK_USE_CPP}" != "1"
# @TEST-EXEC: zeek -b -O ZAM %INPUT >output
# @TEST-EXEC: btest-diff output
# Record holding a single string field; in this test the field's value is
# used as the key of the next entry to visit in "map".
type Data: record {
hash: string;
};
# Global table from string keys to Data records, populated in zeek_init()
# and walked by traverse_map().
global map: table[string] of Data;
# Walks the chain of records in "map" beginning with the entry stored under
# "hash": while the current record's hash field is itself a key of "map",
# moves on to that entry and prints it. Returns without printing anything
# if the first record's hash field is the empty string.
function traverse_map(hash: string)
{
local tmp = map[hash];
# First evaluation of tmp$hash. This is the value that, per the note
# in the loop below, used to be wrongly reused by the loop condition.
if ( tmp$hash == "" )
return;
while ( tmp$hash in map )
{
# Prior to the fix, the value of tmp$hash computed in the
# earlier "if" statement was used here, rather than the
# optimizer recognizing that "tmp" can have changed at this
# point due to the loop, and thus that value can be stale.
# That led to an infinite loop here.
tmp = map[tmp$hash];
print tmp;
}
}
# Builds the chain foo -> bar -> bletch and walks it starting from "foo".
# The last record's hash ("xyzzy") is never added as a key of "map", which
# is what terminates the loop in traverse_map(). Prints "done" afterwards
# to confirm that the traversal returned.
event zeek_init()
{
map["foo"] = Data($hash="bar");
map["bar"] = Data($hash="bletch");
map["bletch"] = Data($hash="xyzzy");
traverse_map("foo");
print "done";
}