From e339e93e69fa757a70590c0c2dabcc84e2dabbbe Mon Sep 17 00:00:00 2001 From: Arne Welzel Date: Fri, 17 Nov 2023 13:33:22 +0100 Subject: [PATCH] strings.bif/sub,gsub: Respect anchors in pattern Anchors within pattern passed to sub() or gsub() were previously ignored, replacing any occurrence of '<text>' even when '^<text>' was used as a pattern. This is a pretty user-visible change (and we even have anchored patterns within the base scripts), but seems "the right thing to do". Relates to #3455 --- NEWS | 4 ++ src/Val.cc | 9 ++++- testing/btest/Baseline/bifs.sub/.stderr | 1 + testing/btest/Baseline/bifs.sub/out | 11 ++++++ testing/btest/bifs/sub.zeek | 52 +++++++++++++++++++++++++ 5 files changed, 76 insertions(+), 1 deletion(-) create mode 100644 testing/btest/Baseline/bifs.sub/.stderr diff --git a/NEWS b/NEWS index f438bb1fe8..daf081c955 100644 --- a/NEWS +++ b/NEWS @@ -23,6 +23,10 @@ Changed Functionality end-of-line $ anchors. Previously, an anchored pattern would be matched anywhere in the input string. +- The ``sub()`` and ``gsub()`` functions now respect the beginning-of-line ^ and + end-of-line $ anchors. Previously, an anchored pattern would be matched anywhere + in the input string. + Removed Functionality --------------------- diff --git a/src/Val.cc b/src/Val.cc index 2550d1d5c4..397c1d777c 100644 --- a/src/Val.cc +++ b/src/Val.cc @@ -789,15 +789,22 @@ StringValPtr StringVal::Replace(RE_Matcher* re, const String& repl, bool do_all) vector<std::pair<int, int>> cut_points; int size = 0; // size of result + bool bol = true; + const bool eol = true; while ( n > 0 ) { // Find next match offset. int end_of_match; - while ( n > 0 && (end_of_match = re->MatchPrefix(&s[offset], n)) <= 0 ) { + while ( n > 0 ) { + end_of_match = re->MatchPrefix(&s[offset], n, bol, eol); + if ( end_of_match > 0 ) + break; + // This character is going to be copied to the result. ++size; // Move on to next character. 
+ bol = false; ++offset; --n; } diff --git a/testing/btest/Baseline/bifs.sub/.stderr b/testing/btest/Baseline/bifs.sub/.stderr new file mode 100644 index 0000000000..49d861c74c --- /dev/null +++ b/testing/btest/Baseline/bifs.sub/.stderr @@ -0,0 +1 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. diff --git a/testing/btest/Baseline/bifs.sub/out b/testing/btest/Baseline/bifs.sub/out index 2473316747..8fa7d49e33 100644 --- a/testing/btest/Baseline/bifs.sub/out +++ b/testing/btest/Baseline/bifs.sub/out @@ -1,3 +1,14 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. that is a test that at a test +test +test +foo +foo +tea +tea test +test tea +test tea +tea tea +tea tea +tea diff --git a/testing/btest/bifs/sub.zeek b/testing/btest/bifs/sub.zeek index 1ad4e60137..15427d319b 100644 --- a/testing/btest/bifs/sub.zeek +++ b/testing/btest/bifs/sub.zeek @@ -1,6 +1,8 @@ +# @TEST-DOC: Test the sub() and gsub() functions. 
# # @TEST-EXEC: zeek -b %INPUT >out # @TEST-EXEC: btest-diff out +# @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-remove-abspath btest-diff .stderr event zeek_init() { @@ -10,3 +12,53 @@ event zeek_init() print sub(a, pat, "at"); print gsub(a, pat, "at"); } + +event zeek_init() &priority=-1 + { + local r = sub("test", /^est/, "ea"); + assert r == "test", r; + print r; + + r = sub("test", /tes$/, "foo"); + assert r == "test", r; + print r; + + r = sub("test", /test/, "foo"); + assert r == "foo", r; + print r; + + r = sub("test", /^test$/, "foo"); + assert r == "foo", r; + print r; + + r = sub("test", /est$/, "ea"); + assert r == "tea", r; + print r; + } + +event zeek_init() &priority=-2 + { + local r = gsub("test test", /^test/, "tea"); + assert r == "tea test", r; + print r; + + r = gsub("test test", /test$/, "tea"); + assert r == "test tea", r; + print r; + + r = gsub("test test", /test$/, "tea"); + assert r == "test tea", r; + print r; + + r = gsub("test test", /test/, "tea"); + assert r == "tea tea", r; + print r; + + r = gsub("test test", /est/, "ea"); + assert r == "tea tea", r; + print r; + + r = gsub("test test", /^test test$/, "tea"); + assert r == "tea", r; + print r; + }