diff --git a/src/strings.bif b/src/strings.bif index 0c280c58ac..56a0370c92 100644 --- a/src/strings.bif +++ b/src/strings.bif @@ -280,15 +280,23 @@ static zeek::VectorValPtr do_split_string(zeek::StringVal* str_val, int num_sep = 0; int offset = 0; + bool bol = true; + const bool eol = true; + while ( n >= 0 ) { offset = 0; // Find next match offset. int end_of_match = 0; - while ( n > 0 && - (end_of_match = re->MatchPrefix(s + offset, n)) <= 0 ) + while ( n > 0 ) { - // Move on to next byte. + end_of_match = re->MatchPrefix(s + offset, n, bol, eol); + if ( end_of_match > 0 ) + break; + + // Move on to next byte, use BOL only on the byte such that + // a BOL anchored pattern won't be matched anywhere else. + bol = false; ++offset; --n; } diff --git a/testing/btest/Baseline/bifs.split_string/out b/testing/btest/Baseline/bifs.split_string/out index 0f5377c2e2..4f7ef7e15c 100644 --- a/testing/btest/Baseline/bifs.split_string/out +++ b/testing/btest/Baseline/bifs.split_string/out @@ -31,3 +31,9 @@ A C = D +test, ^est, [test] +test, tes$, [test] +test, ^test$, [, test, ] +aa bb cc, / ?/, [aa, bb, cc] +aa bb cc, / ?/, [aa, , bb, , cc] +aa bb cc, / +/, [aa, bb, cc] diff --git a/testing/btest/bifs/split_string.zeek b/testing/btest/bifs/split_string.zeek index 11f0c5e011..e092f73768 100644 --- a/testing/btest/bifs/split_string.zeek +++ b/testing/btest/bifs/split_string.zeek @@ -34,3 +34,46 @@ event zeek_init() pat = /=/; print_string_vector(split_string_all(a, pat)); } + +event zeek_init() &priority=-5 + { + # Anchor testing. + local r = split_string_n("test", /^est/, T, 1); + assert |r| == 1; + assert r[0] == "test"; + print "test", "^est", r; + + r = split_string_n("test", /tes$/, T, 1); + assert |r| == 1; + assert r[0] == "test"; + print "test", "tes$", r; + + r = split_string_n("test", /^test$/, T, 1); + assert |r| == 3; + assert r[0] == ""; + assert r[1] == "test"; + assert r[2] == ""; + print "test", "^test$", r; + + r = split_string_n("aa bb cc", / ?/, F, 0); + assert |r| == 3; + assert r[0] == "aa"; + assert r[1] == "bb"; + assert r[2] == "cc"; + print "aa bb cc", "/ ?/", r; + + r = split_string_n("aa bb cc", / ?/, T, 0); + assert |r| == 5; + assert r[0] == "aa"; + assert r[1] == " "; + assert r[2] == "bb"; + assert r[3] == " "; + print "aa bb cc", "/ ?/", r; + + r = split_string_n("aa bb cc", / +/, F, 0); + assert |r| == 3; + assert r[0] == "aa"; + assert r[1] == "bb"; + assert r[2] == "cc"; + print "aa bb cc", "/ +/", r; + }