mirror of
https://github.com/zeek/zeek.git
synced 2025-10-10 10:38:20 +00:00
strings.bif/do_split_string: Pass bol and eol to MatchPrefix()
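This allows better control of BOL and EOL. Previously, MatchPrefix() / LongestMatch() always started matching with BOL set, so a BOL-anchored pattern could match at every offset that do_split_string tried. Closes #3455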
This commit is contained in:
parent
a3bd3e4c50
commit
98d9089a8f
3 changed files with 60 additions and 3 deletions
|
@ -280,15 +280,23 @@ static zeek::VectorValPtr do_split_string(zeek::StringVal* str_val,
|
|||
int num_sep = 0;
|
||||
|
||||
int offset = 0;
|
||||
bool bol = true;
|
||||
const bool eol = true;
|
||||
|
||||
while ( n >= 0 )
|
||||
{
|
||||
offset = 0;
|
||||
// Find next match offset.
|
||||
int end_of_match = 0;
|
||||
while ( n > 0 &&
|
||||
(end_of_match = re->MatchPrefix(s + offset, n)) <= 0 )
|
||||
while ( n > 0 )
|
||||
{
|
||||
// Move on to next byte.
|
||||
end_of_match = re->MatchPrefix(s + offset, n, bol, eol);
|
||||
if ( end_of_match > 0 )
|
||||
break;
|
||||
|
||||
// Move on to the next byte. BOL is only set for the first byte, so that
|
||||
// a BOL-anchored pattern won't be matched anywhere else.
|
||||
bol = false;
|
||||
++offset;
|
||||
--n;
|
||||
}
|
||||
|
|
|
@ -31,3 +31,9 @@ A
|
|||
C
|
||||
=
|
||||
D
|
||||
test, ^est, [test]
|
||||
test, tes$, [test]
|
||||
test, ^test$, [, test, ]
|
||||
aa bb cc, / ?/, [aa, bb, cc]
|
||||
aa bb cc, / ?/, [aa, , bb, , cc]
|
||||
aa bb cc, / +/, [aa, bb, cc]
|
||||
|
|
|
@ -34,3 +34,46 @@ event zeek_init()
|
|||
pat = /=/;
|
||||
print_string_vector(split_string_all(a, pat));
|
||||
}
|
||||
|
||||
# Regression tests for anchor (^ / $) handling and for patterns that can
# match the empty string in split_string_n(). The negative priority makes
# this handler run after the default-priority zeek_init() handler, so its
# output is appended to the end of the baseline.
event zeek_init() &priority=-5
	{
	# Anchor testing.

	# "est" only occurs at offset 1, so a ^-anchored pattern must not
	# match it: the input comes back as a single, unsplit element.
	local r = split_string_n("test", /^est/, T, 1);
	assert |r| == 1;
	assert r[0] == "test";
	print "test", "^est", r;

	# "tes" is not at the end of the input, so a $-anchored pattern must
	# not match it either.
	r = split_string_n("test", /tes$/, T, 1);
	assert |r| == 1;
	assert r[0] == "test";
	print "test", "tes$", r;

	# The whole input is the separator: with separators included, the
	# result is an empty string on each side of the matched "test".
	r = split_string_n("test", /^test$/, T, 1);
	assert |r| == 3;
	assert r[0] == "";
	assert r[1] == "test";
	assert r[2] == "";
	print "test", "^test$", r;

	# / ?/ can match the empty string; only the actual spaces may act as
	# separators. Separators excluded from the result.
	r = split_string_n("aa bb cc", / ?/, F, 0);
	assert |r| == 3;
	assert r[0] == "aa";
	assert r[1] == "bb";
	assert r[2] == "cc";
	print "aa bb cc", "/ ?/", r;

	# Same pattern, separators included: pieces and separators alternate.
	r = split_string_n("aa bb cc", / ?/, T, 0);
	assert |r| == 5;
	assert r[0] == "aa";
	assert r[1] == " ";
	assert r[2] == "bb";
	assert r[3] == " ";
	# The last element was previously left unchecked.
	assert r[4] == "cc";
	print "aa bb cc", "/ ?/", r;

	# One-or-more spaces as separator, separators excluded.
	r = split_string_n("aa bb cc", / +/, F, 0);
	assert |r| == 3;
	assert r[0] == "aa";
	assert r[1] == "bb";
	assert r[2] == "cc";
	print "aa bb cc", "/ +/", r;
	}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue