mirror of
https://github.com/zeek/zeek.git
synced 2025-10-10 10:38:20 +00:00
strings.bif/do_split_string: Pass bol and eol to MatchPrefix()
This allows better control of BOL and EOL: when called without these arguments, MatchPrefix() / LongestMatch() always start with BOL, so a BOL-anchored pattern could match at every offset tried. Closes #3455
This commit is contained in:
parent
a3bd3e4c50
commit
98d9089a8f
3 changed files with 60 additions and 3 deletions
|
@ -280,15 +280,23 @@ static zeek::VectorValPtr do_split_string(zeek::StringVal* str_val,
|
||||||
int num_sep = 0;
|
int num_sep = 0;
|
||||||
|
|
||||||
int offset = 0;
|
int offset = 0;
|
||||||
|
bool bol = true;
|
||||||
|
const bool eol = true;
|
||||||
|
|
||||||
while ( n >= 0 )
|
while ( n >= 0 )
|
||||||
{
|
{
|
||||||
offset = 0;
|
offset = 0;
|
||||||
// Find next match offset.
|
// Find next match offset.
|
||||||
int end_of_match = 0;
|
int end_of_match = 0;
|
||||||
while ( n > 0 &&
|
while ( n > 0 )
|
||||||
(end_of_match = re->MatchPrefix(s + offset, n)) <= 0 )
|
|
||||||
{
|
{
|
||||||
// Move on to next byte.
|
end_of_match = re->MatchPrefix(s + offset, n, bol, eol);
|
||||||
|
if ( end_of_match > 0 )
|
||||||
|
break;
|
||||||
|
|
||||||
|
// Move on to next byte, use BOL only on the first byte such that
|
||||||
|
// a BOL anchored pattern won't be matched anywhere else.
|
||||||
|
bol = false;
|
||||||
++offset;
|
++offset;
|
||||||
--n;
|
--n;
|
||||||
}
|
}
|
||||||
|
|
|
@ -31,3 +31,9 @@ A
|
||||||
C
|
C
|
||||||
=
|
=
|
||||||
D
|
D
|
||||||
|
test, ^est, [test]
|
||||||
|
test, tes$, [test]
|
||||||
|
test, ^test$, [, test, ]
|
||||||
|
aa bb cc, / ?/, [aa, bb, cc]
|
||||||
|
aa bb cc, / ?/, [aa, , bb, , cc]
|
||||||
|
aa bb cc, / +/, [aa, bb, cc]
|
||||||
|
|
|
@ -34,3 +34,46 @@ event zeek_init()
|
||||||
pat = /=/;
|
pat = /=/;
|
||||||
print_string_vector(split_string_all(a, pat));
|
print_string_vector(split_string_all(a, pat));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Regression checks for how split_string_n() treats anchored (^ / $) and
# optional/repeated separator patterns. Every case asserts the exact
# resulting vector and then prints it so the output can be compared against
# the test baseline.
event zeek_init() &priority=-5
	{
	# Anchor testing.

	# "^est" does not match at the start of "test" and must not match at
	# a later offset either: the string comes back unsplit.
	local r = split_string_n("test", /^est/, T, 1);
	assert |r| == 1;
	assert r[0] == "test";
	print "test", "^est", r;

	# "tes$" does not reach the end of "test": again no split.
	r = split_string_n("test", /tes$/, T, 1);
	assert |r| == 1;
	assert r[0] == "test";
	print "test", "tes$", r;

	# "^test$" matches the whole string: the separator is surrounded by
	# an empty string on each side.
	r = split_string_n("test", /^test$/, T, 1);
	assert |r| == 3;
	assert r[0] == "";
	assert r[1] == "test";
	assert r[2] == "";
	print "test", "^test$", r;

	# Optional-space pattern, called with F: only the three words are
	# returned.
	r = split_string_n("aa bb cc", / ?/, F, 0);
	assert |r| == 3;
	assert r[0] == "aa";
	assert r[1] == "bb";
	assert r[2] == "cc";
	print "aa bb cc", "/ ?/", r;

	# Same pattern, called with T: the matched separators appear between
	# the words, giving five elements. All five are checked, including
	# the trailing word.
	r = split_string_n("aa bb cc", / ?/, T, 0);
	assert |r| == 5;
	assert r[0] == "aa";
	assert r[1] == " ";
	assert r[2] == "bb";
	assert r[3] == " ";
	assert r[4] == "cc";
	print "aa bb cc", "/ ?/", r;

	# One-or-more-spaces pattern, called with F: three words.
	r = split_string_n("aa bb cc", / +/, F, 0);
	assert |r| == 3;
	assert r[0] == "aa";
	assert r[1] == "bb";
	assert r[2] == "cc";
	print "aa bb cc", "/ +/", r;
	}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue