diff --git a/src/strings.bif b/src/strings.bif index cdafe1ef16..0567bdf709 100644 --- a/src/strings.bif +++ b/src/strings.bif @@ -943,7 +943,7 @@ function safe_shell_quote%(source: string%): string ## ## Returns: The set of strings in *str* that match *re*, or the empty set. ## -## .. zeek:see: find_last strstr +## .. zeek:see: find_all_ordered find_last strstr function find_all%(str: string, re: pattern%) : string_set %{ auto a = zeek::make_intrusive(zeek::id::string_set); @@ -965,6 +965,38 @@ function find_all%(str: string, re: pattern%) : string_set return a; %} +## Finds all occurrences of a pattern in a string. The order in which +## occurrences are found is preserved and the return value may contain +## duplicate elements. +## +## str: The string to inspect. +## +## re: The pattern to look for in *str*. +## +## Returns: All strings in *str* that match *re*, or an empty vector. +## +## .. zeek:see: find_all find_last strstr +function find_all_ordered%(str: string, re: pattern%) : string_vec + %{ + auto a = zeek::make_intrusive(zeek::id::string_vec); + + const u_char* s = str->Bytes(); + const u_char* e = s + str->Len(); + + for ( const u_char* t = s; t < e; ++t ) + { + int n = re->MatchPrefix(t, e - t); + if ( n >= 0 ) + { + auto idx = zeek::make_intrusive(n, (const char*) t); + a->Assign(a->Size(), std::move(idx)); + t += n - 1; + } + } + + return a; + %} + ## Finds the last occurrence of a pattern in a string. This function returns ## the match that starts at the largest index in the string, which is not ## necessarily the longest match. For example, a pattern of ``/.*/`` will @@ -976,7 +1008,7 @@ function find_all%(str: string, re: pattern%) : string_set ## ## Returns: The last string in *str* that matches *re*, or the empty string. ## -## .. zeek:see: find_all strstr +## .. 
zeek:see: find_all find_all_ordered strstr function find_last%(str: string, re: pattern%) : string %{ const u_char* s = str->Bytes(); diff --git a/testing/btest/Baseline/bifs.find_all_ordered/out b/testing/btest/Baseline/bifs.find_all_ordered/out new file mode 100644 index 0000000000..a769f8e539 --- /dev/null +++ b/testing/btest/Baseline/bifs.find_all_ordered/out @@ -0,0 +1,8 @@ +[this, is, a, test] +[one, two, three, four, one, two, three, four] +[this, is, a, test, test, test] +[] +[a, b] +[foo] +[bar, foo] +[] diff --git a/testing/btest/bifs/find_all_ordered.zeek b/testing/btest/bifs/find_all_ordered.zeek new file mode 100644 index 0000000000..90d2cfa4f3 --- /dev/null +++ b/testing/btest/bifs/find_all_ordered.zeek @@ -0,0 +1,19 @@ +# @TEST-EXEC: zeek -b %INPUT >out +# @TEST-EXEC: btest-diff out + +event zeek_init() + { + local v = vector("this is a test", + "one two three four one two three four", + "this is a test test test", + "1 2 3 4", + "a b", + "foo", + "1bar2foo3", + "" + ); + local pat = /[a-z]+/; + + for ( i in v ) + print find_all_ordered(v[i], pat); + }