Merge branch 'topic/AbdelSaTd/case-insensitive-find'

* topic/AbdelSaTd/case-insensitive-find:
  testing-do-find-str_case-insensitive
  case-insensitive-search-features-for-do_find_str

I did a few small style-fixes while merging this and expanded the test
a bit. I also removed unnecessary commits.

Closes GH-1828
This commit is contained in:
Johanna Amann 2021-11-10 11:28:52 +00:00
commit d451fb8184
6 changed files with 78 additions and 8 deletions

View file

@ -1,3 +1,7 @@
4.2.0-dev.317 | 2021-11-10 11:33:29 +0000
* Add case-insensitive search for find_str and rfind_str (Abdel)
4.2.0-dev.314 | 2021-11-10 11:16:28 +0100
* GH-1757: Add new hook `HookLoadFileExtended` that allows plugins

2
NEWS
View file

@ -38,6 +38,8 @@ New Functionality
- The DNS analyzer has initial support for the SVCB and HTTPS types. The new events
are ``dns_SVCB`` and ``dns_HTTPS``.
- The ``find_str`` and ``rfind_str`` bifs now support case-insensitive searches.
Changed Functionality
---------------------

View file

@ -1 +1 @@
4.2.0-dev.314
4.2.0-dev.317

View file

@ -1145,7 +1145,7 @@ function count_substr%(str: string, sub: string%) : count
%%{
static int64_t do_find_str(zeek::StringVal* str, zeek::StringVal* sub, int64_t start, int64_t end, bool rfind)
static int64_t do_find_str(zeek::StringVal* str, zeek::StringVal* sub, int64_t start, int64_t end, bool rfind, bool case_sensitive)
{
// Don't bother if the start is after the end of the string.
if ( start > str->Len() )
@ -1168,11 +1168,19 @@ static int64_t do_find_str(zeek::StringVal* str, zeek::StringVal* sub, int64_t s
return -1;
string s = str->ToStdString().substr(start, end_pos);
string sb = sub->ToStdString();
size_t pos = string::npos;
if ( ! case_sensitive )
{
transform(s.begin(), s.end(), s.begin(), ::tolower);
transform(sb.begin(), sb.end(), sb.begin(), ::tolower);
}
if ( rfind )
pos = s.rfind(sub->ToStdString());
pos = s.rfind(sb);
else
pos = s.find(sub->ToStdString());
pos = s.find(sb);
if ( pos == string::npos )
return -1;
@ -1193,13 +1201,16 @@ static int64_t do_find_str(zeek::StringVal* str, zeek::StringVal* sub, int64_t s
## end: An optional position for the end of the substring. A value less than
## zero (such as the default -1) means a search until the end of the
## string.
## case_sensitive: Set to false to perform a case-insensitive search.
## (default: T). Note that case-insensitive searches use the
## ``tolower`` libc function, which is locale-sensitive.
##
## Returns: The position of the substring. Returns -1 if the string wasn't
## found. Prints an error if the starting position is after the ending
## position.
function find_str%(str: string, sub: string, start: count &default=0, end: int &default=-1%) : int
function find_str%(str: string, sub: string, start: count &default=0, end: int &default=-1, case_sensitive: bool &default=T%) : int
%{
return zeek::val_mgr->Int(do_find_str(str, sub, start, end, false));
return zeek::val_mgr->Int(do_find_str(str, sub, start, end, false, case_sensitive));
%}
## The same as :zeek:see:`find_str`, but returns the highest index matching
@ -1210,13 +1221,16 @@ function find_str%(str: string, sub: string, start: count &default=0, end: int &
## start: An optional position for the start of the substring.
## end: An optional position for the end of the substring. A value less than
## zero (such as the default -1) means a search from the end of the string.
## case_sensitive: Set to false to perform a case-insensitive search.
## (default: T). Note that case-insensitive searches use the
## ``tolower`` libc function, which is locale-sensitive.
##
## Returns: The position of the substring. Returns -1 if the string wasn't
## found. Prints an error if the starting position is after the ending
## position.
function rfind_str%(str: string, sub: string, start: count &default=0, end: int &default=-1%) : int
function rfind_str%(str: string, sub: string, start: count &default=0, end: int &default=-1, case_sensitive: bool &default=T%) : int
%{
return zeek::val_mgr->Int(do_find_str(str, sub, start, end, true));
return zeek::val_mgr->Int(do_find_str(str, sub, start, end, true, case_sensitive));
%}
## Returns whether a string starts with a substring.

View file

@ -0,0 +1,17 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
insensitive
48
48
48
-1
-1
48
48
sensitive
-1
-1
-1
-1
-1
-1
48

View file

@ -0,0 +1,33 @@
#
# @TEST-EXEC: zeek -b %INPUT >out
# @TEST-EXEC: btest-diff out
# Exercises find_str with the case_sensitive argument set to F and with it
# omitted, printing one result per needle so that the output can be compared
# against the recorded baseline.
event zeek_init()
	{
	local haystack = "this is the concatenation of HTTP fields of the fOrM of the website that I am protecting";

	# The needles are searched in vector order, which fixes the order of
	# the printed results.
	local needles = vector("form", "FORM", "FoRm", "om0", "f0rm", "fOrm", "fOrM");

	print "insensitive";
	for ( i in needles )
		print find_str(haystack, needles[i], 0, -1, F);

	# Same needles again, this time leaving case_sensitive at its default.
	print "sensitive";
	for ( i in needles )
		print find_str(haystack, needles[i], 0, -1);
	}