diff --git a/CHANGES b/CHANGES index a314b2fa3c..c7a501198d 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,7 @@ +4.2.0-dev.317 | 2021-11-10 11:33:29 +0000 + + * Add case-insensitive search for find_str and rfind_str (Abdel) + 4.2.0-dev.314 | 2021-11-10 11:16:28 +0100 * GH-1757: Add new hook `HookLoadFileExtended` that allows plugins diff --git a/NEWS b/NEWS index 2ece09bee0..d82680803e 100644 --- a/NEWS +++ b/NEWS @@ -38,6 +38,8 @@ New Functionality - The DNS analyzer has initial support for the SVCB and HTTPS types. The new events are ``dns_SVCB`` and ``dns_HTTPS``. +- The ``find_str`` and ``rfind_str`` bifs now support case-insensitive searches. + Changed Functionality --------------------- diff --git a/VERSION b/VERSION index 5296c1e837..1ac7ea7281 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -4.2.0-dev.314 +4.2.0-dev.317 diff --git a/src/strings.bif b/src/strings.bif index 0d3812f267..e4a6e21851 100644 --- a/src/strings.bif +++ b/src/strings.bif @@ -1145,7 +1145,7 @@ function count_substr%(str: string, sub: string%) : count %%{ -static int64_t do_find_str(zeek::StringVal* str, zeek::StringVal* sub, int64_t start, int64_t end, bool rfind) +static int64_t do_find_str(zeek::StringVal* str, zeek::StringVal* sub, int64_t start, int64_t end, bool rfind, bool case_sensitive) { // Don't bother if the start is after the end of the string. if ( start > str->Len() ) @@ -1168,11 +1168,19 @@ static int64_t do_find_str(zeek::StringVal* str, zeek::StringVal* sub, int64_t s return -1; string s = str->ToStdString().substr(start, end_pos); + string sb = sub->ToStdString(); size_t pos = string::npos; + + if ( ! 
case_sensitive ) + { /* tolower() is undefined for negative char values, so route each byte through unsigned char. */ + transform(s.begin(), s.end(), s.begin(), [](unsigned char c) { return static_cast<char>(::tolower(c)); }); + transform(sb.begin(), sb.end(), sb.begin(), [](unsigned char c) { return static_cast<char>(::tolower(c)); }); + } + if ( rfind ) - pos = s.rfind(sub->ToStdString()); + pos = s.rfind(sb); else - pos = s.find(sub->ToStdString()); + pos = s.find(sb); if ( pos == string::npos ) return -1; @@ -1193,13 +1201,16 @@ static int64_t do_find_str(zeek::StringVal* str, zeek::StringVal* sub, int64_t s ## end: An optional position for the end of the substring. A value less than ## zero (such as the default -1) means a search until the end of the ## string. +## case_sensitive: Set to false to perform a case-insensitive search. +## (default: T). Note that case-insensitive searches use the +## ``tolower`` libc function, which is locale-sensitive. ## ## Returns: The position of the substring. Returns -1 if the string wasn't ## found. Prints an error if the starting position is after the ending ## position. -function find_str%(str: string, sub: string, start: count &default=0, end: int &default=-1%) : int +function find_str%(str: string, sub: string, start: count &default=0, end: int &default=-1, case_sensitive: bool &default=T%) : int %{ - return zeek::val_mgr->Int(do_find_str(str, sub, start, end, false)); + return zeek::val_mgr->Int(do_find_str(str, sub, start, end, false, case_sensitive)); %} ## The same as :zeek:see:`find_str`, but returns the highest index matching @@ -1210,13 +1221,16 @@ function find_str%(str: string, sub: string, start: count &default=0, end: int & ## start: An optional position for the start of the substring. ## end: An optional position for the end of the substring. A value less than ## zero (such as the default -1) means a search from the end of the string. +## case_sensitive: Set to false to perform a case-insensitive search. +## (default: T). Note that case-insensitive searches use the +## ``tolower`` libc function, which is locale-sensitive. ## ## Returns: The position of the substring. Returns -1 if the string wasn't ## found. 
Prints an error if the starting position is after the ending ## position. -function rfind_str%(str: string, sub: string, start: count &default=0, end: int &default=-1%) : int +function rfind_str%(str: string, sub: string, start: count &default=0, end: int &default=-1, case_sensitive: bool &default=T%) : int %{ - return zeek::val_mgr->Int(do_find_str(str, sub, start, end, true)); + return zeek::val_mgr->Int(do_find_str(str, sub, start, end, true, case_sensitive)); %} ## Returns whether a string starts with a substring. diff --git a/testing/btest/Baseline/bifs.do_find_str/out b/testing/btest/Baseline/bifs.do_find_str/out new file mode 100644 index 0000000000..2d6ffee887 --- /dev/null +++ b/testing/btest/Baseline/bifs.do_find_str/out @@ -0,0 +1,17 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +insensitive +48 +48 +48 +-1 +-1 +48 +48 +sensitive +-1 +-1 +-1 +-1 +-1 +-1 +48 diff --git a/testing/btest/bifs/do_find_str.zeek b/testing/btest/bifs/do_find_str.zeek new file mode 100644 index 0000000000..30e33dcc9d --- /dev/null +++ b/testing/btest/bifs/do_find_str.zeek @@ -0,0 +1,33 @@ +# +# @TEST-EXEC: zeek -b %INPUT >out +# @TEST-EXEC: btest-diff out + +event zeek_init() + { + local a = "this is the concatenation of HTTP fields of the fOrM of the website that I am protecting"; + local b = "form"; + local c = "FORM"; + local d = "FoRm"; + local e = "om0"; + local f = "f0rm"; + local g = "fOrm"; + local h = "fOrM"; + + + print "insensitive"; + print find_str(a, b, 0, -1, F); + print find_str(a, c, 0, -1, F); + print find_str(a, d, 0, -1, F); + print find_str(a, e, 0, -1, F); + print find_str(a, f, 0, -1, F); + print find_str(a, g, 0, -1, F); + print find_str(a, h, 0, -1, F); + print "sensitive"; + print find_str(a, b, 0, -1); + print find_str(a, c, 0, -1); + print find_str(a, d, 0, -1); + print find_str(a, e, 0, -1); + print find_str(a, f, 0, -1); + print find_str(a, g, 0, -1); + print find_str(a, h, 0, 
-1); + }