From 4194fdd279ca38f1b0dc0db6d03ce769d79b8c08 Mon Sep 17 00:00:00 2001 From: Christian Kreibich Date: Mon, 28 Feb 2022 11:10:08 -0800 Subject: [PATCH 1/4] Fix minor indentation bugs in strings.bif --- src/strings.bif | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/strings.bif b/src/strings.bif index e4a6e21851..6292ae1b66 100644 --- a/src/strings.bif +++ b/src/strings.bif @@ -541,7 +541,7 @@ function to_lower%(str: string%): string *ls++ = s[i]; } - *ls++ = '\0'; + *ls++ = '\0'; return zeek::make_intrusive(new zeek::String(1, lower_s, n)); %} @@ -570,7 +570,7 @@ function to_upper%(str: string%): string *us++ = s[i]; } - *us++ = '\0'; + *us++ = '\0'; return zeek::make_intrusive(new zeek::String(1, upper_s, n)); %} From 3324f35cf9b6b16059985f9c18f48ba9bf490b74 Mon Sep 17 00:00:00 2001 From: Christian Kreibich Date: Mon, 28 Feb 2022 11:23:22 -0800 Subject: [PATCH 2/4] Clarify is_ascii() BiF docstring re behavior on empty strings --- src/strings.bif | 1 + 1 file changed, 1 insertion(+) diff --git a/src/strings.bif b/src/strings.bif index 6292ae1b66..7bc45996c1 100644 --- a/src/strings.bif +++ b/src/strings.bif @@ -614,6 +614,7 @@ function to_string_literal%(str: string%): string %} ## Determines whether a given string contains only ASCII characters. +## The empty string is ASCII. ## ## str: The string to examine. ## From 19bfa071e08207989e487725ef9dfcbbbd365408 Mon Sep 17 00:00:00 2001 From: Christian Kreibich Date: Mon, 28 Feb 2022 11:24:45 -0800 Subject: [PATCH 3/4] Expand testcases around is_num(), is_alpha(), is_alnum(), is_ascii() BiFs --- testing/btest/Baseline/bifs.is_ascii/out | 1 + testing/btest/Baseline/bifs.string_utils/out | 3 +++ testing/btest/bifs/is_ascii.zeek | 1 + testing/btest/bifs/string_utils.zeek | 3 +++ 4 files changed, 8 insertions(+) diff --git a/testing/btest/Baseline/bifs.is_ascii/out b/testing/btest/Baseline/bifs.is_ascii/out index 1956db8698..34333b473a 100644 --- a/testing/btest/Baseline/bifs.is_ascii/out +++ b/testing/btest/Baseline/bifs.is_ascii/out @@ -1,3 +1,4 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. F T +T diff --git a/testing/btest/Baseline/bifs.string_utils/out b/testing/btest/Baseline/bifs.string_utils/out index 147d0e9ea7..7e51c14446 100644 --- a/testing/btest/Baseline/bifs.string_utils/out +++ b/testing/btest/Baseline/bifs.string_utils/out @@ -17,14 +17,17 @@ Content checking ---------------- is_num abc : 0 is_num 123 : 1 +is_num '' : 0 is_alpha ab : 1 is_alpha 1a : 0 is_alpha a1 : 0 +is_alpha '' : 0 is_alnum ab : 1 is_alnum 1a : 1 is_alnum a1 : 1 is_alnum 12 : 1 is_alnum ##12: 0 +is_alnum '' : 0 String counting (input str 'aabbaa') ------------------------------------ diff --git a/testing/btest/bifs/is_ascii.zeek b/testing/btest/bifs/is_ascii.zeek index 505e21e715..da1b1cfa7e 100644 --- a/testing/btest/bifs/is_ascii.zeek +++ b/testing/btest/bifs/is_ascii.zeek @@ -9,4 +9,5 @@ event zeek_init() print is_ascii(a); print is_ascii(b); + print is_ascii(""); } diff --git a/testing/btest/bifs/string_utils.zeek b/testing/btest/bifs/string_utils.zeek index 5b5f70983e..021d7f68d0 100644 --- a/testing/btest/bifs/string_utils.zeek +++ b/testing/btest/bifs/string_utils.zeek @@ -23,14 +23,17 @@ event zeek_init() print "----------------"; print fmt("is_num abc : %d", is_num("abc")); print fmt("is_num 123 : %d", is_num("123")); + print fmt("is_num '' : %d", is_num("")); print fmt("is_alpha ab : %d", is_alpha("ab")); print fmt("is_alpha 1a : %d", is_alpha("1a")); print fmt("is_alpha a1 : %d", is_alpha("a1")); + print fmt("is_alpha '' : %d", is_alpha("")); print fmt("is_alnum ab : %d", is_alnum("ab")); print fmt("is_alnum 1a : %d", is_alnum("1a")); print fmt("is_alnum a1 : %d", is_alnum("a1")); print fmt("is_alnum 12 : %d", is_alnum("12")); print fmt("is_alnum ##12: %d", is_alnum("##12")); + print fmt("is_alnum '' : %d", is_alnum("")); print ""; print "String counting (input str 'aabbaa')"; From b977e76ad5f4581a824a3c01687dad1ad2b35898 Mon Sep 17 00:00:00 2001 From: Christian Kreibich Date: Mon, 28 Feb 2022 13:02:36 -0800 Subject: [PATCH 4/4] The is_num(), is_alpha(), and is_alnum() BiFs now return F on empty string --- NEWS | 2 ++ src/strings.bif | 18 +++++++++++++++--- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/NEWS b/NEWS index 0acb7f13d0..34c6e3c611 100644 --- a/NEWS +++ b/NEWS @@ -15,6 +15,8 @@ New Functionality Changed Functionality --------------------- +- The is_num(), is_alpha(), and is_alnum() BiFs now return F for the empty string. + Deprecated Functionality ------------------------ diff --git a/src/strings.bif b/src/strings.bif index 7bc45996c1..11a38deaa3 100644 --- a/src/strings.bif +++ b/src/strings.bif @@ -1254,13 +1254,17 @@ function ends_with%(str: string, sub: string%) : bool return zeek::val_mgr->Bool(s.rfind(sub_s) == (s.size() - sub_s.size())); %} -## Returns whether an entire string consists only of digits. +## Returns whether a string consists entirely of digits. +## The empty string is not numeric. ## function is_num%(str: string%) : bool %{ // Python's version of this method (which this is based on) just checks to see if every // character in the string is a numeric value. If something more than this is desired, we // could use something like std::from_chars or std::strto{ul,f} to check it. + if ( str->Len() == 0 ) + return zeek::val_mgr->False(); + const char* s = str->CheckString(); for ( int i = 0; i < str->Len(); i++ ) if ( ! std::isdigit(s[i]) ) @@ -1269,10 +1273,14 @@ function is_num%(str: string%) : bool return zeek::val_mgr->True(); %} -## Returns whether an entire string is alphabetic characters. +## Returns whether a string consists entirely of alphabetic characters. +## The empty string is not alphabetic. ## function is_alpha%(str: string%) : bool %{ + if ( str->Len() == 0 ) + return zeek::val_mgr->False(); + const char* s = str->CheckString(); for ( int i = 0; i < str->Len(); i++ ) if ( ! std::isalpha(s[i]) ) @@ -1281,10 +1289,14 @@ function is_alpha%(str: string%) : bool return zeek::val_mgr->True(); %} -## Returns whether an entire string is alphanumeric characters +## Returns whether a string consists entirely of alphanumeric characters. +## The empty string is not alphanumeric. ## function is_alnum%(str: string%) : bool %{ + if ( str->Len() == 0 ) + return zeek::val_mgr->False(); + const char* s = str->CheckString(); for ( int i = 0; i < str->Len(); i++ ) if ( ! std::isalnum(s[i]) )