Merge branch 'topic/timw/more-string-view-usage'

* topic/timw/more-string-view-usage:
  Change to use ToStdStringView() in a few other BIFs
  Convert remove_prefix/suffix BIFs to use std::string_view
  Rework starts_with BIF similarly to ends_with changes in 1649e3e7cc
2025-10-02 14:48:21 +00:00 · 2024-01-23 10:41:16 -07:00 · 2024-01-23 10:41:16 -07:00 · a3af4a4b51
commit a3af4a4b51
parent 822ca99e80 012acb17cc
6 changed files with 38 additions and 17 deletions
--- a/8
+++ b/8
@ -1,3 +1,11 @@
+6.2.0-dev.449 | 2024-01-23 10:41:16 -0700
+
+  * Change to use ToStdStringView() in a few other BIFs (Tim Wojtulewicz, Corelight)
+
+  * Convert remove_prefix/suffix BIFs to use std::string_view (Tim Wojtulewicz, Corelight)
+
+  * Rework starts_with BIF similarly to ends_with changes in 1649e3e7cc61d77645ca99a6068124e6fc11df2f (Tim Wojtulewicz, Corelight)
+
 6.2.0-dev.445 | 2024-01-23 18:01:50 +0100

  * websocket: Handle breaking from WebSocket::configure_analyzer() (Arne Welzel, Corelight)
--- a/2
+++ b/2
@ -1 +1 @@
-6.2.0-dev.445
+6.2.0-dev.449
--- a/src/strings.bif
+++ b/src/strings.bif
@ -1247,8 +1247,8 @@ function reverse%(str: string%) : string
 ##
 function count_substr%(str: string, sub: string%) : count
 	%{
-	string s = str->ToStdString();
-	string sub_s = sub->ToStdString();
+	auto s = str->ToStdStringView();
+	auto sub_s = sub->ToStdStringView();

 	size_t count = 0;
 	size_t pos = s.find(sub_s);
@ -1355,8 +1355,14 @@ function rfind_str%(str: string, sub: string, start: count &default=0, end: int
 ##
 function starts_with%(str: string, sub: string%) : bool
 	%{
-	string s = str->ToStdString();
-	return zeek::val_mgr->Bool(s.find(sub->ToStdString()) == 0);
+	if ( sub->Len() > str->Len() )
+		return zeek::val_mgr->Bool(false);
+
+	auto sub_s = sub->ToStdStringView();
+	auto s = str->ToStdStringView();
+	auto start_s = std::string_view{s.data(), sub_s.size()};
+
+	return zeek::val_mgr->Bool(start_s == sub_s);
 	%}

 ## Returns whether a string ends with a substring.
@ -1556,8 +1562,8 @@ function remove_prefix%(str: string, sub: string%) : string
 	// This could just use repeated calls to lstrip(), except for a couple of reasons:
 	// 1) lstrip() creates a StringVal at the end, and that would mean repeated recreation of objects
 	// 2) lstrip() searches for any character in the string, not the string as a whole.
-	string s = str->ToStdString();
-	string sub_s = sub->ToStdString();
+	auto s = str->ToStdStringView();
+	auto sub_s = sub->ToStdStringView();

 	size_t pos = s.find(sub_s);
 	if ( pos != 0 )
@ -1578,12 +1584,15 @@ function remove_prefix%(str: string, sub: string%) : string
 function remove_suffix%(str: string, sub: string%) : string
 	%{
 	// See the note in removeprefix for why this doesn't just call rstrip.
-	string s = str->ToStdString();
-	string sub_s = sub->ToStdString();
+	auto s = str->ToStdStringView();
+	auto sub_s = sub->ToStdStringView();

 	size_t pos = s.rfind(sub_s);
 	size_t next_pos = s.size() - sub_s.size();

+	if ( pos != next_pos )
+		return zeek::IntrusivePtr<zeek::StringVal>(NewRef{}, str);
+
 	while ( pos == next_pos )
 		{
 		next_pos -= sub_s.size();
--- a/src/zeek.bif
+++ b/src/zeek.bif
@ -1986,7 +1986,7 @@ function enum_names%(et: any%): string_set

 	if ( et->GetType()->Tag() == TYPE_STRING )
 		{
-		const auto& id = zeek::detail::global_scope()->Find(et->AsStringVal()->ToStdString());
+		const auto& id = zeek::detail::global_scope()->Find(et->AsStringVal()->ToStdStringView());
 		if ( id && id->IsType() )
 			t = id->GetType();
 		}
@ -2184,7 +2184,7 @@ function global_options%(%): string_set
 ##          the string ``"<unknown id>"`` or ``"<no ID value>"`` is returned.
 function lookup_ID%(id: string%) : any
 	%{
-	const auto& i = zeek::detail::global_scope()->Find(id->CheckString());
+	const auto& i = zeek::detail::global_scope()->Find(id->ToStdStringView());
 	if ( ! i )
 		return zeek::make_intrusive<zeek::StringVal>("<unknown id>");

@ -2214,7 +2214,7 @@ function record_fields%(rec: any%): record_field_table

 	if ( rec->GetType()->Tag() == zeek::TYPE_STRING )
 		{
-		const auto& id = zeek::detail::global_scope()->Find(rec->AsStringVal()->ToStdString());
+		const auto& id = zeek::detail::global_scope()->Find(rec->AsStringVal()->ToStdStringView());

 		if ( ! id || ! id->IsType() || id->GetType()->Tag() != zeek::TYPE_RECORD )
 			{
@ -5106,7 +5106,7 @@ function from_json%(s: string, t: any, key_func: string_mapper &default=from_jso
 ## Returns: a compressed version of the input path.
 function compress_path%(dir: string%): string
 	%{
-	return zeek::make_intrusive<zeek::StringVal>(zeek::util::detail::normalize_path(dir->ToStdString()));
+	return zeek::make_intrusive<zeek::StringVal>(zeek::util::detail::normalize_path(dir->ToStdStringView()));
 	%}

 ## Returns true if the given tag belongs to a protocol analyzer.
--- a/testing/btest/Baseline/bifs.string_utils/out
+++ b/testing/btest/Baseline/bifs.string_utils/out
@ -49,10 +49,12 @@ swap_case 'aBc': AbC
 to_title 'bro is a very neat ids': 'Bro Is A Very Neat Ids'
 to_title '   ': '   '
 to_title '  a   c  ': '  A   C  '
-remove_prefix 'ananab'/'an' : ab
-remove_prefix 'anatnab'/'an': atnab
+remove_prefix 'banana'/'ba' : nana
+remove_prefix 'bantana'/'ba': ntana
+remove_prefix 'bantana'/'ab': bantana
 remove_suffix 'banana'/'na' : ba
 remove_suffix 'bantana'/'na': banta
+remove_suffix 'bantana'/'an': bantana

 find_str/rfind_str (input string 'abcdefghi')
 -----------------------------------------------------
--- a/testing/btest/bifs/string_utils.zeek
+++ b/testing/btest/bifs/string_utils.zeek
@ -60,10 +60,12 @@ event zeek_init()
 	print fmt("to_title 'bro is a very neat ids': '%s'", to_title("bro is a very neat ids"));
 	print fmt("to_title '   ': '%s'", to_title("   "));
 	print fmt("to_title '  a   c  ': '%s'", to_title("  a   c  "));
-	print fmt("remove_prefix 'ananab'/'an' : %s", remove_prefix("ananab", "an"));
-	print fmt("remove_prefix 'anatnab'/'an': %s", remove_prefix("anatnab", "an"));
+	print fmt("remove_prefix 'banana'/'ba' : %s", remove_prefix("banana", "ba"));
+	print fmt("remove_prefix 'bantana'/'ba': %s", remove_prefix("bantana", "ba"));
+	print fmt("remove_prefix 'bantana'/'ab': %s", remove_prefix("bantana", "ab"));
 	print fmt("remove_suffix 'banana'/'na' : %s", remove_suffix("banana", "na"));
 	print fmt("remove_suffix 'bantana'/'na': %s", remove_suffix("bantana", "na"));
+	print fmt("remove_suffix 'bantana'/'an': %s", remove_suffix("bantana", "an"));
 	print "";

 	print fmt("find_str/rfind_str (input string '%s')", s3);