Reversing the null-byte change to strings.bif once more.

I'm not sure how the null-byte change got back in again, but it did; it must have been reintroduced during the merges.
2025-10-02 14:48:21 +00:00 · 2010-12-08 04:46:10 -08:00 · 2010-12-08 04:46:10 -08:00 · c6631d1483
commit c6631d1483
parent dc5d3560f7
1 changed files with 101 additions and 79 deletions
--- a/src/strings.bif
+++ b/src/strings.bif
@ -138,27 +138,27 @@ function sort_string_array%(a: string_array%): string_array
 function edit%(arg_s: string, arg_edit_char: string%): string
 	%{
-	if ( arg_edit_char->Len() != 1 )
+	const char* s = arg_s->AsString()->CheckString();
 	const char* edit_s = arg_edit_char->AsString()->CheckString();
 	if ( strlen(edit_s) != 1 )
 		builtin_run_time("not exactly one edit character", @ARG@[1]);
-	const u_char* s = arg_s->Bytes();
+	char edit_c = *edit_s;
 	const u_char* edit_s = arg_edit_char->Bytes();
-	u_char edit_c = *edit_s;
+	int n = strlen(s) + 1;
-
+	char* new_s = new char[n];
 	int n = arg_s->Len();
 	u_char* new_s = new u_char[n+1];
 	int ind = 0;
-	for ( int i=0; i<n; ++i )
+	for ( ; *s; ++s )
 		{
-		if ( s[i] == edit_c )
+		if ( *s == edit_c )
 			{ // Delete last character
 			if ( --ind < 0 )
 				ind = 0;
 			}
 		else
-			new_s[ind++] = s[i];
+			new_s[ind++] = *s;
 		}
 	new_s[ind] = '\0';
@ -198,6 +198,7 @@ static int match_prefix(int s_len, const char* s, int t_len, const char* t)
 Val* do_split(StringVal* str_val, RE_Matcher* re, TableVal* other_sep,
 		int incl_sep, int max_num_sep)
 	{
 	const BroString* str = str_val->AsString();
 	TableVal* a = new TableVal(internal_type("string_array")->AsTableType());
 	ListVal* other_strings = 0;
@ -208,55 +209,67 @@ Val* do_split(StringVal* str_val, RE_Matcher* re, TableVal* other_sep,
 	// the future we expect to change this by giving RE_Matcher a
 	// const char* segment.
-	const u_char* s = str_val->Bytes();
+	const char* s = str->CheckString();
-	int n = str_val->Len();
+	int len = strlen(s);
-	const u_char* end_of_s = s + n;
+	const char* end_of_s = s + len;
 	int num = 0;
 	int num_sep = 0;
 	while ( 1 )
 		{
 		int offset = 0;
 		const char* t;
-	while ( n > 0 )
+		if ( max_num_sep > 0 && num_sep >= max_num_sep )
 			t = end_of_s;
 		else
 			{
-		offset = 0;
+			for ( t = s; t < end_of_s; ++t )
 		// Find next match offset.
 		int end_of_match;
 		while ( n > 0 &&
 		        (end_of_match = re->MatchPrefix(&s[offset], n)) <= 0 )
 				{
-			// Move on to next character.
+				offset = re->MatchPrefix(t);
-			++offset;
+
-			--n;
+				if ( other_strings )
 					{
 					val_list* vl = other_strings->Vals();
 					loop_over_list(*vl, i)
 						{
 						const BroString* sub =
 							(*vl)[i]->AsString();
 						if ( sub->Len() > offset &&
 						     match_prefix(end_of_s - t,
 								t, sub->Len(),
 								(const char*) (sub->Bytes())) )
 							{
 							offset = sub->Len();
 							}
 						}
 					}
 				if ( offset > 0 )
 					break;
 				}
 			}
 		Val* ind = new Val(++num, TYPE_COUNT);
-		a->Assign(ind, new StringVal(offset, (const char*) s));
+		a->Assign(ind, new StringVal(t - s, s));
 		Unref(ind);
-		// No more separators will be needed if this is the end of string.
+		if ( t >= end_of_s )
 		if ( n <= 0 )
 			break;
 		++num_sep;
 		if ( incl_sep )
 			{ // including the part that matches the pattern
 			ind = new Val(++num, TYPE_COUNT);
-			a->Assign(ind, new StringVal(end_of_match, (const char*) s+offset));
+			a->Assign(ind, new StringVal(offset, t));
 			Unref(ind);
 			}
-		++num_sep;
+		s = t + offset;
 		if ( max_num_sep && num_sep >= max_num_sep )
 			break;
 		offset += end_of_match;
 		n -= end_of_match;
 		s += offset;
 		if ( s > end_of_s )
 			{
 			internal_error("RegMatch in split goes beyond the string");
 		}
 		}
 	if ( other_strings )
 		delete other_strings;
@ -463,38 +476,42 @@ function subst_string%(s: string, from: string, to: string%): string
 function to_lower%(str: string%): string
 	%{
-	const u_char* s = str->Bytes();
+	const char* s = str->CheckString();
-	int n = str->Len();
+	int n = strlen(s) + 1;
 	char* lower_s = new char[n];
 	char* ls = lower_s;
-	for (int i=0; i<n; ++i)
+	char* ls;
 	for ( ls = lower_s; *s; ++s )
 		{
-		if ( isascii(s[i]) && isupper(s[i]) )
+		if ( isascii(*s) && isupper(*s) )
-			*ls++ = tolower(s[i]);
+			*ls++ = tolower(*s);
 		else
-			*ls++ = s[i];
+			*ls++ = *s;
 		}
-	return new StringVal(new BroString(1, byte_vec(lower_s), n));
+	*ls = '\0';
 	return new StringVal(new BroString(1, byte_vec(lower_s), n-1));
 	%}
 function to_upper%(str: string%): string
 	%{
-	const u_char* s = str->Bytes();
+	const char* s = str->CheckString();
-	int n = str->Len();
+	int n = strlen(s) + 1;
 	char* upper_s = new char[n];
 	char* us = upper_s;
-	for (int i=0; i<n; ++i)
+	char* us;
 	for ( us = upper_s; *s; ++s )
 		{
-		if ( isascii(s[i]) && islower(s[i]) )
+		if ( isascii(*s) && islower(*s) )
-			*us++ = toupper(s[i]);
+			*us++ = toupper(*s);
 		else
-			*us++ = s[i];
+			*us++ = *s;
 		}
-	return new StringVal(new BroString(1, byte_vec(upper_s), n));
+	*us = '\0';
 	return new StringVal(new BroString(1, byte_vec(upper_s), n-1));
 	%}
 function clean%(str: string%): string
@ -587,34 +604,40 @@ function str_split%(s: string, idx: index_vec%): string_vec
 function strip%(str: string%): string
 	%{
-	const u_char* s = str->Bytes();
+	const char* s = str->CheckString();
 	int n = str->Len();
-	if ( n == 0 )
+	int n = strlen(s) + 1;
 	char* strip_s = new char[n];
 	if ( n == 1 )
 		// Empty string.
-		return new StringVal(new BroString(s, n, 1));
+		return new StringVal(new BroString(1, byte_vec(strip_s), 0));
-	const u_char* sp = s;
+	while ( isspace(*s) )
-	// Move a pointer to the end of the string
+		++s;
-	const u_char* e = &sp[n-1];
+
-	while ( e > sp && isspace(*e) )
+	strncpy(strip_s, s, n);
 	char* s2 = strip_s;
 	char* e = &s2[strlen(s2) - 1];
 	while ( e > s2 && isspace(*e) )
 		--e;
-	// Move the pointer for the beginning of the string
+	e[1] = '\0';	// safe even if e hasn't changed, due to n = strlen + 1
 	while ( isspace(*sp) )
 		++sp;
-	return new StringVal(new BroString(sp, e-sp+1, 1));
+	return new StringVal(new BroString(1, byte_vec(s2), (e-s2)+1));
 	%}
 function string_fill%(len: int, source: string%): string
 	%{
-	const u_char* src = source->Bytes();
+	const char* src = source->CheckString();
-	int n = source->Len();
+
 	int sn = strlen(src);
 	char* dst = new char[len];
-	for ( int i = 0; i < len; i += n )
+	for ( int i = 0; i < len; i += sn )
-		::memcpy((dst + i), src, min(n, len - i));
+		::memcpy((dst + i), src, min(sn, len - i));
 	dst[len - 1] = 0;
@ -627,12 +650,11 @@ function string_fill%(len: int, source: string%): string
 #
 function str_shell_escape%(source: string%): string
 	%{
-	uint j = 0;
+	unsigned j = 0;
-	const u_char* src = source->Bytes();
+	const char* src = source->CheckString();
-	uint n = source->Len();
+	char* dst = new char[strlen(src) * 2 + 1];
 	byte_vec dst = new u_char[n * 2 + 1];
-	for ( uint i = 0; i < n; ++i )
+	for ( unsigned i = 0; i < strlen(src); ++i )
 		{
 		switch ( src[i] ) {
 		case '`': case '"': case '\\': case '$':
@ -650,7 +672,7 @@ function str_shell_escape%(source: string%): string
 		}
 	dst[j] = '\0';
-	return new StringVal(new BroString(1, dst, j));
+	return new StringVal(new BroString(1, byte_vec(dst), j));
 	%}
 # Returns all occurrences of the given pattern in the given string (an empty