Reverting the null-byte change to strings.bif once more.

I'm not sure how that change got back in, but it did; it was most likely reintroduced during the merges.
2025-10-02 06:38:20 +00:00 · 2010-12-08 04:46:10 -08:00 · 2010-12-08 04:46:10 -08:00 · c6631d1483
commit c6631d1483
parent dc5d3560f7
1 changed files with 101 additions and 79 deletions
--- a/src/strings.bif
+++ b/src/strings.bif
@@ -138,27 +138,27 @@ function sort_string_array%(a: string_array%): string_array

 function edit%(arg_s: string, arg_edit_char: string%): string
 	%{
-	if ( arg_edit_char->Len() != 1 )
+	const char* s = arg_s->AsString()->CheckString();
+	const char* edit_s = arg_edit_char->AsString()->CheckString();
+
+	if ( strlen(edit_s) != 1 )
 		builtin_run_time("not exactly one edit character", @ARG@[1]);

-	const u_char* s = arg_s->Bytes();
-	const u_char* edit_s = arg_edit_char->Bytes();
+	char edit_c = *edit_s;

-	u_char edit_c = *edit_s;
-
-	int n = arg_s->Len();
-	u_char* new_s = new u_char[n+1];
+	int n = strlen(s) + 1;
+	char* new_s = new char[n];
 	int ind = 0;

-	for ( int i=0; i<n; ++i )
+	for ( ; *s; ++s )
 		{
-		if ( s[i] == edit_c )
+		if ( *s == edit_c )
 			{ // Delete last character
 			if ( --ind < 0 )
 				ind = 0;
 			}
 		else
-			new_s[ind++] = s[i];
+			new_s[ind++] = *s;
 		}

 	new_s[ind] = '\0';
@@ -198,6 +198,7 @@ static int match_prefix(int s_len, const char* s, int t_len, const char* t)
 Val* do_split(StringVal* str_val, RE_Matcher* re, TableVal* other_sep,
 		int incl_sep, int max_num_sep)
 	{
+	const BroString* str = str_val->AsString();
 	TableVal* a = new TableVal(internal_type("string_array")->AsTableType());
 	ListVal* other_strings = 0;

@@ -208,55 +209,67 @@ Val* do_split(StringVal* str_val, RE_Matcher* re, TableVal* other_sep,
 	// the future we expect to change this by giving RE_Matcher a
 	// const char* segment.

-	const u_char* s = str_val->Bytes();
-	int n = str_val->Len();
-	const u_char* end_of_s = s + n;
+	const char* s = str->CheckString();
+	int len = strlen(s);
+	const char* end_of_s = s + len;
 	int num = 0;
 	int num_sep = 0;

+	while ( 1 )
+		{
 		int offset = 0;
+		const char* t;

-	while ( n > 0 )
+		if ( max_num_sep > 0 && num_sep >= max_num_sep )
+			t = end_of_s;
+		else
 			{
-		offset = 0;
-		// Find next match offset.
-		int end_of_match;
-		while ( n > 0 &&
-		        (end_of_match = re->MatchPrefix(&s[offset], n)) <= 0 )
+			for ( t = s; t < end_of_s; ++t )
 				{
-			// Move on to next character.
-			++offset;
-			--n;
+				offset = re->MatchPrefix(t);
+
+				if ( other_strings )
+					{
+					val_list* vl = other_strings->Vals();
+					loop_over_list(*vl, i)
+						{
+						const BroString* sub =
+							(*vl)[i]->AsString();
+						if ( sub->Len() > offset &&
+						     match_prefix(end_of_s - t,
+								t, sub->Len(),
+								(const char*) (sub->Bytes())) )
+							{
+							offset = sub->Len();
+							}
+						}
+					}
+
+				if ( offset > 0 )
+					break;
+				}
 			}

 		Val* ind = new Val(++num, TYPE_COUNT);
-		a->Assign(ind, new StringVal(offset, (const char*) s));
+		a->Assign(ind, new StringVal(t - s, s));
 		Unref(ind);

-		// No more separators will be needed if this is the end of string.
-		if ( n <= 0 )
+		if ( t >= end_of_s )
 			break;

+		++num_sep;
+
 		if ( incl_sep )
 			{ // including the part that matches the pattern
 			ind = new Val(++num, TYPE_COUNT);
-			a->Assign(ind, new StringVal(end_of_match, (const char*) s+offset));
+			a->Assign(ind, new StringVal(offset, t));
 			Unref(ind);
 			}

-		++num_sep;
-		if ( max_num_sep && num_sep >= max_num_sep )
-			break;
-		
-		offset += end_of_match;
-		n -= end_of_match;
-		s += offset;
-		
+		s = t + offset;
 		if ( s > end_of_s )
-			{
 			internal_error("RegMatch in split goes beyond the string");
 		}
-		}

 	if ( other_strings )
 		delete other_strings;
@@ -463,38 +476,42 @@ function subst_string%(s: string, from: string, to: string%): string

 function to_lower%(str: string%): string
 	%{
-	const u_char* s = str->Bytes();
-	int n = str->Len();
+	const char* s = str->CheckString();
+	int n = strlen(s) + 1;
 	char* lower_s = new char[n];
-	char* ls = lower_s;

-	for (int i=0; i<n; ++i)
+	char* ls;
+	for ( ls = lower_s; *s; ++s )
 		{
-		if ( isascii(s[i]) && isupper(s[i]) )
-			*ls++ = tolower(s[i]);
+		if ( isascii(*s) && isupper(*s) )
+			*ls++ = tolower(*s);
 		else
-			*ls++ = s[i];
+			*ls++ = *s;
 		}

-	return new StringVal(new BroString(1, byte_vec(lower_s), n));
+	*ls = '\0';
+
+	return new StringVal(new BroString(1, byte_vec(lower_s), n-1));
 	%}

 function to_upper%(str: string%): string
 	%{
-	const u_char* s = str->Bytes();
-	int n = str->Len();
+	const char* s = str->CheckString();
+	int n = strlen(s) + 1;
 	char* upper_s = new char[n];
-	char* us = upper_s;

-	for (int i=0; i<n; ++i)
+	char* us;
+	for ( us = upper_s; *s; ++s )
 		{
-		if ( isascii(s[i]) && islower(s[i]) )
-			*us++ = toupper(s[i]);
+		if ( isascii(*s) && islower(*s) )
+			*us++ = toupper(*s);
 		else
-			*us++ = s[i];
+			*us++ = *s;
 		}

-	return new StringVal(new BroString(1, byte_vec(upper_s), n));
+	*us = '\0';
+
+	return new StringVal(new BroString(1, byte_vec(upper_s), n-1));
 	%}

 function clean%(str: string%): string
@@ -587,34 +604,40 @@ function str_split%(s: string, idx: index_vec%): string_vec

 function strip%(str: string%): string
 	%{
-	const u_char* s = str->Bytes();
-	int n = str->Len();
+	const char* s = str->CheckString();

-	if ( n == 0 )
+	int n = strlen(s) + 1;
+	char* strip_s = new char[n];
+
+	if ( n == 1 )
 		// Empty string.
-		return new StringVal(new BroString(s, n, 1));
+		return new StringVal(new BroString(1, byte_vec(strip_s), 0));

-	const u_char* sp = s;
-	// Move a pointer to the end of the string
-	const u_char* e = &sp[n-1];
-	while ( e > sp && isspace(*e) )
+	while ( isspace(*s) )
+		++s;
+
+	strncpy(strip_s, s, n);
+
+	char* s2 = strip_s;
+	char* e = &s2[strlen(s2) - 1];
+
+	while ( e > s2 && isspace(*e) )
 		--e;

-	// Move the pointer for the beginning of the string
-	while ( isspace(*sp) )
-		++sp;
+	e[1] = '\0';	// safe even if e hasn't changed, due to n = strlen + 1

-	return new StringVal(new BroString(sp, e-sp+1, 1));
+	return new StringVal(new BroString(1, byte_vec(s2), (e-s2)+1));
 	%}

 function string_fill%(len: int, source: string%): string
 	%{
-	const u_char* src = source->Bytes();
-	int n = source->Len();
+	const char* src = source->CheckString();
+
+	int sn = strlen(src);
 	char* dst = new char[len];

-	for ( int i = 0; i < len; i += n )
-		::memcpy((dst + i), src, min(n, len - i));
+	for ( int i = 0; i < len; i += sn )
+		::memcpy((dst + i), src, min(sn, len - i));

 	dst[len - 1] = 0;

@@ -627,12 +650,11 @@ function string_fill%(len: int, source: string%): string
 #
 function str_shell_escape%(source: string%): string
 	%{
-	uint j = 0;
-	const u_char* src = source->Bytes();
-	uint n = source->Len();
-	byte_vec dst = new u_char[n * 2 + 1];
+	unsigned j = 0;
+	const char* src = source->CheckString();
+	char* dst = new char[strlen(src) * 2 + 1];

-	for ( uint i = 0; i < n; ++i )
+	for ( unsigned i = 0; i < strlen(src); ++i )
 		{
 		switch ( src[i] ) {
 		case '`': case '"': case '\\': case '$':
@@ -650,7 +672,7 @@ function str_shell_escape%(source: string%): string
 		}

 	dst[j] = '\0';
-	return new StringVal(new BroString(1, dst, j));
+	return new StringVal(new BroString(1, byte_vec(dst), j));
 	%}

 # Returns all occurrences of the given pattern in the given string (an empty