Merge remote branch 'origin/master' into topic/robin/logging-internals

2025-10-17 14:08:20 +00:00 · 2011-02-20 17:18:06 -08:00 · 2011-02-20 17:18:06 -08:00 · c015605113
commit c015605113
parent 56880da779 f79a1f6e58
29 changed files with 1696 additions and 795 deletions
--- a/src/strings.bif
+++ b/src/strings.bif
@ -154,27 +154,27 @@ function join_string_vec%(vec: string_vec, sep: string%): string

 function edit%(arg_s: string, arg_edit_char: string%): string
 	%{
-	const char* s = arg_s->AsString()->CheckString();
-	const char* edit_s = arg_edit_char->AsString()->CheckString();
-
-	if ( strlen(edit_s) != 1 )
+	if ( arg_edit_char->Len() != 1 )
 		builtin_run_time("not exactly one edit character", @ARG@[1]);

-	char edit_c = *edit_s;
+	const u_char* s = arg_s->Bytes();
+	const u_char* edit_s = arg_edit_char->Bytes();

-	int n = strlen(s) + 1;
-	char* new_s = new char[n];
+	u_char edit_c = *edit_s;
+
+	int n = arg_s->Len();
+	u_char* new_s = new u_char[n+1];
 	int ind = 0;

-	for ( ; *s; ++s )
+	for ( int i = 0; i < n; ++i )
 		{
-		if ( *s == edit_c )
+		if ( s[i] == edit_c )
 			{ // Delete last character
 			if ( --ind < 0 )
 				ind = 0;
 			}
 		else
-			new_s[ind++] = *s;
+			new_s[ind++] = s[i];
 		}

 	new_s[ind] = '\0';
@ -214,75 +214,55 @@ static int match_prefix(int s_len, const char* s, int t_len, const char* t)
 Val* do_split(StringVal* str_val, RE_Matcher* re, TableVal* other_sep,
 		int incl_sep, int max_num_sep)
 	{
-	const BroString* str = str_val->AsString();
 	TableVal* a = new TableVal(internal_type("string_array")->AsTableType());
 	ListVal* other_strings = 0;

 	if ( other_sep && other_sep->Size() > 0 )
 		other_strings = other_sep->ConvertToPureList();

-	// Currently let us assume that str is NUL-terminated. In
-	// the future we expect to change this by giving RE_Matcher a
-	// const char* segment.
-
-	const char* s = str->CheckString();
-	int len = strlen(s);
-	const char* end_of_s = s + len;
+	const u_char* s = str_val->Bytes();
+	int n = str_val->Len();
+	const u_char* end_of_s = s + n;
 	int num = 0;
 	int num_sep = 0;

-	while ( 1 )
+	int offset = 0;
+	while ( n >= 0 )
 		{
-		int offset = 0;
-		const char* t;
-
-		if ( max_num_sep > 0 && num_sep >= max_num_sep )
-			t = end_of_s;
-		else
+		offset = 0;
+		// Find next match offset.
+		int end_of_match = 0;
+		while ( n > 0 &&
+		        (end_of_match = re->MatchPrefix(s + offset, n)) <= 0 )
 			{
-			for ( t = s; t < end_of_s; ++t )
-				{
-				offset = re->MatchPrefix(t);
-
-				if ( other_strings )
-					{
-					val_list* vl = other_strings->Vals();
-					loop_over_list(*vl, i)
-						{
-						const BroString* sub =
-							(*vl)[i]->AsString();
-						if ( sub->Len() > offset &&
-						     match_prefix(end_of_s - t,
-								t, sub->Len(),
-								(const char*) (sub->Bytes())) )
-							{
-							offset = sub->Len();
-							}
-						}
-					}
-
-				if ( offset > 0 )
-					break;
-				}
+			// Move on to next byte.
+			++offset;
+			--n;
 			}

 		Val* ind = new Val(++num, TYPE_COUNT);
-		a->Assign(ind, new StringVal(t - s, s));
+		a->Assign(ind, new StringVal(offset, (const char*) s));
 		Unref(ind);

-		if ( t >= end_of_s )
+		// No more separators will be needed if this is the end of string.
+		if ( n <= 0 )
 			break;

-		++num_sep;
-
 		if ( incl_sep )
 			{ // including the part that matches the pattern
 			ind = new Val(++num, TYPE_COUNT);
-			a->Assign(ind, new StringVal(offset, t));
+			a->Assign(ind, new StringVal(end_of_match, (const char*) s+offset));
 			Unref(ind);
 			}

-		s = t + offset;
+		if ( max_num_sep && num_sep >= max_num_sep )
+			break;
+
+		++num_sep;
+
+		n -= end_of_match;
+		s += offset + end_of_match;;
+
 		if ( s > end_of_s )
 			internal_error("RegMatch in split goes beyond the string");
 		}
@ -492,42 +472,38 @@ function subst_string%(s: string, from: string, to: string%): string

 function to_lower%(str: string%): string
 	%{
-	const char* s = str->CheckString();
-	int n = strlen(s) + 1;
-	char* lower_s = new char[n];
+	const u_char* s = str->Bytes();	// Lower-cases ASCII upper-case bytes across all Len() bytes of str.
+	int n = str->Len();
+	char* lower_s = new char[n + 1];	// n data bytes plus room for the terminator written below.

-	char* ls;
-	for ( ls = lower_s; *s; ++s )
+	for ( int i = 0; i < n; ++i )
 		{
-		if ( isascii(*s) && isupper(*s) )
-			*ls++ = tolower(*s);
+		if ( isascii(s[i]) && isupper(s[i]) )
+			lower_s[i] = tolower(s[i]);
 		else
-			*ls++ = *s;
+			lower_s[i] = s[i];	// Non-ASCII and non-upper-case bytes are copied unchanged.
 		}

-	*ls = '\0';
-
-	return new StringVal(new BroString(1, byte_vec(lower_s), n-1));
+	lower_s[n] = '\0';	// NOTE(review): the 1 passed to BroString presumably declares a NUL at index n - confirm.
+	return new StringVal(new BroString(1, byte_vec(lower_s), n));
 	%}

 function to_upper%(str: string%): string
 	%{
-	const char* s = str->CheckString();
-	int n = strlen(s) + 1;
-	char* upper_s = new char[n];
+	const u_char* s = str->Bytes();	// Upper-cases ASCII lower-case bytes across all Len() bytes of str.
+	int n = str->Len();
+	char* upper_s = new char[n + 1];	// n data bytes plus room for the terminator written below.

-	char* us;
-	for ( us = upper_s; *s; ++s )
+	for ( int i = 0; i < n; ++i )
 		{
-		if ( isascii(*s) && islower(*s) )
-			*us++ = toupper(*s);
+		if ( isascii(s[i]) && islower(s[i]) )
+			upper_s[i] = toupper(s[i]);
 		else
-			*us++ = *s;
+			upper_s[i] = s[i];	// Non-ASCII and non-lower-case bytes are copied unchanged.
 		}

-	*us = '\0';
-
-	return new StringVal(new BroString(1, byte_vec(upper_s), n-1));
+	upper_s[n] = '\0';	// NOTE(review): the 1 passed to BroString presumably declares a NUL at index n - confirm.
+	return new StringVal(new BroString(1, byte_vec(upper_s), n));
 	%}

 function clean%(str: string%): string
@ -620,40 +596,35 @@ function str_split%(s: string, idx: index_vec%): string_vec

 function strip%(str: string%): string
 	%{
-	const char* s = str->CheckString();
+	const u_char* s = str->Bytes();	// Returns str without leading and trailing isspace() bytes.
+	int n = str->Len();

-	int n = strlen(s) + 1;
-	char* strip_s = new char[n];
-
-	if ( n == 1 )
+	if ( n == 0 )
 		// Empty string.
-		return new StringVal(new BroString(1, byte_vec(strip_s), 0));
+		return new StringVal(new BroString(s, n, 1));

-	while ( isspace(*s) )
-		++s;
+	const u_char* sp = s;

-	strncpy(strip_s, s, n);
-
-	char* s2 = strip_s;
-	char* e = &s2[strlen(s2) - 1];
-
-	while ( e > s2 && isspace(*e) )
+	// Back e up over trailing whitespace; it never moves before the first byte.
+	const u_char* e = sp + n - 1;
+	while ( e > sp && isspace(*e) )
 		--e;

-	e[1] = '\0';	// safe even if e hasn't changed, due to n = strlen + 1
+	// Advance sp over leading whitespace; check the bound first so *sp is never read past e.
+	while ( sp <= e && isspace(*sp) )
+		++sp;

-	return new StringVal(new BroString(1, byte_vec(s2), (e-s2)+1));
+	return new StringVal(new BroString(sp, (e - sp + 1), 1));	// Length is 0 when every byte was whitespace.
 	%}

 function string_fill%(len: int, source: string%): string
 	%{
-	const char* src = source->CheckString();
-
-	int sn = strlen(src);
+	const u_char* src = source->Bytes();
+	int n = source->Len();
 	char* dst = new char[len];

-	for ( int i = 0; i < len; i += sn )
-		::memcpy((dst + i), src, min(sn, len - i));
+	for ( int i = 0; i < len; i += n )
+		::memcpy((dst + i), src, min(n, len - i));

 	dst[len - 1] = 0;

@ -667,10 +638,11 @@ function string_fill%(len: int, source: string%): string
 function str_shell_escape%(source: string%): string
 	%{
 	unsigned j = 0;
-	const char* src = source->CheckString();
-	char* dst = new char[strlen(src) * 2 + 1];
+	const u_char* src = source->Bytes();
+	unsigned n = source->Len();
+	byte_vec dst = new u_char[n * 2 + 1];

-	for ( unsigned i = 0; i < strlen(src); ++i )
+	for ( unsigned i = 0; i < n; ++i )
 		{
 		switch ( src[i] ) {
 		case '`': case '"': case '\\': case '$':
@ -688,7 +660,7 @@ function str_shell_escape%(source: string%): string
 		}

 	dst[j] = '\0';
-	return new StringVal(new BroString(1, byte_vec(dst), j));
+	return new StringVal(new BroString(1, dst, j));
 	%}

 # Returns all occurrences of the given pattern in the given string (an empty