zeek/src/strings.bif

##! Definitions of built-in functions related to string processing and
##! manipulation.


%%{ // C segment
#include <vector>
#include <algorithm>
using namespace std;

#include "SmithWaterman.h"
%%}

## Calculates the Levenshtein distance between the two strings. See `Wikipedia
## <http://en.wikipedia.org/wiki/Levenshtein_distance>`__ for more information.
##
## s1: The first string.
##
## s2: The second string.
##
## Returns: The Levenshtein distance of two strings as a count.
##
function levenshtein_distance%(s1: string, s2: string%): count
	%{
	// Dynamic-programming edit distance over the raw bytes of both
	// strings. Each cell only depends on the previous row and on the
	// cell to its left, so two rows of (m + 1) cells are enough; this
	// avoids allocating the full (n + 1) x (m + 1) matrix.
	unsigned int n = s1->Len();
	unsigned int m = s2->Len();

	// If one string is empty, the distance is the length of the other.
	if ( ! n )
		return new Val(m, TYPE_COUNT);

	if ( ! m )
		return new Val(n, TYPE_COUNT);

	const u_char* a = s1->Bytes();
	const u_char* b = s2->Bytes();

	vector<unsigned int> prev(m + 1);
	vector<unsigned int> cur(m + 1);

	// Row 0: building the first j bytes of s2 from nothing costs j.
	for ( unsigned int j = 0; j <= m; ++j )
		prev[j] = j;

	for ( unsigned int i = 1; i <= n; ++i )
		{
		// Column 0: erasing the first i bytes of s1 costs i.
		cur[0] = i;

		for ( unsigned int j = 1; j <= m; ++j )
			{
			unsigned int subst =
				prev[j-1] + (a[i-1] == b[j-1] ? 0 : 1);
			cur[j] = min(min(prev[j] + 1, cur[j-1] + 1), subst);
			}

		prev.swap(cur);
		}

	// After the last swap, prev holds the row for i == n.
	return new Val(prev[m], TYPE_COUNT);
	%}

## Concatenates all arguments into a single string. The function takes a
## variable number of arguments of type string and stitches them together.
##
## Returns: The concatenation of all (string) arguments.
##
## .. bro:see:: cat cat_sep cat_string_array cat_string_array_n
##              fmt
##              join_string_vec join_string_array
function string_cat%(...%): string
	%{
	// First pass: add up the byte lengths of all arguments.
	int total_len = 0;
	loop_over_list(@ARG@, i)
		total_len += @ARG@[i]->AsString()->Len();

	// One extra byte for the terminating NUL. The result string is
	// created around the buffer up front; its bytes are filled in below.
	u_char* buf = new u_char[total_len + 1];
	BroString* result = new BroString(1, buf, total_len);

	// Second pass: copy each argument behind the previous one.
	u_char* out = buf;
	loop_over_list(@ARG@, j)
		{
		const BroString* piece = @ARG@[j]->AsString();
		const int piece_len = piece->Len();
		memcpy(out, piece->Bytes(), piece_len);
		out += piece_len;
		}

	*out = 0;

	return new StringVal(result);
	%}

%%{
// Collects the strings stored under the indices [start, end] of tbl into
// vs, which is cleared first. The pointers pushed into vs are the ones
// returned by AsString() on the looked-up values; no copies are made.
//
// Returns 1 if every index in the range had an entry. Returns 0 as soon as
// an index is missing; vs then holds only the elements found before the gap.
int string_array_to_vs(TableVal* tbl, int start, int end,
			vector<const BroString*>& vs)
	{
	vs.clear();
	for ( int i = start; i <= end; ++i )
		{
		Val* ind = new Val(i, TYPE_COUNT);
		Val* v = tbl->Lookup(ind);

		// The index value is only needed for the lookup. Release it
		// right away so that the early return below cannot leak it.
		Unref(ind);

		if ( ! v )
			return 0;

		vs.push_back(v->AsString());
#if 0
		char* str = v->AsString()->Render();
		DEBUG_MSG("string_array[%d] = \"%s\"\n", i, str);
		delete [] str;
#endif
		}
	return 1;
	}

// Stores copies of the strings in vs into tbl under the consecutive count
// indices start, start + 1, ..., end (vs[0] goes to index start).
//
// Returns 1 if the whole index range was filled. If vs holds fewer strings
// than the range asks for, stops at the end of vs and returns 0 rather than
// reading past the vector.
int vs_to_string_array(vector<const BroString*>& vs, TableVal* tbl,
			int start, int end)
	{
	const int avail = (int) vs.size();

	for ( int i = start, j = 0; i <= end; ++i, ++j )
		{
		if ( j >= avail )
			return 0;

		Val* ind = new Val(i, TYPE_COUNT);
		tbl->Assign(ind, new StringVal(vs[j]->Len(),
						(const char *)vs[j]->Bytes()));
		Unref(ind);
		}
	return 1;
	}

// Concatenates the strings stored under indices [start, end] of tbl.
// The status returned by string_array_to_vs is not inspected, so if an
// index is missing only the elements collected before it are joined.
BroString* cat_string_array_n(TableVal* tbl, int start, int end)
	{
	vector<const BroString*> pieces;
	string_array_to_vs(tbl, start, end, pieces);

	BroString* joined = concatenate(pieces);
	return joined;
	}
%%}

## Concatenates all elements in an array of strings.
##
## a: The :bro:type:`string_array` (``table[count] of string``).
##
## Returns: The concatenation of all elements in *a*.
##
## .. bro:see:: cat cat_sep string_cat cat_string_array_n
##              fmt
##              join_string_vec join_string_array
function cat_string_array%(a: string_array%): string &deprecated
	%{
	// Join the elements at indices 1 through the number of table entries.
	const int last = a->AsTable()->Length();
	BroString* joined = cat_string_array_n(a->AsTableVal(), 1, last);
	return new StringVal(joined);
	%}

## Concatenates a specific range of elements in an array of strings.
##
## a: The :bro:type:`string_array` (``table[count] of string``).
##
## start: The array index of the first element of the range.
##
## end: The array index of the last element of the range.
##
## Returns: The concatenation of the range *[start, end]* in *a*.
##
## .. bro:see:: cat string_cat cat_string_array
##              fmt
##              join_string_vec join_string_array
function cat_string_array_n%(a: string_array, start: count, end: count%): string &deprecated
	%{
	// Delegate to the C++ helper of the same name for the range [start, end].
	BroString* joined = cat_string_array_n(a->AsTableVal(), start, end);
	return new StringVal(joined);
	%}

## Joins all values in the given array of strings with a separator placed
## between each element.
##
## sep: The separator to place between each element.
##
## a: The :bro:type:`string_array` (``table[count] of string``).
##
## Returns: The concatenation of all elements in *a*, with *sep* placed
##          between each element.
##
## .. bro:see:: cat cat_sep string_cat cat_string_array cat_string_array_n
##              fmt
##              join_string_vec
function join_string_array%(sep: string, a: string_array%): string &deprecated
	%{
	// Builds a list of string pointers (element, separator, element, ...)
	// and concatenates them in one go.
	vector<const BroString*> vs;
	TableVal* tbl = a->AsTableVal();
	int n = a->AsTable()->Length();

	for ( int i = 1; i <= n; ++i )
		{
		Val* ind = new Val(i, TYPE_COUNT);
		Val* v = tbl->Lookup(ind);

		// The index is only needed for the lookup; drop it before the
		// early return below so that it is not leaked.
		Unref(ind);

		// A gap in the indices 1..n aborts the join with a null result.
		// NOTE(review): callers receive 0 here, not a string value.
		if ( ! v )
			return 0;

		vs.push_back(v->AsString());

		// Separator between elements only, not after the last one.
		if ( i < n )
			vs.push_back(sep->AsString());
		}

	return new StringVal(concatenate(vs));
	%}

## Joins all values in the given vector of strings with a separator placed
## between each element.
##
## sep: The separator to place between each element.
##
## vec: The :bro:type:`string_vec` (``vector of string``).
##
## Returns: The concatenation of all elements in *vec*, with *sep* placed
##          between each element.
##
## .. bro:see:: cat cat_sep string_cat cat_string_array cat_string_array_n
##              fmt
##              join_string_array
function join_string_vec%(vec: string_vec, sep: string%): string
	%{
	// The output is accumulated in a raw-style descriptor.
	ODesc out;
	out.SetStyle(RAW_STYLE);

	VectorVal* elems = vec->AsVectorVal();

	for ( unsigned idx = 0; idx < elems->Size(); ++idx )
		{
		// A separator precedes every slot but the first, whether or
		// not the slot holds a value.
		if ( idx != 0 )
			out.Add(sep->CheckString(), 0);

		Val* elem = elems->Lookup(idx);

		// Unset slots contribute nothing beyond the separator.
		if ( elem )
			elem->Describe(&out);
		}

	BroString* joined = new BroString(1, out.TakeBytes(), out.Len());
	joined->SetUseFreeToDelete(true);

	return new StringVal(joined);
	%}

## Sorts an array of strings.
##
## a: The :bro:type:`string_array` (``table[count] of string``).
##
## Returns: A sorted copy of *a*.
##
## .. bro:see:: sort
function sort_string_array%(a: string_array%): string_array &deprecated
	%{
	TableVal* tbl = a->AsTableVal();
	int n = a->AsTable()->Length();

	// Collect the elements at indices 1..n. If an index is missing,
	// vs ends up with fewer than n entries.
	vector<const BroString*> vs;
	string_array_to_vs(tbl, 1, n, vs);

	// Insertion sort, ordered by Bstr_cmp. Bstr_cmp yields a three-way
	// int result, so it cannot be handed to std::sort as a predicate.
	unsigned int i, j;
	for ( i = 0; i < vs.size(); ++i )
		{
		const BroString* x = vs[i];
		for ( j = i; j > 0; --j )
			if ( Bstr_cmp(vs[j-1], x) <= 0 )
				break;
			else
				vs[j] = vs[j-1];
		vs[j] = x;
		}

	// Write back exactly as many strings as were collected; using n
	// here would read past the end of vs when the input had gaps.
	TableVal* b = new TableVal(string_array);
	vs_to_string_array(vs, b, 1, (int) vs.size());
	return b;
	%}

## Returns an edited version of a string that applies a special
## "backspace character" (usually ``\x08`` for backspace or ``\x7f`` for DEL).
## For example, ``edit("hello there", "e")`` returns ``"llo t"``.
##
## arg_s: The string to edit.
##
## arg_edit_char: A string of exactly one character that represents the
##                "backspace character". If it is longer than one character Bro
##                generates a run-time error and uses the first character in
##                the string.
##
## Returns: An edited version of *arg_s* where *arg_edit_char* triggers the
##          deletion of the last character.
##
## .. bro:see:: clean
##              to_string_literal
##              escape_string
##              strip
function edit%(arg_s: string, arg_edit_char: string%): string
	%{
	if ( arg_edit_char->Len() != 1 )
		builtin_error("not exactly one edit character", @ARG@[1]);

	// An empty edit string has no first character to fall back on;
	// reading one would look past the end of its data. Hand the input
	// back unchanged in that case.
	if ( arg_edit_char->Len() == 0 )
		return arg_s->Ref();

	const u_char* s = arg_s->Bytes();

	// Only the first byte of the edit string is used.
	const u_char edit_c = arg_edit_char->Bytes()[0];

	int n = arg_s->Len();

	// The result can never be longer than the input (+1 for the NUL).
	u_char* new_s = new u_char[n+1];
	int ind = 0;	// number of bytes currently in new_s

	for ( int i = 0; i < n; ++i )
		{
		if ( s[i] == edit_c )
			{
			// Drop the most recently kept byte, if there is one.
			if ( ind > 0 )
				--ind;
			}
		else
			new_s[ind++] = s[i];
		}

	new_s[ind] = '\0';

	return new StringVal(new BroString(1, byte_vec(new_s), ind));
	%}

## Get a substring from a string, given a starting position and length.
##
## s: The string to obtain a substring from.
##
## start: The starting position of the substring in *s*, where 1 is the first
##        character. As a special case, 0 also represents the first character.
##
## n: The number of characters to extract, beginning at *start*.
##
## Returns: A substring of *s* of length *n* from position *start*.
function sub_bytes%(s: string, start: count, n: int%): string
	%{
	// Callers count positions from 1 (0 is accepted as an alias for 1);
	// turn that into a 0-based offset.
	if ( start != 0 )
		start -= 1;

	BroString* piece = s->AsString()->GetSubstring(start, n);

	// When no substring is returned, fall back to the empty string.
	return new StringVal(piece ? piece : new BroString(""));
	%}

%%{
// Returns 1 if the first t_len bytes of t are a prefix of the s_len bytes
// of s, and 0 otherwise. A t_len of zero or less always matches.
static int match_prefix(int s_len, const char* s, int t_len, const char* t)
	{
	// Nothing to compare: trivially a prefix.
	if ( t_len <= 0 )
		return 1;

	// A prefix longer than the subject can never match.
	if ( t_len > s_len )
		return 0;

	const char* const t_end = t + t_len;
	while ( t != t_end )
		{
		if ( *s++ != *t++ )
			return 0;
		}

	return 1;
	}

// Splits str_val at the matches of re and returns the pieces as a
// string_vec, filled from index 0.
//
// incl_sep: if non-zero, the text matched by re is stored as its own
//           element after the piece that precedes it.
// max_num_sep: maximum number of separators to split at; 0 means no limit.
//
// Only matches of length > 0 count as separators (a MatchPrefix result
// <= 0 is treated as "no match at this position").
VectorVal* do_split_string(StringVal* str_val, RE_Matcher* re, int incl_sep,
                           int max_num_sep)
	{
	// string_vec is used early in the version script - do not use the NetVar.
	VectorVal* rval = new VectorVal(internal_type("string_vec")->AsVectorType());
	const u_char* s = str_val->Bytes();	// start of the current piece
	int n = str_val->Len();			// bytes left from s + offset
	const u_char* end_of_s = s + n;
	int num = 0;		// next vector index to assign
	int num_sep = 0;	// separators consumed so far

	int offset = 0;		// length of the current piece
	while ( n >= 0 )
		{
		offset = 0;
		// Find next match offset.
		int end_of_match = 0;
		while ( n > 0 &&
		        (end_of_match = re->MatchPrefix(s + offset, n)) <= 0 )
			{
			// Move on to next byte.
			++offset;
			--n;
			}

		// Split limit reached: the current piece becomes everything
		// that is left, and n = 0 ends the loop below.
		if ( max_num_sep && num_sep >= max_num_sep )
			{
			offset = end_of_s - s;
			n=0;
			}

		rval->Assign(num++, new StringVal(offset, (const char*) s));

		// No more separators will be needed if this is the end of string.
		if ( n <= 0 )
			break;

		if ( incl_sep )
			{ // including the part that matches the pattern
			rval->Assign(num++, new StringVal(end_of_match, (const char*) s+offset));
			}

		// NOTE(review): this looks unreachable - when this condition
		// holds, the identical check above has already set n to 0 and
		// the loop was left at the "n <= 0" test.
		if ( max_num_sep && num_sep >= max_num_sep )
			break;

		++num_sep;

		// Skip over the separator; the next piece starts right after it.
		n -= end_of_match;
		s += offset + end_of_match;;

		if ( s > end_of_s )
			reporter->InternalError("RegMatch in split goes beyond the string");
		}

	return rval;
	}

// Splits str_val at the matches of re and returns the pieces as a
// string_array table, indexed by counts starting at 1.
//
// incl_sep: if non-zero, the text matched by re is stored as its own
//           element after the piece that precedes it.
// max_num_sep: maximum number of separators to split at; 0 means no limit.
//
// Only matches of length > 0 count as separators (a MatchPrefix result
// <= 0 is treated as "no match at this position").
Val* do_split(StringVal* str_val, RE_Matcher* re, int incl_sep, int max_num_sep)
	{
	TableVal* a = new TableVal(string_array);
	const u_char* s = str_val->Bytes();	// start of the current piece
	int n = str_val->Len();			// bytes left from s + offset
	const u_char* end_of_s = s + n;
	int num = 0;		// last table index assigned
	int num_sep = 0;	// separators consumed so far

	int offset = 0;		// length of the current piece
	while ( n >= 0 )
		{
		offset = 0;
		// Find next match offset.
		int end_of_match = 0;
		while ( n > 0 &&
		        (end_of_match = re->MatchPrefix(s + offset, n)) <= 0 )
			{
			// Move on to next byte.
			++offset;
			--n;
			}

		// Split limit reached: the current piece becomes everything
		// that is left, and n = 0 ends the loop below.
		if ( max_num_sep && num_sep >= max_num_sep )
			{
			offset = end_of_s - s;
			n=0;
			}

		// The index value is released again right after the Assign.
		Val* ind = new Val(++num, TYPE_COUNT);
		a->Assign(ind, new StringVal(offset, (const char*) s));
		Unref(ind);

		// No more separators will be needed if this is the end of string.
		if ( n <= 0 )
			break;

		if ( incl_sep )
			{ // including the part that matches the pattern
			ind = new Val(++num, TYPE_COUNT);
			a->Assign(ind, new StringVal(end_of_match, (const char*) s+offset));
			Unref(ind);
			}

		// NOTE(review): this looks unreachable - when this condition
		// holds, the identical check above has already set n to 0 and
		// the loop was left at the "n <= 0" test.
		if ( max_num_sep && num_sep >= max_num_sep )
			break;

		++num_sep;

		// Skip over the separator; the next piece starts right after it.
		n -= end_of_match;
		s += offset + end_of_match;;

		if ( s > end_of_s )
			reporter->InternalError("RegMatch in split goes beyond the string");
		}

	return a;
	}

// Replaces matches of re in str_val with repl and returns the result as a
// new string value. If do_all is zero, only the first match is replaced;
// otherwise all non-overlapping matches are. Only matches of length > 0
// are replaced.
Val* do_sub(StringVal* str_val, RE_Matcher* re, StringVal* repl, int do_all)
	{
	const u_char* subject = str_val->Bytes();
	int pos = 0;				// current scan position
	int remaining = str_val->Len();		// bytes left from pos

	// Flat list of <from, to> offset pairs; each pair marks a stretch
	// of the subject ("from" inclusive, "to" exclusive) that is cut out
	// and replaced by repl.
	List(ptr_compat_int) cut_points;

	int out_size = 0;	// bytes of the subject carried over unchanged

	while ( remaining > 0 )
		{
		// Advance byte by byte until a match starts at pos; every
		// byte skipped on the way is kept in the result.
		int match_len = 0;
		for ( ; remaining > 0; ++pos, --remaining, ++out_size )
			{
			match_len = re->MatchPrefix(subject + pos, remaining);
			if ( match_len > 0 )
				break;
			}

		if ( remaining <= 0 )
			break;

		// subject[pos .. pos+match_len-1] matches re.
		cut_points.append(pos);
		cut_points.append(pos + match_len);

		pos += match_len;
		remaining -= match_len;

		if ( ! do_all )
			{
			// Single substitution only: everything after the
			// match is carried over as is.
			out_size += remaining;
			break;
			}
		}

	// Add room for one copy of the replacement per cut, plus the
	// terminating NUL.
	const int repl_len = repl->Len();
	const int num_cuts = cut_points.length() / 2;
	out_size += num_cuts * repl_len;
	out_size += 1;

	byte_vec result = new u_char[out_size];
	byte_vec out = result;

	// Interleave the untouched stretches with the replacement text.
	int copied_upto = 0;
	for ( int k = 0; k < cut_points.length(); k += 2 )
		{
		const int keep = cut_points[k] - copied_upto;
		memcpy(out, subject + copied_upto, keep);
		out += keep;

		memcpy(out, repl->Bytes(), repl_len);
		out += repl_len;

		copied_upto = cut_points[k+1];
		}

	// Whatever follows the last cut.
	const int tail = str_val->Len() - copied_upto;
	memcpy(out, subject + copied_upto, tail);
	out += tail;

	// The NUL is not part of the string's length, so out is not advanced.
	*out = '\0';

	return new StringVal(new BroString(1, result, out - result));
	}
%%}

## Splits a string into an array of strings according to a pattern.
##
## str: The string to split.
##
## re: The pattern describing the element separator in *str*.
##
## Returns: An array of strings where each element corresponds to a substring
##          in *str* separated by *re*.
##
## .. bro:see:: split1 split_all split_n str_split split_string split_string1 split_string_all split_string_n
##
## .. note:: The returned table starts at index 1. Note that conceptually the
##           return value is meant to be a vector and this might change in the
##           future.
##
function split%(str: string, re: pattern%): string_array &deprecated
	%{
	// Separators are dropped and the number of splits is unlimited
	// (do_split treats a max_num_sep of 0 as "no limit").
	const int incl_sep = 0;
	const int max_num_sep = 0;
	return do_split(str, re, incl_sep, max_num_sep);
	%}

## Splits a string into an array of strings according to a pattern.
##
## str: The string to split.
##
## re: The pattern describing the element separator in *str*.
##
## Returns: An array of strings where each element corresponds to a substring
##          in *str* separated by *re*.
##
## .. bro:see:: split_string1 split_string_all split_string_n str_split
##
function split_string%(str: string, re: pattern%): string_vec
	%{
	// Separators are dropped and the number of splits is unlimited
	// (do_split_string treats a max_num_sep of 0 as "no limit").
	const int incl_sep = 0;
	const int max_num_sep = 0;
	return do_split_string(str, re, incl_sep, max_num_sep);
	%}

## Splits a string *once* into a two-element array of strings according to a
## pattern. This function is the same as :bro:id:`split`, but *str* is only
## split once (if possible) at the earliest position and an array of two strings
## is returned.
##
## str: The string to split.
##
## re: The pattern describing the separator to split *str* in two pieces.
##
## Returns: An array of strings with two elements in which the first represents
##          the substring in *str* up to the first occurrence of *re*, and the
##          second everything after *re*. An array of one string is returned
##          when *str* cannot be split.
##
## .. bro:see:: split split_all split_n str_split split_string split_string1 split_string_all split_string_n
function split1%(str: string, re: pattern%): string_array &deprecated
	%{
	// Separators are dropped; split at no more than one separator.
	const int incl_sep = 0;
	const int max_num_sep = 1;
	return do_split(str, re, incl_sep, max_num_sep);
	%}

## Splits a string *once* into a two-element array of strings according to a
## pattern. This function is the same as :bro:id:`split_string`, but *str* is
## only split once (if possible) at the earliest position and an array of two
## strings is returned.
##
## str: The string to split.
##
## re: The pattern describing the separator to split *str* in two pieces.
##
## Returns: An array of strings with two elements in which the first represents
##          the substring in *str* up to the first occurrence of *re*, and the
##          second everything after *re*. An array of one string is returned
##          when *str* cannot be split.
##
## .. bro:see:: split_string split_string_all split_string_n str_split
function split_string1%(str: string, re: pattern%): string_vec
	%{
	// Separators are dropped; split at no more than one separator.
	const int incl_sep = 0;
	const int max_num_sep = 1;
	return do_split_string(str, re, incl_sep, max_num_sep);
	%}

## Splits a string into an array of strings according to a pattern. This
## function is the same as :bro:id:`split`, except that the separators are
## returned as well. For example, ``split_all("a-b--cd", /(\-)+/)`` returns
## ``{"a", "-", "b", "--", "cd"}``: odd-indexed elements do not match the
## pattern and even-indexed ones do.
##
## str: The string to split.
##
## re: The pattern describing the element separator in *str*.
##
## Returns: An array of strings where each two successive elements correspond
##          to a substring in *str* of the part not matching *re* (odd-indexed)
##          and the part that matches *re* (even-indexed).
##
## .. bro:see:: split split1 split_n str_split split_string split_string1 split_string_all split_string_n
function split_all%(str: string, re: pattern%): string_array &deprecated
	%{
	// Separators are kept as elements of their own; the number of
	// splits is unlimited.
	const int incl_sep = 1;
	const int max_num_sep = 0;
	return do_split(str, re, incl_sep, max_num_sep);
	%}

## Splits a string into an array of strings according to a pattern. This
## function is the same as :bro:id:`split_string`, except that the separators
## are returned as well. For example, ``split_string_all("a-b--cd", /(\-)+/)``
## returns ``{"a", "-", "b", "--", "cd"}``: odd-indexed elements do match the
## pattern and even-indexed ones do not.
##
## str: The string to split.
##
## re: The pattern describing the element separator in *str*.
##
## Returns: An array of strings where each two successive elements correspond
##          to a substring in *str* of the part not matching *re* (even-indexed)
##          and the part that matches *re* (odd-indexed).
##
## .. bro:see:: split_string split_string1 split_string_n str_split
function split_string_all%(str: string, re: pattern%): string_vec
	%{
	// Separators are kept as elements of their own; the number of
	// splits is unlimited.
	const int incl_sep = 1;
	const int max_num_sep = 0;
	return do_split_string(str, re, incl_sep, max_num_sep);
	%}

## Splits a string a given number of times into an array of strings according
## to a pattern. This function is similar to :bro:id:`split1` and
## :bro:id:`split_all`, but with customizable behavior with respect to
## including separators in the result and the number of times to split.
##
## str: The string to split.
##
## re: The pattern describing the element separator in *str*.
##
## incl_sep: A flag indicating whether to include the separator matches in the
##           result (as in :bro:id:`split_all`).
##
## max_num_sep: The number of times to split *str*.
##
## Returns: An array of strings where, if *incl_sep* is true, each two
##          successive elements correspond to a substring in *str* of the part
##          not matching *re* (odd-indexed) and the part that matches *re*
##          (even-indexed).
##
## .. bro:see:: split split1 split_all str_split split_string split_string1 split_string_all split_string_n
function split_n%(str: string, re: pattern,
		incl_sep: bool, max_num_sep: count%): string_array &deprecated
	%{
	// Both knobs are passed straight through to the shared splitter.
	Val* pieces = do_split(str, re, incl_sep, max_num_sep);
	return pieces;
	%}

## Splits a string a given number of times into an array of strings according
## to a pattern. This function is similar to :bro:id:`split_string1` and
## :bro:id:`split_string_all`, but with customizable behavior with respect to
## including separators in the result and the number of times to split.
##
## str: The string to split.
##
## re: The pattern describing the element separator in *str*.
##
## incl_sep: A flag indicating whether to include the separator matches in the
##           result (as in :bro:id:`split_string_all`).
##
## max_num_sep: The number of times to split *str*.
##
## Returns: An array of strings where, if *incl_sep* is true, each two
##          successive elements correspond to a substring in *str* of the part
##          not matching *re* (even-indexed) and the part that matches *re*
##          (odd-indexed).
##
## .. bro:see:: split_string split_string1 split_string_all str_split
function split_string_n%(str: string, re: pattern,
		incl_sep: bool, max_num_sep: count%): string_vec
	%{
	// Both knobs are passed straight through to the shared splitter.
	VectorVal* pieces = do_split_string(str, re, incl_sep, max_num_sep);
	return pieces;
	%}

## Substitutes a given replacement string for the first occurrence of a pattern
## in a given string.
##
## str: The string to perform the substitution in.
##
## re: The pattern being replaced with *repl*.
##
## repl: The string that replaces *re*.
##
## Returns: A copy of *str* with the first occurrence of *re* replaced with
##          *repl*.
##
## .. bro:see:: gsub subst_string
function sub%(str: string, re: pattern, repl: string%): string
	%{
	// Replace the first match only.
	const int do_all = 0;
	return do_sub(str, re, repl, do_all);
	%}

## Substitutes a given replacement string for all occurrences of a pattern
## in a given string.
##
## str: The string to perform the substitution in.
##
## re: The pattern being replaced with *repl*.
##
## repl: The string that replaces *re*.
##
## Returns: A copy of *str* with all occurrences of *re* replaced with *repl*.
##
## .. bro:see:: sub subst_string
function gsub%(str: string, re: pattern, repl: string%): string
	%{
	// Replace every match.
	const int do_all = 1;
	return do_sub(str, re, repl, do_all);
	%}


## Lexicographically compares two strings.
##
## s1: The first string.
##
## s2: The second string.
##
## Returns: An integer greater than, equal to, or less than 0 according as
##          *s1* is greater than, equal to, or less than *s2*.
function strcmp%(s1: string, s2: string%): int
	%{
	// The three-way result of Bstr_cmp is returned unchanged.
	const BroString* lhs = s1->AsString();
	const BroString* rhs = s2->AsString();
	return new Val(Bstr_cmp(lhs, rhs), TYPE_INT);
	%}

## Locates the first occurrence of one string in another.
##
## big: The string to look in.
##
## little: The (smaller) string to find inside *big*.
##
## Returns: The location of *little* in *big*, or 0 if *little* is not found in
##          *big*.
##
## .. bro:see:: find_all find_last
function strstr%(big: string, little: string%): count
	%{
	// The search result is shifted up by one before it is returned.
	// NOTE(review): presumably FindSubstring yields a 0-based offset or
	// -1 for "not found", which gives the documented 1-based position
	// or 0 - confirm against BroString.
	const int pos = big->AsString()->FindSubstring(little->AsString());
	return new Val(1 + pos, TYPE_COUNT);
	%}

## Substitutes each (non-overlapping) appearance of a string in another.
##
## s: The string in which to perform the substitution.
##
## from: The string to look for which is replaced with *to*.
##
## to: The string that replaces all occurrences of *from* in *s*.
##
## Returns: A copy of *s* where each occurrence of *from* is replaced with *to*.
##
## .. bro:see:: sub gsub
function subst_string%(s: string, from: string, to: string%): string
	%{
	const int needle_len = from->Len();

	// Nothing can be substituted for an empty search string; hand
	// back s itself with an extra reference.
	if ( needle_len == 0 )
		return s->Ref();

	const u_char* cursor = s->Bytes();	// start of the unscanned rest
	int remaining = s->Len();		// bytes left from cursor

	// The result is assembled from chunks that point into s and to;
	// concatenate() joins them at the end.
	vector<data_chunk_t> pieces;
	data_chunk_t chunk;

	while ( remaining >= needle_len )
		{
		int hit = strstr_n(remaining, cursor, needle_len, from->Bytes());

		if ( hit < 0 )
			break;

		// Text between the previous match and this one, if any.
		if ( hit > 0 )
			{
			chunk.length = hit;
			chunk.data = (const char*) cursor;
			pieces.push_back(chunk);
			}

		// The replacement stands in for the match itself.
		chunk.length = to->Len();
		chunk.data = (const char*) (to->Bytes());
		pieces.push_back(chunk);

		const int consumed = hit + needle_len;
		cursor += consumed;
		remaining -= consumed;
		}

	// Unmatched tail.
	if ( remaining > 0 )
		{
		chunk.length = remaining;
		chunk.data = (const char*) cursor;
		pieces.push_back(chunk);
		}

	return new StringVal(concatenate(pieces));
	%}

## Replaces all uppercase letters in a string with their lowercase counterpart.
##
## str: The string to convert to lowercase letters.
##
## Returns: A copy of the given string with the uppercase letters (as indicated
##          by ``isascii`` and ``isupper``) folded to lowercase
##          (via ``tolower``).
##
## .. bro:see:: to_upper is_ascii
function to_lower%(str: string%): string
	%{
	const int len = str->Len();
	const u_char* in = str->Bytes();

	// Same length as the input, plus one byte for the trailing NUL.
	u_char* out = new u_char[len + 1];

	for ( int k = 0; k < len; ++k )
		{
		const u_char c = in[k];

		// Only ASCII uppercase letters are folded; every other byte
		// is copied unchanged.
		out[k] = (isascii(c) && isupper(c)) ? tolower(c) : c;
		}

	out[len] = '\0';

	return new StringVal(new BroString(1, out, len));
	%}

## Replaces all lowercase letters in a string with their uppercase counterpart.
##
## str: The string to convert to uppercase letters.
##
## Returns: A copy of the given string with the lowercase letters (as indicated
##          by ``isascii`` and ``islower``) folded to uppercase
##          (via ``toupper``).
##
## .. bro:see:: to_lower is_ascii
function to_upper%(str: string%): string
	%{
	const int len = str->Len();
	const u_char* in = str->Bytes();

	// Same length as the input, plus one byte for the trailing NUL.
	u_char* out = new u_char[len + 1];

	for ( int k = 0; k < len; ++k )
		{
		const u_char c = in[k];

		// Only ASCII lowercase letters are folded; every other byte
		// is copied unchanged.
		out[k] = (isascii(c) && islower(c)) ? toupper(c) : c;
		}

	out[len] = '\0';

	return new StringVal(new BroString(1, out, len));
	%}

## Replaces non-printable characters in a string with escaped sequences. The
## mappings are:
##
##     - values not in *[32, 126]* to ``\xXX``
##
## If the string does not yet have a trailing NUL, one is added internally.
##
## In contrast to :bro:id:`escape_string`, this encoding is *not* fully reversible.
##
## str: The string to escape.
##
## Returns: The escaped string.
##
## .. bro:see:: to_string_literal escape_string
function clean%(str: string%): string
	%{
	// Render with the default format; the rendered buffer is handed
	// over to the new string, with its length taken up to the first NUL.
	char* rendered = str->AsString()->Render();
	const size_t rendered_len = strlen(rendered);
	return new StringVal(new BroString(1, byte_vec(rendered), rendered_len));
	%}

## Replaces non-printable characters in a string with escaped sequences. The
## mappings are:
##
##     - values not in *[32, 126]* to ``\xXX``
##     - ``\`` to ``\\``
##     - ``'`` and ``"`` to ``\'`` and ``\"``, respectively.
##
## str: The string to escape.
##
## Returns: The escaped string.
##
## .. bro:see:: clean escape_string
function to_string_literal%(str: string%): string
	%{
	// Render in string-literal format; the rendered buffer is handed
	// over to the new string, with its length taken up to the first NUL.
	char* rendered = str->AsString()->Render(BroString::BRO_STRING_LITERAL);
	const size_t rendered_len = strlen(rendered);
	return new StringVal(new BroString(1, byte_vec(rendered), rendered_len));
	%}

## Determines whether a given string contains only ASCII characters.
##
## str: The string to examine.
##
## Returns: False if any byte value of *str* is greater than 127, and true
##          otherwise.
##
## .. bro:see:: to_upper to_lower
function is_ascii%(str: string%): bool
	%{
	const u_char* bytes = str->Bytes();
	const int len = str->Len();

	// Scan until the first byte with the high bit set.
	int k = 0;
	while ( k < len && bytes[k] <= 127 )
		++k;

	// Reaching the end means no such byte exists.
	const int all_ascii = (k == len) ? 1 : 0;
	return new Val(all_ascii, TYPE_BOOL);
	%}

## Replaces non-printable characters in a string with escaped sequences. The
## mappings are:
##
##     - values not in *[32, 126]* to ``\xXX``
##     - ``\`` to ``\\``
##
## In contrast to :bro:id:`clean`, this encoding is fully reversible.
##
## s: The string to escape.
##
## Returns: The escaped string.
##
## .. bro:see:: clean to_string_literal
function escape_string%(s: string%): string
	%{
	// Hex-escape bytes and escape the escape character itself.
	const int flags = BroString::ESC_HEX | BroString::ESC_ESC;
	char* escaped = s->AsString()->Render(flags);

	// The value is built from the rendered text while the buffer is
	// still alive; the buffer is freed here afterwards.
	Val* result = new StringVal(escaped);
	delete [] escaped;

	return result;
	%}

## Returns an ASCII hexadecimal representation of a string.
##
## s: The string to convert to hex.
##
## Returns: A copy of *s* where each byte is replaced with its two-digit
##          lowercase hexadecimal representation.
function string_to_ascii_hex%(s: string%): string
	%{
	static const char hex_digits[] = "0123456789abcdef";

	const int len = s->Len();
	const u_char* sp = s->Bytes();

	// Two output characters per input byte, plus the trailing NUL.
	char* x = new char[len * 2 + 1];

	for ( int i = 0; i < len; ++i )
		{
		x[i * 2] = hex_digits[sp[i] >> 4];	// high nibble
		x[i * 2 + 1] = hex_digits[sp[i] & 0x0f];	// low nibble
		}

	// Terminate explicitly so that the buffer is NUL-terminated even
	// for an empty input, where the loop writes nothing.
	x[len * 2] = '\0';

	return new StringVal(new BroString(1, (u_char*) x, len * 2));
	%}

## Uses the Smith-Waterman algorithm to find similar/overlapping substrings.
## See `Wikipedia <http://en.wikipedia.org/wiki/Smith%E2%80%93Waterman_algorithm>`__.
##
## s1: The first string.
##
## s2: The second string.
##
## params: Parameters for the Smith-Waterman algorithm.
##
## Returns: The result of the Smith-Waterman algorithm calculation.
function str_smith_waterman%(s1: string, s2: string, params: sw_params%) : sw_substring_vec
	%{
	// The first two fields of the params record supply the two
	// SWParams constructor arguments; the second one is converted to
	// an SWVariant.
	RecordVal* rec = params->AsRecordVal();
	SWParams opts(rec->Lookup(0)->AsCount(),
		      SWVariant(rec->Lookup(1)->AsCount()));

	BroSubstring::Vec* matches =
		smith_waterman(s1->AsString(), s2->AsString(), opts);

	// Convert to the script-level vector, then free the C++ results.
	VectorVal* converted = BroSubstring::VecToPolicy(matches);
	delete_each(matches);
	delete matches;

	return converted;
	%}

## Splits a string into substrings with the help of an index vector of cutting
## points.
##
## s: The string to split.
##
## idx: The index vector (``vector of count``) with the cutting points.
##
## Returns: A vector of strings.
##
## .. bro:see:: split split1 split_all split_n
function str_split%(s: string, idx: index_vec%): string_vec
	%{
	// Copy the cutting points out of the script-level vector.
	vector<Val*>* cuts = idx->AsVector();
	BroString::IdxVec indices(cuts->size());

	for ( unsigned int k = 0; k < cuts->size(); ++k )
		indices[k] = (*cuts)[k]->AsCount();

	BroString::Vec* parts = s->AsString()->Split(indices);
	VectorVal* out = new VectorVal(
	    internal_type("string_vec")->AsVectorType());

	// No pieces: return the empty vector.
	if ( ! parts )
		return out;

	// Pieces are stored starting at index 1, not 0. Each StringVal
	// is constructed directly around the piece it is given.
	unsigned int slot = 1;
	for ( BroString::VecIt it = parts->begin(); it != parts->end(); ++it )
		{
		out->Assign(slot, new StringVal(*it));
		++slot;
		}

	delete parts;

	return out;
	%}

## Strips whitespace at both ends of a string.
##
## str: The string to strip the whitespace from.
##
## Returns: A copy of *str* with leading and trailing whitespace removed.
##
## .. bro:see:: sub gsub
function strip%(str: string%): string
	%{
	const u_char* s = str->Bytes();
	int n = str->Len();

	if ( n == 0 )
		// Empty string.
		return new StringVal(new BroString(s, n, 1));

	const u_char* sp = s;

	// Move a pointer back from the end of the string past any
	// trailing whitespace (stopping at the first byte at the latest).
	const u_char* e = sp + n - 1;
	while ( e > sp && isspace(*e) )
		--e;

	// Move the pointer for the beginning of the string forward. The
	// bounds check comes first so that *sp is never read once sp has
	// moved past e, which happens for an all-whitespace string.
	while ( sp <= e && isspace(*sp) )
		++sp;

	// For an all-whitespace input sp == e + 1 and the length is 0.
	return new StringVal(new BroString(sp, (e - sp + 1), 1));
	%}

## Generates a string of a given size and fills it with repetitions of a source
## string.
##
## len: The length of the output string.
##
## source: The string to concatenate repeatedly until *len* has been reached.
##
## Returns: A string of length *len* filled with *source*.
function string_fill%(len: int, source: string%): string
	%{
	// A non-positive length leaves nothing to fill (and must not reach
	// the allocation or the dst[len - 1] write below).
	if ( len <= 0 )
		return new StringVal("");

	const u_char* src = source->Bytes();
	int64_t n = source->Len();

	// One spare byte so that a NUL follows the len bytes of the string.
	char* dst = new char[len + 1];

	if ( n <= 0 )
		// An empty source would never advance the copy loop; fill
		// with NUL bytes instead.
		memset(dst, 0, len);
	else
		{
		// 64-bit counter: len is a script-level int and may exceed
		// the range of a plain int.
		for ( int64_t i = 0; i < len; i += n )
			::memcpy((dst + i), src, min<int64_t>(n, len - i));
		}

	// As before, the last byte of the result is always NUL.
	dst[len - 1] = 0;
	dst[len] = 0;

	return new StringVal(new BroString(1, byte_vec(dst), len));
	%}

## Takes a string and escapes characters that would allow execution of
## commands at the shell level. Must be used before including strings in
## :bro:id:`system` or similar calls.
##
## source: The string to escape.
##
## Returns: A shell-escaped version of *source*.
##
## .. bro:see:: system
function str_shell_escape%(source: string%): string
	%{
	const u_char* in = source->Bytes();
	const unsigned in_len = source->Len();

	// Worst case every byte gets a backslash in front of it, plus one
	// byte for the trailing NUL.
	byte_vec out = new u_char[in_len * 2 + 1];
	unsigned out_len = 0;

	for ( unsigned k = 0; k < in_len; ++k )
		{
		const u_char c = in[k];

		// Only backtick, double quote, backslash and dollar sign are
		// escaped. Other shell metacharacters ( | & ; ( ) < > ' * ?
		// [ ] ! # { } ) are passed through unchanged.
		if ( c == '`' || c == '"' || c == '\\' || c == '$' )
			out[out_len++] = '\\';

		out[out_len++] = c;
		}

	out[out_len] = '\0';
	return new StringVal(new BroString(1, out, out_len));
	%}

## Finds all occurrences of a pattern in a string.
##
## str: The string to inspect.
##
## re: The pattern to look for in *str*.
##
## Returns: The set of strings in *str* that match *re*, or the empty set.
##
## .. bro:see:: find_last strstr
function find_all%(str: string, re: pattern%) : string_set
	%{
	TableVal* a = new TableVal(string_set);

	const u_char* s = str->Bytes();
	const u_char* e = s + str->Len();

	// Try a match at every position; after a match, continue right
	// behind it so that the matches do not overlap.
	for ( const u_char* t = s; t < e; ++t )
		{
		int n = re->MatchPrefix(t, e - t);
		if ( n >= 0 )
			{
			Val* idx = new StringVal(n, (const char*) t);
			a->Assign(idx, 0);
			Unref(idx);

			// Together with the loop's ++t this skips the n
			// matched bytes. A zero-length match must not move t
			// backwards, or the scan would never advance.
			if ( n > 0 )
				t += n - 1;
			}
		}

	return a;
	%}

## Finds the last occurrence of a pattern in a string. This function returns
## the match that starts at the largest index in the string, which is not
## necessarily the longest match.  For example, a pattern of ``/.*/`` will
## return the final character in the string.
##
## str: The string to inspect.
##
## re: The pattern to look for in *str*.
##
## Returns: The last string in *str* that matches *re*, or the empty string.
##
## .. bro:see:: find_all strstr
function find_last%(str: string, re: pattern%) : string
	%{
	const u_char* s = str->Bytes();
	const u_char* e = s + str->Len();

	// Try start positions from the last byte back to the first. t is
	// decremented inside the loop so that it never points before s
	// (which would also happen right away for an empty string).
	for ( const u_char* t = e; t > s; )
		{
		--t;

		int n = re->MatchPrefix(t, e - t);
		if ( n >= 0 )
			return new StringVal(n, (const char*) t);
		}

	// No position matched.
	return new StringVal("");
	%}

## Returns a hex dump for given input data. The hex dump renders 16 bytes per
## line, with hex on the left and ASCII (where printable)
## on the right.
##
## data_str: The string to dump in hex format.
##
## Returns: The hex dump of the given string.
##
## .. bro:see:: string_to_ascii_hex bytestring_to_hexstr
##
## .. note:: Based on Netdude's hex editor code.
##
function hexdump%(data_str: string%) : string
	%{

// The width of a line of text in the hex-mode view, consisting
// of offset, hex view and ASCII view:
//
// 32 +     16 characters per 8 bytes, twice
// (2*7) +  Single space between bytes, twice
// 4 +      Two spaces between 8-byte sets and ASCII
// 1 +      For newline
// 17 +     For ASCII display, with spacer column
// 6        For 5-digit offset counter, including spacer
//
#define HEX_LINE_WIDTH               74

#define HEX_LINE_START                6
#define HEX_LINE_END                 53
#define HEX_LINE_START_ASCII         56
#define HEX_LINE_START_RIGHT_ASCII   65
#define HEX_LINE_LEFT_MIDDLE         28
#define HEX_LINE_RIGHT_MIDDLE        31
#define HEX_BLOCK_LEN                23
#define HEX_LINE_BYTES               16
#define NULL_CHAR                    '.'
#define NONPRINT_CHAR                '.'

	const u_char* data = data_str->Bytes();
	unsigned data_size = data_str->Len();

	if ( ! data )
		return new StringVal("");

	// One output line per 16 input bytes, plus one for a partial (or
	// empty) last line. The buffer starts out all spaces, so columns
	// that are never written stay blank.
	int num_lines = (data_size / 16) + 1;
	int len = num_lines * HEX_LINE_WIDTH;
	u_char* hex_data = new u_char[len + 1];

	memset(hex_data, ' ', len);

	u_char* hex_data_ptr = hex_data;	// write position, hex column
	u_char* ascii_ptr = hex_data_ptr + 50;	// write position, ASCII column
	int x = 0;	// index of the current byte within its line (0..15)

	for ( const u_char* data_ptr = data; data_ptr < data + data_size;
	      ++data_ptr )
		{
		if ( x == 0 )
			{
			// Start of a line: 4 hex digits of offset, followed
			// by two blanks. The pointer difference is cast to
			// match the %x conversion.
			// NOTE(review): offsets above 0xffff do not fit into
			// 4 digits and get cut off by the buffer size.
			char offset[5];
			safe_snprintf(offset, sizeof(offset),
					"%.4x", (unsigned int) (data_ptr - data));
			memcpy(hex_data_ptr, offset, 4);
			hex_data_ptr += 6;
			ascii_ptr = hex_data_ptr + 50;
			}

		char hex_byte[3];
		safe_snprintf(hex_byte, sizeof(hex_byte),
				"%.2x", (u_char) *data_ptr);

		int val = (u_char) *data_ptr;

		u_char ascii_byte = val;

		// If unprintable, use special characters:
		if ( val < 0x20 || val >= 0x7f )
			{
			if ( val == 0 )
				ascii_byte = NULL_CHAR;
			else
				ascii_byte = NONPRINT_CHAR;
			}

		*hex_data_ptr++ = hex_byte[0];
		*hex_data_ptr++ = hex_byte[1];
		*hex_data_ptr++ = ' ';
		*ascii_ptr++ = ascii_byte;

		// Extra gap after the eighth byte, in both columns.
		if ( x == 7 )
			{
			*hex_data_ptr++ = ' ';
			*ascii_ptr++ = ' ';
			}

		++x;

		if ( x == 16 )
			{
			// Line complete; the next line starts right after
			// the newline.
			x = 0;
			*ascii_ptr++ = '\n';
			hex_data_ptr = ascii_ptr;
			}
		}

	// Terminate the string, but ensure it ends with a newline.
	if ( ascii_ptr[-1] != '\n' )
		*ascii_ptr++ = '\n';
	*ascii_ptr = 0;

	// The result copies the text, so the work buffer is freed here.
	StringVal* result = new StringVal((const char*) hex_data);
	delete [] hex_data;

	return result;
	%}

## Returns a reversed copy of the string
##
## str: The string to reverse.
##
## Returns: A reversed copy of *str*
##
function reverse%(str: string%) : string
	%{
	// Work on a private copy of the bytes (embedded NULs included,
	// since the length is passed explicitly) and flip it in place.
	const char* bytes = (const char*)str->Bytes();
	string flipped(bytes, str->Len());
	std::reverse(flipped.begin(), flipped.end());

	return new StringVal(flipped.length(), (const char*)flipped.data());
	%}