Finish Broxygen documentation of string.bif.

2025-10-17 14:08:20 +00:00 · 2011-11-29 14:55:26 -08:00 · 2011-11-29 14:55:26 -08:00 · 98028dba89
commit 98028dba89
parent 76ca1f532f
1 changed files with 318 additions and 43 deletions
--- a/src/strings.bif
+++ b/src/strings.bif
@ -15,7 +15,9 @@ using namespace std;
 ##
 ## Returns: The concatenation of all (string) arguments.
 ##
-## see:: cat cat_string_array cat_string_array_n fmt join_string_vec join_string_array
+## .. bro:see:: cat cat_string_array cat_string_array_n
+##              fmt
+##              join_string_vec join_string_array
 function string_cat%(...%): string
 	%{
 	int n = 0;
@ -85,7 +87,9 @@ BroString* cat_string_array_n(TableVal* tbl, int start, int end)
 ##
 ## Returns: The concatenation of all elements in *a*.
 ##
-## see:: cat string_cat cat_string_array_n fmt join_string_vec join_string_array
+## .. bro:see:: cat string_cat cat_string_array_n
+##              fmt
+##              join_string_vec join_string_array
 function cat_string_array%(a: string_array%): string
 	%{
 	TableVal* tbl = a->AsTableVal();
@ -95,12 +99,16 @@ function cat_string_array%(a: string_array%): string
 ## Concatenates a specific range of elements in an array of strings.
 ##
 ## a: The :bro:id:`string_array` (``table[count] of string``).
+##
 ## start: The array index of the first element of the range.
+##
 ## end: The array index of the last element of the range.
 ##
 ## Returns: The concatenation of the range *[start, end]* in *a*.
 ##
-## see:: cat string_cat cat_string_array_n fmt join_string_vec join_string_array
+## .. bro:see:: cat string_cat cat_string_array
+##              fmt
+##              join_string_vec join_string_array
 function cat_string_array_n%(a: string_array, start: count, end: count%): string
 	%{
 	TableVal* tbl = a->AsTableVal();
@ -111,12 +119,15 @@ function cat_string_array_n%(a: string_array, start: count, end: count%): string
 ## between each element.
 ##
 ## sep: The separator to place between each element.
+##
 ## a: The :bro:id:`string_array` (``table[count] of string``).
 ##
 ## Returns: The concatenation of all elements in *a*, with *sep* placed
 ##          between each element.
 ##
-## see:: cat string_cat cat_string_array_n fmt join_string_vec join_string_array
+## .. bro:see:: cat string_cat cat_string_array cat_string_array_n
+##              fmt
+##              join_string_vec
 function join_string_array%(sep: string, a: string_array%): string
 	%{
 	vector<const BroString*> vs;
@ -144,12 +155,15 @@ function join_string_array%(sep: string, a: string_array%): string
 ## between each element.
 ##
 ## sep: The separator to place between each element.
+##
 ## a: The :bro:id:`string_vec` (``vector of string``).
 ##
 ## Returns: The concatenation of all elements in *a*, with *sep* placed
 ##          between each element.
 ##
-## see:: cat string_cat cat_string_array_n fmt join_string_vec join_string_array
+## .. bro:see:: cat string_cat cat_string_array cat_string_array_n
+##              fmt
+##              join_string_array
 function join_string_vec%(vec: string_vec, sep: string%): string
 	%{
 	ODesc d;
@ -175,7 +189,7 @@ function join_string_vec%(vec: string_vec, sep: string%): string
 ##
 ## Returns: A sorted copy of *a*.
 ##
-## see:: sort
+## .. bro:see:: sort
 function sort_string_array%(a: string_array%): string_array
 	%{
 	TableVal* tbl = a->AsTableVal();
@ -203,6 +217,24 @@ function sort_string_array%(a: string_array%): string_array
 	%}


+## Returns an edited version of a string that applies a special
+## "backspace character" (usually ``\x08`` for backspace or ``\x7f`` for DEL).
+## For example, ``edit("hello there", "e")`` returns ``"llo t"``.
+##
+## arg_s: The string to edit.
+##
+## arg_edit_char: A string of exactly one character that represents the
+##                "backspace character". If it is longer than one character Bro
+##                generates a run-time error and uses the first character in
+##                the string.
+##
+## Returns: An edited version of *arg_s* where *arg_edit_char* triggers the
+##          deletion of the last character.
+##
+## .. bro:see:: clean
+##              to_string_literal
+##              escape_string
+##              strip
 function edit%(arg_s: string, arg_edit_char: string%): string
 	%{
 	if ( arg_edit_char->Len() != 1 )
@ -233,11 +265,28 @@ function edit%(arg_s: string, arg_edit_char: string%): string
 	return new StringVal(new BroString(1, byte_vec(new_s), ind));
 	%}

+## Returns the number of characters (i.e., bytes) in the given string. The
+## length computation includes any embedded NULs, and also a trailing NUL,
+## if any (which is why the function isn't called ``strlen``; to remind
+## the user that Bro strings can include NULs).
+##
+## s: The string to compute the length for.
+##
+## Returns: The number of characters in *s*.
 function byte_len%(s: string%): count
 	%{
 	return new Val(s->Len(), TYPE_COUNT);
 	%}

+## Get a substring from a string, given a starting position and length.
+##
+## s: The string to obtain a substring from.
+##
+## start: The starting position of the substring in *s*.
+##
+## n: The number of characters to extract, beginning at *start*.
+##
+## Returns: A substring of *s* of length *n* from position *start*.
 function sub_bytes%(s: string, start: count, n: int%): string
 	%{
 	if ( start > 0 )
@ -417,42 +466,94 @@ Val* do_sub(StringVal* str_val, RE_Matcher* re, StringVal* repl, int do_all)
 	}
 %%}

-# Similar to split in awk.
-
+## Splits a string into an array of strings according to a pattern.
+##
+## str: The string to split.
+##
+## re: The pattern describing the element separator in *str*.
+##
+## Returns: An array of strings where each element corresponds to a substring
+##          in *str* separated by *re*.
+##
+## .. bro:see:: split1 split_all split_n str_split
+##
+## .. note:: The returned table starts at index 1. Note that conceptually the
+##           return value is meant to be a vector and this might change in the
+##           future.
+##
 function split%(str: string, re: pattern%): string_array
 	%{
 	return do_split(str, re, 0, 0, 0);
 	%}

-# split1(str, pattern, include_separator): table[count] of string
-#
-# Same as split, except that str is only split (if possible) at the
-# earliest position and an array of two strings is returned.
-# An array of one string is returned when str cannot be splitted.
-
+## Splits a string *once* into a two-element array of strings according to a
+## pattern. This function is the same as :bro:id:`split`, but *str* is only split
+## once (if possible) at the earliest position and an array of two strings is
+## returned.
+##
+## str: The string to split.
+##
+## re: The pattern describing the separator to split *str* in two pieces.
+##
+## Returns: An array of strings with two elements in which the first represents
+##          the substring in *str* up to the first occurrence of *re*, and the
+##          second everything after *re*. An array of one string is returned
+##          when *str* cannot be split.
+##
+## .. bro:see:: split split_all split_n str_split
 function split1%(str: string, re: pattern%): string_array
 	%{
 	return do_split(str, re, 0, 0, 1);
 	%}

-# Same as split, except that the array returned by split_all also
-# includes parts of string that match the pattern in the array.
-
-# For example, split_all("a-b--cd", /(\-)+/) returns {"a", "-", "b",
-# "--", "cd"}: odd-indexed elements do not match the pattern
-# and even-indexed ones do.
-
+## Splits a string into an array of strings according to a pattern. This
+## function is the same as :bro:id:`split`, except that the separators are
+## returned as well. For example, ``split_all("a-b--cd", /(\-)+/)`` returns
+## ``{"a", "-", "b", "--", "cd"}``: odd-indexed elements do not match the
+## pattern and even-indexed ones do.
+##
+## str: The string to split.
+##
+## re: The pattern describing the element separator in *str*.
+##
+## Returns: An array of strings where each two successive elements correspond
+## to a substring in *str* of the part not matching *re* (odd-indexed) and the
+## part that matches *re* (even-indexed).
+##
+## .. bro:see:: split split1 split_n str_split
 function split_all%(str: string, re: pattern%): string_array
 	%{
 	return do_split(str, re, 0, 1, 0);
 	%}

+## Splits a string a given number of times into an array of strings according
+## to a pattern. This function is similar to :bro:id:`split1` and
+## :bro:id:`split_all`, but with customizable behavior with respect to
+## including separators in the result and the number of times to split.
+##
+## str: The string to split.
+##
+## re: The pattern describing the element separator in *str*.
+##
+## incl_sep: A flag indicating whether to include the separator matches in the
+##           result (as in :bro:id:`split_all`).
+##
+## max_num_sep: The number of times to split *str*.
+##
+## Returns: An array of strings where, if *incl_sep* is true, each two
+##          successive elements correspond to a substring in *str* of the part
+##          not matching *re* (odd-indexed) and the part that matches *re*
+##          (even-indexed).
+##
+## .. bro:see:: split split1 split_all str_split
 function split_n%(str: string, re: pattern,
 		incl_sep: bool, max_num_sep: count%): string_array
 	%{
 	return do_split(str, re, 0, incl_sep, max_num_sep);
 	%}

+## Deprecated. Will be removed.
+# Reason: the parameter ``other`` does nothing.
 function split_complete%(str: string,
 		re: pattern, other: string_set,
 		incl_sep: bool, max_num_sep: count%): string_array
@ -460,22 +561,65 @@ function split_complete%(str: string,
 	return do_split(str, re, other->AsTableVal(), incl_sep, max_num_sep);
 	%}

+## Substitutes a given replacement string for the first occurrence of a pattern
+## in a given string.
+##
+## str: The string to perform the substitution in.
+##
+## re: The pattern being replaced with *repl*.
+##
+## repl: The string that replaces *re*.
+##
+## Returns: A copy of *str* with the first occurrence of *re* replaced with
+##          *repl*.
+##
+## .. bro:see:: gsub subst_string
 function sub%(str: string, re: pattern, repl: string%): string
 	%{
 	return do_sub(str, re, repl, 0);
 	%}

+## Substitutes a given replacement string for all occurrences of a pattern
+## in a given string.
+##
+## str: The string to perform the substitution in.
+##
+## re: The pattern being replaced with *repl*.
+##
+## repl: The string that replaces *re*.
+##
+## Returns: A copy of *str* with all occurrences of *re* replaced with *repl*.
+##
+## .. bro:see:: sub subst_string
 function gsub%(str: string, re: pattern, repl: string%): string
 	%{
 	return do_sub(str, re, repl, 1);
 	%}

+
+## Lexicographically compares two strings.
+##
+## s1: The first string.
+##
+## s2: The second string.
+##
+## Returns: An integer greater than, equal to, or less than 0 according as
+##          *s1* is greater than, equal to, or less than *s2*.
 function strcmp%(s1: string, s2: string%): int
 	%{
 	return new Val(Bstr_cmp(s1->AsString(), s2->AsString()), TYPE_INT);
 	%}

-# Returns 0 if $little is not found in $big.
+## Locates the first occurrence of one string in another.
+##
+## big: The string to look in.
+##
+## little: The (smaller) string to find inside *big*.
+##
+## Returns: The location of *little* in *big* or 0 if *little* is not found in
+##          *big*.
+##
+## .. bro:see:: find_all find_last
 function strstr%(big: string, little: string%): count
 	%{
 	return new Val(
@ -483,8 +627,17 @@ function strstr%(big: string, little: string%): count
 		TYPE_COUNT);
 	%}

-# Substitute each (non-overlapping) appearance of $from in $s to $to,
-# and return the resulting string.
+## Substitutes each (non-overlapping) appearance of a string in another.
+##
+## s: The string in which to perform the substitution.
+##
+## from: The string to look for which is replaced with *to*.
+##
+## to: The string that replaces all occurrences of *from* in *s*.
+##
+## Returns: A copy of *s* where each occurrence of *from* is replaced with *to*.
+##
+## .. bro:see:: sub gsub
 function subst_string%(s: string, from: string, to: string%): string
 	%{
 	const int little_len = from->Len();
@ -527,6 +680,14 @@ function subst_string%(s: string, from: string, to: string%): string
 	return new StringVal(concatenate(vs));
 	%}

+## Replaces all uppercase letters in a string with their lowercase counterpart.
+##
+## str: The string to convert to lowercase letters.
+##
+## Returns: A copy of the given string with the uppercase letters (as indicated
+## by ``isascii`` and ``isupper``) folded to lowercase (via ``tolower``).
+##
+## .. bro:see:: to_upper is_ascii
 function to_lower%(str: string%): string
 	%{
 	const u_char* s = str->Bytes();
@ -547,6 +708,14 @@ function to_lower%(str: string%): string
 	return new StringVal(new BroString(1, lower_s, n));
 	%}

+## Replaces all lowercase letters in a string with their uppercase counterpart.
+##
+## str: The string to convert to uppercase letters.
+##
+## Returns: A copy of the given string with the lowercase letters (as indicated
+## by ``isascii`` and ``islower``) folded to uppercase (via ``toupper``).
+##
+## .. bro:see:: to_lower is_ascii
 function to_upper%(str: string%): string
 	%{
 	const u_char* s = str->Bytes();
@ -567,18 +736,54 @@ function to_upper%(str: string%): string
 	return new StringVal(new BroString(1, upper_s, n));
 	%}

+## Replaces non-printable characters in a string with escaped sequences. The
+## mappings are:
+##
+##     - ``NUL`` to ``\0``
+##     - ``DEL`` to ``^?``
+##     - values <= 26 to ``^[A-Z]``
+##     - values not in *[32, 126]* to ``%XX``
+##
+## If the string does not yet have a trailing NUL, one is added.
+##
+## str: The string to escape.
+##
+## Returns: The escaped string.
+##
+## .. bro:see:: to_string_literal escape_string
 function clean%(str: string%): string
 	%{
 	char* s = str->AsString()->Render();
 	return new StringVal(new BroString(1, byte_vec(s), strlen(s)));
 	%}

+## Replaces non-printable characters in a string with escaped sequences. The
+## mappings are:
+##
+##     - ``NUL`` to ``\0``
+##     - ``DEL`` to ``^?``
+##     - values <= 26 to ``^[A-Z]``
+##     - values not in *[32, 126]* to ``%XX``
+##
+## str: The string to escape.
+##
+## Returns: The escaped string.
+##
+## .. bro:see:: clean escape_string
 function to_string_literal%(str: string%): string
 	%{
 	char* s = str->AsString()->Render(BroString::BRO_STRING_LITERAL);
 	return new StringVal(new BroString(1, byte_vec(s), strlen(s)));
 	%}

+## Determines whether a given string contains only ASCII characters.
+##
+## str: The string to examine.
+##
+## Returns: False if any byte value of *str* is greater than 127, and true
+##          otherwise.
+##
+## .. bro:see:: to_upper to_lower
 function is_ascii%(str: string%): bool
 	%{
 	int n = str->Len();
@ -591,7 +796,14 @@ function is_ascii%(str: string%): bool
 	return new Val(1, TYPE_BOOL);
 	%}

-# Make printable version of string.
+## Creates a printable version of a string. This function is the same as
+## :bro:id:`clean` except that non-printable characters are removed.
+##
+## s: The string to escape.
+##
+## Returns: The escaped string.
+##
+## .. bro:see:: clean to_string_literal
 function escape_string%(s: string%): string
 	%{
 	char* escstr = s->AsString()->Render();
@ -600,7 +812,12 @@ function escape_string%(s: string%): string
 	return val;
 	%}

-# Returns an ASCII hexadecimal representation of a string.
+## Returns an ASCII hexadecimal representation of a string.
+##
+## s: The string to convert to hex.
+##
+## Returns: A copy of *s* where each byte is replaced with its two-digit
+##          hexadecimal representation.
 function string_to_ascii_hex%(s: string%): string
 	%{
 	char* x = new char[s->Len() * 2 + 1];
@ -612,8 +829,16 @@ function string_to_ascii_hex%(s: string%): string
 	return new StringVal(new BroString(1, (u_char*) x, s->Len() * 2));
 	%}

-function str_smith_waterman%(s1: string, s2: string, params: sw_params%)
-: sw_substring_vec
+## Uses the `Smith Waterman algorithm
+## <http://en.wikipedia.org/wiki/Smith%E2%80%93Waterman_algorithm>`_ to find
+## similar/overlapping substrings.
+##
+## s1: The first string.
+##
+## s2: The second string.
+##
+## Returns: The result of the Smith-Waterman algorithm calculation.
+function str_smith_waterman%(s1: string, s2: string, params: sw_params%) : sw_substring_vec
 	%{
 	SWParams sw_params(params->AsRecordVal()->Lookup(0)->AsCount(),
 			   SWVariant(params->AsRecordVal()->Lookup(1)->AsCount()));
@ -627,6 +852,16 @@ function str_smith_waterman%(s1: string, s2: string, params: sw_params%)
 	return result;
 	%}

+## Splits a string into substrings with the help of an index vector of cutting
+## points.
+##
+## s: The string to split.
+##
+## idx: The index vector (``vector of count``) with the cutting points.
+##
+## Returns: A vector of strings.
+##
+## .. bro:see:: split split1 split_all split_n
 function str_split%(s: string, idx: index_vec%): string_vec
 	%{
 	vector<Val*>* idx_v = idx->AsVector();
@ -655,6 +890,13 @@ function str_split%(s: string, idx: index_vec%): string_vec
 	return result_v;
 	%}

+## Strips whitespace at both ends of a string.
+##
+## str: The string to strip the whitespace from.
+##
+## Returns: A copy of *str* with leading and trailing whitespace removed.
+##
+## .. bro:see:: sub gsub
 function strip%(str: string%): string
 	%{
 	const u_char* s = str->Bytes();
@ -678,6 +920,14 @@ function strip%(str: string%): string
 	return new StringVal(new BroString(sp, (e - sp + 1), 1));
 	%}

+## Generates a string of a given size and fills it with repetitions of a source
+## string.
+##
+## len: The length of the output string.
+##
+## source: The string to concatenate repeatedly until *len* has been reached.
+##
+## Returns: A string of length *len* filled with *source*.
 function string_fill%(len: int, source: string%): string
 	%{
 	const u_char* src = source->Bytes();
@ -692,10 +942,15 @@ function string_fill%(len: int, source: string%): string
 	return new StringVal(new BroString(1, byte_vec(dst), len));
 	%}

-# Takes a string and escapes characters that would allow execution of commands
-# at the shell level.  Must be used before including strings in system() or
-# similar calls.
-#
+## Takes a string and escapes characters that would allow execution of
+## commands at the shell level. Must be used before including strings in
+## :bro:id:`system` or similar calls.
+##
+## source: The string to escape.
+##
+## Returns: A shell-escaped version of *source*.
+##
+## .. bro:see:: system
 function str_shell_escape%(source: string%): string
 	%{
 	unsigned j = 0;
@ -724,8 +979,15 @@ function str_shell_escape%(source: string%): string
 	return new StringVal(new BroString(1, dst, j));
 	%}

-# Returns all occurrences of the given pattern in the given string (an empty
-# empty set if none).
+## Finds all occurrences of a pattern in a string.
+##
+## str: The string to inspect.
+##
+## re: The pattern to look for in *str*.
+##
+## Returns: The set of strings in *str* that match *re*, or the empty set.
+##
+## .. bro:see:: find_last strstr
 function find_all%(str: string, re: pattern%) : string_set
 	%{
 	TableVal* a = new TableVal(internal_type("string_set")->AsTableType());
@ -746,11 +1008,18 @@ function find_all%(str: string, re: pattern%) : string_set
 	return a;
 	%}

-# Returns the last occurrence of the given pattern in the given string.
-# If not found, returns an empty string.  Note that this function returns
-# the match that starts at the largest index in the string, which is
-# not necessarily the longest match.  For example, a pattern of /.*/
-# will return the final character in the string.
+## Finds the last occurrence of a pattern in a string. This function returns
+## the match that starts at the largest index in the string, which is not
+## necessarily the longest match.  For example, a pattern of ``/.*/`` will
+## return the final character in the string.
+##
+## str: The string to inspect.
+##
+## re: The pattern to look for in *str*.
+##
+## Returns: The last string in *str* that matches *re*, or the empty string.
+##
+## .. bro:see:: find_all strstr
 function find_last%(str: string, re: pattern%) : string
 	%{
 	const u_char* s = str->Bytes();
@ -766,10 +1035,16 @@ function find_last%(str: string, re: pattern%) : string
 	return new StringVal("");
 	%}

-# Returns a hex dump for given input data.  The hex dump renders
-# 16 bytes per line, with hex on the left and ASCII (where printable)
-# on the right.  Based on Netdude's hex editor code.
-#
+## Returns a hex dump for given input data. The hex dump renders 16 bytes per
+## line, with hex on the left and ASCII (where printable)
+## on the right.
+##
+## data_str: The string to dump in hex format.
+##
+## .. bro:see:: string_to_ascii_hex bytestring_to_hexstr
+##
+## .. note:: Based on Netdude's hex editor code.
+##
 function hexdump%(data_str: string%) : string
 	%{