##! Definitions of built-in functions related to string processing and ##! manipulation. %%{ // C segment #include #include using namespace std; #include "SmithWaterman.h" %%} ## Calculates the Levenshtein distance between the two strings. See `Wikipedia ## `__ for more information. ## ## s1: The first string. ## ## s2: The second string. ## ## Returns: The Levenshtein distance of two strings as a count. ## function levenshtein_distance%(s1: string, s2: string%): count %{ unsigned int n = s1->Len(); unsigned int m = s2->Len(); if ( ! n ) return new Val(m, TYPE_COUNT); if ( ! m ) return new Val(n, TYPE_COUNT); vector > d(n + 1, vector(m + 1)); d[0][0] = 0; for ( unsigned int i = 1; i <= n; ++i ) d[i][0] = i; for ( unsigned int i = 1; i <= m; ++i ) d[0][i] = i; for ( unsigned int i = 1; i <= n; ++i ) { for ( unsigned int j = 1; j <= m; ++j ) d[i][j] = min(min(d[i-1][j] + 1, d[i][j-1] + 1), d[i-1][j-1] + (s1->Bytes()[i-1] == s2->Bytes()[j-1] ? 0 : 1)); } return new Val(d[n][m], TYPE_COUNT); %} ## Concatenates all arguments into a single string. The function takes a ## variable number of arguments of type string and stitches them together. ## ## Returns: The concatenation of all (string) arguments. ## ## .. bro:see:: cat cat_sep cat_string_array cat_string_array_n ## fmt ## join_string_vec join_string_array function string_cat%(...%): string %{ int n = 0; loop_over_list(@ARG@, i) n += @ARG@[i]->AsString()->Len(); u_char* b = new u_char[n+1]; BroString* s = new BroString(1, b, n); loop_over_list(@ARG@, j) { const BroString* s = @ARG@[j]->AsString(); memcpy(b, s->Bytes(), s->Len()); b += s->Len(); } *b = 0; return new StringVal(s); %} %%{ int string_array_to_vs(TableVal* tbl, int start, int end, vector& vs) { vs.clear(); for ( int i = start; i <= end; ++i ) { Val* ind = new Val(i, TYPE_COUNT); Val* v = tbl->Lookup(ind); if ( ! 
v ) return 0; vs.push_back(v->AsString()); #if 0 char* str = v->AsString()->Render(); DEBUG_MSG("string_array[%d] = \"%s\"\n", i, str); delete [] str; #endif delete ind; } return 1; } int vs_to_string_array(vector& vs, TableVal* tbl, int start, int end) { for ( int i = start, j = 0; i <= end; ++i, ++j ) { Val* ind = new Val(i, TYPE_COUNT); tbl->Assign(ind, new StringVal(vs[j]->Len(), (const char *)vs[j]->Bytes())); Unref(ind); } return 1; } BroString* cat_string_array_n(TableVal* tbl, int start, int end) { vector vs; string_array_to_vs(tbl, start, end, vs); return concatenate(vs); } %%} ## Concatenates all elements in an array of strings. ## ## a: The :bro:type:`string_array` (``table[count] of string``). ## ## Returns: The concatenation of all elements in *a*. ## ## .. bro:see:: cat cat_sep string_cat cat_string_array_n ## fmt ## join_string_vec join_string_array function cat_string_array%(a: string_array%): string %{ TableVal* tbl = a->AsTableVal(); return new StringVal(cat_string_array_n(tbl, 1, a->AsTable()->Length())); %} ## Concatenates a specific range of elements in an array of strings. ## ## a: The :bro:type:`string_array` (``table[count] of string``). ## ## start: The array index of the first element of the range. ## ## end: The array index of the last element of the range. ## ## Returns: The concatenation of the range *[start, end]* in *a*. ## ## .. bro:see:: cat string_cat cat_string_array ## fmt ## join_string_vec join_string_array function cat_string_array_n%(a: string_array, start: count, end: count%): string %{ TableVal* tbl = a->AsTableVal(); return new StringVal(cat_string_array_n(tbl, start, end)); %} ## Joins all values in the given array of strings with a separator placed ## between each element. ## ## sep: The separator to place between each element. ## ## a: The :bro:type:`string_array` (``table[count] of string``). ## ## Returns: The concatenation of all elements in *a*, with *sep* placed ## between each element. ## ## .. 
bro:see:: cat cat_sep string_cat cat_string_array cat_string_array_n ## fmt ## join_string_vec function join_string_array%(sep: string, a: string_array%): string %{ vector vs; TableVal* tbl = a->AsTableVal(); int n = a->AsTable()->Length(); for ( int i = 1; i <= n; ++i ) { Val* ind = new Val(i, TYPE_COUNT); Val* v = tbl->Lookup(ind); if ( ! v ) return 0; vs.push_back(v->AsString()); Unref(ind); if ( i < n ) vs.push_back(sep->AsString()); } return new StringVal(concatenate(vs)); %} ## Joins all values in the given vector of strings with a separator placed ## between each element. ## ## sep: The separator to place between each element. ## ## vec: The :bro:type:`string_vec` (``vector of string``). ## ## Returns: The concatenation of all elements in *vec*, with *sep* placed ## between each element. ## ## .. bro:see:: cat cat_sep string_cat cat_string_array cat_string_array_n ## fmt ## join_string_array function join_string_vec%(vec: string_vec, sep: string%): string %{ ODesc d; VectorVal *v = vec->AsVectorVal(); for ( unsigned i = 0; i < v->Size(); ++i ) { if ( i > 0 ) d.Add(sep->CheckString(), 0); v->Lookup(i)->Describe(&d); } BroString* s = new BroString(1, d.TakeBytes(), d.Len()); s->SetUseFreeToDelete(true); return new StringVal(s); %} ## Sorts an array of strings. ## ## a: The :bro:type:`string_array` (``table[count] of string``). ## ## Returns: A sorted copy of *a*. ## ## .. 
bro:see:: sort function sort_string_array%(a: string_array%): string_array %{ TableVal* tbl = a->AsTableVal(); int n = a->AsTable()->Length(); vector vs; string_array_to_vs(tbl, 1, n, vs); unsigned int i, j; for ( i = 0; i < vs.size(); ++i ) { const BroString* x = vs[i]; for ( j = i; j > 0; --j ) if ( Bstr_cmp(vs[j-1], x) <= 0 ) break; else vs[j] = vs[j-1]; vs[j] = x; } // sort(vs.begin(), vs.end(), Bstr_cmp); TableVal* b = new TableVal(string_array); vs_to_string_array(vs, b, 1, n); return b; %} ## Returns an edited version of a string that applies a special ## "backspace character" (usually ``\x08`` for backspace or ``\x7f`` for DEL). ## For example, ``edit("hello there", "e")`` returns ``"llo t"``. ## ## arg_s: The string to edit. ## ## arg_edit_char: A string of exactly one character that represents the ## "backspace character". If it is longer than one character Bro ## generates a run-time error and uses the first character in ## the string. ## ## Returns: An edited version of *arg_s* where *arg_edit_char* triggers the ## deletion of the last character. ## ## .. bro:see:: clean ## to_string_literal ## escape_string ## strip function edit%(arg_s: string, arg_edit_char: string%): string %{ if ( arg_edit_char->Len() != 1 ) builtin_error("not exactly one edit character", @ARG@[1]); const u_char* s = arg_s->Bytes(); const u_char* edit_s = arg_edit_char->Bytes(); u_char edit_c = *edit_s; int n = arg_s->Len(); u_char* new_s = new u_char[n+1]; int ind = 0; for ( int i = 0; i < n; ++i ) { if ( s[i] == edit_c ) { // Delete last character if ( --ind < 0 ) ind = 0; } else new_s[ind++] = s[i]; } new_s[ind] = '\0'; return new StringVal(new BroString(1, byte_vec(new_s), ind)); %} ## Get a substring from a string, given a starting position and length. ## ## s: The string to obtain a substring from. ## ## start: The starting position of the substring in *s*, where 1 is the first ## character. As a special case, 0 also represents the first character. 
##
## n: The number of characters to extract, beginning at *start*.
##
## Returns: A substring of *s* of length *n* from position *start*.
function sub_bytes%(s: string, start: count, n: int%): string
	%{
	if ( start > 0 )
		--start;	// make it 0-based

	BroString* ss = s->AsString()->GetSubstring(start, n);

	// Fall back to an empty string if no substring was returned.
	if ( ! ss )
		ss = new BroString("");

	return new StringVal(ss);
	%}

%%{
// Returns 1 if the first t_len bytes of s equal t (which requires s_len to be
// at least t_len), and 0 otherwise.
static int match_prefix(int s_len, const char* s, int t_len, const char* t)
	{
	for ( int i = 0; i < t_len; ++i )
		{
		if ( i >= s_len || s[i] != t[i] )
			return 0;
		}

	return 1;
	}

// Splits str_val at the matches of re and returns the pieces in a new
// string_array table indexed from 1.
//
// incl_sep: if non-zero, each matched separator is stored as an element of
//           its own, following the piece that precedes it.
//
// max_num_sep: if non-zero, at most this many separators are consumed; the
//              rest of the string becomes the final element.
//
// Matches of length zero are treated the same as no match.
Val* do_split(StringVal* str_val, RE_Matcher* re, int incl_sep, int max_num_sep)
	{
	TableVal* a = new TableVal(string_array);

	const u_char* s = str_val->Bytes();
	int n = str_val->Len();
	const u_char* end_of_s = s + n;
	int num = 0;		// index of the last element assigned
	int num_sep = 0;	// number of separators consumed so far

	int offset = 0;
	while ( n >= 0 )
		{
		offset = 0;
		// Find next match offset.
		int end_of_match = 0;
		while ( n > 0 &&
			(end_of_match = re->MatchPrefix(s + offset, n)) <= 0 )
			{
			// Move on to next byte.
			++offset;
			--n;
			}

		// Separator limit reached: take everything that is left as
		// the final element.
		if ( max_num_sep && num_sep >= max_num_sep )
			{
			offset = end_of_s - s;
			n=0;
			}

		Val* ind = new Val(++num, TYPE_COUNT);
		a->Assign(ind, new StringVal(offset, (const char*) s));
		Unref(ind);

		// No more separators will be needed if this is the end of string.
		if ( n <= 0 )
			break;

		if ( incl_sep )
			{ // including the part that matches the pattern
			ind = new Val(++num, TYPE_COUNT);
			a->Assign(ind, new StringVal(end_of_match, (const char*) s+offset));
			Unref(ind);
			}

		if ( max_num_sep && num_sep >= max_num_sep )
			break;

		++num_sep;

		// Continue right after the separator just consumed.
		n -= end_of_match;
		s += offset + end_of_match;;

		if ( s > end_of_s )
			reporter->InternalError("RegMatch in split goes beyond the string");
		}

	return a;
	}

// Returns a copy of str_val in which the first match of re (do_all == 0) or
// every match of re (do_all != 0) is replaced with repl. Matches of length
// zero are treated the same as no match.
Val* do_sub(StringVal* str_val, RE_Matcher* re, StringVal* repl, int do_all)
	{
	const u_char* s = str_val->Bytes();
	int offset = 0;
	int n = str_val->Len();

	// cut_points is a set of pairs of indices in str that should
	// be removed/replaced.  A pair <x,y> means "delete starting
	// at offset x, up to but not including offset y".
	List(ptr_compat_int) cut_points;	// where RE matches pieces of str

	int size = 0;	// size of result

	while ( n > 0 )
		{
		// Find next match offset.
		int end_of_match;
		while ( n > 0 &&
			(end_of_match = re->MatchPrefix(&s[offset], n)) <= 0 )
			{
			// This character is going to be copied to the result.
			++size;

			// Move on to next character.
			++offset;
			--n;
			}

		if ( n <= 0 )
			break;

		// s[offset .. offset+end_of_match-1] matches re.
		cut_points.append(offset);
		cut_points.append(offset + end_of_match);

		offset += end_of_match;
		n -= end_of_match;

		if ( ! do_all )
			{
			// We've now done the first substitution - finished.
			// Include the remainder of the string in the result.
			size += n;
			break;
			}
		}

	// size now reflects amount of space copied.  Factor in amount
	// of space for replacement text.
	int num_cut_points = cut_points.length() / 2;
	size += num_cut_points * repl->Len();

	// And a final NUL for good health.
	++size;

	byte_vec result = new u_char[size];
	byte_vec r = result;

	// Copy it all over.
	int start_offset = 0;
	for ( int i = 0; i < cut_points.length(); i += 2 /* loop over pairs */ )
		{
		// Copy the unmatched text that precedes this cut.
		int num_to_copy = cut_points[i] - start_offset;
		memcpy(r, s + start_offset, num_to_copy);

		r += num_to_copy;
		start_offset = cut_points[i+1];

		// Now add in replacement text.
		memcpy(r, repl->Bytes(), repl->Len());
		r += repl->Len();
		}

	// Copy final trailing characters.
	int num_to_copy = str_val->Len() - start_offset;
	memcpy(r, s + start_offset, num_to_copy);
	r += num_to_copy;

	// Final NUL.  No need to increment r, since the length
	// computed from it in the next statement does not include
	// the NUL.
	r[0] = '\0';

	return new StringVal(new BroString(1, result, r - result));
	}
%%}

## Splits a string into an array of strings according to a pattern.
##
## str: The string to split.
##
## re: The pattern describing the element separator in *str*.
##
## Returns: An array of strings where each element corresponds to a substring
##          in *str* separated by *re*.
##
## .. bro:see:: split1 split_all split_n str_split
##
## .. note:: The returned table starts at index 1. Note that conceptually the
##           return value is meant to be a vector and this might change in the
##           future.
##
function split%(str: string, re: pattern%): string_array
	%{
	return do_split(str, re, 0, 0);
	%}

## Splits a string *once* into a two-element array of strings according to a
## pattern. This function is the same as :bro:id:`split`, but *str* is only
## split once (if possible) at the earliest position and an array of two strings
## is returned.
##
## str: The string to split.
##
## re: The pattern describing the separator to split *str* in two pieces.
##
## Returns: An array of strings with two elements in which the first represents
##          the substring in *str* up to the first occurrence of *re*, and the
##          second everything after *re*. An array of one string is returned
##          when *str* cannot be split.
##
## .. bro:see:: split split_all split_n str_split
function split1%(str: string, re: pattern%): string_array
	%{
	return do_split(str, re, 0, 1);
	%}

## Splits a string into an array of strings according to a pattern. This
## function is the same as :bro:id:`split`, except that the separators are
## returned as well. For example, ``split_all("a-b--cd", /(\-)+/)`` returns
## ``{"a", "-", "b", "--", "cd"}``: odd-indexed elements do not match the
## pattern and even-indexed ones do.
##
## str: The string to split.
##
## re: The pattern describing the element separator in *str*.
##
## Returns: An array of strings where each two successive elements correspond
##          to a substring in *str* of the part not matching *re* (odd-indexed)
##          and the part that matches *re* (even-indexed).
##
## .. bro:see:: split split1 split_n str_split
function split_all%(str: string, re: pattern%): string_array
	%{
	return do_split(str, re, 1, 0);
	%}

## Splits a string a given number of times into an array of strings according
## to a pattern. This function is similar to :bro:id:`split1` and
## :bro:id:`split_all`, but with customizable behavior with respect to
## including separators in the result and the number of times to split.
##
## str: The string to split.
##
## re: The pattern describing the element separator in *str*.
##
## incl_sep: A flag indicating whether to include the separator matches in the
##           result (as in :bro:id:`split_all`).
##
## max_num_sep: The number of times to split *str*.
##
## Returns: An array of strings where, if *incl_sep* is true, each two
##          successive elements correspond to a substring in *str* of the part
##          not matching *re* (odd-indexed) and the part that matches *re*
##          (even-indexed).
##
## .. bro:see:: split split1 split_all str_split
function split_n%(str: string, re: pattern,
		incl_sep: bool, max_num_sep: count%): string_array
	%{
	return do_split(str, re, incl_sep, max_num_sep);
	%}

## Substitutes a given replacement string for the first occurrence of a pattern
## in a given string.
##
## str: The string to perform the substitution in.
##
## re: The pattern being replaced with *repl*.
##
## repl: The string that replaces *re*.
##
## Returns: A copy of *str* with the first occurrence of *re* replaced with
##          *repl*.
##
## .. bro:see:: gsub subst_string
function sub%(str: string, re: pattern, repl: string%): string
	%{
	return do_sub(str, re, repl, 0);
	%}

## Substitutes a given replacement string for all occurrences of a pattern
## in a given string.
##
## str: The string to perform the substitution in.
##
## re: The pattern being replaced with *repl*.
##
## repl: The string that replaces *re*.
##
## Returns: A copy of *str* with all occurrences of *re* replaced with *repl*.
##
## .. bro:see:: sub subst_string
function gsub%(str: string, re: pattern, repl: string%): string
	%{
	return do_sub(str, re, repl, 1);
	%}

## Lexicographically compares two strings.
##
## s1: The first string.
##
## s2: The second string.
## ## Returns: An integer greater than, equal to, or less than 0 according as ## *s1* is greater than, equal to, or less than *s2*. function strcmp%(s1: string, s2: string%): int %{ return new Val(Bstr_cmp(s1->AsString(), s2->AsString()), TYPE_INT); %} ## Locates the first occurrence of one string in another. ## ## big: The string to look in. ## ## little: The (smaller) string to find inside *big*. ## ## Returns: The location of *little* in *big*, or 0 if *little* is not found in ## *big*. ## ## .. bro:see:: find_all find_last function strstr%(big: string, little: string%): count %{ return new Val( 1 + big->AsString()->FindSubstring(little->AsString()), TYPE_COUNT); %} ## Substitutes each (non-overlapping) appearance of a string in another. ## ## s: The string in which to perform the substitution. ## ## from: The string to look for which is replaced with *to*. ## ## to: The string that replaces all occurrences of *from* in *s*. ## ## Returns: A copy of *s* where each occurrence of *from* is replaced with *to*. ## ## .. bro:see:: sub gsub function subst_string%(s: string, from: string, to: string%): string %{ const int little_len = from->Len(); if ( little_len == 0 ) return s->Ref(); int big_len = s->Len(); const u_char* big = s->Bytes(); data_chunk_t dc; vector vs; while ( big_len >= little_len ) { int j = strstr_n(big_len, big, little_len, from->Bytes()); if ( j < 0 ) break; if ( j > 0 ) { dc.length = j; dc.data = (const char*) big; vs.push_back(dc); } dc.length = to->Len(); dc.data = (const char*) (to->Bytes()); vs.push_back(dc); j += little_len; big += j; big_len -= j; } if ( big_len > 0 ) { dc.length = big_len; dc.data = (const char*) big; vs.push_back(dc); } return new StringVal(concatenate(vs)); %} ## Replaces all uppercase letters in a string with their lowercase counterpart. ## ## str: The string to convert to lowercase letters. 
## ## Returns: A copy of the given string with the uppercase letters (as indicated ## by ``isascii`` and ``isupper``) folded to lowercase ## (via ``tolower``). ## ## .. bro:see:: to_upper is_ascii function to_lower%(str: string%): string %{ const u_char* s = str->Bytes(); int n = str->Len(); u_char* lower_s = new u_char[n + 1]; u_char* ls = lower_s; for ( int i = 0; i < n; ++i) { if ( isascii(s[i]) && isupper(s[i]) ) *ls++ = tolower(s[i]); else *ls++ = s[i]; } *ls++ = '\0'; return new StringVal(new BroString(1, lower_s, n)); %} ## Replaces all lowercase letters in a string with their uppercase counterpart. ## ## str: The string to convert to uppercase letters. ## ## Returns: A copy of the given string with the lowercase letters (as indicated ## by ``isascii`` and ``islower``) folded to uppercase ## (via ``toupper``). ## ## .. bro:see:: to_lower is_ascii function to_upper%(str: string%): string %{ const u_char* s = str->Bytes(); int n = str->Len(); u_char* upper_s = new u_char[n + 1]; u_char* us = upper_s; for ( int i = 0; i < n; ++i) { if ( isascii(s[i]) && islower(s[i]) ) *us++ = toupper(s[i]); else *us++ = s[i]; } *us++ = '\0'; return new StringVal(new BroString(1, upper_s, n)); %} ## Replaces non-printable characters in a string with escaped sequences. The ## mappings are: ## ## - ``NUL`` to ``\0`` ## - ``DEL`` to ``^?`` ## - values <= 26 to ``^[A-Z]`` ## - values not in *[32, 126]* to ``%XX`` ## ## If the string does not yet have a trailing NUL, one is added. ## ## str: The string to escape. ## ## Returns: The escaped string. ## ## .. bro:see:: to_string_literal escape_string function clean%(str: string%): string %{ char* s = str->AsString()->Render(); return new StringVal(new BroString(1, byte_vec(s), strlen(s))); %} ## Replaces non-printable characters in a string with escaped sequences. 
The ## mappings are: ## ## - ``NUL`` to ``\0`` ## - ``DEL`` to ``^?`` ## - values <= 26 to ``^[A-Z]`` ## - values not in *[32, 126]* to ``%XX`` ## ## str: The string to escape. ## ## Returns: The escaped string. ## ## .. bro:see:: clean escape_string function to_string_literal%(str: string%): string %{ char* s = str->AsString()->Render(BroString::BRO_STRING_LITERAL); return new StringVal(new BroString(1, byte_vec(s), strlen(s))); %} ## Determines whether a given string contains only ASCII characters. ## ## str: The string to examine. ## ## Returns: False if any byte value of *str* is greater than 127, and true ## otherwise. ## ## .. bro:see:: to_upper to_lower function is_ascii%(str: string%): bool %{ int n = str->Len(); const u_char* s = str->Bytes(); for ( int i = 0; i < n; ++i ) if ( s[i] > 127 ) return new Val(0, TYPE_BOOL); return new Val(1, TYPE_BOOL); %} ## Creates a printable version of a string. This function is the same as ## :bro:id:`clean` except that non-printable characters are removed. ## ## s: The string to escape. ## ## Returns: The escaped string. ## ## .. bro:see:: clean to_string_literal function escape_string%(s: string%): string %{ char* escstr = s->AsString()->Render(); Val* val = new StringVal(escstr); delete [] escstr; return val; %} ## Returns an ASCII hexadecimal representation of a string. ## ## s: The string to convert to hex. ## ## Returns: A copy of *s* where each byte is replaced with the corresponding ## hex nibble. function string_to_ascii_hex%(s: string%): string %{ char* x = new char[s->Len() * 2 + 1]; const u_char* sp = s->Bytes(); for ( int i = 0; i < s->Len(); ++i ) sprintf(x + i * 2, "%02x", sp[i]); return new StringVal(new BroString(1, (u_char*) x, s->Len() * 2)); %} ## Uses the Smith-Waterman algorithm to find similar/overlapping substrings. ## See `Wikipedia `__. ## ## s1: The first string. ## ## s2: The second string. ## ## params: Parameters for the Smith-Waterman algorithm. 
## ## Returns: The result of the Smith-Waterman algorithm calculation. function str_smith_waterman%(s1: string, s2: string, params: sw_params%) : sw_substring_vec %{ SWParams sw_params(params->AsRecordVal()->Lookup(0)->AsCount(), SWVariant(params->AsRecordVal()->Lookup(1)->AsCount())); BroSubstring::Vec* subseq = smith_waterman(s1->AsString(), s2->AsString(), sw_params); VectorVal* result = BroSubstring::VecToPolicy(subseq); delete_each(subseq); delete subseq; return result; %} ## Splits a string into substrings with the help of an index vector of cutting ## points. ## ## s: The string to split. ## ## idx: The index vector (``vector of count``) with the cutting points. ## ## Returns: A vector of strings. ## ## .. bro:see:: split split1 split_all split_n function str_split%(s: string, idx: index_vec%): string_vec %{ vector* idx_v = idx->AsVector(); BroString::IdxVec indices(idx_v->size()); unsigned int i; for ( i = 0; i < idx_v->size(); i++ ) indices[i] = (*idx_v)[i]->AsCount(); BroString::Vec* result = s->AsString()->Split(indices); VectorVal* result_v = new VectorVal( internal_type("string_vec")->AsVectorType()); if ( result ) { i = 1; for ( BroString::VecIt it = result->begin(); it != result->end(); ++it, ++i ) result_v->Assign(i, new StringVal(*it)); // StringVal now possesses string. delete result; } return result_v; %} ## Strips whitespace at both ends of a string. ## ## str: The string to strip the whitespace from. ## ## Returns: A copy of *str* with leading and trailing whitespace removed. ## ## .. bro:see:: sub gsub function strip%(str: string%): string %{ const u_char* s = str->Bytes(); int n = str->Len(); if ( n == 0 ) // Empty string. return new StringVal(new BroString(s, n, 1)); const u_char* sp = s; // Move a pointer from the end of the string. const u_char* e = sp + n - 1; while ( e > sp && isspace(*e) ) --e; // Move the pointer for the beginning of the string. 
while ( isspace(*sp) && sp <= e ) ++sp; return new StringVal(new BroString(sp, (e - sp + 1), 1)); %} ## Generates a string of a given size and fills it with repetitions of a source ## string. ## ## len: The length of the output string. ## ## source: The string to concatenate repeatedly until *len* has been reached. ## ## Returns: A string of length *len* filled with *source*. function string_fill%(len: int, source: string%): string %{ const u_char* src = source->Bytes(); int64_t n = source->Len(); char* dst = new char[len]; for ( int i = 0; i < len; i += n ) ::memcpy((dst + i), src, min(n, len - i)); dst[len - 1] = 0; return new StringVal(new BroString(1, byte_vec(dst), len)); %} ## Takes a string and escapes characters that would allow execution of ## commands at the shell level. Must be used before including strings in ## :bro:id:`system` or similar calls. ## ## source: The string to escape. ## ## Returns: A shell-escaped version of *source*. ## ## .. bro:see:: system function str_shell_escape%(source: string%): string %{ unsigned j = 0; const u_char* src = source->Bytes(); unsigned n = source->Len(); byte_vec dst = new u_char[n * 2 + 1]; for ( unsigned i = 0; i < n; ++i ) { switch ( src[i] ) { case '`': case '"': case '\\': case '$': // case '|': case '&': case ';': case '(': case ')': case '<': // case '>': case '\'': case '*': case '?': case '[': case ']': // case '!': case '#': case '{': case '}': dst[j++] = '\\'; break; default: break; } dst[j++] = src[i]; } dst[j] = '\0'; return new StringVal(new BroString(1, dst, j)); %} ## Finds all occurrences of a pattern in a string. ## ## str: The string to inspect. ## ## re: The pattern to look for in *str*. ## ## Returns: The set of strings in *str* that match *re*, or the empty set. ## ## .. 
bro:see: find_last strstr function find_all%(str: string, re: pattern%) : string_set %{ TableVal* a = new TableVal(string_set); const u_char* s = str->Bytes(); const u_char* e = s + str->Len(); for ( const u_char* t = s; t < e; ++t ) { int n = re->MatchPrefix(t, e - t); if ( n >= 0 ) { a->Assign(new StringVal(n, (const char*) t), 0); t += n - 1; } } return a; %} ## Finds the last occurrence of a pattern in a string. This function returns ## the match that starts at the largest index in the string, which is not ## necessarily the longest match. For example, a pattern of ``/.*/`` will ## return the final character in the string. ## ## str: The string to inspect. ## ## re: The pattern to look for in *str*. ## ## Returns: The last string in *str* that matches *re*, or the empty string. ## ## .. bro:see: find_all strstr function find_last%(str: string, re: pattern%) : string %{ const u_char* s = str->Bytes(); const u_char* e = s + str->Len(); for ( const u_char* t = e - 1; t >= s; --t ) { int n = re->MatchPrefix(t, e - t); if ( n >= 0 ) return new StringVal(n, (const char*) t); } return new StringVal(""); %} ## Returns a hex dump for given input data. The hex dump renders 16 bytes per ## line, with hex on the left and ASCII (where printable) ## on the right. ## ## data_str: The string to dump in hex format. ## ## Returns: The hex dump of the given string. ## ## .. bro:see:: string_to_ascii_hex bytestring_to_hexstr ## ## .. note:: Based on Netdude's hex editor code. 
## function hexdump%(data_str: string%) : string %{ // The width of a line of text in the hex-mode view, consisting // of offset, hex view and ASCII view: // // 32 + 16 characters per 8 bytes, twice // (2*7) + Single space between bytes, twice // 4 + Two spaces between 8-byte sets and ASCII // 1 + For newline // 17 + For ASCII display, with spacer column // 6 For 5-digit offset counter, including spacer // #define HEX_LINE_WIDTH 74 #define HEX_LINE_START 6 #define HEX_LINE_END 53 #define HEX_LINE_START_ASCII 56 #define HEX_LINE_START_RIGHT_ASCII 65 #define HEX_LINE_LEFT_MIDDLE 28 #define HEX_LINE_RIGHT_MIDDLE 31 #define HEX_BLOCK_LEN 23 #define HEX_LINE_BYTES 16 #define NULL_CHAR '.' #define NONPRINT_CHAR '.' const u_char* data = data_str->Bytes(); unsigned data_size = data_str->Len(); if ( ! data ) return new StringVal(""); int num_lines = (data_size / 16) + 1; int len = num_lines * HEX_LINE_WIDTH; u_char* hex_data = new u_char[len + 1]; if ( ! hex_data ) return new StringVal(""); memset(hex_data, ' ', len); u_char* hex_data_ptr = hex_data; u_char* ascii_ptr = hex_data_ptr + 50; int x = 0, y = 0; for ( const u_char* data_ptr = data; data_ptr < data + data_size; ++data_ptr ) { if ( x == 0 ) { char offset[5]; safe_snprintf(offset, sizeof(offset), "%.4x", data_ptr - data); memcpy(hex_data_ptr, offset, 4); hex_data_ptr += 6; ascii_ptr = hex_data_ptr + 50; } char hex_byte[3]; safe_snprintf(hex_byte, sizeof(hex_byte), "%.2x", (u_char) *data_ptr); int val = (u_char) *data_ptr; u_char ascii_byte = val; // If unprintable, use special characters: if ( val < 0x20 || val >= 0x7f ) { if ( val == 0 ) ascii_byte = NULL_CHAR; else ascii_byte = NONPRINT_CHAR; } *hex_data_ptr++ = hex_byte[0]; *hex_data_ptr++ = hex_byte[1]; *hex_data_ptr++ = ' '; *ascii_ptr++ = ascii_byte; if ( x == 7 ) { *hex_data_ptr++ = ' '; *ascii_ptr++ = ' '; } ++x; if ( x == 16 ) { x = 0; *ascii_ptr++ = '\n'; hex_data_ptr = ascii_ptr; } } // Terminate the string, but ensure it ends with a newline. 
if ( ascii_ptr[-1] != '\n' ) *ascii_ptr++ = '\n'; *ascii_ptr = 0; StringVal* result = new StringVal((const char*) hex_data); delete [] hex_data; return result; %} ## Returns a reversed copy of the string ## ## str: The string to reverse. ## ## Returns: A reversed copy of *str* ## function reverse%(str: string%) : string %{ string s = string((const char*)str->Bytes(), str->Len()); reverse(s.begin(), s.end()); return new StringVal(s.length(), (const char*)s.c_str()); %}