##! Definitions of built-in functions related to string processing and
##! manipulation.

%%{ // C segment
#include <vector>
#include <algorithm>
using namespace std;

#include "SmithWaterman.h"
%%}

## Calculates the Levenshtein distance between the two strings. See `Wikipedia
## <https://en.wikipedia.org/wiki/Levenshtein_distance>`__ for more information.
##
## s1: The first string.
##
## s2: The second string.
##
## Returns: The Levenshtein distance of two strings as a count.
##
function levenshtein_distance%(s1: string, s2: string%): count
	%{
	unsigned int n = s1->Len();
	unsigned int m = s2->Len();

	// The distance to or from an empty string is the other string's length.
	if ( ! n )
		return val_mgr->GetCount(m);

	if ( ! m )
		return val_mgr->GetCount(n);

	// d[i][j] is the distance between the first i bytes of s1 and the
	// first j bytes of s2.
	vector<vector<unsigned int> > d(n + 1, vector<unsigned int>(m + 1));

	d[0][0] = 0;

	for ( unsigned int i = 1; i <= n; ++i )
		d[i][0] = i;

	for ( unsigned int i = 1; i <= m; ++i )
		d[0][i] = i;

	for ( unsigned int i = 1; i <= n; ++i )
		{
		for ( unsigned int j = 1; j <= m; ++j )
			// Cheapest of: deletion, insertion, (mis)match.
			d[i][j] = min(min(d[i-1][j] + 1, d[i][j-1] + 1),
			              d[i-1][j-1] + (s1->Bytes()[i-1] == s2->Bytes()[j-1] ? 0 : 1));
		}

	return val_mgr->GetCount(d[n][m]);
	%}

## Concatenates all arguments into a single string. The function takes a
## variable number of arguments of type string and stitches them together.
##
## Returns: The concatenation of all (string) arguments.
##
## .. zeek:see:: cat cat_sep
##              fmt
##              join_string_vec
function string_cat%(...%): string
	%{
	// First pass: total length, so that a single buffer can be allocated.
	int n = 0;
	for ( const auto& a : @ARG@ )
		n += a->AsString()->Len();

	u_char* b = new u_char[n+1];

	// The BroString is constructed over the buffer's start before "b" is
	// advanced by the copy loop below.
	BroString* s = new BroString(1, b, n);

	// Second pass: copy each argument's bytes back to back.
	for ( const auto& a : @ARG@ )
		{
		const BroString* piece = a->AsString();
		memcpy(b, piece->Bytes(), piece->Len());
		b += piece->Len();
		}

	*b = 0;

	return new StringVal(s);
	%}

%%{
// Collects the strings stored in "tbl" under the count indices start..end
// (inclusive) into "vs". Returns 1 on success and 0 as soon as an index has
// no entry; in that case "vs" holds the entries found so far.
int string_array_to_vs(TableVal* tbl, int start, int end,
			vector<const BroString*>& vs)
	{
	vs.clear();

	for ( int i = start; i <= end; ++i )
		{
		Val* ind = val_mgr->GetCount(i);
		Val* v = tbl->Lookup(ind);

		// The index is only needed for the lookup. Release it right away
		// so that the early return below does not leak the reference.
		Unref(ind);

		if ( ! v )
			return 0;

		vs.push_back(v->AsString());

#if 0
		char* str = v->AsString()->Render();
		DEBUG_MSG("string_array[%d] = \"%s\"\n", i, str);
		delete [] str;
#endif
		}

	return 1;
	}

// Stores copies of the strings in "vs" into "tbl" under the count indices
// start..end (inclusive); vs[0] goes to index "start". Always returns 1.
int vs_to_string_array(vector<const BroString*>& vs, TableVal* tbl,
			int start, int end)
	{
	for ( int i = start, j = 0; i <= end; ++i, ++j )
		{
		Val* ind = val_mgr->GetCount(i);
		tbl->Assign(ind, make_intrusive<StringVal>(vs[j]->Len(),
		                                           (const char *)vs[j]->Bytes()));
		Unref(ind);
		}

	return 1;
	}
%%}

## Joins all values in the given vector of strings with a separator placed
## between each element.
##
## vec: The :zeek:type:`string_vec` (``vector of string``).
##
## sep: The separator to place between each element.
##
## Returns: The concatenation of all elements in *vec*, with *sep* placed
##          between each element.
##
## .. zeek:see:: cat cat_sep string_cat
##              fmt
function join_string_vec%(vec: string_vec, sep: string%): string
	%{
	ODesc d;
	d.SetStyle(RAW_STYLE);

	VectorVal *v = vec->AsVectorVal();

	for ( unsigned i = 0; i < v->Size(); ++i )
		{
		// The separator is emitted before every element but the first,
		// including before unset elements.
		if ( i > 0 )
			d.Add(sep->CheckString(), 0);

		Val* e = v->Lookup(i);

		// If the element is empty, skip it.
		if ( ! e )
			continue;

		e->Describe(&d);
		}

	BroString* s = new BroString(1, d.TakeBytes(), d.Len());
	s->SetUseFreeToDelete(true);

	return new StringVal(s);
	%}

## Returns an edited version of a string that applies a special
## "backspace character" (usually ``\x08`` for backspace or ``\x7f`` for DEL).
## For example, ``edit("hello there", "e")`` returns ``"llo t"``.
##
## arg_s: The string to edit.
##
## arg_edit_char: A string of exactly one character that represents the
##                "backspace character". If it is longer than one character Zeek
##                generates a run-time error and uses the first character in
##                the string.
##
## Returns: An edited version of *arg_s* where *arg_edit_char* triggers the
##          deletion of the last character.
##
## .. zeek:see:: clean
##              to_string_literal
##              escape_string
##              strip
function edit%(arg_s: string, arg_edit_char: string%): string
	%{
	if ( arg_edit_char->Len() != 1 )
		builtin_error("not exactly one edit character", @ARG@[1]);

	const u_char* s = arg_s->Bytes();
	const u_char* edit_s = arg_edit_char->Bytes();

	// Processing continues after the error above, using the first byte.
	u_char edit_c = *edit_s;

	int n = arg_s->Len();
	u_char* new_s = new u_char[n+1];
	int ind = 0;	// number of bytes currently kept in new_s

	for ( int i = 0; i < n; ++i )
		{
		if ( s[i] == edit_c )
			{ // Delete last character
			if ( --ind < 0 )
				ind = 0;
			}
		else
			new_s[ind++] = s[i];
		}

	new_s[ind] = '\0';

	return new StringVal(new BroString(1, byte_vec(new_s), ind));
	%}

## Get a substring from a string, given a starting position and length.
##
## s: The string to obtain a substring from.
##
## start: The starting position of the substring in *s*, where 1 is the first
##        character. As a special case, 0 also represents the first character.
##
## n: The number of characters to extract, beginning at *start*.
##
## Returns: A substring of *s* of length *n* from position *start*.
function sub_bytes%(s: string, start: count, n: int%): string
	%{
	if ( start > 0 )
		--start;	// make it 0-based

	BroString* ss = s->AsString()->GetSubstring(start, n);

	// Fall back to an empty string when no substring could be extracted.
	if ( ! ss )
		ss = new BroString("");

	return new StringVal(ss);
	%}

%%{
// Returns 1 if the first t_len bytes of "s" (of length s_len) equal "t",
// and 0 otherwise.
static int match_prefix(int s_len, const char* s, int t_len, const char* t)
	{
	for ( int i = 0; i < t_len; ++i )
		{
		if ( i >= s_len || s[i] != t[i] )
			return 0;
		}

	return 1;
	}

// Splits "str_val" at matches of "re" and returns the pieces as a
// string_vec, filled from index 0. If "incl_sep" is non-zero the matched
// separators are stored between the pieces. A non-zero "max_num_sep" limits
// the number of splits; the remainder of the string becomes the last piece.
VectorVal* do_split_string(StringVal* str_val, RE_Matcher* re, int incl_sep,
                           int max_num_sep)
	{
	// string_vec is used early in the version script - do not use the NetVar.
	VectorVal* rval = new VectorVal(internal_type("string_vec")->AsVectorType());
	const u_char* s = str_val->Bytes();
	int n = str_val->Len();
	const u_char* end_of_s = s + n;
	int num = 0;
	int num_sep = 0;

	int offset = 0;
	while ( n >= 0 )
		{
		offset = 0;
		// Find next match offset.
		int end_of_match = 0;
		while ( n > 0 &&
		        (end_of_match = re->MatchPrefix(s + offset, n)) <= 0 )
			{
			// Move on to next byte.
			++offset;
			--n;
			}

		// Split limit reached: the rest of the string is the last piece.
		if ( max_num_sep && num_sep >= max_num_sep )
			{
			offset = end_of_s - s;
			n=0;
			}

		rval->Assign(num++, make_intrusive<StringVal>(offset, (const char*) s));

		// No more separators will be needed if this is the end of string.
		if ( n <= 0 )
			break;

		if ( incl_sep )
			{ // including the part that matches the pattern
			rval->Assign(num++, make_intrusive<StringVal>(end_of_match,
			                                              (const char*) s+offset));
			}

		if ( max_num_sep && num_sep >= max_num_sep )
			break;

		++num_sep;

		n -= end_of_match;
		s += offset + end_of_match;

		if ( s > end_of_s )
			reporter->InternalError("RegMatch in split goes beyond the string");
		}

	return rval;
	}

// Same splitting logic as do_split_string(), but the pieces are returned in
// a string_array table indexed by counts starting at 1.
Val* do_split(StringVal* str_val, RE_Matcher* re, int incl_sep, int max_num_sep)
	{
	TableVal* a = new TableVal(string_array);
	const u_char* s = str_val->Bytes();
	int n = str_val->Len();
	const u_char* end_of_s = s + n;
	int num = 0;
	int num_sep = 0;

	int offset = 0;
	while ( n >= 0 )
		{
		offset = 0;
		// Find next match offset.
		int end_of_match = 0;
		while ( n > 0 &&
		        (end_of_match = re->MatchPrefix(s + offset, n)) <= 0 )
			{
			// Move on to next byte.
			++offset;
			--n;
			}

		// Split limit reached: the rest of the string is the last piece.
		if ( max_num_sep && num_sep >= max_num_sep )
			{
			offset = end_of_s - s;
			n=0;
			}

		Val* ind = val_mgr->GetCount(++num);
		a->Assign(ind, make_intrusive<StringVal>(offset, (const char*) s));
		Unref(ind);

		// No more separators will be needed if this is the end of string.
		if ( n <= 0 )
			break;

		if ( incl_sep )
			{ // including the part that matches the pattern
			ind = val_mgr->GetCount(++num);
			a->Assign(ind, make_intrusive<StringVal>(end_of_match,
			                                         (const char*) s+offset));
			Unref(ind);
			}

		if ( max_num_sep && num_sep >= max_num_sep )
			break;

		++num_sep;

		n -= end_of_match;
		s += offset + end_of_match;

		if ( s > end_of_s )
			reporter->InternalError("RegMatch in split goes beyond the string");
		}

	return a;
	}
%%}

## Splits a string into an array of strings according to a pattern.
##
## str: The string to split.
##
## re: The pattern describing the element separator in *str*.
##
## Returns: An array of strings where each element corresponds to a substring
##          in *str* separated by *re*.
##
## .. zeek:see:: split_string1 split_string_all split_string_n str_split
##
function split_string%(str: string, re: pattern%): string_vec
	%{
	// Separators are dropped and the number of splits is unlimited.
	const int include_separators = 0;
	const int split_limit = 0;

	return do_split_string(str, re, include_separators, split_limit);
	%}

## Splits a string *once* into a two-element array of strings according to a
## pattern. This function is the same as :zeek:id:`split_string`, but *str* is
## only split once (if possible) at the earliest position and an array of two
## strings is returned.
##
## str: The string to split.
##
## re: The pattern describing the separator to split *str* in two pieces.
##
## Returns: An array of strings with two elements in which the first represents
##          the substring in *str* up to the first occurrence of *re*, and the
##          second everything after *re*. An array of one string is returned
##          when *str* cannot be split.
##
## .. zeek:see:: split_string split_string_all split_string_n str_split
function split_string1%(str: string, re: pattern%): string_vec
	%{
	// Separators are dropped and at most one split is performed.
	const int include_separators = 0;
	const int split_limit = 1;

	return do_split_string(str, re, include_separators, split_limit);
	%}

## Splits a string into an array of strings according to a pattern. This
## function is the same as :zeek:id:`split_string`, except that the separators
## are returned as well. For example, ``split_string_all("a-b--cd", /(\-)+/)``
## returns ``{"a", "-", "b", "--", "cd"}``: odd-indexed elements do match the
## pattern and even-indexed ones do not.
##
## str: The string to split.
##
## re: The pattern describing the element separator in *str*.
##
## Returns: An array of strings where each two successive elements correspond
##          to a substring in *str* of the part not matching *re* (even-indexed)
##          and the part that matches *re* (odd-indexed).
##
## .. zeek:see:: split_string split_string1 split_string_n str_split
function split_string_all%(str: string, re: pattern%): string_vec
	%{
	// Separators are kept and the number of splits is unlimited.
	const int include_separators = 1;
	const int split_limit = 0;

	return do_split_string(str, re, include_separators, split_limit);
	%}

## Splits a string a given number of times into an array of strings according
## to a pattern. This function is similar to :zeek:id:`split_string1` and
## :zeek:id:`split_string_all`, but with customizable behavior with respect to
## including separators in the result and the number of times to split.
##
## str: The string to split.
##
## re: The pattern describing the element separator in *str*.
##
## incl_sep: A flag indicating whether to include the separator matches in the
##           result (as in :zeek:id:`split_string_all`).
##
## max_num_sep: The number of times to split *str*.
##
## Returns: An array of strings where, if *incl_sep* is true, each two
##          successive elements correspond to a substring in *str* of the part
##          not matching *re* (even-indexed) and the part that matches *re*
##          (odd-indexed).
##
## .. zeek:see:: split_string split_string1 split_string_all str_split
function split_string_n%(str: string, re: pattern,
		incl_sep: bool, max_num_sep: count%): string_vec
	%{
	// Both knobs are handed to the shared helper as plain ints.
	const int include_separators = incl_sep;
	const int split_limit = max_num_sep;

	return do_split_string(str, re, include_separators, split_limit);
	%}

## Substitutes a given replacement string for the first occurrence of a pattern
## in a given string.
##
## str: The string to perform the substitution in.
##
## re: The pattern being replaced with *repl*.
##
## repl: The string that replaces *re*.
##
## Returns: A copy of *str* with the first occurrence of *re* replaced with
##          *repl*.
##
## .. zeek:see:: gsub subst_string
function sub%(str: string, re: pattern, repl: string%): string
	%{
	// Only the first match is replaced.
	const bool replace_all = false;

	return str->Substitute(re, repl, replace_all);
	%}

## Substitutes a given replacement string for all occurrences of a pattern
## in a given string.
##
## str: The string to perform the substitution in.
##
## re: The pattern being replaced with *repl*.
##
## repl: The string that replaces *re*.
##
## Returns: A copy of *str* with all occurrences of *re* replaced with *repl*.
##
## .. zeek:see:: sub subst_string
function gsub%(str: string, re: pattern, repl: string%): string
	%{
	// The final argument asks for every match to be replaced.
	return str->Substitute(re, repl, true);
	%}


## Lexicographically compares two strings.
##
## s1: The first string.
##
## s2: The second string.
##
## Returns: An integer greater than, equal to, or less than 0 according as
##          *s1* is greater than, equal to, or less than *s2*.
function strcmp%(s1: string, s2: string%): int
	%{
	return val_mgr->GetInt(Bstr_cmp(s1->AsString(), s2->AsString()));
	%}

## Locates the first occurrence of one string in another.
##
## big: The string to look in.
##
## little: The (smaller) string to find inside *big*.
##
## Returns: The location of *little* in *big*, or 0 if *little* is not found in
##          *big*.
##
## .. zeek:see:: find_all find_last
function strstr%(big: string, little: string%): count
	%{
	// The lookup result is shifted by one to give the 1-based position
	// that is returned to the script layer.
	return val_mgr->GetCount(
		1 + big->AsString()->FindSubstring(little->AsString()));
	%}

## Substitutes each (non-overlapping) appearance of a string in another.
##
## s: The string in which to perform the substitution.
##
## from: The string to look for which is replaced with *to*.
##
## to: The string that replaces all occurrences of *from* in *s*.
##
## Returns: A copy of *s* where each occurrence of *from* is replaced with *to*.
##
## .. zeek:see:: sub gsub
function subst_string%(s: string, from: string, to: string%): string
	%{
	const int little_len = from->Len();

	// Nothing to search for: hand back the input itself.
	if ( little_len == 0 )
		return s->Ref();

	int big_len = s->Len();
	const u_char* big = s->Bytes();

	// The result is assembled from chunks that point either into "s"
	// (unchanged stretches) or at "to" (replacements).
	data_chunk_t dc;
	vector<data_chunk_t> vs;

	while ( big_len >= little_len )
		{
		int j = strstr_n(big_len, big, little_len, from->Bytes());

		if ( j < 0 )
			break;

		// Keep whatever precedes the match.
		if ( j > 0 )
			{
			dc.length = j;
			dc.data = (const char*) big;
			vs.push_back(dc);
			}

		dc.length = to->Len();
		dc.data = (const char*) (to->Bytes());
		vs.push_back(dc);

		// Continue right after the match.
		j += little_len;
		big += j;
		big_len -= j;
		}

	// Keep the tail that follows the last match.
	if ( big_len > 0 )
		{
		dc.length = big_len;
		dc.data = (const char*) big;
		vs.push_back(dc);
		}

	return new StringVal(concatenate(vs));
	%}

## Replaces all uppercase letters in a string with their lowercase counterpart.
##
## str: The string to convert to lowercase letters.
##
## Returns: A copy of the given string with the uppercase letters (as indicated
##          by ``isascii`` and ``isupper``) folded to lowercase
##          (via ``tolower``).
##
## .. zeek:see:: to_upper is_ascii
function to_lower%(str: string%): string
	%{
	const u_char* s = str->Bytes();
	int n = str->Len();
	u_char* lower_s = new u_char[n + 1];
	u_char* ls = lower_s;

	for ( int i = 0; i < n; ++i )
		{
		if ( isascii(s[i]) && isupper(s[i]) )
			*ls++ = tolower(s[i]);
		else
			*ls++ = s[i];
		}

	*ls++ = '\0';

	return new StringVal(new BroString(1, lower_s, n));
	%}

## Replaces all lowercase letters in a string with their uppercase counterpart.
##
## str: The string to convert to uppercase letters.
##
## Returns: A copy of the given string with the lowercase letters (as indicated
##          by ``isascii`` and ``islower``) folded to uppercase
##          (via ``toupper``).
##
## .. zeek:see:: to_lower is_ascii
function to_upper%(str: string%): string
	%{
	const u_char* s = str->Bytes();
	int n = str->Len();
	u_char* upper_s = new u_char[n + 1];
	u_char* us = upper_s;

	for ( int i = 0; i < n; ++i )
		{
		if ( isascii(s[i]) && islower(s[i]) )
			*us++ = toupper(s[i]);
		else
			*us++ = s[i];
		}

	*us++ = '\0';

	return new StringVal(new BroString(1, upper_s, n));
	%}

## Replaces non-printable characters in a string with escaped sequences. The
## mappings are:
##
##     - values not in *[32, 126]* to ``\xXX``
##
## If the string does not yet have a trailing NUL, one is added internally.
##
## In contrast to :zeek:id:`escape_string`, this encoding is *not* fully reversible.
##
## str: The string to escape.
##
## Returns: The escaped string.
##
## .. zeek:see:: to_string_literal escape_string
function clean%(str: string%): string
	%{
	// The rendered buffer is handed over to the new BroString.
	char* s = str->AsString()->Render();
	return new StringVal(new BroString(1, byte_vec(s), strlen(s)));
	%}

## Replaces non-printable characters in a string with escaped sequences. The
## mappings are:
##
##     - values not in *[32, 126]* to ``\xXX``
##     - ``\`` to ``\\``
##     - ``'`` and ``"`` to ``\'`` and ``\"``, respectively.
##
## str: The string to escape.
##
## Returns: The escaped string.
##
## .. zeek:see:: clean escape_string
function to_string_literal%(str: string%): string
	%{
	// The rendered buffer is handed over to the new BroString.
	char* s = str->AsString()->Render(BroString::BRO_STRING_LITERAL);
	return new StringVal(new BroString(1, byte_vec(s), strlen(s)));
	%}

## Determines whether a given string contains only ASCII characters.
##
## str: The string to examine.
##
## Returns: False if any byte value of *str* is greater than 127, and true
##          otherwise.
##
## .. zeek:see:: to_upper to_lower
function is_ascii%(str: string%): bool
	%{
	int n = str->Len();
	const u_char* s = str->Bytes();

	for ( int i = 0; i < n; ++i )
		if ( s[i] > 127 )
			return val_mgr->GetBool(0);

	return val_mgr->GetBool(1);
	%}

## Replaces non-printable characters in a string with escaped sequences.
## The mappings are:
##
##     - values not in *[32, 126]* to ``\xXX``
##     - ``\`` to ``\\``
##
## In contrast to :zeek:id:`clean`, this encoding is fully reversible.
##
## s: The string to escape.
##
## Returns: The escaped string.
##
## .. zeek:see:: clean to_string_literal
function escape_string%(s: string%): string
	%{
	char* escstr = s->AsString()->Render(BroString::ESC_HEX | BroString::ESC_ESC);

	// Unlike clean(), the rendered buffer is passed to StringVal as a
	// C string and then freed here.
	Val* val = new StringVal(escstr);
	delete [] escstr;

	return val;
	%}

## Returns an ASCII hexadecimal representation of a string.
##
## s: The string to convert to hex.
##
## Returns: A copy of *s* where each byte is replaced with its two-digit
##          lowercase hexadecimal representation.
function string_to_ascii_hex%(s: string%): string
	%{
	// Two hex digits per input byte plus the terminating NUL.
	char* x = new char[s->Len() * 2 + 1];
	const u_char* sp = s->Bytes();

	for ( int i = 0; i < s->Len(); ++i )
		// Each call writes two digits and a NUL; the NUL is overwritten
		// by the next iteration, and the last one terminates the buffer.
		snprintf(x + i * 2, 3, "%02x", sp[i]);

	return new StringVal(new BroString(1, (u_char*) x, s->Len() * 2));
	%}

## Uses the Smith-Waterman algorithm to find similar/overlapping substrings.
## See `Wikipedia <https://en.wikipedia.org/wiki/Smith%E2%80%93Waterman_algorithm>`__.
##
## s1: The first string.
##
## s2: The second string.
##
## params: Parameters for the Smith-Waterman algorithm.
##
## Returns: The result of the Smith-Waterman algorithm calculation.
function str_smith_waterman%(s1: string, s2: string, params: sw_params%) : sw_substring_vec
	%{
	// Fields 0 and 1 of the parameter record are read as counts; the
	// second one is converted to an SWVariant.
	SWParams sw_params(params->AsRecordVal()->Lookup(0)->AsCount(),
			   SWVariant(params->AsRecordVal()->Lookup(1)->AsCount()));

	BroSubstring::Vec* subseq =
		smith_waterman(s1->AsString(), s2->AsString(), sw_params);

	VectorVal* result = BroSubstring::VecToPolicy(subseq);

	// The substrings and their container are no longer needed once the
	// script-level vector has been built.
	delete_each(subseq);
	delete subseq;

	return result;
	%}

## Splits a string into substrings with the help of an index vector of cutting
## points.
##
## s: The string to split.
##
## idx: The index vector (``vector of count``) with the cutting points.
##
## Returns: A vector of strings.
##
## .. zeek:see:: split_string split_string1 split_string_all split_string_n
function str_split%(s: string, idx: index_vec%): string_vec
	%{
	vector<Val*>* idx_v = idx->AsVector();
	BroString::IdxVec indices(idx_v->size());
	unsigned int i;

	for ( i = 0; i < idx_v->size(); i++ )
		indices[i] = (*idx_v)[i]->AsCount();

	BroString::Vec* result = s->AsString()->Split(indices);
	VectorVal* result_v = new VectorVal(
	    internal_type("string_vec")->AsVectorType());

	if ( result )
		{
		// NOTE: the pieces are stored starting at index 1, so index 0 of
		// the returned vector stays unset. This is kept as is because
		// existing scripts may rely on it.
		i = 1;

		for ( BroString::VecIt it = result->begin();
		      it != result->end(); ++it, ++i )
			result_v->Assign(i, make_intrusive<StringVal>(*it));
			// StringVal now possesses string.

		delete result;
		}

	return result_v;
	%}

## Strips whitespace at both ends of a string.
##
## str: The string to strip the whitespace from.
##
## Returns: A copy of *str* with leading and trailing whitespace removed.
##
## .. zeek:see:: sub gsub lstrip rstrip
function strip%(str: string%): string
	%{
	const u_char* s = str->Bytes();
	int n = str->Len();

	if ( n == 0 )
		// Empty string.
		return new StringVal(new BroString(s, n, 1));

	const u_char* sp = s;

	// Move a pointer from the end of the string.
	const u_char* e = sp + n - 1;
	while ( e > sp && isspace(*e) )
		--e;

	// Move the pointer for the beginning of the string. The bound is
	// checked before dereferencing so that an all-whitespace string does
	// not cause a read past its last byte.
	while ( sp <= e && isspace(*sp) )
		++sp;

	return new StringVal(new BroString(sp, (e - sp + 1), 1));
	%}

%%{
// Returns true if byte "c" occurs anywhere in "strip_chars".
static bool should_strip(u_char c, const BroString* strip_chars)
	{
	auto strip_bytes = strip_chars->Bytes();

	for ( auto i = 0; i < strip_chars->Len(); ++i )
		if ( c == strip_bytes[i] )
			return true;

	return false;
	}
%%}

## Removes all combinations of characters in the *chars* argument
## starting at the beginning of the string until first mismatch.
##
## str: The string to strip characters from.
##
## chars: A string consisting of the characters to be removed.
##        Defaults to all whitespace characters.
##
## Returns: A copy of *str* with the characters in *chars* removed from
##          the beginning.
##
## .. zeek:see:: sub gsub strip rstrip
function lstrip%(str: string, chars: string &default=" \t\n\r\v\f"%): string
	%{
	const u_char* s = str->Bytes();
	int n = str->Len();

	// empty input string
	if ( n == 0 )
		return new StringVal(new BroString(s, n, 1));

	int i;
	auto bs_chars = chars->AsString();

	// Stop at the first byte that is not in the strip set.
	for ( i = 0; i < n; ++i )
		if ( ! should_strip(s[i], bs_chars) )
			break;

	return new StringVal(new BroString(s + i, n - i, 1));
	%}

## Removes all combinations of characters in the *chars* argument
## starting at the end of the string until first mismatch.
##
## str: The string to strip characters from.
##
## chars: A string consisting of the characters to be removed.
##        Defaults to all whitespace characters.
##
## Returns: A copy of *str* with the characters in *chars* removed from
##          the end.
##
## .. zeek:see:: sub gsub strip lstrip
function rstrip%(str: string, chars: string &default=" \t\n\r\v\f"%): string
	%{
	const u_char* s = str->Bytes();
	int n = str->Len();

	// empty input string
	if ( n == 0 )
		return new StringVal(new BroString(s, n, 1));

	int n_to_remove;
	auto bs_chars = chars->AsString();

	// Count trailing bytes that are in the strip set.
	for ( n_to_remove = 0; n_to_remove < n; ++n_to_remove )
		if ( ! should_strip(s[n - n_to_remove - 1], bs_chars) )
			break;

	return new StringVal(new BroString(s, n - n_to_remove, 1));
	%}

## Generates a string of a given size and fills it with repetitions of a source
## string.
##
## len: The length of the output string. If it is zero or negative, the
##      empty string is returned.
##
## source: The string to concatenate repeatedly until *len* has been reached.
##         If it is empty, the output is filled with NUL bytes.
##
## Returns: A string of length *len* filled with *source*. The final byte of
##          the result is always a NUL.
function string_fill%(len: int, source: string%): string
	%{
	const u_char* src = source->Bytes();
	int64_t n = source->Len();

	// A non-positive length would otherwise lead to an out-of-bounds
	// write at dst[len - 1] or to a bogus allocation size.
	if ( len <= 0 )
		return val_mgr->GetEmptyString();

	char* dst = new char[len];

	if ( n == 0 )
		// Nothing to repeat; stepping by zero would never terminate.
		::memset(dst, 0, len);
	else
		{
		// A 64-bit index avoids overflow for large values of len.
		for ( int64_t i = 0; i < len; i += n )
			::memcpy((dst + i), src, min<int64_t>(n, len - i));
		}

	dst[len - 1] = 0;

	return new StringVal(new BroString(1, byte_vec(dst), len));
	%}

## Takes a string and escapes characters that would allow execution of
## commands at the shell level.
## Must be used before including strings in
## :zeek:id:`system` or similar calls.
##
## source: The string to escape.
##
## Returns: A shell-escaped version of *source*.  Specifically, this
##          backslash-escapes characters whose literal value is not otherwise
##          preserved by enclosure in double-quotes (dollar-sign, backquote,
##          backslash, and double-quote itself), and then encloses that
##          backslash-escaped string in double-quotes to ultimately preserve
##          the literal value of all input characters.
##
## .. zeek:see:: system safe_shell_quote
function safe_shell_quote%(source: string%): string
	%{
	unsigned j = 0;
	const u_char* src = source->Bytes();
	unsigned n = source->Len();

	// Worst case: every byte gets a backslash (2 * n), plus the two
	// enclosing quotes and the terminating NUL.
	byte_vec dst = new u_char[n * 2 + 1 + 2];
	dst[j++] = '"';

	for ( unsigned i = 0; i < n; ++i )
		{
		switch ( src[i] ) {
		case '`': case '"': case '\\': case '$':
			dst[j++] = '\\';
			break;
		default:
			break;
		}

		dst[j++] = src[i];
		}

	dst[j++] = '"';
	dst[j] = '\0';

	return new StringVal(new BroString(1, dst, j));
	%}

## Finds all occurrences of a pattern in a string.
##
## str: The string to inspect.
##
## re: The pattern to look for in *str*.
##
## Returns: The set of strings in *str* that match *re*, or the empty set.
##
## .. zeek:see:: find_last strstr
function find_all%(str: string, re: pattern%) : string_set
	%{
	TableVal* a = new TableVal(string_set);

	const u_char* s = str->Bytes();
	const u_char* e = s + str->Len();

	for ( const u_char* t = s; t < e; ++t )
		{
		int n = re->MatchPrefix(t, e - t);

		if ( n >= 0 )
			{
			Val* idx = new StringVal(n, (const char*) t);
			a->Assign(idx, 0);
			Unref(idx);

			// Resume right after the match; the loop's ++t supplies
			// the final step. For a zero-length match nothing is
			// skipped: stepping back by one here would cancel the
			// ++t and the scan would never advance.
			if ( n > 0 )
				t += n - 1;
			}
		}

	return a;
	%}

## Finds the last occurrence of a pattern in a string. This function returns
## the match that starts at the largest index in the string, which is not
## necessarily the longest match.  For example, a pattern of ``/.*/`` will
## return the final character in the string.
##
## str: The string to inspect.
##
## re: The pattern to look for in *str*.
##
## Returns: The last string in *str* that matches *re*, or the empty string.
##
## .. zeek:see:: find_all strstr
function find_last%(str: string, re: pattern%) : string
	%{
	const u_char* s = str->Bytes();
	const u_char* e = s + str->Len();

	// Try every start position from the last byte backwards. An index is
	// used so that no pointer before the start of the buffer is ever
	// formed, which also covers the empty string.
	for ( int i = str->Len() - 1; i >= 0; --i )
		{
		const u_char* t = s + i;
		int n = re->MatchPrefix(t, e - t);

		if ( n >= 0 )
			return new StringVal(n, (const char*) t);
		}

	return val_mgr->GetEmptyString();
	%}

## Returns a hex dump for given input data. The hex dump renders 16 bytes per
## line, with hex on the left and ASCII (where printable)
## on the right.
##
## data_str: The string to dump in hex format.
##
## Returns: The hex dump of the given string.
##
## .. zeek:see:: string_to_ascii_hex bytestring_to_hexstr
##
## .. note:: Based on Netdude's hex editor code.
##
function hexdump%(data_str: string%) : string
	%{

// The width of a line of text in the hex-mode view, consisting
// of offset, hex view and ASCII view:
//
// 32 +     16 characters per 8 bytes, twice
// (2*7) +  Single space between bytes, twice
// 4 +      Two spaces between 8-byte sets and ASCII
// 1 +      For newline
// 17 +     For ASCII display, with spacer column
// 6        For 5-digit offset counter, including spacer
//
#define HEX_LINE_WIDTH                74

#define HEX_LINE_START                 6
#define HEX_LINE_END                  53
#define HEX_LINE_START_ASCII          56
#define HEX_LINE_START_RIGHT_ASCII    65
#define HEX_LINE_LEFT_MIDDLE          28
#define HEX_LINE_RIGHT_MIDDLE         31
#define HEX_BLOCK_LEN                 23
#define HEX_LINE_BYTES                16
#define NULL_CHAR                     '.'
#define NONPRINT_CHAR                 '.'

	const u_char* data = data_str->Bytes();
	unsigned data_size = data_str->Len();

	if ( ! data )
		return val_mgr->GetEmptyString();

	// One extra line leaves room for a trailing partial line.
	int num_lines = (data_size / 16) + 1;
	int len = num_lines * HEX_LINE_WIDTH;
	u_char* hex_data = new u_char[len + 1];

	// Pre-fill with blanks so that only real characters need writing.
	memset(hex_data, ' ', len);

	u_char* hex_data_ptr = hex_data;
	u_char* ascii_ptr = hex_data_ptr + 50;
	int x = 0, y = 0;	// x: position of the current byte within its line

	for ( const u_char* data_ptr = data; data_ptr < data + data_size;
	      ++data_ptr )
		{
		if ( x == 0 )
			{
			// Start of a line: write the 4-digit hex offset and
			// position the hex and ASCII cursors.
			char offset[5];
			snprintf(offset, sizeof(offset),
					"%.4tx", data_ptr - data);
			memcpy(hex_data_ptr, offset, 4);
			hex_data_ptr += 6;
			ascii_ptr = hex_data_ptr + 50;
			}

		char hex_byte[3];
		snprintf(hex_byte, sizeof(hex_byte),
				"%.2x", (u_char) *data_ptr);

		int val = (u_char) *data_ptr;

		u_char ascii_byte = val;

		// If unprintable, use special characters:
		if ( val < 0x20 || val >= 0x7f )
			{
			if ( val == 0 )
				ascii_byte = NULL_CHAR;
			else
				ascii_byte = NONPRINT_CHAR;
			}

		*hex_data_ptr++ = hex_byte[0];
		*hex_data_ptr++ = hex_byte[1];
		*hex_data_ptr++ = ' ';
		*ascii_ptr++ = ascii_byte;

		// Extra spacer after the first eight bytes of a line.
		if ( x == 7 )
			{
			*hex_data_ptr++ = ' ';
			*ascii_ptr++ = ' ';
			}

		++x;

		if ( x == 16 )
			{
			// Line complete: terminate it and start the next one
			// right behind the ASCII column.
			x = 0;
			*ascii_ptr++ = '\n';
			hex_data_ptr = ascii_ptr;
			}
		}

	// Terminate the string, but ensure it ends with a newline.
	if ( ascii_ptr[-1] != '\n' )
		*ascii_ptr++ = '\n';
	*ascii_ptr = 0;

	StringVal* result = new StringVal((const char*) hex_data);
	delete [] hex_data;

	return result;
	%}

## Returns a reversed copy of the string
##
## str: The string to reverse.
##
## Returns: A reversed copy of *str*
##
function reverse%(str: string%) : string
	%{
	// Copy into a std::string (embedded NULs are kept, since the length
	// is passed explicitly) and reverse the copy in place.
	string s = string((const char*)str->Bytes(), str->Len());
	reverse(s.begin(), s.end());
	return new StringVal(s.length(), (const char*)s.c_str());
	%}