mirror of
https://github.com/zeek/zeek.git
synced 2025-10-11 19:18:19 +00:00

With the introduction of the package manager, it will get more common that applications are able to get information about the currently running version of Bro. With this commit, scripts can easily compare which version of Bro they are running. Commonly, this probably will either look like this (both lines check if the current Bro version is greater or equal to 2.5) @if ( Version::num >= 20500 ) or @if ( Version::greater_equal("2.5") ) Version::info contains detailed information about the running version of Bro, including beta flags, etc.
1331 lines
35 KiB
Text
1331 lines
35 KiB
Text
##! Definitions of built-in functions related to string processing and
|
|
##! manipulation.
|
|
|
|
|
|
%%{ // C segment
|
|
#include <vector>
|
|
#include <algorithm>
|
|
using namespace std;
|
|
|
|
#include "SmithWaterman.h"
|
|
%%}
|
|
|
|
## Calculates the Levenshtein distance between the two strings. See `Wikipedia
|
|
## <http://en.wikipedia.org/wiki/Levenshtein_distance>`__ for more information.
|
|
##
|
|
## s1: The first string.
|
|
##
|
|
## s2: The second string.
|
|
##
|
|
## Returns: The Levenshtein distance of two strings as a count.
|
|
##
|
|
function levenshtein_distance%(s1: string, s2: string%): count
	%{
	// Dynamic-programming edit distance over the raw bytes of both
	// strings; insertion, deletion and substitution each cost 1.
	const unsigned int len1 = s1->Len();
	const unsigned int len2 = s2->Len();

	// With one side empty, the distance is the other side's length.
	if ( len1 == 0 )
		return new Val(len2, TYPE_COUNT);

	if ( len2 == 0 )
		return new Val(len1, TYPE_COUNT);

	// dist[r][c] is the distance between the first r bytes of s1 and
	// the first c bytes of s2.
	vector<vector<unsigned int> > dist(len1 + 1, vector<unsigned int>(len2 + 1));

	for ( unsigned int r = 0; r <= len1; ++r )
		dist[r][0] = r;

	for ( unsigned int c = 1; c <= len2; ++c )
		dist[0][c] = c;

	for ( unsigned int r = 1; r <= len1; ++r )
		{
		for ( unsigned int c = 1; c <= len2; ++c )
			{
			unsigned int subst_cost = 1;

			if ( s1->Bytes()[r-1] == s2->Bytes()[c-1] )
				subst_cost = 0;

			const unsigned int via_delete = dist[r-1][c] + 1;
			const unsigned int via_insert = dist[r][c-1] + 1;
			const unsigned int via_subst = dist[r-1][c-1] + subst_cost;

			dist[r][c] = min(min(via_delete, via_insert), via_subst);
			}
		}

	return new Val(dist[len1][len2], TYPE_COUNT);
	%}
|
|
|
|
## Concatenates all arguments into a single string. The function takes a
|
|
## variable number of arguments of type string and stitches them together.
|
|
##
|
|
## Returns: The concatenation of all (string) arguments.
|
|
##
|
|
## .. bro:see:: cat cat_sep cat_string_array cat_string_array_n
|
|
## fmt
|
|
## join_string_vec join_string_array
|
|
function string_cat%(...%): string
	%{
	// First pass: add up the lengths of all arguments.
	int total_len = 0;
	loop_over_list(@ARG@, i)
		total_len += @ARG@[i]->AsString()->Len();

	// The buffer (with room for a trailing NUL) is handed to the
	// BroString up front and filled in afterwards via "out".
	u_char* buf = new u_char[total_len + 1];
	BroString* joined = new BroString(1, buf, total_len);
	u_char* out = buf;

	// Second pass: copy each argument's bytes back to back.
	loop_over_list(@ARG@, j)
		{
		const BroString* piece = @ARG@[j]->AsString();
		const int piece_len = piece->Len();

		memcpy(out, piece->Bytes(), piece_len);
		out += piece_len;
		}

	*out = 0;

	return new StringVal(joined);
	%}
|
|
|
|
%%{
|
|
// Collects the strings stored at indices [start, end] of a string_array
// table into "vs" (which is cleared first). The pointers refer to the
// strings held by the table's values; they are not copied.
//
// Returns 1 if every index in the range was present, 0 as soon as a
// lookup fails (in which case "vs" holds the elements found so far).
int string_array_to_vs(TableVal* tbl, int start, int end,
			vector<const BroString*>& vs)
	{
	vs.clear();

	for ( int i = start; i <= end; ++i )
		{
		Val* ind = new Val(i, TYPE_COUNT);
		Val* v = tbl->Lookup(ind);

		// The index is only needed for the lookup. Release it right
		// away so the early return below no longer leaks it.
		delete ind;

		if ( ! v )
			return 0;

		vs.push_back(v->AsString());

#if 0
		char* str = v->AsString()->Render();
		DEBUG_MSG("string_array[%d] = \"%s\"\n", i, str);
		delete [] str;
#endif
		}

	return 1;
	}
|
|
|
|
// Stores copies of the strings in "vs" into the table "tbl" under the
// consecutive indices start, start+1, ..., end (vs[0] goes to "start").
//
// Returns 1 if the whole index range was filled, and 0 if "vs" ran out
// of elements first. Previously a too-short vector was read out of
// bounds; now the entries that exist are stored and the function stops.
int vs_to_string_array(vector<const BroString*>& vs, TableVal* tbl,
			int start, int end)
	{
	vector<const BroString*>::size_type j = 0;

	for ( int i = start; i <= end; ++i, ++j )
		{
		if ( j >= vs.size() )
			return 0;

		Val* ind = new Val(i, TYPE_COUNT);
		tbl->Assign(ind, new StringVal(vs[j]->Len(),
						(const char *)vs[j]->Bytes()));
		Unref(ind);
		}

	return 1;
	}
|
|
|
|
// Concatenates the elements tbl[start] .. tbl[end] of a string_array.
// The status of the gathering step is not checked, so if an index is
// missing, whatever was collected up to that point is concatenated.
BroString* cat_string_array_n(TableVal* tbl, int start, int end)
	{
	vector<const BroString*> pieces;
	string_array_to_vs(tbl, start, end, pieces);

	BroString* joined = concatenate(pieces);
	return joined;
	}
|
|
%%}
|
|
|
|
## Concatenates all elements in an array of strings.
|
|
##
|
|
## a: The :bro:type:`string_array` (``table[count] of string``).
|
|
##
|
|
## Returns: The concatenation of all elements in *a*.
|
|
##
|
|
## .. bro:see:: cat cat_sep string_cat cat_string_array_n
|
|
## fmt
|
|
## join_string_vec join_string_array
|
|
function cat_string_array%(a: string_array%): string &deprecated
	%{
	// Concatenate indices 1 through the number of entries in the table.
	const int last = a->AsTable()->Length();
	BroString* joined = cat_string_array_n(a->AsTableVal(), 1, last);

	return new StringVal(joined);
	%}
|
|
|
|
## Concatenates a specific range of elements in an array of strings.
|
|
##
|
|
## a: The :bro:type:`string_array` (``table[count] of string``).
|
|
##
|
|
## start: The array index of the first element of the range.
|
|
##
|
|
## end: The array index of the last element of the range.
|
|
##
|
|
## Returns: The concatenation of the range *[start, end]* in *a*.
|
|
##
|
|
## .. bro:see:: cat string_cat cat_string_array
|
|
## fmt
|
|
## join_string_vec join_string_array
|
|
function cat_string_array_n%(a: string_array, start: count, end: count%): string &deprecated
	%{
	// Delegate to the C++ helper of the same name for the range
	// [start, end] and wrap its result.
	BroString* joined = cat_string_array_n(a->AsTableVal(), start, end);

	return new StringVal(joined);
	%}
|
|
|
|
## Joins all values in the given array of strings with a separator placed
|
|
## between each element.
|
|
##
|
|
## sep: The separator to place between each element.
|
|
##
|
|
## a: The :bro:type:`string_array` (``table[count] of string``).
|
|
##
|
|
## Returns: The concatenation of all elements in *a*, with *sep* placed
|
|
## between each element.
|
|
##
|
|
## .. bro:see:: cat cat_sep string_cat cat_string_array cat_string_array_n
|
|
## fmt
|
|
## join_string_vec
|
|
function join_string_array%(sep: string, a: string_array%): string &deprecated
	%{
	// Builds the list "a[1], sep, a[2], sep, ..., a[n]" of string
	// pointers and concatenates it in one go. If any index in 1..n is
	// missing from the table, no value (0) is returned, as before.
	vector<const BroString*> vs;
	TableVal* tbl = a->AsTableVal();
	int n = a->AsTable()->Length();

	for ( int i = 1; i <= n; ++i )
		{
		Val* ind = new Val(i, TYPE_COUNT);
		Val* v = tbl->Lookup(ind);

		// The index is only needed for the lookup. Releasing it here
		// keeps the early return below from leaking it.
		Unref(ind);

		if ( ! v )
			return 0;

		vs.push_back(v->AsString());

		// Separator goes between elements, not after the last one.
		if ( i < n )
			vs.push_back(sep->AsString());
		}

	return new StringVal(concatenate(vs));
	%}
|
|
|
|
## Joins all values in the given vector of strings with a separator placed
|
|
## between each element.
|
|
##
|
|
## sep: The separator to place between each element.
|
|
##
|
|
## vec: The :bro:type:`string_vec` (``vector of string``).
|
|
##
|
|
## Returns: The concatenation of all elements in *vec*, with *sep* placed
|
|
## between each element.
|
|
##
|
|
## .. bro:see:: cat cat_sep string_cat cat_string_array cat_string_array_n
|
|
## fmt
|
|
## join_string_array
|
|
function join_string_vec%(vec: string_vec, sep: string%): string
	%{
	// All output is accumulated in a raw-style description buffer.
	ODesc desc;
	desc.SetStyle(RAW_STYLE);

	VectorVal* elems = vec->AsVectorVal();

	unsigned idx = 0;
	while ( idx < elems->Size() )
		{
		// A separator precedes every slot but the first, including
		// slots that turn out to be unset.
		if ( idx != 0 )
			desc.Add(sep->CheckString(), 0);

		Val* elem = elems->Lookup(idx);
		++idx;

		// Unset slots contribute nothing beyond the separator.
		if ( elem )
			elem->Describe(&desc);
		}

	// Read the length before taking the buffer out of the descriptor.
	const int out_len = desc.Len();
	BroString* joined = new BroString(1, desc.TakeBytes(), out_len);
	joined->SetUseFreeToDelete(true);

	return new StringVal(joined);
	%}
|
|
|
|
## Sorts an array of strings.
|
|
##
|
|
## a: The :bro:type:`string_array` (``table[count] of string``).
|
|
##
|
|
## Returns: A sorted copy of *a*.
|
|
##
|
|
## .. bro:see:: sort
|
|
function sort_string_array%(a: string_array%): string_array &deprecated
	%{
	TableVal* tbl = a->AsTableVal();
	int n = a->AsTable()->Length();

	// Collect a[1..n]. Collection stops at the first missing index, so
	// "vs" may end up with fewer than n elements.
	vector<const BroString*> vs;
	string_array_to_vs(tbl, 1, n, vs);

	// Insertion sort by Bstr_cmp. Bstr_cmp is a three-way comparison,
	// so it cannot be passed to std::sort as-is.
	unsigned int i, j;
	for ( i = 0; i < vs.size(); ++i )
		{
		const BroString* x = vs[i];

		for ( j = i; j > 0; --j )
			{
			if ( Bstr_cmp(vs[j-1], x) <= 0 )
				break;

			vs[j] = vs[j-1];
			}

		vs[j] = x;
		}

	// Write back only as many entries as were actually collected.
	// Using n here would index past the end of "vs" whenever the
	// input table's indices are not exactly 1..n.
	TableVal* b = new TableVal(string_array);
	vs_to_string_array(vs, b, 1, (int) vs.size());

	return b;
	%}
|
|
|
|
## Returns an edited version of a string that applies a special
|
|
## "backspace character" (usually ``\x08`` for backspace or ``\x7f`` for DEL).
|
|
## For example, ``edit("hello there", "e")`` returns ``"llo t"``.
|
|
##
|
|
## arg_s: The string to edit.
|
|
##
|
|
## arg_edit_char: A string of exactly one character that represents the
|
|
## "backspace character". If it is longer than one character Bro
|
|
## generates a run-time error and uses the first character in
|
|
## the string.
|
|
##
|
|
## Returns: An edited version of *arg_s* where *arg_edit_char* triggers the
|
|
## deletion of the last character.
|
|
##
|
|
## .. bro:see:: clean
|
|
## to_string_literal
|
|
## escape_string
|
|
## strip
|
|
function edit%(arg_s: string, arg_edit_char: string%): string
	%{
	if ( arg_edit_char->Len() != 1 )
		builtin_error("not exactly one edit character", @ARG@[1]);

	// Only the first byte of the edit string is used, even after the
	// error above has been reported.
	const u_char erase = arg_edit_char->Bytes()[0];

	const u_char* in = arg_s->Bytes();
	const int in_len = arg_s->Len();

	// The result can never be longer than the input (plus NUL).
	u_char* out = new u_char[in_len + 1];
	int out_len = 0;

	for ( const u_char* p = in; p != in + in_len; ++p )
		{
		if ( *p != erase )
			{
			out[out_len++] = *p;
			continue;
			}

		// Edit character: drop the previous output byte, if any.
		if ( out_len > 0 )
			--out_len;
		}

	out[out_len] = '\0';

	return new StringVal(new BroString(1, byte_vec(out), out_len));
	%}
|
|
|
|
## Get a substring from a string, given a starting position and length.
|
|
##
|
|
## s: The string to obtain a substring from.
|
|
##
|
|
## start: The starting position of the substring in *s*, where 1 is the first
|
|
## character. As a special case, 0 also represents the first character.
|
|
##
|
|
## n: The number of characters to extract, beginning at *start*.
|
|
##
|
|
## Returns: A substring of *s* of length *n* from position *start*.
|
|
function sub_bytes%(s: string, start: count, n: int%): string
	%{
	// Script-level positions are 1-based; convert to a 0-based offset.
	// A start of 0 is left alone and thus also means the first byte.
	if ( start > 0 )
		start -= 1;

	BroString* piece = s->AsString()->GetSubstring(start, n);

	// If no substring could be produced, return an empty string.
	return new StringVal(piece ? piece : new BroString(""));
	%}
|
|
|
|
%%{
|
|
// Returns 1 if the first t_len bytes of t also start s (which has
// s_len bytes), and 0 otherwise. An empty t always matches.
static int match_prefix(int s_len, const char* s, int t_len, const char* t)
	{
	int idx = 0;

	while ( idx < t_len )
		{
		// s is shorter than the prefix we are looking for.
		if ( idx >= s_len )
			return 0;

		if ( s[idx] != t[idx] )
			return 0;

		++idx;
		}

	return 1;
	}
|
|
|
|
// Splits str_val at matches of re and returns the pieces as a
// string_vec (0-based).
//
// incl_sep:    if non-zero, each matched separator is stored as its own
//              element between the pieces it separates.
// max_num_sep: maximum number of separators to split at; 0 means no
//              limit. Once the limit is reached, the rest of the string
//              becomes the final element.
VectorVal* do_split_string(StringVal* str_val, RE_Matcher* re, int incl_sep,
				int max_num_sep)
	{
	// string_vec is used early in the version script - do not use the NetVar.
	VectorVal* rval = new VectorVal(internal_type("string_vec")->AsVectorType());
	const u_char* s = str_val->Bytes();	// start of the current piece
	int n = str_val->Len();			// bytes left to scan
	const u_char* end_of_s = s + n;
	int num = 0;				// next result index
	int num_sep = 0;			// separators consumed so far

	while ( n >= 0 )
		{
		int offset = 0;		// length of the current piece
		int end_of_match = 0;	// length of the separator after it

		if ( max_num_sep && num_sep >= max_num_sep )
			{
			// Split limit reached: the remainder is the last piece,
			// so there is no point in scanning it for a match.
			offset = end_of_s - s;
			n = 0;
			}
		else
			{
			// Find next match offset.
			while ( n > 0 &&
				(end_of_match = re->MatchPrefix(s + offset, n)) <= 0 )
				{
				// Move on to next byte.
				++offset;
				--n;
				}
			}

		rval->Assign(num++, new StringVal(offset, (const char*) s));

		// No more separators will be needed if this is the end of string.
		if ( n <= 0 )
			break;

		if ( incl_sep )
			// including the part that matches the pattern
			rval->Assign(num++, new StringVal(end_of_match, (const char*) s+offset));

		++num_sep;

		// Continue right after the separator.
		n -= end_of_match;
		s += offset + end_of_match;

		if ( s > end_of_s )
			reporter->InternalError("RegMatch in split goes beyond the string");
		}

	return rval;
	}
|
|
|
|
// Splits str_val at matches of re and returns the pieces as a
// string_array table indexed from 1.
//
// incl_sep:    if non-zero, each matched separator is stored as its own
//              element between the pieces it separates.
// max_num_sep: maximum number of separators to split at; 0 means no
//              limit. Once the limit is reached, the rest of the string
//              becomes the final element.
Val* do_split(StringVal* str_val, RE_Matcher* re, int incl_sep, int max_num_sep)
	{
	TableVal* a = new TableVal(string_array);
	const u_char* s = str_val->Bytes();	// start of the current piece
	int n = str_val->Len();			// bytes left to scan
	const u_char* end_of_s = s + n;
	int num = 0;				// last table index used
	int num_sep = 0;			// separators consumed so far

	while ( n >= 0 )
		{
		int offset = 0;		// length of the current piece
		int end_of_match = 0;	// length of the separator after it

		if ( max_num_sep && num_sep >= max_num_sep )
			{
			// Split limit reached: the remainder is the last piece,
			// so there is no point in scanning it for a match.
			offset = end_of_s - s;
			n = 0;
			}
		else
			{
			// Find next match offset.
			while ( n > 0 &&
				(end_of_match = re->MatchPrefix(s + offset, n)) <= 0 )
				{
				// Move on to next byte.
				++offset;
				--n;
				}
			}

		Val* ind = new Val(++num, TYPE_COUNT);
		a->Assign(ind, new StringVal(offset, (const char*) s));
		Unref(ind);

		// No more separators will be needed if this is the end of string.
		if ( n <= 0 )
			break;

		if ( incl_sep )
			{ // including the part that matches the pattern
			ind = new Val(++num, TYPE_COUNT);
			a->Assign(ind, new StringVal(end_of_match, (const char*) s+offset));
			Unref(ind);
			}

		++num_sep;

		// Continue right after the separator.
		n -= end_of_match;
		s += offset + end_of_match;

		if ( s > end_of_s )
			reporter->InternalError("RegMatch in split goes beyond the string");
		}

	return a;
	}
|
|
|
|
// Replaces matches of re in str_val with repl and returns the result as
// a new string value. If do_all is zero only the first match is
// replaced, otherwise every (non-overlapping) match is.
Val* do_sub(StringVal* str_val, RE_Matcher* re, StringVal* repl, int do_all)
	{
	const u_char* src = str_val->Bytes();
	const int src_len = str_val->Len();
	const int repl_len = repl->Len();

	// Flat list of offset pairs into src: each pair <x,y> marks a
	// match to cut out, starting at x and ending just before y.
	List(ptr_compat_int) cut_points;

	int pos = 0;			// current scan offset
	int remaining = src_len;	// bytes not yet scanned
	int kept = 0;			// source bytes copied verbatim

	// Pass 1: locate the matches and count the bytes that survive.
	while ( remaining > 0 )
		{
		const int match_len = re->MatchPrefix(&src[pos], remaining);

		if ( match_len <= 0 )
			{
			// No match starts here; this byte goes to the result.
			++kept;
			++pos;
			--remaining;
			continue;
			}

		// src[pos .. pos+match_len-1] matches re.
		cut_points.append(pos);
		cut_points.append(pos + match_len);

		pos += match_len;
		remaining -= match_len;

		if ( ! do_all )
			{
			// Only the first match is replaced; everything after
			// it is copied unchanged.
			kept += remaining;
			break;
			}
		}

	// Surviving bytes + one replacement per match + trailing NUL.
	const int num_cuts = cut_points.length() / 2;
	const int out_size = kept + num_cuts * repl_len + 1;

	byte_vec result = new u_char[out_size];
	byte_vec w = result;

	// Pass 2: interleave the kept stretches with the replacement text.
	int copy_from = 0;
	for ( int i = 0; i < cut_points.length(); i += 2 /* loop over pairs */ )
		{
		const int cut_begin = cut_points[i];
		const int cut_end = cut_points[i+1];

		const int chunk = cut_begin - copy_from;
		memcpy(w, src + copy_from, chunk);
		w += chunk;

		memcpy(w, repl->Bytes(), repl_len);
		w += repl_len;

		copy_from = cut_end;
		}

	// Whatever follows the last match.
	const int tail = src_len - copy_from;
	memcpy(w, src + copy_from, tail);
	w += tail;

	// The NUL is written but not counted in the length passed on.
	*w = '\0';

	return new StringVal(new BroString(1, result, w - result));
	}
|
|
%%}
|
|
|
|
## Splits a string into an array of strings according to a pattern.
|
|
##
|
|
## str: The string to split.
|
|
##
|
|
## re: The pattern describing the element separator in *str*.
|
|
##
|
|
## Returns: An array of strings where each element corresponds to a substring
|
|
## in *str* separated by *re*.
|
|
##
|
|
## .. bro:see:: split1 split_all split_n str_split split_string1 split_string_all split_string_n str_split
|
|
##
|
|
## .. note:: The returned table starts at index 1. Note that conceptually the
|
|
## return value is meant to be a vector and this might change in the
|
|
## future.
|
|
##
|
|
function split%(str: string, re: pattern%): string_array &deprecated
	%{
	// Separators are dropped; a limit of 0 means "split everywhere".
	const int incl_sep = 0;
	const int max_num_sep = 0;

	return do_split(str, re, incl_sep, max_num_sep);
	%}
|
|
|
|
## Splits a string into an array of strings according to a pattern.
|
|
##
|
|
## str: The string to split.
|
|
##
|
|
## re: The pattern describing the element separator in *str*.
|
|
##
|
|
## Returns: An array of strings where each element corresponds to a substring
|
|
## in *str* separated by *re*.
|
|
##
|
|
## .. bro:see:: split_string1 split_string_all split_string_n str_split
|
|
##
|
|
function split_string%(str: string, re: pattern%): string_vec
	%{
	// Separators are dropped; a limit of 0 means "split everywhere".
	const int incl_sep = 0;
	const int max_num_sep = 0;

	return do_split_string(str, re, incl_sep, max_num_sep);
	%}
|
|
|
|
## Splits a string *once* into a two-element array of strings according to a
|
|
## pattern. This function is the same as :bro:id:`split`, but *str* is only
|
|
## split once (if possible) at the earliest position and an array of two strings
|
|
## is returned.
|
|
##
|
|
## str: The string to split.
|
|
##
|
|
## re: The pattern describing the separator to split *str* in two pieces.
|
|
##
|
|
## Returns: An array of strings with two elements in which the first represents
|
|
## the substring in *str* up to the first occurrence of *re*, and the
|
|
## second everything after *re*. An array of one string is returned
|
|
## when *str* cannot be split.
|
|
##
|
|
## .. bro:see:: split split_all split_n str_split split_string split_string_all split_string_n str_split
|
|
function split1%(str: string, re: pattern%): string_array &deprecated
	%{
	// Split at the first separator only, and do not return it.
	const int incl_sep = 0;
	const int max_num_sep = 1;

	return do_split(str, re, incl_sep, max_num_sep);
	%}
|
|
|
|
## Splits a string *once* into a two-element array of strings according to a
|
|
## pattern. This function is the same as :bro:id:`split_string`, but *str* is
|
|
## only split once (if possible) at the earliest position and an array of two
|
|
## strings is returned.
|
|
##
|
|
## str: The string to split.
|
|
##
|
|
## re: The pattern describing the separator to split *str* in two pieces.
|
|
##
|
|
## Returns: An array of strings with two elements in which the first represents
|
|
## the substring in *str* up to the first occurrence of *re*, and the
|
|
## second everything after *re*. An array of one string is returned
|
|
## when *str* cannot be split.
|
|
##
|
|
## .. bro:see:: split_string split_string_all split_string_n str_split
|
|
function split_string1%(str: string, re: pattern%): string_vec
	%{
	// Split at the first separator only, and do not return it.
	const int incl_sep = 0;
	const int max_num_sep = 1;

	return do_split_string(str, re, incl_sep, max_num_sep);
	%}
|
|
|
|
## Splits a string into an array of strings according to a pattern. This
|
|
## function is the same as :bro:id:`split`, except that the separators are
|
|
## returned as well. For example, ``split_all("a-b--cd", /(\-)+/)`` returns
|
|
## ``{"a", "-", "b", "--", "cd"}``: odd-indexed elements do not match the
|
|
## pattern and even-indexed ones do.
|
|
##
|
|
## str: The string to split.
|
|
##
|
|
## re: The pattern describing the element separator in *str*.
|
|
##
|
|
## Returns: An array of strings where each two successive elements correspond
|
|
## to a substring in *str* of the part not matching *re* (odd-indexed)
|
|
## and the part that matches *re* (even-indexed).
|
|
##
|
|
## .. bro:see:: split split1 split_n str_split split_string split_string1 split_string_n str_split
|
|
function split_all%(str: string, re: pattern%): string_array &deprecated
	%{
	// Keep the matched separators in the result; no split limit.
	const int incl_sep = 1;
	const int max_num_sep = 0;

	return do_split(str, re, incl_sep, max_num_sep);
	%}
|
|
|
|
## Splits a string into an array of strings according to a pattern. This
|
|
## function is the same as :bro:id:`split_string`, except that the separators
|
|
## are returned as well. For example, ``split_string_all("a-b--cd", /(\-)+/)``
|
|
## returns ``{"a", "-", "b", "--", "cd"}``: odd-indexed elements do match the
|
|
## pattern and even-indexed ones do not.
|
|
##
|
|
## str: The string to split.
|
|
##
|
|
## re: The pattern describing the element separator in *str*.
|
|
##
|
|
## Returns: An array of strings where each two successive elements correspond
|
|
## to a substring in *str* of the part not matching *re* (even-indexed)
|
|
## and the part that matches *re* (odd-indexed).
|
|
##
|
|
## .. bro:see:: split_string split_string1 split_string_n str_split
|
|
function split_string_all%(str: string, re: pattern%): string_vec
	%{
	// Keep the matched separators in the result; no split limit.
	const int incl_sep = 1;
	const int max_num_sep = 0;

	return do_split_string(str, re, incl_sep, max_num_sep);
	%}
|
|
|
|
## Splits a string a given number of times into an array of strings according
|
|
## to a pattern. This function is similar to :bro:id:`split1` and
|
|
## :bro:id:`split_all`, but with customizable behavior with respect to
|
|
## including separators in the result and the number of times to split.
|
|
##
|
|
## str: The string to split.
|
|
##
|
|
## re: The pattern describing the element separator in *str*.
|
|
##
|
|
## incl_sep: A flag indicating whether to include the separator matches in the
|
|
## result (as in :bro:id:`split_all`).
|
|
##
|
|
## max_num_sep: The number of times to split *str*.
|
|
##
|
|
## Returns: An array of strings where, if *incl_sep* is true, each two
|
|
## successive elements correspond to a substring in *str* of the part
|
|
## not matching *re* (odd-indexed) and the part that matches *re*
|
|
## (even-indexed).
|
|
##
|
|
## .. bro:see:: split split1 split_all str_split split_string split_string1 split_string_all str_split
|
|
function split_n%(str: string, re: pattern,
		incl_sep: bool, max_num_sep: count%): string_array &deprecated
	%{
	// Both knobs are passed straight through to the shared helper.
	Val* parts = do_split(str, re, incl_sep, max_num_sep);

	return parts;
	%}
|
|
|
|
## Splits a string a given number of times into an array of strings according
|
|
## to a pattern. This function is similar to :bro:id:`split_string1` and
|
|
## :bro:id:`split_string_all`, but with customizable behavior with respect to
|
|
## including separators in the result and the number of times to split.
|
|
##
|
|
## str: The string to split.
|
|
##
|
|
## re: The pattern describing the element separator in *str*.
|
|
##
|
|
## incl_sep: A flag indicating whether to include the separator matches in the
|
|
## result (as in :bro:id:`split_string_all`).
|
|
##
|
|
## max_num_sep: The number of times to split *str*.
|
|
##
|
|
## Returns: An array of strings where, if *incl_sep* is true, each two
|
|
## successive elements correspond to a substring in *str* of the part
|
|
## not matching *re* (even-indexed) and the part that matches *re*
|
|
## (odd-indexed).
|
|
##
|
|
## .. bro:see:: split_string split_string1 split_string_all str_split
|
|
function split_string_n%(str: string, re: pattern,
		incl_sep: bool, max_num_sep: count%): string_vec
	%{
	// Both knobs are passed straight through to the shared helper.
	VectorVal* parts = do_split_string(str, re, incl_sep, max_num_sep);

	return parts;
	%}
|
|
|
|
## Substitutes a given replacement string for the first occurrence of a pattern
|
|
## in a given string.
|
|
##
|
|
## str: The string to perform the substitution in.
|
|
##
|
|
## re: The pattern being replaced with *repl*.
|
|
##
|
|
## repl: The string that replaces *re*.
|
|
##
|
|
## Returns: A copy of *str* with the first occurrence of *re* replaced with
|
|
## *repl*.
|
|
##
|
|
## .. bro:see:: gsub subst_string
|
|
function sub%(str: string, re: pattern, repl: string%): string
	%{
	// Replace the first match only.
	const int do_all = 0;

	return do_sub(str, re, repl, do_all);
	%}
|
|
|
|
## Substitutes a given replacement string for all occurrences of a pattern
|
|
## in a given string.
|
|
##
|
|
## str: The string to perform the substitution in.
|
|
##
|
|
## re: The pattern being replaced with *repl*.
|
|
##
|
|
## repl: The string that replaces *re*.
|
|
##
|
|
## Returns: A copy of *str* with all occurrences of *re* replaced with *repl*.
|
|
##
|
|
## .. bro:see:: sub subst_string
|
|
function gsub%(str: string, re: pattern, repl: string%): string
	%{
	// Replace every match.
	const int do_all = 1;

	return do_sub(str, re, repl, do_all);
	%}
|
|
|
|
|
|
## Lexicographically compares two strings.
|
|
##
|
|
## s1: The first string.
|
|
##
|
|
## s2: The second string.
|
|
##
|
|
## Returns: An integer greater than, equal to, or less than 0 according as
|
|
## *s1* is greater than, equal to, or less than *s2*.
|
|
function strcmp%(s1: string, s2: string%): int
	%{
	// Three-way comparison of the two strings; the sign of the result
	// is what matters to callers.
	const int order = Bstr_cmp(s1->AsString(), s2->AsString());

	return new Val(order, TYPE_INT);
	%}
|
|
|
|
## Locates the first occurrence of one string in another.
|
|
##
|
|
## big: The string to look in.
|
|
##
|
|
## little: The (smaller) string to find inside *big*.
|
|
##
|
|
## Returns: The location of *little* in *big*, or 0 if *little* is not found in
|
|
## *big*.
|
|
##
|
|
## .. bro:see:: find_all find_last
|
|
function strstr%(big: string, little: string%): count
	%{
	// The +1 turns the 0-based offset into a 1-based position.
	// NOTE(review): presumably FindSubstring yields -1 when nothing is
	// found, which the +1 maps to the documented 0 - confirm.
	const int offset = big->AsString()->FindSubstring(little->AsString());

	return new Val(1 + offset, TYPE_COUNT);
	%}
|
|
|
|
## Substitutes each (non-overlapping) appearance of a string in another.
|
|
##
|
|
## s: The string in which to perform the substitution.
|
|
##
|
|
## from: The string to look for which is replaced with *to*.
|
|
##
|
|
## to: The string that replaces all occurrences of *from* in *s*.
|
|
##
|
|
## Returns: A copy of *s* where each occurrence of *from* is replaced with *to*.
|
|
##
|
|
## .. bro:see:: sub gsub
|
|
function subst_string%(s: string, from: string, to: string%): string
	%{
	const int needle_len = from->Len();

	// Nothing to search for: return the input itself (new reference).
	if ( needle_len == 0 )
		return s->Ref();

	const u_char* hay = s->Bytes();	// unprocessed rest of s
	int hay_len = s->Len();

	// The result is assembled from chunks that point into s and "to".
	vector<data_chunk_t> chunks;
	data_chunk_t chunk;

	for ( ; hay_len >= needle_len; )
		{
		const int hit = strstr_n(hay_len, hay, needle_len, from->Bytes());

		if ( hit < 0 )
			break;

		// Text in front of the occurrence is kept as is.
		if ( hit > 0 )
			{
			chunk.data = (const char*) hay;
			chunk.length = hit;
			chunks.push_back(chunk);
			}

		// The occurrence itself is replaced by "to".
		chunk.data = (const char*) (to->Bytes());
		chunk.length = to->Len();
		chunks.push_back(chunk);

		// Continue behind the occurrence.
		const int consumed = hit + needle_len;
		hay += consumed;
		hay_len -= consumed;
		}

	// Trailing text after the last occurrence.
	if ( hay_len > 0 )
		{
		chunk.data = (const char*) hay;
		chunk.length = hay_len;
		chunks.push_back(chunk);
		}

	return new StringVal(concatenate(chunks));
	%}
|
|
|
|
## Replaces all uppercase letters in a string with their lowercase counterpart.
|
|
##
|
|
## str: The string to convert to lowercase letters.
|
|
##
|
|
## Returns: A copy of the given string with the uppercase letters (as indicated
|
|
## by ``isascii`` and ``isupper``) folded to lowercase
|
|
## (via ``tolower``).
|
|
##
|
|
## .. bro:see:: to_upper is_ascii
|
|
function to_lower%(str: string%): string
	%{
	const int len = str->Len();
	const u_char* in = str->Bytes();
	u_char* out = new u_char[len + 1];

	// Only ASCII uppercase letters are folded; every other byte is
	// copied unchanged.
	for ( int k = 0; k < len; ++k )
		{
		const u_char c = in[k];
		out[k] = (isascii(c) && isupper(c)) ? tolower(c) : c;
		}

	out[len] = '\0';

	return new StringVal(new BroString(1, out, len));
	%}
|
|
|
|
## Replaces all lowercase letters in a string with their uppercase counterpart.
|
|
##
|
|
## str: The string to convert to uppercase letters.
|
|
##
|
|
## Returns: A copy of the given string with the lowercase letters (as indicated
|
|
## by ``isascii`` and ``islower``) folded to uppercase
|
|
## (via ``toupper``).
|
|
##
|
|
## .. bro:see:: to_lower is_ascii
|
|
function to_upper%(str: string%): string
	%{
	const int len = str->Len();
	const u_char* in = str->Bytes();
	u_char* out = new u_char[len + 1];

	// Only ASCII lowercase letters are folded; every other byte is
	// copied unchanged.
	for ( int k = 0; k < len; ++k )
		{
		const u_char c = in[k];
		out[k] = (isascii(c) && islower(c)) ? toupper(c) : c;
		}

	out[len] = '\0';

	return new StringVal(new BroString(1, out, len));
	%}
|
|
|
|
## Replaces non-printable characters in a string with escaped sequences. The
|
|
## mappings are:
|
|
##
|
|
## - values not in *[32, 126]* to ``\xXX``
|
|
##
|
|
## If the string does not yet have a trailing NUL, one is added internally.
|
|
##
|
|
## In contrast to :bro:id:`escape_string`, this encoding is *not* fully reversible.
|
|
##
|
|
## str: The string to escape.
|
|
##
|
|
## Returns: The escaped string.
|
|
##
|
|
## .. bro:see:: to_string_literal escape_string
|
|
function clean%(str: string%): string
	%{
	// Render with the default escaping; the rendered buffer is passed
	// on to the new BroString rather than being freed here.
	char* rendered = str->AsString()->Render();
	BroString* cleaned = new BroString(1, byte_vec(rendered), strlen(rendered));

	return new StringVal(cleaned);
	%}
|
|
|
|
## Replaces non-printable characters in a string with escaped sequences. The
|
|
## mappings are:
|
|
##
|
|
## - values not in *[32, 126]* to ``\xXX``
|
|
## - ``\`` to ``\\``
|
|
## - ``'`` and ``"`` to ``\'`` and ``\"``, respectively.
|
|
##
|
|
## str: The string to escape.
|
|
##
|
|
## Returns: The escaped string.
|
|
##
|
|
## .. bro:see:: clean escape_string
|
|
function to_string_literal%(str: string%): string
	%{
	// Render using the string-literal escaping rules; the rendered
	// buffer is passed on to the new BroString rather than freed here.
	char* rendered = str->AsString()->Render(BroString::BRO_STRING_LITERAL);
	BroString* literal = new BroString(1, byte_vec(rendered), strlen(rendered));

	return new StringVal(literal);
	%}
|
|
|
|
## Determines whether a given string contains only ASCII characters.
|
|
##
|
|
## str: The string to examine.
|
|
##
|
|
## Returns: False if any byte value of *str* is greater than 127, and true
|
|
## otherwise.
|
|
##
|
|
## .. bro:see:: to_upper to_lower
|
|
function is_ascii%(str: string%): bool
	%{
	const u_char* p = str->Bytes();
	const u_char* const stop = p + str->Len();

	// Any byte above 127 makes the string non-ASCII.
	while ( p != stop )
		{
		if ( *p > 127 )
			return new Val(0, TYPE_BOOL);

		++p;
		}

	return new Val(1, TYPE_BOOL);
	%}
|
|
|
|
## Replaces non-printable characters in a string with escaped sequences. The
|
|
## mappings are:
|
|
##
|
|
## - values not in *[32, 126]* to ``\xXX``
|
|
## - ``\`` to ``\\``
|
|
##
|
|
## In contrast to :bro:id:`clean`, this encoding is fully reversible.
|
|
##
|
|
## str: The string to escape.
|
|
##
|
|
## Returns: The escaped string.
|
|
##
|
|
## .. bro:see:: clean to_string_literal
|
|
function escape_string%(s: string%): string
	%{
	// Hex-escape plus escaping of the escape character itself.
	const int flags = BroString::ESC_HEX | BroString::ESC_ESC;
	char* rendered = s->AsString()->Render(flags);

	// The value is built from the rendered text, after which the
	// temporary buffer is freed here.
	Val* escaped = new StringVal(rendered);
	delete [] rendered;

	return escaped;
	%}
|
|
|
|
## Returns an ASCII hexadecimal representation of a string.
|
|
##
|
|
## s: The string to convert to hex.
|
|
##
|
|
## Returns: A copy of *s* where each byte is replaced with the corresponding
|
|
## two-digit hexadecimal representation.
|
|
function string_to_ascii_hex%(s: string%): string
	%{
	// Same digits that the "%02x" format produces.
	static const char hex_digits[] = "0123456789abcdef";

	const int n = s->Len();
	const u_char* sp = s->Bytes();

	// Two output characters per input byte, plus the trailing NUL.
	char* x = new char[n * 2 + 1];

	for ( int i = 0; i < n; ++i )
		{
		x[i * 2] = hex_digits[sp[i] >> 4];
		x[i * 2 + 1] = hex_digits[sp[i] & 0x0f];
		}

	// Terminate explicitly. Previously the NUL was only written as a
	// side effect of sprintf(), so for an empty input the buffer stayed
	// uninitialized although it is handed over as NUL-terminated.
	x[n * 2] = '\0';

	return new StringVal(new BroString(1, (u_char*) x, n * 2));
	%}
|
|
|
|
## Uses the Smith-Waterman algorithm to find similar/overlapping substrings.
|
|
## See `Wikipedia <http://en.wikipedia.org/wiki/Smith%E2%80%93Waterman_algorithm>`__.
|
|
##
|
|
## s1: The first string.
|
|
##
|
|
## s2: The second string.
|
|
##
|
|
## params: Parameters for the Smith-Waterman algorithm.
|
|
##
|
|
## Returns: The result of the Smith-Waterman algorithm calculation.
|
|
function str_smith_waterman%(s1: string, s2: string, params: sw_params%) : sw_substring_vec
	%{
	// Fields 0 and 1 of the params record supply the two arguments of
	// the SWParams constructor (the second one as an SWVariant).
	SWParams sw_params(params->AsRecordVal()->Lookup(0)->AsCount(),
			SWVariant(params->AsRecordVal()->Lookup(1)->AsCount()));

	BroSubstring::Vec* matches =
		smith_waterman(s1->AsString(), s2->AsString(), sw_params);

	// Convert to a script-level vector, then free the intermediate
	// substrings along with their container.
	VectorVal* converted = BroSubstring::VecToPolicy(matches);

	delete_each(matches);
	delete matches;

	return converted;
	%}
|
|
|
|
## Splits a string into substrings with the help of an index vector of cutting
|
|
## points.
|
|
##
|
|
## s: The string to split.
|
|
##
|
|
## idx: The index vector (``vector of count``) with the cutting points.
|
|
##
|
|
## Returns: A vector of strings.
|
|
##
|
|
## .. bro:see:: split split1 split_all split_n
|
|
function str_split%(s: string, idx: index_vec%): string_vec
	%{
	// Translate the script-level vector of counts into the index
	// vector type that BroString::Split() takes.
	vector<Val*>* cut_vals = idx->AsVector();
	BroString::IdxVec cuts(cut_vals->size());

	for ( unsigned int k = 0; k < cut_vals->size(); ++k )
		cuts[k] = (*cut_vals)[k]->AsCount();

	BroString::Vec* pieces = s->AsString()->Split(cuts);
	VectorVal* out = new VectorVal(
		internal_type("string_vec")->AsVectorType());

	// No pieces: return the empty vector.
	if ( ! pieces )
		return out;

	// Note that the pieces are stored starting at index 1, so index 0
	// of the returned vector is never assigned here.
	unsigned int slot = 1;

	for ( BroString::VecIt it = pieces->begin(); it != pieces->end(); ++it )
		{
		// StringVal now possesses string.
		out->Assign(slot, new StringVal(*it));
		++slot;
		}

	// Only the container is freed; its strings live on in the values.
	delete pieces;

	return out;
	%}
|
|
|
|
## Strips whitespace at both ends of a string.
##
## str: The string to strip the whitespace from.
##
## Returns: A copy of *str* with leading and trailing whitespace removed. A
##          string consisting only of whitespace yields the empty string.
##
## .. bro:see:: sub gsub
function strip%(str: string%): string
	%{
	const u_char* s = str->Bytes();
	int n = str->Len();

	if ( n == 0 )
		// Empty string.
		return new StringVal(new BroString(s, n, 1));

	const u_char* sp = s;

	// Walk back from the last byte over trailing whitespace. The "e > sp"
	// test stops at the first byte at the latest, so e never moves in
	// front of the buffer.
	const u_char* e = sp + n - 1;
	while ( e > sp && isspace(*e) )
		--e;

	// Walk forward over leading whitespace. The bounds check has to come
	// first: once sp has moved past e (string made up of whitespace only)
	// the byte at *sp must not be read anymore.
	while ( sp <= e && isspace(*sp) )
		++sp;

	// If everything was whitespace, sp == e + 1 and the length is 0.
	return new StringVal(new BroString(sp, (e - sp + 1), 1));
	%}
|
|
|
|
## Generates a string of a given size and fills it with repetitions of a source
## string.
##
## len: The length of the output string. If it is not positive, the empty
##      string is returned.
##
## source: The string to concatenate repeatedly until *len* has been reached.
##         If it is empty, there is nothing to repeat and the output consists
##         of NUL bytes.
##
## Returns: A string of length *len* filled with *source*. The final byte of
##          the result is always overwritten with a NUL.
function string_fill%(len: int, source: string%): string
	%{
	// A non-positive length would request a zero or negative allocation
	// and make the terminator write below land in front of the buffer.
	if ( len <= 0 )
		return new StringVal("");

	const u_char* src = source->Bytes();
	int64_t n = source->Len();
	char* dst = new char[len];

	if ( n > 0 )
		{
		// 64-bit index so that it cannot overflow before reaching len.
		// The last copy is shortened to whatever room is left.
		for ( int64_t i = 0; i < len; i += n )
			::memcpy((dst + i), src, min(n, len - i));
		}
	else
		// With an empty source the copy loop would never advance, so
		// give the buffer a defined content instead.
		::memset(dst, 0, len);

	dst[len - 1] = 0;

	// The BroString takes over dst.
	return new StringVal(new BroString(1, byte_vec(dst), len));
	%}
|
|
|
|
## Takes a string and escapes characters that would allow execution of
## commands at the shell level. Must be used before including strings in
## :bro:id:`system` or similar calls.
##
## source: The string to escape.
##
## Returns: A shell-escaped version of *source*: every backtick, double
##          quote, backslash and dollar sign is preceded by a backslash.
##
## .. bro:see:: system
function str_shell_escape%(source: string%): string
	%{
	const u_char* src = source->Bytes();
	unsigned n = source->Len();

	// Worst case every input byte gains a backslash, plus the terminator.
	byte_vec dst = new u_char[n * 2 + 1];
	unsigned out = 0;

	for ( unsigned in = 0; in < n; ++in )
		{
		const u_char c = src[in];

		// Only these four characters are escaped. The following ones
		// are currently passed through unchanged:
		// '|' '&' ';' '(' ')' '<' '>' '\'' '*' '?' '[' ']'
		// '!' '#' '{' '}'
		if ( c == '`' || c == '"' || c == '\\' || c == '$' )
			dst[out++] = '\\';

		dst[out++] = c;
		}

	dst[out] = '\0';

	// The BroString takes over dst; the terminator is not part of the
	// reported length.
	return new StringVal(new BroString(1, dst, out));
	%}
|
|
|
|
## Finds all occurrences of a pattern in a string. The string is scanned from
## left to right, and the search continues behind each match, so the reported
## matches do not overlap.
##
## str: The string to inspect.
##
## re: The pattern to look for in *str*.
##
## Returns: The set of strings in *str* that match *re*, or the empty set.
##
## .. bro:see:: find_last strstr
function find_all%(str: string, re: pattern%) : string_set
	%{
	TableVal* a = new TableVal(string_set);

	const u_char* s = str->Bytes();
	const u_char* e = s + str->Len();

	for ( const u_char* t = s; t < e; ++t )
		{
		// A negative result means there is no match starting at t.
		int n = re->MatchPrefix(t, e - t);

		if ( n < 0 )
			continue;

		Val* idx = new StringVal(n, (const char*) t);
		a->Assign(idx, 0);
		Unref(idx);

		// Skip the matched bytes; the ++t of the loop supplies the
		// final step. A zero-length match must not move t backwards,
		// as that would cancel the ++t and the loop would never end.
		if ( n > 0 )
			t += n - 1;
		}

	return a;
	%}
|
|
|
|
## Finds the last occurrence of a pattern in a string. The match returned is
## the one that starts at the largest index in the string, which is not
## necessarily the longest match. For example, a pattern of ``/.*/`` will
## return the final character in the string.
##
## str: The string to inspect.
##
## re: The pattern to look for in *str*.
##
## Returns: The last string in *str* that matches *re*, or the empty string.
##
## .. bro:see:: find_all strstr
function find_last%(str: string, re: pattern%) : string
	%{
	const u_char* begin = str->Bytes();
	const u_char* end = begin + str->Len();

	// Try every start position from the last byte back to the first one;
	// the first position that yields a match is the answer.
	const u_char* pos = end;

	while ( pos > begin )
		{
		--pos;

		int match_len = re->MatchPrefix(pos, end - pos);

		if ( match_len >= 0 )
			return new StringVal(match_len, (const char*) pos);
		}

	return new StringVal("");
	%}
|
|
|
|
## Returns a hex dump for given input data. The hex dump renders 16 bytes per
## line, with hex on the left and ASCII (where printable)
## on the right. Each line starts with the offset of its first byte as four
## hex digits.
##
## data_str: The string to dump in hex format.
##
## Returns: The hex dump of the given string. The result always ends with a
##          newline.
##
## .. bro:see:: string_to_ascii_hex bytestring_to_hexstr
##
## .. note:: Based on Netdude's hex editor code.
##
function hexdump%(data_str: string%) : string
	%{

// The width of a line of text in the hex-mode view, consisting
// of offset, hex view and ASCII view:
//
// 32       + 16 characters per 8 bytes, twice
// (2*7)    + Single space between bytes, twice
// 4        + Two spaces between 8-byte sets and ASCII
// 1        + For newline
// 17       + For ASCII display, with spacer column
// 6          For 5-digit offset counter, including spacer
//
#define HEX_LINE_WIDTH 74

#define HEX_LINE_START 6
#define HEX_LINE_END 53
#define HEX_LINE_START_ASCII 56
#define HEX_LINE_START_RIGHT_ASCII 65
#define HEX_LINE_LEFT_MIDDLE 28
#define HEX_LINE_RIGHT_MIDDLE 31
#define HEX_BLOCK_LEN 23
#define HEX_LINE_BYTES 16
#define NULL_CHAR '.'
#define NONPRINT_CHAR '.'

	const u_char* data = data_str->Bytes();
	unsigned data_size = data_str->Len();

	if ( ! data )
		return new StringVal("");

	// One extra line so that a trailing partial (or empty) line fits.
	int num_lines = (data_size / HEX_LINE_BYTES) + 1;
	int len = num_lines * HEX_LINE_WIDTH;
	u_char* hex_data = new u_char[len + 1];

	// Pre-fill with blanks; everything not written below stays a space.
	memset(hex_data, ' ', len);

	// hex_data_ptr writes the offset and hex columns, ascii_ptr writes the
	// ASCII column of the same line.
	u_char* hex_data_ptr = hex_data;
	u_char* ascii_ptr =
		hex_data_ptr + (HEX_LINE_START_ASCII - HEX_LINE_START);

	// Position of the current byte within its line.
	int x = 0;

	for ( const u_char* data_ptr = data; data_ptr < data + data_size;
	      ++data_ptr )
		{
		if ( x == 0 )
			{
			// Start of a new line: emit the offset. The pointer
			// difference is cast explicitly because "%x" expects an
			// unsigned int.
			// NOTE(review): the buffer only holds four digits, so
			// offsets of 0x10000 and above get cut off - confirm
			// whether that is intended.
			char offset[5];
			safe_snprintf(offset, sizeof(offset),
			              "%.4x", (unsigned int) (data_ptr - data));
			memcpy(hex_data_ptr, offset, 4);
			hex_data_ptr += HEX_LINE_START;
			ascii_ptr = hex_data_ptr +
				(HEX_LINE_START_ASCII - HEX_LINE_START);
			}

		char hex_byte[3];
		safe_snprintf(hex_byte, sizeof(hex_byte),
		              "%.2x", (u_char) *data_ptr);

		int val = (u_char) *data_ptr;

		u_char ascii_byte = val;

		// If unprintable, use special characters:
		if ( val < 0x20 || val >= 0x7f )
			{
			if ( val == 0 )
				ascii_byte = NULL_CHAR;
			else
				ascii_byte = NONPRINT_CHAR;
			}

		*hex_data_ptr++ = hex_byte[0];
		*hex_data_ptr++ = hex_byte[1];
		*hex_data_ptr++ = ' ';
		*ascii_ptr++ = ascii_byte;

		if ( x == 7 )
			{
			// Extra spacer between the two groups of eight bytes,
			// in the hex column as well as in the ASCII column.
			*hex_data_ptr++ = ' ';
			*ascii_ptr++ = ' ';
			}

		++x;

		if ( x == HEX_LINE_BYTES )
			{
			// Line complete; the next line begins right behind the
			// newline.
			x = 0;
			*ascii_ptr++ = '\n';
			hex_data_ptr = ascii_ptr;
			}
		}

	// Terminate the string, but ensure it ends with a newline.
	if ( ascii_ptr[-1] != '\n' )
		*ascii_ptr++ = '\n';
	*ascii_ptr = 0;

	// StringVal copies the text, so the scratch buffer can be released.
	StringVal* result = new StringVal((const char*) hex_data);
	delete [] hex_data;

	return result;
	%}
|
|
|
|
## Returns a copy of a string with its bytes in reverse order.
##
## str: The string to reverse.
##
## Returns: A reversed copy of *str*
##
function reverse%(str: string%) : string
	%{
	// Copy with an explicit length so that embedded NUL bytes survive.
	string forward((const char*) str->Bytes(), str->Len());

	// Build the result by reading the copy back to front.
	string backward(forward.rbegin(), forward.rend());

	return new StringVal(backward.length(), (const char*) backward.c_str());
	%}
|