Merge remote-tracking branch 'origin/topic/timw/1040-str-split'

* origin/topic/timw/1040-str-split:
  GH-1040: Add zero-indexed version of str_split
This commit is contained in:
Jon Siwek 2020-07-06 21:06:41 -07:00
commit 7b15b82009
11 changed files with 76 additions and 14 deletions

View file

@ -1,4 +1,8 @@
3.2.0-dev.864 | 2020-07-06 21:06:41 -0700
* GH-1040: Add zero-indexed version of str_split (Tim Wojtulewicz, Corelight)
3.2.0-dev.862 | 2020-07-06 20:40:44 -0700
* GH-1041: Move compress_path to a bif that uses normalize_path (Tim Wojtulewicz, Corelight)

4
NEWS
View file

@ -255,6 +255,10 @@ Deprecated Functionality
- The "BroString.h" file is deprecated, use "ZeekString.h"
- The str_split() BIF is deprecated, use str_split_indices(). Note
that the former returns a vector with indices starting at 1 while the
latter returns a vector with indices starting at 0.
Zeek 3.1.0
==========

View file

@ -1 +1 @@
3.2.0-dev.862
3.2.0-dev.864

2
doc

@ -1 +1 @@
Subproject commit d2380ce1047b4f097e1f2602bfc007d9610236bf
Subproject commit ec79581811902b86c54116c8e1983b647c68ca69

View file

@ -288,9 +288,8 @@ function generate_fingerprint(c: connection, key: string)
if ( !c?$ssh )
return;
local lx = str_split(md5_hash(key), vector(2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30));
lx[0] = "";
c$ssh$host_key = sub(join_string_vec(lx, ":"), /:/, "");
local lx = str_split_indices(md5_hash(key), vector(2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30));
c$ssh$host_key = join_string_vec(lx, ":");
}
event ssh1_server_host_key(c: connection, p: string, e: string) &priority=5

View file

@ -126,14 +126,14 @@ function normalize_mac(a: string): string
if ( |result| == 12 )
{
octets = str_split(result, vector(2, 4, 6, 8, 10));
return fmt("%s:%s:%s:%s:%s:%s", octets[1], octets[2], octets[3], octets[4], octets[5], octets[6]);
octets = str_split_indices(result, vector(2, 4, 6, 8, 10));
return fmt("%s:%s:%s:%s:%s:%s", octets[0], octets[1], octets[2], octets[3], octets[4], octets[5]);
}
if ( |result| == 16 )
{
octets = str_split(result, vector(2, 4, 6, 8, 10, 12, 14));
return fmt("%s:%s:%s:%s:%s:%s:%s:%s", octets[1], octets[2], octets[3], octets[4], octets[5], octets[6], octets[7], octets[8]);
octets = str_split_indices(result, vector(2, 4, 6, 8, 10, 12, 14));
return fmt("%s:%s:%s:%s:%s:%s:%s:%s", octets[0], octets[1], octets[2], octets[3], octets[4], octets[5], octets[6], octets[7]);
}
return "";

View file

@ -700,10 +700,10 @@ function str_smith_waterman%(s1: string, s2: string, params: sw_params%) : sw_su
##
## idx: The index vector (``vector of count``) with the cutting points.
##
## Returns: A vector of strings.
## Returns: A one-indexed vector of strings.
##
## .. zeek:see:: split_string split_string1 split_string_all split_string_n
function str_split%(s: string, idx: index_vec%): string_vec
function str_split%(s: string, idx: index_vec%): string_vec &deprecated="Remove in v4.1. Use str_split_indices."
%{
auto idx_v = idx->AsVector();
zeek::String::IdxVec indices(idx_v->size());
@ -730,6 +730,44 @@ function str_split%(s: string, idx: index_vec%): string_vec
return result_v;
%}
## Splits a string into substrings with the help of an index vector of cutting
## points. This differs from str_split() in that it does not return an empty element
## at the beginning of the result.
##
## s: The string to split.
##
## idx: The index vector (``vector of count``) with the cutting points
##
## Returns: A zero-indexed vector of strings.
##
## .. zeek:see:: split_string split_string1 split_string_all split_string_n
function str_split_indices%(s: string, idx: index_vec%): string_vec
%{
auto idx_v = idx->AsVector();
zeek::String::IdxVec indices(idx_v->size());
unsigned int i;
for ( i = 0; i < idx_v->size(); i++ )
indices[i] = (*idx_v)[i]->AsCount();
zeek::String::Vec* result = s->AsString()->Split(indices);
auto result_v = zeek::make_intrusive<zeek::VectorVal>(zeek::id::string_vec);
if ( result )
{
i = 0;
for ( zeek::String::VecIt it = result->begin();
it != result->end(); ++it, ++i )
result_v->Assign(i, zeek::make_intrusive<zeek::StringVal>(*it));
// StringVal now possesses string.
delete result;
}
return result_v;
%}
## Strips whitespace at both ends of a string.
##
## str: The string to strip the whitespace from.

View file

@ -20,6 +20,8 @@ s is a test
---------------------
[, thi, s i, s a tes, t]
---------------------
[thi, s i, s a tes, t]
---------------------
X-Mailer
Testing Test (http://www.example.com)
---------------------

View file

@ -1,3 +1,4 @@
warning in /Users/tim/Desktop/projects/zeek/testing/btest/.tmp/language.strings/strings.zeek, line 26: deprecated (str_split): Remove in v4.1. Use str_split_indices.
Input string: broisaveryneatids
String splitting
@ -9,6 +10,13 @@ a
very
neat
ids
Splitting 'broisaveryneatids' at 6 points in zero-indexed mode...
bro
is
a
very
neat
ids
Substrings
----------

View file

@ -26,6 +26,8 @@ event zeek_init()
print "---------------------";
print str_split(a, idx);
print "---------------------";
print str_split_indices(a, idx);
print "---------------------";
a = "X-Mailer: Testing Test (http://www.example.com)";
pat = /:[[:blank:]]*/;
print_string_vector(split_string1(a, pat));

View file

@ -1,5 +1,5 @@
# @TEST-EXEC: zeek -b %INPUT >output 2>&1
# @TEST-EXEC: btest-diff output
# @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-remove-abspath btest-diff output
# Demo policy for string functions
#
@ -25,6 +25,12 @@ event zeek_init()
print fmt("Splitting '%s' at %d points...", s1, |idx1|);
local res_split: string_vec = str_split(s1, idx1);
for ( i in res_split )
print res_split[i];
print fmt("Splitting '%s' at %d points in zero-indexed mode...", s1, |idx1|);
res_split = str_split_indices(s1, idx1);
for ( i in res_split )
print res_split[i];
@ -45,4 +51,3 @@ event zeek_init()
print fmt("ids: %d", strstr(s1, "ids"));
print fmt("nono: %d", strstr(s1, "nono"));
}