Merge remote-tracking branch 'origin/topic/timw/1040-str-split'

* origin/topic/timw/1040-str-split:
  GH-1040: Add zero-indexed version of str_split
This commit is contained in:
Jon Siwek 2020-07-06 21:06:41 -07:00
commit 7b15b82009
11 changed files with 76 additions and 14 deletions

View file

@ -1,4 +1,8 @@
3.2.0-dev.864 | 2020-07-06 21:06:41 -0700
* GH-1040: Add zero-indexed version of str_split (Tim Wojtulewicz, Corelight)
3.2.0-dev.862 | 2020-07-06 20:40:44 -0700 3.2.0-dev.862 | 2020-07-06 20:40:44 -0700
* GH-1041: Move compress_path to a bif that uses normalize_path (Tim Wojtulewicz, Corelight) * GH-1041: Move compress_path to a bif that uses normalize_path (Tim Wojtulewicz, Corelight)

4
NEWS
View file

@ -255,6 +255,10 @@ Deprecated Functionality
- The "BroString.h" file is deprecated, use "ZeekString.h" - The "BroString.h" file is deprecated, use "ZeekString.h"
- The str_split() BIF is deprecated, use str_split_indices(). Note
that the former returns a vector with indices starting at 1 while the
latter returns a vector with indices starting at 0.
Zeek 3.1.0 Zeek 3.1.0
========== ==========

View file

@ -1 +1 @@
3.2.0-dev.862 3.2.0-dev.864

2
doc

@ -1 +1 @@
Subproject commit d2380ce1047b4f097e1f2602bfc007d9610236bf Subproject commit ec79581811902b86c54116c8e1983b647c68ca69

View file

@ -288,9 +288,8 @@ function generate_fingerprint(c: connection, key: string)
if ( !c?$ssh ) if ( !c?$ssh )
return; return;
local lx = str_split(md5_hash(key), vector(2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30)); local lx = str_split_indices(md5_hash(key), vector(2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30));
lx[0] = ""; c$ssh$host_key = join_string_vec(lx, ":");
c$ssh$host_key = sub(join_string_vec(lx, ":"), /:/, "");
} }
event ssh1_server_host_key(c: connection, p: string, e: string) &priority=5 event ssh1_server_host_key(c: connection, p: string, e: string) &priority=5

View file

@ -126,14 +126,14 @@ function normalize_mac(a: string): string
if ( |result| == 12 ) if ( |result| == 12 )
{ {
octets = str_split(result, vector(2, 4, 6, 8, 10)); octets = str_split_indices(result, vector(2, 4, 6, 8, 10));
return fmt("%s:%s:%s:%s:%s:%s", octets[1], octets[2], octets[3], octets[4], octets[5], octets[6]); return fmt("%s:%s:%s:%s:%s:%s", octets[0], octets[1], octets[2], octets[3], octets[4], octets[5]);
} }
if ( |result| == 16 ) if ( |result| == 16 )
{ {
octets = str_split(result, vector(2, 4, 6, 8, 10, 12, 14)); octets = str_split_indices(result, vector(2, 4, 6, 8, 10, 12, 14));
return fmt("%s:%s:%s:%s:%s:%s:%s:%s", octets[1], octets[2], octets[3], octets[4], octets[5], octets[6], octets[7], octets[8]); return fmt("%s:%s:%s:%s:%s:%s:%s:%s", octets[0], octets[1], octets[2], octets[3], octets[4], octets[5], octets[6], octets[7]);
} }
return ""; return "";

View file

@ -700,10 +700,10 @@ function str_smith_waterman%(s1: string, s2: string, params: sw_params%) : sw_su
## ##
## idx: The index vector (``vector of count``) with the cutting points. ## idx: The index vector (``vector of count``) with the cutting points.
## ##
## Returns: A vector of strings. ## Returns: A one-indexed vector of strings.
## ##
## .. zeek:see:: split_string split_string1 split_string_all split_string_n ## .. zeek:see:: split_string split_string1 split_string_all split_string_n
function str_split%(s: string, idx: index_vec%): string_vec function str_split%(s: string, idx: index_vec%): string_vec &deprecated="Remove in v4.1. Use str_split_indices."
%{ %{
auto idx_v = idx->AsVector(); auto idx_v = idx->AsVector();
zeek::String::IdxVec indices(idx_v->size()); zeek::String::IdxVec indices(idx_v->size());
@ -730,6 +730,44 @@ function str_split%(s: string, idx: index_vec%): string_vec
return result_v; return result_v;
%} %}
## Splits a string into substrings with the help of an index vector of cutting
## points. This differs from str_split() in that it does not return an empty element
## at the beginning of the result.
##
## s: The string to split.
##
## idx: The index vector (``vector of count``) with the cutting points
##
## Returns: A zero-indexed vector of strings.
##
## .. zeek:see:: split_string split_string1 split_string_all split_string_n
function str_split_indices%(s: string, idx: index_vec%): string_vec
%{
auto idx_v = idx->AsVector();
zeek::String::IdxVec indices(idx_v->size());
unsigned int i;
for ( i = 0; i < idx_v->size(); i++ )
indices[i] = (*idx_v)[i]->AsCount();
zeek::String::Vec* result = s->AsString()->Split(indices);
auto result_v = zeek::make_intrusive<zeek::VectorVal>(zeek::id::string_vec);
if ( result )
{
i = 0;
for ( zeek::String::VecIt it = result->begin();
it != result->end(); ++it, ++i )
result_v->Assign(i, zeek::make_intrusive<zeek::StringVal>(*it));
// StringVal now possesses string.
delete result;
}
return result_v;
%}
## Strips whitespace at both ends of a string. ## Strips whitespace at both ends of a string.
## ##
## str: The string to strip the whitespace from. ## str: The string to strip the whitespace from.

View file

@ -20,6 +20,8 @@ s is a test
--------------------- ---------------------
[, thi, s i, s a tes, t] [, thi, s i, s a tes, t]
--------------------- ---------------------
[thi, s i, s a tes, t]
---------------------
X-Mailer X-Mailer
Testing Test (http://www.example.com) Testing Test (http://www.example.com)
--------------------- ---------------------

View file

@ -1,3 +1,4 @@
warning in /Users/tim/Desktop/projects/zeek/testing/btest/.tmp/language.strings/strings.zeek, line 26: deprecated (str_split): Remove in v4.1. Use str_split_indices.
Input string: broisaveryneatids Input string: broisaveryneatids
String splitting String splitting
@ -9,6 +10,13 @@ a
very very
neat neat
ids ids
Splitting 'broisaveryneatids' at 6 points in zero-indexed mode...
bro
is
a
very
neat
ids
Substrings Substrings
---------- ----------

View file

@ -26,6 +26,8 @@ event zeek_init()
print "---------------------"; print "---------------------";
print str_split(a, idx); print str_split(a, idx);
print "---------------------"; print "---------------------";
print str_split_indices(a, idx);
print "---------------------";
a = "X-Mailer: Testing Test (http://www.example.com)"; a = "X-Mailer: Testing Test (http://www.example.com)";
pat = /:[[:blank:]]*/; pat = /:[[:blank:]]*/;
print_string_vector(split_string1(a, pat)); print_string_vector(split_string1(a, pat));

View file

@ -1,5 +1,5 @@
# @TEST-EXEC: zeek -b %INPUT >output 2>&1 # @TEST-EXEC: zeek -b %INPUT >output 2>&1
# @TEST-EXEC: btest-diff output # @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-remove-abspath btest-diff output
# Demo policy for string functions # Demo policy for string functions
# #
@ -25,6 +25,12 @@ event zeek_init()
print fmt("Splitting '%s' at %d points...", s1, |idx1|); print fmt("Splitting '%s' at %d points...", s1, |idx1|);
local res_split: string_vec = str_split(s1, idx1); local res_split: string_vec = str_split(s1, idx1);
for ( i in res_split )
print res_split[i];
print fmt("Splitting '%s' at %d points in zero-indexed mode...", s1, |idx1|);
res_split = str_split_indices(s1, idx1);
for ( i in res_split ) for ( i in res_split )
print res_split[i]; print res_split[i];
@ -45,4 +51,3 @@ event zeek_init()
print fmt("ids: %d", strstr(s1, "ids")); print fmt("ids: %d", strstr(s1, "ids"));
print fmt("nono: %d", strstr(s1, "nono")); print fmt("nono: %d", strstr(s1, "nono"));
} }