From e6871ed3e9a9257832ff0cd3ff6b6e0fc9d4c840 Mon Sep 17 00:00:00 2001
From: Tim Wojtulewicz
Date: Mon, 6 Jul 2020 12:58:38 -0700
Subject: [PATCH] GH-1040: Add zero-indexed version of str_split

---
NOTE(review): this patch was reconstructed from a copy whose whitespace had
been collapsed onto a handful of lines. Everything in this notes section is
ignored by `git am`.

  * Line breaks were rebuilt so that every hunk matches its @@ line counts
    and the diffstat below. Indentation (tabs, Whitesmiths-style braces,
    alignment spaces) is a best-effort guess following Zeek conventions; if
    context does not match, apply with `git apply --ignore-whitespace`.
  * The -/+ pair for `c$ssh$kex_alg` in ssh/main.zeek is a whitespace-only
    change; the exact whitespace difference could not be recovered, so both
    lines read the same here. TODO confirm against the upstream commit.
  * The template arguments of zeek::make_intrusive<>() in str_split_indices
    had been stripped (they looked like markup); they are restored as
    <zeek::VectorVal> / <zeek::StringVal>, without which the BIF does not
    compile.
  * The new "## idx:" doc line gained its missing trailing period.
  * The language.strings baseline no longer embeds a developer-local
    absolute path; it uses the form produced by diff-remove-abspath.
  * The From: header carries no e-mail address (lost in the same mangling);
    supply one before running `git am`.
  * testing/btest/bifs/split_string.zeek drops `-b` from the zeek invocation;
    this looks unrelated to the change -- confirm it is intentional.

 scripts/base/protocols/ssh/main.zeek         |  7 ++--
 scripts/base/utils/addrs.zeek                |  8 ++--
 src/strings.bif                              | 42 ++++++++++++++++++-
 testing/btest/Baseline/bifs.split_string/out |  2 +
 .../btest/Baseline/language.strings/output   |  8 ++++
 testing/btest/bifs/split_string.zeek         |  4 +-
 testing/btest/language/strings.zeek          |  9 +++-
 7 files changed, 67 insertions(+), 13 deletions(-)

diff --git a/scripts/base/protocols/ssh/main.zeek b/scripts/base/protocols/ssh/main.zeek
index e8f8f25e58..085ea3799a 100644
--- a/scripts/base/protocols/ssh/main.zeek
+++ b/scripts/base/protocols/ssh/main.zeek
@@ -242,7 +242,7 @@ event ssh_capabilities(c: connection, cookie: string, capabilities: Capabilities
 	                                       server_caps$mac_algorithms);
 	c$ssh$compression_alg = find_bidirectional_alg(client_caps$compression_algorithms,
 	                                               server_caps$compression_algorithms);
-	c$ssh$kex_alg = find_alg(client_caps$kex_algorithms, server_caps$kex_algorithms);
+	c$ssh$kex_alg = find_alg(client_caps$kex_algorithms, server_caps$kex_algorithms);
 	c$ssh$host_key_alg = find_alg(client_caps$server_host_key_algorithms,
 	                              server_caps$server_host_key_algorithms);
 	}
@@ -288,9 +288,8 @@ function generate_fingerprint(c: connection, key: string)
 	if ( !c?$ssh )
 		return;
 
-	local lx = str_split(md5_hash(key), vector(2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30));
-	lx[0] = "";
-	c$ssh$host_key = sub(join_string_vec(lx, ":"), /:/, "");
+	local lx = str_split_indices(md5_hash(key), vector(2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30));
+	c$ssh$host_key = join_string_vec(lx, ":");
 	}
 
 event ssh1_server_host_key(c: connection, p: string, e: string) &priority=5
diff --git a/scripts/base/utils/addrs.zeek b/scripts/base/utils/addrs.zeek
index c2d8779fe6..92af26be96 100644
--- a/scripts/base/utils/addrs.zeek
+++ b/scripts/base/utils/addrs.zeek
@@ -126,14 +126,14 @@ function normalize_mac(a: string): string
 
 	if ( |result| == 12 )
 		{
-		octets = str_split(result, vector(2, 4, 6, 8, 10));
-		return fmt("%s:%s:%s:%s:%s:%s", octets[1], octets[2], octets[3], octets[4], octets[5], octets[6]);
+		octets = str_split_indices(result, vector(2, 4, 6, 8, 10));
+		return fmt("%s:%s:%s:%s:%s:%s", octets[0], octets[1], octets[2], octets[3], octets[4], octets[5]);
 		}
 
 	if ( |result| == 16 )
 		{
-		octets = str_split(result, vector(2, 4, 6, 8, 10, 12, 14));
-		return fmt("%s:%s:%s:%s:%s:%s:%s:%s", octets[1], octets[2], octets[3], octets[4], octets[5], octets[6], octets[7], octets[8]);
+		octets = str_split_indices(result, vector(2, 4, 6, 8, 10, 12, 14));
+		return fmt("%s:%s:%s:%s:%s:%s:%s:%s", octets[0], octets[1], octets[2], octets[3], octets[4], octets[5], octets[6], octets[7]);
 		}
 
 	return "";
diff --git a/src/strings.bif b/src/strings.bif
index 59369d8dd8..5a77d50049 100644
--- a/src/strings.bif
+++ b/src/strings.bif
@@ -700,10 +700,10 @@ function str_smith_waterman%(s1: string, s2: string, params: sw_params%) : sw_su
 ##
 ## idx: The index vector (``vector of count``) with the cutting points.
 ##
-## Returns: A vector of strings.
+## Returns: A one-indexed vector of strings.
 ##
 ## .. zeek:see:: split_string split_string1 split_string_all split_string_n
-function str_split%(s: string, idx: index_vec%): string_vec
+function str_split%(s: string, idx: index_vec%): string_vec &deprecated="Remove in v4.1. Use str_split_indices."
 	%{
 	auto idx_v = idx->AsVector();
 	zeek::String::IdxVec indices(idx_v->size());
@@ -730,6 +730,44 @@ function str_split%(s: string, idx: index_vec%): string_vec
 	return result_v;
 	%}
 
+## Splits a string into substrings with the help of an index vector of cutting
+## points. This differs from str_split() in that it does not return an empty element
+## at the beginning of the result.
+##
+## s: The string to split.
+##
+## idx: The index vector (``vector of count``) with the cutting points.
+##
+## Returns: A zero-indexed vector of strings.
+##
+## .. zeek:see:: split_string split_string1 split_string_all split_string_n
+function str_split_indices%(s: string, idx: index_vec%): string_vec
+	%{
+	auto idx_v = idx->AsVector();
+	zeek::String::IdxVec indices(idx_v->size());
+	unsigned int i;
+
+	for ( i = 0; i < idx_v->size(); i++ )
+		indices[i] = (*idx_v)[i]->AsCount();
+
+	zeek::String::Vec* result = s->AsString()->Split(indices);
+	auto result_v = zeek::make_intrusive<zeek::VectorVal>(zeek::id::string_vec);
+
+	if ( result )
+		{
+		i = 0;
+
+		for ( zeek::String::VecIt it = result->begin();
+		      it != result->end(); ++it, ++i )
+			result_v->Assign(i, zeek::make_intrusive<zeek::StringVal>(*it));
+			// StringVal now possesses string.
+
+		delete result;
+		}
+
+	return result_v;
+	%}
+
 ## Strips whitespace at both ends of a string.
 ##
 ## str: The string to strip the whitespace from.
diff --git a/testing/btest/Baseline/bifs.split_string/out b/testing/btest/Baseline/bifs.split_string/out
index 0ec2541f3d..36a3a7a887 100644
--- a/testing/btest/Baseline/bifs.split_string/out
+++ b/testing/btest/Baseline/bifs.split_string/out
@@ -20,6 +20,8 @@ s is a test
 ---------------------
 [, thi, s i, s a tes, t]
 ---------------------
+[thi, s i, s a tes, t]
+---------------------
 X-Mailer
 Testing Test (http://www.example.com)
 ---------------------
diff --git a/testing/btest/Baseline/language.strings/output b/testing/btest/Baseline/language.strings/output
index 525ce64916..cc77fe9f45 100644
--- a/testing/btest/Baseline/language.strings/output
+++ b/testing/btest/Baseline/language.strings/output
@@ -1,3 +1,4 @@
+warning in <...>/strings.zeek, line 26: deprecated (str_split): Remove in v4.1. Use str_split_indices.
 Input string: broisaveryneatids
 
 String splitting
@@ -9,6 +10,13 @@ a
 very
 neat
 ids
+Splitting 'broisaveryneatids' at 6 points in zero-indexed mode...
+bro
+is
+a
+very
+neat
+ids
 
 Substrings
 ----------
diff --git a/testing/btest/bifs/split_string.zeek b/testing/btest/bifs/split_string.zeek
index 9692f32da5..075d7cd0e2 100644
--- a/testing/btest/bifs/split_string.zeek
+++ b/testing/btest/bifs/split_string.zeek
@@ -1,5 +1,5 @@
 #
-# @TEST-EXEC: zeek -b %INPUT >out
+# @TEST-EXEC: zeek %INPUT >out
 # @TEST-EXEC: btest-diff out
 
 function print_string_vector(v: string_vec)
@@ -26,6 +26,8 @@ event zeek_init()
 	print "---------------------";
 	print str_split(a, idx);
 	print "---------------------";
+	print str_split_indices(a, idx);
+	print "---------------------";
 	a = "X-Mailer: Testing Test (http://www.example.com)";
 	pat = /:[[:blank:]]*/;
 	print_string_vector(split_string1(a, pat));
diff --git a/testing/btest/language/strings.zeek b/testing/btest/language/strings.zeek
index a5d8cbf69b..b5c9bfe431 100644
--- a/testing/btest/language/strings.zeek
+++ b/testing/btest/language/strings.zeek
@@ -1,5 +1,5 @@
 # @TEST-EXEC: zeek -b %INPUT >output 2>&1
-# @TEST-EXEC: btest-diff output
+# @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-remove-abspath btest-diff output
 
 # Demo policy for string functions
 #
@@ -25,6 +25,12 @@ event zeek_init()
 	print fmt("Splitting '%s' at %d points...", s1, |idx1|);
 	local res_split: string_vec = str_split(s1, idx1);
 
+	for ( i in res_split )
+		print res_split[i];
+
+	print fmt("Splitting '%s' at %d points in zero-indexed mode...", s1, |idx1|);
+	res_split = str_split_indices(s1, idx1);
+
 	for ( i in res_split )
 		print res_split[i];
 
@@ -45,4 +51,3 @@ event zeek_init()
 	print fmt("ids: %d", strstr(s1, "ids"));
 	print fmt("nono: %d", strstr(s1, "nono"));
 }
-