Merge remote-tracking branch 'origin/topic/timw/1040-str-split'

* origin/topic/timw/1040-str-split: GH-1040: Add zero-indexed version of str_split
2025-10-02 14:48:21 +00:00 · 2020-07-06 21:06:41 -07:00 · 2020-07-06 21:06:41 -07:00 · 7b15b82009
commit 7b15b82009
parent 09483619ef e6871ed3e9
11 changed files with 76 additions and 14 deletions
--- a/4
+++ b/4
@ -1,4 +1,8 @@
 3.2.0-dev.864 | 2020-07-06 21:06:41 -0700
  * GH-1040: Add zero-indexed version of str_split (Tim Wojtulewicz, Corelight)
 3.2.0-dev.862 | 2020-07-06 20:40:44 -0700
  * GH-1041: Move compress_path to a bif that uses normalize_path (Tim Wojtulewicz, Corelight)
--- a/4
+++ b/4
@ -255,6 +255,10 @@ Deprecated Functionality
 - The "BroString.h" file is deprecated, use "ZeekString.h"
 - The str_split() BIF is deprecated, use str_split_indices().  Note
  that the former returns a vector with indices starting at 1 while the
  latter returns a vector with indices starting at 0.
 Zeek 3.1.0
 ==========
--- a/2
+++ b/2
@ -1 +1 @@
-3.2.0-dev.862
+3.2.0-dev.864
--- a/2
+++ b/2
@ -1 +1 @@
-Subproject commit d2380ce1047b4f097e1f2602bfc007d9610236bf
+Subproject commit ec79581811902b86c54116c8e1983b647c68ca69
--- a/scripts/base/protocols/ssh/main.zeek
+++ b/scripts/base/protocols/ssh/main.zeek
@ -242,7 +242,7 @@ event ssh_capabilities(c: connection, cookie: string, capabilities: Capabilities
 	                                               server_caps$mac_algorithms);
 	c$ssh$compression_alg = find_bidirectional_alg(client_caps$compression_algorithms,
 	                                               server_caps$compression_algorithms);
-	c$ssh$kex_alg         = find_alg(client_caps$kex_algorithms, server_caps$kex_algorithms);	
+	c$ssh$kex_alg         = find_alg(client_caps$kex_algorithms, server_caps$kex_algorithms);
 	c$ssh$host_key_alg    = find_alg(client_caps$server_host_key_algorithms,
 	                                 server_caps$server_host_key_algorithms);
 	}
@ -288,9 +288,8 @@ function generate_fingerprint(c: connection, key: string)
 	if ( !c?$ssh )
 		return;
-	local lx = str_split(md5_hash(key), vector(2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30));
+	local lx = str_split_indices(md5_hash(key), vector(2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30));
-	lx[0] = "";
+	c$ssh$host_key = join_string_vec(lx, ":");
 	c$ssh$host_key = sub(join_string_vec(lx, ":"), /:/, "");
 	}
 event ssh1_server_host_key(c: connection, p: string, e: string) &priority=5
--- a/scripts/base/utils/addrs.zeek
+++ b/scripts/base/utils/addrs.zeek
@ -126,14 +126,14 @@ function normalize_mac(a: string): string
 	if ( |result| == 12 )
 		{
-		octets = str_split(result, vector(2, 4, 6, 8, 10));
+		octets = str_split_indices(result, vector(2, 4, 6, 8, 10));
-		return fmt("%s:%s:%s:%s:%s:%s", octets[1], octets[2], octets[3], octets[4], octets[5], octets[6]);
+		return fmt("%s:%s:%s:%s:%s:%s", octets[0], octets[1], octets[2], octets[3], octets[4], octets[5]);
 		}
 	if ( |result| == 16 )
 		{
-		octets = str_split(result, vector(2, 4, 6, 8, 10, 12, 14));
+		octets = str_split_indices(result, vector(2, 4, 6, 8, 10, 12, 14));
-		return fmt("%s:%s:%s:%s:%s:%s:%s:%s", octets[1], octets[2], octets[3], octets[4], octets[5], octets[6], octets[7], octets[8]);
+		return fmt("%s:%s:%s:%s:%s:%s:%s:%s", octets[0], octets[1], octets[2], octets[3], octets[4], octets[5], octets[6], octets[7]);
 		}
 	return "";
--- a/src/strings.bif
+++ b/src/strings.bif
@ -700,10 +700,10 @@ function str_smith_waterman%(s1: string, s2: string, params: sw_params%) : sw_su
 ##
 ## idx: The index vector (``vector of count``) with the cutting points.
 ##
-## Returns: A vector of strings.
+## Returns: A one-indexed vector of strings.
 ##
 ## .. zeek:see:: split_string split_string1 split_string_all split_string_n
-function str_split%(s: string, idx: index_vec%): string_vec
+function str_split%(s: string, idx: index_vec%): string_vec &deprecated="Remove in v4.1. Use str_split_indices."
 	%{
 	auto idx_v = idx->AsVector();
 	zeek::String::IdxVec indices(idx_v->size());
@ -730,6 +730,44 @@ function str_split%(s: string, idx: index_vec%): string_vec
 	return result_v;
 	%}
 ## Splits a string into substrings with the help of an index vector of cutting
 ## points. This differs from str_split() in that it does not return an empty
 ## element at the beginning of the result; the first substring is stored at
 ## index 0.
 ##
 ## s: The string to split.
 ##
 ## idx: The index vector (``vector of count``) with the cutting points.
 ##
 ## Returns: A zero-indexed vector of strings. The vector is empty if the
 ##          underlying split produces no result.
 ##
 ## .. zeek:see:: split_string split_string1 split_string_all split_string_n
 function str_split_indices%(s: string, idx: index_vec%): string_vec
 	%{
 	// Copy the script-level index vector into the index container that
 	// zeek::String::Split() takes.
 	auto idx_v = idx->AsVector();
 	zeek::String::IdxVec indices(idx_v->size());
 	unsigned int i;
 	for ( i = 0; i < idx_v->size(); i++ )
 		indices[i] = (*idx_v)[i]->AsCount();
 	// Split() hands back a heap-allocated vector of strings; it is checked
 	// for null below before being used.
 	zeek::String::Vec* result = s->AsString()->Split(indices);
 	auto result_v = zeek::make_intrusive<zeek::VectorVal>(zeek::id::string_vec);
 	if ( result )
 		{
 		// Fill the result starting at index 0, so no leading empty element
 		// is produced.
 		i = 0;
 		for ( zeek::String::VecIt it = result->begin();
 		      it != result->end(); ++it, ++i )
 			result_v->Assign(i, zeek::make_intrusive<zeek::StringVal>(*it));
 			// StringVal now possesses string.
 		// Only the container is released here; the individual strings were
 		// handed to the StringVals created above.
 		delete result;
 		}
 	return result_v;
 	%}
 ## Strips whitespace at both ends of a string.
 ##
 ## str: The string to strip the whitespace from.
--- a/testing/btest/Baseline/bifs.split_string/out
+++ b/testing/btest/Baseline/bifs.split_string/out
@ -20,6 +20,8 @@ s is a test
 ---------------------
 [, thi, s i, s a tes, t]
 ---------------------
 [thi, s i, s a tes, t]
 ---------------------
 X-Mailer
 Testing Test (http://www.example.com)
 ---------------------
--- a/testing/btest/Baseline/language.strings/output
+++ b/testing/btest/Baseline/language.strings/output
@ -1,3 +1,4 @@
 warning in /Users/tim/Desktop/projects/zeek/testing/btest/.tmp/language.strings/strings.zeek, line 26: deprecated (str_split): Remove in v4.1. Use str_split_indices.
 Input string: broisaveryneatids
 String splitting
@ -9,6 +10,13 @@ a
 very
 neat
 ids
 Splitting 'broisaveryneatids' at 6 points in zero-indexed mode...
 bro
 is
 a
 very
 neat
 ids
 Substrings
 ----------
--- a/testing/btest/bifs/split_string.zeek
+++ b/testing/btest/bifs/split_string.zeek
@ -26,6 +26,8 @@ event zeek_init()
 	print "---------------------";
 	print str_split(a, idx);
 	print "---------------------";
 	print str_split_indices(a, idx);
 	print "---------------------";
 	a = "X-Mailer: Testing Test (http://www.example.com)";
 	pat = /:[[:blank:]]*/;
 	print_string_vector(split_string1(a, pat));
--- a/testing/btest/language/strings.zeek
+++ b/testing/btest/language/strings.zeek
@ -1,5 +1,5 @@
 # @TEST-EXEC: zeek -b %INPUT  >output 2>&1
-# @TEST-EXEC: btest-diff output
+# @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-remove-abspath btest-diff output
 # Demo policy for string functions
 #
@ -25,6 +25,12 @@ event zeek_init()
 	print fmt("Splitting '%s' at %d points...", s1, |idx1|);
 	local res_split: string_vec = str_split(s1, idx1);
 	for ( i in res_split )
 		print res_split[i];
 	print fmt("Splitting '%s' at %d points in zero-indexed mode...", s1, |idx1|);
 	res_split = str_split_indices(s1, idx1);
 	for ( i in res_split )
 		print res_split[i];
@ -45,4 +51,3 @@ event zeek_init()
 	print fmt("ids: %d", strstr(s1, "ids"));
 	print fmt("nono: %d", strstr(s1, "nono"));
 }
		`@ -1 +1 @@`
			`Subproject commit d2380ce1047b4f097e1f2602bfc007d9610236bf`				`Subproject commit ec79581811902b86c54116c8e1983b647c68ca69`