diff --git a/scripts/base/init-bare.zeek b/scripts/base/init-bare.zeek
index 792dffc549..e94d4db1ef 100644
--- a/scripts/base/init-bare.zeek
+++ b/scripts/base/init-bare.zeek
@@ -5572,6 +5572,13 @@ const bits_per_uid: count = 96 &redef;
 ## to generate installation-unique file IDs (the *id* field of :zeek:see:`fa_file`).
 const digest_salt = "Please change this value." &redef;
 
+## Maximum string length allowed for calls to the :zeek:see:`find_all` and
+## :zeek:see:`find_all_ordered` BIFs when the caller does not pass an explicit
+## limit. Longer inputs are not searched: the BIF returns an empty result and
+## reports a ``max_find_all_string_length_exceeded`` weird. Set to 0 to disable
+## the limit.
+const max_find_all_string_length: int = 10000 &redef;
+
 global done_with_network = F;
 event net_done(t: time) { done_with_network = T; }
 
diff --git a/src/strings.bif b/src/strings.bif
index 7dc090f2f8..1144bae14f 100644
--- a/src/strings.bif
+++ b/src/strings.bif
@@ -967,19 +967,65 @@ function safe_shell_quote%(source: string%): string
 	return zeek::make_intrusive<zeek::StringVal>(new zeek::String(1, dst, j));
 	%}
 
+%%{
+// Decides whether an input string is too long for find_all()/find_all_ordered().
+//
+// str_len: length of the string handed to the BIF.
+// max_size: limit requested by the caller. A negative value selects the script-level
+//           constant max_find_all_string_length; zero disables the check.
+// frame: frame of the BIF call; its call location is included in the weird.
+//
+// Returns true, after reporting a max_find_all_string_length_exceeded weird, if the
+// effective limit is positive and str_len is larger than it. Returns false otherwise.
+static bool exceeds_max_string_length(int str_len, int max_size, zeek::detail::Frame* frame)
+	{
+	bool using_constant = false;
+	if ( max_size < 0 )
+		{
+		// Function-local static: the lookup of the constant happens only on first use.
+		static auto max_find_all_string_length = zeek::id::find_val<zeek::IntVal>("max_find_all_string_length");
+		max_size = max_find_all_string_length->Get();
+		using_constant = true;
+		}
+
+	if ( max_size > 0 && str_len > max_size )
+		{
+		zeek::ODesc desc;
+		// NOTE(review): dereferenced unchecked; assumes frame and its call location are never null here.
+		frame->GetCallLocation()->Describe(&desc);
+		std::string addl = zeek::util::fmt("%s: length %d exceeded %d", desc.Description(), str_len, max_size);
+		if ( using_constant )
+			addl.append(" (from constant max_find_all_string_length)");
+
+		zeek::reporter->Weird("max_find_all_string_length_exceeded", addl.c_str());
+		return true;
+		}
+
+	return false;
+	}
+%%}
+
 ## Finds all occurrences of a pattern in a string.
 ##
 ## str: The string to inspect.
 ##
 ## re: The pattern to look for in *str*.
 ##
+## max_str_size: The maximum string size allowed as input. If set to -1, this will use the
+##               :zeek:see:`max_find_all_string_length` global constant. If set to 0, this
+##               check is disabled. If the length of *str* is greater than this size, an
+##               empty set is returned.
+##
 ## Returns: The set of strings in *str* that match *re*, or the empty set.
 ##
 ## .. zeek:see: find_all_ordered find_last strstr
-function find_all%(str: string, re: pattern%) : string_set
+function find_all%(str: string, re: pattern, max_str_size: int &default=-1%) : string_set
 	%{
 	auto a = zeek::make_intrusive<zeek::TableVal>(zeek::id::string_set);
 
+	if ( exceeds_max_string_length(str->Len(), max_str_size, frame) )
+		return a;
+
 	const u_char* s = str->Bytes();
 	const u_char* e = s + str->Len();
 
@@ -1005,13 +1051,21 @@ function find_all%(str: string, re: pattern%) : string_set
 ##
 ## re: The pattern to look for in *str*.
 ##
+## max_str_size: The maximum string size allowed as input. If set to -1, this will use the
+##               :zeek:see:`max_find_all_string_length` global constant. If set to 0, this
+##               check is disabled. If the length of *str* is greater than this size, an
+##               empty vector is returned.
+##
 ## Returns: All strings in *str* that match *re*, or an empty vector.
 ##
 ## .. zeek:see: find_all find_last strstr
-function find_all_ordered%(str: string, re: pattern%) : string_vec
+function find_all_ordered%(str: string, re: pattern, max_str_size: int &default=-1%) : string_vec
 	%{
 	auto a = zeek::make_intrusive<zeek::VectorVal>(zeek::id::string_vec);
 
+	if ( exceeds_max_string_length(str->Len(), max_str_size, frame) )
+		return a;
+
 	const u_char* s = str->Bytes();
 	const u_char* e = s + str->Len();
 
diff --git a/testing/btest/Baseline/bifs.find_all/out b/testing/btest/Baseline/bifs.find_all/out
index 5dc6829d3a..edae6409cf 100644
--- a/testing/btest/Baseline/bifs.find_all/out
+++ b/testing/btest/Baseline/bifs.find_all/out
@@ -3,3 +3,4 @@ hi
 es
 -------------------
 0
+0
diff --git a/testing/btest/Baseline/bifs.find_all/weird.log b/testing/btest/Baseline/bifs.find_all/weird.log
new file mode 100644
index 0000000000..21fc23cb74
--- /dev/null
+++ b/testing/btest/Baseline/bifs.find_all/weird.log
@@ -0,0 +1,11 @@
+### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
+#separator \x09 +#set_separator , +#empty_field (empty) +#unset_field - +#path weird +#open XXXX-XX-XX-XX-XX-XX +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p name addl notice peer source +#types time string addr port addr port string string bool string string +XXXXXXXXXX.XXXXXX - - - - - max_find_all_string_length_exceeded <...>/find_all.zeek, line 22: length 14 exceeded 5 F zeek - +#close XXXX-XX-XX-XX-XX-XX diff --git a/testing/btest/Baseline/bifs.find_all_ordered/out b/testing/btest/Baseline/bifs.find_all_ordered/out index 3b2a0f0a88..8bb29c37c3 100644 --- a/testing/btest/Baseline/bifs.find_all_ordered/out +++ b/testing/btest/Baseline/bifs.find_all_ordered/out @@ -7,3 +7,4 @@ [foo] [bar, foo] [] +[] diff --git a/testing/btest/Baseline/bifs.find_all_ordered/weird.log b/testing/btest/Baseline/bifs.find_all_ordered/weird.log new file mode 100644 index 0000000000..77f889b7e2 --- /dev/null +++ b/testing/btest/Baseline/bifs.find_all_ordered/weird.log @@ -0,0 +1,11 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. 
+#separator \x09 +#set_separator , +#empty_field (empty) +#unset_field - +#path weird +#open XXXX-XX-XX-XX-XX-XX +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p name addl notice peer source +#types time string addr port addr port string string bool string string +XXXXXXXXXX.XXXXXX - - - - - max_find_all_string_length_exceeded <...>/find_all_ordered.zeek, line 24: length 14 exceeded 5 F zeek - +#close XXXX-XX-XX-XX-XX-XX diff --git a/testing/btest/bifs/find_all.zeek b/testing/btest/bifs/find_all.zeek index c51086ade0..4f9c486007 100644 --- a/testing/btest/bifs/find_all.zeek +++ b/testing/btest/bifs/find_all.zeek @@ -1,6 +1,8 @@ -# # @TEST-EXEC: zeek -b %INPUT >out # @TEST-EXEC: btest-diff out +# @TEST-EXEC: TEST_DIFF_CANONIFIER='$SCRIPTS/diff-canonifier | $SCRIPTS/diff-remove-abspath' btest-diff weird.log + +@load base/frameworks/notice/weird event zeek_init() { @@ -15,4 +17,8 @@ event zeek_init() print i; print "-------------------"; print |b2|; + + # Test input string length limiting. + local b3 = find_all(a, pat, 5); + print |b3|; } diff --git a/testing/btest/bifs/find_all_ordered.zeek b/testing/btest/bifs/find_all_ordered.zeek index 90d2cfa4f3..f8fa79818b 100644 --- a/testing/btest/bifs/find_all_ordered.zeek +++ b/testing/btest/bifs/find_all_ordered.zeek @@ -1,5 +1,8 @@ # @TEST-EXEC: zeek -b %INPUT >out # @TEST-EXEC: btest-diff out +# @TEST-EXEC: TEST_DIFF_CANONIFIER='$SCRIPTS/diff-canonifier | $SCRIPTS/diff-remove-abspath' btest-diff weird.log + +@load base/frameworks/notice/weird event zeek_init() { @@ -16,4 +19,8 @@ event zeek_init() for ( i in v ) print find_all_ordered(v[i], pat); + + # Test input string length limiting. 
+ local b = find_all_ordered(v[0], pat, 5); + print b; } diff --git a/testing/btest/scripts/base/frameworks/telemetry/cluster.zeek b/testing/btest/scripts/base/frameworks/telemetry/cluster.zeek index 0ba192e7d9..df9ae29b32 100644 --- a/testing/btest/scripts/base/frameworks/telemetry/cluster.zeek +++ b/testing/btest/scripts/base/frameworks/telemetry/cluster.zeek @@ -39,7 +39,7 @@ event run_test() # from the response. Not sure how that's helpful. We simply # grep out the zeek_version_info{...} endpoint="..." pieces and # expect one for each node to exist as a smoke test. - local version_infos = find_all(response$body, /zeek_version_info\{[^}]+\}/); + local version_infos = find_all(response$body, /zeek_version_info\{[^}]+\}/, 0); local endpoints: vector of string; for ( info in version_infos ) for ( ep in find_all(info, /endpoint=\"[^"]+\"/))