diff --git a/CHANGES b/CHANGES index 1fe0fb74d1..e740d60b25 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,38 @@ +7.2.0-dev.210 | 2025-02-20 15:35:21 -0700 + + * Also trim trailing spaces in `to_count`/`to_int` inputs (Benjamin Bannier, Corelight) + + Previously we would already trim leading spaces in inputs to `to_count` + and `to_int`, effectively by just passing the behavior of the low-level + functions used in their implementations to the user. While this was + useful it was also inconsistent in that we did not allow trailing + spaces which we enable with this patch. + + * Align error handling in `to_int` with existing behavior of `to_count` (Benjamin Bannier, Corelight) + + Previously `to_int` would silently ignore invalid inputs and simply + return `0` while `to_count` would return an error; this patch changes + `to_int` to behave like `to_count`. + + This introduces a breaking change in that `to_int` now raises an error + for trailing spaces (but still accepts leading spaces) where it + previously would have silently accepted them. This is consistent with + the behavior of `to_count`, but one could also argue that both of + these should only accept properly trimmed input; I did not go that route + since that would introduce breaking changes for both these functions + instead of for just one of them. + + * Baseline handling of leading/trailing spaces in `to_count`/`to_int`. (Benjamin Bannier, Corelight) + + Currently `to_count` reports an error for trailing spaces (but not for + leading ones) while `to_int` silently accepts them. This patch adds + baselines to capture the current behavior. + + * Add error messages to `to_count`/`to_int` baselines (Benjamin Bannier, Corelight) + + This captures error messages produced by `to_count`, but will also + baseline future error messages from `to_int` once we introduce them. 
+ 7.2.0-dev.203 | 2025-02-18 08:51:23 -0700 * cirrus: Bump FreeBSD 14 task to 14.2 (Arne Welzel, Corelight) diff --git a/VERSION b/VERSION index 7b5c646cb1..1204c0ebf3 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -7.2.0-dev.205 +7.2.0-dev.210 diff --git a/src/zeek.bif b/src/zeek.bif index 628ed61aec..b96dfc842a 100644 --- a/src/zeek.bif +++ b/src/zeek.bif @@ -7,14 +7,16 @@ ##! You'll find most of Zeek's built-in functions that aren't protocol-specific ##! in this file. -%%{ // C segment -#include -#include +%%{ // C++ segment +#include + #include #include -#include +#include #include +#include #include +#include #include "zeek/digest.h" #include "zeek/Reporter.h" @@ -2613,12 +2615,11 @@ function to_int%(str: string%): int zeek_int_t i = strtoll(s, &end_s, 10); -#if 0 - // Not clear we should complain. For example, is " 205 " - // a legal conversion? - if ( s[0] == '\0' || end_s[0] != '\0' ) + if ( s[0] == '\0' || std::any_of(static_cast(end_s), s + ::strlen(s), + [](char c) { return ! (c == '\0' || ::isspace(c)); }) ) + { zeek::emit_builtin_error("bad conversion to integer", @ARG@[0]); -#endif + } return zeek::val_mgr->Int(i); %} @@ -2680,13 +2681,14 @@ function to_count%(str: string%): count const char* s = str->CheckString(); char* end_s; - uint64_t u = (uint64_t) strtoull(s, &end_s, 10); + uint64_t u = static_cast(strtoull(s, &end_s, 10)); - if ( s[0] == '\0' || end_s[0] != '\0' ) - { + if ( s[0] == '\0' || std::any_of(static_cast(end_s), s + ::strlen(s), + [](char c) { return ! (c == '\0' || ::isspace(c)); }) ) + { zeek::emit_builtin_error("bad conversion to count", @ARG@[0]); - u = 0; - } + u = 0; + } return zeek::val_mgr->Count(u); %} diff --git a/testing/btest/Baseline/bifs.to_count/err b/testing/btest/Baseline/bifs.to_count/err new file mode 100644 index 0000000000..1b96f363d0 --- /dev/null +++ b/testing/btest/Baseline/bifs.to_count/err @@ -0,0 +1,4 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. 
Requires BTest >= 0.63. +error in <...>/to_count.zeek, line 8: bad conversion to count (int_to_count(a) and -2) +error in <...>/to_count.zeek, line 20: bad conversion to count (to_count() and ) +error in <...>/to_count.zeek, line 25: bad conversion to count (to_count(not a count) and not a count) diff --git a/testing/btest/Baseline/bifs.to_count/out b/testing/btest/Baseline/bifs.to_count/out index 0c99f6e402..d727a32bbe 100644 --- a/testing/btest/Baseline/bifs.to_count/out +++ b/testing/btest/Baseline/bifs.to_count/out @@ -6,6 +6,8 @@ 7 0 18446744073709551611 +205 +206 0 123 9223372036854775808 and 9223372036854775808 are the same diff --git a/testing/btest/Baseline/bifs.to_int/err b/testing/btest/Baseline/bifs.to_int/err new file mode 100644 index 0000000000..1e9d23dcb2 --- /dev/null +++ b/testing/btest/Baseline/bifs.to_int/err @@ -0,0 +1,2 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +error in <...>/to_int.zeek, line 10: bad conversion to integer (to_int(not an int) and not an int) diff --git a/testing/btest/Baseline/bifs.to_int/out b/testing/btest/Baseline/bifs.to_int/out index c0c6d56ef5..7ba7c89957 100644 --- a/testing/btest/Baseline/bifs.to_int/out +++ b/testing/btest/Baseline/bifs.to_int/out @@ -3,6 +3,8 @@ -1 4294967296 0 +205 +206 3 4 -3 diff --git a/testing/btest/bifs/to_count.zeek b/testing/btest/bifs/to_count.zeek index 7489ca8b79..1e1ed72c2e 100644 --- a/testing/btest/bifs/to_count.zeek +++ b/testing/btest/bifs/to_count.zeek @@ -1,6 +1,6 @@ -# -# @TEST-EXEC: zeek -b %INPUT >out +# @TEST-EXEC: zeek -b %INPUT 1>out 2>err # @TEST-EXEC: btest-diff out +# @TEST-EXEC: TEST_DIFF_CANONIFIER=${SCRIPTS}/diff-remove-abspath btest-diff err event zeek_init() { @@ -19,17 +19,20 @@ event zeek_init() print to_count("7"); print to_count(""); print to_count("-5"); + # We automatically trim both leading and trailing whitespace. + print to_count(" 205"); # Okay. + print to_count("206 "); # Okay. 
print to_count("not a count"); local e: port = 123/tcp; print port_to_count(e); - local origString = "9223372036854775808"; + local origString = "9223372036854775808"; local directCount: count = 9223372036854775808; local fromStringCount: count = to_count(origString); if ( directCount == fromStringCount ) - print fmt("%s and %s are the same", directCount, fromStringCount); + print fmt("%s and %s are the same", directCount, fromStringCount); else - print fmt("%s and %s are not the same", directCount, fromStringCount); + print fmt("%s and %s are not the same", directCount, fromStringCount); } diff --git a/testing/btest/bifs/to_int.zeek b/testing/btest/bifs/to_int.zeek index 17e433f975..82d789dcea 100644 --- a/testing/btest/bifs/to_int.zeek +++ b/testing/btest/bifs/to_int.zeek @@ -1,6 +1,6 @@ -# -# @TEST-EXEC: zeek -b %INPUT >out +# @TEST-EXEC: zeek -b %INPUT 1>out 2>err # @TEST-EXEC: btest-diff out +# @TEST-EXEC: TEST_DIFF_CANONIFIER=${SCRIPTS}/diff-remove-abspath btest-diff err event zeek_init() { @@ -8,6 +8,9 @@ event zeek_init() print to_int("-1"); print to_int("4294967296"); print to_int("not an int"); + # We automatically trim both leading and trailing whitespace. + print to_int(" 205"); # Okay. + print to_int("206 "); # Okay. local a: double = 3.14; print double_to_int(a);