Merge remote-tracking branch 'origin/topic/bbannier/integer-conversion-error-handling'

* origin/topic/bbannier/integer-conversion-error-handling:
  Also trim trailing spaces in `to_count`/`to_int` inputs
  Align error handling in `to_int` with existing behavior of `to_count`
  Baseline handling of leading/trailing spaces in `to_count`/`to_int`.
  Add error messages to `to_count`/`to_int` baselines
This commit is contained in:
Tim Wojtulewicz 2025-02-20 15:35:21 -07:00
commit 07a03bbfe9
9 changed files with 75 additions and 22 deletions

35
CHANGES
View file

@ -1,3 +1,38 @@
7.2.0-dev.210 | 2025-02-20 15:35:21 -0700
* Also trim trailing spaces in `to_count`/`to_int` inputs (Benjamin Bannier, Corelight)
Previously we would already trim leading spaces in inputs to `to_count`
and `to_int`, effectively by just passing the behavior of the low-level
functions used in their implementations to the user. While this was
useful, it was also inconsistent in that we did not allow trailing
spaces, which we enable with this patch.
* Align error handling in `to_int` with existing behavior of `to_count` (Benjamin Bannier, Corelight)
Previously `to_int` would silently ignore invalid inputs and simply
return `0` while `to_count` would return an error; this patch changes
`to_int` to behave like `to_count`.
This introduces a breaking change in that `to_int` now raises an error
for trailing spaces (but still accepts leading spaces) where it
previously would have silently accepted it. This is consistent with
the behavior of `to_count`, but one could also argue that both of
these should only accept properly trimmed input; I did not go that route
since that would introduce breaking changes for both these functions
instead of for just one of them.
* Baseline handling of leading/trailing spaces in `to_count`/`to_int`. (Benjamin Bannier, Corelight)
Currently `to_count` reports an error for trailing spaces (but not for
leading ones) while `to_int` silently accepts them. This patch adds
baselines to capture the current behavior.
* Add error messages to `to_count`/`to_int` baselines (Benjamin Bannier, Corelight)
This captures error messages produced by `to_count`, but will also
baseline future error messages from `to_int` once we introduce them.
7.2.0-dev.203 | 2025-02-18 08:51:23 -0700 7.2.0-dev.203 | 2025-02-18 08:51:23 -0700
* cirrus: Bump FreeBSD 14 task to 14.2 (Arne Welzel, Corelight) * cirrus: Bump FreeBSD 14 task to 14.2 (Arne Welzel, Corelight)

View file

@ -1 +1 @@
7.2.0-dev.205 7.2.0-dev.210

View file

@ -7,14 +7,16 @@
##! You'll find most of Zeek's built-in functions that aren't protocol-specific ##! You'll find most of Zeek's built-in functions that aren't protocol-specific
##! in this file. ##! in this file.
%%{ // C segment %%{ // C++ segment
#include <cmath> #include <sys/stat.h>
#include <vector>
#include <algorithm> #include <algorithm>
#include <cmath> #include <cmath>
#include <sys/stat.h> #include <cmath>
#include <cstdio> #include <cstdio>
#include <cstring>
#include <ctime> #include <ctime>
#include <vector>
#include "zeek/digest.h" #include "zeek/digest.h"
#include "zeek/Reporter.h" #include "zeek/Reporter.h"
@ -2613,12 +2615,11 @@ function to_int%(str: string%): int
zeek_int_t i = strtoll(s, &end_s, 10); zeek_int_t i = strtoll(s, &end_s, 10);
#if 0 if ( s[0] == '\0' || std::any_of(static_cast<const char*>(end_s), s + ::strlen(s),
// Not clear we should complain. For example, is " 205 " [](char c) { return ! (c == '\0' || ::isspace(c)); }) )
// a legal conversion? {
if ( s[0] == '\0' || end_s[0] != '\0' )
zeek::emit_builtin_error("bad conversion to integer", @ARG@[0]); zeek::emit_builtin_error("bad conversion to integer", @ARG@[0]);
#endif }
return zeek::val_mgr->Int(i); return zeek::val_mgr->Int(i);
%} %}
@ -2680,13 +2681,14 @@ function to_count%(str: string%): count
const char* s = str->CheckString(); const char* s = str->CheckString();
char* end_s; char* end_s;
uint64_t u = (uint64_t) strtoull(s, &end_s, 10); uint64_t u = static_cast<uint64_t>(strtoull(s, &end_s, 10));
if ( s[0] == '\0' || end_s[0] != '\0' ) if ( s[0] == '\0' || std::any_of(static_cast<const char*>(end_s), s + ::strlen(s),
{ [](char c) { return ! (c == '\0' || ::isspace(c)); }) )
{
zeek::emit_builtin_error("bad conversion to count", @ARG@[0]); zeek::emit_builtin_error("bad conversion to count", @ARG@[0]);
u = 0; u = 0;
} }
return zeek::val_mgr->Count(u); return zeek::val_mgr->Count(u);
%} %}

View file

@ -0,0 +1,4 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
error in <...>/to_count.zeek, line 8: bad conversion to count (int_to_count(a) and -2)
error in <...>/to_count.zeek, line 20: bad conversion to count (to_count() and )
error in <...>/to_count.zeek, line 25: bad conversion to count (to_count(not a count) and not a count)

View file

@ -6,6 +6,8 @@
7 7
0 0
18446744073709551611 18446744073709551611
205
206
0 0
123 123
9223372036854775808 and 9223372036854775808 are the same 9223372036854775808 and 9223372036854775808 are the same

View file

@ -0,0 +1,2 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
error in <...>/to_int.zeek, line 10: bad conversion to integer (to_int(not an int) and not an int)

View file

@ -3,6 +3,8 @@
-1 -1
4294967296 4294967296
0 0
205
206
3 3
4 4
-3 -3

View file

@ -1,6 +1,6 @@
# # @TEST-EXEC: zeek -b %INPUT 1>out 2>err
# @TEST-EXEC: zeek -b %INPUT >out
# @TEST-EXEC: btest-diff out # @TEST-EXEC: btest-diff out
# @TEST-EXEC: TEST_DIFF_CANONIFIER=${SCRIPTS}/diff-remove-abspath btest-diff err
event zeek_init() event zeek_init()
{ {
@ -19,17 +19,20 @@ event zeek_init()
print to_count("7"); print to_count("7");
print to_count(""); print to_count("");
print to_count("-5"); print to_count("-5");
# We automatically trim both leading and trailing whitespace.
print to_count(" 205"); # Okay.
print to_count("206 "); # Okay.
print to_count("not a count"); print to_count("not a count");
local e: port = 123/tcp; local e: port = 123/tcp;
print port_to_count(e); print port_to_count(e);
local origString = "9223372036854775808"; local origString = "9223372036854775808";
local directCount: count = 9223372036854775808; local directCount: count = 9223372036854775808;
local fromStringCount: count = to_count(origString); local fromStringCount: count = to_count(origString);
if ( directCount == fromStringCount ) if ( directCount == fromStringCount )
print fmt("%s and %s are the same", directCount, fromStringCount); print fmt("%s and %s are the same", directCount, fromStringCount);
else else
print fmt("%s and %s are not the same", directCount, fromStringCount); print fmt("%s and %s are not the same", directCount, fromStringCount);
} }

View file

@ -1,6 +1,6 @@
# # @TEST-EXEC: zeek -b %INPUT 1>out 2>err
# @TEST-EXEC: zeek -b %INPUT >out
# @TEST-EXEC: btest-diff out # @TEST-EXEC: btest-diff out
# @TEST-EXEC: TEST_DIFF_CANONIFIER=${SCRIPTS}/diff-remove-abspath btest-diff err
event zeek_init() event zeek_init()
{ {
@ -8,6 +8,9 @@ event zeek_init()
print to_int("-1"); print to_int("-1");
print to_int("4294967296"); print to_int("4294967296");
print to_int("not an int"); print to_int("not an int");
# We automatically trim both leading and trailing whitespace.
print to_int(" 205"); # Okay.
print to_int("206 "); # Okay.
local a: double = 3.14; local a: double = 3.14;
print double_to_int(a); print double_to_int(a);