Merge remote-tracking branch 'origin/topic/jsiwek/gh-822-ubsan-ci'

* origin/topic/jsiwek/gh-822-ubsan-ci:
  Fix negative-value-left-shift undefined behavior in patricia trie
  Improve negation of ConstExpr
  Avoid signed integer overflow when combining SMB header PID bits
  Avoid unary negation of INT64_MIN in modp_litoa10
  Avoid double-to-int conversion overflows in modp_dtoa functions
  Fix divide-by-zero in Entropy analyzer
  Fix divide-by-zero in stats/profiling memory usage calculation
  Fix uninitialized field in POP3 fuzzer
  Add framework for running UndefinedBehaviorSanitizer in CI
This commit is contained in:
Jon Siwek 2020-09-24 08:16:45 -07:00
commit 8feca7291b
15 changed files with 256 additions and 86 deletions

View file

@ -4,7 +4,7 @@ btest_retries: &BTEST_RETRIES 2
memory: &MEMORY 4GB
config: &CONFIG --build-type=release --enable-cpp-tests --disable-broker-tests --prefix=$CIRRUS_WORKING_DIR/install
memcheck_config: &MEMCHECK_CONFIG --build-type=debug --enable-cpp-tests --disable-broker-tests --sanitizers=address --enable-fuzzers
sanitizer_config: &SANITIZER_CONFIG --build-type=debug --enable-cpp-tests --disable-broker-tests --sanitizers=address,undefined --enable-fuzzers
resources_template: &RESOURCES_TEMPLATE
cpu: *CPUS
@ -151,7 +151,7 @@ freebsd_task:
prepare_script: ./ci/freebsd/prepare.sh
<< : *CI_TEMPLATE
memcheck_task:
sanitizer_task:
container:
# Just uses a recent/common distro to run memory error/leak checks.
dockerfile: ci/ubuntu-18.04/Dockerfile
@ -161,4 +161,6 @@ memcheck_task:
<< : *CI_TEMPLATE
test_fuzzers_script: ./ci/test-fuzzers.sh
env:
ZEEK_CI_CONFIGURE_FLAGS: *MEMCHECK_CONFIG
ZEEK_CI_CONFIGURE_FLAGS: *SANITIZER_CONFIG
ZEEK_TAILORED_UB_CHECKS: 1
UBSAN_OPTIONS: print_stacktrace=1

55
CHANGES
View file

@ -1,3 +1,58 @@
3.3.0-dev.341 | 2020-09-24 08:16:45 -0700
* Fix negative-value-left-shift undefined behavior in patricia trie (Jon Siwek, Corelight)
* Improve negation of ConstExpr (Jon Siwek, Corelight)
* Instead of creating a NegExpr for negation of a literal/constant,
a ConstExpr is now created directly.
* For negation of integer literals, there's now an additional check
for whether the integer would be outside the range of possible 'int'
values. This can also help prevent the undefined behavior due to
overflow as a result of trying to represent the minimum 'int' value of
-9223372036854775808 as a literal in a script -- the unsigned value is
cast to signed yielding INT64_MIN, then INT64_MIN is negated.
* Avoid signed integer overflow when combining SMB header PID bits (Jon Siwek, Corelight)
Such an overflow invokes undefined behavior.
* Avoid unary negation of INT64_MIN in modp_litoa10 (Jon Siwek, Corelight)
Overlow can occur in that case, which is undefined behavior.
* Avoid double-to-int conversion overflows in modp_dtoa functions (Jon Siwek, Corelight)
Those methods already had a fallback to use sprintf() for large values
except:
* The check-for-large-value was unnecessarily done after many
operations that aren't relevant to the check and those operations can
result in a conversion overflow (undefined behavior).
* The check-for-large-value was using the literal value for a
32-bit INT_MAX instead of just using INT_MAX. For a platform where
`int` is less than 32-bits, the same conversion overflow from the
previous point could still occur (undefined behavior).
* The check-for-large-value was not inclusive of INT_MAX.
In a case where the conversion of INT_MAX itself to a double
can't be represented exactly, it's implementation-defined whether
the closest higher or closest lower representable-value is selected.
If the higher value is selected, then a `double` value comparing equal
to INT_MAX-as-converted-to-double would cause an overflow of an `int`
upon conversion (undefined behavior).
* Fix divide-by-zero in Entropy analyzer (Jon Siwek, Corelight)
* Fix divide-by-zero in stats/profiling memory usage calculation (Jon Siwek, Corelight)
* Fix uninitialized field in POP3 fuzzer (Jon Siwek, Corelight)
* Add framework for running UndefinedBehaviorSanitizer in CI (Jon Siwek, Corelight)
3.3.0-dev.329 | 2020-09-23 11:32:06 -0700
* Update NEWS (Tim Wojtulewicz, Corelight)

View file

@ -134,7 +134,71 @@ if ( ZEEK_SANITIZERS )
# interfere with the detection and cause CMAKE_THREAD_LIBS_INIT to not
# include -lpthread when it should.
find_package(Threads)
string(REPLACE "," " " _sanitizer_args "${ZEEK_SANITIZERS}")
separate_arguments(_sanitizer_args)
set(ZEEK_SANITIZERS "")
foreach ( _sanitizer ${_sanitizer_args} )
if ( ZEEK_SANITIZERS )
set(ZEEK_SANITIZERS "${ZEEK_SANITIZERS},")
endif ()
if ( NOT _sanitizer STREQUAL "undefined" )
set(ZEEK_SANITIZERS "${ZEEK_SANITIZERS}${_sanitizer}")
continue()
endif ()
if ( NOT DEFINED ZEEK_SANITIZER_UB_CHECKS )
if ( DEFINED ENV{ZEEK_TAILORED_UB_CHECKS} )
# list(APPEND _check_list "alignment") # TODO: fix associated errors
list(APPEND _check_list "bool")
# list(APPEND _check_list "builtin") # Not implemented in older GCCs
list(APPEND _check_list "bounds") # Covers both array/local bounds options below
# list(APPEND _check_list "array-bounds") # Not implemented by GCC
# list(APPEND _check_list "local-bounds") # Not normally part of "undefined"
list(APPEND _check_list "enum")
list(APPEND _check_list "float-cast-overflow")
list(APPEND _check_list "float-divide-by-zero")
# list(APPEND _check_list "function") # Not implemented by GCC
# list(APPEND _check_list "implicit-unsigned-integer-truncation") # Not truly UB
# list(APPEND _check_list "implicit-signed-integer-truncation") # Not truly UB
# list(APPEND _check_list "implicit-integer-sign-change") # Not truly UB
list(APPEND _check_list "integer-divide-by-zero")
list(APPEND _check_list "nonnull-attribute")
# list(APPEND _check_list "null") # TODO: fix associated errors
# list(APPEND _check_list "nullability-arg") # Not normally part of "undefined"
# list(APPEND _check_list "nullability-assign") # Not normally part of "undefined"
# list(APPEND _check_list "nullability-return") # Not normally part of "undefined"
# list(APPEND _check_list "objc-cast") # Not truly UB
list(APPEND _check_list "object-size")
# list(APPEND _check_list "pointer-overflow") # Not implemented in older GCCs
list(APPEND _check_list "return")
list(APPEND _check_list "returns-nonnull-attribute")
list(APPEND _check_list "shift")
# list(APPEND _check_list "unsigned-shift-base") # Not implemented by GCC
list(APPEND _check_list "signed-integer-overflow")
list(APPEND _check_list "unreachable")
# list(APPEND _check_list "unsigned-integer-overflow") # Not truly UB
list(APPEND _check_list "vla-bound")
# list(APPEND _check_list "vptr") # TODO: fix associated errors
string(REPLACE ";" "," _ub_checks "${_check_list}")
set(ZEEK_SANITIZER_UB_CHECKS "${_ub_checks}" CACHE INTERNAL "" FORCE)
else ()
set(ZEEK_SANITIZER_UB_CHECKS "undefined" CACHE INTERNAL "" FORCE)
endif ()
endif ()
set(ZEEK_SANITIZERS "${ZEEK_SANITIZERS}${ZEEK_SANITIZER_UB_CHECKS}")
endforeach ()
set(_sanitizer_flags "-fsanitize=${ZEEK_SANITIZERS}")
if ( ZEEK_SANITIZER_UB_CHECKS )
set(_sanitizer_flags "${_sanitizer_flags} -fno-sanitize-recover=${ZEEK_SANITIZER_UB_CHECKS}")
endif ()
set(_sanitizer_flags "${_sanitizer_flags} -fno-omit-frame-pointer")
set(_sanitizer_flags "${_sanitizer_flags} -fno-optimize-sibling-calls")

View file

@ -1 +1 @@
3.3.0-dev.329
3.3.0-dev.341

View file

@ -136,7 +136,7 @@ void RandTest::end(double* r_ent, double* r_chisq,
/* Calculate Monte Carlo value for PI from percentage of hits
within the circle */
montepi = 4.0 * (((double) inmont) / mcount);
montepi = mcount == 0 ? 0 : 4.0 * (((double) inmont) / mcount);
/* Return results through arguments */
*r_ent = ent;

View file

@ -126,6 +126,10 @@ void ProfileLogger::Log()
utime - first_utime, stime - first_stime, rtime - first_rtime));
int conn_mem_use = expensive ? sessions->ConnectionMemoryUsage() : 0;
double avg_conn_mem_use = 0;
if ( expensive && sessions->CurrentConnections() != 0 )
avg_conn_mem_use = conn_mem_use / static_cast<double>(sessions->CurrentConnections());
file->Write(util::fmt("%.06f Conns: total=%" PRIu64 " current=%" PRIu64 "/%" PRIi32 " mem=%" PRIi32 "K avg=%.1f table=%" PRIu32 "K connvals=%" PRIu32 "K\n",
run_state::network_time,
@ -133,7 +137,7 @@ void ProfileLogger::Log()
Connection::CurrentConnections(),
sessions->CurrentConnections(),
conn_mem_use,
expensive ? (conn_mem_use / double(sessions->CurrentConnections())) : 0,
avg_conn_mem_use,
expensive ? sessions->MemoryAllocation() / 1024 : 0,
expensive ? sessions->ConnectionMemoryUsageConnVals() / 1024 : 0
));

View file

@ -39,6 +39,11 @@
%}
refine connection SMB_Conn += {
function join_pid_bits(hi: uint16, lo: uint16): uint32
%{
return (static_cast<uint32_t>(hi) << 16) | static_cast<uint32_t>(lo);
%}
function proc_smb_message(h: SMB_Header, is_orig: bool): bool
%{
if ( smb1_message )
@ -306,7 +311,7 @@ type SMB_Header(is_orig: bool) = record {
} &let {
err_status_type = (flags2 >> 14) & 1;
unicode = (flags2 >> 15) & 1;
pid = (pid_high * 0x10000) + pid_low;
pid: uint32 = $context.connection.join_pid_bits(pid_high, pid_low);
is_pipe: bool = $context.connection.get_tree_is_pipe(tid);
proc : bool = $context.connection.proc_smb_message(this, is_orig);
} &byteorder=littleendian;

View file

@ -24,6 +24,7 @@ static zeek::Connection* add_connection()
conn_id.dst_addr = zeek::IPAddr("5.6.7.8");
conn_id.src_port = htons(23132);
conn_id.dst_port = htons(80);
conn_id.is_one_way = false;
zeek::detail::ConnIDKey key = zeek::detail::BuildConnIDKey(conn_id);
zeek::Connection* conn = new zeek::Connection(zeek::sessions, key, network_time_start,
&conn_id, 1, &p, nullptr);

View file

@ -6,6 +6,7 @@
#include <stdint.h>
#include <stdio.h>
#include <math.h>
#include <limits.h>
// other interesting references on num to string convesion
// http://www.jb.man.ac.uk/~slowe/cpp/itoa.html
@ -56,7 +57,7 @@ void modp_uitoa10(uint32_t value, char* str)
void modp_litoa10(int64_t value, char* str)
{
char* wstr=str;
uint64_t uvalue = (value < 0) ? -value : value;
uint64_t uvalue = (value < 0) ? (value == INT64_MIN ? (uint64_t)(INT64_MAX) + 1 : -value) : value;
// Conversion. Number is reversed.
do *wstr++ = (char)(48 + (uvalue % 10)); while(uvalue /= 10);
@ -88,8 +89,28 @@ void modp_dtoa(double value, char* str, int prec)
str[0] = 'n'; str[1] = 'a'; str[2] = 'n'; str[3] = '\0';
return;
}
/* we'll work in positive values and deal with the
negative sign issue later */
int neg = 0;
if (value < 0) {
neg = 1;
value = -value;
}
/* if input is larger than thres_max, revert to exponential */
const double thres_max = (double)(0x7FFFFFFF);
const double thres_max = (double)(INT_MAX);
/* for very large numbers switch back to native sprintf for exponentials.
anyone want to write code to replace this? */
/*
normal printf behavior is to print EVERY whole number digit
which can be 100s of characters overflowing your buffers == bad
*/
if (value >= thres_max) {
sprintf(str, "%e", neg ? -value : value);
return;
}
double diff = 0.0;
char* wstr = str;
@ -101,16 +122,6 @@ void modp_dtoa(double value, char* str, int prec)
prec = 9;
}
/* we'll work in positive values and deal with the
negative sign issue later */
int neg = 0;
if (value < 0) {
neg = 1;
value = -value;
}
int whole = (int) value;
double tmp = (value - whole) * _pow10[prec];
uint32_t frac = (uint32_t)(tmp);
@ -129,17 +140,6 @@ void modp_dtoa(double value, char* str, int prec)
++frac;
}
/* for very large numbers switch back to native sprintf for exponentials.
anyone want to write code to replace this? */
/*
normal printf behavior is to print EVERY whole number digit
which can be 100s of characters overflowing your buffers == bad
*/
if (value > thres_max) {
sprintf(str, "%e", neg ? -value : value);
return;
}
if (prec == 0) {
diff = value - whole;
if (diff > 0.5) {
@ -189,8 +189,27 @@ void modp_dtoa2(double value, char* str, int prec)
return;
}
/* we'll work in positive values and deal with the
negative sign issue later */
int neg = 0;
if (value < 0) {
neg = 1;
value = -value;
}
/* if input is larger than thres_max, revert to exponential */
const double thres_max = (double)(0x7FFFFFFF);
const double thres_max = (double)(INT_MAX);
/* for very large numbers switch back to native sprintf for exponentials.
anyone want to write code to replace this? */
/*
normal printf behavior is to print EVERY whole number digit
which can be 100s of characters overflowing your buffers == bad
*/
if (value >= thres_max) {
sprintf(str, "%e", neg ? -value : value);
return;
}
int count;
double diff = 0.0;
@ -203,16 +222,6 @@ void modp_dtoa2(double value, char* str, int prec)
prec = 9;
}
/* we'll work in positive values and deal with the
negative sign issue later */
int neg = 0;
if (value < 0) {
neg = 1;
value = -value;
}
int whole = (int) value;
double tmp = (value - whole) * _pow10[prec];
uint32_t frac = (uint32_t)(tmp);
@ -231,17 +240,6 @@ void modp_dtoa2(double value, char* str, int prec)
++frac;
}
/* for very large numbers switch back to native sprintf for exponentials.
anyone want to write code to replace this? */
/*
normal printf behavior is to print EVERY whole number digit
which can be 100s of characters overflowing your buffers == bad
*/
if (value > thres_max) {
sprintf(str, "%e", neg ? -value : value);
return;
}
if (prec == 0) {
diff = value - whole;
if (diff > 0.5) {
@ -301,21 +299,6 @@ void modp_dtoa3(double value, char* str, int n, int prec)
return;
}
/* if input is larger than thres_max, revert to exponential */
const double thres_max = (double)(0x7FFFFFFF);
int count;
double diff = 0.0;
char* wstr = str;
if (prec < 0) {
prec = 0;
} else if (prec > 9) {
/* precision of >= 10 can lead to overflow errors */
prec = 9;
}
/* we'll work in positive values and deal with the
negative sign issue later */
int neg = 0;
@ -324,32 +307,23 @@ void modp_dtoa3(double value, char* str, int n, int prec)
value = -value;
}
int whole = (int) value;
double tmp = (value - whole) * _pow10[prec];
uint32_t frac = (uint32_t)(tmp);
diff = tmp - frac;
if (diff > 0.5) {
++frac;
/* handle rollover, e.g. case 0.99 with prec 1 is 1.0 */
if (frac >= _pow10[prec]) {
frac = 0;
++whole;
}
} else if (diff == 0.5 && ((frac == 0) || (frac & 1))) {
/* if halfway, round up if odd, OR
if last digit is 0. That last part is strange */
++frac;
if (prec < 0) {
prec = 0;
} else if (prec > 9) {
/* precision of >= 10 can lead to overflow errors */
prec = 9;
}
/* if input is larger than thres_max, revert to exponential */
const double thres_max = (double)(INT_MAX);
/* for very large numbers switch back to native sprintf for exponentials.
anyone want to write code to replace this? */
/*
normal printf behavior is to print EVERY whole number digit
which can be 100s of characters overflowing your buffers == bad
*/
if (value > thres_max) {
if (value >= thres_max) {
/* ---- Modified part, compared to modp_dtoa3. */
int i = snprintf(str, n, "%.*f", prec, neg ? -value : value);
@ -373,6 +347,28 @@ void modp_dtoa3(double value, char* str, int n, int prec)
/* ---- End of modified part.. */
}
int count;
double diff = 0.0;
char* wstr = str;
int whole = (int) value;
double tmp = (value - whole) * _pow10[prec];
uint32_t frac = (uint32_t)(tmp);
diff = tmp - frac;
if (diff > 0.5) {
++frac;
/* handle rollover, e.g. case 0.99 with prec 1 is 1.0 */
if (frac >= _pow10[prec]) {
frac = 0;
++whole;
}
} else if (diff == 0.5 && ((frac == 0) || (frac & 1))) {
/* if halfway, round up if odd, OR
if last digit is 0. That last part is strange */
++frac;
}
if (prec == 0) {
diff = value - whole;
if (diff > 0.5) {

View file

@ -336,6 +336,36 @@ expr:
{
zeek::detail::set_location(@1, @2);
$$ = new zeek::detail::NegExpr({zeek::AdoptRef{}, $2});
if ( ! $$->IsError() && $2->IsConst() )
{
auto v = $2->ExprVal();
auto tag = v->GetType()->Tag();
if ( tag == zeek::TYPE_COUNT )
{
auto c = v->AsCount();
uint64_t int_max = static_cast<uint64_t>(INT64_MAX) + 1;
if ( c <= int_max )
{
auto ce = new zeek::detail::ConstExpr(zeek::val_mgr->Int(-c));
Unref($$);
$$ = ce;
}
else
{
$$->Error("literal is outside range of 'int' values");
$$->SetError();
}
}
else
{
auto ce = new zeek::detail::ConstExpr($$->Eval(nullptr));
Unref($$);
$$ = ce;
}
}
}
| '+' expr %prec '!'

View file

@ -96,7 +96,7 @@ comp_with_mask (void *addr, void *dest, u_int mask)
if ( /* mask/8 == 0 || */ memcmp (addr, dest, mask / 8) == 0) {
int n = mask / 8;
int m = ((-1) << (8 - (mask % 8)));
int m = -(1 << (8 - (mask % 8)));
if (mask % 8 == 0 || (((u_char *)addr)[n] & m) == (((u_char *)dest)[n] & m))
return (1);

View file

@ -0,0 +1,2 @@
error in /home/jon/pro/zeek/zeek/testing/btest/.tmp/language.int-negation-range/int-negation-range.zeek, line 8: literal is outside range of 'int' values (-9223372036854775809)
error in /home/jon/pro/zeek/zeek/testing/btest/.tmp/language.int-negation-range/int-negation-range.zeek, line 9: literal is outside range of 'int' values (-9223372036854775810)

View file

@ -30,3 +30,4 @@ ZEEK_DEFAULT_CONNECT_RETRY=1
ZEEK_DISABLE_ZEEKYGEN=1
ZEEK_ALLOW_INIT_ERRORS=1
ZEEK_SUPERVISOR_NO_SIGKILL=1
UBSAN_OPTIONS=print_stacktrace=1

View file

@ -0,0 +1,9 @@
# @TEST-EXEC-FAIL: zeek -b %INPUT >out 2>&1
# @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-remove-abspath btest-diff out
local i_min = -9223372036854775808;
local i_min_p1 = -9223372036854775807;
# These should be caught at parse-time as outside the range of a 64-bit ints.
local i_min_m1 = -9223372036854775809;
local i_min_m2 = -9223372036854775810;

View file

@ -22,3 +22,4 @@ ZEEK_PROFILER_FILE=%(testbase)s/.tmp/script-coverage/XXXXXX
ZEEK_DNS_FAKE=1
# For fedora 21 - they disable MD5 for certificate verification and need setting an environment variable to permit it.
OPENSSL_ENABLE_MD5_VERIFY=1
UBSAN_OPTIONS=print_stacktrace=1