diff --git a/.cirrus.yml b/.cirrus.yml index 50e2923200..7a50417e0e 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -4,7 +4,7 @@ btest_retries: &BTEST_RETRIES 2 memory: &MEMORY 4GB config: &CONFIG --build-type=release --enable-cpp-tests --disable-broker-tests --prefix=$CIRRUS_WORKING_DIR/install -memcheck_config: &MEMCHECK_CONFIG --build-type=debug --enable-cpp-tests --disable-broker-tests --sanitizers=address --enable-fuzzers +sanitizer_config: &SANITIZER_CONFIG --build-type=debug --enable-cpp-tests --disable-broker-tests --sanitizers=address,undefined --enable-fuzzers resources_template: &RESOURCES_TEMPLATE cpu: *CPUS @@ -151,7 +151,7 @@ freebsd_task: prepare_script: ./ci/freebsd/prepare.sh << : *CI_TEMPLATE -memcheck_task: +sanitizer_task: container: # Just uses a recent/common distro to run memory error/leak checks. dockerfile: ci/ubuntu-18.04/Dockerfile @@ -161,4 +161,6 @@ memcheck_task: << : *CI_TEMPLATE test_fuzzers_script: ./ci/test-fuzzers.sh env: - ZEEK_CI_CONFIGURE_FLAGS: *MEMCHECK_CONFIG + ZEEK_CI_CONFIGURE_FLAGS: *SANITIZER_CONFIG + ZEEK_TAILORED_UB_CHECKS: 1 + UBSAN_OPTIONS: print_stacktrace=1 diff --git a/CHANGES b/CHANGES index 6c11163cf8..13c68f4502 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,58 @@ + +3.3.0-dev.341 | 2020-09-24 08:16:45 -0700 + + * Fix negative-value-left-shift undefined behavior in patricia trie (Jon Siwek, Corelight) + + * Improve negation of ConstExpr (Jon Siwek, Corelight) + + * Instead of creating a NegExpr for negation of a literal/constant, + a ConstExpr is now created directly. + + * For negation of integer literals, there's now an additional check + for whether the integer would be outside the range of possible 'int' + values. This can also help prevent the undefined behavior due to + overflow as a result of trying to represent the minimum 'int' value of + -9223372036854775808 as a literal in a script -- the unsigned value is + cast to signed yielding INT64_MIN, then INT64_MIN is negated. + + * Avoid signed integer overflow when combining SMB header PID bits (Jon Siwek, Corelight) + + Such an overflow invokes undefined behavior. + + * Avoid unary negation of INT64_MIN in modp_litoa10 (Jon Siwek, Corelight) + + Overlow can occur in that case, which is undefined behavior. + + * Avoid double-to-int conversion overflows in modp_dtoa functions (Jon Siwek, Corelight) + + Those methods already had a fallback to use sprintf() for large values + except: + + * The check-for-large-value was unnecessarily done after many + operations that aren't relevant to the check and those operations can + result in a conversion overflow (undefined behavior). + + * The check-for-large-value was using the literal value for a + 32-bit INT_MAX instead of just using INT_MAX. For a platform where + `int` is less than 32-bits, the same conversion overflow from the + previous point could still occur (undefined behavior). + + * The check-for-large-value was not inclusive of INT_MAX. + In a case where the conversion of INT_MAX itself to a double + can't be represented exactly, it's implementation-defined whether + the closest higher or closest lower representable-value is selected. + If the higher value is selected, then a `double` value comparing equal + to INT_MAX-as-converted-to-double would cause an overflow of an `int` + upon conversion (undefined behavior). + + * Fix divide-by-zero in Entropy analyzer (Jon Siwek, Corelight) + + * Fix divide-by-zero in stats/profiling memory usage calculation (Jon Siwek, Corelight) + + * Fix uninitialized field in POP3 fuzzer (Jon Siwek, Corelight) + + * Add framework for running UndefinedBehaviorSanitizer in CI (Jon Siwek, Corelight) + 3.3.0-dev.329 | 2020-09-23 11:32:06 -0700 * Update NEWS (Tim Wojtulewicz, Corelight) diff --git a/CMakeLists.txt b/CMakeLists.txt index 90d5c8bc54..0221ed788d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -134,7 +134,71 @@ if ( ZEEK_SANITIZERS ) # interfere with the detection and cause CMAKE_THREAD_LIBS_INIT to not # include -lpthread when it should. find_package(Threads) + + string(REPLACE "," " " _sanitizer_args "${ZEEK_SANITIZERS}") + separate_arguments(_sanitizer_args) + set(ZEEK_SANITIZERS "") + + foreach ( _sanitizer ${_sanitizer_args} ) + if ( ZEEK_SANITIZERS ) + set(ZEEK_SANITIZERS "${ZEEK_SANITIZERS},") + endif () + + if ( NOT _sanitizer STREQUAL "undefined" ) + set(ZEEK_SANITIZERS "${ZEEK_SANITIZERS}${_sanitizer}") + continue() + endif () + + if ( NOT DEFINED ZEEK_SANITIZER_UB_CHECKS ) + if ( DEFINED ENV{ZEEK_TAILORED_UB_CHECKS} ) + # list(APPEND _check_list "alignment") # TODO: fix associated errors + list(APPEND _check_list "bool") + # list(APPEND _check_list "builtin") # Not implemented in older GCCs + list(APPEND _check_list "bounds") # Covers both array/local bounds options below + # list(APPEND _check_list "array-bounds") # Not implemented by GCC + # list(APPEND _check_list "local-bounds") # Not normally part of "undefined" + list(APPEND _check_list "enum") + list(APPEND _check_list "float-cast-overflow") + list(APPEND _check_list "float-divide-by-zero") + # list(APPEND _check_list "function") # Not implemented by GCC + # list(APPEND _check_list "implicit-unsigned-integer-truncation") # Not truly UB + # list(APPEND _check_list "implicit-signed-integer-truncation") # Not truly UB + # list(APPEND _check_list "implicit-integer-sign-change") # Not truly UB + list(APPEND _check_list "integer-divide-by-zero") + list(APPEND _check_list "nonnull-attribute") + # list(APPEND _check_list "null") # TODO: fix associated errors + # list(APPEND _check_list "nullability-arg") # Not normally part of "undefined" + # list(APPEND _check_list "nullability-assign") # Not normally part of "undefined" + # list(APPEND _check_list "nullability-return") # Not normally part of "undefined" + # list(APPEND _check_list "objc-cast") # Not truly UB + list(APPEND _check_list "object-size") + # list(APPEND _check_list "pointer-overflow") # Not implemented in older GCCs + list(APPEND _check_list "return") + list(APPEND _check_list "returns-nonnull-attribute") + list(APPEND _check_list "shift") + # list(APPEND _check_list "unsigned-shift-base") # Not implemented by GCC + list(APPEND _check_list "signed-integer-overflow") + list(APPEND _check_list "unreachable") + # list(APPEND _check_list "unsigned-integer-overflow") # Not truly UB + list(APPEND _check_list "vla-bound") + # list(APPEND _check_list "vptr") # TODO: fix associated errors + + string(REPLACE ";" "," _ub_checks "${_check_list}") + set(ZEEK_SANITIZER_UB_CHECKS "${_ub_checks}" CACHE INTERNAL "" FORCE) + else () + set(ZEEK_SANITIZER_UB_CHECKS "undefined" CACHE INTERNAL "" FORCE) + endif () + endif () + + set(ZEEK_SANITIZERS "${ZEEK_SANITIZERS}${ZEEK_SANITIZER_UB_CHECKS}") + endforeach () + set(_sanitizer_flags "-fsanitize=${ZEEK_SANITIZERS}") + + if ( ZEEK_SANITIZER_UB_CHECKS ) + set(_sanitizer_flags "${_sanitizer_flags} -fno-sanitize-recover=${ZEEK_SANITIZER_UB_CHECKS}") + endif () + set(_sanitizer_flags "${_sanitizer_flags} -fno-omit-frame-pointer") set(_sanitizer_flags "${_sanitizer_flags} -fno-optimize-sibling-calls") diff --git a/VERSION b/VERSION index b9ad602047..6e4545a743 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.3.0-dev.329 +3.3.0-dev.341 diff --git a/src/RandTest.cc b/src/RandTest.cc index 217e050294..979c7c6673 100644 --- a/src/RandTest.cc +++ b/src/RandTest.cc @@ -136,7 +136,7 @@ void RandTest::end(double* r_ent, double* r_chisq, /* Calculate Monte Carlo value for PI from percentage of hits within the circle */ - montepi = 4.0 * (((double) inmont) / mcount); + montepi = mcount == 0 ? 0 : 4.0 * (((double) inmont) / mcount); /* Return results through arguments */ *r_ent = ent; diff --git a/src/Stats.cc b/src/Stats.cc index 8e421c483b..fde7921aad 100644 --- a/src/Stats.cc +++ b/src/Stats.cc @@ -126,6 +126,10 @@ void ProfileLogger::Log() utime - first_utime, stime - first_stime, rtime - first_rtime)); int conn_mem_use = expensive ? sessions->ConnectionMemoryUsage() : 0; + double avg_conn_mem_use = 0; + + if ( expensive && sessions->CurrentConnections() != 0 ) + avg_conn_mem_use = conn_mem_use / static_cast(sessions->CurrentConnections()); file->Write(util::fmt("%.06f Conns: total=%" PRIu64 " current=%" PRIu64 "/%" PRIi32 " mem=%" PRIi32 "K avg=%.1f table=%" PRIu32 "K connvals=%" PRIu32 "K\n", run_state::network_time, @@ -133,7 +137,7 @@ void ProfileLogger::Log() Connection::CurrentConnections(), sessions->CurrentConnections(), conn_mem_use, - expensive ? (conn_mem_use / double(sessions->CurrentConnections())) : 0, + avg_conn_mem_use, expensive ? sessions->MemoryAllocation() / 1024 : 0, expensive ? sessions->ConnectionMemoryUsageConnVals() / 1024 : 0 )); diff --git a/src/analyzer/protocol/smb/smb1-protocol.pac b/src/analyzer/protocol/smb/smb1-protocol.pac index 80c763d0eb..d661a907ae 100644 --- a/src/analyzer/protocol/smb/smb1-protocol.pac +++ b/src/analyzer/protocol/smb/smb1-protocol.pac @@ -39,6 +39,11 @@ %} refine connection SMB_Conn += { + function join_pid_bits(hi: uint16, lo: uint16): uint32 + %{ + return (static_cast(hi) << 16) | static_cast(lo); + %} + function proc_smb_message(h: SMB_Header, is_orig: bool): bool %{ if ( smb1_message ) @@ -306,7 +311,7 @@ type SMB_Header(is_orig: bool) = record { } &let { err_status_type = (flags2 >> 14) & 1; unicode = (flags2 >> 15) & 1; - pid = (pid_high * 0x10000) + pid_low; + pid: uint32 = $context.connection.join_pid_bits(pid_high, pid_low); is_pipe: bool = $context.connection.get_tree_is_pipe(tid); proc : bool = $context.connection.proc_smb_message(this, is_orig); } &byteorder=littleendian; diff --git a/src/fuzzers/pop3-fuzzer.cc b/src/fuzzers/pop3-fuzzer.cc index a8d18abf31..f4a68c6f74 100644 --- a/src/fuzzers/pop3-fuzzer.cc +++ b/src/fuzzers/pop3-fuzzer.cc @@ -24,6 +24,7 @@ static zeek::Connection* add_connection() conn_id.dst_addr = zeek::IPAddr("5.6.7.8"); conn_id.src_port = htons(23132); conn_id.dst_port = htons(80); + conn_id.is_one_way = false; zeek::detail::ConnIDKey key = zeek::detail::BuildConnIDKey(conn_id); zeek::Connection* conn = new zeek::Connection(zeek::sessions, key, network_time_start, &conn_id, 1, &p, nullptr); diff --git a/src/modp_numtoa.c b/src/modp_numtoa.c index 6475ad5fe6..ab6b2c95e8 100644 --- a/src/modp_numtoa.c +++ b/src/modp_numtoa.c @@ -6,6 +6,7 @@ #include #include #include +#include // other interesting references on num to string convesion // http://www.jb.man.ac.uk/~slowe/cpp/itoa.html @@ -56,7 +57,7 @@ void modp_uitoa10(uint32_t value, char* str) void modp_litoa10(int64_t value, char* str) { char* wstr=str; - uint64_t uvalue = (value < 0) ? -value : value; + uint64_t uvalue = (value < 0) ? (value == INT64_MIN ? (uint64_t)(INT64_MAX) + 1 : -value) : value; // Conversion. Number is reversed. do *wstr++ = (char)(48 + (uvalue % 10)); while(uvalue /= 10); @@ -88,8 +89,28 @@ void modp_dtoa(double value, char* str, int prec) str[0] = 'n'; str[1] = 'a'; str[2] = 'n'; str[3] = '\0'; return; } + + /* we'll work in positive values and deal with the + negative sign issue later */ + int neg = 0; + if (value < 0) { + neg = 1; + value = -value; + } + /* if input is larger than thres_max, revert to exponential */ - const double thres_max = (double)(0x7FFFFFFF); + const double thres_max = (double)(INT_MAX); + + /* for very large numbers switch back to native sprintf for exponentials. + anyone want to write code to replace this? */ + /* + normal printf behavior is to print EVERY whole number digit + which can be 100s of characters overflowing your buffers == bad + */ + if (value >= thres_max) { + sprintf(str, "%e", neg ? -value : value); + return; + } double diff = 0.0; char* wstr = str; @@ -101,16 +122,6 @@ void modp_dtoa(double value, char* str, int prec) prec = 9; } - - /* we'll work in positive values and deal with the - negative sign issue later */ - int neg = 0; - if (value < 0) { - neg = 1; - value = -value; - } - - int whole = (int) value; double tmp = (value - whole) * _pow10[prec]; uint32_t frac = (uint32_t)(tmp); @@ -129,17 +140,6 @@ void modp_dtoa(double value, char* str, int prec) ++frac; } - /* for very large numbers switch back to native sprintf for exponentials. - anyone want to write code to replace this? */ - /* - normal printf behavior is to print EVERY whole number digit - which can be 100s of characters overflowing your buffers == bad - */ - if (value > thres_max) { - sprintf(str, "%e", neg ? -value : value); - return; - } - if (prec == 0) { diff = value - whole; if (diff > 0.5) { @@ -189,8 +189,27 @@ void modp_dtoa2(double value, char* str, int prec) return; } + /* we'll work in positive values and deal with the + negative sign issue later */ + int neg = 0; + if (value < 0) { + neg = 1; + value = -value; + } + /* if input is larger than thres_max, revert to exponential */ - const double thres_max = (double)(0x7FFFFFFF); + const double thres_max = (double)(INT_MAX); + + /* for very large numbers switch back to native sprintf for exponentials. + anyone want to write code to replace this? */ + /* + normal printf behavior is to print EVERY whole number digit + which can be 100s of characters overflowing your buffers == bad + */ + if (value >= thres_max) { + sprintf(str, "%e", neg ? -value : value); + return; + } int count; double diff = 0.0; @@ -203,16 +222,6 @@ void modp_dtoa2(double value, char* str, int prec) prec = 9; } - - /* we'll work in positive values and deal with the - negative sign issue later */ - int neg = 0; - if (value < 0) { - neg = 1; - value = -value; - } - - int whole = (int) value; double tmp = (value - whole) * _pow10[prec]; uint32_t frac = (uint32_t)(tmp); @@ -231,17 +240,6 @@ void modp_dtoa2(double value, char* str, int prec) ++frac; } - /* for very large numbers switch back to native sprintf for exponentials. - anyone want to write code to replace this? */ - /* - normal printf behavior is to print EVERY whole number digit - which can be 100s of characters overflowing your buffers == bad - */ - if (value > thres_max) { - sprintf(str, "%e", neg ? -value : value); - return; - } - if (prec == 0) { diff = value - whole; if (diff > 0.5) { @@ -301,21 +299,6 @@ void modp_dtoa3(double value, char* str, int n, int prec) return; } - /* if input is larger than thres_max, revert to exponential */ - const double thres_max = (double)(0x7FFFFFFF); - - int count; - double diff = 0.0; - char* wstr = str; - - if (prec < 0) { - prec = 0; - } else if (prec > 9) { - /* precision of >= 10 can lead to overflow errors */ - prec = 9; - } - - /* we'll work in positive values and deal with the negative sign issue later */ int neg = 0; @@ -324,32 +307,23 @@ void modp_dtoa3(double value, char* str, int n, int prec) value = -value; } - - int whole = (int) value; - double tmp = (value - whole) * _pow10[prec]; - uint32_t frac = (uint32_t)(tmp); - diff = tmp - frac; - - if (diff > 0.5) { - ++frac; - /* handle rollover, e.g. case 0.99 with prec 1 is 1.0 */ - if (frac >= _pow10[prec]) { - frac = 0; - ++whole; - } - } else if (diff == 0.5 && ((frac == 0) || (frac & 1))) { - /* if halfway, round up if odd, OR - if last digit is 0. That last part is strange */ - ++frac; + if (prec < 0) { + prec = 0; + } else if (prec > 9) { + /* precision of >= 10 can lead to overflow errors */ + prec = 9; } + /* if input is larger than thres_max, revert to exponential */ + const double thres_max = (double)(INT_MAX); + /* for very large numbers switch back to native sprintf for exponentials. anyone want to write code to replace this? */ /* normal printf behavior is to print EVERY whole number digit which can be 100s of characters overflowing your buffers == bad */ - if (value > thres_max) { + if (value >= thres_max) { /* ---- Modified part, compared to modp_dtoa3. */ int i = snprintf(str, n, "%.*f", prec, neg ? -value : value); @@ -373,6 +347,28 @@ void modp_dtoa3(double value, char* str, int n, int prec) /* ---- End of modified part.. */ } + int count; + double diff = 0.0; + char* wstr = str; + + int whole = (int) value; + double tmp = (value - whole) * _pow10[prec]; + uint32_t frac = (uint32_t)(tmp); + diff = tmp - frac; + + if (diff > 0.5) { + ++frac; + /* handle rollover, e.g. case 0.99 with prec 1 is 1.0 */ + if (frac >= _pow10[prec]) { + frac = 0; + ++whole; + } + } else if (diff == 0.5 && ((frac == 0) || (frac & 1))) { + /* if halfway, round up if odd, OR + if last digit is 0. That last part is strange */ + ++frac; + } + if (prec == 0) { diff = value - whole; if (diff > 0.5) { diff --git a/src/parse.y b/src/parse.y index ae940e6dec..9620e1ecba 100644 --- a/src/parse.y +++ b/src/parse.y @@ -336,6 +336,36 @@ expr: { zeek::detail::set_location(@1, @2); $$ = new zeek::detail::NegExpr({zeek::AdoptRef{}, $2}); + + if ( ! $$->IsError() && $2->IsConst() ) + { + auto v = $2->ExprVal(); + auto tag = v->GetType()->Tag(); + + if ( tag == zeek::TYPE_COUNT ) + { + auto c = v->AsCount(); + uint64_t int_max = static_cast(INT64_MAX) + 1; + + if ( c <= int_max ) + { + auto ce = new zeek::detail::ConstExpr(zeek::val_mgr->Int(-c)); + Unref($$); + $$ = ce; + } + else + { + $$->Error("literal is outside range of 'int' values"); + $$->SetError(); + } + } + else + { + auto ce = new zeek::detail::ConstExpr($$->Eval(nullptr)); + Unref($$); + $$ = ce; + } + } } | '+' expr %prec '!' diff --git a/src/patricia.c b/src/patricia.c index 27fe5e004c..589fe8c24a 100644 --- a/src/patricia.c +++ b/src/patricia.c @@ -96,7 +96,7 @@ comp_with_mask (void *addr, void *dest, u_int mask) if ( /* mask/8 == 0 || */ memcmp (addr, dest, mask / 8) == 0) { int n = mask / 8; - int m = ((-1) << (8 - (mask % 8))); + int m = -(1 << (8 - (mask % 8))); if (mask % 8 == 0 || (((u_char *)addr)[n] & m) == (((u_char *)dest)[n] & m)) return (1); diff --git a/testing/btest/Baseline/language.int-negation-range/out b/testing/btest/Baseline/language.int-negation-range/out new file mode 100644 index 0000000000..161e8467b5 --- /dev/null +++ b/testing/btest/Baseline/language.int-negation-range/out @@ -0,0 +1,2 @@ +error in /home/jon/pro/zeek/zeek/testing/btest/.tmp/language.int-negation-range/int-negation-range.zeek, line 8: literal is outside range of 'int' values (-9223372036854775809) +error in /home/jon/pro/zeek/zeek/testing/btest/.tmp/language.int-negation-range/int-negation-range.zeek, line 9: literal is outside range of 'int' values (-9223372036854775810) diff --git a/testing/btest/btest.cfg b/testing/btest/btest.cfg index d11e8ce9f3..89d92b7036 100644 --- a/testing/btest/btest.cfg +++ b/testing/btest/btest.cfg @@ -30,3 +30,4 @@ ZEEK_DEFAULT_CONNECT_RETRY=1 ZEEK_DISABLE_ZEEKYGEN=1 ZEEK_ALLOW_INIT_ERRORS=1 ZEEK_SUPERVISOR_NO_SIGKILL=1 +UBSAN_OPTIONS=print_stacktrace=1 diff --git a/testing/btest/language/int-negation-range.zeek b/testing/btest/language/int-negation-range.zeek new file mode 100644 index 0000000000..ae7b4ede3f --- /dev/null +++ b/testing/btest/language/int-negation-range.zeek @@ -0,0 +1,9 @@ +# @TEST-EXEC-FAIL: zeek -b %INPUT >out 2>&1 +# @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-remove-abspath btest-diff out + +local i_min = -9223372036854775808; +local i_min_p1 = -9223372036854775807; + +# These should be caught at parse-time as outside the range of a 64-bit ints. +local i_min_m1 = -9223372036854775809; +local i_min_m2 = -9223372036854775810; diff --git a/testing/external/subdir-btest.cfg b/testing/external/subdir-btest.cfg index 2729304b54..7014ea6118 100644 --- a/testing/external/subdir-btest.cfg +++ b/testing/external/subdir-btest.cfg @@ -22,3 +22,4 @@ ZEEK_PROFILER_FILE=%(testbase)s/.tmp/script-coverage/XXXXXX ZEEK_DNS_FAKE=1 # For fedora 21 - they disable MD5 for certificate verification and need setting an environment variable to permit it. OPENSSL_ENABLE_MD5_VERIFY=1 +UBSAN_OPTIONS=print_stacktrace=1