Merge remote-tracking branch 'origin/topic/jsiwek/gh-822-ubsan-ci'

* origin/topic/jsiwek/gh-822-ubsan-ci: Fix negative-value-left-shift undefined behavior in patricia trie Improve negation of ConstExpr Avoid signed integer overflow when combining SMB header PID bits Avoid unary negation of INT64_MIN in modp_litoa10 Avoid double-to-int conversion overflows in modp_dtoa functions Fix divide-by-zero in Entropy analyzer Fix divide-by-zero in stats/profiling memory usage calculation Fix uninitialized field in POP3 fuzzer Add framework for running UndefinedBehaviorSanitizer in CI
2025-10-12 11:38:20 +00:00 · 2020-09-24 08:16:45 -07:00 · 2020-09-24 08:16:45 -07:00 · 8feca7291b
commit 8feca7291b
parent 168da19e9a 428870be15
15 changed files with 256 additions and 86 deletions
--- a/.cirrus.yml
+++ b/.cirrus.yml
@ -4,7 +4,7 @@ btest_retries: &BTEST_RETRIES 2
 memory: &MEMORY 4GB

 config: &CONFIG --build-type=release --enable-cpp-tests  --disable-broker-tests --prefix=$CIRRUS_WORKING_DIR/install
-memcheck_config: &MEMCHECK_CONFIG --build-type=debug --enable-cpp-tests --disable-broker-tests --sanitizers=address --enable-fuzzers
+sanitizer_config: &SANITIZER_CONFIG --build-type=debug --enable-cpp-tests --disable-broker-tests --sanitizers=address,undefined --enable-fuzzers

 resources_template: &RESOURCES_TEMPLATE
  cpu: *CPUS
@ -151,7 +151,7 @@ freebsd_task:
  prepare_script: ./ci/freebsd/prepare.sh
  << : *CI_TEMPLATE

-memcheck_task:
+sanitizer_task:
  container:
    # Just uses a recent/common distro to run memory error/leak checks.
    dockerfile: ci/ubuntu-18.04/Dockerfile
@ -161,4 +161,6 @@ memcheck_task:
  << : *CI_TEMPLATE
  test_fuzzers_script: ./ci/test-fuzzers.sh
  env:
-    ZEEK_CI_CONFIGURE_FLAGS: *MEMCHECK_CONFIG
+    ZEEK_CI_CONFIGURE_FLAGS: *SANITIZER_CONFIG
+    ZEEK_TAILORED_UB_CHECKS: 1
+    UBSAN_OPTIONS: print_stacktrace=1
--- a/55
+++ b/55
@ -1,3 +1,58 @@
+
+3.3.0-dev.341 | 2020-09-24 08:16:45 -0700
+
+  * Fix negative-value-left-shift undefined behavior in patricia trie (Jon Siwek, Corelight)
+
+  * Improve negation of ConstExpr (Jon Siwek, Corelight)
+
+    * Instead of creating a NegExpr for negation of a literal/constant,
+      a ConstExpr is now created directly.
+
+    * For negation of integer literals, there's now an additional check
+      for whether the integer would be outside the range of possible 'int'
+      values.  This can also help prevent the undefined behavior due to
+      overflow as a result of trying to represent the minimum 'int' value of
+      -9223372036854775808 as a literal in a script -- the unsigned value is
+      cast to signed yielding INT64_MIN, then INT64_MIN is negated.
+
+  * Avoid signed integer overflow when combining SMB header PID bits (Jon Siwek, Corelight)
+
+    Such an overflow invokes undefined behavior.
+
+  * Avoid unary negation of INT64_MIN in modp_litoa10 (Jon Siwek, Corelight)
+
+    Overlow can occur in that case, which is undefined behavior.
+
+  * Avoid double-to-int conversion overflows in modp_dtoa functions (Jon Siwek, Corelight)
+
+    Those methods already had a fallback to use sprintf() for large values
+    except:
+
+    * The check-for-large-value was unnecessarily done after many
+      operations that aren't relevant to the check and those operations can
+      result in a conversion overflow (undefined behavior).
+
+    * The check-for-large-value was using the literal value for a
+      32-bit INT_MAX instead of just using INT_MAX.  For a platform where
+      `int` is less than 32-bits, the same conversion overflow from the
+      previous point could still occur (undefined behavior).
+
+    * The check-for-large-value was not inclusive of INT_MAX.
+      In a case where the conversion of INT_MAX itself to a double
+      can't be represented exactly, it's implementation-defined whether
+      the closest higher or closest lower representable-value is selected.
+      If the higher value is selected, then a `double` value comparing equal
+      to INT_MAX-as-converted-to-double would cause an overflow of an `int`
+      upon conversion (undefined behavior).
+
+  * Fix divide-by-zero in Entropy analyzer (Jon Siwek, Corelight)
+
+  * Fix divide-by-zero in stats/profiling memory usage calculation (Jon Siwek, Corelight)
+
+  * Fix uninitialized field in POP3 fuzzer (Jon Siwek, Corelight)
+
+  * Add framework for running UndefinedBehaviorSanitizer in CI (Jon Siwek, Corelight)
+
 3.3.0-dev.329 | 2020-09-23 11:32:06 -0700

  * Update NEWS (Tim Wojtulewicz, Corelight)
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -134,7 +134,71 @@ if ( ZEEK_SANITIZERS )
    # interfere with the detection and cause CMAKE_THREAD_LIBS_INIT to not
    # include -lpthread when it should.
    find_package(Threads)
+
+    string(REPLACE "," " " _sanitizer_args "${ZEEK_SANITIZERS}")
+    separate_arguments(_sanitizer_args)
+    set(ZEEK_SANITIZERS "")
+
+    foreach ( _sanitizer ${_sanitizer_args} )
+        if ( ZEEK_SANITIZERS )
+            set(ZEEK_SANITIZERS "${ZEEK_SANITIZERS},")
+        endif ()
+
+        if ( NOT _sanitizer STREQUAL "undefined" )
+            set(ZEEK_SANITIZERS "${ZEEK_SANITIZERS}${_sanitizer}")
+            continue()
+        endif ()
+
+        if ( NOT DEFINED ZEEK_SANITIZER_UB_CHECKS )
+            if ( DEFINED ENV{ZEEK_TAILORED_UB_CHECKS} )
+                # list(APPEND _check_list "alignment") # TODO: fix associated errors
+                list(APPEND _check_list "bool")
+                # list(APPEND _check_list "builtin") # Not implemented in older GCCs
+                list(APPEND _check_list "bounds") # Covers both array/local bounds options below
+                # list(APPEND _check_list "array-bounds") # Not implemented by GCC
+                # list(APPEND _check_list "local-bounds") # Not normally part of "undefined"
+                list(APPEND _check_list "enum")
+                list(APPEND _check_list "float-cast-overflow")
+                list(APPEND _check_list "float-divide-by-zero")
+                # list(APPEND _check_list "function") # Not implemented by GCC
+                # list(APPEND _check_list "implicit-unsigned-integer-truncation") # Not truly UB
+                # list(APPEND _check_list "implicit-signed-integer-truncation") # Not truly UB
+                # list(APPEND _check_list "implicit-integer-sign-change") # Not truly UB
+                list(APPEND _check_list "integer-divide-by-zero")
+                list(APPEND _check_list "nonnull-attribute")
+                # list(APPEND _check_list "null") # TODO: fix associated errors
+                # list(APPEND _check_list "nullability-arg") # Not normally part of "undefined"
+                # list(APPEND _check_list "nullability-assign") # Not normally part of "undefined"
+                # list(APPEND _check_list "nullability-return") # Not normally part of "undefined"
+                # list(APPEND _check_list "objc-cast") # Not truly UB
+                list(APPEND _check_list "object-size")
+                # list(APPEND _check_list "pointer-overflow") # Not implemented in older GCCs
+                list(APPEND _check_list "return")
+                list(APPEND _check_list "returns-nonnull-attribute")
+                list(APPEND _check_list "shift")
+                # list(APPEND _check_list "unsigned-shift-base") # Not implemented by GCC
+                list(APPEND _check_list "signed-integer-overflow")
+                list(APPEND _check_list "unreachable")
+                # list(APPEND _check_list "unsigned-integer-overflow") # Not truly UB
+                list(APPEND _check_list "vla-bound")
+                # list(APPEND _check_list "vptr") # TODO: fix associated errors
+
+                string(REPLACE ";" "," _ub_checks "${_check_list}")
+                set(ZEEK_SANITIZER_UB_CHECKS "${_ub_checks}" CACHE INTERNAL "" FORCE)
+            else ()
+                set(ZEEK_SANITIZER_UB_CHECKS "undefined" CACHE INTERNAL "" FORCE)
+            endif ()
+        endif ()
+
+        set(ZEEK_SANITIZERS "${ZEEK_SANITIZERS}${ZEEK_SANITIZER_UB_CHECKS}")
+    endforeach ()
+
    set(_sanitizer_flags "-fsanitize=${ZEEK_SANITIZERS}")
+
+    if ( ZEEK_SANITIZER_UB_CHECKS )
+        set(_sanitizer_flags "${_sanitizer_flags} -fno-sanitize-recover=${ZEEK_SANITIZER_UB_CHECKS}")
+    endif ()
+
    set(_sanitizer_flags "${_sanitizer_flags} -fno-omit-frame-pointer")
    set(_sanitizer_flags "${_sanitizer_flags} -fno-optimize-sibling-calls")

--- a/2
+++ b/2
@ -1 +1 @@
-3.3.0-dev.329
+3.3.0-dev.341
--- a/src/RandTest.cc
+++ b/src/RandTest.cc
@ -136,7 +136,7 @@ void RandTest::end(double* r_ent, double* r_chisq,

 	/* Calculate Monte Carlo value for PI from percentage of hits
 	   within the circle */
-	montepi = 4.0 * (((double) inmont) / mcount);
+	montepi = mcount == 0 ? 0 : 4.0 * (((double) inmont) / mcount);

 	/* Return results through arguments */
 	*r_ent = ent;
--- a/src/Stats.cc
+++ b/src/Stats.cc
@ -126,6 +126,10 @@ void ProfileLogger::Log()
 		utime - first_utime, stime - first_stime, rtime - first_rtime));

 	int conn_mem_use = expensive ? sessions->ConnectionMemoryUsage() : 0;
+	double avg_conn_mem_use = 0;
+
+	if ( expensive && sessions->CurrentConnections() != 0 )
+		avg_conn_mem_use = conn_mem_use / static_cast<double>(sessions->CurrentConnections());

 	file->Write(util::fmt("%.06f Conns: total=%" PRIu64 " current=%" PRIu64 "/%" PRIi32 " mem=%" PRIi32 "K avg=%.1f table=%" PRIu32 "K connvals=%" PRIu32 "K\n",
 		run_state::network_time,
@ -133,7 +137,7 @@ void ProfileLogger::Log()
 		Connection::CurrentConnections(),
 		sessions->CurrentConnections(),
 		conn_mem_use,
-		expensive ? (conn_mem_use / double(sessions->CurrentConnections())) : 0,
+		avg_conn_mem_use,
 		expensive ? sessions->MemoryAllocation() / 1024 : 0,
 		expensive ? sessions->ConnectionMemoryUsageConnVals() / 1024 : 0
 		));
--- a/src/analyzer/protocol/smb/smb1-protocol.pac
+++ b/src/analyzer/protocol/smb/smb1-protocol.pac
@ -39,6 +39,11 @@
 %}

 refine connection SMB_Conn += {
+	function join_pid_bits(hi: uint16, lo: uint16): uint32
+		%{
+		return (static_cast<uint32_t>(hi) << 16) | static_cast<uint32_t>(lo);
+		%}
+
 	function proc_smb_message(h: SMB_Header, is_orig: bool): bool
 		%{
 		if ( smb1_message )
@ -306,7 +311,7 @@ type SMB_Header(is_orig: bool) = record {
 } &let {
 	err_status_type = (flags2 >> 14) & 1;
 	unicode         = (flags2 >> 15) & 1;
-	pid             = (pid_high * 0x10000) + pid_low;
+	pid: uint32     = $context.connection.join_pid_bits(pid_high, pid_low);
 	is_pipe: bool   = $context.connection.get_tree_is_pipe(tid);
 	proc : bool     = $context.connection.proc_smb_message(this, is_orig);
 } &byteorder=littleendian;
--- a/src/fuzzers/pop3-fuzzer.cc
+++ b/src/fuzzers/pop3-fuzzer.cc
@ -24,6 +24,7 @@ static zeek::Connection* add_connection()
 	conn_id.dst_addr = zeek::IPAddr("5.6.7.8");
 	conn_id.src_port = htons(23132);
 	conn_id.dst_port = htons(80);
+	conn_id.is_one_way = false;
 	zeek::detail::ConnIDKey key = zeek::detail::BuildConnIDKey(conn_id);
 	zeek::Connection* conn = new zeek::Connection(zeek::sessions, key, network_time_start,
 	                                  &conn_id, 1, &p, nullptr);
--- a/src/modp_numtoa.c
+++ b/src/modp_numtoa.c
@ -6,6 +6,7 @@
 #include <stdint.h>
 #include <stdio.h>
 #include <math.h>
+#include <limits.h>

 // other interesting references on num to string convesion
 // http://www.jb.man.ac.uk/~slowe/cpp/itoa.html
@ -56,7 +57,7 @@ void modp_uitoa10(uint32_t value, char* str)
 void modp_litoa10(int64_t value, char* str)
 {
    char* wstr=str;
-    uint64_t uvalue = (value < 0) ? -value : value;
+    uint64_t uvalue = (value < 0) ? (value == INT64_MIN ? (uint64_t)(INT64_MAX) + 1 : -value) : value;

    // Conversion. Number is reversed.
    do *wstr++ = (char)(48 + (uvalue % 10)); while(uvalue /= 10);
@ -88,8 +89,28 @@ void modp_dtoa(double value, char* str, int prec)
        str[0] = 'n'; str[1] = 'a'; str[2] = 'n'; str[3] = '\0';
        return;
    }
+
+    /* we'll work in positive values and deal with the
+       negative sign issue later */
+    int neg = 0;
+    if (value < 0) {
+        neg = 1;
+        value = -value;
+    }
+
    /* if input is larger than thres_max, revert to exponential */
-    const double thres_max = (double)(0x7FFFFFFF);
+    const double thres_max = (double)(INT_MAX);
+
+    /* for very large numbers switch back to native sprintf for exponentials.
+       anyone want to write code to replace this? */
+    /*
+      normal printf behavior is to print EVERY whole number digit
+      which can be 100s of characters overflowing your buffers == bad
+    */
+    if (value >= thres_max) {
+        sprintf(str, "%e", neg ? -value : value);
+        return;
+    }

    double diff = 0.0;
    char* wstr = str;
@ -101,16 +122,6 @@ void modp_dtoa(double value, char* str, int prec)
        prec = 9;
    }

-
-    /* we'll work in positive values and deal with the
-       negative sign issue later */
-    int neg = 0;
-    if (value < 0) {
-        neg = 1;
-        value = -value;
-    }
-
-
    int whole = (int) value;
    double tmp = (value - whole) * _pow10[prec];
    uint32_t frac = (uint32_t)(tmp);
@ -129,17 +140,6 @@ void modp_dtoa(double value, char* str, int prec)
        ++frac;
    }

-    /* for very large numbers switch back to native sprintf for exponentials.
-       anyone want to write code to replace this? */
-    /*
-      normal printf behavior is to print EVERY whole number digit
-      which can be 100s of characters overflowing your buffers == bad
-    */
-    if (value > thres_max) {
-        sprintf(str, "%e", neg ? -value : value);
-        return;
-    }
-
    if (prec == 0) {
        diff = value - whole;
        if (diff > 0.5) {
@ -189,8 +189,27 @@ void modp_dtoa2(double value, char* str, int prec)
        return;
    }

+    /* we'll work in positive values and deal with the
+       negative sign issue later */
+    int neg = 0;
+    if (value < 0) {
+        neg = 1;
+        value = -value;
+    }
+
    /* if input is larger than thres_max, revert to exponential */
-    const double thres_max = (double)(0x7FFFFFFF);
+    const double thres_max = (double)(INT_MAX);
+
+    /* for very large numbers switch back to native sprintf for exponentials.
+       anyone want to write code to replace this? */
+    /*
+      normal printf behavior is to print EVERY whole number digit
+      which can be 100s of characters overflowing your buffers == bad
+    */
+    if (value >= thres_max) {
+        sprintf(str, "%e", neg ? -value : value);
+        return;
+    }

    int count;
    double diff = 0.0;
@ -203,16 +222,6 @@ void modp_dtoa2(double value, char* str, int prec)
        prec = 9;
    }

-
-    /* we'll work in positive values and deal with the
-       negative sign issue later */
-    int neg = 0;
-    if (value < 0) {
-        neg = 1;
-        value = -value;
-    }
-
-
    int whole = (int) value;
    double tmp = (value - whole) * _pow10[prec];
    uint32_t frac = (uint32_t)(tmp);
@ -231,17 +240,6 @@ void modp_dtoa2(double value, char* str, int prec)
        ++frac;
    }

-    /* for very large numbers switch back to native sprintf for exponentials.
-       anyone want to write code to replace this? */
-    /*
-      normal printf behavior is to print EVERY whole number digit
-      which can be 100s of characters overflowing your buffers == bad
-    */
-    if (value > thres_max) {
-        sprintf(str, "%e", neg ? -value : value);
-        return;
-    }
-
    if (prec == 0) {
        diff = value - whole;
        if (diff > 0.5) {
@ -301,21 +299,6 @@ void modp_dtoa3(double value, char* str, int n, int prec)
        return;
    }

-    /* if input is larger than thres_max, revert to exponential */
-    const double thres_max = (double)(0x7FFFFFFF);
-
-    int count;
-    double diff = 0.0;
-    char* wstr = str;
-
-    if (prec < 0) {
-        prec = 0;
-    } else if (prec > 9) {
-        /* precision of >= 10 can lead to overflow errors */
-        prec = 9;
-    }
-
-
    /* we'll work in positive values and deal with the
       negative sign issue later */
    int neg = 0;
@ -324,32 +307,23 @@ void modp_dtoa3(double value, char* str, int n, int prec)
        value = -value;
    }

-
-    int whole = (int) value;
-    double tmp = (value - whole) * _pow10[prec];
-    uint32_t frac = (uint32_t)(tmp);
-    diff = tmp - frac;
-
-    if (diff > 0.5) {
-        ++frac;
-        /* handle rollover, e.g.  case 0.99 with prec 1 is 1.0  */
-        if (frac >= _pow10[prec]) {
-            frac = 0;
-            ++whole;
-        }
-    } else if (diff == 0.5 && ((frac == 0) || (frac & 1))) {
-        /* if halfway, round up if odd, OR
-           if last digit is 0.  That last part is strange */
-        ++frac;
+    if (prec < 0) {
+        prec = 0;
+    } else if (prec > 9) {
+        /* precision of >= 10 can lead to overflow errors */
+        prec = 9;
    }

+    /* if input is larger than thres_max, revert to exponential */
+    const double thres_max = (double)(INT_MAX);
+
    /* for very large numbers switch back to native sprintf for exponentials.
       anyone want to write code to replace this? */
    /*
      normal printf behavior is to print EVERY whole number digit
      which can be 100s of characters overflowing your buffers == bad
    */
-    if (value > thres_max) {
+    if (value >= thres_max) {
        /* ---- Modified part, compared to modp_dtoa3. */
        int i = snprintf(str, n, "%.*f", prec, neg ? -value : value);

@ -373,6 +347,28 @@ void modp_dtoa3(double value, char* str, int n, int prec)
        /* ---- End of modified part.. */
    }

+    int count;
+    double diff = 0.0;
+    char* wstr = str;
+
+    int whole = (int) value;
+    double tmp = (value - whole) * _pow10[prec];
+    uint32_t frac = (uint32_t)(tmp);
+    diff = tmp - frac;
+
+    if (diff > 0.5) {
+        ++frac;
+        /* handle rollover, e.g.  case 0.99 with prec 1 is 1.0  */
+        if (frac >= _pow10[prec]) {
+            frac = 0;
+            ++whole;
+        }
+    } else if (diff == 0.5 && ((frac == 0) || (frac & 1))) {
+        /* if halfway, round up if odd, OR
+           if last digit is 0.  That last part is strange */
+        ++frac;
+    }
+
    if (prec == 0) {
        diff = value - whole;
        if (diff > 0.5) {
--- a/src/parse.y
+++ b/src/parse.y
@ -336,6 +336,36 @@ expr:
 			{
 			zeek::detail::set_location(@1, @2);
 			$$ = new zeek::detail::NegExpr({zeek::AdoptRef{}, $2});
+
+			if ( ! $$->IsError() && $2->IsConst() )
+				{
+				auto v = $2->ExprVal();
+				auto tag = v->GetType()->Tag();
+
+				if ( tag == zeek::TYPE_COUNT )
+					{
+					auto c = v->AsCount();
+					uint64_t int_max = static_cast<uint64_t>(INT64_MAX) + 1;
+
+					if ( c <= int_max )
+						{
+						auto ce = new zeek::detail::ConstExpr(zeek::val_mgr->Int(-c));
+						Unref($$);
+						$$ = ce;
+						}
+					else
+						{
+						$$->Error("literal is outside range of 'int' values");
+						$$->SetError();
+						}
+					}
+				else
+					{
+					auto ce = new zeek::detail::ConstExpr($$->Eval(nullptr));
+					Unref($$);
+					$$ = ce;
+					}
+				}
 			}

 	|	'+' expr	%prec '!'
--- a/src/patricia.c
+++ b/src/patricia.c
@ -96,7 +96,7 @@ comp_with_mask (void *addr, void *dest, u_int mask)

    if ( /* mask/8 == 0 || */ memcmp (addr, dest, mask / 8) == 0) {
 	int n = mask / 8;
-	int m = ((-1) << (8 - (mask % 8)));
+	int m = -(1 << (8 - (mask % 8)));

 	if (mask % 8 == 0 || (((u_char *)addr)[n] & m) == (((u_char *)dest)[n] & m))
 	    return (1);
--- a/testing/btest/Baseline/language.int-negation-range/out
+++ b/testing/btest/Baseline/language.int-negation-range/out
@ -0,0 +1,2 @@
+error in /home/jon/pro/zeek/zeek/testing/btest/.tmp/language.int-negation-range/int-negation-range.zeek, line 8: literal is outside range of 'int' values (-9223372036854775809)
+error in /home/jon/pro/zeek/zeek/testing/btest/.tmp/language.int-negation-range/int-negation-range.zeek, line 9: literal is outside range of 'int' values (-9223372036854775810)
--- a/testing/btest/btest.cfg
+++ b/testing/btest/btest.cfg
@ -30,3 +30,4 @@ ZEEK_DEFAULT_CONNECT_RETRY=1
 ZEEK_DISABLE_ZEEKYGEN=1
 ZEEK_ALLOW_INIT_ERRORS=1
 ZEEK_SUPERVISOR_NO_SIGKILL=1
+UBSAN_OPTIONS=print_stacktrace=1
--- a/testing/btest/language/int-negation-range.zeek
+++ b/testing/btest/language/int-negation-range.zeek
@ -0,0 +1,9 @@
+# @TEST-EXEC-FAIL: zeek -b %INPUT >out 2>&1
+# @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-remove-abspath btest-diff out
+
+local i_min = -9223372036854775808;
+local i_min_p1 = -9223372036854775807;
+
+# These should be caught at parse-time as outside the range of a 64-bit ints.
+local i_min_m1 = -9223372036854775809;
+local i_min_m2 = -9223372036854775810;
--- a/testing/external/subdir-btest.cfg
+++ b/testing/external/subdir-btest.cfg
@ -22,3 +22,4 @@ ZEEK_PROFILER_FILE=%(testbase)s/.tmp/script-coverage/XXXXXX
 ZEEK_DNS_FAKE=1
 # For fedora 21 - they disable MD5 for certificate verification and need setting an environment variable to permit it.
 OPENSSL_ENABLE_MD5_VERIFY=1
+UBSAN_OPTIONS=print_stacktrace=1