diff --git a/.cirrus.yml b/.cirrus.yml
index cf8da7f916..5c6c8fff73 100644
--- a/.cirrus.yml
+++ b/.cirrus.yml
@@ -355,18 +355,21 @@ ubsan_sanitizer_task:
     ZEEK_TAILORED_UB_CHECKS: 1
     UBSAN_OPTIONS: print_stacktrace=1
 
-# tsan_sanitizer_task:
-#   container:
-#     # Just uses a recent/common distro to run memory error/leak checks.
-#     dockerfile: ci/ubuntu-20.04/Dockerfile
-#     << : *SANITIZERS_RESOURCE_TEMPLATE
-#
-#   << : *CI_TEMPLATE
-#   test_fuzzers_script: ./ci/test-fuzzers.sh
-#   env:
-#     CXXFLAGS: -DZEEK_DICT_DEBUG
-#     ZEEK_CI_CONFIGURE_FLAGS: *TSAN_SANITIZER_CONFIG
-#     ZEEK_CI_DISABLE_SCRIPT_PROFILING: 1
+tsan_sanitizer_task:
+  container:
+    # Just uses a recent/common distro to run data race checks.
+    dockerfile: ci/ubuntu-20.04/Dockerfile
+    << : *SANITIZERS_RESOURCE_TEMPLATE
+
+  << : *CI_TEMPLATE
+  << : *SKIP_TASK_ON_PR
+  env:
+    ZEEK_CI_CONFIGURE_FLAGS: *TSAN_SANITIZER_CONFIG
+    ZEEK_CI_DISABLE_SCRIPT_PROFILING: 1
+    # If TSAN_OPTIONS is defined directly in the environment, configure fails
+    # to find OpenSSL. Instead we define it under a different name here and
+    # ci/test.sh re-exports it as TSAN_OPTIONS.
+    ZEEK_TSAN_OPTIONS: suppressions=/zeek/ci/tsan_suppressions.txt
 
 windows_task:
   # 2 hour timeout just for potential of building Docker image taking a while
diff --git a/ci/test.sh b/ci/test.sh
index f26234b7bc..ebedf019e3 100755
--- a/ci/test.sh
+++ b/ci/test.sh
@@ -19,6 +19,10 @@ fi
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
 . ${SCRIPT_DIR}/common.sh
 
+if [ -n "${ZEEK_TSAN_OPTIONS}" ]; then
+    export TSAN_OPTIONS="${ZEEK_TSAN_OPTIONS}"
+fi
+
 function pushd {
     command pushd "$@" >/dev/null || exit 1
 }
diff --git a/ci/tsan_suppressions.txt b/ci/tsan_suppressions.txt
new file mode 100644
index 0000000000..f836f44bdf
--- /dev/null
+++ b/ci/tsan_suppressions.txt
@@ -0,0 +1,34 @@
+# This is a list of suppressions for ThreadSanitizer. Anything listed here will be
+# ignored during testing. See https://github.com/google/sanitizers/wiki/ThreadSanitizerSuppressions
+# for documentation on how this file works.
+
+# There's a bug in libstdc++ that causes ThreadSanitizer to flag this as a data race.
+# See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=77704. Ignore any reports from this
+# as it's really really noisy, and there's not much we can do to fix it.
+race:std::ctype::narrow
+
+# =====================================================================
+# Everything below here is a known failure in Zeek. These are here until they
+# can be fixed, just so we can get the ThreadSanitizer builds running on Cirrus
+# and catch anything new. If we can't fix something in this list (possibly the
+# sqlite ones?) split them out into a separate block above here with a comment
+# as to why.
+
+race:broker::internal::connector::run_impl
+race:caf::net::multiplexer::set_thread_id
+race:caf::action::run
+
+# This one causes supervisor.config-bare-mode to fail occasionally but not always
+signal:caf::actor_control_block::enqueue
+
+# There's a bunch of failures down inside the sqlite code itself, mostly
+# around opening the database in the SQLite input reader and the SQLite
+# logging writer.
+race:sqlite3MutexInit
+race:sqlite3Malloc
+race:sqlite3_mutex_enter
+race:sqlite3_initialize
+
+# This one isn't actually in sqlite code, but some StringVal object gets ref'd by
+# zeek::id::find_const and throws a data race.
+race:zeek::logging::writer::detail::SQLite::DoInit