diff --git a/.cirrus.yml b/.cirrus.yml index f501f0bc4d..437388adf6 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -4,7 +4,7 @@ btest_retries: &BTEST_RETRIES 2 memory: &MEMORY 6GB config: &CONFIG --build-type=release --enable-cpp-tests -memcheck_config: &MEMCHECK_CONFIG --build-type=debug --enable-cpp-tests --sanitizers=address +memcheck_config: &MEMCHECK_CONFIG --build-type=debug --enable-cpp-tests --sanitizers=address --enable-fuzzers resources_template: &RESOURCES_TEMPLATE cpu: *CPUS @@ -133,5 +133,6 @@ memcheck_task: # AddressSanitizer uses a lot more memory than a typical config. memory: 16GB << : *CI_TEMPLATE + test_fuzzers_script: ./ci/test-fuzzers.sh env: ZEEK_CI_CONFIGURE_FLAGS: *MEMCHECK_CONFIG diff --git a/CHANGES b/CHANGES index f033a9fd1c..231226e8f2 100644 --- a/CHANGES +++ b/CHANGES @@ -1,4 +1,69 @@ +3.2.0-dev.505 | 2020-05-12 18:52:19 -0700 + + * Use zeek::detail namespace for fuzzer utils (Jon Siwek, Corelight) + + * Set terminating flag during fuzzer cleanup (Jon Siwek, Corelight) + + * Add missing include to standalone fuzzer driver (Jon Siwek, Corelight) + + * Improve standalone fuzzer driver error messages (Jon Siwek, Corelight) + + * Merge branch 'master' into topic/jsiwek/fuzzing (Jon Siwek, Corelight) + + * Test fuzzers against seed corpus under CI ASan build (Jon Siwek, Corelight) + + * Update fuzzing README with OSS-Fuzz integration notes (Jon Siwek, Corelight) + + * Link fuzzers against shared library to reduce executable sizes (Jon Siwek, Corelight) + + * Improve FuzzBuffer chunking + + Now allocates a new buffer for each chunk to better detect over-reads (Jon Siwek, Corelight) + + * Fix compiler warning in standalone fuzzer driver (Jon Siwek, Corelight) + + * Adjust minor fuzzing documentation (Jon Siwek, Corelight) + + * Exit immediately after running unit tests (Jon Siwek, Corelight) + + * Add OSS-Fuzz Zeek script search path to fuzzers (Jon Siwek, Corelight) + + * Assume libFuzzer when LIB_FUZZING_ENGINE file doesn't exist + + i.e. 
environment variable may be set, but not point to existing file (Jon Siwek, Corelight) + + * Change handling of LIB_FUZZING_ENGINE + + Should better support OSS-Fuzz, which may set it to either + be path to library to link against or linker flag like + "-fsanitize=fuzzer" (Jon Siwek, Corelight) + + * Change --enable-fuzzing to --enable-fuzzers + + Since it controls whether to build the fuzzer targets, not whether those + fuzzer targets actually use a fuzzing engine. (Jon Siwek, Corelight) + + * Add standalone driver for fuzz targets + + Useful for cases that don't need to use a fuzzing engine, but just run + the fuzz targets over some set of inputs, like for regression/CI tests. + + Also added a POP3 fuzzer dictionary, seed corpus, and README with + examples. (Jon Siwek, Corelight) + + * Add basic structure for fuzzing targets + + General changes: + + * Add -D/--deterministic command line option as + convenience/alternative to -G/--load-seeds (i.e. no file needed, it just + uses zero-initialized random seeds). It also changes Broker data + stores over to using deterministic timing rather than real time. 
+ + * Add option to make Reporter abort on runtime scripting errors (Jon Siwek, Corelight) + + 3.2.0-dev.486 | 2020-05-11 11:11:51 -0700 * Limit rate at which MMDB error/status messages are emitted diff --git a/CMakeLists.txt b/CMakeLists.txt index f9290c452a..33e560d0ae 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,6 +20,10 @@ include(cmake/CheckCompilerArch.cmake) ######################################################################## ## Project/Build Configuration +if ( ZEEK_ENABLE_FUZZERS ) + # Fuzzers use shared lib to save disk space, so need -fPIC on everything + set(CMAKE_POSITION_INDEPENDENT_CODE ON) +endif () if (ENABLE_ZEEK_UNIT_TESTS) enable_testing() @@ -485,6 +489,9 @@ message( "\n debugging: ${USE_PERFTOOLS_DEBUG}" "\njemalloc: ${ENABLE_JEMALLOC}" "\n" + "\nFuzz Targets: ${ZEEK_ENABLE_FUZZERS}" + "\nFuzz Engine: ${ZEEK_FUZZING_ENGINE}" + "\n" "\n================================================================\n" ) diff --git a/VERSION b/VERSION index fef583e48a..b55b9311f5 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.2.0-dev.486 +3.2.0-dev.505 diff --git a/ci/test-fuzzers.sh b/ci/test-fuzzers.sh new file mode 100755 index 0000000000..c3ae93d469 --- /dev/null +++ b/ci/test-fuzzers.sh @@ -0,0 +1,40 @@ +#! /usr/bin/env bash + +result=0 + +echo "Testing fuzzers against their seed corpus" +echo "-----------------------------------------" + +cd build || result=1 +. ./zeek-path-dev.sh + +fuzzers=$(find ./src/fuzzers -name 'zeek-*-fuzzer') + +for fuzzer_path in ${fuzzers}; do + fuzzer_exe=$(basename ${fuzzer_path}) + fuzzer_name=$(echo ${fuzzer_exe} | sed 's/zeek-\(.*\)-fuzzer/\1/g') + corpus="../src/fuzzers/${fuzzer_name}-corpus.zip" + + if [[ -e ${corpus} ]]; then + echo "Fuzzer: ${fuzzer_exe} ${corpus}" + ( rm -rf corpus && mkdir corpus ) || result=1 + ( cd corpus && unzip ../${corpus} >/dev/null ) || result=1 + ${fuzzer_path} corpus/* >${fuzzer_exe}.out 2>${fuzzer_exe}.err + + if [[ $? 
-eq 0 ]]; then + tail -n 1 ${fuzzer_exe}.out + else + result=1 + cat ${fuzzer_exe}.out + echo " FAILED" + cat ${fuzzer_exe}.err + fi + else + echo "Skipping Fuzzer (no corpus): ${fuzzer_exe}" + fi + + echo "-----------------------------------------" +done + + +exit ${result} diff --git a/ci/ubuntu-18.04/Dockerfile b/ci/ubuntu-18.04/Dockerfile index 8aaf38701d..9f6276f95f 100644 --- a/ci/ubuntu-18.04/Dockerfile +++ b/ci/ubuntu-18.04/Dockerfile @@ -21,6 +21,7 @@ RUN apt-get update && apt-get -y install \ sqlite3 \ curl \ wget \ + unzip \ && rm -rf /var/lib/apt/lists/* # Many distros adhere to PEP 394's recommendation for `python` = `python2` so diff --git a/configure b/configure index 48c35e64a5..1fd036f169 100755 --- a/configure +++ b/configure @@ -47,6 +47,7 @@ Usage: $0 [OPTION]... [VAR=VALUE]... Optional Features: --enable-debug compile in debugging mode (like --build-type=Debug) --enable-coverage compile with code coverage support (implies debugging mode) + --enable-fuzzers build fuzzer targets --enable-mobile-ipv6 analyze mobile IPv6 features defined by RFC 6275 --enable-perftools enable use of Google perftools (use tcmalloc) --enable-perftools-debug use Google's perftools for debugging @@ -229,6 +230,9 @@ while [ $# -ne 0 ]; do append_cache_entry ENABLE_COVERAGE BOOL true append_cache_entry ENABLE_DEBUG BOOL true ;; + --enable-fuzzers) + append_cache_entry ZEEK_ENABLE_FUZZERS BOOL true + ;; --enable-debug) append_cache_entry ENABLE_DEBUG BOOL true ;; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b3b679735a..bc2ea56555 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -153,6 +153,8 @@ add_subdirectory(iosource) add_subdirectory(logging) add_subdirectory(probabilistic) +add_subdirectory(fuzzers) + ######################################################################## ## bro target @@ -204,12 +206,12 @@ set_source_files_properties(nb_dns.c PROPERTIES COMPILE_FLAGS -fno-strict-aliasing) set(MAIN_SRCS - main.cc digest.cc net_util.cc util.cc 
module_util.cc zeek-affinity.cc + zeek-setup.cc Anon.cc Attr.cc Base64.cc @@ -357,7 +359,7 @@ elseif(${COMPILER_ARCHITECTURE} STREQUAL "x86_64") ) endif () -set(bro_SRCS +set(zeek_SRCS ${CMAKE_CURRENT_BINARY_DIR}/version.c ${BIF_SRCS} ${BINPAC_AUXSRC} @@ -378,9 +380,16 @@ set(bro_SRCS ${MAIN_SRCS} ) -collect_headers(bro_HEADERS ${bro_SRCS}) +collect_headers(zeek_HEADERS ${zeek_SRCS}) -add_executable(zeek ${bro_SRCS} ${bro_HEADERS} ${bro_SUBDIR_LIBS} ${bro_PLUGIN_LIBS}) +add_library(zeek_objs OBJECT ${zeek_SRCS}) + +add_executable(zeek main.cc + $ + ${zeek_HEADERS} + ${bro_SUBDIR_LIBS} + ${bro_PLUGIN_LIBS} +) target_link_libraries(zeek ${zeekdeps} ${CMAKE_THREAD_LIBS_INIT} ${CMAKE_DL_LIBS}) if ( NOT "${bro_LINKER_FLAGS}" STREQUAL "" ) @@ -423,12 +432,12 @@ add_dependencies(generate_outputs generate_outputs_stage2a generate_outputs_stag # Build __load__.zeek files for standard *.bif.zeek. bro_bif_create_loader(bif_loader "${bro_BASE_BIF_SCRIPTS}") add_dependencies(bif_loader ${bro_PLUGIN_DEPS} ${bro_SUBDIR_DEPS}) -add_dependencies(zeek bif_loader) +add_dependencies(zeek_objs bif_loader) # Build __load__.zeek files for plugins/*.bif.zeek. bro_bif_create_loader(bif_loader_plugins "${bro_PLUGIN_BIF_SCRIPTS}") add_dependencies(bif_loader_plugins ${bro_PLUGIN_DEPS} ${bro_SUBDIR_DEPS}) -add_dependencies(zeek bif_loader_plugins) +add_dependencies(zeek_objs bif_loader_plugins) # Install *.bif.zeek. 
install(DIRECTORY ${CMAKE_BINARY_DIR}/scripts/base/bif DESTINATION ${ZEEK_SCRIPT_INSTALL_PATH}/base) diff --git a/src/Options.cc b/src/Options.cc index a68f76595c..d7799cab84 100644 --- a/src/Options.cc +++ b/src/Options.cc @@ -41,6 +41,8 @@ void zeek::Options::filter_supervised_node_options() bare_mode = og.bare_mode; perftools_check_leaks = og.perftools_check_leaks; perftools_profile = og.perftools_profile; + deterministic_mode = og.deterministic_mode; + abort_on_scripting_errors = og.abort_on_scripting_errors; pcap_filter = og.pcap_filter; signature_files = og.signature_files; @@ -93,6 +95,7 @@ void zeek::usage(const char* prog, int code) fprintf(stderr, " -B|--debug | Enable debugging output for selected streams ('-B help' for help)\n"); #endif fprintf(stderr, " -C|--no-checksums | ignore checksums\n"); + fprintf(stderr, " -D|--deterministic | initialize random seeds to zero\n"); fprintf(stderr, " -F|--force-dns | force DNS\n"); fprintf(stderr, " -G|--load-seeds | load seeds from given file\n"); fprintf(stderr, " -H|--save-seeds | save seeds to given file\n"); @@ -202,6 +205,7 @@ zeek::Options zeek::parse_cmdline(int argc, char** argv) {"version", no_argument, nullptr, 'v'}, {"no-checksums", no_argument, nullptr, 'C'}, {"force-dns", no_argument, nullptr, 'F'}, + {"deterministic", no_argument, nullptr, 'D'}, {"load-seeds", required_argument, nullptr, 'G'}, {"save-seeds", required_argument, nullptr, 'H'}, {"print-plugins", no_argument, nullptr, 'N'}, @@ -232,7 +236,7 @@ zeek::Options zeek::parse_cmdline(int argc, char** argv) }; char opts[256]; - safe_strncpy(opts, "B:e:f:G:H:I:i:j::n:p:r:s:T:t:U:w:X:CFNPQSWabdhv", + safe_strncpy(opts, "B:e:f:G:H:I:i:j::n:p:r:s:T:t:U:w:X:CDFNPQSWabdhv", sizeof(opts)); #ifdef USE_PERFTOOLS_DEBUG @@ -329,6 +333,9 @@ zeek::Options zeek::parse_cmdline(int argc, char** argv) case 'C': rval.ignore_checksums = true; break; + case 'D': + rval.deterministic_mode = true; + break; case 'E': rval.pseudo_realtime = 1.0; if ( optarg ) diff 
--git a/src/Options.h b/src/Options.h index 496a5fa203..2dcbdea853 100644 --- a/src/Options.h +++ b/src/Options.h @@ -53,6 +53,8 @@ struct Options { bool debug_scripts = false; bool perftools_check_leaks = false; bool perftools_profile = false; + bool deterministic_mode = false; + bool abort_on_scripting_errors = false; bool run_unit_tests = false; std::vector doctest_args; diff --git a/src/Reporter.cc b/src/Reporter.cc index 1363f2af31..b0e1955b45 100644 --- a/src/Reporter.cc +++ b/src/Reporter.cc @@ -32,8 +32,9 @@ int closelog(); Reporter* reporter = nullptr; -Reporter::Reporter() +Reporter::Reporter(bool arg_abort_on_scripting_errors) { + abort_on_scripting_errors = arg_abort_on_scripting_errors; errors = 0; via_events = false; in_error_handler = 0; @@ -157,6 +158,10 @@ void Reporter::ExprRuntimeError(const Expr* expr, const char* fmt, ...) d.Description(), fmt, ap); va_end(ap); PopLocation(); + + if ( abort_on_scripting_errors ) + abort(); + throw InterpreterException(); } @@ -170,6 +175,10 @@ void Reporter::RuntimeError(const Location* location, const char* fmt, ...) DoLog("runtime error", reporter_error, out, nullptr, nullptr, true, true, "", fmt, ap); va_end(ap); PopLocation(); + + if ( abort_on_scripting_errors ) + abort(); + throw InterpreterException(); } diff --git a/src/Reporter.h b/src/Reporter.h index 97f1759963..9224b3e8b6 100644 --- a/src/Reporter.h +++ b/src/Reporter.h @@ -44,7 +44,7 @@ public: using WeirdFlowMap = std::map; using WeirdSet = std::unordered_set; - Reporter(); + Reporter(bool abort_on_scripting_errors); ~Reporter(); // Initialize reporter-sepcific options that are defined in script-layer. 
@@ -262,6 +262,7 @@ private: bool warnings_to_stderr; bool errors_to_stderr; bool after_zeek_init; + bool abort_on_scripting_errors = false; std::list > locations; diff --git a/src/Sessions.cc b/src/Sessions.cc index dc8158ec00..251d8de3a8 100644 --- a/src/Sessions.cc +++ b/src/Sessions.cc @@ -1109,6 +1109,23 @@ void NetSessions::Drain() } } +void NetSessions::Clear() + { + for ( const auto& entry : tcp_conns ) + Unref(entry.second); + for ( const auto& entry : udp_conns ) + Unref(entry.second); + for ( const auto& entry : icmp_conns ) + Unref(entry.second); + for ( const auto& entry : fragments ) + Unref(entry.second); + + tcp_conns.clear(); + udp_conns.clear(); + icmp_conns.clear(); + fragments.clear(); + } + void NetSessions::GetStats(SessionStats& s) const { s.num_TCP_conns = tcp_conns.size(); diff --git a/src/Sessions.h b/src/Sessions.h index 9cefd6156a..67589d8742 100644 --- a/src/Sessions.h +++ b/src/Sessions.h @@ -72,6 +72,9 @@ public: // that are still active. void Drain(); + // Clears the session maps. + void Clear(); + void GetStats(SessionStats& s) const; void Weird(const char* name, const Packet* pkt, diff --git a/src/broker/Manager.cc b/src/broker/Manager.cc index f2b4d10c33..d54b061580 100644 --- a/src/broker/Manager.cc +++ b/src/broker/Manager.cc @@ -124,10 +124,10 @@ static std::string RenderMessage(const broker::error& e) #endif -Manager::Manager(bool arg_reading_pcaps) +Manager::Manager(bool arg_use_real_time) { bound_port = 0; - reading_pcaps = arg_reading_pcaps; + use_real_time = arg_use_real_time; after_zeek_init = false; peer_count = 0; log_batch_size = 0; @@ -166,7 +166,7 @@ void Manager::InitPostScript() broker::broker_options options; options.disable_ssl = get_option("Broker::disable_ssl")->AsBool(); options.forward = get_option("Broker::forward_messages")->AsBool(); - options.use_real_time = ! 
reading_pcaps; + options.use_real_time = use_real_time; BrokerConfig config{std::move(options)}; @@ -283,6 +283,14 @@ void Manager::FlushPendingQueries() } } +void Manager::ClearStores() + { + FlushPendingQueries(); + + for ( const auto& [name, handle] : data_stores ) + handle->store.clear(); + } + uint16_t Manager::Listen(const string& addr, uint16_t port) { if ( bstate->endpoint.is_shutdown() ) diff --git a/src/broker/Manager.h b/src/broker/Manager.h index 2a38967458..bbd38f88be 100644 --- a/src/broker/Manager.h +++ b/src/broker/Manager.h @@ -64,7 +64,7 @@ public: /** * Constructor. */ - Manager(bool reading_pcaps); + Manager(bool use_real_time); /** * Destructor. @@ -316,6 +316,11 @@ public: */ size_t FlushLogBuffers(); + /** + * Flushes all pending data store queries and also clears all contents. + */ + void ClearStores(); + /** * @return communication statistics. */ @@ -383,7 +388,7 @@ private: Stats statistics; uint16_t bound_port; - bool reading_pcaps; + bool use_real_time; bool after_zeek_init; int peer_count; diff --git a/src/fuzzers/CMakeLists.txt b/src/fuzzers/CMakeLists.txt new file mode 100644 index 0000000000..97a050d265 --- /dev/null +++ b/src/fuzzers/CMakeLists.txt @@ -0,0 +1,72 @@ +######################################################################## +## Fuzzing targets + +if ( NOT ZEEK_ENABLE_FUZZERS ) + return() +endif () + +if ( NOT DEFINED ZEEK_FUZZING_ENGINE AND DEFINED ENV{LIB_FUZZING_ENGINE} ) + if ( "$ENV{LIB_FUZZING_ENGINE}" STREQUAL "" ) + # Empty LIB_FUZZING_ENGINE, assume libFuzzer + set(ZEEK_FUZZING_ENGINE "-fsanitize=fuzzer" CACHE INTERNAL "" FORCE) + else () + STRING(SUBSTRING "$ENV{LIB_FUZZING_ENGINE}" 0 1 _first_char) + + if ( "${_first_char}" STREQUAL "-" OR EXISTS "$ENV{LIB_FUZZING_ENGINE}" ) + # Looks like a linker flag or valid file, use it + set(ZEEK_FUZZING_ENGINE "$ENV{LIB_FUZZING_ENGINE}" CACHE INTERNAL "" FORCE) + else () + message(WARNING "$ENV{LIB_FUZZING_ENGINE} does not exist, assume libFuzzer") + 
set(ZEEK_FUZZING_ENGINE "-fsanitize=fuzzer" CACHE INTERNAL "" FORCE) + endif () + endif () +endif () + +macro(ADD_FUZZ_TARGET _name) + set(_fuzz_target zeek-${_name}-fuzzer) + set(_fuzz_source ${_name}-fuzzer.cc) + + add_executable(${_fuzz_target} ${_fuzz_source} ${ARGN}) + + target_link_libraries(${_fuzz_target} + zeek_fuzzer_shared + ${BIND_LIBRARY} + ${CMAKE_THREAD_LIBS_INIT} ${CMAKE_DL_LIBS}) + + if ( DEFINED ZEEK_FUZZING_ENGINE ) + target_link_libraries(${_fuzz_target} ${ZEEK_FUZZING_ENGINE}) + else () + target_link_libraries(${_fuzz_target} + $) + endif () +endmacro () + +include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}) + +add_library(zeek_fuzzer_standalone OBJECT standalone-driver.cc) + +add_library(zeek_fuzzer_shared SHARED + $ + ${bro_SUBDIR_LIBS} + ${bro_PLUGIN_LIBS} + FuzzBuffer.cc +) + +set(zeek_fuzzer_shared_deps) + +foreach(_dep ${zeekdeps} ) + # The bind library is handled a bit hack-ishly since it defaults to + # linking it as static library by default on Linux, but at least + # on one common distro, that static library wasn't compiled with -fPIC + # and so not usable in the shared library we're trying to build. + # So instead, the fuzzer executable, not the shared lib, links it. 
+ if ( NOT "${_dep}" STREQUAL "${BIND_LIBRARY}" ) + set(zeek_fuzzer_shared_deps ${zeek_fuzzer_shared_deps} ${_dep}) + endif () +endforeach () + +target_link_libraries(zeek_fuzzer_shared + ${zeek_fuzzer_shared_deps} + ${CMAKE_THREAD_LIBS_INIT} ${CMAKE_DL_LIBS}) + +add_fuzz_target(pop3) diff --git a/src/fuzzers/FuzzBuffer.cc b/src/fuzzers/FuzzBuffer.cc new file mode 100644 index 0000000000..ebae1a1db1 --- /dev/null +++ b/src/fuzzers/FuzzBuffer.cc @@ -0,0 +1,66 @@ +#if !defined(_GNU_SOURCE) +#define _GNU_SOURCE +#endif + +#include + +#include "FuzzBuffer.h" + +bool zeek::detail::FuzzBuffer::Valid() const + { + if ( end - begin < PKT_MAGIC_LEN + 2 ) + return false; + + if ( memcmp(begin, PKT_MAGIC, PKT_MAGIC_LEN) != 0) + return false; + + return true; + } + +std::optional zeek::detail::FuzzBuffer::Next() + { + if ( begin == end ) + return {}; + + auto pos = (const unsigned char*)memmem(begin, end - begin, + PKT_MAGIC, PKT_MAGIC_LEN); + + if ( ! pos ) + return {}; + + begin = pos + PKT_MAGIC_LEN; // resume just past the delimiter memmem() actually located, even if it was not at 'begin' + auto remaining = end - begin; + + if ( remaining < 2 ) + return {}; + + Chunk rval; + rval.is_orig = begin[0] & 0x01; + begin += 1; + + auto chunk_begin = begin; + + auto next = (const unsigned char*)memmem(begin, end - begin, + PKT_MAGIC, PKT_MAGIC_LEN); + + if ( next ) + begin = next; + else + begin = end; + + rval.size = begin - chunk_begin; + + if ( rval.size ) + { + // The point of allocating a new buffer here is to better detect + // analyzers that may over-read within a chunk -- ASan wouldn't + // complain if that happens to land within the full input buffer + // provided by the fuzzing engine, but will if we allocate a new buffer + // for each chunk. 
+ rval.data = std::make_unique(rval.size); + memcpy(rval.data.get(), chunk_begin, rval.size); + return {std::move(rval)}; + } + + return {}; + } diff --git a/src/fuzzers/FuzzBuffer.h b/src/fuzzers/FuzzBuffer.h new file mode 100644 index 0000000000..1b9cdb58a3 --- /dev/null +++ b/src/fuzzers/FuzzBuffer.h @@ -0,0 +1,57 @@ +#pragma once + +#include +#include +#include + +namespace zeek { namespace detail { + +/** + * This structure helps chunk/simulate protocol conversions from arbitrary + * input strings (like those produced by fuzzing engines). A fuzzing engine + * passes in some input string, and we chunk it into originator/responder + * messages according to any PKT_MAGIC delimiting bytestrings found in that + * input (originator vs. responder is determined by inspecting the low bit of + * the byte immediately following PKT_MAGIC and then the remaining bytes up + * to the next PKT_MAGIC delimiter are considered to be the next buffer to + * send along to an analyzer's Deliver method). + */ +class FuzzBuffer { +public: + + struct Chunk { + std::unique_ptr data; + size_t size; + bool is_orig; + }; + + static constexpr int PKT_MAGIC_LEN = 4; + static constexpr unsigned char PKT_MAGIC[PKT_MAGIC_LEN + 1] = "\1PKT"; + + /** + * Initialize fuzz buffer. + * @param data pointer to start of fuzzing buffer produced by fuzz engine. + * @param size size of the fuzzing buffer pointed to by *data*. + */ + FuzzBuffer(const unsigned char* data, size_t size) + : begin(data), end(data + size) + { } + + /** + * @return whether the fuzz buffer object is valid -- has enough bytes + * to Deliver to an analyzer and starts with a *PKT_MAGIC* bytestring. 
+ */ + bool Valid() const; + + /** + * @return the next chunk to deliver, if one could be extracted + */ + std::optional Next(); + +private: + + const unsigned char* begin; + const unsigned char* end; +}; + +}} // namespace zeek::detail diff --git a/src/fuzzers/README b/src/fuzzers/README new file mode 100644 index 0000000000..1d6857ff1b --- /dev/null +++ b/src/fuzzers/README @@ -0,0 +1,102 @@ +Fuzz Testing +============ + +This directory contains fuzzing targets for various Zeek components. The +primary way to use these directly would be with a fuzzing engine such as +libFuzzer: https://llvm.org/docs/LibFuzzer.html + +Example Build: Initial Fuzzing and Seed Corpus +---------------------------------------------- + +First configure and build for fuzzing (with libFuzzer) and code coverage:: + + $ LIB_FUZZING_ENGINE="" CC=clang CXX=clang++ \ + ./configure --build-type=debug --build-dir=./build-fuzz-cov \ + --sanitizers=fuzzer-no-link --enable-fuzzers --enable-coverage + + $ cd build-fuzz-cov && make -j $(nproc) + +.. note:: + + The default assumption for empty value of ``LIB_FUZZING_ENGINE`` is to use + libFuzzer by linking with ``-fsanitize=fuzzer``, but that environment + variable may be changed to use another flag or direct path to fuzzing engine + library to link against. + +Now start fuzzing to generate an initial corpus (this uses the POP3 fuzzer as +an example):: + + $ mkdir corpus && ./src/fuzzers/zeek-pop3-fuzzer corpus \ + -dict=../src/fuzzers/pop3.dict -max_total_time=300 -fork=$(($(nproc) - 1)) + +You can set options, like the runtime and parallelism level, to taste. For +other fuzz targets, you'd also want to use a different dictionary or omit +entirely. + +To minimize the size of the corpus:: + + $ mkdir min-corpus && ./src/fuzzers/zeek-pop3-fuzzer -merge=1 min-corpus corpus + +To check the code coverage of the corpus:: + + $ ./src/fuzzers/zeek-pop3-fuzzer min-corpus/* + + $ llvm-cov gcov $(find . 
-name POP3.cc.gcda) | grep -A1 POP3.cc + + # Annotated source file is now output to POP3.cc.gcov + +If the code coverage isn't satisfying, there may be something wrong with +the fuzzer, it may need a better dictionary, or it may need to fuzz for longer. + +The corpus can be added to revision control for use in regression testing and +as seed for OSS-Fuzz (check first that the zip file is a size that's sane to +commit):: + + zip -j ../src/fuzzers/pop3-corpus.zip min-corpus/* + +Example Build: Run Standalone Fuzz Targets +------------------------------------------ + +Fuzz targets can still be run without a fuzzing engine driving them. In +standalone mode, they'll process all input files provided as arguments +(e.g. useful for regression testing). + +First configure and build:: + + $ ./configure --build-type=debug --build-dir=./build-fuzz-check \ + --sanitizers=address --enable-fuzzers + + $ cd build-fuzz-check && make -j $(nproc) + +Get a set of inputs to process (we're using the POP3 fuzzer/corpus as example):: + + $ mkdir corpus && ( cd corpus && unzip ../../src/fuzzers/pop3-corpus.zip ) + +Now run the standalone fuzzer on the input corpus:: + + $ ./src/fuzzers/zeek-pop3-fuzzer corpus/* + +Note that you can also configure this build for coverage reports to verify the +code coverage (see the CFLAGS/CXXFLAGS from the first "Initial Fuzzing" +section). There's also the following ASan option which may need to be used:: + + $ export ASAN_OPTIONS=detect_odr_violation=0 + +OSS-Fuzz Integration +-------------------- + +The OSS-Fuzz integration is all contained in the external OSS-Fuzz repo's +Zeek project: https://github.com/google/oss-fuzz + +There's not much to it other than a Dockerfile and build script, but there are a couple of +conventions to follow to support the OSS-Fuzz configuration: + +* Fuzz target names are all like ``zeek-*-fuzzer``. The OSS-Fuzz build + scripts expect that and won't pick up any fuzzers that are named differently. 
+ +* Fuzzers should expect to have to load Zeek scripts from a directory named + ``oss-fuzz-zeek-scripts`` that lives next to the fuzzer executable. When + running fuzzers locally, the usual way of setting ``ZEEKPATH`` from the build + directory does still work, but fuzzers should additionally augment + ``ZEEKPATH`` with that special OSS-Fuzz scripts directory so they'll be able + to run in that environment. diff --git a/src/fuzzers/fuzzer-setup.h b/src/fuzzers/fuzzer-setup.h new file mode 100644 index 0000000000..e692ddd3c4 --- /dev/null +++ b/src/fuzzers/fuzzer-setup.h @@ -0,0 +1,61 @@ +#pragma once + +#include +#include + +#include "zeek-setup.h" + +#include "Event.h" +#include "Sessions.h" +#include "broker/Manager.h" +#include "file_analysis/Manager.h" + +extern "C" int LLVMFuzzerInitialize(int* argc, char*** argv) + { + auto zeekpath = getenv("ZEEKPATH"); + + if ( ! zeekpath ) + { + // Set up an expected script search path for use with OSS-Fuzz + auto constexpr oss_fuzz_scripts = "oss-fuzz-zeek-scripts"; + auto fuzzer_path = get_exe_path(*argv[0]); + auto fuzzer_dir = SafeDirname(fuzzer_path).result; + std::string fs = fmt("%s/%s", fuzzer_dir.data(), oss_fuzz_scripts); + auto p = fs.data(); + auto oss_fuzz_zeekpath = fmt(".:%s:%s/policy:%s/site", p, p, p); + + if ( setenv("ZEEKPATH", oss_fuzz_zeekpath, true) == -1 ) + abort(); + } + + zeek::Options options; + options.scripts_to_load.emplace_back("local.zeek"); + options.script_options_to_set.emplace_back("Site::local_nets={10.0.0.0/8}"); + options.script_options_to_set.emplace_back("Log::default_writer=Log::WRITER_NONE"); + options.deterministic_mode = true; + options.ignore_checksums = true; + options.abort_on_scripting_errors = true; + + if ( zeek::detail::setup(*argc, *argv, &options).code ) + abort(); + + return 0; + } + +namespace zeek { namespace detail { + +void fuzzer_cleanup_one_input() + { + terminating = true; + broker_mgr->ClearStores(); + file_mgr->Terminate(); + timer_mgr->Expire(); + + 
mgr.Drain(); + sessions->Drain(); + mgr.Drain(); + sessions->Clear(); + terminating = false; + } + +}} // namespace zeek::detail diff --git a/src/fuzzers/pop3-corpus.zip b/src/fuzzers/pop3-corpus.zip new file mode 100644 index 0000000000..5ce7592c96 Binary files /dev/null and b/src/fuzzers/pop3-corpus.zip differ diff --git a/src/fuzzers/pop3-fuzzer.cc b/src/fuzzers/pop3-fuzzer.cc new file mode 100644 index 0000000000..5cc4776aad --- /dev/null +++ b/src/fuzzers/pop3-fuzzer.cc @@ -0,0 +1,77 @@ +#include "binpac.h" + +#include "Net.h" +#include "Conn.h" +#include "Sessions.h" +#include "analyzer/Analyzer.h" +#include "analyzer/Manager.h" +#include "analyzer/protocol/pia/PIA.h" +#include "analyzer/protocol/tcp/TCP.h" + +#include "FuzzBuffer.h" +#include "fuzzer-setup.h" + +static constexpr auto ZEEK_FUZZ_ANALYZER = "pop3"; + +static Connection* add_connection() + { + static constexpr double network_time_start = 1439471031; + net_update_time(network_time_start); + + Packet p; + ConnID conn_id; + conn_id.src_addr = IPAddr("1.2.3.4"); + conn_id.dst_addr = IPAddr("5.6.7.8"); + conn_id.src_port = htons(23132); + conn_id.dst_port = htons(80); + ConnIDKey key = BuildConnIDKey(conn_id); + Connection* conn = new Connection(sessions, key, network_time_start, + &conn_id, 1, &p, nullptr); + conn->SetTransport(TRANSPORT_TCP); + sessions->Insert(conn); + return conn; + } + +static analyzer::Analyzer* add_analyzer(Connection* conn) + { + analyzer::tcp::TCP_Analyzer* tcp = new analyzer::tcp::TCP_Analyzer(conn); + analyzer::pia::PIA* pia = new analyzer::pia::PIA_TCP(conn); + auto a = analyzer_mgr->InstantiateAnalyzer(ZEEK_FUZZ_ANALYZER, conn); + tcp->AddChildAnalyzer(a); + tcp->AddChildAnalyzer(pia->AsAnalyzer()); + conn->SetRootAnalyzer(tcp, pia); + return a; + } + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) + { + zeek::detail::FuzzBuffer fb{data, size}; + + if ( ! 
fb.Valid() ) + return 0; + + auto conn = add_connection(); + auto a = add_analyzer(conn); + + for ( ; ; ) + { + auto chunk = fb.Next(); + + if ( ! chunk ) + break; + + try + { + a->DeliverStream(chunk->size, chunk->data.get(), chunk->is_orig); + } + catch ( const binpac::Exception& e ) + { + } + + chunk = {}; + mgr.Drain(); + } + + zeek::detail::fuzzer_cleanup_one_input(); + return 0; + } diff --git a/src/fuzzers/pop3.dict b/src/fuzzers/pop3.dict new file mode 100644 index 0000000000..1785e830d6 --- /dev/null +++ b/src/fuzzers/pop3.dict @@ -0,0 +1,21 @@ +"\x01PKT" +"OK" +"ERR" +"USER" +"PASS" +"APOP" +"AUTH" +"STAT" +"LIST" +"RETR" +"DELE" +"RSET" +"NOOP" +"LAST" +"QUIT" +"TOP" +"CAPA" +"UIDL" +"STLS" +"XSENDER" +"END" diff --git a/src/fuzzers/standalone-driver.cc b/src/fuzzers/standalone-driver.cc new file mode 100644 index 0000000000..69ca7202cf --- /dev/null +++ b/src/fuzzers/standalone-driver.cc @@ -0,0 +1,64 @@ +#include +#include +#include +#include +#include +#include +#include + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size); +extern "C" int LLVMFuzzerInitialize(int* argc, char*** argv); + +int main(int argc, char** argv) + { + using namespace std::chrono; + auto agg_start = high_resolution_clock::now(); + auto num_inputs = argc - 1; + printf("Standalone fuzzer processing %d inputs\n", num_inputs); + + LLVMFuzzerInitialize(&argc, &argv); + + for ( auto i = 0; i < num_inputs; ++i ) + { + auto input_file_name = argv[i + 1]; + printf(" %s:", input_file_name); + // If ASan ends up aborting, the previous stdout output may not + // be flushed, so make sure to that and make it easier to see + // what input caused the crash. + fflush(stdout); + + auto f = fopen(input_file_name, "r"); + + if ( ! 
f ) + { + printf(" failed to open file: %s\n", strerror(errno)); + abort(); + } + + fseek(f, 0, SEEK_END); + auto input_length = ftell(f); + fseek(f, 0, SEEK_SET); + + auto input_buffer = std::make_unique(input_length); + auto bytes_read = fread(input_buffer.get(), 1, input_length, f); + + if ( bytes_read != static_cast(input_length) ) + { + printf(" failed to read full file: %zu/%ld\n", + bytes_read, input_length); + abort(); + } + + auto start = high_resolution_clock::now(); + LLVMFuzzerTestOneInput(input_buffer.get(), input_length); + auto stop = high_resolution_clock::now(); + auto dt = duration(stop - start).count(); + + printf(" %6ld bytes, %f seconds\n", input_length, dt); // %ld: input_length is the long returned by ftell() + fclose(f); + } + + auto agg_stop = high_resolution_clock::now(); + auto agg_dt = duration(agg_stop - agg_start).count(); + printf("Processed %d inputs in %fs\n", num_inputs, agg_dt); + } diff --git a/src/main.cc b/src/main.cc index a95815c0de..b107dd213c 100644 --- a/src/main.cc +++ b/src/main.cc @@ -1,904 +1,26 @@ // See the file "COPYING" in the main distribution directory for copyright. 
#include "zeek-config.h" +#include "zeek-setup.h" -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef USE_IDMEF -extern "C" { -#include -} -#endif - -#include -#include - -#include "Options.h" -#include "input.h" -#include "DNS_Mgr.h" -#include "Frame.h" -#include "Scope.h" -#include "Event.h" -#include "File.h" -#include "Reporter.h" -#include "Net.h" -#include "NetVar.h" -#include "Var.h" -#include "Timer.h" -#include "Stmt.h" -#include "Desc.h" -#include "Debug.h" -#include "DFA.h" -#include "RuleMatcher.h" -#include "Anon.h" -#include "EventRegistry.h" -#include "Stats.h" -#include "Brofiler.h" -#include "Traverse.h" -#include "Trigger.h" -#include "Hash.h" - -#include "supervisor/Supervisor.h" -#include "threading/Manager.h" -#include "input/Manager.h" -#include "logging/Manager.h" -#include "input/readers/raw/Raw.h" -#include "analyzer/Manager.h" -#include "analyzer/Tag.h" -#include "plugin/Manager.h" -#include "file_analysis/Manager.h" -#include "zeekygen/Manager.h" #include "iosource/Manager.h" -#include "broker/Manager.h" - -#include "binpac_bro.h" - -#include "3rdparty/sqlite3.h" - -#define DOCTEST_CONFIG_IMPLEMENT -#include "3rdparty/doctest.h" - -Brofiler brofiler; - -#ifndef HAVE_STRSEP -extern "C" { -char* strsep(char**, const char*); -}; -#endif - -extern "C" { -#include "setsignal.h" -}; - -#ifdef USE_PERFTOOLS_DEBUG -HeapLeakChecker* heap_checker = 0; -int perftools_leaks = 0; -int perftools_profile = 0; -#endif - -DNS_Mgr* dns_mgr; -TimerMgr* timer_mgr; -ValManager* val_mgr = nullptr; -logging::Manager* log_mgr = nullptr; -threading::Manager* thread_mgr = nullptr; -input::Manager* input_mgr = nullptr; -plugin::Manager* plugin_mgr = nullptr; -analyzer::Manager* analyzer_mgr = nullptr; -file_analysis::Manager* file_mgr = nullptr; -zeekygen::Manager* zeekygen_mgr = nullptr; -iosource::Manager* iosource_mgr = nullptr; -bro_broker::Manager* broker_mgr = nullptr; -zeek::Supervisor* zeek::supervisor_mgr = nullptr; 
-trigger::Manager* trigger_mgr = nullptr; - -std::vector zeek_script_prefixes; -Stmt* stmts; -EventHandlerPtr net_done = nullptr; -RuleMatcher* rule_matcher = nullptr; -EventRegistry* event_registry = nullptr; -ProfileLogger* profiling_logger = nullptr; -ProfileLogger* segment_logger = nullptr; -SampleLogger* sample_logger = nullptr; -int signal_val = 0; -extern char version[]; -const char* command_line_policy = nullptr; -vector params; -set requested_plugins; -const char* proc_status_file = nullptr; - -OpaqueType* md5_type = nullptr; -OpaqueType* sha1_type = nullptr; -OpaqueType* sha256_type = nullptr; -OpaqueType* entropy_type = nullptr; -OpaqueType* cardinality_type = nullptr; -OpaqueType* topk_type = nullptr; -OpaqueType* bloomfilter_type = nullptr; -OpaqueType* x509_opaque_type = nullptr; -OpaqueType* ocsp_resp_opaque_type = nullptr; -OpaqueType* paraglob_type = nullptr; - -// Keep copy of command line -int bro_argc; -char** bro_argv; - -const char* zeek_version() - { -#ifdef DEBUG - static char* debug_version = nullptr; - - if ( ! debug_version ) - { - int n = strlen(version) + sizeof("-debug") + 1; - debug_version = new char[n]; - snprintf(debug_version, n, "%s%s", version, "-debug"); - } - - return debug_version; -#else - return version; -#endif - } - -static std::vector to_cargs(const std::vector& args) - { - std::vector rval; - rval.reserve(args.size()); - - for ( const auto& arg : args ) - rval.emplace_back(arg.data()); - - return rval; - } - -bool show_plugins(int level) - { - plugin::Manager::plugin_list plugins = plugin_mgr->ActivePlugins(); - - if ( ! plugins.size() ) - { - printf("No plugins registered, not even any built-ins. 
This is probably a bug.\n"); - return false; - } - - ODesc d; - - if ( level == 1 ) - d.SetShort(); - - int count = 0; - - for ( plugin::Manager::plugin_list::const_iterator i = plugins.begin(); i != plugins.end(); i++ ) - { - if ( requested_plugins.size() - && requested_plugins.find((*i)->Name()) == requested_plugins.end() ) - continue; - - (*i)->Describe(&d); - - if ( ! d.IsShort() ) - d.Add("\n"); - - ++count; - } - - printf("%s", d.Description()); - - plugin::Manager::inactive_plugin_list inactives = plugin_mgr->InactivePlugins(); - - if ( inactives.size() && ! requested_plugins.size() ) - { - printf("\nInactive dynamic plugins:\n"); - - for ( plugin::Manager::inactive_plugin_list::const_iterator i = inactives.begin(); i != inactives.end(); i++ ) - { - string name = (*i).first; - string path = (*i).second; - printf(" %s (%s)\n", name.c_str(), path.c_str()); - } - } - - return count != 0; - } - -void done_with_network() - { - set_processing_status("TERMINATING", "done_with_network"); - - // Cancel any pending alarms (watchdog, in particular). - (void) alarm(0); - - if ( net_done ) - { - mgr.Drain(); - // Don't propagate this event to remote clients. - mgr.Dispatch(new Event(net_done, - {make_intrusive(timer_mgr->Time(), TYPE_TIME)}), - true); - } - - if ( profiling_logger ) - profiling_logger->Log(); - - terminating = true; - - analyzer_mgr->Done(); - timer_mgr->Expire(); - dns_mgr->Flush(); - mgr.Drain(); - mgr.Drain(); - - net_finish(1); - -#ifdef USE_PERFTOOLS_DEBUG - - if ( perftools_profile ) - { - HeapProfilerDump("post net_run"); - HeapProfilerStop(); - } - - if ( heap_checker && ! 
heap_checker->NoLeaks() ) - { - fprintf(stderr, "Memory leaks - aborting.\n"); - abort(); - } -#endif - - ZEEK_LSAN_DISABLE(); - } - -void terminate_bro() - { - set_processing_status("TERMINATING", "terminate_bro"); - - terminating = true; - - iosource_mgr->Wakeup("terminate_bro"); - - // File analysis termination may produce events, so do it early on in - // the termination process. - file_mgr->Terminate(); - - brofiler.WriteStats(); - - EventHandlerPtr zeek_done = internal_handler("zeek_done"); - if ( zeek_done ) - mgr.Enqueue(zeek_done, zeek::Args{}); - - timer_mgr->Expire(); - mgr.Drain(); - - if ( profiling_logger ) - { - // FIXME: There are some occasional crashes in the memory - // allocation code when killing Bro. Disabling this for now. - if ( ! (signal_val == SIGTERM || signal_val == SIGINT) ) - profiling_logger->Log(); - - delete profiling_logger; - } - - mgr.Drain(); - - notifier::registry.Terminate(); - log_mgr->Terminate(); - input_mgr->Terminate(); - thread_mgr->Terminate(); - broker_mgr->Terminate(); - dns_mgr->Terminate(); - - mgr.Drain(); - - plugin_mgr->FinishPlugins(); - - delete zeekygen_mgr; - delete analyzer_mgr; - delete file_mgr; - // broker_mgr, timer_mgr, and supervisor are deleted via iosource_mgr - delete iosource_mgr; - delete event_registry; - delete log_mgr; - delete reporter; - delete plugin_mgr; - delete val_mgr; - - // free the global scope - pop_scope(); - - reporter = nullptr; - } - -void zeek_terminate_loop(const char* reason) - { - set_processing_status("TERMINATING", reason); - reporter->Info("%s", reason); - - net_get_final_stats(); - done_with_network(); - net_delete(); - - terminate_bro(); - - // Close files after net_delete(), because net_delete() - // might write to connection content files. - BroFile::CloseOpenFiles(); - - delete rule_matcher; - - exit(0); - } - -RETSIGTYPE sig_handler(int signo) - { - set_processing_status("TERMINATING", "sig_handler"); - signal_val = signo; - - if ( ! 
terminating ) - iosource_mgr->Wakeup("sig_handler"); - - return RETSIGVAL; - } - -static void atexit_handler() - { - set_processing_status("TERMINATED", "atexit"); - } - -static void bro_new_handler() - { - out_of_memory("new"); - } - -static std::vector get_script_signature_files() - { - std::vector rval; - - // Parse rule files defined on the script level. - char* script_signature_files = - copy_string(internal_val("signature_files")->AsString()->CheckString()); - - char* tmp = script_signature_files; - char* s; - while ( (s = strsep(&tmp, " \t")) ) - if ( *s ) - rval.emplace_back(s); - - delete [] script_signature_files; - return rval; - } - -static std::string get_exe_path(const std::string& invocation) - { - if ( invocation.empty() ) - return ""; - - if ( invocation[0] == '/' || invocation[0] == '~' ) - // Absolute path - return invocation; - - if ( invocation.find('/') != std::string::npos ) - { - // Relative path - char cwd[PATH_MAX]; - - if ( ! getcwd(cwd, sizeof(cwd)) ) - { - fprintf(stderr, "failed to get current directory: %s\n", - strerror(errno)); - exit(1); - } - - return std::string(cwd) + "/" + invocation; - } - - auto path = getenv("PATH"); - - if ( ! 
path ) - return ""; - - return find_file(invocation, path); - } +#include "supervisor/Supervisor.h" +#include "Net.h" int main(int argc, char** argv) { - ZEEK_LSAN_DISABLE(); - std::set_new_handler(bro_new_handler); + auto time_start = current_time(true); + auto setup_result = zeek::detail::setup(argc, argv); - auto zeek_exe_path = get_exe_path(argv[0]); + if ( setup_result.code ) + return setup_result.code; - if ( zeek_exe_path.empty() ) - { - fprintf(stderr, "failed to get path to executable '%s'", argv[0]); - exit(1); - } + auto& options = setup_result.options; + auto do_net_run = iosource_mgr->Size() > 0 || + have_pending_timers || + BifConst::exit_only_after_terminate; - bro_argc = argc; - bro_argv = new char* [argc]; - - for ( int i = 0; i < argc; i++ ) - bro_argv[i] = copy_string(argv[i]); - - auto options = zeek::parse_cmdline(argc, argv); - - if ( options.print_usage ) - zeek::usage(argv[0], 0); - - if ( options.print_version ) - { - fprintf(stdout, "%s version %s\n", argv[0], zeek_version()); - exit(0); - } - - if ( options.run_unit_tests ) - { - doctest::Context context; - auto dargs = to_cargs(options.doctest_args); - context.applyCommandLine(dargs.size(), dargs.data()); - ZEEK_LSAN_ENABLE(); - return context.run(); - } - - auto stem_state = zeek::Supervisor::CreateStem(options.supervisor_mode); - - if ( zeek::Supervisor::ThisNode() ) - zeek::Supervisor::ThisNode()->Init(&options); - - double time_start = current_time(true); - - brofiler.ReadStats(); - - auto dns_type = options.dns_mode; - - if ( dns_type == DNS_DEFAULT && zeek::fake_dns() ) - dns_type = DNS_FAKE; - - RETSIGTYPE (*oldhandler)(int); - - zeek_script_prefixes = options.script_prefixes; - auto zeek_prefixes = zeekenv("ZEEK_PREFIXES"); - - if ( zeek_prefixes ) - tokenize_string(zeek_prefixes, ":", &zeek_script_prefixes); - - pseudo_realtime = options.pseudo_realtime; - -#ifdef USE_PERFTOOLS_DEBUG - perftools_leaks = options.perftools_check_leaks; - perftools_profile = 
options.perftools_profile; -#endif - - if ( options.debug_scripts ) - { - g_policy_debug = options.debug_scripts; - fprintf(stderr, "Zeek script debugging ON.\n"); - } - - if ( options.script_code_to_exec ) - command_line_policy = options.script_code_to_exec->data(); - - if ( options.debug_script_tracing_file ) - { - g_trace_state.SetTraceFile(options.debug_script_tracing_file->data()); - g_trace_state.TraceOn(); - } - - if ( options.process_status_file ) - proc_status_file = options.process_status_file->data(); - - atexit(atexit_handler); - set_processing_status("INITIALIZING", "main"); - - bro_start_time = current_time(true); - - val_mgr = new ValManager(); - reporter = new Reporter(); - thread_mgr = new threading::Manager(); - plugin_mgr = new plugin::Manager(); - -#ifdef DEBUG - if ( options.debug_log_streams ) - { - debug_logger.EnableStreams(options.debug_log_streams->data()); - - if ( getenv("ZEEK_DEBUG_LOG_STDERR") ) - debug_logger.OpenDebugLog(nullptr); - else - debug_logger.OpenDebugLog("debug"); - } -#endif - - if ( options.supervisor_mode ) - { - zeek::Supervisor::Config cfg = {}; - cfg.zeek_exe_path = zeek_exe_path; - options.filter_supervisor_options(); - zeek::supervisor_mgr = new zeek::Supervisor(std::move(cfg), - std::move(*stem_state)); - } - - const char* seed_load_file = zeekenv("ZEEK_SEED_FILE"); - - if ( options.random_seed_input_file ) - seed_load_file = options.random_seed_input_file->data(); - - init_random_seed((seed_load_file && *seed_load_file ? seed_load_file : nullptr), - options.random_seed_output_file ? options.random_seed_output_file->data() : nullptr); - // DEBUG_MSG("HMAC key: %s\n", md5_digest_print(shared_hmac_md5_key)); - init_hash_function(); - - ERR_load_crypto_strings(); - OPENSSL_add_all_algorithms_conf(); - SSL_library_init(); - SSL_load_error_strings(); - - // FIXME: On systems that don't provide /dev/urandom, OpenSSL doesn't - // seed the PRNG. 
We should do this here (but at least Linux, FreeBSD - // and Solaris provide /dev/urandom). - - int r = sqlite3_initialize(); - - if ( r != SQLITE_OK ) - reporter->Error("Failed to initialize sqlite3: %s", sqlite3_errstr(r)); - -#ifdef USE_IDMEF - char* libidmef_dtd_path_cstr = new char[options.libidmef_dtd_file.size() + 1]; - safe_strncpy(libidmef_dtd_path_cstr, options.libidmef_dtd_file.data(), - options.libidmef_dtd_file.size()); - globalsInit(libidmef_dtd_path_cstr); // Init LIBIDMEF globals - createCurrentDoc("1.0"); // Set a global XML document -#endif - - timer_mgr = new PQ_TimerMgr(); - - auto zeekygen_cfg = options.zeekygen_config_file.value_or(""); - zeekygen_mgr = new zeekygen::Manager(zeekygen_cfg, bro_argv[0]); - - add_essential_input_file("base/init-bare.zeek"); - add_essential_input_file("base/init-frameworks-and-bifs.zeek"); - - if ( ! options.bare_mode ) - add_input_file("base/init-default.zeek"); - - plugin_mgr->SearchDynamicPlugins(bro_plugin_path()); - - if ( options.plugins_to_load.empty() && options.scripts_to_load.empty() && - options.script_options_to_set.empty() && - ! options.pcap_file && ! options.interface && - ! options.identifier_to_print && - ! command_line_policy && ! options.print_plugins && - ! options.supervisor_mode && ! zeek::Supervisor::ThisNode() ) - add_input_file("-"); - - for ( const auto& script_option : options.script_options_to_set ) - params.push_back(script_option); - - for ( const auto& plugin : options.plugins_to_load ) - requested_plugins.insert(plugin); - - for ( const auto& script : options.scripts_to_load ) - add_input_file(script.data()); - - push_scope(nullptr, nullptr); - - dns_mgr = new DNS_Mgr(dns_type); - - // It would nice if this were configurable. This is similar to the - // chicken and the egg problem. It would be configurable by parsing - // policy, but we can't parse policy without DNS resolution. 
- dns_mgr->SetDir(".state"); - - iosource_mgr = new iosource::Manager(); - event_registry = new EventRegistry(); - analyzer_mgr = new analyzer::Manager(); - log_mgr = new logging::Manager(); - input_mgr = new input::Manager(); - file_mgr = new file_analysis::Manager(); - broker_mgr = new bro_broker::Manager(options.pcap_file.has_value()); - trigger_mgr = new trigger::Manager(); - - plugin_mgr->InitPreScript(); - analyzer_mgr->InitPreScript(); - file_mgr->InitPreScript(); - zeekygen_mgr->InitPreScript(); - - bool missing_plugin = false; - - for ( set::const_iterator i = requested_plugins.begin(); - i != requested_plugins.end(); i++ ) - { - if ( ! plugin_mgr->ActivateDynamicPlugin(*i) ) - missing_plugin = true; - } - - if ( missing_plugin ) - reporter->FatalError("Failed to activate requested dynamic plugin(s)."); - - plugin_mgr->ActivateDynamicPlugins(! options.bare_mode); - - init_event_handlers(); - - md5_type = new OpaqueType("md5"); - sha1_type = new OpaqueType("sha1"); - sha256_type = new OpaqueType("sha256"); - entropy_type = new OpaqueType("entropy"); - cardinality_type = new OpaqueType("cardinality"); - topk_type = new OpaqueType("topk"); - bloomfilter_type = new OpaqueType("bloomfilter"); - x509_opaque_type = new OpaqueType("x509"); - ocsp_resp_opaque_type = new OpaqueType("ocsp_resp"); - paraglob_type = new OpaqueType("paraglob"); - - // The leak-checker tends to produce some false - // positives (memory which had already been - // allocated before we start the checking is - // nevertheless reported; see perftools docs), thus - // we suppress some messages here. - -#ifdef USE_PERFTOOLS_DEBUG - { - HeapLeakChecker::Disabler disabler; -#endif - - is_parsing = true; - yyparse(); - is_parsing = false; - - RecordVal::DoneParsing(); - TableVal::DoneParsing(); - - init_general_global_var(); - init_net_var(); - init_builtin_funcs_subdirs(); - - // Must come after plugin activation (and also after hash - // initialization). 
- binpac::FlowBuffer::Policy flowbuffer_policy; - flowbuffer_policy.max_capacity = global_scope()->Lookup( - "BinPAC::flowbuffer_capacity_max")->ID_Val()->AsCount(); - flowbuffer_policy.min_capacity = global_scope()->Lookup( - "BinPAC::flowbuffer_capacity_min")->ID_Val()->AsCount(); - flowbuffer_policy.contract_threshold = global_scope()->Lookup( - "BinPAC::flowbuffer_contract_threshold")->ID_Val()->AsCount(); - binpac::init(&flowbuffer_policy); - - plugin_mgr->InitBifs(); - - if ( reporter->Errors() > 0 ) - exit(1); - - iosource_mgr->InitPostScript(); - plugin_mgr->InitPostScript(); - zeekygen_mgr->InitPostScript(); - broker_mgr->InitPostScript(); - timer_mgr->InitPostScript(); - mgr.InitPostScript(); - - if ( zeek::supervisor_mgr ) - zeek::supervisor_mgr->InitPostScript(); - - if ( options.print_plugins ) - { - bool success = show_plugins(options.print_plugins); - exit(success ? 0 : 1); - } - - analyzer_mgr->InitPostScript(); - file_mgr->InitPostScript(); - dns_mgr->InitPostScript(); - - if ( options.parse_only ) - { - int rc = (reporter->Errors() > 0 ? 1 : 0); - exit(rc); - } - -#ifdef USE_PERFTOOLS_DEBUG - } -#endif - - if ( reporter->Errors() > 0 ) - { - delete dns_mgr; - exit(1); - } - - reporter->InitOptions(); - KeyedHash::InitOptions(); - zeekygen_mgr->GenerateDocs(); - - if ( options.pcap_filter ) - { - ID* id = global_scope()->Lookup("cmd_line_bpf_filter"); - - if ( ! id ) - reporter->InternalError("global cmd_line_bpf_filter not defined"); - - id->SetVal(make_intrusive(*options.pcap_filter)); - } - - auto all_signature_files = options.signature_files; - - // Append signature files defined in "signature_files" script option - for ( auto&& sf : get_script_signature_files() ) - all_signature_files.emplace_back(std::move(sf)); - - // Append signature files defined in @load-sigs - for ( const auto& sf : sig_files ) - all_signature_files.emplace_back(sf); - - if ( ! 
all_signature_files.empty() ) - { - rule_matcher = new RuleMatcher(options.signature_re_level); - if ( ! rule_matcher->ReadFiles(all_signature_files) ) - { - delete dns_mgr; - exit(1); - } - - if ( options.print_signature_debug_info ) - rule_matcher->PrintDebug(); - - file_mgr->InitMagic(); - } - - if ( g_policy_debug ) - // ### Add support for debug command file. - dbg_init_debugger(nullptr); - - if ( ! options.pcap_file && ! options.interface ) - { - Val* interfaces_val = internal_val("interfaces"); - if ( interfaces_val ) - { - char* interfaces_str = - interfaces_val->AsString()->Render(); - - if ( interfaces_str[0] != '\0' ) - options.interface = interfaces_str; - - delete [] interfaces_str; - } - } - - if ( dns_type != DNS_PRIME ) - net_init(options.interface, options.pcap_file, options.pcap_output_file, options.use_watchdog); - - net_done = internal_handler("net_done"); - - if ( ! g_policy_debug ) - { - (void) setsignal(SIGTERM, sig_handler); - (void) setsignal(SIGINT, sig_handler); - (void) setsignal(SIGPIPE, SIG_IGN); - } - - // Cooperate with nohup(1). - if ( (oldhandler = setsignal(SIGHUP, sig_handler)) != SIG_DFL ) - (void) setsignal(SIGHUP, oldhandler); - - if ( dns_type == DNS_PRIME ) - { - dns_mgr->Verify(); - dns_mgr->Resolve(); - - if ( ! dns_mgr->Save() ) - reporter->FatalError("can't update DNS cache"); - - mgr.Drain(); - delete dns_mgr; - exit(0); - } - - // Print the ID. - if ( options.identifier_to_print ) - { - ID* id = global_scope()->Lookup(*options.identifier_to_print); - if ( ! id ) - reporter->FatalError("No such ID: %s\n", options.identifier_to_print->data()); - - ODesc desc; - desc.SetQuotes(true); - desc.SetIncludeStats(true); - id->DescribeExtended(&desc); - - fprintf(stdout, "%s\n", desc.Description()); - exit(0); - } - - if ( profiling_interval > 0 ) - { - profiling_logger = new ProfileLogger(profiling_file->AsFile(), - profiling_interval); - - if ( segment_profiling ) - segment_logger = profiling_logger; - } - - if ( ! 
reading_live && ! reading_traces ) - // Set up network_time to track real-time, since - // we don't have any other source for it. - net_update_time(current_time()); - - EventHandlerPtr zeek_init = internal_handler("zeek_init"); - if ( zeek_init ) //### this should be a function - mgr.Enqueue(zeek_init, zeek::Args{}); - - EventRegistry::string_list dead_handlers = - event_registry->UnusedHandlers(); - - if ( ! dead_handlers.empty() && check_for_unused_event_handlers ) - { - for ( const string& handler : dead_handlers ) - reporter->Warning("event handler never invoked: %s", handler.c_str()); - } - - // Enable LeakSanitizer before zeek_init() and even before executing - // top-level statements. Even though it's not bad if a leak happens only - // once at initialization, we have to assume that script-layer code causing - // such a leak can be placed in any arbitrary event handler and potentially - // cause more severe problems. - ZEEK_LSAN_ENABLE(); - - if ( stmts ) - { - stmt_flow_type flow; - Frame f(current_scope()->Length(), nullptr, nullptr); - g_frame_stack.push_back(&f); - - try - { - stmts->Exec(&f, flow); - } - catch ( InterpreterException& ) - { - reporter->FatalError("failed to execute script statements at top-level scope"); - } - - g_frame_stack.pop_back(); - } - - if ( options.ignore_checksums ) - ignore_checksums = 1; - - if ( zeek_script_loaded ) - { - // Queue events reporting loaded scripts. - for ( std::list::iterator i = files_scanned.begin(); i != files_scanned.end(); i++ ) - { - if ( i->skipped ) - continue; - - mgr.Enqueue(zeek_script_loaded, - make_intrusive(i->name.c_str()), - val_mgr->Count(i->include_level) - ); - } - } - - reporter->ReportViaEvents(true); - - // Drain the event queue here to support the protocols framework configuring DPM - mgr.Drain(); - - if ( reporter->Errors() > 0 && ! 
zeekenv("ZEEK_ALLOW_INIT_ERRORS") ) - reporter->FatalError("errors occurred while initializing"); - - broker_mgr->ZeekInitDone(); - reporter->ZeekInitDone(); - analyzer_mgr->DumpDebug(); - - have_pending_timers = ! reading_traces && timer_mgr->Size() > 0; - - if ( iosource_mgr->Size() > 0 || - have_pending_timers || - BifConst::exit_only_after_terminate ) + if ( do_net_run ) { if ( profiling_logger ) profiling_logger->Log(); @@ -954,24 +76,7 @@ int main(int argc, char** argv) (mem_net_done_total - mem_net_start_total) / 1024 / 1024, (mem_net_done_malloced - mem_net_start_malloced) / 1024 / 1024); } - - done_with_network(); - net_delete(); } - terminate_bro(); - - sqlite3_shutdown(); - - ERR_free_strings(); - EVP_cleanup(); - CRYPTO_cleanup_all_ex_data(); - - // Close files after net_delete(), because net_delete() - // might write to connection content files. - BroFile::CloseOpenFiles(); - - delete rule_matcher; - - return 0; + return zeek::detail::cleanup(do_net_run); } diff --git a/src/util.cc b/src/util.cc index 70b2f33635..7b3394e8d7 100644 --- a/src/util.cc +++ b/src/util.cc @@ -1089,7 +1089,8 @@ void bro_srandom(unsigned int seed) srandom(seed); } -void init_random_seed(const char* read_file, const char* write_file) +void init_random_seed(const char* read_file, const char* write_file, + bool use_empty_seeds) { std::array buf = {}; size_t pos = 0; // accumulates entropy @@ -1104,6 +1105,8 @@ void init_random_seed(const char* read_file, const char* write_file) else seeds_done = true; } + else if ( use_empty_seeds ) + seeds_done = true; #ifdef HAVE_GETRANDOM if ( ! 
seeds_done ) @@ -1724,6 +1727,38 @@ static string find_file_in_path(const string& filename, const string& path, return string(); } +std::string get_exe_path(const std::string& invocation) + { + if ( invocation.empty() ) + return ""; + + if ( invocation[0] == '/' || invocation[0] == '~' ) + // Absolute path + return invocation; + + if ( invocation.find('/') != std::string::npos ) + { + // Relative path + char cwd[PATH_MAX]; + + if ( ! getcwd(cwd, sizeof(cwd)) ) + { + fprintf(stderr, "failed to get current directory: %s\n", + strerror(errno)); + exit(1); + } + + return std::string(cwd) + "/" + invocation; + } + + auto path = getenv("PATH"); + + if ( ! path ) + return ""; + + return find_file(invocation, path); + } + string find_file(const string& filename, const string& path_set, const string& opt_ext) { diff --git a/src/util.h b/src/util.h index 21051b0a66..1b94bc2fc5 100644 --- a/src/util.h +++ b/src/util.h @@ -202,13 +202,13 @@ extern std::string strstrip(std::string s); extern void hmac_md5(size_t size, const unsigned char* bytes, unsigned char digest[16]); -// Initializes RNGs for bro_random() and MD5 usage. If seed is given, then -// it is used (to provide determinism). If load_file is given, the seeds -// (both random & MD5) are loaded from that file. This takes precedence -// over the "seed" argument. If write_file is given, the seeds are written -// to that file. -// -extern void init_random_seed(const char* load_file, const char* write_file); +// Initializes RNGs for bro_random() and MD5 usage. If load_file is given, +// the seeds (both random & MD5) are loaded from that file. This takes +// precedence over the "use_empty_seeds" argument, which just +// zero-initializes all seed values. If write_file is given, the seeds are +// written to that file. +extern void init_random_seed(const char* load_file, const char* write_file, + bool use_empty_seeds); // Retrieves the initial seed computed after the very first call to // init_random_seed(). 
Repeated calls to init_random_seed() will not affect @@ -340,6 +340,14 @@ std::string normalize_path(std::string_view path); */ std::string without_bropath_component(std::string_view path); +/** + * Gets the full path used to invoke some executable. + * @param invocation any possible string that may be seen in argv[0], such as + * absolute path, relative path, or name to lookup in PATH. + * @return the absolute path to the executable file + */ +std::string get_exe_path(const std::string& invocation); + /** * Locate a file within a given search path. * @param filename Name of a file to find. diff --git a/src/zeek-setup.cc b/src/zeek-setup.cc new file mode 100644 index 0000000000..b8d65a01b4 --- /dev/null +++ b/src/zeek-setup.cc @@ -0,0 +1,893 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +#include "zeek-config.h" +#include "zeek-setup.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef USE_IDMEF +extern "C" { +#include +} +#endif + +#include +#include + +#include "Options.h" +#include "input.h" +#include "DNS_Mgr.h" +#include "Frame.h" +#include "Scope.h" +#include "Event.h" +#include "File.h" +#include "Reporter.h" +#include "Net.h" +#include "NetVar.h" +#include "Var.h" +#include "Timer.h" +#include "Stmt.h" +#include "Desc.h" +#include "Debug.h" +#include "DFA.h" +#include "RuleMatcher.h" +#include "Anon.h" +#include "EventRegistry.h" +#include "Stats.h" +#include "Brofiler.h" +#include "Traverse.h" +#include "Trigger.h" +#include "Hash.h" + +#include "supervisor/Supervisor.h" +#include "threading/Manager.h" +#include "input/Manager.h" +#include "logging/Manager.h" +#include "input/readers/raw/Raw.h" +#include "analyzer/Manager.h" +#include "analyzer/Tag.h" +#include "plugin/Manager.h" +#include "file_analysis/Manager.h" +#include "zeekygen/Manager.h" +#include "iosource/Manager.h" +#include "broker/Manager.h" + +#include "binpac_bro.h" + +#include "3rdparty/sqlite3.h" + +#define 
DOCTEST_CONFIG_IMPLEMENT +#include "3rdparty/doctest.h" + +Brofiler brofiler; + +#ifndef HAVE_STRSEP +extern "C" { +char* strsep(char**, const char*); +}; +#endif + +extern "C" { +#include "setsignal.h" +}; + +#ifdef USE_PERFTOOLS_DEBUG +HeapLeakChecker* heap_checker = 0; +int perftools_leaks = 0; +int perftools_profile = 0; +#endif + +DNS_Mgr* dns_mgr; +TimerMgr* timer_mgr; +ValManager* val_mgr = nullptr; +logging::Manager* log_mgr = nullptr; +threading::Manager* thread_mgr = nullptr; +input::Manager* input_mgr = nullptr; +plugin::Manager* plugin_mgr = nullptr; +analyzer::Manager* analyzer_mgr = nullptr; +file_analysis::Manager* file_mgr = nullptr; +zeekygen::Manager* zeekygen_mgr = nullptr; +iosource::Manager* iosource_mgr = nullptr; +bro_broker::Manager* broker_mgr = nullptr; +zeek::Supervisor* zeek::supervisor_mgr = nullptr; +trigger::Manager* trigger_mgr = nullptr; + +std::vector zeek_script_prefixes; +Stmt* stmts; +EventHandlerPtr net_done = nullptr; +RuleMatcher* rule_matcher = nullptr; +EventRegistry* event_registry = nullptr; +ProfileLogger* profiling_logger = nullptr; +ProfileLogger* segment_logger = nullptr; +SampleLogger* sample_logger = nullptr; +int signal_val = 0; +extern char version[]; +const char* command_line_policy = nullptr; +vector params; +set requested_plugins; +const char* proc_status_file = nullptr; + +OpaqueType* md5_type = nullptr; +OpaqueType* sha1_type = nullptr; +OpaqueType* sha256_type = nullptr; +OpaqueType* entropy_type = nullptr; +OpaqueType* cardinality_type = nullptr; +OpaqueType* topk_type = nullptr; +OpaqueType* bloomfilter_type = nullptr; +OpaqueType* x509_opaque_type = nullptr; +OpaqueType* ocsp_resp_opaque_type = nullptr; +OpaqueType* paraglob_type = nullptr; + +// Keep copy of command line +int bro_argc; +char** bro_argv; + +const char* zeek_version() + { +#ifdef DEBUG + static char* debug_version = nullptr; + + if ( ! 
debug_version ) + { + int n = strlen(version) + sizeof("-debug") + 1; + debug_version = new char[n]; + snprintf(debug_version, n, "%s%s", version, "-debug"); + } + + return debug_version; +#else + return version; +#endif + } + +static std::vector to_cargs(const std::vector& args) + { + std::vector rval; + rval.reserve(args.size()); + + for ( const auto& arg : args ) + rval.emplace_back(arg.data()); + + return rval; + } + +bool show_plugins(int level) + { + plugin::Manager::plugin_list plugins = plugin_mgr->ActivePlugins(); + + if ( ! plugins.size() ) + { + printf("No plugins registered, not even any built-ins. This is probably a bug.\n"); + return false; + } + + ODesc d; + + if ( level == 1 ) + d.SetShort(); + + int count = 0; + + for ( plugin::Manager::plugin_list::const_iterator i = plugins.begin(); i != plugins.end(); i++ ) + { + if ( requested_plugins.size() + && requested_plugins.find((*i)->Name()) == requested_plugins.end() ) + continue; + + (*i)->Describe(&d); + + if ( ! d.IsShort() ) + d.Add("\n"); + + ++count; + } + + printf("%s", d.Description()); + + plugin::Manager::inactive_plugin_list inactives = plugin_mgr->InactivePlugins(); + + if ( inactives.size() && ! requested_plugins.size() ) + { + printf("\nInactive dynamic plugins:\n"); + + for ( plugin::Manager::inactive_plugin_list::const_iterator i = inactives.begin(); i != inactives.end(); i++ ) + { + string name = (*i).first; + string path = (*i).second; + printf(" %s (%s)\n", name.c_str(), path.c_str()); + } + } + + return count != 0; + } + +void done_with_network() + { + set_processing_status("TERMINATING", "done_with_network"); + + // Cancel any pending alarms (watchdog, in particular). + (void) alarm(0); + + if ( net_done ) + { + mgr.Drain(); + // Don't propagate this event to remote clients. 
+ mgr.Dispatch(new Event(net_done, + {make_intrusive(timer_mgr->Time(), TYPE_TIME)}), + true); + } + + if ( profiling_logger ) + profiling_logger->Log(); + + terminating = true; + + analyzer_mgr->Done(); + timer_mgr->Expire(); + dns_mgr->Flush(); + mgr.Drain(); + mgr.Drain(); + + net_finish(1); + +#ifdef USE_PERFTOOLS_DEBUG + + if ( perftools_profile ) + { + HeapProfilerDump("post net_run"); + HeapProfilerStop(); + } + + if ( heap_checker && ! heap_checker->NoLeaks() ) + { + fprintf(stderr, "Memory leaks - aborting.\n"); + abort(); + } +#endif + + ZEEK_LSAN_DISABLE(); + } + +void terminate_bro() + { + set_processing_status("TERMINATING", "terminate_bro"); + + terminating = true; + + iosource_mgr->Wakeup("terminate_bro"); + + // File analysis termination may produce events, so do it early on in + // the termination process. + file_mgr->Terminate(); + + brofiler.WriteStats(); + + EventHandlerPtr zeek_done = internal_handler("zeek_done"); + if ( zeek_done ) + mgr.Enqueue(zeek_done, zeek::Args{}); + + timer_mgr->Expire(); + mgr.Drain(); + + if ( profiling_logger ) + { + // FIXME: There are some occasional crashes in the memory + // allocation code when killing Bro. Disabling this for now. + if ( ! 
(signal_val == SIGTERM || signal_val == SIGINT) ) + profiling_logger->Log(); + + delete profiling_logger; + } + + mgr.Drain(); + + notifier::registry.Terminate(); + log_mgr->Terminate(); + input_mgr->Terminate(); + thread_mgr->Terminate(); + broker_mgr->Terminate(); + dns_mgr->Terminate(); + + mgr.Drain(); + + plugin_mgr->FinishPlugins(); + + delete zeekygen_mgr; + delete analyzer_mgr; + delete file_mgr; + // broker_mgr, timer_mgr, and supervisor are deleted via iosource_mgr + delete iosource_mgr; + delete event_registry; + delete log_mgr; + delete reporter; + delete plugin_mgr; + delete val_mgr; + + // free the global scope + pop_scope(); + + reporter = nullptr; + } + +void zeek_terminate_loop(const char* reason) + { + set_processing_status("TERMINATING", reason); + reporter->Info("%s", reason); + + net_get_final_stats(); + done_with_network(); + net_delete(); + + terminate_bro(); + + // Close files after net_delete(), because net_delete() + // might write to connection content files. + BroFile::CloseOpenFiles(); + + delete rule_matcher; + + exit(0); + } + +RETSIGTYPE sig_handler(int signo) + { + set_processing_status("TERMINATING", "sig_handler"); + signal_val = signo; + + if ( ! terminating ) + iosource_mgr->Wakeup("sig_handler"); + + return RETSIGVAL; + } + +static void atexit_handler() + { + set_processing_status("TERMINATED", "atexit"); + } + +static void bro_new_handler() + { + out_of_memory("new"); + } + +static std::vector get_script_signature_files() + { + std::vector rval; + + // Parse rule files defined on the script level. 
+ char* script_signature_files = + copy_string(internal_val("signature_files")->AsString()->CheckString()); + + char* tmp = script_signature_files; + char* s; + while ( (s = strsep(&tmp, " \t")) ) + if ( *s ) + rval.emplace_back(s); + + delete [] script_signature_files; + return rval; + } + +zeek::detail::SetupResult zeek::detail::setup(int argc, char** argv, + zeek::Options* zopts) + { + ZEEK_LSAN_DISABLE(); + std::set_new_handler(bro_new_handler); + + auto zeek_exe_path = get_exe_path(argv[0]); + + if ( zeek_exe_path.empty() ) + { + fprintf(stderr, "failed to get path to executable '%s'", argv[0]); + exit(1); + } + + bro_argc = argc; + bro_argv = new char* [argc]; + + for ( int i = 0; i < argc; i++ ) + bro_argv[i] = copy_string(argv[i]); + + auto options = zopts ? *zopts : zeek::parse_cmdline(argc, argv); + + if ( options.print_usage ) + zeek::usage(argv[0], 0); + + if ( options.print_version ) + { + fprintf(stdout, "%s version %s\n", argv[0], zeek_version()); + exit(0); + } + + if ( options.run_unit_tests ) + { + doctest::Context context; + auto dargs = to_cargs(options.doctest_args); + context.applyCommandLine(dargs.size(), dargs.data()); + ZEEK_LSAN_ENABLE(); + exit(context.run()); + } + + auto stem_state = zeek::Supervisor::CreateStem(options.supervisor_mode); + + if ( zeek::Supervisor::ThisNode() ) + zeek::Supervisor::ThisNode()->Init(&options); + + brofiler.ReadStats(); + + auto dns_type = options.dns_mode; + + if ( dns_type == DNS_DEFAULT && zeek::fake_dns() ) + dns_type = DNS_FAKE; + + RETSIGTYPE (*oldhandler)(int); + + zeek_script_prefixes = options.script_prefixes; + auto zeek_prefixes = zeekenv("ZEEK_PREFIXES"); + + if ( zeek_prefixes ) + tokenize_string(zeek_prefixes, ":", &zeek_script_prefixes); + + pseudo_realtime = options.pseudo_realtime; + +#ifdef USE_PERFTOOLS_DEBUG + perftools_leaks = options.perftools_check_leaks; + perftools_profile = options.perftools_profile; +#endif + + if ( options.debug_scripts ) + { + g_policy_debug = 
options.debug_scripts; + fprintf(stderr, "Zeek script debugging ON.\n"); + } + + if ( options.script_code_to_exec ) + command_line_policy = options.script_code_to_exec->data(); + + if ( options.debug_script_tracing_file ) + { + g_trace_state.SetTraceFile(options.debug_script_tracing_file->data()); + g_trace_state.TraceOn(); + } + + if ( options.process_status_file ) + proc_status_file = options.process_status_file->data(); + + atexit(atexit_handler); + set_processing_status("INITIALIZING", "main"); + + bro_start_time = current_time(true); + + val_mgr = new ValManager(); + reporter = new Reporter(options.abort_on_scripting_errors); + thread_mgr = new threading::Manager(); + plugin_mgr = new plugin::Manager(); + +#ifdef DEBUG + if ( options.debug_log_streams ) + { + debug_logger.EnableStreams(options.debug_log_streams->data()); + + if ( getenv("ZEEK_DEBUG_LOG_STDERR") ) + debug_logger.OpenDebugLog(nullptr); + else + debug_logger.OpenDebugLog("debug"); + } +#endif + + if ( options.supervisor_mode ) + { + zeek::Supervisor::Config cfg = {}; + cfg.zeek_exe_path = zeek_exe_path; + options.filter_supervisor_options(); + zeek::supervisor_mgr = new zeek::Supervisor(std::move(cfg), + std::move(*stem_state)); + } + + const char* seed_load_file = zeekenv("ZEEK_SEED_FILE"); + + if ( options.random_seed_input_file ) + seed_load_file = options.random_seed_input_file->data(); + + init_random_seed((seed_load_file && *seed_load_file ? seed_load_file : nullptr), + options.random_seed_output_file ? options.random_seed_output_file->data() : nullptr, + options.deterministic_mode); + // DEBUG_MSG("HMAC key: %s\n", md5_digest_print(shared_hmac_md5_key)); + init_hash_function(); + + ERR_load_crypto_strings(); + OPENSSL_add_all_algorithms_conf(); + SSL_library_init(); + SSL_load_error_strings(); + + // FIXME: On systems that don't provide /dev/urandom, OpenSSL doesn't + // seed the PRNG. We should do this here (but at least Linux, FreeBSD + // and Solaris provide /dev/urandom). 
+ + int r = sqlite3_initialize(); + + if ( r != SQLITE_OK ) + reporter->Error("Failed to initialize sqlite3: %s", sqlite3_errstr(r)); + +#ifdef USE_IDMEF + char* libidmef_dtd_path_cstr = new char[options.libidmef_dtd_file.size() + 1]; + safe_strncpy(libidmef_dtd_path_cstr, options.libidmef_dtd_file.data(), + options.libidmef_dtd_file.size()); + globalsInit(libidmef_dtd_path_cstr); // Init LIBIDMEF globals + createCurrentDoc("1.0"); // Set a global XML document +#endif + + timer_mgr = new PQ_TimerMgr(); + + auto zeekygen_cfg = options.zeekygen_config_file.value_or(""); + zeekygen_mgr = new zeekygen::Manager(zeekygen_cfg, bro_argv[0]); + + add_essential_input_file("base/init-bare.zeek"); + add_essential_input_file("base/init-frameworks-and-bifs.zeek"); + + if ( ! options.bare_mode ) + add_input_file("base/init-default.zeek"); + + plugin_mgr->SearchDynamicPlugins(bro_plugin_path()); + + if ( options.plugins_to_load.empty() && options.scripts_to_load.empty() && + options.script_options_to_set.empty() && + ! options.pcap_file && ! options.interface && + ! options.identifier_to_print && + ! command_line_policy && ! options.print_plugins && + ! options.supervisor_mode && ! zeek::Supervisor::ThisNode() ) + add_input_file("-"); + + for ( const auto& script_option : options.script_options_to_set ) + params.push_back(script_option); + + for ( const auto& plugin : options.plugins_to_load ) + requested_plugins.insert(plugin); + + for ( const auto& script : options.scripts_to_load ) + add_input_file(script.data()); + + push_scope(nullptr, nullptr); + + dns_mgr = new DNS_Mgr(dns_type); + + // It would nice if this were configurable. This is similar to the + // chicken and the egg problem. It would be configurable by parsing + // policy, but we can't parse policy without DNS resolution. 
+ dns_mgr->SetDir(".state"); + + iosource_mgr = new iosource::Manager(); + event_registry = new EventRegistry(); + analyzer_mgr = new analyzer::Manager(); + log_mgr = new logging::Manager(); + input_mgr = new input::Manager(); + file_mgr = new file_analysis::Manager(); + auto broker_real_time = ! options.pcap_file && ! options.deterministic_mode; + broker_mgr = new bro_broker::Manager(broker_real_time); + trigger_mgr = new trigger::Manager(); + + plugin_mgr->InitPreScript(); + analyzer_mgr->InitPreScript(); + file_mgr->InitPreScript(); + zeekygen_mgr->InitPreScript(); + + bool missing_plugin = false; + + for ( set::const_iterator i = requested_plugins.begin(); + i != requested_plugins.end(); i++ ) + { + if ( ! plugin_mgr->ActivateDynamicPlugin(*i) ) + missing_plugin = true; + } + + if ( missing_plugin ) + reporter->FatalError("Failed to activate requested dynamic plugin(s)."); + + plugin_mgr->ActivateDynamicPlugins(! options.bare_mode); + + init_event_handlers(); + + md5_type = new OpaqueType("md5"); + sha1_type = new OpaqueType("sha1"); + sha256_type = new OpaqueType("sha256"); + entropy_type = new OpaqueType("entropy"); + cardinality_type = new OpaqueType("cardinality"); + topk_type = new OpaqueType("topk"); + bloomfilter_type = new OpaqueType("bloomfilter"); + x509_opaque_type = new OpaqueType("x509"); + ocsp_resp_opaque_type = new OpaqueType("ocsp_resp"); + paraglob_type = new OpaqueType("paraglob"); + + // The leak-checker tends to produce some false + // positives (memory which had already been + // allocated before we start the checking is + // nevertheless reported; see perftools docs), thus + // we suppress some messages here. 
+ +#ifdef USE_PERFTOOLS_DEBUG + { + HeapLeakChecker::Disabler disabler; +#endif + + is_parsing = true; + yyparse(); + is_parsing = false; + + RecordVal::DoneParsing(); + TableVal::DoneParsing(); + + init_general_global_var(); + init_net_var(); + init_builtin_funcs_subdirs(); + + // Must come after plugin activation (and also after hash + // initialization). + binpac::FlowBuffer::Policy flowbuffer_policy; + flowbuffer_policy.max_capacity = global_scope()->Lookup( + "BinPAC::flowbuffer_capacity_max")->ID_Val()->AsCount(); + flowbuffer_policy.min_capacity = global_scope()->Lookup( + "BinPAC::flowbuffer_capacity_min")->ID_Val()->AsCount(); + flowbuffer_policy.contract_threshold = global_scope()->Lookup( + "BinPAC::flowbuffer_contract_threshold")->ID_Val()->AsCount(); + binpac::init(&flowbuffer_policy); + + plugin_mgr->InitBifs(); + + if ( reporter->Errors() > 0 ) + exit(1); + + iosource_mgr->InitPostScript(); + plugin_mgr->InitPostScript(); + zeekygen_mgr->InitPostScript(); + broker_mgr->InitPostScript(); + timer_mgr->InitPostScript(); + mgr.InitPostScript(); + + if ( zeek::supervisor_mgr ) + zeek::supervisor_mgr->InitPostScript(); + + if ( options.print_plugins ) + { + bool success = show_plugins(options.print_plugins); + exit(success ? 0 : 1); + } + + analyzer_mgr->InitPostScript(); + file_mgr->InitPostScript(); + dns_mgr->InitPostScript(); + + if ( options.parse_only ) + { + int rc = (reporter->Errors() > 0 ? 1 : 0); + exit(rc); + } + +#ifdef USE_PERFTOOLS_DEBUG + } +#endif + + if ( reporter->Errors() > 0 ) + { + delete dns_mgr; + exit(1); + } + + reporter->InitOptions(); + KeyedHash::InitOptions(); + zeekygen_mgr->GenerateDocs(); + + if ( options.pcap_filter ) + { + ID* id = global_scope()->Lookup("cmd_line_bpf_filter"); + + if ( ! 
id ) + reporter->InternalError("global cmd_line_bpf_filter not defined"); + + id->SetVal(make_intrusive(*options.pcap_filter)); + } + + auto all_signature_files = options.signature_files; + + // Append signature files defined in "signature_files" script option + for ( auto&& sf : get_script_signature_files() ) + all_signature_files.emplace_back(std::move(sf)); + + // Append signature files defined in @load-sigs + for ( const auto& sf : sig_files ) + all_signature_files.emplace_back(sf); + + if ( ! all_signature_files.empty() ) + { + rule_matcher = new RuleMatcher(options.signature_re_level); + if ( ! rule_matcher->ReadFiles(all_signature_files) ) + { + delete dns_mgr; + exit(1); + } + + if ( options.print_signature_debug_info ) + rule_matcher->PrintDebug(); + + file_mgr->InitMagic(); + } + + if ( g_policy_debug ) + // ### Add support for debug command file. + dbg_init_debugger(nullptr); + + if ( ! options.pcap_file && ! options.interface ) + { + Val* interfaces_val = internal_val("interfaces"); + if ( interfaces_val ) + { + char* interfaces_str = + interfaces_val->AsString()->Render(); + + if ( interfaces_str[0] != '\0' ) + options.interface = interfaces_str; + + delete [] interfaces_str; + } + } + + if ( dns_type != DNS_PRIME ) + net_init(options.interface, options.pcap_file, options.pcap_output_file, options.use_watchdog); + + net_done = internal_handler("net_done"); + + if ( ! g_policy_debug ) + { + (void) setsignal(SIGTERM, sig_handler); + (void) setsignal(SIGINT, sig_handler); + (void) setsignal(SIGPIPE, SIG_IGN); + } + + // Cooperate with nohup(1). + if ( (oldhandler = setsignal(SIGHUP, sig_handler)) != SIG_DFL ) + (void) setsignal(SIGHUP, oldhandler); + + if ( dns_type == DNS_PRIME ) + { + dns_mgr->Verify(); + dns_mgr->Resolve(); + + if ( ! dns_mgr->Save() ) + reporter->FatalError("can't update DNS cache"); + + mgr.Drain(); + delete dns_mgr; + exit(0); + } + + // Print the ID. 
+ if ( options.identifier_to_print ) + { + ID* id = global_scope()->Lookup(*options.identifier_to_print); + if ( ! id ) + reporter->FatalError("No such ID: %s\n", options.identifier_to_print->data()); + + ODesc desc; + desc.SetQuotes(true); + desc.SetIncludeStats(true); + id->DescribeExtended(&desc); + + fprintf(stdout, "%s\n", desc.Description()); + exit(0); + } + + if ( profiling_interval > 0 ) + { + profiling_logger = new ProfileLogger(profiling_file->AsFile(), + profiling_interval); + + if ( segment_profiling ) + segment_logger = profiling_logger; + } + + if ( ! reading_live && ! reading_traces ) + // Set up network_time to track real-time, since + // we don't have any other source for it. + net_update_time(current_time()); + + EventHandlerPtr zeek_init = internal_handler("zeek_init"); + if ( zeek_init ) //### this should be a function + mgr.Enqueue(zeek_init, zeek::Args{}); + + EventRegistry::string_list dead_handlers = + event_registry->UnusedHandlers(); + + if ( ! dead_handlers.empty() && check_for_unused_event_handlers ) + { + for ( const string& handler : dead_handlers ) + reporter->Warning("event handler never invoked: %s", handler.c_str()); + } + + // Enable LeakSanitizer before zeek_init() and even before executing + // top-level statements. Even though it's not bad if a leak happens only + // once at initialization, we have to assume that script-layer code causing + // such a leak can be placed in any arbitrary event handler and potentially + // cause more severe problems. 
+ ZEEK_LSAN_ENABLE(); + + if ( stmts ) + { + stmt_flow_type flow; + Frame f(current_scope()->Length(), nullptr, nullptr); + g_frame_stack.push_back(&f); + + try + { + stmts->Exec(&f, flow); + } + catch ( InterpreterException& ) + { + reporter->FatalError("failed to execute script statements at top-level scope"); + } + + g_frame_stack.pop_back(); + } + + if ( options.ignore_checksums ) + ignore_checksums = 1; + + if ( zeek_script_loaded ) + { + // Queue events reporting loaded scripts. + for ( std::list::iterator i = files_scanned.begin(); i != files_scanned.end(); i++ ) + { + if ( i->skipped ) + continue; + + mgr.Enqueue(zeek_script_loaded, + make_intrusive(i->name.c_str()), + val_mgr->Count(i->include_level) + ); + } + } + + reporter->ReportViaEvents(true); + + // Drain the event queue here to support the protocols framework configuring DPM + mgr.Drain(); + + if ( reporter->Errors() > 0 && ! zeekenv("ZEEK_ALLOW_INIT_ERRORS") ) + reporter->FatalError("errors occurred while initializing"); + + broker_mgr->ZeekInitDone(); + reporter->ZeekInitDone(); + analyzer_mgr->DumpDebug(); + + have_pending_timers = ! reading_traces && timer_mgr->Size() > 0; + + return {0, std::move(options)}; + } + +int zeek::detail::cleanup(bool did_net_run) + { + if ( did_net_run ) + done_with_network(); + + net_delete(); + terminate_bro(); + + sqlite3_shutdown(); + + ERR_free_strings(); + EVP_cleanup(); + CRYPTO_cleanup_all_ex_data(); + + // Close files after net_delete(), because net_delete() + // might write to connection content files. + BroFile::CloseOpenFiles(); + + delete rule_matcher; + + return 0; + } diff --git a/src/zeek-setup.h b/src/zeek-setup.h new file mode 100644 index 0000000000..22cb85fde7 --- /dev/null +++ b/src/zeek-setup.h @@ -0,0 +1,31 @@ +// See the file "COPYING" in the main distribution directory for copyright. 
+ +#pragma once + +#include "Options.h" + +namespace zeek { namespace detail { + +struct SetupResult { + int code = 0; + zeek::Options options; +}; + +/** + * Initializes Zeek's global state. + * @param argc the argument count (same semantics as main function) + * @param argv the argument strings (same semantics as main function) + * @param options if provided, those options are used instead of + * deriving them by parsing the "argv" list. The "argv" list still + * needs to be provided regardless since some functionality requires + * it, particularly, several things use the value of argv[0]. + */ +SetupResult setup(int argc, char** argv, zeek::Options* options = nullptr); + +/** + * Cleans up Zeek's global state. + * @param did_net_run whether the net_run() was called. + */ +int cleanup(bool did_net_run); + +}} // namespace zeek::detail