diff --git a/CMakeLists.txt b/CMakeLists.txt index 8297f588b1..a453be0e8e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -481,6 +481,9 @@ message( "\n debugging: ${USE_PERFTOOLS_DEBUG}" "\njemalloc: ${ENABLE_JEMALLOC}" "\n" + "\nFuzz Targets: ${ZEEK_ENABLE_FUZZING}" + "\nFuzz Engine: ${ZEEK_FUZZING_ENGINE}" + "\n" "\n================================================================\n" ) diff --git a/src/fuzzers/CMakeLists.txt b/src/fuzzers/CMakeLists.txt index 6e6d3576b2..1d98ae2057 100644 --- a/src/fuzzers/CMakeLists.txt +++ b/src/fuzzers/CMakeLists.txt @@ -5,13 +5,17 @@ if ( NOT ZEEK_ENABLE_FUZZING ) return() endif () +if ( NOT DEFINED ZEEK_FUZZING_ENGINE AND DEFINED ENV{LIB_FUZZING_ENGINE} ) + set(ZEEK_FUZZING_ENGINE $ENV{LIB_FUZZING_ENGINE} CACHE INTERNAL "" FORCE) +endif () + macro(ADD_FUZZ_TARGET _name) - set(_fuzz_target zeek_fuzzer_${_name}) - set(_fuzz_source ${_name}.cc) + set(_fuzz_target zeek-${_name}-fuzzer) + set(_fuzz_source ${_name}-fuzzer.cc) add_executable(${_fuzz_target} ${_fuzz_source} ${ARGN} $ - $ + $ ${zeek_HEADERS} ${bro_SUBDIR_LIBS} ${bro_PLUGIN_LIBS}) @@ -19,12 +23,17 @@ macro(ADD_FUZZ_TARGET _name) target_link_libraries(${_fuzz_target} ${zeekdeps} ${CMAKE_THREAD_LIBS_INIT} ${CMAKE_DL_LIBS}) - # TODO: Use LIB_FUZZING_ENGINE env. var. 
if it exists - set_target_properties(${_fuzz_target} PROPERTIES LINK_FLAGS - "-fsanitize=fuzzer") + if ( DEFINED ZEEK_FUZZING_ENGINE ) + set_target_properties(${_fuzz_target} PROPERTIES LINK_FLAGS + ${ZEEK_FUZZING_ENGINE}) + else () + target_link_libraries(${_fuzz_target} + $) + endif () endmacro () include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}) -add_library(zeek_fuzzer_objs OBJECT FuzzBuffer.cc) +add_library(zeek_fuzzer_common OBJECT FuzzBuffer.cc) +add_library(zeek_fuzzer_standalone OBJECT standalone-driver.cc) add_fuzz_target(pop3) diff --git a/src/fuzzers/README b/src/fuzzers/README new file mode 100644 index 0000000000..e771b52010 --- /dev/null +++ b/src/fuzzers/README @@ -0,0 +1,79 @@ +Fuzz Testing +============ + +This directory contains fuzzing targets for various Zeek components. The +primary way to use these directly would be with a fuzzing engine such as +libFuzzer: https://llvm.org/docs/LibFuzzer.html + +Example Build: Initial Fuzzing and Seed Corpus +---------------------------------------------- + +First configure and build for fuzzing (with libFuzzer) and code coverage:: + + $ LIB_FUZZING_ENGINE="-fsanitize=fuzzer" CC=clang CXX=clang++ \ + CFLAGS="-fprofile-instr-generate -fcoverage-mapping" \ + CXXFLAGS="-fprofile-instr-generate -fcoverage-mapping" \ + ./configure --build-type=RelWithDebInfo --build-dir=./build-fuzz-cov \ + --sanitizers=fuzzer-no-link --enable-fuzzing + + $ cd build-fuzz-cov && make -j $(nproc) + +Now start fuzzing to generate an initial corpus (this uses the POP3 fuzzer as +an example):: + + $ mkdir corpus && ./src/fuzzers/zeek-pop3-fuzzer corpus \ + -dict=../src/fuzzers/pop3.dict -max_total_time=300 -fork=$(($(nproc) - 1)) + +You can set options, like the runtime and parallelism level, to taste. For +other fuzz targets, you'd also want to use a different dictionary or omit +it entirely. 
+ +To minimize the size of the corpus:: + + $ mkdir min-corpus && ./src/fuzzers/zeek-pop3-fuzzer -merge=1 min-corpus corpus + +To check the code coverage of the corpus:: + + $ ./src/fuzzers/zeek-pop3-fuzzer min-corpus/* + + $ llvm-profdata merge -sparse default.profraw -o zeek.profdata && \ + llvm-cov report ./src/fuzzers/zeek-pop3-fuzzer -instr-profile=zeek.profdata \ + ../src/analyzer/protocol/pop3/ + +If the code coverage isn't satisfying, there may be something wrong with +the fuzzer, it may need a better dictionary, or it may need to fuzz for longer. + +The corpus can be added to revision control for use in regression testing and +as seed for OSS-Fuzz (check first that the zip file is a size that's sane to +commit):: + + zip -j ../src/fuzzers/pop3-corpus.zip min-corpus/* + +Example Build: Run Standalone Fuzz Targets +------------------------------------------ + +Fuzz targets can still be run without a fuzzing engine driving them. In +standalone mode, they'll process all input files provided as arguments +(e.g. useful for regression testing). + +First configure and build:: + + $ CC=clang CXX=clang++ \ + ./configure --build-type=debug --build-dir=./build-fuzz-check \ + --sanitizers=address,fuzzer-no-link --enable-fuzzing + + $ cd build-fuzz-check && make -j $(nproc) + +Get a set of inputs to process (we're using the POP3 fuzzer/corpus as an example):: + + $ mkdir corpus && ( cd corpus && unzip ../../src/fuzzers/pop3-corpus.zip ) + +Now run the standalone fuzzer on the input corpus:: + + $ ./src/fuzzers/zeek-pop3-fuzzer corpus/* + +Note that you can also configure this build for coverage reports to verify the +code coverage (see the CFLAGS/CXXFLAGS from the first "Initial Fuzzing" +section). 
There's also the following ASan option which may need to be used:: + + $ export ASAN_OPTIONS=detect_odr_violation=0 diff --git a/src/fuzzers/fuzz-setup.h b/src/fuzzers/fuzzer-setup.h similarity index 96% rename from src/fuzzers/fuzz-setup.h rename to src/fuzzers/fuzzer-setup.h index 1ad8faff6a..a66d2a8448 100644 --- a/src/fuzzers/fuzz-setup.h +++ b/src/fuzzers/fuzzer-setup.h @@ -16,6 +16,7 @@ extern "C" int LLVMFuzzerInitialize(int* argc, char*** argv) options.script_options_to_set.emplace_back("Site::local_nets={10.0.0.0/8}"); options.script_options_to_set.emplace_back("Log::default_writer=Log::WRITER_NONE"); options.deterministic_mode = true; + options.ignore_checksums = true; options.abort_on_scripting_errors = true; if ( zeek::setup(*argc, *argv, &options).code ) diff --git a/src/fuzzers/pop3-corpus.zip b/src/fuzzers/pop3-corpus.zip new file mode 100644 index 0000000000..5ce7592c96 Binary files /dev/null and b/src/fuzzers/pop3-corpus.zip differ diff --git a/src/fuzzers/pop3.cc b/src/fuzzers/pop3-fuzzer.cc similarity index 98% rename from src/fuzzers/pop3.cc rename to src/fuzzers/pop3-fuzzer.cc index e98afec36f..89bd1e00b5 100644 --- a/src/fuzzers/pop3.cc +++ b/src/fuzzers/pop3-fuzzer.cc @@ -9,7 +9,7 @@ #include "analyzer/protocol/tcp/TCP.h" #include "FuzzBuffer.h" -#include "fuzz-setup.h" +#include "fuzzer-setup.h" static constexpr auto ZEEK_FUZZ_ANALYZER = "pop3"; diff --git a/src/fuzzers/pop3.dict b/src/fuzzers/pop3.dict new file mode 100644 index 0000000000..1785e830d6 --- /dev/null +++ b/src/fuzzers/pop3.dict @@ -0,0 +1,21 @@ +"\x01PKT" +"OK" +"ERR" +"USER" +"PASS" +"APOP" +"AUTH" +"STAT" +"LIST" +"RETR" +"DELE" +"RSET" +"NOOP" +"LAST" +"QUIT" +"TOP" +"CAPA" +"UIDL" +"STLS" +"XSENDER" +"END" diff --git a/src/fuzzers/standalone-driver.cc b/src/fuzzers/standalone-driver.cc new file mode 100644 index 0000000000..1557b1d055 --- /dev/null +++ b/src/fuzzers/standalone-driver.cc @@ -0,0 +1,47 @@ +#include +#include +#include +#include +#include + +extern "C" int 
LLVMFuzzerTestOneInput(const uint8_t* data, size_t size);
+extern "C" int LLVMFuzzerInitialize(int* argc, char*** argv);
+
+// Standalone driver for a fuzz target.
+//
+// Every command-line argument is treated as the path of an input file. Each
+// file is read fully into memory and handed to LLVMFuzzerTestOneInput() once,
+// and the time spent in that call is printed per input and in aggregate.
+//
+// Returns 0 when all inputs were processed, 1 if an input file could not be
+// opened, measured, or read completely.
+int main(int argc, char** argv)
+	{
+	using namespace std::chrono;
+	const auto agg_start = high_resolution_clock::now();
+
+	// Captured before LLVMFuzzerInitialize(), which receives argc/argv by
+	// pointer and so is able to modify them.
+	const auto num_inputs = argc - 1;
+	printf("Standalone fuzzer processing %d inputs\n", num_inputs);
+
+	LLVMFuzzerInitialize(&argc, &argv);
+
+	for ( auto i = 0; i < num_inputs; ++i )
+		{
+		const char* input_file_name = argv[i + 1];
+		printf(" %s:", input_file_name);
+
+		// Inputs are raw bytes, so open in binary mode.
+		auto f = fopen(input_file_name, "rb");
+
+		// Explicit checks rather than assert(): they must also hold in
+		// builds that define NDEBUG.
+		if ( ! f )
+			{
+			perror(" failed to open input file");
+			return 1;
+			}
+
+		// ftell() returns a long and reports failure as -1, so validate it
+		// before using it as an allocation size.
+		long file_size = -1;
+
+		if ( fseek(f, 0, SEEK_END) == 0 )
+			file_size = ftell(f);
+
+		if ( file_size < 0 || fseek(f, 0, SEEK_SET) != 0 )
+			{
+			perror(" failed to determine input file size");
+			fclose(f);
+			return 1;
+			}
+
+		const auto input_length = static_cast<size_t>(file_size);
+		auto input_buffer = std::make_unique<uint8_t[]>(input_length);
+		const auto bytes_read = fread(input_buffer.get(), 1, input_length, f);
+
+		// The file is not needed once its content is in memory.
+		fclose(f);
+
+		if ( bytes_read != input_length )
+			{
+			fprintf(stderr, " short read: got %zu of %zu bytes\n", bytes_read,
+			        input_length);
+			return 1;
+			}
+
+		// Time only the fuzz target itself, not the file I/O above.
+		const auto start = high_resolution_clock::now();
+		LLVMFuzzerTestOneInput(input_buffer.get(), input_length);
+		const auto stop = high_resolution_clock::now();
+		const auto dt = duration<double>(stop - start).count();
+
+		printf(" %6zu bytes, %f seconds\n", input_length, dt);
+		}
+
+	const auto agg_stop = high_resolution_clock::now();
+	const auto agg_dt = duration<double>(agg_stop - agg_start).count();
+	printf("Processed %d inputs in %fs\n", num_inputs, agg_dt);
+
+	return 0;
+	}