Add standalone driver for fuzz targets

Useful for cases that don't need to use a fuzzing engine, but just run
the fuzz targets over some set of inputs, like for regression/CI tests.

Also added a POP3 fuzzer dictionary, seed corpus, and README with
examples.
This commit is contained in:
Jon Siwek 2020-04-23 20:09:22 -07:00
parent 8f1b34b915
commit 78b0b2183d
8 changed files with 168 additions and 8 deletions

View file

@ -481,6 +481,9 @@ message(
"\n debugging: ${USE_PERFTOOLS_DEBUG}"
"\njemalloc: ${ENABLE_JEMALLOC}"
"\n"
"\nFuzz Targets: ${ZEEK_ENABLE_FUZZING}"
"\nFuzz Engine: ${ZEEK_FUZZING_ENGINE}"
"\n"
"\n================================================================\n"
)

View file

@ -5,13 +5,17 @@ if ( NOT ZEEK_ENABLE_FUZZING )
return()
endif ()
if ( NOT DEFINED ZEEK_FUZZING_ENGINE AND DEFINED ENV{LIB_FUZZING_ENGINE} )
set(ZEEK_FUZZING_ENGINE $ENV{LIB_FUZZING_ENGINE} CACHE INTERNAL "" FORCE)
endif ()
macro(ADD_FUZZ_TARGET _name)
set(_fuzz_target zeek_fuzzer_${_name})
set(_fuzz_source ${_name}.cc)
set(_fuzz_target zeek-${_name}-fuzzer)
set(_fuzz_source ${_name}-fuzzer.cc)
add_executable(${_fuzz_target} ${_fuzz_source} ${ARGN}
$<TARGET_OBJECTS:zeek_objs>
$<TARGET_OBJECTS:zeek_fuzzer_objs>
$<TARGET_OBJECTS:zeek_fuzzer_common>
${zeek_HEADERS}
${bro_SUBDIR_LIBS}
${bro_PLUGIN_LIBS})
@ -19,12 +23,17 @@ macro(ADD_FUZZ_TARGET _name)
target_link_libraries(${_fuzz_target} ${zeekdeps}
${CMAKE_THREAD_LIBS_INIT} ${CMAKE_DL_LIBS})
# TODO: Use LIB_FUZZING_ENGINE env. var. if it exists
set_target_properties(${_fuzz_target} PROPERTIES LINK_FLAGS
"-fsanitize=fuzzer")
if ( DEFINED ZEEK_FUZZING_ENGINE )
set_target_properties(${_fuzz_target} PROPERTIES LINK_FLAGS
${ZEEK_FUZZING_ENGINE})
else ()
target_link_libraries(${_fuzz_target}
$<TARGET_OBJECTS:zeek_fuzzer_standalone>)
endif ()
endmacro ()
include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR})
add_library(zeek_fuzzer_objs OBJECT FuzzBuffer.cc)
add_library(zeek_fuzzer_common OBJECT FuzzBuffer.cc)
add_library(zeek_fuzzer_standalone OBJECT standalone-driver.cc)
add_fuzz_target(pop3)

79
src/fuzzers/README Normal file
View file

@ -0,0 +1,79 @@
Fuzz Testing
============
This directory contains fuzzing targets for various Zeek components. The
primary way to use these directly would be with a fuzzing engine such as
libFuzzer: https://llvm.org/docs/LibFuzzer.html
Example Build: Initial Fuzzing and Seed Corpus
----------------------------------------------
First configure and build for fuzzing (with libFuzzer) and code coverage::
$ LIB_FUZZING_ENGINE="-fsanitize=fuzzer" CC=clang CXX=clang++ \
CFLAGS="-fprofile-instr-generate -fcoverage-mapping" \
CXXFLAGS="-fprofile-instr-generate -fcoverage-mapping" \
./configure --build-type=RelWithDebInfo --build-dir=./build-fuzz-cov \
--sanitizers=fuzzer-no-link --enable-fuzzing
$ cd build-fuzz-cov && make -j $(nproc)
Now start fuzzing to generate an initial corpus (this uses the POP3 fuzzer as
an example)::
$ mkdir corpus && ./src/fuzzers/zeek-pop3-fuzzer corpus \
-dict=../src/fuzzers/pop3.dict -max_total_time=300 -fork=$(($(nproc) - 1))
You can set options, like the runtime and parallelism level, to taste. For
other fuzz targets, you'd also want to use a different dictionary or omit
the -dict option entirely.
To minimize the size of the corpus::
$ mkdir min-corpus && ./src/fuzzers/zeek-pop3-fuzzer -merge=1 min-corpus corpus
To check the code coverage of the corpus::
$ ./src/fuzzers/zeek-pop3-fuzzer min-corpus/*
$ llvm-profdata merge -sparse default.profraw -o zeek.profdata && \
llvm-cov report ./src/fuzzers/zeek-pop3-fuzzer -instr-profile=zeek.profdata \
../src/analyzer/protocol/pop3/
If the code coverage isn't satisfying, there may be something wrong with
the fuzzer, it may need a better dictionary, or it may need to fuzz for longer.
The corpus can be added to revision control for use in regression testing and
as seed for OSS-Fuzz (check first that the zip file is a size that's sane to
commit)::
$ zip -j ../src/fuzzers/pop3-corpus.zip min-corpus/*
Example Build: Run Standalone Fuzz Targets
------------------------------------------
Fuzz targets can still be run without a fuzzing engine driving them. In
standalone mode, they'll process all input files provided as arguments
(e.g. useful for regression testing).
First configure and build::
$ CC=clang CXX=clang++ \
./configure --build-type=debug --build-dir=./build-fuzz-check \
--sanitizers=address,fuzzer-no-link --enable-fuzzing
$ cd build-fuzz-check && make -j $(nproc)
Get a set of inputs to process (we're using the POP3 fuzzer/corpus as example)::
$ mkdir corpus && ( cd corpus && unzip ../../src/fuzzers/pop3-corpus.zip )
Now run the standalone fuzzer on the input corpus::
$ ./src/fuzzers/zeek-pop3-fuzzer corpus/*
Note that you can also configure this build for coverage reports to verify the
code coverage (see the CFLAGS/CXXFLAGS from the first "Initial Fuzzing"
section). There's also the following ASan option which may need to be used::
$ export ASAN_OPTIONS=detect_odr_violation=0

View file

@ -16,6 +16,7 @@ extern "C" int LLVMFuzzerInitialize(int* argc, char*** argv)
options.script_options_to_set.emplace_back("Site::local_nets={10.0.0.0/8}");
options.script_options_to_set.emplace_back("Log::default_writer=Log::WRITER_NONE");
options.deterministic_mode = true;
options.ignore_checksums = true;
options.abort_on_scripting_errors = true;
if ( zeek::setup(*argc, *argv, &options).code )

BIN
src/fuzzers/pop3-corpus.zip Normal file

Binary file not shown.

View file

@ -9,7 +9,7 @@
#include "analyzer/protocol/tcp/TCP.h"
#include "FuzzBuffer.h"
#include "fuzz-setup.h"
#include "fuzzer-setup.h"
static constexpr auto ZEEK_FUZZ_ANALYZER = "pop3";

21
src/fuzzers/pop3.dict Normal file
View file

@ -0,0 +1,21 @@
"\x01PKT"
"OK"
"ERR"
"USER"
"PASS"
"APOP"
"AUTH"
"STAT"
"LIST"
"RETR"
"DELE"
"RSET"
"NOOP"
"LAST"
"QUIT"
"TOP"
"CAPA"
"UIDL"
"STLS"
"XSENDER"
"END"

View file

@ -0,0 +1,47 @@
#include <cstdio>
#include <cstdint>
#include <cassert>
#include <memory>
#include <chrono>
// Fuzz target entry points. They are only declared here (with C linkage) and
// must be defined by whatever fuzz target this driver gets linked with.

// Called by main() once per input file, with the file's full contents.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size);
// Called by main() exactly once, before any input is processed, with pointers
// to the program's argc/argv.
extern "C" int LLVMFuzzerInitialize(int* argc, char*** argv);
// Standalone driver for a fuzz target: instead of being driven by a fuzzing
// engine, it feeds every file named on the command line to
// LLVMFuzzerTestOneInput() once, printing the size and wall-clock time of
// each input plus an aggregate time at the end.
//
// argv[1..argc-1] are the paths of the input files to process.
//
// Returns 0 once all inputs were processed, or 1 as soon as an input cannot
// be opened, measured, or fully read.  Failures are reported on stderr and
// checked unconditionally, i.e. they do not rely on assert() being enabled.
int main(int argc, char** argv)
	{
	using namespace std::chrono;
	auto agg_start = high_resolution_clock::now();
	auto num_inputs = argc - 1;
	printf("Standalone fuzzer processing %d inputs\n", num_inputs);

	LLVMFuzzerInitialize(&argc, &argv);

	for ( auto i = 0; i < num_inputs; ++i )
		{
		auto input_file_name = argv[i + 1];
		printf(" %s:", input_file_name);

		// Inputs are arbitrary bytes, so open in binary mode to keep the
		// size reported by ftell() consistent with what fread() returns on
		// platforms that translate line endings in text mode.
		auto f = fopen(input_file_name, "rb");

		if ( ! f )
			{
			fprintf(stderr, "\nfailed to open input file '%s'\n", input_file_name);
			return 1;
			}

		// ftell() reports errors as -1; never let that become a buffer size.
		long file_size = -1;

		if ( fseek(f, 0, SEEK_END) == 0 )
			file_size = ftell(f);

		if ( file_size < 0 || fseek(f, 0, SEEK_SET) != 0 )
			{
			fprintf(stderr, "\nfailed to determine size of input file '%s'\n",
			        input_file_name);
			fclose(f);
			return 1;
			}

		// size_t matches both fread()'s return type and the %zu conversion
		// used in the report below.
		auto input_length = static_cast<size_t>(file_size);
		auto input_buffer = std::make_unique<uint8_t[]>(input_length);
		auto bytes_read = fread(input_buffer.get(), 1, input_length, f);

		// The file is no longer needed once its contents are in memory, so
		// release it before handing control to the fuzz target.
		fclose(f);

		if ( bytes_read != input_length )
			{
			fprintf(stderr, "\nshort read on input file '%s': got %zu of %zu bytes\n",
			        input_file_name, bytes_read, input_length);
			return 1;
			}

		// Only the fuzz target call itself is timed, not the file I/O.
		auto start = high_resolution_clock::now();
		LLVMFuzzerTestOneInput(input_buffer.get(), input_length);
		auto stop = high_resolution_clock::now();
		auto dt = duration<double>(stop - start).count();
		printf(" %6zu bytes, %f seconds\n", input_length, dt);
		}

	auto agg_stop = high_resolution_clock::now();
	auto agg_dt = duration<double>(agg_stop - agg_start).count();
	printf("Processed %d inputs in %fs\n", num_inputs, agg_dt);
	return 0;
	}