From 7fac5837c3fd64128206bb7878674c25723aa10b Mon Sep 17 00:00:00 2001 From: Arne Welzel Date: Mon, 9 Oct 2023 10:29:57 +0200 Subject: [PATCH] iosource/pcap: Support configurable buffer size On Linux with a default ext4 or tmpfs filesystem, the default buffer size for reading a pcap is chosen as 4k (strace/gdb validated). When reading large pcaps containing raw data transfers, the syscall overhead for read becomes visible in profiles. Support configurability of the buffer size and default to 128kb. When processing a ~830M PCAP (16 UDP connections, each transferring ~50MB) in bare mode, this change improves runtime from 1.39 sec to 1.29 sec. Increasing the buffer further didn't provide a noticeable boost. --- NEWS | 3 ++ scripts/base/init-bare.zeek | 5 +++ src/iosource/pcap/Source.cc | 36 ++++++++++++++++++- src/iosource/pcap/Source.h | 4 +++ src/iosource/pcap/pcap.bif | 1 + .../Baseline/core.pcap.input-error/output2 | 2 +- .../Baseline/core.pcap.wrong-format/output | 2 ++ .../Baseline/core.pcap.wrong-format/output2 | 2 ++ testing/btest/core/pcap/wrong-format.zeek | 10 ++++++ 9 files changed, 63 insertions(+), 2 deletions(-) create mode 100644 testing/btest/Baseline/core.pcap.wrong-format/output create mode 100644 testing/btest/Baseline/core.pcap.wrong-format/output2 create mode 100644 testing/btest/core/pcap/wrong-format.zeek diff --git a/NEWS b/NEWS index 8262d1618e..5621c132fe 100644 --- a/NEWS +++ b/NEWS @@ -102,6 +102,9 @@ Changed Functionality - Parameter lists for functions, events and hooks now use commas instead of semicolons in error messages or when printing such functions. +- The IO buffer size used for PCAP file reading is now always 128kb. This + new default can be changed via ``Pcap::bufsize_offline_bytes``. + Removed Functionality --------------------- diff --git a/scripts/base/init-bare.zeek b/scripts/base/init-bare.zeek index 4e96175fda..c71faa7979 100644 --- a/scripts/base/init-bare.zeek +++ b/scripts/base/init-bare.zeek @@ -5286,6 +5286,11 @@ export { ## interfaces. const bufsize = 128 &redef; + ## Number of bytes to use for buffering file read operations when reading + ## from a PCAP file. Setting this to 0 uses operating system defaults + ## as chosen by fopen(). + const bufsize_offline_bytes = 128 * 1024 &redef; + ## Default timeout for packet sources without file descriptors. ## ## For libpcap based packet sources that do not provide a usable diff --git a/src/iosource/pcap/Source.cc b/src/iosource/pcap/Source.cc index c8869a0c7c..aa79935f87 100644 --- a/src/iosource/pcap/Source.cc +++ b/src/iosource/pcap/Source.cc @@ -10,6 +10,8 @@ #include #endif +#include + #include "zeek/Event.h" #include "zeek/iosource/BPF_Program.h" #include "zeek/iosource/Packet.h" @@ -176,10 +178,42 @@ void PcapSource::OpenOffline() { char errbuf[PCAP_ERRBUF_SIZE]; - pd = pcap_open_offline(props.path.c_str(), errbuf); + FILE* f = nullptr; + if ( props.path == "-" ) + { + f = stdin; + } + else + { + if ( f = fopen(props.path.c_str(), "rb"); ! f ) + { + Error(util::fmt("unable to open %s: %s", props.path.c_str(), strerror(errno))); + return; + } + + // Setup file IO buffering with a bufsize_offline_bytes sized + // buffer if set, otherwise use what fopen() took as the default. + if ( BifConst::Pcap::bufsize_offline_bytes != 0 ) + { + iobuf.resize(BifConst::Pcap::bufsize_offline_bytes); + if ( util::detail::setvbuf(f, iobuf.data(), _IOFBF, iobuf.size()) != 0 ) + { + Error(util::fmt("unable to setvbuf %s: %s", props.path.c_str(), strerror(errno))); + fclose(f); + return; + } + } + } + + // pcap_fopen_offline() takes ownership of f on success and + // pcap_close() elsewhere should close it, too. + pd = pcap_fopen_offline(f, errbuf); if ( ! pd ) { + if ( f != stdin ) + fclose(f); + Error(errbuf); return; } diff --git a/src/iosource/pcap/Source.h b/src/iosource/pcap/Source.h index cbff27963a..ad696f1596 100644 --- a/src/iosource/pcap/Source.h +++ b/src/iosource/pcap/Source.h @@ -4,6 +4,7 @@ #include // for u_char #include +#include extern "C" { @@ -44,6 +45,9 @@ private: pcap_t* pd; struct pcap_stat prev_pstat = {0}; + + // Buffer provided to setvbuf() when reading from a PCAP file. + std::vector iobuf; }; } // namespace zeek::iosource::pcap diff --git a/src/iosource/pcap/pcap.bif b/src/iosource/pcap/pcap.bif index e9ba0cbb5d..9d73cb402e 100644 --- a/src/iosource/pcap/pcap.bif +++ b/src/iosource/pcap/pcap.bif @@ -3,6 +3,7 @@ module Pcap; const snaplen: count; const bufsize: count; +const bufsize_offline_bytes: count; const non_fd_timeout: interval; %%{ diff --git a/testing/btest/Baseline/core.pcap.input-error/output2 b/testing/btest/Baseline/core.pcap.input-error/output2 index bbbdf5b077..df60727de5 100644 --- a/testing/btest/Baseline/core.pcap.input-error/output2 +++ b/testing/btest/Baseline/core.pcap.input-error/output2 @@ -1,3 +1,3 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. fatal error: problem with interface NO_SUCH_INTERFACE -fatal error: problem with trace file NO_SUCH_TRACE (NO_SUCH_TRACE: No such file or directory) +fatal error: problem with trace file NO_SUCH_TRACE (unable to open NO_SUCH_TRACE: No such file or directory) diff --git a/testing/btest/Baseline/core.pcap.wrong-format/output b/testing/btest/Baseline/core.pcap.wrong-format/output new file mode 100644 index 0000000000..8129e9c851 --- /dev/null +++ b/testing/btest/Baseline/core.pcap.wrong-format/output @@ -0,0 +1,2 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +fatal error: problem with trace file not-a.pcap (unknown file format) diff --git a/testing/btest/Baseline/core.pcap.wrong-format/output2 b/testing/btest/Baseline/core.pcap.wrong-format/output2 new file mode 100644 index 0000000000..e61dec2aac --- /dev/null +++ b/testing/btest/Baseline/core.pcap.wrong-format/output2 @@ -0,0 +1,2 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +fatal error: problem with trace file - (unknown file format) diff --git a/testing/btest/core/pcap/wrong-format.zeek b/testing/btest/core/pcap/wrong-format.zeek new file mode 100644 index 0000000000..c480dc1f92 --- /dev/null +++ b/testing/btest/core/pcap/wrong-format.zeek @@ -0,0 +1,10 @@ +# @TEST-REQUIRES: test "${ZEEK_USE_CPP}" != "1" +# @TEST-EXEC-FAIL: zeek -b -r not-a.pcap >output 2>&1 +# @TEST-EXEC: btest-diff output +# @TEST-EXEC-FAIL: cat not-a.pcap | zeek -b -r - >output2 2>&1 +# @TEST-EXEC: btest-diff output2 + +@TEST-START-FILE ./not-a.pcap +%PDF-1.5 +This isn't an actual pdf file, and neither a PCAP. +@TEST-END-FILE