From f00d6198af84eb85a6ee9ad8a1e9d86fb9ccb8df Mon Sep 17 00:00:00 2001 From: Arne Welzel Date: Fri, 31 Mar 2023 18:27:23 +0200 Subject: [PATCH] PktSrc: Introduce Pcap::non_fd_timeout Increasing this value 10x has lowered CPU usage on a Myricom based deployment significantly with reportedly no adverse side-effects. After reviewing the Zeek 3 IO loop, my hunch is that previously when no packets were available, we'd sleep 20usec every loop iteration after calling ->Process() on the packet source. With current master ->Process() is called 10 times on a packet source before going to sleep just once for 20 usec. Likely this explains the increased CPU usage reported. It's probably too risky to increase the current value, so introduce a const &redef value for advanced users to tweak it. A middle ground might be to lower ``io_poll_interval_live`` to 5 and increase the new ``Pcap::non_fd_timeout`` setting to 100usec. While this doesn't really fix #2296, we now have enough knobs for tweaking. Closes #2296. --- scripts/base/init-bare.zeek | 26 ++++++++++++++++++++++++++ src/iosource/PktSrc.cc | 2 +- src/iosource/pcap/pcap.bif | 1 + 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/scripts/base/init-bare.zeek b/scripts/base/init-bare.zeek index 847c2f21e8..ca1cf8052d 100644 --- a/scripts/base/init-bare.zeek +++ b/scripts/base/init-bare.zeek @@ -5207,6 +5207,32 @@ export { ## interfaces. const bufsize = 128 &redef; + ## Default timeout for packet sources without file descriptors. + ## + ## For libpcap based packet sources that do not provide a usable + ## file descriptor for select(), the timeout provided to the IO + ## loop is either zero if a packet was most recently available + ## or else this value. + ## + ## Depending on the expected packet rate per-worker and the amount of + ## available packet buffer, raising this value can significantly reduce + ## Zeek's CPU usage at the cost of a small delay before processing + ## packets. 
Setting this value too high may cause packet drops due + ## to running out of available buffer space. + ## + ## Increasing this value to 200usec on low-traffic Myricom based systems + ## (5 kpps per Zeek worker) has shown a 50% reduction in CPU usage. + ## + ## This is an advanced setting. Do monitor dropped packets and capture + ## loss information when changing it. + ## + ## .. note:: Packet sources that override the ``GetNextTimeout()`` method + ## may not respect this value. + ## + ## .. zeek:see:: io_poll_interval_live + ## + const non_fd_timeout = 20usec &redef; + ## The definition of a "pcap interface". type Interface: record { ## The interface/device name. diff --git a/src/iosource/PktSrc.cc b/src/iosource/PktSrc.cc index 0fa9585621..c135200caa 100644 --- a/src/iosource/PktSrc.cc +++ b/src/iosource/PktSrc.cc @@ -324,7 +324,7 @@ double PktSrc::GetNextTimeout() if ( have_packet || had_packet ) return 0.0; - return 0.00002; + return BifConst::Pcap::non_fd_timeout; } // If there's an FD (offline or live) we want poll to do what it has to with it. diff --git a/src/iosource/pcap/pcap.bif b/src/iosource/pcap/pcap.bif index 14fc22c4d1..5be3dc8636 100644 --- a/src/iosource/pcap/pcap.bif +++ b/src/iosource/pcap/pcap.bif @@ -3,6 +3,7 @@ module Pcap; const snaplen: count; const bufsize: count; +const non_fd_timeout: interval; %%{ #include