From 01e305edd8ed91b68168c614bface253614034ac Mon Sep 17 00:00:00 2001
From: Arne Welzel <arne.welzel@corelight.com>
Date: Sat, 2 Dec 2023 23:21:09 +0100
Subject: [PATCH 1/3] UpdateConnVal: Avoid FieldOffset() calls

These can be significant if a lot of new connections and or events
are created for which an existing conn val needs updating and otherwise
things are very fast.
---
 src/analyzer/protocol/conn-size/ConnSize.cc   | 21 +++++++------------
 .../protocol/tcp/TCPSessionAdapter.cc         |  7 +++++--
 2 files changed, 13 insertions(+), 15 deletions(-)
diff --git a/src/analyzer/protocol/conn-size/ConnSize.cc b/src/analyzer/protocol/conn-size/ConnSize.cc
index 3ab6e2a789..4a03f1bb6d 100644
--- a/src/analyzer/protocol/conn-size/ConnSize.cc
+++ b/src/analyzer/protocol/conn-size/ConnSize.cc
@@ -139,20 +139,15 @@ void ConnSize_Analyzer::SetDurationThreshold(double duration) {
 }
 
 void ConnSize_Analyzer::UpdateConnVal(RecordVal* conn_val) {
-    // RecordType *connection_type is declared in NetVar.h
-    RecordVal* orig_endp = conn_val->GetFieldAs<RecordVal>("orig");
-    RecordVal* resp_endp = conn_val->GetFieldAs<RecordVal>("resp");
-
-    // endpoint is the RecordType from NetVar.h
-    int pktidx = id::endpoint->FieldOffset("num_pkts");
-    int bytesidx = id::endpoint->FieldOffset("num_bytes_ip");
-
-    if ( pktidx < 0 )
-        reporter->InternalError("'endpoint' record missing 'num_pkts' field");
-
-    if ( bytesidx < 0 )
-        reporter->InternalError("'endpoint' record missing 'num_bytes_ip' field");
+    static const auto& conn_type = zeek::id::find_type<zeek::RecordType>("connection");
+    static const int origidx = conn_type->FieldOffset("orig");
+    static const int respidx = conn_type->FieldOffset("resp");
+    static const auto& endpoint_type = zeek::id::find_type<zeek::RecordType>("endpoint");
+    static const int pktidx = endpoint_type->FieldOffset("num_pkts");
+    static const int bytesidx = endpoint_type->FieldOffset("num_bytes_ip");
 
+    auto* orig_endp = conn_val->GetFieldAs<RecordVal>(origidx);
+    auto* resp_endp = conn_val->GetFieldAs<RecordVal>(respidx);
     orig_endp->Assign(pktidx, orig_pkts);
     orig_endp->Assign(bytesidx, orig_bytes);
     resp_endp->Assign(pktidx, resp_pkts);
diff --git a/src/packet_analysis/protocol/tcp/TCPSessionAdapter.cc b/src/packet_analysis/protocol/tcp/TCPSessionAdapter.cc
index 332c01aa45..0f519ac230 100644
--- a/src/packet_analysis/protocol/tcp/TCPSessionAdapter.cc
+++ b/src/packet_analysis/protocol/tcp/TCPSessionAdapter.cc
@@ -1027,8 +1027,11 @@ void TCPSessionAdapter::FlipRoles() {
 }
 
 void TCPSessionAdapter::UpdateConnVal(RecordVal* conn_val) {
-    auto orig_endp_val = conn_val->GetFieldAs<RecordVal>("orig");
-    auto resp_endp_val = conn_val->GetFieldAs<RecordVal>("resp");
+    static const auto& conn_type = zeek::id::find_type<zeek::RecordType>("connection");
+    static const int origidx = conn_type->FieldOffset("orig");
+    static const int respidx = conn_type->FieldOffset("resp");
+    auto* orig_endp_val = conn_val->GetFieldAs<RecordVal>(origidx);
+    auto* resp_endp_val = conn_val->GetFieldAs<RecordVal>(respidx);
 
     orig_endp_val->Assign(0, orig->Size());
     orig_endp_val->Assign(1, orig->state);

From 46acd9168ec4c98c943a147ab576f201a4854556 Mon Sep 17 00:00:00 2001
From: Arne Welzel <arne.welzel@corelight.com>
Date: Sun, 3 Dec 2023 00:55:21 +0100
Subject: [PATCH 2/3] EventMgr: Remove queue_flare, use GetNextTimeout()
 instead

It can be visible overhead to call write() on the underlying pipe of the
EventMgr's flare whenever the first event is enqueued during an IO loop
iteration. Particularly in scenarios where there's about 1 event per packet
for long lived connections and script-side event processing is fast.

Given the event manager is drained anyhow at the end of the main loop, this
shouldn't be needed. In fact, the EventMgr.Process() method is basically
a stub. The one reason it is needed is when more events are enqueued during
a drain. That, however, can be dealt with by implementing GetNextTimeout()
to return 0.0 when there's more events queued. This way the main-loop's poll
timeout is 0.0 and it'll continue immediately.

This also allows to removes some extra code and drop the recently introduced
InitPostFork() addition: Without a pipe, there's no need to recreate it.
---
 src/Event.cc      | 18 ++----------------
 src/Event.h       |  9 ++++-----
 src/zeek-setup.cc |  2 --
 3 files changed, 6 insertions(+), 23 deletions(-)

diff --git a/src/Event.cc b/src/Event.cc
index ead50a1ad8..643484583f 100644
--- a/src/Event.cc
+++ b/src/Event.cc
@@ -99,7 +99,6 @@ void EventMgr::QueueEvent(Event* event) {
 
     if ( ! head ) {
         head = tail = event;
-        queue_flare.Fire();
     }
     else {
         tail->SetNext(event);
@@ -177,25 +176,12 @@ void EventMgr::Describe(ODesc* d) const {
 }
 
 void EventMgr::Process() {
-    queue_flare.Extinguish();
-
     // While it semes like the most logical thing to do, we dont want
     // to call Drain() as part of this method. It will get called at
-    // the end of net_run after all of the sources have been processed
+    // the end of run_loop after all of the sources have been processed
     // and had the opportunity to spawn new events.
 }
 
-void EventMgr::InitPostScript() {
-    iosource_mgr->Register(this, true, false);
-    if ( ! iosource_mgr->RegisterFd(queue_flare.FD(), this) )
-        reporter->FatalError("Failed to register event manager FD with iosource_mgr");
-}
-
-void EventMgr::InitPostFork() {
-    // Re-initialize the flare, closing and re-opening the underlying
-    // pipe FDs. This is needed so that each Zeek process in a supervisor
-    // setup has its own pipe instead of them all sharing a single pipe.
-    queue_flare = zeek::detail::Flare{};
-}
+void EventMgr::InitPostScript() { iosource_mgr->Register(this, true, false); }
 
 } // namespace zeek
diff --git a/src/Event.h b/src/Event.h
index 2fc9ee0e53..a1789aed87 100644
--- a/src/Event.h
+++ b/src/Event.h
@@ -106,14 +106,14 @@ public:
 
     void Describe(ODesc* d) const override;
 
-    double GetNextTimeout() override { return -1; }
+    // Let the IO loop know when there's more events to process
+    // by returning a zero-timeout.
+    double GetNextTimeout() override { return head ? 0.0 : -1.0; }
+
     void Process() override;
     const char* Tag() override { return "EventManager"; }
     void InitPostScript();
 
-    // Initialization to be done after a fork() happened.
-    void InitPostFork();
-
     uint64_t num_events_queued = 0;
     uint64_t num_events_dispatched = 0;
 
@@ -127,7 +127,6 @@ protected:
     double current_ts;
     RecordVal* src_val;
     bool draining;
-    detail::Flare queue_flare;
 };
 
 extern EventMgr event_mgr;
diff --git a/src/zeek-setup.cc b/src/zeek-setup.cc
index afe5cec284..4b351887aa 100644
--- a/src/zeek-setup.cc
+++ b/src/zeek-setup.cc
@@ -506,8 +506,6 @@ SetupResult setup(int argc, char** argv, Options* zopts) {
         // If we get here, we're a supervised node that just returned
         // from CreateStem() after being forked from the stem.
         Supervisor::ThisNode()->Init(&options);
-
-        event_mgr.InitPostFork();
     }
 
     script_coverage_mgr.ReadStats();

From d70b3d650624173ebeae0e3e9256640e774d03f1 Mon Sep 17 00:00:00 2001
From: Arne Welzel <arne.welzel@corelight.com>
Date: Mon, 4 Dec 2023 11:59:27 +0100
Subject: [PATCH 3/3] SegmentProfiler: Do not initialize initial_rusage

We use the SegmentProfiler in quite a few hot places and the memset of
the rusage structure (144bytes here) can show up significantly even if
the segment profiler itself isn't used.

Relates to #3485.
---
 src/Stats.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Stats.h b/src/Stats.h
index 22d11a242b..db08a11953 100644
--- a/src/Stats.h
+++ b/src/Stats.h
@@ -39,13 +39,13 @@ class SegmentProfiler {
 public:
     // The constructor takes some way of identifying the segment.
     SegmentProfiler(std::shared_ptr<SegmentStatsReporter> arg_reporter, const char* arg_name)
-        : reporter(std::move(arg_reporter)), name(arg_name), loc(), initial_rusage() {
+        : reporter(std::move(arg_reporter)), name(arg_name), loc() {
         if ( reporter )
             Init();
     }
 
     SegmentProfiler(std::shared_ptr<SegmentStatsReporter> arg_reporter, const Location* arg_loc)
-        : reporter(std::move(arg_reporter)), name(), loc(arg_loc), initial_rusage() {
+        : reporter(std::move(arg_reporter)), name(), loc(arg_loc) {
         if ( reporter )
             Init();
     }