Merge remote-tracking branch 'origin/topic/awelzel/3379-shared-ptr-and-micro-optimizations'

* origin/topic/awelzel/3379-shared-ptr-and-micro-optimizations:
  build_inner_connection: Use the outer packet's timestamp
  build_inner_connection: Avoid one extra Init()
  packet_analysis: Do not run DetectProtocol() on disabled analyzers
  packet_analysis/Dispatcher: Do not index table twice
  packet_analysis: Avoid shared_ptr copying for analyzer lookups
This commit is contained in:
Arne Welzel 2023-11-01 12:04:16 +01:00
commit 4eb1b71d1b
9 changed files with 95 additions and 41 deletions

34
CHANGES
View file

@ -1,3 +1,37 @@
6.2.0-dev.93 | 2023-11-01 12:04:16 +0100
* build_inner_connection: Use the outer packet's timestamp (Arne Welzel, Corelight)
Don't construct the timeval based on run_state, just use the timestamp
of the outer packet to avoid the extra int/double conversions required.
* build_inner_connection: Avoid one extra Init() (Arne Welzel, Corelight)
Packet::Init() is not so cheap as one might think: It computes a
timestamp from { 0, 0 } using double division. Just avoid this
by not initializing an empty Packet.
* packet_analysis: Do not run DetectProtocol() on disabled analyzers (Arne Welzel, Corelight)
This came up when disabling the TEREDO analyzer but still seeing its
DetectProtocol() method prominently in flame graphs.
* packet_analysis/Dispatcher: Do not index table twice (Arne Welzel, Corelight)
It's okay to return the nullptr that's in the table, no need to check
for != nullptr before dereferencing again.
* GH-3379: packet_analysis: Avoid shared_ptr copying for analyzer lookups (Arne Welzel, Corelight)
For deeply encapsulated connections (think AWS traffic mirroring format
like IP,UDP,GENEVE,IP,UDP,VXLAN,ETH,IP,TCP), the Dispatcher::Lookup()
method is fairly visible in profiles when running in bare mode.
This changes the Analyzer::Lookup() and Dispatcher::Lookup() return value
breaking the API in favor of the performance improvement.
Relates to zeek/zeek#3379.
6.2.0-dev.86 | 2023-10-31 16:17:33 +0000
* SSL: Add new extension types and ECH test (Johanna Amann, Corelight)

4
NEWS
View file

@ -9,6 +9,10 @@ Zeek 6.2.0
Breaking Changes
----------------
- The methods ``Dispatcher::Lookup()`` and ``Analyzer::Lookup()`` in the packet_analysis
namespace were changed to return a reference to a std::shared_ptr instead of a copy
for performance reasons.
New Functionality
-----------------

View file

@ -1 +1 @@
6.2.0-dev.86
6.2.0-dev.93

View file

@ -53,23 +53,47 @@ bool Analyzer::IsAnalyzer(const char* name) {
return packet_mgr->GetComponentName(tag) == name;
}
AnalyzerPtr Analyzer::Lookup(uint32_t identifier) const { return dispatcher.Lookup(identifier); }
const AnalyzerPtr& Analyzer::Lookup(uint32_t identifier) const { return dispatcher.Lookup(identifier); }
bool Analyzer::ForwardPacket(size_t len, const uint8_t* data, Packet* packet, uint32_t identifier) const {
auto inner_analyzer = Lookup(identifier);
if ( ! inner_analyzer ) {
for ( const auto& child : analyzers_to_detect ) {
if ( child->DetectProtocol(len, data, packet) ) {
DBG_LOG(DBG_PACKET_ANALYSIS, "Protocol detection in %s succeeded, next layer analyzer is %s",
GetAnalyzerName(), child->GetAnalyzerName());
inner_analyzer = child;
break;
}
// Find the next inner analyzer using identifier or via DetectProtocol(),
// otherwise return the default analyzer.
const AnalyzerPtr& Analyzer::FindInnerAnalyzer(size_t len, const uint8_t* data, Packet* packet,
uint32_t identifier) const {
const auto& identifier_based_analyzer = Lookup(identifier);
if ( identifier_based_analyzer )
return identifier_based_analyzer;
const auto& detect_based_analyzer = DetectInnerAnalyzer(len, data, packet);
if ( detect_based_analyzer )
return detect_based_analyzer;
return default_analyzer;
}
// Find the next inner analyzer via DetectProtocol(), otherwise the default analyzer.
const AnalyzerPtr& Analyzer::FindInnerAnalyzer(size_t len, const uint8_t* data, Packet* packet) const {
const auto& detect_based_analyzer = DetectInnerAnalyzer(len, data, packet);
if ( detect_based_analyzer )
return detect_based_analyzer;
return default_analyzer;
}
// Return an analyzer found via DetectProtocol() for the given data, else nil.
const AnalyzerPtr& Analyzer::DetectInnerAnalyzer(size_t len, const uint8_t* data, Packet* packet) const {
for ( const auto& child : analyzers_to_detect ) {
if ( child->IsEnabled() && child->DetectProtocol(len, data, packet) ) {
DBG_LOG(DBG_PACKET_ANALYSIS, "Protocol detection in %s succeeded, next layer analyzer is %s",
GetAnalyzerName(), child->GetAnalyzerName());
return child;
}
}
if ( ! inner_analyzer )
inner_analyzer = default_analyzer;
return nil;
}
bool Analyzer::ForwardPacket(size_t len, const uint8_t* data, Packet* packet, uint32_t identifier) const {
const auto& inner_analyzer = FindInnerAnalyzer(len, data, packet, identifier);
if ( ! inner_analyzer ) {
DBG_LOG(DBG_PACKET_ANALYSIS, "Analysis in %s failed, could not find analyzer for identifier %#x.",
@ -89,23 +113,12 @@ bool Analyzer::ForwardPacket(size_t len, const uint8_t* data, Packet* packet, ui
DBG_LOG(DBG_PACKET_ANALYSIS, "Analysis in %s succeeded, next layer identifier is %#x.", GetAnalyzerName(),
identifier);
return inner_analyzer->AnalyzePacket(len, data, packet);
}
bool Analyzer::ForwardPacket(size_t len, const uint8_t* data, Packet* packet) const {
AnalyzerPtr inner_analyzer = nullptr;
for ( const auto& child : analyzers_to_detect ) {
if ( child->DetectProtocol(len, data, packet) ) {
DBG_LOG(DBG_PACKET_ANALYSIS, "Protocol detection in %s succeeded, next layer analyzer is %s",
GetAnalyzerName(), child->GetAnalyzerName());
inner_analyzer = child;
break;
}
}
if ( ! inner_analyzer )
inner_analyzer = default_analyzer;
const auto& inner_analyzer = FindInnerAnalyzer(len, data, packet);
if ( ! inner_analyzer ) {
DBG_LOG(DBG_PACKET_ANALYSIS, "Analysis in %s stopped, no default analyzer available.", GetAnalyzerName());

View file

@ -9,12 +9,16 @@
#include "zeek/session/Session.h"
namespace zeek::packet_analysis {
class Analyzer;
using AnalyzerPtr = std::shared_ptr<Analyzer>;
/**
* Main packet analyzer interface.
*/
class Analyzer {
public:
static inline AnalyzerPtr nil = nullptr;
/**
* Constructor.
*
@ -198,7 +202,7 @@ protected:
* @return The analyzer registered for the given identifier. Returns a
* nullptr if no analyzer is registered.
*/
AnalyzerPtr Lookup(uint32_t identifier) const;
const AnalyzerPtr& Lookup(uint32_t identifier) const;
/**
* Returns an analyzer based on a script-land definition.
@ -256,6 +260,11 @@ private:
void EnqueueAnalyzerViolationInfo(session::Session* session, const char* reason, const char* data, int len,
const zeek::Tag& arg_tag);
// Internal helpers to find an appropriate next inner analyzer.
const AnalyzerPtr& FindInnerAnalyzer(size_t len, const uint8_t* data, Packet* packet, uint32_t identifier) const;
const AnalyzerPtr& FindInnerAnalyzer(size_t len, const uint8_t* data, Packet* packet) const;
const AnalyzerPtr& DetectInnerAnalyzer(size_t len, const uint8_t* data, Packet* packet) const;
zeek::Tag tag;
Dispatcher dispatcher;
AnalyzerPtr default_analyzer = nullptr;
@ -270,7 +279,4 @@ private:
void Init(const zeek::Tag& tag);
};
using AnalyzerPtr = std::shared_ptr<Analyzer>;
} // namespace zeek::packet_analysis

View file

@ -48,12 +48,12 @@ void Dispatcher::Register(uint32_t identifier, AnalyzerPtr analyzer) {
table[index] = std::move(analyzer);
}
AnalyzerPtr Dispatcher::Lookup(uint32_t identifier) const {
const AnalyzerPtr& Dispatcher::Lookup(uint32_t identifier) const {
int64_t index = identifier - lowest_identifier;
if ( index >= 0 && index < static_cast<int64_t>(table.size()) && table[index] != nullptr )
if ( index >= 0 && index < static_cast<int64_t>(table.size()) )
return table[index];
return nullptr;
return Analyzer::nil;
}
size_t Dispatcher::Count() const {

View file

@ -35,7 +35,7 @@ public:
* @return The analyzer registered for the given identifier. Returns a
* nullptr if no analyzer is registered.
*/
AnalyzerPtr Lookup(uint32_t identifier) const;
const AnalyzerPtr& Lookup(uint32_t identifier) const;
/**
* Returns the number of registered analyzers.

View file

@ -152,8 +152,6 @@ std::unique_ptr<Packet> build_inner_packet(Packet* outer_pkt, int* encap_index,
std::shared_ptr<EncapsulationStack> encap_stack, uint32_t inner_cap_len,
const u_char* data, int link_type, BifEnum::Tunnel::Type tunnel_type,
const Tag& analyzer_tag) {
auto inner_pkt = std::make_unique<Packet>();
assert(outer_pkt->cap_len >= inner_cap_len);
assert(outer_pkt->len >= outer_pkt->cap_len - inner_cap_len);
@ -166,10 +164,7 @@ std::unique_ptr<Packet> build_inner_packet(Packet* outer_pkt, int* encap_index,
uint32_t consumed_len = outer_pkt->cap_len - inner_cap_len;
uint32_t inner_wire_len = outer_pkt->len - consumed_len;
pkt_timeval ts;
ts.tv_sec = static_cast<time_t>(run_state::current_timestamp);
ts.tv_usec = static_cast<suseconds_t>((run_state::current_timestamp - static_cast<double>(ts.tv_sec)) * 1000000);
inner_pkt->Init(link_type, &ts, inner_cap_len, inner_wire_len, data);
auto inner_pkt = std::make_unique<Packet>(link_type, &outer_pkt->ts, inner_cap_len, inner_wire_len, data);
*encap_index = 0;
if ( outer_pkt->session ) {

View file

@ -77,6 +77,8 @@ protected:
* The wire length (pkt->len) of the inner packet is computed based on the wire length
* of the outer packet and the differences in capture lengths.
*
* The inner packet's timestamp is set to the same value as the outer packet's timestamp.
*
* @param outer_pkt The packet containing the encapsulation. This packet should contain
* @param encap_index A return value for the current index into the encapsulation stack.
* This is returned to allow analyzers to know what point in the stack they were operating