From 0ecbd4435e5d7eb43a6aa2118f6646260c921b50 Mon Sep 17 00:00:00 2001 From: Arne Welzel Date: Mon, 4 Nov 2024 14:41:19 +0100 Subject: [PATCH 1/2] ZeekString: Implement move constructor --- src/ZeekString.cc | 6 ++++++ src/ZeekString.h | 3 +++ 2 files changed, 9 insertions(+) diff --git a/src/ZeekString.cc b/src/ZeekString.cc index 6d2e39ac31..e5bfaacd5c 100644 --- a/src/ZeekString.cc +++ b/src/ZeekString.cc @@ -37,6 +37,12 @@ String::String(bool arg_final_NUL, byte_vec str, int arg_n) { use_free_to_delete = false; } +String::String(String&& other) noexcept + : b(other.b), n(other.n), final_NUL(other.final_NUL), use_free_to_delete(other.use_free_to_delete) { + other.b = nullptr; + other.Reset(); +} + String::String(const u_char* str, int arg_n, bool add_NUL) : String() { Set(str, arg_n, add_NUL); } String::String(std::string_view str) : String() { Set(str); } diff --git a/src/ZeekString.h b/src/ZeekString.h index 44fee2d8cd..433e318868 100644 --- a/src/ZeekString.h +++ b/src/ZeekString.h @@ -47,6 +47,9 @@ public: // Constructor that takes ownership of the vector passed in. String(bool arg_final_NUL, byte_vec str, int arg_n); + // Move constructor + String(String&& s) noexcept; + String(); ~String() { Reset(); } From e443624c32aee89cbf977570a5c1f891c48db29f Mon Sep 17 00:00:00 2001 From: Arne Welzel Date: Wed, 30 Oct 2024 13:22:53 +0100 Subject: [PATCH 2/2] RuleMatcher: Use a single list for tracking pattern_matches --- src/RuleAction.cc | 5 ++--- src/RuleMatcher.cc | 35 +++++++++++++++++++++-------------- src/RuleMatcher.h | 38 ++++++++++++++++++++++++++++++-------- 3 files changed, 53 insertions(+), 25 deletions(-) diff --git a/src/RuleAction.cc b/src/RuleAction.cc index 74e9a0a406..db58b5c838 100644 --- a/src/RuleAction.cc +++ b/src/RuleAction.cc @@ -104,9 +104,8 @@ void RuleActionEvent::DoAction(const Rule* parent, RuleEndpointState* state, con args.push_back(zeek::val_mgr->EmptyString()); if ( want_end_of_match ) { - // PList::member_pos() doesn't like const Rule*, need const_cast. - int rule_offset = state->matched_by_patterns.member_pos(const_cast(parent)); - MatchPos end_of_match = (rule_offset >= 0 && data) ? state->matched_text_end_of_match[rule_offset] : 0; + auto* match = state->FindRulePatternMatch(parent); + MatchPos end_of_match = (match != nullptr && data) ? match->end_of_match : 0; args.push_back(zeek::val_mgr->Count(end_of_match)); } diff --git a/src/RuleMatcher.cc b/src/RuleMatcher.cc index 93722a4c88..d88168467c 100644 --- a/src/RuleMatcher.cc +++ b/src/RuleMatcher.cc @@ -187,14 +187,24 @@ RuleEndpointState::RuleEndpointState(analyzer::Analyzer* arg_analyzer, bool arg_ pia = arg_PIA; } +const RuleEndpointState::RulePatternMatch* RuleEndpointState::FindRulePatternMatch(const Rule* r) const { + const auto it = + std::find_if(pattern_matches.begin(), pattern_matches.end(), [r](const auto& m) { return m.rule == r; }); + if ( it != pattern_matches.end() ) + return &(*it); + + return nullptr; +} + +void RuleEndpointState::AddRulePatternMatch(Rule* r, const u_char* data, int data_len, MatchPos end_of_match) { + pattern_matches.emplace_back(r, data, data_len, end_of_match); +} + RuleEndpointState::~RuleEndpointState() { for ( auto matcher : matchers ) { delete matcher->state; delete matcher; } - - for ( auto text : matched_text ) - delete text; } RuleFileMagicState::~RuleFileMagicState() { @@ -865,11 +875,9 @@ void RuleMatcher::Match(RuleEndpointState* state, Rule::PatternType type, const continue; // Remember that all patterns have matched. - if ( ! state->matched_by_patterns.is_member(r) ) { - state->matched_by_patterns.push_back(r); - String* s = new String(data, data_len, false); - state->matched_text.push_back(s); - state->matched_text_end_of_match.push_back(match_end_pos - pre_match_pos); + if ( ! state->FindRulePatternMatch(r) ) { + MatchPos end_of_match = match_end_pos - pre_match_pos; + state->AddRulePatternMatch(r, data, data_len, end_of_match); } DBG_LOG(DBG_RULES, "And has not already fired"); @@ -893,8 +901,8 @@ void RuleMatcher::FinishEndpoint(RuleEndpointState* state) { ExecPureRules(state, true); - loop_over_list(state->matched_by_patterns, i) - ExecRulePurely(state->matched_by_patterns[i], state->matched_text[i], state, true); + for ( const auto& m : state->pattern_matches ) + ExecRulePurely(m.rule, &m.text, state, true); } void RuleMatcher::ExecPureRules(RuleEndpointState* state, bool eos) { @@ -904,7 +912,7 @@ void RuleMatcher::ExecPureRules(RuleEndpointState* state, bool eos) { } } -bool RuleMatcher::ExecRulePurely(Rule* r, String* s, RuleEndpointState* state, bool eos) { +bool RuleMatcher::ExecRulePurely(Rule* r, const String* s, RuleEndpointState* state, bool eos) { if ( is_member_of(state->matched_rules, r->Index()) ) return false; @@ -999,9 +1007,8 @@ void RuleMatcher::ExecRule(Rule* rule, RuleEndpointState* state, bool eos) { // It must be a non-pure rule. It can only match right now if // all its patterns are satisfied already. - int pos = state->matched_by_patterns.member_pos(rule); - if ( pos >= 0 ) { // they are, so let's evaluate it - ExecRulePurely(rule, state->matched_text[pos], state, eos); + if ( auto* match = state->FindRulePatternMatch(rule) ) { // they are, so let's evaluate it + ExecRulePurely(rule, &match->text, state, eos); return; } } diff --git a/src/RuleMatcher.h b/src/RuleMatcher.h index 3d0de6a94f..68cef6ec70 100644 --- a/src/RuleMatcher.h +++ b/src/RuleMatcher.h @@ -12,6 +12,7 @@ #include "zeek/RE.h" #include "zeek/Rule.h" #include "zeek/ScannedFile.h" +#include "zeek/ZeekString.h" #include "zeek/plugin/Manager.h" // #define MATCHER_PRINT_STATS @@ -168,6 +169,31 @@ private: RuleEndpointState(analyzer::Analyzer* arg_analyzer, bool arg_is_orig, RuleEndpointState* arg_opposite, analyzer::pia::PIA* arg_PIA); + // Tracking pattern matches for a given Rule. + struct RulePatternMatch { + RulePatternMatch(Rule* rule, const u_char* data, int data_len, MatchPos end_of_match) + : rule(rule), text(data, data_len, false), end_of_match(end_of_match) {} + + RulePatternMatch(RulePatternMatch&& other) noexcept + : rule(other.rule), text(std::move(other.text)), end_of_match(other.end_of_match) { + other.rule = nullptr; + other.end_of_match = 0; + } + + RulePatternMatch(const RulePatternMatch&) = delete; + RulePatternMatch& operator=(const RulePatternMatch&) = delete; + + Rule* rule = nullptr; + String text; + MatchPos end_of_match = 0; + }; + + // Find the RulePatternMatch for rules for which all patterns + // matches. Returns null if no pattern match for the given + // rule exists. + const RulePatternMatch* FindRulePatternMatch(const Rule* r) const; + void AddRulePatternMatch(Rule* r, const u_char* data, int data_len, MatchPos end_of_match); + struct Matcher { RE_Match_State* state; Rule::PatternType type; @@ -183,13 +209,9 @@ private: matcher_list matchers; rule_hdr_test_list hdr_tests; - // The follow tracks which rules for which all patterns have matched, - // in a parallel list the (first instance of the) corresponding - // matched text, and in another parallel list the offset of the - // end of the last pattern match. - rule_list matched_by_patterns; - bstr_list matched_text; - match_offset_list matched_text_end_of_match; + // The following tracks all pattern matches for rules + // for which all patterns have matched. + std::vector pattern_matches; int payload_size; size_t current_pos; // The number of bytes fed into state. @@ -343,7 +365,7 @@ private: // Eval a rule under the assumption that all its patterns // have already matched. s holds the text the rule matched, // or nil if N/A. - bool ExecRulePurely(Rule* r, String* s, RuleEndpointState* state, bool eos); + bool ExecRulePurely(Rule* r, const String* s, RuleEndpointState* state, bool eos); // Execute the actions associated with a rule. void ExecRuleActions(Rule* r, RuleEndpointState* state, const u_char* data, int len, bool eos);