RuleMatcher: Use a single list for tracking pattern_matches

This commit is contained in:
Arne Welzel 2024-10-30 13:22:53 +01:00
parent 0ecbd4435e
commit e443624c32
3 changed files with 53 additions and 25 deletions

View file

@ -104,9 +104,8 @@ void RuleActionEvent::DoAction(const Rule* parent, RuleEndpointState* state, con
args.push_back(zeek::val_mgr->EmptyString());
if ( want_end_of_match ) {
// PList::member_pos() doesn't like const Rule*, need const_cast.
int rule_offset = state->matched_by_patterns.member_pos(const_cast<Rule*>(parent));
MatchPos end_of_match = (rule_offset >= 0 && data) ? state->matched_text_end_of_match[rule_offset] : 0;
auto* match = state->FindRulePatternMatch(parent);
MatchPos end_of_match = (match != nullptr && data) ? match->end_of_match : 0;
args.push_back(zeek::val_mgr->Count(end_of_match));
}

View file

@ -187,14 +187,24 @@ RuleEndpointState::RuleEndpointState(analyzer::Analyzer* arg_analyzer, bool arg_
pia = arg_PIA;
}
const RuleEndpointState::RulePatternMatch* RuleEndpointState::FindRulePatternMatch(const Rule* r) const {
const auto it =
std::find_if(pattern_matches.begin(), pattern_matches.end(), [r](const auto& m) { return m.rule == r; });
if ( it != pattern_matches.end() )
return &(*it);
return nullptr;
}
void RuleEndpointState::AddRulePatternMatch(Rule* r, const u_char* data, int data_len, MatchPos end_of_match) {
pattern_matches.emplace_back(r, data, data_len, end_of_match);
}
RuleEndpointState::~RuleEndpointState() {
for ( auto matcher : matchers ) {
delete matcher->state;
delete matcher;
}
for ( auto text : matched_text )
delete text;
}
RuleFileMagicState::~RuleFileMagicState() {
@ -865,11 +875,9 @@ void RuleMatcher::Match(RuleEndpointState* state, Rule::PatternType type, const
continue;
// Remember that all patterns have matched.
if ( ! state->matched_by_patterns.is_member(r) ) {
state->matched_by_patterns.push_back(r);
String* s = new String(data, data_len, false);
state->matched_text.push_back(s);
state->matched_text_end_of_match.push_back(match_end_pos - pre_match_pos);
if ( ! state->FindRulePatternMatch(r) ) {
MatchPos end_of_match = match_end_pos - pre_match_pos;
state->AddRulePatternMatch(r, data, data_len, end_of_match);
}
DBG_LOG(DBG_RULES, "And has not already fired");
@ -893,8 +901,8 @@ void RuleMatcher::FinishEndpoint(RuleEndpointState* state) {
ExecPureRules(state, true);
loop_over_list(state->matched_by_patterns, i)
ExecRulePurely(state->matched_by_patterns[i], state->matched_text[i], state, true);
for ( const auto& m : state->pattern_matches )
ExecRulePurely(m.rule, &m.text, state, true);
}
void RuleMatcher::ExecPureRules(RuleEndpointState* state, bool eos) {
@ -904,7 +912,7 @@ void RuleMatcher::ExecPureRules(RuleEndpointState* state, bool eos) {
}
}
bool RuleMatcher::ExecRulePurely(Rule* r, String* s, RuleEndpointState* state, bool eos) {
bool RuleMatcher::ExecRulePurely(Rule* r, const String* s, RuleEndpointState* state, bool eos) {
if ( is_member_of(state->matched_rules, r->Index()) )
return false;
@ -999,9 +1007,8 @@ void RuleMatcher::ExecRule(Rule* rule, RuleEndpointState* state, bool eos) {
// It must be a non-pure rule. It can only match right now if
// all its patterns are satisfied already.
int pos = state->matched_by_patterns.member_pos(rule);
if ( pos >= 0 ) { // they are, so let's evaluate it
ExecRulePurely(rule, state->matched_text[pos], state, eos);
if ( auto* match = state->FindRulePatternMatch(rule) ) { // they are, so let's evaluate it
ExecRulePurely(rule, &match->text, state, eos);
return;
}
}

View file

@ -12,6 +12,7 @@
#include "zeek/RE.h"
#include "zeek/Rule.h"
#include "zeek/ScannedFile.h"
#include "zeek/ZeekString.h"
#include "zeek/plugin/Manager.h"
// #define MATCHER_PRINT_STATS
@ -168,6 +169,31 @@ private:
RuleEndpointState(analyzer::Analyzer* arg_analyzer, bool arg_is_orig, RuleEndpointState* arg_opposite,
analyzer::pia::PIA* arg_PIA);
// Tracking pattern matches for a given Rule.
struct RulePatternMatch {
RulePatternMatch(Rule* rule, const u_char* data, int data_len, MatchPos end_of_match)
: rule(rule), text(data, data_len, false), end_of_match(end_of_match) {}
RulePatternMatch(RulePatternMatch&& other) noexcept
: rule(other.rule), text(std::move(other.text)), end_of_match(other.end_of_match) {
other.rule = nullptr;
other.end_of_match = 0;
}
RulePatternMatch(const RulePatternMatch&) = delete;
RulePatternMatch& operator=(const RulePatternMatch&) = delete;
Rule* rule = nullptr;
String text;
MatchPos end_of_match = 0;
};
// Find the RulePatternMatch for rules for which all patterns
// matches. Returns null if no pattern match for the given
// rule exists.
const RulePatternMatch* FindRulePatternMatch(const Rule* r) const;
void AddRulePatternMatch(Rule* r, const u_char* data, int data_len, MatchPos end_of_match);
struct Matcher {
RE_Match_State* state;
Rule::PatternType type;
@ -183,13 +209,9 @@ private:
matcher_list matchers;
rule_hdr_test_list hdr_tests;
// The follow tracks which rules for which all patterns have matched,
// in a parallel list the (first instance of the) corresponding
// matched text, and in another parallel list the offset of the
// end of the last pattern match.
rule_list matched_by_patterns;
bstr_list matched_text;
match_offset_list matched_text_end_of_match;
// The following tracks all pattern matches for rules
// for which all patterns have matched.
std::vector<RulePatternMatch> pattern_matches;
int payload_size;
size_t current_pos; // The number of bytes fed into state.
@ -343,7 +365,7 @@ private:
// Eval a rule under the assumption that all its patterns
// have already matched. s holds the text the rule matched,
// or nil if N/A.
bool ExecRulePurely(Rule* r, String* s, RuleEndpointState* state, bool eos);
bool ExecRulePurely(Rule* r, const String* s, RuleEndpointState* state, bool eos);
// Execute the actions associated with a rule.
void ExecRuleActions(Rule* r, RuleEndpointState* state, const u_char* data, int len, bool eos);