Merge remote-tracking branch 'origin/topic/awelzel/pattern-matches-rework'

* origin/topic/awelzel/pattern-matches-rework:
  RuleMatcher: Use a single list for tracking pattern_matches
  ZeekString: Implement move constructor
This commit is contained in:
Arne Welzel 2024-11-04 16:32:37 +01:00
commit db497d1da7
7 changed files with 77 additions and 26 deletions

14
CHANGES
View file

@ -1,3 +1,17 @@
7.1.0-dev.455 | 2024-11-04 16:32:37 +0100
* RuleMatcher: Use a single list for tracking pattern_matches (Arne Welzel, Corelight)
* ZeekString: Implement move constructor (Arne Welzel, Corelight)
* Bump and reenable spicy-format pre-commit hook (Benjamin Bannier, Corelight)
* Add test for TCP over 802.3/SNAP (Johanna Amann, Corelight)
This admittedly is a quite esoteric combination of protocols. But - as
we do correctly support them, it seems nice to have a slightly more
complete testcase that covers this.
7.1.0-dev.447 | 2024-10-30 14:52:40 +0100
* CI: Upgrade FreeBSD 14 to 14.1 (Evan Typanski, Corelight)

View file

@ -1 +1 @@
7.1.0-dev.447
7.1.0-dev.455

View file

@ -104,9 +104,8 @@ void RuleActionEvent::DoAction(const Rule* parent, RuleEndpointState* state, con
args.push_back(zeek::val_mgr->EmptyString());
if ( want_end_of_match ) {
// PList::member_pos() doesn't like const Rule*, need const_cast.
int rule_offset = state->matched_by_patterns.member_pos(const_cast<Rule*>(parent));
MatchPos end_of_match = (rule_offset >= 0 && data) ? state->matched_text_end_of_match[rule_offset] : 0;
auto* match = state->FindRulePatternMatch(parent);
MatchPos end_of_match = (match != nullptr && data) ? match->end_of_match : 0;
args.push_back(zeek::val_mgr->Count(end_of_match));
}

View file

@ -187,14 +187,24 @@ RuleEndpointState::RuleEndpointState(analyzer::Analyzer* arg_analyzer, bool arg_
pia = arg_PIA;
}
const RuleEndpointState::RulePatternMatch* RuleEndpointState::FindRulePatternMatch(const Rule* r) const {
const auto it =
std::find_if(pattern_matches.begin(), pattern_matches.end(), [r](const auto& m) { return m.rule == r; });
if ( it != pattern_matches.end() )
return &(*it);
return nullptr;
}
void RuleEndpointState::AddRulePatternMatch(Rule* r, const u_char* data, int data_len, MatchPos end_of_match) {
pattern_matches.emplace_back(r, data, data_len, end_of_match);
}
RuleEndpointState::~RuleEndpointState() {
for ( auto matcher : matchers ) {
delete matcher->state;
delete matcher;
}
for ( auto text : matched_text )
delete text;
}
RuleFileMagicState::~RuleFileMagicState() {
@ -865,11 +875,9 @@ void RuleMatcher::Match(RuleEndpointState* state, Rule::PatternType type, const
continue;
// Remember that all patterns have matched.
if ( ! state->matched_by_patterns.is_member(r) ) {
state->matched_by_patterns.push_back(r);
String* s = new String(data, data_len, false);
state->matched_text.push_back(s);
state->matched_text_end_of_match.push_back(match_end_pos - pre_match_pos);
if ( ! state->FindRulePatternMatch(r) ) {
MatchPos end_of_match = match_end_pos - pre_match_pos;
state->AddRulePatternMatch(r, data, data_len, end_of_match);
}
DBG_LOG(DBG_RULES, "And has not already fired");
@ -893,8 +901,8 @@ void RuleMatcher::FinishEndpoint(RuleEndpointState* state) {
ExecPureRules(state, true);
loop_over_list(state->matched_by_patterns, i)
ExecRulePurely(state->matched_by_patterns[i], state->matched_text[i], state, true);
for ( const auto& m : state->pattern_matches )
ExecRulePurely(m.rule, &m.text, state, true);
}
void RuleMatcher::ExecPureRules(RuleEndpointState* state, bool eos) {
@ -904,7 +912,7 @@ void RuleMatcher::ExecPureRules(RuleEndpointState* state, bool eos) {
}
}
bool RuleMatcher::ExecRulePurely(Rule* r, String* s, RuleEndpointState* state, bool eos) {
bool RuleMatcher::ExecRulePurely(Rule* r, const String* s, RuleEndpointState* state, bool eos) {
if ( is_member_of(state->matched_rules, r->Index()) )
return false;
@ -999,9 +1007,8 @@ void RuleMatcher::ExecRule(Rule* rule, RuleEndpointState* state, bool eos) {
// It must be a non-pure rule. It can only match right now if
// all its patterns are satisfied already.
int pos = state->matched_by_patterns.member_pos(rule);
if ( pos >= 0 ) { // they are, so let's evaluate it
ExecRulePurely(rule, state->matched_text[pos], state, eos);
if ( auto* match = state->FindRulePatternMatch(rule) ) { // they are, so let's evaluate it
ExecRulePurely(rule, &match->text, state, eos);
return;
}
}

View file

@ -12,6 +12,7 @@
#include "zeek/RE.h"
#include "zeek/Rule.h"
#include "zeek/ScannedFile.h"
#include "zeek/ZeekString.h"
#include "zeek/plugin/Manager.h"
// #define MATCHER_PRINT_STATS
@ -168,6 +169,31 @@ private:
RuleEndpointState(analyzer::Analyzer* arg_analyzer, bool arg_is_orig, RuleEndpointState* arg_opposite,
analyzer::pia::PIA* arg_PIA);
// Tracking pattern matches for a given Rule.
struct RulePatternMatch {
RulePatternMatch(Rule* rule, const u_char* data, int data_len, MatchPos end_of_match)
: rule(rule), text(data, data_len, false), end_of_match(end_of_match) {}
RulePatternMatch(RulePatternMatch&& other) noexcept
: rule(other.rule), text(std::move(other.text)), end_of_match(other.end_of_match) {
other.rule = nullptr;
other.end_of_match = 0;
}
RulePatternMatch(const RulePatternMatch&) = delete;
RulePatternMatch& operator=(const RulePatternMatch&) = delete;
Rule* rule = nullptr;
String text;
MatchPos end_of_match = 0;
};
// Find the RulePatternMatch for rules for which all patterns
// matches. Returns null if no pattern match for the given
// rule exists.
const RulePatternMatch* FindRulePatternMatch(const Rule* r) const;
void AddRulePatternMatch(Rule* r, const u_char* data, int data_len, MatchPos end_of_match);
struct Matcher {
RE_Match_State* state;
Rule::PatternType type;
@ -183,13 +209,9 @@ private:
matcher_list matchers;
rule_hdr_test_list hdr_tests;
// The follow tracks which rules for which all patterns have matched,
// in a parallel list the (first instance of the) corresponding
// matched text, and in another parallel list the offset of the
// end of the last pattern match.
rule_list matched_by_patterns;
bstr_list matched_text;
match_offset_list matched_text_end_of_match;
// The following tracks all pattern matches for rules
// for which all patterns have matched.
std::vector<RulePatternMatch> pattern_matches;
int payload_size;
size_t current_pos; // The number of bytes fed into state.
@ -343,7 +365,7 @@ private:
// Eval a rule under the assumption that all its patterns
// have already matched. s holds the text the rule matched,
// or nil if N/A.
bool ExecRulePurely(Rule* r, String* s, RuleEndpointState* state, bool eos);
bool ExecRulePurely(Rule* r, const String* s, RuleEndpointState* state, bool eos);
// Execute the actions associated with a rule.
void ExecRuleActions(Rule* r, RuleEndpointState* state, const u_char* data, int len, bool eos);

View file

@ -37,6 +37,12 @@ String::String(bool arg_final_NUL, byte_vec str, int arg_n) {
use_free_to_delete = false;
}
String::String(String&& other) noexcept
: b(other.b), n(other.n), final_NUL(other.final_NUL), use_free_to_delete(other.use_free_to_delete) {
other.b = nullptr;
other.Reset();
}
String::String(const u_char* str, int arg_n, bool add_NUL) : String() { Set(str, arg_n, add_NUL); }
String::String(std::string_view str) : String() { Set(str); }

View file

@ -47,6 +47,9 @@ public:
// Constructor that takes ownership of the vector passed in.
String(bool arg_final_NUL, byte_vec str, int arg_n);
// Move constructor
String(String&& s) noexcept;
String();
~String() { Reset(); }