diff --git a/src/DFA.cc b/src/DFA.cc index ad9521709e..dbfed71ba3 100644 --- a/src/DFA.cc +++ b/src/DFA.cc @@ -211,9 +211,10 @@ void DFA_State::Dump(FILE* f, DFA_Machine* m) if ( accept ) { - for ( int i = 0; i < accept->length(); ++i ) - fprintf(f, "%s accept #%d", - i > 0 ? "," : "", int((*accept)[i])); + AcceptingSet::const_iterator it; + + for ( it = accept->begin(); it != accept->end(); ++it ) + fprintf(f, "%s accept #%d", it == accept->begin() ? "" : ",", *it); } fprintf(f, "\n"); @@ -285,7 +286,7 @@ unsigned int DFA_State::Size() { return sizeof(*this) + pad_size(sizeof(DFA_State*) * num_sym) - + (accept ? pad_size(sizeof(int) * accept->length()) : 0) + + (accept ? pad_size(sizeof(int) * accept->size()) : 0) + (nfa_states ? pad_size(sizeof(NFA_State*) * nfa_states->length()) : 0) + (meta_ec ? meta_ec->Size() : 0) + (centry ? padded_sizeof(CacheEntry) : 0); @@ -470,33 +471,20 @@ int DFA_Machine::StateSetToDFA_State(NFA_state_list* state_set, return 0; AcceptingSet* accept = new AcceptingSet; + for ( int i = 0; i < state_set->length(); ++i ) { int acc = (*state_set)[i]->Accept(); if ( acc != NO_ACCEPT ) - { - int j; - for ( j = 0; j < accept->length(); ++j ) - if ( (*accept)[j] == acc ) - break; - - if ( j >= accept->length() ) - // It's not already present. 
- accept->append(acc); - } + accept->insert(acc); } - if ( accept->length() == 0 ) + if ( accept->empty() ) { delete accept; accept = 0; } - else - { - accept->sort(int_list_cmp); - accept->resize(0); - } DFA_State* ds = new DFA_State(state_count++, ec, state_set, accept); d = dfa_state_cache->Insert(ds, hash); diff --git a/src/RE.cc b/src/RE.cc index 87117c1c3a..4855b0e39a 100644 --- a/src/RE.cc +++ b/src/RE.cc @@ -3,6 +3,7 @@ #include "config.h" #include +#include <utility> #include "RE.h" #include "DFA.h" @@ -266,6 +267,15 @@ void Specific_RE_Matcher::Dump(FILE* f) dfa->Dump(f); } +inline void RE_Match_State::AddMatches(const AcceptingSet& as, + MatchPos position) + { + typedef std::pair<AcceptIdx, MatchPos> am_idx; + + for ( AcceptingSet::const_iterator it = as.begin(); it != as.end(); ++it ) + accepted_matches.insert(am_idx(*it, position)); + } + bool RE_Match_State::Match(const u_char* bv, int n, bool bol, bool eol, bool clear) { @@ -283,14 +293,9 @@ bool RE_Match_State::Match(const u_char* bv, int n, current_state = dfa->StartState(); const AcceptingSet* ac = current_state->Accept(); + if ( ac ) - { - loop_over_list(*ac, i) - { - accepted.append((*ac)[i]); - match_pos.append(0); - } - } + AddMatches(*ac, 0); } else if ( clear ) @@ -301,7 +306,7 @@ bool RE_Match_State::Match(const u_char* bv, int n, current_pos = 0; - int old_matches = accepted.length(); + size_t old_matches = accepted_matches.size(); int ec; int m = bol ? n + 1 : n; @@ -324,25 +329,17 @@ bool RE_Match_State::Match(const u_char* bv, int n, break; } - if ( next_state->Accept() ) - { - const AcceptingSet* ac = next_state->Accept(); - loop_over_list(*ac, i) - { - if ( ! 
accepted.is_member((*ac)[i]) ) - { - accepted.append((*ac)[i]); - match_pos.append(current_pos); - } - } - } + const AcceptingSet* ac = next_state->Accept(); + + if ( ac ) + AddMatches(*ac, current_pos); ++current_pos; current_state = next_state; } - return accepted.length() != old_matches; + return accepted_matches.size() != old_matches; } int Specific_RE_Matcher::LongestMatch(const u_char* bv, int n) @@ -399,7 +396,8 @@ unsigned int Specific_RE_Matcher::MemoryAllocation() const + equiv_class.Size() - padded_sizeof(EquivClass) + (dfa ? dfa->MemoryAllocation() : 0) // this is ref counted; consider the bytes here? + padded_sizeof(*any_ccl) - + accepted->MemoryAllocation(); + + padded_sizeof(*accepted) + + accepted->size() * padded_sizeof(AcceptingSet::key_type); } RE_Matcher::RE_Matcher() diff --git a/src/RE.h b/src/RE.h index a2fc709c88..7437dbb8b8 100644 --- a/src/RE.h +++ b/src/RE.h @@ -9,6 +9,9 @@ #include "CCL.h" #include "EquivClass.h" +#include <set> +#include <map> + #include typedef int (*cce_func)(int); @@ -33,7 +36,10 @@ extern int re_lex(void); extern int clower(int); extern void synerr(const char str[]); -typedef int_list AcceptingSet; +typedef int AcceptIdx; +typedef std::set<AcceptIdx> AcceptingSet; +typedef uint64 MatchPos; +typedef std::map<AcceptIdx, MatchPos> AcceptingMatchSet; typedef name_list string_list; typedef enum { MATCH_ANYWHERE, MATCH_EXACTLY, } match_type; @@ -135,8 +141,8 @@ public: current_state = 0; } - const AcceptingSet* Accepted() const { return &accepted; } - const int_list* MatchPositions() const { return &match_pos; } + const AcceptingMatchSet& AcceptedMatches() const + { return accepted_matches; } // Returns the number of bytes feeded into the matcher so far int Length() { return current_pos; } @@ -149,16 +155,16 @@ public: { current_pos = -1; current_state = 0; - accepted.clear(); - match_pos.clear(); + accepted_matches.clear(); } + void AddMatches(const AcceptingSet& as, MatchPos position); + protected: DFA_Machine* dfa; int* ecs; - AcceptingSet accepted; - int_list 
match_pos; + AcceptingMatchSet accepted_matches; DFA_State* current_state; int current_pos; }; diff --git a/src/RuleMatcher.cc b/src/RuleMatcher.cc index 5e9dff0a1f..5cea843c8d 100644 --- a/src/RuleMatcher.cc +++ b/src/RuleMatcher.cc @@ -594,6 +594,29 @@ RuleFileMagicState* RuleMatcher::InitFileMagic() const return state; } +bool RuleMatcher::AllRulePatternsMatched(const Rule* r, MatchPos matchpos, + const AcceptingMatchSet& ams) + { + DBG_LOG(DBG_RULES, "Checking rule: %s", r->id); + + // Check whether all patterns of the rule have matched. + loop_over_list(r->patterns, j) + { + if ( ams.find(r->patterns[j]->id) == ams.end() ) + return false; + + // See if depth is satisfied. + if ( matchpos > r->patterns[j]->offset + r->patterns[j]->depth ) + return false; + + // FIXME: How to check for offset ??? ### + } + + DBG_LOG(DBG_RULES, "All patterns of rule satisfied"); + + return true; + } + RuleMatcher::MIME_Matches* RuleMatcher::Match(RuleFileMagicState* state, const u_char* data, uint64 len, MIME_Matches* rval) const @@ -636,56 +659,39 @@ RuleMatcher::MIME_Matches* RuleMatcher::Match(RuleFileMagicState* state, DBG_LOG(DBG_RULES, "New pattern match found"); - AcceptingSet accepted; - int_list matchpos; + AcceptingMatchSet accepted_matches; loop_over_list(state->matchers, y) { RuleFileMagicState::Matcher* m = state->matchers[y]; - const AcceptingSet* ac = m->state->Accepted(); - - loop_over_list(*ac, k) - { - if ( ! accepted.is_member((*ac)[k]) ) - { - accepted.append((*ac)[k]); - matchpos.append((*m->state->MatchPositions())[k]); - } - } + const AcceptingMatchSet& ams = m->state->AcceptedMatches(); + accepted_matches.insert(ams.begin(), ams.end()); } // Find rules for which patterns have matched. 
- rule_list matched; + set<Rule*> rule_matches; - loop_over_list(accepted, i) + for ( AcceptingMatchSet::const_iterator it = accepted_matches.begin(); + it != accepted_matches.end(); ++it ) { - Rule* r = Rule::rule_table[accepted[i] - 1]; + AcceptIdx aidx = it->first; + MatchPos mpos = it->second; - DBG_LOG(DBG_RULES, "Checking rule: %v", r->id); + Rule* r = Rule::rule_table[aidx - 1]; - loop_over_list(r->patterns, j) - { - if ( ! accepted.is_member(r->patterns[j]->id) ) - continue; - - if ( (unsigned int) matchpos[i] > - r->patterns[j]->offset + r->patterns[j]->depth ) - continue; - - DBG_LOG(DBG_RULES, "All patterns of rule satisfied"); - } - - if ( ! matched.is_member(r) ) - matched.append(r); + if ( AllRulePatternsMatched(r, mpos, accepted_matches) ) + rule_matches.insert(r); } - loop_over_list(matched, j) + for ( set<Rule*>::const_iterator it = rule_matches.begin(); + it != rule_matches.end(); ++it ) { - Rule* r = matched[j]; + Rule* r = *it; loop_over_list(r->actions, rai) { - const RuleActionMIME* ram = dynamic_cast<const RuleActionMIME*>(r->actions[rai]); + const RuleActionMIME* ram = + dynamic_cast<const RuleActionMIME*>(r->actions[rai]); if ( ! ram ) continue; @@ -876,66 +882,40 @@ void RuleMatcher::Match(RuleEndpointState* state, Rule::PatternType type, DBG_LOG(DBG_RULES, "New pattern match found"); - // Build a joined AcceptingSet. - AcceptingSet accepted; - int_list matchpos; + AcceptingMatchSet accepted_matches; - loop_over_list(state->matchers, y) + loop_over_list(state->matchers, y ) { RuleEndpointState::Matcher* m = state->matchers[y]; - const AcceptingSet* ac = m->state->Accepted(); - - loop_over_list(*ac, k) - { - if ( ! accepted.is_member((*ac)[k]) ) - { - accepted.append((*ac)[k]); - matchpos.append((*m->state->MatchPositions())[k]); - } - } + const AcceptingMatchSet& ams = m->state->AcceptedMatches(); + accepted_matches.insert(ams.begin(), ams.end()); } // Determine the rules for which all patterns have matched. 
// This code should be fast enough as long as there are only very few // matched patterns per connection (which is a plausible assumption). - rule_list matched; + // Find rules for which patterns have matched. + set<Rule*> rule_matches; - loop_over_list(accepted, i) + for ( AcceptingMatchSet::const_iterator it = accepted_matches.begin(); + it != accepted_matches.end(); ++it ) { - Rule* r = Rule::rule_table[accepted[i] - 1]; + AcceptIdx aidx = it->first; + MatchPos mpos = it->second; - DBG_LOG(DBG_RULES, "Checking rule: %s", r->id); + Rule* r = Rule::rule_table[aidx - 1]; - // Check whether all patterns of the rule have matched. - loop_over_list(r->patterns, j) - { - if ( ! accepted.is_member(r->patterns[j]->id) ) - goto next_pattern; - - // See if depth is satisfied. - if ( (unsigned int) matchpos[i] > - r->patterns[j]->offset + r->patterns[j]->depth ) - goto next_pattern; - - DBG_LOG(DBG_RULES, "All patterns of rule satisfied"); - - // FIXME: How to check for offset ??? ### - } - - // If not already in the list of matching rules, add it. - if ( ! matched.is_member(r) ) - matched.append(r); - -next_pattern: - continue; + if ( AllRulePatternsMatched(r, mpos, accepted_matches) ) + rule_matches.insert(r); } // Check which of the matching rules really belong to any of our nodes. - loop_over_list(matched, j) + for ( set<Rule*>::const_iterator it = rule_matches.begin(); + it != rule_matches.end(); ++it ) { - Rule* r = matched[j]; + Rule* r = *it; DBG_LOG(DBG_RULES, "Accepted rule: %s", r->id); diff --git a/src/RuleMatcher.h b/src/RuleMatcher.h index 52e00f6bad..da2838cb6d 100644 --- a/src/RuleMatcher.h +++ b/src/RuleMatcher.h @@ -361,6 +361,9 @@ private: void DumpStateStats(BroFile* f, RuleHdrTest* hdr_test); + static bool AllRulePatternsMatched(const Rule* r, MatchPos matchpos, + const AcceptingMatchSet& ams); + int RE_level; bool parse_error; RuleHdrTest* root;