mirror of
https://github.com/zeek/zeek.git
synced 2025-10-15 13:08:20 +00:00
RE: Remove RE_DisjunctiveMatcher and re-use MatchAll()
It seems we can simply open-code the CompileSet() usage in the TablePatternMatcher helper instead of indirecting through another class. Further, add the collection of matched indices to MatchAll() rather than duplicating its code in MatchDisjunction(). MatchAll() does not appear to be widely used.
This commit is contained in:
parent
501b582bc7
commit
9ae99cdc44
3 changed files with 32 additions and 79 deletions
58
src/RE.cc
58
src/RE.cc
|
@ -194,6 +194,10 @@ bool Specific_RE_Matcher::MatchAll(const String* s) {
|
||||||
return MatchAll(s->Bytes(), s->Len());
|
return MatchAll(s->Bytes(), s->Len());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool Specific_RE_Matcher::MatchSet(const String* s, std::vector<AcceptIdx>& matches) {
|
||||||
|
return MatchAll(s->Bytes(), s->Len(), &matches);
|
||||||
|
}
|
||||||
|
|
||||||
int Specific_RE_Matcher::Match(const char* s) { return Match((const u_char*)(s), strlen(s)); }
|
int Specific_RE_Matcher::Match(const char* s) { return Match((const u_char*)(s), strlen(s)); }
|
||||||
|
|
||||||
int Specific_RE_Matcher::Match(const String* s) { return Match(s->Bytes(), s->Len()); }
|
int Specific_RE_Matcher::Match(const String* s) { return Match(s->Bytes(), s->Len()); }
|
||||||
|
@ -202,7 +206,7 @@ int Specific_RE_Matcher::LongestMatch(const char* s) { return LongestMatch((cons
|
||||||
|
|
||||||
int Specific_RE_Matcher::LongestMatch(const String* s) { return LongestMatch(s->Bytes(), s->Len()); }
|
int Specific_RE_Matcher::LongestMatch(const String* s) { return LongestMatch(s->Bytes(), s->Len()); }
|
||||||
|
|
||||||
bool Specific_RE_Matcher::MatchAll(const u_char* bv, int n) {
|
bool Specific_RE_Matcher::MatchAll(const u_char* bv, int n, std::vector<AcceptIdx>* matches) {
|
||||||
if ( ! dfa )
|
if ( ! dfa )
|
||||||
// An empty pattern matches "all" iff what's being
|
// An empty pattern matches "all" iff what's being
|
||||||
// matched is empty.
|
// matched is empty.
|
||||||
|
@ -222,6 +226,11 @@ bool Specific_RE_Matcher::MatchAll(const u_char* bv, int n) {
|
||||||
if ( d )
|
if ( d )
|
||||||
d = d->Xtion(ecs[SYM_EOL], dfa);
|
d = d->Xtion(ecs[SYM_EOL], dfa);
|
||||||
|
|
||||||
|
if ( d && matches )
|
||||||
|
if ( const auto* a_set = d->Accept() )
|
||||||
|
for ( auto a : *a_set )
|
||||||
|
matches->push_back(a);
|
||||||
|
|
||||||
return d && d->Accept() != nullptr;
|
return d && d->Accept() != nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -255,33 +264,6 @@ int Specific_RE_Matcher::Match(const u_char* bv, int n) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Specific_RE_Matcher::MatchDisjunction(const String* s, std::vector<int>& matches) {
|
|
||||||
auto bv = s->Bytes();
|
|
||||||
auto n = s->Len();
|
|
||||||
|
|
||||||
ASSERT(dfa);
|
|
||||||
|
|
||||||
DFA_State* d = dfa->StartState();
|
|
||||||
d = d->Xtion(ecs[SYM_BOL], dfa);
|
|
||||||
|
|
||||||
while ( d ) {
|
|
||||||
if ( --n < 0 )
|
|
||||||
break;
|
|
||||||
|
|
||||||
int ec = ecs[*(bv++)];
|
|
||||||
d = d->Xtion(ec, dfa);
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( d )
|
|
||||||
d = d->Xtion(ecs[SYM_EOL], dfa);
|
|
||||||
|
|
||||||
if ( d )
|
|
||||||
if ( auto a_set = d->Accept() )
|
|
||||||
for ( auto a : *a_set )
|
|
||||||
matches.push_back(a);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void Specific_RE_Matcher::Dump(FILE* f) { dfa->Dump(f); }
|
void Specific_RE_Matcher::Dump(FILE* f) { dfa->Dump(f); }
|
||||||
|
|
||||||
inline void RE_Match_State::AddMatches(const AcceptingSet& as, MatchPos position) {
|
inline void RE_Match_State::AddMatches(const AcceptingSet& as, MatchPos position) {
|
||||||
|
@ -456,26 +438,6 @@ void RE_Matcher::MakeSingleLine() {
|
||||||
|
|
||||||
bool RE_Matcher::Compile(bool lazy) { return re_anywhere->Compile(lazy) && re_exact->Compile(lazy); }
|
bool RE_Matcher::Compile(bool lazy) { return re_anywhere->Compile(lazy) && re_exact->Compile(lazy); }
|
||||||
|
|
||||||
RE_DisjunctiveMatcher::RE_DisjunctiveMatcher(const std::vector<const RE_Matcher*>& REs) {
|
|
||||||
matcher = std::make_unique<detail::Specific_RE_Matcher>(detail::MATCH_EXACTLY);
|
|
||||||
|
|
||||||
zeek::detail::string_list sl;
|
|
||||||
zeek::detail::int_list il;
|
|
||||||
|
|
||||||
for ( const auto* re : REs ) {
|
|
||||||
sl.push_back(const_cast<char*>(re->PatternText()));
|
|
||||||
il.push_back(sl.size());
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( ! matcher->CompileSet(sl, il) )
|
|
||||||
reporter->FatalError("failed compile set for disjunctive matcher");
|
|
||||||
}
|
|
||||||
|
|
||||||
void RE_DisjunctiveMatcher::Match(const String* s, std::vector<int>& matches) {
|
|
||||||
matches.clear();
|
|
||||||
return matcher->MatchDisjunction(s, matches);
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST_SUITE("re_matcher") {
|
TEST_SUITE("re_matcher") {
|
||||||
TEST_CASE("simple_pattern") {
|
TEST_CASE("simple_pattern") {
|
||||||
RE_Matcher match("[0-9]+");
|
RE_Matcher match("[0-9]+");
|
||||||
|
|
34
src/RE.h
34
src/RE.h
|
@ -96,6 +96,14 @@ public:
|
||||||
// to the matching expressions. (idx must not contain zeros).
|
// to the matching expressions. (idx must not contain zeros).
|
||||||
bool CompileSet(const string_list& set, const int_list& idx);
|
bool CompileSet(const string_list& set, const int_list& idx);
|
||||||
|
|
||||||
|
// For use with CompileSet() to collect indices of all matched
|
||||||
|
// expressions into the matches vector. The matches vector is
|
||||||
|
// populated with the indices of all matching expressions provided
|
||||||
|
// to CompileSet()'s set and idx arguments.
|
||||||
|
//
|
||||||
|
// Behaves as MatchAll(), consuming the complete input string.
|
||||||
|
bool MatchSet(const String* s, std::vector<AcceptIdx>& matches);
|
||||||
|
|
||||||
// Returns the position in s just beyond where the first match
|
// Returns the position in s just beyond where the first match
|
||||||
// occurs, or 0 if there is no such position in s. Note that
|
// occurs, or 0 if there is no such position in s. Note that
|
||||||
// if the pattern matches empty strings, matching continues
|
// if the pattern matches empty strings, matching continues
|
||||||
|
@ -104,17 +112,6 @@ public:
|
||||||
int Match(const String* s);
|
int Match(const String* s);
|
||||||
int Match(const u_char* bv, int n);
|
int Match(const u_char* bv, int n);
|
||||||
|
|
||||||
// A disjunction is a collection of regular expressions (that under
|
|
||||||
// the hood are matches as a single RE, not serially) for which
|
|
||||||
// the match operation returns *all* of the matches. Disjunctions
|
|
||||||
// are constructed using the internal "||" RE operator, and the
|
|
||||||
// matches are returned as indices into the position, left-to-right,
|
|
||||||
// of which REs matched. IMPORTANT: the first RE is numbered 1, not 0.
|
|
||||||
//
|
|
||||||
// Note that there's no guarantee regarding the ordering of the
|
|
||||||
// returned matches if there is more than one.
|
|
||||||
void MatchDisjunction(const String* s, std::vector<int>& matches);
|
|
||||||
|
|
||||||
int LongestMatch(const char* s);
|
int LongestMatch(const char* s);
|
||||||
int LongestMatch(const String* s);
|
int LongestMatch(const String* s);
|
||||||
int LongestMatch(const u_char* bv, int n, bool bol = true, bool eol = true);
|
int LongestMatch(const u_char* bv, int n, bool bol = true, bool eol = true);
|
||||||
|
@ -136,7 +133,7 @@ protected:
|
||||||
// appending to an existing pattern_text.
|
// appending to an existing pattern_text.
|
||||||
void AddPat(const char* pat, const char* orig_fmt, const char* app_fmt);
|
void AddPat(const char* pat, const char* orig_fmt, const char* app_fmt);
|
||||||
|
|
||||||
bool MatchAll(const u_char* bv, int n);
|
bool MatchAll(const u_char* bv, int n, std::vector<AcceptIdx>* matches = nullptr);
|
||||||
|
|
||||||
match_type mt;
|
match_type mt;
|
||||||
bool multiline;
|
bool multiline;
|
||||||
|
@ -255,17 +252,4 @@ protected:
|
||||||
bool is_single_line = false;
|
bool is_single_line = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
class RE_DisjunctiveMatcher final {
|
|
||||||
public:
|
|
||||||
// Takes a collection of individual REs and builds a disjunctive
|
|
||||||
// matcher for the set.
|
|
||||||
RE_DisjunctiveMatcher(const std::vector<const RE_Matcher*>& REs);
|
|
||||||
|
|
||||||
// See MatchDisjunction() above.
|
|
||||||
void Match(const String* s, std::vector<int>& matches);
|
|
||||||
|
|
||||||
private:
|
|
||||||
std::unique_ptr<detail::Specific_RE_Matcher> matcher;
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace zeek
|
} // namespace zeek
|
||||||
|
|
19
src/Val.cc
19
src/Val.cc
|
@ -1455,7 +1455,7 @@ private:
|
||||||
// from having to re-build the matcher on every insert/delete in
|
// from having to re-build the matcher on every insert/delete in
|
||||||
// the common case that a whole bunch of those are done in a single
|
// the common case that a whole bunch of those are done in a single
|
||||||
// batch.
|
// batch.
|
||||||
std::unique_ptr<RE_DisjunctiveMatcher> matcher = nullptr;
|
std::unique_ptr<detail::Specific_RE_Matcher> matcher = nullptr;
|
||||||
|
|
||||||
// Maps matcher values to corresponding yields. When building the
|
// Maps matcher values to corresponding yields. When building the
|
||||||
// matcher we insert a nil at the head to accommodate how
|
// matcher we insert a nil at the head to accommodate how
|
||||||
|
@ -1473,8 +1473,8 @@ VectorValPtr detail::TablePatternMatcher::Lookup(const StringValPtr& s) {
|
||||||
Build();
|
Build();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<int> matches;
|
std::vector<AcceptIdx> matches;
|
||||||
matcher->Match(s->AsString(), matches);
|
matcher->MatchSet(s->AsString(), matches);
|
||||||
|
|
||||||
for ( auto m : matches )
|
for ( auto m : matches )
|
||||||
results->Append(matcher_yields[m]);
|
results->Append(matcher_yields[m]);
|
||||||
|
@ -1488,7 +1488,9 @@ void detail::TablePatternMatcher::Build() {
|
||||||
|
|
||||||
auto& tbl_dict = *tbl->Get();
|
auto& tbl_dict = *tbl->Get();
|
||||||
auto& tbl_hash = *tbl->GetTableHash();
|
auto& tbl_hash = *tbl->GetTableHash();
|
||||||
std::vector<const RE_Matcher*> patterns;
|
|
||||||
|
zeek::detail::string_list pattern_list;
|
||||||
|
zeek::detail::int_list index_list;
|
||||||
|
|
||||||
// We need to hold on to recovered hash key values so they don't
|
// We need to hold on to recovered hash key values so they don't
|
||||||
// get lost once a loop iteration goes out of scope.
|
// get lost once a loop iteration goes out of scope.
|
||||||
|
@ -1499,13 +1501,18 @@ void detail::TablePatternMatcher::Build() {
|
||||||
auto v = iter.value;
|
auto v = iter.value;
|
||||||
auto vl = tbl_hash.RecoverVals(*k);
|
auto vl = tbl_hash.RecoverVals(*k);
|
||||||
|
|
||||||
patterns.push_back(vl->AsListVal()->Idx(0)->AsPattern());
|
char* pt = const_cast<char*>(vl->AsListVal()->Idx(0)->AsPattern()->PatternText());
|
||||||
|
pattern_list.push_back(pt);
|
||||||
|
index_list.push_back(pattern_list.size());
|
||||||
matcher_yields.push_back(v->GetVal());
|
matcher_yields.push_back(v->GetVal());
|
||||||
|
|
||||||
hash_key_vals.push_back(std::move(vl));
|
hash_key_vals.push_back(std::move(vl));
|
||||||
}
|
}
|
||||||
|
|
||||||
matcher = std::make_unique<RE_DisjunctiveMatcher>(patterns);
|
matcher = std::make_unique<detail::Specific_RE_Matcher>(detail::MATCH_EXACTLY);
|
||||||
|
|
||||||
|
if ( ! matcher->CompileSet(pattern_list, index_list) )
|
||||||
|
reporter->FatalError("failed compile set for disjunctive matching");
|
||||||
}
|
}
|
||||||
|
|
||||||
TableVal::TableVal(TableTypePtr t, detail::AttributesPtr a) : Val(t) {
|
TableVal::TableVal(TableTypePtr t, detail::AttributesPtr a) : Val(t) {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue