Reuse CompileSet() instead of || string formatting

This commit is contained in:
Arne Welzel 2023-11-01 11:51:25 +01:00
parent 61fcca8482
commit b55e1a122e
4 changed files with 14 additions and 28 deletions

View file

@@ -172,6 +172,10 @@ bool Specific_RE_Matcher::CompileSet(const string_list& set, const int_list& idx
dfa = new DFA_Machine(nfa, EC()); dfa = new DFA_Machine(nfa, EC());
ecs = EC()->EquivClasses(); ecs = EC()->EquivClasses();
// dfa took ownership
Unref(nfa);
nfa = nullptr;
return true; return true;
} }
@@ -455,13 +459,16 @@ bool RE_Matcher::Compile(bool lazy) { return re_anywhere->Compile(lazy) && re_ex
RE_DisjunctiveMatcher::RE_DisjunctiveMatcher(const std::vector<const RE_Matcher*>& REs) { RE_DisjunctiveMatcher::RE_DisjunctiveMatcher(const std::vector<const RE_Matcher*>& REs) {
matcher = std::make_unique<detail::Specific_RE_Matcher>(detail::MATCH_EXACTLY); matcher = std::make_unique<detail::Specific_RE_Matcher>(detail::MATCH_EXACTLY);
std::string disjunction; zeek::detail::string_list sl;
for ( auto re : REs ) zeek::detail::int_list il;
disjunction += std::string("||") + re->PatternText();
matcher->SetPat(disjunction.c_str()); for ( const auto* re : REs ) {
auto status = matcher->Compile(); sl.push_back(const_cast<char*>(re->PatternText()));
ASSERT(status); il.push_back(sl.size());
}
if ( ! matcher->CompileSet(sl, il) )
reporter->FatalError("failed compile set for disjunctive matcher");
} }
void RE_DisjunctiveMatcher::Match(const String* s, std::vector<int>& matches) { void RE_DisjunctiveMatcher::Match(const String* s, std::vector<int>& matches) {

View file

@@ -36,7 +36,6 @@ extern CCL* curr_ccl;
extern NFA_Machine* nfa; extern NFA_Machine* nfa;
extern Specific_RE_Matcher* rem; extern Specific_RE_Matcher* rem;
extern const char* RE_parse_input; extern const char* RE_parse_input;
extern int RE_accept_num;
extern int clower(int); extern int clower(int);
extern void synerr(const char str[]); extern void synerr(const char str[]);

View file

@@ -21,7 +21,6 @@ void yyerror(const char msg[]);
%} %}
%token TOK_CHAR TOK_NUMBER TOK_CCL TOK_CCE TOK_CASE_INSENSITIVE TOK_SINGLE_LINE %token TOK_CHAR TOK_NUMBER TOK_CCL TOK_CCE TOK_CASE_INSENSITIVE TOK_SINGLE_LINE
%token TOK_DISJUNCTION
%union { %union {
int int_val; int int_val;
@@ -33,7 +32,7 @@ void yyerror(const char msg[]);
%type <int_val> TOK_CHAR TOK_NUMBER %type <int_val> TOK_CHAR TOK_NUMBER
%type <cce_val> TOK_CCE %type <cce_val> TOK_CCE
%type <ccl_val> TOK_CCL ccl full_ccl %type <ccl_val> TOK_CCL ccl full_ccl
%type <mach_val> re singleton series string disjunction %type <mach_val> re singleton series string
%destructor { delete $$; } <mach_val> %destructor { delete $$; } <mach_val>
@@ -41,9 +40,6 @@ void yyerror(const char msg[]);
flexrule : re flexrule : re
{ $1->AddAccept(1); zeek::detail::nfa = $1; } { $1->AddAccept(1); zeek::detail::nfa = $1; }
| disjunction
{ zeek::detail::nfa = $1; }
| error | error
{ return 1; } { return 1; }
; ;
@@ -55,18 +51,6 @@ re : re '|' series
{ $$ = new zeek::detail::NFA_Machine(new zeek::detail::EpsilonState()); } { $$ = new zeek::detail::NFA_Machine(new zeek::detail::EpsilonState()); }
; ;
disjunction : disjunction TOK_DISJUNCTION re
{
$3->AddAccept(++zeek::detail::RE_accept_num);
$$ = zeek::detail::make_alternate($1, $3);
}
| TOK_DISJUNCTION re
{
$2->AddAccept(++zeek::detail::RE_accept_num);
$$ = $2;
}
;
series : series singleton series : series singleton
{ $1->AppendMachine($2); $$ = $1; } { $1->AppendMachine($2); $$ = $1; }
| singleton | singleton

View file

@@ -23,7 +23,6 @@
#include "re-parse.h" #include "re-parse.h"
const char* zeek::detail::RE_parse_input = nullptr; const char* zeek::detail::RE_parse_input = nullptr;
int zeek::detail::RE_accept_num = 0;
#define RET_CCE(func) \ #define RET_CCE(func) \
BEGIN(SC_CCL); \ BEGIN(SC_CCL); \
@@ -144,8 +143,6 @@ CCL_EXPR ("[:"[[:alpha:]]+":]")
} }
} }
"||" return TOK_DISJUNCTION;
[|*+?.(){}] return yytext[0]; [|*+?.(){}] return yytext[0];
. yylval.int_val = yytext[0]; return TOK_CHAR; . yylval.int_val = yytext[0]; return TOK_CHAR;
\n return 0; // treat as end of pattern \n return 0; // treat as end of pattern
@@ -240,7 +237,6 @@ YY_BUFFER_STATE RE_buf;
void RE_set_input(const char* str) void RE_set_input(const char* str)
{ {
zeek::detail::RE_parse_input = str; zeek::detail::RE_parse_input = str;
zeek::detail::RE_accept_num = 0;
RE_buf = yy_scan_string(str); RE_buf = yy_scan_string(str);
} }