From b55e1a122e5e93c84f3712bb576ef179213231cd Mon Sep 17 00:00:00 2001 From: Arne Welzel Date: Wed, 1 Nov 2023 11:51:25 +0100 Subject: [PATCH] Reuse CompileSet() instead of || string formatting --- src/RE.cc | 19 +++++++++++++------ src/RE.h | 1 - src/re-parse.y | 18 +----------------- src/re-scan.l | 4 ---- 4 files changed, 14 insertions(+), 28 deletions(-) diff --git a/src/RE.cc b/src/RE.cc index 87b69d29a2..70bb71b055 100644 --- a/src/RE.cc +++ b/src/RE.cc @@ -172,6 +172,10 @@ bool Specific_RE_Matcher::CompileSet(const string_list& set, const int_list& idx dfa = new DFA_Machine(nfa, EC()); ecs = EC()->EquivClasses(); + // dfa took ownership + Unref(nfa); + nfa = nullptr; + return true; } @@ -455,13 +459,16 @@ bool RE_Matcher::Compile(bool lazy) { return re_anywhere->Compile(lazy) && re_ex RE_DisjunctiveMatcher::RE_DisjunctiveMatcher(const std::vector& REs) { matcher = std::make_unique(detail::MATCH_EXACTLY); - std::string disjunction; - for ( auto re : REs ) - disjunction += std::string("||") + re->PatternText(); + zeek::detail::string_list sl; + zeek::detail::int_list il; - matcher->SetPat(disjunction.c_str()); - auto status = matcher->Compile(); - ASSERT(status); + for ( const auto* re : REs ) { + sl.push_back(const_cast(re->PatternText())); + il.push_back(sl.size()); + } + + if ( ! matcher->CompileSet(sl, il) ) + reporter->FatalError("failed compile set for disjunctive matcher"); } void RE_DisjunctiveMatcher::Match(const String* s, std::vector& matches) { diff --git a/src/RE.h b/src/RE.h index 8d7b28da30..52b446a306 100644 --- a/src/RE.h +++ b/src/RE.h @@ -36,7 +36,6 @@ extern CCL* curr_ccl; extern NFA_Machine* nfa; extern Specific_RE_Matcher* rem; extern const char* RE_parse_input; -extern int RE_accept_num; extern int clower(int); extern void synerr(const char str[]); diff --git a/src/re-parse.y b/src/re-parse.y index 2ee6bec9e2..2d6672df8d 100644 --- a/src/re-parse.y +++ b/src/re-parse.y @@ -21,7 +21,6 @@ void yyerror(const char msg[]); %} %token TOK_CHAR TOK_NUMBER TOK_CCL TOK_CCE TOK_CASE_INSENSITIVE TOK_SINGLE_LINE -%token TOK_DISJUNCTION %union { int int_val; @@ -33,7 +32,7 @@ void yyerror(const char msg[]); %type TOK_CHAR TOK_NUMBER %type TOK_CCE %type TOK_CCL ccl full_ccl -%type re singleton series string disjunction +%type re singleton series string %destructor { delete $$; } @@ -41,9 +40,6 @@ void yyerror(const char msg[]); flexrule : re { $1->AddAccept(1); zeek::detail::nfa = $1; } - | disjunction - { zeek::detail::nfa = $1; } - | error { return 1; } ; @@ -55,18 +51,6 @@ re : re '|' series { $$ = new zeek::detail::NFA_Machine(new zeek::detail::EpsilonState()); } ; -disjunction : disjunction TOK_DISJUNCTION re - { - $3->AddAccept(++zeek::detail::RE_accept_num); - $$ = zeek::detail::make_alternate($1, $3); - } - | TOK_DISJUNCTION re - { - $2->AddAccept(++zeek::detail::RE_accept_num); - $$ = $2; - } - ; - series : series singleton { $1->AppendMachine($2); $$ = $1; } | singleton diff --git a/src/re-scan.l b/src/re-scan.l index 7df4665640..f382393477 100644 --- a/src/re-scan.l +++ b/src/re-scan.l @@ -23,7 +23,6 @@ #include "re-parse.h" const char* zeek::detail::RE_parse_input = nullptr; -int zeek::detail::RE_accept_num = 0; #define RET_CCE(func) \ BEGIN(SC_CCL); \ @@ -144,8 +143,6 @@ CCL_EXPR ("[:"[[:alpha:]]+":]") } } - "||" return TOK_DISJUNCTION; - [|*+?.(){}] return yytext[0]; . yylval.int_val = yytext[0]; return TOK_CHAR; \n return 0; // treat as end of pattern @@ -240,7 +237,6 @@ YY_BUFFER_STATE RE_buf; void RE_set_input(const char* str) { zeek::detail::RE_parse_input = str; - zeek::detail::RE_accept_num = 0; RE_buf = yy_scan_string(str); }