Make Specific_RE_Matcher::Compile() fail when the regex parser reported a
syntax error.

This text sits ahead of the first "diff --git" header and is not part of
any hunk.

  * src/RE.cc: clear zeek::detail::re_syntax_error before RE_parse() and
    treat a set flag the same as a non-zero parse status; add the
    "synerr causes Compile() to fail" test case.
  * src/re-parse.y: replace the global `syntax_error` with
    zeek::detail::re_syntax_error (set by synerr()), and move `csize`
    into zeek::detail as a constexpr.
  * src/re-scan.l: report "missing quote" / "unterminated character class"
    from end-of-input rules instead of the former end-of-line patterns.

NOTE(review): this patch was reconstructed from a whitespace-collapsed copy.
Indentation, tab/space choice and blank context lines are best-effort, so
applying it may need `git apply --ignore-whitespace`. The `<<EOF>>` patterns
and the start-condition prefixes (`<SC_QUOTE>`, `<SC_FIRST_CCL>`) in the
re-scan.l hunks were missing from that copy and have been restored; confirm
them against the real file.

diff --git a/src/RE.cc b/src/RE.cc
index 3a790d2f4e..afb600bd4a 100644
--- a/src/RE.cc
+++ b/src/RE.cc
@@ -27,6 +27,8 @@ extern void RE_done_with_scan();
 namespace zeek {
 namespace detail {
 
+extern bool re_syntax_error; // defined in re-parse.y
+
 Specific_RE_Matcher::Specific_RE_Matcher(match_type arg_mt, bool arg_multiline)
     : mt(arg_mt), multiline(arg_multiline), equiv_class(NUM_SYM) {
     any_ccl = nullptr;
@@ -106,12 +108,13 @@ bool Specific_RE_Matcher::Compile(bool lazy) {
         return false;
 
     rem = this;
+    zeek::detail::re_syntax_error = false; // start every parse with a clean flag
     RE_set_input(pattern_text.c_str());
 
     int parse_status = RE_parse();
     RE_done_with_scan();
 
-    if ( parse_status ) {
+    if ( parse_status || zeek::detail::re_syntax_error ) {
         reporter->Error("error compiling pattern /%s/", pattern_text.c_str());
         Unref(nfa);
         nfa = nullptr;
@@ -533,6 +536,35 @@ TEST_SUITE("re_matcher") {
         CHECK(dj->MatchExactly("def"));
         delete dj;
     }
+
+    TEST_CASE("synerr causes Compile() to fail") {
+        RE_Matcher match1("a{1,2}");
+        CHECK(match1.Compile());
+
+        RE_Matcher match2("a{6,5}");
+        CHECK_FALSE(match2.Compile());
+
+        RE_Matcher match3("a{1,a}");
+        CHECK_FALSE(match3.Compile());
+
+        RE_Matcher match4("a{1,2");
+        CHECK_FALSE(match4.Compile());
+
+        RE_Matcher match5("[1234");
+        CHECK_FALSE(match5.Compile());
+
+        RE_Matcher match6("a[1234}");
+        CHECK_FALSE(match6.Compile());
+
+        RE_Matcher match7("a\"b");
+        CHECK_FALSE(match7.Compile());
+
+        RE_Matcher match8("a\"b\"");
+        CHECK(match8.Compile());
+
+        RE_Matcher match9("a\\\"b");
+        CHECK(match9.Compile());
+    }
 }
 
 } // namespace zeek
diff --git a/src/re-parse.y b/src/re-parse.y
index 2d6672df8d..46ba7f91d1 100644
--- a/src/re-parse.y
+++ b/src/re-parse.y
@@ -9,10 +9,10 @@
 #include "zeek/EquivClass.h"
 #include "zeek/Reporter.h"
 
-int csize = 256;
-int syntax_error = 0;
 
 namespace zeek::detail {
+	constexpr int csize = 256;
+	bool re_syntax_error = false; // set by synerr(); read by Specific_RE_Matcher::Compile()
 	int cupper(int sym);
 	int clower(int sym);
 }
@@ -230,7 +230,7 @@ ccl : ccl TOK_CHAR '-' TOK_CHAR
 
 ccl_expr:	TOK_CCE
 			{
-			for ( int c = 0; c < csize; ++c )
+			for ( int c = 0; c < zeek::detail::csize; ++c )
 				if ( isascii(c) && $1(c) )
 					zeek::detail::curr_ccl->Add(c);
 			}
@@ -265,7 +265,7 @@ int clower(int sym)
 
 void synerr(const char str[])
 	{
-	syntax_error = true;
+	zeek::detail::re_syntax_error = true;
 	zeek::reporter->Error("%s (compiling pattern /%s/)", str, RE_parse_input);
 	}
 
diff --git a/src/re-scan.l b/src/re-scan.l
index f382393477..3056762ea7 100644
--- a/src/re-scan.l
+++ b/src/re-scan.l
@@ -149,9 +149,13 @@ CCL_EXPR ("[:"[[:alpha:]]+":]")
 }
 
 <SC_QUOTE>{
-	[^"\n]$		zeek::detail::synerr("missing quote"); return '"';
 	[^"\n]		yylval.int_val = yytext[0]; return TOK_CHAR;
 	\"		BEGIN(INITIAL); return '"';
+	<<EOF>>	{
+			zeek::detail::synerr("missing quote");
+			BEGIN(INITIAL);
+			return '"';
+			}
 }
 
 <SC_FIRST_CCL>{
@@ -164,8 +168,8 @@ CCL_EXPR ("[:"[[:alpha:]]+":]")
 	-/[^\]\n]	return '-';
 	[^\]\n]		yylval.int_val = yytext[0]; return TOK_CHAR;
 	"]"		BEGIN(INITIAL); return ']';
-	[^\]]$	{
-			zeek::detail::synerr("bad character class");
+	<<EOF>>	{
+			zeek::detail::synerr("unterminated character class");
 			BEGIN(INITIAL);
 			return ']';
 			}