Merge remote-tracking branch 'origin/topic/awelzel/re-compile-syntax-error-propagation'

* origin/topic/awelzel/re-compile-syntax-error-propagation:
  RE/Specific_RE_Matcher: Make Compile() return false for syntax errors
This commit is contained in:
Arne Welzel 2024-03-05 19:22:46 +01:00
commit 09ad9b193a
3 changed files with 44 additions and 8 deletions

View file

@ -27,6 +27,8 @@ extern void RE_done_with_scan();
namespace zeek {
namespace detail {
extern bool re_syntax_error;
Specific_RE_Matcher::Specific_RE_Matcher(match_type arg_mt, bool arg_multiline)
: mt(arg_mt), multiline(arg_multiline), equiv_class(NUM_SYM) {
any_ccl = nullptr;
@ -106,12 +108,13 @@ bool Specific_RE_Matcher::Compile(bool lazy) {
return false;
rem = this;
zeek::detail::re_syntax_error = false;
RE_set_input(pattern_text.c_str());
int parse_status = RE_parse();
RE_done_with_scan();
if ( parse_status ) {
if ( parse_status || zeek::detail::re_syntax_error ) {
reporter->Error("error compiling pattern /%s/", pattern_text.c_str());
Unref(nfa);
nfa = nullptr;
@ -533,6 +536,35 @@ TEST_SUITE("re_matcher") {
CHECK(dj->MatchExactly("def"));
delete dj;
}
TEST_CASE("synerr causes Compile() to fail") {
    // Repetition bounds: only the well-formed {1,2} is expected to compile.
    RE_Matcher bounded_repeat("a{1,2}");
    CHECK(bounded_repeat.Compile());

    RE_Matcher inverted_bounds("a{6,5}");
    CHECK_FALSE(inverted_bounds.Compile());

    RE_Matcher non_numeric_bound("a{1,a}");
    CHECK_FALSE(non_numeric_bound.Compile());

    RE_Matcher unclosed_brace("a{1,2");
    CHECK_FALSE(unclosed_brace.Compile());

    // Character classes that never see their closing ']'.
    RE_Matcher unclosed_ccl("[1234");
    CHECK_FALSE(unclosed_ccl.Compile());

    RE_Matcher ccl_closed_by_brace("a[1234}");
    CHECK_FALSE(ccl_closed_by_brace.Compile());

    // Double quotes: a lone quote must fail, while a closed pair or a
    // backslash-escaped quote must compile.
    RE_Matcher lone_quote("a\"b");
    CHECK_FALSE(lone_quote.Compile());

    RE_Matcher paired_quotes("a\"b\"");
    CHECK(paired_quotes.Compile());

    RE_Matcher escaped_quote("a\\\"b");
    CHECK(escaped_quote.Compile());
}
}
} // namespace zeek

View file

@ -9,10 +9,10 @@
#include "zeek/EquivClass.h"
#include "zeek/Reporter.h"
int csize = 256;
int syntax_error = 0;
namespace zeek::detail {
// Upper bound (exclusive) of the symbol loop used when expanding a
// character-class expression in the grammar actions.
constexpr int csize = 256;

// Flag raised by synerr() when a pattern has a syntax error.
// Specific_RE_Matcher::Compile() clears it before parsing and treats a set
// flag as a compilation failure.
bool re_syntax_error = false;

// Forward declarations.
int cupper(int sym);
int clower(int sym);
}
@ -230,7 +230,7 @@ ccl : ccl TOK_CHAR '-' TOK_CHAR
ccl_expr: TOK_CCE
{
for ( int c = 0; c < csize; ++c )
for ( int c = 0; c < zeek::detail::csize; ++c )
if ( isascii(c) && $1(c) )
zeek::detail::curr_ccl->Add(c);
}
@ -265,7 +265,7 @@ int clower(int sym)
// Records that the pattern currently being parsed has a syntax error and
// reports it.
//
// Sets both `syntax_error` and `zeek::detail::re_syntax_error` to true, then
// emits an error via the reporter containing `str` and the pattern text held
// in RE_parse_input.
//
// str: description of the syntax problem, inserted verbatim into the message.
void synerr(const char str[])
{
syntax_error = true;
// Checked by Specific_RE_Matcher::Compile() after RE_parse() returns.
zeek::detail::re_syntax_error = true;
zeek::reporter->Error("%s (compiling pattern /%s/)", str, RE_parse_input);
}

View file

@ -149,9 +149,13 @@ CCL_EXPR ("[:"[[:alpha:]]+":]")
}
<SC_QUOTE>{
[^"\n]$ zeek::detail::synerr("missing quote"); return '"';
[^"\n] yylval.int_val = yytext[0]; return TOK_CHAR;
\" BEGIN(INITIAL); return '"';
<<EOF>> {
zeek::detail::synerr("missing quote");
BEGIN(INITIAL);
return '"';
}
}
<SC_FIRST_CCL>{
@ -164,8 +168,8 @@ CCL_EXPR ("[:"[[:alpha:]]+":]")
-/[^\]\n] return '-';
[^\]\n] yylval.int_val = yytext[0]; return TOK_CHAR;
"]" BEGIN(INITIAL); return ']';
[^\]]$ {
zeek::detail::synerr("bad character class");
<<EOF>> {
zeek::detail::synerr("unterminated character class");
BEGIN(INITIAL);
return ']';
}