// parse.y - parser for flex input
//
// Bison grammar that turns a regular-expression token stream into an NFA.
// A successful parse stores the resulting machine in zeek::detail::nfa;
// errors found by the actions are reported through zeek::detail::synerr()
// or zeek::reporter->Error().

%{
#include <ctype.h> // isalpha(), isascii(), isupper(), islower(), tolower(), toupper()

#include "zeek/RE.h"
#include "zeek/CCL.h"
#include "zeek/NFA.h"
#include "zeek/EquivClass.h"
#include "zeek/Reporter.h"

// Upper bound (exclusive) of the character codes scanned when expanding a
// character-class expression (see the ccl_expr rule).
int csize = 256;

// Set to non-zero by zeek::detail::synerr().
int syntax_error = 0;

namespace zeek::detail {
    int cupper(int sym);
    int clower(int sym);
}

void yyerror(const char msg[]);
%}

%token TOK_CHAR TOK_NUMBER TOK_CCL TOK_CCE TOK_CASE_INSENSITIVE

%union {
    int int_val;
    cce_func cce_val;
    zeek::detail::CCL* ccl_val;
    zeek::detail::NFA_Machine* mach_val;
}

// Semantic-value types; each tag names the %union member that the actions
// below read or write for that symbol.
%type <int_val> TOK_CHAR TOK_NUMBER
%type <cce_val> TOK_CCE
%type <ccl_val> TOK_CCL ccl full_ccl
%type <mach_val> re singleton series string

%%

// Start symbol: a complete pattern.
flexrule    :   re
                    {
                    $1->AddAccept(1);
                    zeek::detail::nfa = $1;
                    }

            |   error
                    {
                    // Make yyparse() return 1.
                    return 1;
                    }
            ;

// Alternation ('|'); the empty alternative yields an epsilon-only machine.
re          :   re '|' series
                    { $$ = zeek::detail::make_alternate($1, $3); }

            |   series

            |
                    { $$ = new zeek::detail::NFA_Machine(new zeek::detail::EpsilonState()); }
            ;

// Concatenation of singletons. The first action does not assign $$; it
// relies on $$ already holding $1, which was modified in place.
series      :   series singleton
                    { $1->AppendMachine($2); }

            |   singleton
            ;

// A single atom, optionally followed by repetition operators. Actions that
// do not assign $$ modify $1 in place and rely on $$ already holding $1.
singleton   :   singleton '*'
                    { $1->MakeClosure(); }

            |   singleton '+'
                    { $1->MakePositiveClosure(); }

            |   singleton '?'
                    { $1->MakeOptional(); }

            |   singleton '{' TOK_NUMBER ',' TOK_NUMBER '}'
                    {
                    // {n,m}: bounded repetition.
                    if ( $3 > $5 || $3 < 0 )
                        zeek::detail::synerr("bad iteration values");
                    else
                        {
                        if ( $3 == 0 )
                            {
                            if ( $5 == 0 )
                                {
                                // {0,0}: replace the operand with an
                                // epsilon-only machine and release it.
                                $$ = new zeek::detail::NFA_Machine(new zeek::detail::EpsilonState());
                                Unref($1);
                                }
                            else
                                {
                                // {0,m}: 1..m repetitions, made optional.
                                $1->MakeRepl(1, $5);
                                $1->MakeOptional();
                                }
                            }
                        else
                            $1->MakeRepl($3, $5);
                        }
                    }

            |   singleton '{' TOK_NUMBER ',' '}'
                    {
                    // {n,}: at least n repetitions; {0,} is a plain closure.
                    if ( $3 < 0 )
                        zeek::detail::synerr("iteration value must be positive");
                    else if ( $3 == 0 )
                        $1->MakeClosure();
                    else
                        $1->MakeRepl($3, NO_UPPER_BOUND);
                    }

            |   singleton '{' TOK_NUMBER '}'
                    {
                    // {n}: fixed count; {0} becomes an epsilon-only machine.
                    if ( $3 < 0 )
                        zeek::detail::synerr("iteration value must be positive");
                    else if ( $3 == 0 )
                        {
                        Unref($1);
                        $$ = new zeek::detail::NFA_Machine(new zeek::detail::EpsilonState());
                        }
                    else
                        $1->LinkCopies($3-1);
                    }

            |   '.'
                    {
                    $$ = new zeek::detail::NFA_Machine(new zeek::detail::NFA_State(zeek::detail::rem->AnyCCL()));
                    }

            |   full_ccl
                    {
                    $1->Sort();
                    zeek::detail::rem->EC()->CCL_Use($1);
                    $$ = new zeek::detail::NFA_Machine(new zeek::detail::NFA_State($1));
                    }

            |   TOK_CCL
                    { $$ = new zeek::detail::NFA_Machine(new zeek::detail::NFA_State($1)); }

            |   '"' string '"'
                    { $$ = $2; }

            |   '(' re ')'
                    { $$ = $2; }

            |   TOK_CASE_INSENSITIVE re ')'
                    {
                    $$ = $2;
                    // Clear the case-insensitivity flag once the group closes.
                    zeek::detail::case_insensitive = 0;
                    }

            |   TOK_CHAR
                    {
                    auto sym = $1;

                    // Accept only 0 .. NUM_SYM-1, plus SYM_EPSILON.
                    if ( sym < 0 || ( sym >= NUM_SYM && sym != SYM_EPSILON ) )
                        {
                        zeek::reporter->Error("bad symbol %d (compiling pattern /%s/)", sym, zeek::detail::RE_parse_input);
                        return 1;
                        }

                    $$ = new zeek::detail::NFA_Machine(new zeek::detail::NFA_State(sym, zeek::detail::rem->EC()));
                    }

            |   '^'
                    {
                    $$ = new zeek::detail::NFA_Machine(new zeek::detail::NFA_State(SYM_BOL, zeek::detail::rem->EC()));
                    $$->MarkBOL();
                    }

            |   '$'
                    {
                    $$ = new zeek::detail::NFA_Machine(new zeek::detail::NFA_State(SYM_EOL, zeek::detail::rem->EC()));
                    $$->MarkEOL();
                    }
            ;

// A bracketed character class, optionally negated with a leading '^'.
full_ccl    :   '[' ccl ']'
                    { $$ = $2; }

            |   '[' '^' ccl ']'
                    {
                    $3->Negate();
                    $$ = $3;
                    }
            ;

// Contents of a character class: ranges, single characters and
// character-class expressions, accumulated into one CCL.
ccl         :   ccl TOK_CHAR '-' TOK_CHAR
                    {
                    if ( $2 > $4 )
                        zeek::detail::synerr("negative range in character class");

                    else if ( zeek::detail::case_insensitive &&
                              (isalpha($2) || isalpha($4)) )
                        {
                        // isupper() only promises a non-zero result for
                        // "true", so normalize both results to bool before
                        // comparing the case of the two endpoints.
                        if ( isalpha($2) && isalpha($4) &&
                             (isupper($2) != 0) == (isupper($4) != 0) )
                            { // Compatible range, do both versions
                            int l2 = tolower($2);
                            int l4 = tolower($4);

                            for ( int i = l2; i <= l4; ++i )
                                {
                                $1->Add(i);
                                $1->Add(toupper(i));
                                }
                            }

                        else
                            zeek::detail::synerr("ambiguous case-insensitive character class");
                        }

                    else
                        {
                        for ( int i = $2; i <= $4; ++i )
                            $1->Add(i);
                        }
                    }

            |   ccl TOK_CHAR
                    {
                    if ( zeek::detail::case_insensitive && isalpha($2) )
                        {
                        // Add both cases of the letter.
                        $1->Add(zeek::detail::clower($2));
                        $1->Add(zeek::detail::cupper($2));
                        }
                    else
                        $1->Add($2);
                    }

            |   ccl ccl_expr

            |
                    {
                    // An empty class starts from the current CCL.
                    $$ = zeek::detail::curr_ccl;
                    }
            ;

// Character-class expression: $1 is a predicate over character codes; every
// ASCII code below csize that satisfies it is added to the current CCL.
ccl_expr    :   TOK_CCE
                    {
                    for ( int c = 0; c < csize; ++c )
                        if ( isascii(c) && $1(c) )
                            zeek::detail::curr_ccl->Add(c);
                    }
            ;

// Contents of a double-quoted string: each character is appended as its own
// state. The first action relies on $$ already holding $1.
string      :   string TOK_CHAR
                    {
                    // Even if case-insensitivity is set,
                    // leave this alone; that provides a way
                    // of "escaping" out of insensitivity
                    // if needed.
                    $1->AppendState(new zeek::detail::NFA_State($2, zeek::detail::rem->EC()));
                    }

            |
                    { $$ = new zeek::detail::NFA_Machine(new zeek::detail::EpsilonState()); }
            ;

%%

namespace zeek::detail {

// Returns the uppercase form of sym if it is an ASCII lowercase letter;
// otherwise returns sym unchanged.
int cupper(int sym)
    {
    return (isascii(sym) && islower(sym)) ? toupper(sym) : sym;
    }

// Returns the lowercase form of sym if it is an ASCII uppercase letter;
// otherwise returns sym unchanged.
int clower(int sym)
    {
    return (isascii(sym) && isupper(sym)) ? tolower(sym) : sym;
    }

// Records that a syntax error occurred and reports str together with the
// pattern currently being compiled.
void synerr(const char str[])
    {
    syntax_error = 1;
    zeek::reporter->Error("%s (compiling pattern /%s/)", str, RE_parse_input);
    }

} // namespace zeek::detail

// Parser error callback; the message is discarded. The `error` alternative
// of flexrule makes the parse return 1.
// NOTE(review): presumably silent on purpose because errors are reported via
// synerr()/reporter elsewhere - confirm.
void yyerror(const char msg[])
    {
    }