mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 06:38:20 +00:00
262 lines
5.3 KiB
Text
262 lines
5.3 KiB
Text
// parse.y - parser for flex input
|
|
|
|
%{
|
|
#include <cstdlib>
|
|
|
|
#include "zeek/RE.h"
|
|
#include "zeek/CCL.h"
|
|
#include "zeek/NFA.h"
|
|
#include "zeek/EquivClass.h"
|
|
#include "zeek/Reporter.h"
|
|
|
|
namespace zeek::detail {

// Upper bound (exclusive) of the character codes that the ccl_expr
// action iterates over when expanding a character-class expression.
constexpr int csize = 256;

// Raised by synerr() whenever a syntax error is reported.
bool re_syntax_error = false;

// ASCII-only case conversion helpers, defined in the epilogue of this file.
int cupper(int sym);
int clower(int sym);

} // namespace zeek::detail
|
|
|
|
void yyerror(const char msg[]);
|
|
%}
|
|
|
|
// Terminals produced by the scanner.
%token TOK_CHAR TOK_NUMBER
%token TOK_CCL TOK_CCE
%token TOK_CASE_INSENSITIVE TOK_SINGLE_LINE

// Semantic value carried by terminals and non-terminals.
%union {
    // Character code (TOK_CHAR) or repetition count (TOK_NUMBER).
    int int_val;

    // Predicate called with a character code by the ccl_expr action.
    cce_func cce_val;

    // Character class under construction, or a finished one.
    zeek::detail::CCL* ccl_val;

    // NFA fragment built for a (sub-)expression.
    zeek::detail::NFA_Machine* mach_val;
}

%type <int_val> TOK_CHAR TOK_NUMBER
%type <cce_val> TOK_CCE
%type <ccl_val> TOK_CCL ccl full_ccl
%type <mach_val> re singleton series string

// Machines discarded by the parser are deleted.
%destructor { delete $$; } <mach_val>
|
|
|
|
%%
|
|
// Start symbol: one complete regular expression.
flexrule    :  re
                {
                // Tag the finished machine via AddAccept(1) and hand it
                // over through the zeek::detail::nfa global.
                auto* machine = $1;
                machine->AddAccept(1);
                zeek::detail::nfa = machine;
                }

            |  error
                {
                // Leave the generated parser function with result 1.
                return 1;
                }
            ;
|
|
|
|
// Alternation of series; an empty expression yields a machine that
// consists of a single epsilon state.
re          :  re '|' series
                { $$ = zeek::detail::make_alternate($1, $3); }

            |  series
                { $$ = $1; }

            |  /* empty */
                {
                auto* start = new zeek::detail::EpsilonState();
                $$ = new zeek::detail::NFA_Machine(start);
                }
            ;
|
|
|
|
// Concatenation: each further singleton is appended to the machine
// built so far.
series      :  series singleton
                {
                $$ = $1;
                $$->AppendMachine($2);
                }

            |  singleton
                { $$ = $1; }
            ;
|
|
|
|
// A singleton is one atom of the pattern (character, class, group,
// quoted string, anchor), optionally followed by repetition operators.
//
// Every action assigns its result explicitly instead of depending on the
// generated parser having pre-loaded the result slot with the first
// component.
singleton   :  singleton '*'
                { $1->MakeClosure(); $$ = $1; }

            |  singleton '+'
                { $1->MakePositiveClosure(); $$ = $1; }

            |  singleton '?'
                { $1->MakeOptional(); $$ = $1; }

            |  singleton '{' TOK_NUMBER ',' TOK_NUMBER '}'
                {
                // Bounded repetition {lo,hi}. The operand is the result
                // unless it gets replaced below; this also holds on the
                // error path.
                $$ = $1;

                if ( $3 > $5 || $3 < 0 )
                    zeek::detail::synerr("bad iteration values");
                else {
                    if ( $3 == 0 ) {
                        if ( $5 == 0 ) {
                            // {0,0}: replace the operand with a machine
                            // holding a lone epsilon state and release
                            // the operand.
                            $$ = new zeek::detail::NFA_Machine(new zeek::detail::EpsilonState());
                            Unref($1);
                        }
                        else {
                            // {0,hi}: build {1,hi}, then make it optional.
                            $1->MakeRepl(1, $5);
                            $1->MakeOptional();
                        }
                    }
                    else
                        $1->MakeRepl($3, $5);
                }
                }

            |  singleton '{' TOK_NUMBER ',' '}'
                {
                // Open-ended repetition {lo,}.
                if ( $3 < 0 )
                    zeek::detail::synerr("iteration value must be positive");
                else if ( $3 == 0 )
                    $1->MakeClosure();
                else
                    $1->MakeRepl($3, NO_UPPER_BOUND);

                $$ = $1;
                }

            |  singleton '{' TOK_NUMBER '}'
                {
                // Exact repetition {count}. The operand is the result
                // unless it gets replaced below; this also holds on the
                // error path.
                $$ = $1;

                if ( $3 < 0 )
                    zeek::detail::synerr("iteration value must be positive");
                else if ( $3 == 0 ) {
                    // {0}: release the operand and substitute a machine
                    // holding a lone epsilon state.
                    Unref($1);
                    $$ = new zeek::detail::NFA_Machine(new zeek::detail::EpsilonState());
                }
                else
                    $1->LinkCopies($3-1);
                }

            |  '.'
                {
                // The class used for '.' is chosen by the current
                // single-line flag.
                $$ = new zeek::detail::NFA_Machine(new zeek::detail::NFA_State(
                    zeek::detail::rem->AnyCCL(zeek::detail::re_single_line)));
                }

            |  full_ccl
                {
                $1->Sort();
                zeek::detail::rem->EC()->CCL_Use($1);
                $$ = new zeek::detail::NFA_Machine(new zeek::detail::NFA_State($1));
                }

            |  TOK_CCL
                { $$ = new zeek::detail::NFA_Machine(new zeek::detail::NFA_State($1)); }

            |  '"' string '"'
                { $$ = $2; }

            |  '(' re ')'
                { $$ = $2; }

            |  TOK_CASE_INSENSITIVE re ')'
                {
                // The flag is switched off again once the group is closed.
                $$ = $2;
                zeek::detail::case_insensitive = false;
                }

            |  TOK_SINGLE_LINE re ')'
                {
                // The flag is switched off again once the group is closed.
                $$ = $2;
                zeek::detail::re_single_line = false;
                }

            |  TOK_CHAR
                {
                auto sym = $1;

                // Reject symbols outside [0, NUM_SYM), except SYM_EPSILON.
                if ( sym < 0 || ( sym >= NUM_SYM && sym != SYM_EPSILON ) ) {
                    zeek::reporter->Error("bad symbol %d (compiling pattern /%s/)", sym,
                                          zeek::detail::RE_parse_input);
                    return 1;
                }

                $$ = new zeek::detail::NFA_Machine(new zeek::detail::NFA_State(sym, zeek::detail::rem->EC()));
                }

            |  '^'
                {
                $$ = new zeek::detail::NFA_Machine(new zeek::detail::NFA_State(SYM_BOL, zeek::detail::rem->EC()));
                $$->MarkBOL();
                }

            |  '$'
                {
                $$ = new zeek::detail::NFA_Machine(new zeek::detail::NFA_State(SYM_EOL, zeek::detail::rem->EC()));
                $$->MarkEOL();
                }
            ;
|
|
|
|
// A bracketed character class, optionally negated by a leading caret.
full_ccl    :  '[' ccl ']'
                { $$ = $2; }

            |  '[' '^' ccl ']'
                {
                $$ = $3;
                $$->Negate();
                }
            ;
|
|
|
|
// Contents of a character class: ranges, single characters and
// character-class expressions, accumulated left to right. The empty
// alternative starts from zeek::detail::curr_ccl.
//
// Every action assigns its result explicitly instead of depending on the
// generated parser having pre-loaded the result slot with the first
// component.
ccl         :  ccl TOK_CHAR '-' TOK_CHAR
                {
                $$ = $1;

                if ( $2 > $4 )
                    zeek::detail::synerr("negative range in character class");

                else if ( zeek::detail::case_insensitive && (isalpha($2) || isalpha($4)) ) {
                    // isupper() only promises zero or non-zero, so both
                    // results are reduced to booleans before comparing.
                    if ( isalpha($2) && isalpha($4) && (isupper($2) != 0) == (isupper($4) != 0) ) {
                        // Compatible range, do both versions
                        int l2 = tolower($2);
                        int l4 = tolower($4);

                        for ( int i = l2; i <= l4; ++i ) {
                            $1->Add(i);
                            $1->Add(toupper(i));
                        }
                    }
                    else
                        zeek::detail::synerr("ambiguous case-insensitive character class");
                }

                else {
                    for ( int i = $2; i <= $4; ++i )
                        $1->Add(i);
                }
                }

            |  ccl TOK_CHAR
                {
                $$ = $1;

                if ( zeek::detail::case_insensitive && isalpha($2) ) {
                    $1->Add(zeek::detail::clower($2));
                    $1->Add(zeek::detail::cupper($2));
                }
                else
                    $1->Add($2);
                }

            |  ccl ccl_expr
                { $$ = $1; }

            |  /* empty */
                { $$ = zeek::detail::curr_ccl; }
            ;
|
|
|
|
// Character-class expression: every ASCII code below csize that the
// predicate carried by the token accepts is added to
// zeek::detail::curr_ccl.
ccl_expr    :  TOK_CCE
                {
                for ( int sym = 0; sym < zeek::detail::csize; ++sym ) {
                    if ( ! isascii(sym) )
                        continue;

                    if ( $1(sym) )
                        zeek::detail::curr_ccl->Add(sym);
                }
                }
            ;
|
|
|
|
// Contents of a double-quoted string: one state per character, starting
// from a machine that holds a lone epsilon state.
string      :  string TOK_CHAR
                {
                // Characters are taken literally even when
                // case-insensitivity is set; that provides a way of
                // escaping out of insensitivity if needed.
                $$ = $1;
                $$->AppendState(new zeek::detail::NFA_State($2, zeek::detail::rem->EC()));
                }

            |  /* empty */
                {
                auto* start = new zeek::detail::EpsilonState();
                $$ = new zeek::detail::NFA_Machine(start);
                }
            ;
|
|
%%
|
|
|
|
namespace zeek::detail {
|
|
|
|
// Returns the upper-case form of sym when sym is a lower-case letter in
// the 7-bit ASCII range; any other value (including negative ones and
// codes above 0x7f) is returned unchanged.
//
// The ASCII test is an explicit range check rather than isascii(), which
// is not part of ISO C/C++.
int cupper(int sym)
{
    const bool in_ascii_range = sym >= 0 && sym <= 0x7f;

    return (in_ascii_range && islower(sym)) ? toupper(sym) : sym;
}
|
|
|
|
// Returns the lower-case form of sym when sym is an upper-case letter in
// the 7-bit ASCII range; any other value (including negative ones and
// codes above 0x7f) is returned unchanged.
//
// The ASCII test is an explicit range check rather than isascii(), which
// is not part of ISO C/C++.
int clower(int sym)
{
    const bool in_ascii_range = sym >= 0 && sym <= 0x7f;

    return (in_ascii_range && isupper(sym)) ? tolower(sym) : sym;
}
|
|
|
|
void synerr(const char str[])
|
|
{
|
|
zeek::detail::re_syntax_error = true;
|
|
zeek::reporter->Error("%s (compiling pattern /%s/)", str, RE_parse_input);
|
|
}
|
|
|
|
} // namespace zeek::detail
|
|
|
|
// Error callback declared for the generated parser. The message is
// discarded here; the `error` alternative of flexrule makes the parser
// return 1 instead.
void yyerror([[maybe_unused]] const char msg[])
{
}
|