GH-1497: Support CRLF line-endings in Zeek scripts and signature files

This commit is contained in:
Jon Siwek 2021-04-08 20:15:54 -07:00
parent 4ae056b1a3
commit 312547ab0a
6 changed files with 55 additions and 12 deletions

View file

@ -139,12 +139,12 @@ HEX [0-9a-fA-F]+
IDCOMPONENT [A-Za-z_][A-Za-z_0-9]*
ID {IDCOMPONENT}(::{IDCOMPONENT})*
IP6 ("["({HEX}:){7}{HEX}"]")|("["0x{HEX}({HEX}|:)*"::"({HEX}|:)*"]")|("["({HEX}|:)*"::"({HEX}|:)*"]")|("["({HEX}:){6}({D}"."){3}{D}"]")|("["({HEX}|:)*"::"({HEX}|:)*({D}"."){3}{D}"]")
FILE [^ \t\n]+
PREFIX [^ \t\n]+
/* FILE and PREFIX each match one or more characters that are not space, tab,
   CR, or LF. Because CR is excluded together with LF, a CRLF line ending is
   never part of the matched text. */
FILE [^ \t\r\n]+
PREFIX [^ \t\r\n]+
FLOAT (({D}*"."?{D})|({D}"."?{D}*))([eE][-+]?{D})?
H [A-Za-z0-9][A-Za-z0-9\-]*
HTLD [A-Za-z][A-Za-z0-9\-]*
ESCSEQ (\\([^\n]|[0-7]+|x[[:xdigit:]]+))
/* ESCSEQ: a backslash followed by any single character other than CR or LF,
   by one or more octal digits, or by x and one or more hex digits. */
ESCSEQ (\\([^\r\n]|[0-7]+|x[[:xdigit:]]+))
%%
@ -175,7 +175,7 @@ ESCSEQ (\\([^\n]|[0-7]+|x[[:xdigit:]]+))
{WS} /* eat whitespace */
<INITIAL,IGNORE>\n {
<INITIAL,IGNORE>\r?\n {
++line_number;
++yylloc.first_line;
++yylloc.last_line;
@ -448,7 +448,7 @@ when return TOK_WHEN;
<IGNORE>@ifndef ++current_depth;
<IGNORE>@else return TOK_ATELSE;
<IGNORE>@endif return TOK_ATENDIF;
<IGNORE>[^@\n]+ /* eat */
<IGNORE>[^@\r\n]+ /* eat */
<IGNORE>. /* eat */
T RET_CONST(zeek::val_mgr->True()->Ref())
@ -513,7 +513,7 @@ F RET_CONST(zeek::val_mgr->False()->Ref())
({H}".")+{HTLD} RET_CONST(zeek::detail::dns_mgr->LookupHost(yytext).release())
\"([^\\\n\"]|{ESCSEQ})*\" {
\"([^\\\r\\\n\"]|{ESCSEQ})*\" {
const char* text = yytext;
int len = strlen(text) + 1;
int i = 0;
@ -546,7 +546,7 @@ F RET_CONST(zeek::val_mgr->False()->Ref())
RET_CONST(new zeek::StringVal(new zeek::String(1, (zeek::byte_vec) s, i-1)))
}
<RE>([^/\\\n]|{ESCSEQ})+ {
<RE>([^/\\\r\\\n]|{ESCSEQ})+ {
// Active in the RE start condition. Matches a run made of ESCSEQ escapes and
// of characters other than slash, backslash, CR, and LF. Backslash appears
// twice in the character class; the repetition is redundant but harmless.
// The matched text is passed through zeek::util::copy_string into yylval.str
// and the rule returns TOK_PATTERN_TEXT.
yylval.str = zeek::util::copy_string(yytext);
return TOK_PATTERN_TEXT;
}
@ -563,7 +563,9 @@ F RET_CONST(zeek::val_mgr->False()->Ref())
return TOK_PATTERN_END;
}
<RE>[\\\n] return yytext[0]; // should cause a parse error
<RE>\r?\n {
// An LF or CRLF line ending seen while in the RE start condition is reported
// as an error. The action returns no token and does not change the start
// condition.
// NOTE(review): unlike the <INITIAL,IGNORE> newline rule, this action does
// not increment line_number or the yylloc line fields -- confirm that is
// intended.
zeek::reporter->Error("patterns must not span multiple lines");
}
<*>. zeek::reporter->Error("unrecognized character: '%s'", zeek::util::get_escaped_string(yytext, false).data()); // applies in every start condition: reports a single non-newline character that reaches this rule