GH-1497: Support CRLF line-endings in Zeek scripts and signature files

This commit is contained in:
Jon Siwek 2021-04-08 20:15:54 -07:00
parent 4ae056b1a3
commit 312547ab0a
6 changed files with 55 additions and 12 deletions

1
.gitattributes vendored
View file

@ -3,3 +3,4 @@
*.bif linguist-language=C++ *.bif linguist-language=C++
*.l linguist-language=Lex *.l linguist-language=Lex
testing/btest/Baseline/** linguist-detectable=false testing/btest/Baseline/** linguist-detectable=false
testing/btest/language/crlf-parsing.zeek text eol=crlf

@ -1 +1 @@
Subproject commit 487d1d03bac4b51049bc109c862ca547257533cf Subproject commit 327a7e2e8e838858bdbcf85acb790f61d639639b

View file

@ -24,12 +24,12 @@ WS [ \t]+
D [0-9]+ D [0-9]+
H [0-9a-fA-F]+ H [0-9a-fA-F]+
HEX {H} HEX {H}
STRING \"([^\n\"]|\\\")*\" STRING \"([^\r\n\"]|\\\")*\"
IDCOMPONENT [a-zA-Z_][0-9a-zA-Z_-]* IDCOMPONENT [a-zA-Z_][0-9a-zA-Z_-]*
ID {IDCOMPONENT}(::{IDCOMPONENT})* ID {IDCOMPONENT}(::{IDCOMPONENT})*
IP6 ("["({HEX}:){7}{HEX}"]")|("["0x{HEX}({HEX}|:)*"::"({HEX}|:)*"]")|("["({HEX}|:)*"::"({HEX}|:)*"]")|("["({HEX}|:)*"::"({HEX}|:)*({D}"."){3}{D}"]") IP6 ("["({HEX}:){7}{HEX}"]")|("["0x{HEX}({HEX}|:)*"::"({HEX}|:)*"]")|("["({HEX}|:)*"::"({HEX}|:)*"]")|("["({HEX}|:)*"::"({HEX}|:)*({D}"."){3}{D}"]")
RE \/(\\\/)?([^/]|[^\\]\\\/)*\/i? RE \/(\\\/)?([^/]|[^\\]\\\/)*\/i?
META \.[^ \t]+{WS}[^\n]+ META \.[^ \t]+{WS}[^\r\n]+
PIDCOMPONENT [A-Za-z_][A-Za-z_0-9]* PIDCOMPONENT [A-Za-z_][A-Za-z_0-9]*
PID {PIDCOMPONENT}(::{PIDCOMPONENT})* PID {PIDCOMPONENT}(::{PIDCOMPONENT})*
@ -41,7 +41,7 @@ PID {PIDCOMPONENT}(::{PIDCOMPONENT})*
#.* /* eat comments */ #.* /* eat comments */
{WS} /* eat white space */ {WS} /* eat white space */
{META} /* eat any meta-data/comments */ {META} /* eat any meta-data/comments */
\n ++rules_line_number; \r?\n ++rules_line_number;
} }
{IP6} { {IP6} {

View file

@ -139,12 +139,12 @@ HEX [0-9a-fA-F]+
IDCOMPONENT [A-Za-z_][A-Za-z_0-9]* IDCOMPONENT [A-Za-z_][A-Za-z_0-9]*
ID {IDCOMPONENT}(::{IDCOMPONENT})* ID {IDCOMPONENT}(::{IDCOMPONENT})*
IP6 ("["({HEX}:){7}{HEX}"]")|("["0x{HEX}({HEX}|:)*"::"({HEX}|:)*"]")|("["({HEX}|:)*"::"({HEX}|:)*"]")|("["({HEX}:){6}({D}"."){3}{D}"]")|("["({HEX}|:)*"::"({HEX}|:)*({D}"."){3}{D}"]") IP6 ("["({HEX}:){7}{HEX}"]")|("["0x{HEX}({HEX}|:)*"::"({HEX}|:)*"]")|("["({HEX}|:)*"::"({HEX}|:)*"]")|("["({HEX}:){6}({D}"."){3}{D}"]")|("["({HEX}|:)*"::"({HEX}|:)*({D}"."){3}{D}"]")
FILE [^ \t\n]+ FILE [^ \t\r\n]+
PREFIX [^ \t\n]+ PREFIX [^ \t\r\n]+
FLOAT (({D}*"."?{D})|({D}"."?{D}*))([eE][-+]?{D})? FLOAT (({D}*"."?{D})|({D}"."?{D}*))([eE][-+]?{D})?
H [A-Za-z0-9][A-Za-z0-9\-]* H [A-Za-z0-9][A-Za-z0-9\-]*
HTLD [A-Za-z][A-Za-z0-9\-]* HTLD [A-Za-z][A-Za-z0-9\-]*
ESCSEQ (\\([^\n]|[0-7]+|x[[:xdigit:]]+)) ESCSEQ (\\([^\r\n]|[0-7]+|x[[:xdigit:]]+))
%% %%
@ -175,7 +175,7 @@ ESCSEQ (\\([^\n]|[0-7]+|x[[:xdigit:]]+))
{WS} /* eat whitespace */ {WS} /* eat whitespace */
<INITIAL,IGNORE>\n { <INITIAL,IGNORE>\r?\n {
++line_number; ++line_number;
++yylloc.first_line; ++yylloc.first_line;
++yylloc.last_line; ++yylloc.last_line;
@ -448,7 +448,7 @@ when return TOK_WHEN;
<IGNORE>@ifndef ++current_depth; <IGNORE>@ifndef ++current_depth;
<IGNORE>@else return TOK_ATELSE; <IGNORE>@else return TOK_ATELSE;
<IGNORE>@endif return TOK_ATENDIF; <IGNORE>@endif return TOK_ATENDIF;
<IGNORE>[^@\n]+ /* eat */ <IGNORE>[^@\r\n]+ /* eat */
<IGNORE>. /* eat */ <IGNORE>. /* eat */
T RET_CONST(zeek::val_mgr->True()->Ref()) T RET_CONST(zeek::val_mgr->True()->Ref())
@ -513,7 +513,7 @@ F RET_CONST(zeek::val_mgr->False()->Ref())
({H}".")+{HTLD} RET_CONST(zeek::detail::dns_mgr->LookupHost(yytext).release()) ({H}".")+{HTLD} RET_CONST(zeek::detail::dns_mgr->LookupHost(yytext).release())
\"([^\\\n\"]|{ESCSEQ})*\" { \"([^\\\r\\\n\"]|{ESCSEQ})*\" {
const char* text = yytext; const char* text = yytext;
int len = strlen(text) + 1; int len = strlen(text) + 1;
int i = 0; int i = 0;
@ -546,7 +546,7 @@ F RET_CONST(zeek::val_mgr->False()->Ref())
RET_CONST(new zeek::StringVal(new zeek::String(1, (zeek::byte_vec) s, i-1))) RET_CONST(new zeek::StringVal(new zeek::String(1, (zeek::byte_vec) s, i-1)))
} }
<RE>([^/\\\n]|{ESCSEQ})+ { <RE>([^/\\\r\\\n]|{ESCSEQ})+ {
yylval.str = zeek::util::copy_string(yytext); yylval.str = zeek::util::copy_string(yytext);
return TOK_PATTERN_TEXT; return TOK_PATTERN_TEXT;
} }
@ -563,7 +563,9 @@ F RET_CONST(zeek::val_mgr->False()->Ref())
return TOK_PATTERN_END; return TOK_PATTERN_END;
} }
<RE>[\\\n] return yytext[0]; // should cause a parse error <RE>\r?\n {
zeek::reporter->Error("patterns must not span multiple lines");
}
<*>. zeek::reporter->Error("unrecognized character: '%s'", zeek::util::get_escaped_string(yytext, false).data()); <*>. zeek::reporter->Error("unrecognized character: '%s'", zeek::util::get_escaped_string(yytext, false).data());

View file

@ -0,0 +1,5 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
first hello
hello T
last hello
zeek_init

View file

@ -0,0 +1,35 @@
# @TEST-EXEC: zeek -b %INPUT >out
# @TEST-EXEC: btest-diff out
# @TEST-DOC: Checks that CRLF line endings work in zeek/signature files
# Note the test file itself uses CRLFs and .gitattributes has an entry
# to ensure preservation of the CRLFs.
#
# Layout: two auxiliary files (a signature file and a script) are embedded
# below and then loaded from the main script body at the bottom, so both the
# signature scanner and the script scanner see CRLF-terminated input.
@TEST-START-FILE test.sig
# The run above reads no traffic, so this signature presumably only needs
# to parse cleanly; nothing in the expected output depends on it matching.
signature blah
{
ip-proto == tcp
src-port == 21
payload /.*/
event "matched"
}
@TEST-END-FILE
@TEST-START-FILE test.zeek
# Loaded script: its handler's output shows up in the diffed output file.
event zeek_init()
{
print "zeek_init";
}
@TEST-END-FILE
# Main script body: pull in both embedded files.
@load test.zeek
@load-sigs test.sig
print "first hello";
# Conditional directives with a taken and a skipped branch; only the
# "hello T" line is expected in the output. Presumably this exercises the
# scanner's skipping of inactive text with CRLF line endings (confirm
# against the scanner's IGNORE rules).
@if ( T )
print "hello T";
@else
print "hello F";
@endif
print "last hello";