From 36e31e28ac118f7e451a749a0dc47d50e2b055f9 Mon Sep 17 00:00:00 2001 From: Tim Wojtulewicz Date: Tue, 12 Jul 2022 16:22:29 -0700 Subject: [PATCH 1/4] Add /s modifier to parser for patterns --- src/parse.y | 14 ++++++++++++-- src/scan.l | 21 ++++++++++++++++++--- 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/src/parse.y b/src/parse.y index 25743e9599..637e40c318 100644 --- a/src/parse.y +++ b/src/parse.y @@ -54,7 +54,7 @@ %left '$' '[' ']' '(' ')' TOK_HAS_FIELD TOK_HAS_ATTR %nonassoc TOK_AS TOK_IS -%type opt_no_test opt_no_test_block TOK_PATTERN_END opt_deep when_flavor +%type opt_no_test opt_no_test_block opt_deep when_flavor %type TOK_ID TOK_PATTERN_TEXT %type local_id global_id def_global_id event_id global_or_event_id resolve_id begin_lambda case_type %type local_id_list case_type_list @@ -77,6 +77,7 @@ %type capture %type capture_list opt_captures when_captures %type when_head when_start when_clause +%type TOK_PATTERN_END %{ #include @@ -324,6 +325,11 @@ static StmtPtr build_local(ID* id, Type* t, InitClass ic, Expr* e, zeek::FuncType::Capture* capture; zeek::FuncType::CaptureList* captures; zeek::detail::WhenInfo* when_clause; + struct + { + bool ignore_case; + bool single_line; + } re_modes; } %% @@ -912,9 +918,13 @@ expr: auto* re = new RE_Matcher($3); delete [] $3; - if ( $4 ) + if ( $4.ignore_case ) re->MakeCaseInsensitive(); + if ( $4.single_line ) + { + } + re->Compile(); $$ = new ConstExpr(make_intrusive(re)); } diff --git a/src/scan.l b/src/scan.l index 17c07c9749..508c21cbfe 100644 --- a/src/scan.l +++ b/src/scan.l @@ -562,13 +562,28 @@ F RET_CONST(zeek::val_mgr->False()->Ref()) "/" { BEGIN(INITIAL); - yylval.b = false; + yylval.re_modes.ignore_case = false; + yylval.re_modes.single_line = false; return TOK_PATTERN_END; } -"/i" { +(\/[is]{0,2}) { BEGIN(INITIAL); - yylval.b = true; + + if (strlen(yytext) == 2) + { + yylval.re_modes.ignore_case = (yytext[1] == 'i'); + yylval.re_modes.single_line = (yytext[1] == 's'); + } + else + { + 
if ( yytext[1] == yytext[2] ) + zeek::reporter->Error("pattern has duplicate mode %c", yytext[1]); + + yylval.re_modes.ignore_case = (yytext[1] == 'i' || yytext[2] == 'i'); + yylval.re_modes.single_line = (yytext[1] == 's' || yytext[2] == 's'); + } + return TOK_PATTERN_END; } From abf2da781db00b4cc6726ee1123c2e13a383d5e7 Mon Sep 17 00:00:00 2001 From: Tim Wojtulewicz Date: Mon, 25 Jul 2022 12:54:12 -0700 Subject: [PATCH 2/4] Add basic unit tests for RE_Matcher --- src/RE.cc | 70 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/src/RE.cc b/src/RE.cc index 2751b106a1..2ce2f69a2d 100644 --- a/src/RE.cc +++ b/src/RE.cc @@ -7,6 +7,7 @@ #include #include +#include "zeek/3rdparty/doctest.h" #include "zeek/CCL.h" #include "zeek/DFA.h" #include "zeek/EquivClass.h" @@ -488,4 +489,73 @@ bool RE_Matcher::Compile(bool lazy) return re_anywhere->Compile(lazy) && re_exact->Compile(lazy); } +TEST_SUITE("re_matcher") + { + + TEST_CASE("simple_pattern") + { + RE_Matcher match("[0-9]+"); + match.Compile(); + CHECK(strcmp(match.OrigText(), "[0-9]+") == 0); + CHECK(strcmp(match.PatternText(), "^?([0-9]+)$?") == 0); + CHECK(strcmp(match.AnywherePatternText(), "^?(.|\\n)*([0-9]+)") == 0); + + CHECK(match.MatchExactly("12345")); + CHECK_FALSE(match.MatchExactly("a12345")); + + // The documentation for MatchAnywhere says that it returns the + // "index just beyond where the first match occurs". The matcher appears + // to stop at the shortest match: "1" is at offset 1, so the index just + // beyond it is 2 (not 5, the end of "1234"). A result of 0 means no match. 
+ CHECK(match.MatchAnywhere("a1234bcd") == 2); + CHECK(match.MatchAnywhere("abcd") == 0); + } + + TEST_CASE("case_insensitive_mode") + { + RE_Matcher match("[a-z]+"); + match.MakeCaseInsensitive(); + match.Compile(); + CHECK(strcmp(match.PatternText(), "(?i:^?([a-z]+)$?)") == 0); + + CHECK(match.MatchExactly("abcDEF")); + } + + TEST_CASE("multi_pattern") + { + RE_Matcher match("[0-9]+"); + match.AddPat("[a-z]+"); + match.Compile(); + + CHECK(strcmp(match.PatternText(), "(^?([0-9]+)$?)|(^?([a-z]+)$?)") == 0); + + CHECK(match.MatchExactly("abc")); + CHECK(match.MatchExactly("123")); + CHECK_FALSE(match.MatchExactly("abc123")); + } + + TEST_CASE("modes_multi_pattern") + { + RE_Matcher match("[a-m]+"); + match.MakeCaseInsensitive(); + + match.AddPat("[n-z]+"); + match.Compile(); + + CHECK(strcmp(match.PatternText(), "((?i:^?([a-m]+)$?))|(^?([n-z]+)$?)") == 0); + CHECK(match.MatchExactly("aBc")); + CHECK(match.MatchExactly("nop")); + CHECK_FALSE(match.MatchExactly("NoP")); + + // TODO: this part isn't working at all. There's something about the second call + // to Compile() that's breaking something. 
+ // match.MakeCaseInsensitive(); + // match.Compile(); + // CHECK(strcmp(match.PatternText(), "(?i:((?i:^?([a-m]+)$?))|(^?([n-z]+)$?))") == 0); + // CHECK(match.MatchExactly("aBc")); + // CHECK(match.MatchExactly("nop")); + // CHECK(match.MatchExactly("NoP")); + } + } + } // namespace zeek From f67f6e450738a18ec0d0d148cb6f3ad3760f9ac1 Mon Sep 17 00:00:00 2001 From: Tim Wojtulewicz Date: Tue, 12 Jul 2022 17:13:38 -0700 Subject: [PATCH 3/4] Code cleanup in RE_Matcher code - Use std::string in Specific_RE_Matcher instead of char* - Change a couple of ints-as-bools to bools --- src/RE.cc | 43 +++++-------------- src/RE.h | 15 ++++--- src/RuleMatcher.cc | 2 +- src/packet_analysis/protocol/teredo/Teredo.cc | 2 +- src/re-parse.y | 2 +- src/re-scan.l | 2 +- 6 files changed, 23 insertions(+), 43 deletions(-) diff --git a/src/RE.cc b/src/RE.cc index 2ce2f69a2d..13d21e59e5 100644 --- a/src/RE.cc +++ b/src/RE.cc @@ -17,7 +17,7 @@ zeek::detail::CCL* zeek::detail::curr_ccl = nullptr; zeek::detail::Specific_RE_Matcher* zeek::detail::rem = nullptr; zeek::detail::NFA_Machine* zeek::detail::nfa = nullptr; -int zeek::detail::case_insensitive = 0; +bool zeek::detail::case_insensitive = false; extern int RE_parse(void); extern void RE_set_input(const char* str); @@ -28,13 +28,10 @@ namespace zeek namespace detail { -Specific_RE_Matcher::Specific_RE_Matcher(match_type arg_mt, int arg_multiline) - : equiv_class(NUM_SYM) +Specific_RE_Matcher::Specific_RE_Matcher(match_type arg_mt, bool arg_multiline) + : mt(arg_mt), multiline(arg_multiline), equiv_class(NUM_SYM) { - mt = arg_mt; - multiline = arg_multiline; any_ccl = nullptr; - pattern_text = nullptr; dfa = nullptr; ecs = nullptr; accepted = new AcceptingSet(); @@ -46,7 +43,6 @@ Specific_RE_Matcher::~Specific_RE_Matcher() delete ccl_list[i]; Unref(dfa); - delete[] pattern_text; delete accepted; } @@ -90,51 +86,32 @@ void Specific_RE_Matcher::AddExactPat(const char* new_pat) void Specific_RE_Matcher::AddPat(const char* new_pat, const 
char* orig_fmt, const char* app_fmt) { - int n = strlen(new_pat); - - if ( pattern_text ) - n += strlen(pattern_text) + strlen(app_fmt); + if ( ! pattern_text.empty() ) + pattern_text = util::fmt(app_fmt, pattern_text.c_str(), new_pat); else - n += strlen(orig_fmt); - - char* s = new char[n + 5 /* slop */]; - - if ( pattern_text ) - sprintf(s, app_fmt, pattern_text, new_pat); - else - sprintf(s, orig_fmt, new_pat); - - delete[] pattern_text; - pattern_text = s; + pattern_text = util::fmt(orig_fmt, new_pat); } void Specific_RE_Matcher::MakeCaseInsensitive() { const char fmt[] = "(?i:%s)"; - int n = strlen(pattern_text) + strlen(fmt); - - char* s = new char[n + 5 /* slop */]; - - snprintf(s, n + 5, fmt, pattern_text); - - delete[] pattern_text; - pattern_text = s; + pattern_text = util::fmt(fmt, pattern_text.c_str()); } bool Specific_RE_Matcher::Compile(bool lazy) { - if ( ! pattern_text ) + if ( pattern_text.empty() ) return false; rem = this; - RE_set_input(pattern_text); + RE_set_input(pattern_text.c_str()); int parse_status = RE_parse(); RE_done_with_scan(); if ( parse_status ) { - reporter->Error("error compiling pattern /%s/", pattern_text); + reporter->Error("error compiling pattern /%s/", pattern_text.c_str()); Unref(nfa); nfa = nullptr; return false; diff --git a/src/RE.h b/src/RE.h index 43ab308dca..c71b8dd746 100644 --- a/src/RE.h +++ b/src/RE.h @@ -32,7 +32,7 @@ class DFA_State; class Specific_RE_Matcher; class CCL; -extern int case_insensitive; +extern bool case_insensitive; extern CCL* curr_ccl; extern NFA_Machine* nfa; extern Specific_RE_Matcher* rem; @@ -59,14 +59,15 @@ enum match_type class Specific_RE_Matcher { public: - explicit Specific_RE_Matcher(match_type mt, int multiline = 0); + explicit Specific_RE_Matcher(match_type mt, bool multiline = false); ~Specific_RE_Matcher(); void AddPat(const char* pat); void MakeCaseInsensitive(); + void SetSingleLineMode(); - void SetPat(const char* pat) { pattern_text = util::copy_string(pat); } + void 
SetPat(const char* pat) { pattern_text = pat; } bool Compile(bool lazy = false); @@ -117,7 +118,7 @@ public: EquivClass* EC() { return &equiv_class; } - const char* PatternText() const { return pattern_text; } + const char* PatternText() const { return pattern_text.c_str(); } DFA_Machine* DFA() const { return dfa; } @@ -135,11 +136,13 @@ protected: bool MatchAll(const u_char* bv, int n); match_type mt; - int multiline; - char* pattern_text; + bool multiline; + + std::string pattern_text; std::map defs; std::map ccl_dict; + std::vector modifiers; PList ccl_list; EquivClass equiv_class; int* ecs; diff --git a/src/RuleMatcher.cc b/src/RuleMatcher.cc index 58856eb5e4..5d22f5964d 100644 --- a/src/RuleMatcher.cc +++ b/src/RuleMatcher.cc @@ -526,7 +526,7 @@ void RuleMatcher::BuildPatternSets(RuleHdrTest::pattern_set_list* dst, const str if ( group_exprs.length() > sig_max_group_size || i == exprs.length() ) { RuleHdrTest::PatternSet* set = new RuleHdrTest::PatternSet; - set->re = new Specific_RE_Matcher(MATCH_EXACTLY, 1); + set->re = new Specific_RE_Matcher(MATCH_EXACTLY, true); set->re->CompileSet(group_exprs, group_ids); set->patterns = group_exprs; set->ids = group_ids; diff --git a/src/packet_analysis/protocol/teredo/Teredo.cc b/src/packet_analysis/protocol/teredo/Teredo.cc index fc755be951..d1a284d263 100644 --- a/src/packet_analysis/protocol/teredo/Teredo.cc +++ b/src/packet_analysis/protocol/teredo/Teredo.cc @@ -145,7 +145,7 @@ TeredoAnalyzer::TeredoAnalyzer() : zeek::packet_analysis::Analyzer("TEREDO") // } pattern_re = std::make_unique(zeek::detail::MATCH_EXACTLY, - 1); + true); pattern_re->AddPat("^(\\x00\\x00)|(\\x00\\x01)|([\\x60-\\x6f].{7}((\\x20\\x01\\x00\\x00)).{28})" "|([\\x60-\\x6f].{23}((\\x20\\x01\\x00\\x00))).{12}"); pattern_re->Compile(); diff --git a/src/re-parse.y b/src/re-parse.y index 75d1ba5350..5d8d2e9a63 100644 --- a/src/re-parse.y +++ b/src/re-parse.y @@ -132,7 +132,7 @@ singleton : singleton '*' { $$ = $2; } | TOK_CASE_INSENSITIVE re ')' - { 
$$ = $2; zeek::detail::case_insensitive = 0; } + { $$ = $2; zeek::detail::case_insensitive = false; } | TOK_CHAR { diff --git a/src/re-scan.l b/src/re-scan.l index d7d3c1eb05..494dc5b486 100644 --- a/src/re-scan.l +++ b/src/re-scan.l @@ -115,7 +115,7 @@ CCL_EXPR ("[:"[[:alpha:]]+":]") } } - "(?i:" zeek::detail::case_insensitive = 1; return TOK_CASE_INSENSITIVE; + "(?i:" zeek::detail::case_insensitive = true; return TOK_CASE_INSENSITIVE; [a-zA-Z] { if ( zeek::detail::case_insensitive ) From 18126c2d50616d5e799a5bcc6e16bfb57132e166 Mon Sep 17 00:00:00 2001 From: Tim Wojtulewicz Date: Mon, 25 Jul 2022 16:24:19 -0700 Subject: [PATCH 4/4] Add support for /s modifier to RE matcher and parser --- NEWS | 4 + src/RE.cc | 93 +++++++++++++++++---- src/RE.h | 13 ++- src/parse.y | 3 +- src/re-parse.y | 8 +- src/re-scan.l | 1 + src/scan.l | 2 +- testing/btest/Baseline/language.pattern/out | 4 + testing/btest/language/pattern.zeek | 5 ++ 9 files changed, 109 insertions(+), 24 deletions(-) diff --git a/NEWS b/NEWS index bd056bc781..99fd3b494a 100644 --- a/NEWS +++ b/NEWS @@ -24,6 +24,10 @@ Breaking Changes New Functionality ----------------- +- Added support for the /s regular expression modifier. Using this modifier in + patterns in Zeek scripts will cause the '.' character to also match newline + characters. 
+ Changed Functionality --------------------- diff --git a/src/RE.cc b/src/RE.cc index 13d21e59e5..e6389de943 100644 --- a/src/RE.cc +++ b/src/RE.cc @@ -18,6 +18,7 @@ zeek::detail::CCL* zeek::detail::curr_ccl = nullptr; zeek::detail::Specific_RE_Matcher* zeek::detail::rem = nullptr; zeek::detail::NFA_Machine* zeek::detail::nfa = nullptr; bool zeek::detail::case_insensitive = false; +bool zeek::detail::re_single_line = false; extern int RE_parse(void); extern void RE_set_input(const char* str); @@ -32,6 +33,7 @@ Specific_RE_Matcher::Specific_RE_Matcher(match_type arg_mt, bool arg_multiline) : mt(arg_mt), multiline(arg_multiline), equiv_class(NUM_SYM) { any_ccl = nullptr; + single_line_ccl = nullptr; dfa = nullptr; ecs = nullptr; accepted = new AcceptingSet(); @@ -46,10 +48,22 @@ Specific_RE_Matcher::~Specific_RE_Matcher() delete accepted; } -CCL* Specific_RE_Matcher::AnyCCL() +CCL* Specific_RE_Matcher::AnyCCL(bool single_line_mode) { + if ( single_line_mode ) + { + if ( ! single_line_ccl ) + { + single_line_ccl = new CCL(); + single_line_ccl->Negate(); + EC()->CCL_Use(single_line_ccl); + } + + return single_line_ccl; + } + if ( ! any_ccl ) - { // Create the '.' character class. + { any_ccl = new CCL(); if ( ! 
multiline ) any_ccl->Add('\n'); @@ -98,6 +112,12 @@ void Specific_RE_Matcher::MakeCaseInsensitive() pattern_text = util::fmt(fmt, pattern_text.c_str()); } +void Specific_RE_Matcher::MakeSingleLine() + { + const char fmt[] = "(?s:%s)"; + pattern_text = util::fmt(fmt, pattern_text.c_str()); + } + bool Specific_RE_Matcher::Compile(bool lazy) { if ( pattern_text.empty() ) @@ -394,13 +414,10 @@ static RE_Matcher* matcher_merge(const RE_Matcher* re1, const RE_Matcher* re2, c const char* text1 = re1->PatternText(); const char* text2 = re2->PatternText(); - int n = strlen(text1) + strlen(text2) + strlen(merge_op) + 32 /* slop */; + size_t n = strlen(text1) + strlen(text2) + strlen(merge_op) + 32 /* slop */; - char* merge_text = new char[n]; - snprintf(merge_text, n, "(%s)%s(%s)", text1, merge_op, text2); - - RE_Matcher* merge = new RE_Matcher(merge_text); - delete[] merge_text; + std::string merge_text = util::fmt("(%s)%s(%s)", text1, merge_op, text2); + RE_Matcher* merge = new RE_Matcher(merge_text.c_str()); merge->Compile(); @@ -461,6 +478,14 @@ void RE_Matcher::MakeCaseInsensitive() is_case_insensitive = true; } +void RE_Matcher::MakeSingleLine() + { + re_anywhere->MakeSingleLine(); + re_exact->MakeSingleLine(); + + is_single_line = true; + } + bool RE_Matcher::Compile(bool lazy) { return re_anywhere->Compile(lazy) && re_exact->Compile(lazy); @@ -523,15 +548,51 @@ TEST_SUITE("re_matcher") CHECK(match.MatchExactly("aBc")); CHECK(match.MatchExactly("nop")); CHECK_FALSE(match.MatchExactly("NoP")); + } - // TODO: this part isn't working at all. There's something about the second call - // to Compile() that's breaking something. 
- // match.MakeCaseInsensitive(); - // match.Compile(); - // CHECK(strcmp(match.PatternText(), "(?i:((?i:^?([a-m]+)$?))|(^?([n-z]+)$?))") == 0); - // CHECK(match.MatchExactly("aBc")); - // CHECK(match.MatchExactly("nop")); - // CHECK(match.MatchExactly("NoP")); + TEST_CASE("single_line_mode") + { + RE_Matcher match(".*"); + match.MakeSingleLine(); + match.Compile(); + + CHECK(strcmp(match.PatternText(), "(?s:^?(.*)$?)") == 0); + CHECK(match.MatchExactly("abc\ndef")); + + RE_Matcher match2("fOO.*bAR"); + match2.MakeSingleLine(); + match2.Compile(); + + CHECK(strcmp(match2.PatternText(), "(?s:^?(fOO.*bAR)$?)") == 0); + CHECK(match2.MatchExactly("fOOab\ncdbAR")); + + RE_Matcher match3("b.r"); + match3.MakeSingleLine(); + match3.Compile(); + CHECK(match3.MatchExactly("bar")); + CHECK(match3.MatchExactly("b\nr")); + + RE_Matcher match4("a.c"); + match4.MakeSingleLine(); + match4.AddPat("def"); + match4.Compile(); + CHECK(match4.MatchExactly("abc")); + CHECK(match4.MatchExactly("a\nc")); + } + + TEST_CASE("disjunction") + { + RE_Matcher match1("a.c"); + match1.MakeSingleLine(); + match1.Compile(); + RE_Matcher match2("def"); + match2.Compile(); + auto dj = detail::RE_Matcher_disjunction(&match1, &match2); + CHECK(dj->MatchExactly("abc")); + CHECK(dj->MatchExactly("a.c")); + CHECK(dj->MatchExactly("a\nc")); + CHECK(dj->MatchExactly("def")); + delete dj; } } diff --git a/src/RE.h b/src/RE.h index c71b8dd746..28c343f655 100644 --- a/src/RE.h +++ b/src/RE.h @@ -33,6 +33,7 @@ class Specific_RE_Matcher; class CCL; extern bool case_insensitive; +extern bool re_single_line; extern CCL* curr_ccl; extern NFA_Machine* nfa; extern Specific_RE_Matcher* rem; @@ -65,7 +66,7 @@ public: void AddPat(const char* pat); void MakeCaseInsensitive(); - void SetSingleLineMode(); + void MakeSingleLine(); void SetPat(const char* pat) { pattern_text = pat; } @@ -91,7 +92,7 @@ public: return nullptr; } CCL* LookupCCL(int index) { return ccl_list[index]; } - CCL* AnyCCL(); + CCL* AnyCCL(bool 
single_line_mode = false); void ConvertCCLs(); @@ -147,8 +148,10 @@ protected: EquivClass equiv_class; int* ecs; DFA_Machine* dfa; - CCL* any_ccl; AcceptingSet* accepted; + + CCL* any_ccl; + CCL* single_line_ccl; }; class RE_Match_State @@ -208,6 +211,9 @@ public: void MakeCaseInsensitive(); bool IsCaseInsensitive() const { return is_case_insensitive; } + void MakeSingleLine(); + bool IsSingleLine() const { return is_single_line; } + bool Compile(bool lazy = false); // Returns true if s exactly matches the pattern, false otherwise. @@ -243,6 +249,7 @@ protected: detail::Specific_RE_Matcher* re_exact; bool is_case_insensitive = false; + bool is_single_line = false; }; } // namespace zeek diff --git a/src/parse.y b/src/parse.y index 637e40c318..1336d177cf 100644 --- a/src/parse.y +++ b/src/parse.y @@ -922,8 +922,7 @@ expr: re->MakeCaseInsensitive(); if ( $4.single_line ) - { - } + re->MakeSingleLine(); re->Compile(); $$ = new ConstExpr(make_intrusive(re)); diff --git a/src/re-parse.y b/src/re-parse.y index 5d8d2e9a63..7a38820f90 100644 --- a/src/re-parse.y +++ b/src/re-parse.y @@ -20,7 +20,7 @@ namespace zeek::detail { void yyerror(const char msg[]); %} -%token TOK_CHAR TOK_NUMBER TOK_CCL TOK_CCE TOK_CASE_INSENSITIVE +%token TOK_CHAR TOK_NUMBER TOK_CCL TOK_CCE TOK_CASE_INSENSITIVE TOK_SINGLE_LINE %union { int int_val; @@ -112,7 +112,8 @@ singleton : singleton '*' | '.' 
{ - $$ = new zeek::detail::NFA_Machine(new zeek::detail::NFA_State(zeek::detail::rem->AnyCCL())); + $$ = new zeek::detail::NFA_Machine(new zeek::detail::NFA_State( + zeek::detail::rem->AnyCCL(zeek::detail::re_single_line))); } | full_ccl @@ -134,6 +135,9 @@ singleton : singleton '*' | TOK_CASE_INSENSITIVE re ')' { $$ = $2; zeek::detail::case_insensitive = false; } + | TOK_SINGLE_LINE re ')' + { $$ = $2; zeek::detail::re_single_line = false; } + | TOK_CHAR { auto sym = $1; diff --git a/src/re-scan.l b/src/re-scan.l index 494dc5b486..5bffd812a7 100644 --- a/src/re-scan.l +++ b/src/re-scan.l @@ -116,6 +116,7 @@ CCL_EXPR ("[:"[[:alpha:]]+":]") } "(?i:" zeek::detail::case_insensitive = true; return TOK_CASE_INSENSITIVE; + "(?s:" zeek::detail::re_single_line = true; return TOK_SINGLE_LINE; [a-zA-Z] { if ( zeek::detail::case_insensitive ) diff --git a/src/scan.l b/src/scan.l index 508c21cbfe..169af2932b 100644 --- a/src/scan.l +++ b/src/scan.l @@ -570,7 +570,7 @@ F RET_CONST(zeek::val_mgr->False()->Ref()) (\/[is]{0,2}) { BEGIN(INITIAL); - if (strlen(yytext) == 2) + if ( strlen(yytext) == 2 ) { yylval.re_modes.ignore_case = (yytext[1] == 'i'); yylval.re_modes.single_line = (yytext[1] == 's'); diff --git a/testing/btest/Baseline/language.pattern/out b/testing/btest/Baseline/language.pattern/out index 8bbf981c12..5c1441502b 100644 --- a/testing/btest/Baseline/language.pattern/out +++ b/testing/btest/Baseline/language.pattern/out @@ -33,3 +33,7 @@ case-sensitive pattern (PASS) (?i:...) pattern construct (PASS) (?i:...) pattern construct (FAIL) (?i:...) pattern construct (PASS) +/s missing (PASS) +/s pattern modifier (PASS) +/s pattern disjunction (PASS) +/s pattern concatenation (PASS) diff --git a/testing/btest/language/pattern.zeek b/testing/btest/language/pattern.zeek index 05a84e713c..2567afc884 100644 --- a/testing/btest/language/pattern.zeek +++ b/testing/btest/language/pattern.zeek @@ -65,4 +65,9 @@ event zeek_init() test_case( "(?i:...) 
pattern construct", /foo|(?i:bar)/ in "xFOoy" ); test_case( "(?i:...) pattern construct", /foo|(?i:bar)/ | /foo/i in "xFOoy" ); + test_case( "/s missing", /fOO.*bAR/ != "fOOab\ncdbAR"); + test_case( "/s pattern modifier", /fOO.*bAR/s == "fOOab\ncdbAR"); + test_case( "/s pattern disjunction", /b.r/s | /bez/ == "b\nr" ); + test_case( "/s pattern concatenation", /b.r/s & /bez/ == "b\nrbez" ); + }