mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 14:48:21 +00:00
Merge remote-tracking branch 'origin/topic/timw/1129-slash-s-patterns'
* origin/topic/timw/1129-slash-s-patterns:
  Add support for /s modifier to RE matcher and parser
  Code cleanup in RE_Matcher code
  Add basic unit tests for RE_Matcher
  Add /s modifier to parser for patterns
This commit is contained in:
commit
2cba2415fd
13 changed files with 235 additions and 60 deletions
15
CHANGES
15
CHANGES
|
@ -1,3 +1,18 @@
|
||||||
|
5.1.0-dev.309 | 2022-08-02 11:33:22 -0700
|
||||||
|
|
||||||
|
* Add support for /s modifier to RE matcher and parser (Tim Wojtulewicz, Corelight)
|
||||||
|
|
||||||
|
* Code cleanup in RE_Matcher code (Tim Wojtulewicz, Corelight)
|
||||||
|
|
||||||
|
- Use std::string in Specific_RE_Matcher instead of char*
|
||||||
|
- Change a couple of ints-as-bools to bools
|
||||||
|
|
||||||
|
* Add basic unit tests for RE_Matcher (Tim Wojtulewicz, Corelight)
|
||||||
|
|
||||||
|
* Add /s modifier to parser for patterns (Tim Wojtulewicz, Corelight)
|
||||||
|
|
||||||
|
* Update gen-zam submodule [nomail] (Tim Wojtulewicz, Corelight)
|
||||||
|
|
||||||
5.1.0-dev.303 | 2022-08-01 09:56:45 -0700
|
5.1.0-dev.303 | 2022-08-01 09:56:45 -0700
|
||||||
|
|
||||||
* GH-1344: Give better warning when using a type that doesn't exist (Tim Wojtulewicz, Corelight)
|
* GH-1344: Give better warning when using a type that doesn't exist (Tim Wojtulewicz, Corelight)
|
||||||
|
|
4
NEWS
4
NEWS
|
@ -24,6 +24,10 @@ Breaking Changes
|
||||||
New Functionality
|
New Functionality
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
|
- Added support for the /s regular expression modifier. Using this modifier in
|
||||||
|
patterns in Zeek scripts will cause the '.' character to also match newline
|
||||||
|
characters.
|
||||||
|
|
||||||
Changed Functionality
|
Changed Functionality
|
||||||
---------------------
|
---------------------
|
||||||
|
|
||||||
|
|
2
VERSION
2
VERSION
|
@ -1 +1 @@
|
||||||
5.1.0-dev.303
|
5.1.0-dev.309
|
||||||
|
|
188
src/RE.cc
188
src/RE.cc
|
@ -7,6 +7,7 @@
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
|
||||||
|
#include "zeek/3rdparty/doctest.h"
|
||||||
#include "zeek/CCL.h"
|
#include "zeek/CCL.h"
|
||||||
#include "zeek/DFA.h"
|
#include "zeek/DFA.h"
|
||||||
#include "zeek/EquivClass.h"
|
#include "zeek/EquivClass.h"
|
||||||
|
@ -16,7 +17,8 @@
|
||||||
zeek::detail::CCL* zeek::detail::curr_ccl = nullptr;
|
zeek::detail::CCL* zeek::detail::curr_ccl = nullptr;
|
||||||
zeek::detail::Specific_RE_Matcher* zeek::detail::rem = nullptr;
|
zeek::detail::Specific_RE_Matcher* zeek::detail::rem = nullptr;
|
||||||
zeek::detail::NFA_Machine* zeek::detail::nfa = nullptr;
|
zeek::detail::NFA_Machine* zeek::detail::nfa = nullptr;
|
||||||
int zeek::detail::case_insensitive = 0;
|
bool zeek::detail::case_insensitive = false;
|
||||||
|
bool zeek::detail::re_single_line = false;
|
||||||
|
|
||||||
extern int RE_parse(void);
|
extern int RE_parse(void);
|
||||||
extern void RE_set_input(const char* str);
|
extern void RE_set_input(const char* str);
|
||||||
|
@ -27,13 +29,11 @@ namespace zeek
|
||||||
namespace detail
|
namespace detail
|
||||||
{
|
{
|
||||||
|
|
||||||
Specific_RE_Matcher::Specific_RE_Matcher(match_type arg_mt, int arg_multiline)
|
Specific_RE_Matcher::Specific_RE_Matcher(match_type arg_mt, bool arg_multiline)
|
||||||
: equiv_class(NUM_SYM)
|
: mt(arg_mt), multiline(arg_multiline), equiv_class(NUM_SYM)
|
||||||
{
|
{
|
||||||
mt = arg_mt;
|
|
||||||
multiline = arg_multiline;
|
|
||||||
any_ccl = nullptr;
|
any_ccl = nullptr;
|
||||||
pattern_text = nullptr;
|
single_line_ccl = nullptr;
|
||||||
dfa = nullptr;
|
dfa = nullptr;
|
||||||
ecs = nullptr;
|
ecs = nullptr;
|
||||||
accepted = new AcceptingSet();
|
accepted = new AcceptingSet();
|
||||||
|
@ -45,14 +45,25 @@ Specific_RE_Matcher::~Specific_RE_Matcher()
|
||||||
delete ccl_list[i];
|
delete ccl_list[i];
|
||||||
|
|
||||||
Unref(dfa);
|
Unref(dfa);
|
||||||
delete[] pattern_text;
|
|
||||||
delete accepted;
|
delete accepted;
|
||||||
}
|
}
|
||||||
|
|
||||||
CCL* Specific_RE_Matcher::AnyCCL()
|
CCL* Specific_RE_Matcher::AnyCCL(bool single_line_mode)
|
||||||
{
|
{
|
||||||
|
if ( single_line_mode )
|
||||||
|
{
|
||||||
|
if ( ! single_line_ccl )
|
||||||
|
{
|
||||||
|
single_line_ccl = new CCL();
|
||||||
|
single_line_ccl->Negate();
|
||||||
|
EC()->CCL_Use(single_line_ccl);
|
||||||
|
}
|
||||||
|
|
||||||
|
return single_line_ccl;
|
||||||
|
}
|
||||||
|
|
||||||
if ( ! any_ccl )
|
if ( ! any_ccl )
|
||||||
{ // Create the '.' character class.
|
{
|
||||||
any_ccl = new CCL();
|
any_ccl = new CCL();
|
||||||
if ( ! multiline )
|
if ( ! multiline )
|
||||||
any_ccl->Add('\n');
|
any_ccl->Add('\n');
|
||||||
|
@ -89,51 +100,38 @@ void Specific_RE_Matcher::AddExactPat(const char* new_pat)
|
||||||
|
|
||||||
void Specific_RE_Matcher::AddPat(const char* new_pat, const char* orig_fmt, const char* app_fmt)
|
void Specific_RE_Matcher::AddPat(const char* new_pat, const char* orig_fmt, const char* app_fmt)
|
||||||
{
|
{
|
||||||
int n = strlen(new_pat);
|
if ( ! pattern_text.empty() )
|
||||||
|
pattern_text = util::fmt(app_fmt, pattern_text.c_str(), new_pat);
|
||||||
if ( pattern_text )
|
|
||||||
n += strlen(pattern_text) + strlen(app_fmt);
|
|
||||||
else
|
else
|
||||||
n += strlen(orig_fmt);
|
pattern_text = util::fmt(orig_fmt, new_pat);
|
||||||
|
|
||||||
char* s = new char[n + 5 /* slop */];
|
|
||||||
|
|
||||||
if ( pattern_text )
|
|
||||||
sprintf(s, app_fmt, pattern_text, new_pat);
|
|
||||||
else
|
|
||||||
sprintf(s, orig_fmt, new_pat);
|
|
||||||
|
|
||||||
delete[] pattern_text;
|
|
||||||
pattern_text = s;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Specific_RE_Matcher::MakeCaseInsensitive()
|
void Specific_RE_Matcher::MakeCaseInsensitive()
|
||||||
{
|
{
|
||||||
const char fmt[] = "(?i:%s)";
|
const char fmt[] = "(?i:%s)";
|
||||||
int n = strlen(pattern_text) + strlen(fmt);
|
pattern_text = util::fmt(fmt, pattern_text.c_str());
|
||||||
|
}
|
||||||
|
|
||||||
char* s = new char[n + 5 /* slop */];
|
void Specific_RE_Matcher::MakeSingleLine()
|
||||||
|
{
|
||||||
snprintf(s, n + 5, fmt, pattern_text);
|
const char fmt[] = "(?s:%s)";
|
||||||
|
pattern_text = util::fmt(fmt, pattern_text.c_str());
|
||||||
delete[] pattern_text;
|
|
||||||
pattern_text = s;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Specific_RE_Matcher::Compile(bool lazy)
|
bool Specific_RE_Matcher::Compile(bool lazy)
|
||||||
{
|
{
|
||||||
if ( ! pattern_text )
|
if ( pattern_text.empty() )
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
rem = this;
|
rem = this;
|
||||||
RE_set_input(pattern_text);
|
RE_set_input(pattern_text.c_str());
|
||||||
|
|
||||||
int parse_status = RE_parse();
|
int parse_status = RE_parse();
|
||||||
RE_done_with_scan();
|
RE_done_with_scan();
|
||||||
|
|
||||||
if ( parse_status )
|
if ( parse_status )
|
||||||
{
|
{
|
||||||
reporter->Error("error compiling pattern /%s/", pattern_text);
|
reporter->Error("error compiling pattern /%s/", pattern_text.c_str());
|
||||||
Unref(nfa);
|
Unref(nfa);
|
||||||
nfa = nullptr;
|
nfa = nullptr;
|
||||||
return false;
|
return false;
|
||||||
|
@ -416,13 +414,10 @@ static RE_Matcher* matcher_merge(const RE_Matcher* re1, const RE_Matcher* re2, c
|
||||||
const char* text1 = re1->PatternText();
|
const char* text1 = re1->PatternText();
|
||||||
const char* text2 = re2->PatternText();
|
const char* text2 = re2->PatternText();
|
||||||
|
|
||||||
int n = strlen(text1) + strlen(text2) + strlen(merge_op) + 32 /* slop */;
|
size_t n = strlen(text1) + strlen(text2) + strlen(merge_op) + 32 /* slop */;
|
||||||
|
|
||||||
char* merge_text = new char[n];
|
std::string merge_text = util::fmt("(%s)%s(%s)", text1, merge_op, text2);
|
||||||
snprintf(merge_text, n, "(%s)%s(%s)", text1, merge_op, text2);
|
RE_Matcher* merge = new RE_Matcher(merge_text.c_str());
|
||||||
|
|
||||||
RE_Matcher* merge = new RE_Matcher(merge_text);
|
|
||||||
delete[] merge_text;
|
|
||||||
|
|
||||||
merge->Compile();
|
merge->Compile();
|
||||||
|
|
||||||
|
@ -483,9 +478,122 @@ void RE_Matcher::MakeCaseInsensitive()
|
||||||
is_case_insensitive = true;
|
is_case_insensitive = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void RE_Matcher::MakeSingleLine()
|
||||||
|
{
|
||||||
|
re_anywhere->MakeSingleLine();
|
||||||
|
re_exact->MakeSingleLine();
|
||||||
|
|
||||||
|
is_single_line = true;
|
||||||
|
}
|
||||||
|
|
||||||
bool RE_Matcher::Compile(bool lazy)
|
bool RE_Matcher::Compile(bool lazy)
|
||||||
{
|
{
|
||||||
return re_anywhere->Compile(lazy) && re_exact->Compile(lazy);
|
return re_anywhere->Compile(lazy) && re_exact->Compile(lazy);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_SUITE("re_matcher")
|
||||||
|
{
|
||||||
|
|
||||||
|
TEST_CASE("simple_pattern")
|
||||||
|
{
|
||||||
|
RE_Matcher match("[0-9]+");
|
||||||
|
match.Compile();
|
||||||
|
CHECK(strcmp(match.OrigText(), "[0-9]+") == 0);
|
||||||
|
CHECK(strcmp(match.PatternText(), "^?([0-9]+)$?") == 0);
|
||||||
|
CHECK(strcmp(match.AnywherePatternText(), "^?(.|\\n)*([0-9]+)") == 0);
|
||||||
|
|
||||||
|
CHECK(match.MatchExactly("12345"));
|
||||||
|
CHECK_FALSE(match.MatchExactly("a12345"));
|
||||||
|
|
||||||
|
// The documentation for MatchAnywhere says that it returns the
|
||||||
|
// "index just beyond where the first match occurs", which I would
|
||||||
|
// think means *after* the match. This is returning the position
|
||||||
|
// where the match starts though.
|
||||||
|
CHECK(match.MatchAnywhere("a1234bcd") == 2);
|
||||||
|
CHECK(match.MatchAnywhere("abcd") == 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_CASE("case_insensitive_mode")
|
||||||
|
{
|
||||||
|
RE_Matcher match("[a-z]+");
|
||||||
|
match.MakeCaseInsensitive();
|
||||||
|
match.Compile();
|
||||||
|
CHECK(strcmp(match.PatternText(), "(?i:^?([a-z]+)$?)") == 0);
|
||||||
|
|
||||||
|
CHECK(match.MatchExactly("abcDEF"));
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_CASE("multi_pattern")
|
||||||
|
{
|
||||||
|
RE_Matcher match("[0-9]+");
|
||||||
|
match.AddPat("[a-z]+");
|
||||||
|
match.Compile();
|
||||||
|
|
||||||
|
CHECK(strcmp(match.PatternText(), "(^?([0-9]+)$?)|(^?([a-z]+)$?)") == 0);
|
||||||
|
|
||||||
|
CHECK(match.MatchExactly("abc"));
|
||||||
|
CHECK(match.MatchExactly("123"));
|
||||||
|
CHECK_FALSE(match.MatchExactly("abc123"));
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_CASE("modes_multi_pattern")
|
||||||
|
{
|
||||||
|
RE_Matcher match("[a-m]+");
|
||||||
|
match.MakeCaseInsensitive();
|
||||||
|
|
||||||
|
match.AddPat("[n-z]+");
|
||||||
|
match.Compile();
|
||||||
|
|
||||||
|
CHECK(strcmp(match.PatternText(), "((?i:^?([a-m]+)$?))|(^?([n-z]+)$?)") == 0);
|
||||||
|
CHECK(match.MatchExactly("aBc"));
|
||||||
|
CHECK(match.MatchExactly("nop"));
|
||||||
|
CHECK_FALSE(match.MatchExactly("NoP"));
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_CASE("single_line_mode")
|
||||||
|
{
|
||||||
|
RE_Matcher match(".*");
|
||||||
|
match.MakeSingleLine();
|
||||||
|
match.Compile();
|
||||||
|
|
||||||
|
CHECK(strcmp(match.PatternText(), "(?s:^?(.*)$?)") == 0);
|
||||||
|
CHECK(match.MatchExactly("abc\ndef"));
|
||||||
|
|
||||||
|
RE_Matcher match2("fOO.*bAR");
|
||||||
|
match2.MakeSingleLine();
|
||||||
|
match2.Compile();
|
||||||
|
|
||||||
|
CHECK(strcmp(match2.PatternText(), "(?s:^?(fOO.*bAR)$?)") == 0);
|
||||||
|
CHECK(match.MatchExactly("fOOab\ncdbAR"));
|
||||||
|
|
||||||
|
RE_Matcher match3("b.r");
|
||||||
|
match3.MakeSingleLine();
|
||||||
|
match3.Compile();
|
||||||
|
CHECK(match3.MatchExactly("bar"));
|
||||||
|
CHECK(match3.MatchExactly("b\nr"));
|
||||||
|
|
||||||
|
RE_Matcher match4("a.c");
|
||||||
|
match4.MakeSingleLine();
|
||||||
|
match4.AddPat("def");
|
||||||
|
match4.Compile();
|
||||||
|
CHECK(match4.MatchExactly("abc"));
|
||||||
|
CHECK(match4.MatchExactly("a\nc"));
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_CASE("disjunction")
|
||||||
|
{
|
||||||
|
RE_Matcher match1("a.c");
|
||||||
|
match1.MakeSingleLine();
|
||||||
|
match1.Compile();
|
||||||
|
RE_Matcher match2("def");
|
||||||
|
match2.Compile();
|
||||||
|
auto dj = detail::RE_Matcher_disjunction(&match1, &match2);
|
||||||
|
CHECK(dj->MatchExactly("abc"));
|
||||||
|
CHECK(dj->MatchExactly("a.c"));
|
||||||
|
CHECK(dj->MatchExactly("a\nc"));
|
||||||
|
CHECK(dj->MatchExactly("def"));
|
||||||
|
delete dj;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace zeek
|
} // namespace zeek
|
||||||
|
|
26
src/RE.h
26
src/RE.h
|
@ -32,7 +32,8 @@ class DFA_State;
|
||||||
class Specific_RE_Matcher;
|
class Specific_RE_Matcher;
|
||||||
class CCL;
|
class CCL;
|
||||||
|
|
||||||
extern int case_insensitive;
|
extern bool case_insensitive;
|
||||||
|
extern bool re_single_line;
|
||||||
extern CCL* curr_ccl;
|
extern CCL* curr_ccl;
|
||||||
extern NFA_Machine* nfa;
|
extern NFA_Machine* nfa;
|
||||||
extern Specific_RE_Matcher* rem;
|
extern Specific_RE_Matcher* rem;
|
||||||
|
@ -59,14 +60,15 @@ enum match_type
|
||||||
class Specific_RE_Matcher
|
class Specific_RE_Matcher
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
explicit Specific_RE_Matcher(match_type mt, int multiline = 0);
|
explicit Specific_RE_Matcher(match_type mt, bool multiline = false);
|
||||||
~Specific_RE_Matcher();
|
~Specific_RE_Matcher();
|
||||||
|
|
||||||
void AddPat(const char* pat);
|
void AddPat(const char* pat);
|
||||||
|
|
||||||
void MakeCaseInsensitive();
|
void MakeCaseInsensitive();
|
||||||
|
void MakeSingleLine();
|
||||||
|
|
||||||
void SetPat(const char* pat) { pattern_text = util::copy_string(pat); }
|
void SetPat(const char* pat) { pattern_text = pat; }
|
||||||
|
|
||||||
bool Compile(bool lazy = false);
|
bool Compile(bool lazy = false);
|
||||||
|
|
||||||
|
@ -90,7 +92,7 @@ public:
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
CCL* LookupCCL(int index) { return ccl_list[index]; }
|
CCL* LookupCCL(int index) { return ccl_list[index]; }
|
||||||
CCL* AnyCCL();
|
CCL* AnyCCL(bool single_line_mode = false);
|
||||||
|
|
||||||
void ConvertCCLs();
|
void ConvertCCLs();
|
||||||
|
|
||||||
|
@ -117,7 +119,7 @@ public:
|
||||||
|
|
||||||
EquivClass* EC() { return &equiv_class; }
|
EquivClass* EC() { return &equiv_class; }
|
||||||
|
|
||||||
const char* PatternText() const { return pattern_text; }
|
const char* PatternText() const { return pattern_text.c_str(); }
|
||||||
|
|
||||||
DFA_Machine* DFA() const { return dfa; }
|
DFA_Machine* DFA() const { return dfa; }
|
||||||
|
|
||||||
|
@ -135,17 +137,21 @@ protected:
|
||||||
bool MatchAll(const u_char* bv, int n);
|
bool MatchAll(const u_char* bv, int n);
|
||||||
|
|
||||||
match_type mt;
|
match_type mt;
|
||||||
int multiline;
|
bool multiline;
|
||||||
char* pattern_text;
|
|
||||||
|
std::string pattern_text;
|
||||||
|
|
||||||
std::map<std::string, std::string> defs;
|
std::map<std::string, std::string> defs;
|
||||||
std::map<std::string, CCL*> ccl_dict;
|
std::map<std::string, CCL*> ccl_dict;
|
||||||
|
std::vector<char> modifiers;
|
||||||
PList<CCL> ccl_list;
|
PList<CCL> ccl_list;
|
||||||
EquivClass equiv_class;
|
EquivClass equiv_class;
|
||||||
int* ecs;
|
int* ecs;
|
||||||
DFA_Machine* dfa;
|
DFA_Machine* dfa;
|
||||||
CCL* any_ccl;
|
|
||||||
AcceptingSet* accepted;
|
AcceptingSet* accepted;
|
||||||
|
|
||||||
|
CCL* any_ccl;
|
||||||
|
CCL* single_line_ccl;
|
||||||
};
|
};
|
||||||
|
|
||||||
class RE_Match_State
|
class RE_Match_State
|
||||||
|
@ -205,6 +211,9 @@ public:
|
||||||
void MakeCaseInsensitive();
|
void MakeCaseInsensitive();
|
||||||
bool IsCaseInsensitive() const { return is_case_insensitive; }
|
bool IsCaseInsensitive() const { return is_case_insensitive; }
|
||||||
|
|
||||||
|
void MakeSingleLine();
|
||||||
|
bool IsSingleLine() const { return is_single_line; }
|
||||||
|
|
||||||
bool Compile(bool lazy = false);
|
bool Compile(bool lazy = false);
|
||||||
|
|
||||||
// Returns true if s exactly matches the pattern, false otherwise.
|
// Returns true if s exactly matches the pattern, false otherwise.
|
||||||
|
@ -240,6 +249,7 @@ protected:
|
||||||
detail::Specific_RE_Matcher* re_exact;
|
detail::Specific_RE_Matcher* re_exact;
|
||||||
|
|
||||||
bool is_case_insensitive = false;
|
bool is_case_insensitive = false;
|
||||||
|
bool is_single_line = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace zeek
|
} // namespace zeek
|
||||||
|
|
|
@ -526,7 +526,7 @@ void RuleMatcher::BuildPatternSets(RuleHdrTest::pattern_set_list* dst, const str
|
||||||
if ( group_exprs.length() > sig_max_group_size || i == exprs.length() )
|
if ( group_exprs.length() > sig_max_group_size || i == exprs.length() )
|
||||||
{
|
{
|
||||||
RuleHdrTest::PatternSet* set = new RuleHdrTest::PatternSet;
|
RuleHdrTest::PatternSet* set = new RuleHdrTest::PatternSet;
|
||||||
set->re = new Specific_RE_Matcher(MATCH_EXACTLY, 1);
|
set->re = new Specific_RE_Matcher(MATCH_EXACTLY, true);
|
||||||
set->re->CompileSet(group_exprs, group_ids);
|
set->re->CompileSet(group_exprs, group_ids);
|
||||||
set->patterns = group_exprs;
|
set->patterns = group_exprs;
|
||||||
set->ids = group_ids;
|
set->ids = group_ids;
|
||||||
|
|
|
@ -145,7 +145,7 @@ TeredoAnalyzer::TeredoAnalyzer() : zeek::packet_analysis::Analyzer("TEREDO")
|
||||||
// }
|
// }
|
||||||
|
|
||||||
pattern_re = std::make_unique<zeek::detail::Specific_RE_Matcher>(zeek::detail::MATCH_EXACTLY,
|
pattern_re = std::make_unique<zeek::detail::Specific_RE_Matcher>(zeek::detail::MATCH_EXACTLY,
|
||||||
1);
|
true);
|
||||||
pattern_re->AddPat("^(\\x00\\x00)|(\\x00\\x01)|([\\x60-\\x6f].{7}((\\x20\\x01\\x00\\x00)).{28})"
|
pattern_re->AddPat("^(\\x00\\x00)|(\\x00\\x01)|([\\x60-\\x6f].{7}((\\x20\\x01\\x00\\x00)).{28})"
|
||||||
"|([\\x60-\\x6f].{23}((\\x20\\x01\\x00\\x00))).{12}");
|
"|([\\x60-\\x6f].{23}((\\x20\\x01\\x00\\x00))).{12}");
|
||||||
pattern_re->Compile();
|
pattern_re->Compile();
|
||||||
|
|
13
src/parse.y
13
src/parse.y
|
@ -54,7 +54,7 @@
|
||||||
%left '$' '[' ']' '(' ')' TOK_HAS_FIELD TOK_HAS_ATTR
|
%left '$' '[' ']' '(' ')' TOK_HAS_FIELD TOK_HAS_ATTR
|
||||||
%nonassoc TOK_AS TOK_IS
|
%nonassoc TOK_AS TOK_IS
|
||||||
|
|
||||||
%type <b> opt_no_test opt_no_test_block TOK_PATTERN_END opt_deep when_flavor
|
%type <b> opt_no_test opt_no_test_block opt_deep when_flavor
|
||||||
%type <str> TOK_ID TOK_PATTERN_TEXT
|
%type <str> TOK_ID TOK_PATTERN_TEXT
|
||||||
%type <id> local_id global_id def_global_id event_id global_or_event_id resolve_id begin_lambda case_type
|
%type <id> local_id global_id def_global_id event_id global_or_event_id resolve_id begin_lambda case_type
|
||||||
%type <id_l> local_id_list case_type_list
|
%type <id_l> local_id_list case_type_list
|
||||||
|
@ -77,6 +77,7 @@
|
||||||
%type <capture> capture
|
%type <capture> capture
|
||||||
%type <captures> capture_list opt_captures when_captures
|
%type <captures> capture_list opt_captures when_captures
|
||||||
%type <when_clause> when_head when_start when_clause
|
%type <when_clause> when_head when_start when_clause
|
||||||
|
%type <re_modes> TOK_PATTERN_END
|
||||||
|
|
||||||
%{
|
%{
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
|
@ -324,6 +325,11 @@ static StmtPtr build_local(ID* id, Type* t, InitClass ic, Expr* e,
|
||||||
zeek::FuncType::Capture* capture;
|
zeek::FuncType::Capture* capture;
|
||||||
zeek::FuncType::CaptureList* captures;
|
zeek::FuncType::CaptureList* captures;
|
||||||
zeek::detail::WhenInfo* when_clause;
|
zeek::detail::WhenInfo* when_clause;
|
||||||
|
struct
|
||||||
|
{
|
||||||
|
bool ignore_case;
|
||||||
|
bool single_line;
|
||||||
|
} re_modes;
|
||||||
}
|
}
|
||||||
|
|
||||||
%%
|
%%
|
||||||
|
@ -912,9 +918,12 @@ expr:
|
||||||
auto* re = new RE_Matcher($3);
|
auto* re = new RE_Matcher($3);
|
||||||
delete [] $3;
|
delete [] $3;
|
||||||
|
|
||||||
if ( $4 )
|
if ( $4.ignore_case )
|
||||||
re->MakeCaseInsensitive();
|
re->MakeCaseInsensitive();
|
||||||
|
|
||||||
|
if ( $4.single_line )
|
||||||
|
re->MakeSingleLine();
|
||||||
|
|
||||||
re->Compile();
|
re->Compile();
|
||||||
$$ = new ConstExpr(make_intrusive<PatternVal>(re));
|
$$ = new ConstExpr(make_intrusive<PatternVal>(re));
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,7 +20,7 @@ namespace zeek::detail {
|
||||||
void yyerror(const char msg[]);
|
void yyerror(const char msg[]);
|
||||||
%}
|
%}
|
||||||
|
|
||||||
%token TOK_CHAR TOK_NUMBER TOK_CCL TOK_CCE TOK_CASE_INSENSITIVE
|
%token TOK_CHAR TOK_NUMBER TOK_CCL TOK_CCE TOK_CASE_INSENSITIVE TOK_SINGLE_LINE
|
||||||
|
|
||||||
%union {
|
%union {
|
||||||
int int_val;
|
int int_val;
|
||||||
|
@ -112,7 +112,8 @@ singleton : singleton '*'
|
||||||
|
|
||||||
| '.'
|
| '.'
|
||||||
{
|
{
|
||||||
$$ = new zeek::detail::NFA_Machine(new zeek::detail::NFA_State(zeek::detail::rem->AnyCCL()));
|
$$ = new zeek::detail::NFA_Machine(new zeek::detail::NFA_State(
|
||||||
|
zeek::detail::rem->AnyCCL(zeek::detail::re_single_line)));
|
||||||
}
|
}
|
||||||
|
|
||||||
| full_ccl
|
| full_ccl
|
||||||
|
@ -132,7 +133,10 @@ singleton : singleton '*'
|
||||||
{ $$ = $2; }
|
{ $$ = $2; }
|
||||||
|
|
||||||
| TOK_CASE_INSENSITIVE re ')'
|
| TOK_CASE_INSENSITIVE re ')'
|
||||||
{ $$ = $2; zeek::detail::case_insensitive = 0; }
|
{ $$ = $2; zeek::detail::case_insensitive = false; }
|
||||||
|
|
||||||
|
| TOK_SINGLE_LINE re ')'
|
||||||
|
{ $$ = $2; zeek::detail::re_single_line = false; }
|
||||||
|
|
||||||
| TOK_CHAR
|
| TOK_CHAR
|
||||||
{
|
{
|
||||||
|
|
|
@ -115,7 +115,8 @@ CCL_EXPR ("[:"[[:alpha:]]+":]")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
"(?i:" zeek::detail::case_insensitive = 1; return TOK_CASE_INSENSITIVE;
|
"(?i:" zeek::detail::case_insensitive = true; return TOK_CASE_INSENSITIVE;
|
||||||
|
"(?s:" zeek::detail::re_single_line = true; return TOK_SINGLE_LINE;
|
||||||
|
|
||||||
[a-zA-Z] {
|
[a-zA-Z] {
|
||||||
if ( zeek::detail::case_insensitive )
|
if ( zeek::detail::case_insensitive )
|
||||||
|
|
21
src/scan.l
21
src/scan.l
|
@ -562,13 +562,28 @@ F RET_CONST(zeek::val_mgr->False()->Ref())
|
||||||
|
|
||||||
<RE>"/" {
|
<RE>"/" {
|
||||||
BEGIN(INITIAL);
|
BEGIN(INITIAL);
|
||||||
yylval.b = false;
|
yylval.re_modes.ignore_case = false;
|
||||||
|
yylval.re_modes.single_line = false;
|
||||||
return TOK_PATTERN_END;
|
return TOK_PATTERN_END;
|
||||||
}
|
}
|
||||||
|
|
||||||
<RE>"/i" {
|
<RE>(\/[is]{0,2}) {
|
||||||
BEGIN(INITIAL);
|
BEGIN(INITIAL);
|
||||||
yylval.b = true;
|
|
||||||
|
if ( strlen(yytext) == 2 )
|
||||||
|
{
|
||||||
|
yylval.re_modes.ignore_case = (yytext[1] == 'i');
|
||||||
|
yylval.re_modes.single_line = (yytext[1] == 's');
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if ( yytext[1] == yytext[2] )
|
||||||
|
zeek::reporter->Error("pattern has duplicate mode %c", yytext[1]);
|
||||||
|
|
||||||
|
yylval.re_modes.ignore_case = (yytext[1] == 'i' || yytext[2] == 'i');
|
||||||
|
yylval.re_modes.single_line = (yytext[1] == 's' || yytext[2] == 's');
|
||||||
|
}
|
||||||
|
|
||||||
return TOK_PATTERN_END;
|
return TOK_PATTERN_END;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -33,3 +33,7 @@ case-sensitive pattern (PASS)
|
||||||
(?i:...) pattern construct (PASS)
|
(?i:...) pattern construct (PASS)
|
||||||
(?i:...) pattern construct (FAIL)
|
(?i:...) pattern construct (FAIL)
|
||||||
(?i:...) pattern construct (PASS)
|
(?i:...) pattern construct (PASS)
|
||||||
|
/s missing (PASS)
|
||||||
|
/s pattern modifier (PASS)
|
||||||
|
/s pattern disjunction (PASS)
|
||||||
|
/s pattern concatenation (PASS)
|
||||||
|
|
|
@ -65,4 +65,9 @@ event zeek_init()
|
||||||
test_case( "(?i:...) pattern construct", /foo|(?i:bar)/ in "xFOoy" );
|
test_case( "(?i:...) pattern construct", /foo|(?i:bar)/ in "xFOoy" );
|
||||||
test_case( "(?i:...) pattern construct", /foo|(?i:bar)/ | /foo/i in "xFOoy" );
|
test_case( "(?i:...) pattern construct", /foo|(?i:bar)/ | /foo/i in "xFOoy" );
|
||||||
|
|
||||||
|
test_case( "/s missing", /fOO.*bAR/ != "fOOab\ncdbAR");
|
||||||
|
test_case( "/s pattern modifier", /fOO.*bAR/s == "fOOab\ncdbAR");
|
||||||
|
test_case( "/s pattern disjunction", /b.r/s | /bez/ == "b\nr" );
|
||||||
|
test_case( "/s pattern concatenation", /b.r/s & /bez/ == "b\nrbez" );
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue