mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 14:48:21 +00:00
Code cleanup in RE_Matcher code
- Use std::string in Specific_RE_Matcher instead of char*
- Change a couple of ints-as-bools to bools
This commit is contained in:
parent
abf2da781d
commit
f67f6e4507
6 changed files with 23 additions and 43 deletions
43
src/RE.cc
43
src/RE.cc
|
@ -17,7 +17,7 @@
|
||||||
zeek::detail::CCL* zeek::detail::curr_ccl = nullptr;
|
zeek::detail::CCL* zeek::detail::curr_ccl = nullptr;
|
||||||
zeek::detail::Specific_RE_Matcher* zeek::detail::rem = nullptr;
|
zeek::detail::Specific_RE_Matcher* zeek::detail::rem = nullptr;
|
||||||
zeek::detail::NFA_Machine* zeek::detail::nfa = nullptr;
|
zeek::detail::NFA_Machine* zeek::detail::nfa = nullptr;
|
||||||
int zeek::detail::case_insensitive = 0;
|
bool zeek::detail::case_insensitive = false;
|
||||||
|
|
||||||
extern int RE_parse(void);
|
extern int RE_parse(void);
|
||||||
extern void RE_set_input(const char* str);
|
extern void RE_set_input(const char* str);
|
||||||
|
@ -28,13 +28,10 @@ namespace zeek
|
||||||
namespace detail
|
namespace detail
|
||||||
{
|
{
|
||||||
|
|
||||||
Specific_RE_Matcher::Specific_RE_Matcher(match_type arg_mt, int arg_multiline)
|
Specific_RE_Matcher::Specific_RE_Matcher(match_type arg_mt, bool arg_multiline)
|
||||||
: equiv_class(NUM_SYM)
|
: mt(arg_mt), multiline(arg_multiline), equiv_class(NUM_SYM)
|
||||||
{
|
{
|
||||||
mt = arg_mt;
|
|
||||||
multiline = arg_multiline;
|
|
||||||
any_ccl = nullptr;
|
any_ccl = nullptr;
|
||||||
pattern_text = nullptr;
|
|
||||||
dfa = nullptr;
|
dfa = nullptr;
|
||||||
ecs = nullptr;
|
ecs = nullptr;
|
||||||
accepted = new AcceptingSet();
|
accepted = new AcceptingSet();
|
||||||
|
@ -46,7 +43,6 @@ Specific_RE_Matcher::~Specific_RE_Matcher()
|
||||||
delete ccl_list[i];
|
delete ccl_list[i];
|
||||||
|
|
||||||
Unref(dfa);
|
Unref(dfa);
|
||||||
delete[] pattern_text;
|
|
||||||
delete accepted;
|
delete accepted;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -90,51 +86,32 @@ void Specific_RE_Matcher::AddExactPat(const char* new_pat)
|
||||||
|
|
||||||
void Specific_RE_Matcher::AddPat(const char* new_pat, const char* orig_fmt, const char* app_fmt)
|
void Specific_RE_Matcher::AddPat(const char* new_pat, const char* orig_fmt, const char* app_fmt)
|
||||||
{
|
{
|
||||||
int n = strlen(new_pat);
|
if ( ! pattern_text.empty() )
|
||||||
|
pattern_text = util::fmt(app_fmt, pattern_text.c_str(), new_pat);
|
||||||
if ( pattern_text )
|
|
||||||
n += strlen(pattern_text) + strlen(app_fmt);
|
|
||||||
else
|
else
|
||||||
n += strlen(orig_fmt);
|
pattern_text = util::fmt(orig_fmt, new_pat);
|
||||||
|
|
||||||
char* s = new char[n + 5 /* slop */];
|
|
||||||
|
|
||||||
if ( pattern_text )
|
|
||||||
sprintf(s, app_fmt, pattern_text, new_pat);
|
|
||||||
else
|
|
||||||
sprintf(s, orig_fmt, new_pat);
|
|
||||||
|
|
||||||
delete[] pattern_text;
|
|
||||||
pattern_text = s;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Specific_RE_Matcher::MakeCaseInsensitive()
|
void Specific_RE_Matcher::MakeCaseInsensitive()
|
||||||
{
|
{
|
||||||
const char fmt[] = "(?i:%s)";
|
const char fmt[] = "(?i:%s)";
|
||||||
int n = strlen(pattern_text) + strlen(fmt);
|
pattern_text = util::fmt(fmt, pattern_text.c_str());
|
||||||
|
|
||||||
char* s = new char[n + 5 /* slop */];
|
|
||||||
|
|
||||||
snprintf(s, n + 5, fmt, pattern_text);
|
|
||||||
|
|
||||||
delete[] pattern_text;
|
|
||||||
pattern_text = s;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Specific_RE_Matcher::Compile(bool lazy)
|
bool Specific_RE_Matcher::Compile(bool lazy)
|
||||||
{
|
{
|
||||||
if ( ! pattern_text )
|
if ( pattern_text.empty() )
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
rem = this;
|
rem = this;
|
||||||
RE_set_input(pattern_text);
|
RE_set_input(pattern_text.c_str());
|
||||||
|
|
||||||
int parse_status = RE_parse();
|
int parse_status = RE_parse();
|
||||||
RE_done_with_scan();
|
RE_done_with_scan();
|
||||||
|
|
||||||
if ( parse_status )
|
if ( parse_status )
|
||||||
{
|
{
|
||||||
reporter->Error("error compiling pattern /%s/", pattern_text);
|
reporter->Error("error compiling pattern /%s/", pattern_text.c_str());
|
||||||
Unref(nfa);
|
Unref(nfa);
|
||||||
nfa = nullptr;
|
nfa = nullptr;
|
||||||
return false;
|
return false;
|
||||||
|
|
15
src/RE.h
15
src/RE.h
|
@ -32,7 +32,7 @@ class DFA_State;
|
||||||
class Specific_RE_Matcher;
|
class Specific_RE_Matcher;
|
||||||
class CCL;
|
class CCL;
|
||||||
|
|
||||||
extern int case_insensitive;
|
extern bool case_insensitive;
|
||||||
extern CCL* curr_ccl;
|
extern CCL* curr_ccl;
|
||||||
extern NFA_Machine* nfa;
|
extern NFA_Machine* nfa;
|
||||||
extern Specific_RE_Matcher* rem;
|
extern Specific_RE_Matcher* rem;
|
||||||
|
@ -59,14 +59,15 @@ enum match_type
|
||||||
class Specific_RE_Matcher
|
class Specific_RE_Matcher
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
explicit Specific_RE_Matcher(match_type mt, int multiline = 0);
|
explicit Specific_RE_Matcher(match_type mt, bool multiline = false);
|
||||||
~Specific_RE_Matcher();
|
~Specific_RE_Matcher();
|
||||||
|
|
||||||
void AddPat(const char* pat);
|
void AddPat(const char* pat);
|
||||||
|
|
||||||
void MakeCaseInsensitive();
|
void MakeCaseInsensitive();
|
||||||
|
void SetSingleLineMode();
|
||||||
|
|
||||||
void SetPat(const char* pat) { pattern_text = util::copy_string(pat); }
|
void SetPat(const char* pat) { pattern_text = pat; }
|
||||||
|
|
||||||
bool Compile(bool lazy = false);
|
bool Compile(bool lazy = false);
|
||||||
|
|
||||||
|
@ -117,7 +118,7 @@ public:
|
||||||
|
|
||||||
EquivClass* EC() { return &equiv_class; }
|
EquivClass* EC() { return &equiv_class; }
|
||||||
|
|
||||||
const char* PatternText() const { return pattern_text; }
|
const char* PatternText() const { return pattern_text.c_str(); }
|
||||||
|
|
||||||
DFA_Machine* DFA() const { return dfa; }
|
DFA_Machine* DFA() const { return dfa; }
|
||||||
|
|
||||||
|
@ -135,11 +136,13 @@ protected:
|
||||||
bool MatchAll(const u_char* bv, int n);
|
bool MatchAll(const u_char* bv, int n);
|
||||||
|
|
||||||
match_type mt;
|
match_type mt;
|
||||||
int multiline;
|
bool multiline;
|
||||||
char* pattern_text;
|
|
||||||
|
std::string pattern_text;
|
||||||
|
|
||||||
std::map<std::string, std::string> defs;
|
std::map<std::string, std::string> defs;
|
||||||
std::map<std::string, CCL*> ccl_dict;
|
std::map<std::string, CCL*> ccl_dict;
|
||||||
|
std::vector<char> modifiers;
|
||||||
PList<CCL> ccl_list;
|
PList<CCL> ccl_list;
|
||||||
EquivClass equiv_class;
|
EquivClass equiv_class;
|
||||||
int* ecs;
|
int* ecs;
|
||||||
|
|
|
@ -526,7 +526,7 @@ void RuleMatcher::BuildPatternSets(RuleHdrTest::pattern_set_list* dst, const str
|
||||||
if ( group_exprs.length() > sig_max_group_size || i == exprs.length() )
|
if ( group_exprs.length() > sig_max_group_size || i == exprs.length() )
|
||||||
{
|
{
|
||||||
RuleHdrTest::PatternSet* set = new RuleHdrTest::PatternSet;
|
RuleHdrTest::PatternSet* set = new RuleHdrTest::PatternSet;
|
||||||
set->re = new Specific_RE_Matcher(MATCH_EXACTLY, 1);
|
set->re = new Specific_RE_Matcher(MATCH_EXACTLY, true);
|
||||||
set->re->CompileSet(group_exprs, group_ids);
|
set->re->CompileSet(group_exprs, group_ids);
|
||||||
set->patterns = group_exprs;
|
set->patterns = group_exprs;
|
||||||
set->ids = group_ids;
|
set->ids = group_ids;
|
||||||
|
|
|
@ -145,7 +145,7 @@ TeredoAnalyzer::TeredoAnalyzer() : zeek::packet_analysis::Analyzer("TEREDO")
|
||||||
// }
|
// }
|
||||||
|
|
||||||
pattern_re = std::make_unique<zeek::detail::Specific_RE_Matcher>(zeek::detail::MATCH_EXACTLY,
|
pattern_re = std::make_unique<zeek::detail::Specific_RE_Matcher>(zeek::detail::MATCH_EXACTLY,
|
||||||
1);
|
true);
|
||||||
pattern_re->AddPat("^(\\x00\\x00)|(\\x00\\x01)|([\\x60-\\x6f].{7}((\\x20\\x01\\x00\\x00)).{28})"
|
pattern_re->AddPat("^(\\x00\\x00)|(\\x00\\x01)|([\\x60-\\x6f].{7}((\\x20\\x01\\x00\\x00)).{28})"
|
||||||
"|([\\x60-\\x6f].{23}((\\x20\\x01\\x00\\x00))).{12}");
|
"|([\\x60-\\x6f].{23}((\\x20\\x01\\x00\\x00))).{12}");
|
||||||
pattern_re->Compile();
|
pattern_re->Compile();
|
||||||
|
|
|
@ -132,7 +132,7 @@ singleton : singleton '*'
|
||||||
{ $$ = $2; }
|
{ $$ = $2; }
|
||||||
|
|
||||||
| TOK_CASE_INSENSITIVE re ')'
|
| TOK_CASE_INSENSITIVE re ')'
|
||||||
{ $$ = $2; zeek::detail::case_insensitive = 0; }
|
{ $$ = $2; zeek::detail::case_insensitive = false; }
|
||||||
|
|
||||||
| TOK_CHAR
|
| TOK_CHAR
|
||||||
{
|
{
|
||||||
|
|
|
@ -115,7 +115,7 @@ CCL_EXPR ("[:"[[:alpha:]]+":]")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
"(?i:" zeek::detail::case_insensitive = 1; return TOK_CASE_INSENSITIVE;
|
"(?i:" zeek::detail::case_insensitive = true; return TOK_CASE_INSENSITIVE;
|
||||||
|
|
||||||
[a-zA-Z] {
|
[a-zA-Z] {
|
||||||
if ( zeek::detail::case_insensitive )
|
if ( zeek::detail::case_insensitive )
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue