Code cleanup in RE_Matcher

- Use std::string in Specific_RE_Matcher instead of char*
- Change a couple of ints-as-bools to bools
This commit is contained in:
Tim Wojtulewicz 2022-07-12 17:13:38 -07:00
parent abf2da781d
commit f67f6e4507
6 changed files with 23 additions and 43 deletions

View file

@ -17,7 +17,7 @@
zeek::detail::CCL* zeek::detail::curr_ccl = nullptr; zeek::detail::CCL* zeek::detail::curr_ccl = nullptr;
zeek::detail::Specific_RE_Matcher* zeek::detail::rem = nullptr; zeek::detail::Specific_RE_Matcher* zeek::detail::rem = nullptr;
zeek::detail::NFA_Machine* zeek::detail::nfa = nullptr; zeek::detail::NFA_Machine* zeek::detail::nfa = nullptr;
int zeek::detail::case_insensitive = 0; bool zeek::detail::case_insensitive = false;
extern int RE_parse(void); extern int RE_parse(void);
extern void RE_set_input(const char* str); extern void RE_set_input(const char* str);
@ -28,13 +28,10 @@ namespace zeek
namespace detail namespace detail
{ {
Specific_RE_Matcher::Specific_RE_Matcher(match_type arg_mt, int arg_multiline) Specific_RE_Matcher::Specific_RE_Matcher(match_type arg_mt, bool arg_multiline)
: equiv_class(NUM_SYM) : mt(arg_mt), multiline(arg_multiline), equiv_class(NUM_SYM)
{ {
mt = arg_mt;
multiline = arg_multiline;
any_ccl = nullptr; any_ccl = nullptr;
pattern_text = nullptr;
dfa = nullptr; dfa = nullptr;
ecs = nullptr; ecs = nullptr;
accepted = new AcceptingSet(); accepted = new AcceptingSet();
@ -46,7 +43,6 @@ Specific_RE_Matcher::~Specific_RE_Matcher()
delete ccl_list[i]; delete ccl_list[i];
Unref(dfa); Unref(dfa);
delete[] pattern_text;
delete accepted; delete accepted;
} }
@ -90,51 +86,32 @@ void Specific_RE_Matcher::AddExactPat(const char* new_pat)
void Specific_RE_Matcher::AddPat(const char* new_pat, const char* orig_fmt, const char* app_fmt) void Specific_RE_Matcher::AddPat(const char* new_pat, const char* orig_fmt, const char* app_fmt)
{ {
int n = strlen(new_pat); if ( ! pattern_text.empty() )
pattern_text = util::fmt(app_fmt, pattern_text.c_str(), new_pat);
if ( pattern_text )
n += strlen(pattern_text) + strlen(app_fmt);
else else
n += strlen(orig_fmt); pattern_text = util::fmt(orig_fmt, new_pat);
char* s = new char[n + 5 /* slop */];
if ( pattern_text )
sprintf(s, app_fmt, pattern_text, new_pat);
else
sprintf(s, orig_fmt, new_pat);
delete[] pattern_text;
pattern_text = s;
} }
void Specific_RE_Matcher::MakeCaseInsensitive() void Specific_RE_Matcher::MakeCaseInsensitive()
{ {
const char fmt[] = "(?i:%s)"; const char fmt[] = "(?i:%s)";
int n = strlen(pattern_text) + strlen(fmt); pattern_text = util::fmt(fmt, pattern_text.c_str());
char* s = new char[n + 5 /* slop */];
snprintf(s, n + 5, fmt, pattern_text);
delete[] pattern_text;
pattern_text = s;
} }
bool Specific_RE_Matcher::Compile(bool lazy) bool Specific_RE_Matcher::Compile(bool lazy)
{ {
if ( ! pattern_text ) if ( pattern_text.empty() )
return false; return false;
rem = this; rem = this;
RE_set_input(pattern_text); RE_set_input(pattern_text.c_str());
int parse_status = RE_parse(); int parse_status = RE_parse();
RE_done_with_scan(); RE_done_with_scan();
if ( parse_status ) if ( parse_status )
{ {
reporter->Error("error compiling pattern /%s/", pattern_text); reporter->Error("error compiling pattern /%s/", pattern_text.c_str());
Unref(nfa); Unref(nfa);
nfa = nullptr; nfa = nullptr;
return false; return false;

View file

@ -32,7 +32,7 @@ class DFA_State;
class Specific_RE_Matcher; class Specific_RE_Matcher;
class CCL; class CCL;
extern int case_insensitive; extern bool case_insensitive;
extern CCL* curr_ccl; extern CCL* curr_ccl;
extern NFA_Machine* nfa; extern NFA_Machine* nfa;
extern Specific_RE_Matcher* rem; extern Specific_RE_Matcher* rem;
@ -59,14 +59,15 @@ enum match_type
class Specific_RE_Matcher class Specific_RE_Matcher
{ {
public: public:
explicit Specific_RE_Matcher(match_type mt, int multiline = 0); explicit Specific_RE_Matcher(match_type mt, bool multiline = false);
~Specific_RE_Matcher(); ~Specific_RE_Matcher();
void AddPat(const char* pat); void AddPat(const char* pat);
void MakeCaseInsensitive(); void MakeCaseInsensitive();
void SetSingleLineMode();
void SetPat(const char* pat) { pattern_text = util::copy_string(pat); } void SetPat(const char* pat) { pattern_text = pat; }
bool Compile(bool lazy = false); bool Compile(bool lazy = false);
@ -117,7 +118,7 @@ public:
EquivClass* EC() { return &equiv_class; } EquivClass* EC() { return &equiv_class; }
const char* PatternText() const { return pattern_text; } const char* PatternText() const { return pattern_text.c_str(); }
DFA_Machine* DFA() const { return dfa; } DFA_Machine* DFA() const { return dfa; }
@ -135,11 +136,13 @@ protected:
bool MatchAll(const u_char* bv, int n); bool MatchAll(const u_char* bv, int n);
match_type mt; match_type mt;
int multiline; bool multiline;
char* pattern_text;
std::string pattern_text;
std::map<std::string, std::string> defs; std::map<std::string, std::string> defs;
std::map<std::string, CCL*> ccl_dict; std::map<std::string, CCL*> ccl_dict;
std::vector<char> modifiers;
PList<CCL> ccl_list; PList<CCL> ccl_list;
EquivClass equiv_class; EquivClass equiv_class;
int* ecs; int* ecs;

View file

@ -526,7 +526,7 @@ void RuleMatcher::BuildPatternSets(RuleHdrTest::pattern_set_list* dst, const str
if ( group_exprs.length() > sig_max_group_size || i == exprs.length() ) if ( group_exprs.length() > sig_max_group_size || i == exprs.length() )
{ {
RuleHdrTest::PatternSet* set = new RuleHdrTest::PatternSet; RuleHdrTest::PatternSet* set = new RuleHdrTest::PatternSet;
set->re = new Specific_RE_Matcher(MATCH_EXACTLY, 1); set->re = new Specific_RE_Matcher(MATCH_EXACTLY, true);
set->re->CompileSet(group_exprs, group_ids); set->re->CompileSet(group_exprs, group_ids);
set->patterns = group_exprs; set->patterns = group_exprs;
set->ids = group_ids; set->ids = group_ids;

View file

@ -145,7 +145,7 @@ TeredoAnalyzer::TeredoAnalyzer() : zeek::packet_analysis::Analyzer("TEREDO")
// } // }
pattern_re = std::make_unique<zeek::detail::Specific_RE_Matcher>(zeek::detail::MATCH_EXACTLY, pattern_re = std::make_unique<zeek::detail::Specific_RE_Matcher>(zeek::detail::MATCH_EXACTLY,
1); true);
pattern_re->AddPat("^(\\x00\\x00)|(\\x00\\x01)|([\\x60-\\x6f].{7}((\\x20\\x01\\x00\\x00)).{28})" pattern_re->AddPat("^(\\x00\\x00)|(\\x00\\x01)|([\\x60-\\x6f].{7}((\\x20\\x01\\x00\\x00)).{28})"
"|([\\x60-\\x6f].{23}((\\x20\\x01\\x00\\x00))).{12}"); "|([\\x60-\\x6f].{23}((\\x20\\x01\\x00\\x00))).{12}");
pattern_re->Compile(); pattern_re->Compile();

View file

@ -132,7 +132,7 @@ singleton : singleton '*'
{ $$ = $2; } { $$ = $2; }
| TOK_CASE_INSENSITIVE re ')' | TOK_CASE_INSENSITIVE re ')'
{ $$ = $2; zeek::detail::case_insensitive = 0; } { $$ = $2; zeek::detail::case_insensitive = false; }
| TOK_CHAR | TOK_CHAR
{ {

View file

@ -115,7 +115,7 @@ CCL_EXPR ("[:"[[:alpha:]]+":]")
} }
} }
"(?i:" zeek::detail::case_insensitive = 1; return TOK_CASE_INSENSITIVE; "(?i:" zeek::detail::case_insensitive = true; return TOK_CASE_INSENSITIVE;
[a-zA-Z] { [a-zA-Z] {
if ( zeek::detail::case_insensitive ) if ( zeek::detail::case_insensitive )