From f67f6e450738a18ec0d0d148cb6f3ad3760f9ac1 Mon Sep 17 00:00:00 2001 From: Tim Wojtulewicz Date: Tue, 12 Jul 2022 17:13:38 -0700 Subject: [PATCH] Code cleanup in RE_Matcher code - Use std::string in Specific_RE_Matcher instead of char* - Change a couple of ints-as-bools to bools --- src/RE.cc | 43 +++++-------------- src/RE.h | 15 ++++--- src/RuleMatcher.cc | 2 +- src/packet_analysis/protocol/teredo/Teredo.cc | 2 +- src/re-parse.y | 2 +- src/re-scan.l | 2 +- 6 files changed, 23 insertions(+), 43 deletions(-) diff --git a/src/RE.cc b/src/RE.cc index 2ce2f69a2d..13d21e59e5 100644 --- a/src/RE.cc +++ b/src/RE.cc @@ -17,7 +17,7 @@ zeek::detail::CCL* zeek::detail::curr_ccl = nullptr; zeek::detail::Specific_RE_Matcher* zeek::detail::rem = nullptr; zeek::detail::NFA_Machine* zeek::detail::nfa = nullptr; -int zeek::detail::case_insensitive = 0; +bool zeek::detail::case_insensitive = false; extern int RE_parse(void); extern void RE_set_input(const char* str); @@ -28,13 +28,10 @@ namespace zeek namespace detail { -Specific_RE_Matcher::Specific_RE_Matcher(match_type arg_mt, int arg_multiline) - : equiv_class(NUM_SYM) +Specific_RE_Matcher::Specific_RE_Matcher(match_type arg_mt, bool arg_multiline) + : mt(arg_mt), multiline(arg_multiline), equiv_class(NUM_SYM) { - mt = arg_mt; - multiline = arg_multiline; any_ccl = nullptr; - pattern_text = nullptr; dfa = nullptr; ecs = nullptr; accepted = new AcceptingSet(); @@ -46,7 +43,6 @@ Specific_RE_Matcher::~Specific_RE_Matcher() delete ccl_list[i]; Unref(dfa); - delete[] pattern_text; delete accepted; } @@ -90,51 +86,32 @@ void Specific_RE_Matcher::AddExactPat(const char* new_pat) void Specific_RE_Matcher::AddPat(const char* new_pat, const char* orig_fmt, const char* app_fmt) { - int n = strlen(new_pat); - - if ( pattern_text ) - n += strlen(pattern_text) + strlen(app_fmt); + if ( ! pattern_text.empty() ) + pattern_text = util::fmt(app_fmt, pattern_text.c_str(), new_pat); else - n += strlen(orig_fmt); - - char* s = new char[n + 5 /* slop */]; - - if ( pattern_text ) - sprintf(s, app_fmt, pattern_text, new_pat); - else - sprintf(s, orig_fmt, new_pat); - - delete[] pattern_text; - pattern_text = s; + pattern_text = util::fmt(orig_fmt, new_pat); } void Specific_RE_Matcher::MakeCaseInsensitive() { const char fmt[] = "(?i:%s)"; - int n = strlen(pattern_text) + strlen(fmt); - - char* s = new char[n + 5 /* slop */]; - - snprintf(s, n + 5, fmt, pattern_text); - - delete[] pattern_text; - pattern_text = s; + pattern_text = util::fmt(fmt, pattern_text.c_str()); } bool Specific_RE_Matcher::Compile(bool lazy) { - if ( ! pattern_text ) + if ( pattern_text.empty() ) return false; rem = this; - RE_set_input(pattern_text); + RE_set_input(pattern_text.c_str()); int parse_status = RE_parse(); RE_done_with_scan(); if ( parse_status ) { - reporter->Error("error compiling pattern /%s/", pattern_text); + reporter->Error("error compiling pattern /%s/", pattern_text.c_str()); Unref(nfa); nfa = nullptr; return false; diff --git a/src/RE.h b/src/RE.h index 43ab308dca..c71b8dd746 100644 --- a/src/RE.h +++ b/src/RE.h @@ -32,7 +32,7 @@ class DFA_State; class Specific_RE_Matcher; class CCL; -extern int case_insensitive; +extern bool case_insensitive; extern CCL* curr_ccl; extern NFA_Machine* nfa; extern Specific_RE_Matcher* rem; @@ -59,14 +59,15 @@ enum match_type class Specific_RE_Matcher { public: - explicit Specific_RE_Matcher(match_type mt, int multiline = 0); + explicit Specific_RE_Matcher(match_type mt, bool multiline = false); ~Specific_RE_Matcher(); void AddPat(const char* pat); void MakeCaseInsensitive(); + void SetSingleLineMode(); - void SetPat(const char* pat) { pattern_text = util::copy_string(pat); } + void SetPat(const char* pat) { pattern_text = pat; } bool Compile(bool lazy = false); @@ -117,7 +118,7 @@ public: EquivClass* EC() { return &equiv_class; } - const char* PatternText() const { return pattern_text; } + const char* PatternText() const { return pattern_text.c_str(); } DFA_Machine* DFA() const { return dfa; } @@ -135,11 +136,13 @@ protected: bool MatchAll(const u_char* bv, int n); match_type mt; - int multiline; - char* pattern_text; + bool multiline; + + std::string pattern_text; std::map defs; std::map ccl_dict; + std::vector modifiers; PList ccl_list; EquivClass equiv_class; int* ecs; diff --git a/src/RuleMatcher.cc b/src/RuleMatcher.cc index 58856eb5e4..5d22f5964d 100644 --- a/src/RuleMatcher.cc +++ b/src/RuleMatcher.cc @@ -526,7 +526,7 @@ void RuleMatcher::BuildPatternSets(RuleHdrTest::pattern_set_list* dst, const str if ( group_exprs.length() > sig_max_group_size || i == exprs.length() ) { RuleHdrTest::PatternSet* set = new RuleHdrTest::PatternSet; - set->re = new Specific_RE_Matcher(MATCH_EXACTLY, 1); + set->re = new Specific_RE_Matcher(MATCH_EXACTLY, true); set->re->CompileSet(group_exprs, group_ids); set->patterns = group_exprs; set->ids = group_ids; diff --git a/src/packet_analysis/protocol/teredo/Teredo.cc b/src/packet_analysis/protocol/teredo/Teredo.cc index fc755be951..d1a284d263 100644 --- a/src/packet_analysis/protocol/teredo/Teredo.cc +++ b/src/packet_analysis/protocol/teredo/Teredo.cc @@ -145,7 +145,7 @@ TeredoAnalyzer::TeredoAnalyzer() : zeek::packet_analysis::Analyzer("TEREDO") // } pattern_re = std::make_unique(zeek::detail::MATCH_EXACTLY, - 1); + true); pattern_re->AddPat("^(\\x00\\x00)|(\\x00\\x01)|([\\x60-\\x6f].{7}((\\x20\\x01\\x00\\x00)).{28})" "|([\\x60-\\x6f].{23}((\\x20\\x01\\x00\\x00))).{12}"); pattern_re->Compile(); diff --git a/src/re-parse.y b/src/re-parse.y index 75d1ba5350..5d8d2e9a63 100644 --- a/src/re-parse.y +++ b/src/re-parse.y @@ -132,7 +132,7 @@ singleton : singleton '*' { $$ = $2; } | TOK_CASE_INSENSITIVE re ')' - { $$ = $2; zeek::detail::case_insensitive = 0; } + { $$ = $2; zeek::detail::case_insensitive = false; } | TOK_CHAR { diff --git a/src/re-scan.l b/src/re-scan.l index d7d3c1eb05..494dc5b486 100644 --- a/src/re-scan.l +++ b/src/re-scan.l @@ -115,7 +115,7 @@ CCL_EXPR ("[:"[[:alpha:]]+":]") } } - "(?i:" zeek::detail::case_insensitive = 1; return TOK_CASE_INSENSITIVE; + "(?i:" zeek::detail::case_insensitive = true; return TOK_CASE_INSENSITIVE; [a-zA-Z] { if ( zeek::detail::case_insensitive )