Move regex matching code to zeek namespaces

This commit is contained in:
Tim Wojtulewicz 2020-07-17 15:21:00 -07:00
parent 382812298d
commit c7dc7fc955
26 changed files with 266 additions and 172 deletions

@ -1 +1 @@
Subproject commit fbd29e6c8aed54a81402ac60f4b4d859ead35b94 Subproject commit 17fa8f31da6f950d3475113fe5c24000dc3cb6bd

View file

@ -8,6 +8,8 @@
#include "RE.h" #include "RE.h"
#include "DFA.h" #include "DFA.h"
namespace zeek::detail {
CCL::CCL() CCL::CCL()
{ {
syms = new int_list; syms = new int_list;
@ -48,3 +50,5 @@ unsigned int CCL::MemoryAllocation() const
{ {
return padded_sizeof(*this) + padded_sizeof(*syms) + pad_size(syms->size() * sizeof(int_list::value_type)); return padded_sizeof(*this) + padded_sizeof(*syms) + pad_size(syms->size() * sizeof(int_list::value_type));
} }
} // namespace zeek::detail

View file

@ -2,11 +2,12 @@
#pragma once #pragma once
#include <vector>
#include "util.h" // for ptr_compat_int #include "util.h" // for ptr_compat_int
#include <vector> namespace zeek::detail {
typedef std::vector<ptr_compat_int> int_list; using int_list = std::vector<ptr_compat_int>;
class CCL { class CCL {
public: public:
@ -32,3 +33,8 @@ protected:
int negated; int negated;
int index; int index;
}; };
} // namespace zeek::detail
using int_list [[deprecated("Remove in v4.1. Use zeek::detail::int_list.")]] = zeek::detail::int_list;
using CCL [[deprecated("Remove in v4.1. Use zeek::detail::CCL.")]] = zeek::detail::CCL;

View file

@ -873,12 +873,12 @@ const char* CompositeHash::RecoverOneVal(
case zeek::TYPE_PATTERN: case zeek::TYPE_PATTERN:
{ {
RE_Matcher* re = nullptr; zeek::RE_Matcher* re = nullptr;
if ( is_singleton ) if ( is_singleton )
{ {
kp1 = kp0; kp1 = kp0;
int divider = strlen(kp0) + 1; int divider = strlen(kp0) + 1;
re = new RE_Matcher(kp1, kp1 + divider); re = new zeek::RE_Matcher(kp1, kp1 + divider);
kp1 += k.Size(); kp1 += k.Size();
} }
else else
@ -886,7 +886,7 @@ const char* CompositeHash::RecoverOneVal(
const uint64_t* const len = AlignType<uint64_t>(kp0); const uint64_t* const len = AlignType<uint64_t>(kp0);
kp1 = reinterpret_cast<const char*>(len+2); kp1 = reinterpret_cast<const char*>(len+2);
re = new RE_Matcher(kp1, kp1 + len[0]); re = new zeek::RE_Matcher(kp1, kp1 + len[0]);
kp1 += len[0] + len[1]; kp1 += len[0] + len[1];
} }

View file

@ -25,9 +25,9 @@ class Connection;
class ConnectionTimer; class ConnectionTimer;
class NetSessions; class NetSessions;
class LoginConn; class LoginConn;
class Specific_RE_Matcher;
class EncapsulationStack; class EncapsulationStack;
ZEEK_FORWARD_DECLARE_NAMESPACED(Specific_RE_Matcher, zeek::detail);
ZEEK_FORWARD_DECLARE_NAMESPACED(RuleEndpointState, zeek::detail); ZEEK_FORWARD_DECLARE_NAMESPACED(RuleEndpointState, zeek::detail);
ZEEK_FORWARD_DECLARE_NAMESPACED(RuleHdrTest, zeek::detail); ZEEK_FORWARD_DECLARE_NAMESPACED(RuleHdrTest, zeek::detail);
ZEEK_FORWARD_DECLARE_NAMESPACED(Val, zeek); ZEEK_FORWARD_DECLARE_NAMESPACED(Val, zeek);

View file

@ -7,6 +7,8 @@
#include "Desc.h" #include "Desc.h"
#include "Hash.h" #include "Hash.h"
namespace zeek::detail {
unsigned int DFA_State::transition_counter = 0; unsigned int DFA_State::transition_counter = 0;
DFA_State::DFA_State(int arg_state_num, const EquivClass* ec, DFA_State::DFA_State(int arg_state_num, const EquivClass* ec,
@ -469,3 +471,5 @@ int DFA_Machine::Rep(int sym)
return -1; return -1;
} }
} // namespace zeek::detail

View file

@ -3,27 +3,26 @@
#pragma once #pragma once
#include "RE.h" // for typedef AcceptingSet
#include "Obj.h"
#include <map> #include <map>
#include <string> #include <string>
#include <assert.h> #include <assert.h>
#include <sys/types.h> // for u_char #include <sys/types.h> // for u_char
#include "NFA.h"
#include "RE.h" // for typedef AcceptingSet
#include "Obj.h"
namespace zeek::detail {
class DFA_State; class DFA_State;
class DFA_Machine;
// Transitions to the uncomputed state indicate that we haven't yet // Transitions to the uncomputed state indicate that we haven't yet
// computed the state to go to. // computed the state to go to.
#define DFA_UNCOMPUTED_STATE -2 #define DFA_UNCOMPUTED_STATE -2
#define DFA_UNCOMPUTED_STATE_PTR ((DFA_State*) DFA_UNCOMPUTED_STATE) #define DFA_UNCOMPUTED_STATE_PTR ((DFA_State*) DFA_UNCOMPUTED_STATE)
#include "NFA.h"
class DFA_Machine;
class DFA_State;
class DFA_State : public zeek::Obj { class DFA_State : public zeek::Obj {
public: public:
DFA_State(int state_num, const EquivClass* ec, DFA_State(int state_num, const EquivClass* ec,
@ -152,3 +151,10 @@ inline DFA_State* DFA_State::Xtion(int sym, DFA_Machine* machine)
else else
return xtions[sym]; return xtions[sym];
} }
} // namespace zeek::detail
using DFA_State [[deprecated("Remove in v4.1. Use zeek::detail::DFA_State.")]] = zeek::detail::DFA_State;
using DigestStr [[deprecated("Remove in v4.1. Use zeek::detail::DigestStr.")]] = zeek::detail::DigestStr;
using DFA_State_Cache [[deprecated("Remove in v4.1. Use zeek::detail::DFA_State_Cache.")]] = zeek::detail::DFA_State_Cache;
using DFA_Machine [[deprecated("Remove in v4.1. Use zeek::detail::DFA_Machine.")]] = zeek::detail::DFA_Machine;

View file

@ -5,6 +5,8 @@
#include "EquivClass.h" #include "EquivClass.h"
#include "CCL.h" #include "CCL.h"
namespace zeek::detail {
EquivClass::EquivClass(int arg_size) EquivClass::EquivClass(int arg_size)
{ {
size = arg_size; size = arg_size;
@ -188,3 +190,5 @@ int EquivClass::Size() const
{ {
return padded_sizeof(*this) + pad_size(sizeof(int) * size * (ccl_flags ? 5 : 4)); return padded_sizeof(*this) + pad_size(sizeof(int) * size * (ccl_flags ? 5 : 4));
} }
} // namespace zeek::detail

View file

@ -4,7 +4,9 @@
#include <stdio.h> #include <stdio.h>
class CCL; ZEEK_FORWARD_DECLARE_NAMESPACED(CCL, zeek::detail);
namespace zeek::detail {
class EquivClass { class EquivClass {
public: public:
@ -41,3 +43,5 @@ protected:
int* ccl_flags; int* ccl_flags;
int ec_nil, no_class, no_rep; int ec_nil, no_class, no_rep;
}; };
} // namespace zeek::detail

View file

@ -41,7 +41,7 @@ EventHandler* EventRegistry::Lookup(std::string_view name)
return nullptr; return nullptr;
} }
EventRegistry::string_list EventRegistry::Match(RE_Matcher* pattern) EventRegistry::string_list EventRegistry::Match(zeek::RE_Matcher* pattern)
{ {
string_list names; string_list names;
@ -120,4 +120,3 @@ void EventRegistry::SetErrorHandler(std::string_view name)
reporter->InternalWarning("unknown event handler '%s' in SetErrorHandler()", reporter->InternalWarning("unknown event handler '%s' in SetErrorHandler()",
std::string(name).c_str()); std::string(name).c_str());
} }

View file

@ -2,6 +2,8 @@
#pragma once #pragma once
#include "zeek-config.h"
#include <map> #include <map>
#include <memory> #include <memory>
#include <string> #include <string>
@ -10,7 +12,7 @@
class EventHandler; class EventHandler;
class EventHandlerPtr; class EventHandlerPtr;
class RE_Matcher; ZEEK_FORWARD_DECLARE_NAMESPACED(RE_Matcher, zeek);
// The registry keeps track of all events that we provide or handle. // The registry keeps track of all events that we provide or handle.
class EventRegistry { class EventRegistry {
@ -34,7 +36,7 @@ public:
// Returns a list of all local handlers that match the given pattern. // Returns a list of all local handlers that match the given pattern.
// Passes ownership of list. // Passes ownership of list.
using string_list = std::vector<std::string>; using string_list = std::vector<std::string>;
string_list Match(RE_Matcher* pattern); string_list Match(zeek::RE_Matcher* pattern);
// Marks a handler as handling errors. Error handler will not be called // Marks a handler as handling errors. Error handler will not be called
// recursively to avoid infinite loops in case they trigger an error // recursively to avoid infinite loops in case they trigger an error

View file

@ -9,6 +9,8 @@
#include <algorithm> #include <algorithm>
namespace zeek::detail {
static int nfa_state_id = 0; static int nfa_state_id = 0;
NFA_State::NFA_State(int arg_sym, EquivClass* ec) NFA_State::NFA_State(int arg_sym, EquivClass* ec)
@ -369,3 +371,5 @@ bool NFA_state_cmp_neg(const NFA_State* v1, const NFA_State* v2)
else else
return false; return false;
} }
} // namespace zeek::detail

View file

@ -5,14 +5,10 @@
#include "Obj.h" #include "Obj.h"
#include "List.h" #include "List.h"
class CCL; ZEEK_FORWARD_DECLARE_NAMESPACED(CCL, zeek::detail);
class NFA_State; ZEEK_FORWARD_DECLARE_NAMESPACED(EquivClass, zeek::detail);
class EquivClass;
ZEEK_FORWARD_DECLARE_NAMESPACED(Func, zeek); ZEEK_FORWARD_DECLARE_NAMESPACED(Func, zeek);
using NFA_state_list = zeek::PList<NFA_State>;
#define NO_ACCEPT 0 #define NO_ACCEPT 0
#define NO_UPPER_BOUND -1 #define NO_UPPER_BOUND -1
@ -24,6 +20,10 @@ using NFA_state_list = zeek::PList<NFA_State>;
#define SYM_EPSILON 259 #define SYM_EPSILON 259
#define SYM_CCL 260 #define SYM_CCL 260
namespace zeek::detail {
class NFA_State;
using NFA_state_list = zeek::PList<NFA_State>;
class NFA_State : public zeek::Obj { class NFA_State : public zeek::Obj {
public: public:
@ -135,3 +135,15 @@ extern NFA_state_list* epsilon_closure(NFA_state_list* states);
// For sorting NFA states based on their ID fields (decreasing) // For sorting NFA states based on their ID fields (decreasing)
extern bool NFA_state_cmp_neg(const NFA_State* v1, const NFA_State* v2); extern bool NFA_state_cmp_neg(const NFA_State* v1, const NFA_State* v2);
} // namespace zeek::detail
using NFA_state_list [[deprecated("Remove in v4.1. Use zeek::detail::NFA_state_list.")]] = zeek::detail::NFA_state_list;
using NFA_State [[deprecated("Remove in v4.1. Use zeek::detail::NFA_State.")]] = zeek::detail::NFA_State;
using EpsilonState [[deprecated("Remove in v4.1. Use zeek::detail::EpsilonState.")]] = zeek::detail::EpsilonState;
using NFA_Machine [[deprecated("Remove in v4.1. Use zeek::detail::NFA_Machine.")]] = zeek::detail::NFA_Machine;
// TODO: Could at least the first two of these functions become static member functions of NFA_Machine and NFA_State, which would remove them from the namespace?
constexpr auto make_alternate [[deprecated("Remove in v4.1. Use zeek::detail::make_alternate.")]] = zeek::detail::make_alternate;
constexpr auto epsilon_closure [[deprecated("Remove in v4.1. Use zeek::detail::epsilon_closure.")]] = zeek::detail::epsilon_closure;
constexpr auto NFA_state_cmp_neg [[deprecated("Remove in v4.1. Use zeek::detail::NFA_state_cmp_neg.")]] = zeek::detail::NFA_state_cmp_neg;

110
src/RE.cc
View file

@ -12,16 +12,22 @@
#include "Reporter.h" #include "Reporter.h"
#include "ZeekString.h" #include "ZeekString.h"
CCL* curr_ccl = nullptr; zeek::detail::CCL* zeek::detail::curr_ccl = nullptr;
zeek::detail::CCL*& curr_ccl = zeek::detail::curr_ccl;
Specific_RE_Matcher* rem; zeek::detail::Specific_RE_Matcher* zeek::detail::rem = nullptr;
NFA_Machine* nfa = nullptr; zeek::detail::Specific_RE_Matcher*& rem = zeek::detail::rem;
int case_insensitive = 0; zeek::detail::NFA_Machine* zeek::detail::nfa = nullptr;
zeek::detail::NFA_Machine*& nfa = zeek::detail::nfa;
int zeek::detail::case_insensitive = 0;
int& case_insensitive = zeek::detail::case_insensitive;
extern int RE_parse(void); extern int RE_parse(void);
extern void RE_set_input(const char* str); extern void RE_set_input(const char* str);
extern void RE_done_with_scan(); extern void RE_done_with_scan();
namespace zeek {
namespace detail {
Specific_RE_Matcher::Specific_RE_Matcher(match_type arg_mt, int arg_multiline) Specific_RE_Matcher::Specific_RE_Matcher(match_type arg_mt, int arg_multiline)
: equiv_class(NUM_SYM) : equiv_class(NUM_SYM)
{ {
@ -440,51 +446,6 @@ unsigned int Specific_RE_Matcher::MemoryAllocation() const
+ accepted->size() * padded_sizeof(AcceptingSet::key_type); + accepted->size() * padded_sizeof(AcceptingSet::key_type);
} }
RE_Matcher::RE_Matcher()
{
re_anywhere = new Specific_RE_Matcher(MATCH_ANYWHERE);
re_exact = new Specific_RE_Matcher(MATCH_EXACTLY);
}
RE_Matcher::RE_Matcher(const char* pat)
{
re_anywhere = new Specific_RE_Matcher(MATCH_ANYWHERE);
re_exact = new Specific_RE_Matcher(MATCH_EXACTLY);
AddPat(pat);
}
RE_Matcher::RE_Matcher(const char* exact_pat, const char* anywhere_pat)
{
re_anywhere = new Specific_RE_Matcher(MATCH_ANYWHERE);
re_anywhere->SetPat(anywhere_pat);
re_exact = new Specific_RE_Matcher(MATCH_EXACTLY);
re_exact->SetPat(exact_pat);
}
RE_Matcher::~RE_Matcher()
{
delete re_anywhere;
delete re_exact;
}
void RE_Matcher::AddPat(const char* new_pat)
{
re_anywhere->AddPat(new_pat);
re_exact->AddPat(new_pat);
}
void RE_Matcher::MakeCaseInsensitive()
{
re_anywhere->MakeCaseInsensitive();
re_exact->MakeCaseInsensitive();
}
bool RE_Matcher::Compile(bool lazy)
{
return re_anywhere->Compile(lazy) && re_exact->Compile(lazy);
}
static RE_Matcher* matcher_merge(const RE_Matcher* re1, const RE_Matcher* re2, static RE_Matcher* matcher_merge(const RE_Matcher* re1, const RE_Matcher* re2,
const char* merge_op) const char* merge_op)
{ {
@ -513,3 +474,52 @@ RE_Matcher* RE_Matcher_disjunction(const RE_Matcher* re1, const RE_Matcher* re2)
{ {
return matcher_merge(re1, re2, "|"); return matcher_merge(re1, re2, "|");
} }
} // namespace detail
// Default constructor: allocates the two underlying matchers, one created
// with MATCH_ANYWHERE and one with MATCH_EXACTLY. No pattern is added here.
// Both objects are released again in ~RE_Matcher().
RE_Matcher::RE_Matcher()
{
re_anywhere = new detail::Specific_RE_Matcher(zeek::detail::MATCH_ANYWHERE);
re_exact = new detail::Specific_RE_Matcher(zeek::detail::MATCH_EXACTLY);
}
// Constructs a matcher from a single pattern text.
//
// pat: pattern text that is handed to AddPat(), i.e. to both the
//      MATCH_ANYWHERE and the MATCH_EXACTLY matcher.
//
// Compile() is not called here.
RE_Matcher::RE_Matcher(const char* pat)
{
re_anywhere = new detail::Specific_RE_Matcher(zeek::detail::MATCH_ANYWHERE);
re_exact = new detail::Specific_RE_Matcher(zeek::detail::MATCH_EXACTLY);
// Both matchers must exist before AddPat() dereferences them.
AddPat(pat);
}
// Constructs a matcher from two separately supplied pattern texts.
//
// exact_pat:    text installed on the MATCH_EXACTLY matcher via SetPat().
// anywhere_pat: text installed on the MATCH_ANYWHERE matcher via SetPat().
//
// Note the parameter order (exact first, anywhere second) is the reverse of
// the order in which the two members are set up below.
// Compile() is not called here.
RE_Matcher::RE_Matcher(const char* exact_pat, const char* anywhere_pat)
{
re_anywhere = new detail::Specific_RE_Matcher(zeek::detail::MATCH_ANYWHERE);
re_anywhere->SetPat(anywhere_pat);
re_exact = new detail::Specific_RE_Matcher(zeek::detail::MATCH_EXACTLY);
re_exact->SetPat(exact_pat);
}
// Destructor: frees the two Specific_RE_Matcher objects that every
// constructor of this class allocates with new.
RE_Matcher::~RE_Matcher()
{
delete re_anywhere;
delete re_exact;
}
// Forwards a pattern text to both underlying matchers.
//
// new_pat: pattern text passed unchanged to AddPat() of the MATCH_ANYWHERE
//          matcher and then of the MATCH_EXACTLY matcher.
void RE_Matcher::AddPat(const char* new_pat)
{
re_anywhere->AddPat(new_pat);
re_exact->AddPat(new_pat);
}
// Applies MakeCaseInsensitive() to both underlying matchers so that the
// "anywhere" and "exact" variants stay in step with each other.
void RE_Matcher::MakeCaseInsensitive()
{
re_anywhere->MakeCaseInsensitive();
re_exact->MakeCaseInsensitive();
}
// Compiles both underlying matchers.
//
// lazy: passed through unchanged to Specific_RE_Matcher::Compile().
//
// Returns true only if both Compile() calls yield a true value. Because of
// the short-circuit &&, the MATCH_EXACTLY matcher is not compiled at all
// when compiling the MATCH_ANYWHERE matcher fails.
bool RE_Matcher::Compile(bool lazy)
{
return re_anywhere->Compile(lazy) && re_exact->Compile(lazy);
}
} // namespace zeek

View file

@ -17,30 +17,35 @@ typedef int (*cce_func)(int);
namespace zeek { class String; } namespace zeek { class String; }
using BroString [[deprecated("Remove in v4.1. Use zeek::String instead.")]] = zeek::String; using BroString [[deprecated("Remove in v4.1. Use zeek::String instead.")]] = zeek::String;
class CCL; ZEEK_FORWARD_DECLARE_NAMESPACED(NFA_Machine, zeek::detail);
class NFA_Machine; ZEEK_FORWARD_DECLARE_NAMESPACED(DFA_Machine, zeek::detail);
class DFA_Machine; ZEEK_FORWARD_DECLARE_NAMESPACED(DFA_State, zeek::detail);
class Specific_RE_Matcher; ZEEK_FORWARD_DECLARE_NAMESPACED(Specific_RE_Matcher, zeek::detail);
class RE_Matcher; ZEEK_FORWARD_DECLARE_NAMESPACED(RE_Matcher, zeek);
class DFA_State; ZEEK_FORWARD_DECLARE_NAMESPACED(CCL, zeek::detail);
// This function is automatically generated by flex, so it must not be moved into a namespace.
extern int re_lex(void);
namespace zeek {
namespace detail {
extern int case_insensitive; extern int case_insensitive;
extern CCL* curr_ccl; extern CCL* curr_ccl;
extern NFA_Machine* nfa; extern zeek::detail::NFA_Machine* nfa;
extern Specific_RE_Matcher* rem; extern Specific_RE_Matcher* rem;
extern const char* RE_parse_input; extern const char* RE_parse_input;
extern int re_lex(void);
extern int clower(int); extern int clower(int);
extern void synerr(const char str[]); extern void synerr(const char str[]);
typedef int AcceptIdx; using AcceptIdx = int;
typedef std::set<AcceptIdx> AcceptingSet; using AcceptingSet = std::set<AcceptIdx>;
typedef uint64_t MatchPos; using MatchPos = uint64_t;
typedef std::map<AcceptIdx, MatchPos> AcceptingMatchSet; using AcceptingMatchSet = std::map<AcceptIdx, MatchPos>;
typedef zeek::name_list string_list; using string_list = zeek::name_list;
typedef enum { MATCH_ANYWHERE, MATCH_EXACTLY, } match_type; enum match_type { MATCH_ANYWHERE, MATCH_EXACTLY };
// A "specific" RE matcher will match one type of pattern: either // A "specific" RE matcher will match one type of pattern: either
// MATCH_ANYWHERE or MATCH_EXACTLY. // MATCH_ANYWHERE or MATCH_EXACTLY.
@ -106,7 +111,7 @@ public:
const char* PatternText() const { return pattern_text; } const char* PatternText() const { return pattern_text; }
DFA_Machine* DFA() const { return dfa; } zeek::detail::DFA_Machine* DFA() const { return dfa; }
void Dump(FILE* f); void Dump(FILE* f);
@ -133,7 +138,7 @@ protected:
zeek::PList<CCL> ccl_list; zeek::PList<CCL> ccl_list;
EquivClass equiv_class; EquivClass equiv_class;
int* ecs; int* ecs;
DFA_Machine* dfa; zeek::detail::DFA_Machine* dfa;
CCL* any_ccl; CCL* any_ccl;
AcceptingSet* accepted; AcceptingSet* accepted;
}; };
@ -168,14 +173,19 @@ public:
void AddMatches(const AcceptingSet& as, MatchPos position); void AddMatches(const AcceptingSet& as, MatchPos position);
protected: protected:
DFA_Machine* dfa; zeek::detail::DFA_Machine* dfa;
int* ecs; int* ecs;
AcceptingMatchSet accepted_matches; AcceptingMatchSet accepted_matches;
DFA_State* current_state; zeek::detail::DFA_State* current_state;
int current_pos; int current_pos;
}; };
extern RE_Matcher* RE_Matcher_conjunction(const RE_Matcher* re1, const RE_Matcher* re2);
extern RE_Matcher* RE_Matcher_disjunction(const RE_Matcher* re1, const RE_Matcher* re2);
} // namespace detail
class RE_Matcher final { class RE_Matcher final {
public: public:
RE_Matcher(); RE_Matcher();
@ -225,9 +235,30 @@ public:
} }
protected: protected:
Specific_RE_Matcher* re_anywhere; detail::Specific_RE_Matcher* re_anywhere;
Specific_RE_Matcher* re_exact; detail::Specific_RE_Matcher* re_exact;
}; };
extern RE_Matcher* RE_Matcher_conjunction(const RE_Matcher* re1, const RE_Matcher* re2); } // namespace zeek
extern RE_Matcher* RE_Matcher_disjunction(const RE_Matcher* re1, const RE_Matcher* re2);
constexpr auto clower [[deprecated("Remove in v4.1. Use zeek::detail::clower.")]] = zeek::detail::clower;
constexpr auto synerr [[deprecated("Remove in v4.1. Use zeek::detail::synerr.")]] = zeek::detail::synerr;
using AcceptIdx [[deprecated("Remove in v4.1. Use zeek::detail::AcceptIdx.")]] = zeek::detail::AcceptIdx;
using AcceptingSet [[deprecated("Remove in v4.1. Use zeek::detail::AcceptingSet.")]] = zeek::detail::AcceptingSet;
using MatchPos [[deprecated("Remove in v4.1. Use zeek::detail::MatchPos.")]] = zeek::detail::MatchPos;
using AcceptingMatchSet [[deprecated("Remove in v4.1. Use zeek::detail::AcceptingMatchSet.")]] = zeek::detail::AcceptingMatchSet;
using string_list [[deprecated("Remove in v4.1. Use zeek::detail::string_list.")]] = zeek::detail::string_list;
constexpr auto MATCH_ANYWHERE [[deprecated("Remove in v4.1. Use zeek::detail::MATCH_ANYWHERE.")]] = zeek::detail::MATCH_ANYWHERE;
constexpr auto MATCH_EXACTLY [[deprecated("Remove in v4.1. Use zeek::detail::MATCH_EXACTLY.")]] = zeek::detail::MATCH_EXACTLY;
using Specific_RE_Matcher [[deprecated("Remove in v4.1. Use zeek::detail::Specific_RE_Matcher.")]] = zeek::detail::Specific_RE_Matcher;
using RE_Match_State [[deprecated("Remove in v4.1. Use zeek::detail::RE_Match_State.")]] = zeek::detail::RE_Match_State;
using RE_Matcher [[deprecated("Remove in v4.1. Use zeek::RE_Matcher.")]] = zeek::RE_Matcher;
// Deprecated global-namespace names for the regex parser state that now
// lives in zeek::detail. Each one is declared as a reference, so code still
// using the old unqualified name reads and writes the namespaced variable
// itself rather than a copy. Scheduled for removal in v4.1.
extern int& case_insensitive [[deprecated("Remove in v4.1. Use zeek::detail::case_insensitive")]];
extern zeek::detail::CCL*& curr_ccl [[deprecated("Remove in v4.1. Use zeek::detail::curr_ccl")]];
extern zeek::detail::NFA_Machine*& nfa [[deprecated("Remove in v4.1. Use zeek::detail::nfa")]];
extern zeek::detail::Specific_RE_Matcher*& rem [[deprecated("Remove in v4.1. Use zeek::detail::rem")]];
extern const char*& RE_parse_input [[deprecated("Remove in v4.1. Use zeek::detail::RE_parse_input")]];

View file

@ -29,9 +29,9 @@ extern const char* current_rule_file;
class BroFile; class BroFile;
class IntSet; class IntSet;
class RE_Match_State;
class Specific_RE_Matcher;
ZEEK_FORWARD_DECLARE_NAMESPACED(RE_Match_State, zeek::detail);
ZEEK_FORWARD_DECLARE_NAMESPACED(Specific_RE_Matcher, zeek::detail);
ZEEK_FORWARD_DECLARE_NAMESPACED(RuleMatcher, zeek::detail); ZEEK_FORWARD_DECLARE_NAMESPACED(RuleMatcher, zeek::detail);
ZEEK_FORWARD_DECLARE_NAMESPACED(IP_Hdr, zeek); ZEEK_FORWARD_DECLARE_NAMESPACED(IP_Hdr, zeek);
ZEEK_FORWARD_DECLARE_NAMESPACED(IPPrefix, zeek); ZEEK_FORWARD_DECLARE_NAMESPACED(IPPrefix, zeek);

View file

@ -44,7 +44,7 @@ using BroFunc [[deprecated("Remove in v4.1. Use zeek::detail::ScriptFunc instead
class BroFile; class BroFile;
class PrefixTable; class PrefixTable;
class StateAccess; class StateAccess;
class RE_Matcher; ZEEK_FORWARD_DECLARE_NAMESPACED(RE_Matcher, zeek);
class CompositeHash; class CompositeHash;
class HashKey; class HashKey;

View file

@ -17,15 +17,15 @@
using namespace analyzer::login; using namespace analyzer::login;
static RE_Matcher* re_skip_authentication = nullptr; static zeek::RE_Matcher* re_skip_authentication = nullptr;
static RE_Matcher* re_direct_login_prompts; static zeek::RE_Matcher* re_direct_login_prompts;
static RE_Matcher* re_login_prompts; static zeek::RE_Matcher* re_login_prompts;
static RE_Matcher* re_login_non_failure_msgs; static zeek::RE_Matcher* re_login_non_failure_msgs;
static RE_Matcher* re_login_failure_msgs; static zeek::RE_Matcher* re_login_failure_msgs;
static RE_Matcher* re_login_success_msgs; static zeek::RE_Matcher* re_login_success_msgs;
static RE_Matcher* re_login_timeouts; static zeek::RE_Matcher* re_login_timeouts;
static RE_Matcher* init_RE(zeek::ListVal* l); static zeek::RE_Matcher* init_RE(zeek::ListVal* l);
Login_Analyzer::Login_Analyzer(const char* name, Connection* conn) Login_Analyzer::Login_Analyzer(const char* name, Connection* conn)
: tcp::TCP_ApplicationAnalyzer(name, conn), user_text() : tcp::TCP_ApplicationAnalyzer(name, conn), user_text()
@ -625,9 +625,9 @@ void Login_Analyzer::FlushEmptyTypeahead()
delete [] PopUserText(); delete [] PopUserText();
} }
RE_Matcher* init_RE(zeek::ListVal* l) zeek::RE_Matcher* init_RE(zeek::ListVal* l)
{ {
RE_Matcher* re = l->BuildRE(); zeek::RE_Matcher* re = l->BuildRE();
if ( re ) if ( re )
re->Compile(); re->Compile();

View file

@ -421,7 +421,7 @@ struct val_converter {
if ( ! exact_text || ! anywhere_text ) if ( ! exact_text || ! anywhere_text )
return nullptr; return nullptr;
RE_Matcher* re = new RE_Matcher(exact_text->c_str(), auto* re = new zeek::RE_Matcher(exact_text->c_str(),
anywhere_text->c_str()); anywhere_text->c_str());
if ( ! re->Compile() ) if ( ! re->Compile() )
@ -745,7 +745,7 @@ struct type_checker {
if ( ! exact_text || ! anywhere_text ) if ( ! exact_text || ! anywhere_text )
return false; return false;
RE_Matcher* re = new RE_Matcher(exact_text->c_str(), auto* re = new zeek::RE_Matcher(exact_text->c_str(),
anywhere_text->c_str()); anywhere_text->c_str());
auto compiled = re->Compile(); auto compiled = re->Compile();
delete re; delete re;
@ -986,7 +986,7 @@ broker::expected<broker::data> bro_broker::val_to_data(const zeek::Val* v)
} }
case zeek::TYPE_PATTERN: case zeek::TYPE_PATTERN:
{ {
const RE_Matcher* p = v->AsPattern(); const zeek::RE_Matcher* p = v->AsPattern();
broker::vector rval = {p->PatternText(), p->AnywherePatternText()}; broker::vector rval = {p->PatternText(), p->AnywherePatternText()};
return {std::move(rval)}; return {std::move(rval)};
} }

View file

@ -2269,7 +2269,7 @@ zeek::Val* Manager::ValueToVal(const Stream* i, const Value* val, zeek::Type* re
case zeek::TYPE_PATTERN: case zeek::TYPE_PATTERN:
{ {
RE_Matcher* re = new RE_Matcher(val->val.pattern_text_val); auto* re = new zeek::RE_Matcher(val->val.pattern_text_val);
re->Compile(); re->Compile();
return new zeek::PatternVal(re); return new zeek::PatternVal(re);
} }

View file

@ -236,7 +236,7 @@ static bool expr_is_table_type_name(const zeek::detail::Expr* expr)
id_list* id_l; id_list* id_l;
zeek::detail::InitClass ic; zeek::detail::InitClass ic;
zeek::Val* val; zeek::Val* val;
RE_Matcher* re; zeek::RE_Matcher* re;
zeek::detail::Expr* expr; zeek::detail::Expr* expr;
zeek::detail::EventExpr* event_expr; zeek::detail::EventExpr* event_expr;
zeek::detail::Stmt* stmt; zeek::detail::Stmt* stmt;
@ -715,7 +715,7 @@ expr:
{ {
zeek::detail::set_location(@3); zeek::detail::set_location(@3);
RE_Matcher* re = new RE_Matcher($3); auto* re = new zeek::RE_Matcher($3);
delete [] $3; delete [] $3;
if ( $4 ) if ( $4 )

View file

@ -12,8 +12,11 @@
int csize = 256; int csize = 256;
int syntax_error = 0; int syntax_error = 0;
int cupper(int sym); namespace zeek::detail {
int clower(int sym); int cupper(int sym);
int clower(int sym);
}
void yyerror(const char msg[]); void yyerror(const char msg[]);
%} %}
@ -22,8 +25,8 @@ void yyerror(const char msg[]);
%union { %union {
int int_val; int int_val;
cce_func cce_val; cce_func cce_val;
CCL* ccl_val; zeek::detail::CCL* ccl_val;
NFA_Machine* mach_val; zeek::detail::NFA_Machine* mach_val;
} }
%type <int_val> TOK_CHAR TOK_NUMBER %type <int_val> TOK_CHAR TOK_NUMBER
@ -33,17 +36,17 @@ void yyerror(const char msg[]);
%% %%
flexrule : re flexrule : re
{ $1->AddAccept(1); nfa = $1; } { $1->AddAccept(1); zeek::detail::nfa = $1; }
| error | error
{ return 1; } { return 1; }
; ;
re : re '|' series re : re '|' series
{ $$ = make_alternate($1, $3); } { $$ = zeek::detail::make_alternate($1, $3); }
| series | series
| |
{ $$ = new NFA_Machine(new EpsilonState()); } { $$ = new zeek::detail::NFA_Machine(new zeek::detail::EpsilonState()); }
; ;
series : series singleton series : series singleton
@ -63,14 +66,14 @@ singleton : singleton '*'
| singleton '{' TOK_NUMBER ',' TOK_NUMBER '}' | singleton '{' TOK_NUMBER ',' TOK_NUMBER '}'
{ {
if ( $3 > $5 || $3 < 0 ) if ( $3 > $5 || $3 < 0 )
synerr("bad iteration values"); zeek::detail::synerr("bad iteration values");
else else
{ {
if ( $3 == 0 ) if ( $3 == 0 )
{ {
if ( $5 == 0 ) if ( $5 == 0 )
{ {
$$ = new NFA_Machine(new EpsilonState()); $$ = new zeek::detail::NFA_Machine(new zeek::detail::EpsilonState());
Unref($1); Unref($1);
} }
else else
@ -87,7 +90,7 @@ singleton : singleton '*'
| singleton '{' TOK_NUMBER ',' '}' | singleton '{' TOK_NUMBER ',' '}'
{ {
if ( $3 < 0 ) if ( $3 < 0 )
synerr("iteration value must be positive"); zeek::detail::synerr("iteration value must be positive");
else if ( $3 == 0 ) else if ( $3 == 0 )
$1->MakeClosure(); $1->MakeClosure();
else else
@ -97,11 +100,11 @@ singleton : singleton '*'
| singleton '{' TOK_NUMBER '}' | singleton '{' TOK_NUMBER '}'
{ {
if ( $3 < 0 ) if ( $3 < 0 )
synerr("iteration value must be positive"); zeek::detail::synerr("iteration value must be positive");
else if ( $3 == 0 ) else if ( $3 == 0 )
{ {
Unref($1); Unref($1);
$$ = new NFA_Machine(new EpsilonState()); $$ = new zeek::detail::NFA_Machine(new zeek::detail::EpsilonState());
} }
else else
$1->LinkCopies($3-1); $1->LinkCopies($3-1);
@ -109,18 +112,18 @@ singleton : singleton '*'
| '.' | '.'
{ {
$$ = new NFA_Machine(new NFA_State(rem->AnyCCL())); $$ = new zeek::detail::NFA_Machine(new zeek::detail::NFA_State(zeek::detail::rem->AnyCCL()));
} }
| full_ccl | full_ccl
{ {
$1->Sort(); $1->Sort();
rem->EC()->CCL_Use($1); zeek::detail::rem->EC()->CCL_Use($1);
$$ = new NFA_Machine(new NFA_State($1)); $$ = new zeek::detail::NFA_Machine(new zeek::detail::NFA_State($1));
} }
| TOK_CCL | TOK_CCL
{ $$ = new NFA_Machine(new NFA_State($1)); } { $$ = new zeek::detail::NFA_Machine(new zeek::detail::NFA_State($1)); }
| '"' string '"' | '"' string '"'
{ $$ = $2; } { $$ = $2; }
@ -129,7 +132,7 @@ singleton : singleton '*'
{ $$ = $2; } { $$ = $2; }
| TOK_CASE_INSENSITIVE re ')' | TOK_CASE_INSENSITIVE re ')'
{ $$ = $2; case_insensitive = 0; } { $$ = $2; zeek::detail::case_insensitive = 0; }
| TOK_CHAR | TOK_CHAR
{ {
@ -138,22 +141,22 @@ singleton : singleton '*'
if ( sym < 0 || ( sym >= NUM_SYM && sym != SYM_EPSILON ) ) if ( sym < 0 || ( sym >= NUM_SYM && sym != SYM_EPSILON ) )
{ {
reporter->Error("bad symbol %d (compiling pattern /%s/)", sym, reporter->Error("bad symbol %d (compiling pattern /%s/)", sym,
RE_parse_input); zeek::detail::RE_parse_input);
return 1; return 1;
} }
$$ = new NFA_Machine(new NFA_State(sym, rem->EC())); $$ = new zeek::detail::NFA_Machine(new zeek::detail::NFA_State(sym, zeek::detail::rem->EC()));
} }
| '^' | '^'
{ {
$$ = new NFA_Machine(new NFA_State(SYM_BOL, rem->EC())); $$ = new zeek::detail::NFA_Machine(new zeek::detail::NFA_State(SYM_BOL, zeek::detail::rem->EC()));
$$->MarkBOL(); $$->MarkBOL();
} }
| '$' | '$'
{ {
$$ = new NFA_Machine(new NFA_State(SYM_EOL, rem->EC())); $$ = new zeek::detail::NFA_Machine(new zeek::detail::NFA_State(SYM_EOL, zeek::detail::rem->EC()));
$$->MarkEOL(); $$->MarkEOL();
} }
; ;
@ -171,9 +174,9 @@ full_ccl : '[' ccl ']'
ccl : ccl TOK_CHAR '-' TOK_CHAR ccl : ccl TOK_CHAR '-' TOK_CHAR
{ {
if ( $2 > $4 ) if ( $2 > $4 )
synerr("negative range in character class"); zeek::detail::synerr("negative range in character class");
else if ( case_insensitive && else if ( zeek::detail::case_insensitive &&
(isalpha($2) || isalpha($4)) ) (isalpha($2) || isalpha($4)) )
{ {
if ( isalpha($2) && isalpha($4) && if ( isalpha($2) && isalpha($4) &&
@ -190,7 +193,7 @@ ccl : ccl TOK_CHAR '-' TOK_CHAR
} }
else else
synerr("ambiguous case-insensitive character class"); zeek::detail::synerr("ambiguous case-insensitive character class");
} }
else else
@ -202,10 +205,10 @@ ccl : ccl TOK_CHAR '-' TOK_CHAR
| ccl TOK_CHAR | ccl TOK_CHAR
{ {
if ( case_insensitive && isalpha($2) ) if ( zeek::detail::case_insensitive && isalpha($2) )
{ {
$1->Add(clower($2)); $1->Add(zeek::detail::clower($2));
$1->Add(cupper($2)); $1->Add(zeek::detail::cupper($2));
} }
else else
$1->Add($2); $1->Add($2);
@ -214,14 +217,14 @@ ccl : ccl TOK_CHAR '-' TOK_CHAR
| ccl ccl_expr | ccl ccl_expr
| |
{ $$ = curr_ccl; } { $$ = zeek::detail::curr_ccl; }
; ;
ccl_expr: TOK_CCE ccl_expr: TOK_CCE
{ {
for ( int c = 0; c < csize; ++c ) for ( int c = 0; c < csize; ++c )
if ( isascii(c) && $1(c) ) if ( isascii(c) && $1(c) )
curr_ccl->Add(c); zeek::detail::curr_ccl->Add(c);
} }
; ;
@ -231,14 +234,16 @@ string : string TOK_CHAR
// leave this alone; that provides a way // leave this alone; that provides a way
// of "escaping" out of insensitivity // of "escaping" out of insensitivity
// if needed. // if needed.
$1->AppendState(new NFA_State($2, rem->EC())); $1->AppendState(new zeek::detail::NFA_State($2, zeek::detail::rem->EC()));
} }
| |
{ $$ = new NFA_Machine(new EpsilonState()); } { $$ = new zeek::detail::NFA_Machine(new zeek::detail::EpsilonState()); }
; ;
%% %%
namespace zeek::detail {
int cupper(int sym) int cupper(int sym)
{ {
return (isascii(sym) && islower(sym)) ? toupper(sym) : sym; return (isascii(sym) && islower(sym)) ? toupper(sym) : sym;
@ -255,6 +260,8 @@ void synerr(const char str[])
reporter->Error("%s (compiling pattern /%s/)", str, RE_parse_input); reporter->Error("%s (compiling pattern /%s/)", str, RE_parse_input);
} }
} // namespace zeek::detail
void yyerror(const char msg[]) void yyerror(const char msg[])
{ {
} }

View file

@ -14,7 +14,8 @@
#include "re-parse.h" #include "re-parse.h"
const char* RE_parse_input = 0; const char* zeek::detail::RE_parse_input = nullptr;
const char*& RE_parse_input = zeek::detail::RE_parse_input;
#define RET_CCE(func) \ #define RET_CCE(func) \
BEGIN(SC_CCL); \ BEGIN(SC_CCL); \
@ -60,18 +61,18 @@ CCL_EXPR ("[:"[[:alpha:]]+":]")
"$" return '$'; "$" return '$';
"["({FIRST_CCL_CHAR}|{CCL_EXPR})({CCL_CHAR}|{CCL_EXPR})* { "["({FIRST_CCL_CHAR}|{CCL_EXPR})({CCL_CHAR}|{CCL_EXPR})* {
curr_ccl = rem->LookupCCL(yytext); zeek::detail::curr_ccl = zeek::detail::rem->LookupCCL(yytext);
if ( curr_ccl ) if ( zeek::detail::curr_ccl )
{ {
if ( yyinput() != ']' ) if ( yyinput() != ']' )
synerr("bad character class"); zeek::detail::synerr("bad character class");
yylval.ccl_val = curr_ccl; yylval.ccl_val = zeek::detail::curr_ccl;
return TOK_CCL; return TOK_CCL;
} }
else else
{ {
curr_ccl = new CCL(); zeek::detail::curr_ccl = new zeek::detail::CCL();
rem->InsertCCL(yytext, curr_ccl); zeek::detail::rem->InsertCCL(yytext, zeek::detail::curr_ccl);
// Push back everything but the leading bracket // Push back everything but the leading bracket
// so the ccl can be rescanned. // so the ccl can be rescanned.
@ -86,11 +87,11 @@ CCL_EXPR ("[:"[[:alpha:]]+":]")
char* nmstr = copy_string(yytext+1); char* nmstr = copy_string(yytext+1);
nmstr[yyleng - 2] = '\0'; // chop trailing brace nmstr[yyleng - 2] = '\0'; // chop trailing brace
std::string namedef = rem->LookupDef(nmstr); std::string namedef = zeek::detail::rem->LookupDef(nmstr);
delete nmstr; delete nmstr;
if ( namedef.empty() ) if ( namedef.empty() )
synerr("undefined definition"); zeek::detail::synerr("undefined definition");
else else
{ // push back name surrounded by ()'s { // push back name surrounded by ()'s
int len = namedef.size(); int len = namedef.size();
@ -115,10 +116,10 @@ CCL_EXPR ("[:"[[:alpha:]]+":]")
} }
} }
"(?i:" case_insensitive = 1; return TOK_CASE_INSENSITIVE; "(?i:" zeek::detail::case_insensitive = 1; return TOK_CASE_INSENSITIVE;
[a-zA-Z] { [a-zA-Z] {
if ( case_insensitive ) if ( zeek::detail::case_insensitive )
{ {
char c = yytext[0]; // unput trashes yytext! char c = yytext[0]; // unput trashes yytext!
// Push back the character inside a CCL, // Push back the character inside a CCL,
@ -140,7 +141,7 @@ CCL_EXPR ("[:"[[:alpha:]]+":]")
} }
<SC_QUOTE>{ <SC_QUOTE>{
[^"\n]$ synerr("missing quote"); return '"'; [^"\n]$ zeek::detail::synerr("missing quote"); return '"';
[^"\n] yylval.int_val = yytext[0]; return TOK_CHAR; [^"\n] yylval.int_val = yytext[0]; return TOK_CHAR;
\" BEGIN(INITIAL); return '"'; \" BEGIN(INITIAL); return '"';
} }
@ -156,7 +157,7 @@ CCL_EXPR ("[:"[[:alpha:]]+":]")
[^\]\n] yylval.int_val = yytext[0]; return TOK_CHAR; [^\]\n] yylval.int_val = yytext[0]; return TOK_CHAR;
"]" BEGIN(INITIAL); return ']'; "]" BEGIN(INITIAL); return ']';
[^\]]$ { [^\]]$ {
synerr("bad character class"); zeek::detail::synerr("bad character class");
BEGIN(INITIAL); BEGIN(INITIAL);
return ']'; return ']';
} }
@ -177,19 +178,19 @@ CCL_EXPR ("[:"[[:alpha:]]+":]")
"[:lower:]" { "[:lower:]" {
BEGIN(SC_CCL); BEGIN(SC_CCL);
yylval.cce_val = yylval.cce_val =
case_insensitive ? my_isalpha : my_islower; zeek::detail::case_insensitive ? my_isalpha : my_islower;
return TOK_CCE; return TOK_CCE;
} }
"[:upper:]" { "[:upper:]" {
BEGIN(SC_CCL); BEGIN(SC_CCL);
yylval.cce_val = yylval.cce_val =
case_insensitive ? my_isalpha : my_isupper; zeek::detail::case_insensitive ? my_isalpha : my_isupper;
return TOK_CCE; return TOK_CCE;
} }
{CCL_EXPR} { {CCL_EXPR} {
synerr("bad character class expression"); zeek::detail::synerr("bad character class expression");
BEGIN(SC_CCL); BEGIN(SC_CCL);
yylval.cce_val = my_isalnum; yylval.cce_val = my_isalnum;
return TOK_CCE; return TOK_CCE;
@ -203,7 +204,7 @@ CCL_EXPR ("[:"[[:alpha:]]+":]")
"}" BEGIN(INITIAL); return '}'; "}" BEGIN(INITIAL); return '}';
. { . {
synerr("bad character inside {}'s"); zeek::detail::synerr("bad character inside {}'s");
BEGIN(INITIAL); BEGIN(INITIAL);
return '}'; return '}';
} }
@ -219,7 +220,7 @@ CCL_EXPR ("[:"[[:alpha:]]+":]")
return TOK_CHAR; return TOK_CHAR;
} }
<*>.|\n synerr("bad character"); <*>.|\n zeek::detail::synerr("bad character");
%% %%
@ -227,7 +228,7 @@ YY_BUFFER_STATE RE_buf;
void RE_set_input(const char* str) void RE_set_input(const char* str)
{ {
RE_parse_input = str; zeek::detail::RE_parse_input = str;
RE_buf = yy_scan_string(str); RE_buf = yy_scan_string(str);
} }

View file

@ -48,7 +48,7 @@ extern zeek::EnumType* cur_enum_type;
// Track the @if... depth. // Track the @if... depth.
ptr_compat_int current_depth = 0; ptr_compat_int current_depth = 0;
int_list if_stack; zeek::detail::int_list if_stack;
int line_number = 1; int line_number = 1;
const char* filename = 0; // Absolute path of file currently being parsed. const char* filename = 0; // Absolute path of file currently being parsed.

View file

@ -200,7 +200,7 @@ static int match_prefix(int s_len, const char* s, int t_len, const char* t)
} }
static zeek::VectorValPtr do_split_string(zeek::StringVal* str_val, static zeek::VectorValPtr do_split_string(zeek::StringVal* str_val,
RE_Matcher* re, int incl_sep, zeek::RE_Matcher* re, int incl_sep,
int max_num_sep) int max_num_sep)
{ {
// string_vec is used early in the version script - do not use the NetVar. // string_vec is used early in the version script - do not use the NetVar.
@ -257,7 +257,7 @@ static zeek::VectorValPtr do_split_string(zeek::StringVal* str_val,
return rval; return rval;
} }
zeek::Val* do_split(zeek::StringVal* str_val, RE_Matcher* re, int incl_sep, int max_num_sep) zeek::Val* do_split(zeek::StringVal* str_val, zeek::RE_Matcher* re, int incl_sep, int max_num_sep)
{ {
auto* a = new zeek::TableVal(zeek::id::string_array); auto* a = new zeek::TableVal(zeek::id::string_array);
const u_char* s = str_val->Bytes(); const u_char* s = str_val->Bytes();

View file

@ -517,7 +517,7 @@ zeek::Val* Value::ValueToVal(const std::string& source, const Value* val, bool&
case zeek::TYPE_PATTERN: case zeek::TYPE_PATTERN:
{ {
RE_Matcher* re = new RE_Matcher(val->val.pattern_text_val); auto* re = new zeek::RE_Matcher(val->val.pattern_text_val);
re->Compile(); re->Compile();
return new zeek::PatternVal(re); return new zeek::PatternVal(re);
} }