diff --git a/src/DFA.cc b/src/DFA.cc index 57ea87335e..6a9e7c9096 100644 --- a/src/DFA.cc +++ b/src/DFA.cc @@ -21,11 +21,10 @@ DFA_State::DFA_State(int arg_state_num, const EquivClass* ec, nfa_states = arg_nfa_states; accept = arg_accept; mark = 0; - lock = 0; SymPartition(ec); - xtions = new DFA_State_Handle*[num_sym]; + xtions = new DFA_State*[num_sym]; for ( int i = 0; i < num_sym; ++i ) xtions[i] = DFA_UNCOMPUTED_STATE_PTR; @@ -34,11 +33,7 @@ DFA_State::DFA_State(int arg_state_num, const EquivClass* ec, DFA_State::~DFA_State() { for ( int i = 0; i < num_sym; ++i ) - { - DFA_State_Handle* s = xtions[i]; - if ( s && s != DFA_UNCOMPUTED_STATE_PTR ) - StateUnref(s); - } + DFA_State* s = xtions[i]; delete [] xtions; delete nfa_states; @@ -46,18 +41,8 @@ DFA_State::~DFA_State() delete meta_ec; } -void DFA_State::AddXtion(int sym, DFA_State_Handle* next_state) +void DFA_State::AddXtion(int sym, DFA_State* next_state) { - // The order is important here: first StateRef() the new, - // then StateUnref() the old. Otherwise, we may get a problem - // if both are equal. - - if ( next_state ) - StateRef(next_state); - - if ( xtions[sym] && xtions[sym] != DFA_UNCOMPUTED_STATE_PTR ) - StateUnref(xtions[sym]); - xtions[sym] = next_state; } @@ -94,14 +79,10 @@ void DFA_State::SymPartition(const EquivClass* ec) meta_ec->BuildECs(); } -DFA_State_Handle* DFA_State::ComputeXtion(int sym, DFA_Machine* machine) +DFA_State* DFA_State::ComputeXtion(int sym, DFA_Machine* machine) { - // Make sure we will not expire... - assert(IsLocked()); - int equiv_sym = meta_ec->EquivRep(sym); - if ( xtions[equiv_sym] != DFA_UNCOMPUTED_STATE_PTR && - StateIsValid(xtions[equiv_sym]) ) + if ( xtions[equiv_sym] != DFA_UNCOMPUTED_STATE_PTR ) { AddXtion(sym, xtions[equiv_sym]); return xtions[sym]; @@ -109,7 +90,7 @@ DFA_State_Handle* DFA_State::ComputeXtion(int sym, DFA_Machine* machine) const EquivClass* ec = machine->EC(); - DFA_State_Handle* next_d; + DFA_State* next_d; NFA_state_list* ns = SymFollowSet(equiv_sym, ec); if ( ns->length() > 0 ) @@ -211,7 +192,7 @@ void DFA_State::ClearMarks() for ( int i = 0; i < num_sym; ++i ) { - DFA_State_Handle* s = xtions[i]; + DFA_State* s = xtions[i]; if ( s && s != DFA_UNCOMPUTED_STATE_PTR ) (*xtions[i])->ClearMarks(); @@ -243,7 +224,7 @@ void DFA_State::Dump(FILE* f, DFA_Machine* m) int num_trans = 0; for ( int sym = 0; sym < num_sym; ++sym ) { - DFA_State_Handle* s = xtions[sym]; + DFA_State* s = xtions[sym]; if ( ! s ) continue; @@ -283,7 +264,7 @@ void DFA_State::Dump(FILE* f, DFA_Machine* m) for ( int sym = 0; sym < num_sym; ++sym ) { - DFA_State_Handle* s = xtions[sym]; + DFA_State* s = xtions[sym]; if ( s && s != DFA_UNCOMPUTED_STATE_PTR ) (*s)->Dump(f, m); @@ -294,7 +275,7 @@ void DFA_State::Stats(unsigned int* computed, unsigned int* uncomputed) { for ( int sym = 0; sym < num_sym; ++sym ) { - DFA_State_Handle* s = xtions[sym]; + DFA_State* s = xtions[sym]; if ( s == DFA_UNCOMPUTED_STATE_PTR ) (*uncomputed)++; @@ -313,11 +294,9 @@ unsigned int DFA_State::Size() + (centry ? padded_sizeof(CacheEntry) : 0); } - DFA_State_Cache::DFA_State_Cache(int arg_maxsize) { maxsize = arg_maxsize; - head = tail = 0; hits = misses = 0; } @@ -328,13 +307,12 @@ DFA_State_Cache::~DFA_State_Cache() while ( (e = (CacheEntry*) states.NextEntry(i)) ) { assert(e->state); - StateInvalidate(e->state); delete e->hash; delete e; } } -DFA_State_Handle* DFA_State_Cache::Lookup(const NFA_state_list& nfas, +DFA_State* DFA_State_Cache::Lookup(const NFA_state_list& nfas, HashKey** hash) { // We assume that state ID's don't exceed 10 digits, plus @@ -380,100 +358,24 @@ DFA_State_Handle* DFA_State_Cache::Lookup(const NFA_state_list& nfas, delete *hash; *hash = 0; - MoveToFront(e); - return e->state; } -DFA_State_Handle* DFA_State_Cache::Insert(DFA_State* state, HashKey* hash) +DFA_State* DFA_State_Cache::Insert(DFA_State* state, HashKey* hash) { CacheEntry* e; -#ifdef EXPIRE_DFA_STATES - if ( states.Length() == maxsize ) - { - // Remove oldest unlocked entry. - for ( e = tail; e; e = e->prev ) - if ( ! (*e->state)->lock ) - break; - if ( e ) - Remove(e); - } -#endif - e = new CacheEntry; -#ifdef EXPIRE_DFA_STATES - // Insert as head. - e->state = new DFA_State_Handle(state); - e->state->state->centry = e; -#else e->state = state; e->state->centry = e; -#endif e->hash = hash; - e->prev = 0; - e->next = head; - if ( head ) - head->prev = e; - head = e; - if ( ! tail ) - tail = e; states.Insert(hash, e); return e->state; } -void DFA_State_Cache::Remove(CacheEntry* e) - { - if ( e == head ) - { - head = e->next; - if ( head ) - head->prev = 0; - } - else - e->prev->next = e->next; - - if ( e == tail ) - { - tail = e->prev; - if ( tail ) - tail->next = 0; - } - else - e->next->prev = e->prev; - - states.Remove(e->hash); - - assert(e->state); - StateInvalidate(e->state); - delete e->hash; - delete e; - } - -void DFA_State_Cache::MoveToFront(CacheEntry* e) - { - ++hits; - - if ( e->prev ) - { - e->prev->next = e->next; - - if ( e->next ) - e->next->prev = e->prev; - else - tail = e->prev; - - e->prev = 0; - e->next = head; - - head->prev = e; - head = e; - } - } - void DFA_State_Cache::GetStats(Stats* s) { s->dfa_states = 0; @@ -514,9 +416,6 @@ DFA_Machine::DFA_Machine(NFA_Machine* n, EquivClass* arg_ec) { NFA_state_list* state_set = epsilon_closure(ns); (void) StateSetToDFA_State(state_set, start_state, ec); - - StateRef(start_state); - StateLock(start_state); } else start_state = 0; // Jam @@ -524,12 +423,6 @@ DFA_Machine::DFA_Machine(NFA_Machine* n, EquivClass* arg_ec) DFA_Machine::~DFA_Machine() { - if ( start_state ) - { - StateUnlock(start_state); - StateUnref(start_state); - } - delete dfa_state_cache; Unref(nfa); } @@ -571,12 +464,11 @@ unsigned int DFA_Machine::MemoryAllocation() const } int DFA_Machine::StateSetToDFA_State(NFA_state_list* state_set, - DFA_State_Handle*& d, const EquivClass* ec) + DFA_State*& d, const EquivClass* ec) { HashKey* hash; d = dfa_state_cache->Lookup(*state_set, &hash); - assert((! d) || StateIsValid(d)); if ( d ) return 0; diff --git a/src/DFA.h b/src/DFA.h index 6bf9fd640b..1fed0dd719 100644 --- a/src/DFA.h +++ b/src/DFA.h @@ -24,41 +24,7 @@ class DFA_State; // Transitions to the uncomputed state indicate that we haven't yet // computed the state to go to. #define DFA_UNCOMPUTED_STATE -2 -#define DFA_UNCOMPUTED_STATE_PTR ((DFA_State_Handle*) DFA_UNCOMPUTED_STATE) - -#ifdef EXPIRE_DFA_STATES - -class DFA_State_Handle { -public: - // The reference counting keeps track of this *handle* (not the state). - void Ref() { assert(state); ++refcount; } - void Unref() - { - if ( --refcount == 0 ) - delete this; - } - - inline void Invalidate(); - bool IsValid() const { return state != DFA_INVALID_STATE_PTR; } - - DFA_State* State() const { return state; } - DFA_State* operator->() const { return state; } - -protected: - friend class DFA_State_Cache; - - DFA_State_Handle(DFA_State* arg_state) - { state = arg_state; refcount = 1; } - - inline ~DFA_State_Handle(); - - DFA_State* state; - int refcount; -}; - -#else -typedef DFA_State DFA_State_Handle; -#endif +#define DFA_UNCOMPUTED_STATE_PTR ((DFA_State*) DFA_UNCOMPUTED_STATE) #include "NFA.h" @@ -76,9 +42,9 @@ public: int StateNum() const { return state_num; } int NFAStateNum() const { return nfa_states->length(); } - void AddXtion(int sym, DFA_State_Handle* next_state); + void AddXtion(int sym, DFA_State* next_state); - inline DFA_State_Handle* Xtion(int sym, DFA_Machine* machine); + inline DFA_State* Xtion(int sym, DFA_Machine* machine); const AcceptingSet* Accept() const { return accept; } void SymPartition(const EquivClass* ec); @@ -98,43 +64,31 @@ public: void Stats(unsigned int* computed, unsigned int* uncomputed); unsigned int Size(); - // Locking a state will keep it from expiring from a cache. - void Lock() { ++lock; } - void Unlock() { --lock; } - -#ifdef EXPIRE_DFA_STATES - bool IsLocked() { return lock != 0; } -#else - bool IsLocked() { return true; } DFA_State* operator->(){ return this; } -#endif protected: friend class DFA_State_Cache; - DFA_State_Handle* ComputeXtion(int sym, DFA_Machine* machine); + DFA_State* ComputeXtion(int sym, DFA_Machine* machine); void AppendIfNew(int sym, int_list* sym_list); int state_num; int num_sym; - DFA_State_Handle** xtions; + DFA_State** xtions; AcceptingSet* accept; NFA_state_list* nfa_states; EquivClass* meta_ec; // which ec's make same transition DFA_State* mark; - int lock; CacheEntry* centry; static unsigned int transition_counter; // see Xtion() }; struct CacheEntry { - DFA_State_Handle* state; + DFA_State* state; HashKey* hash; - CacheEntry* next; - CacheEntry* prev; }; class DFA_State_Cache { @@ -143,13 +97,11 @@ public: ~DFA_State_Cache(); // If the caller stores the handle, it has to call Ref() on it. - DFA_State_Handle* Lookup(const NFA_state_list& nfa_states, + DFA_State* Lookup(const NFA_state_list& nfa_states, HashKey** hash); // Takes ownership of both; hash is the one returned by Lookup(). - DFA_State_Handle* Insert(DFA_State* state, HashKey* hash); - - void MoveToFront(DFA_State* state) { MoveToFront(state->centry); } + DFA_State* Insert(DFA_State* state, HashKey* hash); int NumEntries() const { return states.Length(); } @@ -168,9 +120,6 @@ public: void GetStats(Stats* s); private: - void Remove(CacheEntry* e); - void MoveToFront(CacheEntry* e); - int maxsize; int hits; // Statistics @@ -180,10 +129,6 @@ private: // Hash indexed by NFA states (MD5s of them, actually). PDict(CacheEntry) states; - - // List in LRU order. - CacheEntry* head; - CacheEntry* tail; }; declare(PList,DFA_State); @@ -196,7 +141,7 @@ public: int* acc_array); ~DFA_Machine(); - DFA_State_Handle* StartState() const { return start_state; } + DFA_State* StartState() const { return start_state; } int NumStates() const { return dfa_state_cache->NumEntries(); } @@ -217,74 +162,18 @@ protected: int state_count; // The state list has to be sorted according to IDs. - int StateSetToDFA_State(NFA_state_list* state_set, DFA_State_Handle*& d, + int StateSetToDFA_State(NFA_state_list* state_set, DFA_State*& d, const EquivClass* ec); const EquivClass* EC() const { return ec; } EquivClass* ec; // equivalence classes corresponding to NFAs - DFA_State_Handle* start_state; + DFA_State* start_state; DFA_State_Cache* dfa_state_cache; NFA_Machine* nfa; }; -#ifdef EXPIRE_DFA_STATES - -inline DFA_State_Handle* DFA_State::Xtion(int sym, DFA_Machine* machine) - { - Lock(); - - // This is just a clumsy form of sampling... Instead of moving - // the state to the front of our LRU cache on each transition (which - // would be quite often) we just do it on every nth transition - // (counted across all DFA states). This is based on the observation - // that a very few of all states are used most of time. - // (currently n=10000; should it be configurable?) - if ( transition_counter++ % 10000 == 0 ) - machine->Cache()->MoveToFront(this); - - DFA_State_Handle* h; - - if ( xtions[sym] == DFA_UNCOMPUTED_STATE_PTR || - (xtions[sym] && ! xtions[sym]->IsValid()) ) - h = ComputeXtion(sym, machine); - else - h = xtions[sym]; - - Unlock(); - - return h; - } - -inline DFA_State_Handle::~DFA_State_Handle() - { - if ( state != DFA_INVALID_STATE_PTR ) - delete state; - } - -inline void DFA_State_Handle::Invalidate() - { - assert(state!=DFA_INVALID_STATE_PTR); - delete state; - state = DFA_INVALID_STATE_PTR; - Unref(); - } - -// Not nice but helps avoiding some overhead in the non-expiration case. -static inline void StateLock(DFA_State_Handle* s) { s->State()->Lock(); } -static inline void StateUnlock(DFA_State_Handle* s) { s->State()->Unlock(); } -static inline void StateRef(DFA_State_Handle* s) { s->Ref(); } -static inline void StateUnref(DFA_State_Handle* s) { s->Unref(); } -static inline void StateInvalidate(DFA_State_Handle* s) { s->Invalidate(); } - -static inline bool StateIsValid(DFA_State_Handle* s) - { - return ! s || s->IsValid(); - } - -#else - -inline DFA_State_Handle* DFA_State::Xtion(int sym, DFA_Machine* machine) +inline DFA_State* DFA_State::Xtion(int sym, DFA_Machine* machine) { if ( xtions[sym] == DFA_UNCOMPUTED_STATE_PTR ) return ComputeXtion(sym, machine); @@ -292,13 +181,4 @@ inline DFA_State_Handle* DFA_State::Xtion(int sym, DFA_Machine* machine) return xtions[sym]; } -static inline void StateLock(DFA_State_Handle* s) { } -static inline void StateUnlock(DFA_State_Handle* s) { } -static inline void StateRef(DFA_State_Handle* s) { } -static inline void StateUnref(DFA_State_Handle* s) { } -static inline void StateInvalidate(DFA_State_Handle* s) { } -static inline bool StateIsValid(DFA_State_Handle* s) { return true; } - -#endif - #endif diff --git a/src/RE.cc b/src/RE.cc index 19cc5737aa..e4258987dc 100644 --- a/src/RE.cc +++ b/src/RE.cc @@ -211,7 +211,7 @@ int Specific_RE_Matcher::MatchAll(const u_char* bv, int n) // matched is empty. return n == 0; - DFA_State_Handle* d = dfa->StartState(); + DFA_State* d = dfa->StartState(); d = (*d)->Xtion(ecs[SYM_BOL], dfa); while ( d ) @@ -236,7 +236,7 @@ int Specific_RE_Matcher::Match(const u_char* bv, int n) // An empty pattern matches anything. return 1; - DFA_State_Handle* d = dfa->StartState(); + DFA_State* d = dfa->StartState(); d = (*d)->Xtion(ecs[SYM_BOL], dfa); if ( ! d ) return 0; @@ -268,12 +268,6 @@ void Specific_RE_Matcher::Dump(FILE* f) dfa->Dump(f); } -RE_Match_State::~RE_Match_State() - { - if ( current_state ) - StateUnref(current_state); - } - bool RE_Match_State::Match(const u_char* bv, int n, bool bol, bool eol, bool clear) { @@ -289,7 +283,6 @@ bool RE_Match_State::Match(const u_char* bv, int n, // Initialize state and copy the accepting states of the start // state into the acceptance set. current_state = dfa->StartState(); - StateRef(current_state); const AcceptingSet* ac = (*current_state)->Accept(); if ( ac ) @@ -303,20 +296,11 @@ bool RE_Match_State::Match(const u_char* bv, int n, } else if ( clear ) - { - if ( current_state ) - StateUnref(current_state); - current_state = dfa->StartState(); - StateRef(current_state); - } if ( ! current_state ) return false; - else - (*current_state)->Unlock(); - current_pos = 0; int old_matches = accepted.length(); @@ -334,7 +318,7 @@ bool RE_Match_State::Match(const u_char* bv, int n, else ec = ecs[*(bv++)]; - DFA_State_Handle* next_state = (*current_state)->Xtion(ec,dfa); + DFA_State* next_state = (*current_state)->Xtion(ec,dfa); if ( ! next_state ) { @@ -357,15 +341,9 @@ bool RE_Match_State::Match(const u_char* bv, int n, ++current_pos; - StateRef(next_state); - StateUnref(current_state); current_state = next_state; } - // Make sure our state doesn't expire until we return. - if ( current_state ) - (*current_state)->Lock(); - return accepted.length() != old_matches; } @@ -377,7 +355,7 @@ int Specific_RE_Matcher::LongestMatch(const u_char* bv, int n) // Use -1 to indicate no match. int last_accept = -1; - DFA_State_Handle* d = dfa->StartState(); + DFA_State* d = dfa->StartState(); d = (*d)->Xtion(ecs[SYM_BOL], dfa); if ( ! d ) diff --git a/src/RE.h b/src/RE.h index 08da19c495..f46f835649 100644 --- a/src/RE.h +++ b/src/RE.h @@ -19,6 +19,7 @@ class NFA_Machine; class DFA_Machine; class Specific_RE_Matcher; class RE_Matcher; +class DFA_State; declare(PDict,char); declare(PDict,CCL); @@ -126,13 +127,6 @@ protected: AcceptingSet* accepted; }; -#ifdef EXPIRE_DFA_STATES - class DFA_State_Handle; -#else - class DFA_State; - typedef DFA_State DFA_State_Handle; -#endif - class RE_Match_State { public: RE_Match_State(Specific_RE_Matcher* matcher) @@ -143,8 +137,6 @@ public: current_state = 0; } - ~RE_Match_State(); - const AcceptingSet* Accepted() const { return &accepted; } const int_list* MatchPositions() const { return &match_pos; } @@ -169,7 +161,7 @@ protected: AcceptingSet accepted; int_list match_pos; - DFA_State_Handle* current_state; + DFA_State* current_state; int current_pos; };