Removing the EXPIRE_DFA_STATES code.

This commit is contained in:
Robin Sommer 2011-03-14 18:55:40 -07:00
parent c92154994a
commit 8eb241fde7
4 changed files with 31 additions and 289 deletions

View file

@ -21,11 +21,10 @@ DFA_State::DFA_State(int arg_state_num, const EquivClass* ec,
nfa_states = arg_nfa_states; nfa_states = arg_nfa_states;
accept = arg_accept; accept = arg_accept;
mark = 0; mark = 0;
lock = 0;
SymPartition(ec); SymPartition(ec);
xtions = new DFA_State_Handle*[num_sym]; xtions = new DFA_State*[num_sym];
for ( int i = 0; i < num_sym; ++i ) for ( int i = 0; i < num_sym; ++i )
xtions[i] = DFA_UNCOMPUTED_STATE_PTR; xtions[i] = DFA_UNCOMPUTED_STATE_PTR;
@ -34,11 +33,7 @@ DFA_State::DFA_State(int arg_state_num, const EquivClass* ec,
DFA_State::~DFA_State() DFA_State::~DFA_State()
{ {
for ( int i = 0; i < num_sym; ++i ) for ( int i = 0; i < num_sym; ++i )
{ DFA_State* s = xtions[i];
DFA_State_Handle* s = xtions[i];
if ( s && s != DFA_UNCOMPUTED_STATE_PTR )
StateUnref(s);
}
delete [] xtions; delete [] xtions;
delete nfa_states; delete nfa_states;
@ -46,18 +41,8 @@ DFA_State::~DFA_State()
delete meta_ec; delete meta_ec;
} }
void DFA_State::AddXtion(int sym, DFA_State_Handle* next_state) void DFA_State::AddXtion(int sym, DFA_State* next_state)
{ {
// The order is important here: first StateRef() the new,
// then StateUnref() the old. Otherwise, we may get a problem
// if both are equal.
if ( next_state )
StateRef(next_state);
if ( xtions[sym] && xtions[sym] != DFA_UNCOMPUTED_STATE_PTR )
StateUnref(xtions[sym]);
xtions[sym] = next_state; xtions[sym] = next_state;
} }
@ -94,14 +79,10 @@ void DFA_State::SymPartition(const EquivClass* ec)
meta_ec->BuildECs(); meta_ec->BuildECs();
} }
DFA_State_Handle* DFA_State::ComputeXtion(int sym, DFA_Machine* machine) DFA_State* DFA_State::ComputeXtion(int sym, DFA_Machine* machine)
{ {
// Make sure we will not expire...
assert(IsLocked());
int equiv_sym = meta_ec->EquivRep(sym); int equiv_sym = meta_ec->EquivRep(sym);
if ( xtions[equiv_sym] != DFA_UNCOMPUTED_STATE_PTR && if ( xtions[equiv_sym] != DFA_UNCOMPUTED_STATE_PTR )
StateIsValid(xtions[equiv_sym]) )
{ {
AddXtion(sym, xtions[equiv_sym]); AddXtion(sym, xtions[equiv_sym]);
return xtions[sym]; return xtions[sym];
@ -109,7 +90,7 @@ DFA_State_Handle* DFA_State::ComputeXtion(int sym, DFA_Machine* machine)
const EquivClass* ec = machine->EC(); const EquivClass* ec = machine->EC();
DFA_State_Handle* next_d; DFA_State* next_d;
NFA_state_list* ns = SymFollowSet(equiv_sym, ec); NFA_state_list* ns = SymFollowSet(equiv_sym, ec);
if ( ns->length() > 0 ) if ( ns->length() > 0 )
@ -211,7 +192,7 @@ void DFA_State::ClearMarks()
for ( int i = 0; i < num_sym; ++i ) for ( int i = 0; i < num_sym; ++i )
{ {
DFA_State_Handle* s = xtions[i]; DFA_State* s = xtions[i];
if ( s && s != DFA_UNCOMPUTED_STATE_PTR ) if ( s && s != DFA_UNCOMPUTED_STATE_PTR )
(*xtions[i])->ClearMarks(); (*xtions[i])->ClearMarks();
@ -243,7 +224,7 @@ void DFA_State::Dump(FILE* f, DFA_Machine* m)
int num_trans = 0; int num_trans = 0;
for ( int sym = 0; sym < num_sym; ++sym ) for ( int sym = 0; sym < num_sym; ++sym )
{ {
DFA_State_Handle* s = xtions[sym]; DFA_State* s = xtions[sym];
if ( ! s ) if ( ! s )
continue; continue;
@ -283,7 +264,7 @@ void DFA_State::Dump(FILE* f, DFA_Machine* m)
for ( int sym = 0; sym < num_sym; ++sym ) for ( int sym = 0; sym < num_sym; ++sym )
{ {
DFA_State_Handle* s = xtions[sym]; DFA_State* s = xtions[sym];
if ( s && s != DFA_UNCOMPUTED_STATE_PTR ) if ( s && s != DFA_UNCOMPUTED_STATE_PTR )
(*s)->Dump(f, m); (*s)->Dump(f, m);
@ -294,7 +275,7 @@ void DFA_State::Stats(unsigned int* computed, unsigned int* uncomputed)
{ {
for ( int sym = 0; sym < num_sym; ++sym ) for ( int sym = 0; sym < num_sym; ++sym )
{ {
DFA_State_Handle* s = xtions[sym]; DFA_State* s = xtions[sym];
if ( s == DFA_UNCOMPUTED_STATE_PTR ) if ( s == DFA_UNCOMPUTED_STATE_PTR )
(*uncomputed)++; (*uncomputed)++;
@ -313,11 +294,9 @@ unsigned int DFA_State::Size()
+ (centry ? padded_sizeof(CacheEntry) : 0); + (centry ? padded_sizeof(CacheEntry) : 0);
} }
DFA_State_Cache::DFA_State_Cache(int arg_maxsize) DFA_State_Cache::DFA_State_Cache(int arg_maxsize)
{ {
maxsize = arg_maxsize; maxsize = arg_maxsize;
head = tail = 0;
hits = misses = 0; hits = misses = 0;
} }
@ -328,13 +307,12 @@ DFA_State_Cache::~DFA_State_Cache()
while ( (e = (CacheEntry*) states.NextEntry(i)) ) while ( (e = (CacheEntry*) states.NextEntry(i)) )
{ {
assert(e->state); assert(e->state);
StateInvalidate(e->state);
delete e->hash; delete e->hash;
delete e; delete e;
} }
} }
DFA_State_Handle* DFA_State_Cache::Lookup(const NFA_state_list& nfas, DFA_State* DFA_State_Cache::Lookup(const NFA_state_list& nfas,
HashKey** hash) HashKey** hash)
{ {
// We assume that state ID's don't exceed 10 digits, plus // We assume that state ID's don't exceed 10 digits, plus
@ -380,100 +358,24 @@ DFA_State_Handle* DFA_State_Cache::Lookup(const NFA_state_list& nfas,
delete *hash; delete *hash;
*hash = 0; *hash = 0;
MoveToFront(e);
return e->state; return e->state;
} }
DFA_State_Handle* DFA_State_Cache::Insert(DFA_State* state, HashKey* hash) DFA_State* DFA_State_Cache::Insert(DFA_State* state, HashKey* hash)
{ {
CacheEntry* e; CacheEntry* e;
#ifdef EXPIRE_DFA_STATES
if ( states.Length() == maxsize )
{
// Remove oldest unlocked entry.
for ( e = tail; e; e = e->prev )
if ( ! (*e->state)->lock )
break;
if ( e )
Remove(e);
}
#endif
e = new CacheEntry; e = new CacheEntry;
#ifdef EXPIRE_DFA_STATES
// Insert as head.
e->state = new DFA_State_Handle(state);
e->state->state->centry = e;
#else
e->state = state; e->state = state;
e->state->centry = e; e->state->centry = e;
#endif
e->hash = hash; e->hash = hash;
e->prev = 0;
e->next = head;
if ( head )
head->prev = e;
head = e;
if ( ! tail )
tail = e;
states.Insert(hash, e); states.Insert(hash, e);
return e->state; return e->state;
} }
// Evicts cache entry `e`: unlinks it from the head/tail linked list,
// removes it from the `states` dictionary, invalidates its state via
// StateInvalidate(), and frees both the hash key and the entry itself.
// `e` must not be used after this call.
//
// Relies on the list invariants: an entry that is not `head` has a
// non-null `prev`, and an entry that is not `tail` has a non-null `next`
// (neither is checked here).
void DFA_State_Cache::Remove(CacheEntry* e)
{
// Fix up the forward links (or `head` if `e` is the first entry).
if ( e == head )
{
head = e->next;
if ( head )
head->prev = 0;
}
else
e->prev->next = e->next;
// Fix up the backward links (or `tail` if `e` is the last entry).
if ( e == tail )
{
tail = e->prev;
if ( tail )
tail->next = 0;
}
else
e->next->prev = e->prev;
// Drop the dictionary entry before freeing the key it is indexed by.
states.Remove(e->hash);
assert(e->state);
StateInvalidate(e->state);
delete e->hash;
delete e;
}
// Moves entry `e` to the front of the linked list and makes it the new
// `head`. Every call increments `hits`, whether or not the entry
// actually has to move.
void DFA_State_Cache::MoveToFront(CacheEntry* e)
{
++hits;
// A null `prev` is treated as "already at the front": nothing to relink.
if ( e->prev )
{
// Unlink `e` from its current position.
e->prev->next = e->next;
if ( e->next )
e->next->prev = e->prev;
else
// `e` was the last entry, so its predecessor becomes the tail.
tail = e->prev;
// Re-insert `e` in front of the current head.
e->prev = 0;
e->next = head;
head->prev = e;
head = e;
}
}
void DFA_State_Cache::GetStats(Stats* s) void DFA_State_Cache::GetStats(Stats* s)
{ {
s->dfa_states = 0; s->dfa_states = 0;
@ -514,9 +416,6 @@ DFA_Machine::DFA_Machine(NFA_Machine* n, EquivClass* arg_ec)
{ {
NFA_state_list* state_set = epsilon_closure(ns); NFA_state_list* state_set = epsilon_closure(ns);
(void) StateSetToDFA_State(state_set, start_state, ec); (void) StateSetToDFA_State(state_set, start_state, ec);
StateRef(start_state);
StateLock(start_state);
} }
else else
start_state = 0; // Jam start_state = 0; // Jam
@ -524,12 +423,6 @@ DFA_Machine::DFA_Machine(NFA_Machine* n, EquivClass* arg_ec)
DFA_Machine::~DFA_Machine() DFA_Machine::~DFA_Machine()
{ {
if ( start_state )
{
StateUnlock(start_state);
StateUnref(start_state);
}
delete dfa_state_cache; delete dfa_state_cache;
Unref(nfa); Unref(nfa);
} }
@ -571,12 +464,11 @@ unsigned int DFA_Machine::MemoryAllocation() const
} }
int DFA_Machine::StateSetToDFA_State(NFA_state_list* state_set, int DFA_Machine::StateSetToDFA_State(NFA_state_list* state_set,
DFA_State_Handle*& d, const EquivClass* ec) DFA_State*& d, const EquivClass* ec)
{ {
HashKey* hash; HashKey* hash;
d = dfa_state_cache->Lookup(*state_set, &hash); d = dfa_state_cache->Lookup(*state_set, &hash);
assert((! d) || StateIsValid(d));
if ( d ) if ( d )
return 0; return 0;

144
src/DFA.h
View file

@ -24,41 +24,7 @@ class DFA_State;
// Transitions to the uncomputed state indicate that we haven't yet // Transitions to the uncomputed state indicate that we haven't yet
// computed the state to go to. // computed the state to go to.
#define DFA_UNCOMPUTED_STATE -2 #define DFA_UNCOMPUTED_STATE -2
#define DFA_UNCOMPUTED_STATE_PTR ((DFA_State_Handle*) DFA_UNCOMPUTED_STATE) #define DFA_UNCOMPUTED_STATE_PTR ((DFA_State*) DFA_UNCOMPUTED_STATE)
#ifdef EXPIRE_DFA_STATES
// Reference-counted wrapper around a DFA_State pointer, used only when
// EXPIRE_DFA_STATES is defined. The constructor and destructor are
// protected and DFA_State_Cache is a friend, so handles are created by
// the cache and destroyed through Unref() rather than by direct delete.
class DFA_State_Handle {
public:
// The reference counting keeps track of this *handle* (not the state).
void Ref() { assert(state); ++refcount; }
// Drops one reference; the handle deletes itself when the count hits zero.
void Unref()
{
if ( --refcount == 0 )
delete this;
}
// Defined out of line, outside the class body.
inline void Invalidate();
// True unless `state` holds the DFA_INVALID_STATE_PTR sentinel.
// NOTE(review): DFA_INVALID_STATE_PTR is not defined in the visible text.
bool IsValid() const { return state != DFA_INVALID_STATE_PTR; }
DFA_State* State() const { return state; }
DFA_State* operator->() const { return state; }
protected:
friend class DFA_State_Cache;
// A new handle starts with a reference count of one.
DFA_State_Handle(DFA_State* arg_state)
{ state = arg_state; refcount = 1; }
inline ~DFA_State_Handle();
DFA_State* state;
int refcount;
};
#else
typedef DFA_State DFA_State_Handle;
#endif
#include "NFA.h" #include "NFA.h"
@ -76,9 +42,9 @@ public:
int StateNum() const { return state_num; } int StateNum() const { return state_num; }
int NFAStateNum() const { return nfa_states->length(); } int NFAStateNum() const { return nfa_states->length(); }
void AddXtion(int sym, DFA_State_Handle* next_state); void AddXtion(int sym, DFA_State* next_state);
inline DFA_State_Handle* Xtion(int sym, DFA_Machine* machine); inline DFA_State* Xtion(int sym, DFA_Machine* machine);
const AcceptingSet* Accept() const { return accept; } const AcceptingSet* Accept() const { return accept; }
void SymPartition(const EquivClass* ec); void SymPartition(const EquivClass* ec);
@ -98,43 +64,31 @@ public:
void Stats(unsigned int* computed, unsigned int* uncomputed); void Stats(unsigned int* computed, unsigned int* uncomputed);
unsigned int Size(); unsigned int Size();
// Locking a state will keep it from expiring from a cache.
void Lock() { ++lock; }
void Unlock() { --lock; }
#ifdef EXPIRE_DFA_STATES
bool IsLocked() { return lock != 0; }
#else
bool IsLocked() { return true; }
DFA_State* operator->(){ return this; } DFA_State* operator->(){ return this; }
#endif
protected: protected:
friend class DFA_State_Cache; friend class DFA_State_Cache;
DFA_State_Handle* ComputeXtion(int sym, DFA_Machine* machine); DFA_State* ComputeXtion(int sym, DFA_Machine* machine);
void AppendIfNew(int sym, int_list* sym_list); void AppendIfNew(int sym, int_list* sym_list);
int state_num; int state_num;
int num_sym; int num_sym;
DFA_State_Handle** xtions; DFA_State** xtions;
AcceptingSet* accept; AcceptingSet* accept;
NFA_state_list* nfa_states; NFA_state_list* nfa_states;
EquivClass* meta_ec; // which ec's make same transition EquivClass* meta_ec; // which ec's make same transition
DFA_State* mark; DFA_State* mark;
int lock;
CacheEntry* centry; CacheEntry* centry;
static unsigned int transition_counter; // see Xtion() static unsigned int transition_counter; // see Xtion()
}; };
struct CacheEntry { struct CacheEntry {
DFA_State_Handle* state; DFA_State* state;
HashKey* hash; HashKey* hash;
CacheEntry* next;
CacheEntry* prev;
}; };
class DFA_State_Cache { class DFA_State_Cache {
@ -143,13 +97,11 @@ public:
~DFA_State_Cache(); ~DFA_State_Cache();
// If the caller stores the handle, it has to call Ref() on it. // If the caller stores the handle, it has to call Ref() on it.
DFA_State_Handle* Lookup(const NFA_state_list& nfa_states, DFA_State* Lookup(const NFA_state_list& nfa_states,
HashKey** hash); HashKey** hash);
// Takes ownership of both; hash is the one returned by Lookup(). // Takes ownership of both; hash is the one returned by Lookup().
DFA_State_Handle* Insert(DFA_State* state, HashKey* hash); DFA_State* Insert(DFA_State* state, HashKey* hash);
void MoveToFront(DFA_State* state) { MoveToFront(state->centry); }
int NumEntries() const { return states.Length(); } int NumEntries() const { return states.Length(); }
@ -168,9 +120,6 @@ public:
void GetStats(Stats* s); void GetStats(Stats* s);
private: private:
void Remove(CacheEntry* e);
void MoveToFront(CacheEntry* e);
int maxsize; int maxsize;
int hits; // Statistics int hits; // Statistics
@ -180,10 +129,6 @@ private:
// Hash indexed by NFA states (MD5s of them, actually). // Hash indexed by NFA states (MD5s of them, actually).
PDict(CacheEntry) states; PDict(CacheEntry) states;
// List in LRU order.
CacheEntry* head;
CacheEntry* tail;
}; };
declare(PList,DFA_State); declare(PList,DFA_State);
@ -196,7 +141,7 @@ public:
int* acc_array); int* acc_array);
~DFA_Machine(); ~DFA_Machine();
DFA_State_Handle* StartState() const { return start_state; } DFA_State* StartState() const { return start_state; }
int NumStates() const { return dfa_state_cache->NumEntries(); } int NumStates() const { return dfa_state_cache->NumEntries(); }
@ -217,74 +162,18 @@ protected:
int state_count; int state_count;
// The state list has to be sorted according to IDs. // The state list has to be sorted according to IDs.
int StateSetToDFA_State(NFA_state_list* state_set, DFA_State_Handle*& d, int StateSetToDFA_State(NFA_state_list* state_set, DFA_State*& d,
const EquivClass* ec); const EquivClass* ec);
const EquivClass* EC() const { return ec; } const EquivClass* EC() const { return ec; }
EquivClass* ec; // equivalence classes corresponding to NFAs EquivClass* ec; // equivalence classes corresponding to NFAs
DFA_State_Handle* start_state; DFA_State* start_state;
DFA_State_Cache* dfa_state_cache; DFA_State_Cache* dfa_state_cache;
NFA_Machine* nfa; NFA_Machine* nfa;
}; };
#ifdef EXPIRE_DFA_STATES inline DFA_State* DFA_State::Xtion(int sym, DFA_Machine* machine)
// EXPIRE_DFA_STATES variant: returns the handle of the state reached from
// this state on symbol `sym`, computing the transition via ComputeXtion()
// if it is still uncomputed or if the stored handle is no longer valid.
// This state is kept locked for the duration of the call.
inline DFA_State_Handle* DFA_State::Xtion(int sym, DFA_Machine* machine)
{
Lock();
// This is just a clumsy form of sampling. Instead of moving the state
// to the front of our LRU cache on each transition (which would be
// quite often), we do it only on every nth transition (counted across
// all DFA states). This is based on the observation that very few of
// the states are used most of the time.
// (currently n=10000; should it be configurable?)
if ( transition_counter++ % 10000 == 0 )
machine->Cache()->MoveToFront(this);
DFA_State_Handle* h;
// A null entry is returned unchanged by the else branch; only the
// uncomputed sentinel or an invalidated handle triggers recomputation.
if ( xtions[sym] == DFA_UNCOMPUTED_STATE_PTR ||
(xtions[sym] && ! xtions[sym]->IsValid()) )
h = ComputeXtion(sym, machine);
else
h = xtions[sym];
Unlock();
return h;
}
// Deletes the wrapped state unless the handle already holds the
// DFA_INVALID_STATE_PTR sentinel (Invalidate() deletes the state and
// stores that sentinel).
inline DFA_State_Handle::~DFA_State_Handle()
{
if ( state != DFA_INVALID_STATE_PTR )
delete state;
}
// Deletes the wrapped state, marks the handle with the
// DFA_INVALID_STATE_PTR sentinel, and drops one reference. Must not be
// called on a handle that is already invalid (asserted).
inline void DFA_State_Handle::Invalidate()
{
assert(state!=DFA_INVALID_STATE_PTR);
delete state;
state = DFA_INVALID_STATE_PTR;
// May delete this handle if that was the last reference, so no member
// may be touched after this call.
Unref();
}
// Not nice, but these free-function wrappers help avoid some overhead in
// the non-expiration case. In this (expiring) variant each one forwards
// to the handle or to the state it wraps.
static inline void StateLock(DFA_State_Handle* s) { s->State()->Lock(); }
static inline void StateUnlock(DFA_State_Handle* s) { s->State()->Unlock(); }
static inline void StateRef(DFA_State_Handle* s) { s->Ref(); }
static inline void StateUnref(DFA_State_Handle* s) { s->Unref(); }
static inline void StateInvalidate(DFA_State_Handle* s) { s->Invalidate(); }
// A null handle counts as valid.
static inline bool StateIsValid(DFA_State_Handle* s)
{
return ! s || s->IsValid();
}
#else
inline DFA_State_Handle* DFA_State::Xtion(int sym, DFA_Machine* machine)
{ {
if ( xtions[sym] == DFA_UNCOMPUTED_STATE_PTR ) if ( xtions[sym] == DFA_UNCOMPUTED_STATE_PTR )
return ComputeXtion(sym, machine); return ComputeXtion(sym, machine);
@ -292,13 +181,4 @@ inline DFA_State_Handle* DFA_State::Xtion(int sym, DFA_Machine* machine)
return xtions[sym]; return xtions[sym];
} }
// Non-expiring variants of the State*() helpers: all of them are no-ops,
// and StateIsValid() always reports true.
static inline void StateLock(DFA_State_Handle* s) { }
static inline void StateUnlock(DFA_State_Handle* s) { }
static inline void StateRef(DFA_State_Handle* s) { }
static inline void StateUnref(DFA_State_Handle* s) { }
static inline void StateInvalidate(DFA_State_Handle* s) { }
static inline bool StateIsValid(DFA_State_Handle* s) { return true; }
#endif
#endif #endif

View file

@ -211,7 +211,7 @@ int Specific_RE_Matcher::MatchAll(const u_char* bv, int n)
// matched is empty. // matched is empty.
return n == 0; return n == 0;
DFA_State_Handle* d = dfa->StartState(); DFA_State* d = dfa->StartState();
d = (*d)->Xtion(ecs[SYM_BOL], dfa); d = (*d)->Xtion(ecs[SYM_BOL], dfa);
while ( d ) while ( d )
@ -236,7 +236,7 @@ int Specific_RE_Matcher::Match(const u_char* bv, int n)
// An empty pattern matches anything. // An empty pattern matches anything.
return 1; return 1;
DFA_State_Handle* d = dfa->StartState(); DFA_State* d = dfa->StartState();
d = (*d)->Xtion(ecs[SYM_BOL], dfa); d = (*d)->Xtion(ecs[SYM_BOL], dfa);
if ( ! d ) return 0; if ( ! d ) return 0;
@ -268,12 +268,6 @@ void Specific_RE_Matcher::Dump(FILE* f)
dfa->Dump(f); dfa->Dump(f);
} }
// Releases the reference held on the current DFA state, if there is one.
RE_Match_State::~RE_Match_State()
{
if ( current_state )
StateUnref(current_state);
}
bool RE_Match_State::Match(const u_char* bv, int n, bool RE_Match_State::Match(const u_char* bv, int n,
bool bol, bool eol, bool clear) bool bol, bool eol, bool clear)
{ {
@ -289,7 +283,6 @@ bool RE_Match_State::Match(const u_char* bv, int n,
// Initialize state and copy the accepting states of the start // Initialize state and copy the accepting states of the start
// state into the acceptance set. // state into the acceptance set.
current_state = dfa->StartState(); current_state = dfa->StartState();
StateRef(current_state);
const AcceptingSet* ac = (*current_state)->Accept(); const AcceptingSet* ac = (*current_state)->Accept();
if ( ac ) if ( ac )
@ -303,20 +296,11 @@ bool RE_Match_State::Match(const u_char* bv, int n,
} }
else if ( clear ) else if ( clear )
{
if ( current_state )
StateUnref(current_state);
current_state = dfa->StartState(); current_state = dfa->StartState();
StateRef(current_state);
}
if ( ! current_state ) if ( ! current_state )
return false; return false;
else
(*current_state)->Unlock();
current_pos = 0; current_pos = 0;
int old_matches = accepted.length(); int old_matches = accepted.length();
@ -334,7 +318,7 @@ bool RE_Match_State::Match(const u_char* bv, int n,
else else
ec = ecs[*(bv++)]; ec = ecs[*(bv++)];
DFA_State_Handle* next_state = (*current_state)->Xtion(ec,dfa); DFA_State* next_state = (*current_state)->Xtion(ec,dfa);
if ( ! next_state ) if ( ! next_state )
{ {
@ -357,15 +341,9 @@ bool RE_Match_State::Match(const u_char* bv, int n,
++current_pos; ++current_pos;
StateRef(next_state);
StateUnref(current_state);
current_state = next_state; current_state = next_state;
} }
// Make sure our state doesn't expire until we return.
if ( current_state )
(*current_state)->Lock();
return accepted.length() != old_matches; return accepted.length() != old_matches;
} }
@ -377,7 +355,7 @@ int Specific_RE_Matcher::LongestMatch(const u_char* bv, int n)
// Use -1 to indicate no match. // Use -1 to indicate no match.
int last_accept = -1; int last_accept = -1;
DFA_State_Handle* d = dfa->StartState(); DFA_State* d = dfa->StartState();
d = (*d)->Xtion(ecs[SYM_BOL], dfa); d = (*d)->Xtion(ecs[SYM_BOL], dfa);
if ( ! d ) if ( ! d )

View file

@ -19,6 +19,7 @@ class NFA_Machine;
class DFA_Machine; class DFA_Machine;
class Specific_RE_Matcher; class Specific_RE_Matcher;
class RE_Matcher; class RE_Matcher;
class DFA_State;
declare(PDict,char); declare(PDict,char);
declare(PDict,CCL); declare(PDict,CCL);
@ -126,13 +127,6 @@ protected:
AcceptingSet* accepted; AcceptingSet* accepted;
}; };
#ifdef EXPIRE_DFA_STATES
class DFA_State_Handle;
#else
class DFA_State;
typedef DFA_State DFA_State_Handle;
#endif
class RE_Match_State { class RE_Match_State {
public: public:
RE_Match_State(Specific_RE_Matcher* matcher) RE_Match_State(Specific_RE_Matcher* matcher)
@ -143,8 +137,6 @@ public:
current_state = 0; current_state = 0;
} }
~RE_Match_State();
const AcceptingSet* Accepted() const { return &accepted; } const AcceptingSet* Accepted() const { return &accepted; }
const int_list* MatchPositions() const { return &match_pos; } const int_list* MatchPositions() const { return &match_pos; }
@ -169,7 +161,7 @@ protected:
AcceptingSet accepted; AcceptingSet accepted;
int_list match_pos; int_list match_pos;
DFA_State_Handle* current_state; DFA_State* current_state;
int current_pos; int current_pos;
}; };