mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 14:48:21 +00:00
Removing the EXPIRE_DFA_STATES code.
This commit is contained in:
parent
c92154994a
commit
8eb241fde7
4 changed files with 31 additions and 289 deletions
134
src/DFA.cc
134
src/DFA.cc
|
@ -21,11 +21,10 @@ DFA_State::DFA_State(int arg_state_num, const EquivClass* ec,
|
||||||
nfa_states = arg_nfa_states;
|
nfa_states = arg_nfa_states;
|
||||||
accept = arg_accept;
|
accept = arg_accept;
|
||||||
mark = 0;
|
mark = 0;
|
||||||
lock = 0;
|
|
||||||
|
|
||||||
SymPartition(ec);
|
SymPartition(ec);
|
||||||
|
|
||||||
xtions = new DFA_State_Handle*[num_sym];
|
xtions = new DFA_State*[num_sym];
|
||||||
|
|
||||||
for ( int i = 0; i < num_sym; ++i )
|
for ( int i = 0; i < num_sym; ++i )
|
||||||
xtions[i] = DFA_UNCOMPUTED_STATE_PTR;
|
xtions[i] = DFA_UNCOMPUTED_STATE_PTR;
|
||||||
|
@ -34,11 +33,7 @@ DFA_State::DFA_State(int arg_state_num, const EquivClass* ec,
|
||||||
DFA_State::~DFA_State()
|
DFA_State::~DFA_State()
|
||||||
{
|
{
|
||||||
for ( int i = 0; i < num_sym; ++i )
|
for ( int i = 0; i < num_sym; ++i )
|
||||||
{
|
DFA_State* s = xtions[i];
|
||||||
DFA_State_Handle* s = xtions[i];
|
|
||||||
if ( s && s != DFA_UNCOMPUTED_STATE_PTR )
|
|
||||||
StateUnref(s);
|
|
||||||
}
|
|
||||||
|
|
||||||
delete [] xtions;
|
delete [] xtions;
|
||||||
delete nfa_states;
|
delete nfa_states;
|
||||||
|
@ -46,18 +41,8 @@ DFA_State::~DFA_State()
|
||||||
delete meta_ec;
|
delete meta_ec;
|
||||||
}
|
}
|
||||||
|
|
||||||
void DFA_State::AddXtion(int sym, DFA_State_Handle* next_state)
|
void DFA_State::AddXtion(int sym, DFA_State* next_state)
|
||||||
{
|
{
|
||||||
// The order is important here: first StateRef() the new,
|
|
||||||
// then StateUnref() the old. Otherwise, we may get a problem
|
|
||||||
// if both are equal.
|
|
||||||
|
|
||||||
if ( next_state )
|
|
||||||
StateRef(next_state);
|
|
||||||
|
|
||||||
if ( xtions[sym] && xtions[sym] != DFA_UNCOMPUTED_STATE_PTR )
|
|
||||||
StateUnref(xtions[sym]);
|
|
||||||
|
|
||||||
xtions[sym] = next_state;
|
xtions[sym] = next_state;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -94,14 +79,10 @@ void DFA_State::SymPartition(const EquivClass* ec)
|
||||||
meta_ec->BuildECs();
|
meta_ec->BuildECs();
|
||||||
}
|
}
|
||||||
|
|
||||||
DFA_State_Handle* DFA_State::ComputeXtion(int sym, DFA_Machine* machine)
|
DFA_State* DFA_State::ComputeXtion(int sym, DFA_Machine* machine)
|
||||||
{
|
{
|
||||||
// Make sure we will not expire...
|
|
||||||
assert(IsLocked());
|
|
||||||
|
|
||||||
int equiv_sym = meta_ec->EquivRep(sym);
|
int equiv_sym = meta_ec->EquivRep(sym);
|
||||||
if ( xtions[equiv_sym] != DFA_UNCOMPUTED_STATE_PTR &&
|
if ( xtions[equiv_sym] != DFA_UNCOMPUTED_STATE_PTR )
|
||||||
StateIsValid(xtions[equiv_sym]) )
|
|
||||||
{
|
{
|
||||||
AddXtion(sym, xtions[equiv_sym]);
|
AddXtion(sym, xtions[equiv_sym]);
|
||||||
return xtions[sym];
|
return xtions[sym];
|
||||||
|
@ -109,7 +90,7 @@ DFA_State_Handle* DFA_State::ComputeXtion(int sym, DFA_Machine* machine)
|
||||||
|
|
||||||
const EquivClass* ec = machine->EC();
|
const EquivClass* ec = machine->EC();
|
||||||
|
|
||||||
DFA_State_Handle* next_d;
|
DFA_State* next_d;
|
||||||
|
|
||||||
NFA_state_list* ns = SymFollowSet(equiv_sym, ec);
|
NFA_state_list* ns = SymFollowSet(equiv_sym, ec);
|
||||||
if ( ns->length() > 0 )
|
if ( ns->length() > 0 )
|
||||||
|
@ -211,7 +192,7 @@ void DFA_State::ClearMarks()
|
||||||
|
|
||||||
for ( int i = 0; i < num_sym; ++i )
|
for ( int i = 0; i < num_sym; ++i )
|
||||||
{
|
{
|
||||||
DFA_State_Handle* s = xtions[i];
|
DFA_State* s = xtions[i];
|
||||||
|
|
||||||
if ( s && s != DFA_UNCOMPUTED_STATE_PTR )
|
if ( s && s != DFA_UNCOMPUTED_STATE_PTR )
|
||||||
(*xtions[i])->ClearMarks();
|
(*xtions[i])->ClearMarks();
|
||||||
|
@ -243,7 +224,7 @@ void DFA_State::Dump(FILE* f, DFA_Machine* m)
|
||||||
int num_trans = 0;
|
int num_trans = 0;
|
||||||
for ( int sym = 0; sym < num_sym; ++sym )
|
for ( int sym = 0; sym < num_sym; ++sym )
|
||||||
{
|
{
|
||||||
DFA_State_Handle* s = xtions[sym];
|
DFA_State* s = xtions[sym];
|
||||||
|
|
||||||
if ( ! s )
|
if ( ! s )
|
||||||
continue;
|
continue;
|
||||||
|
@ -283,7 +264,7 @@ void DFA_State::Dump(FILE* f, DFA_Machine* m)
|
||||||
|
|
||||||
for ( int sym = 0; sym < num_sym; ++sym )
|
for ( int sym = 0; sym < num_sym; ++sym )
|
||||||
{
|
{
|
||||||
DFA_State_Handle* s = xtions[sym];
|
DFA_State* s = xtions[sym];
|
||||||
|
|
||||||
if ( s && s != DFA_UNCOMPUTED_STATE_PTR )
|
if ( s && s != DFA_UNCOMPUTED_STATE_PTR )
|
||||||
(*s)->Dump(f, m);
|
(*s)->Dump(f, m);
|
||||||
|
@ -294,7 +275,7 @@ void DFA_State::Stats(unsigned int* computed, unsigned int* uncomputed)
|
||||||
{
|
{
|
||||||
for ( int sym = 0; sym < num_sym; ++sym )
|
for ( int sym = 0; sym < num_sym; ++sym )
|
||||||
{
|
{
|
||||||
DFA_State_Handle* s = xtions[sym];
|
DFA_State* s = xtions[sym];
|
||||||
|
|
||||||
if ( s == DFA_UNCOMPUTED_STATE_PTR )
|
if ( s == DFA_UNCOMPUTED_STATE_PTR )
|
||||||
(*uncomputed)++;
|
(*uncomputed)++;
|
||||||
|
@ -313,11 +294,9 @@ unsigned int DFA_State::Size()
|
||||||
+ (centry ? padded_sizeof(CacheEntry) : 0);
|
+ (centry ? padded_sizeof(CacheEntry) : 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
DFA_State_Cache::DFA_State_Cache(int arg_maxsize)
|
DFA_State_Cache::DFA_State_Cache(int arg_maxsize)
|
||||||
{
|
{
|
||||||
maxsize = arg_maxsize;
|
maxsize = arg_maxsize;
|
||||||
head = tail = 0;
|
|
||||||
hits = misses = 0;
|
hits = misses = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -328,13 +307,12 @@ DFA_State_Cache::~DFA_State_Cache()
|
||||||
while ( (e = (CacheEntry*) states.NextEntry(i)) )
|
while ( (e = (CacheEntry*) states.NextEntry(i)) )
|
||||||
{
|
{
|
||||||
assert(e->state);
|
assert(e->state);
|
||||||
StateInvalidate(e->state);
|
|
||||||
delete e->hash;
|
delete e->hash;
|
||||||
delete e;
|
delete e;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
DFA_State_Handle* DFA_State_Cache::Lookup(const NFA_state_list& nfas,
|
DFA_State* DFA_State_Cache::Lookup(const NFA_state_list& nfas,
|
||||||
HashKey** hash)
|
HashKey** hash)
|
||||||
{
|
{
|
||||||
// We assume that state ID's don't exceed 10 digits, plus
|
// We assume that state ID's don't exceed 10 digits, plus
|
||||||
|
@ -380,100 +358,24 @@ DFA_State_Handle* DFA_State_Cache::Lookup(const NFA_state_list& nfas,
|
||||||
delete *hash;
|
delete *hash;
|
||||||
*hash = 0;
|
*hash = 0;
|
||||||
|
|
||||||
MoveToFront(e);
|
|
||||||
|
|
||||||
return e->state;
|
return e->state;
|
||||||
}
|
}
|
||||||
|
|
||||||
DFA_State_Handle* DFA_State_Cache::Insert(DFA_State* state, HashKey* hash)
|
DFA_State* DFA_State_Cache::Insert(DFA_State* state, HashKey* hash)
|
||||||
{
|
{
|
||||||
CacheEntry* e;
|
CacheEntry* e;
|
||||||
|
|
||||||
#ifdef EXPIRE_DFA_STATES
|
|
||||||
if ( states.Length() == maxsize )
|
|
||||||
{
|
|
||||||
// Remove oldest unlocked entry.
|
|
||||||
for ( e = tail; e; e = e->prev )
|
|
||||||
if ( ! (*e->state)->lock )
|
|
||||||
break;
|
|
||||||
if ( e )
|
|
||||||
Remove(e);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
e = new CacheEntry;
|
e = new CacheEntry;
|
||||||
|
|
||||||
#ifdef EXPIRE_DFA_STATES
|
|
||||||
// Insert as head.
|
|
||||||
e->state = new DFA_State_Handle(state);
|
|
||||||
e->state->state->centry = e;
|
|
||||||
#else
|
|
||||||
e->state = state;
|
e->state = state;
|
||||||
e->state->centry = e;
|
e->state->centry = e;
|
||||||
#endif
|
|
||||||
e->hash = hash;
|
e->hash = hash;
|
||||||
e->prev = 0;
|
|
||||||
e->next = head;
|
|
||||||
if ( head )
|
|
||||||
head->prev = e;
|
|
||||||
head = e;
|
|
||||||
if ( ! tail )
|
|
||||||
tail = e;
|
|
||||||
|
|
||||||
states.Insert(hash, e);
|
states.Insert(hash, e);
|
||||||
|
|
||||||
return e->state;
|
return e->state;
|
||||||
}
|
}
|
||||||
|
|
||||||
void DFA_State_Cache::Remove(CacheEntry* e)
|
|
||||||
{
|
|
||||||
if ( e == head )
|
|
||||||
{
|
|
||||||
head = e->next;
|
|
||||||
if ( head )
|
|
||||||
head->prev = 0;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
e->prev->next = e->next;
|
|
||||||
|
|
||||||
if ( e == tail )
|
|
||||||
{
|
|
||||||
tail = e->prev;
|
|
||||||
if ( tail )
|
|
||||||
tail->next = 0;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
e->next->prev = e->prev;
|
|
||||||
|
|
||||||
states.Remove(e->hash);
|
|
||||||
|
|
||||||
assert(e->state);
|
|
||||||
StateInvalidate(e->state);
|
|
||||||
delete e->hash;
|
|
||||||
delete e;
|
|
||||||
}
|
|
||||||
|
|
||||||
void DFA_State_Cache::MoveToFront(CacheEntry* e)
|
|
||||||
{
|
|
||||||
++hits;
|
|
||||||
|
|
||||||
if ( e->prev )
|
|
||||||
{
|
|
||||||
e->prev->next = e->next;
|
|
||||||
|
|
||||||
if ( e->next )
|
|
||||||
e->next->prev = e->prev;
|
|
||||||
else
|
|
||||||
tail = e->prev;
|
|
||||||
|
|
||||||
e->prev = 0;
|
|
||||||
e->next = head;
|
|
||||||
|
|
||||||
head->prev = e;
|
|
||||||
head = e;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void DFA_State_Cache::GetStats(Stats* s)
|
void DFA_State_Cache::GetStats(Stats* s)
|
||||||
{
|
{
|
||||||
s->dfa_states = 0;
|
s->dfa_states = 0;
|
||||||
|
@ -514,9 +416,6 @@ DFA_Machine::DFA_Machine(NFA_Machine* n, EquivClass* arg_ec)
|
||||||
{
|
{
|
||||||
NFA_state_list* state_set = epsilon_closure(ns);
|
NFA_state_list* state_set = epsilon_closure(ns);
|
||||||
(void) StateSetToDFA_State(state_set, start_state, ec);
|
(void) StateSetToDFA_State(state_set, start_state, ec);
|
||||||
|
|
||||||
StateRef(start_state);
|
|
||||||
StateLock(start_state);
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
start_state = 0; // Jam
|
start_state = 0; // Jam
|
||||||
|
@ -524,12 +423,6 @@ DFA_Machine::DFA_Machine(NFA_Machine* n, EquivClass* arg_ec)
|
||||||
|
|
||||||
DFA_Machine::~DFA_Machine()
|
DFA_Machine::~DFA_Machine()
|
||||||
{
|
{
|
||||||
if ( start_state )
|
|
||||||
{
|
|
||||||
StateUnlock(start_state);
|
|
||||||
StateUnref(start_state);
|
|
||||||
}
|
|
||||||
|
|
||||||
delete dfa_state_cache;
|
delete dfa_state_cache;
|
||||||
Unref(nfa);
|
Unref(nfa);
|
||||||
}
|
}
|
||||||
|
@ -571,12 +464,11 @@ unsigned int DFA_Machine::MemoryAllocation() const
|
||||||
}
|
}
|
||||||
|
|
||||||
int DFA_Machine::StateSetToDFA_State(NFA_state_list* state_set,
|
int DFA_Machine::StateSetToDFA_State(NFA_state_list* state_set,
|
||||||
DFA_State_Handle*& d, const EquivClass* ec)
|
DFA_State*& d, const EquivClass* ec)
|
||||||
{
|
{
|
||||||
HashKey* hash;
|
HashKey* hash;
|
||||||
d = dfa_state_cache->Lookup(*state_set, &hash);
|
d = dfa_state_cache->Lookup(*state_set, &hash);
|
||||||
|
|
||||||
assert((! d) || StateIsValid(d));
|
|
||||||
if ( d )
|
if ( d )
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
|
144
src/DFA.h
144
src/DFA.h
|
@ -24,41 +24,7 @@ class DFA_State;
|
||||||
// Transitions to the uncomputed state indicate that we haven't yet
|
// Transitions to the uncomputed state indicate that we haven't yet
|
||||||
// computed the state to go to.
|
// computed the state to go to.
|
||||||
#define DFA_UNCOMPUTED_STATE -2
|
#define DFA_UNCOMPUTED_STATE -2
|
||||||
#define DFA_UNCOMPUTED_STATE_PTR ((DFA_State_Handle*) DFA_UNCOMPUTED_STATE)
|
#define DFA_UNCOMPUTED_STATE_PTR ((DFA_State*) DFA_UNCOMPUTED_STATE)
|
||||||
|
|
||||||
#ifdef EXPIRE_DFA_STATES
|
|
||||||
|
|
||||||
class DFA_State_Handle {
|
|
||||||
public:
|
|
||||||
// The reference counting keeps track of this *handle* (not the state).
|
|
||||||
void Ref() { assert(state); ++refcount; }
|
|
||||||
void Unref()
|
|
||||||
{
|
|
||||||
if ( --refcount == 0 )
|
|
||||||
delete this;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void Invalidate();
|
|
||||||
bool IsValid() const { return state != DFA_INVALID_STATE_PTR; }
|
|
||||||
|
|
||||||
DFA_State* State() const { return state; }
|
|
||||||
DFA_State* operator->() const { return state; }
|
|
||||||
|
|
||||||
protected:
|
|
||||||
friend class DFA_State_Cache;
|
|
||||||
|
|
||||||
DFA_State_Handle(DFA_State* arg_state)
|
|
||||||
{ state = arg_state; refcount = 1; }
|
|
||||||
|
|
||||||
inline ~DFA_State_Handle();
|
|
||||||
|
|
||||||
DFA_State* state;
|
|
||||||
int refcount;
|
|
||||||
};
|
|
||||||
|
|
||||||
#else
|
|
||||||
typedef DFA_State DFA_State_Handle;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include "NFA.h"
|
#include "NFA.h"
|
||||||
|
|
||||||
|
@ -76,9 +42,9 @@ public:
|
||||||
|
|
||||||
int StateNum() const { return state_num; }
|
int StateNum() const { return state_num; }
|
||||||
int NFAStateNum() const { return nfa_states->length(); }
|
int NFAStateNum() const { return nfa_states->length(); }
|
||||||
void AddXtion(int sym, DFA_State_Handle* next_state);
|
void AddXtion(int sym, DFA_State* next_state);
|
||||||
|
|
||||||
inline DFA_State_Handle* Xtion(int sym, DFA_Machine* machine);
|
inline DFA_State* Xtion(int sym, DFA_Machine* machine);
|
||||||
|
|
||||||
const AcceptingSet* Accept() const { return accept; }
|
const AcceptingSet* Accept() const { return accept; }
|
||||||
void SymPartition(const EquivClass* ec);
|
void SymPartition(const EquivClass* ec);
|
||||||
|
@ -98,43 +64,31 @@ public:
|
||||||
void Stats(unsigned int* computed, unsigned int* uncomputed);
|
void Stats(unsigned int* computed, unsigned int* uncomputed);
|
||||||
unsigned int Size();
|
unsigned int Size();
|
||||||
|
|
||||||
// Locking a state will keep it from expiring from a cache.
|
|
||||||
void Lock() { ++lock; }
|
|
||||||
void Unlock() { --lock; }
|
|
||||||
|
|
||||||
#ifdef EXPIRE_DFA_STATES
|
|
||||||
bool IsLocked() { return lock != 0; }
|
|
||||||
#else
|
|
||||||
bool IsLocked() { return true; }
|
|
||||||
DFA_State* operator->(){ return this; }
|
DFA_State* operator->(){ return this; }
|
||||||
#endif
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
friend class DFA_State_Cache;
|
friend class DFA_State_Cache;
|
||||||
|
|
||||||
DFA_State_Handle* ComputeXtion(int sym, DFA_Machine* machine);
|
DFA_State* ComputeXtion(int sym, DFA_Machine* machine);
|
||||||
void AppendIfNew(int sym, int_list* sym_list);
|
void AppendIfNew(int sym, int_list* sym_list);
|
||||||
|
|
||||||
int state_num;
|
int state_num;
|
||||||
int num_sym;
|
int num_sym;
|
||||||
|
|
||||||
DFA_State_Handle** xtions;
|
DFA_State** xtions;
|
||||||
|
|
||||||
AcceptingSet* accept;
|
AcceptingSet* accept;
|
||||||
NFA_state_list* nfa_states;
|
NFA_state_list* nfa_states;
|
||||||
EquivClass* meta_ec; // which ec's make same transition
|
EquivClass* meta_ec; // which ec's make same transition
|
||||||
DFA_State* mark;
|
DFA_State* mark;
|
||||||
int lock;
|
|
||||||
CacheEntry* centry;
|
CacheEntry* centry;
|
||||||
|
|
||||||
static unsigned int transition_counter; // see Xtion()
|
static unsigned int transition_counter; // see Xtion()
|
||||||
};
|
};
|
||||||
|
|
||||||
struct CacheEntry {
|
struct CacheEntry {
|
||||||
DFA_State_Handle* state;
|
DFA_State* state;
|
||||||
HashKey* hash;
|
HashKey* hash;
|
||||||
CacheEntry* next;
|
|
||||||
CacheEntry* prev;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
class DFA_State_Cache {
|
class DFA_State_Cache {
|
||||||
|
@ -143,13 +97,11 @@ public:
|
||||||
~DFA_State_Cache();
|
~DFA_State_Cache();
|
||||||
|
|
||||||
// If the caller stores the handle, it has to call Ref() on it.
|
// If the caller stores the handle, it has to call Ref() on it.
|
||||||
DFA_State_Handle* Lookup(const NFA_state_list& nfa_states,
|
DFA_State* Lookup(const NFA_state_list& nfa_states,
|
||||||
HashKey** hash);
|
HashKey** hash);
|
||||||
|
|
||||||
// Takes ownership of both; hash is the one returned by Lookup().
|
// Takes ownership of both; hash is the one returned by Lookup().
|
||||||
DFA_State_Handle* Insert(DFA_State* state, HashKey* hash);
|
DFA_State* Insert(DFA_State* state, HashKey* hash);
|
||||||
|
|
||||||
void MoveToFront(DFA_State* state) { MoveToFront(state->centry); }
|
|
||||||
|
|
||||||
int NumEntries() const { return states.Length(); }
|
int NumEntries() const { return states.Length(); }
|
||||||
|
|
||||||
|
@ -168,9 +120,6 @@ public:
|
||||||
void GetStats(Stats* s);
|
void GetStats(Stats* s);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void Remove(CacheEntry* e);
|
|
||||||
void MoveToFront(CacheEntry* e);
|
|
||||||
|
|
||||||
int maxsize;
|
int maxsize;
|
||||||
|
|
||||||
int hits; // Statistics
|
int hits; // Statistics
|
||||||
|
@ -180,10 +129,6 @@ private:
|
||||||
|
|
||||||
// Hash indexed by NFA states (MD5s of them, actually).
|
// Hash indexed by NFA states (MD5s of them, actually).
|
||||||
PDict(CacheEntry) states;
|
PDict(CacheEntry) states;
|
||||||
|
|
||||||
// List in LRU order.
|
|
||||||
CacheEntry* head;
|
|
||||||
CacheEntry* tail;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
declare(PList,DFA_State);
|
declare(PList,DFA_State);
|
||||||
|
@ -196,7 +141,7 @@ public:
|
||||||
int* acc_array);
|
int* acc_array);
|
||||||
~DFA_Machine();
|
~DFA_Machine();
|
||||||
|
|
||||||
DFA_State_Handle* StartState() const { return start_state; }
|
DFA_State* StartState() const { return start_state; }
|
||||||
|
|
||||||
int NumStates() const { return dfa_state_cache->NumEntries(); }
|
int NumStates() const { return dfa_state_cache->NumEntries(); }
|
||||||
|
|
||||||
|
@ -217,74 +162,18 @@ protected:
|
||||||
int state_count;
|
int state_count;
|
||||||
|
|
||||||
// The state list has to be sorted according to IDs.
|
// The state list has to be sorted according to IDs.
|
||||||
int StateSetToDFA_State(NFA_state_list* state_set, DFA_State_Handle*& d,
|
int StateSetToDFA_State(NFA_state_list* state_set, DFA_State*& d,
|
||||||
const EquivClass* ec);
|
const EquivClass* ec);
|
||||||
const EquivClass* EC() const { return ec; }
|
const EquivClass* EC() const { return ec; }
|
||||||
|
|
||||||
EquivClass* ec; // equivalence classes corresponding to NFAs
|
EquivClass* ec; // equivalence classes corresponding to NFAs
|
||||||
DFA_State_Handle* start_state;
|
DFA_State* start_state;
|
||||||
DFA_State_Cache* dfa_state_cache;
|
DFA_State_Cache* dfa_state_cache;
|
||||||
|
|
||||||
NFA_Machine* nfa;
|
NFA_Machine* nfa;
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifdef EXPIRE_DFA_STATES
|
inline DFA_State* DFA_State::Xtion(int sym, DFA_Machine* machine)
|
||||||
|
|
||||||
inline DFA_State_Handle* DFA_State::Xtion(int sym, DFA_Machine* machine)
|
|
||||||
{
|
|
||||||
Lock();
|
|
||||||
|
|
||||||
// This is just a clumsy form of sampling... Instead of moving
|
|
||||||
// the state to the front of our LRU cache on each transition (which
|
|
||||||
// would be quite often) we just do it on every nth transition
|
|
||||||
// (counted across all DFA states). This is based on the observation
|
|
||||||
// that a very few of all states are used most of time.
|
|
||||||
// (currently n=10000; should it be configurable?)
|
|
||||||
if ( transition_counter++ % 10000 == 0 )
|
|
||||||
machine->Cache()->MoveToFront(this);
|
|
||||||
|
|
||||||
DFA_State_Handle* h;
|
|
||||||
|
|
||||||
if ( xtions[sym] == DFA_UNCOMPUTED_STATE_PTR ||
|
|
||||||
(xtions[sym] && ! xtions[sym]->IsValid()) )
|
|
||||||
h = ComputeXtion(sym, machine);
|
|
||||||
else
|
|
||||||
h = xtions[sym];
|
|
||||||
|
|
||||||
Unlock();
|
|
||||||
|
|
||||||
return h;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline DFA_State_Handle::~DFA_State_Handle()
|
|
||||||
{
|
|
||||||
if ( state != DFA_INVALID_STATE_PTR )
|
|
||||||
delete state;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void DFA_State_Handle::Invalidate()
|
|
||||||
{
|
|
||||||
assert(state!=DFA_INVALID_STATE_PTR);
|
|
||||||
delete state;
|
|
||||||
state = DFA_INVALID_STATE_PTR;
|
|
||||||
Unref();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Not nice but helps avoiding some overhead in the non-expiration case.
|
|
||||||
static inline void StateLock(DFA_State_Handle* s) { s->State()->Lock(); }
|
|
||||||
static inline void StateUnlock(DFA_State_Handle* s) { s->State()->Unlock(); }
|
|
||||||
static inline void StateRef(DFA_State_Handle* s) { s->Ref(); }
|
|
||||||
static inline void StateUnref(DFA_State_Handle* s) { s->Unref(); }
|
|
||||||
static inline void StateInvalidate(DFA_State_Handle* s) { s->Invalidate(); }
|
|
||||||
|
|
||||||
static inline bool StateIsValid(DFA_State_Handle* s)
|
|
||||||
{
|
|
||||||
return ! s || s->IsValid();
|
|
||||||
}
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
inline DFA_State_Handle* DFA_State::Xtion(int sym, DFA_Machine* machine)
|
|
||||||
{
|
{
|
||||||
if ( xtions[sym] == DFA_UNCOMPUTED_STATE_PTR )
|
if ( xtions[sym] == DFA_UNCOMPUTED_STATE_PTR )
|
||||||
return ComputeXtion(sym, machine);
|
return ComputeXtion(sym, machine);
|
||||||
|
@ -292,13 +181,4 @@ inline DFA_State_Handle* DFA_State::Xtion(int sym, DFA_Machine* machine)
|
||||||
return xtions[sym];
|
return xtions[sym];
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void StateLock(DFA_State_Handle* s) { }
|
|
||||||
static inline void StateUnlock(DFA_State_Handle* s) { }
|
|
||||||
static inline void StateRef(DFA_State_Handle* s) { }
|
|
||||||
static inline void StateUnref(DFA_State_Handle* s) { }
|
|
||||||
static inline void StateInvalidate(DFA_State_Handle* s) { }
|
|
||||||
static inline bool StateIsValid(DFA_State_Handle* s) { return true; }
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
30
src/RE.cc
30
src/RE.cc
|
@ -211,7 +211,7 @@ int Specific_RE_Matcher::MatchAll(const u_char* bv, int n)
|
||||||
// matched is empty.
|
// matched is empty.
|
||||||
return n == 0;
|
return n == 0;
|
||||||
|
|
||||||
DFA_State_Handle* d = dfa->StartState();
|
DFA_State* d = dfa->StartState();
|
||||||
d = (*d)->Xtion(ecs[SYM_BOL], dfa);
|
d = (*d)->Xtion(ecs[SYM_BOL], dfa);
|
||||||
|
|
||||||
while ( d )
|
while ( d )
|
||||||
|
@ -236,7 +236,7 @@ int Specific_RE_Matcher::Match(const u_char* bv, int n)
|
||||||
// An empty pattern matches anything.
|
// An empty pattern matches anything.
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
DFA_State_Handle* d = dfa->StartState();
|
DFA_State* d = dfa->StartState();
|
||||||
|
|
||||||
d = (*d)->Xtion(ecs[SYM_BOL], dfa);
|
d = (*d)->Xtion(ecs[SYM_BOL], dfa);
|
||||||
if ( ! d ) return 0;
|
if ( ! d ) return 0;
|
||||||
|
@ -268,12 +268,6 @@ void Specific_RE_Matcher::Dump(FILE* f)
|
||||||
dfa->Dump(f);
|
dfa->Dump(f);
|
||||||
}
|
}
|
||||||
|
|
||||||
RE_Match_State::~RE_Match_State()
|
|
||||||
{
|
|
||||||
if ( current_state )
|
|
||||||
StateUnref(current_state);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool RE_Match_State::Match(const u_char* bv, int n,
|
bool RE_Match_State::Match(const u_char* bv, int n,
|
||||||
bool bol, bool eol, bool clear)
|
bool bol, bool eol, bool clear)
|
||||||
{
|
{
|
||||||
|
@ -289,7 +283,6 @@ bool RE_Match_State::Match(const u_char* bv, int n,
|
||||||
// Initialize state and copy the accepting states of the start
|
// Initialize state and copy the accepting states of the start
|
||||||
// state into the acceptance set.
|
// state into the acceptance set.
|
||||||
current_state = dfa->StartState();
|
current_state = dfa->StartState();
|
||||||
StateRef(current_state);
|
|
||||||
|
|
||||||
const AcceptingSet* ac = (*current_state)->Accept();
|
const AcceptingSet* ac = (*current_state)->Accept();
|
||||||
if ( ac )
|
if ( ac )
|
||||||
|
@ -303,20 +296,11 @@ bool RE_Match_State::Match(const u_char* bv, int n,
|
||||||
}
|
}
|
||||||
|
|
||||||
else if ( clear )
|
else if ( clear )
|
||||||
{
|
|
||||||
if ( current_state )
|
|
||||||
StateUnref(current_state);
|
|
||||||
|
|
||||||
current_state = dfa->StartState();
|
current_state = dfa->StartState();
|
||||||
StateRef(current_state);
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( ! current_state )
|
if ( ! current_state )
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
else
|
|
||||||
(*current_state)->Unlock();
|
|
||||||
|
|
||||||
current_pos = 0;
|
current_pos = 0;
|
||||||
|
|
||||||
int old_matches = accepted.length();
|
int old_matches = accepted.length();
|
||||||
|
@ -334,7 +318,7 @@ bool RE_Match_State::Match(const u_char* bv, int n,
|
||||||
else
|
else
|
||||||
ec = ecs[*(bv++)];
|
ec = ecs[*(bv++)];
|
||||||
|
|
||||||
DFA_State_Handle* next_state = (*current_state)->Xtion(ec,dfa);
|
DFA_State* next_state = (*current_state)->Xtion(ec,dfa);
|
||||||
|
|
||||||
if ( ! next_state )
|
if ( ! next_state )
|
||||||
{
|
{
|
||||||
|
@ -357,15 +341,9 @@ bool RE_Match_State::Match(const u_char* bv, int n,
|
||||||
|
|
||||||
++current_pos;
|
++current_pos;
|
||||||
|
|
||||||
StateRef(next_state);
|
|
||||||
StateUnref(current_state);
|
|
||||||
current_state = next_state;
|
current_state = next_state;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Make sure our state doesn't expire until we return.
|
|
||||||
if ( current_state )
|
|
||||||
(*current_state)->Lock();
|
|
||||||
|
|
||||||
return accepted.length() != old_matches;
|
return accepted.length() != old_matches;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -377,7 +355,7 @@ int Specific_RE_Matcher::LongestMatch(const u_char* bv, int n)
|
||||||
|
|
||||||
// Use -1 to indicate no match.
|
// Use -1 to indicate no match.
|
||||||
int last_accept = -1;
|
int last_accept = -1;
|
||||||
DFA_State_Handle* d = dfa->StartState();
|
DFA_State* d = dfa->StartState();
|
||||||
|
|
||||||
d = (*d)->Xtion(ecs[SYM_BOL], dfa);
|
d = (*d)->Xtion(ecs[SYM_BOL], dfa);
|
||||||
if ( ! d )
|
if ( ! d )
|
||||||
|
|
12
src/RE.h
12
src/RE.h
|
@ -19,6 +19,7 @@ class NFA_Machine;
|
||||||
class DFA_Machine;
|
class DFA_Machine;
|
||||||
class Specific_RE_Matcher;
|
class Specific_RE_Matcher;
|
||||||
class RE_Matcher;
|
class RE_Matcher;
|
||||||
|
class DFA_State;
|
||||||
|
|
||||||
declare(PDict,char);
|
declare(PDict,char);
|
||||||
declare(PDict,CCL);
|
declare(PDict,CCL);
|
||||||
|
@ -126,13 +127,6 @@ protected:
|
||||||
AcceptingSet* accepted;
|
AcceptingSet* accepted;
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifdef EXPIRE_DFA_STATES
|
|
||||||
class DFA_State_Handle;
|
|
||||||
#else
|
|
||||||
class DFA_State;
|
|
||||||
typedef DFA_State DFA_State_Handle;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
class RE_Match_State {
|
class RE_Match_State {
|
||||||
public:
|
public:
|
||||||
RE_Match_State(Specific_RE_Matcher* matcher)
|
RE_Match_State(Specific_RE_Matcher* matcher)
|
||||||
|
@ -143,8 +137,6 @@ public:
|
||||||
current_state = 0;
|
current_state = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
~RE_Match_State();
|
|
||||||
|
|
||||||
const AcceptingSet* Accepted() const { return &accepted; }
|
const AcceptingSet* Accepted() const { return &accepted; }
|
||||||
const int_list* MatchPositions() const { return &match_pos; }
|
const int_list* MatchPositions() const { return &match_pos; }
|
||||||
|
|
||||||
|
@ -169,7 +161,7 @@ protected:
|
||||||
|
|
||||||
AcceptingSet accepted;
|
AcceptingSet accepted;
|
||||||
int_list match_pos;
|
int_list match_pos;
|
||||||
DFA_State_Handle* current_state;
|
DFA_State* current_state;
|
||||||
int current_pos;
|
int current_pos;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue