DFA: remove uses of PDict

This commit is contained in:
Tim Wojtulewicz 2019-07-26 12:48:19 -07:00
parent acff8d5a2b
commit bbf49406c1
2 changed files with 36 additions and 59 deletions

View file

@ -17,7 +17,6 @@ DFA_State::DFA_State(int arg_state_num, const EquivClass* ec,
nfa_states = arg_nfa_states; nfa_states = arg_nfa_states;
accept = arg_accept; accept = arg_accept;
mark = 0; mark = 0;
centry = 0;
SymPartition(ec); SymPartition(ec);
@ -285,8 +284,7 @@ unsigned int DFA_State::Size()
+ pad_size(sizeof(DFA_State*) * num_sym) + pad_size(sizeof(DFA_State*) * num_sym)
+ (accept ? pad_size(sizeof(int) * accept->size()) : 0) + (accept ? pad_size(sizeof(int) * accept->size()) : 0)
+ (nfa_states ? pad_size(sizeof(NFA_State*) * nfa_states->length()) : 0) + (nfa_states ? pad_size(sizeof(NFA_State*) * nfa_states->length()) : 0)
+ (meta_ec ? meta_ec->Size() : 0) + (meta_ec ? meta_ec->Size() : 0);
+ (centry ? padded_sizeof(CacheEntry) : 0);
} }
DFA_State_Cache::DFA_State_Cache() DFA_State_Cache::DFA_State_Cache()
@ -296,19 +294,16 @@ DFA_State_Cache::DFA_State_Cache()
DFA_State_Cache::~DFA_State_Cache() DFA_State_Cache::~DFA_State_Cache()
{ {
IterCookie* i = states.InitForIteration(); for ( auto& entry : states )
CacheEntry* e;
while ( (e = (CacheEntry*) states.NextEntry(i)) )
{ {
assert(e->state); assert(entry.second);
delete e->hash; Unref(entry.second);
Unref(e->state);
delete e;
}
} }
DFA_State* DFA_State_Cache::Lookup(const NFA_state_list& nfas, states.clear();
HashKey** hash) }
DFA_State* DFA_State_Cache::Lookup(const NFA_state_list& nfas, DigestStr& digest)
{ {
// We assume that state ID's don't exceed 10 digits, plus // We assume that state ID's don't exceed 10 digits, plus
// we allow one more character for the delimiter. // we allow one more character for the delimiter.
@ -335,37 +330,27 @@ DFA_State* DFA_State_Cache::Lookup(const NFA_state_list& nfas,
// We use the short MD5 instead of the full string for the // We use the short MD5 instead of the full string for the
// HashKey because the data is copied into the key. // map key because the data is copied into the key.
u_char digest[16]; u_char digest_bytes[16];
internal_md5(id_tag, p - id_tag, digest); internal_md5(id_tag, p - id_tag, digest_bytes);
digest = DigestStr(digest_bytes, 16);
*hash = new HashKey(&digest, sizeof(digest)); auto entry = states.find(digest);
CacheEntry* e = states.Lookup(*hash); if ( entry == states.end() )
if ( ! e )
{ {
++misses; ++misses;
return 0; return nullptr;
} }
++hits; ++hits;
delete *hash; digest.clear();
*hash = 0;
return e->state; return entry->second;
} }
DFA_State* DFA_State_Cache::Insert(DFA_State* state, HashKey* hash) DFA_State* DFA_State_Cache::Insert(DFA_State* state, const DigestStr& digest)
{ {
CacheEntry* e; states.emplace(digest, state);
return state;
e = new CacheEntry;
e->state = state;
e->state->centry = e;
e->hash = hash;
states.Insert(hash, e);
return e->state;
} }
void DFA_State_Cache::GetStats(Stats* s) void DFA_State_Cache::GetStats(Stats* s)
@ -378,15 +363,13 @@ void DFA_State_Cache::GetStats(Stats* s)
s->hits = hits; s->hits = hits;
s->misses = misses; s->misses = misses;
CacheEntry* e; for ( const auto& state : states )
IterCookie* i = states.InitForIteration();
while ( (e = (CacheEntry*) states.NextEntry(i)) )
{ {
DFA_State* e = state.second;
++s->dfa_states; ++s->dfa_states;
s->nfa_states += e->state->NFAStateNum(); s->nfa_states += e->NFAStateNum();
e->state->Stats(&s->computed, &s->uncomputed); e->Stats(&s->computed, &s->uncomputed);
s->mem += pad_size(e->state->Size()) + padded_sizeof(*e->state); s->mem += pad_size(e->Size()) + padded_sizeof(*e);
} }
} }
@ -407,7 +390,7 @@ DFA_Machine::DFA_Machine(NFA_Machine* n, EquivClass* arg_ec)
if ( ns->length() > 0 ) if ( ns->length() > 0 )
{ {
NFA_state_list* state_set = epsilon_closure(ns); NFA_state_list* state_set = epsilon_closure(ns);
(void) StateSetToDFA_State(state_set, start_state, ec); StateSetToDFA_State(state_set, start_state, ec);
} }
else else
{ {
@ -445,14 +428,14 @@ unsigned int DFA_Machine::MemoryAllocation() const
+ nfa->MemoryAllocation(); + nfa->MemoryAllocation();
} }
int DFA_Machine::StateSetToDFA_State(NFA_state_list* state_set, bool DFA_Machine::StateSetToDFA_State(NFA_state_list* state_set,
DFA_State*& d, const EquivClass* ec) DFA_State*& d, const EquivClass* ec)
{ {
HashKey* hash; DigestStr digest;
d = dfa_state_cache->Lookup(*state_set, &hash); d = dfa_state_cache->Lookup(*state_set, digest);
if ( d ) if ( d )
return 0; return false;
AcceptingSet* accept = new AcceptingSet; AcceptingSet* accept = new AcceptingSet;
@ -471,9 +454,9 @@ int DFA_Machine::StateSetToDFA_State(NFA_state_list* state_set,
} }
DFA_State* ds = new DFA_State(state_count++, ec, state_set, accept); DFA_State* ds = new DFA_State(state_count++, ec, state_set, accept);
d = dfa_state_cache->Insert(ds, hash); d = dfa_state_cache->Insert(ds, digest);
return 1; return true;
} }
int DFA_Machine::Rep(int sym) int DFA_Machine::Rep(int sym)

View file

@ -17,7 +17,6 @@ class DFA_State;
class DFA_Machine; class DFA_Machine;
class DFA_State; class DFA_State;
struct CacheEntry;
class DFA_State : public BroObj { class DFA_State : public BroObj {
public: public:
@ -64,15 +63,11 @@ protected:
NFA_state_list* nfa_states; NFA_state_list* nfa_states;
EquivClass* meta_ec; // which ec's make same transition EquivClass* meta_ec; // which ec's make same transition
DFA_State* mark; DFA_State* mark;
CacheEntry* centry;
static unsigned int transition_counter; // see Xtion() static unsigned int transition_counter; // see Xtion()
}; };
struct CacheEntry { using DigestStr = basic_string<u_char>;
DFA_State* state;
HashKey* hash;
};
class DFA_State_Cache { class DFA_State_Cache {
public: public:
@ -80,13 +75,12 @@ public:
~DFA_State_Cache(); ~DFA_State_Cache();
// If the caller stores the handle, it has to call Ref() on it. // If the caller stores the handle, it has to call Ref() on it.
DFA_State* Lookup(const NFA_state_list& nfa_states, DFA_State* Lookup(const NFA_state_list& nfa_states, DigestStr& digest);
HashKey** hash);
// Takes ownership of both; hash is the one returned by Lookup(). // Takes ownership of state only; digest is the one filled in by a missed Lookup() and is copied.
DFA_State* Insert(DFA_State* state, HashKey* hash); DFA_State* Insert(DFA_State* state, const DigestStr& digest);
int NumEntries() const { return states.Length(); } int NumEntries() const { return states.size(); }
struct Stats { struct Stats {
// Sum of all NFA states // Sum of all NFA states
@ -106,7 +100,7 @@ private:
int misses; int misses;
// Hash indexed by NFA states (MD5s of them, actually). // Map keyed by NFA states (MD5s of them, actually).
PDict<CacheEntry> states; std::map<DigestStr, DFA_State*> states;
}; };
class DFA_Machine : public BroObj { class DFA_Machine : public BroObj {
@ -134,7 +128,7 @@ protected:
int state_count; int state_count;
// The state list has to be sorted according to IDs. // The state list has to be sorted according to IDs.
int StateSetToDFA_State(NFA_state_list* state_set, DFA_State*& d, bool StateSetToDFA_State(NFA_state_list* state_set, DFA_State*& d,
const EquivClass* ec); const EquivClass* ec);
const EquivClass* EC() const { return ec; } const EquivClass* EC() const { return ec; }