zeek/src/NFA.cc

378 lines
7.5 KiB
C++

// See the file "COPYING" in the main distribution directory for copyright.
#include "zeek/zeek-config.h"
#include "zeek/NFA.h"
#include <algorithm>
#include "zeek/Desc.h"
#include "zeek/EquivClass.h"
#include "zeek/IntSet.h"
namespace zeek::detail {
static int nfa_state_id = 0;
NFA_State::NFA_State(int arg_sym, EquivClass* ec)
{
sym = arg_sym;
ccl = nullptr;
accept = NO_ACCEPT;
first_trans_is_back_ref = false;
mark = nullptr;
epsclosure = nullptr;
id = ++nfa_state_id;
// Fix up equivalence classes based on this transition. Note that any
// character which has its own transition gets its own equivalence
// class. Thus only characters which are only in character classes
// have a chance at being in the same equivalence class. E.g. "a|b"
// puts 'a' and 'b' into two different equivalence classes. "[ab]"
// puts them in the same equivalence class (barring other differences
// elsewhere in the input).
if ( ec && sym != SYM_EPSILON /* no associated symbol */ )
ec->UniqueChar(sym);
}
NFA_State::NFA_State(CCL* arg_ccl)
{
sym = SYM_CCL;
ccl = arg_ccl;
accept = NO_ACCEPT;
first_trans_is_back_ref = false;
mark = nullptr;
id = ++nfa_state_id;
epsclosure = nullptr;
}
NFA_State::~NFA_State()
{
for ( int i = 0; i < xtions.length(); ++i )
if ( i > 0 || ! first_trans_is_back_ref )
Unref(xtions[i]);
delete epsclosure;
}
void NFA_State::AddXtionsTo(NFA_state_list* ns)
{
for ( int i = 0; i < xtions.length(); ++i )
ns->push_back(xtions[i]);
}
NFA_State* NFA_State::DeepCopy()
{
if ( mark )
{
Ref(mark);
return mark;
}
NFA_State* copy = ccl ? new NFA_State(ccl) : new NFA_State(sym, nullptr);
SetMark(copy);
for ( int i = 0; i < xtions.length(); ++i )
copy->AddXtion(xtions[i]->DeepCopy());
return copy;
}
void NFA_State::ClearMarks()
{
if ( mark )
{
SetMark(nullptr);
for ( int i = 0; i < xtions.length(); ++i )
xtions[i]->ClearMarks();
}
}
NFA_state_list* NFA_State::EpsilonClosure()
{
if ( epsclosure )
return epsclosure;
epsclosure = new NFA_state_list;
NFA_state_list states;
states.push_back(this);
SetMark(this);
int i;
for ( i = 0; i < states.length(); ++i )
{
NFA_State* ns = states[i];
if ( ns->TransSym() == SYM_EPSILON )
{
NFA_state_list* x = ns->Transitions();
for ( int j = 0; j < x->length(); ++j )
{
NFA_State* nxt = (*x)[j];
if ( ! nxt->Mark() )
{
states.push_back(nxt);
nxt->SetMark(nxt);
}
}
if ( ns->Accept() != NO_ACCEPT )
epsclosure->push_back(ns);
}
else
// Non-epsilon transition - keep it.
epsclosure->push_back(ns);
}
// Clear out markers.
for ( i = 0; i < states.length(); ++i )
states[i]->SetMark(nullptr);
// Make it fit.
epsclosure->resize(0);
return epsclosure;
}
void NFA_State::Describe(ODesc* d) const
{
d->Add("NFA state");
}
void NFA_State::Dump(FILE* f)
{
if ( mark )
return;
fprintf(f, "NFA state %d, sym = %d, accept = %d:\n", id, sym, accept);
for ( int i = 0; i < xtions.length(); ++i )
fprintf(f, "\ttransition to %d\n", xtions[i]->ID());
SetMark(this);
for ( int i = 0; i < xtions.length(); ++i )
xtions[i]->Dump(f);
}
unsigned int NFA_State::TotalMemoryAllocation() const
{
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
return padded_sizeof(*this)
+ xtions.MemoryAllocation() - padded_sizeof(xtions)
+ (epsclosure ? epsclosure->MemoryAllocation() : 0);
#pragma GCC diagnostic pop
}
NFA_Machine::NFA_Machine(NFA_State* first, NFA_State* final)
{
first_state = first;
final_state = final ? final : first;
eol = bol = 0;
}
NFA_Machine::~NFA_Machine()
{
Unref(first_state);
}
void NFA_Machine::InsertEpsilon()
{
NFA_State* eps = new EpsilonState();
eps->AddXtion(first_state);
first_state = eps;
}
void NFA_Machine::AppendEpsilon()
{
AppendState(new EpsilonState());
}
void NFA_Machine::AddAccept(int accept_val)
{
// Hang the accepting number off an epsilon state. If it is associated
// with a state that has a non-epsilon out-transition, then the state
// will accept BEFORE it makes that transition, i.e., one character
// too soon.
if ( final_state->TransSym() != SYM_EPSILON )
AppendState(new EpsilonState());
final_state->SetAccept(accept_val);
}
void NFA_Machine::LinkCopies(int n)
{
if ( n <= 0 )
return;
// Make all the copies before doing any appending, otherwise
// subsequent DuplicateMachine()'s will include the extra
// copies!
NFA_Machine** copies = new NFA_Machine*[n];
int i;
for ( i = 0; i < n; ++i )
copies[i] = DuplicateMachine();
for ( i = 0; i < n; ++i )
AppendMachine(copies[i]);
delete [] copies;
}
NFA_Machine* NFA_Machine::DuplicateMachine()
{
NFA_State* new_first_state = first_state->DeepCopy();
NFA_Machine* new_m = new NFA_Machine(new_first_state, final_state->Mark());
first_state->ClearMarks();
return new_m;
}
void NFA_Machine::AppendState(NFA_State* s)
{
final_state->AddXtion(s);
final_state = s;
}
void NFA_Machine::AppendMachine(NFA_Machine* m)
{
AppendEpsilon();
final_state->AddXtion(m->FirstState());
final_state = m->FinalState();
Ref(m->FirstState()); // so states stay around after the following
Unref(m);
}
void NFA_Machine::MakeOptional()
{
InsertEpsilon();
AppendEpsilon();
first_state->AddXtion(final_state);
Ref(final_state);
}
void NFA_Machine::MakePositiveClosure()
{
AppendEpsilon();
final_state->AddXtion(first_state);
// Don't Ref the state the final epsilon points to, otherwise we'll
// have reference cycles that lead to leaks.
final_state->SetFirstTransIsBackRef();
}
void NFA_Machine::MakeRepl(int lower, int upper)
{
NFA_Machine* dup = nullptr;
if ( upper > lower || upper == NO_UPPER_BOUND )
dup = DuplicateMachine();
LinkCopies(lower - 1);
if ( upper == NO_UPPER_BOUND )
{
dup->MakeClosure();
AppendMachine(dup);
return;
}
while ( upper > lower )
{
NFA_Machine* dup2;
if ( --upper == lower )
// Don't need "dup" for any further copies
dup2 = dup;
else
dup2 = dup->DuplicateMachine();
dup2->MakeOptional();
AppendMachine(dup2);
}
}
void NFA_Machine::Describe(ODesc* d) const
{
d->Add("NFA machine");
}
void NFA_Machine::Dump(FILE* f)
{
first_state->Dump(f);
first_state->ClearMarks();
}
NFA_Machine* make_alternate(NFA_Machine* m1, NFA_Machine* m2)
{
if ( ! m1 )
return m2;
if ( ! m2 )
return m1;
NFA_State* first = new EpsilonState();
NFA_State* last = new EpsilonState();
first->AddXtion(m1->FirstState());
first->AddXtion(m2->FirstState());
m1->AppendState(last);
m2->AppendState(last);
Ref(last);
// Keep these around.
Ref(m1->FirstState());
Ref(m2->FirstState());
Unref(m1);
Unref(m2);
return new NFA_Machine(first, last);
}
NFA_state_list* epsilon_closure(NFA_state_list* states)
{
// We just keep one of this as it may get quite large.
static IntSet closuremap;
closuremap.Clear();
NFA_state_list* closure = new NFA_state_list;
for ( int i = 0; i < states->length(); ++i )
{
NFA_state_list* stateclosure = (*states)[i]->EpsilonClosure();
for ( int j = 0; j < stateclosure->length(); ++j )
{
NFA_State* ns = (*stateclosure)[j];
if ( ! closuremap.Contains(ns->ID()) )
{
closuremap.Insert(ns->ID());
closure->push_back(ns);
}
}
}
// Sort all of the closures in the list by ID
std::sort(closure->begin(), closure->end(), NFA_state_cmp_neg);
// Make it fit.
closure->resize(0);
delete states;
return closure;
}
bool NFA_state_cmp_neg(const NFA_State* v1, const NFA_State* v2)
{
if ( v1->ID() < v2->ID() )
return true;
else
return false;
}
} // namespace zeek::detail