zeek/src/EquivClass.cc
2011-08-04 15:21:18 -05:00

190 lines
4 KiB
C++

// See the file "COPYING" in the main distribution directory for copyright.
#include "config.h"
#include "EquivClass.h"
// Builds the initial partition over arg_size symbols.
//
// All symbols start out on one doubly-linked chain (0 <-> 1 <-> ... <->
// size - 1) expressed through the fwd/bck arrays, and no symbol has been
// given a class number or a representative yet.  The value size + 1 is
// used as the "nil" marker for chain ends, class numbers and
// representatives alike.
EquivClass::EquivClass(int arg_size)
    {
    size = arg_size;
    num_ecs = 0;

    // Allocated lazily by CCL_Use().
    ccl_flags = 0;

    fwd = new int[size];
    bck = new int[size];
    equiv_class = new int[size];
    rep = new int[size];

    ec_nil = no_class = no_rep = size + 1;

    // Nothing has a class number or representative until BuildECs() runs.
    for ( int sym = 0; sym < size; ++sym )
        {
        equiv_class[sym] = no_class;
        rep[sym] = no_rep;
        }

    // Terminate both ends of the chain ...
    bck[0] = ec_nil;
    fwd[size - 1] = ec_nil;

    // ... and link every symbol to its immediate neighbour.
    for ( int sym = 1; sym < size; ++sym )
        {
        fwd[sym - 1] = sym;
        bck[sym] = sym - 1;
        }
    }
// Releases every array owned by this object.
EquivClass::~EquivClass()
    {
    // ccl_flags is still 0 if CCL_Use() never allocated it; delete [] on
    // a null pointer is a no-op, so no check is required.
    delete [] ccl_flags;

    // Per-symbol class number and representative tables.
    delete [] rep;
    delete [] equiv_class;

    // Chain links.
    delete [] bck;
    delete [] fwd;
    }
// Rewrites a character class in terms of equivalence classes.
//
// Every symbol of ccl for which IsRep() holds contributes its
// SymEquivClass() value to a freshly allocated list; all other symbols
// are dropped.  The new list is then handed to ccl via ReplaceSyms(), so
// afterwards the class consists of equivalence classes rather than of
// individual characters.
void EquivClass::ConvertCCL(CCL* ccl)
    {
    int_list* char_syms = ccl->Syms();
    int_list* ec_syms = new int_list;

    for ( int idx = 0; idx < char_syms->length(); ++idx )
        {
        const int ch = (*char_syms)[idx];

        // Only representatives are carried over.
        if ( ! IsRep(ch) )
            continue;

        ec_syms->append(SymEquivClass(ch));
        }

    ccl->ReplaceSyms(ec_syms);
    }
// Assigns equivalence class numbers and representatives.
//
// A symbol whose bck link is nil heads a chain; it receives the next free
// class number and becomes the representative of every symbol reachable
// from it through fwd.  Numbering continues from the current value of
// num_ecs, which this function does not reset.
//
// Returns the value of num_ecs after numbering.
int EquivClass::BuildECs()
    {
    for ( int head = 0; head < size; ++head )
        {
        // Symbols with a predecessor are numbered when their chain head
        // is visited.
        if ( bck[head] != ec_nil )
            continue;

        const int ec = num_ecs++;
        equiv_class[head] = ec;
        rep[head] = head;

        int member = fwd[head];

        while ( member != ec_nil )
            {
            equiv_class[member] = ec;
            rep[member] = head;
            member = fwd[member];
            }
        }

    return num_ecs;
    }
// Refines the current partition against one character class.
//
// For each symbol of ccl not yet handled during this call, the symbols
// that follow it on its current fwd chain are split in two: those that
// also occur in ccl are chained behind the symbol (the "new" class), and
// those that do not are chained behind the symbol's former predecessor
// (the "old" class).
//
// ccl: the character class whose Syms() list is read.  The scan below
// relies on that list being sorted (see the comment at the early break).
void EquivClass::CCL_Use(CCL* ccl)
{
// Note that it doesn't matter whether or not the character class is
// negated. The same results will be obtained in either case.
if ( ! ccl_flags )
{
// First use: allocate `size` flags, all cleared.  Note that the flags
// are indexed by position in the ccl's symbol list, not by symbol value.
// NOTE(review): this assumes a symbol list never has more than `size`
// entries - confirm against CCL.
ccl_flags = new int[size];
for ( int i = 0; i < size; ++i )
ccl_flags[i] = 0;
}
int_list* csyms = ccl->Syms();
// i is advanced only by the "find next ccl member" loop at the bottom.
for ( int i = 0; i < csyms->length(); /* no increment */ )
{
int sym = (*csyms)[i];
// Tail of the chain of symbols that stay in the old class; starts out as
// sym's predecessor, which may be ec_nil.
int old_ec = bck[sym];
// Tail of the chain being built for the new class; starts out at sym.
int new_ec = sym;
// Scan position in csyms.  It is declared outside the k loop, so it is
// not reset from one chain member to the next.
int j = i + 1;
// Walk the remainder of sym's current chain.  The walk ends when k is 0
// or k >= size; ec_nil is set to size + 1 in the constructor, so reaching
// the end-of-chain marker ends the walk.
for ( int k = fwd[sym]; k && k < size; k = fwd[k] )
{ // look for the symbol in the character class
for ( ; j < csyms->length(); ++j )
{
if ( (*csyms)[j] > k )
// Since the character class is sorted,
// we can stop.
break;
if ( (*csyms)[j] == k && ! ccl_flags[j] )
{
// We found an old companion of sym
// in the ccl. Link it into the new
// equivalence class and flag it as
// having been processed.
bck[k] = new_ec;
fwd[new_ec] = k;
new_ec = k;
// Set flag so we don't reprocess.
ccl_flags[j] = 1;
// Get next equivalence class member.
break;
}
}
// j still points at the entry that ended the scan, so this test tells
// "found k" apart from "ran past k" or "ran off the end of the list".
if ( j < csyms->length() && (*csyms)[j] == k )
// We broke out of the above loop by finding
// an old companion - go to the next symbol.
continue;
// Symbol isn't in character class. Put it in the old
// equivalence class.
bck[k] = old_ec;
if ( old_ec != ec_nil )
fwd[old_ec] = k;
old_ec = k;
}
// True when sym had a predecessor, or when at least one chain member was
// handed to the old class (old_ec has moved away from bck[sym]).  In
// either case sym is detached so that it heads the new class, and the
// old chain is terminated at its last member.
if ( bck[sym] != ec_nil || old_ec != bck[sym] )
{
bck[sym] = ec_nil;
fwd[old_ec] = ec_nil;
}
// Terminate the new class's chain at its last member.
fwd[new_ec] = ec_nil;
// Find next ccl member to process.
// Entries flagged above were already linked into a new class; they are
// skipped, and each flag is cleared as it is passed.
for ( ++i; i < csyms->length() && ccl_flags[i]; ++i )
// Reset "doesn't need processing" flag.
ccl_flags[i] = 0;
}
}
// Puts sym into an equivalence class of its own.
//
// If sym currently sits on a chain with other symbols, its neighbours are
// linked to each other so the chain closes over the gap; sym is then left
// with nil links in both directions.
void EquivClass::UniqueChar(int sym)
    {
    const int successor = fwd[sym];
    const int predecessor = bck[sym];

    // Splice sym out of whatever chain it is on.
    if ( successor != ec_nil )
        bck[successor] = predecessor;

    if ( predecessor != ec_nil )
        fwd[predecessor] = successor;

    // A symbol with no neighbours forms a class by itself.
    bck[sym] = ec_nil;
    fwd[sym] = ec_nil;
    }
// Writes a human-readable summary of the mapping to f.
//
// The first line reports the number of symbols and of equivalence
// classes; it is followed by one "map" line per symbol whose class number
// is not 0.
void EquivClass::Dump(FILE* f)
    {
    fprintf(f, "%d symbols in EC yielded %d ecs\n", size, num_ecs);

    for ( int sym = 0; sym < size; ++sym )
        {
        const int ec = SymEquivClass(sym);

        // Class 0 is the usually huge default class; leave it out.
        if ( ec == 0 )
            continue;

        fprintf(f, "map %d ('%c') -> %d\n", sym, sym, ec);
        }
    }
// Reports the memory attributed to this object: the padded size of the
// object itself plus the padded size of its int arrays.
int EquivClass::Size() const
    {
    // fwd, bck, equiv_class and rep always exist; ccl_flags is a fifth
    // array of the same length once it has been allocated.
    const int num_arrays = ccl_flags ? 5 : 4;

    return padded_sizeof(*this) + pad_size(sizeof(int) * size * num_arrays);
    }