zeek/src/EquivClass.cc
2025-05-19 09:50:23 -07:00

172 lines
4.8 KiB
C++

// See the file "COPYING" in the main distribution directory for copyright.
#include "zeek/EquivClass.h"
#include "zeek/CCL.h"
#include "zeek/util.h"
namespace zeek::detail {
EquivClass::EquivClass(int arg_size) {
size = arg_size;
fwd = new int[size];
bck = new int[size];
equiv_class = new int[size];
rep = new int[size];
ccl_flags = nullptr;
num_ecs = 0;
ec_nil = no_class = no_rep = size + 1;
bck[0] = ec_nil;
fwd[size - 1] = ec_nil;
for ( int i = 0; i < size; ++i ) {
if ( i > 0 ) {
fwd[i - 1] = i;
bck[i] = i - 1;
}
equiv_class[i] = no_class;
rep[i] = no_rep;
}
}
EquivClass::~EquivClass() {
delete[] fwd;
delete[] bck;
delete[] equiv_class;
delete[] rep;
delete[] ccl_flags;
}
void EquivClass::ConvertCCL(CCL* ccl) {
// For each character in the class, add the character's
// equivalence class to the new "character" class we are
// creating. Thus when we are all done, the character class
// will really consist of collections of equivalence classes
// instead of collections of characters.
int_list* c_syms = ccl->Syms();
int_list* new_syms = new int_list;
for ( auto sym : *c_syms ) {
if ( IsRep(sym) )
new_syms->push_back(SymEquivClass(sym));
}
ccl->ReplaceSyms(new_syms);
}
int EquivClass::BuildECs() {
// Create equivalence class numbers. If bck[x] is nil,
// then x is the representative of its equivalence class.
for ( int i = 0; i < size; ++i )
if ( bck[i] == ec_nil ) {
equiv_class[i] = num_ecs++;
rep[i] = i;
for ( int j = fwd[i]; j != ec_nil; j = fwd[j] ) {
equiv_class[j] = equiv_class[i];
rep[j] = i;
}
}
return num_ecs;
}
void EquivClass::CCL_Use(CCL* ccl) {
// Note that it doesn't matter whether or not the character class is
// negated. The same results will be obtained in either case.
if ( ! ccl_flags ) {
ccl_flags = new int[size];
for ( int i = 0; i < size; ++i )
ccl_flags[i] = 0;
}
int_list* csyms = ccl->Syms();
for ( size_t i = 0; i < csyms->size(); /* no increment */ ) {
int sym = (*csyms)[i];
int old_ec = bck[sym];
int new_ec = sym;
size_t j = i + 1;
for ( int k = fwd[sym]; k && k < size; k = fwd[k] ) { // look for the symbol in the character class
for ( ; j < csyms->size(); ++j ) {
if ( (*csyms)[j] > k )
// Since the character class is sorted,
// we can stop.
break;
if ( (*csyms)[j] == k && ! ccl_flags[j] ) {
// We found an old companion of sym
// in the ccl. Link it into the new
// equivalence class and flag it as
// having been processed.
bck[k] = new_ec;
fwd[new_ec] = k;
new_ec = k;
// Set flag so we don't reprocess.
ccl_flags[j] = 1;
// Get next equivalence class member.
break;
}
}
if ( j < csyms->size() && (*csyms)[j] == k )
// We broke out of the above loop by finding
// an old companion - go to the next symbol.
continue;
// Symbol isn't in character class. Put it in the old
// equivalence class.
bck[k] = old_ec;
if ( old_ec != ec_nil )
fwd[old_ec] = k;
old_ec = k;
}
if ( bck[sym] != ec_nil || old_ec != bck[sym] ) {
bck[sym] = ec_nil;
fwd[old_ec] = ec_nil;
}
fwd[new_ec] = ec_nil;
// Find next ccl member to process.
for ( ++i; i < csyms->size() && ccl_flags[i]; ++i )
// Reset "doesn't need processing" flag.
ccl_flags[i] = 0;
}
}
void EquivClass::UniqueChar(int sym) {
// If until now the character has been a proper subset of
// an equivalence class, break it away to create a new ec.
if ( fwd[sym] != ec_nil )
bck[fwd[sym]] = bck[sym];
if ( bck[sym] != ec_nil )
fwd[bck[sym]] = fwd[sym];
fwd[sym] = ec_nil;
bck[sym] = ec_nil;
}
void EquivClass::Dump(FILE* f) {
fprintf(f, "%d symbols in EC yielded %d ecs\n", size, num_ecs);
for ( int i = 0; i < size; ++i )
if ( SymEquivClass(i) != 0 ) // skip usually huge default ec
fprintf(f, "map %d ('%c') -> %d\n", i, i, SymEquivClass(i));
}
int EquivClass::Size() const { return padded_sizeof(*this) + util::pad_size(sizeof(int) * size * (ccl_flags ? 5 : 4)); }
} // namespace zeek::detail