mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 06:38:20 +00:00
172 lines
4.8 KiB
C++
172 lines
4.8 KiB
C++
// See the file "COPYING" in the main distribution directory for copyright.
|
|
|
|
#include "zeek/EquivClass.h"
|
|
|
|
#include "zeek/CCL.h"
|
|
#include "zeek/util.h"
|
|
|
|
namespace zeek::detail {
|
|
|
|
EquivClass::EquivClass(int arg_size) {
|
|
size = arg_size;
|
|
fwd = new int[size];
|
|
bck = new int[size];
|
|
equiv_class = new int[size];
|
|
rep = new int[size];
|
|
ccl_flags = nullptr;
|
|
num_ecs = 0;
|
|
|
|
ec_nil = no_class = no_rep = size + 1;
|
|
|
|
bck[0] = ec_nil;
|
|
fwd[size - 1] = ec_nil;
|
|
|
|
for ( int i = 0; i < size; ++i ) {
|
|
if ( i > 0 ) {
|
|
fwd[i - 1] = i;
|
|
bck[i] = i - 1;
|
|
}
|
|
|
|
equiv_class[i] = no_class;
|
|
rep[i] = no_rep;
|
|
}
|
|
}
|
|
|
|
EquivClass::~EquivClass() {
|
|
delete[] fwd;
|
|
delete[] bck;
|
|
delete[] equiv_class;
|
|
delete[] rep;
|
|
delete[] ccl_flags;
|
|
}
|
|
|
|
void EquivClass::ConvertCCL(CCL* ccl) {
|
|
// For each character in the class, add the character's
|
|
// equivalence class to the new "character" class we are
|
|
// creating. Thus when we are all done, the character class
|
|
// will really consist of collections of equivalence classes
|
|
// instead of collections of characters.
|
|
|
|
int_list* c_syms = ccl->Syms();
|
|
int_list* new_syms = new int_list;
|
|
|
|
for ( auto sym : *c_syms ) {
|
|
if ( IsRep(sym) )
|
|
new_syms->push_back(SymEquivClass(sym));
|
|
}
|
|
|
|
ccl->ReplaceSyms(new_syms);
|
|
}
|
|
|
|
int EquivClass::BuildECs() {
|
|
// Create equivalence class numbers. If bck[x] is nil,
|
|
// then x is the representative of its equivalence class.
|
|
|
|
for ( int i = 0; i < size; ++i )
|
|
if ( bck[i] == ec_nil ) {
|
|
equiv_class[i] = num_ecs++;
|
|
rep[i] = i;
|
|
for ( int j = fwd[i]; j != ec_nil; j = fwd[j] ) {
|
|
equiv_class[j] = equiv_class[i];
|
|
rep[j] = i;
|
|
}
|
|
}
|
|
|
|
return num_ecs;
|
|
}
|
|
|
|
void EquivClass::CCL_Use(CCL* ccl) {
|
|
// Note that it doesn't matter whether or not the character class is
|
|
// negated. The same results will be obtained in either case.
|
|
|
|
if ( ! ccl_flags ) {
|
|
ccl_flags = new int[size];
|
|
for ( int i = 0; i < size; ++i )
|
|
ccl_flags[i] = 0;
|
|
}
|
|
|
|
int_list* csyms = ccl->Syms();
|
|
for ( size_t i = 0; i < csyms->size(); /* no increment */ ) {
|
|
int sym = (*csyms)[i];
|
|
|
|
int old_ec = bck[sym];
|
|
int new_ec = sym;
|
|
|
|
size_t j = i + 1;
|
|
|
|
for ( int k = fwd[sym]; k && k < size; k = fwd[k] ) { // look for the symbol in the character class
|
|
for ( ; j < csyms->size(); ++j ) {
|
|
if ( (*csyms)[j] > k )
|
|
// Since the character class is sorted,
|
|
// we can stop.
|
|
break;
|
|
|
|
if ( (*csyms)[j] == k && ! ccl_flags[j] ) {
|
|
// We found an old companion of sym
|
|
// in the ccl. Link it into the new
|
|
// equivalence class and flag it as
|
|
// having been processed.
|
|
bck[k] = new_ec;
|
|
fwd[new_ec] = k;
|
|
new_ec = k;
|
|
|
|
// Set flag so we don't reprocess.
|
|
ccl_flags[j] = 1;
|
|
|
|
// Get next equivalence class member.
|
|
break;
|
|
}
|
|
}
|
|
|
|
if ( j < csyms->size() && (*csyms)[j] == k )
|
|
// We broke out of the above loop by finding
|
|
// an old companion - go to the next symbol.
|
|
continue;
|
|
|
|
// Symbol isn't in character class. Put it in the old
|
|
// equivalence class.
|
|
bck[k] = old_ec;
|
|
if ( old_ec != ec_nil )
|
|
fwd[old_ec] = k;
|
|
|
|
old_ec = k;
|
|
}
|
|
|
|
if ( bck[sym] != ec_nil || old_ec != bck[sym] ) {
|
|
bck[sym] = ec_nil;
|
|
fwd[old_ec] = ec_nil;
|
|
}
|
|
|
|
fwd[new_ec] = ec_nil;
|
|
|
|
// Find next ccl member to process.
|
|
for ( ++i; i < csyms->size() && ccl_flags[i]; ++i )
|
|
// Reset "doesn't need processing" flag.
|
|
ccl_flags[i] = 0;
|
|
}
|
|
}
|
|
|
|
void EquivClass::UniqueChar(int sym) {
|
|
// If until now the character has been a proper subset of
|
|
// an equivalence class, break it away to create a new ec.
|
|
|
|
if ( fwd[sym] != ec_nil )
|
|
bck[fwd[sym]] = bck[sym];
|
|
|
|
if ( bck[sym] != ec_nil )
|
|
fwd[bck[sym]] = fwd[sym];
|
|
|
|
fwd[sym] = ec_nil;
|
|
bck[sym] = ec_nil;
|
|
}
|
|
|
|
void EquivClass::Dump(FILE* f) {
|
|
fprintf(f, "%d symbols in EC yielded %d ecs\n", size, num_ecs);
|
|
for ( int i = 0; i < size; ++i )
|
|
if ( SymEquivClass(i) != 0 ) // skip usually huge default ec
|
|
fprintf(f, "map %d ('%c') -> %d\n", i, i, SymEquivClass(i));
|
|
}
|
|
|
|
int EquivClass::Size() const { return padded_sizeof(*this) + util::pad_size(sizeof(int) * size * (ccl_flags ? 5 : 4)); }
|
|
|
|
} // namespace zeek::detail
|