mirror of
https://github.com/zeek/zeek.git
synced 2025-10-05 08:08:19 +00:00
Move BroSubstring to zeek::detail, rename to Substring
This commit is contained in:
parent
c7dc7fc955
commit
55d699af59
3 changed files with 74 additions and 58 deletions
|
@ -11,14 +11,16 @@
|
||||||
#include "Reporter.h"
|
#include "Reporter.h"
|
||||||
#include "Val.h"
|
#include "Val.h"
|
||||||
|
|
||||||
BroSubstring::BroSubstring(const BroSubstring& bst)
|
namespace zeek::detail {
|
||||||
|
|
||||||
|
Substring::Substring(const Substring& bst)
|
||||||
: zeek::String((const zeek::String&) bst), _num(), _new(bst._new)
|
: zeek::String((const zeek::String&) bst), _num(), _new(bst._new)
|
||||||
{
|
{
|
||||||
for ( BSSAlignVecCIt it = bst._aligns.begin(); it != bst._aligns.end(); ++it )
|
for ( BSSAlignVecCIt it = bst._aligns.begin(); it != bst._aligns.end(); ++it )
|
||||||
_aligns.push_back(*it);
|
_aligns.push_back(*it);
|
||||||
}
|
}
|
||||||
|
|
||||||
const BroSubstring& BroSubstring::operator=(const BroSubstring& bst)
|
const Substring& Substring::operator=(const Substring& bst)
|
||||||
{
|
{
|
||||||
zeek::String::operator=(bst);
|
zeek::String::operator=(bst);
|
||||||
|
|
||||||
|
@ -32,12 +34,12 @@ const BroSubstring& BroSubstring::operator=(const BroSubstring& bst)
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
void BroSubstring::AddAlignment(const zeek::String* str, int index)
|
void Substring::AddAlignment(const zeek::String* str, int index)
|
||||||
{
|
{
|
||||||
_aligns.push_back(BSSAlign(str, index));
|
_aligns.push_back(BSSAlign(str, index));
|
||||||
}
|
}
|
||||||
|
|
||||||
bool BroSubstring::DoesCover(const BroSubstring* bst) const
|
bool Substring::DoesCover(const Substring* bst) const
|
||||||
{
|
{
|
||||||
if ( _aligns.size() != bst->_aligns.size() )
|
if ( _aligns.size() != bst->_aligns.size() )
|
||||||
return false;
|
return false;
|
||||||
|
@ -56,7 +58,7 @@ bool BroSubstring::DoesCover(const BroSubstring* bst) const
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
zeek::VectorVal* BroSubstring::VecToPolicy(Vec* vec)
|
zeek::VectorVal* Substring::VecToPolicy(Vec* vec)
|
||||||
{
|
{
|
||||||
static auto sw_substring_type = zeek::id::find_type<zeek::RecordType>("sw_substring");
|
static auto sw_substring_type = zeek::id::find_type<zeek::RecordType>("sw_substring");
|
||||||
static auto sw_align_type = zeek::id::find_type<zeek::RecordType>("sw_align");
|
static auto sw_align_type = zeek::id::find_type<zeek::RecordType>("sw_align");
|
||||||
|
@ -69,7 +71,7 @@ zeek::VectorVal* BroSubstring::VecToPolicy(Vec* vec)
|
||||||
{
|
{
|
||||||
for ( size_t i = 0; i < vec->size(); ++i )
|
for ( size_t i = 0; i < vec->size(); ++i )
|
||||||
{
|
{
|
||||||
BroSubstring* bst = (*vec)[i];
|
Substring* bst = (*vec)[i];
|
||||||
|
|
||||||
auto st_val = zeek::make_intrusive<zeek::RecordVal>(sw_substring_type);
|
auto st_val = zeek::make_intrusive<zeek::RecordVal>(sw_substring_type);
|
||||||
st_val->Assign(0, zeek::make_intrusive<zeek::StringVal>(new zeek::String(*bst)));
|
st_val->Assign(0, zeek::make_intrusive<zeek::StringVal>(new zeek::String(*bst)));
|
||||||
|
@ -96,7 +98,7 @@ zeek::VectorVal* BroSubstring::VecToPolicy(Vec* vec)
|
||||||
return result.release();
|
return result.release();
|
||||||
}
|
}
|
||||||
|
|
||||||
BroSubstring::Vec* BroSubstring::VecFromPolicy(zeek::VectorVal* vec)
|
Substring::Vec* Substring::VecFromPolicy(zeek::VectorVal* vec)
|
||||||
{
|
{
|
||||||
Vec* result = new Vec();
|
Vec* result = new Vec();
|
||||||
|
|
||||||
|
@ -108,7 +110,7 @@ BroSubstring::Vec* BroSubstring::VecFromPolicy(zeek::VectorVal* vec)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
const zeek::String* str = v->AsRecordVal()->GetField(0)->AsString();
|
const zeek::String* str = v->AsRecordVal()->GetField(0)->AsString();
|
||||||
BroSubstring* substr = new BroSubstring(*str);
|
auto* substr = new Substring(*str);
|
||||||
|
|
||||||
const zeek::VectorVal* aligns = v->AsRecordVal()->GetField(1)->AsVectorVal();
|
const zeek::VectorVal* aligns = v->AsRecordVal()->GetField(1)->AsVectorVal();
|
||||||
for ( unsigned int j = 1; j <= aligns->Size(); ++j )
|
for ( unsigned int j = 1; j <= aligns->Size(); ++j )
|
||||||
|
@ -128,11 +130,11 @@ BroSubstring::Vec* BroSubstring::VecFromPolicy(zeek::VectorVal* vec)
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
char* BroSubstring::VecToString(Vec* vec)
|
char* Substring::VecToString(Vec* vec)
|
||||||
{
|
{
|
||||||
std::string result("[");
|
std::string result("[");
|
||||||
|
|
||||||
for ( BroSubstring::VecIt it = vec->begin(); it != vec->end(); ++it )
|
for ( Substring::VecIt it = vec->begin(); it != vec->end(); ++it )
|
||||||
{
|
{
|
||||||
result += (*it)->CheckString();
|
result += (*it)->CheckString();
|
||||||
result += ",";
|
result += ",";
|
||||||
|
@ -142,14 +144,14 @@ char* BroSubstring::VecToString(Vec* vec)
|
||||||
return strdup(result.c_str());
|
return strdup(result.c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
zeek::String::IdxVec* BroSubstring::GetOffsetsVec(const Vec* vec, unsigned int index)
|
zeek::String::IdxVec* Substring::GetOffsetsVec(const Vec* vec, unsigned int index)
|
||||||
{
|
{
|
||||||
zeek::String::IdxVec* result = new zeek::String::IdxVec();
|
zeek::String::IdxVec* result = new zeek::String::IdxVec();
|
||||||
|
|
||||||
for ( VecCIt it = vec->begin(); it != vec->end(); ++it )
|
for ( VecCIt it = vec->begin(); it != vec->end(); ++it )
|
||||||
{
|
{
|
||||||
int start, end;
|
int start, end;
|
||||||
const BroSubstring* bst = (*it);
|
const Substring* bst = (*it);
|
||||||
|
|
||||||
if ( bst->_aligns.size() <= index )
|
if ( bst->_aligns.size() <= index )
|
||||||
continue;
|
continue;
|
||||||
|
@ -166,13 +168,13 @@ zeek::String::IdxVec* BroSubstring::GetOffsetsVec(const Vec* vec, unsigned int i
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool BroSubstringCmp::operator()(const BroSubstring* bst1,
|
bool SubstringCmp::operator()(const Substring* bst1,
|
||||||
const BroSubstring* bst2) const
|
const Substring* bst2) const
|
||||||
{
|
{
|
||||||
if ( _index >= bst1->GetNumAlignments() ||
|
if ( _index >= bst1->GetNumAlignments() ||
|
||||||
_index >= bst2->GetNumAlignments() )
|
_index >= bst2->GetNumAlignments() )
|
||||||
{
|
{
|
||||||
reporter->Warning("BroSubstringCmp::operator(): invalid index for input strings.\n");
|
reporter->Warning("SubstringCmp::operator(): invalid index for input strings.\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -260,7 +262,7 @@ private:
|
||||||
// @node: starting node.
|
// @node: starting node.
|
||||||
// @params: SW parameters.
|
// @params: SW parameters.
|
||||||
//
|
//
|
||||||
static void sw_collect_single(BroSubstring::Vec* result, SWNodeMatrix& matrix,
|
static void sw_collect_single(Substring::Vec* result, SWNodeMatrix& matrix,
|
||||||
SWNode* node, SWParams& params)
|
SWNode* node, SWParams& params)
|
||||||
{
|
{
|
||||||
std::string substring("");
|
std::string substring("");
|
||||||
|
@ -287,7 +289,7 @@ static void sw_collect_single(BroSubstring::Vec* result, SWNodeMatrix& matrix,
|
||||||
if ( substring.size() >= params._min_toklen )
|
if ( substring.size() >= params._min_toklen )
|
||||||
{
|
{
|
||||||
reverse(substring.begin(), substring.end());
|
reverse(substring.begin(), substring.end());
|
||||||
BroSubstring* bst = new BroSubstring(substring);
|
auto* bst = new Substring(substring);
|
||||||
bst->AddAlignment(matrix.GetRowsString(), row-1);
|
bst->AddAlignment(matrix.GetRowsString(), row-1);
|
||||||
bst->AddAlignment(matrix.GetColsString(), col-1);
|
bst->AddAlignment(matrix.GetColsString(), col-1);
|
||||||
result->push_back(bst);
|
result->push_back(bst);
|
||||||
|
@ -305,7 +307,7 @@ static void sw_collect_single(BroSubstring::Vec* result, SWNodeMatrix& matrix,
|
||||||
if ( substring.size() > 0 )
|
if ( substring.size() > 0 )
|
||||||
{
|
{
|
||||||
reverse(substring.begin(), substring.end());
|
reverse(substring.begin(), substring.end());
|
||||||
BroSubstring* bst = new BroSubstring(substring);
|
auto* bst = new Substring(substring);
|
||||||
bst->AddAlignment(matrix.GetRowsString(), row-1);
|
bst->AddAlignment(matrix.GetRowsString(), row-1);
|
||||||
bst->AddAlignment(matrix.GetColsString(), col-1);
|
bst->AddAlignment(matrix.GetColsString(), col-1);
|
||||||
result->push_back(bst);
|
result->push_back(bst);
|
||||||
|
@ -324,10 +326,10 @@ static void sw_collect_single(BroSubstring::Vec* result, SWNodeMatrix& matrix,
|
||||||
// common subsequences while tracking which nodes were visited earlier and which
|
// common subsequences while tracking which nodes were visited earlier and which
|
||||||
// substrings are redundant (i.e., fully covered by a larger common substring).
|
// substrings are redundant (i.e., fully covered by a larger common substring).
|
||||||
//
|
//
|
||||||
static void sw_collect_multiple(BroSubstring::Vec* result,
|
static void sw_collect_multiple(Substring::Vec* result,
|
||||||
SWNodeMatrix& matrix, SWParams& params)
|
SWNodeMatrix& matrix, SWParams& params)
|
||||||
{
|
{
|
||||||
std::vector<BroSubstring::Vec*> als;
|
std::vector<Substring::Vec*> als;
|
||||||
|
|
||||||
for ( int i = matrix.GetHeight() - 1; i > 0; --i )
|
for ( int i = matrix.GetHeight() - 1; i > 0; --i )
|
||||||
{
|
{
|
||||||
|
@ -338,21 +340,21 @@ static void sw_collect_multiple(BroSubstring::Vec* result,
|
||||||
if ( ! (node->swn_byte_assigned && ! node->swn_visited) )
|
if ( ! (node->swn_byte_assigned && ! node->swn_visited) )
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
BroSubstring::Vec* new_al = new BroSubstring::Vec();
|
auto* new_al = new Substring::Vec();
|
||||||
sw_collect_single(new_al, matrix, node, params);
|
sw_collect_single(new_al, matrix, node, params);
|
||||||
|
|
||||||
for ( std::vector<BroSubstring::Vec*>::iterator it = als.begin();
|
for ( std::vector<Substring::Vec*>::iterator it = als.begin();
|
||||||
it != als.end(); ++it )
|
it != als.end(); ++it )
|
||||||
{
|
{
|
||||||
BroSubstring::Vec* old_al = *it;
|
Substring::Vec* old_al = *it;
|
||||||
|
|
||||||
if ( old_al == nullptr )
|
if ( old_al == nullptr )
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
for ( BroSubstring::VecIt it2 = old_al->begin();
|
for ( Substring::VecIt it2 = old_al->begin();
|
||||||
it2 != old_al->end(); ++it2 )
|
it2 != old_al->end(); ++it2 )
|
||||||
{
|
{
|
||||||
for ( BroSubstring::VecIt it3 = new_al->begin();
|
for ( Substring::VecIt it3 = new_al->begin();
|
||||||
it3 != new_al->end(); ++it3 )
|
it3 != new_al->end(); ++it3 )
|
||||||
{
|
{
|
||||||
if ( (*it2)->DoesCover(*it3) )
|
if ( (*it2)->DoesCover(*it3) )
|
||||||
|
@ -380,15 +382,15 @@ end_loop:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for ( std::vector<BroSubstring::Vec*>::iterator it = als.begin();
|
for ( std::vector<Substring::Vec*>::iterator it = als.begin();
|
||||||
it != als.end(); ++it )
|
it != als.end(); ++it )
|
||||||
{
|
{
|
||||||
BroSubstring::Vec* al = *it;
|
Substring::Vec* al = *it;
|
||||||
|
|
||||||
if ( al == nullptr )
|
if ( al == nullptr )
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
for ( BroSubstring::VecIt it2 = al->begin();
|
for ( Substring::VecIt it2 = al->begin();
|
||||||
it2 != al->end(); ++it2 )
|
it2 != al->end(); ++it2 )
|
||||||
result->push_back(*it2);
|
result->push_back(*it2);
|
||||||
|
|
||||||
|
@ -398,10 +400,10 @@ end_loop:
|
||||||
|
|
||||||
// The main Smith-Waterman algorithm.
|
// The main Smith-Waterman algorithm.
|
||||||
//
|
//
|
||||||
BroSubstring::Vec* smith_waterman(const zeek::String* s1, const zeek::String* s2,
|
Substring::Vec* smith_waterman(const zeek::String* s1, const zeek::String* s2,
|
||||||
SWParams& params)
|
SWParams& params)
|
||||||
{
|
{
|
||||||
BroSubstring::Vec* result = new BroSubstring::Vec();
|
auto* result = new Substring::Vec();
|
||||||
|
|
||||||
if ( ! s1 || s1->Len() < int(params._min_toklen) ||
|
if ( ! s1 || s1->Len() < int(params._min_toklen) ||
|
||||||
! s2 || s2->Len() < int(params._min_toklen) )
|
! s2 || s2->Len() < int(params._min_toklen) )
|
||||||
|
@ -558,9 +560,11 @@ BroSubstring::Vec* smith_waterman(const zeek::String* s1, const zeek::String* s2
|
||||||
sw_collect_single(result, matrix, node_max, params);
|
sw_collect_single(result, matrix, node_max, params);
|
||||||
|
|
||||||
if ( len1 > len2 )
|
if ( len1 > len2 )
|
||||||
sort(result->begin(), result->end(), BroSubstringCmp(0));
|
sort(result->begin(), result->end(), SubstringCmp(0));
|
||||||
else
|
else
|
||||||
sort(result->begin(), result->end(), BroSubstringCmp(1));
|
sort(result->begin(), result->end(), SubstringCmp(1));
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // namespace zeek::detail
|
||||||
|
|
|
@ -5,16 +5,18 @@
|
||||||
#include "ZeekString.h"
|
#include "ZeekString.h"
|
||||||
#include <map>
|
#include <map>
|
||||||
|
|
||||||
// BroSubstrings are essentially BroStrings, augmented with indexing
|
namespace zeek::detail {
|
||||||
// information required for the Smith-Waterman algorithm. Each substring
|
|
||||||
// can be marked as being a common substring of arbitrarily many strings,
|
/**
|
||||||
// for each of which we store where the substring starts.
|
* Substrings are essentially Strings, augmented with indexing information
|
||||||
//
|
* required for the Smith-Waterman algorithm. Each substring can be
|
||||||
//
|
* marked as being a common substring of arbitrarily many strings, for each
|
||||||
class BroSubstring : public zeek::String {
|
* of which we store where the substring starts.
|
||||||
|
*/
|
||||||
|
class Substring : public zeek::String {
|
||||||
|
|
||||||
public:
|
public:
|
||||||
typedef std::vector<BroSubstring*> Vec;
|
typedef std::vector<Substring*> Vec;
|
||||||
typedef Vec::iterator VecIt;
|
typedef Vec::iterator VecIt;
|
||||||
typedef Vec::const_iterator VecCIt;
|
typedef Vec::const_iterator VecCIt;
|
||||||
|
|
||||||
|
@ -39,22 +41,22 @@ public:
|
||||||
typedef BSSAlignVec::iterator BSSAlignVecIt;
|
typedef BSSAlignVec::iterator BSSAlignVecIt;
|
||||||
typedef BSSAlignVec::const_iterator BSSAlignVecCIt;
|
typedef BSSAlignVec::const_iterator BSSAlignVecCIt;
|
||||||
|
|
||||||
explicit BroSubstring(const std::string& string)
|
explicit Substring(const std::string& string)
|
||||||
: zeek::String(string), _num(), _new(false) { }
|
: zeek::String(string), _num(), _new(false) { }
|
||||||
|
|
||||||
explicit BroSubstring(const zeek::String& string)
|
explicit Substring(const zeek::String& string)
|
||||||
: zeek::String(string), _num(), _new(false) { }
|
: zeek::String(string), _num(), _new(false) { }
|
||||||
|
|
||||||
BroSubstring(const BroSubstring& bst);
|
Substring(const Substring& bst);
|
||||||
|
|
||||||
const BroSubstring& operator=(const BroSubstring& bst);
|
const Substring& operator=(const Substring& bst);
|
||||||
|
|
||||||
// Returns true if this string completely covers the given one.
|
// Returns true if this string completely covers the given one.
|
||||||
// "Covering" means that the substring must be at least as long
|
// "Covering" means that the substring must be at least as long
|
||||||
// as the one compared to, and completely covers the range occupied
|
// as the one compared to, and completely covers the range occupied
|
||||||
// by the given one.
|
// by the given one.
|
||||||
//
|
//
|
||||||
bool DoesCover(const BroSubstring* bst) const;
|
bool DoesCover(const Substring* bst) const;
|
||||||
|
|
||||||
void AddAlignment(const zeek::String* string, int index);
|
void AddAlignment(const zeek::String* string, int index);
|
||||||
const BSSAlignVec& GetAlignments() const { return _aligns; }
|
const BSSAlignVec& GetAlignments() const { return _aligns; }
|
||||||
|
@ -78,7 +80,7 @@ private:
|
||||||
typedef std::map<std::string, void*> DataMap;
|
typedef std::map<std::string, void*> DataMap;
|
||||||
typedef DataMap::iterator DataMapIt;
|
typedef DataMap::iterator DataMapIt;
|
||||||
|
|
||||||
BroSubstring();
|
Substring();
|
||||||
|
|
||||||
// The alignments registered for this substring.
|
// The alignments registered for this substring.
|
||||||
BSSAlignVec _aligns;
|
BSSAlignVec _aligns;
|
||||||
|
@ -90,13 +92,13 @@ private:
|
||||||
bool _new;
|
bool _new;
|
||||||
};
|
};
|
||||||
|
|
||||||
// A comparison class that sorts BroSubstrings according to the string
|
// A comparison class that sorts Substrings according to the string
|
||||||
// offset value of the nth input string, where "nth" starts from 0.
|
// offset value of the nth input string, where "nth" starts from 0.
|
||||||
//
|
//
|
||||||
class BroSubstringCmp {
|
class SubstringCmp {
|
||||||
public:
|
public:
|
||||||
explicit BroSubstringCmp(unsigned int index) { _index = index; }
|
explicit SubstringCmp(unsigned int index) { _index = index; }
|
||||||
bool operator()(const BroSubstring* bst1, const BroSubstring* bst2) const;
|
bool operator()(const Substring* bst1, const Substring* bst2) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
unsigned int _index;
|
unsigned int _index;
|
||||||
|
@ -148,6 +150,16 @@ struct SWParams {
|
||||||
// input strings where the string occurs. On error, or if no common
|
// input strings where the string occurs. On error, or if no common
|
||||||
// subsequence exists, an empty vector is returned.
|
// subsequence exists, an empty vector is returned.
|
||||||
//
|
//
|
||||||
extern BroSubstring::Vec* smith_waterman(const zeek::String* s1,
|
extern Substring::Vec* smith_waterman(const zeek::String* s1,
|
||||||
const zeek::String* s2,
|
const zeek::String* s2,
|
||||||
SWParams& params);
|
SWParams& params);
|
||||||
|
|
||||||
|
} // namespace zeek::detail
|
||||||
|
|
||||||
|
using BroSubstring [[deprecated("Remove in v4.1. Use zeek::detail::Substring.")]] = zeek::detail::Substring;
|
||||||
|
using BroSubstringCmp [[deprecated("Remove in v4.1. Use zeek::detail::SubstringCmp.")]] = zeek::detail::SubstringCmp;
|
||||||
|
using SWParams [[deprecated("Remove in v4.1. Use zeek::detail::SWParams.")]] = zeek::detail::SWParams;
|
||||||
|
|
||||||
|
constexpr auto SW_SINGLE [[deprecated("Remove in v4.1. Use zeek::detail::SW_SINGLE.")]] = zeek::detail::SW_SINGLE;
|
||||||
|
constexpr auto SW_MULTIPLE [[deprecated("Remove in v4.1. Use zeek::detail::SW_MULTIPLE.")]] = zeek::detail::SW_MULTIPLE;
|
||||||
|
constexpr auto smith_waterman [[deprecated("Remove in v4.1. Use zeek::detail::smith_waterman.")]] = zeek::detail::smith_waterman;
|
||||||
|
|
|
@ -681,12 +681,12 @@ function string_to_ascii_hex%(s: string%): string
|
||||||
## Returns: The result of the Smith-Waterman algorithm calculation.
|
## Returns: The result of the Smith-Waterman algorithm calculation.
|
||||||
function str_smith_waterman%(s1: string, s2: string, params: sw_params%) : sw_substring_vec
|
function str_smith_waterman%(s1: string, s2: string, params: sw_params%) : sw_substring_vec
|
||||||
%{
|
%{
|
||||||
SWParams sw_params(params->AsRecordVal()->GetField(0)->AsCount(),
|
zeek::detail::SWParams sw_params(
|
||||||
SWVariant(params->AsRecordVal()->GetField(1)->AsCount()));
|
params->AsRecordVal()->GetField(0)->AsCount(),
|
||||||
|
zeek::detail::SWVariant(params->AsRecordVal()->GetField(1)->AsCount()));
|
||||||
|
|
||||||
BroSubstring::Vec* subseq =
|
auto* subseq = zeek::detail::smith_waterman(s1->AsString(), s2->AsString(), sw_params);
|
||||||
smith_waterman(s1->AsString(), s2->AsString(), sw_params);
|
auto result = zeek::VectorValPtr{zeek::AdoptRef{}, zeek::detail::Substring::VecToPolicy(subseq)};
|
||||||
auto result = zeek::VectorValPtr{zeek::AdoptRef{}, BroSubstring::VecToPolicy(subseq)};
|
|
||||||
delete_each(subseq);
|
delete_each(subseq);
|
||||||
delete subseq;
|
delete subseq;
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue