diff --git a/src/SmithWaterman.cc b/src/SmithWaterman.cc index b4b23a325b..e67a65b9e7 100644 --- a/src/SmithWaterman.cc +++ b/src/SmithWaterman.cc @@ -11,14 +11,16 @@ #include "Reporter.h" #include "Val.h" -BroSubstring::BroSubstring(const BroSubstring& bst) +namespace zeek::detail { + +Substring::Substring(const Substring& bst) : zeek::String((const zeek::String&) bst), _num(), _new(bst._new) { for ( BSSAlignVecCIt it = bst._aligns.begin(); it != bst._aligns.end(); ++it ) _aligns.push_back(*it); } -const BroSubstring& BroSubstring::operator=(const BroSubstring& bst) +const Substring& Substring::operator=(const Substring& bst) { zeek::String::operator=(bst); @@ -32,12 +34,12 @@ const BroSubstring& BroSubstring::operator=(const BroSubstring& bst) return *this; } -void BroSubstring::AddAlignment(const zeek::String* str, int index) +void Substring::AddAlignment(const zeek::String* str, int index) { _aligns.push_back(BSSAlign(str, index)); } -bool BroSubstring::DoesCover(const BroSubstring* bst) const +bool Substring::DoesCover(const Substring* bst) const { if ( _aligns.size() != bst->_aligns.size() ) return false; @@ -56,7 +58,7 @@ bool BroSubstring::DoesCover(const BroSubstring* bst) const return true; } -zeek::VectorVal* BroSubstring::VecToPolicy(Vec* vec) +zeek::VectorVal* Substring::VecToPolicy(Vec* vec) { static auto sw_substring_type = zeek::id::find_type("sw_substring"); static auto sw_align_type = zeek::id::find_type("sw_align"); @@ -69,7 +71,7 @@ zeek::VectorVal* BroSubstring::VecToPolicy(Vec* vec) { for ( size_t i = 0; i < vec->size(); ++i ) { - BroSubstring* bst = (*vec)[i]; + Substring* bst = (*vec)[i]; auto st_val = zeek::make_intrusive(sw_substring_type); st_val->Assign(0, zeek::make_intrusive(new zeek::String(*bst))); @@ -96,7 +98,7 @@ zeek::VectorVal* BroSubstring::VecToPolicy(Vec* vec) return result.release(); } -BroSubstring::Vec* BroSubstring::VecFromPolicy(zeek::VectorVal* vec) +Substring::Vec* Substring::VecFromPolicy(zeek::VectorVal* vec) { Vec* 
result = new Vec(); @@ -108,7 +110,7 @@ BroSubstring::Vec* BroSubstring::VecFromPolicy(zeek::VectorVal* vec) continue; const zeek::String* str = v->AsRecordVal()->GetField(0)->AsString(); - BroSubstring* substr = new BroSubstring(*str); + auto* substr = new Substring(*str); const zeek::VectorVal* aligns = v->AsRecordVal()->GetField(1)->AsVectorVal(); for ( unsigned int j = 1; j <= aligns->Size(); ++j ) @@ -128,11 +130,11 @@ BroSubstring::Vec* BroSubstring::VecFromPolicy(zeek::VectorVal* vec) return result; } -char* BroSubstring::VecToString(Vec* vec) +char* Substring::VecToString(Vec* vec) { std::string result("["); - for ( BroSubstring::VecIt it = vec->begin(); it != vec->end(); ++it ) + for ( Substring::VecIt it = vec->begin(); it != vec->end(); ++it ) { result += (*it)->CheckString(); result += ","; @@ -142,14 +144,14 @@ char* BroSubstring::VecToString(Vec* vec) return strdup(result.c_str()); } -zeek::String::IdxVec* BroSubstring::GetOffsetsVec(const Vec* vec, unsigned int index) +zeek::String::IdxVec* Substring::GetOffsetsVec(const Vec* vec, unsigned int index) { zeek::String::IdxVec* result = new zeek::String::IdxVec(); for ( VecCIt it = vec->begin(); it != vec->end(); ++it ) { int start, end; - const BroSubstring* bst = (*it); + const Substring* bst = (*it); if ( bst->_aligns.size() <= index ) continue; @@ -166,13 +168,13 @@ zeek::String::IdxVec* BroSubstring::GetOffsetsVec(const Vec* vec, unsigned int i } -bool BroSubstringCmp::operator()(const BroSubstring* bst1, - const BroSubstring* bst2) const +bool SubstringCmp::operator()(const Substring* bst1, + const Substring* bst2) const { if ( _index >= bst1->GetNumAlignments() || _index >= bst2->GetNumAlignments() ) { - reporter->Warning("BroSubstringCmp::operator(): invalid index for input strings.\n"); + reporter->Warning("SubstringCmp::operator(): invalid index for input strings.\n"); return false; } @@ -260,7 +262,7 @@ private: // @node: starting node. // @params: SW parameters. 
// -static void sw_collect_single(BroSubstring::Vec* result, SWNodeMatrix& matrix, +static void sw_collect_single(Substring::Vec* result, SWNodeMatrix& matrix, SWNode* node, SWParams& params) { std::string substring(""); @@ -287,7 +289,7 @@ static void sw_collect_single(BroSubstring::Vec* result, SWNodeMatrix& matrix, if ( substring.size() >= params._min_toklen ) { reverse(substring.begin(), substring.end()); - BroSubstring* bst = new BroSubstring(substring); + auto* bst = new Substring(substring); bst->AddAlignment(matrix.GetRowsString(), row-1); bst->AddAlignment(matrix.GetColsString(), col-1); result->push_back(bst); @@ -305,7 +307,7 @@ static void sw_collect_single(BroSubstring::Vec* result, SWNodeMatrix& matrix, if ( substring.size() > 0 ) { reverse(substring.begin(), substring.end()); - BroSubstring* bst = new BroSubstring(substring); + auto* bst = new Substring(substring); bst->AddAlignment(matrix.GetRowsString(), row-1); bst->AddAlignment(matrix.GetColsString(), col-1); result->push_back(bst); @@ -324,10 +326,10 @@ static void sw_collect_single(BroSubstring::Vec* result, SWNodeMatrix& matrix, // common subsequences while tracking which nodes were visited earlier and which // substrings are redundant (i.e., fully covered by a larger common substring). // -static void sw_collect_multiple(BroSubstring::Vec* result, +static void sw_collect_multiple(Substring::Vec* result, SWNodeMatrix& matrix, SWParams& params) { - std::vector als; + std::vector als; for ( int i = matrix.GetHeight() - 1; i > 0; --i ) { @@ -338,21 +340,21 @@ static void sw_collect_multiple(BroSubstring::Vec* result, if ( ! (node->swn_byte_assigned && ! 
node->swn_visited) ) continue; - BroSubstring::Vec* new_al = new BroSubstring::Vec(); + auto* new_al = new Substring::Vec(); sw_collect_single(new_al, matrix, node, params); - for ( std::vector::iterator it = als.begin(); + for ( std::vector::iterator it = als.begin(); it != als.end(); ++it ) { - BroSubstring::Vec* old_al = *it; + Substring::Vec* old_al = *it; if ( old_al == nullptr ) continue; - for ( BroSubstring::VecIt it2 = old_al->begin(); + for ( Substring::VecIt it2 = old_al->begin(); it2 != old_al->end(); ++it2 ) { - for ( BroSubstring::VecIt it3 = new_al->begin(); + for ( Substring::VecIt it3 = new_al->begin(); it3 != new_al->end(); ++it3 ) { if ( (*it2)->DoesCover(*it3) ) @@ -380,15 +382,15 @@ end_loop: } } - for ( std::vector::iterator it = als.begin(); + for ( std::vector::iterator it = als.begin(); it != als.end(); ++it ) { - BroSubstring::Vec* al = *it; + Substring::Vec* al = *it; if ( al == nullptr ) continue; - for ( BroSubstring::VecIt it2 = al->begin(); + for ( Substring::VecIt it2 = al->begin(); it2 != al->end(); ++it2 ) result->push_back(*it2); @@ -398,10 +400,10 @@ end_loop: // The main Smith-Waterman algorithm. // -BroSubstring::Vec* smith_waterman(const zeek::String* s1, const zeek::String* s2, +Substring::Vec* smith_waterman(const zeek::String* s1, const zeek::String* s2, SWParams& params) { - BroSubstring::Vec* result = new BroSubstring::Vec(); + auto* result = new Substring::Vec(); if ( ! s1 || s1->Len() < int(params._min_toklen) || ! 
s2 || s2->Len() < int(params._min_toklen) ) @@ -558,9 +560,11 @@ BroSubstring::Vec* smith_waterman(const zeek::String* s1, const zeek::String* s2 sw_collect_single(result, matrix, node_max, params); if ( len1 > len2 ) - sort(result->begin(), result->end(), BroSubstringCmp(0)); + sort(result->begin(), result->end(), SubstringCmp(0)); else - sort(result->begin(), result->end(), BroSubstringCmp(1)); + sort(result->begin(), result->end(), SubstringCmp(1)); return result; } + +} // namespace zeek::detail diff --git a/src/SmithWaterman.h b/src/SmithWaterman.h index a93becffe3..2ed4d71753 100644 --- a/src/SmithWaterman.h +++ b/src/SmithWaterman.h @@ -5,16 +5,18 @@ #include "ZeekString.h" #include -// BroSubstrings are essentially BroStrings, augmented with indexing -// information required for the Smith-Waterman algorithm. Each substring -// can be marked as being a common substring of arbitrarily many strings, -// for each of which we store where the substring starts. -// -// -class BroSubstring : public zeek::String { +namespace zeek::detail { + +/** + * Substrings are essentially Strings, augmented with indexing information + * required for the Smith-Waterman algorithm. Each substring can be + * marked as being a common substring of arbitrarily many strings, for each + * of which we store where the substring starts. 
+ */ +class Substring : public zeek::String { public: - typedef std::vector Vec; + typedef std::vector Vec; typedef Vec::iterator VecIt; typedef Vec::const_iterator VecCIt; @@ -39,22 +41,22 @@ public: typedef BSSAlignVec::iterator BSSAlignVecIt; typedef BSSAlignVec::const_iterator BSSAlignVecCIt; - explicit BroSubstring(const std::string& string) + explicit Substring(const std::string& string) : zeek::String(string), _num(), _new(false) { } - explicit BroSubstring(const zeek::String& string) + explicit Substring(const zeek::String& string) : zeek::String(string), _num(), _new(false) { } - BroSubstring(const BroSubstring& bst); + Substring(const Substring& bst); - const BroSubstring& operator=(const BroSubstring& bst); + const Substring& operator=(const Substring& bst); // Returns true if this string completely covers the given one. // "Covering" means that the substring must be at least as long // as the one compared to, and completely covers the range occupied // by the given one. // - bool DoesCover(const BroSubstring* bst) const; + bool DoesCover(const Substring* bst) const; void AddAlignment(const zeek::String* string, int index); const BSSAlignVec& GetAlignments() const { return _aligns; } @@ -78,7 +80,7 @@ private: typedef std::map DataMap; typedef DataMap::iterator DataMapIt; - BroSubstring(); + Substring(); // The alignments registered for this substring. BSSAlignVec _aligns; @@ -90,13 +92,13 @@ private: bool _new; }; -// A comparison class that sorts BroSubstrings according to the string +// A comparison class that sorts Substrings according to the string // offset value of the nth input string, where "nth" starts from 0. 
// -class BroSubstringCmp { +class SubstringCmp { public: - explicit BroSubstringCmp(unsigned int index) { _index = index; } - bool operator()(const BroSubstring* bst1, const BroSubstring* bst2) const; + explicit SubstringCmp(unsigned int index) { _index = index; } + bool operator()(const Substring* bst1, const Substring* bst2) const; private: unsigned int _index; @@ -148,6 +150,16 @@ struct SWParams { // input strings where the string occurs. On error, or if no common // subsequence exists, an empty vector is returned. // -extern BroSubstring::Vec* smith_waterman(const zeek::String* s1, - const zeek::String* s2, - SWParams& params); +extern Substring::Vec* smith_waterman(const zeek::String* s1, + const zeek::String* s2, + SWParams& params); + +} // namespace zeek::detail + +using BroSubstring [[deprecated("Remove in v4.1. Use zeek::detail::Substring.")]] = zeek::detail::Substring; +using BroSubstringCmp [[deprecated("Remove in v4.1. Use zeek::detail::SubstringCmp.")]] = zeek::detail::SubstringCmp; +using SWParams [[deprecated("Remove in v4.1. Use zeek::detail::SWParams.")]] = zeek::detail::SWParams; + +constexpr auto SW_SINGLE [[deprecated("Remove in v4.1. Use zeek::detail::SW_SINGLE.")]] = zeek::detail::SW_SINGLE; +constexpr auto SW_MULTIPLE [[deprecated("Remove in v4.1. Use zeek::detail::SW_MULTIPLE.")]] = zeek::detail::SW_MULTIPLE; +constexpr auto smith_waterman [[deprecated("Remove in v4.1. Use zeek::detail::smith_waterman.")]] = zeek::detail::smith_waterman; diff --git a/src/strings.bif b/src/strings.bif index 380665fb0a..6fbb37084d 100644 --- a/src/strings.bif +++ b/src/strings.bif @@ -681,12 +681,12 @@ function string_to_ascii_hex%(s: string%): string ## Returns: The result of the Smith-Waterman algorithm calculation. 
function str_smith_waterman%(s1: string, s2: string, params: sw_params%) : sw_substring_vec %{ - SWParams sw_params(params->AsRecordVal()->GetField(0)->AsCount(), - SWVariant(params->AsRecordVal()->GetField(1)->AsCount())); + zeek::detail::SWParams sw_params( + params->AsRecordVal()->GetField(0)->AsCount(), + zeek::detail::SWVariant(params->AsRecordVal()->GetField(1)->AsCount())); - BroSubstring::Vec* subseq = - smith_waterman(s1->AsString(), s2->AsString(), sw_params); - auto result = zeek::VectorValPtr{zeek::AdoptRef{}, BroSubstring::VecToPolicy(subseq)}; + auto* subseq = zeek::detail::smith_waterman(s1->AsString(), s2->AsString(), sw_params); + auto result = zeek::VectorValPtr{zeek::AdoptRef{}, zeek::detail::Substring::VecToPolicy(subseq)}; delete_each(subseq); delete subseq;