From 13f80ed5dd6be85f80c4eface2ce079f968c61f8 Mon Sep 17 00:00:00 2001 From: Tim Wojtulewicz Date: Wed, 28 Apr 2021 23:49:49 +0000 Subject: [PATCH] Add unit tests to ZeekString.cc --- src/ZeekString.cc | 258 +++++++++++++++++++++++++++++++++++++++++++++- src/ZeekString.h | 14 +++ 2 files changed, 268 insertions(+), 4 deletions(-) diff --git a/src/ZeekString.cc b/src/ZeekString.cc index f87714fab2..ab20d62dd8 100644 --- a/src/ZeekString.cc +++ b/src/ZeekString.cc @@ -6,18 +6,23 @@ #include #include #include +#include // Needed for unit testing #include "zeek/Val.h" #include "zeek/ID.h" #include "zeek/Reporter.h" #include "zeek/util.h" +#include "zeek/3rdparty/doctest.h" + #ifdef DEBUG #define DEBUG_STR(msg) DBG_LOG(zeek::DBG_STRING, msg) #else #define DEBUG_STR(msg) #endif +using namespace std::string_literals; + namespace zeek { constexpr int String::EXPANDED_STRING; @@ -101,6 +106,19 @@ bool String::operator<(const String &bs) const return Bstr_cmp(this, &bs) < 0; } +bool String::operator==(std::string_view s) const + { + if ( static_cast(n) != s.size() ) + return false; + + if ( b == nullptr ) + { + return s.size() == 0; + } + + return (memcmp(b, s.data(), n) == 0); + } + void String::Adopt(byte_vec bytes, int len) { Reset(); @@ -169,6 +187,7 @@ const char* String::CheckString() const { // Either an embedded NUL, or no final NUL. char* exp_s = Render(); + if ( nulTerm ) reporter->Error("string with embedded NUL: \"%s\"", exp_s); else @@ -343,7 +362,7 @@ String::Vec* String::Split(const String::IdxVec& indices) const return result; } -VectorVal* String:: VecToPolicy(Vec* vec) +VectorVal* String::VecToPolicy(Vec* vec) { auto result = make_intrusive(id::string_vec); @@ -390,11 +409,10 @@ char* String::VecToString(const Vec* vec) return strdup(result.c_str()); } -bool StringLenCmp::operator()(String * const& bst1, - String * const& bst2) +bool StringLenCmp::operator()(String* const& bst1, String* const& bst2) { return _increasing ? 
(bst1->Len() < bst2->Len()) : - (bst1->Len() > bst2->Len()); + (bst1->Len() > bst2->Len()); } std::ostream& operator<<(std::ostream& os, const String& bs) @@ -496,3 +514,235 @@ void delete_strings(std::vector& v) } } // namespace zeek + +TEST_SUITE_BEGIN("ZeekString"); + +TEST_CASE("construction") + { + zeek::String s1{}; + CHECK_EQ(s1.Len(), 0); + CHECK_EQ(s1.Bytes(), nullptr); + CHECK_EQ(s1, ""); + + std::string text = "abcdef"; + zeek::byte_vec text2 = new u_char[7]; + memcpy(text2, text.c_str(), 7); + + zeek::String s2{text2, 6, false}; + CHECK_EQ(s2.Len(), 6); + + zeek::String s3{text2, 6, true}; + CHECK_EQ(s3.Len(), 6); + + zeek::String s4{"abcdef"}; + CHECK_EQ(s4.Len(), 6); + + zeek::String s5{std::string("abcdef")}; + CHECK_EQ(s5.Len(), 6); + + zeek::String s6{s5}; + CHECK_EQ(s6.Len(), 6); + + zeek::String s7{true, text2, 6}; + CHECK_EQ(s7.Len(), 6); + CHECK_EQ(s7.Bytes(), text2); + + // Construct a temporary reporter object for the next two tests + zeek::reporter = new zeek::Reporter(false); + + zeek::byte_vec text3 = new u_char[7]; + memcpy(text3, text.c_str(), 7); + zeek::String s8{false, text3, 6}; + CHECK_EQ(std::string(s8.CheckString()), ""); + + zeek::byte_vec text4 = new u_char[7]; + memcpy(text4, text.c_str(), 7); + text4[2] = '\0'; + zeek::String s9{false, text4, 6}; + CHECK_EQ(std::string(s9.CheckString()), ""); + + delete zeek::reporter; + + zeek::byte_vec text5 = (zeek::byte_vec)malloc(7); + memcpy(text5, text.c_str(), 7); + zeek::String s10{true, text5, 6}; + s10.SetUseFreeToDelete(1); + CHECK_EQ(s10.Bytes(), text5); + } + +TEST_CASE("set/assignment/comparison") + { + zeek::String s{"abc"}; + CHECK_EQ(s, "abc"); + + s.Set("def"); + CHECK_EQ(s, "def"); + + s.Set(std::string("ghi")); + CHECK_EQ(s, "ghi"); + + zeek::String s2{"abc"}; + s.Set(s2); + CHECK_EQ(s, "abc"); + + zeek::String s3{"def"}; + s = s3; + CHECK_EQ(s, "def"); + CHECK_EQ(s, s3); + CHECK(s2 < s3); + + s.Set("ghi"); + CHECK_FALSE(s < s2); + + std::string text = "abcdef"; + 
zeek::byte_vec text2 = new u_char[7]; + memcpy(text2, text.c_str(), 7); + s.Adopt(text2, 7); + + CHECK_EQ(s, "abcdef"); + CHECK_FALSE(s == s2); + + // This is a clearly invalid string and we probably shouldn't allow it to be + // constructed, but this test covers one if statement in Bstr_eq. + zeek::String s4(false, nullptr, 3); + CHECK_FALSE(s4 == s2); + + zeek::String s5{}; + CHECK_LT(s5, s); + CHECK_FALSE(s < s5); + } + +TEST_CASE("searching/modification") + { + zeek::String s{"this is a test"}; + auto* ss = s.GetSubstring(5, 4); + CHECK_EQ(*ss, "is a"); + delete ss; + + auto* ss2 = s.GetSubstring(-1, 4); + CHECK_EQ(ss2, nullptr); + ss2 = s.GetSubstring(s.Len() + 5, 4); + CHECK_EQ(ss2, nullptr); + + zeek::String s2{"test"}; + CHECK_EQ(s.FindSubstring(&s2), 10); + + s2.ToUpper(); + CHECK_EQ(s2, "TEST"); + + zeek::String::IdxVec indexes; + zeek::String::Vec* splits = s.Split(indexes); + CHECK_EQ(splits, nullptr); + + indexes.insert(indexes.end(), {4, 7, 9, -1, 30}); + splits = s.Split(indexes); + CHECK_EQ(splits->size(), 4); + CHECK_EQ(*(splits->at(0)), "this"); + CHECK_EQ(*(splits->at(1)), " is"); + CHECK_EQ(*(splits->at(2)), " a"); + CHECK_EQ(*(splits->at(3)), " test"); + + zeek::String* s3 = concatenate(*splits); + CHECK_EQ(s.Len(), s3->Len()); + CHECK_EQ(s, *s3); + delete s3; + + char* temp = zeek::String::VecToString(splits); + CHECK_EQ(std::string(temp), "[this, is, a, test,]"); + free(temp); + + for ( auto* entry : *splits ) + delete entry; + delete splits; + } + +TEST_CASE("rendering") + { + zeek::String s1("\\abcd\'\""); + auto* r = s1.Render(zeek::String::ESC_ESC); + CHECK_EQ(std::string(r), "\\\\abcd\'\""); + delete [] r; + + r = s1.Render(zeek::String::ESC_QUOT); + CHECK_EQ(std::string(r), "\\abcd\\\'\\\""); + delete [] r; + + r = s1.Render(zeek::String::ESC_ESC | zeek::String::ESC_QUOT | zeek::String::ESC_SER); + CHECK_EQ(std::string(r), "10 \\\\abcd\\\'\\\""); + delete [] r; + + zeek::byte_vec text = new u_char[6]; + text[0] = 3; + text[1] = 4; + 
text[2] = 5; + text[3] = 6; + text[4] = '\\'; + text[5] = '\''; + zeek::String s2(false, text, 6); + + r = s2.Render(zeek::String::ESC_HEX); + CHECK_EQ(std::string(r), "\\x03\\x04\\x05\\x06\\\'"); + delete [] r; + + int test_length = 0; + r = s2.Render(zeek::String::ESC_DOT, &test_length); + CHECK_EQ(std::string(r), "....\\\'"); + CHECK_EQ(test_length, 7); + delete [] r; + + r = s2.Render(zeek::String::BRO_STRING_LITERAL); + CHECK_EQ(std::string(r), "\\x03\\x04\\x05\\x06\\\\\\\'"); + delete [] r; + + std::ostringstream os1; + // This uses ESC_HEX, so it should be the same as the test above + os1 << s2; + CHECK_EQ(os1.str(), "\\x03\\x04\\x05\\x06\\\'"); + + std::ostringstream os2; + s2.Render(os2, zeek::String::ESC_HEX); + CHECK_EQ(os2.str(), "\\x03\\x04\\x05\\x06\\\'"); + } + +TEST_CASE("read") + { + std::string text1("5 abcde"); + std::istringstream iss1(text1); + zeek::String s1{}; + s1.Read(iss1); + CHECK_EQ(s1, "abcde"); + + std::string text2("abcde"); + std::istringstream iss2(text2); + zeek::String s2{}; + // Setting to something else disables reading the serialization format + s2.Read(iss2, zeek::String::ESC_HEX); + CHECK_EQ(s2, text2); + } + +TEST_CASE("misc") + { + std::vector sv = {new zeek::String{}, new zeek::String{}}; + CHECK_EQ(sv.size(), 2); + zeek::delete_strings(sv); + CHECK_EQ(sv.size(), 0); + + std::vector dv = {{5, "abcde"}, {6, "fghijk"}}; + auto* s = zeek::concatenate(dv); + CHECK_EQ(*s, "abcdefghijk"); + delete s; + + std::vector sv2 = {new zeek::String{"abcde"}, new zeek::String{"fghi"}}; + std::sort(sv2.begin(), sv2.end(), zeek::StringLenCmp(true)); + CHECK_EQ(*(sv2.front()), "fghi"); + CHECK_EQ(*(sv2.back()), "abcde"); + + std::sort(sv2.begin(), sv2.end(), zeek::StringLenCmp(false)); + CHECK_EQ(*(sv2.front()), "abcde"); + CHECK_EQ(*(sv2.back()), "fghi"); + + for ( auto* entry : sv2 ) + delete entry; + } + +TEST_SUITE_END(); diff --git a/src/ZeekString.h b/src/ZeekString.h index 30826be288..ed5caed239 100644 --- a/src/ZeekString.h +++ 
b/src/ZeekString.h @@ -19,6 +19,12 @@ class VectorVal; typedef u_char* byte_vec; +/** + * A container type for holding blocks of byte data. This can be used for + * character strings, but is not limited to that alone. This class provides + * methods for rendering byte data into character strings, including + * conversions of non-printable characters into other representations. + */ class String { public: typedef std::vector Vec; @@ -49,6 +55,7 @@ public: const String& operator=(const String& bs); bool operator==(const String& bs) const; bool operator<(const String& bs) const; + bool operator==(std::string_view s) const; byte_vec Bytes() const { return b; } int Len() const { return n; } @@ -71,6 +78,13 @@ public: void SetUseFreeToDelete(int use_it) { use_free_to_delete = use_it; } + /** + * Returns a character-string representation of the stored bytes. This + * method doesn't do any extra rendering or character conversions. If + * null characters are found in the middle of the data or if the data + * is missing a closing null character, an error string is returned and + * an error is reported. + */ const char* CheckString() const; enum render_style {