Add unit tests to ZeekString.cc

This commit is contained in:
Tim Wojtulewicz 2021-04-28 23:49:49 +00:00 committed by Tim Wojtulewicz
parent 8741388be6
commit 13f80ed5dd
2 changed files with 268 additions and 4 deletions

View file

@ -6,18 +6,23 @@
#include <ctype.h> #include <ctype.h>
#include <algorithm> #include <algorithm>
#include <iostream> #include <iostream>
#include <sstream> // Needed for unit testing
#include "zeek/Val.h" #include "zeek/Val.h"
#include "zeek/ID.h" #include "zeek/ID.h"
#include "zeek/Reporter.h" #include "zeek/Reporter.h"
#include "zeek/util.h" #include "zeek/util.h"
#include "zeek/3rdparty/doctest.h"
#ifdef DEBUG #ifdef DEBUG
#define DEBUG_STR(msg) DBG_LOG(zeek::DBG_STRING, msg) #define DEBUG_STR(msg) DBG_LOG(zeek::DBG_STRING, msg)
#else #else
#define DEBUG_STR(msg) #define DEBUG_STR(msg)
#endif #endif
using namespace std::string_literals;
namespace zeek { namespace zeek {
constexpr int String::EXPANDED_STRING; constexpr int String::EXPANDED_STRING;
@ -101,6 +106,19 @@ bool String::operator<(const String &bs) const
return Bstr_cmp(this, &bs) < 0; return Bstr_cmp(this, &bs) < 0;
} }
/**
 * Compares the stored bytes against the contents of a string view.
 *
 * @param s The characters to compare against.
 * @return true if this string holds exactly s.size() bytes and they are
 * byte-for-byte identical to the contents of s, false otherwise.
 */
bool String::operator==(std::string_view s) const
	{
	// Differing lengths can never match. A negative n converts to a very
	// large size_t here, so it cannot equal any view size either.
	if ( static_cast<size_t>(n) != s.size() )
		return false;

	// Both sides are empty. Answer without calling memcmp(), since either
	// pointer may legitimately be null in this case and passing a null
	// pointer to memcmp() is undefined even for a zero length.
	if ( s.empty() )
		return true;

	// A non-zero length without a buffer cannot match real characters.
	if ( b == nullptr )
		return false;

	return memcmp(b, s.data(), s.size()) == 0;
	}
void String::Adopt(byte_vec bytes, int len) void String::Adopt(byte_vec bytes, int len)
{ {
Reset(); Reset();
@ -169,6 +187,7 @@ const char* String::CheckString() const
{ {
// Either an embedded NUL, or no final NUL. // Either an embedded NUL, or no final NUL.
char* exp_s = Render(); char* exp_s = Render();
if ( nulTerm ) if ( nulTerm )
reporter->Error("string with embedded NUL: \"%s\"", exp_s); reporter->Error("string with embedded NUL: \"%s\"", exp_s);
else else
@ -343,7 +362,7 @@ String::Vec* String::Split(const String::IdxVec& indices) const
return result; return result;
} }
VectorVal* String:: VecToPolicy(Vec* vec) VectorVal* String::VecToPolicy(Vec* vec)
{ {
auto result = make_intrusive<VectorVal>(id::string_vec); auto result = make_intrusive<VectorVal>(id::string_vec);
@ -390,11 +409,10 @@ char* String::VecToString(const Vec* vec)
return strdup(result.c_str()); return strdup(result.c_str());
} }
/**
 * Orders two strings by their length only.
 *
 * @param bst1 Left-hand string; must not be null.
 * @param bst2 Right-hand string; must not be null.
 * @return When _increasing is set, true if bst1 is strictly shorter than
 * bst2; otherwise true if bst1 is strictly longer than bst2.
 */
bool StringLenCmp::operator()(String* const& bst1, String* const& bst2)
	{
	const auto lhs_len = bst1->Len();
	const auto rhs_len = bst2->Len();

	if ( _increasing )
		return lhs_len < rhs_len;

	return lhs_len > rhs_len;
	}
std::ostream& operator<<(std::ostream& os, const String& bs) std::ostream& operator<<(std::ostream& os, const String& bs)
@ -496,3 +514,235 @@ void delete_strings(std::vector<const String*>& v)
} }
} // namespace zeek } // namespace zeek
TEST_SUITE_BEGIN("ZeekString");
// Exercises the String constructors and checks the length (and, where
// relevant, the byte pointer) each one reports, plus CheckString() on
// strings that are not cleanly NUL-terminated.
TEST_CASE("construction")
	{
	// A default-constructed string reports no bytes at all.
	zeek::String s1{};
	CHECK_EQ(s1.Len(), 0);
	CHECK_EQ(s1.Bytes(), nullptr);
	CHECK_EQ(s1, "");

	// Seven bytes: the six characters plus the terminating NUL.
	std::string text = "abcdef";
	zeek::byte_vec text2 = new u_char[7];
	memcpy(text2, text.c_str(), 7);

	zeek::String s2{text2, 6, false};
	CHECK_EQ(s2.Len(), 6);

	zeek::String s3{text2, 6, true};
	CHECK_EQ(s3.Len(), 6);

	zeek::String s4{"abcdef"};
	CHECK_EQ(s4.Len(), 6);

	zeek::String s5{std::string("abcdef")};
	CHECK_EQ(s5.Len(), 6);

	zeek::String s6{s5};
	CHECK_EQ(s6.Len(), 6);

	// This constructor keeps the caller's pointer rather than copying.
	// NOTE(review): text2 is never freed in this test, so s7 presumably
	// owns it from here on -- confirm against the constructor.
	zeek::String s7{true, text2, 6};
	CHECK_EQ(s7.Len(), 6);
	CHECK_EQ(s7.Bytes(), text2);

	// Construct a temporary reporter object for the next two tests.
	// Remember whatever was installed before so the global can be put
	// back afterwards instead of being left pointing at freed memory.
	auto* previous_reporter = zeek::reporter;
	zeek::reporter = new zeek::Reporter(false);

	// Flagged as having no final NUL: CheckString() returns the error
	// placeholder.
	zeek::byte_vec text3 = new u_char[7];
	memcpy(text3, text.c_str(), 7);
	zeek::String s8{false, text3, 6};
	CHECK_EQ(std::string(s8.CheckString()), "<string-with-NUL>");

	// Same, but with a NUL embedded in the middle of the data.
	zeek::byte_vec text4 = new u_char[7];
	memcpy(text4, text.c_str(), 7);
	text4[2] = '\0';
	zeek::String s9{false, text4, 6};
	CHECK_EQ(std::string(s9.CheckString()), "<string-with-NUL>");

	delete zeek::reporter;
	zeek::reporter = previous_reporter;

	// The buffer comes from malloc(), so the string is told to release
	// it with free().
	zeek::byte_vec text5 = (zeek::byte_vec)malloc(7);
	memcpy(text5, text.c_str(), 7);
	zeek::String s10{true, text5, 6};
	s10.SetUseFreeToDelete(1);
	CHECK_EQ(s10.Bytes(), text5);
	}
// Covers Set(), operator=, Adopt() and the ==, < comparisons.
TEST_CASE("set/assignment/comparison")
	{
	const zeek::String abc{"abc"};
	const zeek::String def{"def"};

	zeek::String subject{"abc"};
	CHECK_EQ(subject, "abc");

	// Set() from a C string, a std::string and another String.
	subject.Set("def");
	CHECK_EQ(subject, "def");

	const std::string ghi{"ghi"};
	subject.Set(ghi);
	CHECK_EQ(subject, "ghi");

	subject.Set(abc);
	CHECK_EQ(subject, "abc");

	// Assignment.
	subject = def;
	CHECK_EQ(subject, "def");
	CHECK_EQ(subject, def);

	// Ordering.
	CHECK(abc < def);

	subject.Set("ghi");
	CHECK_FALSE(subject < abc);

	// Hand over a freshly allocated buffer holding the text plus its
	// terminating NUL (seven bytes in total).
	const std::string source{"abcdef"};
	const int with_nul = static_cast<int>(source.size()) + 1;
	zeek::byte_vec raw = new u_char[with_nul];
	memcpy(raw, source.c_str(), with_nul);

	subject.Adopt(raw, with_nul);
	CHECK_EQ(subject, "abcdef");
	CHECK_FALSE(subject == abc);

	// This is a clearly invalid string and we probably shouldn't allow it to be
	// constructed, but this test covers one if statement in Bstr_eq.
	zeek::String no_buffer(false, nullptr, 3);
	CHECK_FALSE(no_buffer == abc);

	// A default-constructed string sorts before a non-empty one.
	zeek::String blank;
	CHECK_LT(blank, subject);
	CHECK_FALSE(subject < blank);
	}
// Covers GetSubstring(), FindSubstring(), ToUpper(), Split(),
// concatenate() and VecToString().
TEST_CASE("searching/modification")
	{
	zeek::String s{"this is a test"};

	// REQUIRE stops the test case on a null result, so a regression shows
	// up as a test failure instead of a null dereference below.
	auto* ss = s.GetSubstring(5, 4);
	REQUIRE_NE(ss, nullptr);
	CHECK_EQ(*ss, "is a");
	delete ss;

	// A negative start and a start past the end both yield no substring.
	auto* ss2 = s.GetSubstring(-1, 4);
	CHECK_EQ(ss2, nullptr);
	ss2 = s.GetSubstring(s.Len() + 5, 4);
	CHECK_EQ(ss2, nullptr);

	zeek::String s2{"test"};
	CHECK_EQ(s.FindSubstring(&s2), 10);

	s2.ToUpper();
	CHECK_EQ(s2, "TEST");

	// With no indices there is nothing to split.
	zeek::String::IdxVec indexes;
	zeek::String::Vec* splits = s.Split(indexes);
	CHECK_EQ(splits, nullptr);

	// The checks below expect four pieces, i.e. the -1 and 30 entries do
	// not produce pieces of their own.
	indexes.insert(indexes.end(), {4, 7, 9, -1, 30});
	splits = s.Split(indexes);
	REQUIRE_NE(splits, nullptr);
	REQUIRE_EQ(splits->size(), 4);
	CHECK_EQ(*(splits->at(0)), "this");
	CHECK_EQ(*(splits->at(1)), " is");
	CHECK_EQ(*(splits->at(2)), " a");
	CHECK_EQ(*(splits->at(3)), " test");

	// Gluing the pieces back together gives the original string.
	zeek::String* s3 = zeek::concatenate(*splits);
	REQUIRE_NE(s3, nullptr);
	CHECK_EQ(s.Len(), s3->Len());
	CHECK_EQ(s, *s3);
	delete s3;

	// The returned text is released with free() here.
	char* temp = zeek::String::VecToString(splits);
	REQUIRE_NE(temp, nullptr);
	CHECK_EQ(std::string(temp), "[this, is, a, test,]");
	free(temp);

	for ( auto* entry : *splits )
		delete entry;
	delete splits;
	}
// Covers Render() under the different escaping styles, both the
// char*-returning form and the stream-based forms.
TEST_CASE("rendering")
	{
	// Renders into a std::string and releases the array Render() returned.
	auto render = [](const zeek::String& str, int format, int* len = nullptr)
		{
		char* raw = str.Render(format, len);
		std::string text(raw);
		delete [] raw;
		return text;
		};

	zeek::String quoted("\\abcd\'\"");

	CHECK_EQ(render(quoted, zeek::String::ESC_ESC), "\\\\abcd\'\"");
	CHECK_EQ(render(quoted, zeek::String::ESC_QUOT), "\\abcd\\\'\\\"");

	// With ESC_SER the expected output carries a "10 " prefix in front of
	// the ten rendered characters.
	const int all_escapes =
		zeek::String::ESC_ESC | zeek::String::ESC_QUOT | zeek::String::ESC_SER;
	CHECK_EQ(render(quoted, all_escapes), "10 \\\\abcd\\\'\\\"");

	// Four non-printable bytes followed by a backslash and a single quote.
	zeek::byte_vec raw_bytes = new u_char[6]{3, 4, 5, 6, '\\', '\''};
	zeek::String binary(false, raw_bytes, 6);

	CHECK_EQ(render(binary, zeek::String::ESC_HEX), "\\x03\\x04\\x05\\x06\\\'");

	// NOTE(review): the reported length is one more than the six visible
	// characters -- presumably it counts the terminating NUL; confirm.
	int rendered_len = 0;
	CHECK_EQ(render(binary, zeek::String::ESC_DOT, &rendered_len), "....\\\'");
	CHECK_EQ(rendered_len, 7);

	CHECK_EQ(render(binary, zeek::String::BRO_STRING_LITERAL),
	         "\\x03\\x04\\x05\\x06\\\\\\\'");

	// This uses ESC_HEX, so it should be the same as the test above
	std::ostringstream via_operator;
	via_operator << binary;
	CHECK_EQ(via_operator.str(), "\\x03\\x04\\x05\\x06\\\'");

	std::ostringstream via_render;
	binary.Render(via_render, zeek::String::ESC_HEX);
	CHECK_EQ(via_render.str(), "\\x03\\x04\\x05\\x06\\\'");
	}
// Covers Read() with its default format and with an explicit one.
TEST_CASE("read")
	{
	// Default format: the input is "<length> <bytes>".
	std::istringstream serialized_in{std::string("5 abcde")};
	zeek::String from_serialized;
	from_serialized.Read(serialized_in);
	CHECK_EQ(from_serialized, "abcde");

	// Setting to something else disables reading the serialization format
	const std::string plain{"abcde"};
	std::istringstream plain_in{plain};
	zeek::String from_plain;
	from_plain.Read(plain_in, zeek::String::ESC_HEX);
	CHECK_EQ(from_plain, plain);
	}
// Covers the free helpers delete_strings() and concatenate(), and the
// StringLenCmp ordering predicate.
TEST_CASE("misc")
	{
	// delete_strings() leaves the vector empty.
	std::vector<const zeek::String*> doomed;
	doomed.push_back(new zeek::String{});
	doomed.push_back(new zeek::String{});
	CHECK_EQ(doomed.size(), 2);
	zeek::delete_strings(doomed);
	CHECK_EQ(doomed.size(), 0);

	// Data chunks are joined in order into a single new string.
	std::vector<zeek::data_chunk_t> chunks = {{5, "abcde"}, {6, "fghijk"}};
	auto* joined = zeek::concatenate(chunks);
	CHECK_EQ(*joined, "abcdefghijk");
	delete joined;

	std::vector<zeek::String*> by_length;
	by_length.push_back(new zeek::String{"abcde"});
	by_length.push_back(new zeek::String{"fghi"});

	// Increasing length: the shorter string comes first.
	std::sort(by_length.begin(), by_length.end(), zeek::StringLenCmp(true));
	CHECK_EQ(*(by_length.front()), "fghi");
	CHECK_EQ(*(by_length.back()), "abcde");

	// Decreasing length: the longer string comes first.
	std::sort(by_length.begin(), by_length.end(), zeek::StringLenCmp(false));
	CHECK_EQ(*(by_length.front()), "abcde");
	CHECK_EQ(*(by_length.back()), "fghi");

	for ( auto* owned : by_length )
		delete owned;
	}
TEST_SUITE_END();

View file

@ -19,6 +19,12 @@ class VectorVal;
typedef u_char* byte_vec; typedef u_char* byte_vec;
/**
* A container type for holding blocks of byte data. This can be used for
* character strings, but is not limited to that alone. This class provides
* methods for rendering byte data into character strings, including
* conversions of non-printable characters into other representations.
*/
class String { class String {
public: public:
typedef std::vector<String*> Vec; typedef std::vector<String*> Vec;
@ -49,6 +55,7 @@ public:
const String& operator=(const String& bs); const String& operator=(const String& bs);
bool operator==(const String& bs) const; bool operator==(const String& bs) const;
bool operator<(const String& bs) const; bool operator<(const String& bs) const;
bool operator==(std::string_view s) const;
byte_vec Bytes() const { return b; } byte_vec Bytes() const { return b; }
int Len() const { return n; } int Len() const { return n; }
@ -71,6 +78,13 @@ public:
void SetUseFreeToDelete(int use_it) void SetUseFreeToDelete(int use_it)
{ use_free_to_delete = use_it; } { use_free_to_delete = use_it; }
/**
* Returns a character-string representation of the stored bytes. This
* method doesn't do any extra rendering or character conversions. If
* null characters are found in the middle of the data or if the data
* is missing a closing null character, an error string is returned and
* an error is reported.
*/
const char* CheckString() const; const char* CheckString() const;
enum render_style { enum render_style {