Add util::split methods for splitting strings

This commit is contained in:
Tim Wojtulewicz 2022-07-19 10:17:02 -07:00
parent 5665696a05
commit 510dd1cf71
2 changed files with 108 additions and 13 deletions

View file

@ -2696,19 +2696,6 @@ string json_escape_utf8(const char* val, size_t val_size, bool escape_printable_
return utf_result;
}
} // namespace zeek::util
/**
 * Reports an out-of-memory condition and aborts the process.
 *
 * The message is written to stderr first, then passed to the global reporter
 * if one is set. abort() is called unconditionally at the end.
 *
 * @param where Text naming the location that ran out of memory; it is
 * inserted into the emitted message.
 */
extern "C" void out_of_memory(const char* where)
{
    fprintf(stderr, "out of memory in %s.\n", where);

    // Guess that might fail here if memory is really tight ...
    if ( zeek::reporter )
    {
        zeek::reporter->FatalError("out of memory in %s.\n", where);
    }

    abort();
}
TEST_CASE("util filesystem")
{
zeek::filesystem::path path1("/a/b");
@ -2726,3 +2713,60 @@ TEST_CASE("util filesystem")
auto info = zeek::filesystem::space(".");
CHECK(info.capacity > 0);
}
// Exercises split() for narrow and wide strings. Both subcases run the same
// set of inputs so that the two character types are held to identical
// expectations.
TEST_CASE("util split")
{
using str_vec = std::vector<std::string>;
using wstr_vec = std::vector<std::wstring>;
SUBCASE("w/ delim")
{
// An empty delimiter yields the input as the only element.
CHECK_EQ(split("a:b:c", ""), str_vec({"a:b:c"}));
CHECK_EQ(split("", ""), str_vec({""}));
// Plain split, then adjacent delimiters producing empty pieces in between.
CHECK_EQ(split("a:b:c", ":"), str_vec({"a", "b", "c"}));
CHECK_EQ(split("a:b::c", ":"), str_vec({"a", "b", "", "c"}));
CHECK_EQ(split("a:b:::c", ":"), str_vec({"a", "b", "", "", "c"}));
// Each leading delimiter produces one leading empty piece.
CHECK_EQ(split(":a:b:c", ":"), str_vec({"", "a", "b", "c"}));
CHECK_EQ(split("::a:b:c", ":"), str_vec({"", "", "a", "b", "c"}));
// Each trailing delimiter produces one trailing empty piece.
CHECK_EQ(split("a:b:c:", ":"), str_vec({"a", "b", "c", ""}));
CHECK_EQ(split("a:b:c::", ":"), str_vec({"a", "b", "c", "", ""}));
// Empty input with a non-empty delimiter still yields one (empty) element.
CHECK_EQ(split("", ":"), str_vec({""}));
// Delimiter at the very start, multi-character delimiter, delimiter that
// does not occur, and empty delimiter on a non-delimited string.
CHECK_EQ(split("12345", "1"), str_vec({"", "2345"}));
CHECK_EQ(split("12345", "23"), str_vec{"1", "45"});
CHECK_EQ(split("12345", "a"), str_vec{"12345"});
CHECK_EQ(split("12345", ""), str_vec{"12345"});
}
SUBCASE("wchar_t w/ delim")
{
// Same inputs and expectations as the narrow-string subcase, using wide
// string literals.
CHECK_EQ(split(L"a:b:c", L""), wstr_vec({L"a:b:c"}));
CHECK_EQ(split(L"", L""), wstr_vec({L""}));
CHECK_EQ(split(L"a:b:c", L":"), wstr_vec({L"a", L"b", L"c"}));
CHECK_EQ(split(L"a:b::c", L":"), wstr_vec({L"a", L"b", L"", L"c"}));
CHECK_EQ(split(L"a:b:::c", L":"), wstr_vec({L"a", L"b", L"", L"", L"c"}));
CHECK_EQ(split(L":a:b:c", L":"), wstr_vec({L"", L"a", L"b", L"c"}));
CHECK_EQ(split(L"::a:b:c", L":"), wstr_vec({L"", L"", L"a", L"b", L"c"}));
CHECK_EQ(split(L"a:b:c:", L":"), wstr_vec({L"a", L"b", L"c", L""}));
CHECK_EQ(split(L"a:b:c::", L":"), wstr_vec({L"a", L"b", L"c", L"", L""}));
CHECK_EQ(split(L"", L":"), wstr_vec({L""}));
CHECK_EQ(split(L"12345", L"1"), wstr_vec({L"", L"2345"}));
CHECK_EQ(split(L"12345", L"23"), wstr_vec{L"1", L"45"});
CHECK_EQ(split(L"12345", L"a"), wstr_vec{L"12345"});
CHECK_EQ(split(L"12345", L""), wstr_vec{L"12345"});
}
}
} // namespace zeek::util
/**
 * Reports an out-of-memory condition and aborts the process.
 *
 * The message is written to stderr first, then passed to the global reporter
 * if one is set. abort() is called unconditionally at the end.
 *
 * @param where Text naming the location that ran out of memory; it is
 * inserted into the emitted message.
 */
extern "C" void out_of_memory(const char* where)
{
    fprintf(stderr, "out of memory in %s.\n", where);

    // Guess that might fail here if memory is really tight ...
    if ( zeek::reporter )
    {
        zeek::reporter->FatalError("out of memory in %s.\n", where);
    }

    abort();
}

View file

@ -571,5 +571,56 @@ std::string json_escape_utf8(const std::string& val, bool escape_printable_contr
std::string json_escape_utf8(const char* val, size_t val_size,
bool escape_printable_controls = true);
/**
 * Splits a string at all occurrences of a delimiter. Successive occurrences
 * of the delimiter will be split into multiple pieces, so leading, trailing
 * and adjacent delimiters each contribute an empty piece.
 *
 * Delimiters are matched left to right without overlap: after a match, the
 * search resumes at the first character following it.
 *
 * @param s The string to split.
 * @param delim The delimiter to split at. If it is empty, or does not occur
 * in the string, the result is a single element holding a copy of the string.
 * @return The pieces in order of appearance. The result always contains at
 * least one element.
 *
 * \note This function is not UTF8-aware.
 */
template <typename T> std::vector<T> split(T s, const T& delim)
{
    // If there's no delimiter, return a copy of the existing string.
    if ( delim.empty() )
        return {T(s)};

    std::vector<T> pieces;

    // Offset of the first character not yet assigned to a piece. Scanning by
    // offset avoids re-copying the remainder of the string on every match.
    typename T::size_type start = 0;

    while ( true )
    {
        const auto pos = s.find(delim, start);
        if ( pos == T::npos )
            break;

        pieces.push_back(s.substr(start, pos - start));
        start = pos + delim.size();
    }

    // Whatever follows the last match is the final piece. It is empty exactly
    // when the last match sits at the end of the string, which yields the
    // trailing empty piece without inspecting the string's suffix separately
    // (a suffix check misfires when that suffix overlaps an earlier match).
    pieces.push_back(s.substr(start));

    return pieces;
}
/**
 * Variant of split() for callers whose delimiter is not already a T, e.g. a
 * string object split at a character-pointer delimiter.
 *
 * @param s The string to split.
 * @param delim The delimiter; a T is brace-initialized from it before the
 * actual splitting is delegated to split().
 * @return The pieces produced by the delegated split() call.
 */
template <typename T, typename U = typename T::value_type*> std::vector<T> split(T s, U delim)
{
    const T delimiter{delim};
    return split(s, delimiter);
}
/**
 * Splits a C string at all occurrences of a C-string delimiter.
 *
 * Both arguments are copied into std::string objects, which are then passed
 * on to split().
 *
 * @param s The string to split.
 * @param delim The delimiter to split at.
 * @return The pieces as std::string values.
 */
inline std::vector<std::string> split(const char* s, const char* delim)
{
    const std::string delimiter(delim);
    return split(std::string(s), delimiter);
}
/**
 * Splits a wide C string at all occurrences of a wide C-string delimiter.
 *
 * Both arguments are copied into std::wstring objects, which are then passed
 * on to split().
 *
 * @param s The string to split.
 * @param delim The delimiter to split at.
 * @return The pieces as std::wstring values.
 */
inline std::vector<std::wstring> split(const wchar_t* s, const wchar_t* delim)
{
    const std::wstring delimiter(delim);
    return split(std::wstring(s), delimiter);
}
} // namespace util
} // namespace zeek