diff --git a/src/util.cc b/src/util.cc
index 1cd79f62f0..583dcc04b7 100644
--- a/src/util.cc
+++ b/src/util.cc
@@ -2696,19 +2696,6 @@ string json_escape_utf8(const char* val, size_t val_size, bool escape_printable_
 	return utf_result;
 	}
 
-	} // namespace zeek::util
-
-extern "C" void out_of_memory(const char* where)
-	{
-	fprintf(stderr, "out of memory in %s.\n", where);
-
-	if ( zeek::reporter )
-		// Guess that might fail here if memory is really tight ...
-		zeek::reporter->FatalError("out of memory in %s.\n", where);
-
-	abort();
-	}
-
 TEST_CASE("util filesystem")
 	{
 	zeek::filesystem::path path1("/a/b");
@@ -2726,3 +2713,68 @@ TEST_CASE("util filesystem")
 	auto info = zeek::filesystem::space(".");
 	CHECK(info.capacity > 0);
 	}
+
+TEST_CASE("util split")
+	{
+	using str_vec = std::vector<std::string>;
+	using wstr_vec = std::vector<std::wstring>;
+
+	SUBCASE("w/ delim")
+		{
+		CHECK_EQ(split("a:b:c", ""), str_vec({"a:b:c"}));
+		CHECK_EQ(split("", ""), str_vec({""}));
+		CHECK_EQ(split("a:b:c", ":"), str_vec({"a", "b", "c"}));
+		CHECK_EQ(split("a:b::c", ":"), str_vec({"a", "b", "", "c"}));
+		CHECK_EQ(split("a:b:::c", ":"), str_vec({"a", "b", "", "", "c"}));
+		CHECK_EQ(split(":a:b:c", ":"), str_vec({"", "a", "b", "c"}));
+		CHECK_EQ(split("::a:b:c", ":"), str_vec({"", "", "a", "b", "c"}));
+		CHECK_EQ(split("a:b:c:", ":"), str_vec({"a", "b", "c", ""}));
+		CHECK_EQ(split("a:b:c::", ":"), str_vec({"a", "b", "c", "", ""}));
+		CHECK_EQ(split("", ":"), str_vec({""}));
+
+		CHECK_EQ(split("12345", "1"), str_vec({"", "2345"}));
+		CHECK_EQ(split("12345", "23"), str_vec{"1", "45"});
+		CHECK_EQ(split("12345", "a"), str_vec{"12345"});
+		CHECK_EQ(split("12345", ""), str_vec{"12345"});
+
+		// Matches are consumed left to right and never overlap.
+		CHECK_EQ(split("aaa", "aa"), str_vec({"", "a"}));
+		}
+
+	SUBCASE("wchar_t w/ delim")
+		{
+		CHECK_EQ(split(L"a:b:c", L""), wstr_vec({L"a:b:c"}));
+		CHECK_EQ(split(L"", L""), wstr_vec({L""}));
+		CHECK_EQ(split(L"a:b:c", L":"), wstr_vec({L"a", L"b", L"c"}));
+		CHECK_EQ(split(L"a:b::c", L":"), wstr_vec({L"a", L"b", L"", L"c"}));
+		CHECK_EQ(split(L"a:b:::c", L":"), wstr_vec({L"a", L"b", L"", L"", L"c"}));
+		CHECK_EQ(split(L":a:b:c", L":"), wstr_vec({L"", L"a", L"b", L"c"}));
+		CHECK_EQ(split(L"::a:b:c", L":"), wstr_vec({L"", L"", L"a", L"b", L"c"}));
+		CHECK_EQ(split(L"a:b:c:", L":"), wstr_vec({L"a", L"b", L"c", L""}));
+		CHECK_EQ(split(L"a:b:c::", L":"), wstr_vec({L"a", L"b", L"c", L"", L""}));
+		CHECK_EQ(split(L"", L":"), wstr_vec({L""}));
+
+		CHECK_EQ(split(L"12345", L"1"), wstr_vec({L"", L"2345"}));
+		CHECK_EQ(split(L"12345", L"23"), wstr_vec{L"1", L"45"});
+		CHECK_EQ(split(L"12345", L"a"), wstr_vec{L"12345"});
+		CHECK_EQ(split(L"12345", L""), wstr_vec{L"12345"});
+
+		// Matches are consumed left to right and never overlap.
+		CHECK_EQ(split(L"aaa", L"aa"), wstr_vec({L"", L"a"}));
+		}
+	}
+
+	} // namespace zeek::util
+
+// Prints an out-of-memory message naming `where` to stderr, forwards the same
+// message to the reporter's FatalError() if a reporter exists, then calls abort().
+extern "C" void out_of_memory(const char* where)
+	{
+	fprintf(stderr, "out of memory in %s.\n", where);
+
+	if ( zeek::reporter )
+		// Guess that might fail here if memory is really tight ...
+		zeek::reporter->FatalError("out of memory in %s.\n", where);
+
+	abort();
+	}
diff --git a/src/util.h b/src/util.h
index f146b4e0fb..2596fbba6f 100644
--- a/src/util.h
+++ b/src/util.h
@@ -571,5 +571,65 @@ std::string json_escape_utf8(const std::string& val, bool escape_printable_contr
 std::string json_escape_utf8(const char* val, size_t val_size,
                              bool escape_printable_controls = true);
 
+/**
+ * Splits a string at all occurrences of a delimiter. Successive occurrences
+ * of the delimiter will be split into multiple pieces, i.e., they produce
+ * empty strings in the result, just like a leading or trailing delimiter.
+ * Occurrences are matched left to right and never overlap, so splitting
+ * "aaa" at "aa" yields {"", "a"}.
+ *
+ * @param s The string to split.
+ * @param delim The delimiter to split at. If empty, no splitting takes place.
+ * @return The pieces of \a s in their original order. The result always holds
+ * at least one element; without any match, that's a copy of \a s.
+ *
+ * \note This function is not UTF8-aware.
+ */
+template <class T> std::vector<T> split(T s, const T& delim)
+	{
+	// If there's no delimiter, return a copy of the existing string.
+	if ( delim.empty() )
+		return {T(s)};
+
+	std::vector<T> l;
+	size_t start = 0;
+
+	// Search from a moving offset instead of re-copying the remainder of the
+	// string after every match.
+	for ( size_t p = s.find(delim); p != T::npos; p = s.find(delim, start) )
+		{
+		l.push_back(s.substr(start, p - start));
+		start = p + delim.size();
+		}
+
+	// Whatever follows the last match is the final piece: empty if the string
+	// ends in the delimiter, the whole string if there was no match at all.
+	l.push_back(s.substr(start));
+
+	return l;
+	}
+
+/**
+ * Convenience overload of split() for delimiters that aren't of the string's
+ * own type, such as a single character. The delimiter is converted through
+ * T{delim} and passed on to the overload above.
+ */
+template <class T, class U = typename T::value_type> std::vector<T> split(T s, U delim)
+	{
+	return split(s, T{delim});
+	}
+
+/** Convenience overload of split() taking C strings. */
+inline std::vector<std::string> split(const char* s, const char* delim)
+	{
+	return split(std::string(s), std::string(delim));
+	}
+
+/** Convenience overload of split() taking wide C strings. */
+inline std::vector<std::wstring> split(const wchar_t* s, const wchar_t* delim)
+	{
+	return split(std::wstring(s), std::wstring(delim));
+	}
+
 } // namespace util
 } // namespace zeek