From 0e28a7faf545e297a7f21e6a598faddb6d34f6ce Mon Sep 17 00:00:00 2001 From: Arne Welzel Date: Tue, 6 Sep 2022 17:15:11 +0200 Subject: [PATCH] strings: Implement join_string_set() as bif Haven't measured speed, but this is probably faster than the cat() and string invocations. --- scripts/base/utils/strings.zeek | 23 -------- src/strings.bif | 59 +++++++++++++++++++ testing/btest/Baseline/bifs.join_string/out | 4 ++ .../scripts.base.utils.strings/output | 2 - testing/btest/bifs/join_string.zeek | 6 ++ testing/btest/scripts/base/utils/strings.test | 3 - 6 files changed, 69 insertions(+), 28 deletions(-) diff --git a/scripts/base/utils/strings.zeek b/scripts/base/utils/strings.zeek index e50954309f..f776d91da4 100644 --- a/scripts/base/utils/strings.zeek +++ b/scripts/base/utils/strings.zeek @@ -9,29 +9,6 @@ function is_string_binary(s: string): bool return |gsub(s, /[\x00-\x7f]/, "")| * 100 / |s| >= 25; } -## Join a set of strings together, with elements delimited by a constant string. -## -## ss: a set of strings to join. -## -## j: the string used to join set elements. -## -## Returns: a string composed of all elements of the set, delimited by the -## joining string. -function join_string_set(ss: set[string], j: string): string - { - local output=""; - local i=0; - for ( s in ss ) - { - if ( i > 0 ) - output = cat(output, j); - - output = cat(output, s); - ++i; - } - return output; - } - ## Given a string, returns an escaped version. ## ## s: a string to escape. diff --git a/src/strings.bif b/src/strings.bif index fb2179a604..05eb2dd8bf 100644 --- a/src/strings.bif +++ b/src/strings.bif @@ -127,6 +127,65 @@ function join_string_vec%(vec: string_vec, sep: string%): string return zeek::make_intrusive(s); %} +## Joins all values in the given set of strings with a separator placed +## between each element. +## +## ss: The :zeek:type:`string_set` (``set[string]``). +## +## sep: The separator to place between each element. 
+##
+## Returns: The concatenation of all elements in *ss*, with *sep* placed
+##          between each element.
+##
+## .. zeek:see:: cat cat_sep string_cat
+##    fmt
+##    join_string_vec
+function join_string_set%(ss: string_set, sep: string%): string
+	%{
+	ODesc d;
+	d.SetStyle(RAW_STYLE);
+
+	if ( ! ss->GetType()->IsSet() )
+		{
+		zeek::emit_builtin_error("join_string_set() requires a string set argument");
+		return val_mgr->EmptyString();
+		}
+
+	// Defensive: reject sets whose index is not exactly one string.
+	const auto& it = ss->GetType()->AsTableType()->GetIndexTypes();
+	if ( it.size() != 1 || it[0]->Tag() != TYPE_STRING )
+		{
+		zeek::emit_builtin_error("join_string_set() requires a string set argument");
+		return val_mgr->EmptyString();
+		}
+
+	int i = 0;
+	TableVal* tv = ss->AsTableVal();
+	const auto* loop_vals = tv->AsTable();
+
+	if ( ! loop_vals->Length() )
+		return val_mgr->EmptyString();
+
+	for ( const auto& iter : *loop_vals )
+		{
+		if ( i > 0 )
+			d.AddN(reinterpret_cast<const char*>(sep->Bytes()), sep->Len());
+
+		// Iteration only yields hash keys, so each index is rebuilt from
+		// its key before being described; this may be slow for large sets.
+		auto k = iter.GetHashKey();
+		auto ind_lv = tv->RecreateIndex(*k);
+		ind_lv->Describe(&d);
+
+		++i;
+		}
+
+	zeek::String* str = new zeek::String(1, d.TakeBytes(), d.Len());
+	str->SetUseFreeToDelete(true);
+
+	return zeek::make_intrusive<zeek::StringVal>(str);
+	%}
+
 ## Returns an edited version of a string that applies a special
 ## "backspace character" (usually ``\x08`` for backspace or ``\x7f`` for DEL).
 ## For example, ``edit("hello there", "e")`` returns ``"llo t"``.
diff --git a/testing/btest/Baseline/bifs.join_string/out b/testing/btest/Baseline/bifs.join_string/out index a175b0b202..a30137eedf 100644 --- a/testing/btest/Baseline/bifs.join_string/out +++ b/testing/btest/Baseline/bifs.join_string/out @@ -4,3 +4,7 @@ thisisanothertest Test ...hi..there this\x00is\x00another\x00test + (empty) +one +two, one, three +twoone diff --git a/testing/btest/Baseline/scripts.base.utils.strings/output b/testing/btest/Baseline/scripts.base.utils.strings/output index 4d9327c6f7..70d5956904 100644 --- a/testing/btest/Baseline/scripts.base.utils.strings/output +++ b/testing/btest/Baseline/scripts.base.utils.strings/output @@ -3,8 +3,6 @@ '\xff\xff\xff\x00' IS considered binary '\x00\x00\xff\x00' IS considered binary '\x00\x00\x00\x00' is NOT considered binary -two, one, three -one hell\o w\orl\d \\hello world\\ hello world diff --git a/testing/btest/bifs/join_string.zeek b/testing/btest/bifs/join_string.zeek index 4ea08a11ed..b80040440d 100644 --- a/testing/btest/bifs/join_string.zeek +++ b/testing/btest/bifs/join_string.zeek @@ -19,4 +19,10 @@ event zeek_init() print join_string_vec(d, "-"); print join_string_vec(e, "."); print join_string_vec(c, "\x00"); + + local empty_set: set[string] = set(); + print fmt("%s (empty)", join_string_set(empty_set, ", ")); + print join_string_set(set("one"), ", "); + print join_string_set(set("one", "two", "three"), ", "); + print join_string_set(set("one", "two"), ""); } diff --git a/testing/btest/scripts/base/utils/strings.test b/testing/btest/scripts/base/utils/strings.test index 538c6b670d..3a2b3f54a9 100644 --- a/testing/btest/scripts/base/utils/strings.test +++ b/testing/btest/scripts/base/utils/strings.test @@ -16,9 +16,6 @@ test_binary_string("\xFF\xFF\xFF\x00"); test_binary_string("\x00\x00\xFF\x00"); test_binary_string("\x00\x00\x00\x00"); -print join_string_set(set("one", "two", "three"), ", "); -print join_string_set(set("one"), ", "); - print string_escape("hello world", "od"); print 
string_escape("\\hello world\\", "");