strings: Implement join_string_set() as a BiF

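I haven't benchmarked it, but this is probably faster than the previous
script-level implementation, which called cat() for every element and
separator and so rebuilt the output string on each iteration.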
This commit is contained in:
Arne Welzel 2022-09-06 17:15:11 +02:00
parent 31aeb58e10
commit 0e28a7faf5
6 changed files with 69 additions and 28 deletions

View file

@ -9,29 +9,6 @@ function is_string_binary(s: string): bool
return |gsub(s, /[\x00-\x7f]/, "")| * 100 / |s| >= 25; return |gsub(s, /[\x00-\x7f]/, "")| * 100 / |s| >= 25;
} }
## Concatenates the elements of a string set, inserting a fixed delimiter
## between consecutive elements.
##
## ss: the set of strings to join.
##
## j: the delimiter inserted between elements.
##
## Returns: a single string made of every element of the set, with *j*
##          between each pair of adjacent elements. An empty set yields
##          the empty string.
function join_string_set(ss: set[string], j: string): string
	{
	local joined = "";
	local need_delim = F;

	for ( elem in ss )
		{
		# Every element except the first one visited is preceded by the
		# delimiter.
		if ( need_delim )
			joined = cat(joined, j);
		else
			need_delim = T;

		joined = cat(joined, elem);
		}

	return joined;
	}
## Given a string, returns an escaped version. ## Given a string, returns an escaped version.
## ##
## s: a string to escape. ## s: a string to escape.

View file

@ -127,6 +127,65 @@ function join_string_vec%(vec: string_vec, sep: string%): string
return zeek::make_intrusive<zeek::StringVal>(s); return zeek::make_intrusive<zeek::StringVal>(s);
%} %}
## Joins all values in the given set of strings with a separator placed
## between each element.
##
## ss: The :zeek:type:`string_set` (``set[string]``) whose elements are joined.
##
## sep: The separator to place between each element.
##
## Returns: The concatenation of all elements in *ss*, with *sep* placed
##          between each element. An empty set yields an empty string.
##          Elements appear in the order the set is iterated in, which
##          need not be the order in which they were added.
##
## .. zeek:see:: cat cat_sep string_cat
##    fmt
##    join_string_vec
function join_string_set%(ss: string_set, sep: string%): string
	%{
	// Reject anything that is not a set at all.
	if ( ! ss->GetType()->IsSet() )
		{
		zeek::emit_builtin_error("join_string_set() requires a string set argument");
		return val_mgr->EmptyString();
		}

	// Defensive check: the set must be indexed by exactly one string.
	const auto& index_types = ss->GetType()->AsTableType()->GetIndexTypes();

	if ( index_types.size() != 1 || index_types[0]->Tag() != TYPE_STRING )
		{
		zeek::emit_builtin_error("join_string_set() requires a string set argument");
		return val_mgr->EmptyString();
		}

	TableVal* table = ss->AsTableVal();
	const PDict<TableEntryVal>* entries = table->AsTable();

	if ( entries->Length() == 0 )
		return val_mgr->EmptyString();

	// Accumulate the output in raw style.
	ODesc out;
	out.SetStyle(RAW_STYLE);

	bool need_sep = false;

	for ( const auto& entry : *entries )
		{
		// Every element except the first one visited is preceded by
		// the separator.
		if ( need_sep )
			out.AddN(reinterpret_cast<const char*>(sep->Bytes()), sep->Len());
		else
			need_sep = true;

		// The dictionary entry only exposes its hash key here, so the
		// index value is rebuilt from that key before being described.
		// NOTE(review): the cost of rebuilding the index per element
		// has not been measured.
		auto key = entry.GetHashKey();
		auto index = table->RecreateIndex(*key);
		index->Describe(&out);
		}

	// Hand the accumulated bytes over to the new string.
	zeek::String* joined = new zeek::String(true, out.TakeBytes(), out.Len());
	joined->SetUseFreeToDelete(true);

	return zeek::make_intrusive<zeek::StringVal>(joined);
	%}
## Returns an edited version of a string that applies a special ## Returns an edited version of a string that applies a special
## "backspace character" (usually ``\x08`` for backspace or ``\x7f`` for DEL). ## "backspace character" (usually ``\x08`` for backspace or ``\x7f`` for DEL).
## For example, ``edit("hello there", "e")`` returns ``"llo t"``. ## For example, ``edit("hello there", "e")`` returns ``"llo t"``.

View file

@ -4,3 +4,7 @@ thisisanothertest
Test Test
...hi..there ...hi..there
this\x00is\x00another\x00test this\x00is\x00another\x00test
(empty)
one
two, one, three
twoone

View file

@ -3,8 +3,6 @@
'\xff\xff\xff\x00' IS considered binary '\xff\xff\xff\x00' IS considered binary
'\x00\x00\xff\x00' IS considered binary '\x00\x00\xff\x00' IS considered binary
'\x00\x00\x00\x00' is NOT considered binary '\x00\x00\x00\x00' is NOT considered binary
two, one, three
one
hell\o w\orl\d hell\o w\orl\d
\\hello world\\ \\hello world\\
hello world hello world

View file

@ -19,4 +19,10 @@ event zeek_init()
print join_string_vec(d, "-"); print join_string_vec(d, "-");
print join_string_vec(e, "."); print join_string_vec(e, ".");
print join_string_vec(c, "\x00"); print join_string_vec(c, "\x00");
local empty_set: set[string] = set();
print fmt("%s (empty)", join_string_set(empty_set, ", "));
print join_string_set(set("one"), ", ");
print join_string_set(set("one", "two", "three"), ", ");
print join_string_set(set("one", "two"), "");
} }

View file

@ -16,9 +16,6 @@ test_binary_string("\xFF\xFF\xFF\x00");
test_binary_string("\x00\x00\xFF\x00"); test_binary_string("\x00\x00\xFF\x00");
test_binary_string("\x00\x00\x00\x00"); test_binary_string("\x00\x00\x00\x00");
print join_string_set(set("one", "two", "three"), ", ");
print join_string_set(set("one"), ", ");
print string_escape("hello world", "od"); print string_escape("hello world", "od");
print string_escape("\\hello world\\", ""); print string_escape("\\hello world\\", "");