strings: Implement join_string_set() as bif

Not benchmarked, but this is probably faster than the script-level
loop that built the result with one or two cat() calls per element.
This commit is contained in:
Arne Welzel 2022-09-06 17:15:11 +02:00
parent 31aeb58e10
commit 0e28a7faf5
6 changed files with 69 additions and 28 deletions

View file

@ -9,29 +9,6 @@ function is_string_binary(s: string): bool
return |gsub(s, /[\x00-\x7f]/, "")| * 100 / |s| >= 25;
}
## Concatenates every element of a string set into one string, inserting a
## fixed delimiter between consecutive elements.
##
## ss: the set of strings whose elements are joined.
##
## j: the delimiter inserted between consecutive elements.
##
## Returns: a single string made of all elements of *ss*, in the order the
##          ``for`` loop visits them, separated by *j*. An empty set yields
##          the empty string.
function join_string_set(ss: set[string], j: string): string
	{
	local joined = "";
	local first = T;

	for ( elem in ss )
		{
		# The delimiter goes in front of every element except the first,
		# so the result never starts or ends with it.
		if ( first )
			first = F;
		else
			joined = cat(joined, j);

		joined = cat(joined, elem);
		}

	return joined;
	}
## Given a string, returns an escaped version.
##
## s: a string to escape.

View file

@ -127,6 +127,65 @@ function join_string_vec%(vec: string_vec, sep: string%): string
return zeek::make_intrusive<zeek::StringVal>(s);
%}
## Concatenates the elements of a string set into a single string, inserting
## a separator between consecutive elements.
##
## ss: The :zeek:type:`string_set` (``set[string]``) whose elements are joined.
##
## sep: The separator to insert between consecutive elements.
##
## Returns: The concatenation of all elements in *ss*, with *sep* placed
##          between each element. An empty set yields the empty string.
##
## .. zeek:see:: cat cat_sep string_cat
##    fmt
##    join_string_vec
function join_string_set%(ss: string_set, sep: string%): string
	%{
	ODesc d;
	d.SetStyle(RAW_STYLE);

	// Accept only a set indexed by exactly one string. Every other shape
	// is rejected with the same builtin error and an empty result.
	const auto& ss_type = ss->GetType();
	bool is_string_set = false;

	if ( ss_type->IsSet() )
		{
		const auto& index_types = ss_type->AsTableType()->GetIndexTypes();
		is_string_set = index_types.size() == 1 && index_types[0]->Tag() == TYPE_STRING;
		}

	if ( ! is_string_set )
		{
		zeek::emit_builtin_error("join_string_set() requires a string set argument");
		return val_mgr->EmptyString();
		}

	TableVal* set_val = ss->AsTableVal();
	const PDict<TableEntryVal>* entries = set_val->AsTable();

	if ( entries->Length() == 0 )
		return val_mgr->EmptyString();

	bool need_sep = false;

	for ( const auto& entry : *entries )
		{
		// The separator precedes every element except the first.
		if ( need_sep )
			d.AddN(reinterpret_cast<const char*>(sep->Bytes()), sep->Len());

		// The element is rebuilt from the entry's hash key and then
		// rendered into the descriptor.
		// NOTE(review): recreating the index per entry may be costly;
		// not measured.
		auto key = entry.GetHashKey();
		auto index = set_val->RecreateIndex(*key);
		index->Describe(&d);

		need_sep = true;
		}

	// Read the length before TakeBytes() so the value passed on does not
	// depend on the order in which the constructor arguments are evaluated.
	const auto len = d.Len();
	zeek::String* str = new zeek::String(true, d.TakeBytes(), len);
	str->SetUseFreeToDelete(true);

	return zeek::make_intrusive<zeek::StringVal>(str);
	%}
## Returns an edited version of a string that applies a special
## "backspace character" (usually ``\x08`` for backspace or ``\x7f`` for DEL).
## For example, ``edit("hello there", "e")`` returns ``"llo t"``.

View file

@ -4,3 +4,7 @@ thisisanothertest
Test
...hi..there
this\x00is\x00another\x00test
(empty)
one
two, one, three
twoone

View file

@ -3,8 +3,6 @@
'\xff\xff\xff\x00' IS considered binary
'\x00\x00\xff\x00' IS considered binary
'\x00\x00\x00\x00' is NOT considered binary
two, one, three
one
hell\o w\orl\d
\\hello world\\
hello world

View file

@ -19,4 +19,10 @@ event zeek_init()
print join_string_vec(d, "-");
print join_string_vec(e, ".");
print join_string_vec(c, "\x00");
local empty_set: set[string] = set();
print fmt("%s (empty)", join_string_set(empty_set, ", "));
print join_string_set(set("one"), ", ");
print join_string_set(set("one", "two", "three"), ", ");
print join_string_set(set("one", "two"), "");
}

View file

@ -16,9 +16,6 @@ test_binary_string("\xFF\xFF\xFF\x00");
test_binary_string("\x00\x00\xFF\x00");
test_binary_string("\x00\x00\x00\x00");
print join_string_set(set("one", "two", "three"), ", ");
print join_string_set(set("one"), ", ");
print string_escape("hello world", "od");
print string_escape("\\hello world\\", "");