From 6c240dc0bb301ac88be31676389eb03be69dbba8 Mon Sep 17 00:00:00 2001
From: yexiaochuan
Date: Wed, 30 Apr 2025 00:15:34 +0800
Subject: [PATCH 1/2] Add find_first string function

---
 src/script_opt/FuncInfo.cc         |  1 +
 src/strings.bif                    | 24 ++++++++++++++++++++++++
 testing/btest/bifs/find_first.zeek | 16 ++++++++++++++++
 3 files changed, 41 insertions(+)
 create mode 100644 testing/btest/bifs/find_first.zeek

diff --git a/src/script_opt/FuncInfo.cc b/src/script_opt/FuncInfo.cc
index be25a66a8f..046a9c6db8 100644
--- a/src/script_opt/FuncInfo.cc
+++ b/src/script_opt/FuncInfo.cc
@@ -265,6 +265,7 @@ static std::unordered_map func_attrs = {
     {"find_entropy", ATTR_FOLDABLE},
     {"find_in_zeekpath", ATTR_IDEMPOTENT}, // can error
     {"find_last", ATTR_FOLDABLE},
+    {"find_first", ATTR_FOLDABLE},
     {"find_str", ATTR_FOLDABLE},
     {"floor", ATTR_FOLDABLE},
     {"flush_all", ATTR_NO_SCRIPT_SIDE_EFFECTS},
diff --git a/src/strings.bif b/src/strings.bif
index 118bc82bb0..95cf5454c5 100644
--- a/src/strings.bif
+++ b/src/strings.bif
@@ -1107,6 +1107,30 @@ function find_last%(str: string, re: pattern%) : string
 	return zeek::val_mgr->EmptyString();
 	%}
 
+## Finds the first occurrence of a pattern in a string.
+##
+## str: The string to inspect.
+##
+## re: The pattern to look for in *str*.
+##
+## Returns: The first string in *str* that matches *re*, or the empty string.
+##
+## .. zeek:see:: find_all find_all_ordered find_last strstr
+function find_first%(str: string, re: pattern%) : string
+	%{
+	const u_char* s = str->Bytes();
+	const u_char* e = s + str->Len();
+
+	for ( const u_char* t = s; t < e; ++t )
+		{
+		int n = re->MatchPrefix(t, e - t);
+		if ( n >= 0 )
+			return zeek::make_intrusive<zeek::StringVal>(n, (const char*) t);
+		}
+
+	return zeek::val_mgr->EmptyString();
+	%}
+
 ## Returns a hex dump for given input data. The hex dump renders 16 bytes per
 ## line, with hex on the left and ASCII (where printable)
 ## on the right.
diff --git a/testing/btest/bifs/find_first.zeek b/testing/btest/bifs/find_first.zeek
new file mode 100644
index 0000000000..048de762d9
--- /dev/null
+++ b/testing/btest/bifs/find_first.zeek
@@ -0,0 +1,16 @@
+# @TEST-EXEC: zeek -b %INPUT >out
+# @TEST-EXEC: btest-diff out
+
+event zeek_init()
+	{
+	local a = "this is a test";
+	local pat = /hi|es/;
+	local pat2 = /aa|bb/;
+
+	local b = find_first(a, pat);
+	local b2 = find_first(a, pat2);
+
+	print b;
+	print "-------------------";
+	print |b2|;
+	}

From fd7045e27457fd8fb8764479b123787a020308af Mon Sep 17 00:00:00 2001
From: yexiaochuan
Date: Fri, 2 May 2025 11:51:26 +0800
Subject: [PATCH 2/2] Add baseline for find_first test, update comments, and
 reorder function imports

---
 src/script_opt/FuncInfo.cc                 | 2 +-
 src/strings.bif                            | 8 ++++----
 testing/btest/Baseline/bifs.find_first/out | 4 ++++
 3 files changed, 9 insertions(+), 5 deletions(-)
 create mode 100644 testing/btest/Baseline/bifs.find_first/out

diff --git a/src/script_opt/FuncInfo.cc b/src/script_opt/FuncInfo.cc
index 046a9c6db8..92f382db8f 100644
--- a/src/script_opt/FuncInfo.cc
+++ b/src/script_opt/FuncInfo.cc
@@ -263,9 +263,9 @@ static std::unordered_map func_attrs = {
     {"find_all", ATTR_FOLDABLE},
     {"find_all_ordered", ATTR_FOLDABLE},
     {"find_entropy", ATTR_FOLDABLE},
+    {"find_first", ATTR_FOLDABLE},
     {"find_in_zeekpath", ATTR_IDEMPOTENT}, // can error
     {"find_last", ATTR_FOLDABLE},
-    {"find_first", ATTR_FOLDABLE},
     {"find_str", ATTR_FOLDABLE},
     {"floor", ATTR_FOLDABLE},
     {"flush_all", ATTR_NO_SCRIPT_SIDE_EFFECTS},
diff --git a/src/strings.bif b/src/strings.bif
index 95cf5454c5..b212ec782b 100644
--- a/src/strings.bif
+++ b/src/strings.bif
@@ -531,7 +531,7 @@ function strcmp%(s1: string, s2: string%): int
 ## Returns: The location of *little* in *big*, or 0 if *little* is not found in
 ##          *big*.
 ##
-## .. zeek:see:: find_all find_last
+## .. zeek:see:: find_all find_first find_last
 function strstr%(big: string, little: string%): count
 	%{
 	return zeek::val_mgr->Count(
@@ -1015,7 +1015,7 @@ static bool exceeds_max_string_length(int str_len, int max_size, zeek::detail::F
 ##
 ## Returns: The set of strings in *str* that match *re*, or the empty set.
 ##
-## .. zeek:see: find_all_ordered find_last strstr
+## .. zeek:see:: find_all_ordered find_first find_last strstr
 function find_all%(str: string, re: pattern, max_str_size: int &default=-1%) : string_set
 	%{
 	auto a = zeek::make_intrusive<zeek::TableVal>(zeek::id::string_set);
@@ -1055,7 +1055,7 @@ function find_all%(str: string, re: pattern, max_str_size: int &default=-1%) : s
 ##
 ## Returns: All strings in *str* that match *re*, or an empty vector.
 ##
-## .. zeek:see: find_all find_last strstr
+## .. zeek:see:: find_all find_first find_last strstr
 function find_all_ordered%(str: string, re: pattern, max_str_size: int &default=-1%) : string_vec
 	%{
 	auto a = zeek::make_intrusive<zeek::VectorVal>(zeek::id::string_vec);
@@ -1091,7 +1091,7 @@ function find_all_ordered%(str: string, re: pattern, max_str_size: int &default=
 ##
 ## Returns: The last string in *str* that matches *re*, or the empty string.
 ##
-## .. zeek:see: find_all find_all_ordered strstr
+## .. zeek:see:: find_all find_all_ordered find_first strstr
 function find_last%(str: string, re: pattern%) : string
 	%{
 	const u_char* s = str->Bytes();
diff --git a/testing/btest/Baseline/bifs.find_first/out b/testing/btest/Baseline/bifs.find_first/out
new file mode 100644
index 0000000000..15ceaa5904
--- /dev/null
+++ b/testing/btest/Baseline/bifs.find_first/out
@@ -0,0 +1,4 @@
+### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
+hi
+-------------------
+0