mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 14:48:21 +00:00
Reverting the null-byte change to strings.bif once more.
I'm actually not sure how that change got back in again, but it did; it must have been reintroduced during the merges.
This commit is contained in:
parent
dc5d3560f7
commit
c6631d1483
1 changed file with 101 additions and 79 deletions
170
src/strings.bif
170
src/strings.bif
|
@ -138,27 +138,27 @@ function sort_string_array%(a: string_array%): string_array
|
||||||
|
|
||||||
function edit%(arg_s: string, arg_edit_char: string%): string
|
function edit%(arg_s: string, arg_edit_char: string%): string
|
||||||
%{
|
%{
|
||||||
if ( arg_edit_char->Len() != 1 )
|
const char* s = arg_s->AsString()->CheckString();
|
||||||
|
const char* edit_s = arg_edit_char->AsString()->CheckString();
|
||||||
|
|
||||||
|
if ( strlen(edit_s) != 1 )
|
||||||
builtin_run_time("not exactly one edit character", @ARG@[1]);
|
builtin_run_time("not exactly one edit character", @ARG@[1]);
|
||||||
|
|
||||||
const u_char* s = arg_s->Bytes();
|
char edit_c = *edit_s;
|
||||||
const u_char* edit_s = arg_edit_char->Bytes();
|
|
||||||
|
|
||||||
u_char edit_c = *edit_s;
|
int n = strlen(s) + 1;
|
||||||
|
char* new_s = new char[n];
|
||||||
int n = arg_s->Len();
|
|
||||||
u_char* new_s = new u_char[n+1];
|
|
||||||
int ind = 0;
|
int ind = 0;
|
||||||
|
|
||||||
for ( int i=0; i<n; ++i )
|
for ( ; *s; ++s )
|
||||||
{
|
{
|
||||||
if ( s[i] == edit_c )
|
if ( *s == edit_c )
|
||||||
{ // Delete last character
|
{ // Delete last character
|
||||||
if ( --ind < 0 )
|
if ( --ind < 0 )
|
||||||
ind = 0;
|
ind = 0;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
new_s[ind++] = s[i];
|
new_s[ind++] = *s;
|
||||||
}
|
}
|
||||||
|
|
||||||
new_s[ind] = '\0';
|
new_s[ind] = '\0';
|
||||||
|
@ -198,6 +198,7 @@ static int match_prefix(int s_len, const char* s, int t_len, const char* t)
|
||||||
Val* do_split(StringVal* str_val, RE_Matcher* re, TableVal* other_sep,
|
Val* do_split(StringVal* str_val, RE_Matcher* re, TableVal* other_sep,
|
||||||
int incl_sep, int max_num_sep)
|
int incl_sep, int max_num_sep)
|
||||||
{
|
{
|
||||||
|
const BroString* str = str_val->AsString();
|
||||||
TableVal* a = new TableVal(internal_type("string_array")->AsTableType());
|
TableVal* a = new TableVal(internal_type("string_array")->AsTableType());
|
||||||
ListVal* other_strings = 0;
|
ListVal* other_strings = 0;
|
||||||
|
|
||||||
|
@ -208,55 +209,67 @@ Val* do_split(StringVal* str_val, RE_Matcher* re, TableVal* other_sep,
|
||||||
// the future we expect to change this by giving RE_Matcher a
|
// the future we expect to change this by giving RE_Matcher a
|
||||||
// const char* segment.
|
// const char* segment.
|
||||||
|
|
||||||
const u_char* s = str_val->Bytes();
|
const char* s = str->CheckString();
|
||||||
int n = str_val->Len();
|
int len = strlen(s);
|
||||||
const u_char* end_of_s = s + n;
|
const char* end_of_s = s + len;
|
||||||
int num = 0;
|
int num = 0;
|
||||||
int num_sep = 0;
|
int num_sep = 0;
|
||||||
|
|
||||||
|
while ( 1 )
|
||||||
|
{
|
||||||
int offset = 0;
|
int offset = 0;
|
||||||
|
const char* t;
|
||||||
|
|
||||||
while ( n > 0 )
|
if ( max_num_sep > 0 && num_sep >= max_num_sep )
|
||||||
|
t = end_of_s;
|
||||||
|
else
|
||||||
{
|
{
|
||||||
offset = 0;
|
for ( t = s; t < end_of_s; ++t )
|
||||||
// Find next match offset.
|
|
||||||
int end_of_match;
|
|
||||||
while ( n > 0 &&
|
|
||||||
(end_of_match = re->MatchPrefix(&s[offset], n)) <= 0 )
|
|
||||||
{
|
{
|
||||||
// Move on to next character.
|
offset = re->MatchPrefix(t);
|
||||||
++offset;
|
|
||||||
--n;
|
if ( other_strings )
|
||||||
|
{
|
||||||
|
val_list* vl = other_strings->Vals();
|
||||||
|
loop_over_list(*vl, i)
|
||||||
|
{
|
||||||
|
const BroString* sub =
|
||||||
|
(*vl)[i]->AsString();
|
||||||
|
if ( sub->Len() > offset &&
|
||||||
|
match_prefix(end_of_s - t,
|
||||||
|
t, sub->Len(),
|
||||||
|
(const char*) (sub->Bytes())) )
|
||||||
|
{
|
||||||
|
offset = sub->Len();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( offset > 0 )
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Val* ind = new Val(++num, TYPE_COUNT);
|
Val* ind = new Val(++num, TYPE_COUNT);
|
||||||
a->Assign(ind, new StringVal(offset, (const char*) s));
|
a->Assign(ind, new StringVal(t - s, s));
|
||||||
Unref(ind);
|
Unref(ind);
|
||||||
|
|
||||||
// No more separators will be needed if this is the end of string.
|
if ( t >= end_of_s )
|
||||||
if ( n <= 0 )
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
++num_sep;
|
||||||
|
|
||||||
if ( incl_sep )
|
if ( incl_sep )
|
||||||
{ // including the part that matches the pattern
|
{ // including the part that matches the pattern
|
||||||
ind = new Val(++num, TYPE_COUNT);
|
ind = new Val(++num, TYPE_COUNT);
|
||||||
a->Assign(ind, new StringVal(end_of_match, (const char*) s+offset));
|
a->Assign(ind, new StringVal(offset, t));
|
||||||
Unref(ind);
|
Unref(ind);
|
||||||
}
|
}
|
||||||
|
|
||||||
++num_sep;
|
s = t + offset;
|
||||||
if ( max_num_sep && num_sep >= max_num_sep )
|
|
||||||
break;
|
|
||||||
|
|
||||||
offset += end_of_match;
|
|
||||||
n -= end_of_match;
|
|
||||||
s += offset;
|
|
||||||
|
|
||||||
if ( s > end_of_s )
|
if ( s > end_of_s )
|
||||||
{
|
|
||||||
internal_error("RegMatch in split goes beyond the string");
|
internal_error("RegMatch in split goes beyond the string");
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if ( other_strings )
|
if ( other_strings )
|
||||||
delete other_strings;
|
delete other_strings;
|
||||||
|
@ -463,38 +476,42 @@ function subst_string%(s: string, from: string, to: string%): string
|
||||||
|
|
||||||
function to_lower%(str: string%): string
|
function to_lower%(str: string%): string
|
||||||
%{
|
%{
|
||||||
const u_char* s = str->Bytes();
|
const char* s = str->CheckString();
|
||||||
int n = str->Len();
|
int n = strlen(s) + 1;
|
||||||
char* lower_s = new char[n];
|
char* lower_s = new char[n];
|
||||||
char* ls = lower_s;
|
|
||||||
|
|
||||||
for (int i=0; i<n; ++i)
|
char* ls;
|
||||||
|
for ( ls = lower_s; *s; ++s )
|
||||||
{
|
{
|
||||||
if ( isascii(s[i]) && isupper(s[i]) )
|
if ( isascii(*s) && isupper(*s) )
|
||||||
*ls++ = tolower(s[i]);
|
*ls++ = tolower(*s);
|
||||||
else
|
else
|
||||||
*ls++ = s[i];
|
*ls++ = *s;
|
||||||
}
|
}
|
||||||
|
|
||||||
return new StringVal(new BroString(1, byte_vec(lower_s), n));
|
*ls = '\0';
|
||||||
|
|
||||||
|
return new StringVal(new BroString(1, byte_vec(lower_s), n-1));
|
||||||
%}
|
%}
|
||||||
|
|
||||||
function to_upper%(str: string%): string
|
function to_upper%(str: string%): string
|
||||||
%{
|
%{
|
||||||
const u_char* s = str->Bytes();
|
const char* s = str->CheckString();
|
||||||
int n = str->Len();
|
int n = strlen(s) + 1;
|
||||||
char* upper_s = new char[n];
|
char* upper_s = new char[n];
|
||||||
char* us = upper_s;
|
|
||||||
|
|
||||||
for (int i=0; i<n; ++i)
|
char* us;
|
||||||
|
for ( us = upper_s; *s; ++s )
|
||||||
{
|
{
|
||||||
if ( isascii(s[i]) && islower(s[i]) )
|
if ( isascii(*s) && islower(*s) )
|
||||||
*us++ = toupper(s[i]);
|
*us++ = toupper(*s);
|
||||||
else
|
else
|
||||||
*us++ = s[i];
|
*us++ = *s;
|
||||||
}
|
}
|
||||||
|
|
||||||
return new StringVal(new BroString(1, byte_vec(upper_s), n));
|
*us = '\0';
|
||||||
|
|
||||||
|
return new StringVal(new BroString(1, byte_vec(upper_s), n-1));
|
||||||
%}
|
%}
|
||||||
|
|
||||||
function clean%(str: string%): string
|
function clean%(str: string%): string
|
||||||
|
@ -587,34 +604,40 @@ function str_split%(s: string, idx: index_vec%): string_vec
|
||||||
|
|
||||||
function strip%(str: string%): string
|
function strip%(str: string%): string
|
||||||
%{
|
%{
|
||||||
const u_char* s = str->Bytes();
|
const char* s = str->CheckString();
|
||||||
int n = str->Len();
|
|
||||||
|
|
||||||
if ( n == 0 )
|
int n = strlen(s) + 1;
|
||||||
|
char* strip_s = new char[n];
|
||||||
|
|
||||||
|
if ( n == 1 )
|
||||||
// Empty string.
|
// Empty string.
|
||||||
return new StringVal(new BroString(s, n, 1));
|
return new StringVal(new BroString(1, byte_vec(strip_s), 0));
|
||||||
|
|
||||||
const u_char* sp = s;
|
while ( isspace(*s) )
|
||||||
// Move a pointer to the end of the string
|
++s;
|
||||||
const u_char* e = &sp[n-1];
|
|
||||||
while ( e > sp && isspace(*e) )
|
strncpy(strip_s, s, n);
|
||||||
|
|
||||||
|
char* s2 = strip_s;
|
||||||
|
char* e = &s2[strlen(s2) - 1];
|
||||||
|
|
||||||
|
while ( e > s2 && isspace(*e) )
|
||||||
--e;
|
--e;
|
||||||
|
|
||||||
// Move the pointer for the beginning of the string
|
e[1] = '\0'; // safe even if e hasn't changed, due to n = strlen + 1
|
||||||
while ( isspace(*sp) )
|
|
||||||
++sp;
|
|
||||||
|
|
||||||
return new StringVal(new BroString(sp, e-sp+1, 1));
|
return new StringVal(new BroString(1, byte_vec(s2), (e-s2)+1));
|
||||||
%}
|
%}
|
||||||
|
|
||||||
function string_fill%(len: int, source: string%): string
|
function string_fill%(len: int, source: string%): string
|
||||||
%{
|
%{
|
||||||
const u_char* src = source->Bytes();
|
const char* src = source->CheckString();
|
||||||
int n = source->Len();
|
|
||||||
|
int sn = strlen(src);
|
||||||
char* dst = new char[len];
|
char* dst = new char[len];
|
||||||
|
|
||||||
for ( int i = 0; i < len; i += n )
|
for ( int i = 0; i < len; i += sn )
|
||||||
::memcpy((dst + i), src, min(n, len - i));
|
::memcpy((dst + i), src, min(sn, len - i));
|
||||||
|
|
||||||
dst[len - 1] = 0;
|
dst[len - 1] = 0;
|
||||||
|
|
||||||
|
@ -627,12 +650,11 @@ function string_fill%(len: int, source: string%): string
|
||||||
#
|
#
|
||||||
function str_shell_escape%(source: string%): string
|
function str_shell_escape%(source: string%): string
|
||||||
%{
|
%{
|
||||||
uint j = 0;
|
unsigned j = 0;
|
||||||
const u_char* src = source->Bytes();
|
const char* src = source->CheckString();
|
||||||
uint n = source->Len();
|
char* dst = new char[strlen(src) * 2 + 1];
|
||||||
byte_vec dst = new u_char[n * 2 + 1];
|
|
||||||
|
|
||||||
for ( uint i = 0; i < n; ++i )
|
for ( unsigned i = 0; i < strlen(src); ++i )
|
||||||
{
|
{
|
||||||
switch ( src[i] ) {
|
switch ( src[i] ) {
|
||||||
case '`': case '"': case '\\': case '$':
|
case '`': case '"': case '\\': case '$':
|
||||||
|
@ -650,7 +672,7 @@ function str_shell_escape%(source: string%): string
|
||||||
}
|
}
|
||||||
|
|
||||||
dst[j] = '\0';
|
dst[j] = '\0';
|
||||||
return new StringVal(new BroString(1, dst, j));
|
return new StringVal(new BroString(1, byte_vec(dst), j));
|
||||||
%}
|
%}
|
||||||
|
|
||||||
# Returns all occurrences of the given pattern in the given string (an empty
|
# Returns all occurrences of the given pattern in the given string (an empty
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue