Modified all of the string functions that used the CheckString
function. They now use Bytes and Len to extract the bytes from
string arguments. As a result, these functions no longer fail
when presented with strings containing NUL bytes.

Signed-off-by: Seth Hall <seth@icir.org>
This commit is contained in:
Seth Hall 2010-10-13 14:32:27 -04:00
parent 61757ac78b
commit 5bf18fdb7f

View file

@ -138,27 +138,27 @@ function sort_string_array%(a: string_array%): string_array
function edit%(arg_s: string, arg_edit_char: string%): string function edit%(arg_s: string, arg_edit_char: string%): string
%{ %{
const char* s = arg_s->AsString()->CheckString(); if ( arg_edit_char->Len() != 1 )
const char* edit_s = arg_edit_char->AsString()->CheckString();
if ( strlen(edit_s) != 1 )
builtin_run_time("not exactly one edit character", @ARG@[1]); builtin_run_time("not exactly one edit character", @ARG@[1]);
char edit_c = *edit_s; const u_char* s = arg_s->Bytes();
const u_char* edit_s = arg_edit_char->Bytes();
int n = strlen(s) + 1; u_char edit_c = *edit_s;
char* new_s = new char[n];
int n = arg_s->Len();
u_char* new_s = new u_char[n+1];
int ind = 0; int ind = 0;
for ( ; *s; ++s ) for ( int i=0; i<n; ++i )
{ {
if ( *s == edit_c ) if ( s[i] == edit_c )
{ // Delete last character { // Delete last character
if ( --ind < 0 ) if ( --ind < 0 )
ind = 0; ind = 0;
} }
else else
new_s[ind++] = *s; new_s[ind++] = s[i];
} }
new_s[ind] = '\0'; new_s[ind] = '\0';
@ -198,7 +198,6 @@ static int match_prefix(int s_len, const char* s, int t_len, const char* t)
Val* do_split(StringVal* str_val, RE_Matcher* re, TableVal* other_sep, Val* do_split(StringVal* str_val, RE_Matcher* re, TableVal* other_sep,
int incl_sep, int max_num_sep) int incl_sep, int max_num_sep)
{ {
const BroString* str = str_val->AsString();
TableVal* a = new TableVal(internal_type("string_array")->AsTableType()); TableVal* a = new TableVal(internal_type("string_array")->AsTableType());
ListVal* other_strings = 0; ListVal* other_strings = 0;
@ -209,67 +208,55 @@ Val* do_split(StringVal* str_val, RE_Matcher* re, TableVal* other_sep,
// the future we expect to change this by giving RE_Matcher a // the future we expect to change this by giving RE_Matcher a
// const char* segment. // const char* segment.
const char* s = str->CheckString(); const u_char* s = str_val->Bytes();
int len = strlen(s); int n = str_val->Len();
const char* end_of_s = s + len; const u_char* end_of_s = s + n;
int num = 0; int num = 0;
int num_sep = 0; int num_sep = 0;
while ( 1 )
{
int offset = 0; int offset = 0;
const char* t;
if ( max_num_sep > 0 && num_sep >= max_num_sep ) while ( n > 0 )
t = end_of_s;
else
{ {
for ( t = s; t < end_of_s; ++t ) offset = 0;
// Find next match offset.
int end_of_match;
while ( n > 0 &&
(end_of_match = re->MatchPrefix(&s[offset], n)) <= 0 )
{ {
offset = re->MatchPrefix(t); // Move on to next character.
++offset;
if ( other_strings ) --n;
{
val_list* vl = other_strings->Vals();
loop_over_list(*vl, i)
{
const BroString* sub =
(*vl)[i]->AsString();
if ( sub->Len() > offset &&
match_prefix(end_of_s - t,
t, sub->Len(),
(const char*) (sub->Bytes())) )
{
offset = sub->Len();
}
}
}
if ( offset > 0 )
break;
}
} }
Val* ind = new Val(++num, TYPE_COUNT); Val* ind = new Val(++num, TYPE_COUNT);
a->Assign(ind, new StringVal(t - s, s)); a->Assign(ind, new StringVal(offset, (const char*) s));
Unref(ind); Unref(ind);
if ( t >= end_of_s ) // No more separators will be needed if this is the end of string.
if ( n <= 0 )
break; break;
++num_sep;
if ( incl_sep ) if ( incl_sep )
{ // including the part that matches the pattern { // including the part that matches the pattern
ind = new Val(++num, TYPE_COUNT); ind = new Val(++num, TYPE_COUNT);
a->Assign(ind, new StringVal(offset, t)); a->Assign(ind, new StringVal(end_of_match, (const char*) s+offset));
Unref(ind); Unref(ind);
} }
s = t + offset; ++num_sep;
if ( max_num_sep && num_sep >= max_num_sep )
break;
offset += end_of_match;
n -= end_of_match;
s += offset;
if ( s > end_of_s ) if ( s > end_of_s )
{
internal_error("RegMatch in split goes beyond the string"); internal_error("RegMatch in split goes beyond the string");
} }
}
if ( other_strings ) if ( other_strings )
delete other_strings; delete other_strings;
@ -476,42 +463,38 @@ function subst_string%(s: string, from: string, to: string%): string
function to_lower%(str: string%): string function to_lower%(str: string%): string
%{ %{
// Lower-cases the ASCII upper-case bytes of `str` and returns the result as a
// new string value; every other byte is copied through unchanged.
//
// This text appears to be a two-column diff rendering: where a line carries
// two statements, the first is the pre-change code and the second is the
// post-change code.
//   - Pre-change: takes the input from CheckString(), sizes the buffer with
//     strlen(s) + 1, and loops while *s is non-zero, so it stops at the first
//     zero byte.
//   - Post-change: takes the input from Bytes() and Len() and loops over all
//     n bytes by index, so zero bytes inside the string are copied like any
//     other byte.
//
// NOTE(review): the post-change code allocates exactly n bytes and no longer
// writes a terminating '\0', yet still passes 1 as the first BroString
// constructor argument, as the pre-change code did for its NUL-terminated
// buffer. If that argument declares a trailing NUL at index n, the buffer
// should be n + 1 bytes with the terminator written -- confirm against the
// BroString constructor.
const char* s = str->CheckString(); const u_char* s = str->Bytes();
int n = strlen(s) + 1; int n = str->Len();
char* lower_s = new char[n]; char* lower_s = new char[n];
char* ls = lower_s;
char* ls; for (int i=0; i<n; ++i)
for ( ls = lower_s; *s; ++s )
{ {
// isascii() guards the conversion, so only 7-bit ASCII bytes are altered.
if ( isascii(*s) && isupper(*s) ) if ( isascii(s[i]) && isupper(s[i]) )
*ls++ = tolower(*s); *ls++ = tolower(s[i]);
else else
*ls++ = *s; *ls++ = s[i];
} }
*ls = '\0'; return new StringVal(new BroString(1, byte_vec(lower_s), n));
return new StringVal(new BroString(1, byte_vec(lower_s), n-1));
%} %}
function to_upper%(str: string%): string function to_upper%(str: string%): string
%{ %{
// Upper-cases the ASCII lower-case bytes of `str` and returns the result as a
// new string value; every other byte is copied through unchanged.
//
// This text appears to be a two-column diff rendering: where a line carries
// two statements, the first is the pre-change code and the second is the
// post-change code.
//   - Pre-change: takes the input from CheckString(), sizes the buffer with
//     strlen(s) + 1, and loops while *s is non-zero, so it stops at the first
//     zero byte.
//   - Post-change: takes the input from Bytes() and Len() and loops over all
//     n bytes by index, so zero bytes inside the string are copied like any
//     other byte.
//
// NOTE(review): the post-change code allocates exactly n bytes and no longer
// writes a terminating '\0', yet still passes 1 as the first BroString
// constructor argument, as the pre-change code did for its NUL-terminated
// buffer. If that argument declares a trailing NUL at index n, the buffer
// should be n + 1 bytes with the terminator written -- confirm against the
// BroString constructor.
const char* s = str->CheckString(); const u_char* s = str->Bytes();
int n = strlen(s) + 1; int n = str->Len();
char* upper_s = new char[n]; char* upper_s = new char[n];
char* us = upper_s;
char* us; for (int i=0; i<n; ++i)
for ( us = upper_s; *s; ++s )
{ {
// isascii() guards the conversion, so only 7-bit ASCII bytes are altered.
if ( isascii(*s) && islower(*s) ) if ( isascii(s[i]) && islower(s[i]) )
*us++ = toupper(*s); *us++ = toupper(s[i]);
else else
*us++ = *s; *us++ = s[i];
} }
*us = '\0'; return new StringVal(new BroString(1, byte_vec(upper_s), n));
return new StringVal(new BroString(1, byte_vec(upper_s), n-1));
%} %}
function clean%(str: string%): string function clean%(str: string%): string
@ -604,40 +587,34 @@ function str_split%(s: string, idx: index_vec%): string_vec
function strip%(str: string%): string function strip%(str: string%): string
%{ %{
// Returns `str` with leading and trailing whitespace (as classified by
// isspace()) removed.
//
// This text appears to be a two-column diff rendering: where a line carries
// two statements, the first is the pre-change code and the second is the
// post-change code.
//   - Pre-change: reads the input via CheckString(), skips leading whitespace,
//     copies the remainder into a freshly allocated buffer with strncpy(),
//     then walks back from the end and writes a '\0' after the last
//     non-space character.
//   - Post-change: reads the input via Bytes() and Len(), makes no
//     intermediate copy, and hands the [sp, e] sub-range of the original
//     bytes to the BroString constructor.
//
// NOTE(review): in the post-change code the trailing-whitespace loop is
// bounded by `e > sp`, but the leading-whitespace loop `while ( isspace(*sp) )`
// has no bound. For an input that is entirely whitespace, e stops at the
// first byte while sp advances past it, so `e-sp+1` is zero or negative and
// sp only stops at whatever non-space byte follows the data (presumably a
// terminating NUL -- confirm). Bounding the loop with `sp <= e` would avoid
// this.
const char* s = str->CheckString(); const u_char* s = str->Bytes();
int n = str->Len();
int n = strlen(s) + 1; if ( n == 0 )
char* strip_s = new char[n];
if ( n == 1 )
// Empty string. // Empty string.
return new StringVal(new BroString(1, byte_vec(strip_s), 0)); return new StringVal(new BroString(s, n, 1));
while ( isspace(*s) ) const u_char* sp = s;
++s; // Move a pointer to the end of the string
const u_char* e = &sp[n-1];
strncpy(strip_s, s, n); while ( e > sp && isspace(*e) )
char* s2 = strip_s;
char* e = &s2[strlen(s2) - 1];
while ( e > s2 && isspace(*e) )
--e; --e;
e[1] = '\0'; // safe even if e hasn't changed, due to n = strlen + 1 // Move the pointer for the beginning of the string
while ( isspace(*sp) )
++sp;
return new StringVal(new BroString(1, byte_vec(s2), (e-s2)+1)); return new StringVal(new BroString(sp, e-sp+1, 1));
%} %}
function string_fill%(len: int, source: string%): string function string_fill%(len: int, source: string%): string
%{ %{
const char* src = source->CheckString(); const u_char* src = source->Bytes();
int n = source->Len();
int sn = strlen(src);
char* dst = new char[len]; char* dst = new char[len];
for ( int i = 0; i < len; i += sn ) for ( int i = 0; i < len; i += n )
::memcpy((dst + i), src, min(sn, len - i)); ::memcpy((dst + i), src, min(n, len - i));
dst[len - 1] = 0; dst[len - 1] = 0;
@ -650,11 +627,12 @@ function string_fill%(len: int, source: string%): string
# #
function str_shell_escape%(source: string%): string function str_shell_escape%(source: string%): string
%{ %{
unsigned j = 0; uint j = 0;
const char* src = source->CheckString(); const u_char* src = source->Bytes();
char* dst = new char[strlen(src) * 2 + 1]; uint n = source->Len();
byte_vec dst = new u_char[n * 2 + 1];
for ( unsigned i = 0; i < strlen(src); ++i ) for ( uint i = 0; i < n; ++i )
{ {
switch ( src[i] ) { switch ( src[i] ) {
case '`': case '"': case '\\': case '$': case '`': case '"': case '\\': case '$':
@ -672,7 +650,7 @@ function str_shell_escape%(source: string%): string
} }
dst[j] = '\0'; dst[j] = '\0';
return new StringVal(new BroString(1, byte_vec(dst), j)); return new StringVal(new BroString(1, dst, j));
%} %}
# Returns all occurrences of the given pattern in the given string (an empty # Returns all occurrences of the given pattern in the given string (an empty