diff --git a/src/strings.bif b/src/strings.bif index 44b0c57eb6..6044813476 100644 --- a/src/strings.bif +++ b/src/strings.bif @@ -138,27 +138,27 @@ function sort_string_array%(a: string_array%): string_array function edit%(arg_s: string, arg_edit_char: string%): string %{ - const char* s = arg_s->AsString()->CheckString(); - const char* edit_s = arg_edit_char->AsString()->CheckString(); - - if ( strlen(edit_s) != 1 ) + if ( arg_edit_char->Len() != 1 ) builtin_run_time("not exactly one edit character", @ARG@[1]); + + const u_char* s = arg_s->Bytes(); + const u_char* edit_s = arg_edit_char->Bytes(); - char edit_c = *edit_s; + u_char edit_c = *edit_s; - int n = strlen(s) + 1; - char* new_s = new char[n]; + int n = arg_s->Len(); + u_char* new_s = new u_char[n+1]; int ind = 0; - for ( ; *s; ++s ) + for ( int i=0; iAsString(); TableVal* a = new TableVal(internal_type("string_array")->AsTableType()); ListVal* other_strings = 0; @@ -209,66 +208,54 @@ Val* do_split(StringVal* str_val, RE_Matcher* re, TableVal* other_sep, // the future we expect to change this by giving RE_Matcher a // const char* segment. - const char* s = str->CheckString(); - int len = strlen(s); - const char* end_of_s = s + len; + const u_char* s = str_val->Bytes(); + int n = str_val->Len(); + const u_char* end_of_s = s + n; int num = 0; int num_sep = 0; + + int offset = 0; - while ( 1 ) + while ( n > 0 ) { - int offset = 0; - const char* t; - - if ( max_num_sep > 0 && num_sep >= max_num_sep ) - t = end_of_s; - else + offset = 0; + // Find next match offset. + int end_of_match; + while ( n > 0 && + (end_of_match = re->MatchPrefix(&s[offset], n)) <= 0 ) { - for ( t = s; t < end_of_s; ++t ) - { - offset = re->MatchPrefix(t); - - if ( other_strings ) - { - val_list* vl = other_strings->Vals(); - loop_over_list(*vl, i) - { - const BroString* sub = - (*vl)[i]->AsString(); - if ( sub->Len() > offset && - match_prefix(end_of_s - t, - t, sub->Len(), - (const char*) (sub->Bytes())) ) - { - offset = sub->Len(); - } - } - } - - if ( offset > 0 ) - break; - } + // Move on to next character. + ++offset; + --n; } - + Val* ind = new Val(++num, TYPE_COUNT); - a->Assign(ind, new StringVal(t - s, s)); + a->Assign(ind, new StringVal(offset, (const char*) s)); Unref(ind); - if ( t >= end_of_s ) + // No more separators will be needed if this is the end of string. + if ( n <= 0 ) break; - ++num_sep; - if ( incl_sep ) { // including the part that matches the pattern ind = new Val(++num, TYPE_COUNT); - a->Assign(ind, new StringVal(offset, t)); + a->Assign(ind, new StringVal(end_of_match, (const char*) s+offset)); Unref(ind); } - - s = t + offset; + + ++num_sep; + if ( max_num_sep && num_sep >= max_num_sep ) + break; + + offset += end_of_match; + n -= end_of_match; + s += offset; + if ( s > end_of_s ) + { internal_error("RegMatch in split goes beyond the string"); + } } if ( other_strings ) @@ -476,42 +463,38 @@ function subst_string%(s: string, from: string, to: string%): string function to_lower%(str: string%): string %{ - const char* s = str->CheckString(); - int n = strlen(s) + 1; + const u_char* s = str->Bytes(); + int n = str->Len(); char* lower_s = new char[n]; + char* ls = lower_s; - char* ls; - for ( ls = lower_s; *s; ++s ) + for (int i=0; iCheckString(); - int n = strlen(s) + 1; + const u_char* s = str->Bytes(); + int n = str->Len(); char* upper_s = new char[n]; - - char* us; - for ( us = upper_s; *s; ++s ) + char* us = upper_s; + + for (int i=0; iCheckString(); + const u_char* s = str->Bytes(); + int n = str->Len(); - int n = strlen(s) + 1; - char* strip_s = new char[n]; - - if ( n == 1 ) + if ( n == 0 ) // Empty string. - return new StringVal(new BroString(1, byte_vec(strip_s), 0)); + return new StringVal(new BroString(s, n, 1)); - while ( isspace(*s) ) - ++s; - - strncpy(strip_s, s, n); - - char* s2 = strip_s; - char* e = &s2[strlen(s2) - 1]; - - while ( e > s2 && isspace(*e) ) + const u_char* sp = s; + // Move a pointer to the end of the string + const u_char* e = &sp[n-1]; + while ( e > sp && isspace(*e) ) --e; - e[1] = '\0'; // safe even if e hasn't changed, due to n = strlen + 1 + // Move the pointer for the beginning of the string + while ( isspace(*sp) ) + ++sp; - return new StringVal(new BroString(1, byte_vec(s2), (e-s2)+1)); + return new StringVal(new BroString(sp, e-sp+1, 1)); %} function string_fill%(len: int, source: string%): string %{ - const char* src = source->CheckString(); - - int sn = strlen(src); + const u_char* src = source->Bytes(); + int n = source->Len(); char* dst = new char[len]; - for ( int i = 0; i < len; i += sn ) - ::memcpy((dst + i), src, min(sn, len - i)); + for ( int i = 0; i < len; i += n ) + ::memcpy((dst + i), src, min(n, len - i)); dst[len - 1] = 0; @@ -650,11 +627,12 @@ function string_fill%(len: int, source: string%): string # function str_shell_escape%(source: string%): string %{ - unsigned j = 0; - const char* src = source->CheckString(); - char* dst = new char[strlen(src) * 2 + 1]; + uint j = 0; + const u_char* src = source->Bytes(); + uint n = source->Len(); + byte_vec dst = new u_char[n * 2 + 1]; - for ( unsigned i = 0; i < strlen(src); ++i ) + for ( uint i = 0; i < n; ++i ) { switch ( src[i] ) { case '`': case '"': case '\\': case '$': @@ -672,7 +650,7 @@ function str_shell_escape%(source: string%): string } dst[j] = '\0'; - return new StringVal(new BroString(1, byte_vec(dst), j)); + return new StringVal(new BroString(1, dst, j)); %} # Returns all occurrences of the given pattern in the given string (an empty