diff --git a/src/strings.bif b/src/strings.bif index 6044813476..44b0c57eb6 100644 --- a/src/strings.bif +++ b/src/strings.bif @@ -138,27 +138,27 @@ function sort_string_array%(a: string_array%): string_array function edit%(arg_s: string, arg_edit_char: string%): string %{ - if ( arg_edit_char->Len() != 1 ) + const char* s = arg_s->AsString()->CheckString(); + const char* edit_s = arg_edit_char->AsString()->CheckString(); + + if ( strlen(edit_s) != 1 ) builtin_run_time("not exactly one edit character", @ARG@[1]); - - const u_char* s = arg_s->Bytes(); - const u_char* edit_s = arg_edit_char->Bytes(); - u_char edit_c = *edit_s; + char edit_c = *edit_s; - int n = arg_s->Len(); - u_char* new_s = new u_char[n+1]; + int n = strlen(s) + 1; + char* new_s = new char[n]; int ind = 0; - for ( int i=0; iAsString(); TableVal* a = new TableVal(internal_type("string_array")->AsTableType()); ListVal* other_strings = 0; @@ -208,54 +209,66 @@ Val* do_split(StringVal* str_val, RE_Matcher* re, TableVal* other_sep, // the future we expect to change this by giving RE_Matcher a // const char* segment. - const u_char* s = str_val->Bytes(); - int n = str_val->Len(); - const u_char* end_of_s = s + n; + const char* s = str->CheckString(); + int len = strlen(s); + const char* end_of_s = s + len; int num = 0; int num_sep = 0; - - int offset = 0; - while ( n > 0 ) + while ( 1 ) { - offset = 0; - // Find next match offset. - int end_of_match; - while ( n > 0 && - (end_of_match = re->MatchPrefix(&s[offset], n)) <= 0 ) + int offset = 0; + const char* t; + + if ( max_num_sep > 0 && num_sep >= max_num_sep ) + t = end_of_s; + else { - // Move on to next character. - ++offset; - --n; + for ( t = s; t < end_of_s; ++t ) + { + offset = re->MatchPrefix(t); + + if ( other_strings ) + { + val_list* vl = other_strings->Vals(); + loop_over_list(*vl, i) + { + const BroString* sub = + (*vl)[i]->AsString(); + if ( sub->Len() > offset && + match_prefix(end_of_s - t, + t, sub->Len(), + (const char*) (sub->Bytes())) ) + { + offset = sub->Len(); + } + } + } + + if ( offset > 0 ) + break; + } } - + Val* ind = new Val(++num, TYPE_COUNT); - a->Assign(ind, new StringVal(offset, (const char*) s)); + a->Assign(ind, new StringVal(t - s, s)); Unref(ind); - // No more separators will be needed if this is the end of string. - if ( n <= 0 ) + if ( t >= end_of_s ) break; + ++num_sep; + if ( incl_sep ) { // including the part that matches the pattern ind = new Val(++num, TYPE_COUNT); - a->Assign(ind, new StringVal(end_of_match, (const char*) s+offset)); + a->Assign(ind, new StringVal(offset, t)); Unref(ind); } - - ++num_sep; - if ( max_num_sep && num_sep >= max_num_sep ) - break; - - offset += end_of_match; - n -= end_of_match; - s += offset; - + + s = t + offset; if ( s > end_of_s ) - { internal_error("RegMatch in split goes beyond the string"); - } } if ( other_strings ) @@ -463,38 +476,42 @@ function subst_string%(s: string, from: string, to: string%): string function to_lower%(str: string%): string %{ - const u_char* s = str->Bytes(); - int n = str->Len(); + const char* s = str->CheckString(); + int n = strlen(s) + 1; char* lower_s = new char[n]; - char* ls = lower_s; - for (int i=0; iBytes(); - int n = str->Len(); + const char* s = str->CheckString(); + int n = strlen(s) + 1; char* upper_s = new char[n]; - char* us = upper_s; - - for (int i=0; iBytes(); - int n = str->Len(); + const char* s = str->CheckString(); - if ( n == 0 ) + int n = strlen(s) + 1; + char* strip_s = new char[n]; + + if ( n == 1 ) // Empty string. - return new StringVal(new BroString(s, n, 1)); + return new StringVal(new BroString(1, byte_vec(strip_s), 0)); - const u_char* sp = s; - // Move a pointer to the end of the string - const u_char* e = &sp[n-1]; - while ( e > sp && isspace(*e) ) + while ( isspace(*s) ) + ++s; + + strncpy(strip_s, s, n); + + char* s2 = strip_s; + char* e = &s2[strlen(s2) - 1]; + + while ( e > s2 && isspace(*e) ) --e; - // Move the pointer for the beginning of the string - while ( isspace(*sp) ) - ++sp; + e[1] = '\0'; // safe even if e hasn't changed, due to n = strlen + 1 - return new StringVal(new BroString(sp, e-sp+1, 1)); + return new StringVal(new BroString(1, byte_vec(s2), (e-s2)+1)); %} function string_fill%(len: int, source: string%): string %{ - const u_char* src = source->Bytes(); - int n = source->Len(); + const char* src = source->CheckString(); + + int sn = strlen(src); char* dst = new char[len]; - for ( int i = 0; i < len; i += n ) - ::memcpy((dst + i), src, min(n, len - i)); + for ( int i = 0; i < len; i += sn ) + ::memcpy((dst + i), src, min(sn, len - i)); dst[len - 1] = 0; @@ -627,12 +650,11 @@ function string_fill%(len: int, source: string%): string # function str_shell_escape%(source: string%): string %{ - uint j = 0; - const u_char* src = source->Bytes(); - uint n = source->Len(); - byte_vec dst = new u_char[n * 2 + 1]; + unsigned j = 0; + const char* src = source->CheckString(); + char* dst = new char[strlen(src) * 2 + 1]; - for ( uint i = 0; i < n; ++i ) + for ( unsigned i = 0; i < strlen(src); ++i ) { switch ( src[i] ) { case '`': case '"': case '\\': case '$': @@ -650,7 +672,7 @@ function str_shell_escape%(source: string%): string } dst[j] = '\0'; - return new StringVal(new BroString(1, dst, j)); + return new StringVal(new BroString(1, byte_vec(dst), j)); %} # Returns all occurrences of the given pattern in the given string (an empty