Move regex substitution from strings.bif into StringVal::Substitute()

The static helper do_sub() in src/strings.bif is removed and its body is
added as the member function StringVal::Substitute() in src/Val.cc
(declared in src/Val.h). The body is the same except that the input
string is now the object itself (Bytes()/Len() instead of
str_val->Bytes()/str_val->Len()) and that do_all is a bool rather than
an int. The sub() and gsub() BIFs now call
str->Substitute(re, repl, false) and str->Substitute(re, repl, true).

What Substitute() does, as visible in the code below:
  * It scans the string left to right. At each offset it calls
    re->MatchPrefix() on the remaining bytes; a result <= 0 is treated
    as "no match here", the byte is kept and the scan advances by one.
    Zero-length matches are therefore never substituted.
  * Each match is recorded as a pair <x,y> of offsets (start inclusive,
    end exclusive) and scanning resumes at y.
  * With do_all == false the scan stops after the first match.
  * The result is the original bytes with every recorded range replaced
    by repl, NUL-terminated, returned as a newly allocated StringVal.

NOTE(review): this patch text was restored from a copy whose newlines and
indentation had been collapsed. Tabs, blank context lines and the last
two context lines of the final hunk are reconstructed; if context does
not match, apply with whitespace checking relaxed.

diff --git a/src/Val.cc b/src/Val.cc
index eab52ef7db..f45a1ff6d0 100644
--- a/src/Val.cc
+++ b/src/Val.cc
@@ -807,6 +807,92 @@ unsigned int StringVal::MemoryAllocation() const
 	return padded_sizeof(*this) + val.string_val->MemoryAllocation();
 	}
 
+Val* StringVal::Substitute(RE_Matcher* re, StringVal* repl, bool do_all)
+	{
+	const u_char* s = Bytes();
+	int offset = 0;
+	int n = Len();
+
+	// cut_points is a set of pairs of indices in str that should
+	// be removed/replaced.  A pair <x,y> means "delete starting
+	// at offset x, up to but not including offset y".
+	List(ptr_compat_int) cut_points;	// where RE matches pieces of str
+
+	int size = 0;	// size of result
+
+	while ( n > 0 )
+		{
+		// Find next match offset.
+		int end_of_match;
+		while ( n > 0 &&
+			(end_of_match = re->MatchPrefix(&s[offset], n)) <= 0 )
+			{
+			// This character is going to be copied to the result.
+			++size;
+
+			// Move on to next character.
+			++offset;
+			--n;
+			}
+
+		if ( n <= 0 )
+			break;
+
+		// s[offset .. offset+end_of_match-1] matches re.
+		cut_points.append(offset);
+		cut_points.append(offset + end_of_match);
+
+		offset += end_of_match;
+		n -= end_of_match;
+
+		if ( ! do_all )
+			{
+			// We've now done the first substitution - finished.
+			// Include the remainder of the string in the result.
+			size += n;
+			break;
+			}
+		}
+
+	// size now reflects amount of space copied.  Factor in amount
+	// of space for replacement text.
+	int num_cut_points = cut_points.length() / 2;
+	size += num_cut_points * repl->Len();
+
+	// And a final NUL for good health.
+	++size;
+
+	byte_vec result = new u_char[size];
+	byte_vec r = result;
+
+	// Copy it all over.
+	int start_offset = 0;
+	for ( int i = 0; i < cut_points.length(); i += 2 /* loop over pairs */ )
+		{
+		int num_to_copy = cut_points[i] - start_offset;
+		memcpy(r, s + start_offset, num_to_copy);
+
+		r += num_to_copy;
+		start_offset = cut_points[i+1];
+
+		// Now add in replacement text.
+		memcpy(r, repl->Bytes(), repl->Len());
+		r += repl->Len();
+		}
+
+	// Copy final trailing characters.
+	int num_to_copy = Len() - start_offset;
+	memcpy(r, s + start_offset, num_to_copy);
+	r += num_to_copy;
+
+	// Final NUL.  No need to increment r, since the length
+	// computed from it in the next statement does not include
+	// the NUL.
+	r[0] = '\0';
+
+	return new StringVal(new BroString(1, result, r - result));
+	}
+
 Val* StringVal::DoClone(CloneState* state)
 	{
 	// We could likely treat this type as immutable and return a reference
diff --git a/src/Val.h b/src/Val.h
index b5fa130dd5..b2aea5d4e5 100644
--- a/src/Val.h
+++ b/src/Val.h
@@ -639,6 +639,8 @@ public:
 
 	unsigned int MemoryAllocation() const override;
 
+	Val* Substitute(RE_Matcher* re, StringVal* repl, bool do_all);
+
 protected:
 	friend class Val;
 	StringVal()	{}
diff --git a/src/strings.bif b/src/strings.bif
index f2661f8cc9..42630e4b6b 100644
--- a/src/strings.bif
+++ b/src/strings.bif
@@ -351,91 +351,6 @@ Val* do_split(StringVal* str_val, RE_Matcher* re, int incl_sep, int max_num_sep)
 	return a;
 	}
 
-Val* do_sub(StringVal* str_val, RE_Matcher* re, StringVal* repl, int do_all)
-	{
-	const u_char* s = str_val->Bytes();
-	int offset = 0;
-	int n = str_val->Len();
-
-	// cut_points is a set of pairs of indices in str that should
-	// be removed/replaced.  A pair <x,y> means "delete starting
-	// at offset x, up to but not including offset y".
-	List(ptr_compat_int) cut_points;	// where RE matches pieces of str
-
-	int size = 0;	// size of result
-
-	while ( n > 0 )
-		{
-		// Find next match offset.
-		int end_of_match;
-		while ( n > 0 &&
-			(end_of_match = re->MatchPrefix(&s[offset], n)) <= 0 )
-			{
-			// This character is going to be copied to the result.
-			++size;
-
-			// Move on to next character.
-			++offset;
-			--n;
-			}
-
-		if ( n <= 0 )
-			break;
-
-		// s[offset .. offset+end_of_match-1] matches re.
-		cut_points.append(offset);
-		cut_points.append(offset + end_of_match);
-
-		offset += end_of_match;
-		n -= end_of_match;
-
-		if ( ! do_all )
-			{
-			// We've now done the first substitution - finished.
-			// Include the remainder of the string in the result.
-			size += n;
-			break;
-			}
-		}
-
-	// size now reflects amount of space copied.  Factor in amount
-	// of space for replacement text.
-	int num_cut_points = cut_points.length() / 2;
-	size += num_cut_points * repl->Len();
-
-	// And a final NUL for good health.
-	++size;
-
-	byte_vec result = new u_char[size];
-	byte_vec r = result;
-
-	// Copy it all over.
-	int start_offset = 0;
-	for ( int i = 0; i < cut_points.length(); i += 2 /* loop over pairs */ )
-		{
-		int num_to_copy = cut_points[i] - start_offset;
-		memcpy(r, s + start_offset, num_to_copy);
-
-		r += num_to_copy;
-		start_offset = cut_points[i+1];
-
-		// Now add in replacement text.
-		memcpy(r, repl->Bytes(), repl->Len());
-		r += repl->Len();
-		}
-
-	// Copy final trailing characters.
-	int num_to_copy = str_val->Len() - start_offset;
-	memcpy(r, s + start_offset, num_to_copy);
-	r += num_to_copy;
-
-	// Final NUL.  No need to increment r, since the length
-	// computed from it in the next statement does not include
-	// the NUL.
-	r[0] = '\0';
-
-	return new StringVal(new BroString(1, result, r - result));
-	}
 %%}
 
 ## Splits a string into an array of strings according to a pattern.
@@ -535,7 +450,7 @@ function split_string_n%(str: string, re: pattern,
 ## .. zeek:see:: gsub subst_string
 function sub%(str: string, re: pattern, repl: string%): string
 	%{
-	return do_sub(str, re, repl, 0);
+	return str->Substitute(re, repl, false);
 	%}
 
 ## Substitutes a given replacement string for all occurrences of a pattern
@@ -552,7 +467,7 @@ function sub%(str: string, re: pattern, repl: string%): string
 ## .. zeek:see:: sub subst_string
 function gsub%(str: string, re: pattern, repl: string%): string
 	%{
-	return do_sub(str, re, repl, 1);
+	return str->Substitute(re, repl, true);
 	%}
 
 