Move the do_sub function from zeek.bif into the StringVal class as the Substitute method

This commit is contained in:
Tim Wojtulewicz 2019-06-26 12:26:51 -07:00
parent 528bad72de
commit 2cb87c3309
3 changed files with 90 additions and 87 deletions

View file

@ -807,6 +807,92 @@ unsigned int StringVal::MemoryAllocation() const
return padded_sizeof(*this) + val.string_val->MemoryAllocation();
}
// Builds a new StringVal from this string's bytes in which text matched by
// 're' is replaced with the bytes of 'repl'.
//
// re:     matcher queried via MatchPrefix() at successive byte offsets.
// repl:   supplies the replacement bytes inserted for each match.
// do_all: if false, only the first match is replaced; if true, every match
//         found while scanning left to right is replaced.
//
// A position where MatchPrefix() reports a non-positive length is treated
// as "no match": that byte is kept and the scan moves one byte forward.
Val* StringVal::Substitute(RE_Matcher* re, StringVal* repl, bool do_all)
	{
	const u_char* src = Bytes();
	const int src_len = Len();

	// Flat list of index pairs into src. A pair <x,y> marks a matched
	// region starting at offset x and ending just before offset y; each
	// such region is swapped for the replacement text.
	List(ptr_compat_int) cut_points;

	int kept = 0;	// number of source bytes carried over unchanged
	int pos = 0;	// current scan offset into src

	while ( pos < src_len )
		{
		int match_len = re->MatchPrefix(&src[pos], src_len - pos);

		if ( match_len <= 0 )
			{
			// Nothing matches here: this byte goes to the result.
			++kept;
			++pos;
			continue;
			}

		// src[pos .. pos+match_len-1] matches re.
		cut_points.append(pos);
		cut_points.append(pos + match_len);
		pos += match_len;

		if ( ! do_all )
			{
			// Only the first match is substituted; everything
			// after it is carried over as is.
			kept += src_len - pos;
			break;
			}
		}

	// Result size: the kept bytes, one replacement per match, and one
	// extra byte for a terminating NUL.
	const int repl_len = repl->Len();
	const int num_matches = cut_points.length() / 2;
	const int size = kept + num_matches * repl_len + 1;

	byte_vec result = new u_char[size];
	byte_vec out = result;

	// Offset in src up to which the input has already been consumed.
	int consumed = 0;

	for ( int i = 0; i < cut_points.length(); i += 2 /* one pair per step */ )
		{
		const int cut_begin = cut_points[i];
		const int cut_end = cut_points[i + 1];

		// Unmatched text in front of this cut.
		const int gap = cut_begin - consumed;
		memcpy(out, src + consumed, gap);
		out += gap;

		// Replacement text in place of the matched region.
		memcpy(out, repl->Bytes(), repl_len);
		out += repl_len;

		consumed = cut_end;
		}

	// Whatever follows the last cut (or the whole string if there were none).
	const int tail = src_len - consumed;
	memcpy(out, src + consumed, tail);
	out += tail;

	// Terminating NUL; 'out' is not advanced, so the length passed below
	// does not count it.
	*out = '\0';

	// NOTE(review): the leading 1 presumably tells BroString that the
	// buffer already ends in a NUL - confirm against BroString's ctor.
	return new StringVal(new BroString(1, result, out - result));
	}
Val* StringVal::DoClone(CloneState* state)
{
// We could likely treat this type as immutable and return a reference

View file

@ -639,6 +639,8 @@ public:
unsigned int MemoryAllocation() const override;
// Returns a newly allocated StringVal in which text of this string that
// matches 're' has been replaced by the contents of 'repl'. If 'do_all'
// is false only the first match is replaced, otherwise all matches found
// in a left-to-right scan are replaced.
Val* Substitute(RE_Matcher* re, StringVal* repl, bool do_all);
protected:
friend class Val;
StringVal() {}

View file

@ -351,91 +351,6 @@ Val* do_split(StringVal* str_val, RE_Matcher* re, int incl_sep, int max_num_sep)
return a;
}
// Replaces text in 'str_val' that matches 're' with the contents of 'repl'
// and returns the result as a new StringVal.
//
// str_val: the string to operate on.
// re:      the pattern to look for.
// repl:    the replacement text.
// do_all:  zero replaces only the first match; any non-zero value replaces
//          all matches.
//
// The substitution logic itself lives in StringVal::Substitute(); this
// function only forwards to it so the algorithm exists in a single place.
Val* do_sub(StringVal* str_val, RE_Matcher* re, StringVal* repl, int do_all)
	{
	return str_val->Substitute(re, repl, do_all != 0);
	}
%%}
## Splits a string into an array of strings according to a pattern.
@ -535,7 +450,7 @@ function split_string_n%(str: string, re: pattern,
## .. zeek:see:: gsub subst_string
function sub%(str: string, re: pattern, repl: string%): string
	%{
	// Replace only the first match of 're' in 'str' (do_all = false).
	return str->Substitute(re, repl, false);
	%}
## Substitutes a given replacement string for all occurrences of a pattern
@ -552,7 +467,7 @@ function sub%(str: string, re: pattern, repl: string%): string
## .. zeek:see:: sub subst_string
function gsub%(str: string, re: pattern, repl: string%): string
	%{
	// Replace every match of 're' in 'str' (do_all = true).
	return str->Substitute(re, repl, true);
	%}