mirror of
https://github.com/zeek/zeek.git
synced 2025-10-07 17:18:20 +00:00
Move do_sub method from zeek.bif to StringVal class method
This commit is contained in:
parent
528bad72de
commit
2cb87c3309
3 changed files with 90 additions and 87 deletions
86
src/Val.cc
86
src/Val.cc
|
@ -807,6 +807,92 @@ unsigned int StringVal::MemoryAllocation() const
|
|||
return padded_sizeof(*this) + val.string_val->MemoryAllocation();
|
||||
}
|
||||
|
||||
// Builds a new string in which text matching a pattern has been replaced.
//
// The string's bytes are scanned left to right. At each position the
// pattern is tried as a prefix match against the remaining bytes; a result
// <= 0 is treated as "no match here" and the byte is kept as-is. After a
// match, scanning resumes immediately after the matched text, so the
// matches that get replaced never overlap.
//
// re:     pattern to look for.
// repl:   text inserted in place of each replaced match.
// do_all: if true, every match is replaced; if false, only the first one,
//         and the rest of the string is carried over unchanged.
//
// Returns a newly allocated StringVal holding the result.
Val* StringVal::Substitute(RE_Matcher* re, StringVal* repl, bool do_all)
	{
	const u_char* src = Bytes();
	const int src_len = Len();
	const int repl_len = repl->Len();

	// Flat list of offset pairs: entries 2k and 2k+1 give the half-open
	// range [begin, end) of the k-th match that is to be replaced.
	List(ptr_compat_int) spans;

	int kept = 0;	// number of input bytes carried over unchanged
	int pos = 0;	// current scan position within src

	while ( pos < src_len )
		{
		int match_len = re->MatchPrefix(&src[pos], src_len - pos);

		if ( match_len <= 0 )
			{
			// Nothing matches at this position: the byte survives
			// into the result, try again one byte further on.
			++kept;
			++pos;
			continue;
			}

		// src[pos .. pos+match_len-1] matched the pattern.
		spans.append(pos);
		spans.append(pos + match_len);
		pos += match_len;

		if ( ! do_all )
			{
			// Only the first match is replaced; everything after
			// it is copied verbatim.
			kept += src_len - pos;
			break;
			}
		}

	// Result size: kept bytes, one replacement per recorded match, and
	// one extra byte for a trailing NUL.
	const int num_spans = spans.length() / 2;
	const int total = kept + num_spans * repl_len + 1;

	byte_vec buf = new u_char[total];
	byte_vec out = buf;

	// Alternate between the unmatched stretch in front of each match and
	// the replacement text that stands in for the match itself.
	int copied_upto = 0;

	for ( int i = 0; i < spans.length(); i += 2 )
		{
		int gap = spans[i] - copied_upto;

		memcpy(out, src + copied_upto, gap);
		out += gap;

		memcpy(out, repl->Bytes(), repl_len);
		out += repl_len;

		copied_upto = spans[i + 1];
		}

	// Whatever follows the last match.
	int tail = src_len - copied_upto;
	memcpy(out, src + copied_upto, tail);
	out += tail;

	// Terminate the buffer. "out" is deliberately not advanced, so the
	// length handed to BroString below excludes the NUL.
	*out = '\0';

	return new StringVal(new BroString(1, buf, out - buf));
	}
|
||||
|
||||
Val* StringVal::DoClone(CloneState* state)
|
||||
{
|
||||
// We could likely treat this type as immutable and return a reference
|
||||
|
|
|
@ -639,6 +639,8 @@ public:
|
|||
|
||||
unsigned int MemoryAllocation() const override;
|
||||
|
||||
// Returns a new string value in which text matching "re" has been
// replaced by "repl": every non-overlapping match, scanning left to
// right, if "do_all" is true, otherwise only the first match. Positions
// where the pattern yields no match of positive length are left as-is.
Val* Substitute(RE_Matcher* re, StringVal* repl, bool do_all);
|
||||
|
||||
protected:
|
||||
friend class Val;
|
||||
// Default constructor with an empty body; declared in the protected
// section, alongside the friend declaration for class Val.
StringVal() {}
|
||||
|
|
|
@ -351,91 +351,6 @@ Val* do_split(StringVal* str_val, RE_Matcher* re, int incl_sep, int max_num_sep)
|
|||
return a;
|
||||
}
|
||||
|
||||
// Replaces text in str_val that matches re with repl and returns the
// result as a new value.
//
// str_val: string to operate on.
// re:      pattern to look for.
// repl:    replacement text.
// do_all:  non-zero to replace every match, zero to replace only the first.
//
// The substitution logic lives in StringVal::Substitute(); this function
// is kept as a thin wrapper so existing callers continue to work without
// a second copy of the algorithm to maintain.
Val* do_sub(StringVal* str_val, RE_Matcher* re, StringVal* repl, int do_all)
	{
	return str_val->Substitute(re, repl, do_all != 0);
	}
|
||||
%%}
|
||||
|
||||
## Splits a string into an array of strings according to a pattern.
|
||||
|
@ -535,7 +450,7 @@ function split_string_n%(str: string, re: pattern,
|
|||
## .. zeek:see:: gsub subst_string
|
||||
function sub%(str: string, re: pattern, repl: string%): string
	%{
	// Replace only the first match of "re" in "str" (do_all = false).
	return str->Substitute(re, repl, false);
	%}
|
||||
|
||||
## Substitutes a given replacement string for all occurrences of a pattern
|
||||
|
@ -552,7 +467,7 @@ function sub%(str: string, re: pattern, repl: string%): string
|
|||
## .. zeek:see:: sub subst_string
|
||||
function gsub%(str: string, re: pattern, repl: string%): string
	%{
	// Replace every match of "re" in "str" (do_all = true).
	return str->Substitute(re, repl, true);
	%}
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue