Fixed the problem with the do_split function that caused it to bail out one separator early.

This commit is contained in:
Seth Hall 2010-12-09 15:23:54 -05:00
parent 0ebcf2dff4
commit a598bdb555

View file

@ -198,7 +198,6 @@ static int match_prefix(int s_len, const char* s, int t_len, const char* t)
Val* do_split(StringVal* str_val, RE_Matcher* re, TableVal* other_sep, Val* do_split(StringVal* str_val, RE_Matcher* re, TableVal* other_sep,
int incl_sep, int max_num_sep) int incl_sep, int max_num_sep)
{ {
const BroString* str = str_val->AsString();
TableVal* a = new TableVal(internal_type("string_array")->AsTableType()); TableVal* a = new TableVal(internal_type("string_array")->AsTableType());
ListVal* other_strings = 0; ListVal* other_strings = 0;
@ -209,66 +208,56 @@ Val* do_split(StringVal* str_val, RE_Matcher* re, TableVal* other_sep,
// the future we expect to change this by giving RE_Matcher a // the future we expect to change this by giving RE_Matcher a
// const char* segment. // const char* segment.
const char* s = str->CheckString(); const u_char* s = str_val->Bytes();
int len = strlen(s); int n = str_val->Len();
const char* end_of_s = s + len; const u_char* end_of_s = s + n;
int num = 0; int num = 0;
int num_sep = 0; int num_sep = 0;
int offset = 0;
while ( 1 ) while ( n > 0 )
{ {
int offset = 0; offset = 0;
const char* t; // Find next match offset.
int end_of_match;
if ( max_num_sep > 0 && num_sep >= max_num_sep ) while ( n > 0 &&
t = end_of_s; (end_of_match = re->MatchPrefix(&s[offset], n)) <= 0 )
else
{ {
for ( t = s; t < end_of_s; ++t ) printf("character %d\n", offset);
{ // Move on to next character.
offset = re->MatchPrefix(t); ++offset;
--n;
if ( other_strings )
{
val_list* vl = other_strings->Vals();
loop_over_list(*vl, i)
{
const BroString* sub =
(*vl)[i]->AsString();
if ( sub->Len() > offset &&
match_prefix(end_of_s - t,
t, sub->Len(),
(const char*) (sub->Bytes())) )
{
offset = sub->Len();
}
}
}
if ( offset > 0 )
break;
}
} }
Val* ind = new Val(++num, TYPE_COUNT); Val* ind = new Val(++num, TYPE_COUNT);
a->Assign(ind, new StringVal(t - s, s)); a->Assign(ind, new StringVal(offset, (const char*) s));
Unref(ind); Unref(ind);
if ( t >= end_of_s ) // No more separators will be needed if this is the end of string.
if ( n <= 0 )
break; break;
++num_sep;
if ( incl_sep ) if ( incl_sep )
{ // including the part that matches the pattern { // including the part that matches the pattern
ind = new Val(++num, TYPE_COUNT); ind = new Val(++num, TYPE_COUNT);
a->Assign(ind, new StringVal(offset, t)); a->Assign(ind, new StringVal(end_of_match, (const char*) s+offset));
Unref(ind); Unref(ind);
} }
s = t + offset; if ( max_num_sep && num_sep >= max_num_sep )
break;
++num_sep;
offset += end_of_match;
n -= end_of_match;
s += offset;
if ( s > end_of_s ) if ( s > end_of_s )
{
internal_error("RegMatch in split goes beyond the string"); internal_error("RegMatch in split goes beyond the string");
}
} }
if ( other_strings ) if ( other_strings )