Fixed the problem in the do_split function that caused it to bail out one separator early.

This commit is contained in:
Seth Hall 2010-12-09 15:23:54 -05:00
parent 0ebcf2dff4
commit a598bdb555

View file

@ -198,7 +198,6 @@ static int match_prefix(int s_len, const char* s, int t_len, const char* t)
Val* do_split(StringVal* str_val, RE_Matcher* re, TableVal* other_sep,
int incl_sep, int max_num_sep)
{
const BroString* str = str_val->AsString();
TableVal* a = new TableVal(internal_type("string_array")->AsTableType());
ListVal* other_strings = 0;
@ -209,66 +208,56 @@ Val* do_split(StringVal* str_val, RE_Matcher* re, TableVal* other_sep,
// the future we expect to change this by giving RE_Matcher a
// const char* segment.
const char* s = str->CheckString();
int len = strlen(s);
const char* end_of_s = s + len;
const u_char* s = str_val->Bytes();
int n = str_val->Len();
const u_char* end_of_s = s + n;
int num = 0;
int num_sep = 0;
int offset = 0;
while ( 1 )
while ( n > 0 )
{
int offset = 0;
const char* t;
if ( max_num_sep > 0 && num_sep >= max_num_sep )
t = end_of_s;
else
offset = 0;
// Find next match offset.
int end_of_match;
while ( n > 0 &&
(end_of_match = re->MatchPrefix(&s[offset], n)) <= 0 )
{
for ( t = s; t < end_of_s; ++t )
{
offset = re->MatchPrefix(t);
if ( other_strings )
{
val_list* vl = other_strings->Vals();
loop_over_list(*vl, i)
{
const BroString* sub =
(*vl)[i]->AsString();
if ( sub->Len() > offset &&
match_prefix(end_of_s - t,
t, sub->Len(),
(const char*) (sub->Bytes())) )
{
offset = sub->Len();
}
}
}
if ( offset > 0 )
break;
}
printf("character %d\n", offset);
// Move on to next character.
++offset;
--n;
}
Val* ind = new Val(++num, TYPE_COUNT);
a->Assign(ind, new StringVal(t - s, s));
a->Assign(ind, new StringVal(offset, (const char*) s));
Unref(ind);
if ( t >= end_of_s )
// No more separators will be needed if this is the end of string.
if ( n <= 0 )
break;
++num_sep;
if ( incl_sep )
{ // including the part that matches the pattern
ind = new Val(++num, TYPE_COUNT);
a->Assign(ind, new StringVal(offset, t));
a->Assign(ind, new StringVal(end_of_match, (const char*) s+offset));
Unref(ind);
}
s = t + offset;
if ( max_num_sep && num_sep >= max_num_sep )
break;
++num_sep;
offset += end_of_match;
n -= end_of_match;
s += offset;
if ( s > end_of_s )
{
internal_error("RegMatch in split goes beyond the string");
}
}
if ( other_strings )