mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 06:38:20 +00:00
Fixed a problem in the do_split function that caused it to bail out one separator early.
This commit is contained in:
parent
0ebcf2dff4
commit
a598bdb555
1 changed file with 32 additions and 43 deletions
|
@ -198,7 +198,6 @@ static int match_prefix(int s_len, const char* s, int t_len, const char* t)
|
||||||
Val* do_split(StringVal* str_val, RE_Matcher* re, TableVal* other_sep,
|
Val* do_split(StringVal* str_val, RE_Matcher* re, TableVal* other_sep,
|
||||||
int incl_sep, int max_num_sep)
|
int incl_sep, int max_num_sep)
|
||||||
{
|
{
|
||||||
const BroString* str = str_val->AsString();
|
|
||||||
TableVal* a = new TableVal(internal_type("string_array")->AsTableType());
|
TableVal* a = new TableVal(internal_type("string_array")->AsTableType());
|
||||||
ListVal* other_strings = 0;
|
ListVal* other_strings = 0;
|
||||||
|
|
||||||
|
@ -209,66 +208,56 @@ Val* do_split(StringVal* str_val, RE_Matcher* re, TableVal* other_sep,
|
||||||
// the future we expect to change this by giving RE_Matcher a
|
// the future we expect to change this by giving RE_Matcher a
|
||||||
// const char* segment.
|
// const char* segment.
|
||||||
|
|
||||||
const char* s = str->CheckString();
|
const u_char* s = str_val->Bytes();
|
||||||
int len = strlen(s);
|
int n = str_val->Len();
|
||||||
const char* end_of_s = s + len;
|
const u_char* end_of_s = s + n;
|
||||||
int num = 0;
|
int num = 0;
|
||||||
int num_sep = 0;
|
int num_sep = 0;
|
||||||
|
|
||||||
|
int offset = 0;
|
||||||
|
|
||||||
while ( 1 )
|
while ( n > 0 )
|
||||||
{
|
{
|
||||||
int offset = 0;
|
offset = 0;
|
||||||
const char* t;
|
// Find next match offset.
|
||||||
|
int end_of_match;
|
||||||
if ( max_num_sep > 0 && num_sep >= max_num_sep )
|
while ( n > 0 &&
|
||||||
t = end_of_s;
|
(end_of_match = re->MatchPrefix(&s[offset], n)) <= 0 )
|
||||||
else
|
|
||||||
{
|
{
|
||||||
for ( t = s; t < end_of_s; ++t )
|
printf("character %d\n", offset);
|
||||||
{
|
// Move on to next character.
|
||||||
offset = re->MatchPrefix(t);
|
++offset;
|
||||||
|
--n;
|
||||||
if ( other_strings )
|
|
||||||
{
|
|
||||||
val_list* vl = other_strings->Vals();
|
|
||||||
loop_over_list(*vl, i)
|
|
||||||
{
|
|
||||||
const BroString* sub =
|
|
||||||
(*vl)[i]->AsString();
|
|
||||||
if ( sub->Len() > offset &&
|
|
||||||
match_prefix(end_of_s - t,
|
|
||||||
t, sub->Len(),
|
|
||||||
(const char*) (sub->Bytes())) )
|
|
||||||
{
|
|
||||||
offset = sub->Len();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( offset > 0 )
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Val* ind = new Val(++num, TYPE_COUNT);
|
Val* ind = new Val(++num, TYPE_COUNT);
|
||||||
a->Assign(ind, new StringVal(t - s, s));
|
a->Assign(ind, new StringVal(offset, (const char*) s));
|
||||||
Unref(ind);
|
Unref(ind);
|
||||||
|
|
||||||
if ( t >= end_of_s )
|
// No more separators will be needed if this is the end of string.
|
||||||
|
if ( n <= 0 )
|
||||||
break;
|
break;
|
||||||
|
|
||||||
++num_sep;
|
|
||||||
|
|
||||||
if ( incl_sep )
|
if ( incl_sep )
|
||||||
{ // including the part that matches the pattern
|
{ // including the part that matches the pattern
|
||||||
ind = new Val(++num, TYPE_COUNT);
|
ind = new Val(++num, TYPE_COUNT);
|
||||||
a->Assign(ind, new StringVal(offset, t));
|
a->Assign(ind, new StringVal(end_of_match, (const char*) s+offset));
|
||||||
Unref(ind);
|
Unref(ind);
|
||||||
}
|
}
|
||||||
|
|
||||||
s = t + offset;
|
if ( max_num_sep && num_sep >= max_num_sep )
|
||||||
|
break;
|
||||||
|
|
||||||
|
++num_sep;
|
||||||
|
|
||||||
|
offset += end_of_match;
|
||||||
|
n -= end_of_match;
|
||||||
|
s += offset;
|
||||||
|
|
||||||
if ( s > end_of_s )
|
if ( s > end_of_s )
|
||||||
|
{
|
||||||
internal_error("RegMatch in split goes beyond the string");
|
internal_error("RegMatch in split goes beyond the string");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( other_strings )
|
if ( other_strings )
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue