Fix string slice notation, addresses BIT-1097.

Slice ranges were not correctly determined for negative indices and also
off by one in general (included one more element at the end of the
substring than what actually matched the index range).

It's now equivalent to Python slice notation.  Accessing a string at
a single index is also the same as Python except that an out-of-range
index returns an empty string instead of throwing an exception.
This commit is contained in:
Jon Siwek 2013-12-03 14:39:21 -06:00
parent dc52846b6f
commit 4014cdc277
4 changed files with 187 additions and 14 deletions

View file

@ -3037,6 +3037,16 @@ Val* IndexExpr::Eval(Frame* f) const
return result; return result;
} }
// Normalizes one bound of a slice expression (e.g. the "a" or "b" in s[a:b])
// to an offset in the range [0, len].
//
// idx: the bound as written; may be negative to count from the end.
// len: length of the sequence being sliced; expected to be non-negative.
//
// Returns len if idx lies beyond the end, 0 if idx lies before the start
// (i.e. idx < -len), idx + len for any other negative idx, and idx itself
// otherwise.
static int get_slice_index(int idx, int len)
	{
	// Compare against the bounds directly rather than via abs(idx):
	// abs(INT_MIN) is not representable as an int and is undefined behavior.
	if ( idx > len )
		return len; // Clamp maximum positive index.

	if ( idx < -len )
		return 0; // Clamp maximum negative index.

	if ( idx < 0 )
		return idx + len; // Map to a positive index.

	return idx;
	}
Val* IndexExpr::Fold(Val* v1, Val* v2) const Val* IndexExpr::Fold(Val* v1, Val* v2) const
{ {
if ( IsError() ) if ( IsError() )
@ -3058,16 +3068,30 @@ Val* IndexExpr::Fold(Val* v1, Val* v2) const
const ListVal* lv = v2->AsListVal(); const ListVal* lv = v2->AsListVal();
const BroString* s = v1->AsString(); const BroString* s = v1->AsString();
int len = s->Len(); int len = s->Len();
bro_int_t first = lv->Index(0)->AsInt(); BroString* substring = 0;
bro_int_t last = lv->Length() > 1 ? lv->Index(1)->AsInt() : first;
if ( first < 0 ) if ( lv->Length() == 1 )
first += len; {
bro_int_t idx = lv->Index(0)->AsInt();
if ( last < 0 ) if ( idx < 0 )
last += len; idx += len;
// Out-of-range index will return null pointer.
substring = s->GetSubstring(idx, 1);
}
else
{
bro_int_t first = get_slice_index(lv->Index(0)->AsInt(), len);
bro_int_t last = get_slice_index(lv->Index(1)->AsInt(), len);
int substring_len = last - first;
if ( substring_len < 0 )
substring = 0;
else
substring = s->GetSubstring(first, substring_len);
}
BroString* substring = s->GetSubstring(first, last - first + 1);
return new StringVal(substring ? substring : new BroString("")); return new StringVal(substring ? substring : new BroString(""));
} }

View file

@ -1,13 +1,85 @@
1 1
12 1
123456 12345
0123456789 0123456789
8 8
789 78
9
9
9
0123456789
2 2
1 1
word[-100] =
word[-7] =
word[-6] =
word[-5] = H
word[-4] = e
word[-3] = l
word[-2] = p
word[-1] = A
word[0] = H
word[1] = e
word[2] = l
word[3] = p
word[4] = A
word[5] =
word[6] =
word[7] =
word[100] =
A
1234
123
12
1
B
C
0123
01
012345
D
4
45
E
01234
01
012345
F
234
23
2345
F

View file

@ -1,7 +1,10 @@
# @TEST-EXEC: bro -b %INPUT >out # @TEST-EXEC: bro -b %INPUT >out
# @TEST-EXEC: btest-diff out # @TEST-EXEC: btest-diff out
local word = "HelpA";
local s = "0123456789"; local s = "0123456789";
local indices = vector(-100, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 100);
print s[1]; print s[1];
print s[1:2]; print s[1:2];
print s[1:6]; print s[1:6];
@ -15,3 +18,77 @@ print s[20:23];
print s[-20:23]; print s[-20:23];
print s[0:5][2]; print s[0:5][2];
print s[0:5][1:3][0]; print s[0:5][1:3][0];
s = "012345";
for ( i in indices )
print fmt("word[%s] = %s", indices[i], word[indices[i]]);
print "";
print "A";
print s[1:-1];
print s[1:-2];
print s[1:-3];
print s[1:-4];
print s[1:-5];
print s[1:-6];
print s[1:-7];
print s[1:-8];
print s[1:-9];
print "";
print "B";
print s[-1:-1];
print s[-1:-2];
print s[-1:-3];
print s[-1:-4];
print "";
print "C";
print s[-100:-99];
print s[-100:-2];
print s[-100:0];
print s[-100:2];
print s[-100:100];
print "";
print "D";;
print s[-2:-99];
print s[-2:-3];
print s[-2:-1];
print s[-2:0];
print s[-2:2];
print s[-2:100];
print "";
print "E";;
print s[0:-100];
print s[0:-1];
print s[0:0];
print s[0:2];
print s[0:100];
print "";
print "F";;
print s[2:-100];
print s[2:-1];
print s[2:0];
print s[2:1];
print s[2:4];
print s[2:100];
print "";
print "F";;
print s[100:-100];
print s[100:-1];
print s[100:0];
print s[100:1];
print s[100:4];
print s[100:100];

View file

@ -60,7 +60,7 @@ event file_new(f: fa_file)
if ( f?$bof_buffer ) if ( f?$bof_buffer )
{ {
print "FILE_BOF_BUFFER"; print "FILE_BOF_BUFFER";
print f$bof_buffer[0:10]; print f$bof_buffer[0:11];
} }
if ( f?$mime_type ) if ( f?$mime_type )