Merge remote-tracking branch 'origin/topic/jsiwek/string-slicing-fix'

* origin/topic/jsiwek/string-slicing-fix:
  Fix string slice notation, addresses BIT-1097.
This commit is contained in:
Robin Sommer 2013-12-04 11:22:31 -08:00
commit 18a23fbdb8
6 changed files with 200 additions and 15 deletions

12
CHANGES
View file

@ -1,4 +1,16 @@
2.2-37 | 2013-12-04 11:22:31 -0800
* Fix string slice notation. Addresses BIT-1097. (Jon Siwek)
Slice ranges were not correctly determined for negative indices
and also off by one in general (included one more element at the
end of the substring than what actually matched the index range).
It's now equivalent to Python slice notation. Accessing a string
at a single index is also the same as Python except that an
out-of-range index returns an empty string instead of throwing an
exception.
2.2-35 | 2013-12-04 10:10:32 -0800
* Rework the automated script-reference documentation generation

View file

@ -1 +1 @@
2.2-35
2.2-37

View file

@ -3037,6 +3037,16 @@ Val* IndexExpr::Eval(Frame* f) const
return result;
}
// Normalizes one bound of a slice range against a sequence of length "len".
//
// idx: the requested bound; may be negative to count from the end.
// len: length of the sequence being sliced (expected to be non-negative).
//
// Returns a value in [0, len]: bounds past either end are clamped
// (idx > len yields len, idx < -len yields 0), and any remaining negative
// bound is shifted by len so that it becomes a position from the start.
static int get_slice_index(int idx, int len)
	{
	// Compare against len/-len directly instead of going through abs():
	// abs(INT_MIN) is not representable in an int, so the most negative
	// index would otherwise escape the clamp.
	if ( idx > len )
		return len; // Clamp maximum positive index.

	if ( idx < -len )
		return 0; // Clamp maximum negative index.

	if ( idx < 0 )
		return idx + len; // Map to a positive index.

	return idx;
	}
Val* IndexExpr::Fold(Val* v1, Val* v2) const
{
if ( IsError() )
@ -3058,16 +3068,30 @@ Val* IndexExpr::Fold(Val* v1, Val* v2) const
const ListVal* lv = v2->AsListVal();
const BroString* s = v1->AsString();
int len = s->Len();
bro_int_t first = lv->Index(0)->AsInt();
bro_int_t last = lv->Length() > 1 ? lv->Index(1)->AsInt() : first;
BroString* substring = 0;
if ( first < 0 )
first += len;
if ( lv->Length() == 1 )
{
bro_int_t idx = lv->Index(0)->AsInt();
if ( last < 0 )
last += len;
if ( idx < 0 )
idx += len;
// Out-of-range index will return null pointer.
substring = s->GetSubstring(idx, 1);
}
else
{
bro_int_t first = get_slice_index(lv->Index(0)->AsInt(), len);
bro_int_t last = get_slice_index(lv->Index(1)->AsInt(), len);
int substring_len = last - first;
if ( substring_len < 0 )
substring = 0;
else
substring = s->GetSubstring(first, substring_len);
}
BroString* substring = s->GetSubstring(first, last - first + 1);
return new StringVal(substring ? substring : new BroString(""));
}

View file

@ -1,13 +1,85 @@
1
12
123456
1
12345
0123456789
8
789
9
9
9
78
0123456789
2
1
word[-100] =
word[-7] =
word[-6] =
word[-5] = H
word[-4] = e
word[-3] = l
word[-2] = p
word[-1] = A
word[0] = H
word[1] = e
word[2] = l
word[3] = p
word[4] = A
word[5] =
word[6] =
word[7] =
word[100] =
A
1234
123
12
1
B
C
0123
01
012345
D
4
45
E
01234
01
012345
F
234
23
2345
F

View file

@ -1,7 +1,10 @@
# @TEST-EXEC: bro -b %INPUT >out
# @TEST-EXEC: btest-diff out
local word = "HelpA";
local s = "0123456789";
local indices = vector(-100, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 100);
print s[1];
print s[1:2];
print s[1:6];
@ -15,3 +18,77 @@ print s[20:23];
print s[-20:23];
print s[0:5][2];
print s[0:5][1:3][0];
s = "012345";
for ( i in indices )
print fmt("word[%s] = %s", indices[i], word[indices[i]]);
print "";
print "A";
print s[1:-1];
print s[1:-2];
print s[1:-3];
print s[1:-4];
print s[1:-5];
print s[1:-6];
print s[1:-7];
print s[1:-8];
print s[1:-9];
print "";
print "B";
print s[-1:-1];
print s[-1:-2];
print s[-1:-3];
print s[-1:-4];
print "";
print "C";
print s[-100:-99];
print s[-100:-2];
print s[-100:0];
print s[-100:2];
print s[-100:100];
print "";
print "D";;
print s[-2:-99];
print s[-2:-3];
print s[-2:-1];
print s[-2:0];
print s[-2:2];
print s[-2:100];
print "";
print "E";;
print s[0:-100];
print s[0:-1];
print s[0:0];
print s[0:2];
print s[0:100];
print "";
print "F";;
print s[2:-100];
print s[2:-1];
print s[2:0];
print s[2:1];
print s[2:4];
print s[2:100];
print "";
print "F";;
print s[100:-100];
print s[100:-1];
print s[100:0];
print s[100:1];
print s[100:4];
print s[100:100];

View file

@ -60,7 +60,7 @@ event file_new(f: fa_file)
if ( f?$bof_buffer )
{
print "FILE_BOF_BUFFER";
print f$bof_buffer[0:10];
print f$bof_buffer[0:11];
}
if ( f?$mime_type )