mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 06:38:20 +00:00
Fix string slice notation, addresses BIT-1097.
Slice ranges were not correctly determined for negative indices and were also off by one in general (they included one more element at the end of the substring than what actually matched the index range). It's now equivalent to Python slice notation. Accessing a string at a single index is also the same as Python except that an out-of-range index returns an empty string instead of throwing an exception.
This commit is contained in:
parent
dc52846b6f
commit
4014cdc277
4 changed files with 187 additions and 14 deletions
38
src/Expr.cc
38
src/Expr.cc
|
@ -3037,6 +3037,16 @@ Val* IndexExpr::Eval(Frame* f) const
|
|||
return result;
|
||||
}
|
||||
|
||||
// Normalizes one bound of a Python-style slice to an offset in [0, len].
//
// idx: the slice bound as written; a negative value counts back from the
//      end of the sequence.
// len: the length of the sequence being sliced (expected to be >= 0).
//
// Returns idx unchanged when 0 <= idx <= len, idx + len when
// -len <= idx < 0, and otherwise clamps to len (idx too large) or to 0
// (idx too far negative).
static int get_slice_index(int idx, int len)
	{
	// Plain comparisons are used instead of abs(idx): abs(INT_MIN) is
	// undefined behavior and in practice yields a negative value, which
	// would skip the clamp and return a negative offset.
	if ( idx > 0 )
		return idx > len ? len : idx; // Clamp maximum positive index.

	if ( idx < 0 )
		return idx < -len ? 0 : idx + len; // Clamp, or map to a positive index.

	return 0;
	}
|
||||
|
||||
Val* IndexExpr::Fold(Val* v1, Val* v2) const
|
||||
{
|
||||
if ( IsError() )
|
||||
|
@ -3058,16 +3068,30 @@ Val* IndexExpr::Fold(Val* v1, Val* v2) const
|
|||
const ListVal* lv = v2->AsListVal();
|
||||
const BroString* s = v1->AsString();
|
||||
int len = s->Len();
|
||||
bro_int_t first = lv->Index(0)->AsInt();
|
||||
bro_int_t last = lv->Length() > 1 ? lv->Index(1)->AsInt() : first;
|
||||
BroString* substring = 0;
|
||||
|
||||
if ( first < 0 )
|
||||
first += len;
|
||||
if ( lv->Length() == 1 )
|
||||
{
|
||||
bro_int_t idx = lv->Index(0)->AsInt();
|
||||
|
||||
if ( last < 0 )
|
||||
last += len;
|
||||
if ( idx < 0 )
|
||||
idx += len;
|
||||
|
||||
// Out-of-range index will return null pointer.
|
||||
substring = s->GetSubstring(idx, 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
bro_int_t first = get_slice_index(lv->Index(0)->AsInt(), len);
|
||||
bro_int_t last = get_slice_index(lv->Index(1)->AsInt(), len);
|
||||
int substring_len = last - first;
|
||||
|
||||
if ( substring_len < 0 )
|
||||
substring = 0;
|
||||
else
|
||||
substring = s->GetSubstring(first, substring_len);
|
||||
}
|
||||
|
||||
BroString* substring = s->GetSubstring(first, last - first + 1);
|
||||
return new StringVal(substring ? substring : new BroString(""));
|
||||
}
|
||||
|
||||
|
|
|
@ -1,13 +1,85 @@
|
|||
1
|
||||
12
|
||||
123456
|
||||
1
|
||||
12345
|
||||
0123456789
|
||||
8
|
||||
789
|
||||
9
|
||||
9
|
||||
9
|
||||
78
|
||||
|
||||
|
||||
|
||||
|
||||
0123456789
|
||||
2
|
||||
1
|
||||
word[-100] =
|
||||
word[-7] =
|
||||
word[-6] =
|
||||
word[-5] = H
|
||||
word[-4] = e
|
||||
word[-3] = l
|
||||
word[-2] = p
|
||||
word[-1] = A
|
||||
word[0] = H
|
||||
word[1] = e
|
||||
word[2] = l
|
||||
word[3] = p
|
||||
word[4] = A
|
||||
word[5] =
|
||||
word[6] =
|
||||
word[7] =
|
||||
word[100] =
|
||||
|
||||
A
|
||||
1234
|
||||
123
|
||||
12
|
||||
1
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
B
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
C
|
||||
|
||||
0123
|
||||
|
||||
01
|
||||
012345
|
||||
|
||||
D
|
||||
|
||||
|
||||
4
|
||||
|
||||
|
||||
45
|
||||
|
||||
E
|
||||
|
||||
01234
|
||||
|
||||
01
|
||||
012345
|
||||
|
||||
F
|
||||
|
||||
234
|
||||
|
||||
|
||||
23
|
||||
2345
|
||||
|
||||
F
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -1,7 +1,10 @@
|
|||
# @TEST-EXEC: bro -b %INPUT >out
|
||||
# @TEST-EXEC: btest-diff out
|
||||
|
||||
local word = "HelpA";
|
||||
local s = "0123456789";
|
||||
local indices = vector(-100, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 100);
|
||||
|
||||
print s[1];
|
||||
print s[1:2];
|
||||
print s[1:6];
|
||||
|
@ -15,3 +18,77 @@ print s[20:23];
|
|||
print s[-20:23];
|
||||
print s[0:5][2];
|
||||
print s[0:5][1:3][0];
|
||||
|
||||
s = "012345";
|
||||
|
||||
for ( i in indices )
|
||||
print fmt("word[%s] = %s", indices[i], word[indices[i]]);
|
||||
|
||||
print "";
|
||||
|
||||
print "A";
|
||||
print s[1:-1];
|
||||
print s[1:-2];
|
||||
print s[1:-3];
|
||||
print s[1:-4];
|
||||
print s[1:-5];
|
||||
print s[1:-6];
|
||||
print s[1:-7];
|
||||
print s[1:-8];
|
||||
print s[1:-9];
|
||||
|
||||
print "";
|
||||
|
||||
print "B";
|
||||
print s[-1:-1];
|
||||
print s[-1:-2];
|
||||
print s[-1:-3];
|
||||
print s[-1:-4];
|
||||
|
||||
print "";
|
||||
|
||||
print "C";
|
||||
print s[-100:-99];
|
||||
print s[-100:-2];
|
||||
print s[-100:0];
|
||||
print s[-100:2];
|
||||
print s[-100:100];
|
||||
|
||||
print "";
|
||||
|
||||
print "D";;
|
||||
print s[-2:-99];
|
||||
print s[-2:-3];
|
||||
print s[-2:-1];
|
||||
print s[-2:0];
|
||||
print s[-2:2];
|
||||
print s[-2:100];
|
||||
|
||||
print "";
|
||||
|
||||
print "E";;
|
||||
print s[0:-100];
|
||||
print s[0:-1];
|
||||
print s[0:0];
|
||||
print s[0:2];
|
||||
print s[0:100];
|
||||
|
||||
print "";
|
||||
|
||||
print "F";;
|
||||
print s[2:-100];
|
||||
print s[2:-1];
|
||||
print s[2:0];
|
||||
print s[2:1];
|
||||
print s[2:4];
|
||||
print s[2:100];
|
||||
|
||||
print "";
|
||||
|
||||
print "F";;
|
||||
print s[100:-100];
|
||||
print s[100:-1];
|
||||
print s[100:0];
|
||||
print s[100:1];
|
||||
print s[100:4];
|
||||
print s[100:100];
|
||||
|
|
|
@ -60,7 +60,7 @@ event file_new(f: fa_file)
|
|||
if ( f?$bof_buffer )
|
||||
{
|
||||
print "FILE_BOF_BUFFER";
|
||||
print f$bof_buffer[0:10];
|
||||
print f$bof_buffer[0:11];
|
||||
}
|
||||
|
||||
if ( f?$mime_type )
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue