From 4014cdc277d0e17b9895b52b81ff1ba7925eefb1 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Tue, 3 Dec 2013 14:39:21 -0600 Subject: [PATCH] Fix string slice notation, addresses BIT-1097. Slice ranges were not correctly determined for negative indices and also off by one in general (included one more element at the end of the substring than what actually matched the index range). It's now equivalent to Python slice notation. Accessing a string at a single index is also the same as Python except that an out-of-range index returns an empty string instead of throwing an exception. --- src/Expr.cc | 38 +++++++-- .../Baseline/language.string-indexing/out | 84 +++++++++++++++++-- testing/btest/language/string-indexing.bro | 77 +++++++++++++++++ testing/scripts/file-analysis-test.bro | 2 +- 4 files changed, 187 insertions(+), 14 deletions(-) diff --git a/src/Expr.cc b/src/Expr.cc index 907cfc904c..2b55184945 100644 --- a/src/Expr.cc +++ b/src/Expr.cc @@ -3037,6 +3037,16 @@ Val* IndexExpr::Eval(Frame* f) const return result; } +static int get_slice_index(int idx, int len) + { + if ( abs(idx) > len ) + idx = idx > 0 ? len : 0; // Clamp maximum positive/negative indices. + else if ( idx < 0 ) + idx += len; // Map to a positive index. + + return idx; + } + Val* IndexExpr::Fold(Val* v1, Val* v2) const { if ( IsError() ) @@ -3058,16 +3068,30 @@ Val* IndexExpr::Fold(Val* v1, Val* v2) const const ListVal* lv = v2->AsListVal(); const BroString* s = v1->AsString(); int len = s->Len(); - bro_int_t first = lv->Index(0)->AsInt(); - bro_int_t last = lv->Length() > 1 ? lv->Index(1)->AsInt() : first; + BroString* substring = 0; - if ( first < 0 ) - first += len; + if ( lv->Length() == 1 ) + { + bro_int_t idx = lv->Index(0)->AsInt(); - if ( last < 0 ) - last += len; + if ( idx < 0 ) + idx += len; + + // Out-of-range index will return null pointer. 
+ substring = s->GetSubstring(idx, 1); + } + else + { + bro_int_t first = get_slice_index(lv->Index(0)->AsInt(), len); + bro_int_t last = get_slice_index(lv->Index(1)->AsInt(), len); + int substring_len = last - first; + + if ( substring_len < 0 ) + substring = 0; + else + substring = s->GetSubstring(first, substring_len); + } - BroString* substring = s->GetSubstring(first, last - first + 1); return new StringVal(substring ? substring : new BroString("")); } diff --git a/testing/btest/Baseline/language.string-indexing/out b/testing/btest/Baseline/language.string-indexing/out index 3359187d4c..99464302ed 100644 --- a/testing/btest/Baseline/language.string-indexing/out +++ b/testing/btest/Baseline/language.string-indexing/out @@ -1,13 +1,85 @@ 1 -12 -123456 +1 +12345 0123456789 8 -789 -9 -9 -9 +78 + + +0123456789 2 1 +word[-100] = +word[-7] = +word[-6] = +word[-5] = H +word[-4] = e +word[-3] = l +word[-2] = p +word[-1] = A +word[0] = H +word[1] = e +word[2] = l +word[3] = p +word[4] = A +word[5] = +word[6] = +word[7] = +word[100] = + +A +1234 +123 +12 +1 + + + + + + +B + + + + + +C + +0123 + +01 +012345 + +D + + +4 + + +45 + +E + +01234 + +01 +012345 + +F + +234 + + +23 +2345 + +F + + + + + + diff --git a/testing/btest/language/string-indexing.bro b/testing/btest/language/string-indexing.bro index f991b3c5fa..bff37f6bec 100644 --- a/testing/btest/language/string-indexing.bro +++ b/testing/btest/language/string-indexing.bro @@ -1,7 +1,10 @@ # @TEST-EXEC: bro -b %INPUT >out # @TEST-EXEC: btest-diff out +local word = "HelpA"; local s = "0123456789"; +local indices = vector(-100, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 100); + print s[1]; print s[1:2]; print s[1:6]; @@ -15,3 +18,77 @@ print s[20:23]; print s[-20:23]; print s[0:5][2]; print s[0:5][1:3][0]; + +s = "012345"; + +for ( i in indices ) + print fmt("word[%s] = %s", indices[i], word[indices[i]]); + +print ""; + +print "A"; +print s[1:-1]; +print s[1:-2]; +print s[1:-3]; +print s[1:-4]; +print s[1:-5]; 
+print s[1:-6]; +print s[1:-7]; +print s[1:-8]; +print s[1:-9]; + +print ""; + +print "B"; +print s[-1:-1]; +print s[-1:-2]; +print s[-1:-3]; +print s[-1:-4]; + +print ""; + +print "C"; +print s[-100:-99]; +print s[-100:-2]; +print s[-100:0]; +print s[-100:2]; +print s[-100:100]; + +print ""; + +print "D";; +print s[-2:-99]; +print s[-2:-3]; +print s[-2:-1]; +print s[-2:0]; +print s[-2:2]; +print s[-2:100]; + +print ""; + +print "E";; +print s[0:-100]; +print s[0:-1]; +print s[0:0]; +print s[0:2]; +print s[0:100]; + +print ""; + +print "F";; +print s[2:-100]; +print s[2:-1]; +print s[2:0]; +print s[2:1]; +print s[2:4]; +print s[2:100]; + +print ""; + +print "F";; +print s[100:-100]; +print s[100:-1]; +print s[100:0]; +print s[100:1]; +print s[100:4]; +print s[100:100]; diff --git a/testing/scripts/file-analysis-test.bro b/testing/scripts/file-analysis-test.bro index d84fadae5c..957218b48f 100644 --- a/testing/scripts/file-analysis-test.bro +++ b/testing/scripts/file-analysis-test.bro @@ -60,7 +60,7 @@ event file_new(f: fa_file) if ( f?$bof_buffer ) { print "FILE_BOF_BUFFER"; - print f$bof_buffer[0:10]; + print f$bof_buffer[0:11]; } if ( f?$mime_type )