From 85c4b0d2859140f1be679601d87e75a60b3559bb Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 29 Jun 2018 13:01:05 -0700 Subject: [PATCH] use PCRE syntax instead of the beautiful new (?i ...) syntax --- NEWS | 5 ++--- doc/script-reference/types.rst | 5 ++--- src/RE.cc | 2 +- src/re-scan.l | 2 +- testing/btest/Baseline/language.pattern/out | 6 +++--- testing/btest/language/pattern.bro | 6 +++--- 6 files changed, 12 insertions(+), 14 deletions(-) diff --git a/NEWS b/NEWS index a31491862f..02e632865c 100644 --- a/NEWS +++ b/NEWS @@ -262,9 +262,8 @@ New Functionality yields F, though it yields T for "xfOObar". You can achieve the same functionality for a subpattern enclosed in - parentheses by adding "+i" to the open parenthesis, optionally followed - by whitespace. So for example "/foo|(+i bar)/" will match "BaR", but - not "FoO". + parentheses by adding "?i:" to the open parenthesis. So for example + "/foo|(?i:bar)/" will match "BaR", but not "FoO". For both ways of specifying case-insensitivity, characters enclosed in double quotes maintain their case-sensitivity. So for example /"foo"/i diff --git a/doc/script-reference/types.rst b/doc/script-reference/types.rst index 36ed0f5bfa..99dac0be48 100644 --- a/doc/script-reference/types.rst +++ b/doc/script-reference/types.rst @@ -256,9 +256,8 @@ Here is a more detailed description of each type: a "foo", "Foo", "BaR", etc. You can also introduce a case-insensitive sub-pattern by enclosing it - in ``(+i ...)``. For clarity, you can optionally include - trailing whitespace after the ``+i`` designator. So, for example, - ``/foo|(+i bar)/`` will match "foo" and "BaR", but *not* "Foo". + in ``(?i:...)``. So, for example, ``/foo|(?i:bar)/`` will + match "foo" and "BaR", but *not* "Foo". For both ways of specifying case-insensitivity, characters enclosed in double quotes maintain their case-sensitivity. 
So for example diff --git a/src/RE.cc b/src/RE.cc index cd37da18e9..9c17f2f992 100644 --- a/src/RE.cc +++ b/src/RE.cc @@ -104,7 +104,7 @@ void Specific_RE_Matcher::AddPat(const char* new_pat, void Specific_RE_Matcher::MakeCaseInsensitive() { - const char fmt[] = "(+i %s)"; + const char fmt[] = "(?i:%s)"; int n = strlen(pattern_text) + strlen(fmt); char* s = new char[n + 5 /* slop */]; diff --git a/src/re-scan.l b/src/re-scan.l index 0c6819bdd7..292f7a2e02 100644 --- a/src/re-scan.l +++ b/src/re-scan.l @@ -114,7 +114,7 @@ CCL_EXPR ("[:"[[:alpha:]]+":]") } } - "(+i"[ \t]* case_insensitive = 1; return TOK_CASE_INSENSITIVE; + "(?i:" case_insensitive = 1; return TOK_CASE_INSENSITIVE; [a-zA-Z] { if ( case_insensitive ) diff --git a/testing/btest/Baseline/language.pattern/out b/testing/btest/Baseline/language.pattern/out index 5c31320da9..dac62ab0fa 100644 --- a/testing/btest/Baseline/language.pattern/out +++ b/testing/btest/Baseline/language.pattern/out @@ -29,6 +29,6 @@ case-sensitive pattern (PASS) /i pattern concatenation (FAIL) /i pattern character class (FAIL) /i pattern character class (PASS) -(+i ...) pattern construct (PASS) -(+i ...) pattern construct (FAIL) -(+i ...) pattern construct (PASS) +(?i:...) pattern construct (PASS) +(?i:...) pattern construct (FAIL) +(?i:...) pattern construct (PASS) diff --git a/testing/btest/language/pattern.bro b/testing/btest/language/pattern.bro index 70eca233ea..e427b70e80 100644 --- a/testing/btest/language/pattern.bro +++ b/testing/btest/language/pattern.bro @@ -61,8 +61,8 @@ event bro_init() test_case( "/i pattern character class", /ba[0a-c99S-Z0]/i & /bEz/ == "bArbEz" ); test_case( "/i pattern character class", /ba[0a-c99M-S0]/i & /bEz/ == "bArbEz" ); - test_case( "(+i ...) pattern construct", /foo|(+i bar)/ in "xBAry" ); - test_case( "(+i ...) pattern construct", /foo|(+i bar)/ in "xFOoy" ); - test_case( "(+i ...) pattern construct", /foo|(+i bar)/ | /foo/i in "xFOoy" ); + test_case( "(?i:...) 
pattern construct", /foo|(?i:bar)/ in "xBAry" ); + test_case( "(?i:...) pattern construct", /foo|(?i:bar)/ in "xFOoy" ); + test_case( "(?i:...) pattern construct", /foo|(?i:bar)/ | /foo/i in "xFOoy" ); }