From 336990e234e2903d9e5a596fc1b53f000181cef8 Mon Sep 17 00:00:00 2001 From: Bernhard Amann Date: Mon, 23 Jul 2012 11:27:08 -0700 Subject: [PATCH 1/5] make reading ascii logfiles work when the input separator is different from \t. (Wrong escape character was used for reading header fields). --- src/input/readers/Ascii.cc | 4 ++-- testing/btest/scripts/base/frameworks/input/event.bro | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/input/readers/Ascii.cc b/src/input/readers/Ascii.cc index 73821d7cb6..297f8a7136 100644 --- a/src/input/readers/Ascii.cc +++ b/src/input/readers/Ascii.cc @@ -144,7 +144,7 @@ bool Ascii::ReadHeader(bool useCached) pos++; } - //printf("Updating fields from description %s\n", line.c_str()); + // printf("Updating fields from description %s\n", line.c_str()); columnMap.clear(); for ( int i = 0; i < NumFields(); i++ ) @@ -199,7 +199,7 @@ bool Ascii::GetLine(string& str) if ( str[0] != '#' ) return true; - if ( str.compare(0,8, "#fields\t") == 0 ) + if ( ( str.compare(0,7, "#fields") == 0 ) && ( str[7] == separator[0] ) ) { str = str.substr(8); return true; diff --git a/testing/btest/scripts/base/frameworks/input/event.bro b/testing/btest/scripts/base/frameworks/input/event.bro index d275cee59c..f07ca0c43e 100644 --- a/testing/btest/scripts/base/frameworks/input/event.bro +++ b/testing/btest/scripts/base/frameworks/input/event.bro @@ -48,7 +48,7 @@ event line(description: Input::EventDescription, tpe: Input::Event, i: int, b: b event bro_init() { try = 0; - outfile = open("../out"); + outfile = open("../out"); Input::add_event([$source="../input.log", $name="input", $fields=Val, $ev=line]); Input::remove("input"); } From 8e453663dd4d9540789614582ddce84f877a8b50 Mon Sep 17 00:00:00 2001 From: Bernhard Amann Date: Mon, 23 Jul 2012 12:43:42 -0700 Subject: [PATCH 2/5] Input framework now accepts escaped ascii values as input. I managed to completely forget to add unescaping to the input framework - this should fix it. 
It now works with the exact same escaping that is used by the writers (\x##). Includes one testcase that seems to work - everything else still passes. --- src/input/readers/Ascii.cc | 2 ++ src/util.cc | 70 ++++++++++++++++++++++++++++++-------- src/util.h | 1 + 3 files changed, 58 insertions(+), 15 deletions(-) diff --git a/src/input/readers/Ascii.cc b/src/input/readers/Ascii.cc index 297f8a7136..aaa124f0c1 100644 --- a/src/input/readers/Ascii.cc +++ b/src/input/readers/Ascii.cc @@ -438,6 +438,8 @@ bool Ascii::DoUpdate() if ( ! getline(splitstream, s, separator[0]) ) break; + s = get_unescaped_string(s); + stringfields[pos] = s; pos++; } diff --git a/src/util.cc b/src/util.cc index cd367cf825..544ba1b573 100644 --- a/src/util.cc +++ b/src/util.cc @@ -42,6 +42,46 @@ #include "Net.h" #include "Reporter.h" +/** + * Takes a string, unescapes all characters that are escaped as hex codes + * (\x##) and turns them into the equivalent ascii-codes. Returns a string + * containing no escaped values + * + * @param str string to unescape + * @return A str::string without escaped characters. + */ +std::string get_unescaped_string(const std::string& str) + { + char* buf = new char [str.length() + 1]; // it will at most have the same length as str. + char* bufpos = buf; + size_t pos = 0; + + while ( pos < str.length() ) + { + if ( str[pos] == '\\' && str[pos+1] == 'x' && + isxdigit(str[pos+2]) && isxdigit(str[pos+3]) ) + { + *bufpos = (decode_hex(str[pos+2]) << 4) + + decode_hex(str[pos+3]); + + pos += 4; + bufpos++; + } + else + { + *bufpos = str[pos]; + bufpos++; + pos++; + } + } + + *bufpos = 0; + + string outstring (buf, bufpos - buf); + delete [] buf; + return outstring; + } + /** * Takes a string, escapes characters into equivalent hex codes (\x##), and * returns a string containing all escaped values. 
@@ -53,25 +93,25 @@ * @return A std::string containing a list of escaped hex values of the form * \x## */ std::string get_escaped_string(const std::string& str, bool escape_all) -{ - char tbuf[16]; - string esc = ""; + { + char tbuf[16]; + string esc = ""; - for ( size_t i = 0; i < str.length(); ++i ) - { - char c = str[i]; + for ( size_t i = 0; i < str.length(); ++i ) + { + char c = str[i]; - if ( escape_all || isspace(c) || ! isascii(c) || ! isprint(c) ) - { - snprintf(tbuf, sizeof(tbuf), "\\x%02x", str[i]); - esc += tbuf; + if ( escape_all || isspace(c) || ! isascii(c) || ! isprint(c) ) + { + snprintf(tbuf, sizeof(tbuf), "\\x%02x", str[i]); + esc += tbuf; + } + else + esc += c; } - else - esc += c; - } - return esc; -} + return esc; + } char* copy_string(const char* s) { diff --git a/src/util.h b/src/util.h index a695c6df6a..fc4b60792b 100644 --- a/src/util.h +++ b/src/util.h @@ -90,6 +90,7 @@ void delete_each(T* t) delete *it; } +std::string get_unescaped_string(const std::string& str); std::string get_escaped_string(const std::string& str, bool escape_all); extern char* copy_string(const char* s); From 3163e8462928a0294605d690ed176ed528a64813 Mon Sep 17 00:00:00 2001 From: Bernhard Amann Date: Mon, 23 Jul 2012 12:46:09 -0700 Subject: [PATCH 3/5] and like nearly always - forgot the baseline. 
--- .../btest/Baseline/scripts.base.frameworks.input.binary/out | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 testing/btest/Baseline/scripts.base.frameworks.input.binary/out diff --git a/testing/btest/Baseline/scripts.base.frameworks.input.binary/out b/testing/btest/Baseline/scripts.base.frameworks.input.binary/out new file mode 100644 index 0000000000..deab902925 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.input.binary/out @@ -0,0 +1,6 @@ +abc^J\xffdef +DATA2 +abc|\xffdef +DATA2 +abc\xff|def +DATA2 From 90735c3164019bd124b26b14f522d4bc16e71f50 Mon Sep 17 00:00:00 2001 From: Bernhard Amann Date: Mon, 23 Jul 2012 12:51:07 -0700 Subject: [PATCH 4/5] and just to be a little bit careful - add check if the field description is long enough. Otherwise there might possibly be an access of uninitialized memory, when someone reads a file that contains just #fields without any following field descriptions. --- src/input/readers/Ascii.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/input/readers/Ascii.cc b/src/input/readers/Ascii.cc index aaa124f0c1..fd936b07b6 100644 --- a/src/input/readers/Ascii.cc +++ b/src/input/readers/Ascii.cc @@ -199,7 +199,7 @@ bool Ascii::GetLine(string& str) if ( str[0] != '#' ) return true; - if ( ( str.compare(0,7, "#fields") == 0 ) && ( str[7] == separator[0] ) ) + if ( ( str.length() > 8 ) && ( str.compare(0,7, "#fields") == 0 ) && ( str[7] == separator[0] ) ) { str = str.substr(8); return true; From f887535f1c706a727f683c2450114d4c5e322808 Mon Sep 17 00:00:00 2001 From: Bernhard Amann Date: Mon, 23 Jul 2012 17:28:27 -0700 Subject: [PATCH 5/5] fix problem with possible access to uninitialized memory (thanks robin :) ) --- src/util.cc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/util.cc b/src/util.cc index 544ba1b573..da046133a6 100644 --- a/src/util.cc +++ b/src/util.cc @@ -50,13 +50,14 @@ * @param str string to unescape * @return A str::string without
escaped characters. */ -std::string get_unescaped_string(const std::string& str) +std::string get_unescaped_string(const std::string& arg_str) { - char* buf = new char [str.length() + 1]; // it will at most have the same length as str. + const char* str = arg_str.c_str(); + char* buf = new char [arg_str.length() + 1]; // it will at most have the same length as str. char* bufpos = buf; size_t pos = 0; - while ( pos < str.length() ) + while ( pos < arg_str.length() ) { if ( str[pos] == '\\' && str[pos+1] == 'x' && isxdigit(str[pos+2]) && isxdigit(str[pos+3]) )