From 336990e234e2903d9e5a596fc1b53f000181cef8 Mon Sep 17 00:00:00 2001
From: Bernhard Amann <bernhard@icsi.berkeley.edu>
Date: Mon, 23 Jul 2012 11:27:08 -0700
Subject: [PATCH 1/5] make reading ascii logfiles work when the input separator
 is different from \t.

(The #fields header line was always matched against a hard-coded tab
instead of the configured separator character.)
---
 src/input/readers/Ascii.cc                            | 4 ++--
 testing/btest/scripts/base/frameworks/input/event.bro | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/input/readers/Ascii.cc b/src/input/readers/Ascii.cc
index 73821d7cb6..297f8a7136 100644
--- a/src/input/readers/Ascii.cc
+++ b/src/input/readers/Ascii.cc
@@ -144,7 +144,7 @@ bool Ascii::ReadHeader(bool useCached)
 		pos++;
 		}
 
-	//printf("Updating fields from description %s\n", line.c_str());
+	// printf("Updating fields from description %s\n", line.c_str());
 	columnMap.clear();
 
 	for ( int i = 0; i < NumFields(); i++ )
@@ -199,7 +199,7 @@ bool Ascii::GetLine(string& str)
 		if ( str[0] != '#' )
 			return true;
 
-		if ( str.compare(0,8, "#fields\t") == 0 )
+		if ( ( str.compare(0,7, "#fields") == 0 ) && ( str[7] == separator[0] ) )
 			{
 			str = str.substr(8);
 			return true;
diff --git a/testing/btest/scripts/base/frameworks/input/event.bro b/testing/btest/scripts/base/frameworks/input/event.bro
index d275cee59c..f07ca0c43e 100644
--- a/testing/btest/scripts/base/frameworks/input/event.bro
+++ b/testing/btest/scripts/base/frameworks/input/event.bro
@@ -48,7 +48,7 @@ event line(description: Input::EventDescription, tpe: Input::Event, i: int, b: b
 event bro_init()
 	{
 	try = 0;
-    outfile = open("../out");
+	outfile = open("../out");
 	Input::add_event([$source="../input.log", $name="input", $fields=Val, $ev=line]);
 	Input::remove("input");
 	}

From 8e453663dd4d9540789614582ddce84f877a8b50 Mon Sep 17 00:00:00 2001
From: Bernhard Amann <bernhard@icsi.berkeley.edu>
Date: Mon, 23 Jul 2012 12:43:42 -0700
Subject: [PATCH 2/5] Input framework now accepts escaped ascii values as
 input.

I managed to completely forget to add unescaping to the input framework -
this should fix it. It now works with the exact same escaping that is
used by the writers (\x##).

Includes one testcase that seems to work - everything else still passes.
---
 src/input/readers/Ascii.cc |  2 ++
 src/util.cc                | 70 ++++++++++++++++++++++++++++++--------
 src/util.h                 |  1 +
 3 files changed, 58 insertions(+), 15 deletions(-)

diff --git a/src/input/readers/Ascii.cc b/src/input/readers/Ascii.cc
index 297f8a7136..aaa124f0c1 100644
--- a/src/input/readers/Ascii.cc
+++ b/src/input/readers/Ascii.cc
@@ -438,6 +438,8 @@ bool Ascii::DoUpdate()
 			if ( ! getline(splitstream, s, separator[0]) )
 				break;
 
+			s = get_unescaped_string(s);
+
 			stringfields[pos] = s;
 			pos++;
 			}
diff --git a/src/util.cc b/src/util.cc
index cd367cf825..544ba1b573 100644
--- a/src/util.cc
+++ b/src/util.cc
@@ -42,6 +42,46 @@
 #include "Net.h"
 #include "Reporter.h"
 
+/**
+ * Takes a string, unescapes all characters that are escaped as hex codes
+ * (\x##) and turns them into the equivalent ascii-codes. Returns a string
+ * containing no escaped values
+ *
+ * @param str string to unescape
+ * @return A str::string without escaped characters.
+ */
+std::string get_unescaped_string(const std::string& str)
+	{
+	char* buf = new char [str.length() + 1]; // it will at most have the same length as str.
+	char* bufpos = buf;
+	size_t pos = 0;
+
+	while ( pos < str.length() )
+		{
+		if ( str[pos] == '\\' && str[pos+1] == 'x' && 
+		     isxdigit(str[pos+2]) && isxdigit(str[pos+3]) ) 
+			{
+				*bufpos = (decode_hex(str[pos+2]) << 4) +
+					decode_hex(str[pos+3]);
+
+				pos += 4;
+				bufpos++;
+			}
+		else 
+			{
+			*bufpos = str[pos];
+			bufpos++;
+			pos++;
+			}
+		}
+
+	*bufpos = 0;
+
+	string outstring (buf, bufpos - buf);
+	delete [] buf;
+	return outstring;
+	}
+
 /**
  * Takes a string, escapes characters into equivalent hex codes (\x##), and
  * returns a string containing all escaped values.
@@ -53,25 +93,25 @@
  * @return A std::string containing a list of escaped hex values of the form
  * \x## */
 std::string get_escaped_string(const std::string& str, bool escape_all)
-{
-    char tbuf[16];
-    string esc = "";
+	{
+	char tbuf[16];
+	string esc = "";
 
-    for ( size_t i = 0; i < str.length(); ++i )
-        {
-	char c = str[i];
+	for ( size_t i = 0; i < str.length(); ++i )
+        	{
+		char c = str[i];
 
-	if ( escape_all || isspace(c) || ! isascii(c) || ! isprint(c) )
-		{
-		snprintf(tbuf, sizeof(tbuf), "\\x%02x", str[i]);
-		esc += tbuf;
+		if ( escape_all || isspace(c) || ! isascii(c) || ! isprint(c) )
+			{
+			snprintf(tbuf, sizeof(tbuf), "\\x%02x", str[i]);
+			esc += tbuf;
+			}
+		else
+			esc += c;
 		}
-	else
-		esc += c;
-	}
 
-    return esc;
-}
+	return esc;
+	}
 
 char* copy_string(const char* s)
 	{
diff --git a/src/util.h b/src/util.h
index a695c6df6a..fc4b60792b 100644
--- a/src/util.h
+++ b/src/util.h
@@ -90,6 +90,7 @@ void delete_each(T* t)
 		delete *it;
 	}
 
+std::string get_unescaped_string(const std::string& str);
 std::string get_escaped_string(const std::string& str, bool escape_all);
 
 extern char* copy_string(const char* s);

From 3163e8462928a0294605d690ed176ed528a64813 Mon Sep 17 00:00:00 2001
From: Bernhard Amann <bernhard@icsi.berkeley.edu>
Date: Mon, 23 Jul 2012 12:46:09 -0700
Subject: [PATCH 3/5] and like nearly always - forgot the baseline.

---
 .../btest/Baseline/scripts.base.frameworks.input.binary/out | 6 ++++++
 1 file changed, 6 insertions(+)
 create mode 100644 testing/btest/Baseline/scripts.base.frameworks.input.binary/out

diff --git a/testing/btest/Baseline/scripts.base.frameworks.input.binary/out b/testing/btest/Baseline/scripts.base.frameworks.input.binary/out
new file mode 100644
index 0000000000..deab902925
--- /dev/null
+++ b/testing/btest/Baseline/scripts.base.frameworks.input.binary/out
@@ -0,0 +1,6 @@
+abc^J\xffdef
+DATA2
+abc|\xffdef
+DATA2
+abc\xff|def
+DATA2

From 90735c3164019bd124b26b14f522d4bc16e71f50 Mon Sep 17 00:00:00 2001
From: Bernhard Amann <bernhard@icsi.berkeley.edu>
Date: Mon, 23 Jul 2012 12:51:07 -0700
Subject: [PATCH 4/5] and just to be a little bit careful - add a check that the
 field description is long enough. Otherwise there might be an access of
 uninitialized memory when someone reads a file that contains just #fields
 without any following field descriptions.

---
 src/input/readers/Ascii.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/input/readers/Ascii.cc b/src/input/readers/Ascii.cc
index aaa124f0c1..fd936b07b6 100644
--- a/src/input/readers/Ascii.cc
+++ b/src/input/readers/Ascii.cc
@@ -199,7 +199,7 @@ bool Ascii::GetLine(string& str)
 		if ( str[0] != '#' )
 			return true;
 
-		if ( ( str.compare(0,7, "#fields") == 0 ) && ( str[7] == separator[0] ) )
+		if ( ( str.length() > 8 ) && ( str.compare(0,7, "#fields") == 0 ) && ( str[7] == separator[0] ) )
 			{
 			str = str.substr(8);
 			return true;

From f887535f1c706a727f683c2450114d4c5e322808 Mon Sep 17 00:00:00 2001
From: Bernhard Amann <bernhard@icsi.berkeley.edu>
Date: Mon, 23 Jul 2012 17:28:27 -0700
Subject: [PATCH 5/5] fix problem with possible access to uninitialized memory
 (thanks robin :) )

---
 src/util.cc | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/util.cc b/src/util.cc
index 544ba1b573..da046133a6 100644
--- a/src/util.cc
+++ b/src/util.cc
@@ -50,13 +50,14 @@
  * @param str string to unescape
  * @return A str::string without escaped characters.
  */
-std::string get_unescaped_string(const std::string& str)
+std::string get_unescaped_string(const std::string& arg_str)
 	{
-	char* buf = new char [str.length() + 1]; // it will at most have the same length as str.
+	const char* str = arg_str.c_str();
+	char* buf = new char [arg_str.length() + 1]; // it will at most have the same length as str.
 	char* bufpos = buf;
 	size_t pos = 0;
 
-	while ( pos < str.length() )
+	while ( pos < arg_str.length() )
 		{
 		if ( str[pos] == '\\' && str[pos+1] == 'x' && 
 		     isxdigit(str[pos+2]) && isxdigit(str[pos+3]) )