From 8aaccf1c95a9366d8be3a83c3c452e1ed53fb256 Mon Sep 17 00:00:00 2001
From: Robin Sommer <robin@icir.org>
Date: Thu, 6 Oct 2011 15:55:45 -0700
Subject: [PATCH 1/2] Logging speed improvements.

We now use Google's replacement functions for slow printf-based
num-to-ascii conversion.
---
 src/CMakeLists.txt    |   1 +
 src/Desc.cc           |  25 +++-
 src/Desc.h            |   2 +
 src/LogWriterAscii.cc |   8 +-
 src/LogWriterAscii.h  |   1 +
 src/modp_numtoa.c     | 291 ++++++++++++++++++++++++++++++++++++++++++
 src/modp_numtoa.h     | 102 +++++++++++++++
 src/util.h            |   5 +
 8 files changed, 426 insertions(+), 9 deletions(-)
 create mode 100644 src/modp_numtoa.c
 create mode 100644 src/modp_numtoa.h

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 996dfc2b20..9c785bddc8 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -405,6 +405,7 @@ set(bro_SRCS
     setsignal.c
     PacketDumper.cc
     strsep.c
+    modp_numtoa.c
     ${dns_SRCS}
     ${openssl_SRCS}
 )
diff --git a/src/Desc.cc b/src/Desc.cc
index 8c161e07b2..44d5ecb042 100644
--- a/src/Desc.cc
+++ b/src/Desc.cc
@@ -102,7 +102,7 @@ void ODesc::Add(int i)
 	else
 		{
 		char tmp[256];
-		sprintf(tmp, "%d", i);
+		modp_litoa10(i, tmp);
 		Add(tmp);
 		}
 	}
@@ -114,7 +114,7 @@ void ODesc::Add(uint32 u)
 	else
 		{
 		char tmp[256];
-		sprintf(tmp, "%u", u);
+		modp_ulitoa10(u, tmp);
 		Add(tmp);
 		}
 	}
@@ -126,7 +126,7 @@ void ODesc::Add(int64 i)
 	else
 		{
 		char tmp[256];
-		sprintf(tmp, "%" PRId64, i);
+		modp_litoa10(i, tmp);
 		Add(tmp);
 		}
 	}
@@ -138,7 +138,7 @@ void ODesc::Add(uint64 u)
 	else
 		{
 		char tmp[256];
-		sprintf(tmp, "%" PRIu64, u);
+		modp_ulitoa10(u, tmp);
 		Add(tmp);
 		}
 	}
@@ -150,7 +150,7 @@ void ODesc::Add(double d)
 	else
 		{
 		char tmp[256];
-		sprintf(tmp, IsReadable() ? "%.15g" : "%.17g", d);
+		modp_dtoa(d, tmp, IsReadable() ? 15 : 17);
 		Add(tmp);
 
 		if ( d == double(int(d)) )
@@ -334,3 +334,18 @@ void ODesc::OutOfMemory()
 	{
 	reporter->InternalError("out of memory");
 	}
+
+void ODesc::Clear()
+	{
+	offset = 0;	// Rewind the write position (Len() reports offset) so the buffer can be reused.
+
+	// If the buffer has grown beyond 10 MB, release it and start over at DEFAULT_SIZE.
+	if ( size > 10 * 1024 * 1024 )
+		{
+		free(base);
+		size = DEFAULT_SIZE;
+		base = safe_malloc(size);
+		((char*) base)[0] = '\0';	// NOTE(review): only this branch resets byte 0 -- confirm no caller reads base as a C string right after Clear().
+		}
+	}
+
diff --git a/src/Desc.h b/src/Desc.h
index 5849736cbf..4ed05c1763 100644
--- a/src/Desc.h
+++ b/src/Desc.h
@@ -120,6 +120,8 @@ public:
 
 	int Len() const		{ return offset; }
 
+	void Clear();
+
 protected:
 	void Indent();
 
diff --git a/src/LogWriterAscii.cc b/src/LogWriterAscii.cc
index 04c90715fb..9fc71789d8 100644
--- a/src/LogWriterAscii.cc
+++ b/src/LogWriterAscii.cc
@@ -59,6 +59,7 @@ LogWriterAscii::LogWriterAscii()
 	memcpy(header_prefix, BifConst::LogAscii::header_prefix->Bytes(),
 	       header_prefix_len);
 
+	desc.SetEscape(separator, separator_len);
 	}
 
 LogWriterAscii::~LogWriterAscii()
@@ -184,8 +185,8 @@ bool LogWriterAscii::DoWriteOne(ODesc* desc, LogVal* val, const LogField* field)
 
 	case TYPE_TIME:
 	case TYPE_INTERVAL:
-		char buf[32];
-		snprintf(buf, sizeof(buf), "%.6f", val->val.double_val);
+		char buf[256];
+		modp_dtoa(val->val.double_val, buf, 6);
 		desc->Add(buf);
 		break;
 
@@ -261,8 +262,7 @@ bool LogWriterAscii::DoWrite(int num_fields, const LogField* const * fields,
 	if ( ! file )
 		DoInit(Path(), NumFields(), Fields());
 
-	ODesc desc(DESC_READABLE);
-	desc.SetEscape(separator, separator_len);
+	desc.Clear();
 
 	for ( int i = 0; i < num_fields; i++ )
 		{
diff --git a/src/LogWriterAscii.h b/src/LogWriterAscii.h
index 403767d589..7755f71d06 100644
--- a/src/LogWriterAscii.h
+++ b/src/LogWriterAscii.h
@@ -32,6 +32,7 @@ private:
 
 	FILE* file;
 	string fname;
+	ODesc desc;
 
 	// Options set from the script-level.
 	bool output_to_stdout;
diff --git a/src/modp_numtoa.c b/src/modp_numtoa.c
new file mode 100644
index 0000000000..6deb8a70ed
--- /dev/null
+++ b/src/modp_numtoa.c
@@ -0,0 +1,291 @@
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
+/* vi: set expandtab shiftwidth=4 tabstop=4: */
+
+#include "modp_numtoa.h"
+
+#include <stdint.h>
+#include <stdio.h>
+#include <math.h>
+
+// Other interesting references on number-to-string conversion:
+// http://www.jb.man.ac.uk/~slowe/cpp/itoa.html
+// and http://www.ddj.com/dept/cpp/184401596?pgno=6
+
+// Version 19-Nov-2007
+// Fixed round-to-even rules to match printf
+//   thanks to Johannes Otepka
+
+/**
+ * Powers of 10
+ * 10^0 to 10^9
+ */
+static const double _pow10[] = {1, 10, 100, 1000, 10000, 100000, 1000000,
+                               10000000, 100000000, 1000000000};
+
+static void strreverse(char* begin, char* end)
+{
+    for (; end > begin; --end, ++begin) {   /* in-place reversal; end is inclusive */
+        const char last = *end; *end = *begin; *begin = last;
+    }
+}
+
+void modp_itoa10(int32_t value, char* str)
+{
+    char* wstr=str;
+    // Magnitude via unsigned negation: a plain -value overflows for INT32_MIN.
+    uint32_t uvalue = (value < 0) ? 0u - (uint32_t)value : (uint32_t)value;
+    // Emit digits least-significant first (0 still yields "0"), then the sign.
+    do *wstr++ = (char)(48 + (uvalue % 10)); while(uvalue /= 10);
+    if (value < 0) *wstr++ = '-';
+    *wstr='\0';
+
+    // Digits were written backwards; flip them. str needs at least 12 bytes.
+    strreverse(str,wstr-1);
+}
+
+void modp_uitoa10(uint32_t value, char* str)
+{
+    char* out = str;
+    /* Emit decimal digits least-significant first; 0 still yields "0". */
+    do { *out++ = (char)('0' + value % 10); value /= 10; } while (value != 0);
+    *out = '\0';
+    /* The digits are backwards at this point: flip [str, out-1] in place. */
+    strreverse(str, out - 1);
+}
+
+void modp_litoa10(int64_t value, char* str)
+{
+    char* wstr=str;
+    // uint64_t, not unsigned long: long is 32 bits on ILP32/LLP64 targets and
+    // would truncate; unsigned negation also avoids overflow for INT64_MIN.
+    uint64_t uvalue = (value < 0) ? 0u - (uint64_t)value : (uint64_t)value;
+    do *wstr++ = (char)(48 + (uvalue % 10)); while(uvalue /= 10);
+    if (value < 0) *wstr++ = '-';
+    *wstr='\0';
+
+    // Digits (and sign) were written backwards; flip them. str needs >= 21 bytes.
+    strreverse(str,wstr-1);
+}
+
+void modp_ulitoa10(uint64_t value, char* str)
+{
+    char* out = str;
+    /* Emit decimal digits least-significant first; 0 still yields "0". */
+    do { *out++ = (char)('0' + value % 10); value /= 10; } while (value != 0);
+    *out = '\0';
+    /* The digits are backwards at this point: flip [str, out-1] in place. */
+    strreverse(str, out - 1);
+}
+
+void modp_dtoa(double value, char* str, int prec)
+{
+    /* Hacky test for NaN
+     * under -fast-math this won't work, but then you also won't
+     * have correct nan values anyways.  The alternative is
+     * to link with libmath (bad) or hack IEEE double bits (bad)
+     */
+    if (! (value == value)) {
+        str[0] = 'n'; str[1] = 'a'; str[2] = 'n'; str[3] = '\0';
+        return;
+    }
+    /* if input is larger than thres_max, revert to exponential */
+    const double thres_max = (double)(0x7FFFFFFF);
+
+    double diff = 0.0;
+    char* wstr = str;
+
+    if (prec < 0) {
+        prec = 0;
+    } else if (prec > 9) {
+        /* precision of >= 10 can lead to overflow errors */
+        prec = 9;
+    }
+
+    /* we'll work in positive values and deal with the
+       negative sign issue later */
+    int neg = 0;
+    if (value < 0) {
+        neg = 1;
+        value = -value;
+    }
+
+    /* too large for an int (or infinite): let sprintf print an exponential.
+       Must run BEFORE the (int) cast below, which is undefined out of range */
+    if (value > thres_max) {
+        sprintf(str, "%e", neg ? -value : value);
+        return;
+    }
+
+    int whole = (int) value;
+    double tmp = (value - whole) * _pow10[prec];
+    uint32_t frac = (uint32_t)(tmp);
+    diff = tmp - frac;
+
+    if (diff > 0.5) {
+        ++frac;
+        /* handle rollover, e.g.  case 0.99 with prec 1 is 1.0  */
+        if (frac >= _pow10[prec]) {
+            frac = 0;
+            ++whole;
+        }
+    } else if (diff == 0.5 && ((frac == 0) || (frac & 1))) {
+        /* if halfway, round up if odd, OR
+           if last digit is 0.  That last part is strange */
+        ++frac;
+        /* rollover can happen here too, e.g. 0.95 with prec 1 (tmp == 9.5)
+           must give 1.0, not "0.10"; prec 0 is rounded separately below */
+        if (prec > 0 && frac >= _pow10[prec]) {
+            frac = 0;
+            ++whole;
+        }
+    }
+
+    if (prec == 0) {
+        diff = value - whole;
+        if (diff > 0.5) {
+            /* greater than 0.5, round up, e.g. 1.6 -> 2 */
+            ++whole;
+        } else if (diff == 0.5 && (whole & 1)) {
+            /* exactly 0.5 and ODD, then round up */
+            /* 1.5 -> 2, but 2.5 -> 2 */
+            ++whole;
+        }
+    } else {
+        int count = prec;
+        // now do fractional part, as an unsigned number
+        do {
+            --count;
+            *wstr++ = (char)(48 + (frac % 10));
+        } while (frac /= 10);
+        // add extra 0s
+        while (count-- > 0) *wstr++ = '0';
+        // add decimal
+        *wstr++ = '.';
+    }
+
+    // do whole part
+    // Take care of sign
+    // Conversion. Number is reversed.
+    do *wstr++ = (char)(48 + (whole % 10)); while (whole /= 10);
+    if (neg) {
+        *wstr++ = '-';
+    }
+    *wstr='\0';
+    strreverse(str, wstr-1);
+}
+
+
+// This is near identical to modp_dtoa above
+//   The difference is noted below
+void modp_dtoa2(double value, char* str, int prec)
+{
+    /* Hacky test for NaN
+     * under -fast-math this won't work, but then you also won't
+     * have correct nan values anyways.  The alternative is
+     * to link with libmath (bad) or hack IEEE double bits (bad)
+     */
+    if (! (value == value)) {
+        str[0] = 'n'; str[1] = 'a'; str[2] = 'n'; str[3] = '\0';
+        return;
+    }
+
+    /* if input is larger than thres_max, revert to exponential */
+    const double thres_max = (double)(0x7FFFFFFF);
+
+    int count;
+    double diff = 0.0;
+    char* wstr = str;
+
+    if (prec < 0) {
+        prec = 0;
+    } else if (prec > 9) {
+        /* precision of >= 10 can lead to overflow errors */
+        prec = 9;
+    }
+
+    /* we'll work in positive values and deal with the
+       negative sign issue later */
+    int neg = 0;
+    if (value < 0) {
+        neg = 1;
+        value = -value;
+    }
+
+    /* too large for an int (or infinite): let sprintf print an exponential.
+       Must run BEFORE the (int) cast below, which is undefined out of range */
+    if (value > thres_max) {
+        sprintf(str, "%e", neg ? -value : value);
+        return;
+    }
+
+    int whole = (int) value;
+    double tmp = (value - whole) * _pow10[prec];
+    uint32_t frac = (uint32_t)(tmp);
+    diff = tmp - frac;
+
+    if (diff > 0.5) {
+        ++frac;
+        /* handle rollover, e.g.  case 0.99 with prec 1 is 1.0  */
+        if (frac >= _pow10[prec]) {
+            frac = 0;
+            ++whole;
+        }
+    } else if (diff == 0.5 && ((frac == 0) || (frac & 1))) {
+        /* if halfway, round up if odd, OR
+           if last digit is 0.  That last part is strange */
+        ++frac;
+        /* rollover can happen here too, e.g. 0.95 with prec 1 (tmp == 9.5)
+           must give 1, not "0.1"; prec 0 is rounded separately below */
+        if (prec > 0 && frac >= _pow10[prec]) {
+            frac = 0;
+            ++whole;
+        }
+    }
+
+    if (prec == 0) {
+        diff = value - whole;
+        if (diff > 0.5) {
+            /* greater than 0.5, round up, e.g. 1.6 -> 2 */
+            ++whole;
+        } else if (diff == 0.5 && (whole & 1)) {
+            /* exactly 0.5 and ODD, then round up */
+            /* 1.5 -> 2, but 2.5 -> 2 */
+            ++whole;
+        }
+
+        //vvvvvvvvvvvvvvvvvvv  Diff from modp_dtoa
+    } else if (frac) {
+        count = prec;
+        // now do fractional part, as an unsigned number
+        // we know it is not 0 but it can have trailing zeros, these
+        // should be removed
+        while (!(frac % 10)) {
+            --count;
+            frac /= 10;
+        }
+        //^^^^^^^^^^^^^^^^^^^  Diff from modp_dtoa
+
+        // now do fractional part, as an unsigned number
+        do {
+            --count;
+            *wstr++ = (char)(48 + (frac % 10));
+        } while (frac /= 10);
+        // add extra 0s
+        while (count-- > 0) *wstr++ = '0';
+        // add decimal
+        *wstr++ = '.';
+    }
+
+    // do whole part
+    // Take care of sign
+    // Conversion. Number is reversed.
+    do *wstr++ = (char)(48 + (whole % 10)); while (whole /= 10);
+    if (neg) {
+        *wstr++ = '-';
+    }
+    *wstr='\0';
+    strreverse(str, wstr-1);
+}
+
+
+
diff --git a/src/modp_numtoa.h b/src/modp_numtoa.h
new file mode 100644
index 0000000000..b848163d1d
--- /dev/null
+++ b/src/modp_numtoa.h
@@ -0,0 +1,102 @@
+/* -*- mode: c++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
+/* vi: set expandtab shiftwidth=4 tabstop=4: */
+
+/**
+ * \file
+ *
+ * <pre>
+ * Copyright &copy; 2007, Nick Galbreath -- nickg [at] modp [dot] com
+ * All rights reserved.
+ * http://code.google.com/p/stringencoders/
+ * Released under the bsd license.
+ * </pre>
+ *
+ * This defines signed/unsigned integer, and 'double' to char buffer
+ * converters.  The standard way of doing this is with "sprintf", however
+ * these functions are
+ *   * guaranteed maximum size output
+ *   * 5-20x faster!
+ *   * core-dump safe
+ *
+ *
+ */
+
+#ifndef COM_MODP_STRINGENCODERS_NUMTOA_H
+#define COM_MODP_STRINGENCODERS_NUMTOA_H
+
+#ifdef __cplusplus
+#define BEGIN_C extern "C" {   /* give the declarations below C linkage */
+#define END_C }
+#else
+#define BEGIN_C
+#define END_C
+#endif
+
+#include <stdint.h>   /* system header: keep it outside the extern "C" block */
+
+BEGIN_C
+
+/** \brief convert a signed integer to char buffer
+ *
+ * \param[in] value
+ * \param[out] buf the output buffer.  Should be 16 chars or more.
+ */
+void modp_itoa10(int32_t value, char* buf);
+
+/** \brief convert an unsigned integer to char buffer
+ *
+ * \param[in] value
+ * \param[out] buf The output buffer, should be 16 chars or more.
+ */
+void modp_uitoa10(uint32_t value, char* buf);
+
+/** \brief convert a signed 64-bit integer to char buffer
+ *
+ * \param[in] value
+ * \param[out] buf the output buffer.  Should be 24 chars or more.
+ */
+void modp_litoa10(int64_t value, char* buf);
+
+/** \brief convert an unsigned 64-bit integer to char buffer
+ *
+ * \param[in] value
+ * \param[out] buf The output buffer, should be 24 chars or more.
+ */
+void modp_ulitoa10(uint64_t value, char* buf);
+
+/** \brief convert a floating point number to char buffer with
+ *         fixed-precision format
+ *
+ * This is similar to "%.[0-9]f" in the printf style.  It will include
+ * trailing zeros.  NaN is written as "nan".
+ *
+ * If the magnitude of the input is greater than 0x7FFFFFFF (2^31 - 1),
+ * the output is switched to exponential format ("%e").
+ *
+ * \param[in] value
+ * \param[out] buf  The allocated output buffer.  Should be 32 chars or more.
+ * \param[in] precision  Number of digits to the right of the decimal point.
+ *    Can only be 0-9; values outside that range are clamped to it.
+ */
+void modp_dtoa(double value, char* buf, int precision);
+
+/** \brief convert a floating point number to char buffer with a
+ *         variable-precision format, and no trailing zeros
+ *
+ * This is similar to "%.[0-9]f" in the printf style, except it will
+ * NOT include trailing zeros after the decimal point.  This type
+ * of format oddly does not exist with printf.
+ *
+ * If the magnitude of the input is greater than 0x7FFFFFFF (2^31 - 1),
+ * the output is switched to exponential format ("%e").
+ *
+ * \param[in] value
+ * \param[out] buf  The allocated output buffer.  Should be 32 chars or more.
+ * \param[in] precision  Number of digits to the right of the decimal point.
+ *    Can only be 0-9; values outside that range are clamped to it.
+ */
+void modp_dtoa2(double value, char* buf, int precision);
+
+END_C
+
+#endif
diff --git a/src/util.h b/src/util.h
index 132aac4eac..6e76b0f61f 100644
--- a/src/util.h
+++ b/src/util.h
@@ -76,6 +76,11 @@ typedef int32 ptr_compat_int;
 # error "Unusual pointer size. Please report to bro@bro-ids.org."
 #endif
 
+extern "C"	// C linkage for the modp number-to-string helpers built from modp_numtoa.c
+	{
+	#include "modp_numtoa.h"	// the header also wraps its declarations in extern "C" under C++
+	}
+
 template <class T>
 void delete_each(T* t)
 	{

From 9e673e12983eb34d5863e91133bff367951a9486 Mon Sep 17 00:00:00 2001
From: Robin Sommer <robin@icir.org>
Date: Thu, 6 Oct 2011 16:11:08 -0700
Subject: [PATCH 2/2] Optimizing some MIME code.

---
 src/MIME.cc | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/MIME.cc b/src/MIME.cc
index 109b897b88..a7825496ec 100644
--- a/src/MIME.cc
+++ b/src/MIME.cc
@@ -873,11 +873,10 @@ void MIME_Entity::DataOctets(int len, const char* data)
 		if ( data_buf_offset < 0 && ! GetDataBuffer() )
 			return;
 
-		while ( data_buf_offset < data_buf_length && len > 0 )
-			{
-			data_buf_data[data_buf_offset++] = *data;
-			++data; --len;
-			}
+		int n = min(data_buf_length - data_buf_offset, len);
+		memcpy(data_buf_data + data_buf_offset, data, n);
+		data += n;
+		len -= n;
 
 		if ( data_buf_offset == data_buf_length )
 			{