From 8aaccf1c95a9366d8be3a83c3c452e1ed53fb256 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Thu, 6 Oct 2011 15:55:45 -0700 Subject: [PATCH 1/2] Logging speed improvements. We now use Google's replacement functions for slow printf-based num-to-ascii conversion. --- src/CMakeLists.txt | 1 + src/Desc.cc | 25 +++- src/Desc.h | 2 + src/LogWriterAscii.cc | 8 +- src/LogWriterAscii.h | 1 + src/modp_numtoa.c | 291 ++++++++++++++++++++++++++++++++++++++++++ src/modp_numtoa.h | 102 +++++++++++++++ src/util.h | 5 + 8 files changed, 426 insertions(+), 9 deletions(-) create mode 100644 src/modp_numtoa.c create mode 100644 src/modp_numtoa.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 996dfc2b20..9c785bddc8 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -405,6 +405,7 @@ set(bro_SRCS setsignal.c PacketDumper.cc strsep.c + modp_numtoa.c ${dns_SRCS} ${openssl_SRCS} ) diff --git a/src/Desc.cc b/src/Desc.cc index 8c161e07b2..44d5ecb042 100644 --- a/src/Desc.cc +++ b/src/Desc.cc @@ -102,7 +102,7 @@ void ODesc::Add(int i) else { char tmp[256]; - sprintf(tmp, "%d", i); + modp_litoa10(i, tmp); Add(tmp); } } @@ -114,7 +114,7 @@ void ODesc::Add(uint32 u) else { char tmp[256]; - sprintf(tmp, "%u", u); + modp_ulitoa10(u, tmp); Add(tmp); } } @@ -126,7 +126,7 @@ void ODesc::Add(int64 i) else { char tmp[256]; - sprintf(tmp, "%" PRId64, i); + modp_litoa10(i, tmp); Add(tmp); } } @@ -138,7 +138,7 @@ void ODesc::Add(uint64 u) else { char tmp[256]; - sprintf(tmp, "%" PRIu64, u); + modp_ulitoa10(u, tmp); Add(tmp); } } @@ -150,7 +150,7 @@ void ODesc::Add(double d) else { char tmp[256]; - sprintf(tmp, IsReadable() ? "%.15g" : "%.17g", d); + modp_dtoa(d, tmp, IsReadable() ? 15 : 17); Add(tmp); if ( d == double(int(d)) ) @@ -334,3 +334,18 @@ void ODesc::OutOfMemory() { reporter->InternalError("out of memory"); } + +void ODesc::Clear() + { + offset = 0; + + // If we've allocated an exceedingly large amount of space, free it. + if ( size > 10 * 1024 * 1024 ) + { + free(base); + size = DEFAULT_SIZE; + base = safe_malloc(size); + ((char*) base)[0] = '\0'; + } + } + diff --git a/src/Desc.h b/src/Desc.h index 5849736cbf..4ed05c1763 100644 --- a/src/Desc.h +++ b/src/Desc.h @@ -120,6 +120,8 @@ public: int Len() const { return offset; } + void Clear(); + protected: void Indent(); diff --git a/src/LogWriterAscii.cc b/src/LogWriterAscii.cc index 04c90715fb..9fc71789d8 100644 --- a/src/LogWriterAscii.cc +++ b/src/LogWriterAscii.cc @@ -59,6 +59,7 @@ LogWriterAscii::LogWriterAscii() memcpy(header_prefix, BifConst::LogAscii::header_prefix->Bytes(), header_prefix_len); + desc.SetEscape(separator, separator_len); } LogWriterAscii::~LogWriterAscii() @@ -184,8 +185,8 @@ bool LogWriterAscii::DoWriteOne(ODesc* desc, LogVal* val, const LogField* field) case TYPE_TIME: case TYPE_INTERVAL: - char buf[32]; - snprintf(buf, sizeof(buf), "%.6f", val->val.double_val); + char buf[256]; + modp_dtoa(val->val.double_val, buf, 6); desc->Add(buf); break; @@ -261,8 +262,7 @@ bool LogWriterAscii::DoWrite(int num_fields, const LogField* const * fields, if ( ! file ) DoInit(Path(), NumFields(), Fields()); - ODesc desc(DESC_READABLE); - desc.SetEscape(separator, separator_len); + desc.Clear(); for ( int i = 0; i < num_fields; i++ ) { diff --git a/src/LogWriterAscii.h b/src/LogWriterAscii.h index 403767d589..7755f71d06 100644 --- a/src/LogWriterAscii.h +++ b/src/LogWriterAscii.h @@ -32,6 +32,7 @@ private: FILE* file; string fname; + ODesc desc; // Options set from the script-level. bool output_to_stdout; diff --git a/src/modp_numtoa.c b/src/modp_numtoa.c new file mode 100644 index 0000000000..6deb8a70ed --- /dev/null +++ b/src/modp_numtoa.c @@ -0,0 +1,291 @@ +/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */ +/* vi: set expandtab shiftwidth=4 tabstop=4: */ + +#include "modp_numtoa.h" + +#include +#include +#include + +// other interesting references on num to string convesion +// http://www.jb.man.ac.uk/~slowe/cpp/itoa.html +// and http://www.ddj.com/dept/cpp/184401596?pgno=6 + +// Version 19-Nov-2007 +// Fixed round-to-even rules to match printf +// thanks to Johannes Otepka + +/** + * Powers of 10 + * 10^0 to 10^9 + */ +static const double _pow10[] = {1, 10, 100, 1000, 10000, 100000, 1000000, + 10000000, 100000000, 1000000000}; + +static void strreverse(char* begin, char* end) +{ + char aux; + while (end > begin) + aux = *end, *end-- = *begin, *begin++ = aux; +} + +void modp_itoa10(int32_t value, char* str) +{ + char* wstr=str; + // Take care of sign + unsigned int uvalue = (value < 0) ? -value : value; + // Conversion. Number is reversed. + do *wstr++ = (char)(48 + (uvalue % 10)); while(uvalue /= 10); + if (value < 0) *wstr++ = '-'; + *wstr='\0'; + + // Reverse string + strreverse(str,wstr-1); +} + +void modp_uitoa10(uint32_t value, char* str) +{ + char* wstr=str; + // Conversion. Number is reversed. + do *wstr++ = (char)(48 + (value % 10)); while (value /= 10); + *wstr='\0'; + // Reverse string + strreverse(str, wstr-1); +} + +void modp_litoa10(int64_t value, char* str) +{ + char* wstr=str; + unsigned long uvalue = (value < 0) ? -value : value; + + // Conversion. Number is reversed. + do *wstr++ = (char)(48 + (uvalue % 10)); while(uvalue /= 10); + if (value < 0) *wstr++ = '-'; + *wstr='\0'; + + // Reverse string + strreverse(str,wstr-1); +} + +void modp_ulitoa10(uint64_t value, char* str) +{ + char* wstr=str; + // Conversion. Number is reversed. + do *wstr++ = (char)(48 + (value % 10)); while (value /= 10); + *wstr='\0'; + // Reverse string + strreverse(str, wstr-1); +} + +void modp_dtoa(double value, char* str, int prec) +{ + /* Hacky test for NaN + * under -fast-math this won't work, but then you also won't + * have correct nan values anyways. The alternative is + * to link with libmath (bad) or hack IEEE double bits (bad) + */ + if (! (value == value)) { + str[0] = 'n'; str[1] = 'a'; str[2] = 'n'; str[3] = '\0'; + return; + } + /* if input is larger than thres_max, revert to exponential */ + const double thres_max = (double)(0x7FFFFFFF); + + double diff = 0.0; + char* wstr = str; + + if (prec < 0) { + prec = 0; + } else if (prec > 9) { + /* precision of >= 10 can lead to overflow errors */ + prec = 9; + } + + + /* we'll work in positive values and deal with the + negative sign issue later */ + int neg = 0; + if (value < 0) { + neg = 1; + value = -value; + } + + + int whole = (int) value; + double tmp = (value - whole) * _pow10[prec]; + uint32_t frac = (uint32_t)(tmp); + diff = tmp - frac; + + if (diff > 0.5) { + ++frac; + /* handle rollover, e.g. case 0.99 with prec 1 is 1.0 */ + if (frac >= _pow10[prec]) { + frac = 0; + ++whole; + } + } else if (diff == 0.5 && ((frac == 0) || (frac & 1))) { + /* if halfway, round up if odd, OR + if last digit is 0. That last part is strange */ + ++frac; + } + + /* for very large numbers switch back to native sprintf for exponentials. + anyone want to write code to replace this? */ + /* + normal printf behavior is to print EVERY whole number digit + which can be 100s of characters overflowing your buffers == bad + */ + if (value > thres_max) { + sprintf(str, "%e", neg ? -value : value); + return; + } + + if (prec == 0) { + diff = value - whole; + if (diff > 0.5) { + /* greater than 0.5, round up, e.g. 1.6 -> 2 */ + ++whole; + } else if (diff == 0.5 && (whole & 1)) { + /* exactly 0.5 and ODD, then round up */ + /* 1.5 -> 2, but 2.5 -> 2 */ + ++whole; + } + } else { + int count = prec; + // now do fractional part, as an unsigned number + do { + --count; + *wstr++ = (char)(48 + (frac % 10)); + } while (frac /= 10); + // add extra 0s + while (count-- > 0) *wstr++ = '0'; + // add decimal + *wstr++ = '.'; + } + + // do whole part + // Take care of sign + // Conversion. Number is reversed. + do *wstr++ = (char)(48 + (whole % 10)); while (whole /= 10); + if (neg) { + *wstr++ = '-'; + } + *wstr='\0'; + strreverse(str, wstr-1); +} + + +// This is near identical to modp_dtoa above +// The differnce is noted below +void modp_dtoa2(double value, char* str, int prec) +{ + /* Hacky test for NaN + * under -fast-math this won't work, but then you also won't + * have correct nan values anyways. The alternative is + * to link with libmath (bad) or hack IEEE double bits (bad) + */ + if (! (value == value)) { + str[0] = 'n'; str[1] = 'a'; str[2] = 'n'; str[3] = '\0'; + return; + } + + /* if input is larger than thres_max, revert to exponential */ + const double thres_max = (double)(0x7FFFFFFF); + + int count; + double diff = 0.0; + char* wstr = str; + + if (prec < 0) { + prec = 0; + } else if (prec > 9) { + /* precision of >= 10 can lead to overflow errors */ + prec = 9; + } + + + /* we'll work in positive values and deal with the + negative sign issue later */ + int neg = 0; + if (value < 0) { + neg = 1; + value = -value; + } + + + int whole = (int) value; + double tmp = (value - whole) * _pow10[prec]; + uint32_t frac = (uint32_t)(tmp); + diff = tmp - frac; + + if (diff > 0.5) { + ++frac; + /* handle rollover, e.g. case 0.99 with prec 1 is 1.0 */ + if (frac >= _pow10[prec]) { + frac = 0; + ++whole; + } + } else if (diff == 0.5 && ((frac == 0) || (frac & 1))) { + /* if halfway, round up if odd, OR + if last digit is 0. That last part is strange */ + ++frac; + } + + /* for very large numbers switch back to native sprintf for exponentials. + anyone want to write code to replace this? */ + /* + normal printf behavior is to print EVERY whole number digit + which can be 100s of characters overflowing your buffers == bad + */ + if (value > thres_max) { + sprintf(str, "%e", neg ? -value : value); + return; + } + + if (prec == 0) { + diff = value - whole; + if (diff > 0.5) { + /* greater than 0.5, round up, e.g. 1.6 -> 2 */ + ++whole; + } else if (diff == 0.5 && (whole & 1)) { + /* exactly 0.5 and ODD, then round up */ + /* 1.5 -> 2, but 2.5 -> 2 */ + ++whole; + } + + //vvvvvvvvvvvvvvvvvvv Diff from modp_dto2 + } else if (frac) { + count = prec; + // now do fractional part, as an unsigned number + // we know it is not 0 but we can have leading zeros, these + // should be removed + while (!(frac % 10)) { + --count; + frac /= 10; + } + //^^^^^^^^^^^^^^^^^^^ Diff from modp_dto2 + + // now do fractional part, as an unsigned number + do { + --count; + *wstr++ = (char)(48 + (frac % 10)); + } while (frac /= 10); + // add extra 0s + while (count-- > 0) *wstr++ = '0'; + // add decimal + *wstr++ = '.'; + } + + // do whole part + // Take care of sign + // Conversion. Number is reversed. + do *wstr++ = (char)(48 + (whole % 10)); while (whole /= 10); + if (neg) { + *wstr++ = '-'; + } + *wstr='\0'; + strreverse(str, wstr-1); +} + + + diff --git a/src/modp_numtoa.h b/src/modp_numtoa.h new file mode 100644 index 0000000000..b848163d1d --- /dev/null +++ b/src/modp_numtoa.h @@ -0,0 +1,102 @@ +/* -*- mode: c++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */ +/* vi: set expandtab shiftwidth=4 tabstop=4: */ + +/** + * \file + * + *
+ * Copyright © 2007, Nick Galbreath -- nickg [at] modp [dot] com
+ * All rights reserved.
+ * http://code.google.com/p/stringencoders/
+ * Released under the bsd license.
+ * 
+ * + * This defines signed/unsigned integer, and 'double' to char buffer + * converters. The standard way of doing this is with "sprintf", however + * these functions are + * * guarenteed maximum size output + * * 5-20x faster! + * * core-dump safe + * + * + */ + +#ifndef COM_MODP_STRINGENCODERS_NUMTOA_H +#define COM_MODP_STRINGENCODERS_NUMTOA_H + +#ifdef __cplusplus +#define BEGIN_C extern "C" { +#define END_C } +#else +#define BEGIN_C +#define END_C +#endif + +BEGIN_C + +#include + +/** \brief convert an signed integer to char buffer + * + * \param[in] value + * \param[out] buf the output buffer. Should be 16 chars or more. + */ +void modp_itoa10(int32_t value, char* buf); + +/** \brief convert an unsigned integer to char buffer + * + * \param[in] value + * \param[out] buf The output buffer, should be 16 chars or more. + */ +void modp_uitoa10(uint32_t value, char* buf); + +/** \brief convert an signed long integer to char buffer + * + * \param[in] value + * \param[out] buf the output buffer. Should be 24 chars or more. + */ +void modp_litoa10(int64_t value, char* buf); + +/** \brief convert an unsigned long integer to char buffer + * + * \param[in] value + * \param[out] buf The output buffer, should be 24 chars or more. + */ +void modp_ulitoa10(uint64_t value, char* buf); + +/** \brief convert a floating point number to char buffer with + * fixed-precision format + * + * This is similar to "%.[0-9]f" in the printf style. It will include + * trailing zeros + * + * If the input value is greater than 1<<31, then the output format + * will be switched exponential format. + * + * \param[in] value + * \param[out] buf The allocated output buffer. Should be 32 chars or more. + * \param[in] precision Number of digits to the right of the decimal point. + * Can only be 0-9. + */ +void modp_dtoa(double value, char* buf, int precision); + +/** \brief convert a floating point number to char buffer with a + * variable-precision format, and no trailing zeros + * + * This is similar to "%.[0-9]f" in the printf style, except it will + * NOT include trailing zeros after the decimal point. This type + * of format oddly does not exists with printf. + * + * If the input value is greater than 1<<31, then the output format + * will be switched exponential format. + * + * \param[in] value + * \param[out] buf The allocated output buffer. Should be 32 chars or more. + * \param[in] precision Number of digits to the right of the decimal point. + * Can only be 0-9. + */ +void modp_dtoa2(double value, char* buf, int precision); + +END_C + +#endif diff --git a/src/util.h b/src/util.h index 132aac4eac..6e76b0f61f 100644 --- a/src/util.h +++ b/src/util.h @@ -76,6 +76,11 @@ typedef int32 ptr_compat_int; # error "Unusual pointer size. Please report to bro@bro-ids.org." #endif +extern "C" + { + #include "modp_numtoa.h" + } + template void delete_each(T* t) { From 9e673e12983eb34d5863e91133bff367951a9486 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Thu, 6 Oct 2011 16:11:08 -0700 Subject: [PATCH 2/2] Optimizing some MIME code. --- src/MIME.cc | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/MIME.cc b/src/MIME.cc index 109b897b88..a7825496ec 100644 --- a/src/MIME.cc +++ b/src/MIME.cc @@ -873,11 +873,10 @@ void MIME_Entity::DataOctets(int len, const char* data) if ( data_buf_offset < 0 && ! GetDataBuffer() ) return; - while ( data_buf_offset < data_buf_length && len > 0 ) - { - data_buf_data[data_buf_offset++] = *data; - ++data; --len; - } + int n = min(data_buf_length - data_buf_offset, len); + memcpy(data_buf_data + data_buf_offset, data, n); + data += n; + len -= n; if ( data_buf_offset == data_buf_length ) {