Do not use scientific notation when printing doubles in logs.

Closes BIT-1558.
2025-10-02 14:48:21 +00:00 · 2016-05-23 14:42:13 -07:00 · 2016-05-23 14:42:13 -07:00 · d86bf15dbf
commit d86bf15dbf
parent 3581ead0d9
7 changed files with 197 additions and 5 deletions
--- a/src/Desc.cc
+++ b/src/Desc.cc
@ -4,6 +4,7 @@

 #include <stdlib.h>
 #include <errno.h>
+#include <math.h>

 #include "Desc.h"
 #include "File.h"
@ -138,17 +139,22 @@ void ODesc::Add(uint64 u)
 		}
 	}

-void ODesc::Add(double d)
+void ODesc::Add(double d, bool no_exp)
 	{
 	if ( IsBinary() )
 		AddBytes(&d, sizeof(d));
 	else
 		{
 		char tmp[256];
+
+		if ( no_exp )
+			modp_dtoa3(d, tmp, sizeof(tmp), IsReadable() ? 6 : 8);
+		else
 			modp_dtoa2(d, tmp, IsReadable() ? 6 : 8);
+
 		Add(tmp);

-		if ( d == double(int(d)) )
+		if ( nearbyint(d) == d && isfinite(d) && ! strchr(tmp, 'e') )
 			// disambiguate from integer
 			Add(".0");
 		}
--- a/src/Desc.h
+++ b/src/Desc.h
@ -81,7 +81,7 @@ public:
 	void Add(uint32 u);
 	void Add(int64 i);
 	void Add(uint64 u);
-	void Add(double d);
+	void Add(double d, bool no_exp=false);
 	void Add(const IPAddr& addr);
 	void Add(const IPPrefix& prefix);

--- a/src/modp_numtoa.c
+++ b/src/modp_numtoa.c
@ -287,5 +287,136 @@ void modp_dtoa2(double value, char* str, int prec)
    strreverse(str, wstr-1);
 }

+// This is nearly identical to modp_dtoa2 above, except that it never uses
+// exponential notation and requires a buffer length.
+void modp_dtoa3(double value, char* str, int n, int prec)
+{
+    /* Hacky test for NaN
+     * under -fast-math this won't work, but then you also won't
+     * have correct nan values anyways.  The alternative is
+     * to link with libmath (bad) or hack IEEE double bits (bad)
+     */
+    if (! (value == value)) {
+        str[0] = 'n'; str[1] = 'a'; str[2] = 'n'; str[3] = '\0';
+        return;
+    }
+
+    /* if input is larger than thres_max, fall back to snprintf("%f") below */
+    const double thres_max = (double)(0x7FFFFFFF);
+
+    int count;
+    double diff = 0.0;
+    char* wstr = str;
+
+    if (prec < 0) {
+        prec = 0;
+    } else if (prec > 9) {
+        /* precision of >= 10 can lead to overflow errors */
+        prec = 9;
+    }
+
+
+    /* we'll work in positive values and deal with the
+       negative sign issue later */
+    int neg = 0;
+    if (value < 0) {
+        neg = 1;
+        value = -value;
+    }
+
+
+    int whole = (int) value;
+    double tmp = (value - whole) * _pow10[prec];
+    uint32_t frac = (uint32_t)(tmp);
+    diff = tmp - frac;
+
+    if (diff > 0.5) {
+        ++frac;
+        /* handle rollover, e.g.  case 0.99 with prec 1 is 1.0  */
+        if (frac >= _pow10[prec]) {
+            frac = 0;
+            ++whole;
+        }
+    } else if (diff == 0.5 && ((frac == 0) || (frac & 1))) {
+        /* if halfway, round up if odd, OR
+           if last digit is 0.  That last part is strange */
+        ++frac;
+    }
+
+    /* for very large numbers switch back to native snprintf (fixed "%f" notation).
+       anyone want to write code to replace this? */
+    /*
+      normal printf behavior is to print EVERY whole number digit
+      which can be 100s of characters overflowing your buffers == bad
+    */
+    if (value > thres_max) {
+        /* ---- Modified part, compared to modp_dtoa2. */
+        int i = snprintf(str, n, "%.*f", prec, neg ? -value : value);
+
+        if ( i < 0 || i >= n ) {
+        // Error or truncated output.
+            snprintf(str, n, "NAN");
+            return;
+            }
+
+        /* Remove trailing zeros. */
+
+        char* p;
+        for ( p = str + i - 1; p >= str && *p == '0'; --p );
+
+        if ( p >= str && *p == '.' )
+            --p;
+
+        *++p = '\0';
+        return;
+
+        /* ---- End of modified part. */
+    }
+
+    if (prec == 0) {
+        diff = value - whole;
+        if (diff > 0.5) {
+            /* greater than 0.5, round up, e.g. 1.6 -> 2 */
+            ++whole;
+        } else if (diff == 0.5 && (whole & 1)) {
+            /* exactly 0.5 and ODD, then round up */
+            /* 1.5 -> 2, but 2.5 -> 2 */
+            ++whole;
+        }
+
+        //vvvvvvvvvvvvvvvvvvv  Diff from modp_dto2
+    } else if (frac) {
+        count = prec;
+        // now do fractional part, as an unsigned number
+        // we know it is not 0 but we can have trailing zeros, these
+        // should be removed
+        while (!(frac % 10)) {
+            --count;
+            frac /= 10;
+        }
+        //^^^^^^^^^^^^^^^^^^^  Diff from modp_dto2
+
+        // now do fractional part, as an unsigned number
+        do {
+            --count;
+            *wstr++ = (char)(48 + (frac % 10));
+        } while (frac /= 10);
+        // add extra 0s
+        while (count-- > 0) *wstr++ = '0';
+        // add decimal
+        *wstr++ = '.';
+    }
+
+    // do whole part
+    // Take care of sign
+    // Conversion. Number is reversed.
+    do *wstr++ = (char)(48 + (whole % 10)); while (whole /= 10);
+    if (neg) {
+        *wstr++ = '-';
+    }
+    *wstr='\0';
+    strreverse(str, wstr-1);
+}
+


--- a/src/modp_numtoa.h
+++ b/src/modp_numtoa.h
@ -97,6 +97,15 @@ void modp_dtoa(double value, char* buf, int precision);
 */
 void modp_dtoa2(double value, char* buf, int precision);

+/** \brief convert a floating point number to char buffer with a
+ *         variable-precision format, no trailing zeros, and no
+ *   	scientific notation.
+ *
+ *  Other than avoiding scientific notation, this is the same as modp_dtoa2. It does however
+ *  require the max buffer length. The buffer will always be null-terminated.
+ */
+void modp_dtoa3(double value, char* buf, int n, int precision);
+
 END_C

 #endif
--- a/src/threading/formatters/Ascii.cc
+++ b/src/threading/formatters/Ascii.cc
@ -91,7 +91,7 @@ bool Ascii::Describe(ODesc* desc, threading::Value* val, const string& name) con
 		// Rendering via Add() truncates trailing 0s after the
 		// decimal point. The difference with TIME/INTERVAL is mainly
 		// to keep the log format consistent.
-		desc->Add(val->val.double_val);
+		desc->Add(val->val.double_val, true);
 		break;

 	case TYPE_INTERVAL:
--- a/testing/btest/Baseline/scripts.base.frameworks.logging.ascii-double/test.log
+++ b/testing/btest/Baseline/scripts.base.frameworks.logging.ascii-double/test.log
@ -0,0 +1,17 @@
+#separator \x09
+#set_separator	,
+#empty_field	(empty)
+#unset_field	-
+#path	test
+#open	2016-05-23-22-44-54
+#fields	d
+#types	double
+2153226000.0
+2153226000.1
+2153226000.123457
+1.0
+1.1
+1.123457
+1.1234
+3140000000000000.0
+#close	2016-05-23-22-44-54
--- a/testing/btest/scripts/base/frameworks/logging/ascii-double.bro
+++ b/testing/btest/scripts/base/frameworks/logging/ascii-double.bro
@ -0,0 +1,29 @@
+#
+# @TEST-EXEC: bro -b %INPUT
+# @TEST-EXEC: btest-diff test.log
+# 
+# Make sure we do not write out scientific notation for doubles.
+
+module Test;
+
+export {
+	redef enum Log::ID += { LOG };
+
+	type Info: record {
+		d: double &log;
+	};
+}
+
+event bro_init()
+{
+	Log::create_stream(Test::LOG, [$columns=Info]);
+	Log::write(Test::LOG, [$d=2153226000.0]);
+	Log::write(Test::LOG, [$d=2153226000.1]);
+	Log::write(Test::LOG, [$d=2153226000.123456789]);
+	Log::write(Test::LOG, [$d=1.0]);
+	Log::write(Test::LOG, [$d=1.1]);
+	Log::write(Test::LOG, [$d=1.123456789]);
+	Log::write(Test::LOG, [$d=1.1234]);
+	Log::write(Test::LOG, [$d=3.14e15]);
+}
+