From 93813a507912812c0a6c2ee4b1911c19155ba61b Mon Sep 17 00:00:00 2001 From: Arne Welzel Date: Fri, 30 May 2025 12:30:50 +0200 Subject: [PATCH] logging/ascii/json: Make TS_MILLIS signed, add TS_MILLIS_UNSIGNED It seems TS_MILLIS is specifically for Elasticsearch and starting with Elasticsearch 8.2 epoch_millis does (again?) support negative epoch_millis, so make Zeek produce that by default. If this breaks a given deployment, they can switch Zeek back to TS_MILLIS_UNSIGNED. https://discuss.elastic.co/t/migration-from-es-6-8-to-7-17-issues-with-negative-date-epoch-timestamp/335259 https://github.com/elastic/elasticsearch/pull/80208 Thanks for @timo-mue for reporting! Closes #4494 --- NEWS | 11 +++++++ scripts/base/init-bare.zeek | 10 ++++-- src/logging/writers/ascii/Ascii.cc | 2 ++ src/threading/formatters/JSON.cc | 14 ++++++++- src/threading/formatters/JSON.h | 9 +++--- .../test.log.ts_epoch | 2 ++ .../test.log.ts_iso8601 | 2 ++ .../test.log.ts_millis | 2 ++ .../test.log.ts_millis_unsigned | 2 ++ .../frameworks/logging/ascii-negative-ts.zeek | 31 +++++++++++++++++++ 10 files changed, 78 insertions(+), 7 deletions(-) create mode 100644 testing/btest/Baseline/scripts.base.frameworks.logging.ascii-negative-ts/test.log.ts_epoch create mode 100644 testing/btest/Baseline/scripts.base.frameworks.logging.ascii-negative-ts/test.log.ts_iso8601 create mode 100644 testing/btest/Baseline/scripts.base.frameworks.logging.ascii-negative-ts/test.log.ts_millis create mode 100644 testing/btest/Baseline/scripts.base.frameworks.logging.ascii-negative-ts/test.log.ts_millis_unsigned create mode 100644 testing/btest/scripts/base/frameworks/logging/ascii-negative-ts.zeek diff --git a/NEWS b/NEWS index e9c99cd36c..02cb56fd06 100644 --- a/NEWS +++ b/NEWS @@ -24,6 +24,17 @@ Breaking Changes a small overhead when enabled. There's not enough users of network timestamp metadata to justify the complexity of treating it separate. +- The ASCII writer's ``JSON::TS_MILLIS`` timestamp format was changed to produce + signed integers. This matters for the representation for timestamps that are + before the UNIX epoch. These are now written as negative values, while previously + the negative value was interpreted as an unsigned integer, resulting in very large + timestamps, potentially causing issues for downstream consumers. + + If you prefer to always have unsigned values, it's possible to revert to the previous + behavior by setting: + + redef LogAscii::json_timestamps = JSON::TS_MILLIS_UNSIGNED; + New Functionality ----------------- diff --git a/scripts/base/init-bare.zeek b/scripts/base/init-bare.zeek index 9fd71dae1a..d19bb85a88 100644 --- a/scripts/base/init-bare.zeek +++ b/scripts/base/init-bare.zeek @@ -5493,10 +5493,16 @@ export { ## Timestamps will be formatted as UNIX epoch doubles. This is ## the format that Zeek typically writes out timestamps. TS_EPOCH, + ## Timestamps will be formatted as signed integers that + ## represent the number of milliseconds since the UNIX + ## epoch. Timestamps before the UNIX epoch are represented + ## as negative values. + TS_MILLIS, ## Timestamps will be formatted as unsigned integers that ## represent the number of milliseconds since the UNIX - ## epoch. - TS_MILLIS, + ## epoch. Timestamps before the UNIX epoch result in negative + ## values being interpreted as large unsigned integers. + TS_MILLIS_UNSIGNED, ## Timestamps will be formatted in the ISO8601 DateTime format. ## Subseconds are also included which isn't actually part of the ## standard but most consumers that parse ISO8601 seem to be able diff --git a/src/logging/writers/ascii/Ascii.cc b/src/logging/writers/ascii/Ascii.cc index 2f9fc861a4..5d96786b90 100644 --- a/src/logging/writers/ascii/Ascii.cc +++ b/src/logging/writers/ascii/Ascii.cc @@ -363,6 +363,8 @@ bool Ascii::InitFormatter() { tf = threading::formatter::JSON::TS_EPOCH; else if ( strcmp(json_timestamps.c_str(), "JSON::TS_MILLIS") == 0 ) tf = threading::formatter::JSON::TS_MILLIS; + else if ( strcmp(json_timestamps.c_str(), "JSON::TS_MILLIS_UNSIGNED") == 0 ) + tf = threading::formatter::JSON::TS_MILLIS_UNSIGNED; else if ( strcmp(json_timestamps.c_str(), "JSON::TS_ISO8601") == 0 ) tf = threading::formatter::JSON::TS_ISO8601; else { diff --git a/src/threading/formatters/JSON.cc b/src/threading/formatters/JSON.cc index 6070fd1955..13ed2eb2a4 100644 --- a/src/threading/formatters/JSON.cc +++ b/src/threading/formatters/JSON.cc @@ -121,7 +121,19 @@ void JSON::BuildJSON(zeek::json::detail::NullDoubleWriter& writer, Value* val, c else if ( timestamps == TS_MILLIS ) { // ElasticSearch uses milliseconds for timestamps - writer.Uint64((uint64_t)(val->val.double_val * 1000)); + writer.Int64((int64_t)(val->val.double_val * 1000)); + } + else if ( timestamps == TS_MILLIS_UNSIGNED ) { + // Without the cast through int64_t the resulting + // uint64_t value is zero for negative timestamps + // on arm64. This is UB territory, a negative value + // cannot be represented in uint64_t and so the + // compiler is free to do whatever. Prevent this by + // casting through an int64_t. + // + // https://stackoverflow.com/a/55057221 + uint64_t v = static_cast(static_cast(val->val.double_val * 1000)); + writer.Uint64(v); } break; diff --git a/src/threading/formatters/JSON.h b/src/threading/formatters/JSON.h index 68e2bb2918..b0621012c0 100644 --- a/src/threading/formatters/JSON.h +++ b/src/threading/formatters/JSON.h @@ -19,10 +19,11 @@ namespace zeek::threading::formatter { class JSON : public Formatter { public: enum TimeFormat { - TS_EPOCH, // Doubles that represents seconds from the UNIX epoch. - TS_ISO8601, // ISO 8601 defined human readable timestamp format. - TS_MILLIS // Milliseconds from the UNIX epoch. Some consumers need this (e.g., - // elasticsearch). + TS_EPOCH, // Doubles that represents seconds from the UNIX epoch. + TS_ISO8601, // ISO 8601 defined human readable timestamp format. + TS_MILLIS, // Signed milliseconds from the UNIX epoch. Some consumers need this (e.g., + // elasticsearch). + TS_MILLIS_UNSIGNED // Unsigned milliseconds from the UNIX epoch, overflowing. }; JSON(MsgThread* t, TimeFormat tf, bool include_unset_fields = false); diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.ascii-negative-ts/test.log.ts_epoch b/testing/btest/Baseline/scripts.base.frameworks.logging.ascii-negative-ts/test.log.ts_epoch new file mode 100644 index 0000000000..263eeb95d6 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.logging.ascii-negative-ts/test.log.ts_epoch @@ -0,0 +1,2 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +{"ts":-315619200.0} diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.ascii-negative-ts/test.log.ts_iso8601 b/testing/btest/Baseline/scripts.base.frameworks.logging.ascii-negative-ts/test.log.ts_iso8601 new file mode 100644 index 0000000000..01fb152410 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.logging.ascii-negative-ts/test.log.ts_iso8601 @@ -0,0 +1,2 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +{"ts":"1960-01-01T00:00:00.000000Z"} diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.ascii-negative-ts/test.log.ts_millis b/testing/btest/Baseline/scripts.base.frameworks.logging.ascii-negative-ts/test.log.ts_millis new file mode 100644 index 0000000000..d9d573b1d6 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.logging.ascii-negative-ts/test.log.ts_millis @@ -0,0 +1,2 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +{"ts":-315619200000} diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.ascii-negative-ts/test.log.ts_millis_unsigned b/testing/btest/Baseline/scripts.base.frameworks.logging.ascii-negative-ts/test.log.ts_millis_unsigned new file mode 100644 index 0000000000..7117f1628d --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.logging.ascii-negative-ts/test.log.ts_millis_unsigned @@ -0,0 +1,2 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +{"ts":18446743758090351616} diff --git a/testing/btest/scripts/base/frameworks/logging/ascii-negative-ts.zeek b/testing/btest/scripts/base/frameworks/logging/ascii-negative-ts.zeek new file mode 100644 index 0000000000..d04155cc91 --- /dev/null +++ b/testing/btest/scripts/base/frameworks/logging/ascii-negative-ts.zeek @@ -0,0 +1,31 @@ +# @TEST-DOC: Test timestamp representations of a negative value in JSON. +# +# @TEST-EXEC: zeek -b %INPUT LogAscii::json_timestamps=JSON::TS_EPOCH; +# @TEST-EXEC: mv test.log test.log.ts_epoch +# @TEST-EXEC: zeek -b %INPUT LogAscii::json_timestamps=JSON::TS_MILLIS; +# @TEST-EXEC: mv test.log test.log.ts_millis +# @TEST-EXEC: zeek -b %INPUT LogAscii::json_timestamps=JSON::TS_MILLIS_UNSIGNED; +# @TEST-EXEC: mv test.log test.log.ts_millis_unsigned +# @TEST-EXEC: zeek -b %INPUT LogAscii::json_timestamps=JSON::TS_ISO8601 +# @TEST-EXEC: mv test.log test.log.ts_iso8601 +# +# @TEST-EXEC: TEST_DIFF_CANONIFIER= btest-diff test.log.ts_epoch +# @TEST-EXEC: TEST_DIFF_CANONIFIER= btest-diff test.log.ts_millis +# @TEST-EXEC: TEST_DIFF_CANONIFIER= btest-diff test.log.ts_millis_unsigned +# @TEST-EXEC: TEST_DIFF_CANONIFIER= btest-diff test.log.ts_iso8601 + +module TEST; + +export { + redef enum Log::ID += { LOG }; + type Test: record { + ts: time &log; + }; +} + +redef LogAscii::use_json=T; + +event zeek_init() { + Log::create_stream(TEST::LOG, [$columns=TEST::Test, $path="test"]); + Log::write(TEST::LOG, [$ts=double_to_time(-315619200)]); +}