Merge remote-tracking branch 'origin/topic/awelzel/4494-ts-millis-signed'

* origin/topic/awelzel/4494-ts-millis-signed:
  logging/ascii/json: Make TS_MILLIS signed, add TS_MILLIS_UNSIGNED
This commit is contained in:
Arne Welzel 2025-05-30 17:24:05 +02:00
commit f4cd92e24a
12 changed files with 94 additions and 8 deletions

15
CHANGES
View file

@ -1,3 +1,18 @@
8.0.0-dev.288 | 2025-05-30 17:24:05 +0200
* GH-4494: logging/ascii/json: Make TS_MILLIS signed, add TS_MILLIS_UNSIGNED (Arne Welzel, Corelight)
It seems TS_MILLIS is specifically for Elasticsearch and starting with
Elasticsearch 8.2 epoch_millis does (again?) support negative epoch_millis,
so make Zeek produce that by default.
If this breaks a given deployment, they can switch Zeek back to TS_MILLIS_UNSIGNED.
https://discuss.elastic.co/t/migration-from-es-6-8-to-7-17-issues-with-negative-date-epoch-timestamp/335259
https://github.com/elastic/elasticsearch/pull/80208
Thanks to @timo-mue for reporting!
8.0.0-dev.286 | 2025-05-30 08:12:43 -0700
* Add move operations for LogWriteHeader (Tim Wojtulewicz, Corelight)

11
NEWS
View file

@ -24,6 +24,17 @@ Breaking Changes
a small overhead when enabled. There aren't enough users of network timestamp
metadata to justify the complexity of treating it separately.
- The ASCII writer's ``JSON::TS_MILLIS`` timestamp format was changed to produce
signed integers. This matters for the representation of timestamps that are
before the UNIX epoch. These are now written as negative values, while previously
the negative value was interpreted as an unsigned integer, resulting in very large
timestamps, potentially causing issues for downstream consumers.
If you prefer to always have unsigned values, it's possible to revert to the previous
behavior by setting:
redef LogAscii::json_timestamps = JSON::TS_MILLIS_UNSIGNED;
New Functionality
-----------------

View file

@ -1 +1 @@
8.0.0-dev.286
8.0.0-dev.288

View file

@ -5493,10 +5493,16 @@ export {
## Timestamps will be formatted as UNIX epoch doubles. This is
## the format that Zeek typically writes out timestamps.
TS_EPOCH,
## Timestamps will be formatted as signed integers that
## represent the number of milliseconds since the UNIX
## epoch. Timestamps before the UNIX epoch are represented
## as negative values.
TS_MILLIS,
## Timestamps will be formatted as unsigned integers that
## represent the number of milliseconds since the UNIX
## epoch.
TS_MILLIS,
## epoch. Timestamps before the UNIX epoch result in negative
## values being interpreted as large unsigned integers.
TS_MILLIS_UNSIGNED,
## Timestamps will be formatted in the ISO8601 DateTime format.
## Subseconds are also included which isn't actually part of the
## standard but most consumers that parse ISO8601 seem to be able

View file

@ -363,6 +363,8 @@ bool Ascii::InitFormatter() {
tf = threading::formatter::JSON::TS_EPOCH;
else if ( strcmp(json_timestamps.c_str(), "JSON::TS_MILLIS") == 0 )
tf = threading::formatter::JSON::TS_MILLIS;
else if ( strcmp(json_timestamps.c_str(), "JSON::TS_MILLIS_UNSIGNED") == 0 )
tf = threading::formatter::JSON::TS_MILLIS_UNSIGNED;
else if ( strcmp(json_timestamps.c_str(), "JSON::TS_ISO8601") == 0 )
tf = threading::formatter::JSON::TS_ISO8601;
else {

View file

@ -121,7 +121,19 @@ void JSON::BuildJSON(zeek::json::detail::NullDoubleWriter& writer, Value* val, c
else if ( timestamps == TS_MILLIS ) {
// ElasticSearch uses milliseconds for timestamps
writer.Uint64((uint64_t)(val->val.double_val * 1000));
writer.Int64((int64_t)(val->val.double_val * 1000));
}
else if ( timestamps == TS_MILLIS_UNSIGNED ) {
// Without the cast through int64_t the resulting
// uint64_t value is zero for negative timestamps
// on arm64. This is UB territory, a negative value
// cannot be represented in uint64_t and so the
// compiler is free to do whatever. Prevent this by
// casting through an int64_t.
//
// https://stackoverflow.com/a/55057221
uint64_t v = static_cast<uint64_t>(static_cast<int64_t>(val->val.double_val * 1000));
writer.Uint64(v);
}
break;

View file

@ -19,10 +19,11 @@ namespace zeek::threading::formatter {
class JSON : public Formatter {
public:
enum TimeFormat {
TS_EPOCH, // Doubles that represents seconds from the UNIX epoch.
TS_ISO8601, // ISO 8601 defined human readable timestamp format.
TS_MILLIS // Milliseconds from the UNIX epoch. Some consumers need this (e.g.,
// elasticsearch).
TS_EPOCH, // Doubles that represents seconds from the UNIX epoch.
TS_ISO8601, // ISO 8601 defined human readable timestamp format.
TS_MILLIS, // Signed milliseconds from the UNIX epoch. Some consumers need this (e.g.,
// elasticsearch).
TS_MILLIS_UNSIGNED // Unsigned milliseconds from the UNIX epoch, overflowing.
};
JSON(MsgThread* t, TimeFormat tf, bool include_unset_fields = false);

View file

@ -0,0 +1,2 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
{"ts":-315619200.0}

View file

@ -0,0 +1,2 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
{"ts":"1960-01-01T00:00:00.000000Z"}

View file

@ -0,0 +1,2 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
{"ts":-315619200000}

View file

@ -0,0 +1,2 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
{"ts":18446743758090351616}

View file

@ -0,0 +1,31 @@
# @TEST-DOC: Test timestamp representations of a negative value in JSON.
#
# @TEST-EXEC: zeek -b %INPUT LogAscii::json_timestamps=JSON::TS_EPOCH;
# @TEST-EXEC: mv test.log test.log.ts_epoch
# @TEST-EXEC: zeek -b %INPUT LogAscii::json_timestamps=JSON::TS_MILLIS;
# @TEST-EXEC: mv test.log test.log.ts_millis
# @TEST-EXEC: zeek -b %INPUT LogAscii::json_timestamps=JSON::TS_MILLIS_UNSIGNED;
# @TEST-EXEC: mv test.log test.log.ts_millis_unsigned
# @TEST-EXEC: zeek -b %INPUT LogAscii::json_timestamps=JSON::TS_ISO8601
# @TEST-EXEC: mv test.log test.log.ts_iso8601
#
# @TEST-EXEC: TEST_DIFF_CANONIFIER= btest-diff test.log.ts_epoch
# @TEST-EXEC: TEST_DIFF_CANONIFIER= btest-diff test.log.ts_millis
# @TEST-EXEC: TEST_DIFF_CANONIFIER= btest-diff test.log.ts_millis_unsigned
# @TEST-EXEC: TEST_DIFF_CANONIFIER= btest-diff test.log.ts_iso8601
# Minimal logging setup: a stream named "test" with a single time column,
# used to observe how a pre-epoch timestamp is rendered in JSON.
module TEST;
export {
# Add LOG as a new logging stream identifier.
redef enum Log::ID += { LOG };
# Record describing the log's columns; only `ts` exists and it is marked &log.
type Test: record {
ts: time &log;
};
}
# Switch the ASCII writer to JSON output; the timestamp format itself is
# selected per run via LogAscii::json_timestamps on the command line.
redef LogAscii::use_json=T;
event zeek_init() {
Log::create_stream(TEST::LOG, [$columns=TEST::Test, $path="test"]);
# -315619200 seconds relative to the UNIX epoch is 1960-01-01T00:00:00Z,
# i.e. a negative timestamp.
Log::write(TEST::LOG, [$ts=double_to_time(-315619200)]);
}