mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 06:38:20 +00:00
logging/ascii/json: Make TS_MILLIS signed, add TS_MILLIS_UNSIGNED
It seems TS_MILLIS is specifically for Elasticsearch and starting with Elasticsearch 8.2 epoch_millis does (again?) support negative epoch_millis, so make Zeek produce that by default. If this breaks a given deployment, they can switch Zeek back to TS_MILLIS_UNSIGNED. https://discuss.elastic.co/t/migration-from-es-6-8-to-7-17-issues-with-negative-date-epoch-timestamp/335259 https://github.com/elastic/elasticsearch/pull/80208 Thanks to @timo-mue for reporting! Closes #4494
This commit is contained in:
parent
f16ebd34b3
commit
93813a5079
10 changed files with 78 additions and 7 deletions
11
NEWS
11
NEWS
|
@ -24,6 +24,17 @@ Breaking Changes
|
||||||
a small overhead when enabled. There aren't enough users of network timestamp
|
a small overhead when enabled. There aren't enough users of network timestamp
|
||||||
metadata to justify the complexity of treating it separately.
|
metadata to justify the complexity of treating it separately.
|
||||||
|
|
||||||
|
- The ASCII writer's ``JSON::TS_MILLIS`` timestamp format was changed to produce
|
||||||
|
signed integers. This matters for the representation of timestamps that are
|
||||||
|
before the UNIX epoch. These are now written as negative values, while previously
|
||||||
|
the negative value was interpreted as an unsigned integer, resulting in very large
|
||||||
|
timestamps, potentially causing issues for downstream consumers.
|
||||||
|
|
||||||
|
If you prefer to always have unsigned values, it's possible to revert to the previous
|
||||||
|
behavior by setting:
|
||||||
|
|
||||||
|
redef LogAscii::json_timestamps = JSON::TS_MILLIS_UNSIGNED;
|
||||||
|
|
||||||
New Functionality
|
New Functionality
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
|
|
|
@ -5493,10 +5493,16 @@ export {
|
||||||
## Timestamps will be formatted as UNIX epoch doubles. This is
|
## Timestamps will be formatted as UNIX epoch doubles. This is
|
||||||
## the format that Zeek typically writes out timestamps.
|
## the format that Zeek typically writes out timestamps.
|
||||||
TS_EPOCH,
|
TS_EPOCH,
|
||||||
|
## Timestamps will be formatted as signed integers that
|
||||||
|
## represent the number of milliseconds since the UNIX
|
||||||
|
## epoch. Timestamps before the UNIX epoch are represented
|
||||||
|
## as negative values.
|
||||||
|
TS_MILLIS,
|
||||||
## Timestamps will be formatted as unsigned integers that
|
## Timestamps will be formatted as unsigned integers that
|
||||||
## represent the number of milliseconds since the UNIX
|
## represent the number of milliseconds since the UNIX
|
||||||
## epoch.
|
## epoch. Timestamps before the UNIX epoch result in negative
|
||||||
TS_MILLIS,
|
## values being interpreted as large unsigned integers.
|
||||||
|
TS_MILLIS_UNSIGNED,
|
||||||
## Timestamps will be formatted in the ISO8601 DateTime format.
|
## Timestamps will be formatted in the ISO8601 DateTime format.
|
||||||
## Subseconds are also included which isn't actually part of the
|
## Subseconds are also included which isn't actually part of the
|
||||||
## standard but most consumers that parse ISO8601 seem to be able
|
## standard but most consumers that parse ISO8601 seem to be able
|
||||||
|
|
|
@ -363,6 +363,8 @@ bool Ascii::InitFormatter() {
|
||||||
tf = threading::formatter::JSON::TS_EPOCH;
|
tf = threading::formatter::JSON::TS_EPOCH;
|
||||||
else if ( strcmp(json_timestamps.c_str(), "JSON::TS_MILLIS") == 0 )
|
else if ( strcmp(json_timestamps.c_str(), "JSON::TS_MILLIS") == 0 )
|
||||||
tf = threading::formatter::JSON::TS_MILLIS;
|
tf = threading::formatter::JSON::TS_MILLIS;
|
||||||
|
else if ( strcmp(json_timestamps.c_str(), "JSON::TS_MILLIS_UNSIGNED") == 0 )
|
||||||
|
tf = threading::formatter::JSON::TS_MILLIS_UNSIGNED;
|
||||||
else if ( strcmp(json_timestamps.c_str(), "JSON::TS_ISO8601") == 0 )
|
else if ( strcmp(json_timestamps.c_str(), "JSON::TS_ISO8601") == 0 )
|
||||||
tf = threading::formatter::JSON::TS_ISO8601;
|
tf = threading::formatter::JSON::TS_ISO8601;
|
||||||
else {
|
else {
|
||||||
|
|
|
@ -121,7 +121,19 @@ void JSON::BuildJSON(zeek::json::detail::NullDoubleWriter& writer, Value* val, c
|
||||||
|
|
||||||
else if ( timestamps == TS_MILLIS ) {
|
else if ( timestamps == TS_MILLIS ) {
|
||||||
// ElasticSearch uses milliseconds for timestamps
|
// ElasticSearch uses milliseconds for timestamps
|
||||||
writer.Uint64((uint64_t)(val->val.double_val * 1000));
|
writer.Int64((int64_t)(val->val.double_val * 1000));
|
||||||
|
}
|
||||||
|
else if ( timestamps == TS_MILLIS_UNSIGNED ) {
|
||||||
|
// Without the cast through int64_t the resulting
|
||||||
|
// uint64_t value is zero for negative timestamps
|
||||||
|
// on arm64. This is UB territory, a negative value
|
||||||
|
// cannot be represented in uint64_t and so the
|
||||||
|
// compiler is free to do whatever. Prevent this by
|
||||||
|
// casting through an int64_t.
|
||||||
|
//
|
||||||
|
// https://stackoverflow.com/a/55057221
|
||||||
|
uint64_t v = static_cast<uint64_t>(static_cast<int64_t>(val->val.double_val * 1000));
|
||||||
|
writer.Uint64(v);
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -19,10 +19,11 @@ namespace zeek::threading::formatter {
|
||||||
class JSON : public Formatter {
|
class JSON : public Formatter {
|
||||||
public:
|
public:
|
||||||
enum TimeFormat {
|
enum TimeFormat {
|
||||||
TS_EPOCH, // Doubles that represents seconds from the UNIX epoch.
|
TS_EPOCH, // Doubles that represents seconds from the UNIX epoch.
|
||||||
TS_ISO8601, // ISO 8601 defined human readable timestamp format.
|
TS_ISO8601, // ISO 8601 defined human readable timestamp format.
|
||||||
TS_MILLIS // Milliseconds from the UNIX epoch. Some consumers need this (e.g.,
|
TS_MILLIS, // Signed milliseconds from the UNIX epoch. Some consumers need this (e.g.,
|
||||||
// elasticsearch).
|
// elasticsearch).
|
||||||
|
TS_MILLIS_UNSIGNED // Unsigned milliseconds from the UNIX epoch; negative (pre-epoch) values wrap around to large unsigned integers.
|
||||||
};
|
};
|
||||||
|
|
||||||
JSON(MsgThread* t, TimeFormat tf, bool include_unset_fields = false);
|
JSON(MsgThread* t, TimeFormat tf, bool include_unset_fields = false);
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
|
||||||
|
{"ts":-315619200.0}
|
|
@ -0,0 +1,2 @@
|
||||||
|
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
|
||||||
|
{"ts":"1960-01-01T00:00:00.000000Z"}
|
|
@ -0,0 +1,2 @@
|
||||||
|
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
|
||||||
|
{"ts":-315619200000}
|
|
@ -0,0 +1,2 @@
|
||||||
|
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
|
||||||
|
{"ts":18446743758090351616}
|
|
@ -0,0 +1,31 @@
|
||||||
|
# @TEST-DOC: Test timestamp representations of a negative value in JSON.
|
||||||
|
#
|
||||||
|
# @TEST-EXEC: zeek -b %INPUT LogAscii::json_timestamps=JSON::TS_EPOCH;
|
||||||
|
# @TEST-EXEC: mv test.log test.log.ts_epoch
|
||||||
|
# @TEST-EXEC: zeek -b %INPUT LogAscii::json_timestamps=JSON::TS_MILLIS;
|
||||||
|
# @TEST-EXEC: mv test.log test.log.ts_millis
|
||||||
|
# @TEST-EXEC: zeek -b %INPUT LogAscii::json_timestamps=JSON::TS_MILLIS_UNSIGNED;
|
||||||
|
# @TEST-EXEC: mv test.log test.log.ts_millis_unsigned
|
||||||
|
# @TEST-EXEC: zeek -b %INPUT LogAscii::json_timestamps=JSON::TS_ISO8601
|
||||||
|
# @TEST-EXEC: mv test.log test.log.ts_iso8601
|
||||||
|
#
|
||||||
|
# @TEST-EXEC: TEST_DIFF_CANONIFIER= btest-diff test.log.ts_epoch
|
||||||
|
# @TEST-EXEC: TEST_DIFF_CANONIFIER= btest-diff test.log.ts_millis
|
||||||
|
# @TEST-EXEC: TEST_DIFF_CANONIFIER= btest-diff test.log.ts_millis_unsigned
|
||||||
|
# @TEST-EXEC: TEST_DIFF_CANONIFIER= btest-diff test.log.ts_iso8601
|
||||||
|
|
||||||
|
module TEST;
|
||||||
|
|
||||||
|
export {
|
||||||
|
redef enum Log::ID += { LOG };
|
||||||
|
type Test: record {
|
||||||
|
ts: time &log;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
redef LogAscii::use_json=T;
|
||||||
|
|
||||||
|
event zeek_init() {
|
||||||
|
Log::create_stream(TEST::LOG, [$columns=TEST::Test, $path="test"]);
|
||||||
|
Log::write(TEST::LOG, [$ts=double_to_time(-315619200)]);
|
||||||
|
}
|
Loading…
Add table
Add a link
Reference in a new issue