From dc6e21d6aec362790bceb8698a36f22247d24fff Mon Sep 17 00:00:00 2001 From: Arne Welzel Date: Thu, 14 Oct 2021 10:54:14 +0200 Subject: [PATCH] logging/writers/ascii: shadow files: Add fsync() before rename() We're using shadow files for log rotation on systems with ext4 running Linux 4.19. We've observed zero-length shadow files in the logger's working directory after a power-outage. This leads to a broken/stuck logger process due to empty shadow files being considered invalid and the process exiting: error: failed to process leftover log 'conn.log.gz': Found leftover log, 'conn.log.gz', but the associated shadow file, '.shadow.conn.log.gz', required to process it is invalid PR #1137 introduced atomic renaming of shadow files and was supposed to handle this. However, after more investigation, the rename() has to be preceded by an fsync() in order to avoid zero-length files in the presence of hard-crashes or power-failures. This is generally operating system and filesystem dependent, but should not hurt to add. The performance impact can likely be neglected due to the low frequency and limited number of log streams. This has happened to others, too. Some references around this issue: * https://stackoverflow.com/questions/7433057/is-rename-without-fsync-safe * https://unix.stackexchange.com/questions/464382/which-filesystems-require-fsync-for-crash-safety-when-replacing-an-existing-fi * https://bugzilla.kernel.org/show_bug.cgi?id=15910 Reproducer This issue was reproduced artificially on Linux using the sysrq-trigger functionality to hard-reset the system shortly after a .shadow file was renamed to it's final destination with the following script watching for .shadow.conn.log.gz: #!/bin/bash set -eu dir=/data/logger-01/ # Allow everything via /proc/sysrq-trigger echo "1" > /proc/sys/kernel/sysrq inotifywait -m -e MOVED_TO --format '%e %w%f' "${dir}" | while read -r line; do if echo "${line}" | grep -q '^MOVED_TO .*/.shadow.conn.log.gz$'; then echo "RESET: $line" sleep 4 # Trigger a hard-reset without sync/unmount echo "b" > /proc/sysrq-trigger fi done This quite reliably (4 out of 4 times) yielded a system with zero-length shadow files and a broken logger after it came back online: $ ls -lha /data/logger-01/.shadow.* -rw-r--r-- 1 bro bro 0 Oct 14 02:26 .shadow.conn.log.gz -rw-r--r-- 1 bro bro 0 Oct 14 02:26 .shadow.dns.log.gz -rw-r--r-- 1 bro bro 0 Oct 14 02:26 .shadow.files.log.gz After this change while running the reproducer, the shadow files always contained content after a hard-reset. Rework with util::safe_fsync helper --- src/logging/writers/ascii/Ascii.cc | 4 +++- src/util.cc | 18 ++++++++++++++++++ src/util.h | 4 ++++ 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/src/logging/writers/ascii/Ascii.cc b/src/logging/writers/ascii/Ascii.cc index a53464f925..fe39b86c76 100644 --- a/src/logging/writers/ascii/Ascii.cc +++ b/src/logging/writers/ascii/Ascii.cc @@ -465,7 +465,7 @@ bool Ascii::DoInit(const WriterInfo& info, int num_fields, const threading::Fiel if ( sfd < 0 ) { - Error(Fmt("cannot open %s: %s", sfname.data(), Strerror(errno))); + Error(Fmt("cannot open %s: %s", tmp_sfname.data(), Strerror(errno))); return false; } @@ -479,6 +479,8 @@ bool Ascii::DoInit(const WriterInfo& info, int num_fields, const threading::Fiel util::safe_write(sfd, "\n", 1); + util::safe_fsync(sfd); + util::safe_close(sfd); if ( rename(tmp_sfname.data(), sfname.data()) == -1 ) diff --git a/src/util.cc b/src/util.cc index cfa612a864..51527303c0 100644 --- a/src/util.cc +++ b/src/util.cc @@ -2162,6 +2162,24 @@ bool safe_pwrite(int fd, const unsigned char* data, size_t len, size_t offset) return true; } +void safe_fsync(int fd) + { + int r; + + do + { + r = fsync(fd); + } while ( r < 0 && errno == EINTR ); + + if ( r < 0 ) + { + char buf[128]; + zeek_strerror_r(errno, buf, sizeof(buf)); + fprintf(stderr, "safe_fsync error %d: %s\n", errno, buf); + abort(); + } + } + void safe_close(int fd) { /* diff --git a/src/util.h b/src/util.h index fa07f585a7..851191246a 100644 --- a/src/util.h +++ b/src/util.h @@ -461,6 +461,10 @@ extern bool safe_write(int fd, const char* data, int len); // Same as safe_write(), but for pwrite(). extern bool safe_pwrite(int fd, const unsigned char* data, size_t len, size_t offset); +// Like fsync() but handles interrupted system calls by retrying and +// aborts on unrecoverable errors. +extern void safe_fsync(int fd); + // Wraps close(2) to emit error messages and abort on unrecoverable errors. extern void safe_close(int fd);