From 02d0be90aa6fde5ee0832f69324367124b5bdec0 Mon Sep 17 00:00:00 2001 From: Craig Leres Date: Wed, 11 May 2022 13:13:31 -0700 Subject: [PATCH 1/9] Implement tail -F semantics for input framework MODE_STREAM Open /dev/null if the file is missing during init and wait for file to be created Collect initial ino, dev, and mtime when first opening the file Detect if the file has been replaced and open the new version --- src/input/readers/raw/Raw.cc | 70 ++++++++++++++++++++++++++++++++---- src/input/readers/raw/Raw.h | 1 + 2 files changed, 64 insertions(+), 7 deletions(-) diff --git a/src/input/readers/raw/Raw.cc b/src/input/readers/raw/Raw.cc index a8ba5df1c8..ee939d2a49 100644 --- a/src/input/readers/raw/Raw.cc +++ b/src/input/readers/raw/Raw.cc @@ -36,6 +36,7 @@ Raw::Raw(ReaderFrontend* frontend) firstrun = true; mtime = 0; ino = 0; + dev = 0; forcekill = false; offset = 0; separator.assign((const char*)BifConst::InputRaw::record_separator->Bytes(), @@ -280,12 +281,30 @@ bool Raw::OpenInput() else { file = std::unique_ptr(fopen(fname.c_str(), "r"), fclose); + if ( ! file && Info().mode == MODE_STREAM ) + { + // Watch /dev/null until the file appears + file = std::unique_ptr(fopen("/dev/null", "r"), fclose); + } + if ( ! file ) { Error(Fmt("Init: cannot open %s", fname.c_str())); return false; } + struct stat sb; + if ( fstat(fileno(file.get()), &sb) == -1 ) + { + Error(Fmt("Could not get fstat for %s", fname.c_str())); + } + else + { + mtime = sb.st_mtime; + ino = sb.st_ino; + dev = sb.st_dev; + } + if ( ! SetFDFlags(fileno(file.get()), F_SETFD, FD_CLOEXEC) ) Warning(Fmt("Init: cannot set close-on-exec for %s", fname.c_str())); } @@ -346,6 +365,7 @@ bool Raw::DoInit(const ReaderInfo& info, int num_fields, const Field* const* fie fname = info.source; mtime = 0; ino = 0; + dev = 0; execute = false; firstrun = true; int want_fields = 1; @@ -574,25 +594,61 @@ bool Raw::DoUpdate() mtime = sb.st_mtime; ino = sb.st_ino; + dev = sb.st_dev; // file changed. reread. // // fallthrough } case MODE_MANUAL: - case MODE_STREAM: - if ( Info().mode == MODE_STREAM && file ) - { - clearerr(file.get()); // remove end of file evil bits - break; - } - CloseInput(); if ( ! OpenInput() ) return false; break; + case MODE_STREAM: + // mode may not be used to execute child programs + assert(childpid == -1); + + // Clear possible EOF condition + if ( file ) + clearerr(file.get()); + + // check if the file has changed + struct stat sb; + if ( stat(fname.c_str(), &sb) == -1 ) + { + // File was removed + break; + } + + // Is it the same file? + if ( sb.st_ino == ino && sb.st_dev == dev ) + { + break; + } + + // File was replaced + FILE *tfile; + tfile = fopen(fname.c_str(), "r"); + if ( ! tfile ) + break; + + // stat newly opened file + if ( fstat(fileno(tfile), &sb) == -1 ) + { + Error(Fmt("Could not fstat %s", fname.c_str())); + break; + } + file.reset(nullptr); + file = std::unique_ptr(tfile, fclose); + ino = sb.st_ino; + dev = sb.st_dev; + offset = 0; + bufpos = 0; + break; + default: assert(false); } diff --git a/src/input/readers/raw/Raw.h b/src/input/readers/raw/Raw.h index 5e36848e26..7ebb334301 100644 --- a/src/input/readers/raw/Raw.h +++ b/src/input/readers/raw/Raw.h @@ -55,6 +55,7 @@ private: bool firstrun; time_t mtime; ino_t ino; + dev_t dev; // options set from the script-level. std::string separator; From 3b023c4fe3587459b84d680a77316a829b35aa89 Mon Sep 17 00:00:00 2001 From: Craig Leres Date: Wed, 11 May 2022 16:19:31 -0700 Subject: [PATCH 2/9] Only set mtime and ino in Raw::OpenInput() do this for MODE_STREAM and avoid breaking MODE_REREAD --- src/input/readers/raw/Raw.cc | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/input/readers/raw/Raw.cc b/src/input/readers/raw/Raw.cc index ee939d2a49..baddf7c629 100644 --- a/src/input/readers/raw/Raw.cc +++ b/src/input/readers/raw/Raw.cc @@ -293,16 +293,18 @@ bool Raw::OpenInput() return false; } - struct stat sb; - if ( fstat(fileno(file.get()), &sb) == -1 ) + if ( Info().mode == MODE_STREAM ) { - Error(Fmt("Could not get fstat for %s", fname.c_str())); - } - else - { - mtime = sb.st_mtime; - ino = sb.st_ino; - dev = sb.st_dev; + struct stat sb; + if ( fstat(fileno(file.get()), &sb) == -1 ) + { + Error(Fmt("Could not get fstat for %s", fname.c_str())); + } + else + { + ino = sb.st_ino; + dev = sb.st_dev; + } } if ( ! SetFDFlags(fileno(file.get()), F_SETFD, FD_CLOEXEC) ) From fe31e515b7301250f9c8c31749ce57c685d3e059 Mon Sep 17 00:00:00 2001 From: Craig Leres Date: Wed, 11 May 2022 13:13:31 -0700 Subject: [PATCH 3/9] Implement tail -F semantics for input framework MODE_STREAM Open /dev/null if the file is missing during init and wait for file to be created Collect initial ino, dev, and mtime when first opening the file Detect if the file has been replaced and open the new version --- src/input/readers/raw/Raw.cc | 70 ++++++++++++++++++++++++++++++++---- src/input/readers/raw/Raw.h | 1 + 2 files changed, 64 insertions(+), 7 deletions(-) diff --git a/src/input/readers/raw/Raw.cc b/src/input/readers/raw/Raw.cc index a8ba5df1c8..ee939d2a49 100644 --- a/src/input/readers/raw/Raw.cc +++ b/src/input/readers/raw/Raw.cc @@ -36,6 +36,7 @@ Raw::Raw(ReaderFrontend* frontend) firstrun = true; mtime = 0; ino = 0; + dev = 0; forcekill = false; offset = 0; separator.assign((const char*)BifConst::InputRaw::record_separator->Bytes(), @@ -280,12 +281,30 @@ bool Raw::OpenInput() else { file = std::unique_ptr(fopen(fname.c_str(), "r"), fclose); + if ( ! file && Info().mode == MODE_STREAM ) + { + // Watch /dev/null until the file appears + file = std::unique_ptr(fopen("/dev/null", "r"), fclose); + } + if ( ! file ) { Error(Fmt("Init: cannot open %s", fname.c_str())); return false; } + struct stat sb; + if ( fstat(fileno(file.get()), &sb) == -1 ) + { + Error(Fmt("Could not get fstat for %s", fname.c_str())); + } + else + { + mtime = sb.st_mtime; + ino = sb.st_ino; + dev = sb.st_dev; + } + if ( ! SetFDFlags(fileno(file.get()), F_SETFD, FD_CLOEXEC) ) Warning(Fmt("Init: cannot set close-on-exec for %s", fname.c_str())); } @@ -346,6 +365,7 @@ bool Raw::DoInit(const ReaderInfo& info, int num_fields, const Field* const* fie fname = info.source; mtime = 0; ino = 0; + dev = 0; execute = false; firstrun = true; int want_fields = 1; @@ -574,25 +594,61 @@ bool Raw::DoUpdate() mtime = sb.st_mtime; ino = sb.st_ino; + dev = sb.st_dev; // file changed. reread. // // fallthrough } case MODE_MANUAL: - case MODE_STREAM: - if ( Info().mode == MODE_STREAM && file ) - { - clearerr(file.get()); // remove end of file evil bits - break; - } - CloseInput(); if ( ! OpenInput() ) return false; break; + case MODE_STREAM: + // mode may not be used to execute child programs + assert(childpid == -1); + + // Clear possible EOF condition + if ( file ) + clearerr(file.get()); + + // check if the file has changed + struct stat sb; + if ( stat(fname.c_str(), &sb) == -1 ) + { + // File was removed + break; + } + + // Is it the same file? + if ( sb.st_ino == ino && sb.st_dev == dev ) + { + break; + } + + // File was replaced + FILE *tfile; + tfile = fopen(fname.c_str(), "r"); + if ( ! tfile ) + break; + + // stat newly opened file + if ( fstat(fileno(tfile), &sb) == -1 ) + { + Error(Fmt("Could not fstat %s", fname.c_str())); + break; + } + file.reset(nullptr); + file = std::unique_ptr(tfile, fclose); + ino = sb.st_ino; + dev = sb.st_dev; + offset = 0; + bufpos = 0; + break; + default: assert(false); } diff --git a/src/input/readers/raw/Raw.h b/src/input/readers/raw/Raw.h index 5e36848e26..7ebb334301 100644 --- a/src/input/readers/raw/Raw.h +++ b/src/input/readers/raw/Raw.h @@ -55,6 +55,7 @@ private: bool firstrun; time_t mtime; ino_t ino; + dev_t dev; // options set from the script-level. std::string separator; From e5c0cd4658ea0f851668d4b0e23c57a6a15d61b4 Mon Sep 17 00:00:00 2001 From: Craig Leres Date: Wed, 11 May 2022 16:19:31 -0700 Subject: [PATCH 4/9] Only set mtime and ino in Raw::OpenInput() do this for MODE_STREAM and avoid breaking MODE_REREAD --- src/input/readers/raw/Raw.cc | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/input/readers/raw/Raw.cc b/src/input/readers/raw/Raw.cc index ee939d2a49..baddf7c629 100644 --- a/src/input/readers/raw/Raw.cc +++ b/src/input/readers/raw/Raw.cc @@ -293,16 +293,18 @@ bool Raw::OpenInput() return false; } - struct stat sb; - if ( fstat(fileno(file.get()), &sb) == -1 ) + if ( Info().mode == MODE_STREAM ) { - Error(Fmt("Could not get fstat for %s", fname.c_str())); - } - else - { - mtime = sb.st_mtime; - ino = sb.st_ino; - dev = sb.st_dev; + struct stat sb; + if ( fstat(fileno(file.get()), &sb) == -1 ) + { + Error(Fmt("Could not get fstat for %s", fname.c_str())); + } + else + { + ino = sb.st_ino; + dev = sb.st_dev; + } } if ( ! SetFDFlags(fileno(file.get()), F_SETFD, FD_CLOEXEC) ) From 445e66b4ef6ee28cd24d78e7e2ae0cf035af8c16 Mon Sep 17 00:00:00 2001 From: Craig Leres Date: Tue, 17 May 2022 23:34:59 -0700 Subject: [PATCH 5/9] Conform to style police --- src/input/readers/raw/Raw.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/input/readers/raw/Raw.cc b/src/input/readers/raw/Raw.cc index baddf7c629..a1be46bf9f 100644 --- a/src/input/readers/raw/Raw.cc +++ b/src/input/readers/raw/Raw.cc @@ -632,7 +632,7 @@ bool Raw::DoUpdate() } // File was replaced - FILE *tfile; + FILE* tfile; tfile = fopen(fname.c_str(), "r"); if ( ! tfile ) break; From 53ab44c09899757127a288e34ae1b9faac94e329 Mon Sep 17 00:00:00 2001 From: Craig Leres Date: Sat, 4 Jun 2022 10:20:38 -0700 Subject: [PATCH 6/9] Remove child program check, it's probably wrong given the test failures it causes --- src/input/readers/raw/Raw.cc | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/input/readers/raw/Raw.cc b/src/input/readers/raw/Raw.cc index a1be46bf9f..48594d0c57 100644 --- a/src/input/readers/raw/Raw.cc +++ b/src/input/readers/raw/Raw.cc @@ -610,9 +610,6 @@ bool Raw::DoUpdate() break; case MODE_STREAM: - // mode may not be used to execute child programs - assert(childpid == -1); - // Clear possible EOF condition if ( file ) clearerr(file.get()); @@ -620,16 +617,12 @@ bool Raw::DoUpdate() // check if the file has changed struct stat sb; if ( stat(fname.c_str(), &sb) == -1 ) - { // File was removed break; - } // Is it the same file? if ( sb.st_ino == ino && sb.st_dev == dev ) - { break; - } // File was replaced FILE* tfile; From eb772d0d52d906cdc2b3bdefa6962735af8ead4e Mon Sep 17 00:00:00 2001 From: Craig Leres Date: Sun, 5 Jun 2022 13:56:28 -0700 Subject: [PATCH 7/9] Tweak some new comments --- src/input/readers/raw/Raw.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/input/readers/raw/Raw.cc b/src/input/readers/raw/Raw.cc index 48594d0c57..408ea9f50e 100644 --- a/src/input/readers/raw/Raw.cc +++ b/src/input/readers/raw/Raw.cc @@ -614,7 +614,7 @@ bool Raw::DoUpdate() if ( file ) clearerr(file.get()); - // check if the file has changed + // Check if the file has changed struct stat sb; if ( stat(fname.c_str(), &sb) == -1 ) // File was removed @@ -630,7 +630,7 @@ bool Raw::DoUpdate() if ( ! tfile ) break; - // stat newly opened file + // Stat newly opened file if ( fstat(fileno(tfile), &sb) == -1 ) { Error(Fmt("Could not fstat %s", fname.c_str())); From c765dce5f6ae934b957814de7a2086ee8e8a19c9 Mon Sep 17 00:00:00 2001 From: Craig Leres Date: Fri, 1 Jul 2022 10:03:15 -0700 Subject: [PATCH 8/9] Address concerns raised by @0xxon; avoid the new code path when reading from a pipe and return false if fstat() fails after sucessfully opening the file (unlikely). --- src/input/readers/raw/Raw.cc | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/input/readers/raw/Raw.cc b/src/input/readers/raw/Raw.cc index 2d0a0164a2..e37dadd439 100644 --- a/src/input/readers/raw/Raw.cc +++ b/src/input/readers/raw/Raw.cc @@ -614,6 +614,10 @@ bool Raw::DoUpdate() if ( file ) clearerr(file.get()); + // Done if reading from a pipe + if ( execute ) + break; + // Check if the file has changed struct stat sb; if ( stat(fname.c_str(), &sb) == -1 ) @@ -633,8 +637,9 @@ bool Raw::DoUpdate() // Stat newly opened file if ( fstat(fileno(tfile), &sb) == -1 ) { + // This is unlikely to fail Error(Fmt("Could not fstat %s", fname.c_str())); - break; + return false; } file.reset(nullptr); file = std::unique_ptr(tfile, fclose); From 6b52c5b2f913a76bc09b54af36745cd923a5a9c3 Mon Sep 17 00:00:00 2001 From: Craig Leres Date: Fri, 1 Jul 2022 13:23:06 -0700 Subject: [PATCH 9/9] Return false on error from the other place we call fstat() --- src/input/readers/raw/Raw.cc | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/input/readers/raw/Raw.cc b/src/input/readers/raw/Raw.cc index e37dadd439..30c6d3df8f 100644 --- a/src/input/readers/raw/Raw.cc +++ b/src/input/readers/raw/Raw.cc @@ -298,13 +298,12 @@ bool Raw::OpenInput() struct stat sb; if ( fstat(fileno(file.get()), &sb) == -1 ) { + // This is unlikely to fail Error(Fmt("Could not get fstat for %s", fname.c_str())); + return false; } - else - { - ino = sb.st_ino; - dev = sb.st_dev; - } + ino = sb.st_ino; + dev = sb.st_dev; } if ( ! SetFDFlags(fileno(file.get()), F_SETFD, FD_CLOEXEC) )