From f41f392743f8e2724d16f4a1f4e6b73c5654b2e8 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Fri, 24 Aug 2018 12:46:31 -0500 Subject: [PATCH 1/2] Improve input framework re-read logic Changed from checking for "has newer modification time" to "has different modification time or inode number". --- src/input/readers/ascii/Ascii.cc | 5 ++++- src/input/readers/ascii/Ascii.h | 2 ++ src/input/readers/binary/Binary.cc | 6 ++++-- src/input/readers/binary/Binary.h | 2 ++ src/input/readers/config/Config.cc | 5 ++++- src/input/readers/config/Config.h | 2 ++ src/input/readers/raw/Raw.cc | 5 ++++- src/input/readers/raw/Raw.h | 2 ++ .../base/frameworks/input/empty-values-hashing.bro | 4 ++-- .../base/frameworks/input/predicatemodifyandreread.bro | 10 +++++----- testing/btest/scripts/base/frameworks/input/reread.bro | 10 +++++----- .../btest/scripts/base/frameworks/input/twotables.bro | 4 ++-- 12 files changed, 38 insertions(+), 19 deletions(-) diff --git a/src/input/readers/ascii/Ascii.cc b/src/input/readers/ascii/Ascii.cc index d9120b91ae..30eff2669d 100644 --- a/src/input/readers/ascii/Ascii.cc +++ b/src/input/readers/ascii/Ascii.cc @@ -49,6 +49,7 @@ FieldMapping FieldMapping::subType() Ascii::Ascii(ReaderFrontend *frontend) : ReaderBackend(frontend) { mtime = 0; + ino = 0; suppress_warnings = false; fail_on_file_problem = false; fail_on_invalid_lines = false; @@ -281,10 +282,12 @@ bool Ascii::DoUpdate() return ! fail_on_file_problem; } - if ( sb.st_mtime <= mtime ) // no change + if ( sb.st_ino == ino && sb.st_mtime == mtime ) + // no change return true; mtime = sb.st_mtime; + ino = sb.st_ino; // file changed. reread. // fallthrough diff --git a/src/input/readers/ascii/Ascii.h b/src/input/readers/ascii/Ascii.h index 130fc25075..4fd236761e 100644 --- a/src/input/readers/ascii/Ascii.h +++ b/src/input/readers/ascii/Ascii.h @@ -7,6 +7,7 @@ #include #include #include +#include #include "input/ReaderBackend.h" #include "threading/formatters/Ascii.h" @@ -63,6 +64,7 @@ private: ifstream file; time_t mtime; + ino_t ino; // map columns in the file to columns to send back to the manager vector columnMap; diff --git a/src/input/readers/binary/Binary.cc b/src/input/readers/binary/Binary.cc index 560a80f9a0..5ae3cc7c15 100644 --- a/src/input/readers/binary/Binary.cc +++ b/src/input/readers/binary/Binary.cc @@ -14,7 +14,7 @@ using threading::Field; streamsize Binary::chunk_size = 0; Binary::Binary(ReaderFrontend *frontend) - : ReaderBackend(frontend), in(0), mtime(0), firstrun(true) + : ReaderBackend(frontend), in(0), mtime(0), ino(0), firstrun(true) { if ( ! chunk_size ) { @@ -78,6 +78,7 @@ bool Binary::DoInit(const ReaderInfo& info, int num_fields, { in = 0; mtime = 0; + ino = 0; firstrun = true; if ( ! info.source || strlen(info.source) == 0 ) @@ -160,11 +161,12 @@ int Binary::UpdateModificationTime() return -1; } - if ( sb.st_mtime <= mtime ) + if ( sb.st_ino == ino && sb.st_mtime == mtime ) // no change return 0; mtime = sb.st_mtime; + ino = sb.st_ino; return 1; } diff --git a/src/input/readers/binary/Binary.h b/src/input/readers/binary/Binary.h index 6fd5a3b001..796640bfb4 100644 --- a/src/input/readers/binary/Binary.h +++ b/src/input/readers/binary/Binary.h @@ -4,6 +4,7 @@ #define INPUT_READERS_BINARY_H #include +#include #include "input/ReaderBackend.h" @@ -36,6 +37,7 @@ private: string fname; ifstream* in; time_t mtime; + ino_t ino; bool firstrun; // options set from the script-level. diff --git a/src/input/readers/config/Config.cc b/src/input/readers/config/Config.cc index e50b468a36..eca276281c 100644 --- a/src/input/readers/config/Config.cc +++ b/src/input/readers/config/Config.cc @@ -23,6 +23,7 @@ using threading::Field; Config::Config(ReaderFrontend *frontend) : ReaderBackend(frontend) { mtime = 0; + ino = 0; suppress_warnings = false; fail_on_file_problem = false; @@ -146,10 +147,12 @@ bool Config::DoUpdate() return ! fail_on_file_problem; } - if ( sb.st_mtime <= mtime ) // no change + if ( sb.st_ino == ino && sb.st_mtime == mtime ) + // no change return true; mtime = sb.st_mtime; + ino = sb.st_ino; // file changed. reread. // fallthrough diff --git a/src/input/readers/config/Config.h b/src/input/readers/config/Config.h index b0dc83f725..336d72a989 100644 --- a/src/input/readers/config/Config.h +++ b/src/input/readers/config/Config.h @@ -8,6 +8,7 @@ #include #include #include +#include #include "input/ReaderBackend.h" #include "threading/formatters/Ascii.h" @@ -46,6 +47,7 @@ private: ifstream file; time_t mtime; + ino_t ino; bool fail_on_file_problem; // this is an internal indicator in case the read is currently in a failed state diff --git a/src/input/readers/raw/Raw.cc b/src/input/readers/raw/Raw.cc index 27d8b0c685..e41e4e77ac 100644 --- a/src/input/readers/raw/Raw.cc +++ b/src/input/readers/raw/Raw.cc @@ -31,6 +31,7 @@ Raw::Raw(ReaderFrontend *frontend) : ReaderBackend(frontend), file(nullptr, fclo execute = false; firstrun = true; mtime = 0; + ino = 0; forcekill = false; offset = 0; separator.assign( (const char*) BifConst::InputRaw::record_separator->Bytes(), @@ -341,6 +342,7 @@ bool Raw::DoInit(const ReaderInfo& info, int num_fields, const Field* const* fie fname = info.source; mtime = 0; + ino = 0; execute = false; firstrun = true; int want_fields = 1; @@ -553,11 +555,12 @@ bool Raw::DoUpdate() return false; } - if ( sb.st_mtime <= mtime ) + if ( sb.st_ino == ino && sb.st_mtime == mtime ) // no change return true; mtime = sb.st_mtime; + ino = sb.st_ino; // file changed. reread. // // fallthrough diff --git a/src/input/readers/raw/Raw.h b/src/input/readers/raw/Raw.h index c6075cbe70..2b9d4edcb4 100644 --- a/src/input/readers/raw/Raw.h +++ b/src/input/readers/raw/Raw.h @@ -6,6 +6,7 @@ #include #include #include +#include #include "input/ReaderBackend.h" @@ -51,6 +52,7 @@ private: bool execute; bool firstrun; time_t mtime; + ino_t ino; // options set from the script-level. string separator; diff --git a/testing/btest/scripts/base/frameworks/input/empty-values-hashing.bro b/testing/btest/scripts/base/frameworks/input/empty-values-hashing.bro index f25c9bc3f6..b46c299c2c 100644 --- a/testing/btest/scripts/base/frameworks/input/empty-values-hashing.bro +++ b/testing/btest/scripts/base/frameworks/input/empty-values-hashing.bro @@ -1,7 +1,7 @@ -# @TEST-EXEC: cp input1.log input.log +# @TEST-EXEC: mv input1.log input.log # @TEST-EXEC: btest-bg-run bro bro -b %INPUT # @TEST-EXEC: $SCRIPTS/wait-for-file bro/got1 5 || (btest-bg-wait -k 1 && false) -# @TEST-EXEC: cp input2.log input.log +# @TEST-EXEC: mv input2.log input.log # @TEST-EXEC: btest-bg-wait 10 # @TEST-EXEC: btest-diff out diff --git a/testing/btest/scripts/base/frameworks/input/predicatemodifyandreread.bro b/testing/btest/scripts/base/frameworks/input/predicatemodifyandreread.bro index 0ac5f104d0..2c6b58ff2d 100644 --- a/testing/btest/scripts/base/frameworks/input/predicatemodifyandreread.bro +++ b/testing/btest/scripts/base/frameworks/input/predicatemodifyandreread.bro @@ -1,13 +1,13 @@ -# @TEST-EXEC: cp input1.log input.log +# @TEST-EXEC: mv input1.log input.log # @TEST-EXEC: btest-bg-run bro bro -b %INPUT # @TEST-EXEC: $SCRIPTS/wait-for-file bro/got1 5 || (btest-bg-wait -k 1 && false) -# @TEST-EXEC: cp input2.log input.log +# @TEST-EXEC: mv input2.log input.log # @TEST-EXEC: $SCRIPTS/wait-for-file bro/got2 5 || (btest-bg-wait -k 1 && false) -# @TEST-EXEC: cp input3.log input.log +# @TEST-EXEC: mv input3.log input.log # @TEST-EXEC: $SCRIPTS/wait-for-file bro/got3 5 || (btest-bg-wait -k 1 && false) -# @TEST-EXEC: cp input4.log input.log +# @TEST-EXEC: mv input4.log input.log # @TEST-EXEC: $SCRIPTS/wait-for-file bro/got4 5 || (btest-bg-wait -k 1 && false) -# @TEST-EXEC: cp input5.log input.log +# @TEST-EXEC: mv input5.log input.log # @TEST-EXEC: btest-bg-wait 10 # @TEST-EXEC: btest-diff out # diff --git a/testing/btest/scripts/base/frameworks/input/reread.bro b/testing/btest/scripts/base/frameworks/input/reread.bro index e4bb09df39..53cb2a91a8 100644 --- a/testing/btest/scripts/base/frameworks/input/reread.bro +++ b/testing/btest/scripts/base/frameworks/input/reread.bro @@ -1,13 +1,13 @@ -# @TEST-EXEC: cp input1.log input.log +# @TEST-EXEC: mv input1.log input.log # @TEST-EXEC: btest-bg-run bro bro -b %INPUT # @TEST-EXEC: $SCRIPTS/wait-for-file bro/got1 5 || (btest-bg-wait -k 1 && false) -# @TEST-EXEC: cp input2.log input.log +# @TEST-EXEC: mv input2.log input.log # @TEST-EXEC: $SCRIPTS/wait-for-file bro/got2 5 || (btest-bg-wait -k 1 && false) -# @TEST-EXEC: cp input3.log input.log +# @TEST-EXEC: mv input3.log input.log # @TEST-EXEC: $SCRIPTS/wait-for-file bro/got3 5 || (btest-bg-wait -k 1 && false) -# @TEST-EXEC: cp input4.log input.log +# @TEST-EXEC: mv input4.log input.log # @TEST-EXEC: $SCRIPTS/wait-for-file bro/got4 5 || (btest-bg-wait -k 1 && false) -# @TEST-EXEC: cp input5.log input.log +# @TEST-EXEC: mv input5.log input.log # @TEST-EXEC: btest-bg-wait 10 # @TEST-EXEC: btest-diff out diff --git a/testing/btest/scripts/base/frameworks/input/twotables.bro b/testing/btest/scripts/base/frameworks/input/twotables.bro index 6f127ac4c2..5b6d833da3 100644 --- a/testing/btest/scripts/base/frameworks/input/twotables.bro +++ b/testing/btest/scripts/base/frameworks/input/twotables.bro @@ -1,7 +1,7 @@ -# @TEST-EXEC: cp input1.log input.log +# @TEST-EXEC: mv input1.log input.log # @TEST-EXEC: btest-bg-run bro bro -b %INPUT # @TEST-EXEC: $SCRIPTS/wait-for-file bro/got2 5 || (btest-bg-wait -k 1 && false) -# @TEST-EXEC: cp input3.log input.log +# @TEST-EXEC: mv input3.log input.log # @TEST-EXEC: btest-bg-wait 10 # @TEST-EXEC: btest-diff event.out # @TEST-EXEC: btest-diff pred1.out From 9bfc01b70527f4c7dede94a6c81282ea855b5440 Mon Sep 17 00:00:00 2001 From: Daniel Thayer Date: Mon, 27 Aug 2018 19:38:47 -0500 Subject: [PATCH 2/2] Convert more redef-able constants to runtime options --- scripts/base/files/extract/main.bro | 2 +- scripts/base/frameworks/input/main.bro | 4 ++-- scripts/base/frameworks/netcontrol/types.bro | 2 +- scripts/base/frameworks/notice/main.bro | 2 +- scripts/base/init-bare.bro | 4 ++-- scripts/base/protocols/conn/contents.bro | 2 +- scripts/base/protocols/ftp/info.bro | 2 +- scripts/base/protocols/http/main.bro | 2 +- scripts/base/protocols/socks/main.bro | 2 +- scripts/base/utils/active-http.bro | 4 ++-- scripts/base/utils/dir.bro | 2 +- 11 files changed, 14 insertions(+), 14 deletions(-) diff --git a/scripts/base/files/extract/main.bro b/scripts/base/files/extract/main.bro index 22207000bf..b2d1907e01 100644 --- a/scripts/base/files/extract/main.bro +++ b/scripts/base/files/extract/main.bro @@ -9,7 +9,7 @@ export { ## The default max size for extracted files (they won't exceed this ## number of bytes). A value of zero means unlimited. - const default_limit = 0 &redef; + option default_limit = 0; redef record Files::Info += { ## Local filename of extracted file. diff --git a/scripts/base/frameworks/input/main.bro b/scripts/base/frameworks/input/main.bro index e4b8571ba5..e5676002d4 100644 --- a/scripts/base/frameworks/input/main.bro +++ b/scripts/base/frameworks/input/main.bro @@ -24,10 +24,10 @@ export { }; ## The default input reader used. Defaults to `READER_ASCII`. - const default_reader = READER_ASCII &redef; + option default_reader = READER_ASCII; ## The default reader mode used. Defaults to `MANUAL`. - const default_mode = MANUAL &redef; + option default_mode = MANUAL; ## Separator between fields. ## Please note that the separator has to be exactly one character long. diff --git a/scripts/base/frameworks/netcontrol/types.bro b/scripts/base/frameworks/netcontrol/types.bro index cc42de14e0..7fda65ea6b 100644 --- a/scripts/base/frameworks/netcontrol/types.bro +++ b/scripts/base/frameworks/netcontrol/types.bro @@ -7,7 +7,7 @@ module NetControl; export { ## The default priority that is used when creating rules. - const default_priority: int = +0 &redef; + option default_priority: int = +0; ## The default priority that is used when using the high-level functions to ## push whitelist entries to the backends (:bro:see:`NetControl::whitelist_address` and diff --git a/scripts/base/frameworks/notice/main.bro b/scripts/base/frameworks/notice/main.bro index 1d49f4aa77..f4c3f64b42 100644 --- a/scripts/base/frameworks/notice/main.bro +++ b/scripts/base/frameworks/notice/main.bro @@ -53,7 +53,7 @@ export { ## utilizing the *identifier* field in :bro:type:`Notice::Info` records. ## Set this to "0secs" to completely disable automated notice ## suppression. - const default_suppression_interval = 1hrs &redef; + option default_suppression_interval = 1hrs; ## The record type that is used for representing and logging notices. type Info: record { diff --git a/scripts/base/init-bare.bro b/scripts/base/init-bare.bro index 70d59b30cf..8e51b312b0 100644 --- a/scripts/base/init-bare.bro +++ b/scripts/base/init-bare.bro @@ -383,12 +383,12 @@ type connection: record { ## Default amount of time a file can be inactive before the file analysis ## gives up and discards any internal state related to the file. -const default_file_timeout_interval: interval = 2 mins &redef; +option default_file_timeout_interval: interval = 2 mins; ## Default amount of bytes that file analysis will buffer in order to use ## for mime type matching. File analyzers attached at the time of mime type ## matching or later, will receive a copy of this buffer. -const default_file_bof_buffer_size: count = 4096 &redef; +option default_file_bof_buffer_size: count = 4096; ## A file that Bro is analyzing. This is Bro's type for describing the basic ## internal metadata collected about a "file", which is essentially just a diff --git a/scripts/base/protocols/conn/contents.bro b/scripts/base/protocols/conn/contents.bro index 96da885b20..dbfbbd0dc1 100644 --- a/scripts/base/protocols/conn/contents.bro +++ b/scripts/base/protocols/conn/contents.bro @@ -22,7 +22,7 @@ export { ## If this variable is set to ``T``, then all contents of all ## connections will be extracted. - const default_extract = F &redef; + option default_extract = F; } redef record connection += { diff --git a/scripts/base/protocols/ftp/info.bro b/scripts/base/protocols/ftp/info.bro index a9db7ba6a0..faff6a46ad 100644 --- a/scripts/base/protocols/ftp/info.bro +++ b/scripts/base/protocols/ftp/info.bro @@ -8,7 +8,7 @@ export { ## This setting changes if passwords used in FTP sessions are ## captured or not. - const default_capture_password = F &redef; + option default_capture_password = F; ## The expected endpoints of an FTP data channel. type ExpectedDataChannel: record { diff --git a/scripts/base/protocols/http/main.bro b/scripts/base/protocols/http/main.bro index 78e3a68d0f..5296964520 100644 --- a/scripts/base/protocols/http/main.bro +++ b/scripts/base/protocols/http/main.bro @@ -19,7 +19,7 @@ export { ## This setting changes if passwords used in Basic-Auth are captured or ## not. - const default_capture_password = F &redef; + option default_capture_password = F; ## The record type which contains the fields of the HTTP log. type Info: record { diff --git a/scripts/base/protocols/socks/main.bro b/scripts/base/protocols/socks/main.bro index 5d0ecf693d..341b6bbc84 100644 --- a/scripts/base/protocols/socks/main.bro +++ b/scripts/base/protocols/socks/main.bro @@ -7,7 +7,7 @@ export { redef enum Log::ID += { LOG }; ## Whether passwords are captured or not. - const default_capture_password = F &redef; + option default_capture_password = F; ## The record type which contains the fields of the SOCKS log. type Info: record { diff --git a/scripts/base/utils/active-http.bro b/scripts/base/utils/active-http.bro index de78eeac6d..a6b0f8111c 100644 --- a/scripts/base/utils/active-http.bro +++ b/scripts/base/utils/active-http.bro @@ -7,10 +7,10 @@ module ActiveHTTP; export { ## The default timeout for HTTP requests. - const default_max_time = 1min &redef; + option default_max_time = 1min; ## The default HTTP method/verb to use for requests. - const default_method = "GET" &redef; + option default_method = "GET"; type Response: record { ## Numeric response code from the server. diff --git a/scripts/base/utils/dir.bro b/scripts/base/utils/dir.bro index 3a2da7ffdd..c3598d039d 100644 --- a/scripts/base/utils/dir.bro +++ b/scripts/base/utils/dir.bro @@ -7,7 +7,7 @@ module Dir; export { ## The default interval this module checks for files in directories when ## using the :bro:see:`Dir::monitor` function. - const polling_interval = 30sec &redef; + option polling_interval = 30sec; ## Register a directory to monitor with a callback that is called ## every time a previously unseen file is seen. If a file is deleted