From 320b14ff09d501dad94c9771b1e35f65f5cab9d1 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Tue, 7 Jul 2020 17:29:52 -0700 Subject: [PATCH] GH-928: use realpath() instead of inode to de-duplicate scripts Duplicate script `@load` directives are now detected by comparing against canonical paths formed by realpath(). This fixes the previous, unexpected behavior of treating scripts that hardlink to same inode as duplicates: such links will now be loaded as distinct scripts since their canonical path differs. --- src/Net.h | 23 ++-- src/scan.l | 101 +++++++++--------- .../Baseline/core.load-duplicates-links/out | 3 + testing/btest/core/load-duplicates-links.zeek | 20 ++++ 4 files changed, 83 insertions(+), 64 deletions(-) create mode 100644 testing/btest/Baseline/core.load-duplicates-links/out create mode 100644 testing/btest/core/load-duplicates-links.zeek diff --git a/src/Net.h b/src/Net.h index ad9871063a..8af1659f9d 100644 --- a/src/Net.h +++ b/src/Net.h @@ -86,24 +86,23 @@ extern iosource::IOSource* current_iosrc; extern iosource::PktDumper* pkt_dumper; // where to save packets // Script file we have already scanned (or are in the process of scanning). -// They are identified by device and inode number. +// They are identified by normalized realpath. struct ScannedFile { - dev_t dev; - ino_t inode; int include_level; bool skipped; // This ScannedFile was @unload'd. bool prefixes_checked; // If loading prefixes for this file has been tried. std::string name; + std::string canonical_path; // normalized, absolute path via realpath() - ScannedFile(dev_t arg_dev, ino_t arg_inode, int arg_include_level, - const std::string& arg_name, bool arg_skipped = false, - bool arg_prefixes_checked = false) - : dev(arg_dev), inode(arg_inode), - include_level(arg_include_level), - skipped(arg_skipped), - prefixes_checked(arg_prefixes_checked), - name(arg_name) - { } + ScannedFile(int arg_include_level, + std::string arg_name, bool arg_skipped = false, + bool arg_prefixes_checked = false); + + /** + * Compares the canonical path of this file against every canonical path + * in files_scanned and returns whether there's any match. + */ + bool AlreadyScanned() const; }; extern std::list files_scanned; diff --git a/src/scan.l b/src/scan.l index ac34069215..3a253212c9 100644 --- a/src/scan.l +++ b/src/scan.l @@ -3,6 +3,9 @@ #include +#include +#include + #include #include #include @@ -39,13 +42,6 @@ using namespace zeek::detail; -namespace { -struct ZeekINode { - dev_t dev; - ino_t ino; -}; -} - extern YYLTYPE yylloc; // holds start line and column of token extern zeek::EnumType* cur_enum_type; @@ -101,30 +97,6 @@ static std::string find_relative_script_file(const std::string& filename) return find_script_file(filename, bro_path()); } -static ZeekINode get_inode(FILE* f, const std::string& path) - { - struct stat b; - - if ( fstat(fileno(f), &b) ) - reporter->FatalError("fstat of %s failed: %s\n", path.c_str(), - strerror(errno)); - - return {b.st_dev, b.st_ino}; - } - -static ZeekINode get_inode(const std::string& path) - { - FILE* f = open_file(path); - - if ( ! f ) - reporter->FatalError("failed to open %s\n", path.c_str()); - - auto inode = get_inode(f, path); - fclose(f); - - return inode; - } - class FileInfo { public: FileInfo(std::string restore_module = ""); @@ -430,9 +402,8 @@ when return TOK_WHEN; else { // All we have to do is pretend we've already scanned it. - auto i = get_inode(path); - ScannedFile sf(i.dev, i.ino, file_stack.length(), path, true); - files_scanned.push_back(sf); + ScannedFile sf(file_stack.length(), std::move(path), true); + files_scanned.push_back(std::move(sf)); } } @@ -598,19 +569,7 @@ YYLTYPE zeek::detail::GetCurrentLocation() return currloc; } -static bool already_scanned(ZeekINode in) - { - for ( const auto& it : files_scanned ) - if ( it.dev == in.dev && it.inode == in.ino ) - return true; - - return false; - } - -static bool already_scanned(const std::string& path) - { - return already_scanned(get_inode(path)); - } +static auto constexpr canonical_stdin_path = ""; static int load_files(const char* orig_file) { @@ -641,7 +600,7 @@ static int load_files(const char* orig_file) if ( streq(orig_file, "-") ) { f = stdin; - file_path = ""; + file_path = canonical_stdin_path; if ( g_policy_debug ) { @@ -664,9 +623,9 @@ static int load_files(const char* orig_file) reporter->FatalError("can't open %s", file_path.c_str()); } - auto i = get_inode(f, file_path); + ScannedFile sf(file_stack.length(), file_path); - if ( already_scanned(i) ) + if ( sf.AlreadyScanned() ) { if ( f != stdin ) fclose(f); @@ -674,8 +633,7 @@ static int load_files(const char* orig_file) return 0; } - ScannedFile sf(i.dev, i.ino, file_stack.length(), file_path); - files_scanned.push_back(sf); + files_scanned.push_back(std::move(sf)); if ( g_policy_debug && ! file_path.empty() ) { @@ -1079,3 +1037,42 @@ FileInfo::~FileInfo() if ( restore_module != "" ) zeek::detail::current_module = restore_module; } + +ScannedFile::ScannedFile(int arg_include_level, + std::string arg_name, + bool arg_skipped, + bool arg_prefixes_checked) + : include_level(arg_include_level), + skipped(arg_skipped), + prefixes_checked(arg_prefixes_checked), + name(std::move(arg_name)) + { + if ( name == canonical_stdin_path ) + canonical_path = canonical_stdin_path; + else + { + char buf[PATH_MAX]; + auto res = realpath(name.data(), buf); + + if ( ! res ) + reporter->FatalError("failed to get realpath() of %s: %s", + name.data(), strerror(errno)); + + canonical_path = res; + } + } + +bool ScannedFile::AlreadyScanned() const + { + auto rval = false; + + for ( const auto& it : files_scanned ) + if ( it.canonical_path == canonical_path ) + { + rval = true; + break; + } + + DBG_LOG(DBG_SCRIPTS, "AlreadyScanned result (%d) %s", rval, canonical_path.data()); + return rval; + } diff --git a/testing/btest/Baseline/core.load-duplicates-links/out b/testing/btest/Baseline/core.load-duplicates-links/out new file mode 100644 index 0000000000..86e041dad6 --- /dev/null +++ b/testing/btest/Baseline/core.load-duplicates-links/out @@ -0,0 +1,3 @@ +foo +bar +baz diff --git a/testing/btest/core/load-duplicates-links.zeek b/testing/btest/core/load-duplicates-links.zeek new file mode 100644 index 0000000000..623403bbad --- /dev/null +++ b/testing/btest/core/load-duplicates-links.zeek @@ -0,0 +1,20 @@ +# This tests Zeek's mechanism to prevent duplicate script loading on links. +# +# @TEST-EXEC: mkdir foo +# @TEST-EXEC: mkdir bar +# @TEST-EXEC: mkdir baz + +# @TEST-EXEC: echo 'event zeek_init() &priority=3 { print "foo"; }' >foo/main.zeek +# @TEST-EXEC: echo 'event zeek_init() &priority=2 { print "bar"; }' >bar/main.zeek +# @TEST-EXEC: echo 'event zeek_init() &priority=1 { print "baz"; }' >baz/main.zeek + +# @TEST-EXEC: echo "@load ./main" >common-load.zeek +# @TEST-EXEC: ln common-load.zeek foo/__load__.zeek +# @TEST-EXEC: ln common-load.zeek bar/__load__.zeek + +# @TEST-EXEC: echo "@load ./main" >baz/__load__.zeek +# @TEST-EXEC: echo "@load ./main-sym" >>baz/__load__.zeek +# @TEST-EXEC: (cd baz && ln -s main.zeek main-sym.zeek) + +# @TEST-EXEC: zeek -b foo bar baz foo/../foo bar/../bar baz/../baz $(pwd)/foo $(pwd)/bar $(pwd)/baz >out +# @TEST-EXEC: btest-diff out