GH-928: use realpath() instead of inode to de-duplicate scripts

Duplicate script `@load` directives are now detected by comparing
against canonical paths formed by realpath().  This fixes the previous,
unexpected behavior of treating scripts that hardlink to same
inode as duplicates: such links will now be loaded as distinct scripts
since their canonical path differs.
This commit is contained in:
Jon Siwek 2020-07-07 17:29:52 -07:00
parent 7f347a3b1e
commit 320b14ff09
4 changed files with 83 additions and 64 deletions

View file

@ -86,24 +86,23 @@ extern iosource::IOSource* current_iosrc;
extern iosource::PktDumper* pkt_dumper; // where to save packets extern iosource::PktDumper* pkt_dumper; // where to save packets
// Script file we have already scanned (or are in the process of scanning). // Script file we have already scanned (or are in the process of scanning).
// They are identified by device and inode number. // They are identified by normalized realpath.
struct ScannedFile { struct ScannedFile {
dev_t dev;
ino_t inode;
int include_level; int include_level;
bool skipped; // This ScannedFile was @unload'd. bool skipped; // This ScannedFile was @unload'd.
bool prefixes_checked; // If loading prefixes for this file has been tried. bool prefixes_checked; // If loading prefixes for this file has been tried.
std::string name; std::string name;
std::string canonical_path; // normalized, absolute path via realpath()
ScannedFile(dev_t arg_dev, ino_t arg_inode, int arg_include_level, ScannedFile(int arg_include_level,
const std::string& arg_name, bool arg_skipped = false, std::string arg_name, bool arg_skipped = false,
bool arg_prefixes_checked = false) bool arg_prefixes_checked = false);
: dev(arg_dev), inode(arg_inode),
include_level(arg_include_level), /**
skipped(arg_skipped), * Compares the canonical path of this file against every canonical path
prefixes_checked(arg_prefixes_checked), * in files_scanned and returns whether there's any match.
name(arg_name) */
{ } bool AlreadyScanned() const;
}; };
extern std::list<ScannedFile> files_scanned; extern std::list<ScannedFile> files_scanned;

View file

@ -3,6 +3,9 @@
#include <errno.h> #include <errno.h>
#include <climits>
#include <cstdlib>
#include <stack> #include <stack>
#include <list> #include <list>
#include <string> #include <string>
@ -39,13 +42,6 @@
using namespace zeek::detail; using namespace zeek::detail;
namespace {
struct ZeekINode {
dev_t dev;
ino_t ino;
};
}
extern YYLTYPE yylloc; // holds start line and column of token extern YYLTYPE yylloc; // holds start line and column of token
extern zeek::EnumType* cur_enum_type; extern zeek::EnumType* cur_enum_type;
@ -101,30 +97,6 @@ static std::string find_relative_script_file(const std::string& filename)
return find_script_file(filename, bro_path()); return find_script_file(filename, bro_path());
} }
static ZeekINode get_inode(FILE* f, const std::string& path)
{
struct stat b;
if ( fstat(fileno(f), &b) )
reporter->FatalError("fstat of %s failed: %s\n", path.c_str(),
strerror(errno));
return {b.st_dev, b.st_ino};
}
static ZeekINode get_inode(const std::string& path)
{
FILE* f = open_file(path);
if ( ! f )
reporter->FatalError("failed to open %s\n", path.c_str());
auto inode = get_inode(f, path);
fclose(f);
return inode;
}
class FileInfo { class FileInfo {
public: public:
FileInfo(std::string restore_module = ""); FileInfo(std::string restore_module = "");
@ -430,9 +402,8 @@ when return TOK_WHEN;
else else
{ {
// All we have to do is pretend we've already scanned it. // All we have to do is pretend we've already scanned it.
auto i = get_inode(path); ScannedFile sf(file_stack.length(), std::move(path), true);
ScannedFile sf(i.dev, i.ino, file_stack.length(), path, true); files_scanned.push_back(std::move(sf));
files_scanned.push_back(sf);
} }
} }
@ -598,19 +569,7 @@ YYLTYPE zeek::detail::GetCurrentLocation()
return currloc; return currloc;
} }
static bool already_scanned(ZeekINode in) static auto constexpr canonical_stdin_path = "<stdin>";
{
for ( const auto& it : files_scanned )
if ( it.dev == in.dev && it.inode == in.ino )
return true;
return false;
}
static bool already_scanned(const std::string& path)
{
return already_scanned(get_inode(path));
}
static int load_files(const char* orig_file) static int load_files(const char* orig_file)
{ {
@ -641,7 +600,7 @@ static int load_files(const char* orig_file)
if ( streq(orig_file, "-") ) if ( streq(orig_file, "-") )
{ {
f = stdin; f = stdin;
file_path = "<stdin>"; file_path = canonical_stdin_path;
if ( g_policy_debug ) if ( g_policy_debug )
{ {
@ -664,9 +623,9 @@ static int load_files(const char* orig_file)
reporter->FatalError("can't open %s", file_path.c_str()); reporter->FatalError("can't open %s", file_path.c_str());
} }
auto i = get_inode(f, file_path); ScannedFile sf(file_stack.length(), file_path);
if ( already_scanned(i) ) if ( sf.AlreadyScanned() )
{ {
if ( f != stdin ) if ( f != stdin )
fclose(f); fclose(f);
@ -674,8 +633,7 @@ static int load_files(const char* orig_file)
return 0; return 0;
} }
ScannedFile sf(i.dev, i.ino, file_stack.length(), file_path); files_scanned.push_back(std::move(sf));
files_scanned.push_back(sf);
if ( g_policy_debug && ! file_path.empty() ) if ( g_policy_debug && ! file_path.empty() )
{ {
@ -1079,3 +1037,42 @@ FileInfo::~FileInfo()
if ( restore_module != "" ) if ( restore_module != "" )
zeek::detail::current_module = restore_module; zeek::detail::current_module = restore_module;
} }
ScannedFile::ScannedFile(int arg_include_level,
std::string arg_name,
bool arg_skipped,
bool arg_prefixes_checked)
: include_level(arg_include_level),
skipped(arg_skipped),
prefixes_checked(arg_prefixes_checked),
name(std::move(arg_name))
{
if ( name == canonical_stdin_path )
canonical_path = canonical_stdin_path;
else
{
char buf[PATH_MAX];
auto res = realpath(name.data(), buf);
if ( ! res )
reporter->FatalError("failed to get realpath() of %s: %s",
name.data(), strerror(errno));
canonical_path = res;
}
}
bool ScannedFile::AlreadyScanned() const
{
auto rval = false;
for ( const auto& it : files_scanned )
if ( it.canonical_path == canonical_path )
{
rval = true;
break;
}
DBG_LOG(DBG_SCRIPTS, "AlreadyScanned result (%d) %s", rval, canonical_path.data());
return rval;
}

View file

@ -0,0 +1,3 @@
foo
bar
baz

View file

@ -0,0 +1,20 @@
# This tests Zeek's mechanism to prevent duplicate script loading on links.
#
# @TEST-EXEC: mkdir foo
# @TEST-EXEC: mkdir bar
# @TEST-EXEC: mkdir baz
# @TEST-EXEC: echo 'event zeek_init() &priority=3 { print "foo"; }' >foo/main.zeek
# @TEST-EXEC: echo 'event zeek_init() &priority=2 { print "bar"; }' >bar/main.zeek
# @TEST-EXEC: echo 'event zeek_init() &priority=1 { print "baz"; }' >baz/main.zeek
# @TEST-EXEC: echo "@load ./main" >common-load.zeek
# @TEST-EXEC: ln common-load.zeek foo/__load__.zeek
# @TEST-EXEC: ln common-load.zeek bar/__load__.zeek
# @TEST-EXEC: echo "@load ./main" >baz/__load__.zeek
# @TEST-EXEC: echo "@load ./main-sym" >>baz/__load__.zeek
# @TEST-EXEC: (cd baz && ln -s main.zeek main-sym.zeek)
# @TEST-EXEC: zeek -b foo bar baz foo/../foo bar/../bar baz/../baz $(pwd)/foo $(pwd)/bar $(pwd)/baz >out
# @TEST-EXEC: btest-diff out