mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 06:38:20 +00:00
GH-928: use realpath() instead of inode to de-duplicate scripts
Duplicate script `@load` directives are now detected by comparing against canonical paths formed by realpath(). This fixes the previous, unexpected behavior of treating scripts that hardlink to same inode as duplicates: such links will now be loaded as distinct scripts since their canonical path differs.
This commit is contained in:
parent
7f347a3b1e
commit
320b14ff09
4 changed files with 83 additions and 64 deletions
23
src/Net.h
23
src/Net.h
|
@ -86,24 +86,23 @@ extern iosource::IOSource* current_iosrc;
|
|||
extern iosource::PktDumper* pkt_dumper; // where to save packets
|
||||
|
||||
// Script file we have already scanned (or are in the process of scanning).
|
||||
// They are identified by device and inode number.
|
||||
// They are identified by normalized realpath.
|
||||
struct ScannedFile {
|
||||
dev_t dev;
|
||||
ino_t inode;
|
||||
int include_level;
|
||||
bool skipped; // This ScannedFile was @unload'd.
|
||||
bool prefixes_checked; // If loading prefixes for this file has been tried.
|
||||
std::string name;
|
||||
std::string canonical_path; // normalized, absolute path via realpath()
|
||||
|
||||
ScannedFile(dev_t arg_dev, ino_t arg_inode, int arg_include_level,
|
||||
const std::string& arg_name, bool arg_skipped = false,
|
||||
bool arg_prefixes_checked = false)
|
||||
: dev(arg_dev), inode(arg_inode),
|
||||
include_level(arg_include_level),
|
||||
skipped(arg_skipped),
|
||||
prefixes_checked(arg_prefixes_checked),
|
||||
name(arg_name)
|
||||
{ }
|
||||
ScannedFile(int arg_include_level,
|
||||
std::string arg_name, bool arg_skipped = false,
|
||||
bool arg_prefixes_checked = false);
|
||||
|
||||
/**
|
||||
* Compares the canonical path of this file against every canonical path
|
||||
* in files_scanned and returns whether there's any match.
|
||||
*/
|
||||
bool AlreadyScanned() const;
|
||||
};
|
||||
|
||||
extern std::list<ScannedFile> files_scanned;
|
||||
|
|
101
src/scan.l
101
src/scan.l
|
@ -3,6 +3,9 @@
|
|||
|
||||
#include <errno.h>
|
||||
|
||||
#include <climits>
|
||||
#include <cstdlib>
|
||||
|
||||
#include <stack>
|
||||
#include <list>
|
||||
#include <string>
|
||||
|
@ -39,13 +42,6 @@
|
|||
|
||||
using namespace zeek::detail;
|
||||
|
||||
namespace {
|
||||
struct ZeekINode {
|
||||
dev_t dev;
|
||||
ino_t ino;
|
||||
};
|
||||
}
|
||||
|
||||
extern YYLTYPE yylloc; // holds start line and column of token
|
||||
extern zeek::EnumType* cur_enum_type;
|
||||
|
||||
|
@ -101,30 +97,6 @@ static std::string find_relative_script_file(const std::string& filename)
|
|||
return find_script_file(filename, bro_path());
|
||||
}
|
||||
|
||||
static ZeekINode get_inode(FILE* f, const std::string& path)
|
||||
{
|
||||
struct stat b;
|
||||
|
||||
if ( fstat(fileno(f), &b) )
|
||||
reporter->FatalError("fstat of %s failed: %s\n", path.c_str(),
|
||||
strerror(errno));
|
||||
|
||||
return {b.st_dev, b.st_ino};
|
||||
}
|
||||
|
||||
static ZeekINode get_inode(const std::string& path)
|
||||
{
|
||||
FILE* f = open_file(path);
|
||||
|
||||
if ( ! f )
|
||||
reporter->FatalError("failed to open %s\n", path.c_str());
|
||||
|
||||
auto inode = get_inode(f, path);
|
||||
fclose(f);
|
||||
|
||||
return inode;
|
||||
}
|
||||
|
||||
class FileInfo {
|
||||
public:
|
||||
FileInfo(std::string restore_module = "");
|
||||
|
@ -430,9 +402,8 @@ when return TOK_WHEN;
|
|||
else
|
||||
{
|
||||
// All we have to do is pretend we've already scanned it.
|
||||
auto i = get_inode(path);
|
||||
ScannedFile sf(i.dev, i.ino, file_stack.length(), path, true);
|
||||
files_scanned.push_back(sf);
|
||||
ScannedFile sf(file_stack.length(), std::move(path), true);
|
||||
files_scanned.push_back(std::move(sf));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -598,19 +569,7 @@ YYLTYPE zeek::detail::GetCurrentLocation()
|
|||
return currloc;
|
||||
}
|
||||
|
||||
static bool already_scanned(ZeekINode in)
|
||||
{
|
||||
for ( const auto& it : files_scanned )
|
||||
if ( it.dev == in.dev && it.inode == in.ino )
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool already_scanned(const std::string& path)
|
||||
{
|
||||
return already_scanned(get_inode(path));
|
||||
}
|
||||
static auto constexpr canonical_stdin_path = "<stdin>";
|
||||
|
||||
static int load_files(const char* orig_file)
|
||||
{
|
||||
|
@ -641,7 +600,7 @@ static int load_files(const char* orig_file)
|
|||
if ( streq(orig_file, "-") )
|
||||
{
|
||||
f = stdin;
|
||||
file_path = "<stdin>";
|
||||
file_path = canonical_stdin_path;
|
||||
|
||||
if ( g_policy_debug )
|
||||
{
|
||||
|
@ -664,9 +623,9 @@ static int load_files(const char* orig_file)
|
|||
reporter->FatalError("can't open %s", file_path.c_str());
|
||||
}
|
||||
|
||||
auto i = get_inode(f, file_path);
|
||||
ScannedFile sf(file_stack.length(), file_path);
|
||||
|
||||
if ( already_scanned(i) )
|
||||
if ( sf.AlreadyScanned() )
|
||||
{
|
||||
if ( f != stdin )
|
||||
fclose(f);
|
||||
|
@ -674,8 +633,7 @@ static int load_files(const char* orig_file)
|
|||
return 0;
|
||||
}
|
||||
|
||||
ScannedFile sf(i.dev, i.ino, file_stack.length(), file_path);
|
||||
files_scanned.push_back(sf);
|
||||
files_scanned.push_back(std::move(sf));
|
||||
|
||||
if ( g_policy_debug && ! file_path.empty() )
|
||||
{
|
||||
|
@ -1079,3 +1037,42 @@ FileInfo::~FileInfo()
|
|||
if ( restore_module != "" )
|
||||
zeek::detail::current_module = restore_module;
|
||||
}
|
||||
|
||||
ScannedFile::ScannedFile(int arg_include_level,
|
||||
std::string arg_name,
|
||||
bool arg_skipped,
|
||||
bool arg_prefixes_checked)
|
||||
: include_level(arg_include_level),
|
||||
skipped(arg_skipped),
|
||||
prefixes_checked(arg_prefixes_checked),
|
||||
name(std::move(arg_name))
|
||||
{
|
||||
if ( name == canonical_stdin_path )
|
||||
canonical_path = canonical_stdin_path;
|
||||
else
|
||||
{
|
||||
char buf[PATH_MAX];
|
||||
auto res = realpath(name.data(), buf);
|
||||
|
||||
if ( ! res )
|
||||
reporter->FatalError("failed to get realpath() of %s: %s",
|
||||
name.data(), strerror(errno));
|
||||
|
||||
canonical_path = res;
|
||||
}
|
||||
}
|
||||
|
||||
bool ScannedFile::AlreadyScanned() const
|
||||
{
|
||||
auto rval = false;
|
||||
|
||||
for ( const auto& it : files_scanned )
|
||||
if ( it.canonical_path == canonical_path )
|
||||
{
|
||||
rval = true;
|
||||
break;
|
||||
}
|
||||
|
||||
DBG_LOG(DBG_SCRIPTS, "AlreadyScanned result (%d) %s", rval, canonical_path.data());
|
||||
return rval;
|
||||
}
|
||||
|
|
3
testing/btest/Baseline/core.load-duplicates-links/out
Normal file
3
testing/btest/Baseline/core.load-duplicates-links/out
Normal file
|
@ -0,0 +1,3 @@
|
|||
foo
|
||||
bar
|
||||
baz
|
20
testing/btest/core/load-duplicates-links.zeek
Normal file
20
testing/btest/core/load-duplicates-links.zeek
Normal file
|
@ -0,0 +1,20 @@
|
|||
# This tests Zeek's mechanism to prevent duplicate script loading on links.
|
||||
#
|
||||
# @TEST-EXEC: mkdir foo
|
||||
# @TEST-EXEC: mkdir bar
|
||||
# @TEST-EXEC: mkdir baz
|
||||
|
||||
# @TEST-EXEC: echo 'event zeek_init() &priority=3 { print "foo"; }' >foo/main.zeek
|
||||
# @TEST-EXEC: echo 'event zeek_init() &priority=2 { print "bar"; }' >bar/main.zeek
|
||||
# @TEST-EXEC: echo 'event zeek_init() &priority=1 { print "baz"; }' >baz/main.zeek
|
||||
|
||||
# @TEST-EXEC: echo "@load ./main" >common-load.zeek
|
||||
# @TEST-EXEC: ln common-load.zeek foo/__load__.zeek
|
||||
# @TEST-EXEC: ln common-load.zeek bar/__load__.zeek
|
||||
|
||||
# @TEST-EXEC: echo "@load ./main" >baz/__load__.zeek
|
||||
# @TEST-EXEC: echo "@load ./main-sym" >>baz/__load__.zeek
|
||||
# @TEST-EXEC: (cd baz && ln -s main.zeek main-sym.zeek)
|
||||
|
||||
# @TEST-EXEC: zeek -b foo bar baz foo/../foo bar/../bar baz/../baz $(pwd)/foo $(pwd)/bar $(pwd)/baz >out
|
||||
# @TEST-EXEC: btest-diff out
|
Loading…
Add table
Add a link
Reference in a new issue