mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 06:38:20 +00:00
GH-928: use realpath() instead of inode to de-duplicate scripts
Duplicate script `@load` directives are now detected by comparing against canonical paths formed by realpath(). This fixes the previous, unexpected behavior of treating scripts that hardlink to same inode as duplicates: such links will now be loaded as distinct scripts since their canonical path differs.
This commit is contained in:
parent
7f347a3b1e
commit
320b14ff09
4 changed files with 83 additions and 64 deletions
23
src/Net.h
23
src/Net.h
|
@ -86,24 +86,23 @@ extern iosource::IOSource* current_iosrc;
|
||||||
extern iosource::PktDumper* pkt_dumper; // where to save packets
|
extern iosource::PktDumper* pkt_dumper; // where to save packets
|
||||||
|
|
||||||
// Script file we have already scanned (or are in the process of scanning).
|
// Script file we have already scanned (or are in the process of scanning).
|
||||||
// They are identified by device and inode number.
|
// They are identified by normalized realpath.
|
||||||
struct ScannedFile {
|
struct ScannedFile {
|
||||||
dev_t dev;
|
|
||||||
ino_t inode;
|
|
||||||
int include_level;
|
int include_level;
|
||||||
bool skipped; // This ScannedFile was @unload'd.
|
bool skipped; // This ScannedFile was @unload'd.
|
||||||
bool prefixes_checked; // If loading prefixes for this file has been tried.
|
bool prefixes_checked; // If loading prefixes for this file has been tried.
|
||||||
std::string name;
|
std::string name;
|
||||||
|
std::string canonical_path; // normalized, absolute path via realpath()
|
||||||
|
|
||||||
ScannedFile(dev_t arg_dev, ino_t arg_inode, int arg_include_level,
|
ScannedFile(int arg_include_level,
|
||||||
const std::string& arg_name, bool arg_skipped = false,
|
std::string arg_name, bool arg_skipped = false,
|
||||||
bool arg_prefixes_checked = false)
|
bool arg_prefixes_checked = false);
|
||||||
: dev(arg_dev), inode(arg_inode),
|
|
||||||
include_level(arg_include_level),
|
/**
|
||||||
skipped(arg_skipped),
|
* Compares the canonical path of this file against every canonical path
|
||||||
prefixes_checked(arg_prefixes_checked),
|
* in files_scanned and returns whether there's any match.
|
||||||
name(arg_name)
|
*/
|
||||||
{ }
|
bool AlreadyScanned() const;
|
||||||
};
|
};
|
||||||
|
|
||||||
extern std::list<ScannedFile> files_scanned;
|
extern std::list<ScannedFile> files_scanned;
|
||||||
|
|
101
src/scan.l
101
src/scan.l
|
@ -3,6 +3,9 @@
|
||||||
|
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
|
|
||||||
|
#include <climits>
|
||||||
|
#include <cstdlib>
|
||||||
|
|
||||||
#include <stack>
|
#include <stack>
|
||||||
#include <list>
|
#include <list>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
@ -39,13 +42,6 @@
|
||||||
|
|
||||||
using namespace zeek::detail;
|
using namespace zeek::detail;
|
||||||
|
|
||||||
namespace {
|
|
||||||
struct ZeekINode {
|
|
||||||
dev_t dev;
|
|
||||||
ino_t ino;
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
extern YYLTYPE yylloc; // holds start line and column of token
|
extern YYLTYPE yylloc; // holds start line and column of token
|
||||||
extern zeek::EnumType* cur_enum_type;
|
extern zeek::EnumType* cur_enum_type;
|
||||||
|
|
||||||
|
@ -101,30 +97,6 @@ static std::string find_relative_script_file(const std::string& filename)
|
||||||
return find_script_file(filename, bro_path());
|
return find_script_file(filename, bro_path());
|
||||||
}
|
}
|
||||||
|
|
||||||
static ZeekINode get_inode(FILE* f, const std::string& path)
|
|
||||||
{
|
|
||||||
struct stat b;
|
|
||||||
|
|
||||||
if ( fstat(fileno(f), &b) )
|
|
||||||
reporter->FatalError("fstat of %s failed: %s\n", path.c_str(),
|
|
||||||
strerror(errno));
|
|
||||||
|
|
||||||
return {b.st_dev, b.st_ino};
|
|
||||||
}
|
|
||||||
|
|
||||||
static ZeekINode get_inode(const std::string& path)
|
|
||||||
{
|
|
||||||
FILE* f = open_file(path);
|
|
||||||
|
|
||||||
if ( ! f )
|
|
||||||
reporter->FatalError("failed to open %s\n", path.c_str());
|
|
||||||
|
|
||||||
auto inode = get_inode(f, path);
|
|
||||||
fclose(f);
|
|
||||||
|
|
||||||
return inode;
|
|
||||||
}
|
|
||||||
|
|
||||||
class FileInfo {
|
class FileInfo {
|
||||||
public:
|
public:
|
||||||
FileInfo(std::string restore_module = "");
|
FileInfo(std::string restore_module = "");
|
||||||
|
@ -430,9 +402,8 @@ when return TOK_WHEN;
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// All we have to do is pretend we've already scanned it.
|
// All we have to do is pretend we've already scanned it.
|
||||||
auto i = get_inode(path);
|
ScannedFile sf(file_stack.length(), std::move(path), true);
|
||||||
ScannedFile sf(i.dev, i.ino, file_stack.length(), path, true);
|
files_scanned.push_back(std::move(sf));
|
||||||
files_scanned.push_back(sf);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -598,19 +569,7 @@ YYLTYPE zeek::detail::GetCurrentLocation()
|
||||||
return currloc;
|
return currloc;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool already_scanned(ZeekINode in)
|
static auto constexpr canonical_stdin_path = "<stdin>";
|
||||||
{
|
|
||||||
for ( const auto& it : files_scanned )
|
|
||||||
if ( it.dev == in.dev && it.inode == in.ino )
|
|
||||||
return true;
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool already_scanned(const std::string& path)
|
|
||||||
{
|
|
||||||
return already_scanned(get_inode(path));
|
|
||||||
}
|
|
||||||
|
|
||||||
static int load_files(const char* orig_file)
|
static int load_files(const char* orig_file)
|
||||||
{
|
{
|
||||||
|
@ -641,7 +600,7 @@ static int load_files(const char* orig_file)
|
||||||
if ( streq(orig_file, "-") )
|
if ( streq(orig_file, "-") )
|
||||||
{
|
{
|
||||||
f = stdin;
|
f = stdin;
|
||||||
file_path = "<stdin>";
|
file_path = canonical_stdin_path;
|
||||||
|
|
||||||
if ( g_policy_debug )
|
if ( g_policy_debug )
|
||||||
{
|
{
|
||||||
|
@ -664,9 +623,9 @@ static int load_files(const char* orig_file)
|
||||||
reporter->FatalError("can't open %s", file_path.c_str());
|
reporter->FatalError("can't open %s", file_path.c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
auto i = get_inode(f, file_path);
|
ScannedFile sf(file_stack.length(), file_path);
|
||||||
|
|
||||||
if ( already_scanned(i) )
|
if ( sf.AlreadyScanned() )
|
||||||
{
|
{
|
||||||
if ( f != stdin )
|
if ( f != stdin )
|
||||||
fclose(f);
|
fclose(f);
|
||||||
|
@ -674,8 +633,7 @@ static int load_files(const char* orig_file)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
ScannedFile sf(i.dev, i.ino, file_stack.length(), file_path);
|
files_scanned.push_back(std::move(sf));
|
||||||
files_scanned.push_back(sf);
|
|
||||||
|
|
||||||
if ( g_policy_debug && ! file_path.empty() )
|
if ( g_policy_debug && ! file_path.empty() )
|
||||||
{
|
{
|
||||||
|
@ -1079,3 +1037,42 @@ FileInfo::~FileInfo()
|
||||||
if ( restore_module != "" )
|
if ( restore_module != "" )
|
||||||
zeek::detail::current_module = restore_module;
|
zeek::detail::current_module = restore_module;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ScannedFile::ScannedFile(int arg_include_level,
|
||||||
|
std::string arg_name,
|
||||||
|
bool arg_skipped,
|
||||||
|
bool arg_prefixes_checked)
|
||||||
|
: include_level(arg_include_level),
|
||||||
|
skipped(arg_skipped),
|
||||||
|
prefixes_checked(arg_prefixes_checked),
|
||||||
|
name(std::move(arg_name))
|
||||||
|
{
|
||||||
|
if ( name == canonical_stdin_path )
|
||||||
|
canonical_path = canonical_stdin_path;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
char buf[PATH_MAX];
|
||||||
|
auto res = realpath(name.data(), buf);
|
||||||
|
|
||||||
|
if ( ! res )
|
||||||
|
reporter->FatalError("failed to get realpath() of %s: %s",
|
||||||
|
name.data(), strerror(errno));
|
||||||
|
|
||||||
|
canonical_path = res;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool ScannedFile::AlreadyScanned() const
|
||||||
|
{
|
||||||
|
auto rval = false;
|
||||||
|
|
||||||
|
for ( const auto& it : files_scanned )
|
||||||
|
if ( it.canonical_path == canonical_path )
|
||||||
|
{
|
||||||
|
rval = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
DBG_LOG(DBG_SCRIPTS, "AlreadyScanned result (%d) %s", rval, canonical_path.data());
|
||||||
|
return rval;
|
||||||
|
}
|
||||||
|
|
3
testing/btest/Baseline/core.load-duplicates-links/out
Normal file
3
testing/btest/Baseline/core.load-duplicates-links/out
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
foo
|
||||||
|
bar
|
||||||
|
baz
|
20
testing/btest/core/load-duplicates-links.zeek
Normal file
20
testing/btest/core/load-duplicates-links.zeek
Normal file
|
@ -0,0 +1,20 @@
|
||||||
|
# This tests Zeek's mechanism to prevent duplicate script loading on links.
|
||||||
|
#
|
||||||
|
# @TEST-EXEC: mkdir foo
|
||||||
|
# @TEST-EXEC: mkdir bar
|
||||||
|
# @TEST-EXEC: mkdir baz
|
||||||
|
|
||||||
|
# @TEST-EXEC: echo 'event zeek_init() &priority=3 { print "foo"; }' >foo/main.zeek
|
||||||
|
# @TEST-EXEC: echo 'event zeek_init() &priority=2 { print "bar"; }' >bar/main.zeek
|
||||||
|
# @TEST-EXEC: echo 'event zeek_init() &priority=1 { print "baz"; }' >baz/main.zeek
|
||||||
|
|
||||||
|
# @TEST-EXEC: echo "@load ./main" >common-load.zeek
|
||||||
|
# @TEST-EXEC: ln common-load.zeek foo/__load__.zeek
|
||||||
|
# @TEST-EXEC: ln common-load.zeek bar/__load__.zeek
|
||||||
|
|
||||||
|
# @TEST-EXEC: echo "@load ./main" >baz/__load__.zeek
|
||||||
|
# @TEST-EXEC: echo "@load ./main-sym" >>baz/__load__.zeek
|
||||||
|
# @TEST-EXEC: (cd baz && ln -s main.zeek main-sym.zeek)
|
||||||
|
|
||||||
|
# @TEST-EXEC: zeek -b foo bar baz foo/../foo bar/../bar baz/../baz $(pwd)/foo $(pwd)/bar $(pwd)/baz >out
|
||||||
|
# @TEST-EXEC: btest-diff out
|
Loading…
Add table
Add a link
Reference in a new issue