Merge remote-tracking branch 'origin/topic/jsiwek/gh-928-realpath-load-dedup'

* origin/topic/jsiwek/gh-928-realpath-load-dedup:
  GH-928: use realpath() instead of inode to de-duplicate scripts
This commit is contained in:
Robin Sommer 2020-07-08 15:39:46 +00:00
commit 776250fd8c
6 changed files with 89 additions and 65 deletions

View file

@ -1,4 +1,9 @@
3.2.0-dev.868 | 2020-07-08 15:39:46 +0000
* GH-928: use realpath() instead of inode to de-duplicate scripts.
(Jon Siwek, Corelight)
3.2.0-dev.864 | 2020-07-06 21:06:41 -0700
* GH-1040: Add zero-indexed version of str_split (Tim Wojtulewicz, Corelight)

View file

@ -1 +1 @@
3.2.0-dev.864
3.2.0-dev.868

View file

@ -86,24 +86,23 @@ extern iosource::IOSource* current_iosrc;
extern iosource::PktDumper* pkt_dumper; // where to save packets
// Script file we have already scanned (or are in the process of scanning).
// They are identified by device and inode number.
// They are identified by normalized realpath.
struct ScannedFile {
dev_t dev;
ino_t inode;
int include_level;
bool skipped; // This ScannedFile was @unload'd.
bool prefixes_checked; // If loading prefixes for this file has been tried.
std::string name;
std::string canonical_path; // normalized, absolute path via realpath()
ScannedFile(dev_t arg_dev, ino_t arg_inode, int arg_include_level,
const std::string& arg_name, bool arg_skipped = false,
bool arg_prefixes_checked = false)
: dev(arg_dev), inode(arg_inode),
include_level(arg_include_level),
skipped(arg_skipped),
prefixes_checked(arg_prefixes_checked),
name(arg_name)
{ }
ScannedFile(int arg_include_level,
std::string arg_name, bool arg_skipped = false,
bool arg_prefixes_checked = false);
/**
* Compares the canonical path of this file against every canonical path
* in files_scanned and returns whether there's any match.
*/
bool AlreadyScanned() const;
};
extern std::list<ScannedFile> files_scanned;

View file

@ -3,6 +3,9 @@
#include <errno.h>
#include <climits>
#include <cstdlib>
#include <stack>
#include <list>
#include <string>
@ -39,13 +42,6 @@
using namespace zeek::detail;
namespace {
struct ZeekINode {
dev_t dev;
ino_t ino;
};
}
extern YYLTYPE yylloc; // holds start line and column of token
extern zeek::EnumType* cur_enum_type;
@ -101,30 +97,6 @@ static std::string find_relative_script_file(const std::string& filename)
return find_script_file(filename, bro_path());
}
static ZeekINode get_inode(FILE* f, const std::string& path)
{
struct stat b;
if ( fstat(fileno(f), &b) )
reporter->FatalError("fstat of %s failed: %s\n", path.c_str(),
strerror(errno));
return {b.st_dev, b.st_ino};
}
static ZeekINode get_inode(const std::string& path)
{
FILE* f = open_file(path);
if ( ! f )
reporter->FatalError("failed to open %s\n", path.c_str());
auto inode = get_inode(f, path);
fclose(f);
return inode;
}
class FileInfo {
public:
FileInfo(std::string restore_module = "");
@ -430,9 +402,8 @@ when return TOK_WHEN;
else
{
// All we have to do is pretend we've already scanned it.
auto i = get_inode(path);
ScannedFile sf(i.dev, i.ino, file_stack.length(), path, true);
files_scanned.push_back(sf);
ScannedFile sf(file_stack.length(), std::move(path), true);
files_scanned.push_back(std::move(sf));
}
}
@ -598,19 +569,7 @@ YYLTYPE zeek::detail::GetCurrentLocation()
return currloc;
}
static bool already_scanned(ZeekINode in)
{
for ( const auto& it : files_scanned )
if ( it.dev == in.dev && it.inode == in.ino )
return true;
return false;
}
static bool already_scanned(const std::string& path)
{
return already_scanned(get_inode(path));
}
static auto constexpr canonical_stdin_path = "<stdin>";
static int load_files(const char* orig_file)
{
@ -641,7 +600,7 @@ static int load_files(const char* orig_file)
if ( streq(orig_file, "-") )
{
f = stdin;
file_path = "<stdin>";
file_path = canonical_stdin_path;
if ( g_policy_debug )
{
@ -664,9 +623,9 @@ static int load_files(const char* orig_file)
reporter->FatalError("can't open %s", file_path.c_str());
}
auto i = get_inode(f, file_path);
ScannedFile sf(file_stack.length(), file_path);
if ( already_scanned(i) )
if ( sf.AlreadyScanned() )
{
if ( f != stdin )
fclose(f);
@ -674,8 +633,7 @@ static int load_files(const char* orig_file)
return 0;
}
ScannedFile sf(i.dev, i.ino, file_stack.length(), file_path);
files_scanned.push_back(sf);
files_scanned.push_back(std::move(sf));
if ( g_policy_debug && ! file_path.empty() )
{
@ -1079,3 +1037,42 @@ FileInfo::~FileInfo()
if ( restore_module != "" )
zeek::detail::current_module = restore_module;
}
ScannedFile::ScannedFile(int arg_include_level,
std::string arg_name,
bool arg_skipped,
bool arg_prefixes_checked)
: include_level(arg_include_level),
skipped(arg_skipped),
prefixes_checked(arg_prefixes_checked),
name(std::move(arg_name))
{
if ( name == canonical_stdin_path )
canonical_path = canonical_stdin_path;
else
{
char buf[PATH_MAX];
auto res = realpath(name.data(), buf);
if ( ! res )
reporter->FatalError("failed to get realpath() of %s: %s",
name.data(), strerror(errno));
canonical_path = res;
}
}
bool ScannedFile::AlreadyScanned() const
{
auto rval = false;
for ( const auto& it : files_scanned )
if ( it.canonical_path == canonical_path )
{
rval = true;
break;
}
DBG_LOG(DBG_SCRIPTS, "AlreadyScanned result (%d) %s", rval, canonical_path.data());
return rval;
}

View file

@ -0,0 +1,3 @@
foo
bar
baz

View file

@ -0,0 +1,20 @@
# This tests Zeek's mechanism to prevent duplicate script loading on links.
#
# @TEST-EXEC: mkdir foo
# @TEST-EXEC: mkdir bar
# @TEST-EXEC: mkdir baz
# @TEST-EXEC: echo 'event zeek_init() &priority=3 { print "foo"; }' >foo/main.zeek
# @TEST-EXEC: echo 'event zeek_init() &priority=2 { print "bar"; }' >bar/main.zeek
# @TEST-EXEC: echo 'event zeek_init() &priority=1 { print "baz"; }' >baz/main.zeek
# @TEST-EXEC: echo "@load ./main" >common-load.zeek
# @TEST-EXEC: ln common-load.zeek foo/__load__.zeek
# @TEST-EXEC: ln common-load.zeek bar/__load__.zeek
# @TEST-EXEC: echo "@load ./main" >baz/__load__.zeek
# @TEST-EXEC: echo "@load ./main-sym" >>baz/__load__.zeek
# @TEST-EXEC: (cd baz && ln -s main.zeek main-sym.zeek)
# @TEST-EXEC: zeek -b foo bar baz foo/../foo bar/../bar baz/../baz $(pwd)/foo $(pwd)/bar $(pwd)/baz >out
# @TEST-EXEC: btest-diff out