Merge remote-tracking branch 'origin/topic/awelzel/2311-load-plugin-bare-mode'

* origin/topic/awelzel/2311-load-plugin-bare-mode:
  scan.l: Fix @load-plugin scripts loading
  scan.l: Extract switch_to() from load_files()
  ScannedFile: Allow skipping canonicalization
This commit is contained in:
Arne Welzel 2025-03-04 09:40:41 +01:00
commit a3a08fa0f3
12 changed files with 211 additions and 25 deletions

22
CHANGES
View file

@ -1,3 +1,25 @@
7.2.0-dev.244 | 2025-03-04 09:40:41 +0100
* GH-2311: scan.l: Fix @load-plugin scripts loading (Arne Welzel, Corelight)
For a plugin loaded via @load-plugin, create a YY_BUFFER_STATE holding
the required loads for the implicitly loaded files. In loaded scripts,
this generated file will show up with a path of the shared object file
of the plugin with the __preload__.zeek and __load__.zeek files loaded
by it.
Closes #2311
* scan.l: Extract switch_to() from load_files() (Arne Welzel, Corelight)
* ScannedFile: Allow skipping canonicalization (Arne Welzel, Corelight)
* Bump pre-commit hooks (Benjamin Bannier, Corelight)
* Always break lines when formatting `spicy_add_analyzer` (Benjamin Bannier, Corelight)
* Fix formatting of `zeek_add_plugin` (Benjamin Bannier, Corelight)
7.2.0-dev.232 | 2025-02-25 07:29:43 -0700 7.2.0-dev.232 | 2025-02-25 07:29:43 -0700
* CI: Use the right variable for the number of test jobs for zeekctl_debian11_task tasks (Tim Wojtulewicz, Corelight) * CI: Use the right variable for the number of test jobs for zeekctl_debian11_task tasks (Tim Wojtulewicz, Corelight)

View file

@ -1 +1 @@
7.2.0-dev.232 7.2.0-dev.244

View file

@ -13,14 +13,15 @@ namespace zeek::detail {
std::list<ScannedFile> files_scanned; std::list<ScannedFile> files_scanned;
std::vector<SignatureFile> sig_files; std::vector<SignatureFile> sig_files;
ScannedFile::ScannedFile(int arg_include_level, std::string arg_name, bool arg_skipped, bool arg_prefixes_checked) ScannedFile::ScannedFile(int arg_include_level, std::string arg_name, bool arg_skipped, bool arg_prefixes_checked,
bool arg_is_canonical)
: include_level(arg_include_level), : include_level(arg_include_level),
skipped(arg_skipped), skipped(arg_skipped),
prefixes_checked(arg_prefixes_checked), prefixes_checked(arg_prefixes_checked),
name(std::move(arg_name)) { name(std::move(arg_name)) {
if ( name == canonical_stdin_path ) if ( name == canonical_stdin_path )
canonical_path = canonical_stdin_path; canonical_path = canonical_stdin_path;
else { else if ( ! arg_is_canonical ) {
std::error_code ec; std::error_code ec;
auto canon = filesystem::canonical(name, ec); auto canon = filesystem::canonical(name, ec);
if ( ec ) if ( ec )
@ -28,6 +29,9 @@ ScannedFile::ScannedFile(int arg_include_level, std::string arg_name, bool arg_s
canonical_path = canon.string(); canonical_path = canon.string();
} }
else {
canonical_path = name;
}
} }
bool ScannedFile::AlreadyScanned() const { bool ScannedFile::AlreadyScanned() const {

View file

@ -12,10 +12,13 @@ namespace zeek::detail {
// Script file we have already scanned (or are in the process of scanning). // Script file we have already scanned (or are in the process of scanning).
// They are identified by normalized canonical path. // They are identified by normalized canonical path.
//
// If arg_is_canonical is set to true, assume arg_name is canonicalized and
// skip resolving the canonical name.
class ScannedFile { class ScannedFile {
public: public:
ScannedFile(int arg_include_level, std::string arg_name, bool arg_skipped = false, ScannedFile(int arg_include_level, std::string arg_name, bool arg_skipped = false,
bool arg_prefixes_checked = false); bool arg_prefixes_checked = false, bool arg_is_canonical = false);
/** /**
* Compares the canonical path of this file against every canonical path * Compares the canonical path of this file against every canonical path

View file

@ -219,6 +219,15 @@ static zeek::PList<FileInfo> file_stack;
// Returns true if the file is new, false if it's already been scanned. // Returns true if the file is new, false if it's already been scanned.
static int load_files(const char* file); static int load_files(const char* file);
// Update the current parsing and location state for the given file and buffer.
static int switch_to(const char* file, YY_BUFFER_STATE buffer);
// Be careful to never delete things from this list, as the strings
// are referred to (in order to save the locations of tokens and statements,
// for error reporting and debugging).
static zeek::name_list input_files;
static zeek::name_list essential_input_files;
// ### TODO: columns too - use yyless with '.' action? // ### TODO: columns too - use yyless with '.' action?
%} %}
@ -464,11 +473,75 @@ when return TOK_WHEN;
rc = PLUGIN_HOOK_WITH_RESULT(HOOK_LOAD_FILE_EXT, HookLoadFileExtended(zeek::plugin::Plugin::PLUGIN, plugin, ""), std::make_pair(-1, std::nullopt)); rc = PLUGIN_HOOK_WITH_RESULT(HOOK_LOAD_FILE_EXT, HookLoadFileExtended(zeek::plugin::Plugin::PLUGIN, plugin, ""), std::make_pair(-1, std::nullopt));
switch ( rc.first ) { switch ( rc.first ) {
case -1: case -1: {
// No plugin in charge of this file. (We ignore any returned content.) // No plugin took charge of this @load-plugin directive.
auto pre_load_input_files = input_files.size();
zeek::plugin_mgr->ActivateDynamicPlugin(plugin); zeek::plugin_mgr->ActivateDynamicPlugin(plugin);
// No new input files: Likely the plugin was already loaded
// or has failed to load.
if ( input_files.size() == pre_load_input_files )
break; break;
// Look up the plugin to get the path to its shared object.
// That path is used for loaded_scripts.log and as the name of
// the generated file that loads the plugin's scripts.
const zeek::plugin::Plugin *pp = nullptr;
for ( const auto* p : zeek::plugin_mgr->ActivePlugins() )
    {
    if ( p->DynamicPlugin() && p->Name() == plugin )
        {
        pp = p;
        break;
        }
    }

std::string name;
if ( pp )
    name = pp->PluginPath();
else
    {
    // This shouldn't happen. If it does, we come up
    // with an artificial filename rather than using
    // the shared object name.
    zeek::reporter->Warning("Did not find %s after loading", plugin);
    name = std::string("@load-plugin ") + plugin;
    }

// Render all needed @load lines into a string. Every entry that
// activating the plugin added to input_files is consumed here, so
// the list shrinks back to its size from before the activation.
std::string buf = "# @load-plugin generated script\n";
while ( input_files.size() > pre_load_input_files )
    {
    // Any relative files found by the plugin manager are
    // converted to absolute paths relative to Zeek's working
    // directory. That way it is clear where these are supposed
    // to be found and find_relative_script_file() won't get
    // confused by any ZEEKPATH settings. Also, plugin files
    // containing any relative @loads themselves will work.
    //
    // The error_code must be handed to canonical(): the overload
    // without it reports failures by throwing and never sets ec,
    // which would make the check below dead code.
    std::error_code ec;
    auto canonical = zeek::filesystem::canonical(input_files[0], ec);
    if ( ec )
        zeek::reporter->FatalError("plugin script %s not found: %s",
                                   input_files[0], ec.message().c_str());

    buf += std::string("@load ") + canonical.string() + "\n";
    delete[] input_files.remove_nth(0);
    }

// Register the generated script under the chosen name. The name is
// passed as already canonical so ScannedFile does not try to resolve it.
zeek::detail::zeekygen_mgr->Script(name);
zeek::detail::ScannedFile sf(file_stack.length(), name, false /*skipped*/,
                             true /*prefixes_checked*/, true /*is_canonical*/);
zeek::detail::files_scanned.push_back(std::move(sf));
file_stack.push_back(new FileInfo(zeek::detail::current_module));

// yy_scan_bytes() scans a copy of the bytes, so buf does not need
// to outlive this scope.
YY_BUFFER_STATE buffer = yy_scan_bytes(buf.data(), buf.size());
switch_to(name.c_str(), buffer);
break;
}
case 0: case 0:
if ( ! zeek::reporter->Errors() ) if ( ! zeek::reporter->Errors() )
zeek::reporter->Error("Plugin reported error loading plugin %s", plugin); zeek::reporter->Error("Plugin reported error loading plugin %s", plugin);
@ -702,6 +775,23 @@ void zeek::detail::SetCurrentLocation(YYLTYPE currloc)
line_number = currloc.first_line; line_number = currloc.first_line;
} }
// Makes `buffer` the scanner's active input and resets the per-file
// parsing and location state so that it describes `file`.
// Always returns 1.
static int switch_to(const char* file, YY_BUFFER_STATE buffer)
    {
    yy_switch_to_buffer(buffer);

    // Line counting restarts at 1 for the new input.
    line_number = 1;
    yylloc.last_line = 1;
    yylloc.first_line = 1;

    // The previous filename string is intentionally not freed here:
    // every Obj created while parsing that file still points to it.
    filename = zeek::util::copy_string(file);
    yylloc.filename = filename;

    current_file_has_conditionals = files_with_conditionals.count(filename) != 0;

    // Record the conditional depth and pragma stack depth in effect
    // at the moment this input is entered.
    entry_cond_depth.push_back(conditional_depth);
    entry_pragma_stack_depth.push_back(pragma_stack.size());

    return 1;
    }
static int load_files(const char* orig_file) static int load_files(const char* orig_file)
{ {
std::string file_path = find_relative_script_file(orig_file); std::string file_path = find_relative_script_file(orig_file);
@ -800,19 +890,7 @@ static int load_files(const char* orig_file)
buffer = yy_create_buffer(f, YY_BUF_SIZE); buffer = yy_create_buffer(f, YY_BUF_SIZE);
} }
yy_switch_to_buffer(buffer); return switch_to(file_path.c_str(), buffer);
yylloc.first_line = yylloc.last_line = line_number = 1;
// Don't delete the old filename - it's pointed to by
// every Obj created when parsing it.
yylloc.filename = filename = zeek::util::copy_string(file_path.c_str());
current_file_has_conditionals = files_with_conditionals.count(filename) > 0;
entry_cond_depth.push_back(conditional_depth);
entry_pragma_stack_depth.push_back(pragma_stack.size());
return 1;
} }
void begin_RE() void begin_RE()
@ -942,12 +1020,6 @@ void reject_directive(zeek::detail::Stmt* s)
zeek::reporter->Error("incorrect use of directive"); zeek::reporter->Error("incorrect use of directive");
} }
// Be careful to never delete things from this list, as the strings
// are referred to (in order to save the locations of tokens and statements,
// for error reporting and debugging).
static zeek::name_list input_files;
static zeek::name_list essential_input_files;
void add_essential_input_file(const char* file) void add_essential_input_file(const char* file)
{ {
if ( ! file ) if ( ! file )

View file

@ -0,0 +1,11 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
<...>/enum-bif-plugin.zeek
<...>/enum-before-load-plugin.zeek
<...>/Demo-EnumBif.shared
<...>/__preload__.zeek
<...>/types.zeek
<...>/__load__.zeek
<...>/enumbif.bif.zeek
<...>/__load__.zeek
<...>/enum-after-load-plugin.zeek
<...>/enum-after-load-plugin-end.zeek

View file

@ -0,0 +1,11 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
<...>/enum-bif-plugin.zeek
<...>/enum-before-load-plugin.zeek
.<...>/Demo-EnumBif.shared
<...>/__preload__.zeek
<...>/types.zeek
<...>/__load__.zeek
<...>/enumbif.bif.zeek
<...>/__load__.zeek
<...>/enum-after-load-plugin.zeek
<...>/enum-after-load-plugin-end.zeek

View file

@ -0,0 +1,6 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
EnumBif::MyEnumA
{
EnumBif::MyEnumB,
EnumBif::MyEnumA
}

View file

@ -0,0 +1,6 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
EnumBif::MyEnumA
{
EnumBif::MyEnumB,
EnumBif::MyEnumA
}

View file

@ -0,0 +1,45 @@
# @TEST-DOC: Ensure the enum from the .bif file is available right after @load-plugin in bare mode.
# @TEST-EXEC: ${DIST}/auxil/zeek-aux/plugin-support/init-plugin -u . Demo EnumBif
# @TEST-EXEC: cp -r %DIR/enum-bif-plugin/* .
#
# @TEST-EXEC: ./configure --zeek-dist=${DIST} && make
#
# @TEST-EXEC: ZEEK_PLUGIN_PATH=`pwd` zeek -b %INPUT >output.abs
# @TEST-EXEC: grep '[Ee]num' loaded_scripts.log > loaded_scripts.log.abs
# @TEST-EXEC: ZEEK_PLUGIN_PATH=./build zeek -b %INPUT >output.rel
# @TEST-EXEC: grep '[Ee]num' loaded_scripts.log > loaded_scripts.log.rel
#
# @TEST-EXEC: TEST_DIFF_CANONIFIER= btest-diff output.abs
# @TEST-EXEC: TEST_DIFF_CANONIFIER="sed -E 's/(Demo-EnumBif)\.(.*)$/\1.shared/' | $SCRIPTS/diff-remove-abspath" btest-diff loaded_scripts.log.abs
# @TEST-EXEC: TEST_DIFF_CANONIFIER= btest-diff output.rel
# @TEST-EXEC: TEST_DIFF_CANONIFIER="sed -E 's/(Demo-EnumBif)\.(.*)$/\1.shared/' | $SCRIPTS/diff-remove-abspath" btest-diff loaded_scripts.log.rel
# Presumably this is what produces the loaded_scripts.log that the
# test commands above grep; the baselines check the load order in it.
@load misc/loaded-scripts
# Marker script loaded before the plugin, so the baselines can show
# where the plugin's scripts land relative to it.
@load ./enum-before-load-plugin
# Load the plugin explicitly; its enum is used by zeek_init() below.
@load-plugin Demo::EnumBif
# Marker script loaded right after the plugin.
@load ./enum-after-load-plugin
event zeek_init()
{
# Print one enum value and then all names of the enum type declared in
# the plugin's .bif file; the expected output is in the output baselines.
print(EnumBif::MyEnumA);
print enum_names(EnumBif::MyEnum);
}
# Repeat the directive for the same plugin. The loaded_scripts
# baselines list the plugin's shared object only once.
@load-plugin Demo::EnumBif
# Marker script loaded after the repeated directive.
@load ./enum-after-load-plugin-end
@TEST-START-FILE enum-before-load-plugin.zeek
# empty
@TEST-END-FILE
@TEST-START-FILE enum-after-load-plugin.zeek
# empty
@TEST-END-FILE
@TEST-START-FILE enum-after-load-plugin-end.zeek
# empty
@TEST-END-FILE

View file

@ -0,0 +1,6 @@
module EnumBif;
enum MyEnum %{
MyEnumA,
MyEnumB,
%}