From 2a8040039a22ad8deaec0f66a594d08f21d92814 Mon Sep 17 00:00:00 2001 From: Arne Welzel Date: Fri, 28 Feb 2025 17:31:58 +0100 Subject: [PATCH 1/3] ScannedFile: Allow skipping canonicalization --- src/ScannedFile.cc | 8 ++++++-- src/ScannedFile.h | 5 ++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/ScannedFile.cc b/src/ScannedFile.cc index f6a5aa83cc..52df993ac5 100644 --- a/src/ScannedFile.cc +++ b/src/ScannedFile.cc @@ -13,14 +13,15 @@ namespace zeek::detail { std::list files_scanned; std::vector sig_files; -ScannedFile::ScannedFile(int arg_include_level, std::string arg_name, bool arg_skipped, bool arg_prefixes_checked) +ScannedFile::ScannedFile(int arg_include_level, std::string arg_name, bool arg_skipped, bool arg_prefixes_checked, + bool arg_is_canonical) : include_level(arg_include_level), skipped(arg_skipped), prefixes_checked(arg_prefixes_checked), name(std::move(arg_name)) { if ( name == canonical_stdin_path ) canonical_path = canonical_stdin_path; - else { + else if ( ! arg_is_canonical ) { std::error_code ec; auto canon = filesystem::canonical(name, ec); if ( ec ) @@ -28,6 +29,9 @@ ScannedFile::ScannedFile(int arg_include_level, std::string arg_name, bool arg_s canonical_path = canon.string(); } + else { + canonical_path = name; + } } bool ScannedFile::AlreadyScanned() const { diff --git a/src/ScannedFile.h b/src/ScannedFile.h index 05c816b912..2658d47f48 100644 --- a/src/ScannedFile.h +++ b/src/ScannedFile.h @@ -12,10 +12,13 @@ namespace zeek::detail { // Script file we have already scanned (or are in the process of scanning). // They are identified by normalized canonical path. +// +// If arg_is_canonical is set to true, assume arg_name is canonicalized and +// skip resolving the canonical name. 
class ScannedFile { public: ScannedFile(int arg_include_level, std::string arg_name, bool arg_skipped = false, - bool arg_prefixes_checked = false); + bool arg_prefixes_checked = false, bool arg_is_canonical = false); /** * Compares the canonical path of this file against every canonical path From d079a2b9a81210e0e8f6085f717dcd4bc4e779f9 Mon Sep 17 00:00:00 2001 From: Arne Welzel Date: Fri, 28 Feb 2025 17:36:23 +0100 Subject: [PATCH 2/3] scan.l: Extract switch_to() from load_files() --- src/scan.l | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/src/scan.l b/src/scan.l index e254856798..21ef434bc4 100644 --- a/src/scan.l +++ b/src/scan.l @@ -219,6 +219,9 @@ static zeek::PList file_stack; // Returns true if the file is new, false if it's already been scanned. static int load_files(const char* file); +// Update the current parsing and location state for the given file and buffer. +static int switch_to(const char* file, YY_BUFFER_STATE buffer); + // ### TODO: columns too - use yyless with '.' action? %} @@ -702,6 +705,23 @@ void zeek::detail::SetCurrentLocation(YYLTYPE currloc) line_number = currloc.first_line; } +static int switch_to(const char* file, YY_BUFFER_STATE buffer) + { + yy_switch_to_buffer(buffer); + yylloc.first_line = yylloc.last_line = line_number = 1; + + // Don't delete the old filename - it's pointed to by + // every Obj created when parsing it. 
+ yylloc.filename = filename = zeek::util::copy_string(file); + + current_file_has_conditionals = files_with_conditionals.count(filename) > 0; + + entry_cond_depth.push_back(conditional_depth); + entry_pragma_stack_depth.push_back(pragma_stack.size()); + + return 1; + } + static int load_files(const char* orig_file) { std::string file_path = find_relative_script_file(orig_file); @@ -800,19 +820,7 @@ static int load_files(const char* orig_file) buffer = yy_create_buffer(f, YY_BUF_SIZE); } - yy_switch_to_buffer(buffer); - yylloc.first_line = yylloc.last_line = line_number = 1; - - // Don't delete the old filename - it's pointed to by - // every Obj created when parsing it. - yylloc.filename = filename = zeek::util::copy_string(file_path.c_str()); - - current_file_has_conditionals = files_with_conditionals.count(filename) > 0; - - entry_cond_depth.push_back(conditional_depth); - entry_pragma_stack_depth.push_back(pragma_stack.size()); - - return 1; + return switch_to(file_path.c_str(), buffer); } void begin_RE() From ab99f8e233328452802f01a9112468b10e41feea Mon Sep 17 00:00:00 2001 From: Arne Welzel Date: Thu, 27 Feb 2025 17:28:53 +0100 Subject: [PATCH 3/3] scan.l: Fix @load-plugin scripts loading For a plugin loaded via @load-plugin, create a YY_BUFFER_STATE holding the required loads for the implicitly loaded files. In loaded scripts, this generated file will show up with a path of the shared object file of the plugin with the __preload__.zeek and __load__.zeek files loaded by it. 
Closes #2311 --- src/scan.l | 80 +++++++++++++++++-- .../loaded_scripts.log.abs | 11 +++ .../loaded_scripts.log.rel | 11 +++ .../plugins.enum-bif-plugin/output.abs | 6 ++ .../plugins.enum-bif-plugin/output.rel | 6 ++ testing/btest/plugins/enum-bif-plugin.zeek | 45 +++++++++++ .../plugins/enum-bif-plugin/.btest-ignore | 0 .../plugins/enum-bif-plugin/src/enumbif.bif | 6 ++ 8 files changed, 157 insertions(+), 8 deletions(-) create mode 100644 testing/btest/Baseline/plugins.enum-bif-plugin/loaded_scripts.log.abs create mode 100644 testing/btest/Baseline/plugins.enum-bif-plugin/loaded_scripts.log.rel create mode 100644 testing/btest/Baseline/plugins.enum-bif-plugin/output.abs create mode 100644 testing/btest/Baseline/plugins.enum-bif-plugin/output.rel create mode 100644 testing/btest/plugins/enum-bif-plugin.zeek create mode 100644 testing/btest/plugins/enum-bif-plugin/.btest-ignore create mode 100644 testing/btest/plugins/enum-bif-plugin/src/enumbif.bif diff --git a/src/scan.l b/src/scan.l index 21ef434bc4..44cebc94b1 100644 --- a/src/scan.l +++ b/src/scan.l @@ -222,6 +222,12 @@ static int load_files(const char* file); // Update the current parsing and location state for the given file and buffer. static int switch_to(const char* file, YY_BUFFER_STATE buffer); +// Be careful to never delete things from this list, as the strings +// are referred to (in order to save the locations of tokens and statements, +// for error reporting and debugging). +static zeek::name_list input_files; +static zeek::name_list essential_input_files; + // ### TODO: columns too - use yyless with '.' action? %} @@ -467,10 +473,74 @@ when return TOK_WHEN; rc = PLUGIN_HOOK_WITH_RESULT(HOOK_LOAD_FILE_EXT, HookLoadFileExtended(zeek::plugin::Plugin::PLUGIN, plugin, ""), std::make_pair(-1, std::nullopt)); switch ( rc.first ) { - case -1: - // No plugin in charge of this file. (We ignore any returned content.) + case -1: { + // No plugin took charge of this @load-plugin directive. 
+ auto pre_load_input_files = input_files.size(); zeek::plugin_mgr->ActivateDynamicPlugin(plugin); + + // No new input files: Likely the plugin was already loaded + // or has failed to load. + if ( input_files.size() == pre_load_input_files ) + break; + + // Lookup the plugin to get the path to the shared object. + // We use that for the loaded_scripts.log and name of the + // generated file loading the scripts. + const zeek::plugin::Plugin *pp = nullptr; + for ( const auto* p : zeek::plugin_mgr->ActivePlugins() ) + { + if ( p->DynamicPlugin() && p->Name() == plugin ) + { + pp = p; + break; + } + } + + std::string name; + if ( pp ) + name = pp->PluginPath(); + else + { + // This shouldn't happen. If it does, we come up + // with an artificial filename rather than using + // the shared object name. + zeek::reporter->Warning("Did not find %s after loading", plugin); + name = std::string("@load-plugin ") + plugin; + } + + // Render all needed @load lines into a string + std::string buf = "# @load-plugin generated script\n"; + + while ( input_files.size() > pre_load_input_files ) + { + // Any relative files found by the plugin manager are + // converted to absolute paths relative to Zeek's working + // directory. That way it is clear where these are supposed + // to be found and find_relative_script_file() won't get + // confused by any ZEEKPATH settings. Also, plugin files + // containing any relative @loads themselves will work. 
+ std::error_code ec; + auto canonical = zeek::filesystem::canonical(input_files[0], ec); + if ( ec ) + zeek::reporter->FatalError("plugin script %s not found: %s", + input_files[0], ec.message().c_str()); + + buf += std::string("@load ") + canonical.string() + "\n"; + + delete[] input_files.remove_nth(0); + } + + zeek::detail::zeekygen_mgr->Script(name); + zeek::detail::ScannedFile sf(file_stack.length(), name, false /*skipped*/, + true /*prefixes_checked*/, true /*is_canonical*/); + zeek::detail::files_scanned.push_back(std::move(sf)); + + file_stack.push_back(new FileInfo(zeek::detail::current_module)); + + YY_BUFFER_STATE buffer = yy_scan_bytes(buf.data(), buf.size()); + switch_to(name.c_str(), buffer); break; + } case 0: if ( ! zeek::reporter->Errors() ) @@ -950,12 +1020,6 @@ void reject_directive(zeek::detail::Stmt* s) zeek::reporter->Error("incorrect use of directive"); } -// Be careful to never delete things from this list, as the strings -// are referred to (in order to save the locations of tokens and statements, -// for error reporting and debugging). -static zeek::name_list input_files; -static zeek::name_list essential_input_files; - void add_essential_input_file(const char* file) { if ( ! file ) diff --git a/testing/btest/Baseline/plugins.enum-bif-plugin/loaded_scripts.log.abs b/testing/btest/Baseline/plugins.enum-bif-plugin/loaded_scripts.log.abs new file mode 100644 index 0000000000..7ad1bc1f87 --- /dev/null +++ b/testing/btest/Baseline/plugins.enum-bif-plugin/loaded_scripts.log.abs @@ -0,0 +1,11 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. 
+<...>/enum-bif-plugin.zeek + <...>/enum-before-load-plugin.zeek + <...>/Demo-EnumBif.shared + <...>/__preload__.zeek + <...>/types.zeek + <...>/__load__.zeek + <...>/enumbif.bif.zeek + <...>/__load__.zeek + <...>/enum-after-load-plugin.zeek + <...>/enum-after-load-plugin-end.zeek diff --git a/testing/btest/Baseline/plugins.enum-bif-plugin/loaded_scripts.log.rel b/testing/btest/Baseline/plugins.enum-bif-plugin/loaded_scripts.log.rel new file mode 100644 index 0000000000..398207c02c --- /dev/null +++ b/testing/btest/Baseline/plugins.enum-bif-plugin/loaded_scripts.log.rel @@ -0,0 +1,11 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +<...>/enum-bif-plugin.zeek + <...>/enum-before-load-plugin.zeek + .<...>/Demo-EnumBif.shared + <...>/__preload__.zeek + <...>/types.zeek + <...>/__load__.zeek + <...>/enumbif.bif.zeek + <...>/__load__.zeek + <...>/enum-after-load-plugin.zeek + <...>/enum-after-load-plugin-end.zeek diff --git a/testing/btest/Baseline/plugins.enum-bif-plugin/output.abs b/testing/btest/Baseline/plugins.enum-bif-plugin/output.abs new file mode 100644 index 0000000000..d386a0bc59 --- /dev/null +++ b/testing/btest/Baseline/plugins.enum-bif-plugin/output.abs @@ -0,0 +1,6 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +EnumBif::MyEnumA +{ +EnumBif::MyEnumB, +EnumBif::MyEnumA +} diff --git a/testing/btest/Baseline/plugins.enum-bif-plugin/output.rel b/testing/btest/Baseline/plugins.enum-bif-plugin/output.rel new file mode 100644 index 0000000000..d386a0bc59 --- /dev/null +++ b/testing/btest/Baseline/plugins.enum-bif-plugin/output.rel @@ -0,0 +1,6 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. 
+EnumBif::MyEnumA +{ +EnumBif::MyEnumB, +EnumBif::MyEnumA +} diff --git a/testing/btest/plugins/enum-bif-plugin.zeek b/testing/btest/plugins/enum-bif-plugin.zeek new file mode 100644 index 0000000000..df8c796ea7 --- /dev/null +++ b/testing/btest/plugins/enum-bif-plugin.zeek @@ -0,0 +1,45 @@ +# @TEST-DOC: Ensure the enum from the .bif file is available right after @load-plugin in bare mode. +# @TEST-EXEC: ${DIST}/auxil/zeek-aux/plugin-support/init-plugin -u . Demo EnumBif +# @TEST-EXEC: cp -r %DIR/enum-bif-plugin/* . +# +# @TEST-EXEC: ./configure --zeek-dist=${DIST} && make +# +# @TEST-EXEC: ZEEK_PLUGIN_PATH=`pwd` zeek -b %INPUT >output.abs +# @TEST-EXEC: grep '[Ee]num' loaded_scripts.log > loaded_scripts.log.abs +# @TEST-EXEC: ZEEK_PLUGIN_PATH=./build zeek -b %INPUT >output.rel +# @TEST-EXEC: grep '[Ee]num' loaded_scripts.log > loaded_scripts.log.rel +# +# @TEST-EXEC: TEST_DIFF_CANONIFIER= btest-diff output.abs +# @TEST-EXEC: TEST_DIFF_CANONIFIER="sed -E 's/(Demo-EnumBif)\.(.*)$/\1.shared/' | $SCRIPTS/diff-remove-abspath" btest-diff loaded_scripts.log.abs +# @TEST-EXEC: TEST_DIFF_CANONIFIER= btest-diff output.rel +# @TEST-EXEC: TEST_DIFF_CANONIFIER="sed -E 's/(Demo-EnumBif)\.(.*)$/\1.shared/' | $SCRIPTS/diff-remove-abspath" btest-diff loaded_scripts.log.rel + +@load misc/loaded-scripts + +@load ./enum-before-load-plugin + +@load-plugin Demo::EnumBif + +@load ./enum-after-load-plugin + +event zeek_init() + { + print(EnumBif::MyEnumA); + print enum_names(EnumBif::MyEnum); + } + +@load-plugin Demo::EnumBif + +@load ./enum-after-load-plugin-end + +@TEST-START-FILE enum-before-load-plugin.zeek +# empty +@TEST-END-FILE + +@TEST-START-FILE enum-after-load-plugin.zeek +# empty +@TEST-END-FILE + +@TEST-START-FILE enum-after-load-plugin-end.zeek +# empty +@TEST-END-FILE diff --git a/testing/btest/plugins/enum-bif-plugin/.btest-ignore b/testing/btest/plugins/enum-bif-plugin/.btest-ignore new file mode 100644 index 0000000000..e69de29bb2 diff --git 
a/testing/btest/plugins/enum-bif-plugin/src/enumbif.bif b/testing/btest/plugins/enum-bif-plugin/src/enumbif.bif new file mode 100644 index 0000000000..d4fbf749f0 --- /dev/null +++ b/testing/btest/plugins/enum-bif-plugin/src/enumbif.bif @@ -0,0 +1,6 @@ +module EnumBif; + +enum MyEnum %{ + MyEnumA, + MyEnumB, +%}