From ae0b328826935cd71b6bfa7c50fa31a4659fa78f Mon Sep 17 00:00:00 2001 From: Johanna Amann Date: Wed, 22 Nov 2023 14:09:53 +0000 Subject: [PATCH] Spicy: allow providing file id in zeek::file_begin Allow spicy parsers to generate their own file IDs and provide them to Zeek. This duplicates functionality that is currently possible (and used) by some binpac-based analyzers. One example for an analyzer creating its own file IDs is the SSL analyzer. --- doc | 2 +- scripts/spicy/zeek.spicy | 3 +- src/spicy/cookie.h | 3 +- src/spicy/runtime-support.cc | 10 +-- src/spicy/runtime-support.h | 3 +- .../files.log | 3 + .../output | 3 + .../file-analysis-data-in-with-fuid.zeek | 63 +++++++++++++++++++ 8 files changed, 82 insertions(+), 8 deletions(-) create mode 100644 testing/btest/Baseline/spicy.file-analysis-data-in-with-fuid/files.log create mode 100644 testing/btest/Baseline/spicy.file-analysis-data-in-with-fuid/output create mode 100644 testing/btest/spicy/file-analysis-data-in-with-fuid.zeek diff --git a/doc b/doc index 7cc16f9ffb..972cf59cc3 160000 --- a/doc +++ b/doc @@ -1 +1 @@ -Subproject commit 7cc16f9ffbb61b1fb1e9d0e65c53121f34cb11dc +Subproject commit 972cf59cc337b8fd85244722640381a4c1cfcb13 diff --git a/scripts/spicy/zeek.spicy b/scripts/spicy/zeek.spicy index 0afde17e8b..6cc445b628 100644 --- a/scripts/spicy/zeek.spicy +++ b/scripts/spicy/zeek.spicy @@ -107,8 +107,9 @@ public function protocol_handle_close(handle: ProtocolHandle): void &cxxname="ze ## Signals the beginning of a file to Zeek's file analysis, associating it with the current connection. ## Optionally, a mime type can be provided. It will be passed on to Zeek's file analysis framework. +## Optionally, a file ID can be provided. It will be passed on to Zeek's file analysis framework. ## Returns the Zeek-side file ID of the new file. 
-public function file_begin(mime_type: optional<string> = Null) : string &cxxname="zeek::spicy::rt::file_begin"; +public function file_begin(mime_type: optional<string> = Null, fuid: optional<string> = Null) : string &cxxname="zeek::spicy::rt::file_begin"; ## Returns the current file's FUID. public function fuid() : string &cxxname="zeek::spicy::rt::fuid"; diff --git a/src/spicy/cookie.h b/src/spicy/cookie.h index bc59f09a7b..6823dfc488 100644 --- a/src/spicy/cookie.h +++ b/src/spicy/cookie.h @@ -50,8 +50,9 @@ public: /** * Begins analysis for a new file, pushing a new state object onto the * stack. + * @param fid Optional precomputed file ID to use for the new file. */ - FileState* push(); + FileState* push(std::optional<std::string> fid = {}); /** Returns true if the stack is currently empty. */ bool isEmpty() const { return _stack.empty(); } diff --git a/src/spicy/runtime-support.cc b/src/spicy/runtime-support.cc index c59465aa4b..2aae3c5ae9 100644 --- a/src/spicy/runtime-support.cc +++ b/src/spicy/runtime-support.cc @@ -681,9 +681,11 @@ void rt::protocol_handle_close(const ProtocolHandle& handle) { c->analyzer->RemoveChildAnalyzer(handle.id()); } -rt::cookie::FileState* rt::cookie::FileStateStack::push() { +rt::cookie::FileState* rt::cookie::FileStateStack::push(std::optional<std::string> fid_provided) { auto _ = hilti::rt::profiler::start("zeek/rt/file-stack-push"); - auto fid = file_mgr->HashHandle(hilti::rt::fmt("%s.%d", _analyzer_id, ++_id_counter)); + auto fid = fid_provided.value_or(file_mgr->HashHandle(hilti::rt::fmt("%s.%d", _analyzer_id, ++_id_counter))); + if ( find(fid) ) + throw InvalidValue(hilti::rt::fmt("Duplicate file id %s provided", fid)); _stack.emplace_back(fid); return &_stack.back(); } @@ -774,10 +776,10 @@ std::string rt::fuid() { throw ValueUnavailable("fuid() not available in current context"); } -std::string rt::file_begin(const std::optional<std::string>& mime_type) { +std::string rt::file_begin(const std::optional<std::string>& mime_type, const std::optional<std::string>& fuid) { auto _ = 
hilti::rt::profiler::start("zeek/rt/file_begin"); auto cookie = static_cast<Cookie*>(hilti::rt::context::cookie()); - auto* fstate = _file_state_stack(cookie)->push(); + auto* fstate = _file_state_stack(cookie)->push(fuid); fstate->mime_type = mime_type; // Feed an empty chunk into the analysis to force creating the file state inside Zeek. diff --git a/src/spicy/runtime-support.h b/src/spicy/runtime-support.h index 0ba1962d0f..e61aea53e5 100644 --- a/src/spicy/runtime-support.h +++ b/src/spicy/runtime-support.h @@ -382,9 +382,10 @@ void protocol_handle_close(const ProtocolHandle& handle); * with the current connection. * * @param mime_type optional mime type passed to Zeek + * @param fid optional file ID passed to Zeek * @returns Zeek-side file ID of the new file */ -std::string file_begin(const std::optional<std::string>& mime_type); +std::string file_begin(const std::optional<std::string>& mime_type, const std::optional<std::string>& fid); /** * Returns the current file's FUID. diff --git a/testing/btest/Baseline/spicy.file-analysis-data-in-with-fuid/files.log b/testing/btest/Baseline/spicy.file-analysis-data-in-with-fuid/files.log new file mode 100644 index 0000000000..96b5da33b3 --- /dev/null +++ b/testing/btest/Baseline/spicy.file-analysis-data-in-with-fuid/files.log @@ -0,0 +1,3 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +FaAaAaAaAaAaAaAaAa foo-1.txt +FaAaAaAaAaAaAaAaAa foo-2.txt diff --git a/testing/btest/Baseline/spicy.file-analysis-data-in-with-fuid/output b/testing/btest/Baseline/spicy.file-analysis-data-in-with-fuid/output new file mode 100644 index 0000000000..a1b10d3fd2 --- /dev/null +++ b/testing/btest/Baseline/spicy.file-analysis-data-in-with-fuid/output @@ -0,0 +1,3 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. 
+FaAaAaAaAaAaAaAaAa +FaAaAaAaAaAaAaAaAa diff --git a/testing/btest/spicy/file-analysis-data-in-with-fuid.zeek b/testing/btest/spicy/file-analysis-data-in-with-fuid.zeek new file mode 100644 index 0000000000..12200dee5b --- /dev/null +++ b/testing/btest/spicy/file-analysis-data-in-with-fuid.zeek @@ -0,0 +1,63 @@ +# @TEST-REQUIRES: have-spicy +# +# @TEST-EXEC: spicyz -d -o test.hlto ssh.spicy ./ssh-cond.evt +# This is equivalent to file-analysis-data-in, besides the fact that we provide our own file ID. +# +# @TEST-EXEC: zeek -r ${TRACES}/ssh/single-conn.trace test.hlto %INPUT Spicy::enable_print=T | sort >output +# +# @TEST-EXEC: cat files.log | zeek-cut fuid filename >files.log.tmp && mv files.log.tmp files.log +# @TEST-EXEC: btest-diff files.log +# +# @TEST-EXEC: TEST_DIFF_CANONIFIER=diff-canonifier-spicy btest-diff output + +# @TEST-START-FILE ssh.spicy +module SSH; + +import spicy; +import zeek; + +global file_counter = 0; + +public type Banner = unit { + magic : /SSH-/ { + # This is a bit of cheating. 
+ local d: spicy::Base64Stream; + local dec : bytes = spicy::base64_decode(d, b"MIIESjCCAzKgAwIBAgINAeO0mqGNiqmBJWlQuDANBgkqhkiG9w0BAQsFADBMMSAwHgYDVQQLExdHbG9iYWxTaWduIFJvb3QgQ0EgLSBSMjETMBEGA1UEChMKR2xvYmFsU2lnbjETMBEGA1UEAxMKR2xvYmFsU2lnbjAeFw0xNzA2MTUwMDAwNDJaFw0yMTEyMTUwMDAwNDJaMEIxCzAJBgNVBAYTAlVTMR4wHAYDVQQKExVHb29nbGUgVHJ1c3QgU2VydmljZXMxEzARBgNVBAMTCkdUUyBDQSAxTzEwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDQGM9F1IvN05zkQO9+tN1pIRvJzzyOTHW5DzEZhD2ePCnvUA0Qk28FgICfKqC9EksC4T2fWBYk/jCfC3R3VZMdS/dN4ZKCEPZRrAzDsiKUDzRrmBBJ5wudgzndIMYcLe/RGGFl5yODIKgjEv/SJH/UL+dEaltN11BmsK+eQmMF++AcxGNhr59qM/9il71I2dN8FGfcddwuaej4bXhp0LcQBbjxMcI7JP0aM3T4I+DsaxmKFsbjzaTNC9uzpFlgOIg7rR25xoynUxv8vNmkq7zdPGHXkxWY7oG9j+JkRyBABk7XrJfoucBZEqFJJSPk7XA0LKW0Y3z5oz2D0c1tJKwHAgMBAAGjggEzMIIBLzAOBgNVHQ8BAf8EBAMCAYYwHQYDVR0lBBYwFAYIKwYBBQUHAwEGCCsGAQUFBwMCMBIGA1UdEwEB/wQIMAYBAf8CAQAwHQYDVR0OBBYEFJjR+G4Q68+b7GCfGJAboOt9Cf0rMB8GA1UdIwQYMBaAFJviB1dnHB7AagbeWbSaLd/cGYYuMDUGCCsGAQUFBwEBBCkwJzAlBggrBgEFBQcwAYYZaHR0cDovL29jc3AucGtpLmdvb2cvZ3NyMjAyBgNVHR8EKzApMCegJaAjhiFodHRwOi8vY3JsLnBraS5nb29nL2dzcjIvZ3NyMi5jcmwwPwYDVR0gBDgwNjA0BgZngQwBAgIwKjAoBggrBgEFBQcCARYcaHR0cHM6Ly9wa2kuZ29vZy9yZXBvc2l0b3J5LzANBgkqhkiG9w0BAQsFAAOCAQEAGoA+Nnn78y6pRjd9XlQWNa7HTgiZ/r3RNGkmUmYHPQq6Scti9PEajvwRT2iWTHQr02fesqOqBY2ETUwgZQ+lltoNFvhsO9tvBCOIazpswWC9aJ9xju4tWDQH8NVU6YZZ/XteDSGU9YzJqPjY8q3MDxrzmqepBCf5o8mw/wJ4a2G6xzUr6Fb6T8McDO22PLRL6u3M4Tzs3A2M1j6bykJYi8wWIRdAvKLWZu/axBVbzYmqmwkm5zLSDW5nIAJbELCQCZwMH56t2Dvqofxs6BBcCFIZUSpxu6x6td0V7SvJCCosirSmIatj/9dSSVDQibet8q/7UK4v4ZUN80atnZz1yg=="); + dec += spicy::base64_finish(d); + + print self.file_id; + zeek::file_data_in(dec); + } + version : /[^-]*/; + dash : /-/; + software: /[^\r\n]*/; + + var file_id: string = zeek::file_begin("application/x-x509-ca-cert", "FaAaAaAaAaAaAaAaAa"); + var file_name: string = "foo-%d.txt" % ++file_counter; +}; + +on Banner::%done { zeek::file_end(self.file_id); } + +# @TEST-END-FILE + +# @TEST-START-FILE ssh-cond.evt + +import zeek; + +protocol 
analyzer spicy::SSH over TCP: + parse with SSH::Banner, + port 22/tcp, + replaces SSH; + +on SSH::Banner::software -> event have_filename($file, self.file_name); + +# @TEST-END-FILE + +# Trigger creation of `files.log`. +@load base/protocols/ssl +redef X509::log_x509_in_files_log = T; + +event have_filename(f: fa_file, filename: string) + { + f$info$filename = filename; + }