Spicy: allow providing file id in zeek::file_begin

Allow Spicy parsers to generate their own file IDs and provide them to
Zeek. This mirrors functionality that is already available to (and used
by) some binpac-based analyzers. One example of an analyzer that creates
its own file IDs is the SSL analyzer.
This commit is contained in:
Johanna Amann 2023-11-22 14:09:53 +00:00
parent a092fe6a8a
commit ae0b328826
8 changed files with 82 additions and 8 deletions

2
doc

@ -1 +1 @@
Subproject commit 7cc16f9ffbb61b1fb1e9d0e65c53121f34cb11dc Subproject commit 972cf59cc337b8fd85244722640381a4c1cfcb13

View file

@ -107,8 +107,9 @@ public function protocol_handle_close(handle: ProtocolHandle): void &cxxname="ze
## Signals the beginning of a file to Zeek's file analysis, associating it with the current connection. ## Signals the beginning of a file to Zeek's file analysis, associating it with the current connection.
## Optionally, a mime type can be provided. It will be passed on to Zeek's file analysis framework. ## Optionally, a mime type can be provided. It will be passed on to Zeek's file analysis framework.
## Optionally, a file ID can be provided. It will be passed on to Zeek's file analysis framework.
## Returns the Zeek-side file ID of the new file. ## Returns the Zeek-side file ID of the new file.
public function file_begin(mime_type: optional<string> = Null) : string &cxxname="zeek::spicy::rt::file_begin"; public function file_begin(mime_type: optional<string> = Null, fuid: optional<string> = Null) : string &cxxname="zeek::spicy::rt::file_begin";
## Returns the current file's FUID. ## Returns the current file's FUID.
public function fuid() : string &cxxname="zeek::spicy::rt::fuid"; public function fuid() : string &cxxname="zeek::spicy::rt::fuid";

View file

@ -50,8 +50,9 @@ public:
/** /**
* Begins analysis for a new file, pushing a new state object onto the * Begins analysis for a new file, pushing a new state object onto the
* stack. * stack.
* @param fid Optional precomputed file ID to use for the new file.
*/ */
FileState* push(); FileState* push(std::optional<std::string> fid = {});
/** Returns true if the stack is currently empty. */ /** Returns true if the stack is currently empty. */
bool isEmpty() const { return _stack.empty(); } bool isEmpty() const { return _stack.empty(); }

View file

@ -681,9 +681,11 @@ void rt::protocol_handle_close(const ProtocolHandle& handle) {
c->analyzer->RemoveChildAnalyzer(handle.id()); c->analyzer->RemoveChildAnalyzer(handle.id());
} }
rt::cookie::FileState* rt::cookie::FileStateStack::push() { rt::cookie::FileState* rt::cookie::FileStateStack::push(std::optional<std::string> fid_provided) {
auto _ = hilti::rt::profiler::start("zeek/rt/file-stack-push"); auto _ = hilti::rt::profiler::start("zeek/rt/file-stack-push");
auto fid = file_mgr->HashHandle(hilti::rt::fmt("%s.%d", _analyzer_id, ++_id_counter)); auto fid = fid_provided.value_or(file_mgr->HashHandle(hilti::rt::fmt("%s.%d", _analyzer_id, ++_id_counter)));
if ( find(fid) )
throw InvalidValue(hilti::rt::fmt("Duplicate file id %s provided", fid));
_stack.emplace_back(fid); _stack.emplace_back(fid);
return &_stack.back(); return &_stack.back();
} }
@ -774,10 +776,10 @@ std::string rt::fuid() {
throw ValueUnavailable("fuid() not available in current context"); throw ValueUnavailable("fuid() not available in current context");
} }
std::string rt::file_begin(const std::optional<std::string>& mime_type) { std::string rt::file_begin(const std::optional<std::string>& mime_type, const std::optional<std::string>& fuid) {
auto _ = hilti::rt::profiler::start("zeek/rt/file_begin"); auto _ = hilti::rt::profiler::start("zeek/rt/file_begin");
auto cookie = static_cast<Cookie*>(hilti::rt::context::cookie()); auto cookie = static_cast<Cookie*>(hilti::rt::context::cookie());
auto* fstate = _file_state_stack(cookie)->push(); auto* fstate = _file_state_stack(cookie)->push(fuid);
fstate->mime_type = mime_type; fstate->mime_type = mime_type;
// Feed an empty chunk into the analysis to force creating the file state inside Zeek. // Feed an empty chunk into the analysis to force creating the file state inside Zeek.

View file

@ -382,9 +382,10 @@ void protocol_handle_close(const ProtocolHandle& handle);
* with the current connection. * with the current connection.
* *
* @param mime_type optional mime type passed to Zeek * @param mime_type optional mime type passed to Zeek
* @param fid optional file ID passed to Zeek
* @returns Zeek-side file ID of the new file * @returns Zeek-side file ID of the new file
*/ */
std::string file_begin(const std::optional<std::string>& mime_type); std::string file_begin(const std::optional<std::string>& mime_type, const std::optional<std::string>& fid);
/** /**
* Returns the current file's FUID. * Returns the current file's FUID.

View file

@ -0,0 +1,3 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
FaAaAaAaAaAaAaAaAa foo-1.txt
FaAaAaAaAaAaAaAaAa foo-2.txt

View file

@ -0,0 +1,3 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
FaAaAaAaAaAaAaAaAa
FaAaAaAaAaAaAaAaAa

View file

@ -0,0 +1,63 @@
# @TEST-REQUIRES: have-spicy
#
# @TEST-EXEC: spicyz -d -o test.hlto ssh.spicy ./ssh-cond.evt
# This is equivalent to file-analysis-data-in, except that we provide our own file ID.
#
# @TEST-EXEC: zeek -r ${TRACES}/ssh/single-conn.trace test.hlto %INPUT Spicy::enable_print=T | sort >output
#
# @TEST-EXEC: cat files.log | zeek-cut fuid filename >files.log.tmp && mv files.log.tmp files.log
# @TEST-EXEC: btest-diff files.log
#
# @TEST-EXEC: TEST_DIFF_CANONIFIER=diff-canonifier-spicy btest-diff output
# @TEST-START-FILE ssh.spicy
module SSH;
import spicy;
import zeek;
global file_counter = 0;
public type Banner = unit {
magic : /SSH-/ {
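# This is a bit of cheating: instead of taking file content from the SSH
# traffic, we feed a hard-coded, base64-encoded blob into file analysis.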
local d: spicy::Base64Stream;
local dec : bytes = spicy::base64_decode(d, b"MIIESjCCAzKgAwIBAgINAeO0mqGNiqmBJWlQuDANBgkqhkiG9w0BAQsFADBMMSAwHgYDVQQLExdHbG9iYWxTaWduIFJvb3QgQ0EgLSBSMjETMBEGA1UEChMKR2xvYmFsU2lnbjETMBEGA1UEAxMKR2xvYmFsU2lnbjAeFw0xNzA2MTUwMDAwNDJaFw0yMTEyMTUwMDAwNDJaMEIxCzAJBgNVBAYTAlVTMR4wHAYDVQQKExVHb29nbGUgVHJ1c3QgU2VydmljZXMxEzARBgNVBAMTCkdUUyBDQSAxTzEwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDQGM9F1IvN05zkQO9+tN1pIRvJzzyOTHW5DzEZhD2ePCnvUA0Qk28FgICfKqC9EksC4T2fWBYk/jCfC3R3VZMdS/dN4ZKCEPZRrAzDsiKUDzRrmBBJ5wudgzndIMYcLe/RGGFl5yODIKgjEv/SJH/UL+dEaltN11BmsK+eQmMF++AcxGNhr59qM/9il71I2dN8FGfcddwuaej4bXhp0LcQBbjxMcI7JP0aM3T4I+DsaxmKFsbjzaTNC9uzpFlgOIg7rR25xoynUxv8vNmkq7zdPGHXkxWY7oG9j+JkRyBABk7XrJfoucBZEqFJJSPk7XA0LKW0Y3z5oz2D0c1tJKwHAgMBAAGjggEzMIIBLzAOBgNVHQ8BAf8EBAMCAYYwHQYDVR0lBBYwFAYIKwYBBQUHAwEGCCsGAQUFBwMCMBIGA1UdEwEB/wQIMAYBAf8CAQAwHQYDVR0OBBYEFJjR+G4Q68+b7GCfGJAboOt9Cf0rMB8GA1UdIwQYMBaAFJviB1dnHB7AagbeWbSaLd/cGYYuMDUGCCsGAQUFBwEBBCkwJzAlBggrBgEFBQcwAYYZaHR0cDovL29jc3AucGtpLmdvb2cvZ3NyMjAyBgNVHR8EKzApMCegJaAjhiFodHRwOi8vY3JsLnBraS5nb29nL2dzcjIvZ3NyMi5jcmwwPwYDVR0gBDgwNjA0BgZngQwBAgIwKjAoBggrBgEFBQcCARYcaHR0cHM6Ly9wa2kuZ29vZy9yZXBvc2l0b3J5LzANBgkqhkiG9w0BAQsFAAOCAQEAGoA+Nnn78y6pRjd9XlQWNa7HTgiZ/r3RNGkmUmYHPQq6Scti9PEajvwRT2iWTHQr02fesqOqBY2ETUwgZQ+lltoNFvhsO9tvBCOIazpswWC9aJ9xju4tWDQH8NVU6YZZ/XteDSGU9YzJqPjY8q3MDxrzmqepBCf5o8mw/wJ4a2G6xzUr6Fb6T8McDO22PLRL6u3M4Tzs3A2M1j6bykJYi8wWIRdAvKLWZu/axBVbzYmqmwkm5zLSDW5nIAJbELCQCZwMH56t2Dvqofxs6BBcCFIZUSpxu6x6td0V7SvJCCosirSmIatj/9dSSVDQibet8q/7UK4v4ZUN80atnZz1yg==");
dec += spicy::base64_finish(d);
print self.file_id;
zeek::file_data_in(dec);
}
version : /[^-]*/;
dash : /-/;
software: /[^\r\n]*/;
var file_id: string = zeek::file_begin("application/x-x509-ca-cert", "FaAaAaAaAaAaAaAaAa");
var file_name: string = "foo-%d.txt" % ++file_counter;
};
# Once the banner has been fully parsed, end the file that was opened via
# zeek::file_begin() when `file_id` was initialized.
on Banner::%done { zeek::file_end(self.file_id); }
# @TEST-END-FILE
# @TEST-START-FILE ssh-cond.evt
import zeek;
# Run SSH::Banner as a TCP protocol analyzer on port 22, taking the place of
# Zeek's built-in SSH analyzer.
protocol analyzer spicy::SSH over TCP:
parse with SSH::Banner,
port 22/tcp,
replaces SSH;
# When the `software` field has been parsed, raise have_filename with the
# current file ($file) and the file name generated by the unit.
on SSH::Banner::software -> event have_filename($file, self.file_name);
# @TEST-END-FILE
# Trigger creation of `files.log`.
@load base/protocols/ssl
redef X509::log_x509_in_files_log = T;
# Stores the filename supplied by the Spicy analyzer in the file's info
# record; the test then extracts the fuid and filename columns from files.log.
event have_filename(f: fa_file, filename: string)
{
f$info$filename = filename;
}