Merge remote-tracking branch 'origin/topic/johanna/spicy-allow-providing-file-ids'

* origin/topic/johanna/spicy-allow-providing-file-ids:
  Spicy: allow providing file id in zeek::file_begin
This commit is contained in:
Johanna Amann 2023-11-23 17:18:32 +00:00
commit 5baa2841e8
11 changed files with 64 additions and 14 deletions

11
CHANGES
View file

@ -1,3 +1,14 @@
6.2.0-dev.187 | 2023-11-23 17:19:51 +0000
* Spicy: allow providing file id in zeek::file_begin (Johanna Amann, Corelight)
Allow Spicy parsers to generate their own file IDs and provide them to
Zeek. This duplicates functionality that is already available to (and
used by) some binpac-based analyzers. One example of an analyzer
creating its own file IDs is the SSL analyzer.
* Bump cmake submodule for INCLUDE_DIRS fix (Arne Welzel, Corelight)
6.2.0-dev.180 | 2023-11-22 10:34:18 +0100
* Integrate review feedback (Dominik Charousset, Corelight)

View file

@ -1 +1 @@
6.2.0-dev.180
6.2.0-dev.187

2
doc

@ -1 +1 @@
Subproject commit 7c2248ec62ad21097c7f80874a9e46385f74c665
Subproject commit 35b5d2daf1578d00eae08012b83d3b59f71d00e4

View file

@ -107,8 +107,9 @@ public function protocol_handle_close(handle: ProtocolHandle): void &cxxname="ze
## Signals the beginning of a file to Zeek's file analysis, associating it with the current connection.
## Optionally, a mime type can be provided. It will be passed on to Zeek's file analysis framework.
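## Optionally, a file ID can be provided. It will be passed on to Zeek's file analysis framework.
## If no file ID is provided, one is generated automatically. Providing a duplicate file ID raises an error.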
## Returns the Zeek-side file ID of the new file.
public function file_begin(mime_type: optional<string> = Null) : string &cxxname="zeek::spicy::rt::file_begin";
public function file_begin(mime_type: optional<string> = Null, fuid: optional<string> = Null) : string &cxxname="zeek::spicy::rt::file_begin";
## Returns the current file's FUID.
public function fuid() : string &cxxname="zeek::spicy::rt::fuid";

View file

@ -50,8 +50,9 @@ public:
/**
* Begins analysis for a new file, pushing a new state object onto the
* stack.
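* @param fid Optional precomputed file ID to use for the new file; if not
* provided, an ID is generated from the analyzer ID and an internal counter.
* @throws InvalidValue if the provided file ID duplicates an existing one.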
*/
FileState* push();
FileState* push(std::optional<std::string> fid = {});
/** Returns true if the stack is currently empty. */
bool isEmpty() const { return _stack.empty(); }

View file

@ -681,10 +681,17 @@ void rt::protocol_handle_close(const ProtocolHandle& handle) {
c->analyzer->RemoveChildAnalyzer(handle.id());
}
rt::cookie::FileState* rt::cookie::FileStateStack::push() {
rt::cookie::FileState* rt::cookie::FileStateStack::push(std::optional<std::string> fid_provided) {
auto _ = hilti::rt::profiler::start("zeek/rt/file-stack-push");
auto fid = file_mgr->HashHandle(hilti::rt::fmt("%s.%d", _analyzer_id, ++_id_counter));
_stack.emplace_back(fid);
if ( fid_provided && find(*fid_provided) )
throw InvalidValue(hilti::rt::fmt("Duplicate file id %s provided", *fid_provided));
std::string fid;
if ( fid_provided )
fid = *fid_provided;
else
fid = file_mgr->HashHandle(hilti::rt::fmt("%s.%d", _analyzer_id, ++_id_counter));
_stack.emplace_back(std::move(fid));
return &_stack.back();
}
@ -774,10 +781,10 @@ std::string rt::fuid() {
throw ValueUnavailable("fuid() not available in current context");
}
std::string rt::file_begin(const std::optional<std::string>& mime_type) {
std::string rt::file_begin(const std::optional<std::string>& mime_type, const std::optional<std::string>& fuid) {
auto _ = hilti::rt::profiler::start("zeek/rt/file_begin");
auto cookie = static_cast<Cookie*>(hilti::rt::context::cookie());
auto* fstate = _file_state_stack(cookie)->push();
auto* fstate = _file_state_stack(cookie)->push(fuid);
fstate->mime_type = mime_type;
// Feed an empty chunk into the analysis to force creating the file state inside Zeek.

View file

@ -382,9 +382,10 @@ void protocol_handle_close(const ProtocolHandle& handle);
* with the current connection.
*
* @param mime_type optional mime type passed to Zeek
* @param fid optional file ID passed to Zeek
* @returns Zeek-side file ID of the new file
*/
std::string file_begin(const std::optional<std::string>& mime_type);
std::string file_begin(const std::optional<std::string>& mime_type, const std::optional<std::string>& fid);
/**
* Returns the current file's FUID.

View file

@ -0,0 +1,3 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
FaAaAaAaAaAaAaAaAa foo-1.txt
FaAaAaAaAaAaAaAaAa foo-2.txt

View file

@ -0,0 +1,3 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
FaAaAaAaAaAaAaAaAa
FaAaAaAaAaAaAaAaAa

View file

@ -1,7 +1,8 @@
# @TEST-REQUIRES: have-spicy
#
# @TEST-EXEC: spicyz -d -o test.hlto ssh.spicy ./ssh-cond.evt
# @TEST-EXEC: zeek -r ${TRACES}/ssh/single-conn.trace test.hlto %INPUT Spicy::enable_print=T | sort >output
# @TEST-EXEC: cat ssh.spicy ssh-1.spicy > ssh-test.spicy
# @TEST-EXEC: spicyz -d -o test.hlto ssh-test.spicy ./ssh-cond.evt
# @TEST-EXEC: zeek -r ${TRACES}/ssh/single-conn.trace test.hlto %INPUT Spicy::enable_print=T | sort >output-1
#
# @TEST-EXEC: cat x509.log | grep -v ^# | cut -f 4-5 >x509.log.tmp && mv x509.log.tmp x509.log
# @TEST-EXEC: btest-diff x509.log
@ -9,7 +10,15 @@
# @TEST-EXEC: cat files.log | zeek-cut sha1 filename >files.log.tmp && mv files.log.tmp files.log
# @TEST-EXEC: btest-diff files.log
#
# @TEST-EXEC: TEST_DIFF_CANONIFIER=diff-canonifier-spicy btest-diff output
# @TEST-EXEC: cat ssh.spicy ssh-2.spicy > ssh-test.spicy
# @TEST-EXEC: spicyz -d -o test.hlto ssh-test.spicy ./ssh-cond.evt
# @TEST-EXEC: zeek -r ${TRACES}/ssh/single-conn.trace test.hlto %INPUT Spicy::enable_print=T | sort >output-2
#
# @TEST-EXEC: cat files.log | zeek-cut fuid filename >files.log.tmp && mv files.log.tmp files-2.log
# @TEST-EXEC: btest-diff files-2.log
#
# @TEST-EXEC: TEST_DIFF_CANONIFIER=diff-canonifier-spicy btest-diff output-1
# @TEST-EXEC: TEST_DIFF_CANONIFIER=diff-canonifier-spicy btest-diff output-2
# @TEST-START-FILE ssh.spicy
module SSH;
@ -33,7 +42,7 @@ public type Banner = unit {
dash : /-/;
software: /[^\r\n]*/;
var file_id: string = zeek::file_begin("application/x-x509-ca-cert");
var file_id: string;
var file_name: string = "foo-%d.txt" % ++file_counter;
};
@ -41,6 +50,20 @@ on Banner::%done { zeek::file_end(self.file_id); }
# @TEST-END-FILE
# First test case - just let Zeek generate the file ID
# @TEST-START-FILE ssh-1.spicy
on Banner::%init { self.file_id = zeek::file_begin("application/x-x509-ca-cert"); }
# @TEST-END-FILE ssh-1.spicy
# Second test case - provide a file ID
# @TEST-START-FILE ssh-2.spicy
on Banner::%init { self.file_id = zeek::file_begin("application/x-x509-ca-cert", "FaAaAaAaAaAaAaAaAa"); }
# @TEST-END-FILE ssh-2.spicy
# @TEST-START-FILE ssh-cond.evt
import zeek;