file_analysis: Implement AnalyzerViolation() for file_analysis/Analyzer

Add a test parsing a malformed PE file showing that analyzer_violation_info
is raised with the fa_file object set.

It could be interesting to pass through an optional connection if one
exists, but access is provided through f$conns, too.
This commit is contained in:
Arne Welzel 2022-08-31 18:07:18 +02:00
parent bc8fd5a4c6
commit d5cd023dff
10 changed files with 145 additions and 6 deletions

3
NEWS
View file

@ -17,6 +17,9 @@ New Functionality
on ``AllAnalyzers::Tag`` values named ``is_protocol_analyzer()``, on ``AllAnalyzers::Tag`` values named ``is_protocol_analyzer()``,
``is_packet_analyzer()`` and ``is_file_analyzer()``. ``is_packet_analyzer()`` and ``is_file_analyzer()``.
- File analyzers can now raise analyzer violations to the script-layer via
the new AnalyzerViolation() method.
Deprecated Functionality Deprecated Functionality
------------------------ ------------------------

View file

@ -2,9 +2,14 @@
#include "zeek/file_analysis/Analyzer.h" #include "zeek/file_analysis/Analyzer.h"
#include "zeek/Event.h"
#include "zeek/Val.h" #include "zeek/Val.h"
#include "zeek/file_analysis/File.h"
#include "zeek/file_analysis/Manager.h" #include "zeek/file_analysis/Manager.h"
// For analyzer_violation_info
#include "event.bif.netvar_h"
namespace zeek::file_analysis namespace zeek::file_analysis
{ {
@ -23,7 +28,7 @@ void Analyzer::SetAnalyzerTag(const zeek::Tag& arg_tag)
Analyzer::Analyzer(zeek::Tag arg_tag, RecordValPtr arg_args, File* arg_file) Analyzer::Analyzer(zeek::Tag arg_tag, RecordValPtr arg_args, File* arg_file)
: tag(arg_tag), args(std::move(arg_args)), file(arg_file), got_stream_delivery(false), : tag(arg_tag), args(std::move(arg_args)), file(arg_file), got_stream_delivery(false),
skip(false) skip(false), analyzer_confirmed(false)
{ {
id = ++id_counter; id = ++id_counter;
} }
@ -33,4 +38,44 @@ Analyzer::Analyzer(RecordValPtr arg_args, File* arg_file)
{ {
} }
void Analyzer::AnalyzerConfirmation(zeek::Tag arg_tag)
{
if ( analyzer_confirmed )
return;
analyzer_confirmed = true;
if ( ! analyzer_confirmation_info )
return;
static auto info_type = zeek::id::find_type<RecordType>("AnalyzerConfirmationInfo");
static auto info_f_idx = info_type->FieldOffset("f");
auto info = zeek::make_intrusive<RecordVal>(info_type);
info->Assign(info_f_idx, GetFile()->ToVal());
const auto& tval = arg_tag ? arg_tag.AsVal() : tag.AsVal();
event_mgr.Enqueue(analyzer_confirmation_info, tval, info);
}
void Analyzer::AnalyzerViolation(const char* reason, const char* data, int len, zeek::Tag arg_tag)
{
if ( ! analyzer_violation_info )
return;
static auto info_type = zeek::id::find_type<RecordType>("AnalyzerViolationInfo");
static auto info_reason_idx = info_type->FieldOffset("reason");
static auto info_f_idx = info_type->FieldOffset("f");
static auto info_data_idx = info_type->FieldOffset("data");
auto info = zeek::make_intrusive<RecordVal>(info_type);
info->Assign(info_reason_idx, make_intrusive<StringVal>(reason));
info->Assign(info_f_idx, GetFile()->ToVal());
if ( data && len )
info->Assign(info_data_idx, make_intrusive<StringVal>(len, data));
const auto& tval = arg_tag ? arg_tag.AsVal() : tag.AsVal();
event_mgr.Enqueue(analyzer_violation_info, tval, info);
}
} // namespace zeek::file_analysis } // namespace zeek::file_analysis

View file

@ -133,6 +133,38 @@ public:
*/ */
bool Skipping() const { return skip; } bool Skipping() const { return skip; }
/**
* Signals to Zeek that the analyzer has recognized the input to indeed
* conform to the expected format. This should be called as early as
* possible during file analysis. It may turn into \c analyzer_confirmation_info
* events at the script-layer (but only once per file , even if the method is
* called multiple times).
*
* If tag is given, it overrides the analyzer tag passed to the
* scripting layer; the default is the one of the analyzer itself.
*/
virtual void AnalyzerConfirmation(zeek::Tag tag = zeek::Tag());
/**
* Signals to Zeek that the analyzer has found a sever violation
* that could indicate it's not parsing the expected file format.
* This turns into \c analyzer_violation_info events at the script-layer
* (one such event is raised for each call to this method so that the
* script-layer can built up a notion of how prevalent violations are; the
* more, the less likely it's the right format).
*
* @param reason A textual description of the error encountered.
*
* @param data An optional pointer to the malformed data.
*
* @param len If \a data is given, the length of it.
*
* @param tag If tag is given, it overrides the analyzer tag passed to the
* scripting layer; the default is the one of the analyzer itself.
*/
virtual void AnalyzerViolation(const char* reason, const char* data = nullptr, int len = 0,
zeek::Tag tag = zeek::Tag());
protected: protected:
/** /**
* Constructor. Only derived classes are meant to be instantiated. * Constructor. Only derived classes are meant to be instantiated.
@ -161,6 +193,7 @@ private:
File* file; /**< The file to which the analyzer is attached. */ File* file; /**< The file to which the analyzer is attached. */
bool got_stream_delivery; bool got_stream_delivery;
bool skip; bool skip;
bool analyzer_confirmed;
static ID id_counter; static ID id_counter;
}; };

View file

@ -30,6 +30,7 @@ bool PE::DeliverStream(const u_char* data, uint64_t len)
} }
catch ( const binpac::Exception& e ) catch ( const binpac::Exception& e )
{ {
AnalyzerViolation(util::fmt("Binpac exception: %s", e.c_msg()));
return false; return false;
} }

View file

@ -0,0 +1,2 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
Files::ANALYZER_PE, Binpac exception: binpac exception: &enforce violation : DOS_Header:AddressOfNewExeHeader, FKPuH630Tmj6UQUMP7, {\x0aPE\x0a}

View file

@ -0,0 +1,10 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
#separator \x09
#set_separator ,
#empty_field (empty)
#unset_field -
#path files
#open XXXX-XX-XX-XX-XX-XX
#fields ts fuid uid id.orig_h id.orig_p id.resp_h id.resp_p source depth analyzers mime_type filename duration local_orig is_orig seen_bytes total_bytes missing_bytes overflow_bytes timedout parent_fuid
#types time string string addr port addr port string count set[string] string string interval bool bool count count count count bool string
XXXXXXXXXX.XXXXXX FKPuH630Tmj6UQUMP7 - - - - - ./myfile.exe 0 PE application/x-dosexec - 0.000000 - - 64 - 0 0 F -

View file

@ -4,15 +4,18 @@ Demo::Foo - A Foo test analyzer (dynamic, version 1.0.0)
[Event] foo_piece [Event] foo_piece
=== ===
analyzer_confirmation_info, Files::ANALYZER_FOO, FCceqBvpMfirSN0Ri
foo_piece, FCceqBvpMfirSN0Ri, The National Center foo_piece, FCceqBvpMfirSN0Ri, The National Center
foo_piece, FCceqBvpMfirSN0Ri, net, consult your lo foo_piece, FCceqBvpMfirSN0Ri, net, consult your lo
foo_piece, FCceqBvpMfirSN0Ri, most everything else foo_piece, FCceqBvpMfirSN0Ri, most everything else
foo_piece, FCceqBvpMfirSN0Ri, low:\x0a\x0a /Mac foo_piece, FCceqBvpMfirSN0Ri, low:\x0a\x0a /Mac
foo_piece, FCceqBvpMfirSN0Ri, es and directories o foo_piece, FCceqBvpMfirSN0Ri, es and directories o
analyzer_violation_info, Files::ANALYZER_FOO, FCceqBvpMfirSN0Ri, test violation 5, es and directori
foo_piece, FCceqBvpMfirSN0Ri, r example, here is a foo_piece, FCceqBvpMfirSN0Ri, r example, here is a
foo_piece, FCceqBvpMfirSN0Ri, application, StuffIt foo_piece, FCceqBvpMfirSN0Ri, application, StuffIt
foo_piece, FCceqBvpMfirSN0Ri, tion BinHex by doubl foo_piece, FCceqBvpMfirSN0Ri, tion BinHex by doubl
foo_piece, FCceqBvpMfirSN0Ri, laced, or are going foo_piece, FCceqBvpMfirSN0Ri, laced, or are going
foo_piece, FCceqBvpMfirSN0Ri, sers several documen foo_piece, FCceqBvpMfirSN0Ri, sers several documen
analyzer_violation_info, Files::ANALYZER_FOO, FCceqBvpMfirSN0Ri, test violation 10, sers several doc
foo_piece, FCceqBvpMfirSN0Ri, er or can be printed foo_piece, FCceqBvpMfirSN0Ri, er or can be printed
foo_piece, FCceqBvpMfirSN0Ri, \x0a\x0aBug reports shoul foo_piece, FCceqBvpMfirSN0Ri, \x0a\x0aBug reports shoul

View file

@ -0,0 +1,24 @@
# @TEST-DOC: Verify analyzer_violation_info is raised for an invalid PE file.
# @TEST-EXEC: zeek -b %INPUT
# @TEST-EXEC: btest-diff .stdout
# @TEST-EXEC: btest-diff files.log
@load base/frameworks/files
@load base/files/pe
event analyzer_violation_info(tag: AllAnalyzers::Tag, info: AnalyzerViolationInfo)
{
print tag, info$reason, info$f$id, cat(info$f$info$analyzers);
}
event zeek_init()
{
local source: string = "./myfile.exe";
Input::add_analysis([$source=source, $name=source]);
}
# This file triggers a binpac exception for PE that is reported through
# analyzer_violation_info
@TEST-START-FILE ./myfile.exe
MZ0000000000000000000000000000000000000000000000000000000000000
@TEST-END-FILE

View file

@ -1,8 +1,8 @@
#include "Foo.h" #include "Foo.h"
#include <zeek/file_analysis/File.h> #include <zeek/file_analysis/File.h>
#include <zeek/file_analysis/Manager.h> #include <zeek/file_analysis/Manager.h>
#include <algorithm>
#include "events.bif.h" #include "events.bif.h"
@ -21,7 +21,16 @@ zeek::file_analysis::Analyzer* Foo::Instantiate(zeek::RecordValPtr args,
bool Foo::DeliverStream(const u_char* data, uint64_t len) bool Foo::DeliverStream(const u_char* data, uint64_t len)
{ {
static int i = 0;
AnalyzerConfirmation();
zeek::event_mgr.Enqueue(foo_piece, GetFile()->ToVal(), zeek::event_mgr.Enqueue(foo_piece, GetFile()->ToVal(),
zeek::make_intrusive<zeek::StringVal>(new zeek::String(data, len, 0))); zeek::make_intrusive<zeek::StringVal>(new zeek::String(data, len, 0)));
if ( ++i % 5 == 0 )
{
uint64_t threshold = 16;
AnalyzerViolation(zeek::util::fmt("test violation %d", i),
reinterpret_cast<const char*>(data), std::min(len, threshold));
}
return true; return true;
} }

View file

@ -7,12 +7,21 @@
# @TEST-EXEC: TEST_DIFF_CANONIFIER= btest-diff output # @TEST-EXEC: TEST_DIFF_CANONIFIER= btest-diff output
event file_new(f: fa_file) event file_new(f: fa_file)
{ {
Files::add_analyzer(f, Files::ANALYZER_FOO); Files::add_analyzer(f, Files::ANALYZER_FOO);
} }
event foo_piece(f: fa_file, data: string) event foo_piece(f: fa_file, data: string)
{ {
print "foo_piece", f$id, sub_bytes(data, 0, 20); print "foo_piece", f$id, sub_bytes(data, 0, 20);
} }
event analyzer_confirmation_info(tag: AllAnalyzers::Tag, info: AnalyzerConfirmationInfo)
{
print "analyzer_confirmation_info", tag, info$f$id;
}
event analyzer_violation_info(tag: AllAnalyzers::Tag, info: AnalyzerViolationInfo)
{
print "analyzer_violation_info", tag, info$f$id, info$reason, info$data;
}