Reformat Zeek in Spicy style

This largely copies over Spicy's `.clang-format` configuration file. The
one place where we deviate is header include order since Zeek depends on
headers being included in a certain order.
This commit is contained in:
Benjamin Bannier 2023-10-10 21:13:34 +02:00
parent 7b8e7ed72c
commit f5a76c1aed
786 changed files with 131714 additions and 153609 deletions

View file

@ -10,93 +10,85 @@
#include "const.bif.netvar_h" // for max_analyzer_violations
#include "event.bif.netvar_h" // for analyzer_violation_info
namespace zeek::file_analysis
{
namespace zeek::file_analysis {
ID Analyzer::id_counter = 0;
Analyzer::~Analyzer()
{
DBG_LOG(DBG_FILE_ANALYSIS, "Destroy file analyzer %s", file_mgr->GetComponentName(tag).c_str());
}
// Destructor. Its only action is a DBG_FILE_ANALYSIS debug message naming the
// component registered for this analyzer's tag.
Analyzer::~Analyzer() {
DBG_LOG(DBG_FILE_ANALYSIS, "Destroy file analyzer %s", file_mgr->GetComponentName(tag).c_str());
}
void Analyzer::SetAnalyzerTag(const zeek::Tag& arg_tag)
{
assert(! tag || tag == arg_tag);
tag = arg_tag;
}
// Stores the analyzer's tag. The assertion only accepts the call while no tag
// is set yet, or when the new tag equals the one already held, so an existing
// tag cannot be replaced by a different one.
void Analyzer::SetAnalyzerTag(const zeek::Tag& arg_tag) {
assert(! tag || tag == arg_tag);
tag = arg_tag;
}
// Constructor for an analyzer whose tag is known up front. Takes over the tag,
// the argument record and the file pointer, clears the stream-delivery, skip
// and confirmation flags, and assigns the next value of the shared id_counter
// as this instance's ID. The initializer list is shown below in both the old
// and the reformatted layout.
Analyzer::Analyzer(zeek::Tag arg_tag, RecordValPtr arg_args, File* arg_file)
: tag(arg_tag), args(std::move(arg_args)), file(arg_file), got_stream_delivery(false),
skip(false), analyzer_confirmed(false)
{
id = ++id_counter;
}
: tag(arg_tag),
args(std::move(arg_args)),
file(arg_file),
got_stream_delivery(false),
skip(false),
analyzer_confirmed(false) {
id = ++id_counter;
}
Analyzer::Analyzer(RecordValPtr arg_args, File* arg_file)
: Analyzer({}, std::move(arg_args), arg_file)
{
}
// Tag-less constructor: delegates to the three-argument constructor with a
// default-constructed tag; the real tag is expected to arrive later through
// SetAnalyzerTag().
Analyzer::Analyzer(RecordValPtr arg_args, File* arg_file)
    : Analyzer(zeek::Tag(), std::move(arg_args), arg_file) {}
const char* Analyzer::GetAnalyzerName() const
{
assert(tag);
return file_mgr->GetComponentName(tag).c_str();
}
// Returns the component name that file_mgr reports for this analyzer's tag.
// Asserts that a tag has been set.
// NOTE(review): the returned pointer comes from c_str() on the result of
// GetComponentName(); it stays valid only if that call returns a reference to
// a long-lived string rather than a temporary -- confirm against the manager.
const char* Analyzer::GetAnalyzerName() const {
assert(tag);
return file_mgr->GetComponentName(tag).c_str();
}
// Enqueues an analyzer_confirmation_info event at most once per analyzer
// instance: the first call sets analyzer_confirmed, every later call returns
// immediately. If analyzer_confirmation_info evaluates to false (presumably no
// handler is registered -- confirm) nothing is enqueued, but the flag stays
// set. The event carries an AnalyzerConfirmationInfo record whose "f" field is
// the file's value, together with arg_tag if it is set, otherwise the
// analyzer's own tag. The record type and field offset are looked up once and
// kept in function-local statics.
void Analyzer::AnalyzerConfirmation(zeek::Tag arg_tag)
{
if ( analyzer_confirmed )
return;
void Analyzer::AnalyzerConfirmation(zeek::Tag arg_tag) {
if ( analyzer_confirmed )
return;
analyzer_confirmed = true;
analyzer_confirmed = true;
if ( ! analyzer_confirmation_info )
return;
if ( ! analyzer_confirmation_info )
return;
static auto info_type = zeek::id::find_type<RecordType>("AnalyzerConfirmationInfo");
static auto info_f_idx = info_type->FieldOffset("f");
static auto info_type = zeek::id::find_type<RecordType>("AnalyzerConfirmationInfo");
static auto info_f_idx = info_type->FieldOffset("f");
auto info = zeek::make_intrusive<RecordVal>(info_type);
info->Assign(info_f_idx, GetFile()->ToVal());
auto info = zeek::make_intrusive<RecordVal>(info_type);
info->Assign(info_f_idx, GetFile()->ToVal());
const auto& tval = arg_tag ? arg_tag.AsVal() : tag.AsVal();
event_mgr.Enqueue(analyzer_confirmation_info, tval, info);
}
const auto& tval = arg_tag ? arg_tag.AsVal() : tag.AsVal();
event_mgr.Enqueue(analyzer_confirmation_info, tval, info);
}
// Counts a violation and, while the count is within
// BifConst::max_analyzer_violations, enqueues an analyzer_violation_info
// event. Once the limit is exceeded no further events are enqueued; the first
// call over the limit additionally reports a single
// "too_many_analyzer_violations" weird. If analyzer_violation_info evaluates
// to false the call returns after counting. The event carries an
// AnalyzerViolationInfo record with "reason", "f" (the file's value) and, when
// both data and len are non-zero, "data"; the tag passed along is arg_tag if
// set, otherwise the analyzer's own tag.
// NOTE(review): the `data && len` check also lets a negative len through to
// the StringVal constructor -- confirm callers never pass one, or tighten the
// check to `len > 0`.
void Analyzer::AnalyzerViolation(const char* reason, const char* data, int len, zeek::Tag arg_tag)
{
++analyzer_violations;
void Analyzer::AnalyzerViolation(const char* reason, const char* data, int len, zeek::Tag arg_tag) {
++analyzer_violations;
if ( analyzer_violations > BifConst::max_analyzer_violations )
{
if ( analyzer_violations == BifConst::max_analyzer_violations + 1 )
Weird("too_many_analyzer_violations");
if ( analyzer_violations > BifConst::max_analyzer_violations ) {
if ( analyzer_violations == BifConst::max_analyzer_violations + 1 )
Weird("too_many_analyzer_violations");
return;
}
return;
}
if ( ! analyzer_violation_info )
return;
if ( ! analyzer_violation_info )
return;
static auto info_type = zeek::id::find_type<RecordType>("AnalyzerViolationInfo");
static auto info_reason_idx = info_type->FieldOffset("reason");
static auto info_f_idx = info_type->FieldOffset("f");
static auto info_data_idx = info_type->FieldOffset("data");
static auto info_type = zeek::id::find_type<RecordType>("AnalyzerViolationInfo");
static auto info_reason_idx = info_type->FieldOffset("reason");
static auto info_f_idx = info_type->FieldOffset("f");
static auto info_data_idx = info_type->FieldOffset("data");
auto info = zeek::make_intrusive<RecordVal>(info_type);
info->Assign(info_reason_idx, make_intrusive<StringVal>(reason));
info->Assign(info_f_idx, GetFile()->ToVal());
if ( data && len )
info->Assign(info_data_idx, make_intrusive<StringVal>(len, data));
auto info = zeek::make_intrusive<RecordVal>(info_type);
info->Assign(info_reason_idx, make_intrusive<StringVal>(reason));
info->Assign(info_f_idx, GetFile()->ToVal());
if ( data && len )
info->Assign(info_data_idx, make_intrusive<StringVal>(len, data));
const auto& tval = arg_tag ? arg_tag.AsVal() : tag.AsVal();
event_mgr.Enqueue(analyzer_violation_info, tval, info);
}
const auto& tval = arg_tag ? arg_tag.AsVal() : tag.AsVal();
event_mgr.Enqueue(analyzer_violation_info, tval, info);
}
void Analyzer::Weird(const char* name, const char* addl)
{
zeek::reporter->Weird(GetFile(), name, addl, GetAnalyzerName());
}
void Analyzer::Weird(const char* name, const char* addl) {
zeek::reporter->Weird(GetFile(), name, addl, GetAnalyzerName());
}
} // namespace zeek::file_analysis
} // namespace zeek::file_analysis

View file

@ -6,14 +6,12 @@
#include "zeek/Tag.h"
namespace zeek
{
namespace zeek {
class RecordVal;
using RecordValPtr = IntrusivePtr<RecordVal>;
namespace file_analysis
{
namespace file_analysis {
class File;
using ID = uint32_t;
@ -21,195 +19,194 @@ using ID = uint32_t;
/**
* Base class for analyzers that can be attached to file_analysis::File objects.
*/
class Analyzer
{
class Analyzer {
public:
/**
* Destructor. Nothing special about it. Virtual since we definitely expect
* to delete instances of derived classes via pointers to this class.
*/
virtual ~Analyzer();
/**
* Destructor. Nothing special about it. Virtual since we definitely expect
* to delete instances of derived classes via pointers to this class.
*/
virtual ~Analyzer();
/**
* Initializes the analyzer before input processing starts.
*/
virtual void Init() { }
/**
* Initializes the analyzer before input processing starts.
*/
virtual void Init() {}
/**
* Finishes the analyzer's operation after all input has been parsed.
*/
virtual void Done() { }
/**
* Finishes the analyzer's operation after all input has been parsed.
*/
virtual void Done() {}
/**
* Subclasses may override this method to receive file data non-sequentially.
* @param data points to start of a chunk of file data.
* @param len length in bytes of the chunk of data pointed to by \a data.
* @param offset the byte offset within full file that data chunk starts.
* @return true if the analyzer is still in a valid state to continue
* receiving data/events or false if it's essentially "done".
*/
virtual bool DeliverChunk(const u_char* data, uint64_t len, uint64_t offset) { return true; }
/**
* Subclasses may override this method to receive file data non-sequentially.
* @param data points to start of a chunk of file data.
* @param len length in bytes of the chunk of data pointed to by \a data.
* @param offset the byte offset within full file that data chunk starts.
* @return true if the analyzer is still in a valid state to continue
* receiving data/events or false if it's essentially "done".
*/
virtual bool DeliverChunk(const u_char* data, uint64_t len, uint64_t offset) { return true; }
/**
* Subclasses may override this method to receive file data sequentially.
* @param data points to start of the next chunk of file data.
* @param len length in bytes of the chunk of data pointed to by \a data.
* @return true if the analyzer is still in a valid state to continue
* receiving data/events or false if it's essentially "done".
*/
virtual bool DeliverStream(const u_char* data, uint64_t len) { return true; }
/**
* Subclasses may override this method to receive file data sequentially.
* @param data points to start of the next chunk of file data.
* @param len length in bytes of the chunk of data pointed to by \a data.
* @return true if the analyzer is still in a valid state to continue
* receiving data/events or false if it's essentially "done".
*/
virtual bool DeliverStream(const u_char* data, uint64_t len) { return true; }
/**
* Subclasses may override this method to specifically handle an EOF signal,
* which means no more data is going to be incoming and the analyzer
* may be deleted/cleaned up soon.
* @return true if the analyzer is still in a valid state to continue
* receiving data/events or false if it's essentially "done".
*/
virtual bool EndOfFile() { return true; }
/**
* Subclasses may override this method to specifically handle an EOF signal,
* which means no more data is going to be incoming and the analyzer
* may be deleted/cleaned up soon.
* @return true if the analyzer is still in a valid state to continue
* receiving data/events or false if it's essentially "done".
*/
virtual bool EndOfFile() { return true; }
/**
* Subclasses may override this method to handle missing data in a file.
* @param offset the byte offset within full file at which the missing
* data chunk occurs.
* @param len the number of missing bytes.
* @return true if the analyzer is still in a valid state to continue
* receiving data/events or false if it's essentially "done".
*/
virtual bool Undelivered(uint64_t offset, uint64_t len) { return true; }
/**
* Subclasses may override this method to handle missing data in a file.
* @param offset the byte offset within full file at which the missing
* data chunk occurs.
* @param len the number of missing bytes.
* @return true if the analyzer is still in a valid state to continue
* receiving data/events or false if it's essentially "done".
*/
virtual bool Undelivered(uint64_t offset, uint64_t len) { return true; }
/**
* @return the analyzer type enum value.
*/
zeek::Tag Tag() const { return tag; }
/**
* @return the analyzer type enum value.
*/
zeek::Tag Tag() const { return tag; }
/**
* @return the name of the analyzer.
*/
const char* GetAnalyzerName() const;
/**
* @return the name of the analyzer.
*/
const char* GetAnalyzerName() const;
/**
* Returns the analyzer instance's internal ID. These IDs are unique
* across all analyzers instantiated and can thus be used to
* identify a specific instance.
*/
ID GetID() const { return id; }
/**
* Returns the analyzer instance's internal ID. These IDs are unique
* across all analyzers instantiated and can thus be used to
* identify a specific instance.
*/
ID GetID() const { return id; }
/**
* @return the AnalyzerArgs associated with the analyzer.
*/
const RecordValPtr& GetArgs() const { return args; }
/**
* @return the AnalyzerArgs associated with the analyzer.
*/
const RecordValPtr& GetArgs() const { return args; }
/**
* @return the file_analysis::File object to which the analyzer is attached.
*/
File* GetFile() const { return file; }
/**
* @return the file_analysis::File object to which the analyzer is attached.
*/
File* GetFile() const { return file; }
/**
* Sets the tag associated with the analyzer's type. Note that this
* can be called only right after construction, if the constructor
* did not receive a name or tag. The method cannot be used to change
* an existing tag.
*/
void SetAnalyzerTag(const zeek::Tag& tag);
/**
* Sets the tag associated with the analyzer's type. Note that this
* can be called only right after construction, if the constructor
* did not receive a name or tag. The method cannot be used to change
* an existing tag.
*/
void SetAnalyzerTag(const zeek::Tag& tag);
/**
* @return true if the analyzer has ever seen a stream-wise delivery.
*/
bool GotStreamDelivery() const { return got_stream_delivery; }
/**
* @return true if the analyzer has ever seen a stream-wise delivery.
*/
bool GotStreamDelivery() const { return got_stream_delivery; }
/**
* Flag the analyzer as having seen a stream-wise delivery.
*/
void SetGotStreamDelivery() { got_stream_delivery = true; }
/**
* Flag the analyzer as having seen a stream-wise delivery.
*/
void SetGotStreamDelivery() { got_stream_delivery = true; }
/**
* Signals that the analyzer is to skip all further input
* processing. This won't have an immediate effect internally, but
* the flag can be queried through Skipping().
*
* @param do_skip If true, further processing will be skipped.
*/
void SetSkip(bool do_skip) { skip = do_skip; }
/**
* Signals that the analyzer is to skip all further input
* processing. This won't have an immediate effect internally, but
* the flag can be queried through Skipping().
*
* @param do_skip If true, further processing will be skipped.
*/
void SetSkip(bool do_skip) { skip = do_skip; }
/**
* Returns true if the analyzer has been told to skip processing all
* further input.
*/
bool Skipping() const { return skip; }
/**
* Returns true if the analyzer has been told to skip processing all
* further input.
*/
bool Skipping() const { return skip; }
/**
* Signals to Zeek that the analyzer has recognized the input to indeed
* conform to the expected format. This should be called as early as
* possible during file analysis. It may turn into \c analyzer_confirmation_info
* events at the script-layer (but only once per file, even if the method is
* called multiple times).
*
* If tag is given, it overrides the analyzer tag passed to the
* scripting layer; the default is the one of the analyzer itself.
*/
virtual void AnalyzerConfirmation(zeek::Tag tag = zeek::Tag());
/**
* Signals to Zeek that the analyzer has recognized the input to indeed
* conform to the expected format. This should be called as early as
* possible during file analysis. It may turn into \c analyzer_confirmation_info
* events at the script-layer (but only once per file, even if the method is
* called multiple times).
*
* If tag is given, it overrides the analyzer tag passed to the
* scripting layer; the default is the one of the analyzer itself.
*/
virtual void AnalyzerConfirmation(zeek::Tag tag = zeek::Tag());
/**
* Signals to Zeek that the analyzer has found a severe violation
* that could indicate it's not parsing the expected file format.
* This turns into \c analyzer_violation_info events at the script-layer
* (one such event is raised for each call to this method so that the
* script-layer can build up a notion of how prevalent violations are; the
* more, the less likely it's the right format).
*
* @param reason A textual description of the error encountered.
*
* @param data An optional pointer to the malformed data.
*
* @param len If \a data is given, the length of it.
*
* @param tag If tag is given, it overrides the analyzer tag passed to the
* scripting layer; the default is the one of the analyzer itself.
*/
virtual void AnalyzerViolation(const char* reason, const char* data = nullptr, int len = 0,
zeek::Tag tag = zeek::Tag());
/**
* Signals to Zeek that the analyzer has found a severe violation
* that could indicate it's not parsing the expected file format.
* This turns into \c analyzer_violation_info events at the script-layer
* (one such event is raised for each call to this method so that the
* script-layer can build up a notion of how prevalent violations are; the
* more, the less likely it's the right format).
*
* @param reason A textual description of the error encountered.
*
* @param data An optional pointer to the malformed data.
*
* @param len If \a data is given, the length of it.
*
* @param tag If tag is given, it overrides the analyzer tag passed to the
* scripting layer; the default is the one of the analyzer itself.
*/
virtual void AnalyzerViolation(const char* reason, const char* data = nullptr, int len = 0,
zeek::Tag tag = zeek::Tag());
/**
* Convenience function that forwards directly to the corresponding
* reporter->Weird(file, ...).
*/
void Weird(const char* name, const char* addl = "");
/**
* Convenience function that forwards directly to the corresponding
* reporter->Weird(file, ...).
*/
void Weird(const char* name, const char* addl = "");
protected:
/**
* Constructor. Only derived classes are meant to be instantiated.
* @param arg_tag the tag defining the analyzer's type.
* @param arg_args an \c AnalyzerArgs (script-layer type) value specifying
* tunable options, if any, related to a particular analyzer type.
* @param arg_file the file to which the analyzer is being attached.
*/
Analyzer(zeek::Tag arg_tag, RecordValPtr arg_args, File* arg_file);
/**
* Constructor. Only derived classes are meant to be instantiated.
* @param arg_tag the tag defining the analyzer's type.
* @param arg_args an \c AnalyzerArgs (script-layer type) value specifying
* tunable options, if any, related to a particular analyzer type.
* @param arg_file the file to which the analyzer is being attached.
*/
Analyzer(zeek::Tag arg_tag, RecordValPtr arg_args, File* arg_file);
/**
* Constructor. Only derived classes are meant to be instantiated.
* As this version of the constructor does not receive a name or tag,
* SetAnalyzerTag() must be called before the instance can be used.
*
* @param arg_args an \c AnalyzerArgs (script-layer type) value specifying
* tunable options, if any, related to a particular analyzer type.
* @param arg_file the file to which the analyzer is being attached.
*/
Analyzer(RecordValPtr arg_args, File* arg_file);
/**
* Constructor. Only derived classes are meant to be instantiated.
* As this version of the constructor does not receive a name or tag,
* SetAnalyzerTag() must be called before the instance can be used.
*
* @param arg_args an \c AnalyzerArgs (script-layer type) value specifying
* tunable options, if any, related to a particular analyzer type.
* @param arg_file the file to which the analyzer is being attached.
*/
Analyzer(RecordValPtr arg_args, File* arg_file);
private:
ID id; /**< Unique instance ID. */
zeek::Tag tag; /**< The particular type of the analyzer instance. */
RecordValPtr args; /**< \c AnalyzerArgs val gives tunable analyzer params. */
File* file; /**< The file to which the analyzer is attached. */
bool got_stream_delivery;
bool skip;
bool analyzer_confirmed;
ID id; /**< Unique instance ID. */
zeek::Tag tag; /**< The particular type of the analyzer instance. */
RecordValPtr args; /**< \c AnalyzerArgs val gives tunable analyzer params. */
File* file; /**< The file to which the analyzer is attached. */
bool got_stream_delivery;
bool skip;
bool analyzer_confirmed;
uint64_t analyzer_violations = 0;
uint64_t analyzer_violations = 0;
static ID id_counter;
};
static ID id_counter;
};
} // namespace file_analysis
} // namespace zeek
} // namespace file_analysis
} // namespace zeek

View file

@ -9,194 +9,162 @@
#include "zeek/file_analysis/Manager.h"
#include "zeek/file_analysis/file_analysis.bif.h"
namespace zeek::file_analysis::detail
{
namespace zeek::file_analysis::detail {
// Delete function that the AnalyzerSet constructor installs on analyzer_map:
// treats the untyped pointer as a file_analysis::Analyzer, calls Done() on it
// and then deletes it.
static void analyzer_del_func(void* v)
{
file_analysis::Analyzer* a = (file_analysis::Analyzer*)v;
static void analyzer_del_func(void* v) {
file_analysis::Analyzer* a = (file_analysis::Analyzer*)v;
a->Done();
delete a;
}
a->Done();
delete a;
}
AnalyzerSet::AnalyzerSet(File* arg_file) : file(arg_file)
{
analyzer_map.SetDeleteFunc(analyzer_del_func);
}
// Binds the set to its file and registers analyzer_del_func as the delete
// function of the analyzer map.
AnalyzerSet::AnalyzerSet(File* arg_file) : file(arg_file) {
    analyzer_map.SetDeleteFunc(analyzer_del_func);
}
AnalyzerSet::~AnalyzerSet()
{
while ( ! mod_queue.empty() )
{
Modification* mod = mod_queue.front();
mod->Abort();
delete mod;
mod_queue.pop();
}
}
// Empties the modification queue without applying anything: each pending
// entry is aborted, destroyed, and then popped, front to back.
AnalyzerSet::~AnalyzerSet() {
    for ( ; ! mod_queue.empty(); mod_queue.pop() ) {
        Modification* pending = mod_queue.front();
        pending->Abort();
        delete pending;
    }
}
Analyzer* AnalyzerSet::Find(const zeek::Tag& tag, RecordValPtr args)
{
auto key = GetKey(tag, std::move(args));
Analyzer* rval = analyzer_map.Lookup(key.get());
return rval;
}
// Looks up the analyzer stored under the hash key derived from the tag and
// argument record; the result is whatever the map lookup yields for that key.
Analyzer* AnalyzerSet::Find(const zeek::Tag& tag, RecordValPtr args) {
    const auto lookup_key = GetKey(tag, std::move(args));
    return analyzer_map.Lookup(lookup_key.get());
}
// Attaches an analyzer right away. Returns true without instantiating anything
// when an analyzer with the same key is already in the map, false when
// InstantiateAnalyzer() yields a null pointer, and otherwise inserts the new
// instance via Insert() (which also calls its Init()) and returns true.
bool AnalyzerSet::Add(const zeek::Tag& tag, RecordValPtr args)
{
auto key = GetKey(tag, args);
bool AnalyzerSet::Add(const zeek::Tag& tag, RecordValPtr args) {
auto key = GetKey(tag, args);
if ( analyzer_map.Lookup(key.get()) )
{
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Instantiate analyzer %s skipped: already exists",
file->GetID().c_str(), file_mgr->GetComponentName(tag).c_str());
if ( analyzer_map.Lookup(key.get()) ) {
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Instantiate analyzer %s skipped: already exists", file->GetID().c_str(),
file_mgr->GetComponentName(tag).c_str());
return true;
}
return true;
}
file_analysis::Analyzer* a = InstantiateAnalyzer(tag, std::move(args));
file_analysis::Analyzer* a = InstantiateAnalyzer(tag, std::move(args));
if ( ! a )
return false;
if ( ! a )
return false;
Insert(a, std::move(key));
Insert(a, std::move(key));
return true;
}
return true;
}
// Instantiates the analyzer immediately but defers attaching it: on success an
// AddMod holding the instance and its key is pushed onto mod_queue and the
// instance pointer is returned; if instantiation fails, nothing is queued and
// a null pointer is returned. No duplicate check happens here.
// NOTE(review): AddMod::Perform() deletes the instance (via Abort()) when the
// key is already present at drain time, and the destructor aborts undrained
// entries, so the returned pointer may not outlive the queue -- confirm that
// callers do not keep it.
Analyzer* AnalyzerSet::QueueAdd(const zeek::Tag& tag, RecordValPtr args)
{
auto key = GetKey(tag, args);
file_analysis::Analyzer* a = InstantiateAnalyzer(tag, std::move(args));
Analyzer* AnalyzerSet::QueueAdd(const zeek::Tag& tag, RecordValPtr args) {
auto key = GetKey(tag, args);
file_analysis::Analyzer* a = InstantiateAnalyzer(tag, std::move(args));
if ( ! a )
return nullptr;
if ( ! a )
return nullptr;
mod_queue.push(new AddMod(a, std::move(key)));
mod_queue.push(new AddMod(a, std::move(key)));
return a;
}
return a;
}
// Applies a queued addition to the given set. If the set already holds an
// analyzer under this key, the queued instance is discarded through Abort();
// otherwise it is inserted and the key is moved into Insert(). Returns true on
// both paths.
bool AnalyzerSet::AddMod::Perform(AnalyzerSet* set)
{
if ( set->analyzer_map.Lookup(key.get()) )
{
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Add analyzer %s skipped: already exists",
a->GetFile()->GetID().c_str(), file_mgr->GetComponentName(a->Tag()).c_str());
bool AnalyzerSet::AddMod::Perform(AnalyzerSet* set) {
if ( set->analyzer_map.Lookup(key.get()) ) {
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Add analyzer %s skipped: already exists", a->GetFile()->GetID().c_str(),
file_mgr->GetComponentName(a->Tag()).c_str());
Abort();
return true;
}
Abort();
return true;
}
set->Insert(a, std::move(key));
set->Insert(a, std::move(key));
return true;
}
return true;
}
void AnalyzerSet::AddMod::Abort()
{
delete a;
}
// Discards the queued addition by destroying the analyzer instance it holds.
void AnalyzerSet::AddMod::Abort() {
    delete a;
}
bool AnalyzerSet::Remove(const zeek::Tag& tag, RecordValPtr args)
{
return Remove(tag, GetKey(tag, std::move(args)));
}
// Immediate removal by tag and argument record: derives the hash key and
// forwards to the key-based Remove() overload, returning its result.
bool AnalyzerSet::Remove(const zeek::Tag& tag, RecordValPtr args) {
    auto key = GetKey(tag, std::move(args));
    return Remove(tag, std::move(key));
}
// Key-based removal. Takes the entry for the key out of analyzer_map; returns
// false (after a debug message) if there was none. Otherwise calls Done() on
// the analyzer, passes it to file->DoneWithAnalyzer() instead of deleting it
// here, and returns true.
bool AnalyzerSet::Remove(const zeek::Tag& tag, std::unique_ptr<zeek::detail::HashKey> key)
{
auto a = (file_analysis::Analyzer*)analyzer_map.Remove(key.get());
bool AnalyzerSet::Remove(const zeek::Tag& tag, std::unique_ptr<zeek::detail::HashKey> key) {
auto a = (file_analysis::Analyzer*)analyzer_map.Remove(key.get());
if ( ! a )
{
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Skip remove analyzer %s", file->GetID().c_str(),
file_mgr->GetComponentName(tag).c_str());
return false;
}
if ( ! a ) {
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Skip remove analyzer %s", file->GetID().c_str(),
file_mgr->GetComponentName(tag).c_str());
return false;
}
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Remove analyzer %s", file->GetID().c_str(),
file_mgr->GetComponentName(tag).c_str());
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Remove analyzer %s", file->GetID().c_str(),
file_mgr->GetComponentName(tag).c_str());
a->Done();
a->Done();
// We don't delete the analyzer object right here because the remove
// operation may execute at a time when it can still be accessed.
// Instead we let the file know to delete the analyzer later.
file->DoneWithAnalyzer(a);
// We don't delete the analyzer object right here because the remove
// operation may execute at a time when it can still be accessed.
// Instead we let the file know to delete the analyzer later.
file->DoneWithAnalyzer(a);
return true;
}
return true;
}
bool AnalyzerSet::QueueRemove(const zeek::Tag& tag, RecordValPtr args)
{
auto key = GetKey(tag, std::move(args));
auto rval = analyzer_map.Lookup(key.get());
mod_queue.push(new RemoveMod(tag, std::move(key)));
return rval;
}
// Queues a removal for the analyzer identified by tag and argument record.
// The removal is queued unconditionally; the return value only reports
// whether a matching analyzer was in the map at the time of this call.
bool AnalyzerSet::QueueRemove(const zeek::Tag& tag, RecordValPtr args) {
    auto key = GetKey(tag, std::move(args));
    const bool present = analyzer_map.Lookup(key.get()) != nullptr;

    mod_queue.push(new RemoveMod(tag, std::move(key)));
    return present;
}
bool AnalyzerSet::RemoveMod::Perform(AnalyzerSet* set)
{
return set->Remove(tag, std::move(key));
}
// Applies the queued removal to the given set, handing the stored key over to
// the set's key-based Remove() and returning its result.
bool AnalyzerSet::RemoveMod::Perform(AnalyzerSet* set) {
    const bool removed = set->Remove(tag, std::move(key));
    return removed;
}
// Builds the hash key that identifies an analyzer instance: a TYPE_ANY list
// holding the tag's value followed by the argument record is hashed through
// the manager's analyzer hash. A null result is reported via
// reporter->InternalError() as an AnalyzerArgs type mismatch.
// NOTE(review): the code returns the (null) key after that call; presumably
// InternalError() does not return -- confirm.
std::unique_ptr<zeek::detail::HashKey> AnalyzerSet::GetKey(const zeek::Tag& t,
RecordValPtr args) const
{
auto lv = make_intrusive<ListVal>(TYPE_ANY);
lv->Append(t.AsVal());
lv->Append(std::move(args));
auto key = file_mgr->GetAnalyzerHash()->MakeHashKey(*lv, true);
std::unique_ptr<zeek::detail::HashKey> AnalyzerSet::GetKey(const zeek::Tag& t, RecordValPtr args) const {
auto lv = make_intrusive<ListVal>(TYPE_ANY);
lv->Append(t.AsVal());
lv->Append(std::move(args));
auto key = file_mgr->GetAnalyzerHash()->MakeHashKey(*lv, true);
if ( ! key )
reporter->InternalError("AnalyzerArgs type mismatch");
if ( ! key )
reporter->InternalError("AnalyzerArgs type mismatch");
return key;
}
return key;
}
// Asks the file manager for a new analyzer of the given tag attached to this
// set's file. On failure a null pointer is returned; an error is reported
// through the reporter unless the component for the tag exists and is not
// enabled, in which case the failure is silent.
file_analysis::Analyzer* AnalyzerSet::InstantiateAnalyzer(const Tag& tag, RecordValPtr args) const
{
auto a = file_mgr->InstantiateAnalyzer(tag, std::move(args), file);
file_analysis::Analyzer* AnalyzerSet::InstantiateAnalyzer(const Tag& tag, RecordValPtr args) const {
auto a = file_mgr->InstantiateAnalyzer(tag, std::move(args), file);
if ( ! a )
{
auto c = file_mgr->Lookup(tag);
if ( ! a ) {
auto c = file_mgr->Lookup(tag);
if ( c && ! c->Enabled() )
return nullptr;
if ( c && ! c->Enabled() )
return nullptr;
reporter->Error("[%s] Failed file analyzer %s instantiation", file->GetID().c_str(),
file_mgr->GetComponentName(tag).c_str());
return nullptr;
}
reporter->Error("[%s] Failed file analyzer %s instantiation", file->GetID().c_str(),
file_mgr->GetComponentName(tag).c_str());
return nullptr;
}
return a;
}
return a;
}
// Stores the analyzer in analyzer_map under the given key, after a debug
// message, and then calls the analyzer's Init().
void AnalyzerSet::Insert(file_analysis::Analyzer* a, std::unique_ptr<zeek::detail::HashKey> key)
{
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Add analyzer %s", file->GetID().c_str(),
file_mgr->GetComponentName(a->Tag()).c_str());
analyzer_map.Insert(key.get(), a);
void AnalyzerSet::Insert(file_analysis::Analyzer* a, std::unique_ptr<zeek::detail::HashKey> key) {
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Add analyzer %s", file->GetID().c_str(),
file_mgr->GetComponentName(a->Tag()).c_str());
analyzer_map.Insert(key.get(), a);
a->Init();
}
a->Init();
}
// Applies every queued modification in queue order. Returns immediately when
// the queue is empty; otherwise each entry is performed against this set,
// deleted and popped until the queue is empty, with debug messages before and
// after the flush. The result of Perform() is not inspected.
void AnalyzerSet::DrainModifications()
{
if ( mod_queue.empty() )
return;
void AnalyzerSet::DrainModifications() {
if ( mod_queue.empty() )
return;
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Start analyzer mod queue flush", file->GetID().c_str());
do
{
Modification* mod = mod_queue.front();
mod->Perform(this);
delete mod;
mod_queue.pop();
} while ( ! mod_queue.empty() );
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] End flushing analyzer mod queue.", file->GetID().c_str());
}
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Start analyzer mod queue flush", file->GetID().c_str());
do {
Modification* mod = mod_queue.front();
mod->Perform(this);
delete mod;
mod_queue.pop();
} while ( ! mod_queue.empty() );
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] End flushing analyzer mod queue.", file->GetID().c_str());
}
} // namespace zeek::file_analysis::detail
} // namespace zeek::file_analysis::detail

View file

@ -8,20 +8,17 @@
#include "zeek/Dict.h"
#include "zeek/Tag.h"
namespace zeek
{
namespace zeek {
class RecordVal;
using RecordValPtr = IntrusivePtr<RecordVal>;
namespace file_analysis
{
namespace file_analysis {
class Analyzer;
class File;
namespace detail
{
namespace detail {
/**
* A set of file analysis analyzers indexed by an \c AnalyzerArgs (script-layer
@ -29,190 +26,182 @@ namespace detail
* modifications can happen at well-defined times (e.g. to make sure a loop
* iterator isn't invalidated).
*/
class AnalyzerSet
{
class AnalyzerSet {
public:
/**
* Constructor. Nothing special.
* @param arg_file the file to which all analyzers in the set are attached.
*/
explicit AnalyzerSet(File* arg_file);
/**
* Constructor. Nothing special.
* @param arg_file the file to which all analyzers in the set are attached.
*/
explicit AnalyzerSet(File* arg_file);
/**
* Destructor. Any queued analyzer additions/removals are aborted and
* will not occur.
*/
~AnalyzerSet();
/**
* Destructor. Any queued analyzer additions/removals are aborted and
* will not occur.
*/
~AnalyzerSet();
/**
* Looks up an analyzer by its tag and arguments.
* @param tag an analyzer tag.
* @param args an \c AnalyzerArgs record.
* @return pointer to an analyzer instance, or a null pointer if not found.
*/
Analyzer* Find(const zeek::Tag& tag, RecordValPtr args);
/**
* Looks up an analyzer by its tag and arguments.
* @param tag an analyzer tag.
* @param args an \c AnalyzerArgs record.
* @return pointer to an analyzer instance, or a null pointer if not found.
*/
Analyzer* Find(const zeek::Tag& tag, RecordValPtr args);
/**
* Attach an analyzer to #file immediately.
* @param tag the analyzer tag of the file analyzer to add.
* @param args an \c AnalyzerArgs value which specifies an analyzer.
* @return true if analyzer was instantiated/attached, else false.
*/
bool Add(const zeek::Tag& tag, RecordValPtr args);
/**
* Attach an analyzer to #file immediately.
* @param tag the analyzer tag of the file analyzer to add.
* @param args an \c AnalyzerArgs value which specifies an analyzer.
* @return true if analyzer was instantiated/attached, else false.
*/
bool Add(const zeek::Tag& tag, RecordValPtr args);
/**
* Queue the attachment of an analyzer to #file.
* @param tag the analyzer tag of the file analyzer to add.
* @param args an \c AnalyzerArgs value which specifies an analyzer.
* @return if successful, a pointer to a newly instantiated analyzer else
* a null pointer. The caller does *not* take ownership of the memory.
*/
file_analysis::Analyzer* QueueAdd(const zeek::Tag& tag, RecordValPtr args);
/**
* Queue the attachment of an analyzer to #file.
* @param tag the analyzer tag of the file analyzer to add.
* @param args an \c AnalyzerArgs value which specifies an analyzer.
* @return if successful, a pointer to a newly instantiated analyzer else
* a null pointer. The caller does *not* take ownership of the memory.
*/
file_analysis::Analyzer* QueueAdd(const zeek::Tag& tag, RecordValPtr args);
/**
* Remove an analyzer from #file immediately.
* @param tag the analyzer tag of the file analyzer to remove.
* @param args an \c AnalyzerArgs value which specifies an analyzer.
* @return false if analyzer didn't exist and so wasn't removed, else true.
*/
bool Remove(const zeek::Tag& tag, RecordValPtr args);
/**
* Remove an analyzer from #file immediately.
* @param tag the analyzer tag of the file analyzer to remove.
* @param args an \c AnalyzerArgs value which specifies an analyzer.
* @return false if analyzer didn't exist and so wasn't removed, else true.
*/
bool Remove(const zeek::Tag& tag, RecordValPtr args);
/**
* Queue the removal of an analyzer from #file.
* @param tag the analyzer tag of the file analyzer to remove.
* @param args an \c AnalyzerArgs value which specifies an analyzer.
* @return true if analyzer exists at time of call, else false.
*/
bool QueueRemove(const zeek::Tag& tag, RecordValPtr args);
/**
* Queue the removal of an analyzer from #file.
* @param tag the analyzer tag of the file analyzer to remove.
* @param args an \c AnalyzerArgs value which specifies an analyzer.
* @return true if analyzer exists at time of call, else false.
*/
bool QueueRemove(const zeek::Tag& tag, RecordValPtr args);
/**
* Perform all queued modifications to the current analyzer set.
*/
void DrainModifications();
/**
* Perform all queued modifications to the current analyzer set.
*/
void DrainModifications();
// Iterator support
using iterator = zeek::DictIterator<file_analysis::Analyzer>;
;
using const_iterator = const iterator;
using reverse_iterator = std::reverse_iterator<iterator>;
using const_reverse_iterator = std::reverse_iterator<const_iterator>;
// Iterator support: the set is traversed with the iterator type of the
// dictionary that stores the analyzers.
using iterator = zeek::DictIterator<file_analysis::Analyzer>;
// NOTE(review): this is a const-qualified `iterator` object type, not an
// iterator over const elements -- confirm that this is what is intended.
using const_iterator = const iterator;
using reverse_iterator = std::reverse_iterator<iterator>;
using const_reverse_iterator = std::reverse_iterator<const_iterator>;
// Iteration over the analyzers stored in #analyzer_map; every accessor below
// simply forwards to the member of the same name on #analyzer_map.
iterator begin() { return analyzer_map.begin(); }
iterator end() { return analyzer_map.end(); }
const_iterator begin() const { return analyzer_map.begin(); }
const_iterator end() const { return analyzer_map.end(); }
// NOTE(review): cbegin()/cend() are not const-qualified, so they cannot be
// called on a const set -- confirm whether that is intended.
const_iterator cbegin() { return analyzer_map.cbegin(); }
const_iterator cend() { return analyzer_map.cend(); }
// Iteration over the analyzers stored in #analyzer_map; every accessor below
// simply forwards to the member of the same name on #analyzer_map.
iterator begin() { return analyzer_map.begin(); }
iterator end() { return analyzer_map.end(); }
const_iterator begin() const { return analyzer_map.begin(); }
const_iterator end() const { return analyzer_map.end(); }
// NOTE(review): cbegin()/cend() are not const-qualified, so they cannot be
// called on a const set -- confirm whether that is intended.
const_iterator cbegin() { return analyzer_map.cbegin(); }
const_iterator cend() { return analyzer_map.cend(); }
protected:
/**
* Get a hash key which represents an analyzer instance.
* @param tag the file analyzer tag.
* @param args an \c AnalyzerArgs value which specifies an analyzer.
* @return the hash key calculated from \a args
*/
std::unique_ptr<zeek::detail::HashKey> GetKey(const zeek::Tag& tag, RecordValPtr args) const;
/**
* Get a hash key which represents an analyzer instance.
* @param tag the file analyzer tag.
* @param args an \c AnalyzerArgs value which specifies an analyzer.
* @return the hash key calculated from \a args
*/
std::unique_ptr<zeek::detail::HashKey> GetKey(const zeek::Tag& tag, RecordValPtr args) const;
/**
* Create an instance of a file analyzer.
* @param tag the tag of a file analyzer.
* @param args an \c AnalyzerArgs value which specifies an analyzer.
* @return a new file analyzer instance.
*/
file_analysis::Analyzer* InstantiateAnalyzer(const zeek::Tag& tag, RecordValPtr args) const;
/**
* Create an instance of a file analyzer.
* @param tag the tag of a file analyzer.
* @param args an \c AnalyzerArgs value which specifies an analyzer.
* @return a new file analyzer instance.
*/
file_analysis::Analyzer* InstantiateAnalyzer(const zeek::Tag& tag, RecordValPtr args) const;
/**
* Insert an analyzer instance into the set.
* @param a an analyzer instance.
* @param key the hash key which represents the analyzer's \c AnalyzerArgs.
*/
void Insert(file_analysis::Analyzer* a, std::unique_ptr<zeek::detail::HashKey> key);
/**
* Insert an analyzer instance into the set.
* @param a an analyzer instance.
* @param key the hash key which represents the analyzer's \c AnalyzerArgs.
*/
void Insert(file_analysis::Analyzer* a, std::unique_ptr<zeek::detail::HashKey> key);
/**
* Remove an analyzer instance from the set.
* @param tag enumerator which specifies type of the analyzer to remove,
* just used for debugging messages.
* @param key the hash key which represents the analyzer's \c AnalyzerArgs.
*/
bool Remove(const zeek::Tag& tag, std::unique_ptr<zeek::detail::HashKey> key);
/**
* Remove an analyzer instance from the set.
* @param tag enumerator which specifies type of the analyzer to remove,
* just used for debugging messages.
* @param key the hash key which represents the analyzer's \c AnalyzerArgs.
*/
bool Remove(const zeek::Tag& tag, std::unique_ptr<zeek::detail::HashKey> key);
private:
File* file; /**< File which owns the set */
PDict<file_analysis::Analyzer> analyzer_map; /**< Indexed by AnalyzerArgs. */
File* file; /**< File which owns the set */
PDict<file_analysis::Analyzer> analyzer_map; /**< Indexed by AnalyzerArgs. */
/**
* Abstract base class for analyzer set modifications.
*/
class Modification
{
public:
virtual ~Modification() { }
/**
* Abstract base class for analyzer set modifications.
*/
class Modification {
public:
virtual ~Modification() {}
/**
* Perform the modification on an analyzer set.
* @param set the analyzer set on which the modification will happen.
* @return true if the modification altered \a set.
*/
virtual bool Perform(AnalyzerSet* set) = 0;
/**
* Perform the modification on an analyzer set.
* @param set the analyzer set on which the modification will happen.
* @return true if the modification altered \a set.
*/
virtual bool Perform(AnalyzerSet* set) = 0;
/**
* Don't perform the modification on the analyzer set and clean up.
*/
virtual void Abort() = 0;
};
/**
* Don't perform the modification on the analyzer set and clean up.
*/
virtual void Abort() = 0;
};
/**
* Represents a request to add an analyzer to an analyzer set.
*/
class AddMod final : public Modification
{
public:
/**
* Construct request which can add an analyzer to an analyzer set.
* @param arg_a an analyzer instance to add to an analyzer set.
* @param arg_key hash key representing the analyzer's \c AnalyzerArgs.
*/
AddMod(file_analysis::Analyzer* arg_a, std::unique_ptr<zeek::detail::HashKey> arg_key)
: Modification(), a(arg_a), key(std::move(arg_key))
{
}
~AddMod() override { }
bool Perform(AnalyzerSet* set) override;
void Abort() override;
/**
* Represents a request to add an analyzer to an analyzer set.
*/
class AddMod final : public Modification {
public:
/**
* Construct request which can add an analyzer to an analyzer set.
* @param arg_a an analyzer instance to add to an analyzer set.
* @param arg_key hash key representing the analyzer's \c AnalyzerArgs.
*/
AddMod(file_analysis::Analyzer* arg_a, std::unique_ptr<zeek::detail::HashKey> arg_key)
: Modification(), a(arg_a), key(std::move(arg_key)) {}
~AddMod() override {}
bool Perform(AnalyzerSet* set) override;
void Abort() override;
protected:
file_analysis::Analyzer* a;
std::unique_ptr<zeek::detail::HashKey> key;
};
protected:
file_analysis::Analyzer* a;
std::unique_ptr<zeek::detail::HashKey> key;
};
/**
* Represents a request to remove an analyzer from an analyzer set.
*/
class RemoveMod final : public Modification
{
public:
/**
* Construct request which can remove an analyzer from an analyzer set.
* @param arg_tag the tag of the analyzer to remove from an analyzer set.
* @param arg_key hash key representing the analyzer's \c AnalyzerArgs.
*/
RemoveMod(const zeek::Tag& arg_tag, std::unique_ptr<zeek::detail::HashKey> arg_key)
: Modification(), tag(arg_tag), key(std::move(arg_key))
{
}
~RemoveMod() override { }
bool Perform(AnalyzerSet* set) override;
void Abort() override { }
/**
* Represents a request to remove an analyzer from an analyzer set.
*/
class RemoveMod final : public Modification {
public:
/**
* Construct request which can remove an analyzer from an analyzer set.
* @param arg_tag the tag of the analyzer to remove from an analyzer set.
* @param arg_key hash key representing the analyzer's \c AnalyzerArgs.
*/
RemoveMod(const zeek::Tag& arg_tag, std::unique_ptr<zeek::detail::HashKey> arg_key)
: Modification(), tag(arg_tag), key(std::move(arg_key)) {}
~RemoveMod() override {}
bool Perform(AnalyzerSet* set) override;
void Abort() override {}
protected:
zeek::Tag tag;
std::unique_ptr<zeek::detail::HashKey> key;
};
protected:
zeek::Tag tag;
std::unique_ptr<zeek::detail::HashKey> key;
};
using ModQueue = std::queue<Modification*>;
ModQueue mod_queue; /**< A queue of analyzer additions/removals requests. */
};
using ModQueue = std::queue<Modification*>;
ModQueue mod_queue; /**< A queue of analyzer additions/removals requests. */
};
} // namespace detail
} // namespace file_analysis
} // namespace zeek
} // namespace detail
} // namespace file_analysis
} // namespace zeek

View file

@ -6,33 +6,27 @@
#include "zeek/file_analysis/Manager.h"
#include "zeek/util.h"
namespace zeek::file_analysis
{
namespace zeek::file_analysis {
// Constructs a file analyzer component. The plugin::Component base is set up
// as a FILE_ANALYZER component with the given name and subtype and the tag
// type obtained from file_mgr; the factory callback and the initial enabled
// state are stored in the component.
Component::Component(const std::string& name, factory_function arg_factory, Tag::subtype_t subtype,
                     bool arg_enabled)
    : plugin::Component(plugin::component::FILE_ANALYZER, name, subtype, file_mgr->GetTagType()),
      // Members are initialized directly (in declaration order) rather than
      // being assigned inside the constructor body.
      factory_func(arg_factory),
      enabled(arg_enabled)
    {
    }
// Constructs a file analyzer component. The plugin::Component base is set up
// as a FILE_ANALYZER component with the given name and subtype and the tag
// type obtained from file_mgr; the factory callback and the initial enabled
// state are stored in the component.
Component::Component(const std::string& name, factory_function arg_factory, Tag::subtype_t subtype, bool arg_enabled)
    : plugin::Component(plugin::component::FILE_ANALYZER, name, subtype, file_mgr->GetTagType()),
      // Members are initialized directly (in declaration order) rather than
      // being assigned inside the constructor body.
      factory_func(arg_factory),
      enabled(arg_enabled) {}
// Initializes this component's tag via InitializeTag() and then registers the
// component with file_mgr, passing the "ANALYZER_" string along.
void Component::Initialize()
{
InitializeTag();
file_mgr->RegisterComponent(this, "ANALYZER_");
}
// Initializes this component's tag via InitializeTag() and then registers the
// component with file_mgr, passing the "ANALYZER_" string along.
void Component::Initialize() {
InitializeTag();
file_mgr->RegisterComponent(this, "ANALYZER_");
}
// Appends a description of this component to \a d: when a factory function is
// set, "ANALYZER_" followed by the canonical name and ", "; then, in all
// cases, either "enabled" or "disabled" depending on the enabled flag.
void Component::DoDescribe(ODesc* d) const
{
if ( factory_func )
{
d->Add("ANALYZER_");
d->Add(CanonicalName());
d->Add(", ");
}
// Appends a description of this component to \a d: when a factory function is
// set, "ANALYZER_" followed by the canonical name and ", "; then, in all
// cases, either "enabled" or "disabled" depending on the enabled flag.
void Component::DoDescribe(ODesc* d) const {
if ( factory_func ) {
d->Add("ANALYZER_");
d->Add(CanonicalName());
d->Add(", ");
}
d->Add(enabled ? "enabled" : "disabled");
}
d->Add(enabled ? "enabled" : "disabled");
}
} // namespace zeek::file_analysis
} // namespace zeek::file_analysis

View file

@ -7,14 +7,12 @@
#include "zeek/Tag.h"
#include "zeek/plugin/Component.h"
namespace zeek
{
namespace zeek {
class RecordVal;
using RecordValPtr = zeek::IntrusivePtr<RecordVal>;
namespace file_analysis
{
namespace file_analysis {
class File;
class Analyzer;
@ -26,80 +24,78 @@ class Manager;
* A plugin can provide a specific file analyzer by registering this
* analyzer component, describing the analyzer.
*/
class Component : public plugin::Component
{
class Component : public plugin::Component {
public:
using factory_function = Analyzer* (*)(RecordValPtr args, File* file);
using factory_function = Analyzer* (*)(RecordValPtr args, File* file);
/**
* Constructor.
*
* @param name The name of the provided analyzer. This name is used
* across the system to identify the analyzer, e.g., when calling
* file_analysis::Manager::InstantiateAnalyzer with a name.
*
* @param factory A factory function to instantiate instances of the
* analyzer's class, which must be derived directly or indirectly
* from file_analysis::Analyzer. This is typically a static \c
* Instantiate() method inside the class that just allocates and
* returns a new instance.
*
* @param subtype A subtype associated with this component that
* further distinguishes it. The subtype will be integrated into the
* Tag that the manager associates with this analyzer, and
* analyzer instances can accordingly access it via Tag().
* If not used, leave at zero.
*
* @param enabled If false the analyzer starts out as disabled and
* hence won't be used. It can still be enabled later via the
* manager, including from script-land.
*/
Component(const std::string& name, factory_function factory, zeek::Tag::subtype_t subtype = 0,
bool enabled = true);
/**
* Constructor.
*
* @param name The name of the provided analyzer. This name is used
* across the system to identify the analyzer, e.g., when calling
* file_analysis::Manager::InstantiateAnalyzer with a name.
*
* @param factory A factory function to instantiate instances of the
* analyzer's class, which must be derived directly or indirectly
* from file_analysis::Analyzer. This is typically a static \c
* Instantiate() method inside the class that just allocates and
* returns a new instance.
*
* @param subtype A subtype associated with this component that
* further distinguishes it. The subtype will be integrated into the
* Tag that the manager associates with this analyzer, and
* analyzer instances can accordingly access it via Tag().
* If not used, leave at zero.
*
* @param enabled If false the analyzer starts out as disabled and
* hence won't be used. It can still be enabled later via the
* manager, including from script-land.
*/
Component(const std::string& name, factory_function factory, zeek::Tag::subtype_t subtype = 0, bool enabled = true);
/**
* Destructor.
*/
~Component() override = default;
/**
* Destructor.
*/
~Component() override = default;
/**
* Initialization function. This function has to be called before any
* plugin component functionality is used; it is used to add the
* plugin component to the list of components and to initialize tags
*/
void Initialize() override;
/**
* Initialization function. This function has to be called before any
* plugin component functionality is used; it is used to add the
* plugin component to the list of components and to initialize tags
*/
void Initialize() override;
/**
* Returns the analyzer's factory function.
*/
factory_function FactoryFunction() const { return factory_func; }
/**
* Returns the analyzer's factory function.
*/
factory_function FactoryFunction() const { return factory_func; }
/**
* Returns true if the analyzer is currently enabled and hence
* available for use.
*/
bool Enabled() const { return enabled; }
/**
* Returns true if the analyzer is currently enabled and hence
* available for use.
*/
bool Enabled() const { return enabled; }
/**
* Enables or disables this analyzer.
*
* @param arg_enabled True to enable, false to disable.
*
*/
void SetEnabled(bool arg_enabled) { enabled = arg_enabled; }
/**
* Enables or disables this analyzer.
*
* @param arg_enabled True to enable, false to disable.
*
*/
void SetEnabled(bool arg_enabled) { enabled = arg_enabled; }
protected:
/**
* Overridden from plugin::Component.
*/
void DoDescribe(ODesc* d) const override;
/**
* Overridden from plugin::Component.
*/
void DoDescribe(ODesc* d) const override;
private:
friend class Manager;
friend class Manager;
factory_function factory_func; // The analyzer's factory callback.
bool enabled; // True if the analyzer is enabled.
};
factory_function factory_func; // The analyzer's factory callback.
bool enabled; // True if the analyzer is enabled.
};
} // namespace file_analysis
} // namespace zeek
} // namespace file_analysis
} // namespace zeek

File diff suppressed because it is too large Load diff

View file

@ -13,8 +13,7 @@
#include "zeek/ZeekString.h"
#include "zeek/file_analysis/AnalyzerSet.h"
namespace zeek
{
namespace zeek {
class Connection;
class EventHandlerPtr;
@ -23,354 +22,347 @@ class RecordType;
using RecordValPtr = IntrusivePtr<RecordVal>;
using RecordTypePtr = IntrusivePtr<RecordType>;
namespace file_analysis
{
namespace file_analysis {
class FileReassembler;
/**
* Wrapper class around \c fa_file record values from script layer.
*/
class File
{
class File {
public:
/**
* Destructor. Nothing fancy, releases a reference to the wrapped
* \c fa_file value.
*/
~File();
/**
* Destructor. Nothing fancy, releases a reference to the wrapped
* \c fa_file value.
*/
~File();
/**
* @return the wrapped \c fa_file record value, #val.
*/
const RecordValPtr& ToVal() const { return val; }
/**
* @return the wrapped \c fa_file record value, #val.
*/
const RecordValPtr& ToVal() const { return val; }
/**
* @return the value of the "source" field from #val record or an empty
* string if it's not initialized.
*/
std::string GetSource() const;
/**
* @return the value of the "source" field from #val record or an empty
* string if it's not initialized.
*/
std::string GetSource() const;
/**
* Set the "source" field from #val record to \a source.
* @param source the new value of the "source" field.
*/
void SetSource(const std::string& source);
/**
* Set the "source" field from #val record to \a source.
* @param source the new value of the "source" field.
*/
void SetSource(const std::string& source);
/**
* @return value (seconds) of the "timeout_interval" field from #val record.
*/
double GetTimeoutInterval() const;
/**
* @return value (seconds) of the "timeout_interval" field from #val record.
*/
double GetTimeoutInterval() const;
/**
* Set the "timeout_interval" field from #val record to \a interval seconds.
* @param interval the new value of the "timeout_interval" field.
*/
void SetTimeoutInterval(double interval);
/**
* Set the "timeout_interval" field from #val record to \a interval seconds.
* @param interval the new value of the "timeout_interval" field.
*/
void SetTimeoutInterval(double interval);
/**
* Change the maximum size that an attached extraction analyzer is allowed.
* @param args the file extraction analyzer whose limit needs to be changed.
* @param bytes new limit.
* @return false if no extraction analyzer is active, else true.
*/
bool SetExtractionLimit(RecordValPtr args, uint64_t bytes);
/**
* Change the maximum size that an attached extraction analyzer is allowed.
* @param args the file extraction analyzer whose limit needs to be changed.
* @param bytes new limit.
* @return false if no extraction analyzer is active, else true.
*/
bool SetExtractionLimit(RecordValPtr args, uint64_t bytes);
/**
* @return value of the "id" field from #val record.
*/
const std::string& GetID() const { return id; }
/**
* @return value of the "id" field from #val record.
*/
const std::string& GetID() const { return id; }
/**
* @return value of "last_active" field in #val record;
*/
double GetLastActivityTime() const;
/**
* @return value of "last_active" field in #val record;
*/
double GetLastActivityTime() const;
/**
* Refreshes "last_active" field of #val record with current network time.
*/
void UpdateLastActivityTime();
/**
* Refreshes "last_active" field of #val record with current network time.
*/
void UpdateLastActivityTime();
/**
* Set "total_bytes" field of #val record to \a size.
* @param size the new value of the "total_bytes" field.
*/
void SetTotalBytes(uint64_t size);
/**
* Set "total_bytes" field of #val record to \a size.
* @param size the new value of the "total_bytes" field.
*/
void SetTotalBytes(uint64_t size);
/**
* @return true if file analysis is complete for the file, else false.
* It is incomplete if the total size is unknown or if the number of bytes
* streamed to analyzers (either as data deliveries or gap information)
* does not yet match the known total size.
*/
bool IsComplete() const;
/**
* @return true if file analysis is complete for the file, else false.
* It is incomplete if the total size is unknown or if the number of bytes
* streamed to analyzers (either as data deliveries or gap information)
* does not yet match the known total size.
*/
bool IsComplete() const;
/**
* Create a timer to be dispatched after the amount of time indicated by
* the "timeout_interval" field of the #val record in order to check if
* "last_active" field is old enough to timeout analysis of the file.
*/
void ScheduleInactivityTimer() const;
/**
* Create a timer to be dispatched after the amount of time indicated by
* the "timeout_interval" field of the #val record in order to check if
* "last_active" field is old enough to timeout analysis of the file.
*/
void ScheduleInactivityTimer() const;
/**
* Queues attaching an analyzer. Only one analyzer per type can be attached
* at a time unless the arguments differ.
* @param tag the analyzer tag of the file analyzer to add.
* @param args an \c AnalyzerArgs value representing a file analyzer.
* @return false if analyzer can't be instantiated, else true.
*/
bool AddAnalyzer(zeek::Tag tag, RecordValPtr args);
/**
* Queues attaching an analyzer. Only one analyzer per type can be attached
* at a time unless the arguments differ.
* @param tag the analyzer tag of the file analyzer to add.
* @param args an \c AnalyzerArgs value representing a file analyzer.
* @return false if analyzer can't be instantiated, else true.
*/
bool AddAnalyzer(zeek::Tag tag, RecordValPtr args);
/**
* Queues removal of an analyzer.
* @param tag the analyzer tag of the file analyzer to remove.
* @param args an \c AnalyzerArgs value representing a file analyzer.
* @return true if analyzer was active at time of call, else false.
*/
bool RemoveAnalyzer(zeek::Tag tag, RecordValPtr args);
/**
* Queues removal of an analyzer.
* @param tag the analyzer tag of the file analyzer to remove.
* @param args an \c AnalyzerArgs value representing a file analyzer.
* @return true if analyzer was active at time of call, else false.
*/
bool RemoveAnalyzer(zeek::Tag tag, RecordValPtr args);
/**
* Signal that this analyzer can be deleted once it's safe to do so.
*/
void DoneWithAnalyzer(Analyzer* analyzer);
/**
* Signal that this analyzer can be deleted once it's safe to do so.
*/
void DoneWithAnalyzer(Analyzer* analyzer);
/**
* Pass in non-sequential data and deliver to attached analyzers.
* @param data pointer to start of a chunk of file data.
* @param len number of bytes in the data chunk.
* @param offset number of bytes from start of file at which chunk occurs.
*/
void DataIn(const u_char* data, uint64_t len, uint64_t offset);
/**
* Pass in non-sequential data and deliver to attached analyzers.
* @param data pointer to start of a chunk of file data.
* @param len number of bytes in the data chunk.
* @param offset number of bytes from start of file at which chunk occurs.
*/
void DataIn(const u_char* data, uint64_t len, uint64_t offset);
/**
* Pass in sequential data and deliver to attached analyzers.
* @param data pointer to start of a chunk of file data.
* @param len number of bytes in the data chunk.
*/
void DataIn(const u_char* data, uint64_t len);
/**
* Pass in sequential data and deliver to attached analyzers.
* @param data pointer to start of a chunk of file data.
* @param len number of bytes in the data chunk.
*/
void DataIn(const u_char* data, uint64_t len);
/**
* Inform attached analyzers about end of file being seen.
*/
void EndOfFile();
/**
* Inform attached analyzers about end of file being seen.
*/
void EndOfFile();
/**
* Inform attached analyzers about a gap in file stream.
* @param offset number of bytes into file at which missing chunk starts.
* @param len length in bytes of the missing chunk of file data.
*/
void Gap(uint64_t offset, uint64_t len);
/**
* Inform attached analyzers about a gap in file stream.
* @param offset number of bytes into file at which missing chunk starts.
* @param len length in bytes of the missing chunk of file data.
*/
void Gap(uint64_t offset, uint64_t len);
/**
* @param h pointer to an event handler.
* @return true if event has a handler and the file isn't ignored.
*/
bool FileEventAvailable(EventHandlerPtr h);
/**
* @param h pointer to an event handler.
* @return true if event has a handler and the file isn't ignored.
*/
bool FileEventAvailable(EventHandlerPtr h);
/**
* Raises an event related to the file's life-cycle, the only parameter
* to that event is the \c fa_file record.
* @param h pointer to an event handler.
*/
void FileEvent(EventHandlerPtr h);
/**
* Raises an event related to the file's life-cycle, the only parameter
* to that event is the \c fa_file record.
* @param h pointer to an event handler.
*/
void FileEvent(EventHandlerPtr h);
/**
* Raises an event related to the file's life-cycle.
* @param h pointer to an event handler.
* @param args list of argument values to pass to event call.
*/
void FileEvent(EventHandlerPtr h, Args args);
/**
* Raises an event related to the file's life-cycle.
* @param h pointer to an event handler.
* @param args list of argument values to pass to event call.
*/
void FileEvent(EventHandlerPtr h, Args args);
/**
* Sets the MIME type for a file to a specific value.
*
* Setting the MIME type has to be done before the MIME type is
* inferred from the content, and before any data is passed to the
* analyzer (the beginning of file buffer has to be empty). After
* data has been sent or a MIME type has been set once, it cannot be
* changed.
*
* This function should only be called when it does not make sense
* to perform automated MIME type detections. This is e.g. the case
* in protocols where the file type is fixed in the protocol description.
* This is for example the case for TLS and X.509 certificates.
*
* @param mime_type mime type to set
* @return true if the mime type was set. False if it could not be set because
* a mime type was already set or inferred.
*/
bool SetMime(const std::string& mime_type);
/**
* Sets the MIME type for a file to a specific value.
*
* Setting the MIME type has to be done before the MIME type is
* inferred from the content, and before any data is passed to the
* analyzer (the beginning of file buffer has to be empty). After
* data has been sent or a MIME type has been set once, it cannot be
* changed.
*
* This function should only be called when it does not make sense
* to perform automated MIME type detections. This is e.g. the case
* in protocols where the file type is fixed in the protocol description.
* This is for example the case for TLS and X.509 certificates.
*
* @param mime_type mime type to set
* @return true if the mime type was set. False if it could not be set because
* a mime type was already set or inferred.
*/
bool SetMime(const std::string& mime_type);
/**
* Whether to permit a weird to carry on through the full reporter/weird
* framework.
*/
bool PermitWeird(const char* name, uint64_t threshold, uint64_t rate, double duration);
/**
* Whether to permit a weird to carry on through the full reporter/weird
* framework.
*/
bool PermitWeird(const char* name, uint64_t threshold, uint64_t rate, double duration);
protected:
friend class Manager;
friend class FileReassembler;
friend class Manager;
friend class FileReassembler;
/**
* Constructor; only file_analysis::Manager should be creating these.
* @param file_id an identifier string for the file in pretty hash form
* (similar to connection uids).
* @param source_name the value for the source field to fill in.
* @param conn a network connection over which the file is transferred.
* @param tag the network protocol over which the file is transferred.
* @param is_orig true if the file is being transferred from the originator
* of the connection to the responder. False indicates the other
* direction.
*/
File(const std::string& file_id, const std::string& source_name, Connection* conn = nullptr,
zeek::Tag tag = zeek::Tag::Error, bool is_orig = false);
/**
* Constructor; only file_analysis::Manager should be creating these.
* @param file_id an identifier string for the file in pretty hash form
* (similar to connection uids).
* @param source_name the value for the source field to fill in.
* @param conn a network connection over which the file is transferred.
* @param tag the network protocol over which the file is transferred.
* @param is_orig true if the file is being transferred from the originator
* of the connection to the responder. False indicates the other
* direction.
*/
File(const std::string& file_id, const std::string& source_name, Connection* conn = nullptr,
zeek::Tag tag = zeek::Tag::Error, bool is_orig = false);
/**
* Updates the "conn_ids" and "conn_uids" fields in #val record with the
* \c conn_id and UID taken from \a conn.
* @param conn the connection over which a part of the file has been seen.
* @param is_orig true if the connection originator is sending the file.
* @return true if the connection was previously unknown.
*/
bool UpdateConnectionFields(Connection* conn, bool is_orig);
/**
* Updates the "conn_ids" and "conn_uids" fields in #val record with the
* \c conn_id and UID taken from \a conn.
* @param conn the connection over which a part of the file has been seen.
* @param is_orig true if the connection originator is sending the file.
* @return true if the connection was previously unknown.
*/
bool UpdateConnectionFields(Connection* conn, bool is_orig);
/**
* Raise the file_over_new_connection event with given arguments.
*/
void RaiseFileOverNewConnection(Connection* conn, bool is_orig);
/**
* Raise the file_over_new_connection event with given arguments.
*/
void RaiseFileOverNewConnection(Connection* conn, bool is_orig);
/**
* Increment a byte count field of #val record by \a size.
* @param size number of bytes by which to increment.
* @param field_idx the index of the field in \c fa_file to increment.
*/
void IncrementByteCount(uint64_t size, int field_idx);
/**
* Increment a byte count field of #val record by \a size.
* @param size number of bytes by which to increment.
* @param field_idx the index of the field in \c fa_file to increment.
*/
void IncrementByteCount(uint64_t size, int field_idx);
/**
* Wrapper to RecordVal::GetFieldOrDefault for the field in #val at index
* \a idx which automatically unrefs the Val and returns a converted value.
* @param idx the index of a field of type "count" in \c fa_file.
* @return the value of the field, which may be its &default.
*/
uint64_t LookupFieldDefaultCount(int idx) const;
/**
* Wrapper to RecordVal::GetFieldOrDefault for the field in #val at index
* \a idx which automatically unrefs the Val and returns a converted value.
* @param idx the index of a field of type "count" in \c fa_file.
* @return the value of the field, which may be its &default.
*/
uint64_t LookupFieldDefaultCount(int idx) const;
/**
* Wrapper to RecordVal::GetFieldOrDefault for the field in #val at index
* \a idx which automatically unrefs the Val and returns a converted value.
* @param idx the index of a field of type "interval" in \c fa_file.
* @return the value of the field, which may be its &default.
*/
double LookupFieldDefaultInterval(int idx) const;
/**
* Wrapper to RecordVal::GetFieldOrDefault for the field in #val at index
* \a idx which automatically unrefs the Val and returns a converted value.
* @param idx the index of a field of type "interval" in \c fa_file.
* @return the value of the field, which may be its &default.
*/
double LookupFieldDefaultInterval(int idx) const;
/**
* Buffers incoming data at the beginning of a file.
* @param data pointer to a data chunk to buffer.
* @param len number of bytes in the data chunk.
* @return true if buffering is still required, else false
*/
bool BufferBOF(const u_char* data, uint64_t len);
/**
* Buffers incoming data at the beginning of a file.
* @param data pointer to a data chunk to buffer.
* @param len number of bytes in the data chunk.
* @return true if buffering is still required, else false
*/
bool BufferBOF(const u_char* data, uint64_t len);
/**
* Does metadata inference (e.g. mime type detection via file
* magic signatures) using data in the BOF (beginning-of-file) buffer
* and raises an event with the metadata.
*/
void InferMetadata();
/**
* Does metadata inference (e.g. mime type detection via file
* magic signatures) using data in the BOF (beginning-of-file) buffer
* and raises an event with the metadata.
*/
void InferMetadata();
/**
* Enables reassembly on the file.
*/
void EnableReassembly();
/**
* Enables reassembly on the file.
*/
void EnableReassembly();
/**
* Disables reassembly on the file. If there is an existing reassembler
* for the file, this will cause it to be deleted and won't allow a new
* one to be created until reassembly is reenabled.
*/
void DisableReassembly();
/**
* Disables reassembly on the file. If there is an existing reassembler
* for the file, this will cause it to be deleted and won't allow a new
* one to be created until reassembly is reenabled.
*/
void DisableReassembly();
/**
* Set a maximum allowed bytes of memory for file reassembly for this file.
*/
void SetReassemblyBuffer(uint64_t max);
/**
* Set a maximum allowed bytes of memory for file reassembly for this file.
*/
void SetReassemblyBuffer(uint64_t max);
/**
* Perform stream-wise delivery for analyzers that need it.
*/
void DeliverStream(const u_char* data, uint64_t len);
/**
* Perform stream-wise delivery for analyzers that need it.
*/
void DeliverStream(const u_char* data, uint64_t len);
/**
* Perform chunk-wise delivery for analyzers that need it.
*/
void DeliverChunk(const u_char* data, uint64_t len, uint64_t offset);
/**
* Perform chunk-wise delivery for analyzers that need it.
*/
void DeliverChunk(const u_char* data, uint64_t len, uint64_t offset);
/**
* Lookup a record field index/offset by name.
* @param field_name the name of the record field.
* @param type the record type for which the field will be looked up.
* @return the field offset in #val record corresponding to \a field_name.
*/
static int Idx(const std::string& field_name, const RecordType* type);
static int Idx(const std::string& field_name, const RecordTypePtr& type)
{
return Idx(field_name, type.get());
}
/**
* Lookup a record field index/offset by name.
* @param field_name the name of the record field.
* @param type the record type for which the field will be looked up.
* @return the field offset in #val record corresponding to \a field_name.
*/
static int Idx(const std::string& field_name, const RecordType* type);
static int Idx(const std::string& field_name, const RecordTypePtr& type) { return Idx(field_name, type.get()); }
/**
* Initializes static member.
*/
static void StaticInit();
/**
* Initializes static member.
* NOTE(review): presumably this fills in the static *_idx field offsets
* declared in this class; the definition is not visible here, so confirm.
*/
static void StaticInit();
protected:
std::string id; /**< A pretty hash that likely identifies file */
RecordValPtr val; /**< \c fa_file from script layer. */
FileReassembler* file_reassembler; /**< A reassembler for the file if it's needed. */
uint64_t stream_offset; /**< The offset of the file which has been forwarded. */
uint64_t reassembly_max_buffer; /**< Maximum allowed buffer for reassembly. */
bool did_metadata_inference; /**< Whether the metadata inference has already been attempted. */
bool reassembly_enabled; /**< Whether file stream reassembly is needed. */
bool postpone_timeout; /**< Whether postponing timeout is requested. */
bool done; /**< If this object is about to be deleted. */
detail::AnalyzerSet analyzers; /**< A set of attached file analyzers. */
std::list<Analyzer*> done_analyzers; /**< Analyzers we're done with, remembered here until they
can be safely deleted. */
std::string id; /**< A pretty hash that likely identifies file */
RecordValPtr val; /**< \c fa_file from script layer. */
FileReassembler* file_reassembler; /**< A reassembler for the file if it's needed. */
uint64_t stream_offset; /**< The offset of the file which has been forwarded. */
uint64_t reassembly_max_buffer; /**< Maximum allowed buffer for reassembly. */
bool did_metadata_inference; /**< Whether the metadata inference has already been attempted. */
bool reassembly_enabled; /**< Whether file stream reassembly is needed. */
bool postpone_timeout; /**< Whether postponing timeout is requested. */
bool done; /**< If this object is about to be deleted. */
detail::AnalyzerSet analyzers; /**< A set of attached file analyzers. */
std::list<Analyzer*> done_analyzers; /**< Analyzers we're done with, remembered here until they
can be safely deleted. */
struct BOF_Buffer
{
BOF_Buffer() : full(false), size(0) { }
~BOF_Buffer()
{
for ( size_t i = 0; i < chunks.size(); ++i )
delete chunks[i];
}
struct BOF_Buffer {
BOF_Buffer() : full(false), size(0) {}
~BOF_Buffer() {
for ( size_t i = 0; i < chunks.size(); ++i )
delete chunks[i];
}
bool full;
uint64_t size;
String::CVec chunks;
} bof_buffer; /**< Beginning of file buffer. */
bool full;
uint64_t size;
String::CVec chunks;
} bof_buffer; /**< Beginning of file buffer. */
zeek::detail::WeirdStateMap weird_state;
zeek::detail::WeirdStateMap weird_state;
static int id_idx;
static int parent_id_idx;
static int source_idx;
static int is_orig_idx;
static int conns_idx;
static int last_active_idx;
static int seen_bytes_idx;
static int total_bytes_idx;
static int missing_bytes_idx;
static int overflow_bytes_idx;
static int timeout_interval_idx;
static int bof_buffer_size_idx;
static int bof_buffer_idx;
static int mime_type_idx;
static int mime_types_idx;
static int meta_inferred_idx;
static int id_idx;
static int parent_id_idx;
static int source_idx;
static int is_orig_idx;
static int conns_idx;
static int last_active_idx;
static int seen_bytes_idx;
static int total_bytes_idx;
static int missing_bytes_idx;
static int overflow_bytes_idx;
static int timeout_interval_idx;
static int bof_buffer_size_idx;
static int bof_buffer_idx;
static int mime_type_idx;
static int mime_types_idx;
static int meta_inferred_idx;
static int meta_mime_type_idx;
static int meta_mime_types_idx;
};
static int meta_mime_type_idx;
static int meta_mime_types_idx;
};
} // namespace file_analysis
} // namespace zeek
} // namespace file_analysis
} // namespace zeek

View file

@ -5,167 +5,146 @@
#include "zeek/3rdparty/doctest.h"
#include "zeek/file_analysis/File.h"
namespace zeek::file_analysis
{
namespace zeek::file_analysis {
class File;
FileReassembler::FileReassembler(File* f, uint64_t starting_offset)
: Reassembler(starting_offset, REASSEM_FILE), the_file(f), flushing(false)
{
}
: Reassembler(starting_offset, REASSEM_FILE), the_file(f), flushing(false) {}
uint64_t FileReassembler::Flush()
{
if ( flushing )
return 0;
uint64_t FileReassembler::Flush() {
if ( flushing )
return 0;
if ( block_list.Empty() )
return 0;
if ( block_list.Empty() )
return 0;
const auto& last_block = block_list.LastBlock();
const auto& last_block = block_list.LastBlock();
// This is expected to call back into FileReassembler::Undelivered().
flushing = true;
uint64_t rval = TrimToSeq(last_block.upper);
flushing = false;
return rval;
}
// This is expected to call back into FileReassembler::Undelivered().
flushing = true;
uint64_t rval = TrimToSeq(last_block.upper);
flushing = false;
return rval;
}
uint64_t FileReassembler::FlushTo(uint64_t sequence)
{
if ( flushing )
return 0;
uint64_t FileReassembler::FlushTo(uint64_t sequence) {
if ( flushing )
return 0;
flushing = true;
uint64_t rval = TrimToSeq(sequence);
flushing = false;
last_reassem_seq = sequence;
return rval;
}
flushing = true;
uint64_t rval = TrimToSeq(sequence);
flushing = false;
last_reassem_seq = sequence;
return rval;
}
void FileReassembler::BlockInserted(DataBlockMap::const_iterator it)
{
const auto& start_block = it->second;
assert(start_block.seq < start_block.upper);
if ( start_block.seq > last_reassem_seq || start_block.upper <= last_reassem_seq )
return;
void FileReassembler::BlockInserted(DataBlockMap::const_iterator it) {
const auto& start_block = it->second;
assert(start_block.seq < start_block.upper);
if ( start_block.seq > last_reassem_seq || start_block.upper <= last_reassem_seq )
return;
while ( it != block_list.End() )
{
const auto& b = it->second;
while ( it != block_list.End() ) {
const auto& b = it->second;
if ( b.seq > last_reassem_seq )
break;
if ( b.seq > last_reassem_seq )
break;
if ( b.seq == last_reassem_seq )
{ // New stuff.
uint64_t len = b.Size();
last_reassem_seq += len;
the_file->DeliverStream(b.block, len);
}
if ( b.seq == last_reassem_seq ) { // New stuff.
uint64_t len = b.Size();
last_reassem_seq += len;
the_file->DeliverStream(b.block, len);
}
++it;
}
++it;
}
// Throw out forwarded data
TrimToSeq(last_reassem_seq);
}
// Throw out forwarded data
TrimToSeq(last_reassem_seq);
}
void FileReassembler::Undelivered(uint64_t up_to_seq)
{
// If we have blocks that begin below up_to_seq, deliver them.
auto it = block_list.Begin();
void FileReassembler::Undelivered(uint64_t up_to_seq) {
// If we have blocks that begin below up_to_seq, deliver them.
auto it = block_list.Begin();
while ( it != block_list.End() )
{
const auto& b = it->second;
while ( it != block_list.End() ) {
const auto& b = it->second;
if ( b.seq < last_reassem_seq )
{
// Already delivered this block.
++it;
continue;
}
if ( b.seq < last_reassem_seq ) {
// Already delivered this block.
++it;
continue;
}
if ( b.seq >= up_to_seq )
// Block is beyond what we need to process at this point.
break;
if ( b.seq >= up_to_seq )
// Block is beyond what we need to process at this point.
break;
uint64_t gap_at_seq = last_reassem_seq;
uint64_t gap_len = b.seq - last_reassem_seq;
the_file->Gap(gap_at_seq, gap_len);
last_reassem_seq += gap_len;
BlockInserted(it);
// Inserting a block may cause trimming of what's buffered,
// so have to assume 'b' is invalid, hence re-assign to start.
it = block_list.Begin();
}
uint64_t gap_at_seq = last_reassem_seq;
uint64_t gap_len = b.seq - last_reassem_seq;
the_file->Gap(gap_at_seq, gap_len);
last_reassem_seq += gap_len;
BlockInserted(it);
// Inserting a block may cause trimming of what's buffered,
// so have to assume 'b' is invalid, hence re-assign to start.
it = block_list.Begin();
}
if ( up_to_seq > last_reassem_seq )
{
the_file->Gap(last_reassem_seq, up_to_seq - last_reassem_seq);
last_reassem_seq = up_to_seq;
}
}
if ( up_to_seq > last_reassem_seq ) {
the_file->Gap(last_reassem_seq, up_to_seq - last_reassem_seq);
last_reassem_seq = up_to_seq;
}
}
void FileReassembler::Overlap(const u_char* b1, const u_char* b2, uint64_t n)
{
// Not doing anything here yet.
}
} // end file_analysis
// Overlap hook, declared as an override in the FileReassembler class.
// It is currently a deliberate no-op: all three arguments are ignored.
void FileReassembler::Overlap(const u_char* b1, const u_char* b2, uint64_t n) {
// Not doing anything here yet.
}
} // namespace zeek::file_analysis
// Test reassembler logic through FileReassembler.
TEST_CASE("file reassembler")
{
// Can not construct due to protected constructor.
class TestFile : public zeek::file_analysis::File
{
public:
TestFile(const std::string& file_id, const std::string& source_name)
: zeek::file_analysis::File(file_id, source_name)
{
}
};
TEST_CASE("file reassembler") {
// Can not construct due to protected constructor.
class TestFile : public zeek::file_analysis::File {
public:
TestFile(const std::string& file_id, const std::string& source_name)
: zeek::file_analysis::File(file_id, source_name) {}
};
auto f = std::make_unique<TestFile>("test_file_id", "test_source_name");
auto r = std::make_unique<zeek::file_analysis::FileReassembler>(f.get(), 0);
auto f = std::make_unique<TestFile>("test_file_id", "test_source_name");
auto r = std::make_unique<zeek::file_analysis::FileReassembler>(f.get(), 0);
const u_char* data = (u_char*)("0123456789ABCDEF");
const u_char* data = (u_char*)("0123456789ABCDEF");
SUBCASE("block overlap and 64bit overflow")
{
r->NewBlock(0.0, 0xfffffffffffffff7, 3, data);
r->NewBlock(0.0, 0xfffffffffffffff7, 15, data);
r->NewBlock(0.0, 0xfffffffffffffff3, 15, data);
SUBCASE("block overlap and 64bit overflow") {
r->NewBlock(0.0, 0xfffffffffffffff7, 3, data);
r->NewBlock(0.0, 0xfffffffffffffff7, 15, data);
r->NewBlock(0.0, 0xfffffffffffffff3, 15, data);
// 0xfffffffffffffff3 through 0xffffffffffffffff
CHECK_EQ(r->TotalSize(), 12);
// 0xfffffffffffffff3 through 0xffffffffffffffff
CHECK_EQ(r->TotalSize(), 12);
// This previously hung with an endless loop.
r->Flush();
CHECK_FALSE(r->HasBlocks());
CHECK_EQ(r->TotalSize(), 0);
}
// This previously hung with an endless loop.
r->Flush();
CHECK_FALSE(r->HasBlocks());
CHECK_EQ(r->TotalSize(), 0);
}
SUBCASE("reject NewBlock() at 64 bit limit")
{
r->NewBlock(0.0, 0xffffffffffffffff, 4, data);
CHECK_FALSE(r->HasBlocks());
CHECK_EQ(r->TotalSize(), 0);
}
SUBCASE("reject NewBlock() at 64 bit limit") {
r->NewBlock(0.0, 0xffffffffffffffff, 4, data);
CHECK_FALSE(r->HasBlocks());
CHECK_EQ(r->TotalSize(), 0);
}
SUBCASE("truncate NewBlock() to upper 64 bit limit")
{
r->NewBlock(0.0, 0xfffffffffffffffa, 8, data);
CHECK(r->HasBlocks());
CHECK_EQ(r->TotalSize(), 5);
}
SUBCASE("truncate NewBlock() to upper 64 bit limit") {
r->NewBlock(0.0, 0xfffffffffffffffa, 8, data);
CHECK(r->HasBlocks());
CHECK_EQ(r->TotalSize(), 5);
}
SUBCASE("no truncation")
{
r->NewBlock(0.0, 0xfffffffffffffff7, 8, data);
CHECK(r->HasBlocks());
CHECK_EQ(r->TotalSize(), 8);
}
}
SUBCASE("no truncation") {
r->NewBlock(0.0, 0xfffffffffffffff7, 8, data);
CHECK(r->HasBlocks());
CHECK_EQ(r->TotalSize(), 8);
}
}

View file

@ -4,60 +4,57 @@
#include "zeek/Reassem.h"
namespace zeek
{
namespace zeek {
class Connection;
class File;
namespace file_analysis
{
namespace file_analysis {
class File;
class FileReassembler final : public Reassembler
{
class FileReassembler final : public Reassembler {
public:
FileReassembler(File* f, uint64_t starting_offset);
~FileReassembler() override = default;
FileReassembler(File* f, uint64_t starting_offset);
~FileReassembler() override = default;
void Done();
void Done();
// Checks if we have delivered all contents that we can possibly
// deliver for this endpoint.
void CheckEOF();
// Checks if we have delivered all contents that we can possibly
// deliver for this endpoint.
void CheckEOF();
/**
* Discards all contents of the reassembly buffer. This will spin through
* the buffer and call File::DeliverStream() and File::Gap() wherever
* appropriate.
* @return the number of new bytes now detected as gaps in the file.
*/
uint64_t Flush();
/**
* Discards all contents of the reassembly buffer. This will spin through
* the buffer and call File::DeliverStream() and File::Gap() wherever
* appropriate.
* @note Returns 0 without doing any work if a flush is already in
* progress or if no blocks are buffered.
* @return the number of new bytes now detected as gaps in the file.
*/
uint64_t Flush();
/**
* Discards all contents of the reassembly buffer up to a given sequence
* number. This will spin through the buffer and call
* File::DeliverStream() and File::Gap() wherever appropriate.
* @param sequence the sequence number to flush until.
* @return the number of new bytes now detected as gaps in the file.
*/
uint64_t FlushTo(uint64_t sequence);
/**
* Discards all contents of the reassembly buffer up to a given sequence
* number. This will spin through the buffer and call
* File::DeliverStream() and File::Gap() wherever appropriate.
* @note Returns 0 without doing any work if a flush is already in
* progress.
* @param sequence the sequence number to flush until.
* @return the number of new bytes now detected as gaps in the file.
*/
uint64_t FlushTo(uint64_t sequence);
/**
* @return whether the reassembler is currently in the process of flushing
* out the contents of its buffer.
*/
bool IsCurrentlyFlushing() const { return flushing; }
/**
* @return whether the reassembler is currently in the process of flushing
* out the contents of its buffer.
*/
bool IsCurrentlyFlushing() const { return flushing; }
protected:
void Undelivered(uint64_t up_to_seq) override;
void BlockInserted(DataBlockMap::const_iterator it) override;
void Overlap(const u_char* b1, const u_char* b2, uint64_t n) override;
void Undelivered(uint64_t up_to_seq) override;
void BlockInserted(DataBlockMap::const_iterator it) override;
void Overlap(const u_char* b1, const u_char* b2, uint64_t n) override;
File* the_file = nullptr;
bool flushing = false;
};
File* the_file = nullptr;
bool flushing = false;
};
} // namespace file_analysis
} // namespace zeek
} // namespace file_analysis
} // namespace zeek

View file

@ -5,42 +5,38 @@
#include "zeek/file_analysis/File.h"
#include "zeek/file_analysis/Manager.h"
namespace zeek::file_analysis::detail
{
namespace zeek::file_analysis::detail {
FileTimer::FileTimer(double t, const std::string& id, double interval)
: zeek::detail::Timer(t + interval, zeek::detail::TIMER_FILE_ANALYSIS_INACTIVITY), file_id(id)
{
DBG_LOG(DBG_FILE_ANALYSIS, "New %f second timeout timer for %s", interval, file_id.c_str());
}
: zeek::detail::Timer(t + interval, zeek::detail::TIMER_FILE_ANALYSIS_INACTIVITY), file_id(id) {
DBG_LOG(DBG_FILE_ANALYSIS, "New %f second timeout timer for %s", interval, file_id.c_str());
}
void FileTimer::Dispatch(double t, bool is_expire)
{
File* file = file_mgr->LookupFile(file_id);
void FileTimer::Dispatch(double t, bool is_expire) {
File* file = file_mgr->LookupFile(file_id);
if ( ! file )
return;
if ( ! file )
return;
double last_active = file->GetLastActivityTime();
double inactive_time = t > last_active ? t - last_active : 0.0;
double last_active = file->GetLastActivityTime();
double inactive_time = t > last_active ? t - last_active : 0.0;
DBG_LOG(DBG_FILE_ANALYSIS,
"Checking inactivity for %s, last active at %f, "
"inactive for %f",
file_id.c_str(), last_active, inactive_time);
DBG_LOG(DBG_FILE_ANALYSIS,
"Checking inactivity for %s, last active at %f, "
"inactive for %f",
file_id.c_str(), last_active, inactive_time);
if ( last_active == 0.0 )
{
// was created when network_time was zero, so re-schedule w/ valid time
file->UpdateLastActivityTime();
file->ScheduleInactivityTimer();
return;
}
if ( last_active == 0.0 ) {
// was created when network_time was zero, so re-schedule w/ valid time
file->UpdateLastActivityTime();
file->ScheduleInactivityTimer();
return;
}
if ( inactive_time >= file->GetTimeoutInterval() )
file_mgr->Timeout(file_id);
else if ( ! is_expire )
file->ScheduleInactivityTimer();
}
if ( inactive_time >= file->GetTimeoutInterval() )
file_mgr->Timeout(file_id);
else if ( ! is_expire )
file->ScheduleInactivityTimer();
}
} // namespace zeek::file_analysis::detail
} // namespace zeek::file_analysis::detail

View file

@ -6,33 +6,31 @@
#include "zeek/Timer.h"
namespace zeek::file_analysis::detail
{
namespace zeek::file_analysis::detail {
/**
* Timer to periodically check if file analysis for a given file is inactive.
*/
class FileTimer final : public zeek::detail::Timer
{
class FileTimer final : public zeek::detail::Timer {
public:
/**
* Constructor, nothing interesting about it.
* @param t unix time at which the timer should start ticking.
* @param id the file identifier which will be checked for inactivity.
* @param interval amount of time after \a t to check for inactivity.
*/
FileTimer(double t, const std::string& id, double interval);
/**
* Creates an inactivity timer for a file. The underlying timer is set
* for \a t + \a interval.
* @param t unix time at which the timer should start ticking.
* @param id the file identifier which will be checked for inactivity.
* @param interval amount of time after \a t to check for inactivity.
*/
FileTimer(double t, const std::string& id, double interval);
/**
* Check inactivity of file_analysis::File corresponding to #file_id,
* reschedule if active, else call file_analysis::Manager::Timeout.
* @param t current unix time
* @param is_expire true if all pending timers are being expired.
*/
void Dispatch(double t, bool is_expire) override;
/**
* Check inactivity of file_analysis::File corresponding to #file_id,
* reschedule if active, else call file_analysis::Manager::Timeout.
* Does nothing if the file can no longer be found. A file whose last
* activity time is still zero gets its activity time updated and its
* timer rescheduled instead of being timed out.
* @param t current unix time
* @param is_expire true if all pending timers are being expired; in that
* case a still-active file is not rescheduled.
*/
void Dispatch(double t, bool is_expire) override;
private:
std::string file_id;
};
std::string file_id;
};
} // namespace zeek::file_analysis::detail
} // namespace zeek::file_analysis::detail

View file

@ -16,532 +16,477 @@
using namespace std;
namespace zeek::file_analysis
{
namespace zeek::file_analysis {
Manager::Manager()
: plugin::ComponentManager<file_analysis::Component>("Files", "Tag", "AllAnalyzers"),
current_file_id(), magic_state(), cumulative_files(0), max_files(0)
{
}
: plugin::ComponentManager<file_analysis::Component>("Files", "Tag", "AllAnalyzers"),
current_file_id(),
magic_state(),
cumulative_files(0),
max_files(0) {}
Manager::~Manager()
{
for ( MIMEMap::iterator i = mime_types.begin(); i != mime_types.end(); i++ )
delete i->second;
Manager::~Manager() {
for ( MIMEMap::iterator i = mime_types.begin(); i != mime_types.end(); i++ )
delete i->second;
// Have to assume that too much of Zeek has been shutdown by this point
// to do anything more than reclaim memory.
for ( const auto& entry : id_map )
delete entry.second;
// Have to assume that too much of Zeek has been shutdown by this point
// to do anything more than reclaim memory.
for ( const auto& entry : id_map )
delete entry.second;
delete magic_state;
delete analyzer_hash;
}
delete magic_state;
delete analyzer_hash;
}
void Manager::InitPreScript() { }
// Intentionally empty: the file analysis manager does no work in this hook.
void Manager::InitPreScript() {}
void Manager::InitPostScript()
{
auto t = make_intrusive<TypeList>();
t->Append(GetTagType());
t->Append(BifType::Record::Files::AnalyzerArgs);
analyzer_hash = new zeek::detail::CompositeHash(std::move(t));
}
// Creates the composite hash stored in analyzer_hash. Its key type list is
// the component tag type followed by the Files::AnalyzerArgs record type.
void Manager::InitPostScript() {
auto t = make_intrusive<TypeList>();
t->Append(GetTagType());
t->Append(BifType::Record::Files::AnalyzerArgs);
// Allocated with new; the Manager destructor deletes analyzer_hash.
analyzer_hash = new zeek::detail::CompositeHash(std::move(t));
}
void Manager::InitMagic()
{
delete magic_state;
magic_state = zeek::detail::rule_matcher->InitFileMagic();
}
// (Re)initializes the file magic state: any previously held state is
// deleted and replaced by a fresh one obtained from the rule matcher.
void Manager::InitMagic() {
delete magic_state;
magic_state = zeek::detail::rule_matcher->InitFileMagic();
}
void Manager::Terminate()
{
vector<string> keys;
keys.reserve(id_map.size());
void Manager::Terminate() {
vector<string> keys;
keys.reserve(id_map.size());
for ( const auto& entry : id_map )
keys.push_back(entry.first);
for ( const auto& entry : id_map )
keys.push_back(entry.first);
for ( const string& key : keys )
Timeout(key, true);
for ( const string& key : keys )
Timeout(key, true);
event_mgr.Drain();
}
event_mgr.Drain();
}
string Manager::HashHandle(const string& handle) const
{
zeek::detail::hash128_t hash;
zeek::detail::KeyedHash::StaticHash128(handle.data(), handle.size(), &hash);
string Manager::HashHandle(const string& handle) const {
zeek::detail::hash128_t hash;
zeek::detail::KeyedHash::StaticHash128(handle.data(), handle.size(), &hash);
return UID(zeek::detail::bits_per_uid, hash, 2).Base62("F");
}
return UID(zeek::detail::bits_per_uid, hash, 2).Base62("F");
}
void Manager::SetHandle(const string& handle)
{
if ( handle.empty() )
return;
void Manager::SetHandle(const string& handle) {
if ( handle.empty() )
return;
#ifdef DEBUG
if ( zeek::detail::debug_logger.IsEnabled(DBG_FILE_ANALYSIS) )
{
String tmp{handle};
auto rendered = tmp.Render();
DBG_LOG(DBG_FILE_ANALYSIS, "Set current handle to %s", rendered);
delete[] rendered;
}
if ( zeek::detail::debug_logger.IsEnabled(DBG_FILE_ANALYSIS) ) {
String tmp{handle};
auto rendered = tmp.Render();
DBG_LOG(DBG_FILE_ANALYSIS, "Set current handle to %s", rendered);
delete[] rendered;
}
#endif
current_file_id = HashHandle(handle);
}
current_file_id = HashHandle(handle);
}
string Manager::DataIn(const u_char* data, uint64_t len, uint64_t offset, const zeek::Tag& tag,
Connection* conn, bool is_orig, const string& precomputed_id,
const string& mime_type)
{
string id = precomputed_id.empty() ? GetFileID(tag, conn, is_orig) : precomputed_id;
File* file = GetFile(id, conn, tag, is_orig);
string Manager::DataIn(const u_char* data, uint64_t len, uint64_t offset, const zeek::Tag& tag, Connection* conn,
bool is_orig, const string& precomputed_id, const string& mime_type) {
string id = precomputed_id.empty() ? GetFileID(tag, conn, is_orig) : precomputed_id;
File* file = GetFile(id, conn, tag, is_orig);
if ( ! file )
return "";
if ( ! file )
return "";
// This only has any effect when
// * called for the first time for a file
// * being called before file->DataIn is called for the first time (before data is
// added to the bof buffer).
// Afterwards SetMime just ignores what is passed to it. Thus this only has effect during
// the first Manager::DataIn call for each file.
if ( ! mime_type.empty() )
file->SetMime(mime_type);
// This only has any effect when
// * called for the first time for a file
// * being called before file->DataIn is called for the first time (before data is
// added to the bof buffer).
// Afterwards SetMime just ignores what is passed to it. Thus this only has effect during
// the first Manager::DataIn call for each file.
if ( ! mime_type.empty() )
file->SetMime(mime_type);
file->DataIn(data, len, offset);
file->DataIn(data, len, offset);
if ( file->IsComplete() )
{
RemoveFile(file->GetID());
return "";
}
if ( file->IsComplete() ) {
RemoveFile(file->GetID());
return "";
}
return id;
}
return id;
}
string Manager::DataIn(const u_char* data, uint64_t len, const zeek::Tag& tag, Connection* conn,
bool is_orig, const string& precomputed_id, const string& mime_type)
{
string id = precomputed_id.empty() ? GetFileID(tag, conn, is_orig) : precomputed_id;
// Sequential data input shouldn't be going over multiple conns, so don't
// do the check to update connection set.
File* file = GetFile(id, conn, tag, is_orig, false);
string Manager::DataIn(const u_char* data, uint64_t len, const zeek::Tag& tag, Connection* conn, bool is_orig,
const string& precomputed_id, const string& mime_type) {
string id = precomputed_id.empty() ? GetFileID(tag, conn, is_orig) : precomputed_id;
// Sequential data input shouldn't be going over multiple conns, so don't
// do the check to update connection set.
File* file = GetFile(id, conn, tag, is_orig, false);
if ( ! file )
return "";
if ( ! file )
return "";
if ( ! mime_type.empty() )
file->SetMime(mime_type);
if ( ! mime_type.empty() )
file->SetMime(mime_type);
file->DataIn(data, len);
file->DataIn(data, len);
if ( file->IsComplete() )
{
RemoveFile(file->GetID());
return "";
}
if ( file->IsComplete() ) {
RemoveFile(file->GetID());
return "";
}
return id;
}
return id;
}
void Manager::DataIn(const u_char* data, uint64_t len, const string& file_id, const string& source,
const string& mime_type)
{
File* file = GetFile(file_id, nullptr, zeek::Tag::Error, false, false, source.c_str());
const string& mime_type) {
File* file = GetFile(file_id, nullptr, zeek::Tag::Error, false, false, source.c_str());
if ( ! file )
return;
if ( ! file )
return;
if ( ! mime_type.empty() )
file->SetMime(mime_type);
if ( ! mime_type.empty() )
file->SetMime(mime_type);
file->DataIn(data, len);
file->DataIn(data, len);
if ( file->IsComplete() )
RemoveFile(file->GetID());
}
if ( file->IsComplete() )
RemoveFile(file->GetID());
}
void Manager::DataIn(const u_char* data, uint64_t len, uint64_t offset, const string& file_id,
const string& source, const string& mime_type)
{
File* file = GetFile(file_id, nullptr, zeek::Tag::Error, false, false, source.c_str());
void Manager::DataIn(const u_char* data, uint64_t len, uint64_t offset, const string& file_id, const string& source,
const string& mime_type) {
File* file = GetFile(file_id, nullptr, zeek::Tag::Error, false, false, source.c_str());
if ( ! file )
return;
if ( ! file )
return;
if ( ! mime_type.empty() )
file->SetMime(mime_type);
if ( ! mime_type.empty() )
file->SetMime(mime_type);
file->DataIn(data, len, offset);
file->DataIn(data, len, offset);
if ( file->IsComplete() )
RemoveFile(file->GetID());
}
if ( file->IsComplete() )
RemoveFile(file->GetID());
}
void Manager::EndOfFile(const zeek::Tag& tag, Connection* conn)
{
EndOfFile(tag, conn, true);
EndOfFile(tag, conn, false);
}
// Signals end-of-file for both directions of a connection by calling the
// three-argument overload once with is_orig = true and once with false.
void Manager::EndOfFile(const zeek::Tag& tag, Connection* conn) {
EndOfFile(tag, conn, true);
EndOfFile(tag, conn, false);
}
void Manager::EndOfFile(const zeek::Tag& tag, Connection* conn, bool is_orig)
{
// Don't need to create a file if we're just going to remove it right away.
RemoveFile(GetFileID(tag, conn, is_orig));
}
// Signals end-of-file for one direction of a connection: the file ID is
// computed via GetFileID(tag, conn, is_orig) and passed to RemoveFile().
void Manager::EndOfFile(const zeek::Tag& tag, Connection* conn, bool is_orig) {
// Don't need to create a file if we're just going to remove it right away.
RemoveFile(GetFileID(tag, conn, is_orig));
}
void Manager::EndOfFile(const string& file_id)
{
RemoveFile(file_id);
}
// Signals end-of-file for the file with the given ID by passing it to RemoveFile().
void Manager::EndOfFile(const string& file_id) { RemoveFile(file_id); }
string Manager::Gap(uint64_t offset, uint64_t len, const zeek::Tag& tag, Connection* conn,
bool is_orig, const string& precomputed_id)
{
string id = precomputed_id.empty() ? GetFileID(tag, conn, is_orig) : precomputed_id;
File* file = GetFile(id, conn, tag, is_orig);
string Manager::Gap(uint64_t offset, uint64_t len, const zeek::Tag& tag, Connection* conn, bool is_orig,
const string& precomputed_id) {
string id = precomputed_id.empty() ? GetFileID(tag, conn, is_orig) : precomputed_id;
File* file = GetFile(id, conn, tag, is_orig);
if ( ! file )
return "";
if ( ! file )
return "";
file->Gap(offset, len);
return id;
}
file->Gap(offset, len);
return id;
}
string Manager::SetSize(uint64_t size, const zeek::Tag& tag, Connection* conn, bool is_orig,
const string& precomputed_id)
{
string id = precomputed_id.empty() ? GetFileID(tag, conn, is_orig) : precomputed_id;
File* file = GetFile(id, conn, tag, is_orig);
const string& precomputed_id) {
string id = precomputed_id.empty() ? GetFileID(tag, conn, is_orig) : precomputed_id;
File* file = GetFile(id, conn, tag, is_orig);
if ( ! file )
return "";
if ( ! file )
return "";
file->SetTotalBytes(size);
file->SetTotalBytes(size);
if ( file->IsComplete() )
{
RemoveFile(file->GetID());
return "";
}
if ( file->IsComplete() ) {
RemoveFile(file->GetID());
return "";
}
return id;
}
return id;
}
bool Manager::SetTimeoutInterval(const string& file_id, double interval) const
{
File* file = LookupFile(file_id);
bool Manager::SetTimeoutInterval(const string& file_id, double interval) const {
File* file = LookupFile(file_id);
if ( ! file )
return false;
if ( ! file )
return false;
if ( interval > 0 )
file->postpone_timeout = true;
if ( interval > 0 )
file->postpone_timeout = true;
file->SetTimeoutInterval(interval);
return true;
}
file->SetTimeoutInterval(interval);
return true;
}
bool Manager::EnableReassembly(const string& file_id)
{
File* file = LookupFile(file_id);
bool Manager::EnableReassembly(const string& file_id) {
File* file = LookupFile(file_id);
if ( ! file )
return false;
if ( ! file )
return false;
file->EnableReassembly();
return true;
}
file->EnableReassembly();
return true;
}
bool Manager::DisableReassembly(const string& file_id)
{
File* file = LookupFile(file_id);
bool Manager::DisableReassembly(const string& file_id) {
File* file = LookupFile(file_id);
if ( ! file )
return false;
if ( ! file )
return false;
file->DisableReassembly();
return true;
}
file->DisableReassembly();
return true;
}
bool Manager::SetReassemblyBuffer(const string& file_id, uint64_t max)
{
File* file = LookupFile(file_id);
bool Manager::SetReassemblyBuffer(const string& file_id, uint64_t max) {
File* file = LookupFile(file_id);
if ( ! file )
return false;
if ( ! file )
return false;
file->SetReassemblyBuffer(max);
return true;
}
file->SetReassemblyBuffer(max);
return true;
}
bool Manager::SetExtractionLimit(const string& file_id, RecordValPtr args, uint64_t n) const
{
File* file = LookupFile(file_id);
bool Manager::SetExtractionLimit(const string& file_id, RecordValPtr args, uint64_t n) const {
File* file = LookupFile(file_id);
if ( ! file )
return false;
if ( ! file )
return false;
return file->SetExtractionLimit(std::move(args), n);
}
return file->SetExtractionLimit(std::move(args), n);
}
bool Manager::AddAnalyzer(const string& file_id, const zeek::Tag& tag, RecordValPtr args) const
{
File* file = LookupFile(file_id);
bool Manager::AddAnalyzer(const string& file_id, const zeek::Tag& tag, RecordValPtr args) const {
File* file = LookupFile(file_id);
if ( ! file )
return false;
if ( ! file )
return false;
return file->AddAnalyzer(tag, std::move(args));
}
return file->AddAnalyzer(tag, std::move(args));
}
bool Manager::RemoveAnalyzer(const string& file_id, const zeek::Tag& tag, RecordValPtr args) const
{
File* file = LookupFile(file_id);
bool Manager::RemoveAnalyzer(const string& file_id, const zeek::Tag& tag, RecordValPtr args) const {
File* file = LookupFile(file_id);
if ( ! file )
return false;
if ( ! file )
return false;
return file->RemoveAnalyzer(tag, std::move(args));
}
return file->RemoveAnalyzer(tag, std::move(args));
}
File* Manager::GetFile(const string& file_id, Connection* conn, const zeek::Tag& tag, bool is_orig,
bool update_conn, const char* source_name)
{
if ( file_id.empty() )
return nullptr;
File* Manager::GetFile(const string& file_id, Connection* conn, const zeek::Tag& tag, bool is_orig, bool update_conn,
const char* source_name) {
if ( file_id.empty() )
return nullptr;
if ( IsIgnored(file_id) )
return nullptr;
if ( IsIgnored(file_id) )
return nullptr;
File* rval = LookupFile(file_id);
File* rval = LookupFile(file_id);
if ( ! rval )
{
rval = new File(file_id, source_name ? source_name : analyzer_mgr->GetComponentName(tag),
conn, tag, is_orig);
id_map[file_id] = rval;
if ( ! rval ) {
rval = new File(file_id, source_name ? source_name : analyzer_mgr->GetComponentName(tag), conn, tag, is_orig);
id_map[file_id] = rval;
++cumulative_files;
if ( id_map.size() > max_files )
max_files = id_map.size();
++cumulative_files;
if ( id_map.size() > max_files )
max_files = id_map.size();
rval->ScheduleInactivityTimer();
rval->ScheduleInactivityTimer();
// Generate file_new after inserting it into manager's mapping
// in case script-layer calls back into core from the event.
rval->FileEvent(file_new);
// Same for file_over_new_connection.
rval->RaiseFileOverNewConnection(conn, is_orig);
// Generate file_new after inserting it into manager's mapping
// in case script-layer calls back into core from the event.
rval->FileEvent(file_new);
// Same for file_over_new_connection.
rval->RaiseFileOverNewConnection(conn, is_orig);
if ( IsIgnored(file_id) )
return nullptr;
}
else
{
rval->UpdateLastActivityTime();
if ( IsIgnored(file_id) )
return nullptr;
}
else {
rval->UpdateLastActivityTime();
if ( update_conn && rval->UpdateConnectionFields(conn, is_orig) )
rval->RaiseFileOverNewConnection(conn, is_orig);
}
if ( update_conn && rval->UpdateConnectionFields(conn, is_orig) )
rval->RaiseFileOverNewConnection(conn, is_orig);
}
return rval;
}
return rval;
}
// Looks up file_id in id_map and returns the File* stored for it, or nullptr
// when id_map has no entry for that id. Only a find() is performed; the map is
// not modified by this call.
File* Manager::LookupFile(const string& file_id) const
{
const auto& entry = id_map.find(file_id);
if ( entry == id_map.end() )
return nullptr;
File* Manager::LookupFile(const string& file_id) const {
const auto& entry = id_map.find(file_id);
// No mapping for this id.
if ( entry == id_map.end() )
return nullptr;
return entry->second;
}
return entry->second;
}
// Handles a timeout for the file identified by file_id.
//
// Does nothing if LookupFile() finds no such file. Otherwise the file's
// postpone_timeout flag is cleared and the file_timeout event is raised for it.
// If the flag is set again afterwards (presumably by a handler of that event --
// confirm against File::FileEvent) and is_terminating is false, the file's
// last-activity time is refreshed and its inactivity timer is rescheduled.
// In every other case the file is removed via RemoveFile().
//
// @param file_id the file identifier to time out.
// @param is_terminating when true, a postponement request is not honored.
void Manager::Timeout(const string& file_id, bool is_terminating)
{
File* file = LookupFile(file_id);
void Manager::Timeout(const string& file_id, bool is_terminating) {
File* file = LookupFile(file_id);
if ( ! file )
return;
if ( ! file )
return;
// Reset before raising the event so the check below only sees a value
// written while the event was being processed.
file->postpone_timeout = false;
file->postpone_timeout = false;
file->FileEvent(file_timeout);
file->FileEvent(file_timeout);
if ( file->postpone_timeout && ! is_terminating )
{
DBG_LOG(DBG_FILE_ANALYSIS, "Postpone file analysis timeout for %s", file->GetID().c_str());
file->UpdateLastActivityTime();
file->ScheduleInactivityTimer();
return;
}
if ( file->postpone_timeout && ! is_terminating ) {
DBG_LOG(DBG_FILE_ANALYSIS, "Postpone file analysis timeout for %s", file->GetID().c_str());
file->UpdateLastActivityTime();
file->ScheduleInactivityTimer();
return;
}
DBG_LOG(DBG_FILE_ANALYSIS, "File analysis timeout for %s", file->GetID().c_str());
DBG_LOG(DBG_FILE_ANALYSIS, "File analysis timeout for %s", file->GetID().c_str());
RemoveFile(file->GetID());
}
RemoveFile(file->GetID());
}
// Marks the file identified by file_id as ignored by inserting the id into the
// `ignored` set.
//
// @return false if LookupFile() finds no file for file_id (nothing is
//         inserted in that case), true otherwise.
bool Manager::IgnoreFile(const string& file_id)
{
if ( ! LookupFile(file_id) )
return false;
bool Manager::IgnoreFile(const string& file_id) {
// Only ids that currently map to a file can be ignored.
if ( ! LookupFile(file_id) )
return false;
DBG_LOG(DBG_FILE_ANALYSIS, "Ignore FileID %s", file_id.c_str());
DBG_LOG(DBG_FILE_ANALYSIS, "Ignore FileID %s", file_id.c_str());
ignored.insert(file_id);
return true;
}
ignored.insert(file_id);
return true;
}
// Removes the file identified by file_id from the manager.
//
// Looks the file up, calls EndOfFile() on it, then erases file_id from both
// id_map and `ignored` and deletes the File object.
//
// @return false if no file is mapped to file_id, true once it has been removed.
bool Manager::RemoveFile(const string& file_id)
{
// Can't remove from the dictionary/map right away as invoking EndOfFile
// may cause some events to be executed which actually depend on the file
// still being in the dictionary/map.
File* f = LookupFile(file_id);
bool Manager::RemoveFile(const string& file_id) {
// Can't remove from the dictionary/map right away as invoking EndOfFile
// may cause some events to be executed which actually depend on the file
// still being in the dictionary/map.
File* f = LookupFile(file_id);
if ( ! f )
return false;
if ( ! f )
return false;
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Remove file", file_id.c_str());
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Remove file", file_id.c_str());
f->EndOfFile();
id_map.erase(file_id);
ignored.erase(file_id);
delete f;
return true;
}
f->EndOfFile();
// Reports whether file_id is present in the `ignored` set.
bool Manager::IsIgnored(const string& file_id)
{
return ignored.find(file_id) != ignored.end();
}
// Obtains a file id for the given analyzer tag, connection and direction.
//
// Clears current_file_id, then returns an empty string if IsDisabled(tag) is
// true or if get_file_handle is not set. Otherwise enqueues the
// get_file_handle event with the tag value, the connection value and the
// direction flag, drains the event queue, and returns whatever
// current_file_id holds afterwards.
string Manager::GetFileID(const zeek::Tag& tag, Connection* c, bool is_orig)
{
current_file_id.clear();
id_map.erase(file_id);
ignored.erase(file_id);
delete f;
return true;
}
if ( IsDisabled(tag) )
return "";
if ( ! get_file_handle )
return "";
DBG_LOG(DBG_FILE_ANALYSIS, "Raise get_file_handle() for protocol analyzer %s",
analyzer_mgr->GetComponentName(tag).c_str());
const auto& tagval = tag.AsVal();
event_mgr.Enqueue(get_file_handle, tagval, c->GetVal(), val_mgr->Bool(is_orig));
event_mgr.Drain(); // need file handle immediately so we don't have to buffer data
return current_file_id;
}
bool Manager::IsDisabled(const zeek::Tag& tag)
{
if ( ! disabled )
disabled = id::find_const("Files::disable")->AsTableVal();
// Reports whether file_id is currently a member of the `ignored` set.
bool Manager::IsIgnored(const string& file_id) {
    return ignored.count(file_id) > 0;
}
auto index = val_mgr->Count(bool(tag));
auto yield = disabled->FindOrDefault(index);
string Manager::GetFileID(const zeek::Tag& tag, Connection* c, bool is_orig) {
current_file_id.clear();
if ( ! yield )
return false;
if ( IsDisabled(tag) )
return "";
return yield->AsBool();
}
if ( ! get_file_handle )
return "";
Analyzer* Manager::InstantiateAnalyzer(const Tag& tag, RecordValPtr args, File* f) const
{
Component* c = Lookup(tag);
DBG_LOG(DBG_FILE_ANALYSIS, "Raise get_file_handle() for protocol analyzer %s",
analyzer_mgr->GetComponentName(tag).c_str());
if ( ! c )
{
reporter->InternalWarning("unknown file analyzer instantiation request: %s",
tag.AsString().c_str());
return nullptr;
}
const auto& tagval = tag.AsVal();
if ( ! c->Enabled() )
{
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Skip instantiation of disabled analyzer %s", f->id.c_str(),
GetComponentName(tag).c_str());
return nullptr;
}
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Instantiate analyzer %s", f->id.c_str(),
GetComponentName(tag).c_str());
Analyzer* a;
event_mgr.Enqueue(get_file_handle, tagval, c->GetVal(), val_mgr->Bool(is_orig));
event_mgr.Drain(); // need file handle immediately so we don't have to buffer data
return current_file_id;
}
if ( c->factory_func )
a = c->factory_func(std::move(args), f);
else
{
reporter->InternalWarning("file analyzer %s cannot be instantiated "
"dynamically",
c->CanonicalName().c_str());
return nullptr;
}
if ( ! a )
reporter->InternalError("file analyzer instantiation failed");
a->SetAnalyzerTag(tag);
return a;
}
zeek::detail::RuleMatcher::MIME_Matches*
Manager::DetectMIME(const u_char* data, uint64_t len,
zeek::detail::RuleMatcher::MIME_Matches* rval) const
{
if ( ! magic_state )
reporter->InternalError("file magic signature state not initialized");
bool Manager::IsDisabled(const zeek::Tag& tag) {
if ( ! disabled )
disabled = id::find_const("Files::disable")->AsTableVal();
rval = zeek::detail::rule_matcher->Match(magic_state, data, len, rval);
zeek::detail::rule_matcher->ClearFileMagicState(magic_state);
return rval;
}
// Convenience overload: runs the pointer-taking DetectMIME() overload on
// [data, data + len) with a local match container and returns a single MIME
// type string.
//
// @return an empty string if no matches were recorded; otherwise the first
//         string of the first entry in the match container's iteration order.
string Manager::DetectMIME(const u_char* data, uint64_t len) const
{
zeek::detail::RuleMatcher::MIME_Matches matches;
DetectMIME(data, len, &matches);
if ( matches.empty() )
return "";
// NOTE(review): dereferences the first entry's set without checking that the
// set itself is non-empty -- presumably the matcher never stores an empty
// set; confirm.
return *(matches.begin()->second.begin());
}
VectorValPtr GenMIMEMatchesVal(const zeek::detail::RuleMatcher::MIME_Matches& m)
{
static auto mime_matches = id::find_type<VectorType>("mime_matches");
static auto mime_match = id::find_type<RecordType>("mime_match");
auto rval = make_intrusive<VectorVal>(mime_matches);
auto index = val_mgr->Count(bool(tag));
auto yield = disabled->FindOrDefault(index);
for ( zeek::detail::RuleMatcher::MIME_Matches::const_iterator it = m.begin(); it != m.end();
++it )
{
auto element = make_intrusive<RecordVal>(mime_match);
for ( set<string>::const_iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2 )
{
element->Assign(0, it->first);
element->Assign(1, *it2);
}
rval->Assign(rval->Size(), std::move(element));
}
return rval;
}
} // namespace zeek::file_analysis
if ( ! yield )
return false;
return yield->AsBool();
}
// Creates a file analyzer instance for `tag`, passing `args` and `f` to the
// component's factory function, and stamps the new analyzer with `tag`.
//
// Returns nullptr (after an internal warning or a debug log entry) when:
//   - no component is registered for `tag`,
//   - the component is not enabled, or
//   - the component has no factory function.
// A null result from the factory itself is reported via InternalError().
Analyzer* Manager::InstantiateAnalyzer(const Tag& tag, RecordValPtr args, File* f) const {
    Component* component = Lookup(tag);

    if ( component == nullptr ) {
        reporter->InternalWarning("unknown file analyzer instantiation request: %s", tag.AsString().c_str());
        return nullptr;
    }

    if ( ! component->Enabled() ) {
        DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Skip instantiation of disabled analyzer %s", f->id.c_str(),
                GetComponentName(tag).c_str());
        return nullptr;
    }

    DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Instantiate analyzer %s", f->id.c_str(), GetComponentName(tag).c_str());

    // Without a factory there is no way to build an instance on demand.
    if ( ! component->factory_func ) {
        reporter->InternalWarning(
            "file analyzer %s cannot be instantiated "
            "dynamically",
            component->CanonicalName().c_str());
        return nullptr;
    }

    Analyzer* instance = component->factory_func(std::move(args), f);

    // NOTE(review): the dereference below relies on InternalError() not
    // returning -- confirm against Reporter.
    if ( instance == nullptr )
        reporter->InternalError("file analyzer instantiation failed");

    instance->SetAnalyzerTag(tag);
    return instance;
}
// Matches [data, data + len) against the file magic signatures held in
// magic_state and returns the matcher's result. `rval` is forwarded to the
// matcher unchanged. The magic state is cleared again after matching.
//
// Reports an internal error if magic_state has not been set up.
zeek::detail::RuleMatcher::MIME_Matches* Manager::DetectMIME(const u_char* data, uint64_t len,
                                                             zeek::detail::RuleMatcher::MIME_Matches* rval) const {
    if ( magic_state == nullptr )
        reporter->InternalError("file magic signature state not initialized");

    zeek::detail::RuleMatcher::MIME_Matches* matches =
        zeek::detail::rule_matcher->Match(magic_state, data, len, rval);

    zeek::detail::rule_matcher->ClearFileMagicState(magic_state);
    return matches;
}
// Convenience overload: collects matches for [data, data + len) into a local
// container and returns one MIME type string -- the first string of the first
// entry in the container's iteration order -- or "" if nothing matched.
string Manager::DetectMIME(const u_char* data, uint64_t len) const {
    zeek::detail::RuleMatcher::MIME_Matches found;
    DetectMIME(data, len, &found);

    if ( found.empty() )
        return "";

    const auto& first_entry = *found.begin();
    return *first_entry.second.begin();
}
// Converts a set of MIME matches into a script-layer "mime_matches" vector.
//
// One "mime_match" record is appended per entry of `m`. For each string in
// the entry's set, field 0 is assigned the entry's key and field 1 the string.
VectorValPtr GenMIMEMatchesVal(const zeek::detail::RuleMatcher::MIME_Matches& m) {
    static auto mime_matches = id::find_type<VectorType>("mime_matches");
    static auto mime_match = id::find_type<RecordType>("mime_match");

    auto result = make_intrusive<VectorVal>(mime_matches);

    for ( const auto& entry : m ) {
        auto record = make_intrusive<RecordVal>(mime_match);

        // NOTE(review): every iteration writes the same two fields, so only
        // the last string of the set survives in the record (and an empty set
        // leaves both fields unassigned). Kept as is -- confirm whether one
        // record per string was intended.
        for ( const auto& mime : entry.second ) {
            record->Assign(0, entry.first);
            record->Assign(1, mime);
        }

        result->Assign(result->Size(), std::move(record));
    }

    return result;
}
} // namespace zeek::file_analysis

View file

@ -12,446 +12,436 @@
#include "zeek/file_analysis/FileTimer.h"
#include "zeek/plugin/ComponentManager.h"
namespace zeek
{
namespace zeek {
class TableVal;
class VectorVal;
namespace run_state
{
namespace run_state {
extern bool terminating;
} // namespace run_state
} // namespace run_state
namespace analyzer
{
namespace analyzer {
class Analyzer;
} // namespace analyzer
} // namespace analyzer
namespace detail
{
namespace detail {
class CompositeHash;
}
}
namespace file_analysis
{
namespace file_analysis {
class File;
/**
* Main entry point for interacting with file analysis.
*/
class Manager : public plugin::ComponentManager<Component>
{
class Manager : public plugin::ComponentManager<Component> {
public:
/**
* Constructor.
*/
Manager();
/**
* Constructor.
*/
Manager();
/**
* Destructor. Times out any currently active file analyses.
*/
~Manager();
/**
* Destructor. Times out any currently active file analyses.
*/
~Manager();
/**
* First-stage initialization of the manager. This is called early on
* during Zeek's initialization, before any scripts are processed.
*/
void InitPreScript();
/**
* First-stage initialization of the manager. This is called early on
* during Zeek's initialization, before any scripts are processed.
*/
void InitPreScript();
/**
* Second-stage initialization of the manager. This is called late
* during Zeek's initialization after any scripts are processed.
*/
void InitPostScript();
/**
* Second-stage initialization of the manager. This is called late
* during Zeek's initialization after any scripts are processed.
*/
void InitPostScript();
/**
* Initializes the state required to match against file magic signatures
* for MIME type identification.
*/
void InitMagic();
/**
* Initializes the state required to match against file magic signatures
* for MIME type identification.
*/
void InitMagic();
/**
* Times out any active file analysis to prepare for shutdown.
*/
void Terminate();
/**
* Times out any active file analysis to prepare for shutdown.
*/
void Terminate();
/**
* Creates a file identifier from a unique file handle string.
* @param handle a unique string (may contain NULs) which identifies
* a single file.
* @return a prettified MD5 hash of \a handle, truncated to *bits_per_uid* bits.
*/
std::string HashHandle(const std::string& handle) const;
/**
* Creates a file identifier from a unique file handle string.
* @param handle a unique string (may contain NULs) which identifies
* a single file.
* @return a prettified MD5 hash of \a handle, truncated to *bits_per_uid* bits.
*/
std::string HashHandle(const std::string& handle) const;
/**
* Take in a unique file handle string to identify next piece of
* incoming file data/information.
* @param handle a unique string (may contain NULs) which identifies
* a single file.
*/
void SetHandle(const std::string& handle);
/**
* Take in a unique file handle string to identify next piece of
* incoming file data/information.
* @param handle a unique string (may contain NULs) which identifies
* a single file.
*/
void SetHandle(const std::string& handle);
/**
* Pass in non-sequential file data.
* @param data pointer to start of a chunk of file data.
* @param len number of bytes in the data chunk.
* @param offset number of bytes from start of file that data chunk occurs.
* @param tag network protocol over which the file data is transferred.
* @param conn network connection over which the file data is transferred.
* @param is_orig true if the file is being sent from connection originator
* or false if is being sent in the opposite direction.
* @param precomputed_file_id may be set to a previous return value in order to
* bypass costly file handle lookups.
* @param mime_type may be set to the mime type of the file, if already known due
* to the protocol. This is, e.g., the case in TLS connections where X.509
* certificates are passed as files; here the type of the file is set by
* the protocol. If this parameter is given, MIME type detection will be
* disabled.
* This parameter only has any effect for the first DataIn call of each
* file. It is ignored for all subsequent calls.
* @return a unique file ID string which, in certain contexts, may be
* cached and passed back into a subsequent function call in order
* to avoid costly file handle lookups (which have to go through
* the \c get_file_handle script-layer event). An empty string
* indicates the associated file is not going to be analyzed further.
*/
std::string DataIn(const u_char* data, uint64_t len, uint64_t offset, const zeek::Tag& tag,
Connection* conn, bool is_orig, const std::string& precomputed_file_id = "",
const std::string& mime_type = "");
/**
* Pass in non-sequential file data.
* @param data pointer to start of a chunk of file data.
* @param len number of bytes in the data chunk.
* @param offset number of bytes from start of file that data chunk occurs.
* @param tag network protocol over which the file data is transferred.
* @param conn network connection over which the file data is transferred.
* @param is_orig true if the file is being sent from connection originator
* or false if is being sent in the opposite direction.
* @param precomputed_file_id may be set to a previous return value in order to
* bypass costly file handle lookups.
* @param mime_type may be set to the mime type of the file, if already known due
* to the protocol. This is, e.g., the case in TLS connections where X.509
* certificates are passed as files; here the type of the file is set by
* the protocol. If this parameter is given, MIME type detection will be
* disabled.
* This parameter only has any effect for the first DataIn call of each
* file. It is ignored for all subsequent calls.
* @return a unique file ID string which, in certain contexts, may be
* cached and passed back into a subsequent function call in order
* to avoid costly file handle lookups (which have to go through
* the \c get_file_handle script-layer event). An empty string
* indicates the associated file is not going to be analyzed further.
*/
std::string DataIn(const u_char* data, uint64_t len, uint64_t offset, const zeek::Tag& tag, Connection* conn,
bool is_orig, const std::string& precomputed_file_id = "", const std::string& mime_type = "");
/**
* Pass in sequential file data.
* @param data pointer to start of a chunk of file data.
* @param len number of bytes in the data chunk.
* @param tag network protocol over which the file data is transferred.
* @param conn network connection over which the file data is transferred.
* @param is_orig true if the file is being sent from connection originator
* or false if is being sent in the opposite direction.
* @param precomputed_file_id may be set to a previous return value in order to
* bypass costly file handle lookups.
* @param mime_type may be set to the mime type of the file, if already known due
* to the protocol. This is, e.g., the case in TLS connections where X.509
* certificates are passed as files; here the type of the file is set by
* the protocol. If this parameter is given, mime type detection will be
* disabled.
* This parameter is only used for the first bit of data for each file.
* @return a unique file ID string which, in certain contexts, may be
* cached and passed back into a subsequent function call in order
* to avoid costly file handle lookups (which have to go through
* the \c get_file_handle script-layer event). An empty string
* indicates the associated file is not going to be analyzed further.
*/
std::string DataIn(const u_char* data, uint64_t len, const zeek::Tag& tag, Connection* conn,
bool is_orig, const std::string& precomputed_file_id = "",
const std::string& mime_type = "");
/**
* Pass in sequential file data.
* @param data pointer to start of a chunk of file data.
* @param len number of bytes in the data chunk.
* @param tag network protocol over which the file data is transferred.
* @param conn network connection over which the file data is transferred.
* @param is_orig true if the file is being sent from connection originator
* or false if is being sent in the opposite direction.
* @param precomputed_file_id may be set to a previous return value in order to
* bypass costly file handle lookups.
* @param mime_type may be set to the mime type of the file, if already known due
* to the protocol. This is, e.g., the case in TLS connections where X.509
* certificates are passed as files; here the type of the file is set by
* the protocol. If this parameter is given, mime type detection will be
* disabled.
* This parameter is only used for the first bit of data for each file.
* @return a unique file ID string which, in certain contexts, may be
* cached and passed back into a subsequent function call in order
* to avoid costly file handle lookups (which have to go through
* the \c get_file_handle script-layer event). An empty string
* indicates the associated file is not going to be analyzed further.
*/
std::string DataIn(const u_char* data, uint64_t len, const zeek::Tag& tag, Connection* conn, bool is_orig,
const std::string& precomputed_file_id = "", const std::string& mime_type = "");
/**
* Pass in sequential file data from external source (e.g. input framework).
* @param data pointer to start of a chunk of file data.
* @param len number of bytes in the data chunk.
* @param file_id an identifier for the file (usually a hash of \a source).
* @param source uniquely identifies the file and should also describe
* in human-readable form where the file input is coming from (e.g.
* a local file path).
* @param mime_type may be set to the mime type of the file, if already known due
* to the protocol. This is, e.g., the case in TLS connections where X.509
* certificates are passed as files; here the type of the file is set by
* the protocol. If this parameter is given, mime type detection will be
* disabled.
* This parameter is only used for the first bit of data for each file.
*/
void DataIn(const u_char* data, uint64_t len, const std::string& file_id,
const std::string& source, const std::string& mime_type = "");
/**
* Pass in sequential file data from external source (e.g. input framework).
* @param data pointer to start of a chunk of file data.
* @param len number of bytes in the data chunk.
* @param file_id an identifier for the file (usually a hash of \a source).
* @param source uniquely identifies the file and should also describe
* in human-readable form where the file input is coming from (e.g.
* a local file path).
* @param mime_type may be set to the mime type of the file, if already known due
* to the protocol. This is, e.g., the case in TLS connections where X.509
* certificates are passed as files; here the type of the file is set by
* the protocol. If this parameter is given, mime type detection will be
* disabled.
* This parameter is only used for the first bit of data for each file.
*/
void DataIn(const u_char* data, uint64_t len, const std::string& file_id, const std::string& source,
const std::string& mime_type = "");
/**
* Pass in sequential file data from external source (e.g. input framework).
* @param data pointer to start of a chunk of file data.
* @param len number of bytes in the data chunk.
* @param offset number of bytes from start of file that data chunk occurs.
* @param file_id an identifier for the file (usually a hash of \a source).
* @param source uniquely identifies the file and should also describe
* in human-readable form where the file input is coming from (e.g.
* a local file path).
* @param mime_type may be set to the mime type of the file, if already known due
* to the protocol. This is, e.g., the case in TLS connections where X.509
* certificates are passed as files; here the type of the file is set by
* the protocol. If this parameter is given, mime type detection will be
* disabled.
* This parameter is only used for the first bit of data for each file.
*/
void DataIn(const u_char* data, uint64_t len, uint64_t offset, const std::string& file_id,
const std::string& source, const std::string& mime_type = "");
/**
* Pass in sequential file data from external source (e.g. input framework).
* @param data pointer to start of a chunk of file data.
* @param len number of bytes in the data chunk.
* @param offset number of bytes from start of file that data chunk occurs.
* @param file_id an identifier for the file (usually a hash of \a source).
* @param source uniquely identifies the file and should also describe
* in human-readable form where the file input is coming from (e.g.
* a local file path).
* @param mime_type may be set to the mime type of the file, if already known due
* to the protocol. This is, e.g., the case in TLS connections where X.509
* certificates are passed as files; here the type of the file is set by
* the protocol. If this parameter is given, mime type detection will be
* disabled.
* This parameter is only used for the first bit of data for each file.
*/
void DataIn(const u_char* data, uint64_t len, uint64_t offset, const std::string& file_id,
const std::string& source, const std::string& mime_type = "");
/**
* Signal the end of file data regardless of which direction it is being
* sent over the connection.
* @param tag network protocol over which the file data is transferred.
* @param conn network connection over which the file data is transferred.
*/
void EndOfFile(const zeek::Tag& tag, Connection* conn);
/**
* Signal the end of file data regardless of which direction it is being
* sent over the connection.
* @param tag network protocol over which the file data is transferred.
* @param conn network connection over which the file data is transferred.
*/
void EndOfFile(const zeek::Tag& tag, Connection* conn);
/**
* Signal the end of file data being transferred over a connection in
* a particular direction.
* @param tag network protocol over which the file data is transferred.
* @param conn network connection over which the file data is transferred.
*/
void EndOfFile(const zeek::Tag& tag, Connection* conn, bool is_orig);
/**
* Signal the end of file data being transferred over a connection in
* a particular direction.
* @param tag network protocol over which the file data is transferred.
* @param conn network connection over which the file data is transferred.
*/
void EndOfFile(const zeek::Tag& tag, Connection* conn, bool is_orig);
/**
* Signal the end of file data being transferred using the file identifier.
* @param file_id the file identifier/hash.
*/
void EndOfFile(const std::string& file_id);
/**
* Signal the end of file data being transferred using the file identifier.
* @param file_id the file identifier/hash.
*/
void EndOfFile(const std::string& file_id);
/**
* Signal a gap in the file data stream.
* @param offset number of bytes into file at which missing chunk starts.
* @param len length in bytes of the missing chunk of file data.
* @param tag network protocol over which the file data is transferred.
* @param conn network connection over which the file data is transferred.
* @param is_orig true if the file is being sent from connection originator
* or false if is being sent in the opposite direction.
* @param precomputed_file_id may be set to a previous return value in order to
* bypass costly file handle lookups.
* @return a unique file ID string which, in certain contexts, may be
* cached and passed back into a subsequent function call in order
* to avoid costly file handle lookups (which have to go through
* the \c get_file_handle script-layer event). An empty string
* indicates the associated file is not going to be analyzed further.
*/
std::string Gap(uint64_t offset, uint64_t len, const zeek::Tag& tag, Connection* conn,
bool is_orig, const std::string& precomputed_file_id = "");
/**
* Signal a gap in the file data stream.
* @param offset number of bytes into file at which missing chunk starts.
* @param len length in bytes of the missing chunk of file data.
* @param tag network protocol over which the file data is transferred.
* @param conn network connection over which the file data is transferred.
* @param is_orig true if the file is being sent from connection originator
* or false if is being sent in the opposite direction.
* @param precomputed_file_id may be set to a previous return value in order to
* bypass costly file handle lookups.
* @return a unique file ID string which, in certain contexts, may be
* cached and passed back into a subsequent function call in order
* to avoid costly file handle lookups (which have to go through
* the \c get_file_handle script-layer event). An empty string
* indicates the associated file is not going to be analyzed further.
*/
std::string Gap(uint64_t offset, uint64_t len, const zeek::Tag& tag, Connection* conn, bool is_orig,
const std::string& precomputed_file_id = "");
/**
* Provide the expected number of bytes that comprise a file.
* @param size the number of bytes in the full file.
* @param tag network protocol over which the file data is transferred.
* @param conn network connection over which the file data is transferred.
* @param is_orig true if the file is being sent from connection originator
* or false if is being sent in the opposite direction.
* @param precomputed_file_id may be set to a previous return value in order to
* bypass costly file handle lookups.
* @return a unique file ID string which, in certain contexts, may be
* cached and passed back into a subsequent function call in order
* to avoid costly file handle lookups (which have to go through
* the \c get_file_handle script-layer event). An empty string
* indicates the associated file is not going to be analyzed further.
*/
std::string SetSize(uint64_t size, const zeek::Tag& tag, Connection* conn, bool is_orig,
const std::string& precomputed_file_id = "");
/**
* Provide the expected number of bytes that comprise a file.
* @param size the number of bytes in the full file.
* @param tag network protocol over which the file data is transferred.
* @param conn network connection over which the file data is transferred.
* @param is_orig true if the file is being sent from connection originator
* or false if is being sent in the opposite direction.
* @param precomputed_file_id may be set to a previous return value in order to
* bypass costly file handle lookups.
* @return a unique file ID string which, in certain contexts, may be
* cached and passed back into a subsequent function call in order
* to avoid costly file handle lookups (which have to go through
* the \c get_file_handle script-layer event). An empty string
* indicates the associated file is not going to be analyzed further.
*/
std::string SetSize(uint64_t size, const zeek::Tag& tag, Connection* conn, bool is_orig,
const std::string& precomputed_file_id = "");
/**
* Starts ignoring a file, which will finally be removed from internal
* mappings on EOF or TIMEOUT.
* @param file_id the file identifier/hash.
* @return false if file identifier did not map to anything, else true.
*/
bool IgnoreFile(const std::string& file_id);
/**
* Starts ignoring a file, which will finally be removed from internal
* mappings on EOF or TIMEOUT.
* @param file_id the file identifier/hash.
* @return false if file identifier did not map to anything, else true.
*/
bool IgnoreFile(const std::string& file_id);
/**
* Sets an inactivity threshold for the file.
* @param file_id the file identifier/hash.
* @param interval the amount of time in which no activity is seen for
* the file identified by \a file_id that will cause the file
* to be considered stale, timed out, and then resource reclaimed.
* @return false if file identifier did not map to anything, else true.
*/
bool SetTimeoutInterval(const std::string& file_id, double interval) const;
/**
* Sets an inactivity threshold for the file.
* @param file_id the file identifier/hash.
* @param interval the amount of time in which no activity is seen for
* the file identified by \a file_id that will cause the file
* to be considered stale, timed out, and then resource reclaimed.
* @return false if file identifier did not map to anything, else true.
*/
bool SetTimeoutInterval(const std::string& file_id, double interval) const;
/**
* Enable the reassembler for a file.
*/
bool EnableReassembly(const std::string& file_id);
/**
* Enable the reassembler for a file.
*/
bool EnableReassembly(const std::string& file_id);
/**
* Disable the reassembler for a file.
*/
bool DisableReassembly(const std::string& file_id);
/**
* Disable the reassembler for a file.
*/
bool DisableReassembly(const std::string& file_id);
/**
* Set the maximum size of the reassembly buffer for a file, in bytes.
*/
bool SetReassemblyBuffer(const std::string& file_id, uint64_t max);
/**
* Set the maximum size of the reassembly buffer for a file, in bytes.
*/
bool SetReassemblyBuffer(const std::string& file_id, uint64_t max);
/**
* Sets a limit on the maximum size allowed for extracting the file
* to local disk;
* @param file_id the file identifier/hash.
* @param args a \c AnalyzerArgs value which describes a file analyzer,
* which should be a file extraction analyzer.
* @param n the new extraction limit, in bytes.
* @return false if file identifier and analyzer did not map to anything,
* else true.
*/
bool SetExtractionLimit(const std::string& file_id, RecordValPtr args, uint64_t n) const;
/**
* Sets a limit on the maximum size allowed for extracting the file
* to local disk;
* @param file_id the file identifier/hash.
* @param args a \c AnalyzerArgs value which describes a file analyzer,
* which should be a file extraction analyzer.
* @param n the new extraction limit, in bytes.
* @return false if file identifier and analyzer did not map to anything,
* else true.
*/
bool SetExtractionLimit(const std::string& file_id, RecordValPtr args, uint64_t n) const;
/**
* Try to retrieve a file that's being analyzed, using its identifier/hash.
* @param file_id the file identifier/hash.
* @return the File object mapped to \a file_id, or a null pointer if no
* mapping exists.
*/
File* LookupFile(const std::string& file_id) const;
/**
* Try to retrieve a file that's being analyzed, using its identifier/hash.
* @param file_id the file identifier/hash.
* @return the File object mapped to \a file_id, or a null pointer if no
* mapping exists.
*/
File* LookupFile(const std::string& file_id) const;
/**
* Queue attachment of an analyzer to the file identifier. Multiple
* analyzers of a given type can be attached per file identifier at a time
* as long as the arguments differ.
* @param file_id the file identifier/hash.
* @param tag the analyzer tag of the file analyzer to add.
* @param args a \c AnalyzerArgs value which describes a file analyzer.
* @return false if the analyzer failed to be instantiated, else true.
*/
bool AddAnalyzer(const std::string& file_id, const zeek::Tag& tag, RecordValPtr args) const;
/**
* Queue attachment of an analyzer to the file identifier. Multiple
* analyzers of a given type can be attached per file identifier at a time
* as long as the arguments differ.
* @param file_id the file identifier/hash.
* @param tag the analyzer tag of the file analyzer to add.
* @param args a \c AnalyzerArgs value which describes a file analyzer.
* @return false if the analyzer failed to be instantiated, else true.
*/
bool AddAnalyzer(const std::string& file_id, const zeek::Tag& tag, RecordValPtr args) const;
/**
 * Queue removal of an analyzer for a given file identifier.
 * @param file_id the file identifier/hash.
 * @param tag the analyzer tag of the file analyzer to remove.
 * @param args an \c AnalyzerArgs value which describes a file analyzer.
 * @return true if the analyzer is active at the time of call, else false.
 */
bool RemoveAnalyzer(const std::string& file_id, const zeek::Tag& tag, RecordValPtr args) const;
/**
 * Tells whether analysis for a file is active or ignored.
 * @param file_id the file identifier/hash.
 * @return whether the file mapped to \a file_id is being ignored.
 */
bool IsIgnored(const std::string& file_id);
/**
 * Instantiates a new file analyzer instance for the file.
 * @param tag The file analyzer's tag.
 * @param args The file analyzer argument/option values.
 * @param f The file the analyzer is to be associated with.
 * @return The new analyzer instance or null if tag is invalid.
 */
Analyzer* InstantiateAnalyzer(const Tag& tag, RecordValPtr args, File* f) const;
/**
 * Returns a set of all matching MIME magic signatures for a given
 * chunk of data.
 * @param data A chunk of bytes to match magic MIME signatures against.
 * @param len The number of bytes in \a data.
 * @param rval An optional preexisting structure in which to insert
 *             new matches. If it's a null pointer, an object is
 *             allocated and returned from the method.
 * @return Set of all matching file magic signatures, which may be
 *         an object allocated by the method if \a rval is a null pointer.
 */
zeek::detail::RuleMatcher::MIME_Matches* DetectMIME(const u_char* data, uint64_t len,
                                                    zeek::detail::RuleMatcher::MIME_Matches* rval) const;
/**
 * Returns the strongest MIME magic signature match for a given data chunk.
 * @param data A chunk of bytes to match magic MIME signatures against.
 * @param len The number of bytes in \a data.
 * @return The MIME type string of the strongest file magic signature
 *         match, or an empty string if nothing matched.
 */
std::string DetectMIME(const u_char* data, uint64_t len) const;
/** @return the number of entries currently held in \c id_map. */
uint64_t CurrentFiles() { return id_map.size(); }

/** @return the current value of the \c max_files member. */
uint64_t MaxFiles() { return max_files; }

/** @return the current value of the \c cumulative_files member. */
uint64_t CumulativeFiles() { return cumulative_files; }

/** @return the \c analyzer_hash pointer; the member is initialized to a null pointer. */
zeek::detail::CompositeHash* GetAnalyzerHash() const { return analyzer_hash; }
protected:
friend class detail::FileTimer;
friend class detail::FileTimer;
/**
 * Create a new file to be analyzed or retrieve an existing one.
 * @param file_id the file identifier/hash.
 * @param conn network connection, if any, over which the file is
 *        transferred.
 * @param tag network protocol, if any, over which the file is transferred.
 * @param is_orig true if the file is being sent from connection originator
 *        or false if it is being sent in the opposite direction (or if
 *        this file isn't related to a connection).
 * @param update_conn whether we need to update connection-related field
 *        in the \c fa_file record value associated with the file.
 * @param source_name an optional value of the source field to fill in.
 * @return the File object mapped to \a file_id or a null pointer if
 *         analysis is being ignored for the associated file. A File
 *         object may be created if a mapping doesn't exist, and if it did
 *         exist, the activity time is refreshed along with any
 *         connection-related fields.
 */
File* GetFile(const std::string& file_id, Connection* conn = nullptr, const zeek::Tag& tag = zeek::Tag::Error,
              bool is_orig = false, bool update_conn = true, const char* source_name = nullptr);
/**
 * Evaluate timeout policy for a file and remove the File object mapped to
 * \a file_id if needed.
 * @param file_id the file identifier/hash.
 * @param is_terminating whether the Manager (and probably Zeek) is in a
 *        terminating state. If true, then the timeout cannot be postponed.
 */
void Timeout(const std::string& file_id, bool is_terminating = run_state::terminating);
/**
 * Immediately remove file_analysis::File object associated with \a file_id.
 * @param file_id the file identifier/hash.
 * @return false if file id string did not map to anything, else true.
 */
bool RemoveFile(const std::string& file_id);
/**
 * Sets #current_file_id to a hash of a unique file handle string based on
 * what the \c get_file_handle event derives from the connection params.
 * Event queue is flushed so that we can get the handle value immediately.
 * @param tag network protocol over which the file is transferred.
 * @param c network connection over which the file is transferred.
 * @param is_orig true if the file is being sent from connection originator
 *        or false if it is being sent in the opposite direction.
 * @return #current_file_id, which is a hash of a unique file handle string
 *         set by a \c get_file_handle event handler.
 */
std::string GetFileID(const zeek::Tag& tag, Connection* c, bool is_orig);
/**
 * Check if analysis is available for files transferred over a given
 * network protocol.
 * @param tag the network protocol over which files can be transferred and
 *        analyzed by the file analysis framework.
 * @return whether file analysis is disabled for the analyzer given by
 *         \a tag.
 */
static bool IsDisabled(const zeek::Tag& tag);
private:
using TagSet = std::set<Tag>;                   /**< A set of analyzer tags. */
using MIMEMap = std::map<std::string, TagSet*>; /**< Maps a string key to a tag set (see \c mime_types). */

/**
 * Looks up the tag set associated with a MIME type.
 * @param mtype the MIME type string to look up.
 * @param add_if_not_found NOTE(review): judging by the name, a missing entry
 *        is presumably created when this is true -- confirm against the
 *        implementation.
 * @return pointer to the tag set for \a mtype.
 */
TagSet* LookupMIMEType(const std::string& mtype, bool add_if_not_found);
std::map<std::string, File*> id_map;           /**< Map file ID to file_analysis::File records. */
std::set<std::string> ignored;                 /**< Ignored files. Will be finally removed on EOF. */
std::string current_file_id;                   /**< Hash of what get_file_handle event sets. */
zeek::detail::RuleFileMagicState* magic_state; /**< File magic signature match state. */
MIMEMap mime_types;                            /**< Mapping of MIME types to analyzers. */

inline static TableVal* disabled = nullptr;      /**< Table of disabled analyzers. */
inline static TableType* tag_set_type = nullptr; /**< Type for set[tag]. */

size_t cumulative_files; /**< Value reported by CumulativeFiles(). */
size_t max_files;        /**< Value reported by MaxFiles(). */
zeek::detail::CompositeHash* analyzer_hash = nullptr;
};
zeek::detail::CompositeHash* analyzer_hash = nullptr;
};
/**
* Returns a script-layer value corresponding to the \c mime_matches type.
@ -459,8 +449,8 @@ private:
*/
VectorValPtr GenMIMEMatchesVal(const zeek::detail::RuleMatcher::MIME_Matches& m);
} // namespace file_analysis
} // namespace file_analysis
extern file_analysis::Manager* file_mgr;
} // namespace zeek
} // namespace zeek

View file

@ -10,57 +10,49 @@
#include "zeek/file_analysis/Manager.h"
#include "zeek/util.h"
namespace zeek::file_analysis::detail
{
namespace zeek::file_analysis::detail {
DataEvent::DataEvent(RecordValPtr args, file_analysis::File* file, EventHandlerPtr ce,
EventHandlerPtr se)
: file_analysis::Analyzer(file_mgr->GetComponentTag("DATA_EVENT"), std::move(args), file),
chunk_event(ce), stream_event(se)
{
}
DataEvent::DataEvent(RecordValPtr args, file_analysis::File* file, EventHandlerPtr ce, EventHandlerPtr se)
: file_analysis::Analyzer(file_mgr->GetComponentTag("DATA_EVENT"), std::move(args), file),
chunk_event(ce),
stream_event(se) {}
file_analysis::Analyzer* DataEvent::Instantiate(RecordValPtr args, file_analysis::File* file)
{
const auto& chunk_val = args->GetField("chunk_event");
const auto& stream_val = args->GetField("stream_event");
file_analysis::Analyzer* DataEvent::Instantiate(RecordValPtr args, file_analysis::File* file) {
const auto& chunk_val = args->GetField("chunk_event");
const auto& stream_val = args->GetField("stream_event");
if ( ! chunk_val && ! stream_val )
return nullptr;
if ( ! chunk_val && ! stream_val )
return nullptr;
EventHandlerPtr chunk;
EventHandlerPtr stream;
EventHandlerPtr chunk;
EventHandlerPtr stream;
if ( chunk_val )
chunk = event_registry->Lookup(chunk_val->AsFunc()->Name());
if ( chunk_val )
chunk = event_registry->Lookup(chunk_val->AsFunc()->Name());
if ( stream_val )
stream = event_registry->Lookup(stream_val->AsFunc()->Name());
if ( stream_val )
stream = event_registry->Lookup(stream_val->AsFunc()->Name());
return new DataEvent(std::move(args), file, chunk, stream);
}
return new DataEvent(std::move(args), file, chunk, stream);
}
bool DataEvent::DeliverChunk(const u_char* data, uint64_t len, uint64_t offset)
{
if ( ! chunk_event )
return true;
bool DataEvent::DeliverChunk(const u_char* data, uint64_t len, uint64_t offset) {
if ( ! chunk_event )
return true;
event_mgr.Enqueue(chunk_event, GetFile()->ToVal(),
make_intrusive<StringVal>(new String(data, len, false)),
val_mgr->Count(offset));
event_mgr.Enqueue(chunk_event, GetFile()->ToVal(), make_intrusive<StringVal>(new String(data, len, false)),
val_mgr->Count(offset));
return true;
}
return true;
}
bool DataEvent::DeliverStream(const u_char* data, uint64_t len)
{
if ( ! stream_event )
return true;
bool DataEvent::DeliverStream(const u_char* data, uint64_t len) {
if ( ! stream_event )
return true;
event_mgr.Enqueue(stream_event, GetFile()->ToVal(),
make_intrusive<StringVal>(new String(data, len, false)));
event_mgr.Enqueue(stream_event, GetFile()->ToVal(), make_intrusive<StringVal>(new String(data, len, false)));
return true;
}
return true;
}
} // namespace zeek::file_analysis::detail
} // namespace zeek::file_analysis::detail

View file

@ -9,58 +9,56 @@
#include "zeek/file_analysis/Analyzer.h"
#include "zeek/file_analysis/File.h"
namespace zeek::file_analysis::detail
{
namespace zeek::file_analysis::detail {
/**
* An analyzer to send file data to script-layer via events.
*/
class DataEvent : public file_analysis::Analyzer
{
class DataEvent : public file_analysis::Analyzer {
public:
/**
* Generates the event, if any, specified by the "chunk_event" field of this
* analyzer's \c AnalyzerArgs. This is for non-sequential file data input.
* @param data pointer to start of file data chunk.
* @param len number of bytes in the data chunk.
* @param offset number of bytes from start of file at which chunk occurs.
* @return always true
*/
bool DeliverChunk(const u_char* data, uint64_t len, uint64_t offset) override;
/**
* Generates the event, if any, specified by the "chunk_event" field of this
* analyzer's \c AnalyzerArgs. This is for non-sequential file data input.
* @param data pointer to start of file data chunk.
* @param len number of bytes in the data chunk.
* @param offset number of bytes from start of file at which chunk occurs.
* @return always true
*/
bool DeliverChunk(const u_char* data, uint64_t len, uint64_t offset) override;
/**
* Generates the event, if any, specified by the "stream_event" field of
* this analyzer's \c AnalyzerArgs. This is for sequential file data input.
* @param data pointer to start of file data chunk.
* @param len number of bytes in the data chunk.
* @return always true
*/
bool DeliverStream(const u_char* data, uint64_t len) override;
/**
* Generates the event, if any, specified by the "stream_event" field of
* this analyzer's \c AnalyzerArgs. This is for sequential file data input.
* @param data pointer to start of file data chunk.
* @param len number of bytes in the data chunk.
* @return always true
*/
bool DeliverStream(const u_char* data, uint64_t len) override;
/**
* Create a new instance of a DataEvent analyzer.
* @param args the \c AnalyzerArgs value which represents the analyzer.
* @param file the file to which the analyzer will be attached.
* @return the new DataEvent analyzer instance or a null pointer if
* no "chunk_event" or "stream_event" field was specified in \a args.
*/
static file_analysis::Analyzer* Instantiate(RecordValPtr args, file_analysis::File* file);
/**
* Create a new instance of a DataEvent analyzer.
* @param args the \c AnalyzerArgs value which represents the analyzer.
* @param file the file to which the analyzer will be attached.
* @return the new DataEvent analyzer instance or a null pointer if
* no "chunk_event" or "stream_event" field was specified in \a args.
*/
static file_analysis::Analyzer* Instantiate(RecordValPtr args, file_analysis::File* file);
protected:
/**
* Constructor.
* @param args the \c AnalyzerArgs value which represents the analyzer.
* @param file the file to which the analyzer will be attached.
* @param ce pointer to event handler which will be called to receive
* non-sequential file data.
* @param se pointer to event handler which will be called to receive
* sequential file data.
*/
DataEvent(RecordValPtr args, file_analysis::File* file, EventHandlerPtr ce, EventHandlerPtr se);
/**
* Constructor.
* @param args the \c AnalyzerArgs value which represents the analyzer.
* @param file the file to which the analyzer will be attached.
* @param ce pointer to event handler which will be called to receive
* non-sequential file data.
* @param se pointer to event handler which will be called to receive
* sequential file data.
*/
DataEvent(RecordValPtr args, file_analysis::File* file, EventHandlerPtr ce, EventHandlerPtr se);
private:
EventHandlerPtr chunk_event;
EventHandlerPtr stream_event;
};
EventHandlerPtr chunk_event;
EventHandlerPtr stream_event;
};
} // namespace zeek::file_analysis::detail
} // namespace zeek::file_analysis::detail

View file

@ -5,22 +5,19 @@
#include "zeek/file_analysis/Component.h"
#include "zeek/file_analysis/analyzer/data_event/DataEvent.h"
namespace zeek::plugin::detail::Zeek_FileDataEvent
{
namespace zeek::plugin::detail::Zeek_FileDataEvent {
class Plugin : public zeek::plugin::Plugin
{
class Plugin : public zeek::plugin::Plugin {
public:
zeek::plugin::Configuration Configure() override
{
AddComponent(new zeek::file_analysis::Component(
"DATA_EVENT", zeek::file_analysis::detail::DataEvent::Instantiate));
zeek::plugin::Configuration Configure() override {
AddComponent(
new zeek::file_analysis::Component("DATA_EVENT", zeek::file_analysis::detail::DataEvent::Instantiate));
zeek::plugin::Configuration config;
config.name = "Zeek::FileDataEvent";
config.description = "Delivers file content";
return config;
}
} plugin;
zeek::plugin::Configuration config;
config.name = "Zeek::FileDataEvent";
config.description = "Delivers file content";
return config;
}
} plugin;
} // namespace zeek::plugin::detail::Zeek_FileDataEvent
} // namespace zeek::plugin::detail::Zeek_FileDataEvent

View file

@ -8,67 +8,55 @@
#include "zeek/file_analysis/Manager.h"
#include "zeek/util.h"
namespace zeek::file_analysis::detail
{
namespace zeek::file_analysis::detail {
Entropy::Entropy(RecordValPtr args, file_analysis::File* file)
: file_analysis::Analyzer(file_mgr->GetComponentTag("ENTROPY"), std::move(args), file)
{
entropy = new EntropyVal;
fed = false;
}
: file_analysis::Analyzer(file_mgr->GetComponentTag("ENTROPY"), std::move(args), file) {
entropy = new EntropyVal;
fed = false;
}
Entropy::~Entropy()
{
Unref(entropy);
}
Entropy::~Entropy() { Unref(entropy); }
file_analysis::Analyzer* Entropy::Instantiate(RecordValPtr args, file_analysis::File* file)
{
return new Entropy(std::move(args), file);
}
file_analysis::Analyzer* Entropy::Instantiate(RecordValPtr args, file_analysis::File* file) {
return new Entropy(std::move(args), file);
}
bool Entropy::DeliverStream(const u_char* data, uint64_t len)
{
if ( ! fed )
fed = len > 0;
bool Entropy::DeliverStream(const u_char* data, uint64_t len) {
if ( ! fed )
fed = len > 0;
entropy->Feed(data, len);
return true;
}
entropy->Feed(data, len);
return true;
}
bool Entropy::EndOfFile()
{
Finalize();
return false;
}
bool Entropy::EndOfFile() {
Finalize();
return false;
}
bool Entropy::Undelivered(uint64_t offset, uint64_t len)
{
return false;
}
bool Entropy::Undelivered(uint64_t offset, uint64_t len) { return false; }
void Entropy::Finalize()
{
if ( ! fed )
return;
void Entropy::Finalize() {
if ( ! fed )
return;
if ( ! file_entropy )
return;
if ( ! file_entropy )
return;
double montepi, scc, ent, mean, chisq;
montepi = scc = ent = mean = chisq = 0.0;
entropy->Get(&ent, &chisq, &mean, &montepi, &scc);
double montepi, scc, ent, mean, chisq;
montepi = scc = ent = mean = chisq = 0.0;
entropy->Get(&ent, &chisq, &mean, &montepi, &scc);
static auto entropy_test_result = id::find_type<RecordType>("entropy_test_result");
auto ent_result = make_intrusive<RecordVal>(entropy_test_result);
ent_result->Assign(0, ent);
ent_result->Assign(1, chisq);
ent_result->Assign(2, mean);
ent_result->Assign(3, montepi);
ent_result->Assign(4, scc);
static auto entropy_test_result = id::find_type<RecordType>("entropy_test_result");
auto ent_result = make_intrusive<RecordVal>(entropy_test_result);
ent_result->Assign(0, ent);
ent_result->Assign(1, chisq);
ent_result->Assign(2, mean);
ent_result->Assign(3, montepi);
ent_result->Assign(4, scc);
event_mgr.Enqueue(file_entropy, GetFile()->ToVal(), std::move(ent_result));
}
event_mgr.Enqueue(file_entropy, GetFile()->ToVal(), std::move(ent_result));
}
} // namespace zeek::file_analysis::detail
} // namespace zeek::file_analysis::detail

View file

@ -10,71 +10,69 @@
#include "zeek/file_analysis/File.h"
#include "zeek/file_analysis/analyzer/entropy/events.bif.h"
namespace zeek::file_analysis::detail
{
namespace zeek::file_analysis::detail {
/**
* An analyzer to produce entropy of file contents.
*/
class Entropy : public file_analysis::Analyzer
{
class Entropy : public file_analysis::Analyzer {
public:
/**
* Destructor.
*/
~Entropy() override;
/**
* Destructor.
*/
~Entropy() override;
/**
* Create a new instance of an Entropy analyzer.
* @param args the \c AnalyzerArgs value which represents the analyzer.
* @param file the file to which the analyzer will be attached.
* @return the new Entropy analyzer instance or a null pointer if the
* the "extraction_file" field of \a args wasn't set.
*/
static file_analysis::Analyzer* Instantiate(RecordValPtr args, file_analysis::File* file);
/**
* Create a new instance of an Entropy analyzer.
* @param args the \c AnalyzerArgs value which represents the analyzer.
* @param file the file to which the analyzer will be attached.
* @return the new Entropy analyzer instance or a null pointer if the
* the "extraction_file" field of \a args wasn't set.
*/
static file_analysis::Analyzer* Instantiate(RecordValPtr args, file_analysis::File* file);
/**
* Calculate entropy of next chunk of file contents.
* @param data pointer to start of a chunk of a file data.
* @param len number of bytes in the data chunk.
* @return false if the digest is in an invalid state, else true.
*/
bool DeliverStream(const u_char* data, uint64_t len) override;
/**
* Calculate entropy of next chunk of file contents.
* @param data pointer to start of a chunk of a file data.
* @param len number of bytes in the data chunk.
* @return false if the digest is in an invalid state, else true.
*/
bool DeliverStream(const u_char* data, uint64_t len) override;
/**
* Finalizes the calculation and raises a "file_entropy_test" event.
* @return always false so analyze will be detached from file.
*/
bool EndOfFile() override;
/**
* Finalizes the calculation and raises a "file_entropy_test" event.
* @return always false so analyze will be detached from file.
*/
bool EndOfFile() override;
/**
* Missing data can't be handled, so just indicate the this analyzer should
* be removed from receiving further data. The entropy will not be finalized.
* @param offset byte offset in file at which missing chunk starts.
* @param len number of missing bytes.
* @return always false so analyzer will detach from file.
*/
bool Undelivered(uint64_t offset, uint64_t len) override;
/**
* Missing data can't be handled, so just indicate the this analyzer should
* be removed from receiving further data. The entropy will not be finalized.
* @param offset byte offset in file at which missing chunk starts.
* @param len number of missing bytes.
* @return always false so analyzer will detach from file.
*/
bool Undelivered(uint64_t offset, uint64_t len) override;
protected:
/**
* Constructor.
* @param args the \c AnalyzerArgs value which represents the analyzer.
* @param file the file to which the analyzer will be attached.
* @param hv specific hash calculator object.
* @param kind human readable name of the hash algorithm to use.
*/
Entropy(RecordValPtr args, file_analysis::File* file);
/**
* Constructor.
* @param args the \c AnalyzerArgs value which represents the analyzer.
* @param file the file to which the analyzer will be attached.
* @param hv specific hash calculator object.
* @param kind human readable name of the hash algorithm to use.
*/
Entropy(RecordValPtr args, file_analysis::File* file);
/**
* If some file contents have been seen, finalizes the entropy of them and
* raises the "file_entropy" event with the results.
*/
void Finalize();
/**
* If some file contents have been seen, finalizes the entropy of them and
* raises the "file_entropy" event with the results.
*/
void Finalize();
private:
EntropyVal* entropy;
bool fed;
};
EntropyVal* entropy;
bool fed;
};
} // namespace zeek::file_analysis::detail
} // namespace zeek::file_analysis::detail

View file

@ -5,22 +5,18 @@
#include "zeek/file_analysis/Component.h"
#include "zeek/file_analysis/analyzer/entropy/Entropy.h"
namespace zeek::plugin::detail::Zeek_FileEntropy
{
namespace zeek::plugin::detail::Zeek_FileEntropy {
class Plugin : public zeek::plugin::Plugin
{
class Plugin : public zeek::plugin::Plugin {
public:
zeek::plugin::Configuration Configure() override
{
AddComponent(new zeek::file_analysis::Component(
"ENTROPY", zeek::file_analysis::detail::Entropy::Instantiate));
zeek::plugin::Configuration Configure() override {
AddComponent(new zeek::file_analysis::Component("ENTROPY", zeek::file_analysis::detail::Entropy::Instantiate));
zeek::plugin::Configuration config;
config.name = "Zeek::FileEntropy";
config.description = "Entropy test file content";
return config;
}
} plugin;
zeek::plugin::Configuration config;
config.name = "Zeek::FileEntropy";
config.description = "Entropy test file content";
return config;
}
} plugin;
} // namespace zeek::plugin::detail::Zeek_FileEntropy
} // namespace zeek::plugin::detail::Zeek_FileEntropy

View file

@ -9,67 +9,59 @@
#include "zeek/file_analysis/Manager.h"
#include "zeek/util.h"
namespace zeek::file_analysis::detail
{
namespace zeek::file_analysis::detail {
// Registers the analyzer under the "EXTRACT" component tag and opens the
// extraction target for binary writing. A failure to open is reported as an
// error; a failure to switch the stream to full buffering only as a warning.
Extract::Extract(RecordValPtr args, file_analysis::File* file, const std::string& arg_filename, uint64_t arg_limit,
                 bool arg_limit_includes_missing)
    : file_analysis::Analyzer(file_mgr->GetComponentTag("EXTRACT"), std::move(args), file),
      filename(arg_filename),
      limit(arg_limit),
      written(0),
      limit_includes_missing(arg_limit_includes_missing) {
    char buf[128];
    file_stream = fopen(filename.data(), "wb");

    if ( file_stream ) {
        // Try to ensure full buffering.
        if ( util::detail::setvbuf(file_stream, nullptr, _IOFBF, BUFSIZ) ) {
            util::zeek_strerror_r(errno, buf, sizeof(buf));
            reporter->Warning("cannot set buffering mode for %s: %s", filename.data(), buf);
        }
    }
    else {
        util::zeek_strerror_r(errno, buf, sizeof(buf));
        reporter->Error("cannot open %s: %s", filename.c_str(), buf);
    }
}
// Closes the output stream if one is open and reports an error if closing fails.
Extract::~Extract() {
    if ( file_stream && fclose(file_stream) ) {
        char buf[128];
        util::zeek_strerror_r(errno, buf, sizeof(buf));
        reporter->Error("cannot close %s: %s", filename.data(), buf);
    }
}
// Fetches the field called "name" from the analyzer arguments. Reports an error
// if the field is unset; the (then null) value is returned either way so the
// caller can test it.
static ValPtr get_extract_field_val(const RecordValPtr& args, const char* name) {
    const auto& rval = args->GetField(name);

    if ( ! rval )
        reporter->Error("File extraction analyzer missing arg field: %s", name);

    return rval;
}
// Factory: reads the "extract_filename", "extract_limit" and
// "extract_limit_includes_missing" argument fields and creates an Extract
// analyzer from them. Returns a null pointer if any of the three is unset
// (each missing field has already been reported by get_extract_field_val).
file_analysis::Analyzer* Extract::Instantiate(RecordValPtr args, file_analysis::File* file) {
    const auto& fname = get_extract_field_val(args, "extract_filename");
    const auto& limit = get_extract_field_val(args, "extract_limit");
    const auto& extract_limit_includes_missing = get_extract_field_val(args, "extract_limit_includes_missing");

    if ( ! fname || ! limit || ! extract_limit_includes_missing )
        return nullptr;

    return new Extract(std::move(args), file, fname->AsString()->CheckString(), limit->AsCount(),
                       extract_limit_includes_missing->AsBool());
}
/**
* Check if we are exceeding the write limit with this write.
@ -79,118 +71,102 @@ file_analysis::Analyzer* Extract::Instantiate(RecordValPtr args, file_analysis::
* @param n number of bytes to write to keep within limit
* @returns true if limit exceeded
*/
static bool check_limit_exceeded(uint64_t lim, uint64_t written, uint64_t len, uint64_t* n) {
    // A limit of zero is treated as "no limit": the whole chunk may be written.
    if ( lim == 0 ) {
        *n = len;
        return false;
    }

    // Already at (or past) the limit: nothing more may be written.
    if ( written >= lim ) {
        *n = 0;
        return true;
    }

    // Here written < lim, so the subtraction cannot underflow. Comparing len
    // against the remaining room, rather than testing written + len > lim,
    // avoids uint64_t wrap-around for very large len, which would otherwise
    // make an over-limit write look like it fits.
    const uint64_t remaining = lim - written;

    if ( len > remaining ) {
        *n = remaining;
        return true;
    }

    *n = len;
    return false;
}
bool Extract::DeliverStream(const u_char* data, uint64_t len)
{
if ( ! file_stream )
return false;
bool Extract::DeliverStream(const u_char* data, uint64_t len) {
if ( ! file_stream )
return false;
uint64_t towrite = 0;
bool limit_exceeded = check_limit_exceeded(limit, written, len, &towrite);
uint64_t towrite = 0;
bool limit_exceeded = check_limit_exceeded(limit, written, len, &towrite);
if ( limit_exceeded && file_extraction_limit )
{
file_analysis::File* f = GetFile();
f->FileEvent(file_extraction_limit,
{f->ToVal(), GetArgs(), val_mgr->Count(limit), val_mgr->Count(len)});
if ( limit_exceeded && file_extraction_limit ) {
file_analysis::File* f = GetFile();
f->FileEvent(file_extraction_limit, {f->ToVal(), GetArgs(), val_mgr->Count(limit), val_mgr->Count(len)});
// Limit may have been modified by a BIF, re-check it.
limit_exceeded = check_limit_exceeded(limit, written, len, &towrite);
}
// Limit may have been modified by a BIF, re-check it.
limit_exceeded = check_limit_exceeded(limit, written, len, &towrite);
}
char buf[128];
char buf[128];
if ( towrite > 0 )
{
if ( fwrite(data, towrite, 1, file_stream) != 1 )
{
util::zeek_strerror_r(errno, buf, sizeof(buf));
reporter->Error("failed to write to extracted file %s: %s", filename.data(), buf);
fclose(file_stream);
file_stream = nullptr;
return false;
}
if ( towrite > 0 ) {
if ( fwrite(data, towrite, 1, file_stream) != 1 ) {
util::zeek_strerror_r(errno, buf, sizeof(buf));
reporter->Error("failed to write to extracted file %s: %s", filename.data(), buf);
fclose(file_stream);
file_stream = nullptr;
return false;
}
written += towrite;
}
written += towrite;
}
// Assume we may not try to write anything more for a while due to reaching
// the extraction limit and the file analysis File still proceeding to
// do other analysis without destructing/closing this one until the very end,
// so flush anything currently buffered.
if ( limit_exceeded && fflush(file_stream) )
{
util::zeek_strerror_r(errno, buf, sizeof(buf));
reporter->Warning("cannot fflush extracted file %s: %s", filename.data(), buf);
}
// Assume we may not try to write anything more for a while due to reaching
// the extraction limit and the file analysis File still proceeding to
// do other analysis without destructing/closing this one until the very end,
// so flush anything currently buffered.
if ( limit_exceeded && fflush(file_stream) ) {
util::zeek_strerror_r(errno, buf, sizeof(buf));
reporter->Warning("cannot fflush extracted file %s: %s", filename.data(), buf);
}
return (! limit_exceeded);
}
return (! limit_exceeded);
}
bool Extract::Undelivered(uint64_t offset, uint64_t len)
{
if ( ! file_stream )
return false;
bool Extract::Undelivered(uint64_t offset, uint64_t len) {
if ( ! file_stream )
return false;
if ( limit_includes_missing )
{
uint64_t towrite = 0;
bool limit_exceeded = check_limit_exceeded(limit, written, len, &towrite);
// if the limit is exceeded, we have to raise the event. This gives scripts the opportunity
// to raise the limit.
if ( limit_exceeded && file_extraction_limit )
{
file_analysis::File* f = GetFile();
f->FileEvent(file_extraction_limit,
{f->ToVal(), GetArgs(), val_mgr->Count(limit), val_mgr->Count(len)});
// we have to check again if the limit is still exceeded
limit_exceeded = check_limit_exceeded(limit, written, len, &towrite);
}
if ( limit_includes_missing ) {
uint64_t towrite = 0;
bool limit_exceeded = check_limit_exceeded(limit, written, len, &towrite);
// if the limit is exceeded, we have to raise the event. This gives scripts the opportunity
// to raise the limit.
if ( limit_exceeded && file_extraction_limit ) {
file_analysis::File* f = GetFile();
f->FileEvent(file_extraction_limit, {f->ToVal(), GetArgs(), val_mgr->Count(limit), val_mgr->Count(len)});
// we have to check again if the limit is still exceeded
limit_exceeded = check_limit_exceeded(limit, written, len, &towrite);
}
// if the limit is exceeded, abort and don't do anything - no reason to seek.
if ( limit_exceeded )
return false;
// if the limit is exceeded, abort and don't do anything - no reason to seek.
if ( limit_exceeded )
return false;
// if we don't skip holes, count this hole against the write limit
written += len;
}
// if we don't skip holes, count this hole against the write limit
written += len;
}
if ( fseek(file_stream, len + offset, SEEK_SET) != 0 )
{
char buf[128];
util::zeek_strerror_r(errno, buf, sizeof(buf));
reporter->Error("failed to seek in extracted file %s: %s", filename.data(), buf);
fclose(file_stream);
file_stream = nullptr;
return false;
}
if ( fseek(file_stream, len + offset, SEEK_SET) != 0 ) {
char buf[128];
util::zeek_strerror_r(errno, buf, sizeof(buf));
reporter->Error("failed to seek in extracted file %s: %s", filename.data(), buf);
fclose(file_stream);
file_stream = nullptr;
return false;
}
return true;
}
return true;
}
} // namespace zeek::file_analysis::detail
} // namespace zeek::file_analysis::detail

View file

@ -10,72 +10,70 @@
#include "zeek/file_analysis/File.h"
#include "zeek/file_analysis/analyzer/extract/events.bif.h"
namespace zeek::file_analysis::detail
{
namespace zeek::file_analysis::detail {
/**
* An analyzer to extract content of files to local disk.
*/
class Extract : public file_analysis::Analyzer
{
class Extract : public file_analysis::Analyzer {
public:
/**
* Destructor. Will close the file that was used for data extraction.
*/
~Extract() override;
/**
* Destructor. Will close the file that was used for data extraction.
*/
~Extract() override;
/**
* Write a chunk of file data to the local extraction file.
* @param data pointer to a chunk of file data.
* @param len number of bytes in the data chunk.
* @return false if there was no extraction file open and the data couldn't
* be written, else true.
*/
bool DeliverStream(const u_char* data, uint64_t len) override;
/**
* Write a chunk of file data to the local extraction file.
* @param data pointer to a chunk of file data.
* @param len number of bytes in the data chunk.
* @return false if there was no extraction file open and the data couldn't
* be written, else true.
*/
bool DeliverStream(const u_char* data, uint64_t len) override;
/**
* Report undelivered bytes.
* @param offset distance into the file where the gap occurred.
* @param len number of bytes undelivered.
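* @return false if no extraction file is open, if the gap exceeds the
* extraction limit, or if seeking in the extraction file fails; else true.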
*/
bool Undelivered(uint64_t offset, uint64_t len) override;
/**
* Report undelivered bytes.
* @param offset distance into the file where the gap occurred.
* @param len number of bytes undelivered.
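* @return false if no extraction file is open, if the gap exceeds the
* extraction limit, or if seeking in the extraction file fails; else true.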
*/
bool Undelivered(uint64_t offset, uint64_t len) override;
/**
* Create a new instance of an Extract analyzer.
* @param args the \c AnalyzerArgs value which represents the analyzer.
* @param file the file to which the analyzer will be attached.
* @return the new Extract analyzer instance or a null pointer if any of the
* "extract_filename", "extract_limit" or "extract_limit_includes_missing"
* fields of \a args could not be obtained.
*/
static file_analysis::Analyzer* Instantiate(RecordValPtr args, file_analysis::File* file);
/**
* Create a new instance of an Extract analyzer.
* @param args the \c AnalyzerArgs value which represents the analyzer.
* @param file the file to which the analyzer will be attached.
* @return the new Extract analyzer instance or a null pointer if any of the
* "extract_filename", "extract_limit" or "extract_limit_includes_missing"
* fields of \a args could not be obtained.
*/
static file_analysis::Analyzer* Instantiate(RecordValPtr args, file_analysis::File* file);
/**
* Sets the maximum allowed extracted file size. A value of zero means
* "no limit".
* @param bytes number of bytes allowed to be extracted
*/
void SetLimit(uint64_t bytes) { limit = bytes; }
/**
* Sets the maximum allowed extracted file size. A value of zero means
* "no limit".
* @param bytes number of bytes allowed to be extracted
*/
void SetLimit(uint64_t bytes) { limit = bytes; }
protected:
/**
* Constructor.
* @param args the \c AnalyzerArgs value which represents the analyzer.
* @param file the file to which the analyzer will be attached.
* @param arg_filename a file system path which specifies the local file
* to which the contents of the file will be extracted/written.
* @param arg_limit the maximum allowed file size.
* @param arg_limit_includes_missing missing bytes count towards limit if true.
*/
Extract(RecordValPtr args, file_analysis::File* file, const std::string& arg_filename,
uint64_t arg_limit, bool arg_limit_includes_missing);
/**
* Constructor.
* @param args the \c AnalyzerArgs value which represents the analyzer.
* @param file the file to which the analyzer will be attached.
* @param arg_filename a file system path which specifies the local file
* to which the contents of the file will be extracted/written.
* @param arg_limit the maximum allowed file size.
* @param arg_limit_includes_missing missing bytes count towards limit if true.
*/
Extract(RecordValPtr args, file_analysis::File* file, const std::string& arg_filename, uint64_t arg_limit,
bool arg_limit_includes_missing);
private:
std::string filename;
FILE* file_stream;
uint64_t limit; // the file extraction limit
uint64_t written; // how many bytes we have written so far
bool limit_includes_missing; // do count missing bytes against limit if true
};
std::string filename;
FILE* file_stream;
uint64_t limit; // the file extraction limit
uint64_t written; // how many bytes we have written so far
bool limit_includes_missing; // do count missing bytes against limit if true
};
} // namespace zeek::file_analysis::detail
} // namespace zeek::file_analysis::detail

View file

@ -5,22 +5,18 @@
#include "zeek/file_analysis/Component.h"
#include "zeek/file_analysis/analyzer/extract/Extract.h"
namespace zeek::plugin::detail::Zeek_FileExtract
{
namespace zeek::plugin::detail::Zeek_FileExtract {
class Plugin : public zeek::plugin::Plugin
{
class Plugin : public zeek::plugin::Plugin {
public:
zeek::plugin::Configuration Configure() override
{
AddComponent(new zeek::file_analysis::Component(
"EXTRACT", zeek::file_analysis::detail::Extract::Instantiate));
zeek::plugin::Configuration Configure() override {
AddComponent(new zeek::file_analysis::Component("EXTRACT", zeek::file_analysis::detail::Extract::Instantiate));
zeek::plugin::Configuration config;
config.name = "Zeek::FileExtract";
config.description = "Extract file content";
return config;
}
} plugin;
zeek::plugin::Configuration config;
config.name = "Zeek::FileExtract";
config.description = "Extract file content";
return config;
}
} plugin;
} // namespace zeek::plugin::detail::Zeek_FileExtract
} // namespace zeek::plugin::detail::Zeek_FileExtract

View file

@ -8,58 +8,49 @@
#include "zeek/file_analysis/Manager.h"
#include "zeek/util.h"
namespace zeek::file_analysis::detail
{
namespace zeek::file_analysis::detail {
StringValPtr MD5::kind_val = make_intrusive<StringVal>("md5");
StringValPtr SHA1::kind_val = make_intrusive<StringVal>("sha1");
StringValPtr SHA256::kind_val = make_intrusive<StringVal>("sha256");
Hash::Hash(RecordValPtr args, file_analysis::File* file, HashVal* hv, StringValPtr arg_kind)
: file_analysis::Analyzer(file_mgr->GetComponentTag(util::to_upper(arg_kind->ToStdString())),
std::move(args), file),
hash(hv), fed(false), kind(std::move(arg_kind))
{
hash->Init();
}
: file_analysis::Analyzer(file_mgr->GetComponentTag(util::to_upper(arg_kind->ToStdString())), std::move(args),
file),
hash(hv),
fed(false),
kind(std::move(arg_kind)) {
hash->Init();
}
Hash::~Hash()
{
Unref(hash);
}
Hash::~Hash() { Unref(hash); }
bool Hash::DeliverStream(const u_char* data, uint64_t len)
{
if ( ! hash->IsValid() )
return false;
bool Hash::DeliverStream(const u_char* data, uint64_t len) {
if ( ! hash->IsValid() )
return false;
if ( ! fed )
fed = len > 0;
if ( ! fed )
fed = len > 0;
hash->Feed(data, len);
return true;
}
hash->Feed(data, len);
return true;
}
bool Hash::EndOfFile()
{
Finalize();
return false;
}
bool Hash::EndOfFile() {
Finalize();
return false;
}
bool Hash::Undelivered(uint64_t offset, uint64_t len)
{
return false;
}
bool Hash::Undelivered(uint64_t offset, uint64_t len) { return false; }
void Hash::Finalize()
{
if ( ! hash->IsValid() || ! fed )
return;
void Hash::Finalize() {
if ( ! hash->IsValid() || ! fed )
return;
if ( ! file_hash )
return;
if ( ! file_hash )
return;
event_mgr.Enqueue(file_hash, GetFile()->ToVal(), kind, hash->Get());
}
event_mgr.Enqueue(file_hash, GetFile()->ToVal(), kind, hash->Get());
}
} // namespace zeek::file_analysis::detail
} // namespace zeek::file_analysis::detail

View file

@ -10,159 +10,143 @@
#include "zeek/file_analysis/File.h"
#include "zeek/file_analysis/analyzer/hash/events.bif.h"
namespace zeek::file_analysis::detail
{
namespace zeek::file_analysis::detail {
/**
* An analyzer to produce a hash of file contents.
*/
class Hash : public file_analysis::Analyzer
{
class Hash : public file_analysis::Analyzer {
public:
/**
* Destructor.
*/
~Hash() override;
/**
* Destructor.
*/
~Hash() override;
/**
* Incrementally hash next chunk of file contents.
* @param data pointer to start of a chunk of a file data.
* @param len number of bytes in the data chunk.
* @return false if the digest is in an invalid state, else true.
*/
bool DeliverStream(const u_char* data, uint64_t len) override;
/**
* Incrementally hash next chunk of file contents.
* @param data pointer to start of a chunk of a file data.
* @param len number of bytes in the data chunk.
* @return false if the digest is in an invalid state, else true.
*/
bool DeliverStream(const u_char* data, uint64_t len) override;
/**
* Finalizes the hash and raises a "file_hash" event.
* @return always false so the analyzer will be detached from file.
*/
bool EndOfFile() override;
/**
* Finalizes the hash and raises a "file_hash" event.
* @return always false so the analyzer will be detached from file.
*/
bool EndOfFile() override;
/**
* Missing data can't be handled, so just indicate that this analyzer should
* be removed from receiving further data. The hash will not be finalized.
* @param offset byte offset in file at which missing chunk starts.
* @param len number of missing bytes.
* @return always false so analyzer will detach from file.
*/
bool Undelivered(uint64_t offset, uint64_t len) override;
/**
* Missing data can't be handled, so just indicate that this analyzer should
* be removed from receiving further data. The hash will not be finalized.
* @param offset byte offset in file at which missing chunk starts.
* @param len number of missing bytes.
* @return always false so analyzer will detach from file.
*/
bool Undelivered(uint64_t offset, uint64_t len) override;
protected:
/**
* Constructor.
* @param args the \c AnalyzerArgs value which represents the analyzer.
* @param file the file to which the analyzer will be attached.
* @param hv specific hash calculator object.
* @param kind human readable name of the hash algorithm to use.
*/
Hash(RecordValPtr args, file_analysis::File* file, HashVal* hv, StringValPtr kind);
/**
* Constructor.
* @param args the \c AnalyzerArgs value which represents the analyzer.
* @param file the file to which the analyzer will be attached.
* @param hv specific hash calculator object.
* @param kind human readable name of the hash algorithm to use.
*/
Hash(RecordValPtr args, file_analysis::File* file, HashVal* hv, StringValPtr kind);
/**
* If some file contents have been seen, finalizes the hash of them and
* raises the "file_hash" event with the results.
*/
void Finalize();
/**
* If some file contents have been seen, finalizes the hash of them and
* raises the "file_hash" event with the results.
*/
void Finalize();
private:
HashVal* hash;
bool fed;
StringValPtr kind;
};
HashVal* hash;
bool fed;
StringValPtr kind;
};
/**
* An analyzer to produce an MD5 hash of file contents.
*/
class MD5 final : public Hash
{
class MD5 final : public Hash {
public:
/**
* Create a new instance of the MD5 hashing file analyzer.
* @param args the \c AnalyzerArgs value which represents the analyzer.
* @param file the file to which the analyzer will be attached.
* @return the new MD5 analyzer instance or a null pointer if there's no
* handler for the "file_hash" event.
*/
static file_analysis::Analyzer* Instantiate(RecordValPtr args, file_analysis::File* file)
{
return file_hash ? new MD5(std::move(args), file) : nullptr;
}
/**
* Create a new instance of the MD5 hashing file analyzer.
* @param args the \c AnalyzerArgs value which represents the analyzer.
* @param file the file to which the analyzer will be attached.
* @return the new MD5 analyzer instance or a null pointer if there's no
* handler for the "file_hash" event.
*/
static file_analysis::Analyzer* Instantiate(RecordValPtr args, file_analysis::File* file) {
return file_hash ? new MD5(std::move(args), file) : nullptr;
}
private:
/**
* Constructor.
* @param args the \c AnalyzerArgs value which represents the analyzer.
* @param file the file to which the analyzer will be attached.
*/
MD5(RecordValPtr args, file_analysis::File* file)
: Hash(std::move(args), file, new MD5Val(), MD5::kind_val)
{
}
/**
* Constructor.
* @param args the \c AnalyzerArgs value which represents the analyzer.
* @param file the file to which the analyzer will be attached.
*/
MD5(RecordValPtr args, file_analysis::File* file) : Hash(std::move(args), file, new MD5Val(), MD5::kind_val) {}
static StringValPtr kind_val;
};
static StringValPtr kind_val;
};
/**
* An analyzer to produce a SHA1 hash of file contents.
*/
class SHA1 final : public Hash
{
class SHA1 final : public Hash {
public:
/**
* Create a new instance of the SHA1 hashing file analyzer.
* @param args the \c AnalyzerArgs value which represents the analyzer.
* @param file the file to which the analyzer will be attached.
* @return the new SHA1 analyzer instance or a null pointer if there's no
* handler for the "file_hash" event.
*/
static file_analysis::Analyzer* Instantiate(RecordValPtr args, file_analysis::File* file)
{
return file_hash ? new SHA1(std::move(args), file) : nullptr;
}
/**
* Create a new instance of the SHA1 hashing file analyzer.
* @param args the \c AnalyzerArgs value which represents the analyzer.
* @param file the file to which the analyzer will be attached.
* @return the new SHA1 analyzer instance or a null pointer if there's no
* handler for the "file_hash" event.
*/
static file_analysis::Analyzer* Instantiate(RecordValPtr args, file_analysis::File* file) {
return file_hash ? new SHA1(std::move(args), file) : nullptr;
}
private:
/**
* Constructor.
* @param args the \c AnalyzerArgs value which represents the analyzer.
* @param file the file to which the analyzer will be attached.
*/
SHA1(RecordValPtr args, file_analysis::File* file)
: Hash(std::move(args), file, new SHA1Val(), SHA1::kind_val)
{
}
/**
* Constructor.
* @param args the \c AnalyzerArgs value which represents the analyzer.
* @param file the file to which the analyzer will be attached.
*/
SHA1(RecordValPtr args, file_analysis::File* file) : Hash(std::move(args), file, new SHA1Val(), SHA1::kind_val) {}
static StringValPtr kind_val;
};
static StringValPtr kind_val;
};
/**
* An analyzer to produce a SHA256 hash of file contents.
*/
class SHA256 final : public Hash
{
class SHA256 final : public Hash {
public:
/**
* Create a new instance of the SHA256 hashing file analyzer.
* @param args the \c AnalyzerArgs value which represents the analyzer.
* @param file the file to which the analyzer will be attached.
* @return the new SHA256 analyzer instance or a null pointer if there's no
* handler for the "file_hash" event.
*/
static file_analysis::Analyzer* Instantiate(RecordValPtr args, file_analysis::File* file)
{
return file_hash ? new SHA256(std::move(args), file) : nullptr;
}
/**
* Create a new instance of the SHA256 hashing file analyzer.
* @param args the \c AnalyzerArgs value which represents the analyzer.
* @param file the file to which the analyzer will be attached.
* @return the new SHA256 analyzer instance or a null pointer if there's no
* handler for the "file_hash" event.
*/
static file_analysis::Analyzer* Instantiate(RecordValPtr args, file_analysis::File* file) {
return file_hash ? new SHA256(std::move(args), file) : nullptr;
}
private:
/**
* Constructor.
* @param args the \c AnalyzerArgs value which represents the analyzer.
* @param file the file to which the analyzer will be attached.
*/
SHA256(RecordValPtr args, file_analysis::File* file)
: Hash(std::move(args), file, new SHA256Val(), SHA256::kind_val)
{
}
/**
* Constructor.
* @param args the \c AnalyzerArgs value which represents the analyzer.
* @param file the file to which the analyzer will be attached.
*/
SHA256(RecordValPtr args, file_analysis::File* file)
: Hash(std::move(args), file, new SHA256Val(), SHA256::kind_val) {}
static StringValPtr kind_val;
};
static StringValPtr kind_val;
};
} // namespace zeek::file_analysis
} // namespace zeek::file_analysis::detail

View file

@ -5,26 +5,20 @@
#include "zeek/file_analysis/Component.h"
#include "zeek/file_analysis/analyzer/hash/Hash.h"
namespace zeek::plugin::detail::Zeek_FileHash
{
namespace zeek::plugin::detail::Zeek_FileHash {
class Plugin : public zeek::plugin::Plugin
{
class Plugin : public zeek::plugin::Plugin {
public:
zeek::plugin::Configuration Configure() override
{
AddComponent(new zeek::file_analysis::Component(
"MD5", zeek::file_analysis::detail::MD5::Instantiate));
AddComponent(new zeek::file_analysis::Component(
"SHA1", zeek::file_analysis::detail::SHA1::Instantiate));
AddComponent(new zeek::file_analysis::Component(
"SHA256", zeek::file_analysis::detail::SHA256::Instantiate));
zeek::plugin::Configuration Configure() override {
AddComponent(new zeek::file_analysis::Component("MD5", zeek::file_analysis::detail::MD5::Instantiate));
AddComponent(new zeek::file_analysis::Component("SHA1", zeek::file_analysis::detail::SHA1::Instantiate));
AddComponent(new zeek::file_analysis::Component("SHA256", zeek::file_analysis::detail::SHA256::Instantiate));
zeek::plugin::Configuration config;
config.name = "Zeek::FileHash";
config.description = "Hash file content";
return config;
}
} plugin;
zeek::plugin::Configuration config;
config.name = "Zeek::FileHash";
config.description = "Hash file content";
return config;
}
} plugin;
} // namespace zeek::plugin::detail::Zeek_FileHash
} // namespace zeek::plugin::detail::Zeek_FileHash

View file

@ -2,44 +2,34 @@
#include "zeek/file_analysis/Manager.h"
namespace zeek::file_analysis::detail
{
namespace zeek::file_analysis::detail {
PE::PE(RecordValPtr args, file_analysis::File* file)
: file_analysis::Analyzer(file_mgr->GetComponentTag("PE"), std::move(args), file)
{
conn = new binpac::PE::MockConnection(this);
interp = new binpac::PE::File(conn);
done = false;
}
: file_analysis::Analyzer(file_mgr->GetComponentTag("PE"), std::move(args), file) {
conn = new binpac::PE::MockConnection(this);
interp = new binpac::PE::File(conn);
done = false;
}
PE::~PE()
{
delete interp;
delete conn;
}
PE::~PE() {
delete interp;
delete conn;
}
bool PE::DeliverStream(const u_char* data, uint64_t len)
{
if ( conn->is_done() )
return false;
bool PE::DeliverStream(const u_char* data, uint64_t len) {
if ( conn->is_done() )
return false;
try
{
interp->NewData(data, data + len);
}
catch ( const binpac::Exception& e )
{
AnalyzerViolation(util::fmt("Binpac exception: %s", e.c_msg()));
return false;
}
try {
interp->NewData(data, data + len);
} catch ( const binpac::Exception& e ) {
AnalyzerViolation(util::fmt("Binpac exception: %s", e.c_msg()));
return false;
}
return ! conn->is_done();
}
return ! conn->is_done();
}
bool PE::EndOfFile()
{
return false;
}
bool PE::EndOfFile() { return false; }
} // namespace zeek::file_analysis::detail
} // namespace zeek::file_analysis::detail

View file

@ -7,31 +7,28 @@
#include "file_analysis/analyzer/pe/pe_pac.h"
namespace zeek::file_analysis::detail
{
namespace zeek::file_analysis::detail {
/**
* Analyze Portable Executable files
*/
class PE : public file_analysis::Analyzer
{
class PE : public file_analysis::Analyzer {
public:
~PE();
~PE();
static file_analysis::Analyzer* Instantiate(RecordValPtr args, file_analysis::File* file)
{
return new PE(std::move(args), file);
}
static file_analysis::Analyzer* Instantiate(RecordValPtr args, file_analysis::File* file) {
return new PE(std::move(args), file);
}
virtual bool DeliverStream(const u_char* data, uint64_t len);
virtual bool DeliverStream(const u_char* data, uint64_t len);
virtual bool EndOfFile();
virtual bool EndOfFile();
protected:
PE(RecordValPtr args, file_analysis::File* file);
binpac::PE::File* interp;
binpac::PE::MockConnection* conn;
bool done;
};
PE(RecordValPtr args, file_analysis::File* file);
binpac::PE::File* interp;
binpac::PE::MockConnection* conn;
bool done;
};
} // namespace zeek::file_analysis::detail
} // namespace zeek::file_analysis::detail

View file

@ -5,22 +5,18 @@
#include "zeek/file_analysis/Component.h"
#include "zeek/file_analysis/analyzer/pe/PE.h"
namespace zeek::plugin::detail::Zeek_PE
{
namespace zeek::plugin::detail::Zeek_PE {
class Plugin : public zeek::plugin::Plugin
{
class Plugin : public zeek::plugin::Plugin {
public:
zeek::plugin::Configuration Configure() override
{
AddComponent(
new zeek::file_analysis::Component("PE", zeek::file_analysis::detail::PE::Instantiate));
zeek::plugin::Configuration Configure() override {
AddComponent(new zeek::file_analysis::Component("PE", zeek::file_analysis::detail::PE::Instantiate));
zeek::plugin::Configuration config;
config.name = "Zeek::PE";
config.description = "Portable Executable analyzer";
return config;
}
} plugin;
zeek::plugin::Configuration config;
config.name = "Zeek::PE";
config.description = "Portable Executable analyzer";
return config;
}
} plugin;
} // namespace zeek::plugin::detail::Zeek_PE
} // namespace zeek::plugin::detail::Zeek_PE

File diff suppressed because it is too large Load diff

View file

@ -7,36 +7,32 @@
#include "zeek/file_analysis/analyzer/x509/X509Common.h"
namespace zeek::file_analysis
{
namespace zeek::file_analysis {
class File;
namespace detail
{
namespace detail {
class OCSP : public file_analysis::detail::X509Common
{
class OCSP : public file_analysis::detail::X509Common {
public:
bool DeliverStream(const u_char* data, uint64_t len) override;
bool Undelivered(uint64_t offset, uint64_t len) override;
bool EndOfFile() override;
bool DeliverStream(const u_char* data, uint64_t len) override;
bool Undelivered(uint64_t offset, uint64_t len) override;
bool EndOfFile() override;
static file_analysis::Analyzer* InstantiateRequest(RecordValPtr args,
file_analysis::File* file);
static file_analysis::Analyzer* InstantiateReply(RecordValPtr args, file_analysis::File* file);
static file_analysis::Analyzer* InstantiateRequest(RecordValPtr args, file_analysis::File* file);
static file_analysis::Analyzer* InstantiateReply(RecordValPtr args, file_analysis::File* file);
protected:
OCSP(RecordValPtr args, file_analysis::File* file, bool request);
OCSP(RecordValPtr args, file_analysis::File* file, bool request);
private:
void ParseResponse(OCSP_RESPONSE*);
void ParseRequest(OCSP_REQUEST*);
void ParseExtensionsSpecific(X509_EXTENSION* ex, bool, ASN1_OBJECT*, const char*) override;
void ParseResponse(OCSP_RESPONSE*);
void ParseRequest(OCSP_REQUEST*);
void ParseExtensionsSpecific(X509_EXTENSION* ex, bool, ASN1_OBJECT*, const char*) override;
std::string ocsp_data;
bool request = false; // true if ocsp request, false if reply
};
std::string ocsp_data;
bool request = false; // true if ocsp request, false if reply
};
} // namespace detail
} // namespace zeek::file_analysis
} // namespace detail
} // namespace zeek::file_analysis

View file

@ -6,32 +6,27 @@
#include "zeek/file_analysis/analyzer/x509/OCSP.h"
#include "zeek/file_analysis/analyzer/x509/X509.h"
namespace zeek::plugin::detail::Zeek_X509
{
namespace zeek::plugin::detail::Zeek_X509 {
class Plugin : public zeek::plugin::Plugin
{
class Plugin : public zeek::plugin::Plugin {
public:
zeek::plugin::Configuration Configure() override
{
AddComponent(new zeek::file_analysis::Component(
"X509", zeek::file_analysis::detail::X509::Instantiate));
AddComponent(new zeek::file_analysis::Component(
"OCSP_REQUEST", zeek::file_analysis::detail::OCSP::InstantiateRequest));
AddComponent(new zeek::file_analysis::Component(
"OCSP_REPLY", zeek::file_analysis::detail::OCSP::InstantiateReply));
zeek::plugin::Configuration Configure() override {
AddComponent(new zeek::file_analysis::Component("X509", zeek::file_analysis::detail::X509::Instantiate));
AddComponent(
new zeek::file_analysis::Component("OCSP_REQUEST", zeek::file_analysis::detail::OCSP::InstantiateRequest));
AddComponent(
new zeek::file_analysis::Component("OCSP_REPLY", zeek::file_analysis::detail::OCSP::InstantiateReply));
zeek::plugin::Configuration config;
config.name = "Zeek::X509";
config.description = "X509 and OCSP analyzer";
return config;
}
zeek::plugin::Configuration config;
config.name = "Zeek::X509";
config.description = "X509 and OCSP analyzer";
return config;
}
void Done() override
{
zeek::plugin::Plugin::Done();
zeek::file_analysis::detail::X509::FreeRootStore();
}
} plugin;
void Done() override {
zeek::plugin::Plugin::Done();
zeek::file_analysis::detail::X509::FreeRootStore();
}
} plugin;
} // namespace zeek::plugin::detail::Zeek_X509
} // namespace zeek::plugin::detail::Zeek_X509

File diff suppressed because it is too large Load diff

View file

@ -30,122 +30,113 @@
#define OCSP_SINGLERESP_get0_id(s) (s)->certId
static X509* X509_OBJECT_get0_X509(const X509_OBJECT* a)
{
if ( a == nullptr || a->type != X509_LU_X509 )
return nullptr;
return a->data.x509;
}
static X509* X509_OBJECT_get0_X509(const X509_OBJECT* a) {
if ( a == nullptr || a->type != X509_LU_X509 )
return nullptr;
return a->data.x509;
}
static void DSA_get0_pqg(const DSA* d, const BIGNUM** p, const BIGNUM** q, const BIGNUM** g)
{
if ( p != nullptr )
*p = d->p;
if ( q != nullptr )
*q = d->q;
if ( g != nullptr )
*g = d->g;
}
static void DSA_get0_pqg(const DSA* d, const BIGNUM** p, const BIGNUM** q, const BIGNUM** g) {
if ( p != nullptr )
*p = d->p;
if ( q != nullptr )
*q = d->q;
if ( g != nullptr )
*g = d->g;
}
static void RSA_get0_key(const RSA* r, const BIGNUM** n, const BIGNUM** e, const BIGNUM** d)
{
if ( n != nullptr )
*n = r->n;
if ( e != nullptr )
*e = r->e;
if ( d != nullptr )
*d = r->d;
}
static void RSA_get0_key(const RSA* r, const BIGNUM** n, const BIGNUM** e, const BIGNUM** d) {
if ( n != nullptr )
*n = r->n;
if ( e != nullptr )
*e = r->e;
if ( d != nullptr )
*d = r->d;
}
#endif
#endif
namespace zeek::file_analysis::detail
{
namespace zeek::file_analysis::detail {
class X509Val;
class X509 : public file_analysis::detail::X509Common
{
class X509 : public file_analysis::detail::X509Common {
public:
bool DeliverStream(const u_char* data, uint64_t len) override;
bool Undelivered(uint64_t offset, uint64_t len) override;
bool EndOfFile() override;
bool DeliverStream(const u_char* data, uint64_t len) override;
bool Undelivered(uint64_t offset, uint64_t len) override;
bool EndOfFile() override;
/**
* Converts an X509 certificate into a \c X509::Certificate record
* value. This is a static function that can be called from external,
* it doesn't depend on the state of any particular file analyzer.
*
* @param cert_val The certificate to convert.
*
* @param file A file associated with the certificate, if any
* (primarily for error reporting).
*
* @return The new record value; ownership passes to the
* caller.
*/
static RecordValPtr ParseCertificate(X509Val* cert_val, file_analysis::File* file = nullptr);
/**
* Converts an X509 certificate into a \c X509::Certificate record
* value. This is a static function that can be called from external,
* it doesn't depend on the state of any particular file analyzer.
*
* @param cert_val The certificate to convert.
*
* @param file A file associated with the certificate, if any
* (primarily for error reporting).
*
* @return The new record value; ownership passes to the
* caller.
*/
static RecordValPtr ParseCertificate(X509Val* cert_val, file_analysis::File* file = nullptr);
static file_analysis::Analyzer* Instantiate(RecordValPtr args, file_analysis::File* file)
{
return new X509(std::move(args), file);
}
static file_analysis::Analyzer* Instantiate(RecordValPtr args, file_analysis::File* file) {
return new X509(std::move(args), file);
}
/**
* Retrieves OpenSSL's representation of an X509 certificate store
* associated with a script-layer certificate root table variable/value.
* The underlying X509 store will be created if it has not been already,
* else the previously allocated one for the same table will be returned.
*
* @param root_certs The script-layer certificate root table value.
*
* @return OpenSSL's X509 store associated with the table value.
*/
static X509_STORE* GetRootStore(TableVal* root_certs);
/**
* Retrieves OpenSSL's representation of an X509 certificate store
* associated with a script-layer certificate root table variable/value.
* The underlying X509 store will be created if it has not been already,
* else the previously allocated one for the same table will be returned.
*
* @param root_certs The script-layer certificate root table value.
*
* @return OpenSSL's X509 store associated with the table value.
*/
static X509_STORE* GetRootStore(TableVal* root_certs);
/**
* Frees memory obtained from OpenSSL that is associated with the global
* X509 certificate store used by the Zeek scripting-layer. This primarily
* exists so leak checkers like LeakSanitizer don't count the
* globally-allocated mapping as a leak. Would be easy to suppress/ignore
* it, but that could accidentally silence cases where some new code
* mistakenly overwrites a table element without freeing it.
*/
static void FreeRootStore();
/**
* Frees memory obtained from OpenSSL that is associated with the global
* X509 certificate store used by the Zeek scripting-layer. This primarily
* exists so leak checkers like LeakSanitizer don't count the
* globally-allocated mapping as a leak. Would be easy to suppress/ignore
* it, but that could accidentally silence cases where some new code
* mistakenly overwrites a table element without freeing it.
*/
static void FreeRootStore();
/**
* Sets the table[string] that is used as the certificate cache inside of Zeek.
*/
static void SetCertificateCache(TableValPtr cache) { certificate_cache = std::move(cache); }
/**
* Sets the table[string] that is used as the certificate cache inside of Zeek.
*/
static void SetCertificateCache(TableValPtr cache) { certificate_cache = std::move(cache); }
/**
* Sets the callback when a certificate cache hit is encountered
*/
static void SetCertificateCacheHitCallback(FuncPtr func)
{
cache_hit_callback = std::move(func);
}
/**
* Sets the callback when a certificate cache hit is encountered
*/
static void SetCertificateCacheHitCallback(FuncPtr func) { cache_hit_callback = std::move(func); }
protected:
X509(RecordValPtr args, file_analysis::File* file);
X509(RecordValPtr args, file_analysis::File* file);
private:
void ParseBasicConstraints(X509_EXTENSION* ex);
void ParseSAN(X509_EXTENSION* ex);
void ParseExtensionsSpecific(X509_EXTENSION* ex, bool, ASN1_OBJECT*, const char*) override;
void ParseBasicConstraints(X509_EXTENSION* ex);
void ParseSAN(X509_EXTENSION* ex);
void ParseExtensionsSpecific(X509_EXTENSION* ex, bool, ASN1_OBJECT*, const char*) override;
std::string cert_data;
std::string cert_data;
// Helpers for ParseCertificate.
static StringValPtr KeyCurve(EVP_PKEY* key);
static unsigned int KeyLength(EVP_PKEY* key);
/** X509 stores associated with global script-layer values */
inline static std::map<Val*, X509_STORE*> x509_stores = std::map<Val*, X509_STORE*>();
inline static TableValPtr certificate_cache = nullptr;
inline static FuncPtr cache_hit_callback = nullptr;
};
// Helpers for ParseCertificate.
static StringValPtr KeyCurve(EVP_PKEY* key);
static unsigned int KeyLength(EVP_PKEY* key);
/** X509 stores associated with global script-layer values */
inline static std::map<Val*, X509_STORE*> x509_stores = std::map<Val*, X509_STORE*>();
inline static TableValPtr certificate_cache = nullptr;
inline static FuncPtr cache_hit_callback = nullptr;
};
/**
* This class wraps an OpenSSL X509 data structure.
@ -154,49 +145,48 @@ private:
* script-land. Otherwise, we cannot verify certificates from Zeek
* scriptland
*/
// OpaqueVal subclass whose only data member is a pointer to an OpenSSL ::X509
// (see `certificate` at the end of the class).
class X509Val : public OpaqueVal
{
class X509Val : public OpaqueVal {
public:
/**
* Construct an X509Val.
*
* @param certificate specifies the wrapped OpenSSL certificate.
* NOTE(review): whether the constructor takes ownership of the pointer is
* not visible in this declaration -- confirm in the implementation.
*/
explicit X509Val(::X509* certificate);
/**
* Construct an X509Val.
*
* @param certificate specifies the wrapped OpenSSL certificate.
* NOTE(review): whether the constructor takes ownership of the pointer is
* not visible in this declaration -- confirm in the implementation.
*/
explicit X509Val(::X509* certificate);
/**
* Clone an X509Val.
*
* @param state tracks the state of the clone operation (duplicate tracking)
*
* @return A cloned X509Val.
*/
ValPtr DoClone(CloneState* state) override;
/**
* Clone an X509Val.
*
* @param state tracks the state of the clone operation (duplicate tracking)
*
* @return A cloned X509Val.
*/
ValPtr DoClone(CloneState* state) override;
/**
* Destructor.
*/
~X509Val() override;
/**
* Destructor.
*/
~X509Val() override;
/**
* Get the wrapped X509 certificate. Callers must take care that the
* internal OpenSSL reference count stays the same.
*
* @return The wrapped OpenSSL X509 certificate.
*/
::X509* GetCertificate() const;
/**
* Get the wrapped X509 certificate. Callers must take care that the
* internal OpenSSL reference count stays the same.
*
* @return The wrapped OpenSSL X509 certificate.
*/
::X509* GetCertificate() const;
protected:
/**
* Construct an empty X509Val. Only used for deserialization.
*/
X509Val();
/**
* Construct an empty X509Val. Only used for deserialization.
*/
X509Val();
// Project macro defined outside this file; presumably declares the
// serialization hooks that the protected default constructor exists for --
// TODO confirm against the macro definition.
DECLARE_OPAQUE_VALUE(X509Val)
DECLARE_OPAQUE_VALUE(X509Val)
private:
::X509* certificate; // the wrapped certificate
};
::X509* certificate; // the wrapped certificate
};
} // namespace zeek::file_analysis::detail
} // namespace zeek::file_analysis::detail

View file

@ -14,334 +14,295 @@
#include "zeek/file_analysis/analyzer/x509/types.bif.h"
#include "zeek/file_analysis/analyzer/x509/x509-extension_pac.h"
namespace zeek::file_analysis::detail
{
namespace zeek::file_analysis::detail {
// Constructor: hands the tag, the argument record (moved) and the file straight
// to the file_analysis::Analyzer base class. The body itself is empty.
X509Common::X509Common(const zeek::Tag& arg_tag, RecordValPtr arg_args,
file_analysis::File* arg_file)
: file_analysis::Analyzer(arg_tag, std::move(arg_args), arg_file)
{
}
X509Common::X509Common(const zeek::Tag& arg_tag, RecordValPtr arg_args, file_analysis::File* arg_file)
: file_analysis::Analyzer(arg_tag, std::move(arg_args), arg_file) {}
// File-local helper that reports a "weird" called `name` through the global
// `reporter`.
//
// With a non-null `file` the three-argument Weird(file, name, addl) overload is
// used; without a file only `name` is reported and `addl` is dropped.
static void EmitWeird(const char* name, file_analysis::File* file, const char* addl = "")
{
if ( file )
reporter->Weird(file, name, addl);
else
reporter->Weird(name);
}
static void EmitWeird(const char* name, file_analysis::File* file, const char* addl = "") {
if ( file )
reporter->Weird(file, name, addl);
else
reporter->Weird(name);
}
/**
* Converts an ASN.1 time value into a time_t-based timestamp.
*
* The UTCTime or GeneralizedTime string is first normalized into
* "YYYYMMDDHHMMSSZ" inside a local buffer, then split into a struct tm and
* handed to mktime(). A trailing "+hhmm" / "-hhmm" suffix is turned into an
* offset that is added to the result.
*
* @param atime The ASN.1 time to convert; its type, data and length are read.
*
* @param f A file used when emitting weirds; may be null.
*
* @param reporter Not referenced in this body; weirds are emitted through
* EmitWeird(), which uses the global reporter.
*
* @return The converted time, or 0 after a weird was emitted for malformed
* input (also 0 when mktime() itself returns 0).
*/
double X509Common::GetTimeFromAsn1(const ASN1_TIME* atime, file_analysis::File* f,
Reporter* reporter)
{
time_t lResult = 0;
double X509Common::GetTimeFromAsn1(const ASN1_TIME* atime, file_analysis::File* f, Reporter* reporter) {
time_t lResult = 0;
// Scratch buffer for the normalized "YYYYMMDDHHMMSSZ" string; pBuffer is the
// write cursor into it.
char lBuffer[26];
char* pBuffer = lBuffer;
char lBuffer[26];
char* pBuffer = lBuffer;
// pString / remaining form the read cursor over the raw ASN.1 bytes.
const char* pString = (const char*)atime->data;
unsigned int remaining = atime->length;
const char* pString = (const char*)atime->data;
unsigned int remaining = atime->length;
// UTCTime: accept 11 to 17 bytes, i.e. at least YYMMDDHHMM plus one trailing
// character.
if ( atime->type == V_ASN1_UTCTIME )
{
if ( remaining < 11 || remaining > 17 )
{
EmitWeird("x509_utc_length", f);
return 0;
}
if ( atime->type == V_ASN1_UTCTIME ) {
if ( remaining < 11 || remaining > 17 ) {
EmitWeird("x509_utc_length", f);
return 0;
}
if ( pString[remaining - 1] != 'Z' )
{
// not valid according to RFC 2459 4.1.2.5.1
EmitWeird("x509_utc_format", f);
return 0;
}
if ( pString[remaining - 1] != 'Z' ) {
// not valid according to RFC 2459 4.1.2.5.1
EmitWeird("x509_utc_format", f);
return 0;
}
// year is first two digits in YY format. Buffer expects YYYY format.
// A leading digit below '5' (YY < 50) is mapped to 20YY, anything else to 19YY.
if ( pString[0] < '5' ) // RFC 2459 4.1.2.5.1
{
*(pBuffer++) = '2';
*(pBuffer++) = '0';
}
else
{
*(pBuffer++) = '1';
*(pBuffer++) = '9';
}
// year is first two digits in YY format. Buffer expects YYYY format.
if ( pString[0] < '5' ) // RFC 2459 4.1.2.5.1
{
*(pBuffer++) = '2';
*(pBuffer++) = '0';
}
else {
*(pBuffer++) = '1';
*(pBuffer++) = '9';
}
// Copy YYMMDDHHMM verbatim; the bytes are not checked to be digits.
memcpy(pBuffer, pString, 10);
pBuffer += 10;
pString += 10;
remaining -= 10;
}
else if ( atime->type == V_ASN1_GENERALIZEDTIME )
{
// generalized time. We apparently ignore the YYYYMMDDHH case
// for now and assume we always have minutes and seconds.
// This should be ok because it is specified as a requirement in RFC 2459 4.1.2.5.2
memcpy(pBuffer, pString, 10);
pBuffer += 10;
pString += 10;
remaining -= 10;
}
else if ( atime->type == V_ASN1_GENERALIZEDTIME ) {
// generalized time. We apparently ignore the YYYYMMDDHH case
// for now and assume we always have minutes and seconds.
// This should be ok because it is specified as a requirement in RFC 2459 4.1.2.5.2
if ( remaining < 12 || remaining > 23 )
{
EmitWeird("x509_gen_time_length", f);
return 0;
}
if ( remaining < 12 || remaining > 23 ) {
EmitWeird("x509_gen_time_length", f);
return 0;
}
// Copy YYYYMMDDHHMM verbatim; the bytes are not checked to be digits.
memcpy(pBuffer, pString, 12);
pBuffer += 12;
pString += 12;
remaining -= 12;
}
else
{
EmitWeird("x509_invalid_time_type", f);
return 0;
}
memcpy(pBuffer, pString, 12);
pBuffer += 12;
pString += 12;
remaining -= 12;
}
else {
EmitWeird("x509_invalid_time_type", f);
return 0;
}
// Seconds are optional: if the input ends here or continues with a zone
// designator ('Z', '+', '-'), "00" is substituted.
if ( (remaining == 0) || (*pString == 'Z') || (*pString == '-') || (*pString == '+') )
{
*(pBuffer++) = '0';
*(pBuffer++) = '0';
}
if ( (remaining == 0) || (*pString == 'Z') || (*pString == '-') || (*pString == '+') ) {
*(pBuffer++) = '0';
*(pBuffer++) = '0';
}
// Otherwise the next two bytes are taken as the seconds field.
else if ( remaining >= 2 )
{
*(pBuffer++) = *(pString++);
*(pBuffer++) = *(pString++);
else if ( remaining >= 2 ) {
*(pBuffer++) = *(pString++);
*(pBuffer++) = *(pString++);
remaining -= 2;
remaining -= 2;
// Skip any fractional seconds...
if ( (remaining > 0) && (*pString == '.') )
{
pString++;
remaining--;
// Skip any fractional seconds...
if ( (remaining > 0) && (*pString == '.') ) {
pString++;
remaining--;
while ( (remaining > 0) && (*pString >= '0') && (*pString <= '9') )
{
pString++;
remaining--;
}
}
}
while ( (remaining > 0) && (*pString >= '0') && (*pString <= '9') ) {
pString++;
remaining--;
}
}
}
// Exactly one stray byte that is not a zone designator.
else
{
EmitWeird("x509_time_add_char", f);
return 0;
}
else {
EmitWeird("x509_time_add_char", f);
return 0;
}
// lBuffer now holds a NUL-terminated "YYYYMMDDHHMMSSZ" (16 of the 26 bytes).
*(pBuffer++) = 'Z';
*(pBuffer++) = '\0';
*(pBuffer++) = 'Z';
*(pBuffer++) = '\0';
time_t lSecondsFromUTC;
time_t lSecondsFromUTC;
// No suffix, or a literal 'Z': the time is taken as-is. Anything else has to
// be a sign followed by four more bytes (hhmm).
if ( remaining == 0 || *pString == 'Z' )
lSecondsFromUTC = 0;
else
{
if ( remaining < 5 )
{
EmitWeird("x509_time_offset_underflow", f);
return 0;
}
if ( remaining == 0 || *pString == 'Z' )
lSecondsFromUTC = 0;
else {
if ( remaining < 5 ) {
EmitWeird("x509_time_offset_underflow", f);
return 0;
}
if ( (*pString != '+') && (*pString != '-') )
{
EmitWeird("x509_time_offset_type", f);
return 0;
}
if ( (*pString != '+') && (*pString != '-') ) {
EmitWeird("x509_time_offset_type", f);
return 0;
}
// NOTE(review): hours * 60 + minutes yields a value in minutes, yet it is
// later added to a time_t in seconds -- looks like a missing "* 60"; confirm.
// The four offset bytes are also not checked to be digits.
lSecondsFromUTC = ((pString[1] - '0') * 10 + (pString[2] - '0')) * 60;
lSecondsFromUTC += (pString[3] - '0') * 10 + (pString[4] - '0');
lSecondsFromUTC = ((pString[1] - '0') * 10 + (pString[2] - '0')) * 60;
lSecondsFromUTC += (pString[3] - '0') * 10 + (pString[4] - '0');
if ( *pString == '-' )
lSecondsFromUTC = -lSecondsFromUTC;
}
if ( *pString == '-' )
lSecondsFromUTC = -lSecondsFromUTC;
}
// Decode the fixed-position fields of the normalized buffer:
// [0..3] year, [4..5] month, [6..7] day, [8..9] hour, [10..11] min, [12..13] sec.
tm lTime;
lTime.tm_sec = ((lBuffer[12] - '0') * 10) + (lBuffer[13] - '0');
lTime.tm_min = ((lBuffer[10] - '0') * 10) + (lBuffer[11] - '0');
lTime.tm_hour = ((lBuffer[8] - '0') * 10) + (lBuffer[9] - '0');
lTime.tm_mday = ((lBuffer[6] - '0') * 10) + (lBuffer[7] - '0');
lTime.tm_mon = (((lBuffer[4] - '0') * 10) + (lBuffer[5] - '0')) - 1;
lTime.tm_year = (lBuffer[0] - '0') * 1000 + (lBuffer[1] - '0') * 100 +
((lBuffer[2] - '0') * 10) + (lBuffer[3] - '0');
tm lTime;
lTime.tm_sec = ((lBuffer[12] - '0') * 10) + (lBuffer[13] - '0');
lTime.tm_min = ((lBuffer[10] - '0') * 10) + (lBuffer[11] - '0');
lTime.tm_hour = ((lBuffer[8] - '0') * 10) + (lBuffer[9] - '0');
lTime.tm_mday = ((lBuffer[6] - '0') * 10) + (lBuffer[7] - '0');
lTime.tm_mon = (((lBuffer[4] - '0') * 10) + (lBuffer[5] - '0')) - 1;
lTime.tm_year =
(lBuffer[0] - '0') * 1000 + (lBuffer[1] - '0') * 100 + ((lBuffer[2] - '0') * 10) + (lBuffer[3] - '0');
// struct tm counts years from 1900; years at or below 1900 are left unchanged.
if ( lTime.tm_year > 1900 )
lTime.tm_year -= 1900;
if ( lTime.tm_year > 1900 )
lTime.tm_year -= 1900;
lTime.tm_wday = 0;
lTime.tm_yday = 0;
lTime.tm_isdst = 0; // No DST adjustment requested
lTime.tm_wday = 0;
lTime.tm_yday = 0;
lTime.tm_isdst = 0; // No DST adjustment requested
// NOTE(review): mktime() interprets lTime as local time, so the result is only
// a UTC timestamp if the process time zone is UTC -- confirm. mktime() also
// signals failure with -1, which the non-zero test below does not catch.
lResult = mktime(&lTime);
lResult = mktime(&lTime);
if ( lResult )
{
if ( lTime.tm_isdst != 0 )
lResult -= 3600; // mktime may adjust for DST (OS dependent)
if ( lResult ) {
if ( lTime.tm_isdst != 0 )
lResult -= 3600; // mktime may adjust for DST (OS dependent)
lResult += lSecondsFromUTC;
}
lResult += lSecondsFromUTC;
}
else
lResult = 0;
else
lResult = 0;
return lResult;
}
return lResult;
}
/**
* Parses a signed-certificate-timestamp extension with the binpac
* SignedCertTimestampExt parser.
*
* The extension payload is copied, decoded once more as an ASN.1 octet string,
* and the inner bytes are fed to a parser instance that is created and
* destroyed within this call. Failures are reported through reporter->Error().
*
* @param ext The extension whose data is parsed.
*/
void X509Common::ParseSignedCertificateTimestamps(X509_EXTENSION* ext)
{
// Ok, signed certificate timestamps are a bit of an odd case out; we don't
// want to use the (basically nonexistent) OpenSSL functionality to parse them.
// Instead we have our own, self-written binpac parser to parse just them,
// which we will initialize here and tear down immediately again.
void X509Common::ParseSignedCertificateTimestamps(X509_EXTENSION* ext) {
// Ok, signed certificate timestamps are a bit of an odd case out; we don't
// want to use the (basically nonexistent) OpenSSL functionality to parse them.
// Instead we have our own, self-written binpac parser to parse just them,
// which we will initialize here and tear down immediately again.
ASN1_OCTET_STRING* ext_val = X509_EXTENSION_get_data(ext);
// the octet string of the extension contains the octet string which in turn
// contains the SCT. Obviously.
ASN1_OCTET_STRING* ext_val = X509_EXTENSION_get_data(ext);
// the octet string of the extension contains the octet string which in turn
// contains the SCT. Obviously.
// Work on a private copy. ext_val_second_pointer keeps the start of the
// allocation for OPENSSL_free(), because the d2i call below is handed the
// address of ext_val_copy and may move it.
// NOTE(review): the OPENSSL_malloc() result is not checked before memcpy().
unsigned char* ext_val_copy = (unsigned char*)OPENSSL_malloc(ext_val->length);
unsigned char* ext_val_second_pointer = ext_val_copy;
memcpy(ext_val_copy, ext_val->data, ext_val->length);
unsigned char* ext_val_copy = (unsigned char*)OPENSSL_malloc(ext_val->length);
unsigned char* ext_val_second_pointer = ext_val_copy;
memcpy(ext_val_copy, ext_val->data, ext_val->length);
// Decode the inner octet string; on failure release the copy and give up.
ASN1_OCTET_STRING* inner = d2i_ASN1_OCTET_STRING(NULL, (const unsigned char**)&ext_val_copy,
ext_val->length);
if ( ! inner )
{
OPENSSL_free(ext_val_second_pointer);
reporter->Error(
"X509::ParseSignedCertificateTimestamps could not parse inner octet string");
return;
}
ASN1_OCTET_STRING* inner = d2i_ASN1_OCTET_STRING(NULL, (const unsigned char**)&ext_val_copy, ext_val->length);
if ( ! inner ) {
OPENSSL_free(ext_val_second_pointer);
reporter->Error("X509::ParseSignedCertificateTimestamps could not parse inner octet string");
return;
}
// Short-lived parser pair, deleted at the end of this function.
binpac::X509Extension::MockConnection* conn = new binpac::X509Extension::MockConnection(this);
binpac::X509Extension::SignedCertTimestampExt* interp =
new binpac::X509Extension::SignedCertTimestampExt(conn);
binpac::X509Extension::MockConnection* conn = new binpac::X509Extension::MockConnection(this);
binpac::X509Extension::SignedCertTimestampExt* interp = new binpac::X509Extension::SignedCertTimestampExt(conn);
// A binpac parse exception is reported and otherwise swallowed, so the
// cleanup below runs either way.
try
{
interp->NewData(inner->data, inner->data + inner->length);
}
catch ( const binpac::Exception& e )
{
// throw a warning or sth
reporter->Error("X509::ParseSignedCertificateTimestamps could not parse SCT");
}
try {
interp->NewData(inner->data, inner->data + inner->length);
} catch ( const binpac::Exception& e ) {
// throw a warning or sth
reporter->Error("X509::ParseSignedCertificateTimestamps could not parse SCT");
}
ASN1_OCTET_STRING_free(inner);
OPENSSL_free(ext_val_second_pointer);
ASN1_OCTET_STRING_free(inner);
OPENSSL_free(ext_val_second_pointer);
// FlowEOF() is called even when NewData() threw above.
interp->FlowEOF();
interp->FlowEOF();
delete interp;
delete conn;
}
delete interp;
delete conn;
}
/**
* Turns one X509 extension into an X509::Extension record, raises the event
* `h` for it (if `h` is set), and then lets the concrete analyzer inspect the
* extension through ParseExtensionsSpecific().
*
* @param ex The extension to process.
*
* @param h The event to enqueue; when unset, no record is built and only
* ParseExtensionsSpecific() runs.
*
* @param global Forwarded to ParseExtensionsSpecific(); also appended as a
* bool argument when `h` is ocsp_extension.
*/
void X509Common::ParseExtension(X509_EXTENSION* ex, const EventHandlerPtr& h, bool global)
{
char name[256];
char oid[256];
void X509Common::ParseExtension(X509_EXTENSION* ex, const EventHandlerPtr& h, bool global) {
char name[256];
char oid[256];
ASN1_OBJECT* ext_asn = X509_EXTENSION_get_object(ex);
const char* short_name = OBJ_nid2sn(OBJ_obj2nid(ext_asn));
ASN1_OBJECT* ext_asn = X509_EXTENSION_get_object(ex);
const char* short_name = OBJ_nid2sn(OBJ_obj2nid(ext_asn));
// Render the object twice, with last argument 0 into `name` and 1 into `oid`.
// Only 255 of the 256 buffer bytes are offered to OpenSSL.
OBJ_obj2txt(name, 255, ext_asn, 0);
OBJ_obj2txt(oid, 255, ext_asn, 1);
OBJ_obj2txt(name, 255, ext_asn, 0);
OBJ_obj2txt(oid, 255, ext_asn, 1);
// Collapse the critical flag to 0 or 1.
int critical = 0;
if ( X509_EXTENSION_get_critical(ex) != 0 )
critical = 1;
int critical = 0;
if ( X509_EXTENSION_get_critical(ex) != 0 )
critical = 1;
// Try OpenSSL's printer for the extension value first; if it fails, fall back
// to writing the i2d-encoded octet string into the BIO instead.
BIO* bio = BIO_new(BIO_s_mem());
if ( ! X509V3_EXT_print(bio, ex, 0, 0) )
{
unsigned char* buf = nullptr;
int len = i2d_ASN1_OCTET_STRING(X509_EXTENSION_get_data(ex), &buf);
if ( len >= 0 )
{
BIO_write(bio, buf, len);
OPENSSL_free(buf);
}
}
BIO* bio = BIO_new(BIO_s_mem());
if ( ! X509V3_EXT_print(bio, ex, 0, 0) ) {
unsigned char* buf = nullptr;
int len = i2d_ASN1_OCTET_STRING(X509_EXTENSION_get_data(ex), &buf);
if ( len >= 0 ) {
BIO_write(bio, buf, len);
OPENSSL_free(buf);
}
}
// GetExtensionFromBIO() frees `bio` on every path, so it is not freed here.
auto ext_val = GetExtensionFromBIO(bio, GetFile());
auto ext_val = GetExtensionFromBIO(bio, GetFile());
if ( ! h )
{
// let individual analyzers parse more.
ParseExtensionsSpecific(ex, global, ext_asn, oid);
return;
}
if ( ! h ) {
// let individual analyzers parse more.
ParseExtensionsSpecific(ex, global, ext_asn, oid);
return;
}
// A null value (error in GetExtensionFromBIO) is replaced by an empty string.
if ( ! ext_val )
ext_val = make_intrusive<StringVal>(0, "");
if ( ! ext_val )
ext_val = make_intrusive<StringVal>(0, "");
// Record fields by index: 0 name, 1 short name (only if non-empty), 2 oid,
// 3 critical flag, 4 value.
auto pX509Ext = make_intrusive<RecordVal>(BifType::Record::X509::Extension);
pX509Ext->Assign(0, name);
auto pX509Ext = make_intrusive<RecordVal>(BifType::Record::X509::Extension);
pX509Ext->Assign(0, name);
if ( short_name && strlen(short_name) > 0 )
pX509Ext->Assign(1, short_name);
if ( short_name && strlen(short_name) > 0 )
pX509Ext->Assign(1, short_name);
pX509Ext->Assign(2, oid);
pX509Ext->Assign(3, critical);
pX509Ext->Assign(4, ext_val);
pX509Ext->Assign(2, oid);
pX509Ext->Assign(3, critical);
pX509Ext->Assign(4, ext_val);
// send off generic extension event
//
// and then look if we have a specialized event for the extension we just
// parsed. And if we have it, we send the specialized event on top of the
// generic event that we just had. I know, that is... kind of not nice,
// but I am not sure if there is a better way to do it...
// send off generic extension event
//
// and then look if we have a specialized event for the extension we just
// parsed. And if we have it, we send the specialized event on top of the
// generic event that we just had. I know, that is... kind of not nice,
// but I am not sure if there is a better way to do it...
// The ocsp_extension event takes `global` as an extra bool argument.
if ( h == ocsp_extension )
event_mgr.Enqueue(h, GetFile()->ToVal(), std::move(pX509Ext), val_mgr->Bool(global));
else
event_mgr.Enqueue(h, GetFile()->ToVal(), std::move(pX509Ext));
if ( h == ocsp_extension )
event_mgr.Enqueue(h, GetFile()->ToVal(), std::move(pX509Ext), val_mgr->Bool(global));
else
event_mgr.Enqueue(h, GetFile()->ToVal(), std::move(pX509Ext));
// let individual analyzers parse more.
ParseExtensionsSpecific(ex, global, ext_asn, oid);
}
// let individual analyzers parse more.
ParseExtensionsSpecific(ex, global, ext_asn, oid);
}
/**
* Drains an OpenSSL BIO into a StringVal and frees the BIO.
*
* @param bio The BIO to read; it is freed with BIO_free_all() on every return
* path of this function.
*
* @param f A file used when emitting a weird; may be null.
*
* @return The pending bytes as a StringVal; the empty string when nothing is
* pending; nullptr when OpenSSL has an error queued or the buffer allocation
* fails.
*/
StringValPtr X509Common::GetExtensionFromBIO(BIO* bio, file_analysis::File* f)
{
BIO_flush(bio);
ERR_clear_error();
int length = BIO_pending(bio);
StringValPtr X509Common::GetExtensionFromBIO(BIO* bio, file_analysis::File* f) {
BIO_flush(bio);
ERR_clear_error();
int length = BIO_pending(bio);
// The error queue was cleared just before BIO_pending(), so any entry found
// now was raised by that call.
if ( ERR_peek_error() != 0 )
{
char tmp[120];
ERR_error_string_n(ERR_get_error(), tmp, sizeof(tmp));
EmitWeird("x509_get_ext_from_bio", f, tmp);
BIO_free_all(bio);
return nullptr;
}
if ( ERR_peek_error() != 0 ) {
char tmp[120];
ERR_error_string_n(ERR_get_error(), tmp, sizeof(tmp));
EmitWeird("x509_get_ext_from_bio", f, tmp);
BIO_free_all(bio);
return nullptr;
}
// Nothing pending: return the empty string from val_mgr without allocating.
if ( length == 0 )
{
BIO_free_all(bio);
return val_mgr->EmptyString();
}
if ( length == 0 ) {
BIO_free_all(bio);
return val_mgr->EmptyString();
}
char* buffer = (char*)malloc(length);
char* buffer = (char*)malloc(length);
if ( ! buffer )
{
// Just emit an error here and try to continue instead of aborting
// because it's unclear the length value is very reliable.
reporter->Error("X509::GetExtensionFromBIO malloc(%d) failed", length);
BIO_free_all(bio);
return nullptr;
}
if ( ! buffer ) {
// Just emit an error here and try to continue instead of aborting
// because it's unclear the length value is very reliable.
reporter->Error("X509::GetExtensionFromBIO malloc(%d) failed", length);
BIO_free_all(bio);
return nullptr;
}
// NOTE(review): the BIO_read() return value is ignored; the StringVal is built
// from `length` bytes regardless of how many were actually read.
// The temporary buffer is freed right after the StringVal is constructed.
BIO_read(bio, (void*)buffer, length);
auto ext_val = make_intrusive<StringVal>(length, buffer);
BIO_read(bio, (void*)buffer, length);
auto ext_val = make_intrusive<StringVal>(length, buffer);
free(buffer);
BIO_free_all(bio);
free(buffer);
BIO_free_all(bio);
return ext_val;
}
return ext_val;
}
} // namespace zeek::file_analysis::detail
} // namespace zeek::file_analysis::detail

View file

@ -10,52 +10,48 @@
#include "zeek/file_analysis/Analyzer.h"
namespace zeek
{
namespace zeek {
class EventHandlerPtr;
class Reporter;
class StringVal;
template <class T> class IntrusivePtr;
template<class T>
class IntrusivePtr;
using StringValPtr = IntrusivePtr<StringVal>;
namespace file_analysis
{
namespace file_analysis {
class File;
namespace detail
{
namespace detail {
// Abstract file analyzer base for X509-related analyzers: it declares the
// extension-parsing helpers and leaves ParseExtensionsSpecific() pure virtual
// for subclasses to implement.
class X509Common : public file_analysis::Analyzer
{
class X509Common : public file_analysis::Analyzer {
public:
// Empty destructor (the trailing ';' after the body is redundant but harmless).
~X509Common() override{};
~X509Common() override{};
/**
* Retrieve an X509 extension value from an OpenSSL BIO to which it was
* written.
*
* @param bio the OpenSSL BIO to read. It will be freed by the function,
* including when an error occurs.
*
* @param f an associated file, if any (used for error reporting).
*
* @return The X509 extension value.
*/
static StringValPtr GetExtensionFromBIO(BIO* bio, file_analysis::File* f = nullptr);
/**
* Retrieve an X509 extension value from an OpenSSL BIO to which it was
* written.
*
* @param bio the OpenSSL BIO to read. It will be freed by the function,
* including when an error occurs.
*
* @param f an associated file, if any (used for error reporting).
*
* @return The X509 extension value.
*/
static StringValPtr GetExtensionFromBIO(BIO* bio, file_analysis::File* f = nullptr);
/**
* Convert an ASN.1 time value into a timestamp.
*
* @param atime the ASN.1 time to convert.
*
* @param f an associated file, if any (used for error reporting).
*
* @param reporter a reporter instance.
* NOTE(review): how this parameter is used is not visible from the
* declaration -- check the implementation.
*
* @return The converted time as a double.
*/
static double GetTimeFromAsn1(const ASN1_TIME* atime, file_analysis::File* f,
Reporter* reporter);
static double GetTimeFromAsn1(const ASN1_TIME* atime, file_analysis::File* f, Reporter* reporter);
protected:
// Protected: instances are created through derived analyzers only.
X509Common(const zeek::Tag& arg_tag, RecordValPtr arg_args, file_analysis::File* arg_file);
X509Common(const zeek::Tag& arg_tag, RecordValPtr arg_args, file_analysis::File* arg_file);
// Generic handling of a single extension, given the event handler `h` and the
// `global` flag.
void ParseExtension(X509_EXTENSION* ex, const EventHandlerPtr& h, bool global);
// Handling of the signed-certificate-timestamp extension.
void ParseSignedCertificateTimestamps(X509_EXTENSION* ext);
// Subclass hook for analyzer-specific extension parsing. The unnamed
// parameters are a bool, an ASN1_OBJECT* and a C string.
virtual void ParseExtensionsSpecific(X509_EXTENSION* ex, bool, ASN1_OBJECT*, const char*) = 0;
};
void ParseExtension(X509_EXTENSION* ex, const EventHandlerPtr& h, bool global);
void ParseSignedCertificateTimestamps(X509_EXTENSION* ext);
virtual void ParseExtensionsSpecific(X509_EXTENSION* ex, bool, ASN1_OBJECT*, const char*) = 0;
};
} // namespace detail
} // namespace file_analysis
} // namespace zeek
} // namespace detail
} // namespace file_analysis
} // namespace zeek