Rework file_analysis::AnalyzerSet to use unordered_map instead of Dictionary

This commit is contained in:
Tim Wojtulewicz 2023-11-07 11:28:39 -07:00
parent 8c6a650f23
commit f7365c9daa
3 changed files with 75 additions and 70 deletions

View file

@ -9,15 +9,7 @@
#include "zeek/file_analysis/Manager.h" #include "zeek/file_analysis/Manager.h"
namespace zeek::file_analysis::detail { namespace zeek::file_analysis::detail {
AnalyzerSet::AnalyzerSet(File* arg_file) : file(arg_file) {}
static void analyzer_del_func(void* v) {
file_analysis::Analyzer* a = (file_analysis::Analyzer*)v;
a->Done();
delete a;
}
AnalyzerSet::AnalyzerSet(File* arg_file) : file(arg_file) { analyzer_map.SetDeleteFunc(analyzer_del_func); }
AnalyzerSet::~AnalyzerSet() { AnalyzerSet::~AnalyzerSet() {
while ( ! mod_queue.empty() ) { while ( ! mod_queue.empty() ) {
@ -26,48 +18,64 @@ AnalyzerSet::~AnalyzerSet() {
delete mod; delete mod;
mod_queue.pop(); mod_queue.pop();
} }
for ( const auto& a : analyzer_map )
delete a.second;
analyzer_map.clear();
} }
Analyzer* AnalyzerSet::Find(const zeek::Tag& tag, RecordValPtr args) { Analyzer* AnalyzerSet::Find(const zeek::Tag& tag, RecordValPtr args) {
auto key = GetKey(tag, std::move(args)); auto lv = make_intrusive<ListVal>(TYPE_ANY);
Analyzer* rval = analyzer_map.Lookup(key.get()); lv->Append(tag.AsVal());
return rval; lv->Append(std::move(args));
auto it = analyzer_map.find(lv);
if ( it != analyzer_map.end() )
return it->second;
return nullptr;
} }
bool AnalyzerSet::Add(const zeek::Tag& tag, RecordValPtr args) { bool AnalyzerSet::Add(const zeek::Tag& tag, RecordValPtr args) {
auto key = GetKey(tag, args); auto lv = make_intrusive<ListVal>(TYPE_ANY);
lv->Append(tag.AsVal());
lv->Append(args);
if ( analyzer_map.Lookup(key.get()) ) { if ( analyzer_map.count(lv) != 0 ) {
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Instantiate analyzer %s skipped: already exists", file->GetID().c_str(), DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Instantiate analyzer %s skipped: already exists", file->GetID().c_str(),
file_mgr->GetComponentName(tag).c_str()); file_mgr->GetComponentName(tag).c_str());
return true; return true;
} }
file_analysis::Analyzer* a = InstantiateAnalyzer(tag, std::move(args)); file_analysis::Analyzer* a = InstantiateAnalyzer(tag, args);
if ( ! a ) if ( ! a )
return false; return false;
Insert(a, std::move(key)); Insert(a, tag, std::move(args));
return true; return true;
} }
Analyzer* AnalyzerSet::QueueAdd(const zeek::Tag& tag, RecordValPtr args) { Analyzer* AnalyzerSet::QueueAdd(const zeek::Tag& tag, RecordValPtr args) {
auto key = GetKey(tag, args); file_analysis::Analyzer* a = InstantiateAnalyzer(tag, args);
file_analysis::Analyzer* a = InstantiateAnalyzer(tag, std::move(args));
if ( ! a ) if ( ! a )
return nullptr; return nullptr;
mod_queue.push(new AddMod(a, std::move(key))); mod_queue.push(new AddMod(a, tag, std::move(args)));
return a; return a;
} }
bool AnalyzerSet::AddMod::Perform(AnalyzerSet* set) { bool AnalyzerSet::AddMod::Perform(AnalyzerSet* set) {
if ( set->analyzer_map.Lookup(key.get()) ) { auto lv = make_intrusive<ListVal>(TYPE_ANY);
lv->Append(tag.AsVal());
lv->Append(args);
if ( set->analyzer_map.find(lv) != set->analyzer_map.end() ) {
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Add analyzer %s skipped: already exists", a->GetFile()->GetID().c_str(), DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Add analyzer %s skipped: already exists", a->GetFile()->GetID().c_str(),
file_mgr->GetComponentName(a->Tag()).c_str()); file_mgr->GetComponentName(a->Tag()).c_str());
@ -75,19 +83,21 @@ bool AnalyzerSet::AddMod::Perform(AnalyzerSet* set) {
return true; return true;
} }
set->Insert(a, std::move(key)); set->Insert(a, tag, args);
return true; return true;
} }
void AnalyzerSet::AddMod::Abort() { delete a; } void AnalyzerSet::AddMod::Abort() { delete a; }
bool AnalyzerSet::Remove(const zeek::Tag& tag, RecordValPtr args) { return Remove(tag, GetKey(tag, std::move(args))); } bool AnalyzerSet::Remove(const zeek::Tag& tag, RecordValPtr args) {
auto lv = make_intrusive<ListVal>(TYPE_ANY);
lv->Append(tag.AsVal());
lv->Append(std::move(args));
bool AnalyzerSet::Remove(const zeek::Tag& tag, std::unique_ptr<zeek::detail::HashKey> key) { auto a = analyzer_map.find(lv);
auto a = (file_analysis::Analyzer*)analyzer_map.Remove(key.get());
if ( ! a ) { if ( a == analyzer_map.end() ) {
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Skip remove analyzer %s", file->GetID().c_str(), DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Skip remove analyzer %s", file->GetID().c_str(),
file_mgr->GetComponentName(tag).c_str()); file_mgr->GetComponentName(tag).c_str());
return false; return false;
@ -96,37 +106,31 @@ bool AnalyzerSet::Remove(const zeek::Tag& tag, std::unique_ptr<zeek::detail::Has
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Remove analyzer %s", file->GetID().c_str(), DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Remove analyzer %s", file->GetID().c_str(),
file_mgr->GetComponentName(tag).c_str()); file_mgr->GetComponentName(tag).c_str());
a->Done(); a->second->Done();
// We don't delete the analyzer object right here because the remove // We don't delete the analyzer object right here because the remove
// operation may execute at a time when it can still be accessed. // operation may execute at a time when it can still be accessed.
// Instead we let the file know to delete the analyzer later. // Instead we let the file know to delete the analyzer later.
file->DoneWithAnalyzer(a); file->DoneWithAnalyzer(a->second);
analyzer_map.erase(a);
return true; return true;
} }
bool AnalyzerSet::QueueRemove(const zeek::Tag& tag, RecordValPtr args) { bool AnalyzerSet::QueueRemove(const zeek::Tag& tag, RecordValPtr args) {
auto key = GetKey(tag, std::move(args));
auto rval = analyzer_map.Lookup(key.get());
mod_queue.push(new RemoveMod(tag, std::move(key)));
return rval;
}
bool AnalyzerSet::RemoveMod::Perform(AnalyzerSet* set) { return set->Remove(tag, std::move(key)); }
std::unique_ptr<zeek::detail::HashKey> AnalyzerSet::GetKey(const zeek::Tag& t, RecordValPtr args) const {
auto lv = make_intrusive<ListVal>(TYPE_ANY); auto lv = make_intrusive<ListVal>(TYPE_ANY);
lv->Append(t.AsVal()); lv->Append(tag.AsVal());
lv->Append(std::move(args)); lv->Append(args);
auto key = file_mgr->GetAnalyzerHash()->MakeHashKey(*lv, true);
if ( ! key ) auto it = analyzer_map.find(lv);
reporter->InternalError("AnalyzerArgs type mismatch");
return key; mod_queue.push(new RemoveMod(tag, std::move(args)));
return it != analyzer_map.end();
} }
bool AnalyzerSet::RemoveMod::Perform(AnalyzerSet* set) { return set->Remove(tag, args); }
file_analysis::Analyzer* AnalyzerSet::InstantiateAnalyzer(const Tag& tag, RecordValPtr args) const { file_analysis::Analyzer* AnalyzerSet::InstantiateAnalyzer(const Tag& tag, RecordValPtr args) const {
auto a = file_mgr->InstantiateAnalyzer(tag, std::move(args), file); auto a = file_mgr->InstantiateAnalyzer(tag, std::move(args), file);
@ -144,10 +148,15 @@ file_analysis::Analyzer* AnalyzerSet::InstantiateAnalyzer(const Tag& tag, Record
return a; return a;
} }
void AnalyzerSet::Insert(file_analysis::Analyzer* a, std::unique_ptr<zeek::detail::HashKey> key) { void AnalyzerSet::Insert(file_analysis::Analyzer* a, const Tag& tag, RecordValPtr args) {
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Add analyzer %s", file->GetID().c_str(), DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Add analyzer %s", file->GetID().c_str(),
file_mgr->GetComponentName(a->Tag()).c_str()); file_mgr->GetComponentName(a->Tag()).c_str());
analyzer_map.Insert(key.get(), a);
auto lv = make_intrusive<ListVal>(TYPE_ANY);
lv->Append(tag.AsVal());
lv->Append(std::move(args));
analyzer_map.insert({lv, a});
a->Init(); a->Init();
} }

View file

@ -7,6 +7,7 @@
#include "zeek/Dict.h" #include "zeek/Dict.h"
#include "zeek/Tag.h" #include "zeek/Tag.h"
#include "zeek/Val.h"
namespace zeek { namespace zeek {
@ -87,9 +88,9 @@ public:
void DrainModifications(); void DrainModifications();
// Iterator support // Iterator support
using iterator = zeek::DictIterator<file_analysis::Analyzer>; using MapType = std::unordered_map<IntrusivePtr<ListVal>, file_analysis::Analyzer*, ListValHasher, ListValEqualTo>;
; using iterator = MapType::iterator;
using const_iterator = const iterator; using const_iterator = MapType::const_iterator;
using reverse_iterator = std::reverse_iterator<iterator>; using reverse_iterator = std::reverse_iterator<iterator>;
using const_reverse_iterator = std::reverse_iterator<const_iterator>; using const_reverse_iterator = std::reverse_iterator<const_iterator>;
@ -100,15 +101,9 @@ public:
const_iterator cbegin() { return analyzer_map.cbegin(); } const_iterator cbegin() { return analyzer_map.cbegin(); }
const_iterator cend() { return analyzer_map.cend(); } const_iterator cend() { return analyzer_map.cend(); }
protected: size_t Size() const { return analyzer_map.size(); }
/**
* Get a hash key which represents an analyzer instance.
* @param tag the file analyzer tag.
* @param args an \c AnalyzerArgs value which specifies an analyzer.
* @return the hash key calculated from \a args
*/
std::unique_ptr<zeek::detail::HashKey> GetKey(const zeek::Tag& tag, RecordValPtr args) const;
protected:
/** /**
* Create an instance of a file analyzer. * Create an instance of a file analyzer.
* @param tag the tag of a file analyzer. * @param tag the tag of a file analyzer.
@ -122,7 +117,7 @@ protected:
* @param a an analyzer instance. * @param a an analyzer instance.
* @param key the hash key which represents the analyzer's \c AnalyzerArgs. * @param key the hash key which represents the analyzer's \c AnalyzerArgs.
*/ */
void Insert(file_analysis::Analyzer* a, std::unique_ptr<zeek::detail::HashKey> key); void Insert(file_analysis::Analyzer* a, const zeek::Tag& tag, RecordValPtr args);
/** /**
* Remove an analyzer instance from the set. * Remove an analyzer instance from the set.
@ -133,8 +128,8 @@ protected:
bool Remove(const zeek::Tag& tag, std::unique_ptr<zeek::detail::HashKey> key); bool Remove(const zeek::Tag& tag, std::unique_ptr<zeek::detail::HashKey> key);
private: private:
File* file; /**< File which owns the set */ File* file; /**< File which owns the set */
PDict<file_analysis::Analyzer> analyzer_map; /**< Indexed by AnalyzerArgs. */ MapType analyzer_map;
/** /**
* Abstract base class for analyzer set modifications. * Abstract base class for analyzer set modifications.
@ -166,15 +161,16 @@ private:
* @param arg_a an analyzer instance to add to an analyzer set. * @param arg_a an analyzer instance to add to an analyzer set.
* @param arg_key hash key representing the analyzer's \c AnalyzerArgs. * @param arg_key hash key representing the analyzer's \c AnalyzerArgs.
*/ */
AddMod(file_analysis::Analyzer* arg_a, std::unique_ptr<zeek::detail::HashKey> arg_key) AddMod(file_analysis::Analyzer* arg_a, zeek::Tag arg_tag, RecordValPtr arg_args)
: Modification(), a(arg_a), key(std::move(arg_key)) {} : Modification(), tag(std::move(arg_tag)), args(std::move(arg_args)) {}
~AddMod() override {} ~AddMod() override = default;
bool Perform(AnalyzerSet* set) override; bool Perform(AnalyzerSet* set) override;
void Abort() override; void Abort() override;
protected: protected:
file_analysis::Analyzer* a; file_analysis::Analyzer* a;
std::unique_ptr<zeek::detail::HashKey> key; zeek::Tag tag;
RecordValPtr args;
}; };
/** /**
@ -187,15 +183,15 @@ private:
* @param arg_a an analyzer instance to add to an analyzer set. * @param arg_a an analyzer instance to add to an analyzer set.
* @param arg_key hash key representing the analyzer's \c AnalyzerArgs. * @param arg_key hash key representing the analyzer's \c AnalyzerArgs.
*/ */
RemoveMod(zeek::Tag arg_tag, std::unique_ptr<zeek::detail::HashKey> arg_key) RemoveMod(zeek::Tag arg_tag, RecordValPtr arg_args)
: Modification(), tag(std::move(arg_tag)), key(std::move(arg_key)) {} : Modification(), tag(std::move(arg_tag)), args(std::move(arg_args)) {}
~RemoveMod() override {} ~RemoveMod() override = default;
bool Perform(AnalyzerSet* set) override; bool Perform(AnalyzerSet* set) override;
void Abort() override {} void Abort() override {}
protected: protected:
zeek::Tag tag; zeek::Tag tag;
std::unique_ptr<zeek::detail::HashKey> key; RecordValPtr args;
}; };
using ModQueue = std::queue<Modification*>; using ModQueue = std::queue<Modification*>;

View file

@ -321,7 +321,7 @@ void File::DeliverStream(const u_char* data, uint64_t len) {
util::fmt_bytes((const char*)data, std::min((uint64_t)40, len)), len > 40 ? "..." : ""); util::fmt_bytes((const char*)data, std::min((uint64_t)40, len)), len > 40 ? "..." : "");
for ( const auto& entry : analyzers ) { for ( const auto& entry : analyzers ) {
auto* a = entry.value; auto* a = entry.second;
DBG_LOG(DBG_FILE_ANALYSIS, "stream delivery to analyzer %s", file_mgr->GetComponentName(a->Tag()).c_str()); DBG_LOG(DBG_FILE_ANALYSIS, "stream delivery to analyzer %s", file_mgr->GetComponentName(a->Tag()).c_str());
if ( ! a->GotStreamDelivery() ) { if ( ! a->GotStreamDelivery() ) {
@ -409,7 +409,7 @@ void File::DeliverChunk(const u_char* data, uint64_t len, uint64_t offset) {
util::fmt_bytes((const char*)data, std::min((uint64_t)40, len)), len > 40 ? "..." : ""); util::fmt_bytes((const char*)data, std::min((uint64_t)40, len)), len > 40 ? "..." : "");
for ( const auto& entry : analyzers ) { for ( const auto& entry : analyzers ) {
auto* a = entry.value; auto* a = entry.second;
DBG_LOG(DBG_FILE_ANALYSIS, "chunk delivery to analyzer %s", file_mgr->GetComponentName(a->Tag()).c_str()); DBG_LOG(DBG_FILE_ANALYSIS, "chunk delivery to analyzer %s", file_mgr->GetComponentName(a->Tag()).c_str());
if ( ! a->Skipping() ) { if ( ! a->Skipping() ) {
@ -461,7 +461,7 @@ void File::EndOfFile() {
done = true; done = true;
for ( const auto& entry : analyzers ) { for ( const auto& entry : analyzers ) {
auto* a = entry.value; auto* a = entry.second;
if ( ! a->EndOfFile() ) if ( ! a->EndOfFile() )
analyzers.QueueRemove(a->Tag(), a->GetArgs()); analyzers.QueueRemove(a->Tag(), a->GetArgs());
@ -491,7 +491,7 @@ void File::Gap(uint64_t offset, uint64_t len) {
} }
for ( const auto& entry : analyzers ) { for ( const auto& entry : analyzers ) {
auto* a = entry.value; auto* a = entry.second;
if ( ! a->Undelivered(offset, len) ) if ( ! a->Undelivered(offset, len) )
analyzers.QueueRemove(a->Tag(), a->GetArgs()); analyzers.QueueRemove(a->Tag(), a->GetArgs());