Refactor file analysis file ID lookup.

Use a dictionary instead of std::map: ordering doesn't matter, and
lookup time shouldn't increase as more files are in the process of
being analyzed.
This commit is contained in:
Jon Siwek 2014-04-18 16:35:43 -05:00
parent 80d7a1482c
commit bc5c02cb74
2 changed files with 30 additions and 28 deletions

View file

@ -54,8 +54,11 @@ void Manager::Terminate()
{ {
vector<string> keys; vector<string> keys;
for ( IDMap::iterator it = id_map.begin(); it != id_map.end(); ++it ) IterCookie* it = id_map.InitForIteration();
keys.push_back(it->first); HashKey* key;
while ( id_map.NextEntry(key, it) )
keys.push_back(static_cast<const char*>(key->Key()));
for ( size_t i = 0; i < keys.size(); ++i ) for ( size_t i = 0; i < keys.size(); ++i )
Timeout(keys[i], true); Timeout(keys[i], true);
@ -249,11 +252,12 @@ File* Manager::GetFile(const string& file_id, Connection* conn,
if ( IsIgnored(file_id) ) if ( IsIgnored(file_id) )
return 0; return 0;
File* rval = id_map[file_id]; File* rval = id_map.Lookup(file_id.c_str());
if ( ! rval ) if ( ! rval )
{ {
rval = id_map[file_id] = new File(file_id, conn, tag, is_orig); rval = new File(file_id, conn, tag, is_orig);
id_map.Insert(file_id.c_str(), rval);
rval->ScheduleInactivityTimer(); rval->ScheduleInactivityTimer();
if ( IsIgnored(file_id) ) if ( IsIgnored(file_id) )
@ -272,12 +276,7 @@ File* Manager::GetFile(const string& file_id, Connection* conn,
File* Manager::LookupFile(const string& file_id) const File* Manager::LookupFile(const string& file_id) const
{ {
IDMap::const_iterator it = id_map.find(file_id); return id_map.Lookup(file_id.c_str());
if ( it == id_map.end() )
return 0;
return it->second;
} }
void Manager::Timeout(const string& file_id, bool is_terminating) void Manager::Timeout(const string& file_id, bool is_terminating)
@ -308,37 +307,38 @@ void Manager::Timeout(const string& file_id, bool is_terminating)
bool Manager::IgnoreFile(const string& file_id) bool Manager::IgnoreFile(const string& file_id)
{ {
if ( id_map.find(file_id) == id_map.end() ) if ( ! id_map.Lookup(file_id.c_str()) )
return false; return false;
DBG_LOG(DBG_FILE_ANALYSIS, "Ignore FileID %s", file_id.c_str()); DBG_LOG(DBG_FILE_ANALYSIS, "Ignore FileID %s", file_id.c_str());
ignored.insert(file_id); delete ignored.Insert(file_id.c_str(), new bool);
return true; return true;
} }
bool Manager::RemoveFile(const string& file_id) bool Manager::RemoveFile(const string& file_id)
{ {
IDMap::iterator it = id_map.find(file_id); HashKey key(file_id.c_str());
// Can't remove from the dictionary/map right away as invoking EndOfFile
// may cause some events to be executed which actually depend on the file
// still being in the dictionary/map.
File* f = static_cast<File*>(id_map.Lookup(&key));
if ( it == id_map.end() ) if ( ! f )
return false; return false;
DBG_LOG(DBG_FILE_ANALYSIS, "Remove FileID %s", file_id.c_str()); DBG_LOG(DBG_FILE_ANALYSIS, "Remove FileID %s", file_id.c_str());
it->second->EndOfFile(); f->EndOfFile();
delete f;
delete it->second; id_map.Remove(&key);
id_map.erase(file_id); delete static_cast<bool*>(ignored.Remove(&key));
ignored.erase(file_id);
return true; return true;
} }
bool Manager::IsIgnored(const string& file_id) bool Manager::IsIgnored(const string& file_id)
{ {
return ignored.find(file_id) != ignored.end(); return ignored.Lookup(file_id.c_str()) != 0;
} }
string Manager::GetFileID(analyzer::Tag tag, Connection* c, bool is_orig) string Manager::GetFileID(analyzer::Tag tag, Connection* c, bool is_orig)

View file

@ -4,10 +4,9 @@
#define FILE_ANALYSIS_MANAGER_H #define FILE_ANALYSIS_MANAGER_H
#include <string> #include <string>
#include <map>
#include <set>
#include <queue> #include <queue>
#include "Dict.h"
#include "Net.h" #include "Net.h"
#include "Conn.h" #include "Conn.h"
#include "Val.h" #include "Val.h"
@ -27,6 +26,9 @@
namespace file_analysis { namespace file_analysis {
declare(PDict,bool);
declare(PDict,File);
/** /**
* Main entry point for interacting with file analysis. * Main entry point for interacting with file analysis.
*/ */
@ -288,8 +290,8 @@ public:
protected: protected:
friend class FileTimer; friend class FileTimer;
typedef set<string> IDSet; typedef PDict(bool) IDSet;
typedef map<string, File*> IDMap; typedef PDict(File) IDMap;
/** /**
* Create a new file to be analyzed or retrieve an existing one. * Create a new file to be analyzed or retrieve an existing one.
@ -361,8 +363,8 @@ protected:
private: private:
IDMap id_map; /**< Map file ID to file_analysis::File records. */ PDict(File) id_map; /**< Map file ID to file_analysis::File records. */
IDSet ignored; /**< Ignored files. Will be finally removed on EOF. */ PDict(bool) ignored; /**< Ignored files. Will be finally removed on EOF. */
string current_file_id; /**< Hash of what get_file_handle event sets. */ string current_file_id; /**< Hash of what get_file_handle event sets. */
RuleFileMagicState* magic_state; /**< File magic signature match state. */ RuleFileMagicState* magic_state; /**< File magic signature match state. */