Merge remote-tracking branch 'origin/topic/jsiwek/faf-perf'

* origin/topic/jsiwek/faf-perf:
  Adapt HTTP partial content to cache file analysis IDs.
  Adapt SSL analyzer to generate file analysis handles itself.
  Adapt more of HTTP analyzer to use cached file analysis IDs.
  Adapt IRC/FTP analyzers to cache file analysis IDs.
  Refactor regex/signature AcceptingSet data structure and usages.
  Enforce data size limit when checking files for MIME matches.
  Refactor file analysis file ID lookup.
Robin Sommer 2014-04-24 16:12:30 -07:00
commit de20b4f0fb
21 changed files with 246 additions and 250 deletions

CHANGES
View file

@@ -1,4 +1,24 @@
2.2-353 | 2014-04-24 16:12:30 -0700
* Adapt HTTP partial content to cache file analysis IDs. (Jon Siwek)
* Adapt SSL analyzer to generate file analysis handles itself. (Jon
Siwek)
* Adapt more of HTTP analyzer to use cached file analysis IDs. (Jon
Siwek)
* Adapt IRC/FTP analyzers to cache file analysis IDs. (Jon Siwek)
* Refactor regex/signature AcceptingSet data structure and usages.
(Jon Siwek)
* Enforce data size limit when checking files for MIME matches. (Jon
Siwek)
* Refactor file analysis file ID lookup. (Jon Siwek)
2.2-344 | 2014-04-22 20:13:30 -0700
* Refactor various hex escaping code. (Jon Siwek)

View file

@@ -1 +1 @@
2.2-344
2.2-353

@@ -1 +1 @@
Subproject commit d99150801b7844e082b5421d1efe4050702d350e
Subproject commit 5266f45a6839ea9b6f1825ba0fd448a721cb42be

View file

@@ -52,22 +52,8 @@ export {
function get_file_handle(c: connection, is_orig: bool): string
{
set_session(c);
local depth: count;
if ( is_orig )
{
depth = c$ssl$client_depth;
++c$ssl$client_depth;
}
else
{
depth = c$ssl$server_depth;
++c$ssl$server_depth;
}
return cat(Analyzer::ANALYZER_SSL, c$start_time, is_orig, id_string(c$id), depth);
# Unused. File handles are generated in the analyzer.
return "";
}
function describe_file(f: fa_file): string

View file

@@ -811,6 +811,17 @@ void Connection::Describe(ODesc* d) const
d->NL();
}
void Connection::IDString(ODesc* d) const
{
d->Add(orig_addr);
d->AddRaw(":", 1);
d->Add(ntohs(orig_port));
d->AddRaw(" > ", 3);
d->Add(resp_addr);
d->AddRaw(":", 1);
d->Add(ntohs(resp_port));
}
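
The new IDString() gives other components one canonical rendering of the connection 4-tuple; the SSL analyzer later in this diff embeds it in file-analysis handles. A minimal sketch of the expected output, using a made-up connection 10.0.0.1:1234 -> 10.0.0.2:443:

    ODesc d;
    conn->IDString(&d);
    // d.Description() now reads:
    //
    //   10.0.0.1:1234 > 10.0.0.2:443
    //
    // originator endpoint first, responder second, with both ports converted
    // to host byte order via ntohs() before printing.
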
bool Connection::Serialize(SerialInfo* info) const
{
return SerialObj::Serialize(info);

View file

@@ -204,6 +204,7 @@ public:
bool IsPersistent() { return persistent; }
void Describe(ODesc* d) const;
void IDString(ODesc* d) const;
TimerMgr* GetTimerMgr() const;

View file

@@ -211,9 +211,10 @@ void DFA_State::Dump(FILE* f, DFA_Machine* m)
if ( accept )
{
for ( int i = 0; i < accept->length(); ++i )
fprintf(f, "%s accept #%d",
i > 0 ? "," : "", int((*accept)[i]));
AcceptingSet::const_iterator it;
for ( it = accept->begin(); it != accept->end(); ++it )
fprintf(f, "%s accept #%d", it == accept->begin() ? "" : ",", *it);
}
fprintf(f, "\n");
@@ -285,7 +286,7 @@ unsigned int DFA_State::Size()
{
return sizeof(*this)
+ pad_size(sizeof(DFA_State*) * num_sym)
+ (accept ? pad_size(sizeof(int) * accept->length()) : 0)
+ (accept ? pad_size(sizeof(int) * accept->size()) : 0)
+ (nfa_states ? pad_size(sizeof(NFA_State*) * nfa_states->length()) : 0)
+ (meta_ec ? meta_ec->Size() : 0)
+ (centry ? padded_sizeof(CacheEntry) : 0);
@@ -470,33 +471,20 @@ int DFA_Machine::StateSetToDFA_State(NFA_state_list* state_set,
return 0;
AcceptingSet* accept = new AcceptingSet;
for ( int i = 0; i < state_set->length(); ++i )
{
int acc = (*state_set)[i]->Accept();
if ( acc != NO_ACCEPT )
{
int j;
for ( j = 0; j < accept->length(); ++j )
if ( (*accept)[j] == acc )
break;
if ( j >= accept->length() )
// It's not already present.
accept->append(acc);
}
accept->insert(acc);
}
if ( accept->length() == 0 )
if ( accept->empty() )
{
delete accept;
accept = 0;
}
else
{
accept->sort(int_list_cmp);
accept->resize(0);
}
DFA_State* ds = new DFA_State(state_count++, ec, state_set, accept);
d = dfa_state_cache->Insert(ds, hash);
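
The refactor replaces the manual is-member scan, conditional append, and trailing sort(int_list_cmp) with std::set, which maintains both invariants by construction. A self-contained illustration (standard library only; the sample accept IDs are made up):

    #include <cstdio>
    #include <set>

    typedef std::set<int> AcceptingSet;   // mirrors the new typedef in RE.h

    int main()
        {
        const int accepts[] = { 5, 3, 5, 1, 3 };   // hypothetical NFA accept IDs
        AcceptingSet accept;

        for ( int i = 0; i < 5; ++i )
            accept.insert(accepts[i]);   // duplicates are silently dropped

        // Prints "1 3 5": de-duplicated and already in sorted order, i.e. the
        // two properties the removed int_list code had to enforce by hand.
        for ( AcceptingSet::const_iterator it = accept.begin(); it != accept.end(); ++it )
            printf("%d ", *it);

        printf("\n");
        return 0;
        }
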

View file

@@ -3,6 +3,7 @@
#include "config.h"
#include <stdlib.h>
#include <utility>
#include "RE.h"
#include "DFA.h"
@@ -266,6 +267,15 @@ void Specific_RE_Matcher::Dump(FILE* f)
dfa->Dump(f);
}
inline void RE_Match_State::AddMatches(const AcceptingSet& as,
MatchPos position)
{
typedef std::pair<AcceptIdx, MatchPos> am_idx;
for ( AcceptingSet::const_iterator it = as.begin(); it != as.end(); ++it )
accepted_matches.insert(am_idx(*it, position));
}
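
AddMatches() leans on a subtle property of std::map::insert(): it never overwrites an existing key, so a pattern's first (earliest) match position is the one that sticks — just as the old code only appended to the parallel accepted/match_pos lists when the accept ID was not yet a member. A self-contained demonstration:

    #include <cstdio>
    #include <map>

    typedef int AcceptIdx;
    typedef unsigned long long MatchPos;   // stands in for uint64 here
    typedef std::map<AcceptIdx, MatchPos> AcceptingMatchSet;

    int main()
        {
        AcceptingMatchSet accepted_matches;

        // Pattern #7 accepts at position 3, then again at position 42.
        accepted_matches.insert(std::make_pair(7, (MatchPos) 3));
        accepted_matches.insert(std::make_pair(7, (MatchPos) 42));   // no-op: key 7 exists

        // Prints "pattern 7 matched at 3": the earliest position survives.
        for ( AcceptingMatchSet::const_iterator it = accepted_matches.begin();
              it != accepted_matches.end(); ++it )
            printf("pattern %d matched at %llu\n", it->first, it->second);

        return 0;
        }
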
bool RE_Match_State::Match(const u_char* bv, int n,
bool bol, bool eol, bool clear)
{
@@ -283,14 +293,9 @@ bool RE_Match_State::Match(const u_char* bv, int n,
current_state = dfa->StartState();
const AcceptingSet* ac = current_state->Accept();
if ( ac )
{
loop_over_list(*ac, i)
{
accepted.append((*ac)[i]);
match_pos.append(0);
}
}
AddMatches(*ac, 0);
}
else if ( clear )
@@ -301,7 +306,7 @@ bool RE_Match_State::Match(const u_char* bv, int n,
current_pos = 0;
int old_matches = accepted.length();
size_t old_matches = accepted_matches.size();
int ec;
int m = bol ? n + 1 : n;
@@ -324,25 +329,17 @@ bool RE_Match_State::Match(const u_char* bv, int n,
break;
}
if ( next_state->Accept() )
{
const AcceptingSet* ac = next_state->Accept();
loop_over_list(*ac, i)
{
if ( ! accepted.is_member((*ac)[i]) )
{
accepted.append((*ac)[i]);
match_pos.append(current_pos);
}
}
}
if ( ac )
AddMatches(*ac, current_pos);
++current_pos;
current_state = next_state;
}
return accepted.length() != old_matches;
return accepted_matches.size() != old_matches;
}
int Specific_RE_Matcher::LongestMatch(const u_char* bv, int n)
@@ -399,7 +396,8 @@ unsigned int Specific_RE_Matcher::MemoryAllocation() const
+ equiv_class.Size() - padded_sizeof(EquivClass)
+ (dfa ? dfa->MemoryAllocation() : 0) // this is ref counted; consider the bytes here?
+ padded_sizeof(*any_ccl)
+ accepted->MemoryAllocation();
+ padded_sizeof(*accepted)
+ accepted->size() * padded_sizeof(AcceptingSet::key_type);
}
RE_Matcher::RE_Matcher()

View file

@@ -9,6 +9,9 @@
#include "CCL.h"
#include "EquivClass.h"
#include <set>
#include <map>
#include <ctype.h>
typedef int (*cce_func)(int);
@@ -33,7 +36,10 @@ extern int re_lex(void);
extern int clower(int);
extern void synerr(const char str[]);
typedef int_list AcceptingSet;
typedef int AcceptIdx;
typedef std::set<AcceptIdx> AcceptingSet;
typedef uint64 MatchPos;
typedef std::map<AcceptIdx, MatchPos> AcceptingMatchSet;
typedef name_list string_list;
typedef enum { MATCH_ANYWHERE, MATCH_EXACTLY, } match_type;
@@ -135,8 +141,8 @@ public:
current_state = 0;
}
const AcceptingSet* Accepted() const { return &accepted; }
const int_list* MatchPositions() const { return &match_pos; }
const AcceptingMatchSet& AcceptedMatches() const
{ return accepted_matches; }
// Returns the number of bytes fed into the matcher so far
int Length() { return current_pos; }
@@ -149,16 +155,16 @@ public:
{
current_pos = -1;
current_state = 0;
accepted.clear();
match_pos.clear();
accepted_matches.clear();
}
void AddMatches(const AcceptingSet& as, MatchPos position);
protected:
DFA_Machine* dfa;
int* ecs;
AcceptingSet accepted;
int_list match_pos;
AcceptingMatchSet accepted_matches;
DFA_State* current_state;
int current_pos;
};

View file

@@ -594,6 +594,29 @@ RuleFileMagicState* RuleMatcher::InitFileMagic() const
return state;
}
bool RuleMatcher::AllRulePatternsMatched(const Rule* r, MatchPos matchpos,
const AcceptingMatchSet& ams)
{
DBG_LOG(DBG_RULES, "Checking rule: %s", r->id);
// Check whether all patterns of the rule have matched.
loop_over_list(r->patterns, j)
{
if ( ams.find(r->patterns[j]->id) == ams.end() )
return false;
// See if depth is satisfied.
if ( matchpos > r->patterns[j]->offset + r->patterns[j]->depth )
return false;
// FIXME: How to check for offset ??? ###
}
DBG_LOG(DBG_RULES, "All patterns of rule satisfied");
return true;
}
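
Both Match() overloads now drive this helper from the merged AcceptingMatchSet, replacing the duplicated goto-based loops they used to carry. Condensed, the call pattern (as it appears further down in this diff) is:

    std::set<Rule*> rule_matches;

    for ( AcceptingMatchSet::const_iterator it = ams.begin(); it != ams.end(); ++it )
        {
        Rule* r = Rule::rule_table[it->first - 1];   // accept IDs are 1-based rule indices

        if ( AllRulePatternsMatched(r, it->second, ams) )
            rule_matches.insert(r);   // the set absorbs rules reached via several patterns
        }
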
RuleMatcher::MIME_Matches* RuleMatcher::Match(RuleFileMagicState* state,
const u_char* data, uint64 len,
MIME_Matches* rval) const
@@ -636,56 +659,39 @@ RuleMatcher::MIME_Matches* RuleMatcher::Match(RuleFileMagicState* state,
DBG_LOG(DBG_RULES, "New pattern match found");
AcceptingSet accepted;
int_list matchpos;
AcceptingMatchSet accepted_matches;
loop_over_list(state->matchers, y)
{
RuleFileMagicState::Matcher* m = state->matchers[y];
const AcceptingSet* ac = m->state->Accepted();
loop_over_list(*ac, k)
{
if ( ! accepted.is_member((*ac)[k]) )
{
accepted.append((*ac)[k]);
matchpos.append((*m->state->MatchPositions())[k]);
}
}
const AcceptingMatchSet& ams = m->state->AcceptedMatches();
accepted_matches.insert(ams.begin(), ams.end());
}
// Find rules for which patterns have matched.
rule_list matched;
set<Rule*> rule_matches;
loop_over_list(accepted, i)
for ( AcceptingMatchSet::const_iterator it = accepted_matches.begin();
it != accepted_matches.end(); ++it )
{
Rule* r = Rule::rule_table[accepted[i] - 1];
AcceptIdx aidx = it->first;
MatchPos mpos = it->second;
DBG_LOG(DBG_RULES, "Checking rule: %s", r->id);
Rule* r = Rule::rule_table[aidx - 1];
loop_over_list(r->patterns, j)
{
if ( ! accepted.is_member(r->patterns[j]->id) )
continue;
if ( (unsigned int) matchpos[i] >
r->patterns[j]->offset + r->patterns[j]->depth )
continue;
DBG_LOG(DBG_RULES, "All patterns of rule satisfied");
if ( AllRulePatternsMatched(r, mpos, accepted_matches) )
rule_matches.insert(r);
}
if ( ! matched.is_member(r) )
matched.append(r);
}
loop_over_list(matched, j)
for ( set<Rule*>::const_iterator it = rule_matches.begin();
it != rule_matches.end(); ++it )
{
Rule* r = matched[j];
Rule* r = *it;
loop_over_list(r->actions, rai)
{
const RuleActionMIME* ram = dynamic_cast<const RuleActionMIME*>(r->actions[rai]);
const RuleActionMIME* ram =
dynamic_cast<const RuleActionMIME*>(r->actions[rai]);
if ( ! ram )
continue;
@@ -876,66 +882,40 @@ void RuleMatcher::Match(RuleEndpointState* state, Rule::PatternType type,
DBG_LOG(DBG_RULES, "New pattern match found");
// Build a joined AcceptingSet.
AcceptingSet accepted;
int_list matchpos;
AcceptingMatchSet accepted_matches;
loop_over_list(state->matchers, y)
{
RuleEndpointState::Matcher* m = state->matchers[y];
const AcceptingSet* ac = m->state->Accepted();
loop_over_list(*ac, k)
{
if ( ! accepted.is_member((*ac)[k]) )
{
accepted.append((*ac)[k]);
matchpos.append((*m->state->MatchPositions())[k]);
}
}
const AcceptingMatchSet& ams = m->state->AcceptedMatches();
accepted_matches.insert(ams.begin(), ams.end());
}
// Determine the rules for which all patterns have matched.
// This code should be fast enough as long as there are only very few
// matched patterns per connection (which is a plausible assumption).
rule_list matched;
// Find rules for which patterns have matched.
set<Rule*> rule_matches;
loop_over_list(accepted, i)
for ( AcceptingMatchSet::const_iterator it = accepted_matches.begin();
it != accepted_matches.end(); ++it )
{
Rule* r = Rule::rule_table[accepted[i] - 1];
AcceptIdx aidx = it->first;
MatchPos mpos = it->second;
DBG_LOG(DBG_RULES, "Checking rule: %s", r->id);
Rule* r = Rule::rule_table[aidx - 1];
// Check whether all patterns of the rule have matched.
loop_over_list(r->patterns, j)
{
if ( ! accepted.is_member(r->patterns[j]->id) )
goto next_pattern;
// See if depth is satisfied.
if ( (unsigned int) matchpos[i] >
r->patterns[j]->offset + r->patterns[j]->depth )
goto next_pattern;
DBG_LOG(DBG_RULES, "All patterns of rule satisfied");
// FIXME: How to check for offset ??? ###
}
// If not already in the list of matching rules, add it.
if ( ! matched.is_member(r) )
matched.append(r);
next_pattern:
continue;
if ( AllRulePatternsMatched(r, mpos, accepted_matches) )
rule_matches.insert(r);
}
// Check which of the matching rules really belong to any of our nodes.
loop_over_list(matched, j)
for ( set<Rule*>::const_iterator it = rule_matches.begin();
it != rule_matches.end(); ++it )
{
Rule* r = matched[j];
Rule* r = *it;
DBG_LOG(DBG_RULES, "Accepted rule: %s", r->id);

View file

@@ -361,6 +361,9 @@ private:
void DumpStateStats(BroFile* f, RuleHdrTest* hdr_test);
static bool AllRulePatternsMatched(const Rule* r, MatchPos matchpos,
const AcceptingMatchSet& ams);
int RE_level;
bool parse_error;
RuleHdrTest* root;

View file

@@ -31,12 +31,25 @@ void File_Analyzer::DeliverStream(int len, const u_char* data, bool orig)
if ( buffer_len == BUFFER_SIZE )
Identify();
}
return;
if ( orig )
file_id_orig = file_mgr->DataIn(data, len, GetAnalyzerTag(), Conn(),
orig, file_id_orig);
else
file_id_resp = file_mgr->DataIn(data, len, GetAnalyzerTag(), Conn(),
orig, file_id_resp);
}
void File_Analyzer::Undelivered(int seq, int len, bool orig)
{
TCP_ApplicationAnalyzer::Undelivered(seq, len, orig);
if ( orig )
file_id_orig = file_mgr->Gap(seq, len, GetAnalyzerTag(), Conn(), orig,
file_id_orig);
else
file_id_resp = file_mgr->Gap(seq, len, GetAnalyzerTag(), Conn(), orig,
file_id_resp);
}
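
Both hooks show the caching scheme this branch introduces: DataIn() and Gap() return the file ID they resolved, and passing that ID back in on the next call lets the file manager skip the script-level get_file_handle round trip for every subsequent chunk. A hedged sketch of the assumed contract (the delivery loop is hypothetical):

    string file_id;   // empty string == "ID not cached yet"

    while ( next_chunk(&data, &len) )   // hypothetical delivery loop
        file_id = file_mgr->DataIn(data, len, GetAnalyzerTag(), Conn(), orig, file_id);

    if ( ! file_id.empty() )
        file_mgr->EndOfFile(file_id);   // direct: no handle recomputation
    else
        file_mgr->EndOfFile(GetAnalyzerTag(), Conn(), orig);   // fallback, as in Done() below
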
void File_Analyzer::Done()
@@ -45,6 +58,16 @@ void File_Analyzer::Done()
if ( buffer_len && buffer_len != BUFFER_SIZE )
Identify();
if ( ! file_id_orig.empty() )
file_mgr->EndOfFile(file_id_orig);
else
file_mgr->EndOfFile(GetAnalyzerTag(), Conn(), true);
if ( ! file_id_resp.empty() )
file_mgr->EndOfFile(file_id_resp);
else
file_mgr->EndOfFile(GetAnalyzerTag(), Conn(), false);
}
void File_Analyzer::Identify()
@@ -61,49 +84,3 @@ void File_Analyzer::Identify()
vl->append(new StringVal(match));
ConnectionEvent(file_transferred, vl);
}
IRC_Data::IRC_Data(Connection* conn)
: File_Analyzer("IRC_Data", conn)
{
}
void IRC_Data::Done()
{
File_Analyzer::Done();
file_mgr->EndOfFile(GetAnalyzerTag(), Conn());
}
void IRC_Data::DeliverStream(int len, const u_char* data, bool orig)
{
File_Analyzer::DeliverStream(len, data, orig);
file_mgr->DataIn(data, len, GetAnalyzerTag(), Conn(), orig);
}
void IRC_Data::Undelivered(int seq, int len, bool orig)
{
File_Analyzer::Undelivered(seq, len, orig);
file_mgr->Gap(seq, len, GetAnalyzerTag(), Conn(), orig);
}
FTP_Data::FTP_Data(Connection* conn)
: File_Analyzer("FTP_Data", conn)
{
}
void FTP_Data::Done()
{
File_Analyzer::Done();
file_mgr->EndOfFile(GetAnalyzerTag(), Conn());
}
void FTP_Data::DeliverStream(int len, const u_char* data, bool orig)
{
File_Analyzer::DeliverStream(len, data, orig);
file_mgr->DataIn(data, len, GetAnalyzerTag(), Conn(), orig);
}
void FTP_Data::Undelivered(int seq, int len, bool orig)
{
File_Analyzer::Undelivered(seq, len, orig);
file_mgr->Gap(seq, len, GetAnalyzerTag(), Conn(), orig);
}

View file

@@ -28,17 +28,15 @@ protected:
static const int BUFFER_SIZE = 1024;
char buffer[BUFFER_SIZE];
int buffer_len;
string file_id_orig;
string file_id_resp;
};
class IRC_Data : public File_Analyzer {
public:
IRC_Data(Connection* conn);
virtual void Done();
virtual void DeliverStream(int len, const u_char* data, bool orig);
virtual void Undelivered(int seq, int len, bool orig);
IRC_Data(Connection* conn)
: File_Analyzer("IRC_Data", conn)
{ }
static Analyzer* InstantiateAnalyzer(Connection* conn)
{ return new IRC_Data(conn); }
@@ -46,13 +44,9 @@ public:
class FTP_Data : public File_Analyzer {
public:
FTP_Data(Connection* conn);
virtual void Done();
virtual void DeliverStream(int len, const u_char* data, bool orig);
virtual void Undelivered(int seq, int len, bool orig);
FTP_Data(Connection* conn)
: File_Analyzer("FTP_Data", conn)
{ }
static Analyzer* InstantiateAnalyzer(Connection* conn)
{ return new FTP_Data(conn); }

View file

@@ -243,10 +243,10 @@ int HTTP_Entity::Undelivered(int64_t len)
return 0;
if ( is_partial_content )
file_mgr->Gap(body_length, len,
precomputed_file_id = file_mgr->Gap(body_length, len,
http_message->MyHTTP_Analyzer()->GetAnalyzerTag(),
http_message->MyHTTP_Analyzer()->Conn(),
http_message->IsOrig());
http_message->IsOrig(), precomputed_file_id);
else
precomputed_file_id = file_mgr->Gap(body_length, len,
http_message->MyHTTP_Analyzer()->GetAnalyzerTag(),
@@ -306,15 +306,15 @@ void HTTP_Entity::SubmitData(int len, const char* buf)
if ( is_partial_content )
{
if ( send_size && instance_length > 0 )
file_mgr->SetSize(instance_length,
precomputed_file_id = file_mgr->SetSize(instance_length,
http_message->MyHTTP_Analyzer()->GetAnalyzerTag(),
http_message->MyHTTP_Analyzer()->Conn(),
http_message->IsOrig());
http_message->IsOrig(), precomputed_file_id);
file_mgr->DataIn(reinterpret_cast<const u_char*>(buf), len, offset,
precomputed_file_id = file_mgr->DataIn(reinterpret_cast<const u_char*>(buf), len, offset,
http_message->MyHTTP_Analyzer()->GetAnalyzerTag(),
http_message->MyHTTP_Analyzer()->Conn(),
http_message->IsOrig());
http_message->IsOrig(), precomputed_file_id);
offset += len;
}
@@ -583,9 +583,16 @@ void HTTP_Message::Done(const int interrupted, const char* detail)
top_level->EndOfData();
if ( is_orig || MyHTTP_Analyzer()->HTTP_ReplyCode() != 206 )
// multipart/byteranges may span multiple connections
{
// multipart/byteranges may span multiple connections, so don't EOF.
HTTP_Entity* he = dynamic_cast<HTTP_Entity*>(top_level);
if ( he && ! he->FileID().empty() )
file_mgr->EndOfFile(he->FileID());
else
file_mgr->EndOfFile(MyHTTP_Analyzer()->GetAnalyzerTag(),
MyHTTP_Analyzer()->Conn(), is_orig);
}
if ( http_message_done )
{
@@ -663,9 +670,16 @@ void HTTP_Message::EndEntity(mime::MIME_Entity* entity)
Done();
else if ( is_orig || MyHTTP_Analyzer()->HTTP_ReplyCode() != 206 )
{
HTTP_Entity* he = dynamic_cast<HTTP_Entity*>(entity);
if ( he && ! he->FileID().empty() )
file_mgr->EndOfFile(he->FileID());
else
file_mgr->EndOfFile(MyHTTP_Analyzer()->GetAnalyzerTag(),
MyHTTP_Analyzer()->Conn(), is_orig);
}
}
void HTTP_Message::SubmitHeader(mime::MIME_Header* h)
{

View file

@@ -46,6 +46,7 @@ public:
int64_t BodyLength() const { return body_length; }
int64_t HeaderLength() const { return header_length; }
void SkipBody() { deliver_body = 0; }
const string& FileID() const { return precomputed_file_id; }
protected:
class UncompressedOutput;

View file

@@ -231,15 +231,26 @@ refine connection SSL_Conn += {
if ( certificates->size() == 0 )
return true;
ODesc common;
common.AddRaw("Analyzer::ANALYZER_SSL");
common.Add(bro_analyzer()->Conn()->StartTime());
common.AddRaw(${rec.is_orig} ? "T" : "F", 1);
bro_analyzer()->Conn()->IDString(&common);
for ( unsigned int i = 0; i < certificates->size(); ++i )
{
const bytestring& cert = (*certificates)[i];
string fid = file_mgr->DataIn(reinterpret_cast<const u_char*>(cert.data()), cert.length(),
bro_analyzer()->GetAnalyzerTag(), bro_analyzer()->Conn(),
${rec.is_orig});
ODesc file_handle;
file_handle.Add(common.Description());
file_handle.Add(i);
file_mgr->EndOfFile(fid);
string file_id = file_mgr->HashHandle(file_handle.Description());
file_mgr->DataIn(reinterpret_cast<const u_char*>(cert.data()),
cert.length(), bro_analyzer()->GetAnalyzerTag(),
bro_analyzer()->Conn(), ${rec.is_orig}, file_id);
file_mgr->EndOfFile(file_id);
}
return true;
%}
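
The analyzer now builds its file handles inline instead of bouncing through the script-layer get_file_handle event (which, per the scripts change above, now just returns ""). The handle deliberately encodes everything that distinguishes one certificate "file" from another; a condensed view of the composition, with HashHandle() assumed to map a descriptive string to a stable file ID:

    // "Analyzer::ANALYZER_SSL"            -- namespace: the producing analyzer
    // connection start time + 4-tuple     -- via the new Connection::IDString()
    // "T"/"F"                             -- client vs. server certificate chain
    // i                                   -- index within that chain
    ODesc file_handle;
    file_handle.Add(common.Description());
    file_handle.Add(i);
    string file_id = file_mgr->HashHandle(file_handle.Description());
    // The composition is deterministic, so the same certificate seen again
    // maps back to the same file ID rather than spawning a new file.
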

View file

@@ -283,6 +283,7 @@ bool File::BufferBOF(const u_char* data, uint64 len)
bool File::DetectMIME(const u_char* data, uint64 len)
{
RuleMatcher::MIME_Matches matches;
len = min(len, LookupFieldDefaultCount(bof_buffer_size_idx));
file_mgr->DetectMIME(data, len, &matches);
if ( matches.empty() )
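
This clamp is the "enforce data size limit" change from the commit message: however large the delivered buffer, only the script-configured beginning-of-file buffer size participates in MIME signature matching. Illustratively (the incoming length is made up):

    uint64 len = 1 << 20;   // hypothetical 1 MiB delivery
    len = min(len, LookupFieldDefaultCount(bof_buffer_size_idx));
    // len is now capped at the bof_buffer_size script default, bounding the
    // matcher's work and keeping results independent of delivery chunking.
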

View file

@@ -54,8 +54,11 @@ void Manager::Terminate()
{
vector<string> keys;
for ( IDMap::iterator it = id_map.begin(); it != id_map.end(); ++it )
keys.push_back(it->first);
IterCookie* it = id_map.InitForIteration();
HashKey* key;
while ( id_map.NextEntry(key, it) )
keys.push_back(static_cast<const char*>(key->Key()));
for ( size_t i = 0; i < keys.size(); ++i )
Timeout(keys[i], true);
@@ -249,11 +252,12 @@ File* Manager::GetFile(const string& file_id, Connection* conn,
if ( IsIgnored(file_id) )
return 0;
File* rval = id_map[file_id];
File* rval = id_map.Lookup(file_id.c_str());
if ( ! rval )
{
rval = id_map[file_id] = new File(file_id, conn, tag, is_orig);
rval = new File(file_id, conn, tag, is_orig);
id_map.Insert(file_id.c_str(), rval);
rval->ScheduleInactivityTimer();
if ( IsIgnored(file_id) )
@@ -272,12 +276,7 @@ File* Manager::LookupFile(const string& file_id) const
File* Manager::LookupFile(const string& file_id) const
{
IDMap::const_iterator it = id_map.find(file_id);
if ( it == id_map.end() )
return 0;
return it->second;
return id_map.Lookup(file_id.c_str());
}
void Manager::Timeout(const string& file_id, bool is_terminating)
@@ -308,37 +307,38 @@ void Manager::Timeout(const string& file_id, bool is_terminating)
bool Manager::IgnoreFile(const string& file_id)
{
if ( id_map.find(file_id) == id_map.end() )
if ( ! id_map.Lookup(file_id.c_str()) )
return false;
DBG_LOG(DBG_FILE_ANALYSIS, "Ignore FileID %s", file_id.c_str());
ignored.insert(file_id);
delete ignored.Insert(file_id.c_str(), new bool);
return true;
}
bool Manager::RemoveFile(const string& file_id)
{
IDMap::iterator it = id_map.find(file_id);
HashKey key(file_id.c_str());
// Can't remove from the dictionary/map right away as invoking EndOfFile
// may cause some events to be executed which actually depend on the file
// still being in the dictionary/map.
File* f = static_cast<File*>(id_map.Lookup(&key));
if ( it == id_map.end() )
if ( ! f )
return false;
DBG_LOG(DBG_FILE_ANALYSIS, "Remove FileID %s", file_id.c_str());
it->second->EndOfFile();
delete it->second;
id_map.erase(file_id);
ignored.erase(file_id);
f->EndOfFile();
delete f;
id_map.Remove(&key);
delete static_cast<bool*>(ignored.Remove(&key));
return true;
}
bool Manager::IsIgnored(const string& file_id)
{
return ignored.find(file_id) != ignored.end();
return ignored.Lookup(file_id.c_str()) != 0;
}
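
The std::map/std::set members are replaced with Bro's PDict dictionaries (declared in Manager.h below). A condensed sketch of the API surface this file now relies on, collected from the calls above (the file-ID literal is made up):

    declare(PDict, File);                    // expands to the PDict(File) type
    PDict(File) id_map;

    File* f = id_map.Lookup("F1a2b3");       // hypothetical ID; returns 0 when absent
    if ( ! f )
        {
        f = new File(file_id, conn, tag, is_orig);   // as in GetFile() above
        id_map.Insert("F1a2b3", f);                  // the dictionary does not own values
        }

    IterCookie* c = id_map.InitForIteration();       // iteration: cookie + HashKey out-param
    HashKey* k;
    while ( id_map.NextEntry(k, c) )
        ; // k->Key() exposes the stored key bytes

    HashKey key("F1a2b3");
    delete static_cast<File*>(id_map.Remove(&key));  // caller frees removed values
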
string Manager::GetFileID(analyzer::Tag tag, Connection* c, bool is_orig)

View file

@@ -4,10 +4,9 @@
#define FILE_ANALYSIS_MANAGER_H
#include <string>
#include <map>
#include <set>
#include <queue>
#include "Dict.h"
#include "Net.h"
#include "Conn.h"
#include "Val.h"
@@ -27,6 +26,9 @@
namespace file_analysis {
declare(PDict,bool);
declare(PDict,File);
/**
* Main entry point for interacting with file analysis.
*/
@@ -288,8 +290,8 @@ public:
protected:
friend class FileTimer;
typedef set<string> IDSet;
typedef map<string, File*> IDMap;
typedef PDict(bool) IDSet;
typedef PDict(File) IDMap;
/**
* Create a new file to be analyzed or retrieve an existing one.
@@ -361,8 +363,8 @@ protected:
private:
IDMap id_map; /**< Map file ID to file_analysis::File records. */
IDSet ignored; /**< Ignored files. Will be finally removed on EOF. */
PDict(File) id_map; /**< Map file ID to file_analysis::File records. */
PDict(bool) ignored; /**< Ignored files. Will be finally removed on EOF. */
string current_file_id; /**< Hash of what get_file_handle event sets. */
RuleFileMagicState* magic_state; /**< File magic signature match state. */

View file

@@ -16,15 +16,15 @@
#empty_field (empty)
#unset_field -
#path mime_metrics
#open 2014-03-06-17-30-44
#open 2014-04-21-21-34-08
#fields ts ts_delta mtype uniq_hosts hits bytes
#types time interval string count count count
1389719059.311698 300.000000 text/html 1 4 53070
1389719059.311698 300.000000 text/html 1 3 47335
1389719059.311698 300.000000 image/jpeg 1 1 186859
1389719059.311698 300.000000 application/pgp-signature 1 1 836
1389719059.311698 300.000000 text/plain 1 12 113982
1389719059.311698 300.000000 text/plain 1 13 119717
1389719059.311698 300.000000 image/gif 1 1 172
1389719059.311698 300.000000 image/png 1 9 82176
1389719059.311698 300.000000 image/x-icon 1 2 2300
#close 2014-03-06-17-30-44
#close 2014-04-21-21-34-08

View file

@@ -27,4 +27,7 @@ status:
coverage:
@for repo in $(REPOS); do (cd $$repo && echo "Coverage for '$$repo' repo:" && make coverage); done
update-timing:
@for repo in $(REPOS); do (cd $$repo && echo "Coverage for '$$repo' repo:" && make update-timing); done
.PHONY: all brief init pull push status coverage