diff --git a/CHANGES b/CHANGES index 23c3af8997..fc4b7a8ea8 100644 --- a/CHANGES +++ b/CHANGES @@ -1,4 +1,24 @@ +2.2-353 | 2014-04-24 16:12:30 -0700 + + * Adapt HTTP partial content to cache file analysis IDs. (Jon Siwek) + + * Adapt SSL analyzer to generate file analysis handles itself. (Jon + Siwek) + + * Adapt more of HTTP analyzer to use cached file analysis IDs. (Jon + Siwek) + + * Adapt IRC/FTP analyzers to cache file analysis IDs. (Jon Siwek) + + * Refactor regex/signature AcceptingSet data structure and usages. + (Jon Siwek) + + * Enforce data size limit when checking files for MIME matches. (Jon + Siwek) + + * Refactor file analysis file ID lookup. (Jon Siwek) + 2.2-344 | 2014-04-22 20:13:30 -0700 * Refactor various hex escaping code. (Jon Siwek) diff --git a/VERSION b/VERSION index b5dc07e467..4d60030d10 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.2-344 +2.2-353 diff --git a/aux/broctl b/aux/broctl index d99150801b..5266f45a68 160000 --- a/aux/broctl +++ b/aux/broctl @@ -1 +1 @@ -Subproject commit d99150801b7844e082b5421d1efe4050702d350e +Subproject commit 5266f45a6839ea9b6f1825ba0fd448a721cb42be diff --git a/scripts/base/protocols/ssl/files.bro b/scripts/base/protocols/ssl/files.bro index 6b33c0f87b..fbdd6e454e 100644 --- a/scripts/base/protocols/ssl/files.bro +++ b/scripts/base/protocols/ssl/files.bro @@ -52,22 +52,8 @@ export { function get_file_handle(c: connection, is_orig: bool): string { - set_session(c); - - local depth: count; - - if ( is_orig ) - { - depth = c$ssl$client_depth; - ++c$ssl$client_depth; - } - else - { - depth = c$ssl$server_depth; - ++c$ssl$server_depth; - } - - return cat(Analyzer::ANALYZER_SSL, c$start_time, is_orig, id_string(c$id), depth); + # Unused. File handles are generated in the analyzer. 
+ return ""; } function describe_file(f: fa_file): string diff --git a/src/Conn.cc b/src/Conn.cc index fa89f26d35..bc62902421 100644 --- a/src/Conn.cc +++ b/src/Conn.cc @@ -811,6 +811,17 @@ void Connection::Describe(ODesc* d) const d->NL(); } +void Connection::IDString(ODesc* d) const + { + d->Add(orig_addr); + d->AddRaw(":", 1); + d->Add(ntohs(orig_port)); + d->AddRaw(" > ", 3); + d->Add(resp_addr); + d->AddRaw(":", 1); + d->Add(ntohs(resp_port)); + } + bool Connection::Serialize(SerialInfo* info) const { return SerialObj::Serialize(info); diff --git a/src/Conn.h b/src/Conn.h index d982d3879d..966c77a9f8 100644 --- a/src/Conn.h +++ b/src/Conn.h @@ -204,6 +204,7 @@ public: bool IsPersistent() { return persistent; } void Describe(ODesc* d) const; + void IDString(ODesc* d) const; TimerMgr* GetTimerMgr() const; diff --git a/src/DFA.cc b/src/DFA.cc index ad9521709e..dbfed71ba3 100644 --- a/src/DFA.cc +++ b/src/DFA.cc @@ -211,9 +211,10 @@ void DFA_State::Dump(FILE* f, DFA_Machine* m) if ( accept ) { - for ( int i = 0; i < accept->length(); ++i ) - fprintf(f, "%s accept #%d", - i > 0 ? "," : "", int((*accept)[i])); + AcceptingSet::const_iterator it; + + for ( it = accept->begin(); it != accept->end(); ++it ) + fprintf(f, "%s accept #%d", it == accept->begin() ? "" : ",", *it); } fprintf(f, "\n"); @@ -285,7 +286,7 @@ unsigned int DFA_State::Size() { return sizeof(*this) + pad_size(sizeof(DFA_State*) * num_sym) - + (accept ? pad_size(sizeof(int) * accept->length()) : 0) + + (accept ? pad_size(sizeof(int) * accept->size()) : 0) + (nfa_states ? pad_size(sizeof(NFA_State*) * nfa_states->length()) : 0) + (meta_ec ? meta_ec->Size() : 0) + (centry ? 
padded_sizeof(CacheEntry) : 0); @@ -470,33 +471,20 @@ int DFA_Machine::StateSetToDFA_State(NFA_state_list* state_set, return 0; AcceptingSet* accept = new AcceptingSet; + for ( int i = 0; i < state_set->length(); ++i ) { int acc = (*state_set)[i]->Accept(); if ( acc != NO_ACCEPT ) - { - int j; - for ( j = 0; j < accept->length(); ++j ) - if ( (*accept)[j] == acc ) - break; - - if ( j >= accept->length() ) - // It's not already present. - accept->append(acc); - } + accept->insert(acc); } - if ( accept->length() == 0 ) + if ( accept->empty() ) { delete accept; accept = 0; } - else - { - accept->sort(int_list_cmp); - accept->resize(0); - } DFA_State* ds = new DFA_State(state_count++, ec, state_set, accept); d = dfa_state_cache->Insert(ds, hash); diff --git a/src/RE.cc b/src/RE.cc index 87117c1c3a..4855b0e39a 100644 --- a/src/RE.cc +++ b/src/RE.cc @@ -3,6 +3,7 @@ #include "config.h" #include +#include #include "RE.h" #include "DFA.h" @@ -266,6 +267,15 @@ void Specific_RE_Matcher::Dump(FILE* f) dfa->Dump(f); } +inline void RE_Match_State::AddMatches(const AcceptingSet& as, + MatchPos position) + { + typedef std::pair am_idx; + + for ( AcceptingSet::const_iterator it = as.begin(); it != as.end(); ++it ) + accepted_matches.insert(am_idx(*it, position)); + } + bool RE_Match_State::Match(const u_char* bv, int n, bool bol, bool eol, bool clear) { @@ -283,14 +293,9 @@ bool RE_Match_State::Match(const u_char* bv, int n, current_state = dfa->StartState(); const AcceptingSet* ac = current_state->Accept(); + if ( ac ) - { - loop_over_list(*ac, i) - { - accepted.append((*ac)[i]); - match_pos.append(0); - } - } + AddMatches(*ac, 0); } else if ( clear ) @@ -301,7 +306,7 @@ bool RE_Match_State::Match(const u_char* bv, int n, current_pos = 0; - int old_matches = accepted.length(); + size_t old_matches = accepted_matches.size(); int ec; int m = bol ? 
n + 1 : n; @@ -324,25 +329,17 @@ bool RE_Match_State::Match(const u_char* bv, int n, break; } - if ( next_state->Accept() ) - { - const AcceptingSet* ac = next_state->Accept(); - loop_over_list(*ac, i) - { - if ( ! accepted.is_member((*ac)[i]) ) - { - accepted.append((*ac)[i]); - match_pos.append(current_pos); - } - } - } + const AcceptingSet* ac = next_state->Accept(); + + if ( ac ) + AddMatches(*ac, current_pos); ++current_pos; current_state = next_state; } - return accepted.length() != old_matches; + return accepted_matches.size() != old_matches; } int Specific_RE_Matcher::LongestMatch(const u_char* bv, int n) @@ -399,7 +396,8 @@ unsigned int Specific_RE_Matcher::MemoryAllocation() const + equiv_class.Size() - padded_sizeof(EquivClass) + (dfa ? dfa->MemoryAllocation() : 0) // this is ref counted; consider the bytes here? + padded_sizeof(*any_ccl) - + accepted->MemoryAllocation(); + + padded_sizeof(*accepted) + + accepted->size() * padded_sizeof(AcceptingSet::key_type); } RE_Matcher::RE_Matcher() diff --git a/src/RE.h b/src/RE.h index a2fc709c88..7437dbb8b8 100644 --- a/src/RE.h +++ b/src/RE.h @@ -9,6 +9,9 @@ #include "CCL.h" #include "EquivClass.h" +#include +#include + #include typedef int (*cce_func)(int); @@ -33,7 +36,10 @@ extern int re_lex(void); extern int clower(int); extern void synerr(const char str[]); -typedef int_list AcceptingSet; +typedef int AcceptIdx; +typedef std::set AcceptingSet; +typedef uint64 MatchPos; +typedef std::map AcceptingMatchSet; typedef name_list string_list; typedef enum { MATCH_ANYWHERE, MATCH_EXACTLY, } match_type; @@ -135,8 +141,8 @@ public: current_state = 0; } - const AcceptingSet* Accepted() const { return &accepted; } - const int_list* MatchPositions() const { return &match_pos; } + const AcceptingMatchSet& AcceptedMatches() const + { return accepted_matches; } // Returns the number of bytes feeded into the matcher so far int Length() { return current_pos; } @@ -149,16 +155,16 @@ public: { current_pos = -1; current_state = 
0; - accepted.clear(); - match_pos.clear(); + accepted_matches.clear(); } + void AddMatches(const AcceptingSet& as, MatchPos position); + protected: DFA_Machine* dfa; int* ecs; - AcceptingSet accepted; - int_list match_pos; + AcceptingMatchSet accepted_matches; DFA_State* current_state; int current_pos; }; diff --git a/src/RuleMatcher.cc b/src/RuleMatcher.cc index 5e9dff0a1f..5cea843c8d 100644 --- a/src/RuleMatcher.cc +++ b/src/RuleMatcher.cc @@ -594,6 +594,29 @@ RuleFileMagicState* RuleMatcher::InitFileMagic() const return state; } +bool RuleMatcher::AllRulePatternsMatched(const Rule* r, MatchPos matchpos, + const AcceptingMatchSet& ams) + { + DBG_LOG(DBG_RULES, "Checking rule: %s", r->id); + + // Check whether all patterns of the rule have matched. + loop_over_list(r->patterns, j) + { + if ( ams.find(r->patterns[j]->id) == ams.end() ) + return false; + + // See if depth is satisfied. + if ( matchpos > r->patterns[j]->offset + r->patterns[j]->depth ) + return false; + + // FIXME: How to check for offset ??? ### + } + + DBG_LOG(DBG_RULES, "All patterns of rule satisfied"); + + return true; + } + RuleMatcher::MIME_Matches* RuleMatcher::Match(RuleFileMagicState* state, const u_char* data, uint64 len, MIME_Matches* rval) const @@ -636,56 +659,39 @@ RuleMatcher::MIME_Matches* RuleMatcher::Match(RuleFileMagicState* state, DBG_LOG(DBG_RULES, "New pattern match found"); - AcceptingSet accepted; - int_list matchpos; + AcceptingMatchSet accepted_matches; loop_over_list(state->matchers, y) { RuleFileMagicState::Matcher* m = state->matchers[y]; - const AcceptingSet* ac = m->state->Accepted(); - - loop_over_list(*ac, k) - { - if ( ! accepted.is_member((*ac)[k]) ) - { - accepted.append((*ac)[k]); - matchpos.append((*m->state->MatchPositions())[k]); - } - } + const AcceptingMatchSet& ams = m->state->AcceptedMatches(); + accepted_matches.insert(ams.begin(), ams.end()); } // Find rules for which patterns have matched. 
- rule_list matched; + set rule_matches; - loop_over_list(accepted, i) + for ( AcceptingMatchSet::const_iterator it = accepted_matches.begin(); + it != accepted_matches.end(); ++it ) { - Rule* r = Rule::rule_table[accepted[i] - 1]; + AcceptIdx aidx = it->first; + MatchPos mpos = it->second; - DBG_LOG(DBG_RULES, "Checking rule: %v", r->id); + Rule* r = Rule::rule_table[aidx - 1]; - loop_over_list(r->patterns, j) - { - if ( ! accepted.is_member(r->patterns[j]->id) ) - continue; - - if ( (unsigned int) matchpos[i] > - r->patterns[j]->offset + r->patterns[j]->depth ) - continue; - - DBG_LOG(DBG_RULES, "All patterns of rule satisfied"); - } - - if ( ! matched.is_member(r) ) - matched.append(r); + if ( AllRulePatternsMatched(r, mpos, accepted_matches) ) + rule_matches.insert(r); } - loop_over_list(matched, j) + for ( set::const_iterator it = rule_matches.begin(); + it != rule_matches.end(); ++it ) { - Rule* r = matched[j]; + Rule* r = *it; loop_over_list(r->actions, rai) { - const RuleActionMIME* ram = dynamic_cast(r->actions[rai]); + const RuleActionMIME* ram = + dynamic_cast(r->actions[rai]); if ( ! ram ) continue; @@ -876,66 +882,40 @@ void RuleMatcher::Match(RuleEndpointState* state, Rule::PatternType type, DBG_LOG(DBG_RULES, "New pattern match found"); - // Build a joined AcceptingSet. - AcceptingSet accepted; - int_list matchpos; + AcceptingMatchSet accepted_matches; - loop_over_list(state->matchers, y) + loop_over_list(state->matchers, y ) { RuleEndpointState::Matcher* m = state->matchers[y]; - const AcceptingSet* ac = m->state->Accepted(); - - loop_over_list(*ac, k) - { - if ( ! accepted.is_member((*ac)[k]) ) - { - accepted.append((*ac)[k]); - matchpos.append((*m->state->MatchPositions())[k]); - } - } + const AcceptingMatchSet& ams = m->state->AcceptedMatches(); + accepted_matches.insert(ams.begin(), ams.end()); } // Determine the rules for which all patterns have matched. 
// This code should be fast enough as long as there are only very few // matched patterns per connection (which is a plausible assumption). - rule_list matched; + // Find rules for which patterns have matched. + set rule_matches; - loop_over_list(accepted, i) + for ( AcceptingMatchSet::const_iterator it = accepted_matches.begin(); + it != accepted_matches.end(); ++it ) { - Rule* r = Rule::rule_table[accepted[i] - 1]; + AcceptIdx aidx = it->first; + MatchPos mpos = it->second; - DBG_LOG(DBG_RULES, "Checking rule: %s", r->id); + Rule* r = Rule::rule_table[aidx - 1]; - // Check whether all patterns of the rule have matched. - loop_over_list(r->patterns, j) - { - if ( ! accepted.is_member(r->patterns[j]->id) ) - goto next_pattern; - - // See if depth is satisfied. - if ( (unsigned int) matchpos[i] > - r->patterns[j]->offset + r->patterns[j]->depth ) - goto next_pattern; - - DBG_LOG(DBG_RULES, "All patterns of rule satisfied"); - - // FIXME: How to check for offset ??? ### - } - - // If not already in the list of matching rules, add it. - if ( ! matched.is_member(r) ) - matched.append(r); - -next_pattern: - continue; + if ( AllRulePatternsMatched(r, mpos, accepted_matches) ) + rule_matches.insert(r); } // Check which of the matching rules really belong to any of our nodes. 
- loop_over_list(matched, j) + for ( set::const_iterator it = rule_matches.begin(); + it != rule_matches.end(); ++it ) { - Rule* r = matched[j]; + Rule* r = *it; DBG_LOG(DBG_RULES, "Accepted rule: %s", r->id); diff --git a/src/RuleMatcher.h b/src/RuleMatcher.h index 52e00f6bad..da2838cb6d 100644 --- a/src/RuleMatcher.h +++ b/src/RuleMatcher.h @@ -361,6 +361,9 @@ private: void DumpStateStats(BroFile* f, RuleHdrTest* hdr_test); + static bool AllRulePatternsMatched(const Rule* r, MatchPos matchpos, + const AcceptingMatchSet& ams); + int RE_level; bool parse_error; RuleHdrTest* root; diff --git a/src/analyzer/protocol/file/File.cc b/src/analyzer/protocol/file/File.cc index 4476043721..4ea8dffaa8 100644 --- a/src/analyzer/protocol/file/File.cc +++ b/src/analyzer/protocol/file/File.cc @@ -31,12 +31,25 @@ void File_Analyzer::DeliverStream(int len, const u_char* data, bool orig) if ( buffer_len == BUFFER_SIZE ) Identify(); } - return; + + if ( orig ) + file_id_orig = file_mgr->DataIn(data, len, GetAnalyzerTag(), Conn(), + orig, file_id_orig); + else + file_id_resp = file_mgr->DataIn(data, len, GetAnalyzerTag(), Conn(), + orig, file_id_resp); } void File_Analyzer::Undelivered(int seq, int len, bool orig) { TCP_ApplicationAnalyzer::Undelivered(seq, len, orig); + + if ( orig ) + file_id_orig = file_mgr->Gap(seq, len, GetAnalyzerTag(), Conn(), orig, + file_id_orig); + else + file_id_resp = file_mgr->Gap(seq, len, GetAnalyzerTag(), Conn(), orig, + file_id_resp); } void File_Analyzer::Done() @@ -45,6 +58,16 @@ void File_Analyzer::Done() if ( buffer_len && buffer_len != BUFFER_SIZE ) Identify(); + + if ( ! file_id_orig.empty() ) + file_mgr->EndOfFile(file_id_orig); + else + file_mgr->EndOfFile(GetAnalyzerTag(), Conn(), true); + + if ( ! 
file_id_resp.empty() ) + file_mgr->EndOfFile(file_id_resp); + else + file_mgr->EndOfFile(GetAnalyzerTag(), Conn(), false); } void File_Analyzer::Identify() @@ -61,49 +84,3 @@ void File_Analyzer::Identify() vl->append(new StringVal(match)); ConnectionEvent(file_transferred, vl); } - -IRC_Data::IRC_Data(Connection* conn) - : File_Analyzer("IRC_Data", conn) - { - } - -void IRC_Data::Done() - { - File_Analyzer::Done(); - file_mgr->EndOfFile(GetAnalyzerTag(), Conn()); - } - -void IRC_Data::DeliverStream(int len, const u_char* data, bool orig) - { - File_Analyzer::DeliverStream(len, data, orig); - file_mgr->DataIn(data, len, GetAnalyzerTag(), Conn(), orig); - } - -void IRC_Data::Undelivered(int seq, int len, bool orig) - { - File_Analyzer::Undelivered(seq, len, orig); - file_mgr->Gap(seq, len, GetAnalyzerTag(), Conn(), orig); - } - -FTP_Data::FTP_Data(Connection* conn) - : File_Analyzer("FTP_Data", conn) - { - } - -void FTP_Data::Done() - { - File_Analyzer::Done(); - file_mgr->EndOfFile(GetAnalyzerTag(), Conn()); - } - -void FTP_Data::DeliverStream(int len, const u_char* data, bool orig) - { - File_Analyzer::DeliverStream(len, data, orig); - file_mgr->DataIn(data, len, GetAnalyzerTag(), Conn(), orig); - } - -void FTP_Data::Undelivered(int seq, int len, bool orig) - { - File_Analyzer::Undelivered(seq, len, orig); - file_mgr->Gap(seq, len, GetAnalyzerTag(), Conn(), orig); - } diff --git a/src/analyzer/protocol/file/File.h b/src/analyzer/protocol/file/File.h index 7afbd569c4..9376dcc7c3 100644 --- a/src/analyzer/protocol/file/File.h +++ b/src/analyzer/protocol/file/File.h @@ -28,17 +28,15 @@ protected: static const int BUFFER_SIZE = 1024; char buffer[BUFFER_SIZE]; int buffer_len; + string file_id_orig; + string file_id_resp; }; class IRC_Data : public File_Analyzer { public: - IRC_Data(Connection* conn); - - virtual void Done(); - - virtual void DeliverStream(int len, const u_char* data, bool orig); - - virtual void Undelivered(int seq, int len, bool orig); + 
IRC_Data(Connection* conn) + : File_Analyzer("IRC_Data", conn) + { } static Analyzer* InstantiateAnalyzer(Connection* conn) { return new IRC_Data(conn); } @@ -46,13 +44,9 @@ public: class FTP_Data : public File_Analyzer { public: - FTP_Data(Connection* conn); - - virtual void Done(); - - virtual void DeliverStream(int len, const u_char* data, bool orig); - - virtual void Undelivered(int seq, int len, bool orig); + FTP_Data(Connection* conn) + : File_Analyzer("FTP_Data", conn) + { } static Analyzer* InstantiateAnalyzer(Connection* conn) { return new FTP_Data(conn); } diff --git a/src/analyzer/protocol/http/HTTP.cc b/src/analyzer/protocol/http/HTTP.cc index f676643b7c..feddeb00a9 100644 --- a/src/analyzer/protocol/http/HTTP.cc +++ b/src/analyzer/protocol/http/HTTP.cc @@ -243,10 +243,10 @@ int HTTP_Entity::Undelivered(int64_t len) return 0; if ( is_partial_content ) - file_mgr->Gap(body_length, len, + precomputed_file_id = file_mgr->Gap(body_length, len, http_message->MyHTTP_Analyzer()->GetAnalyzerTag(), http_message->MyHTTP_Analyzer()->Conn(), - http_message->IsOrig()); + http_message->IsOrig(), precomputed_file_id); else precomputed_file_id = file_mgr->Gap(body_length, len, http_message->MyHTTP_Analyzer()->GetAnalyzerTag(), @@ -306,15 +306,15 @@ void HTTP_Entity::SubmitData(int len, const char* buf) if ( is_partial_content ) { if ( send_size && instance_length > 0 ) - file_mgr->SetSize(instance_length, + precomputed_file_id = file_mgr->SetSize(instance_length, http_message->MyHTTP_Analyzer()->GetAnalyzerTag(), http_message->MyHTTP_Analyzer()->Conn(), - http_message->IsOrig()); + http_message->IsOrig(), precomputed_file_id); - file_mgr->DataIn(reinterpret_cast(buf), len, offset, + precomputed_file_id = file_mgr->DataIn(reinterpret_cast(buf), len, offset, http_message->MyHTTP_Analyzer()->GetAnalyzerTag(), http_message->MyHTTP_Analyzer()->Conn(), - http_message->IsOrig()); + http_message->IsOrig(), precomputed_file_id); offset += len; } @@ -583,9 +583,16 @@ void 
HTTP_Message::Done(const int interrupted, const char* detail) top_level->EndOfData(); if ( is_orig || MyHTTP_Analyzer()->HTTP_ReplyCode() != 206 ) - // multipart/byteranges may span multiple connections - file_mgr->EndOfFile(MyHTTP_Analyzer()->GetAnalyzerTag(), - MyHTTP_Analyzer()->Conn(), is_orig); + { + // multipart/byteranges may span multiple connections, so don't EOF. + HTTP_Entity* he = dynamic_cast(top_level); + + if ( he && ! he->FileID().empty() ) + file_mgr->EndOfFile(he->FileID()); + else + file_mgr->EndOfFile(MyHTTP_Analyzer()->GetAnalyzerTag(), + MyHTTP_Analyzer()->Conn(), is_orig); + } if ( http_message_done ) { @@ -663,8 +670,15 @@ void HTTP_Message::EndEntity(mime::MIME_Entity* entity) Done(); else if ( is_orig || MyHTTP_Analyzer()->HTTP_ReplyCode() != 206 ) - file_mgr->EndOfFile(MyHTTP_Analyzer()->GetAnalyzerTag(), - MyHTTP_Analyzer()->Conn(), is_orig); + { + HTTP_Entity* he = dynamic_cast(entity); + + if ( he && ! he->FileID().empty() ) + file_mgr->EndOfFile(he->FileID()); + else + file_mgr->EndOfFile(MyHTTP_Analyzer()->GetAnalyzerTag(), + MyHTTP_Analyzer()->Conn(), is_orig); + } } void HTTP_Message::SubmitHeader(mime::MIME_Header* h) diff --git a/src/analyzer/protocol/http/HTTP.h b/src/analyzer/protocol/http/HTTP.h index 48a611b63b..0318dc9601 100644 --- a/src/analyzer/protocol/http/HTTP.h +++ b/src/analyzer/protocol/http/HTTP.h @@ -46,6 +46,7 @@ public: int64_t BodyLength() const { return body_length; } int64_t HeaderLength() const { return header_length; } void SkipBody() { deliver_body = 0; } + const string& FileID() const { return precomputed_file_id; } protected: class UncompressedOutput; diff --git a/src/analyzer/protocol/ssl/ssl-analyzer.pac b/src/analyzer/protocol/ssl/ssl-analyzer.pac index 49104fa549..5f9d092440 100644 --- a/src/analyzer/protocol/ssl/ssl-analyzer.pac +++ b/src/analyzer/protocol/ssl/ssl-analyzer.pac @@ -231,15 +231,26 @@ refine connection SSL_Conn += { if ( certificates->size() == 0 ) return true; + ODesc common; + 
common.AddRaw("Analyzer::ANALYZER_SSL"); + common.Add(bro_analyzer()->Conn()->StartTime()); + common.AddRaw(${rec.is_orig} ? "T" : "F", 1); + bro_analyzer()->Conn()->IDString(&common); + for ( unsigned int i = 0; i < certificates->size(); ++i ) { const bytestring& cert = (*certificates)[i]; - string fid = file_mgr->DataIn(reinterpret_cast(cert.data()), cert.length(), - bro_analyzer()->GetAnalyzerTag(), bro_analyzer()->Conn(), - ${rec.is_orig}); + ODesc file_handle; + file_handle.Add(common.Description()); + file_handle.Add(i); - file_mgr->EndOfFile(fid); + string file_id = file_mgr->HashHandle(file_handle.Description()); + + file_mgr->DataIn(reinterpret_cast(cert.data()), + cert.length(), bro_analyzer()->GetAnalyzerTag(), + bro_analyzer()->Conn(), ${rec.is_orig}, file_id); + file_mgr->EndOfFile(file_id); } return true; %} diff --git a/src/file_analysis/File.cc b/src/file_analysis/File.cc index e8a7ea15ee..2772b55418 100644 --- a/src/file_analysis/File.cc +++ b/src/file_analysis/File.cc @@ -283,6 +283,7 @@ bool File::BufferBOF(const u_char* data, uint64 len) bool File::DetectMIME(const u_char* data, uint64 len) { RuleMatcher::MIME_Matches matches; + len = min(len, LookupFieldDefaultCount(bof_buffer_size_idx)); file_mgr->DetectMIME(data, len, &matches); if ( matches.empty() ) diff --git a/src/file_analysis/Manager.cc b/src/file_analysis/Manager.cc index 5ff7bd7186..3f04ebfc2b 100644 --- a/src/file_analysis/Manager.cc +++ b/src/file_analysis/Manager.cc @@ -54,8 +54,11 @@ void Manager::Terminate() { vector keys; - for ( IDMap::iterator it = id_map.begin(); it != id_map.end(); ++it ) - keys.push_back(it->first); + IterCookie* it = id_map.InitForIteration(); + HashKey* key; + + while ( id_map.NextEntry(key, it) ) + keys.push_back(static_cast(key->Key())); for ( size_t i = 0; i < keys.size(); ++i ) Timeout(keys[i], true); @@ -249,11 +252,12 @@ File* Manager::GetFile(const string& file_id, Connection* conn, if ( IsIgnored(file_id) ) return 0; - File* rval = 
id_map[file_id]; + File* rval = id_map.Lookup(file_id.c_str()); if ( ! rval ) { - rval = id_map[file_id] = new File(file_id, conn, tag, is_orig); + rval = new File(file_id, conn, tag, is_orig); + id_map.Insert(file_id.c_str(), rval); rval->ScheduleInactivityTimer(); if ( IsIgnored(file_id) ) @@ -272,12 +276,7 @@ File* Manager::GetFile(const string& file_id, Connection* conn, File* Manager::LookupFile(const string& file_id) const { - IDMap::const_iterator it = id_map.find(file_id); - - if ( it == id_map.end() ) - return 0; - - return it->second; + return id_map.Lookup(file_id.c_str()); } void Manager::Timeout(const string& file_id, bool is_terminating) @@ -308,37 +307,38 @@ void Manager::Timeout(const string& file_id, bool is_terminating) bool Manager::IgnoreFile(const string& file_id) { - if ( id_map.find(file_id) == id_map.end() ) + if ( ! id_map.Lookup(file_id.c_str()) ) return false; DBG_LOG(DBG_FILE_ANALYSIS, "Ignore FileID %s", file_id.c_str()); - ignored.insert(file_id); - + delete ignored.Insert(file_id.c_str(), new bool); return true; } bool Manager::RemoveFile(const string& file_id) { - IDMap::iterator it = id_map.find(file_id); + HashKey key(file_id.c_str()); + // Can't remove from the dictionary/map right away as invoking EndOfFile + // may cause some events to be executed which actually depend on the file + // still being in the dictionary/map. + File* f = static_cast(id_map.Lookup(&key)); - if ( it == id_map.end() ) + if ( ! 
f ) return false; DBG_LOG(DBG_FILE_ANALYSIS, "Remove FileID %s", file_id.c_str()); - it->second->EndOfFile(); - - delete it->second; - id_map.erase(file_id); - ignored.erase(file_id); - + f->EndOfFile(); + delete f; + id_map.Remove(&key); + delete static_cast(ignored.Remove(&key)); return true; } bool Manager::IsIgnored(const string& file_id) { - return ignored.find(file_id) != ignored.end(); + return ignored.Lookup(file_id.c_str()) != 0; } string Manager::GetFileID(analyzer::Tag tag, Connection* c, bool is_orig) diff --git a/src/file_analysis/Manager.h b/src/file_analysis/Manager.h index bb6aaab971..2137e81389 100644 --- a/src/file_analysis/Manager.h +++ b/src/file_analysis/Manager.h @@ -4,10 +4,9 @@ #define FILE_ANALYSIS_MANAGER_H #include -#include -#include #include +#include "Dict.h" #include "Net.h" #include "Conn.h" #include "Val.h" @@ -27,6 +26,9 @@ namespace file_analysis { +declare(PDict,bool); +declare(PDict,File); + /** * Main entry point for interacting with file analysis. */ @@ -288,8 +290,8 @@ public: protected: friend class FileTimer; - typedef set IDSet; - typedef map IDMap; + typedef PDict(bool) IDSet; + typedef PDict(File) IDMap; /** * Create a new file to be analyzed or retrieve an existing one. @@ -361,8 +363,8 @@ protected: private: - IDMap id_map; /**< Map file ID to file_analysis::File records. */ - IDSet ignored; /**< Ignored files. Will be finally removed on EOF. */ + PDict(File) id_map; /**< Map file ID to file_analysis::File records. */ + PDict(bool) ignored; /**< Ignored files. Will be finally removed on EOF. */ string current_file_id; /**< Hash of what get_file_handle event sets. */ RuleFileMagicState* magic_state; /**< File magic signature match state. 
*/ diff --git a/testing/btest/Baseline/doc.sphinx.mimestats/btest-doc.sphinx.mimestats#1 b/testing/btest/Baseline/doc.sphinx.mimestats/btest-doc.sphinx.mimestats#1 index 3cd6a49e11..3d6b9dffad 100644 --- a/testing/btest/Baseline/doc.sphinx.mimestats/btest-doc.sphinx.mimestats#1 +++ b/testing/btest/Baseline/doc.sphinx.mimestats/btest-doc.sphinx.mimestats#1 @@ -16,15 +16,15 @@ #empty_field (empty) #unset_field - #path mime_metrics - #open 2014-03-06-17-30-44 + #open 2014-04-21-21-34-08 #fields ts ts_delta mtype uniq_hosts hits bytes #types time interval string count count count - 1389719059.311698 300.000000 text/html 1 4 53070 + 1389719059.311698 300.000000 text/html 1 3 47335 1389719059.311698 300.000000 image/jpeg 1 1 186859 1389719059.311698 300.000000 application/pgp-signature 1 1 836 - 1389719059.311698 300.000000 text/plain 1 12 113982 + 1389719059.311698 300.000000 text/plain 1 13 119717 1389719059.311698 300.000000 image/gif 1 1 172 1389719059.311698 300.000000 image/png 1 9 82176 1389719059.311698 300.000000 image/x-icon 1 2 2300 - #close 2014-03-06-17-30-44 + #close 2014-04-21-21-34-08 diff --git a/testing/external/Makefile b/testing/external/Makefile index 9715b3d669..6c1ac97e60 100644 --- a/testing/external/Makefile +++ b/testing/external/Makefile @@ -27,4 +27,7 @@ status: coverage: @for repo in $(REPOS); do (cd $$repo && echo "Coverage for '$$repo' repo:" && make coverage); done +update-timing: + @for repo in $(REPOS); do (cd $$repo && echo "Updating timing for '$$repo' repo:" && make update-timing); done + .PHONY: all brief init pull push status coverage