Merge remote-tracking branch 'origin/topic/jsiwek/faf-perf'

* origin/topic/jsiwek/faf-perf:
  Adapt HTTP partial content to cache file analysis IDs.
  Adapt SSL analyzer to generate file analysis handles itself.
  Adapt more of HTTP analyzer to use cached file analysis IDs.
  Adapt IRC/FTP analyzers to cache file analysis IDs.
  Refactor regex/signature AcceptingSet data structure and usages.
  Enforce data size limit when checking files for MIME matches.
  Refactor file analysis file ID lookup.
This commit is contained in:
Robin Sommer 2014-04-24 16:12:30 -07:00
commit de20b4f0fb
21 changed files with 246 additions and 250 deletions

20
CHANGES
View file

@ -1,4 +1,24 @@
2.2-353 | 2014-04-24 16:12:30 -0700
* Adapt HTTP partial content to cache file analysis IDs. (Jon Siwek)
* Adapt SSL analyzer to generate file analysis handles itself. (Jon
Siwek)
* Adapt more of HTTP analyzer to use cached file analysis IDs. (Jon
Siwek)
* Adapt IRC/FTP analyzers to cache file analysis IDs. (Jon Siwek)
* Refactor regex/signature AcceptingSet data structure and usages.
(Jon Siwek)
* Enforce data size limit when checking files for MIME matches. (Jon
Siwek)
* Refactor file analysis file ID lookup. (Jon Siwek)
2.2-344 | 2014-04-22 20:13:30 -0700 2.2-344 | 2014-04-22 20:13:30 -0700
* Refactor various hex escaping code. (Jon Siwek) * Refactor various hex escaping code. (Jon Siwek)

View file

@ -1 +1 @@
2.2-344 2.2-353

@ -1 +1 @@
Subproject commit d99150801b7844e082b5421d1efe4050702d350e Subproject commit 5266f45a6839ea9b6f1825ba0fd448a721cb42be

View file

@ -52,22 +52,8 @@ export {
function get_file_handle(c: connection, is_orig: bool): string function get_file_handle(c: connection, is_orig: bool): string
{ {
set_session(c); # Unused. File handles are generated in the analyzer.
return "";
local depth: count;
if ( is_orig )
{
depth = c$ssl$client_depth;
++c$ssl$client_depth;
}
else
{
depth = c$ssl$server_depth;
++c$ssl$server_depth;
}
return cat(Analyzer::ANALYZER_SSL, c$start_time, is_orig, id_string(c$id), depth);
} }
function describe_file(f: fa_file): string function describe_file(f: fa_file): string

View file

@ -811,6 +811,17 @@ void Connection::Describe(ODesc* d) const
d->NL(); d->NL();
} }
void Connection::IDString(ODesc* d) const
	{
	// Appends, in this order: originator address, ":", originator port,
	// " > ", responder address, ":", responder port. Both ports go
	// through ntohs() before being added.
	static const char addr_port_sep[] = ":";
	static const char endpoint_sep[] = " > ";

	// Originator endpoint.
	d->Add(orig_addr);
	d->AddRaw(addr_port_sep, static_cast<int>(sizeof(addr_port_sep) - 1));
	d->Add(ntohs(orig_port));

	d->AddRaw(endpoint_sep, static_cast<int>(sizeof(endpoint_sep) - 1));

	// Responder endpoint.
	d->Add(resp_addr);
	d->AddRaw(addr_port_sep, static_cast<int>(sizeof(addr_port_sep) - 1));
	d->Add(ntohs(resp_port));
	}
bool Connection::Serialize(SerialInfo* info) const bool Connection::Serialize(SerialInfo* info) const
{ {
return SerialObj::Serialize(info); return SerialObj::Serialize(info);

View file

@ -204,6 +204,7 @@ public:
bool IsPersistent() { return persistent; } bool IsPersistent() { return persistent; }
void Describe(ODesc* d) const; void Describe(ODesc* d) const;
void IDString(ODesc* d) const;
TimerMgr* GetTimerMgr() const; TimerMgr* GetTimerMgr() const;

View file

@ -211,9 +211,10 @@ void DFA_State::Dump(FILE* f, DFA_Machine* m)
if ( accept ) if ( accept )
{ {
for ( int i = 0; i < accept->length(); ++i ) AcceptingSet::const_iterator it;
fprintf(f, "%s accept #%d",
i > 0 ? "," : "", int((*accept)[i])); for ( it = accept->begin(); it != accept->end(); ++it )
fprintf(f, "%s accept #%d", it == accept->begin() ? "" : ",", *it);
} }
fprintf(f, "\n"); fprintf(f, "\n");
@ -285,7 +286,7 @@ unsigned int DFA_State::Size()
{ {
return sizeof(*this) return sizeof(*this)
+ pad_size(sizeof(DFA_State*) * num_sym) + pad_size(sizeof(DFA_State*) * num_sym)
+ (accept ? pad_size(sizeof(int) * accept->length()) : 0) + (accept ? pad_size(sizeof(int) * accept->size()) : 0)
+ (nfa_states ? pad_size(sizeof(NFA_State*) * nfa_states->length()) : 0) + (nfa_states ? pad_size(sizeof(NFA_State*) * nfa_states->length()) : 0)
+ (meta_ec ? meta_ec->Size() : 0) + (meta_ec ? meta_ec->Size() : 0)
+ (centry ? padded_sizeof(CacheEntry) : 0); + (centry ? padded_sizeof(CacheEntry) : 0);
@ -470,33 +471,20 @@ int DFA_Machine::StateSetToDFA_State(NFA_state_list* state_set,
return 0; return 0;
AcceptingSet* accept = new AcceptingSet; AcceptingSet* accept = new AcceptingSet;
for ( int i = 0; i < state_set->length(); ++i ) for ( int i = 0; i < state_set->length(); ++i )
{ {
int acc = (*state_set)[i]->Accept(); int acc = (*state_set)[i]->Accept();
if ( acc != NO_ACCEPT ) if ( acc != NO_ACCEPT )
{ accept->insert(acc);
int j;
for ( j = 0; j < accept->length(); ++j )
if ( (*accept)[j] == acc )
break;
if ( j >= accept->length() )
// It's not already present.
accept->append(acc);
}
} }
if ( accept->length() == 0 ) if ( accept->empty() )
{ {
delete accept; delete accept;
accept = 0; accept = 0;
} }
else
{
accept->sort(int_list_cmp);
accept->resize(0);
}
DFA_State* ds = new DFA_State(state_count++, ec, state_set, accept); DFA_State* ds = new DFA_State(state_count++, ec, state_set, accept);
d = dfa_state_cache->Insert(ds, hash); d = dfa_state_cache->Insert(ds, hash);

View file

@ -3,6 +3,7 @@
#include "config.h" #include "config.h"
#include <stdlib.h> #include <stdlib.h>
#include <utility>
#include "RE.h" #include "RE.h"
#include "DFA.h" #include "DFA.h"
@ -266,6 +267,15 @@ void Specific_RE_Matcher::Dump(FILE* f)
dfa->Dump(f); dfa->Dump(f);
} }
inline void RE_Match_State::AddMatches(const AcceptingSet& as,
MatchPos position)
{
typedef std::pair<AcceptIdx, MatchPos> am_idx;
for ( AcceptingSet::const_iterator it = as.begin(); it != as.end(); ++it )
accepted_matches.insert(am_idx(*it, position));
}
bool RE_Match_State::Match(const u_char* bv, int n, bool RE_Match_State::Match(const u_char* bv, int n,
bool bol, bool eol, bool clear) bool bol, bool eol, bool clear)
{ {
@ -283,14 +293,9 @@ bool RE_Match_State::Match(const u_char* bv, int n,
current_state = dfa->StartState(); current_state = dfa->StartState();
const AcceptingSet* ac = current_state->Accept(); const AcceptingSet* ac = current_state->Accept();
if ( ac ) if ( ac )
{ AddMatches(*ac, 0);
loop_over_list(*ac, i)
{
accepted.append((*ac)[i]);
match_pos.append(0);
}
}
} }
else if ( clear ) else if ( clear )
@ -301,7 +306,7 @@ bool RE_Match_State::Match(const u_char* bv, int n,
current_pos = 0; current_pos = 0;
int old_matches = accepted.length(); size_t old_matches = accepted_matches.size();
int ec; int ec;
int m = bol ? n + 1 : n; int m = bol ? n + 1 : n;
@ -324,25 +329,17 @@ bool RE_Match_State::Match(const u_char* bv, int n,
break; break;
} }
if ( next_state->Accept() )
{
const AcceptingSet* ac = next_state->Accept(); const AcceptingSet* ac = next_state->Accept();
loop_over_list(*ac, i)
{ if ( ac )
if ( ! accepted.is_member((*ac)[i]) ) AddMatches(*ac, current_pos);
{
accepted.append((*ac)[i]);
match_pos.append(current_pos);
}
}
}
++current_pos; ++current_pos;
current_state = next_state; current_state = next_state;
} }
return accepted.length() != old_matches; return accepted_matches.size() != old_matches;
} }
int Specific_RE_Matcher::LongestMatch(const u_char* bv, int n) int Specific_RE_Matcher::LongestMatch(const u_char* bv, int n)
@ -399,7 +396,8 @@ unsigned int Specific_RE_Matcher::MemoryAllocation() const
+ equiv_class.Size() - padded_sizeof(EquivClass) + equiv_class.Size() - padded_sizeof(EquivClass)
+ (dfa ? dfa->MemoryAllocation() : 0) // this is ref counted; consider the bytes here? + (dfa ? dfa->MemoryAllocation() : 0) // this is ref counted; consider the bytes here?
+ padded_sizeof(*any_ccl) + padded_sizeof(*any_ccl)
+ accepted->MemoryAllocation(); + padded_sizeof(*accepted)
+ accepted->size() * padded_sizeof(AcceptingSet::key_type);
} }
RE_Matcher::RE_Matcher() RE_Matcher::RE_Matcher()

View file

@ -9,6 +9,9 @@
#include "CCL.h" #include "CCL.h"
#include "EquivClass.h" #include "EquivClass.h"
#include <set>
#include <map>
#include <ctype.h> #include <ctype.h>
typedef int (*cce_func)(int); typedef int (*cce_func)(int);
@ -33,7 +36,10 @@ extern int re_lex(void);
extern int clower(int); extern int clower(int);
extern void synerr(const char str[]); extern void synerr(const char str[]);
typedef int_list AcceptingSet; typedef int AcceptIdx;
typedef std::set<AcceptIdx> AcceptingSet;
typedef uint64 MatchPos;
typedef std::map<AcceptIdx, MatchPos> AcceptingMatchSet;
typedef name_list string_list; typedef name_list string_list;
typedef enum { MATCH_ANYWHERE, MATCH_EXACTLY, } match_type; typedef enum { MATCH_ANYWHERE, MATCH_EXACTLY, } match_type;
@ -135,8 +141,8 @@ public:
current_state = 0; current_state = 0;
} }
const AcceptingSet* Accepted() const { return &accepted; } const AcceptingMatchSet& AcceptedMatches() const
const int_list* MatchPositions() const { return &match_pos; } { return accepted_matches; }
// Returns the number of bytes fed into the matcher so far // Returns the number of bytes fed into the matcher so far
int Length() { return current_pos; } int Length() { return current_pos; }
@ -149,16 +155,16 @@ public:
{ {
current_pos = -1; current_pos = -1;
current_state = 0; current_state = 0;
accepted.clear(); accepted_matches.clear();
match_pos.clear();
} }
void AddMatches(const AcceptingSet& as, MatchPos position);
protected: protected:
DFA_Machine* dfa; DFA_Machine* dfa;
int* ecs; int* ecs;
AcceptingSet accepted; AcceptingMatchSet accepted_matches;
int_list match_pos;
DFA_State* current_state; DFA_State* current_state;
int current_pos; int current_pos;
}; };

View file

@ -594,6 +594,29 @@ RuleFileMagicState* RuleMatcher::InitFileMagic() const
return state; return state;
} }
bool RuleMatcher::AllRulePatternsMatched(const Rule* r, MatchPos matchpos,
const AcceptingMatchSet& ams)
{
DBG_LOG(DBG_RULES, "Checking rule: %s", r->id);
// Check whether all patterns of the rule have matched.
loop_over_list(r->patterns, j)
{
if ( ams.find(r->patterns[j]->id) == ams.end() )
return false;
// See if depth is satisfied.
if ( matchpos > r->patterns[j]->offset + r->patterns[j]->depth )
return false;
// FIXME: How to check for offset ??? ###
}
DBG_LOG(DBG_RULES, "All patterns of rule satisfied");
return true;
}
RuleMatcher::MIME_Matches* RuleMatcher::Match(RuleFileMagicState* state, RuleMatcher::MIME_Matches* RuleMatcher::Match(RuleFileMagicState* state,
const u_char* data, uint64 len, const u_char* data, uint64 len,
MIME_Matches* rval) const MIME_Matches* rval) const
@ -636,56 +659,39 @@ RuleMatcher::MIME_Matches* RuleMatcher::Match(RuleFileMagicState* state,
DBG_LOG(DBG_RULES, "New pattern match found"); DBG_LOG(DBG_RULES, "New pattern match found");
AcceptingSet accepted; AcceptingMatchSet accepted_matches;
int_list matchpos;
loop_over_list(state->matchers, y) loop_over_list(state->matchers, y)
{ {
RuleFileMagicState::Matcher* m = state->matchers[y]; RuleFileMagicState::Matcher* m = state->matchers[y];
const AcceptingSet* ac = m->state->Accepted(); const AcceptingMatchSet& ams = m->state->AcceptedMatches();
accepted_matches.insert(ams.begin(), ams.end());
loop_over_list(*ac, k)
{
if ( ! accepted.is_member((*ac)[k]) )
{
accepted.append((*ac)[k]);
matchpos.append((*m->state->MatchPositions())[k]);
}
}
} }
// Find rules for which patterns have matched. // Find rules for which patterns have matched.
rule_list matched; set<Rule*> rule_matches;
loop_over_list(accepted, i) for ( AcceptingMatchSet::const_iterator it = accepted_matches.begin();
it != accepted_matches.end(); ++it )
{ {
Rule* r = Rule::rule_table[accepted[i] - 1]; AcceptIdx aidx = it->first;
MatchPos mpos = it->second;
DBG_LOG(DBG_RULES, "Checking rule: %v", r->id); Rule* r = Rule::rule_table[aidx - 1];
loop_over_list(r->patterns, j) if ( AllRulePatternsMatched(r, mpos, accepted_matches) )
{ rule_matches.insert(r);
if ( ! accepted.is_member(r->patterns[j]->id) )
continue;
if ( (unsigned int) matchpos[i] >
r->patterns[j]->offset + r->patterns[j]->depth )
continue;
DBG_LOG(DBG_RULES, "All patterns of rule satisfied");
} }
if ( ! matched.is_member(r) ) for ( set<Rule*>::const_iterator it = rule_matches.begin();
matched.append(r); it != rule_matches.end(); ++it )
}
loop_over_list(matched, j)
{ {
Rule* r = matched[j]; Rule* r = *it;
loop_over_list(r->actions, rai) loop_over_list(r->actions, rai)
{ {
const RuleActionMIME* ram = dynamic_cast<const RuleActionMIME*>(r->actions[rai]); const RuleActionMIME* ram =
dynamic_cast<const RuleActionMIME*>(r->actions[rai]);
if ( ! ram ) if ( ! ram )
continue; continue;
@ -876,66 +882,40 @@ void RuleMatcher::Match(RuleEndpointState* state, Rule::PatternType type,
DBG_LOG(DBG_RULES, "New pattern match found"); DBG_LOG(DBG_RULES, "New pattern match found");
// Build a joined AcceptingSet. AcceptingMatchSet accepted_matches;
AcceptingSet accepted;
int_list matchpos;
loop_over_list(state->matchers, y) loop_over_list(state->matchers, y )
{ {
RuleEndpointState::Matcher* m = state->matchers[y]; RuleEndpointState::Matcher* m = state->matchers[y];
const AcceptingSet* ac = m->state->Accepted(); const AcceptingMatchSet& ams = m->state->AcceptedMatches();
accepted_matches.insert(ams.begin(), ams.end());
loop_over_list(*ac, k)
{
if ( ! accepted.is_member((*ac)[k]) )
{
accepted.append((*ac)[k]);
matchpos.append((*m->state->MatchPositions())[k]);
}
}
} }
// Determine the rules for which all patterns have matched. // Determine the rules for which all patterns have matched.
// This code should be fast enough as long as there are only very few // This code should be fast enough as long as there are only very few
// matched patterns per connection (which is a plausible assumption). // matched patterns per connection (which is a plausible assumption).
rule_list matched; // Find rules for which patterns have matched.
set<Rule*> rule_matches;
loop_over_list(accepted, i) for ( AcceptingMatchSet::const_iterator it = accepted_matches.begin();
it != accepted_matches.end(); ++it )
{ {
Rule* r = Rule::rule_table[accepted[i] - 1]; AcceptIdx aidx = it->first;
MatchPos mpos = it->second;
DBG_LOG(DBG_RULES, "Checking rule: %s", r->id); Rule* r = Rule::rule_table[aidx - 1];
// Check whether all patterns of the rule have matched. if ( AllRulePatternsMatched(r, mpos, accepted_matches) )
loop_over_list(r->patterns, j) rule_matches.insert(r);
{
if ( ! accepted.is_member(r->patterns[j]->id) )
goto next_pattern;
// See if depth is satisfied.
if ( (unsigned int) matchpos[i] >
r->patterns[j]->offset + r->patterns[j]->depth )
goto next_pattern;
DBG_LOG(DBG_RULES, "All patterns of rule satisfied");
// FIXME: How to check for offset ??? ###
}
// If not already in the list of matching rules, add it.
if ( ! matched.is_member(r) )
matched.append(r);
next_pattern:
continue;
} }
// Check which of the matching rules really belong to any of our nodes. // Check which of the matching rules really belong to any of our nodes.
loop_over_list(matched, j) for ( set<Rule*>::const_iterator it = rule_matches.begin();
it != rule_matches.end(); ++it )
{ {
Rule* r = matched[j]; Rule* r = *it;
DBG_LOG(DBG_RULES, "Accepted rule: %s", r->id); DBG_LOG(DBG_RULES, "Accepted rule: %s", r->id);

View file

@ -361,6 +361,9 @@ private:
void DumpStateStats(BroFile* f, RuleHdrTest* hdr_test); void DumpStateStats(BroFile* f, RuleHdrTest* hdr_test);
static bool AllRulePatternsMatched(const Rule* r, MatchPos matchpos,
const AcceptingMatchSet& ams);
int RE_level; int RE_level;
bool parse_error; bool parse_error;
RuleHdrTest* root; RuleHdrTest* root;

View file

@ -31,12 +31,25 @@ void File_Analyzer::DeliverStream(int len, const u_char* data, bool orig)
if ( buffer_len == BUFFER_SIZE ) if ( buffer_len == BUFFER_SIZE )
Identify(); Identify();
} }
return;
if ( orig )
file_id_orig = file_mgr->DataIn(data, len, GetAnalyzerTag(), Conn(),
orig, file_id_orig);
else
file_id_resp = file_mgr->DataIn(data, len, GetAnalyzerTag(), Conn(),
orig, file_id_resp);
} }
void File_Analyzer::Undelivered(int seq, int len, bool orig) void File_Analyzer::Undelivered(int seq, int len, bool orig)
{ {
TCP_ApplicationAnalyzer::Undelivered(seq, len, orig); TCP_ApplicationAnalyzer::Undelivered(seq, len, orig);
if ( orig )
file_id_orig = file_mgr->Gap(seq, len, GetAnalyzerTag(), Conn(), orig,
file_id_orig);
else
file_id_resp = file_mgr->Gap(seq, len, GetAnalyzerTag(), Conn(), orig,
file_id_resp);
} }
void File_Analyzer::Done() void File_Analyzer::Done()
@ -45,6 +58,16 @@ void File_Analyzer::Done()
if ( buffer_len && buffer_len != BUFFER_SIZE ) if ( buffer_len && buffer_len != BUFFER_SIZE )
Identify(); Identify();
if ( ! file_id_orig.empty() )
file_mgr->EndOfFile(file_id_orig);
else
file_mgr->EndOfFile(GetAnalyzerTag(), Conn(), true);
if ( ! file_id_resp.empty() )
file_mgr->EndOfFile(file_id_resp);
else
file_mgr->EndOfFile(GetAnalyzerTag(), Conn(), false);
} }
void File_Analyzer::Identify() void File_Analyzer::Identify()
@ -61,49 +84,3 @@ void File_Analyzer::Identify()
vl->append(new StringVal(match)); vl->append(new StringVal(match));
ConnectionEvent(file_transferred, vl); ConnectionEvent(file_transferred, vl);
} }
// Constructor: does nothing itself beyond passing the analyzer name
// "IRC_Data" and the connection on to the File_Analyzer base class.
IRC_Data::IRC_Data(Connection* conn)
: File_Analyzer("IRC_Data", conn)
{
}
// Runs the File_Analyzer teardown first, then calls file_mgr->EndOfFile()
// with this analyzer's tag and connection.
void IRC_Data::Done()
{
File_Analyzer::Done();
file_mgr->EndOfFile(GetAnalyzerTag(), Conn());
}
// Lets the File_Analyzer base handle the chunk first, then hands the same
// data/len to file_mgr->DataIn() together with this analyzer's tag, the
// connection and the 'orig' direction flag.
void IRC_Data::DeliverStream(int len, const u_char* data, bool orig)
{
File_Analyzer::DeliverStream(len, data, orig);
file_mgr->DataIn(data, len, GetAnalyzerTag(), Conn(), orig);
}
// Forwards the undelivered range to the File_Analyzer base, then reports
// the same seq/len to file_mgr->Gap() together with this analyzer's tag,
// the connection and the 'orig' direction flag.
void IRC_Data::Undelivered(int seq, int len, bool orig)
{
File_Analyzer::Undelivered(seq, len, orig);
file_mgr->Gap(seq, len, GetAnalyzerTag(), Conn(), orig);
}
// Constructor: does nothing itself beyond passing the analyzer name
// "FTP_Data" and the connection on to the File_Analyzer base class.
FTP_Data::FTP_Data(Connection* conn)
: File_Analyzer("FTP_Data", conn)
{
}
// Runs the File_Analyzer teardown first, then calls file_mgr->EndOfFile()
// with this analyzer's tag and connection.
void FTP_Data::Done()
{
File_Analyzer::Done();
file_mgr->EndOfFile(GetAnalyzerTag(), Conn());
}
// Lets the File_Analyzer base handle the chunk first, then hands the same
// data/len to file_mgr->DataIn() together with this analyzer's tag, the
// connection and the 'orig' direction flag.
void FTP_Data::DeliverStream(int len, const u_char* data, bool orig)
{
File_Analyzer::DeliverStream(len, data, orig);
file_mgr->DataIn(data, len, GetAnalyzerTag(), Conn(), orig);
}
// Forwards the undelivered range to the File_Analyzer base, then reports
// the same seq/len to file_mgr->Gap() together with this analyzer's tag,
// the connection and the 'orig' direction flag.
void FTP_Data::Undelivered(int seq, int len, bool orig)
{
File_Analyzer::Undelivered(seq, len, orig);
file_mgr->Gap(seq, len, GetAnalyzerTag(), Conn(), orig);
}

View file

@ -28,17 +28,15 @@ protected:
static const int BUFFER_SIZE = 1024; static const int BUFFER_SIZE = 1024;
char buffer[BUFFER_SIZE]; char buffer[BUFFER_SIZE];
int buffer_len; int buffer_len;
string file_id_orig;
string file_id_resp;
}; };
class IRC_Data : public File_Analyzer { class IRC_Data : public File_Analyzer {
public: public:
IRC_Data(Connection* conn); IRC_Data(Connection* conn)
: File_Analyzer("IRC_Data", conn)
virtual void Done(); { }
virtual void DeliverStream(int len, const u_char* data, bool orig);
virtual void Undelivered(int seq, int len, bool orig);
static Analyzer* InstantiateAnalyzer(Connection* conn) static Analyzer* InstantiateAnalyzer(Connection* conn)
{ return new IRC_Data(conn); } { return new IRC_Data(conn); }
@ -46,13 +44,9 @@ public:
class FTP_Data : public File_Analyzer { class FTP_Data : public File_Analyzer {
public: public:
FTP_Data(Connection* conn); FTP_Data(Connection* conn)
: File_Analyzer("FTP_Data", conn)
virtual void Done(); { }
virtual void DeliverStream(int len, const u_char* data, bool orig);
virtual void Undelivered(int seq, int len, bool orig);
static Analyzer* InstantiateAnalyzer(Connection* conn) static Analyzer* InstantiateAnalyzer(Connection* conn)
{ return new FTP_Data(conn); } { return new FTP_Data(conn); }

View file

@ -243,10 +243,10 @@ int HTTP_Entity::Undelivered(int64_t len)
return 0; return 0;
if ( is_partial_content ) if ( is_partial_content )
file_mgr->Gap(body_length, len, precomputed_file_id = file_mgr->Gap(body_length, len,
http_message->MyHTTP_Analyzer()->GetAnalyzerTag(), http_message->MyHTTP_Analyzer()->GetAnalyzerTag(),
http_message->MyHTTP_Analyzer()->Conn(), http_message->MyHTTP_Analyzer()->Conn(),
http_message->IsOrig()); http_message->IsOrig(), precomputed_file_id);
else else
precomputed_file_id = file_mgr->Gap(body_length, len, precomputed_file_id = file_mgr->Gap(body_length, len,
http_message->MyHTTP_Analyzer()->GetAnalyzerTag(), http_message->MyHTTP_Analyzer()->GetAnalyzerTag(),
@ -306,15 +306,15 @@ void HTTP_Entity::SubmitData(int len, const char* buf)
if ( is_partial_content ) if ( is_partial_content )
{ {
if ( send_size && instance_length > 0 ) if ( send_size && instance_length > 0 )
file_mgr->SetSize(instance_length, precomputed_file_id = file_mgr->SetSize(instance_length,
http_message->MyHTTP_Analyzer()->GetAnalyzerTag(), http_message->MyHTTP_Analyzer()->GetAnalyzerTag(),
http_message->MyHTTP_Analyzer()->Conn(), http_message->MyHTTP_Analyzer()->Conn(),
http_message->IsOrig()); http_message->IsOrig(), precomputed_file_id);
file_mgr->DataIn(reinterpret_cast<const u_char*>(buf), len, offset, precomputed_file_id = file_mgr->DataIn(reinterpret_cast<const u_char*>(buf), len, offset,
http_message->MyHTTP_Analyzer()->GetAnalyzerTag(), http_message->MyHTTP_Analyzer()->GetAnalyzerTag(),
http_message->MyHTTP_Analyzer()->Conn(), http_message->MyHTTP_Analyzer()->Conn(),
http_message->IsOrig()); http_message->IsOrig(), precomputed_file_id);
offset += len; offset += len;
} }
@ -583,9 +583,16 @@ void HTTP_Message::Done(const int interrupted, const char* detail)
top_level->EndOfData(); top_level->EndOfData();
if ( is_orig || MyHTTP_Analyzer()->HTTP_ReplyCode() != 206 ) if ( is_orig || MyHTTP_Analyzer()->HTTP_ReplyCode() != 206 )
// multipart/byteranges may span multiple connections {
// multipart/byteranges may span multiple connections, so don't EOF.
HTTP_Entity* he = dynamic_cast<HTTP_Entity*>(top_level);
if ( he && ! he->FileID().empty() )
file_mgr->EndOfFile(he->FileID());
else
file_mgr->EndOfFile(MyHTTP_Analyzer()->GetAnalyzerTag(), file_mgr->EndOfFile(MyHTTP_Analyzer()->GetAnalyzerTag(),
MyHTTP_Analyzer()->Conn(), is_orig); MyHTTP_Analyzer()->Conn(), is_orig);
}
if ( http_message_done ) if ( http_message_done )
{ {
@ -663,9 +670,16 @@ void HTTP_Message::EndEntity(mime::MIME_Entity* entity)
Done(); Done();
else if ( is_orig || MyHTTP_Analyzer()->HTTP_ReplyCode() != 206 ) else if ( is_orig || MyHTTP_Analyzer()->HTTP_ReplyCode() != 206 )
{
HTTP_Entity* he = dynamic_cast<HTTP_Entity*>(entity);
if ( he && ! he->FileID().empty() )
file_mgr->EndOfFile(he->FileID());
else
file_mgr->EndOfFile(MyHTTP_Analyzer()->GetAnalyzerTag(), file_mgr->EndOfFile(MyHTTP_Analyzer()->GetAnalyzerTag(),
MyHTTP_Analyzer()->Conn(), is_orig); MyHTTP_Analyzer()->Conn(), is_orig);
} }
}
void HTTP_Message::SubmitHeader(mime::MIME_Header* h) void HTTP_Message::SubmitHeader(mime::MIME_Header* h)
{ {

View file

@ -46,6 +46,7 @@ public:
int64_t BodyLength() const { return body_length; } int64_t BodyLength() const { return body_length; }
int64_t HeaderLength() const { return header_length; } int64_t HeaderLength() const { return header_length; }
void SkipBody() { deliver_body = 0; } void SkipBody() { deliver_body = 0; }
const string& FileID() const { return precomputed_file_id; }
protected: protected:
class UncompressedOutput; class UncompressedOutput;

View file

@ -231,15 +231,26 @@ refine connection SSL_Conn += {
if ( certificates->size() == 0 ) if ( certificates->size() == 0 )
return true; return true;
ODesc common;
common.AddRaw("Analyzer::ANALYZER_SSL");
common.Add(bro_analyzer()->Conn()->StartTime());
common.AddRaw(${rec.is_orig} ? "T" : "F", 1);
bro_analyzer()->Conn()->IDString(&common);
for ( unsigned int i = 0; i < certificates->size(); ++i ) for ( unsigned int i = 0; i < certificates->size(); ++i )
{ {
const bytestring& cert = (*certificates)[i]; const bytestring& cert = (*certificates)[i];
string fid = file_mgr->DataIn(reinterpret_cast<const u_char*>(cert.data()), cert.length(), ODesc file_handle;
bro_analyzer()->GetAnalyzerTag(), bro_analyzer()->Conn(), file_handle.Add(common.Description());
${rec.is_orig}); file_handle.Add(i);
file_mgr->EndOfFile(fid); string file_id = file_mgr->HashHandle(file_handle.Description());
file_mgr->DataIn(reinterpret_cast<const u_char*>(cert.data()),
cert.length(), bro_analyzer()->GetAnalyzerTag(),
bro_analyzer()->Conn(), ${rec.is_orig}, file_id);
file_mgr->EndOfFile(file_id);
} }
return true; return true;
%} %}

View file

@ -283,6 +283,7 @@ bool File::BufferBOF(const u_char* data, uint64 len)
bool File::DetectMIME(const u_char* data, uint64 len) bool File::DetectMIME(const u_char* data, uint64 len)
{ {
RuleMatcher::MIME_Matches matches; RuleMatcher::MIME_Matches matches;
len = min(len, LookupFieldDefaultCount(bof_buffer_size_idx));
file_mgr->DetectMIME(data, len, &matches); file_mgr->DetectMIME(data, len, &matches);
if ( matches.empty() ) if ( matches.empty() )

View file

@ -54,8 +54,11 @@ void Manager::Terminate()
{ {
vector<string> keys; vector<string> keys;
for ( IDMap::iterator it = id_map.begin(); it != id_map.end(); ++it ) IterCookie* it = id_map.InitForIteration();
keys.push_back(it->first); HashKey* key;
while ( id_map.NextEntry(key, it) )
keys.push_back(static_cast<const char*>(key->Key()));
for ( size_t i = 0; i < keys.size(); ++i ) for ( size_t i = 0; i < keys.size(); ++i )
Timeout(keys[i], true); Timeout(keys[i], true);
@ -249,11 +252,12 @@ File* Manager::GetFile(const string& file_id, Connection* conn,
if ( IsIgnored(file_id) ) if ( IsIgnored(file_id) )
return 0; return 0;
File* rval = id_map[file_id]; File* rval = id_map.Lookup(file_id.c_str());
if ( ! rval ) if ( ! rval )
{ {
rval = id_map[file_id] = new File(file_id, conn, tag, is_orig); rval = new File(file_id, conn, tag, is_orig);
id_map.Insert(file_id.c_str(), rval);
rval->ScheduleInactivityTimer(); rval->ScheduleInactivityTimer();
if ( IsIgnored(file_id) ) if ( IsIgnored(file_id) )
@ -272,12 +276,7 @@ File* Manager::GetFile(const string& file_id, Connection* conn,
File* Manager::LookupFile(const string& file_id) const File* Manager::LookupFile(const string& file_id) const
{ {
IDMap::const_iterator it = id_map.find(file_id); return id_map.Lookup(file_id.c_str());
if ( it == id_map.end() )
return 0;
return it->second;
} }
void Manager::Timeout(const string& file_id, bool is_terminating) void Manager::Timeout(const string& file_id, bool is_terminating)
@ -308,37 +307,38 @@ void Manager::Timeout(const string& file_id, bool is_terminating)
bool Manager::IgnoreFile(const string& file_id) bool Manager::IgnoreFile(const string& file_id)
{ {
if ( id_map.find(file_id) == id_map.end() ) if ( ! id_map.Lookup(file_id.c_str()) )
return false; return false;
DBG_LOG(DBG_FILE_ANALYSIS, "Ignore FileID %s", file_id.c_str()); DBG_LOG(DBG_FILE_ANALYSIS, "Ignore FileID %s", file_id.c_str());
ignored.insert(file_id); delete ignored.Insert(file_id.c_str(), new bool);
return true; return true;
} }
bool Manager::RemoveFile(const string& file_id) bool Manager::RemoveFile(const string& file_id)
{ {
IDMap::iterator it = id_map.find(file_id); HashKey key(file_id.c_str());
// Can't remove from the dictionary/map right away as invoking EndOfFile
// may cause some events to be executed which actually depend on the file
// still being in the dictionary/map.
File* f = static_cast<File*>(id_map.Lookup(&key));
if ( it == id_map.end() ) if ( ! f )
return false; return false;
DBG_LOG(DBG_FILE_ANALYSIS, "Remove FileID %s", file_id.c_str()); DBG_LOG(DBG_FILE_ANALYSIS, "Remove FileID %s", file_id.c_str());
it->second->EndOfFile(); f->EndOfFile();
delete f;
delete it->second; id_map.Remove(&key);
id_map.erase(file_id); delete static_cast<bool*>(ignored.Remove(&key));
ignored.erase(file_id);
return true; return true;
} }
bool Manager::IsIgnored(const string& file_id) bool Manager::IsIgnored(const string& file_id)
{ {
return ignored.find(file_id) != ignored.end(); return ignored.Lookup(file_id.c_str()) != 0;
} }
string Manager::GetFileID(analyzer::Tag tag, Connection* c, bool is_orig) string Manager::GetFileID(analyzer::Tag tag, Connection* c, bool is_orig)

View file

@ -4,10 +4,9 @@
#define FILE_ANALYSIS_MANAGER_H #define FILE_ANALYSIS_MANAGER_H
#include <string> #include <string>
#include <map>
#include <set>
#include <queue> #include <queue>
#include "Dict.h"
#include "Net.h" #include "Net.h"
#include "Conn.h" #include "Conn.h"
#include "Val.h" #include "Val.h"
@ -27,6 +26,9 @@
namespace file_analysis { namespace file_analysis {
declare(PDict,bool);
declare(PDict,File);
/** /**
* Main entry point for interacting with file analysis. * Main entry point for interacting with file analysis.
*/ */
@ -288,8 +290,8 @@ public:
protected: protected:
friend class FileTimer; friend class FileTimer;
typedef set<string> IDSet; typedef PDict(bool) IDSet;
typedef map<string, File*> IDMap; typedef PDict(File) IDMap;
/** /**
* Create a new file to be analyzed or retrieve an existing one. * Create a new file to be analyzed or retrieve an existing one.
@ -361,8 +363,8 @@ protected:
private: private:
IDMap id_map; /**< Map file ID to file_analysis::File records. */ PDict(File) id_map; /**< Map file ID to file_analysis::File records. */
IDSet ignored; /**< Ignored files. Will be finally removed on EOF. */ PDict(bool) ignored; /**< Ignored files. Will be finally removed on EOF. */
string current_file_id; /**< Hash of what get_file_handle event sets. */ string current_file_id; /**< Hash of what get_file_handle event sets. */
RuleFileMagicState* magic_state; /**< File magic signature match state. */ RuleFileMagicState* magic_state; /**< File magic signature match state. */

View file

@ -16,15 +16,15 @@
#empty_field (empty) #empty_field (empty)
#unset_field - #unset_field -
#path mime_metrics #path mime_metrics
#open 2014-03-06-17-30-44 #open 2014-04-21-21-34-08
#fields ts ts_delta mtype uniq_hosts hits bytes #fields ts ts_delta mtype uniq_hosts hits bytes
#types time interval string count count count #types time interval string count count count
1389719059.311698 300.000000 text/html 1 4 53070 1389719059.311698 300.000000 text/html 1 3 47335
1389719059.311698 300.000000 image/jpeg 1 1 186859 1389719059.311698 300.000000 image/jpeg 1 1 186859
1389719059.311698 300.000000 application/pgp-signature 1 1 836 1389719059.311698 300.000000 application/pgp-signature 1 1 836
1389719059.311698 300.000000 text/plain 1 12 113982 1389719059.311698 300.000000 text/plain 1 13 119717
1389719059.311698 300.000000 image/gif 1 1 172 1389719059.311698 300.000000 image/gif 1 1 172
1389719059.311698 300.000000 image/png 1 9 82176 1389719059.311698 300.000000 image/png 1 9 82176
1389719059.311698 300.000000 image/x-icon 1 2 2300 1389719059.311698 300.000000 image/x-icon 1 2 2300
#close 2014-03-06-17-30-44 #close 2014-04-21-21-34-08

View file

@ -27,4 +27,7 @@ status:
coverage: coverage:
@for repo in $(REPOS); do (cd $$repo && echo "Coverage for '$$repo' repo:" && make coverage); done @for repo in $(REPOS); do (cd $$repo && echo "Coverage for '$$repo' repo:" && make coverage); done
# Run the "update-timing" target of every repo listed in $(REPOS).
# The progress message names this target (it previously reused the
# "Coverage for ..." text of the coverage rule).
update-timing:
	@for repo in $(REPOS); do (cd $$repo && echo "Updating timing for '$$repo' repo:" && make update-timing); done

# update-timing creates no file of that name, so it is declared phony
# alongside the other targets.
.PHONY: all brief init pull push status coverage update-timing