A set of file analysis extensions.

- Enable manager to associate analyzers with a MIME type. With that,
  one can now say enable all analyzers for, e.g., "image/gif". This is
  exposed to script-land as

    Files::add_analyzers_for_mime_type(f: fa_file, mtype: string)

  For MIME types identified via libmagic, this happens automatically
  (via the file_new() handler in files/main.bro).

- Extend the analyzer API to better match that of protocol analyzers:

    - Adding unique analyzer IDs so that we can refer to instances
      from script-land.

    - Adding subtypes to Components so that a single analyzer
      implementation can support different types of analyzers
      internally.

    - Add an analyzer method SetTag() that allows to set the tag after
      construction.

    - Adding Init() and Done() methods for consistency with what other
      classes offer.

- Add debug logging to the file_analysis stream.

TODO: test cases missing for the new script-land functionality.
This commit is contained in:
Robin Sommer 2013-11-26 11:16:58 -08:00
parent f0fe270029
commit d34f23c8d4
12 changed files with 337 additions and 30 deletions

View file

@ -1,6 +1,7 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include <string>
#include <algorithm>
#include "File.h"
#include "FileTimer.h"
@ -82,7 +83,7 @@ File::File(const string& file_id, Connection* conn, analyzer::Tag tag,
{
StaticInit();
DBG_LOG(DBG_FILE_ANALYSIS, "Creating new File object %s", file_id.c_str());
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Creating new File object", file_id.c_str());
val = new RecordVal(fa_file_type);
val->Assign(id_idx, new StringVal(file_id.c_str()));
@ -100,7 +101,7 @@ File::File(const string& file_id, Connection* conn, analyzer::Tag tag,
File::~File()
{
DBG_LOG(DBG_FILE_ANALYSIS, "Destroying File object %s", id.c_str());
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Destroying File object", id.c_str());
Unref(val);
// Queue may not be empty in the case where only content gaps were seen.
@ -229,6 +230,7 @@ void File::IncrementByteCount(uint64 size, int field_idx)
void File::SetTotalBytes(uint64 size)
{
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Total bytes %" PRIu64, id.c_str(), size);
val->Assign(total_bytes_idx, new Val(size, TYPE_COUNT));
}
@ -251,11 +253,17 @@ void File::ScheduleInactivityTimer() const
bool File::AddAnalyzer(file_analysis::Tag tag, RecordVal* args)
{
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Queuing addition of %s analyzer",
id.c_str(), file_mgr->GetComponentName(tag));
return done ? false : analyzers.QueueAdd(tag, args);
}
bool File::RemoveAnalyzer(file_analysis::Tag tag, RecordVal* args)
{
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Queuing remove of %s analyzer",
id.c_str(), file_mgr->GetComponentName(tag));
return done ? false : analyzers.QueueRemove(tag, args);
}
@ -284,16 +292,18 @@ bool File::DetectMIME(const u_char* data, uint64 len)
if ( mime )
{
// strip off charset
const char* mime_end = strchr(mime, ';');
if ( mime_end )
// strip off charset
val->Assign(mime_type_idx, new StringVal(mime_end - mime, mime));
else
val->Assign(mime_type_idx, new StringVal(mime));
StringVal* mime_val = mime_end ?
new StringVal(mime_end - mime, mime) :
new StringVal(mime);
val->Assign(mime_type_idx, mime_val);
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Detected MIME type %s", id.c_str(), mime_val->CheckString());
}
return mime;
return true;
}
void File::ReplayBOF()
@ -314,7 +324,6 @@ void File::ReplayBOF()
val->Assign(bof_buffer_idx, new StringVal(bs));
DetectMIME(bs->Bytes(), bs->Len());
FileEvent(file_new);
for ( size_t i = 0; i < bof_buffer.chunks.size(); ++i )
@ -333,6 +342,11 @@ void File::DataIn(const u_char* data, uint64 len, uint64 offset)
first_chunk = false;
}
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] %" PRIu64 " bytes in at offset" PRIu64 "; %s [%s]",
id.c_str(), len, offset,
IsComplete() ? "complete" : "incomplete",
fmt_bytes((const char*) data, min((uint64)40, len)), len > 40 ? "..." : "");
file_analysis::Analyzer* a = 0;
IterCookie* c = analyzers.InitForIteration();
@ -367,6 +381,11 @@ void File::DataIn(const u_char* data, uint64 len)
missed_bof = false;
}
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] %" PRIu64 " bytes in; %s [%s]",
id.c_str(), len,
IsComplete() ? "complete" : "incomplete",
fmt_bytes((const char*) data, min((uint64)40, len)), len > 40 ? "..." : "");
file_analysis::Analyzer* a = 0;
IterCookie* c = analyzers.InitForIteration();
@ -391,6 +410,8 @@ void File::DataIn(const u_char* data, uint64 len)
void File::EndOfFile()
{
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] End of file", id.c_str());
if ( done )
return;
@ -417,6 +438,9 @@ void File::EndOfFile()
void File::Gap(uint64 offset, uint64 len)
{
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Gap of size %" PRIu64 " at offset %" PRIu64,
id.c_str(), offset, len);
analyzers.DrainModifications();
// If we were buffering the beginning of the file, a gap means we've got