mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 22:58:20 +00:00
A set of file analysis extensions.
- Enable the manager to associate analyzers with a MIME type. With that, one can now enable all analyzers for, e.g., "image/gif". This is exposed to script-land as Files::add_analyzers_for_mime_type(f: fa_file, mtype: string). For MIME types identified via libmagic, this happens automatically (via the file_new() handler in files/main.bro).
- Extend the analyzer API to better match that of protocol analyzers:
  - Add unique analyzer IDs so that we can refer to instances from script-land.
  - Add subtypes to Components so that a single analyzer implementation can support different types of analyzers internally.
  - Add an analyzer method SetTag() that allows setting the tag after construction.
  - Add Init() and Done() methods for consistency with what other classes offer.
- Add debug logging to the file_analysis stream.
TODO: test cases are missing for the new script-land functionality.
This commit is contained in:
parent
f0fe270029
commit
d34f23c8d4
12 changed files with 337 additions and 30 deletions
|
@ -1,6 +1,7 @@
|
|||
// See the file "COPYING" in the main distribution directory for copyright.
|
||||
|
||||
#include <string>
|
||||
#include <algorithm>
|
||||
|
||||
#include "File.h"
|
||||
#include "FileTimer.h"
|
||||
|
@ -82,7 +83,7 @@ File::File(const string& file_id, Connection* conn, analyzer::Tag tag,
|
|||
{
|
||||
StaticInit();
|
||||
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "Creating new File object %s", file_id.c_str());
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Creating new File object", file_id.c_str());
|
||||
|
||||
val = new RecordVal(fa_file_type);
|
||||
val->Assign(id_idx, new StringVal(file_id.c_str()));
|
||||
|
@ -100,7 +101,7 @@ File::File(const string& file_id, Connection* conn, analyzer::Tag tag,
|
|||
|
||||
File::~File()
|
||||
{
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "Destroying File object %s", id.c_str());
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Destroying File object", id.c_str());
|
||||
Unref(val);
|
||||
|
||||
// Queue may not be empty in the case where only content gaps were seen.
|
||||
|
@ -229,6 +230,7 @@ void File::IncrementByteCount(uint64 size, int field_idx)
|
|||
|
||||
// Logs the reported total size for this file and stores it, as a count
// value, in the total-bytes field of the file's record value.
//
// size: the total number of bytes to record.
void File::SetTotalBytes(uint64 size)
	{
	DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Total bytes %" PRIu64, id.c_str(), size);

	Val* total = new Val(size, TYPE_COUNT);
	val->Assign(total_bytes_idx, total);
	}
|
||||
|
||||
|
@ -251,11 +253,17 @@ void File::ScheduleInactivityTimer() const
|
|||
|
||||
// Logs the request and queues the addition of an analyzer to this file.
//
// tag:  identifies the kind of analyzer to add.
// args: record value handed through to the analyzer set's queue.
//
// Returns false without queuing anything once the file is flagged as
// done; otherwise returns whatever QueueAdd() reports.
bool File::AddAnalyzer(file_analysis::Tag tag, RecordVal* args)
	{
	DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Queuing addition of %s analyzer",
		id.c_str(), file_mgr->GetComponentName(tag));

	if ( done )
		return false;

	return analyzers.QueueAdd(tag, args);
	}
|
||||
|
||||
// Logs the request and queues the removal of an analyzer from this file.
//
// tag:  identifies the kind of analyzer to remove.
// args: record value handed through to the analyzer set's queue.
//
// Returns false without queuing anything once the file is flagged as
// done; otherwise returns whatever QueueRemove() reports.
bool File::RemoveAnalyzer(file_analysis::Tag tag, RecordVal* args)
	{
	DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Queuing remove of %s analyzer",
		id.c_str(), file_mgr->GetComponentName(tag));

	if ( done )
		return false;

	return analyzers.QueueRemove(tag, args);
	}
|
||||
|
||||
|
@ -284,16 +292,18 @@ bool File::DetectMIME(const u_char* data, uint64 len)
|
|||
|
||||
if ( mime )
|
||||
{
|
||||
// strip off charset
|
||||
const char* mime_end = strchr(mime, ';');
|
||||
|
||||
if ( mime_end )
|
||||
// strip off charset
|
||||
val->Assign(mime_type_idx, new StringVal(mime_end - mime, mime));
|
||||
else
|
||||
val->Assign(mime_type_idx, new StringVal(mime));
|
||||
StringVal* mime_val = mime_end ?
|
||||
new StringVal(mime_end - mime, mime) :
|
||||
new StringVal(mime);
|
||||
|
||||
val->Assign(mime_type_idx, mime_val);
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Detected MIME type %s", id.c_str(), mime_val->CheckString());
|
||||
}
|
||||
|
||||
return mime;
|
||||
return true;
|
||||
}
|
||||
|
||||
void File::ReplayBOF()
|
||||
|
@ -314,7 +324,6 @@ void File::ReplayBOF()
|
|||
val->Assign(bof_buffer_idx, new StringVal(bs));
|
||||
|
||||
DetectMIME(bs->Bytes(), bs->Len());
|
||||
|
||||
FileEvent(file_new);
|
||||
|
||||
for ( size_t i = 0; i < bof_buffer.chunks.size(); ++i )
|
||||
|
@ -333,6 +342,11 @@ void File::DataIn(const u_char* data, uint64 len, uint64 offset)
|
|||
first_chunk = false;
|
||||
}
|
||||
|
||||
// Log the chunk length, its offset, whether the file is complete, and the
// fmt_bytes() rendering of at most the first 40 bytes; "..." is appended
// when the chunk is longer than that. Each argument needs its own
// conversion: a missing '%' before PRIu64 would shift every later
// argument and feed a 64-bit integer to %s.
DBG_LOG(DBG_FILE_ANALYSIS,
	"[%s] %" PRIu64 " bytes in at offset %" PRIu64 "; %s [%s%s]",
	id.c_str(), len, offset,
	IsComplete() ? "complete" : "incomplete",
	fmt_bytes((const char*) data, min((uint64)40, len)),
	len > 40 ? "..." : "");
|
||||
|
||||
file_analysis::Analyzer* a = 0;
|
||||
IterCookie* c = analyzers.InitForIteration();
|
||||
|
||||
|
@ -367,6 +381,11 @@ void File::DataIn(const u_char* data, uint64 len)
|
|||
missed_bof = false;
|
||||
}
|
||||
|
||||
// Log the chunk length, whether the file is complete, and the fmt_bytes()
// rendering of at most the first 40 bytes. The second %s inside the
// brackets consumes the "..." marker that is passed when the chunk is
// longer than 40 bytes; without it that argument is silently dropped.
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] %" PRIu64 " bytes in; %s [%s%s]",
	id.c_str(), len,
	IsComplete() ? "complete" : "incomplete",
	fmt_bytes((const char*) data, min((uint64)40, len)),
	len > 40 ? "..." : "");
|
||||
|
||||
file_analysis::Analyzer* a = 0;
|
||||
IterCookie* c = analyzers.InitForIteration();
|
||||
|
||||
|
@ -391,6 +410,8 @@ void File::DataIn(const u_char* data, uint64 len)
|
|||
|
||||
void File::EndOfFile()
|
||||
{
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] End of file", id.c_str());
|
||||
|
||||
if ( done )
|
||||
return;
|
||||
|
||||
|
@ -417,6 +438,9 @@ void File::EndOfFile()
|
|||
|
||||
void File::Gap(uint64 offset, uint64 len)
|
||||
{
|
||||
// Arguments follow the order of the conversions in the message: the gap
// size (len) first, then the offset at which it occurred.
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Gap of size %" PRIu64 " at offset %" PRIu64,
	id.c_str(), len, offset);
|
||||
|
||||
analyzers.DrainModifications();
|
||||
|
||||
// If we were buffering the beginning of the file, a gap means we've got
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue